// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2020, The Linux Foundation. All rights reserved.
 */
5 #include <linux/kernel.h>
6 #include <linux/sizes.h>
7 #include <linux/videodev2.h>
10 #include "hfi_plat_bufs.h"
/* Minimum input and encoder output buffer counts. */
#define MIN_INPUT_BUFFERS		4
#define MIN_ENC_OUTPUT_BUFFERS		4

/*
 * UBWC tile dimensions and metadata plane alignment multiples; see the
 * ubwc_metadata_plane_*() helpers in this file for how they are applied.
 */
#define NV12_UBWC_Y_TILE_WIDTH		32
#define NV12_UBWC_Y_TILE_HEIGHT		8
#define NV12_UBWC_UV_TILE_WIDTH		16
#define NV12_UBWC_UV_TILE_HEIGHT	8
#define TP10_UBWC_Y_TILE_WIDTH		48
#define TP10_UBWC_Y_TILE_HEIGHT		4
#define METADATA_STRIDE_MULTIPLE	64
#define METADATA_HEIGHT_MULTIPLE	16
/* Alignment applied to the internal buffer sizes computed in this file. */
#define HFI_DMA_ALIGNMENT		256

/*
 * Line buffer scale factors; the SIZE_*_LB_* macros and size_*_lb_*()
 * helpers multiply them by a count derived from the frame width or height.
 */
#define MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE	64
#define MAX_FE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE	64
#define MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE	64
#define MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE	640
#define MAX_FE_NBR_DATA_CB_LINE_BUFFER_SIZE	320
#define MAX_FE_NBR_DATA_CR_LINE_BUFFER_SIZE	320

#define MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE	(128 / 8)
#define MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE	(128 / 8)
#define MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE	(128 / 8)

#define MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE	(64 * 2 * 3)
#define MAX_PE_NBR_DATA_LCU32_LINE_BUFFER_SIZE	(32 * 2 * 3)
#define MAX_PE_NBR_DATA_LCU16_LINE_BUFFER_SIZE	(16 * 2 * 3)

#define MAX_TILE_COLUMNS		32 /* 8K/256 */

/* Upper bound applied to the VPP command buffer sizes. */
#define VPP_CMD_MAX_SIZE		BIT(20)
#define NUM_HW_PIC_BUF			32
/* Lower bound, in pixels, used when sizing the decoder bin buffers. */
#define BIN_BUFFER_THRESHOLD		(1280 * 736)
#define H264D_MAX_SLICE			1800
/* sizeof(h264d_buftab_t) aligned to 256 */
#define SIZE_H264D_BUFTAB_T		256
/* sizeof(h264d_hw_pic_t) aligned to 32 */
#define SIZE_H264D_HW_PIC_T		BIT(11)
#define SIZE_H264D_BSE_CMD_PER_BUF	(32 * 4)
#define SIZE_H264D_VPP_CMD_PER_BUF	512

/* Line Buffer definitions, One for Luma and 1/2 for each Chroma */
#define SIZE_H264D_LB_FE_TOP_DATA(width, height)	\
	(MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE * ALIGN((width), 16) * 3)

#define SIZE_H264D_LB_FE_TOP_CTRL(width, height)	\
	(MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4))

#define SIZE_H264D_LB_FE_LEFT_CTRL(width, height)	\
	(MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((height) + 15) >> 4))

#define SIZE_H264D_LB_SE_TOP_CTRL(width, height)	\
	(MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4))

#define SIZE_H264D_LB_SE_LEFT_CTRL(width, height)	\
	(MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((height) + 15) >> 4))

#define SIZE_H264D_LB_PE_TOP_DATA(width, height)	\
	(MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4))

#define SIZE_H264D_LB_VSP_TOP(width, height)	(((((width) + 15) >> 4) << 7))

#define SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height)	\
	(ALIGN((height), 16) * 32)

#define SIZE_H264D_QP(width, height)	\
	((((width) + 63) >> 6) * (((height) + 63) >> 6) * 128)

/* Total size of NUM_HW_PIC_BUF entries of @size_per_buf each. */
#define SIZE_HW_PIC(size_per_buf)	(NUM_HW_PIC_BUF * (size_per_buf))

/*
 * Multipliers applied to the YUV size to obtain size_bin_hdr and
 * size_bin_res in size_h264d_hw_bin_buffer().
 */
#define H264_CABAC_HDR_RATIO_HD_TOT	1
#define H264_CABAC_RES_RATIO_HD_TOT	3
/*
 * Some content needs more bin buffer, but limit buffer
 * size for high resolution
 */
#define NUM_SLIST_BUF_H264		(256 + 32)
#define SIZE_SLIST_BUF_H264		512
#define LCU_MAX_SIZE_PELS		64
#define LCU_MIN_SIZE_PELS		16
#define SIZE_SEI_USERDATA		4096

#define H265D_MAX_SLICE			3600
#define SIZE_H265D_HW_PIC_T		SIZE_H264D_HW_PIC_T
#define SIZE_H265D_BSE_CMD_PER_BUF	(16 * sizeof(u32))
#define SIZE_H265D_VPP_CMD_PER_BUF	256

#define SIZE_H265D_LB_FE_TOP_DATA(width, height)	\
	(MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE * (ALIGN(width, 64) + 8) * 2)

#define SIZE_H265D_LB_FE_TOP_CTRL(width, height)	\
	(MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE *	\
	(ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS))

#define SIZE_H265D_LB_FE_LEFT_CTRL(width, height)	\
	(MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE *	\
	(ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS))

#define SIZE_H265D_LB_SE_TOP_CTRL(width, height)	\
	((LCU_MAX_SIZE_PELS / 8 * (128 / 8)) * (((width) + 15) >> 4))
115 static inline u32 size_h265d_lb_se_left_ctrl(u32 width, u32 height)
119 x = ((height + 16 - 1) / 8) * MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE;
120 y = ((height + 32 - 1) / 8) * MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE;
121 z = ((height + 64 - 1) / 8) * MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE;
123 return max3(x, y, z);
#define SIZE_H265D_LB_PE_TOP_DATA(width, height)	\
	(MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE *	\
	(ALIGN(width, LCU_MIN_SIZE_PELS) / LCU_MIN_SIZE_PELS))

#define SIZE_H265D_LB_VSP_TOP(width, height)	((((width) + 63) >> 6) * 128)

#define SIZE_H265D_LB_VSP_LEFT(width, height)	((((height) + 63) >> 6) * 128)

/* The H.265 decoder reuses the H.264 formulas for these two buffers. */
#define SIZE_H265D_LB_RECON_DMA_METADATA_WR(width, height)	\
	SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height)

#define SIZE_H265D_QP(width, height)	SIZE_H264D_QP(width, height)

/*
 * Multipliers applied to the YUV size to obtain size_bin_hdr and
 * size_bin_res in size_h265d_hw_bin_buffer().
 */
#define H265_CABAC_HDR_RATIO_HD_TOT	2
#define H265_CABAC_RES_RATIO_HD_TOT	2

/*
 * Some content needs more bin buffer, but limit buffer size
 * for high resolution
 */
#define SIZE_SLIST_BUF_H265		BIT(10)
#define NUM_SLIST_BUF_H265		(80 + 20)
#define H265_NUM_TILE_COL		32
#define H265_NUM_TILE_ROW		128
#define H265_NUM_TILE	(H265_NUM_TILE_ROW * H265_NUM_TILE_COL + 1)
152 static inline u32 size_vpxd_lb_fe_left_ctrl(u32 width, u32 height)
156 x = ((height + 15) >> 4) * MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE;
157 y = ((height + 31) >> 5) * MAX_FE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE;
158 z = ((height + 63) >> 6) * MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE;
160 return max3(x, y, z);
/* VP8/VP9 top control line buffer sizes, both derived from the width only. */
#define SIZE_VPXD_LB_FE_TOP_CTRL(width, height)	\
	(((ALIGN(width, 64) + 8) * 10 * 2)) /* small line */
#define SIZE_VPXD_LB_SE_TOP_CTRL(width, height)	\
	((((width) + 15) >> 4) * MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE)
168 static inline u32 size_vpxd_lb_se_left_ctrl(u32 width, u32 height)
172 x = ((height + 15) >> 4) * MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE;
173 y = ((height + 31) >> 5) * MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE;
174 z = ((height + 63) >> 6) * MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE;
176 return max3(x, y, z);
/* VP8/VP9 decoder line buffer sizes. */
#define SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height)	\
	ALIGN((ALIGN(height, 16) / (4 / 2)) * 64, 32)
#define SIZE_VP8D_LB_FE_TOP_DATA(width, height)	\
	((ALIGN(width, 16) + 8) * 10 * 2)
#define SIZE_VP9D_LB_FE_TOP_DATA(width, height)	\
	((ALIGN(ALIGN(width, 16), 64) + 8) * 10 * 2)
#define SIZE_VP8D_LB_PE_TOP_DATA(width, height)	\
	((ALIGN(width, 16) >> 4) * 64)
#define SIZE_VP9D_LB_PE_TOP_DATA(width, height)	\
	((ALIGN(ALIGN(width, 16), 64) >> 6) * 176)
#define SIZE_VP8D_LB_VSP_TOP(width, height)	\
	(((ALIGN(width, 16) >> 4) * 64 / 2) + 256)
#define SIZE_VP9D_LB_VSP_TOP(width, height)	\
	(((ALIGN(ALIGN(width, 16), 64) >> 6) * 64 * 8) + 256)

/* Fixed size computed from constant 8192x4320 dimensions. */
#define HFI_IRIS2_VP9D_COMV_SIZE	\
	((((8192 + 63) >> 6) * ((4320 + 63) >> 6) * 8 * 8 * 2 * 8))

/* Scale factors applied to the bin buffers in vpxd_scratch_size(). */
#define VPX_DECODER_FRAME_CONCURENCY_LVL		2
#define VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_NUM	1
#define VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_DEN	2
#define VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_NUM	3
#define VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_DEN	2

#define VP8_NUM_FRAME_INFO_BUF			(5 + 1)
#define VP9_NUM_FRAME_INFO_BUF			32
#define VP8_NUM_PROBABILITY_TABLE_BUF		VP8_NUM_FRAME_INFO_BUF
#define VP9_NUM_PROBABILITY_TABLE_BUF		(VP9_NUM_FRAME_INFO_BUF + 4)
#define VP8_PROB_TABLE_SIZE			3840
#define VP9_PROB_TABLE_SIZE			3840

#define VP9_UDC_HEADER_BUF_SIZE			(3 * 128)
#define MAX_SUPERFRAME_HEADER_LEN		34
#define CCE_TILE_OFFSET_SIZE			ALIGN(32 * 4 * 4, 32)

#define QMATRIX_SIZE				(sizeof(u32) * 128 + 256)
#define MP2D_QPDUMP_SIZE			115200
#define HFI_IRIS2_ENC_PERSIST_SIZE		204800
#define HFI_MAX_COL_FRAME			6
#define HFI_VENUS_VENC_TRE_WB_BUFF_SIZE		(65 << 4) /* in Bytes */
#define HFI_VENUS_VENC_DB_LINE_BUFF_PER_MB	512
#define HFI_VENUS_VPPSG_MAX_REGISTERS		2048
#define HFI_VENUS_WIDTH_ALIGNMENT		128
#define HFI_VENUS_WIDTH_TEN_BIT_ALIGNMENT	192
#define HFI_VENUS_HEIGHT_ALIGNMENT		32

#define SYSTEM_LAL_TILE10	192
/* Number of 16x16 macroblocks in a 1280x720 and a 4096x2304 frame. */
#define NUM_MBS_720P		(((1280 + 15) >> 4) * ((720 + 15) >> 4))
#define NUM_MBS_4K		(((4096 + 15) >> 4) * ((2304 + 15) >> 4))
#define MB_SIZE_IN_PIXEL	(16 * 16)
#define HDR10PLUS_PAYLOAD_SIZE		1024
#define HDR10_HIST_EXTRADATA_SIZE	4096
232 static u32 size_vpss_lb(u32 width, u32 height, u32 num_vpp_pipes)
234 u32 vpss_4tap_top_buffer_size, vpss_div2_top_buffer_size;
235 u32 vpss_4tap_left_buffer_size, vpss_div2_left_buffer_size;
236 u32 opb_wr_top_line_luma_buf_size, opb_wr_top_line_chroma_buf_size;
237 u32 opb_lb_wr_llb_y_buffer_size, opb_lb_wr_llb_uv_buffer_size;
238 u32 macrotiling_size;
241 vpss_4tap_top_buffer_size = 0;
242 vpss_div2_top_buffer_size = 0;
243 vpss_4tap_left_buffer_size = 0;
244 vpss_div2_left_buffer_size = 0;
246 macrotiling_size = 32;
247 opb_wr_top_line_luma_buf_size =
248 ALIGN(width, macrotiling_size) / macrotiling_size * 256;
249 opb_wr_top_line_luma_buf_size =
250 ALIGN(opb_wr_top_line_luma_buf_size, HFI_DMA_ALIGNMENT) +
251 (MAX_TILE_COLUMNS - 1) * 256;
252 opb_wr_top_line_luma_buf_size =
253 max(opb_wr_top_line_luma_buf_size, (32 * ALIGN(height, 16)));
254 opb_wr_top_line_chroma_buf_size = opb_wr_top_line_luma_buf_size;
255 opb_lb_wr_llb_y_buffer_size = ALIGN((ALIGN(height, 16) / 2) * 64, 32);
256 opb_lb_wr_llb_uv_buffer_size = opb_lb_wr_llb_y_buffer_size;
257 size = num_vpp_pipes *
258 2 * (vpss_4tap_top_buffer_size + vpss_div2_top_buffer_size) +
259 2 * (vpss_4tap_left_buffer_size + vpss_div2_left_buffer_size) +
260 opb_wr_top_line_luma_buf_size +
261 opb_wr_top_line_chroma_buf_size +
262 opb_lb_wr_llb_uv_buffer_size +
263 opb_lb_wr_llb_y_buffer_size;
268 static u32 size_h264d_hw_bin_buffer(u32 width, u32 height)
270 u32 size_yuv, size_bin_hdr, size_bin_res;
274 product = width * height;
275 size_yuv = (product <= BIN_BUFFER_THRESHOLD) ?
276 ((BIN_BUFFER_THRESHOLD * 3) >> 1) : ((product * 3) >> 1);
278 size_bin_hdr = size_yuv * H264_CABAC_HDR_RATIO_HD_TOT;
279 size_bin_res = size_yuv * H264_CABAC_RES_RATIO_HD_TOT;
280 size_bin_hdr = ALIGN(size_bin_hdr, HFI_DMA_ALIGNMENT);
281 size_bin_res = ALIGN(size_bin_res, HFI_DMA_ALIGNMENT);
282 size = size_bin_hdr + size_bin_res;
287 static u32 h264d_scratch_size(u32 width, u32 height, bool is_interlaced)
289 u32 aligned_width = ALIGN(width, 16);
290 u32 aligned_height = ALIGN(height, 16);
294 size = size_h264d_hw_bin_buffer(aligned_width, aligned_height);
299 static u32 size_h265d_hw_bin_buffer(u32 width, u32 height)
301 u32 size_yuv, size_bin_hdr, size_bin_res;
305 product = width * height;
306 size_yuv = (product <= BIN_BUFFER_THRESHOLD) ?
307 ((BIN_BUFFER_THRESHOLD * 3) >> 1) : ((product * 3) >> 1);
308 size_bin_hdr = size_yuv * H265_CABAC_HDR_RATIO_HD_TOT;
309 size_bin_res = size_yuv * H265_CABAC_RES_RATIO_HD_TOT;
310 size_bin_hdr = ALIGN(size_bin_hdr, HFI_DMA_ALIGNMENT);
311 size_bin_res = ALIGN(size_bin_res, HFI_DMA_ALIGNMENT);
312 size = size_bin_hdr + size_bin_res;
317 static u32 h265d_scratch_size(u32 width, u32 height, bool is_interlaced)
319 u32 aligned_width = ALIGN(width, 16);
320 u32 aligned_height = ALIGN(height, 16);
324 size = size_h265d_hw_bin_buffer(aligned_width, aligned_height);
329 static u32 vpxd_scratch_size(u32 width, u32 height, bool is_interlaced)
331 u32 aligned_width = ALIGN(width, 16);
332 u32 aligned_height = ALIGN(height, 16);
333 u32 size_yuv = aligned_width * aligned_height * 3 / 2;
336 if (!is_interlaced) {
337 u32 binbuffer1_size, binbufer2_size;
339 binbuffer1_size = max_t(u32, size_yuv,
340 ((BIN_BUFFER_THRESHOLD * 3) >> 1));
341 binbuffer1_size *= VPX_DECODER_FRAME_CONCURENCY_LVL *
342 VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_NUM /
343 VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_DEN;
344 binbufer2_size = max_t(u32, size_yuv,
345 ((BIN_BUFFER_THRESHOLD * 3) >> 1));
346 binbufer2_size *= VPX_DECODER_FRAME_CONCURENCY_LVL *
347 VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_NUM /
348 VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_DEN;
349 size = ALIGN(binbuffer1_size + binbufer2_size,
356 static u32 mpeg2d_scratch_size(u32 width, u32 height, bool is_interlaced)
361 static u32 calculate_enc_output_frame_size(u32 width, u32 height, u32 rc_type)
363 u32 aligned_width, aligned_height;
368 * Encoder output size calculation: 32 Align width/height
369 * For resolution < 720p : YUVsize * 4
370 * For resolution > 720p & <= 4K : YUVsize / 2
371 * For resolution > 4k : YUVsize / 4
372 * Initially frame_size = YUVsize * 2;
374 aligned_width = ALIGN(width, 32);
375 aligned_height = ALIGN(height, 32);
376 mbs_per_frame = (ALIGN(aligned_height, 16) *
377 ALIGN(aligned_width, 16)) / 256;
378 frame_size = width * height * 3;
380 if (mbs_per_frame < NUM_MBS_720P)
381 frame_size = frame_size << 1;
382 else if (mbs_per_frame <= NUM_MBS_4K)
383 frame_size = frame_size >> 2;
385 frame_size = frame_size >> 3;
387 if (rc_type == HFI_RATE_CONTROL_OFF || rc_type == HFI_RATE_CONTROL_CQ)
388 frame_size = frame_size << 1;
391 * In case of opaque color format bitdepth will be known
392 * with first ETB, buffers allocated already with 8 bit
393 * won't be sufficient for 10 bit
394 * calculate size considering 10-bit by default
395 * For 10-bit cases size = size * 1.25
400 return ALIGN(frame_size, SZ_4K);
403 static u32 calculate_enc_scratch_size(u32 width, u32 height, u32 work_mode,
404 u32 lcu_size, u32 num_vpp_pipes,
407 u32 aligned_width, aligned_height, bitstream_size;
408 u32 total_bitbin_buffers, size_single_pipe, bitbin_size;
409 u32 sao_bin_buffer_size, padded_bin_size, size;
411 aligned_width = ALIGN(width, lcu_size);
412 aligned_height = ALIGN(height, lcu_size);
414 calculate_enc_output_frame_size(width, height, rc_type);
416 bitstream_size = ALIGN(bitstream_size, HFI_DMA_ALIGNMENT);
418 if (work_mode == VIDC_WORK_MODE_2) {
419 total_bitbin_buffers = 3;
420 bitbin_size = bitstream_size * 17 / 10;
421 bitbin_size = ALIGN(bitbin_size, HFI_DMA_ALIGNMENT);
423 total_bitbin_buffers = 1;
424 bitstream_size = aligned_width * aligned_height * 3;
425 bitbin_size = ALIGN(bitstream_size, HFI_DMA_ALIGNMENT);
428 if (num_vpp_pipes > 2)
429 size_single_pipe = bitbin_size / 2;
431 size_single_pipe = bitbin_size;
433 size_single_pipe = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT);
434 sao_bin_buffer_size =
435 (64 * (((width + 32) * (height + 32)) >> 10)) + 384;
436 padded_bin_size = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT);
437 size_single_pipe = sao_bin_buffer_size + padded_bin_size;
438 size_single_pipe = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT);
439 bitbin_size = size_single_pipe * num_vpp_pipes;
440 size = ALIGN(bitbin_size, HFI_DMA_ALIGNMENT) *
441 total_bitbin_buffers + 512;
446 static u32 h264e_scratch_size(u32 width, u32 height, u32 work_mode,
447 u32 num_vpp_pipes, u32 rc_type)
449 return calculate_enc_scratch_size(width, height, work_mode, 16,
450 num_vpp_pipes, rc_type);
453 static u32 h265e_scratch_size(u32 width, u32 height, u32 work_mode,
454 u32 num_vpp_pipes, u32 rc_type)
456 return calculate_enc_scratch_size(width, height, work_mode, 32,
457 num_vpp_pipes, rc_type);
460 static u32 vp8e_scratch_size(u32 width, u32 height, u32 work_mode,
461 u32 num_vpp_pipes, u32 rc_type)
463 return calculate_enc_scratch_size(width, height, work_mode, 16,
464 num_vpp_pipes, rc_type);
467 static u32 hfi_iris2_h264d_comv_size(u32 width, u32 height,
468 u32 yuv_buf_min_count)
470 u32 frame_width_in_mbs = ((width + 15) >> 4);
471 u32 frame_height_in_mbs = ((height + 15) >> 4);
472 u32 col_mv_aligned_width = (frame_width_in_mbs << 7);
473 u32 col_zero_aligned_width = (frame_width_in_mbs << 2);
474 u32 col_zero_size = 0, size_colloc = 0, comv_size = 0;
476 col_mv_aligned_width = ALIGN(col_mv_aligned_width, 16);
477 col_zero_aligned_width = ALIGN(col_zero_aligned_width, 16);
479 col_zero_aligned_width * ((frame_height_in_mbs + 1) >> 1);
480 col_zero_size = ALIGN(col_zero_size, 64);
482 col_zero_size = ALIGN(col_zero_size, 512);
483 size_colloc = col_mv_aligned_width * ((frame_height_in_mbs + 1) >> 1);
484 size_colloc = ALIGN(size_colloc, 64);
486 size_colloc = ALIGN(size_colloc, 512);
487 size_colloc += (col_zero_size + SIZE_H264D_BUFTAB_T * 2);
488 comv_size = size_colloc * yuv_buf_min_count;
494 static u32 size_h264d_bse_cmd_buf(u32 height)
496 u32 aligned_height = ALIGN(height, 32);
498 return min_t(u32, (((aligned_height + 15) >> 4) * 3 * 4),
499 H264D_MAX_SLICE) * SIZE_H264D_BSE_CMD_PER_BUF;
502 static u32 size_h264d_vpp_cmd_buf(u32 height)
504 u32 aligned_height = ALIGN(height, 32);
507 size = min_t(u32, (((aligned_height + 15) >> 4) * 3 * 4),
508 H264D_MAX_SLICE) * SIZE_H264D_VPP_CMD_PER_BUF;
509 if (size > VPP_CMD_MAX_SIZE)
510 size = VPP_CMD_MAX_SIZE;
515 static u32 hfi_iris2_h264d_non_comv_size(u32 width, u32 height,
518 u32 size_bse, size_vpp, size;
520 size_bse = size_h264d_bse_cmd_buf(height);
521 size_vpp = size_h264d_vpp_cmd_buf(height);
523 ALIGN(size_bse, HFI_DMA_ALIGNMENT) +
524 ALIGN(size_vpp, HFI_DMA_ALIGNMENT) +
525 ALIGN(SIZE_HW_PIC(SIZE_H264D_HW_PIC_T), HFI_DMA_ALIGNMENT) +
526 ALIGN(SIZE_H264D_LB_FE_TOP_DATA(width, height),
528 ALIGN(SIZE_H264D_LB_FE_TOP_CTRL(width, height),
530 ALIGN(SIZE_H264D_LB_FE_LEFT_CTRL(width, height),
531 HFI_DMA_ALIGNMENT) * num_vpp_pipes +
532 ALIGN(SIZE_H264D_LB_SE_TOP_CTRL(width, height),
534 ALIGN(SIZE_H264D_LB_SE_LEFT_CTRL(width, height),
535 HFI_DMA_ALIGNMENT) * num_vpp_pipes +
536 ALIGN(SIZE_H264D_LB_PE_TOP_DATA(width, height),
538 ALIGN(SIZE_H264D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
539 ALIGN(SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height),
540 HFI_DMA_ALIGNMENT) * 2 +
541 ALIGN(SIZE_H264D_QP(width, height), HFI_DMA_ALIGNMENT);
543 return ALIGN(size, HFI_DMA_ALIGNMENT);
546 static u32 size_h265d_bse_cmd_buf(u32 width, u32 height)
550 size = (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
551 (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
553 size = min_t(u32, size, H265D_MAX_SLICE + 1);
554 size = 2 * size * SIZE_H265D_BSE_CMD_PER_BUF;
556 return ALIGN(size, HFI_DMA_ALIGNMENT);
559 static u32 size_h265d_vpp_cmd_buf(u32 width, u32 height)
563 size = (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
564 (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
566 size = min_t(u32, size, H265D_MAX_SLICE + 1);
567 size = ALIGN(size, 4);
568 size = 2 * size * SIZE_H265D_VPP_CMD_PER_BUF;
569 size = ALIGN(size, HFI_DMA_ALIGNMENT);
570 if (size > VPP_CMD_MAX_SIZE)
571 size = VPP_CMD_MAX_SIZE;
576 static u32 hfi_iris2_h265d_comv_size(u32 width, u32 height,
577 u32 yuv_buf_count_min)
581 size = ALIGN(((((width + 15) >> 4) * ((height + 15) >> 4)) << 8), 512);
582 size *= yuv_buf_count_min;
588 static u32 hfi_iris2_h265d_non_comv_size(u32 width, u32 height,
591 u32 size_bse, size_vpp, size;
593 size_bse = size_h265d_bse_cmd_buf(width, height);
594 size_vpp = size_h265d_vpp_cmd_buf(width, height);
596 ALIGN(size_bse, HFI_DMA_ALIGNMENT) +
597 ALIGN(size_vpp, HFI_DMA_ALIGNMENT) +
598 ALIGN(NUM_HW_PIC_BUF * 20 * 22 * 4, HFI_DMA_ALIGNMENT) +
599 ALIGN(2 * sizeof(u16) *
600 (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
601 (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS),
603 ALIGN(SIZE_HW_PIC(SIZE_H265D_HW_PIC_T), HFI_DMA_ALIGNMENT) +
604 ALIGN(SIZE_H265D_LB_FE_TOP_DATA(width, height),
606 ALIGN(SIZE_H265D_LB_FE_TOP_CTRL(width, height),
608 ALIGN(SIZE_H265D_LB_FE_LEFT_CTRL(width, height),
609 HFI_DMA_ALIGNMENT) * num_vpp_pipes +
610 ALIGN(size_h265d_lb_se_left_ctrl(width, height),
611 HFI_DMA_ALIGNMENT) * num_vpp_pipes +
612 ALIGN(SIZE_H265D_LB_SE_TOP_CTRL(width, height),
614 ALIGN(SIZE_H265D_LB_PE_TOP_DATA(width, height),
616 ALIGN(SIZE_H265D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
617 ALIGN(SIZE_H265D_LB_VSP_LEFT(width, height),
618 HFI_DMA_ALIGNMENT) * num_vpp_pipes +
619 ALIGN(SIZE_H265D_LB_RECON_DMA_METADATA_WR(width, height),
622 ALIGN(SIZE_H265D_QP(width, height), HFI_DMA_ALIGNMENT);
624 return ALIGN(size, HFI_DMA_ALIGNMENT);
627 static u32 hfi_iris2_vp8d_comv_size(u32 width, u32 height,
628 u32 yuv_min_buf_count)
630 return (((width + 15) >> 4) * ((height + 15) >> 4) * 8 * 2);
633 static u32 h264d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
634 bool split_mode_enabled, u32 num_vpp_pipes)
636 u32 co_mv_size, nonco_mv_size, vpss_lb_size = 0;
638 co_mv_size = hfi_iris2_h264d_comv_size(width, height, min_buf_count);
639 nonco_mv_size = hfi_iris2_h264d_non_comv_size(width, height,
641 if (split_mode_enabled)
642 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);
644 return co_mv_size + nonco_mv_size + vpss_lb_size;
647 static u32 h265d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
648 bool split_mode_enabled, u32 num_vpp_pipes)
650 u32 co_mv_size, nonco_mv_size, vpss_lb_size = 0;
652 co_mv_size = hfi_iris2_h265d_comv_size(width, height, min_buf_count);
653 nonco_mv_size = hfi_iris2_h265d_non_comv_size(width, height,
655 if (split_mode_enabled)
656 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);
658 return co_mv_size + nonco_mv_size + vpss_lb_size +
659 HDR10_HIST_EXTRADATA_SIZE;
662 static u32 vp8d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
663 bool split_mode_enabled, u32 num_vpp_pipes)
665 u32 vpss_lb_size = 0, size;
667 size = hfi_iris2_vp8d_comv_size(width, height, 0);
668 size += ALIGN(size_vpxd_lb_fe_left_ctrl(width, height),
669 HFI_DMA_ALIGNMENT) * num_vpp_pipes +
670 ALIGN(size_vpxd_lb_se_left_ctrl(width, height),
671 HFI_DMA_ALIGNMENT) * num_vpp_pipes +
672 ALIGN(SIZE_VP8D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
673 ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height),
675 2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height),
677 ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height),
679 ALIGN(SIZE_VP8D_LB_PE_TOP_DATA(width, height),
681 ALIGN(SIZE_VP8D_LB_FE_TOP_DATA(width, height),
683 if (split_mode_enabled)
684 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);
686 size += vpss_lb_size;
691 static u32 vp9d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
692 bool split_mode_enabled, u32 num_vpp_pipes)
694 u32 vpss_lb_size = 0;
698 ALIGN(size_vpxd_lb_fe_left_ctrl(width, height),
699 HFI_DMA_ALIGNMENT) * num_vpp_pipes +
700 ALIGN(size_vpxd_lb_se_left_ctrl(width, height),
701 HFI_DMA_ALIGNMENT) * num_vpp_pipes +
702 ALIGN(SIZE_VP9D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
703 ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height),
705 2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height),
707 ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height),
709 ALIGN(SIZE_VP9D_LB_PE_TOP_DATA(width, height),
711 ALIGN(SIZE_VP9D_LB_FE_TOP_DATA(width, height),
714 if (split_mode_enabled)
715 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);
717 size += vpss_lb_size + HDR10_HIST_EXTRADATA_SIZE;
722 static u32 mpeg2d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
723 bool split_mode_enabled, u32 num_vpp_pipes)
725 u32 vpss_lb_size = 0;
729 ALIGN(size_vpxd_lb_fe_left_ctrl(width, height),
730 HFI_DMA_ALIGNMENT) * num_vpp_pipes +
731 ALIGN(size_vpxd_lb_se_left_ctrl(width, height),
732 HFI_DMA_ALIGNMENT) * num_vpp_pipes +
733 ALIGN(SIZE_VP8D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
734 ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height),
736 2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height),
738 ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height),
740 ALIGN(SIZE_VP8D_LB_PE_TOP_DATA(width, height),
742 ALIGN(SIZE_VP8D_LB_FE_TOP_DATA(width, height),
745 if (split_mode_enabled)
746 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);
748 size += vpss_lb_size;
754 calculate_enc_scratch1_size(u32 width, u32 height, u32 lcu_size, u32 num_ref,
755 bool ten_bit, u32 num_vpp_pipes, bool is_h265)
757 u32 line_buf_ctrl_size, line_buf_data_size, leftline_buf_ctrl_size;
758 u32 line_buf_sde_size, sps_pps_slice_hdr, topline_buf_ctrl_size_FE;
759 u32 leftline_buf_ctrl_size_FE, line_buf_recon_pix_size;
760 u32 leftline_buf_recon_pix_size, lambda_lut_size, override_buffer_size;
761 u32 col_mv_buf_size, vpp_reg_buffer_size, ir_buffer_size;
762 u32 vpss_line_buf, leftline_buf_meta_recony, h265e_colrcbuf_size;
763 u32 h265e_framerc_bufsize, h265e_lcubitcnt_bufsize;
764 u32 h265e_lcubitmap_bufsize, se_stats_bufsize;
765 u32 bse_reg_buffer_size, bse_slice_cmd_buffer_size, slice_info_bufsize;
766 u32 line_buf_ctrl_size_buffid2, slice_cmd_buffer_size;
767 u32 width_lcu_num, height_lcu_num, width_coded, height_coded;
768 u32 frame_num_lcu, linebuf_meta_recon_uv, topline_bufsize_fe_1stg_sao;
769 u32 size, bit_depth, num_lcu_mb;
770 u32 vpss_line_buffer_size_1;
772 width_lcu_num = (width + lcu_size - 1) / lcu_size;
773 height_lcu_num = (height + lcu_size - 1) / lcu_size;
774 frame_num_lcu = width_lcu_num * height_lcu_num;
775 width_coded = width_lcu_num * lcu_size;
776 height_coded = height_lcu_num * lcu_size;
777 num_lcu_mb = (height_coded / lcu_size) *
778 ((width_coded + lcu_size * 8) / lcu_size);
779 slice_info_bufsize = 256 + (frame_num_lcu << 4);
780 slice_info_bufsize = ALIGN(slice_info_bufsize, HFI_DMA_ALIGNMENT);
781 line_buf_ctrl_size = ALIGN(width_coded, HFI_DMA_ALIGNMENT);
782 line_buf_ctrl_size_buffid2 = ALIGN(width_coded, HFI_DMA_ALIGNMENT);
784 bit_depth = ten_bit ? 10 : 8;
786 (((((bit_depth * width_coded + 1024) +
787 (HFI_DMA_ALIGNMENT - 1)) & (~(HFI_DMA_ALIGNMENT - 1))) * 1) +
788 (((((bit_depth * width_coded + 1024) >> 1) +
789 (HFI_DMA_ALIGNMENT - 1)) & (~(HFI_DMA_ALIGNMENT - 1))) * 2));
791 leftline_buf_ctrl_size = is_h265 ?
792 ((height_coded + 32) / 32 * 4 * 16) :
793 ((height_coded + 15) / 16 * 5 * 16);
795 if (num_vpp_pipes > 1) {
796 leftline_buf_ctrl_size += 512;
797 leftline_buf_ctrl_size =
798 ALIGN(leftline_buf_ctrl_size, 512) * num_vpp_pipes;
801 leftline_buf_ctrl_size =
802 ALIGN(leftline_buf_ctrl_size, HFI_DMA_ALIGNMENT);
803 leftline_buf_recon_pix_size = (((ten_bit + 1) * 2 *
804 (height_coded) + HFI_DMA_ALIGNMENT) +
805 (HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1) &
806 (~((HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1)) * 1;
808 topline_buf_ctrl_size_FE = is_h265 ? (64 * (width_coded >> 5)) :
809 (HFI_DMA_ALIGNMENT + 16 * (width_coded >> 4));
810 topline_buf_ctrl_size_FE =
811 ALIGN(topline_buf_ctrl_size_FE, HFI_DMA_ALIGNMENT);
812 leftline_buf_ctrl_size_FE =
813 (((HFI_DMA_ALIGNMENT + 64 * (height_coded >> 4)) +
814 (HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1) &
815 (~((HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1)) * 1) *
817 leftline_buf_meta_recony = (HFI_DMA_ALIGNMENT + 64 *
818 ((height_coded) / (8 * (ten_bit ? 4 : 8))));
819 leftline_buf_meta_recony =
820 ALIGN(leftline_buf_meta_recony, HFI_DMA_ALIGNMENT);
821 leftline_buf_meta_recony = leftline_buf_meta_recony * num_vpp_pipes;
822 linebuf_meta_recon_uv = (HFI_DMA_ALIGNMENT + 64 *
823 ((height_coded) / (4 * (ten_bit ? 4 : 8))));
824 linebuf_meta_recon_uv = ALIGN(linebuf_meta_recon_uv, HFI_DMA_ALIGNMENT);
825 linebuf_meta_recon_uv = linebuf_meta_recon_uv * num_vpp_pipes;
826 line_buf_recon_pix_size = ((ten_bit ? 3 : 2) * width_coded);
827 line_buf_recon_pix_size =
828 ALIGN(line_buf_recon_pix_size, HFI_DMA_ALIGNMENT);
829 slice_cmd_buffer_size = ALIGN(20480, HFI_DMA_ALIGNMENT);
830 sps_pps_slice_hdr = 2048 + 4096;
831 col_mv_buf_size = is_h265 ? (16 * ((frame_num_lcu << 2) + 32)) :
832 (3 * 16 * (width_lcu_num * height_lcu_num + 32));
834 ALIGN(col_mv_buf_size, HFI_DMA_ALIGNMENT) * (num_ref + 1);
835 h265e_colrcbuf_size =
836 (((width_lcu_num + 7) >> 3) * 16 * 2 * height_lcu_num);
837 if (num_vpp_pipes > 1)
838 h265e_colrcbuf_size =
839 ALIGN(h265e_colrcbuf_size, HFI_DMA_ALIGNMENT) *
842 h265e_colrcbuf_size = ALIGN(h265e_colrcbuf_size, HFI_DMA_ALIGNMENT) *
844 h265e_framerc_bufsize = (is_h265) ? (256 + 16 *
845 (14 + (((height_coded >> 5) + 7) >> 3))) :
846 (256 + 16 * (14 + (((height_coded >> 4) + 7) >> 3)));
847 h265e_framerc_bufsize *= 6; /* multiply by max numtilescol */
848 if (num_vpp_pipes > 1)
849 h265e_framerc_bufsize =
850 ALIGN(h265e_framerc_bufsize, HFI_DMA_ALIGNMENT) *
853 h265e_framerc_bufsize = ALIGN(h265e_framerc_bufsize, 512) *
855 h265e_lcubitcnt_bufsize = 256 + 4 * frame_num_lcu;
856 h265e_lcubitcnt_bufsize =
857 ALIGN(h265e_lcubitcnt_bufsize, HFI_DMA_ALIGNMENT);
858 h265e_lcubitmap_bufsize = 256 + (frame_num_lcu >> 3);
859 h265e_lcubitmap_bufsize =
860 ALIGN(h265e_lcubitmap_bufsize, HFI_DMA_ALIGNMENT);
861 line_buf_sde_size = 256 + 16 * (width_coded >> 4);
862 line_buf_sde_size = ALIGN(line_buf_sde_size, HFI_DMA_ALIGNMENT);
863 if ((width_coded * height_coded) > (4096 * 2160))
864 se_stats_bufsize = 0;
865 else if ((width_coded * height_coded) > (1920 * 1088))
866 se_stats_bufsize = (40 * 4 * frame_num_lcu + 256 + 256);
868 se_stats_bufsize = (1024 * frame_num_lcu + 256 + 256);
870 se_stats_bufsize = ALIGN(se_stats_bufsize, HFI_DMA_ALIGNMENT) * 2;
871 bse_slice_cmd_buffer_size = (((8192 << 2) + 7) & (~7)) * 6;
872 bse_reg_buffer_size = (((512 << 3) + 7) & (~7)) * 4;
873 vpp_reg_buffer_size =
874 (((HFI_VENUS_VPPSG_MAX_REGISTERS << 3) + 31) & (~31)) * 10;
875 lambda_lut_size = 256 * 11;
876 override_buffer_size = 16 * ((num_lcu_mb + 7) >> 3);
877 override_buffer_size =
878 ALIGN(override_buffer_size, HFI_DMA_ALIGNMENT) * 2;
879 ir_buffer_size = (((frame_num_lcu << 1) + 7) & (~7)) * 3;
880 vpss_line_buffer_size_1 = (((8192 >> 2) << 5) * num_vpp_pipes) + 64;
882 (((((max(width_coded, height_coded) + 3) >> 2) << 5) + 256) *
883 16) + vpss_line_buffer_size_1;
884 topline_bufsize_fe_1stg_sao = 16 * (width_coded >> 5);
885 topline_bufsize_fe_1stg_sao =
886 ALIGN(topline_bufsize_fe_1stg_sao, HFI_DMA_ALIGNMENT);
889 line_buf_ctrl_size + line_buf_data_size +
890 line_buf_ctrl_size_buffid2 + leftline_buf_ctrl_size +
891 vpss_line_buf + col_mv_buf_size + topline_buf_ctrl_size_FE +
892 leftline_buf_ctrl_size_FE + line_buf_recon_pix_size +
893 leftline_buf_recon_pix_size +
894 leftline_buf_meta_recony + linebuf_meta_recon_uv +
895 h265e_colrcbuf_size + h265e_framerc_bufsize +
896 h265e_lcubitcnt_bufsize + h265e_lcubitmap_bufsize +
898 topline_bufsize_fe_1stg_sao + override_buffer_size +
899 bse_reg_buffer_size + vpp_reg_buffer_size + sps_pps_slice_hdr +
900 slice_cmd_buffer_size + bse_slice_cmd_buffer_size +
901 ir_buffer_size + slice_info_bufsize + lambda_lut_size +
902 se_stats_bufsize + 1024;
907 static u32 h264e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit,
910 return calculate_enc_scratch1_size(width, height, 16, num_ref, ten_bit,
911 num_vpp_pipes, false);
914 static u32 h265e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit,
917 return calculate_enc_scratch1_size(width, height, 32, num_ref, ten_bit,
918 num_vpp_pipes, true);
921 static u32 vp8e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit,
924 return calculate_enc_scratch1_size(width, height, 16, num_ref, ten_bit,
928 static u32 ubwc_metadata_plane_stride(u32 width, u32 metadata_stride_multi,
931 return ALIGN(((width + (tile_width_pels - 1)) / tile_width_pels),
932 metadata_stride_multi);
935 static u32 ubwc_metadata_plane_bufheight(u32 height, u32 metadata_height_multi,
936 u32 tile_height_pels)
938 return ALIGN(((height + (tile_height_pels - 1)) / tile_height_pels),
939 metadata_height_multi);
942 static u32 ubwc_metadata_plane_buffer_size(u32 metadata_stride,
943 u32 metadata_buf_height)
945 return ALIGN(metadata_stride * metadata_buf_height, SZ_4K);
/*
 * enc_scratch2_size() - size of the encoder SCRATCH_2 buffer.
 *
 * Two size computations are visible below. Both end by scaling a per-frame
 * size by (num_ref + 3) and adding 4096 bytes:
 *
 *  - the first uses the NV12 UBWC tile dimensions: aligned luma plus chroma
 *    area, plus two metadata plane sizes;
 *  - the second uses SYSTEM_LAL_TILE10 and the TP10 UBWC tile dimensions:
 *    4K-rounded luma and chroma sizes, plus two metadata plane sizes.
 *
 * NOTE(review): this listing omits several source lines (the embedded
 * numbering has gaps, e.g. 954 -> 958, 975 -> 977, 1004 -> 1010). The
 * condition selecting between the two computations (presumably on ten_bit),
 * the declaration of size, several assignment targets, some call arguments
 * and the return statement are therefore not visible - confirm against the
 * complete file.
 */
948 static u32 enc_scratch2_size(u32 width, u32 height, u32 num_ref, bool ten_bit)
950 u32 aligned_width, aligned_height, chroma_height, ref_buf_height;
951 u32 luma_size, chroma_size;
952 u32 metadata_stride, meta_buf_height, meta_size_y, meta_size_c;
953 u32 ref_luma_stride_bytes, ref_chroma_height_bytes;
954 u32 ref_buf_size, ref_stride;
/*
 * First computation: luma height, half-height chroma and width are each
 * ALIGN()ed to the HFI_VENUS_* alignment constants.
 */
958 aligned_height = ALIGN(height, HFI_VENUS_HEIGHT_ALIGNMENT);
959 chroma_height = height >> 1;
960 chroma_height = ALIGN(chroma_height,
961 HFI_VENUS_HEIGHT_ALIGNMENT);
962 aligned_width = ALIGN(width, HFI_VENUS_WIDTH_ALIGNMENT);
964 ubwc_metadata_plane_stride(width, 64,
965 NV12_UBWC_Y_TILE_WIDTH);
967 ubwc_metadata_plane_bufheight(height, 16,
968 NV12_UBWC_Y_TILE_HEIGHT);
969 meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride,
971 meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride,
973 size = (aligned_height + chroma_height) * aligned_width +
974 meta_size_y + meta_size_c;
975 size = (size * (num_ref + 3)) + 4096;
/*
 * Second computation: the "(x + (a - 1)) & ~(a - 1)" expressions round x up
 * to a multiple of a (valid when a is a power of two, as 32, 128 and SZ_4K
 * are).
 */
977 ref_buf_height = (height + (HFI_VENUS_HEIGHT_ALIGNMENT - 1))
978 & (~(HFI_VENUS_HEIGHT_ALIGNMENT - 1));
979 ref_luma_stride_bytes =
980 ((width + SYSTEM_LAL_TILE10 - 1) / SYSTEM_LAL_TILE10) *
982 ref_stride = 4 * (ref_luma_stride_bytes / 3);
983 ref_stride = (ref_stride + (128 - 1)) & (~(128 - 1));
984 luma_size = ref_buf_height * ref_stride;
985 ref_chroma_height_bytes = (((height + 1) >> 1) +
986 (32 - 1)) & (~(32 - 1));
987 chroma_size = ref_stride * ref_chroma_height_bytes;
988 luma_size = (luma_size + (SZ_4K - 1)) & (~(SZ_4K - 1));
989 chroma_size = (chroma_size + (SZ_4K - 1)) & (~(SZ_4K - 1));
990 ref_buf_size = luma_size + chroma_size;
992 ubwc_metadata_plane_stride(width,
993 METADATA_STRIDE_MULTIPLE,
994 TP10_UBWC_Y_TILE_WIDTH);
996 ubwc_metadata_plane_bufheight(height,
997 METADATA_HEIGHT_MULTIPLE,
998 TP10_UBWC_Y_TILE_HEIGHT);
999 meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride,
1001 meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride,
1003 size = ref_buf_size + meta_size_y + meta_size_c;
1004 size = (size * (num_ref + 3)) + 4096;
/*
 * enc_persist_size() - size of the encoder persist buffer.
 *
 * Takes no arguments and returns the constant HFI_IRIS2_ENC_PERSIST_SIZE.
 */
1010 static u32 enc_persist_size(void)
1012 return HFI_IRIS2_ENC_PERSIST_SIZE;
/*
 * h264d_persist1_size() - size of the H.264 decoder PERSIST_1 buffer.
 *
 * Sum of NUM_SLIST_BUF_H264 buffers of SIZE_SLIST_BUF_H264 bytes and
 * NUM_HW_PIC_BUF areas of SIZE_SEI_USERDATA bytes, ALIGN()ed to
 * HFI_DMA_ALIGNMENT.
 */
1015 static u32 h264d_persist1_size(void)
1017 return ALIGN((SIZE_SLIST_BUF_H264 * NUM_SLIST_BUF_H264
1018 + NUM_HW_PIC_BUF * SIZE_SEI_USERDATA), HFI_DMA_ALIGNMENT);
/*
 * h265d_persist1_size() - size of the HEVC decoder PERSIST_1 buffer.
 *
 * Sum of NUM_SLIST_BUF_H265 buffers of SIZE_SLIST_BUF_H265 bytes, one u32 per
 * H265_NUM_TILE, and NUM_HW_PIC_BUF areas of SIZE_SEI_USERDATA bytes,
 * ALIGN()ed to HFI_DMA_ALIGNMENT.
 */
1021 static u32 h265d_persist1_size(void)
1023 return ALIGN((SIZE_SLIST_BUF_H265 * NUM_SLIST_BUF_H265 + H265_NUM_TILE
1024 * sizeof(u32) + NUM_HW_PIC_BUF * SIZE_SEI_USERDATA), HFI_DMA_ALIGNMENT);
/*
 * vp8d_persist1_size() - size of the VP8 decoder PERSIST_1 buffer.
 *
 * ALIGN()s the space for VP8_NUM_PROBABILITY_TABLE_BUF tables of
 * VP8_PROB_TABLE_SIZE bytes.
 *
 * NOTE(review): the alignment argument sits on a line that is not visible
 * here (the numbering jumps from 1029 to 1033); presumably HFI_DMA_ALIGNMENT
 * - confirm.
 */
1027 static u32 vp8d_persist1_size(void)
1029 return ALIGN(VP8_NUM_PROBABILITY_TABLE_BUF * VP8_PROB_TABLE_SIZE,
/*
 * vp9d_persist1_size() - size of the VP9 decoder PERSIST_1 buffer.
 *
 * Adds up individually ALIGN()ed regions: the probability tables, the
 * HFI_IRIS2_VP9D_COMV_SIZE area, the superframe header, the UDC header and
 * VP9_NUM_FRAME_INFO_BUF entries of CCE_TILE_OFFSET_SIZE bytes.
 *
 * NOTE(review): the start of the expression (original line 1035) and the
 * alignment argument of the last term (after line 1041) are not visible in
 * this listing; the last term is presumably aligned to HFI_DMA_ALIGNMENT like
 * the others - confirm.
 */
1033 static u32 vp9d_persist1_size(void)
1036 ALIGN(VP9_NUM_PROBABILITY_TABLE_BUF * VP9_PROB_TABLE_SIZE,
1037 HFI_DMA_ALIGNMENT) +
1038 ALIGN(HFI_IRIS2_VP9D_COMV_SIZE, HFI_DMA_ALIGNMENT) +
1039 ALIGN(MAX_SUPERFRAME_HEADER_LEN, HFI_DMA_ALIGNMENT) +
1040 ALIGN(VP9_UDC_HEADER_BUF_SIZE, HFI_DMA_ALIGNMENT) +
1041 ALIGN(VP9_NUM_FRAME_INFO_BUF * CCE_TILE_OFFSET_SIZE,
/*
 * mpeg2d_persist1_size() - size of the MPEG-2 decoder PERSIST_1 buffer.
 *
 * Returns QMATRIX_SIZE + MP2D_QPDUMP_SIZE. Unlike the other visible decoder
 * persist1 helpers, no ALIGN() is applied here.
 */
1045 static u32 mpeg2d_persist1_size(void)
1047 return QMATRIX_SIZE + MP2D_QPDUMP_SIZE;
/*
 * struct dec_bufsize_ops - per-codec size callbacks for decoder internal
 * buffers.
 *
 * @scratch:  takes width, height and an interlaced flag
 * @scratch1: takes width, height, a minimum buffer count, a split-mode flag
 *            and the number of VPP pipes
 * @persist1: takes no arguments
 *
 * Each callback returns a u32 size.
 */
1050 struct dec_bufsize_ops {
1051 u32 (*scratch)(u32 width, u32 height, bool is_interlaced);
1052 u32 (*scratch1)(u32 width, u32 height, u32 min_buf_count,
1053 bool split_mode_enabled, u32 num_vpp_pipes);
1054 u32 (*persist1)(void);
/*
 * struct enc_bufsize_ops - per-codec size callbacks for encoder internal
 * buffers.
 *
 * @scratch:  takes width, height, work mode, number of VPP pipes, ...
 * @scratch1: takes width, height, reference count, ten-bit flag, ...
 * @scratch2: takes width, height, reference count and ten-bit flag
 * @persist:  takes no arguments
 *
 * Each callback returns a u32 size.
 *
 * NOTE(review): the trailing parameters of @scratch and @scratch1 sit on
 * lines that are not visible in this listing (original lines 1059 and 1061).
 */
1057 struct enc_bufsize_ops {
1058 u32 (*scratch)(u32 width, u32 height, u32 work_mode, u32 num_vpp_pipes,
1060 u32 (*scratch1)(u32 width, u32 height, u32 num_ref, bool ten_bit,
1062 u32 (*scratch2)(u32 width, u32 height, u32 num_ref, bool ten_bit);
1063 u32 (*persist)(void);
/* Size callbacks used for H.264 decode sessions. */
1066 static struct dec_bufsize_ops dec_h264_ops = {
1067 .scratch = h264d_scratch_size,
1068 .scratch1 = h264d_scratch1_size,
1069 .persist1 = h264d_persist1_size,
/* Size callbacks used for HEVC (H.265) decode sessions. */
1072 static struct dec_bufsize_ops dec_h265_ops = {
1073 .scratch = h265d_scratch_size,
1074 .scratch1 = h265d_scratch1_size,
1075 .persist1 = h265d_persist1_size,
/*
 * Size callbacks used for VP8 decode sessions. The scratch callback is the
 * vpxd_scratch_size() helper, which the VP9 table uses as well.
 */
1078 static struct dec_bufsize_ops dec_vp8_ops = {
1079 .scratch = vpxd_scratch_size,
1080 .scratch1 = vp8d_scratch1_size,
1081 .persist1 = vp8d_persist1_size,
/*
 * Size callbacks used for VP9 decode sessions. The scratch callback is the
 * vpxd_scratch_size() helper, which the VP8 table uses as well.
 */
1084 static struct dec_bufsize_ops dec_vp9_ops = {
1085 .scratch = vpxd_scratch_size,
1086 .scratch1 = vp9d_scratch1_size,
1087 .persist1 = vp9d_persist1_size,
/* Size callbacks used for MPEG-2 decode sessions. */
1090 static struct dec_bufsize_ops dec_mpeg2_ops = {
1091 .scratch = mpeg2d_scratch_size,
1092 .scratch1 = mpeg2d_scratch1_size,
1093 .persist1 = mpeg2d_persist1_size,
/*
 * Size callbacks used for H.264 encode sessions. scratch and scratch1 are
 * codec specific; scratch2 and persist use the codec-independent
 * enc_scratch2_size() and enc_persist_size().
 */
1096 static struct enc_bufsize_ops enc_h264_ops = {
1097 .scratch = h264e_scratch_size,
1098 .scratch1 = h264e_scratch1_size,
1099 .scratch2 = enc_scratch2_size,
1100 .persist = enc_persist_size,
/*
 * Size callbacks used for HEVC (H.265) encode sessions. scratch and scratch1
 * are codec specific; scratch2 and persist use the codec-independent
 * enc_scratch2_size() and enc_persist_size().
 */
1103 static struct enc_bufsize_ops enc_h265_ops = {
1104 .scratch = h265e_scratch_size,
1105 .scratch1 = h265e_scratch1_size,
1106 .scratch2 = enc_scratch2_size,
1107 .persist = enc_persist_size,
/*
 * Size callbacks used for VP8 encode sessions. scratch and scratch1 are codec
 * specific; scratch2 and persist use the codec-independent
 * enc_scratch2_size() and enc_persist_size().
 */
1110 static struct enc_bufsize_ops enc_vp8_ops = {
1111 .scratch = vp8e_scratch_size,
1112 .scratch1 = vp8e_scratch1_size,
1113 .scratch2 = enc_scratch2_size,
1114 .persist = enc_persist_size,
/*
 * calculate_dec_input_frame_size() - size of one decoder input (bitstream)
 * buffer.
 *
 * Visible steps:
 *  1. count 16x16 macroblocks from the 16-aligned width and height;
 *  2. above NUM_MBS_4K use max_mbs_per_frame as the base macroblock count,
 *     otherwise use NUM_MBS_4K;
 *  3. frame_size = base_res_mbs * MB_SIZE_IN_PIXEL * 3 / 2 / div_factor;
 *  4. add 25% for VP9 and HEVC;
 *  5. if buffer_size_limit is non-zero and smaller than frame_size, use the
 *     limit instead;
 *  6. return the result ALIGN()ed to SZ_4K.
 *
 * NOTE(review): this listing omits several source lines (e.g. 1117, 1122,
 * 1134 and 1139-1143 by the embedded numbering). The return type, the
 * declaration of div_factor and every assignment to it are therefore not
 * visible; the existing comment below suggests the divisor is 4 above 4K,
 * 1 for VP8/VP9 and 2 otherwise - confirm against the complete file.
 */
1118 calculate_dec_input_frame_size(u32 width, u32 height, u32 codec,
1119 u32 max_mbs_per_frame, u32 buffer_size_limit)
1121 u32 frame_size, num_mbs;
1123 u32 base_res_mbs = NUM_MBS_4K;
1126 * Decoder input size calculation:
1127 * If clip is 8k buffer size is calculated for 8k : 8k mbs/4
1128 * For 8k cases we expect width/height to be set always.
1129 * In all other cases size is calculated for 4k:
1130 * 4k mbs for VP8/VP9 and 4k/2 for remaining codecs
1132 num_mbs = (ALIGN(height, 16) * ALIGN(width, 16)) / 256;
1133 if (num_mbs > NUM_MBS_4K) {
1135 base_res_mbs = max_mbs_per_frame;
1137 base_res_mbs = NUM_MBS_4K;
1138 if (codec == V4L2_PIX_FMT_VP9)
1144 frame_size = base_res_mbs * MB_SIZE_IN_PIXEL * 3 / 2 / div_factor;
1146 /* multiply by 10/8 (1.25) to get size for 10 bit case */
1147 if (codec == V4L2_PIX_FMT_VP9 || codec == V4L2_PIX_FMT_HEVC)
1148 frame_size = frame_size + (frame_size >> 2);
1150 if (buffer_size_limit && buffer_size_limit < frame_size)
1151 frame_size = buffer_size_limit;
1153 return ALIGN(frame_size, SZ_4K);
/*
 * output_buffer_count() - driver-side minimum number of output buffers.
 *
 * For VIDC_SESSION_TYPE_DEC the count depends on the codec: 6 for MPEG-2 and
 * VP8, 11 for VP9, 18 for H.264 and HEVC. MIN_ENC_OUTPUT_BUFFERS is assigned
 * on the remaining visible path.
 *
 * NOTE(review): the switch header, the break statements, any default label
 * (original line 1171) and the else separating the encoder assignment are on
 * lines that are not visible in this listing. Presumably 18 is also the
 * default for unlisted codecs and MIN_ENC_OUTPUT_BUFFERS applies to
 * non-decoder sessions - confirm. The value is held in a u32 but returned
 * as int.
 */
1156 static int output_buffer_count(u32 session_type, u32 codec)
1158 u32 output_min_count;
1160 if (session_type == VIDC_SESSION_TYPE_DEC) {
1162 case V4L2_PIX_FMT_MPEG2:
1163 case V4L2_PIX_FMT_VP8:
1164 output_min_count = 6;
1166 case V4L2_PIX_FMT_VP9:
1167 output_min_count = 11;
1169 case V4L2_PIX_FMT_H264:
1170 case V4L2_PIX_FMT_HEVC:
1172 output_min_count = 18;
1176 output_min_count = MIN_ENC_OUTPUT_BUFFERS;
1179 return output_min_count;
/*
 * bufreq_dec() - fill @bufreq for one decoder buffer type.
 *
 * @params:  session parameters (codec, dimensions, HFI version, decoder flags)
 * @buftype: HFI buffer type being queried
 * @bufreq:  requirements structure; its current minimum count is read before
 *           the fields are overwritten
 *
 * Picks the codec's dec_bufsize_ops table, sets common defaults, then sets
 * the minimum count and/or size according to @buftype.
 *
 * NOTE(review): this listing omits several source lines (gaps in the embedded
 * numbering, e.g. 1194 -> 1197, 1210 -> 1216, 1251 -> 1259). The switch
 * header, the handling of unsupported codecs or buffer types, some assignment
 * targets, some call arguments and the return statements are not visible, so
 * the return values cannot be documented from here.
 */
1182 static int bufreq_dec(struct hfi_plat_buffers_params *params, u32 buftype,
1183 struct hfi_buffer_requirements *bufreq)
1185 enum hfi_version version = params->version;
1186 u32 codec = params->codec;
1187 u32 width = params->width, height = params->height, out_min_count;
1188 u32 out_width = params->out_width, out_height = params->out_height;
1189 struct dec_bufsize_ops *dec_ops;
1190 bool is_secondary_output = params->dec.is_secondary_output;
1191 bool is_interlaced = params->dec.is_interlaced;
1192 u32 max_mbs_per_frame = params->dec.max_mbs_per_frame;
1193 u32 buffer_size_limit = params->dec.buffer_size_limit;
1194 u32 num_vpp_pipes = params->num_vpp_pipes;
/* Select the size callbacks matching the codec. */
1197 case V4L2_PIX_FMT_H264:
1198 dec_ops = &dec_h264_ops;
1200 case V4L2_PIX_FMT_HEVC:
1201 dec_ops = &dec_h265_ops;
1203 case V4L2_PIX_FMT_VP8:
1204 dec_ops = &dec_vp8_ops;
1206 case V4L2_PIX_FMT_VP9:
1207 dec_ops = &dec_vp9_ops;
1209 case V4L2_PIX_FMT_MPEG2:
1210 dec_ops = &dec_mpeg2_ops;
/*
 * The minimum count already stored in bufreq is read here, before it is
 * reset to 1 below.
 */
1216 out_min_count = output_buffer_count(VIDC_SESSION_TYPE_DEC, codec);
1217 /* Max of driver and FW count */
1218 out_min_count = max(out_min_count, hfi_bufreq_get_count_min(bufreq, version));
/* Defaults applied to every buffer type; the branches below override some. */
1220 bufreq->type = buftype;
1221 bufreq->region_size = 0;
1222 bufreq->count_actual = 1;
1223 hfi_bufreq_set_count_min(bufreq, version, 1);
1224 hfi_bufreq_set_hold_count(bufreq, version, 1);
1225 bufreq->contiguous = 1;
1226 bufreq->alignment = 256;
/* Per-type minimum count and size. */
1228 if (buftype == HFI_BUFFER_INPUT) {
1229 hfi_bufreq_set_count_min(bufreq, version, MIN_INPUT_BUFFERS);
1231 calculate_dec_input_frame_size(width, height, codec,
1234 } else if (buftype == HFI_BUFFER_OUTPUT || buftype == HFI_BUFFER_OUTPUT2) {
1235 hfi_bufreq_set_count_min(bufreq, version, out_min_count);
1237 venus_helper_get_framesz_raw(params->hfi_color_fmt,
1238 out_width, out_height);
/*
 * With a secondary output, HFI_BUFFER_OUTPUT is sized from the DPB colour
 * format instead. This re-reads params->dec.is_secondary_output rather than
 * using the local copy.
 */
1239 if (buftype == HFI_BUFFER_OUTPUT &&
1240 params->dec.is_secondary_output)
1242 venus_helper_get_framesz_raw(params->hfi_dpb_color_fmt,
1243 out_width, out_height);
1244 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH(version)) {
1245 bufreq->size = dec_ops->scratch(width, height, is_interlaced);
1246 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_1(version)) {
1247 bufreq->size = dec_ops->scratch1(width, height, VB2_MAX_FRAME,
1248 is_secondary_output,
1250 } else if (buftype == HFI_BUFFER_INTERNAL_PERSIST_1) {
1251 bufreq->size = dec_ops->persist1();
/*
 * bufreq_enc() - fill @bufreq for one encoder buffer type.
 *
 * @params:  session parameters (codec, dimensions, HFI version, encoder
 *           settings)
 * @buftype: HFI buffer type being queried
 * @bufreq:  requirements structure to fill
 *
 * Picks the codec's enc_bufsize_ops table (H.264, HEVC or VP8), derives the
 * reference count from the number of B-frames, sets common defaults, then
 * sets the minimum count and/or size according to @buftype.
 *
 * NOTE(review): this listing omits several source lines (gaps in the embedded
 * numbering, e.g. 1272 -> 1275, 1282 -> 1288, 1319 -> 1327). The switch
 * header, the handling of unsupported codecs or buffer types, some assignment
 * targets, some call arguments and the return statements are not visible, so
 * the return values cannot be documented from here.
 */
1259 static int bufreq_enc(struct hfi_plat_buffers_params *params, u32 buftype,
1260 struct hfi_buffer_requirements *bufreq)
1262 enum hfi_version version = params->version;
1263 struct enc_bufsize_ops *enc_ops;
1264 u32 width = params->width;
1265 u32 height = params->height;
1266 bool is_tenbit = params->enc.is_tenbit;
1267 u32 num_bframes = params->enc.num_b_frames;
1268 u32 codec = params->codec;
1269 u32 work_mode = params->enc.work_mode;
1270 u32 rc_type = params->enc.rc_type;
1271 u32 num_vpp_pipes = params->num_vpp_pipes;
1272 u32 num_ref, count_min;
/* Select the size callbacks matching the codec. */
1275 case V4L2_PIX_FMT_H264:
1276 enc_ops = &enc_h264_ops;
1278 case V4L2_PIX_FMT_HEVC:
1279 enc_ops = &enc_h265_ops;
1281 case V4L2_PIX_FMT_VP8:
1282 enc_ops = &enc_vp8_ops;
/* One reference frame without B-frames, otherwise num_bframes + 1. */
1288 num_ref = num_bframes > 0 ? num_bframes + 1 : 1;
/* Defaults applied to every buffer type; the branches below override some. */
1290 bufreq->type = buftype;
1291 bufreq->region_size = 0;
1292 bufreq->count_actual = 1;
1293 hfi_bufreq_set_count_min(bufreq, version, 1);
1294 hfi_bufreq_set_hold_count(bufreq, version, 1);
1295 bufreq->contiguous = 1;
1296 bufreq->alignment = 256;
/* Per-type minimum count and size. */
1298 if (buftype == HFI_BUFFER_INPUT) {
1299 hfi_bufreq_set_count_min(bufreq, version, MIN_INPUT_BUFFERS);
1301 venus_helper_get_framesz_raw(params->hfi_color_fmt,
1303 } else if (buftype == HFI_BUFFER_OUTPUT ||
1304 buftype == HFI_BUFFER_OUTPUT2) {
1305 count_min = output_buffer_count(VIDC_SESSION_TYPE_ENC, codec);
1306 hfi_bufreq_set_count_min(bufreq, version, count_min);
1307 bufreq->size = calculate_enc_output_frame_size(width, height,
1309 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH(version)) {
1310 bufreq->size = enc_ops->scratch(width, height, work_mode,
1311 num_vpp_pipes, rc_type);
1312 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_1(version)) {
1313 bufreq->size = enc_ops->scratch1(width, height, num_ref,
1314 is_tenbit, num_vpp_pipes);
1315 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_2(version)) {
1316 bufreq->size = enc_ops->scratch2(width, height, num_ref,
1318 } else if (buftype == HFI_BUFFER_INTERNAL_PERSIST) {
1319 bufreq->size = enc_ops->persist();
/*
 * hfi_plat_bufreq_v6() - non-static entry point for buffer requirements.
 *
 * @params:       session parameters passed through to the helper
 * @session_type: VIDC_SESSION_TYPE_DEC selects bufreq_dec()
 * @buftype:      HFI buffer type being queried
 * @bufreq:       requirements structure filled by the helper
 *
 * Returns whatever the selected helper returns.
 *
 * NOTE(review): original line 1332 is not visible in this listing; presumably
 * it is a plain else, so every non-decoder session type goes to bufreq_enc()
 * - confirm.
 */
1327 int hfi_plat_bufreq_v6(struct hfi_plat_buffers_params *params, u32 session_type,
1328 u32 buftype, struct hfi_buffer_requirements *bufreq)
1330 if (session_type == VIDC_SESSION_TYPE_DEC)
1331 return bufreq_dec(params, buftype, bufreq);
1333 return bufreq_enc(params, buftype, bufreq);