2 * Support for Intel Camera Imaging ISP subsystem.
3 * Copyright (c) 2015, Intel Corporation.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 #ifndef _REF_VECTOR_FUNC_H_INCLUDED_
16 #define _REF_VECTOR_FUNC_H_INCLUDED_
18 #include "storage_class.h"
20 #ifdef INLINE_VECTOR_FUNC
21 #define STORAGE_CLASS_REF_VECTOR_FUNC_H STORAGE_CLASS_INLINE
22 #define STORAGE_CLASS_REF_VECTOR_DATA_H STORAGE_CLASS_INLINE_DATA
23 #else /* INLINE_VECTOR_FUNC */
24 #define STORAGE_CLASS_REF_VECTOR_FUNC_H STORAGE_CLASS_EXTERN
25 #define STORAGE_CLASS_REF_VECTOR_DATA_H STORAGE_CLASS_EXTERN_DATA
26 #endif /* INLINE_VECTOR_FUNC */
29 #include "ref_vector_func_types.h"
31 /** @brief Doubling multiply accumulate with saturation
33 * @param[in] acc accumulator
34 * @param[in] a multiply input
35 * @param[in] b multiply input
39 * This function will do a doubling multiply on the
40 * inputs a and b, and will add the result to acc.
41 * In case of an overflow of acc, it will saturate.
43 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector2w OP_1w_maccd_sat(
48 /** @brief Doubling multiply accumulate
50 * @param[in] acc accumulator
51 * @param[in] a multiply input
52 * @param[in] b multiply input
56 * This function will do a doubling multiply on the
57 * inputs a and b, and will add the result to acc.
58 * In case of overflow it will not saturate but wrap around.
60 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector2w OP_1w_maccd(
65 /** @brief Re-aligning multiply
67 * @param[in] a multiply input
68 * @param[in] b multiply input
69 * @param[in] shift shift amount
71 * @return (a*b)>>shift
73 * This function will multiply a with b, followed by a right
74 * shift with rounding. The result is saturated and cast
75 * to single precision.
77 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_mul_realigning(
82 /** @brief Leading bit index
86 * @return index of the leading bit of each element
88 * This function finds the index of leading one (set) bit of the
89 * input. The index starts with 0 for the LSB and can go up to
90 * ISP_VEC_ELEMBITS-1 for the MSB. For an input equal to zero,
91 * the returned index is -1.
93 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_lod(
96 /** @brief Config Unit Input Processing
99 * @param[in] input_scale input scaling factor
100 * @param[in] input_offset input offset factor
102 * @return scaled & offset added input clamped to MAXVALUE
104 * As part of input processing for piecewise linear estimation config unit,
105 * this function will perform scaling followed by adding offset and
106 * then clamping to the MAX InputValue
107 * It asserts -MAX_SHIFT_1W <= input_scale <= MAX_SHIFT_1W, and
108 * -MAX_SHIFT_1W <= input_offset <= MAX_SHIFT_1W
110 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_input_scaling_offset_clamping(
112 tscalar1w_5bit_signed input_scale,
113 tscalar1w_5bit_signed input_offset);
115 /** @brief Config Unit Output Processing
117 * @param[in] a output
118 * @param[in] output_scale output scaling factor
120 * @return scaled & clamped output value
122 * As part of output processing for piecewise linear estimation config unit,
123 * This function will perform scaling and then clamping to output
125 * It asserts -MAX_SHIFT_1W <= output_scale <= MAX_SHIFT_1W
127 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_output_scaling_clamping(
129 tscalar1w_5bit_signed output_scale);
131 /** @brief Config Unit Piecewiselinear estimation
134 * @param[in] config_points config parameter structure
136 * @return piecewise linear estimated output
138 * Given a set of N points {(x1,y1),(x2,y2), ....,(xn,yn)}, to find
139 * the functional value at an arbitrary point around the input set,
140 * this function will perform input processing followed by piecewise
141 * linear estimation and then output processing to yield the final value.
143 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_piecewise_estimation(
145 ref_config_points config_points);
147 /** @brief Fast Config Unit
150 * @param[in] init_vectors LUT data structure
152 * @return piecewise linear estimated output
153 * This block gets an input x and a set of input configuration points stored in a look-up
154 * table of 32 elements. First, the x input is clipped to be within the range [x1, xn+1].
155 * Then, it computes the interval in which the input lies. Finally, the output is computed
156 * by performing linear interpolation based on the interval properties (i.e. x_prev, slope,
157 * and offset). This block assumes that the points are equally spaced and that the interval
158 * size is a power of 2.
160 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_XCU(
162 xcu_ref_init_vectors init_vectors);
168 * @param[in] init_vectors LUT data structure
170 * @return logarithmic piecewise linear estimated output.
171 * This block gets an input x and a set of input configuration points stored in a look-up
172 * table of 32 elements. It computes the interval in which the input lies.
173 * Then output is computed by performing linear interpolation based on the interval
174 * properties (i.e. x_prev, slope, and offset).
175 * This BBB assumes spacing x-coordinates of "init vectors" increase exponentially as
177 * interval size : 2^0 2^1 2^2 2^3
178 * x-coordinates: x0<--->x1<---->x2<---->x3<---->
180 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_LXCU(
182 xcu_ref_init_vectors init_vectors);
186 * @param[in] coring_vec Amount of coring based on brightness level
187 * @param[in] filt_input Vector of input pixels on which Coring is applied
188 * @param[in] m_CnrCoring0 Coring Level0
190 * @return vector of filtered pixels after coring is applied
192 * This function will perform adaptive coring based on brightness level to
195 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w coring(
196 tvector1w coring_vec,
197 tvector1w filt_input,
198 tscalar1w m_CnrCoring0 );
200 /** @brief Normalised FIR with coefficients [3,4,1]
202 * @param[in] m 1x3 matrix with pixels
204 * @return filtered output
206 * This function will calculate the
207 * Normalised FIR with coefficients [3,4,1],
208 *-5dB at Fs/2, -90 degree phase shift (quarter pixel)
210 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_5dB_m90_nrm (
211 const s_1w_1x3_matrix m);
213 /** @brief Normalised FIR with coefficients [1,4,3]
215 * @param[in] m 1x3 matrix with pixels
217 * @return filtered output
219 * This function will calculate the
220 * Normalised FIR with coefficients [1,4,3],
221 *-5dB at Fs/2, +90 degree phase shift (quarter pixel)
223 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_5dB_p90_nrm (
224 const s_1w_1x3_matrix m);
226 /** @brief Normalised FIR with coefficients [1,2,1]
228 * @param[in] m 1x3 matrix with pixels
230 * @return filtered output
232 * This function will calculate the
233 * Normalised FIR with coefficients [1,2,1], -6dB at Fs/2
235 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm (
236 const s_1w_1x3_matrix m);
238 /** @brief Normalised FIR with coefficients [13,16,3]
240 * @param[in] m 1x3 matrix with pixels
242 * @return filtered output
244 * This function will calculate the
245 * Normalised FIR with coefficients [13,16,3],
247 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph0 (
248 const s_1w_1x3_matrix m);
250 /** @brief Normalised FIR with coefficients [9,16,7]
252 * @param[in] m 1x3 matrix with pixels
254 * @return filtered output
256 * This function will calculate the
257 * Normalised FIR with coefficients [9,16,7],
259 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph1 (
260 const s_1w_1x3_matrix m);
262 /** @brief Normalised FIR with coefficients [5,16,11]
264 * @param[in] m 1x3 matrix with pixels
266 * @return filtered output
268 * This function will calculate the
269 * Normalised FIR with coefficients [5,16,11],
271 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph2 (
272 const s_1w_1x3_matrix m);
274 /** @brief Normalised FIR with coefficients [1,16,15]
276 * @param[in] m 1x3 matrix with pixels
278 * @return filtered output
280 * This function will calculate the
281 * Normalised FIR with coefficients [1,16,15],
283 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph3 (
284 const s_1w_1x3_matrix m);
286 /** @brief Normalised FIR with programmable phase shift
288 * @param[in] m 1x3 matrix with pixels
289 * @param[in] coeff phase shift
291 * @return filtered output
293 * This function will calculate the
294 * Normalised FIR with coefficients [8-coeff,16,8+coeff],
296 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_calc_coeff (
297 const s_1w_1x3_matrix m, tscalar1w_3bit coeff);
299 /** @brief 3 tap FIR with coefficients [1,1,1]
301 * @param[in] m 1x3 matrix with pixels
303 * @return filtered output
305 * This function will calculate the
306 * FIR with coefficients [1,1,1], -9dB at Fs/2 normalized with factor 1/2
308 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_9dB_nrm (
309 const s_1w_1x3_matrix m);
312 /** @brief symmetric 3 tap FIR acts as LPF or BSF
314 * @param[in] m 1x3 matrix with pixels
315 * @param[in] k filter coefficient shift
316 * @param[in] bsf_flag 1 for BSF and 0 for LPF
318 * @return filtered output
320 * This function performs variable coefficient symmetric 3 tap filter which can
321 * be either used as Low Pass Filter or Band Stop Filter.
322 * Symmetric 3 tap filter with DC gain 1 has filter coefficients [a, 1-2a, a]
323 * For LPF 'a' can be approximated as (1 - 2^(-k))/4, k = 0, 1, 2, ...
324 * and filter output can be approximated as:
325 * out_LPF = ((v00 + v02) - ((v00 + v02) >> k) + (2 * (v01 + (v01 >> k)))) >> 2
326 * For BSF 'a' can be approximated as (1 + 2^(-k))/4, k = 0, 1, 2, ...
327 * and filter output can be approximated as:
328 * out_BSF = ((v00 + v02) + ((v00 + v02) >> k) + (2 * (v01 - (v01 >> k)))) >> 2
329 * For a given filter coefficient shift 'k' and bsf_flag this function
330 * behaves either as LPF or BSF.
331 * All computation is done using 1w arithmetic and implementation does not use
332 * any multiplication.
334 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
335 sym_fir1x3m_lpf_bsf(s_1w_1x3_matrix m,
337 tscalar_bool bsf_flag);
340 /** @brief Normalised 2D FIR with coefficients [1;2;1] * [1,2,1]
342 * @param[in] m 3x3 matrix with pixels
344 * @return filtered output
346 * This function will calculate the
347 * Normalised FIR with coefficients [1;2;1] * [1,2,1]
348 * Unity gain filter through repeated scaling and rounding
349 * - 6 rotate operations per output
350 * - 8 vector operations per output
352 * 14 total operations
354 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir3x3m_6dB_nrm (
355 const s_1w_3x3_matrix m);
357 /** @brief Normalised 2D FIR with coefficients [1;1;1] * [1,1,1]
359 * @param[in] m 3x3 matrix with pixels
361 * @return filtered output
363 * This function will calculate the
364 * Normalised FIR with coefficients [1;1;1] * [1,1,1]
366 * (near) Unity gain filter through repeated scaling and rounding
367 * - 6 rotate operations per output
368 * - 8 vector operations per output
372 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir3x3m_9dB_nrm (
373 const s_1w_3x3_matrix m);
375 /** @brief Normalised dual output 2D FIR with coefficients [1;2;1] * [1,2,1]
377 * @param[in] m 4x3 matrix with pixels
379 * @return two filtered outputs (2x1 matrix)
381 * This function will calculate the
382 * Normalised FIR with coefficients [1;2;1] * [1,2,1]
383 * and produce two outputs (vertical)
384 * Unity gain filter through repeated scaling and rounding
385 * compute two outputs per call to re-use common intermediates
386 * - 4 rotate operations per output
387 * - 6 vector operations per output (alternative possible, but in this
388 * form it's not obvious to re-use variables)
390 * 10 total operations
392 STORAGE_CLASS_REF_VECTOR_FUNC_H s_1w_2x1_matrix fir3x3m_6dB_out2x1_nrm (
393 const s_1w_4x3_matrix m);
395 /** @brief Normalised dual output 2D FIR with coefficients [1;1;1] * [1,1,1]
397 * @param[in] m 4x3 matrix with pixels
399 * @return two filtered outputs (2x1 matrix)
401 * This function will calculate the
402 * Normalised FIR with coefficients [1;1;1] * [1,1,1]
403 * and produce two outputs (vertical)
404 * (near) Unity gain filter through repeated scaling and rounding
405 * compute two outputs per call to re-use common intermediates
406 * - 4 rotate operations per output
407 * - 7 vector operations per output (alternative possible, but in this
408 * form it's not obvious to re-use variables)
410 * 11 total operations
412 STORAGE_CLASS_REF_VECTOR_FUNC_H s_1w_2x1_matrix fir3x3m_9dB_out2x1_nrm (
413 const s_1w_4x3_matrix m);
415 /** @brief Normalised 2D FIR 5x5
417 * @param[in] m 5x5 matrix with pixels
419 * @return filtered output
421 * This function will calculate the
422 * Normalised FIR with coefficients [1;1;1] * [1;2;1] * [1,2,1] * [1,1,1]
423 * and produce a filtered output
424 * (near) Unity gain filter through repeated scaling and rounding
425 * - 20 rotate operations per output
426 * - 28 vector operations per output
428 * 48 total operations
430 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir5x5m_15dB_nrm (
431 const s_1w_5x5_matrix m);
433 /** @brief Normalised FIR 1x5
435 * @param[in] m 1x5 matrix with pixels
437 * @return filtered output
439 * This function will calculate the
440 * Normalised FIR with coefficients [1,2,1] * [1,2,1] = [1,4,6,4,1]
441 * and produce a filtered output
442 * (near) Unity gain filter through repeated scaling and rounding
443 * - 4 rotate operations per output
444 * - 5 vector operations per output
448 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x5m_12dB_nrm (
449 const s_1w_1x5_matrix m);
451 /** @brief Normalised 2D FIR 5x5
453 * @param[in] m 5x5 matrix with pixels
455 * @return filtered output
457 * This function will calculate the
458 * Normalised FIR with coefficients [1;2;1] * [1;2;1] * [1,2,1] * [1,2,1]
459 * and produce a filtered output
460 * (near) Unity gain filter through repeated scaling and rounding
461 * - 20 rotate operations per output
462 * - 30 vector operations per output
464 * 50 total operations
466 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir5x5m_12dB_nrm (
467 const s_1w_5x5_matrix m);
469 /** @brief Approximate averaging FIR 1x5
471 * @param[in] m 1x5 matrix with pixels
473 * @return filtered output
475 * This function will produce filtered output by
476 * applying the filter coefficients (1/8) * [1,1,1,1,1]
478 * 5 vector operations
480 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x5m_box (
483 /** @brief Approximate averaging FIR 1x9
485 * @param[in] m 1x9 matrix with pixels
487 * @return filtered output
489 * This function will produce filtered output by
490 * applying the filter coefficients (1/16) * [1,1,1,1,1,1,1,1,1]
492 * 9 vector operations
494 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x9m_box (
497 /** @brief Approximate averaging FIR 1x11
499 * @param[in] m 1x11 matrix with pixels
501 * @return filtered output
503 * This function will produce filtered output by
504 * applying the filter coefficients (1/16) * [1,1,1,1,1,1,1,1,1,1,1]
506 * 12 vector operations
508 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x11m_box (
511 /** @brief Symmetric 7 tap filter with normalization
513 * @param[in] in 1x7 matrix with pixels
514 * @param[in] coeff 1x4 matrix with coefficients
515 * @param[in] out_shift output pixel shift value for normalization
517 * @return symmetric 7 tap filter output
519 * This function performs symmetric 7 tap filter over input pixels.
520 * Filter sum is normalized by shifting out_shift bits.
521 * Filter sum: p0*c3 + p1*c2 + p2*c1 + p3*c0 + p4*c1 + p5*c2 + p6*c3
522 * is implemented as: (p0 + p6)*c3 + (p1 + p5)*c2 + (p2 + p4)*c1 + p3*c0 to
523 * reduce multiplication.
524 * Input pixels should be scaled, otherwise overflow is possible during
527 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
528 fir1x7m_sym_nrm(s_1w_1x7_matrix in,
529 s_1w_1x4_matrix coeff,
530 tvector1w out_shift);
532 /** @brief Symmetric 7 tap filter with normalization at input side
534 * @param[in] in 1x7 matrix with pixels
535 * @param[in] coeff 1x4 matrix with coefficients
537 * @return symmetric 7 tap filter output
539 * This function performs symmetric 7 tap filter over input pixels.
540 * Filter sum: p0*c3 + p1*c2 + p2*c1 + p3*c0 + p4*c1 + p5*c2 + p6*c3
541 * = (p0 + p6)*c3 + (p1 + p5)*c2 + (p2 + p4)*c1 + p3*c0
542 * Input pixels and coefficients are in Qn format, where n =
543 * ISP_VEC_ELEMBITS - 1 (ie Q15 for Broxton)
544 * To avoid double precision arithmetic input pixel sum and final sum is
545 * implemented using avgrnd and coefficient multiplication using qrmul.
546 * Final result is in Qm format where m = ISP_VEC_ELEMBITS - 2 (ie Q14 for
549 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
550 fir1x7m_sym_innrm_approx(s_1w_1x7_matrix in,
551 s_1w_1x4_matrix coeff);
553 /** @brief Symmetric 7 tap filter with normalization at output side
555 * @param[in] in 1x7 matrix with pixels
556 * @param[in] coeff 1x4 matrix with coefficients
558 * @return symmetric 7 tap filter output
560 * This function performs symmetric 7 tap filter over input pixels.
561 * Filter sum: p0*c3 + p1*c2 + p2*c1 + p3*c0 + p4*c1 + p5*c2 + p6*c3
562 * = (p0 + p6)*c3 + (p1 + p5)*c2 + (p2 + p4)*c1 + p3*c0
563 * Input pixels are in Qn and coefficients are in Qm format, where n =
564 * ISP_VEC_ELEMBITS - 2 and m = ISP_VEC_ELEMBITS - 1 (ie Q14 and Q15
565 * respectively for Broxton)
566 * To avoid double precision arithmetic input pixel sum and final sum is
567 * implemented using addsat and coefficient multiplication using qrmul.
568 * Final sum is left shifted by 2 and saturated to produce result in Qm format
569 * (ie Q15 for Broxton)
571 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
572 fir1x7m_sym_outnrm_approx(s_1w_1x7_matrix in,
573 s_1w_1x4_matrix coeff);
575 /** @brief 4 tap filter with normalization
577 * @param[in] in 1x4 matrix with pixels
578 * @param[in] coeff 1x4 matrix with coefficients
579 * @param[in] out_shift output pixel shift value for normalization
581 * @return 4 tap filter output
583 * This function performs 4 tap filter over input pixels.
584 * Filter sum is normalized by shifting out_shift bits.
585 * Filter sum: p0*c0 + p1*c1 + p2*c2 + p3*c3
587 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
588 fir1x4m_nrm(s_1w_1x4_matrix in,
589 s_1w_1x4_matrix coeff,
590 tvector1w out_shift);
592 /** @brief 4 tap filter with normalization for half pixel interpolation
594 * @param[in] in 1x4 matrix with pixels
596 * @return 4 tap filter output with filter tap [-1 9 9 -1]/16
598 * This function performs 4 tap filter over input pixels.
599 * Filter sum: -p0 + 9*p1 + 9*p2 - p3
600 * This filter implementation is completely free from multiplication and double
601 * precision arithmetic.
602 * Typical usage of this filter is for half pixel interpolation of Bezier
605 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
606 fir1x4m_bicubic_bezier_half(s_1w_1x4_matrix in);
608 /** @brief 4 tap filter with normalization for quarter pixel interpolation
610 * @param[in] in 1x4 matrix with pixels
611 * @param[in] coeff 1x4 matrix with coefficients
613 * @return 4 tap filter output
615 * This function performs 4 tap filter over input pixels.
616 * Filter sum: p0*c0 + p1*c1 + p2*c2 + p3*c3
617 * To avoid double precision arithmetic we implemented multiplication using
618 * qrmul and addition using avgrnd. Coefficients( c0 to c3) formats are assumed
619 * to be: Qm, Qn, Qo, Qm, where m = n + 2 and o = n + 1.
620 * Typical usage of this filter is for quarter pixel interpolation of Bezier
621 * surface with filter coefficients:[-9 111 29 -3]/128. For which coefficient
622 * values should be: [-9216/2^17 28416/2^15 14848/2^16 -3072/2^17] for
623 * ISP_VEC_ELEMBITS = 16.
625 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
626 fir1x4m_bicubic_bezier_quarter(s_1w_1x4_matrix in,
627 s_1w_1x4_matrix coeff);
630 /** @brief Symmetric 3 tap filter with normalization
632 * @param[in] in 1x3 matrix with pixels
633 * @param[in] coeff 1x2 matrix with coefficients
634 * @param[in] out_shift output pixel shift value for normalization
636 * @return symmetric 3 tap filter output
638 * This function performs symmetric 3 tap filter over input pixels.
639 * Filter sum is normalized by shifting out_shift bits.
640 * Filter sum: p0*c1 + p1*c0 + p2*c1
641 * is implemented as: (p0 + p2)*c1 + p1*c0 to reduce multiplication.
642 * Input pixels should be scaled, otherwise overflow is possible during
645 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
646 fir1x3m_sym_nrm(s_1w_1x3_matrix in,
647 s_1w_1x2_matrix coeff,
648 tvector1w out_shift);
650 /** @brief Symmetric 3 tap filter with normalization
652 * @param[in] in 1x3 matrix with pixels
653 * @param[in] coeff 1x2 matrix with coefficients
655 * @return symmetric 3 tap filter output
657 * This function performs symmetric 3 tap filter over input pixels.
658 * Filter sum: p0*c1 + p1*c0 + p2*c1 = (p0 + p2)*c1 + p1*c0
659 * Input pixels are in Qn and coefficient c0 is in Qm and c1 is in Qn format,
660 * where n = ISP_VEC_ELEMBITS - 1 and m = ISP_VEC_ELEMBITS - 2 ( ie Q15 and Q14
661 * respectively for Broxton)
662 * To avoid double precision arithmetic input pixel sum is implemented using
663 * avgrnd, coefficient multiplication using qrmul and final sum using addsat
664 * Final sum is Qm format (ie Q14 for Broxton)
666 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
667 fir1x3m_sym_nrm_approx(s_1w_1x3_matrix in,
668 s_1w_1x2_matrix coeff);
670 /** @brief Mean of 1x3 matrix
672 * @param[in] m 1x3 matrix with pixels
674 * @return mean of 1x3 matrix
676 * This function calculates the mean of 1x3 pixels,
677 * with a factor of 4/3.
679 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x3m(
682 /** @brief Mean of 3x3 matrix
684 * @param[in] m 3x3 matrix with pixels
686 * @return mean of 3x3 matrix
688 * This function calculates the mean of 3x3 pixels,
689 * with a factor of 16/9.
691 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean3x3m(
694 /** @brief Mean of 1x4 matrix
696 * @param[in] m 1x4 matrix with pixels
698 * @return mean of 1x4 matrix
700 * This function calculates the mean of 1x4 pixels
702 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x4m(
705 /** @brief Mean of 4x4 matrix
707 * @param[in] m 4x4 matrix with pixels
709 * @return mean of 4x4 matrix
711 * This function calculates the mean of 4x4 matrix with pixels
713 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean4x4m(
716 /** @brief Mean of 2x3 matrix
718 * @param[in] m 2x3 matrix with pixels
720 * @return mean of 2x3 matrix
722 * This function calculates the mean of 2x3 matrix with pixels
723 * with a factor of 8/6.
725 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean2x3m(
728 /** @brief Mean of 1x5 matrix
730 * @param[in] m 1x5 matrix with pixels
732 * @return mean of 1x5 matrix
734 * This function calculates the mean of 1x5 matrix with pixels
735 * with a factor of 8/5.
737 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x5m(s_1w_1x5_matrix m);
739 /** @brief Mean of 1x6 matrix
741 * @param[in] m 1x6 matrix with pixels
743 * @return mean of 1x6 matrix
745 * This function calculates the mean of 1x6 matrix with pixels
746 * with a factor of 8/6.
748 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x6m(
751 /** @brief Mean of 5x5 matrix
753 * @param[in] m 5x5 matrix with pixels
755 * @return mean of 5x5 matrix
757 * This function calculates the mean of 5x5 matrix with pixels
758 * with a factor of 32/25.
760 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean5x5m(
763 /** @brief Mean of 6x6 matrix
765 * @param[in] m 6x6 matrix with pixels
767 * @return mean of 6x6 matrix
769 * This function calculates the mean of 6x6 matrix with pixels
770 * with a factor of 64/36.
772 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean6x6m(
775 /** @brief Minimum of 4x4 matrix
777 * @param[in] m 4x4 matrix with pixels
779 * @return minimum of 4x4 matrix
781 * This function calculates the minimum of
782 * 4x4 matrix with pixels.
784 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w min4x4m(
787 /** @brief Maximum of 4x4 matrix
789 * @param[in] m 4x4 matrix with pixels
791 * @return maximum of 4x4 matrix
793 * This function calculates the maximum of
794 * 4x4 matrix with pixels.
796 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w max4x4m(
799 /** @brief SAD between two 3x3 matrices
801 * @param[in] a 3x3 matrix with pixels
803 * @param[in] b 3x3 matrix with pixels
805 * @return 3x3 matrix SAD
807 * This function calculates the sum of absolute difference between two matrices.
808 * Both input pixels and SAD are normalized by a factor of SAD3x3_IN_SHIFT and
809 * SAD3x3_OUT_SHIFT respectively.
810 * Computed SAD is 1/(2 ^ (SAD3x3_IN_SHIFT + SAD3x3_OUT_SHIFT)) ie 1/16 factor
811 * of original SAD and it's more precise than sad3x3m()
813 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w sad3x3m_precise(
817 /** @brief SAD between two 3x3 matrices
819 * @param[in] a 3x3 matrix with pixels
821 * @param[in] b 3x3 matrix with pixels
823 * @return 3x3 matrix SAD
825 * This function calculates the sum of absolute difference between two matrices.
826 * This version saves cycles by avoiding input normalization and wide vector
827 * operation during sum computation
828 * Input pixel differences are computed by absolute of rounded, halved
829 * subtraction. Normalized sum is computed by rounded averages.
830 * Computed SAD is (1/2)*(1/16) = 1/32 factor of original SAD. Factor 1/2 comes
831 * from input halving operation and factor 1/16 comes from mean operation
833 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w sad3x3m(
837 /** @brief SAD between two 5x5 matrices
839 * @param[in] a 5x5 matrix with pixels
841 * @param[in] b 5x5 matrix with pixels
843 * @return 5x5 matrix SAD
845 * Computed SAD is = 1/32 factor of original SAD.
847 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w sad5x5m(
851 /** @brief Absolute gradient between two sets of 1x5 matrices
853 * @param[in] m0 first set of 1x5 matrix with pixels
854 * @param[in] m1 second set of 1x5 matrix with pixels
856 * @return absolute gradient between two 1x5 matrices
858 * This function computes mean of two input 1x5 matrices and returns
859 * absolute difference between two mean values.
861 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
862 absgrad1x5m(s_1w_1x5_matrix m0, s_1w_1x5_matrix m1);
864 /** @brief Bi-linear Interpolation optimized(approximate)
866 * @param[in] a input0
867 * @param[in] b input1
868 * @param[in] c cloned weight factor
870 * @return (a-b)*c + b
872 * This function will do bi-linear Interpolation on
873 * inputs a and b using constant weight factor c
875 * Inputs a,b are assumed in S1.15 format
876 * Weight factor has to be in range [0,1] and is assumed to be in S2.14 format
878 * The bilinear interpolation equation is (a*c) + b*(1-c),
879 * But this is implemented as (a-b)*c + b for optimization
881 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_bilinear_interpol_approx_c(
886 /** @brief Bi-linear Interpolation optimized(approximate)
888 * @param[in] a input0
889 * @param[in] b input1
890 * @param[in] c weight factor
892 * @return (a-b)*c + b
894 * This function will do bi-linear Interpolation on
895 * inputs a and b using weight factor c
897 * Inputs a,b are assumed in S1.15 format
898 * Weight factor has to be in range [0,1] and is assumed to be in S2.14 format
900 * The bilinear interpolation equation is (a*c) + b*(1-c),
901 * But this is implemented as (a-b)*c + b for optimization
903 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_bilinear_interpol_approx(
908 /** @brief Bi-linear Interpolation
910 * @param[in] a input0
911 * @param[in] b input1
912 * @param[in] c weight factor
914 * @return (a*c) + b*(1-c)
916 * This function will do bi-linear Interpolation on
917 * inputs a and b using weight factor c
919 * Inputs a,b are assumed in S1.15 format
920 * Weight factor has to be in range [0,1] and is assumed to be in S2.14 format
922 * The bilinear interpolation equation is (a*c) + b*(1-c),
924 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_bilinear_interpol(
929 /** @brief Generic Block Matching Algorithm
930 * @param[in] search_window pointer to input search window of 16x16 pixels
931 * @param[in] ref_block pointer to input reference block of 8x8 pixels, where N<=M
932 * @param[out] output pointer to output sads
933 * @param[in] search_sz search size for SAD computation
934 * @param[in] ref_sz block size
935 * @param[in] pixel_shift pixel shift to search the data
936 * @param[in] search_block_sz search window block size
937 * @param[in] shift shift value, with which the output is shifted right
939 * @return 0 when the computation is successful.
941 * This function compares the reference block with a block of size NxN in the search
942 * window. The sum of absolute differences between each pixel in the reference block and
943 * the corresponding pixel in the search block is computed. The whole search window is
944 * traversed with the reference block with the given pixel shift.
947 STORAGE_CLASS_REF_VECTOR_FUNC_H int generic_block_matching_algorithm(
948 tscalar1w **search_window,
949 tscalar1w **ref_block,
955 tscalar1w_4bit_bma_shift shift);
958 /** @brief OP_1w_asp_bma_16_1_32way
960 /** @brief OP_1w_asp_bma_16_1_32way_nomask
963 * @param[in] search_area input search window of 16x16 pixels
964 * @param[in] input_block input reference block of 8x8 pixels, where N<=M
965 * @param[in] shift shift value, with which the output is shifted right
967 * @return 81 SADs for all the search blocks.
969 * This function compares the reference block with a block of size 8x8 pixels in the
970 * search window of 16x16 pixels. Sum of absolute differences for each pixel in the
971 * reference block and the corresponding pixel in the search block is calculated.
972 * Whole search window is traversed with the reference block with a pixel shift of 1
973 * pixel. The output is right shifted with the given shift value. The shift value is
979 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_1 OP_1w_asp_bma_16_1_32way(
981 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_1 OP_1w_asp_bma_16_1_32way_nomask(
983 bma_16x16_search_window search_area,
984 ref_block_8x8 input_block,
985 tscalar1w_4bit_bma_shift shift);
988 /** @brief OP_1w_asp_bma_16_2_32way
990 /** @brief OP_1w_asp_bma_16_2_32way_nomask
993 * @param[in] search_area input search window of 16x16 pixels
994 * @param[in] input_block input reference block of 8x8 pixels, where N<=M
995 * @param[in] shift shift value, with which the output is shifted right
997 * @return 25 SADs for all the search blocks.
998 * This function compares the reference block with a block of size 8x8 in the search
999 * window of 16x16. Sum of absolute differences for each pixel in the reference block
1000 * and the corresponding pixel in the search block is computed. Whole search window is
1001 * traversed with the reference block with the given pixel shift of 2 pixels. The output
1002 * is right shifted with the given shift value. The shift value is a 4 bit value.
1007 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_2 OP_1w_asp_bma_16_2_32way(
1009 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_2 OP_1w_asp_bma_16_2_32way_nomask(
1011 bma_16x16_search_window search_area,
1012 ref_block_8x8 input_block,
1013 tscalar1w_4bit_bma_shift shift);
1015 /** @brief OP_1w_asp_bma_14_1_32way
1017 /** @brief OP_1w_asp_bma_14_1_32way_nomask
1020 * @param[in] search_area input search block of 16x16 pixels with search window of 14x14 pixels
1021 * @param[in] input_block input reference block of 8x8 pixels, where N<=M
1022 * @param[in] shift shift value, with which the output is shifted right
1024 * @return 49 SADs for all the search blocks.
1025 * This function compares the reference block with a block of size 8x8 in the search
1026 * window of 14x14. Sum of absolute differences for each pixel in the reference block
1027 * and the corresponding pixel in the search block is computed. Whole search window is
1028 * traversed with the reference block with 1 pixel shift. The output is right shifted with the
1029 * given shift value. The shift value is a 4 bit value. Input is always a 16x16 block
1030 * but the search window is 14x14, with last 2 pixels of row and column are not used
1036 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_1 OP_1w_asp_bma_14_1_32way(
1038 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_1 OP_1w_asp_bma_14_1_32way_nomask(
1040 bma_16x16_search_window search_area,
1041 ref_block_8x8 input_block,
1042 tscalar1w_4bit_bma_shift shift);
1045 /** @brief OP_1w_asp_bma_14_2_32way
1047 /** @brief OP_1w_asp_bma_14_2_32way_nomask
1050 * @param[in] search_area input search block of 16x16 pixels with search window of 14x14 pixels
1051 * @param[in] input_block input reference block of 8x8 pixels, where N<=M
1052 * @param[in] shift shift value, with which the output is shifted right
1054 * @return 16 SADs for all the search blocks.
1055 * This function compares the reference block with a block of size 8x8 in the search
1056 * window of 14x14. Sum of absolute differences for each pixel in the reference block
1057 * and the corresponding pixel in the search block is computed. The whole search window
1058 * is traversed with the reference block with a 2 pixel shift. The output is right shifted
1059 * with the given shift value. The shift value is a 4 bit value.
1064 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_2 OP_1w_asp_bma_14_2_32way(
1066 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_2 OP_1w_asp_bma_14_2_32way_nomask(
1068 bma_16x16_search_window search_area,
1069 ref_block_8x8 input_block,
1070 tscalar1w_4bit_bma_shift shift);
1073 /** @brief multiplex addition and passing
1075 * @param[in] _a first pixel
1076 * @param[in] _b second pixel
1077 * @param[in] _c condition flag
1079 * @return (_a + _b) if condition flag is true
1080 * _a if condition flag is false
1082 * This function does multiplex addition depending on the input condition flag
1084 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_cond_add(
1091 /** @brief OP_1w_single_bfa_7x7
1093 * @param[in] weights - spatial and range weight lut
1094 * @param[in] threshold - threshold plane, for range weight scaling
1095 * @param[in] central_pix - central pixel plane
1096 * @param[in] src_plane - src pixel plane
1098 * @return Bilateral filter output
1100 * This function implements a 7x7 single bilateral filter.
1101 * Output = {sum(pixel * weight), sum(weight)}
1102 * Where sum is summation over 7x7 block set.
1103 * weight = spatial weight * range weight
1104 * spatial weights are loaded from spatial_weight_lut depending on src pixel
1105 * position in the 7x7 block
1106 * range weights are computed by table look up from range_weight_lut depending
1107 * on scaled absolute difference between src and central pixels.
1108 * threshold is used as scaling factor. range_weight_lut consists of
1109 * BFA_RW_LUT_SIZE numbers of LUT entries to model any distribution function.
1110 * Piecewise linear approximation technique is used to compute range weight
1111 * It computes absolute difference between central pixel and 61 src pixels.
1113 STORAGE_CLASS_REF_VECTOR_FUNC_H bfa_7x7_output OP_1w_single_bfa_7x7(
1114 bfa_weights weights, /* spatial and range weight LUTs */
1115 tvector1w threshold, /* scaling factor applied before the range weight lookup */
1116 tvector1w central_pix, /* central pixel the source pixels are compared against */
1117 s_1w_7x7_matrix src_plane); /* 7x7 block of source pixels */
1119 /** @brief OP_1w_joint_bfa_7x7
1121 * @param[in] weights - spatial and range weight lut
1122 * @param[in] threshold0 - 1st threshold plane, for range weight scaling
1123 * @param[in] central_pix0 - 1st central pixel plane
1124 * @param[in] src0_plane - 1st pixel plane
1125 * @param[in] threshold1 - 2nd threshold plane, for range weight scaling
1126 * @param[in] central_pix1 - 2nd central pixel plane
1127 * @param[in] src1_plane - 2nd pixel plane
1129 * @return Joint bilateral filter output
1131 * This function implements a 7x7 joint bilateral filter.
1132 * Output = {sum(pixel * weight), sum(weight)}
1133 * Where sum is summation over 7x7 block set.
1134 * weight = spatial weight * range weight
1135 * spatial weights are loaded from spatial_weight_lut depending on src pixel
1136 * position in the 7x7 block
1137 * range weights are computed by table look up from range_weight_lut depending
1138 * on sum of scaled absolute difference between central pixel and two src pixel
1139 * planes. threshold is used as scaling factor. range_weight_lut consists of
1140 * BFA_RW_LUT_SIZE numbers of LUT entries to model any distribution function.
1141 * Piecewise linear approximation technique is used to compute range weight
1142 * It computes absolute difference between central pixel and 61 src pixels.
1144 STORAGE_CLASS_REF_VECTOR_FUNC_H bfa_7x7_output OP_1w_joint_bfa_7x7(
1145 bfa_weights weights, /* spatial and range weight LUTs */
1146 tvector1w threshold0, /* range weight scaling for the 1st plane */
1147 tvector1w central_pix0, /* central pixel of the 1st plane */
1148 s_1w_7x7_matrix src0_plane, /* 1st 7x7 source pixel plane */
1149 tvector1w threshold1, /* range weight scaling for the 2nd plane */
1150 tvector1w central_pix1, /* central pixel of the 2nd plane */
1151 s_1w_7x7_matrix src1_plane); /* 2nd 7x7 source pixel plane */
1153 /** @brief bbb_bfa_gen_spatial_weight_lut
1155 * @param[in] in - 7x7 matrix of spatial weights
1156 * @param[out] out - generated LUT
1160 * This function creates the spatial weight look-up table used
1161 * by the bilateral filter instruction.
1163 STORAGE_CLASS_REF_VECTOR_FUNC_H void bbb_bfa_gen_spatial_weight_lut(
1165 tvector1w out[BFA_MAX_KWAY]);
1167 /** @brief bbb_bfa_gen_range_weight_lut
1169 * @param[in] in - input range weights
1170 * @param[out] out - generated LUT
1174 * This function creates the range weight look-up table used
1175 * by the bilateral filter instruction.
1176 * 8 unsigned 7-bit weights are represented in 7 16-bit LUT entries.
1177 * LUT formation is done as follows:
1178 * higher 8 bit: Point(N) = Point(N+1) - Point(N)
1179 * lower 8 bit: Point(N) = Point(N)
1180 * Weight function can be any monotonic decreasing function for x >= 0
1182 STORAGE_CLASS_REF_VECTOR_FUNC_H void bbb_bfa_gen_range_weight_lut(
1183 tvector1w in[BFA_RW_LUT_SIZE+1], /* input points: one more than the LUT has entries, since entry N also uses Point(N+1) */
1184 tvector1w out[BFA_RW_LUT_SIZE]); /* generated LUT: each entry packs Point(N) and Point(N+1) - Point(N) */
1188 /** @brief OP_1w_imax32
1190 * @param[in] src - structure that holds an array of 32 elements.
1192 * @return maximum element among input array.
1194 * This function gets the maximum element from an array of 32 elements.
1196 STORAGE_CLASS_REF_VECTOR_FUNC_H int OP_1w_imax32(
1197 imax32_ref_in_vector src); /* structure wrapping the 32-element input array, passed by value */
1199 /** @brief OP_1w_imaxidx32
1201 * @param[in] src - structure that holds a vector of elements.
1203 * @return index of first element with maximum value among array.
1205 * This function gets index of first element with maximum value
1208 STORAGE_CLASS_REF_VECTOR_FUNC_H int OP_1w_imaxidx32(
1209 imax32_ref_in_vector src); /* same input structure type as OP_1w_imax32, passed by value */
1212 #ifndef INLINE_VECTOR_FUNC
1213 #define STORAGE_CLASS_REF_VECTOR_FUNC_C /* expands to nothing: no extra storage class on the function definitions */
1214 #define STORAGE_CLASS_REF_VECTOR_DATA_C const /* data definitions are plain const objects */
1215 #else /* INLINE_VECTOR_FUNC */
1216 #define STORAGE_CLASS_REF_VECTOR_FUNC_C STORAGE_CLASS_REF_VECTOR_FUNC_H /* definitions reuse the storage class of the declarations */
1217 #define STORAGE_CLASS_REF_VECTOR_DATA_C STORAGE_CLASS_REF_VECTOR_DATA_H /* likewise for data */
1218 #include "ref_vector_func.c" /* inline build: the definitions are included directly into this header */
1219 #define VECTOR_FUNC_INLINED /* set once the .c file has been included above */
1220 #endif /* INLINE_VECTOR_FUNC */
1222 #endif /*_REF_VECTOR_FUNC_H_INCLUDED_*/