drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/ref_vector_func.h

   1 /*
   2  * Support for Intel Camera Imaging ISP subsystem.
   3  * Copyright (c) 2015, Intel Corporation.
   4  *
   5  * This program is free software; you can redistribute it and/or modify it
   6  * under the terms and conditions of the GNU General Public License,
   7  * version 2, as published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  12  * more details.
  13  */
  14
  15 #ifndef _REF_VECTOR_FUNC_H_INCLUDED_
  16 #define _REF_VECTOR_FUNC_H_INCLUDED_
  17
  18 #include "storage_class.h"
  19
  20 #ifdef INLINE_VECTOR_FUNC /* inline build: the prototypes below take the INLINE storage classes */
  21 #define STORAGE_CLASS_REF_VECTOR_FUNC_H STORAGE_CLASS_INLINE
  22 #define STORAGE_CLASS_REF_VECTOR_DATA_H STORAGE_CLASS_INLINE_DATA
  23 #else /* !INLINE_VECTOR_FUNC: the prototypes below take the EXTERN storage classes */
  24 #define STORAGE_CLASS_REF_VECTOR_FUNC_H STORAGE_CLASS_EXTERN
  25 #define STORAGE_CLASS_REF_VECTOR_DATA_H STORAGE_CLASS_EXTERN_DATA
  26 #endif  /* INLINE_VECTOR_FUNC */
  27
  28
  29 #include "ref_vector_func_types.h"
  30
  31 /** @brief Doubling multiply accumulate with saturation
  32  *
  33  * @param[in] acc accumulator
  34  * @param[in] a multiply input
  35  * @param[in] b multiply input
  36   *
  37  * @return              acc + (a*b)
  38  *
  39  * This function will do a doubling multiply on
  40  * inputs a and b, and will add the result to acc.
  41  * In case of an overflow of acc, it will saturate.
  42  */
  43 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector2w OP_1w_maccd_sat(
  44         tvector2w acc,
  45         tvector1w a,
  46         tvector1w b );
  47
  48 /** @brief Doubling multiply accumulate
  49  *
  50  * @param[in] acc accumulator
  51  * @param[in] a multiply input
  52  * @param[in] b multiply input
  53   *
  54  * @return              acc + (a*b)
  55  *
  56  * This function will do a doubling multiply on
  57  * inputs a and b, and will add the result to acc.
  58  * In case of overflow it will not saturate but wrap around.
  59  */
  60 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector2w OP_1w_maccd(
  61         tvector2w acc,
  62         tvector1w a,
  63         tvector1w b );
  64
  65 /** @brief Re-aligning multiply
  66  *
  67  * @param[in] a multiply input
  68  * @param[in] b multiply input
  69  * @param[in] shift shift amount
  70  *
  71  * @return              (a*b)>>shift
  72  *
  73  * This function will multiply a with b, followed by a right
  74  * shift with rounding. The result is saturated and cast
  75  * to single precision.
  76  */
  77 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_mul_realigning(
  78         tvector1w a,
  79         tvector1w b,
  80         tscalar1w shift );
  81
  82 /** @brief Leading bit index
  83  *
  84  * @param[in] a         input
  85  *
  86  * @return              index of the leading bit of each element
  87  *
  88  * This function finds the index of the leading one (set) bit of the
  89  * input. The index starts with 0 for the LSB and can go up to
  90  * ISP_VEC_ELEMBITS-1 for the MSB. For an input equal to zero,
  91  * the returned index is -1.
  92  */
  93 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_lod(
  94                 tvector1w a);
  95
  96 /** @brief Config Unit Input Processing
  97  *
  98  * @param[in] a             input
  99  * @param[in] input_scale   input scaling factor
 100  * @param[in] input_offset  input offset factor
 101  *
 102  * @return                  scaled & offset added input clamped to MAXVALUE
 103  *
 104  * As part of input processing for piecewise linear estimation config unit,
 105  * this function will perform scaling followed by adding offset and
 106  * then clamping to the MAX InputValue
 107  * It asserts -MAX_SHIFT_1W <= input_scale <= MAX_SHIFT_1W, and
 108  * -MAX_SHIFT_1W <= input_offset <= MAX_SHIFT_1W
 109  */
 110 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_input_scaling_offset_clamping(
 111         tvector1w a,
 112         tscalar1w_5bit_signed input_scale,
 113         tscalar1w_5bit_signed input_offset);
 114
 115 /** @brief Config Unit Output Processing
 116  *
 117  * @param[in] a              output
 118  * @param[in] output_scale   output scaling factor
 119  *
 120  * @return                   scaled & clamped output value
 121  *
 122  * As part of output processing for piecewise linear estimation config unit,
 123  * This function will perform scaling and then clamping to output
 124  * MAX value.
 125  * It asserts -MAX_SHIFT_1W <= output_scale <= MAX_SHIFT_1W
 126  */
 127 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_output_scaling_clamping(
 128         tvector1w a,
 129         tscalar1w_5bit_signed output_scale);
 130
 131 /** @brief Config Unit Piecewiselinear estimation
 132  *
 133  * @param[in] a                   input
 134  * @param[in] config_points   config parameter structure
 135  *
 136  * @return                         piecewise linear estimated output
 137  *
 138  * Given a set of N points {(x1,y1), (x2,y2), ..., (xn,yn)}, to find
 139  * the functional value at an arbitrary point around the input set,
 140  * this function will perform input processing followed by piecewise
 141  * linear estimation and then output processing to yield the final value.
 142  */
 143 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_piecewise_estimation(
 144         tvector1w a,
 145         ref_config_points config_points);
 146
 147 /** @brief Fast Config Unit
 148  *
 149  * @param[in] x                 input
 150  * @param[in] init_vectors      LUT data structure
 151  *
 152  * @return      piecewise linear estimated output
 153  * This block gets an input x and a set of input configuration points stored in a look-up
 154  * table of 32 elements. First, the x input is clipped to be within the range [x1, xn+1].
 155  * Then, it computes the interval in which the input lies. Finally, the output is computed
 156  * by performing linear interpolation based on the interval properties (i.e. x_prev, slope,
 157  * and offset). This block assumes that the points are equally spaced and that the interval
 158  * size is a power of 2.
 159  **/
 160 STORAGE_CLASS_REF_VECTOR_FUNC_H  tvector1w OP_1w_XCU(
 161         tvector1w x,
 162         xcu_ref_init_vectors init_vectors);
 163
 164
 165 /** @brief LXCU
 166  *
 167  * @param[in] x                 input
 168  * @param[in] init_vectors      LUT data structure
 169  *
 170  * @return   logarithmic piecewise linear estimated output.
 171  * This block gets an input x and a set of input configuration points stored in a look-up
 172  * table of 32 elements. It computes the interval in which the input lies.
 173  * Then the output is computed by performing linear interpolation based on the
 174  * interval properties (i.e. x_prev, slope, and offset).
 175  * This block assumes that the spacing between the x-coordinates of the
 176  * "init vectors" increases exponentially, as shown below.
 177  * interval size :   2^0    2^1      2^2    2^3
 178  * x-coordinates: x0<--->x1<---->x2<---->x3<---->
 179  **/
 180 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_LXCU(
 181         tvector1w x,
 182         xcu_ref_init_vectors init_vectors);
 183
 184 /** @brief Coring
 185  *
 186  * @param[in] coring_vec   Amount of coring based on brightness level
 187  * @param[in] filt_input   Vector of input pixels on which Coring is applied
 188  * @param[in] m_CnrCoring0 Coring Level0
 189  *
 190  * @return                 vector of filtered pixels after coring is applied
 191  *
 192  * This function will perform adaptive coring based on brightness level to
 193  * remove noise
 194  */
 195 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w coring(
 196         tvector1w coring_vec,
 197         tvector1w filt_input,
 198         tscalar1w m_CnrCoring0 );
 199
 200 /** @brief Normalised FIR with coefficients [3,4,1]
 201  *
 202  * @param[in] m 1x3 matrix with pixels
 203  *
 204  * @return              filtered output
 205  *
 206  * This function will calculate the
 207  * Normalised FIR with coefficients [3,4,1],
 208  *-5dB at Fs/2, -90 degree phase shift (quarter pixel)
 209  */
 210 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_5dB_m90_nrm (
 211         const s_1w_1x3_matrix           m);
 212
 213 /** @brief Normalised FIR with coefficients [1,4,3]
 214  *
 215  * @param[in] m 1x3 matrix with pixels
 216  *
 217  * @return              filtered output
 218  *
 219  * This function will calculate the
 220  * Normalised FIR with coefficients [1,4,3],
 221  *-5dB at Fs/2, +90 degree phase shift (quarter pixel)
 222  */
 223 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_5dB_p90_nrm (
 224         const s_1w_1x3_matrix           m);
 225
 226 /** @brief Normalised FIR with coefficients [1,2,1]
 227  *
 228  * @param[in] m 1x3 matrix with pixels
 229  *
 230  * @return              filtered output
 231  *
 232  * This function will calculate the
 233  * Normalised FIR with coefficients [1,2,1], -6dB at Fs/2
 234  */
 235 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm (
 236         const s_1w_1x3_matrix           m);
 237
 238 /** @brief Normalised FIR with coefficients [13,16,3]
 239  *
 240  * @param[in] m 1x3 matrix with pixels
 241  *
 242  * @return              filtered output
 243  *
 244  * This function will calculate the
 245  * Normalised FIR with coefficients [13,16,3],
 246  */
 247 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph0 (
 248         const s_1w_1x3_matrix           m);
 249
 250 /** @brief Normalised FIR with coefficients [9,16,7]
 251  *
 252  * @param[in] m 1x3 matrix with pixels
 253  *
 254  * @return              filtered output
 255  *
 256  * This function will calculate the
 257  * Normalised FIR with coefficients [9,16,7],
 258  */
 259 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph1 (
 260         const s_1w_1x3_matrix           m);
 261
 262 /** @brief Normalised FIR with coefficients [5,16,11]
 263  *
 264  * @param[in] m 1x3 matrix with pixels
 265  *
 266  * @return              filtered output
 267  *
 268  * This function will calculate the
 269  * Normalised FIR with coefficients [5,16,11],
 270  */
 271 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph2 (
 272         const s_1w_1x3_matrix           m);
 273
 274 /** @brief Normalised FIR with coefficients [1,16,15]
 275  *
 276  * @param[in] m 1x3 matrix with pixels
 277  *
 278  * @return              filtered output
 279  *
 280  * This function will calculate the
 281  * Normalised FIR with coefficients [1,16,15],
 282  */
 283 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph3 (
 284         const s_1w_1x3_matrix           m);
 285
 286 /** @brief Normalised FIR with programmable phase shift
 287  *
 288  * @param[in] m 1x3 matrix with pixels
 289  * @param[in] coeff     phase shift
 290  *
 291  * @return              filtered output
 292  *
 293  * This function will calculate the
 294  * Normalised FIR with coefficients [8-coeff,16,8+coeff],
 295  */
 296 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_calc_coeff (
 297         const s_1w_1x3_matrix           m, tscalar1w_3bit coeff);
 298
 299 /** @brief 3 tap FIR with coefficients [1,1,1]
 300  *
 301  * @param[in] m 1x3 matrix with pixels
 302  *
 303  * @return              filtered output
 304  *
 305  * This function will calculate the
 306  * FIR with coefficients [1,1,1], -9dB at Fs/2 normalized with factor 1/2
 307  */
 308 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_9dB_nrm (
 309         const s_1w_1x3_matrix           m);
 310
 311 #ifdef ISP2401
 312 /** @brief      symmetric 3 tap FIR acts as LPF or BSF
 313  *
 314  * @param[in] m 1x3 matrix with pixels
 315  * @param[in] k filter coefficient shift
 316  * @param[in] bsf_flag 1 for BSF and 0 for LPF
 317  *
 318  * @return    filtered output
 319  *
 320  * This function performs variable coefficient symmetric 3 tap filter which can
 321  * be either used as Low Pass Filter or Band Stop Filter.
 322  * A symmetric 3 tap filter with DC gain 1 has filter coefficients [a, 1-2a, a].
 323  * For LPF 'a' can be approximated as (1 - 2^(-k))/4, k = 0, 1, 2, ...
 324  * and filter output can be approximated as:
 325  * out_LPF = ((v00 + v02) - ((v00 + v02) >> k) + (2 * (v01 + (v01 >> k)))) >> 2
 326  * For BSF 'a' can be approximated as (1 + 2^(-k))/4, k = 0, 1, 2, ...
 327  * and filter output can be approximated as:
 328  * out_BSF = ((v00 + v02) + ((v00 + v02) >> k) + (2 * (v01 - (v01 >> k)))) >> 2
 329  * For a given filter coefficient shift 'k' and bsf_flag this function
 330  * behaves either as LPF or BSF.
 331  * All computation is done using 1w arithmetic and implementation does not use
 332  * any multiplication.
 333  */
 334 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 335 sym_fir1x3m_lpf_bsf(s_1w_1x3_matrix m,
 336                     tscalar1w k,
 337                     tscalar_bool bsf_flag);
 338 #endif
 339
 340 /** @brief Normalised 2D FIR with coefficients  [1;2;1] * [1,2,1]
 341  *
 342  * @param[in] m 3x3 matrix with pixels
 343  *
 344  * @return              filtered output
 345  *
 346  * This function will calculate the
 347  * Normalised FIR with coefficients  [1;2;1] * [1,2,1]
 348  * Unity gain filter through repeated scaling and rounding
 349  *      - 6 rotate operations per output
 350  *      - 8 vector operations per output
 351  * _______
 352  *   14 total operations
 353  */
 354 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir3x3m_6dB_nrm (
 355         const s_1w_3x3_matrix           m);
 356
 357 /** @brief Normalised 2D FIR with coefficients  [1;1;1] * [1,1,1]
 358  *
 359  * @param[in] m 3x3 matrix with pixels
 360  *
 361  * @return              filtered output
 362  *
 363  * This function will calculate the
 364  * Normalised FIR with coefficients [1;1;1] * [1,1,1]
 365  *
 366  * (near) Unity gain filter through repeated scaling and rounding
 367  *      - 6 rotate operations per output
 368  *      - 8 vector operations per output
 369  * _______
 370  *   14 operations
 371  */
 372 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir3x3m_9dB_nrm (
 373         const s_1w_3x3_matrix           m);
 374
 375 /** @brief Normalised dual output 2D FIR with coefficients  [1;2;1] * [1,2,1]
 376  *
 377  * @param[in] m 4x3 matrix with pixels
 378  *
 379  * @return              two filtered outputs (2x1 matrix)
 380  *
 381  * This function will calculate the
 382  * Normalised FIR with coefficients  [1;2;1] * [1,2,1]
 383  * and produce two outputs (vertical)
 384  * Unity gain filter through repeated scaling and rounding
 385  * compute two outputs per call to re-use common intermediates
 386  *      - 4 rotate operations per output
 387  *      - 6 vector operations per output (alternative possible, but in this
 388  *          form it's not obvious to re-use variables)
 389  * _______
 390  *   10 total operations
 391  */
 392  STORAGE_CLASS_REF_VECTOR_FUNC_H s_1w_2x1_matrix fir3x3m_6dB_out2x1_nrm (
 393         const s_1w_4x3_matrix           m);
 394
 395 /** @brief Normalised dual output 2D FIR with coefficients [1;1;1] * [1,1,1]
 396  *
 397  * @param[in] m 4x3 matrix with pixels
 398  *
 399  * @return              two filtered outputs (2x1 matrix)
 400  *
 401  * This function will calculate the
 402  * Normalised FIR with coefficients [1;1;1] * [1,1,1]
 403  * and produce two outputs (vertical)
 404  * (near) Unity gain filter through repeated scaling and rounding
 405  * compute two outputs per call to re-use common intermediates
 406  *      - 4 rotate operations per output
 407  *      - 7 vector operations per output (alternative possible, but in this
 408  *          form it's not obvious to re-use variables)
 409  * _______
 410  *   11 total operations
 411  */
 412 STORAGE_CLASS_REF_VECTOR_FUNC_H s_1w_2x1_matrix fir3x3m_9dB_out2x1_nrm (
 413         const s_1w_4x3_matrix           m);
 414
 415 /** @brief Normalised 2D FIR 5x5
 416  *
 417  * @param[in] m 5x5 matrix with pixels
 418  *
 419  * @return              filtered output
 420  *
 421  * This function will calculate the
 422  * Normalised FIR with coefficients [1;1;1] * [1;2;1] * [1,2,1] * [1,1,1]
 423  * and produce a filtered output
 424  * (near) Unity gain filter through repeated scaling and rounding
 425  *      - 20 rotate operations per output
 426  *      - 28 vector operations per output
 427  * _______
 428  *   48 total operations
 429 */
 430 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir5x5m_15dB_nrm (
 431         const s_1w_5x5_matrix   m);
 432
 433 /** @brief Normalised FIR 1x5
 434  *
 435  * @param[in] m 1x5 matrix with pixels
 436  *
 437  * @return              filtered output
 438  *
 439  * This function will calculate the
 440  * Normalised FIR with coefficients [1,2,1] * [1,2,1] = [1,4,6,4,1]
 441  * and produce a filtered output
 442  * (near) Unity gain filter through repeated scaling and rounding
 443  *      - 4 rotate operations per output
 444  *      - 5 vector operations per output
 445  * _______
 446  *   9 total operations
 447 */
 448 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x5m_12dB_nrm (
 449         const s_1w_1x5_matrix m);
 450
 451 /** @brief Normalised 2D FIR 5x5
 452  *
 453  * @param[in] m 5x5 matrix with pixels
 454  *
 455  * @return              filtered output
 456  *
 457  * This function will calculate the
 458  * Normalised FIR with coefficients [1;2;1] * [1;2;1] * [1,2,1] * [1,2,1]
 459  * and produce a filtered output
 460  * (near) Unity gain filter through repeated scaling and rounding
 461  *      - 20 rotate operations per output
 462  *      - 30 vector operations per output
 463  * _______
 464  *   50 total operations
 465 */
 466 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir5x5m_12dB_nrm (
 467         const s_1w_5x5_matrix m);
 468
 469 /** @brief Approximate averaging FIR 1x5
 470  *
 471  * @param[in] m 1x5 matrix with pixels
 472  *
 473  * @return              filtered output
 474  *
 475  * This function will produce filtered output by
 476  * applying the filter coefficients (1/8) * [1,1,1,1,1]
 477  * _______
 478  *   5 vector operations
 479 */
 480 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x5m_box (
 481         s_1w_1x5_matrix m);
 482
 483 /** @brief Approximate averaging FIR 1x9
 484  *
 485  * @param[in] m 1x9 matrix with pixels
 486  *
 487  * @return              filtered output
 488  *
 489  * This function will produce filtered output by
 490  * applying the filter coefficients (1/16) * [1,1,1,1,1,1,1,1,1]
 491  * _______
 492  *   9 vector operations
 493 */
 494 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x9m_box (
 495         s_1w_1x9_matrix m);
 496
 497 /** @brief Approximate averaging FIR 1x11
 498  *
 499  * @param[in] m 1x11 matrix with pixels
 500  *
 501  * @return              filtered output
 502  *
 503  * This function will produce filtered output by
 504  * applying the filter coefficients (1/16) * [1,1,1,1,1,1,1,1,1,1,1]
 505  * _______
 506  *   12 vector operations
 507 */
 508 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x11m_box (
 509         s_1w_1x11_matrix m);
 510
 511 /** @brief Symmetric 7 tap filter with normalization
 512  *
 513  *  @param[in] in 1x7 matrix with pixels
 514  *  @param[in] coeff 1x4 matrix with coefficients
 515  *  @param[in] out_shift output pixel shift value for normalization
 516  *
 517  *  @return symmetric 7 tap filter output
 518  *
 519  * This function performs symmetric 7 tap filter over input pixels.
 520  * Filter sum is normalized by shifting out_shift bits.
 521  * Filter sum: p0*c3 + p1*c2 + p2*c1 + p3*c0 + p4*c1 + p5*c2 + p6*c3
 522  * is implemented as: (p0 + p6)*c3 + (p1 + p5)*c2 + (p2 + p4)*c1 + p3*c0 to
 523  * reduce multiplication.
 524  * Input pixels should be scaled, otherwise overflow is possible during
 525  * addition.
 526 */
 527 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 528 fir1x7m_sym_nrm(s_1w_1x7_matrix in,
 529                 s_1w_1x4_matrix coeff,
 530                 tvector1w out_shift);
 531
 532 /** @brief Symmetric 7 tap filter with normalization at input side
 533  *
 534  *  @param[in] in 1x7 matrix with pixels
 535  *  @param[in] coeff 1x4 matrix with coefficients
 536  *
 537  *  @return symmetric 7 tap filter output
 538  *
 539  * This function performs symmetric 7 tap filter over input pixels.
 540  * Filter sum: p0*c3 + p1*c2 + p2*c1 + p3*c0 + p4*c1 + p5*c2 + p6*c3
 541  *          = (p0 + p6)*c3 + (p1 + p5)*c2 + (p2 + p4)*c1 + p3*c0
 542  * Input pixels and coefficients are in Qn format, where n =
 543  * ISP_VEC_ELEMBITS - 1 (ie Q15 for Broxton)
 544  * To avoid double precision arithmetic input pixel sum and final sum is
 545  * implemented using avgrnd and coefficient multiplication using qrmul.
 546  * Final result is in Qm format where m = ISP_VEC_ELEMBITS - 2 (ie Q14 for
 547  * Broxton)
 548 */
 549 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 550 fir1x7m_sym_innrm_approx(s_1w_1x7_matrix in,
 551                          s_1w_1x4_matrix coeff);
 552
 553 /** @brief Symmetric 7 tap filter with normalization at output side
 554  *
 555  *  @param[in] in 1x7 matrix with pixels
 556  *  @param[in] coeff 1x4 matrix with coefficients
 557  *
 558  *  @return symmetric 7 tap filter output
 559  *
 560  * This function performs symmetric 7 tap filter over input pixels.
 561  * Filter sum: p0*c3 + p1*c2 + p2*c1 + p3*c0 + p4*c1 + p5*c2 + p6*c3
 562  *          = (p0 + p6)*c3 + (p1 + p5)*c2 + (p2 + p4)*c1 + p3*c0
 563  * Input pixels are in Qn and coefficients are in Qm format, where n =
 564  * ISP_VEC_ELEMBITS - 2 and m = ISP_VEC_ELEMBITS - 1 (ie Q14 and Q15
 565  * respectively for Broxton)
 566  * To avoid double precision arithmetic input pixel sum and final sum is
 567  * implemented using addsat and coefficient multiplication using qrmul.
 568  * Final sum is left shifted by 2 and saturated to produce the result in Qm
 569  * format (ie Q15 for Broxton)
 570 */
 571 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 572 fir1x7m_sym_outnrm_approx(s_1w_1x7_matrix in,
 573                          s_1w_1x4_matrix coeff);
 574
 575 /** @brief 4 tap filter with normalization
 576  *
 577  *  @param[in] in 1x4 matrix with pixels
 578  *  @param[in] coeff 1x4 matrix with coefficients
 579  *  @param[in] out_shift output pixel shift value for normalization
 580  *
 581  *  @return 4 tap filter output
 582  *
 583  * This function performs 4 tap filter over input pixels.
 584  * Filter sum is normalized by shifting out_shift bits.
 585  * Filter sum: p0*c0 + p1*c1 + p2*c2 + p3*c3
 586 */
 587 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 588 fir1x4m_nrm(s_1w_1x4_matrix in,
 589                 s_1w_1x4_matrix coeff,
 590                 tvector1w out_shift);
 591
 592 /** @brief 4 tap filter with normalization for half pixel interpolation
 593  *
 594  *  @param[in] in 1x4 matrix with pixels
 595  *
 596  *  @return 4 tap filter output with filter tap [-1 9 9 -1]/16
 597  *
 598  * This function performs 4 tap filter over input pixels.
 599  * Filter sum: -p0 + 9*p1 + 9*p2 - p3
 600  * This filter implementation is completely free from multiplication and double
 601  * precision arithmetic.
 602  * A typical use of this filter is half pixel interpolation of a Bezier
 603  * surface.
 604  * */
 605 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 606 fir1x4m_bicubic_bezier_half(s_1w_1x4_matrix in);
 607
 608 /** @brief 4 tap filter with normalization for quarter pixel interpolation
 609  *
 610  *  @param[in] in 1x4 matrix with pixels
 611  *  @param[in] coeff 1x4 matrix with coefficients
 612  *
 613  *  @return 4 tap filter output
 614  *
 615  * This function performs 4 tap filter over input pixels.
 616  * Filter sum: p0*c0 + p1*c1 + p2*c2 + p3*c3
 617  * To avoid double precision arithmetic we implemented multiplication using
 618  * qrmul and addition using avgrnd. Coefficients( c0 to c3) formats are assumed
 619  * to be: Qm, Qn, Qo, Qm, where m = n + 2 and o = n + 1.
 620  * A typical use of this filter is quarter pixel interpolation of a Bezier
 621  * surface with filter coefficients [-9 111 29 -3]/128, for which the coefficient
 622  * values should be: [-9216/2^17  28416/2^15  14848/2^16 -3072/2^17] for
 623  * ISP_VEC_ELEMBITS = 16.
 624 */
 625 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 626 fir1x4m_bicubic_bezier_quarter(s_1w_1x4_matrix in,
 627                         s_1w_1x4_matrix coeff);
 628
 629
 630 /** @brief Symmetric 3 tap filter with normalization
 631  *
 632  *  @param[in] in 1x3 matrix with pixels
 633  *  @param[in] coeff 1x2 matrix with coefficients
 634  *  @param[in] out_shift output pixel shift value for normalization
 635  *
 636  *  @return symmetric 3 tap filter output
 637  *
 638  * This function performs symmetric 3 tap filter over input pixels.
 639  * Filter sum is normalized by shifting out_shift bits.
 640  * Filter sum: p0*c1 + p1*c0 + p2*c1
 641  * is implemented as: (p0 + p2)*c1 + p1*c0 to reduce multiplication.
 642  * Input pixels should be scaled, otherwise overflow is possible during
 643  * addition.
 644 */
 645 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 646 fir1x3m_sym_nrm(s_1w_1x3_matrix in,
 647                 s_1w_1x2_matrix coeff,
 648                 tvector1w out_shift);
 649
 650 /** @brief Symmetric 3 tap filter with normalization
 651  *
 652  *  @param[in] in 1x3 matrix with pixels
 653  *  @param[in] coeff 1x2 matrix with coefficients
 654  *
 655  *  @return symmetric 3 tap filter output
 656  *
 657  * This function performs symmetric 3 tap filter over input pixels.
 658  * Filter sum: p0*c1 + p1*c0 + p2*c1 = (p0 + p2)*c1 + p1*c0
 659  * Input pixels are in Qn and coefficient c0 is in Qm and c1 is in Qn format,
 660  * where n = ISP_VEC_ELEMBITS - 1 and m = ISP_VEC_ELEMBITS - 2 ( ie Q15 and Q14
 661  * respectively for Broxton)
 662  * To avoid double precision arithmetic input pixel sum is implemented using
 663  * avgrnd, coefficient multiplication using qrmul and final sum using addsat
 664  * Final sum is Qm format (ie Q14 for Broxton)
 665 */
 666 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 667 fir1x3m_sym_nrm_approx(s_1w_1x3_matrix in,
 668                        s_1w_1x2_matrix coeff);
 669
 670 /** @brief Mean of 1x3 matrix
 671  *
 672  *  @param[in] m 1x3 matrix with pixels
 673  *
 674  *  @return mean of 1x3 matrix
 675  *
 676  * This function calculates the mean of 1x3 pixels,
 677  * with a factor of 4/3.
 678 */
 679 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x3m(
 680         s_1w_1x3_matrix m);
 681
 682 /** @brief Mean of 3x3 matrix
 683  *
 684  *  @param[in] m 3x3 matrix with pixels
 685  *
 686  *  @return mean of 3x3 matrix
 687  *
 688  * This function calculates the mean of 3x3 pixels,
 689  * with a factor of 16/9.
 690 */
 691 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean3x3m(
 692         s_1w_3x3_matrix m);
 693
 694 /** @brief Mean of 1x4 matrix
 695  *
 696  *  @param[in] m 1x4 matrix with pixels
 697  *
 698  *  @return mean of 1x4 matrix
 699  *
 700  * This function calculates the mean of 1x4 pixels
 701 */
 702 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x4m(
 703         s_1w_1x4_matrix m);
 704
 705 /** @brief Mean of 4x4 matrix
 706  *
 707  *  @param[in] m 4x4 matrix with pixels
 708  *
 709  *  @return mean of 4x4 matrix
 710  *
 711  * This function calculates the mean of 4x4 matrix with pixels
 712 */
 713 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean4x4m(
 714         s_1w_4x4_matrix m);
 715
 716 /** @brief Mean of 2x3 matrix
 717  *
 718  *  @param[in] m 2x3 matrix with pixels
 719  *
 720  *  @return mean of 2x3 matrix
 721  *
 722  * This function calculates the mean of 2x3 matrix with pixels
 723  * with a factor of 8/6.
 724 */
 725 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean2x3m(
 726         s_1w_2x3_matrix m);
 727
 728 /** @brief Mean of 1x5 matrix
 729  *
 730  *  @param[in] m 1x5 matrix with pixels
 731  *
 732  *  @return mean of 1x5 matrix
 733  *
 734  * This function calculates the mean of 1x5 matrix with pixels
 735  * with a factor of 8/5.
 736 */
 737 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x5m(s_1w_1x5_matrix m);
 738
 739 /** @brief Mean of 1x6 matrix
 740  *
 741  *  @param[in] m 1x6 matrix with pixels
 742  *
 743  *  @return mean of 1x6 matrix
 744  *
 745  * This function calculates the mean of 1x6 matrix with pixels
 746  * with a factor of 8/6.
 747 */
 748 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x6m(
 749         s_1w_1x6_matrix m);
 750
 751 /** @brief Mean of 5x5 matrix
 752  *
 753  *  @param[in] m 5x5 matrix with pixels
 754  *
 755  *  @return mean of 5x5 matrix
 756  *
 757  * This function calculates the mean of 5x5 matrix with pixels
 758  * with a factor of 32/25.
 759 */
 760 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean5x5m(
 761         s_1w_5x5_matrix m);
 762
 763 /** @brief Mean of 6x6 matrix
 764  *
 765  *  @param[in] m 6x6 matrix with pixels
 766  *
 767  *  @return mean of 6x6 matrix
 768  *
 769  * This function calculates the mean of 6x6 matrix with pixels
 770  * with a factor of 64/36.
 771 */
 772 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean6x6m(
 773         s_1w_6x6_matrix m);
 774
 775 /** @brief Minimum of 4x4 matrix
 776  *
 777  *  @param[in] m 4x4 matrix with pixels
 778  *
 779  *  @return minimum of 4x4 matrix
 780  *
 781  * This function calculates the  minimum of
 782  * 4x4 matrix with pixels.
 783 */
 784 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w min4x4m(
 785         s_1w_4x4_matrix m);
 786
 787 /** @brief Maximum of 4x4 matrix
 788  *
 789  *  @param[in] m 4x4 matrix with pixels
 790  *
 791  *  @return maximum of 4x4 matrix
 792  *
 793  * This function calculates the  maximum of
 794  * 4x4 matrix with pixels.
 795 */
 796 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w max4x4m(
 797         s_1w_4x4_matrix m);
 798
 799 /** @brief SAD between two 3x3 matrices
 800  *
 801  *  @param[in] a 3x3 matrix with pixels
 802  *
 803  *  @param[in] b 3x3 matrix with pixels
 804  *
 805  *  @return 3x3 matrix SAD
 806  *
 807  * This function calculates the sum of absolute difference between two matrices.
 808  * Both input pixels and SAD are normalized by a factor of SAD3x3_IN_SHIFT and
 809  * SAD3x3_OUT_SHIFT respectively.
 810  * Computed SAD is 1/(2 ^ (SAD3x3_IN_SHIFT + SAD3x3_OUT_SHIFT)) ie 1/16 factor
 811  * of original SAD and it's more precise than sad3x3m()
 812 */
 813 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w sad3x3m_precise(
 814         s_1w_3x3_matrix a,
 815         s_1w_3x3_matrix b);
 816
 817 /** @brief SAD between two 3x3 matrices
 818  *
 819  *  @param[in] a 3x3 matrix with pixels
 820  *
 821  *  @param[in] b 3x3 matrix with pixels
 822  *
 823  *  @return 3x3 matrix SAD
 824  *
 825  * This function calculates the sum of absolute difference between two matrices.
 826  * This version saves cycles by avoiding input normalization and wide vector
 827  * operation during sum computation
 828  * Input pixel differences are computed by absolute of rounded, halved
 829  * subtraction. Normalized sum is computed by rounded averages.
 830  * Computed SAD is (1/2)*(1/16) = 1/32 factor of original SAD. Factor 1/2 comes
 831  * from input halving operation and factor 1/16 comes from mean operation
 832 */
 833 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w sad3x3m(
 834         s_1w_3x3_matrix a,
 835         s_1w_3x3_matrix b);
 836
 837 /** @brief SAD between two 5x5 matrices
 838  *
 839  *  @param[in] a 5x5 matrix with pixels
 840  *
 841  *  @param[in] b 5x5 matrix with pixels
 842  *
 843  *  @return 5x5 matrix SAD
 844  *
 845  * Computed SAD is = 1/32 factor of original SAD.
 846 */
 847 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w sad5x5m(
 848         s_1w_5x5_matrix a,
 849         s_1w_5x5_matrix b);
 850
 851 /** @brief Absolute gradient between two sets of 1x5 matrices
 852  *
 853  *  @param[in] m0 first set of 1x5 matrix with pixels
 854  *  @param[in] m1 second set of 1x5 matrix with pixels
 855  *
 856  *  @return absolute gradient between the two 1x5 matrices
 857  *
 858  * This function computes the mean of the two input 1x5 matrices and returns
 859  * the absolute difference between the two mean values.
 860  */
 861 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
 862 absgrad1x5m(s_1w_1x5_matrix m0, s_1w_1x5_matrix m1);
 863
 864 /** @brief Bi-linear Interpolation optimized (approximate), cloned weight
 865  *
 866  * @param[in] a input0
 867  * @param[in] b input1
 868  * @param[in] c cloned weight factor
 869  *
 870  * @return              (a-b)*c + b
 871  *
 872  * This function performs bi-linear interpolation on
 873  * inputs a and b using the constant weight factor c.
 874  *
 875  * Inputs a,b are assumed to be in S1.15 format.
 876  * Weight factor has to be in range [0,1] and is assumed to be in S2.14 format.
 877  *
 878  * The bilinear interpolation equation is (a*c) + b*(1-c),
 879  * but it is implemented as (a-b)*c + b for optimization.
 880  */
 881 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_bilinear_interpol_approx_c(
 882         tvector1w a,
 883         tvector1w b,
 884         tscalar1w_weight c);
 885
 886 /** @brief Bi-linear Interpolation optimized (approximate)
 887  *
 888  * @param[in] a input0
 889  * @param[in] b input1
 890  * @param[in] c weight factor
 891  *
 892  * @return              (a-b)*c + b
 893  *
 894  * This function performs bi-linear interpolation on
 895  * inputs a and b using the weight factor c.
 896  *
 897  * Inputs a,b are assumed to be in S1.15 format.
 898  * Weight factor has to be in range [0,1] and is assumed to be in S2.14 format.
 899  *
 900  * The bilinear interpolation equation is (a*c) + b*(1-c),
 901  * but it is implemented as (a-b)*c + b for optimization.
 902  */
 903 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_bilinear_interpol_approx(
 904         tvector1w a,
 905         tvector1w b,
 906         tvector1w_weight c);
 907
 908 /** @brief Bi-linear Interpolation
 909  *
 910  * @param[in] a input0
 911  * @param[in] b input1
 912  * @param[in] c weight factor
 913  *
 914  * @return              (a*c) + b*(1-c)
 915  *
 916  * This function performs bi-linear interpolation on
 917  * inputs a and b using the weight factor c.
 918  *
 919  * Inputs a,b are assumed to be in S1.15 format.
 920  * Weight factor has to be in range [0,1] and is assumed to be in S2.14 format.
 921  *
 922  * The bilinear interpolation equation is (a*c) + b*(1-c).
 923  */
 924 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_bilinear_interpol(
 925         tvector1w a,
 926         tvector1w b,
 927         tscalar1w_weight c);
 928
 929 /** @brief Generic Block Matching Algorithm
 930  * @param[in] search_window pointer to input search window of 16x16 pixels
 931  * @param[in] ref_block pointer to input reference block of 8x8 pixels, where N<=M
 932  * @param[out] output pointer to output sads
 933  * @param[in] search_sz search size for SAD computation
 934  * @param[in] ref_sz block size
 935  * @param[in] pixel_shift pixel shift to search the data
 936  * @param[in] search_block_sz search window block size
 937  * @param[in] shift shift value, with which the output is shifted right
 938  *
 939  * @return   0 when the computation is successful.
 940  *
 941  * This function compares the reference block with a block of size NxN in the search
 942  * window. Sum of absolute differences for each pixel in the reference block and the
 943  * corresponding pixel in the search block is computed. Whole search window is
 944  * traversed with the reference block with the given pixel shift.
 945  * NOTE(review): N and M are not defined here; presumably ref_sz and search_sz - confirm.
 946  */
 947 STORAGE_CLASS_REF_VECTOR_FUNC_H int generic_block_matching_algorithm(
 948         tscalar1w **search_window,
 949         tscalar1w **ref_block,
 950         tscalar1w *output,
 951         int search_sz,
 952         int ref_sz,
 953         int pixel_shift,
 954         int search_block_sz,
 955         tscalar1w_4bit_bma_shift shift);
 956
 957 /** @brief OP_1w_asp_bma_16_1_32way
 958  *
 959  * When ISP2401 is defined, this function is declared under the name
 960  * OP_1w_asp_bma_16_1_32way_nomask instead; parameters and return type are
 961  * identical for both names.
 962  *
 963  * @param[in] search_area input search window of 16x16 pixels
 964  * @param[in] input_block input reference block of 8x8 pixels
 965  * @param[in] shift shift value, with which the output is shifted right
 966  *
 967  * @return   81 SADs for all the search blocks.
 968  *
 969  * This function compares the reference block with a block of size 8x8 pixels in the
 970  * search window of 16x16 pixels. Sum of absolute differences for each pixel in the
 971  * reference block and the corresponding pixel in the search block is calculated.
 972  * Whole search window is traversed with the reference block with the pixel shift of 1
 973  * pixels. The output is right shifted with the given shift value. The shift value is
 974  * a 4 bit value.
 975  *
 976  */
 977
 978 #ifndef ISP2401
 979 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_1 OP_1w_asp_bma_16_1_32way(
 980 #else
 981 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_1 OP_1w_asp_bma_16_1_32way_nomask(
 982 #endif
 983         bma_16x16_search_window search_area,
 984         ref_block_8x8 input_block,
 985         tscalar1w_4bit_bma_shift shift);
 986
 987 /** @brief OP_1w_asp_bma_16_2_32way
 988  *
 989  * When ISP2401 is defined, this function is declared under the name
 990  * OP_1w_asp_bma_16_2_32way_nomask instead; parameters and return type are
 991  * identical for both names.
 992  *
 993  * @param[in] search_area input search window of 16x16 pixels
 994  * @param[in] input_block input reference block of 8x8 pixels
 995  * @param[in] shift shift value, with which the output is shifted right
 996  *
 997  * @return   25 SADs for all the search blocks.
 998  *
 999  * This function compares the reference block with a block of size 8x8 in the search
1000  * window of 16x16. Sum of absolute differences for each pixel in the reference block
1001  * and the corresponding pixel in the search block is computed. Whole search window is
1002  * traversed with the reference block with the given pixel shift of 2 pixels. The output
1003  * is right shifted with the given shift value. The shift value is a 4 bit value.
1004  */
1005
1006 #ifndef ISP2401
1007 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_2 OP_1w_asp_bma_16_2_32way(
1008 #else
1009 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_2 OP_1w_asp_bma_16_2_32way_nomask(
1010 #endif
1011         bma_16x16_search_window search_area,
1012         ref_block_8x8 input_block,
1013         tscalar1w_4bit_bma_shift shift);
1014 /** @brief OP_1w_asp_bma_14_1_32way
1015  *
1016  * When ISP2401 is defined, this function is declared under the name
1017  * OP_1w_asp_bma_14_1_32way_nomask instead; parameters and return type are
1018  * identical for both names.
1019  *
1020  * @param[in] search_area input search block of 16x16 pixels with search window of 14x14 pixels
1021  * @param[in] input_block input reference block of 8x8 pixels
1022  * @param[in] shift shift value, with which the output is shifted right
1023  *
1024  * @return   49 SADs for all the search blocks.
1025  *
1026  * This function compares the reference block with a block of size 8x8 in the search
1027  * window of 14x14. Sum of absolute differences for each pixel in the reference block
1028  * and the corresponding pixel in the search block is computed. Whole search window is
1029  * traversed with the reference block with 1 pixel shift. The output is right shifted
1030  * with the given shift value. The shift value is a 4 bit value. Input is always a
1031  * 16x16 block but the search window is 14x14; the last 2 pixels of each row and
1032  * column are not used for computation.
1033  */
1034
1035 #ifndef ISP2401
1036 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_1 OP_1w_asp_bma_14_1_32way(
1037 #else
1038 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_1 OP_1w_asp_bma_14_1_32way_nomask(
1039 #endif
1040         bma_16x16_search_window search_area,
1041         ref_block_8x8 input_block,
1042         tscalar1w_4bit_bma_shift shift);
1043
1044 /** @brief OP_1w_asp_bma_14_2_32way
1045  *
1046  * When ISP2401 is defined, this function is declared under the name
1047  * OP_1w_asp_bma_14_2_32way_nomask instead; parameters and return type are
1048  * identical for both names.
1049  *
1050  * @param[in] search_area input search block of 16x16 pixels with search window of 14x14 pixels
1051  * @param[in] input_block input reference block of 8x8 pixels
1052  * @param[in] shift shift value, with which the output is shifted right
1053  *
1054  * @return   16 SADs for all the search blocks.
1055  *
1056  * This function compares the reference block with a block of size 8x8 in the search
1057  * window of 14x14. Sum of absolute differences for each pixel in the reference block
1058  * and the corresponding pixel in the search block is computed. Whole search window is
1059  * traversed with the reference block with 2 pixels shift. The output is right shifted
1060  * with the given shift value. The shift value is a 4 bit value.
1061  */
1062
1063 #ifndef ISP2401
1064 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_2 OP_1w_asp_bma_14_2_32way(
1065 #else
1066 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_2 OP_1w_asp_bma_14_2_32way_nomask(
1067 #endif
1068         bma_16x16_search_window search_area,
1069         ref_block_8x8 input_block,
1070         tscalar1w_4bit_bma_shift shift);
1071
1072 #ifdef ISP2401 /* OP_1w_cond_add is only declared when ISP2401 is defined */
1073 /** @brief multiplex addition and passing
1074  *
1075  *  @param[in] _a first pixel
1076  *  @param[in] _b second pixel
1077  *  @param[in] _c condition flag
1078  *
1079  *  @return (_a + _b) if condition flag is true
1080  *          _a if condition flag is false
1081  *
1082  * This function adds _b to _a or passes _a through, depending on the condition flag.
1083  */
1084 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_cond_add(
1085         tvector1w _a,
1086         tvector1w _b,
1087         tflags _c);
1088
1089 #endif /* ISP2401 */
1090 #ifdef HAS_bfa_unit /* bilateral filter declarations below need HAS_bfa_unit */
1091 /** @brief OP_1w_single_bfa_7x7
1092  *
1093  * @param[in] weights - spatial and range weight lut
1094  * @param[in] threshold - threshold plane, for range weight scaling
1095  * @param[in] central_pix - central pixel plane
1096  * @param[in] src_plane - src pixel plane
1097  *
1098  * @return   Bilateral filter output
1099  *
1100  * This function implements a 7x7 single bilateral filter.
1101  * Output = {sum(pixel * weight), sum(weight)}
1102  * where sum is the summation over the 7x7 block set and
1103  * weight = spatial weight * range weight.
1104  * Spatial weights are loaded from spatial_weight_lut depending on the src
1105  * pixel position in the 7x7 block. Range weights are computed by table look
1106  * up from range_weight_lut depending on the scaled absolute difference between
1107  * src and central pixels; threshold is used as the scaling factor.
1108  * range_weight_lut consists of BFA_RW_LUT_SIZE LUT entries to model any
1109  * distribution function. A piecewise linear approximation technique is used to
1110  * compute the range weight. It computes the absolute difference between the
1111  * central pixel and 61 src pixels (NOTE(review): 7x7 is 49 pixels - confirm).
1112  */
1113 STORAGE_CLASS_REF_VECTOR_FUNC_H bfa_7x7_output OP_1w_single_bfa_7x7(
1114         bfa_weights weights,
1115         tvector1w threshold,
1116         tvector1w central_pix,
1117         s_1w_7x7_matrix src_plane);
1118
1119 /** @brief OP_1w_joint_bfa_7x7
1120  *
1121  * @param[in] weights - spatial and range weight lut
1122  * @param[in] threshold0 - 1st threshold plane, for range weight scaling
1123  * @param[in] central_pix0 - 1st central pixel plane
1124  * @param[in] src0_plane - 1st pixel plane
1125  * @param[in] threshold1 - 2nd threshold plane, for range weight scaling
1126  * @param[in] central_pix1 - 2nd central pixel plane
1127  * @param[in] src1_plane - 2nd pixel plane
1128  *
1129  * @return   Joint bilateral filter output
1130  *
1131  * This function implements a 7x7 joint bilateral filter.
1132  * Output = {sum(pixel * weight), sum(weight)}
1133  * where sum is the summation over the 7x7 block set and
1134  * weight = spatial weight * range weight.
1135  * Spatial weights are loaded from spatial_weight_lut depending on the src
1136  * pixel position in the 7x7 block. Range weights are computed by table look
1137  * up from range_weight_lut depending on the sum of scaled absolute differences
1138  * between central pixel and two src pixel planes; threshold is used as the
1139  * scaling factor. range_weight_lut consists of BFA_RW_LUT_SIZE LUT entries to
1140  * model any distribution function. A piecewise linear approximation technique
1141  * is used to compute the range weight. It computes the absolute difference between
1142  * the central pixel and 61 src pixels (NOTE(review): 7x7 is 49 pixels - confirm).
1143  */
1144 STORAGE_CLASS_REF_VECTOR_FUNC_H bfa_7x7_output OP_1w_joint_bfa_7x7(
1145         bfa_weights weights,
1146         tvector1w threshold0,
1147         tvector1w central_pix0,
1148         s_1w_7x7_matrix src0_plane,
1149         tvector1w threshold1,
1150         tvector1w central_pix1,
1151         s_1w_7x7_matrix src1_plane);
1152
1153 /** @brief bbb_bfa_gen_spatial_weight_lut
1154  *
1155  * @param[in] in - 7x7 matrix of spatial weights
1156  * @param[out] out - generated LUT
1157  *
1158  * @return   None
1159  *
1160  * This function creates the spatial weight look up table used
1161  * for the bilateral filter instruction.
1162  */
1163 STORAGE_CLASS_REF_VECTOR_FUNC_H void bbb_bfa_gen_spatial_weight_lut(
1164         s_1w_7x7_matrix in,
1165         tvector1w out[BFA_MAX_KWAY]);
1166
1167 /** @brief bbb_bfa_gen_range_weight_lut
1168  *
1169  * @param[in] in - input range weight
1170  * @param[out] out - generated LUT
1171  *
1172  * @return   None
1173  *
1174  * This function creates the range weight look up table used
1175  * for the bilateral filter instruction.
1176  * 8 unsigned 7b weights are represented in 7 16bits LUT.
1177  * LUT formation is done as follows:
1178  * higher 8 bit: Point(N) = Point(N+1) - Point(N)
1179  * lower 8 bit: Point(N) = Point(N)
1180  * Weight function can be any monotonic decreasing function for x >= 0.
1181  */
1182 STORAGE_CLASS_REF_VECTOR_FUNC_H void bbb_bfa_gen_range_weight_lut(
1183         tvector1w in[BFA_RW_LUT_SIZE+1],
1184         tvector1w out[BFA_RW_LUT_SIZE]);
1185 #endif /* HAS_bfa_unit */
1186
1187 #ifdef ISP2401 /* the imax32 helpers are only declared when ISP2401 is defined */
1188 /** @brief OP_1w_imax32
1189  *
1190  * @param[in] src - structure that holds an array of 32 elements.
1191  *
1192  * @return  maximum element among input array.
1193  *
1194  * This function gets the maximum element from an array of 32 elements.
1195  */
1196 STORAGE_CLASS_REF_VECTOR_FUNC_H int OP_1w_imax32(
1197         imax32_ref_in_vector src);
1198
1199 /** @brief OP_1w_imaxidx32
1200  *
1201  * @param[in] src - structure that holds a vector of elements.
1202  *
1203  * @return  index of the first element with the maximum value in the array.
1204  *
1205  * This function gets the index of the first element with the maximum value
1206  * from 32 elements.
1207  */
1208 STORAGE_CLASS_REF_VECTOR_FUNC_H int OP_1w_imaxidx32(
1209         imax32_ref_in_vector src);
1210
1211 #endif /* ISP2401 */
1212 #ifndef INLINE_VECTOR_FUNC /* select the storage classes for the _C macros */
1213 #define STORAGE_CLASS_REF_VECTOR_FUNC_C /* expands to nothing */
1214 #define STORAGE_CLASS_REF_VECTOR_DATA_C const
1215 #else /* INLINE_VECTOR_FUNC */
1216 #define STORAGE_CLASS_REF_VECTOR_FUNC_C STORAGE_CLASS_REF_VECTOR_FUNC_H /* same as the header's */
1217 #define STORAGE_CLASS_REF_VECTOR_DATA_C STORAGE_CLASS_REF_VECTOR_DATA_H /* same as the header's */
1218 #include "ref_vector_func.c" /* inline build: the implementation file is included here */
1219 #define VECTOR_FUNC_INLINED /* defined only after the implementation has been included */
1220 #endif  /* INLINE_VECTOR_FUNC */
1221
1222 #endif /*_REF_VECTOR_FUNC_H_INCLUDED_*/