LCOV - code coverage report
Current view: top level - libavcodec/x86 - hevcdsp_init.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 320 599 53.4 %
Date: 2017-12-17 23:02:56 Functions: 34 860 4.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2013 Seppo Tomperi
       3             :  * Copyright (c) 2013 - 2014 Pierre-Edouard Lepere
       4             :  *
       5             :  * This file is part of FFmpeg.
       6             :  *
       7             :  * FFmpeg is free software; you can redistribute it and/or
       8             :  * modify it under the terms of the GNU Lesser General Public
       9             :  * License as published by the Free Software Foundation; either
      10             :  * version 2.1 of the License, or (at your option) any later version.
      11             :  *
      12             :  * FFmpeg is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :  * Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with FFmpeg; if not, write to the Free Software
      19             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      20             :  */
      21             : 
      22             : #include "config.h"
      23             : 
      24             : #include "libavutil/cpu.h"
      25             : #include "libavutil/x86/asm.h"
      26             : #include "libavutil/x86/cpu.h"
      27             : #include "libavcodec/get_bits.h" /* required for hevcdsp.h GetBitContext */
      28             : #include "libavcodec/hevcdsp.h"
      29             : #include "libavcodec/x86/hevcdsp.h"
      30             : /* LFC_FUNC: prototype only; declares ff_hevc_DIR_loop_filter_chroma_DEPTH_OPT(pix, stride, tc, no_p, no_q). */
      31             : #define LFC_FUNC(DIR, DEPTH, OPT) \
      32             : void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int *tc, uint8_t *no_p, uint8_t *no_q);
      33             : /* LFL_FUNC: prototype only; declares ff_hevc_DIR_loop_filter_luma_DEPTH_OPT(), which takes an extra int beta before tc. */
      34             : #define LFL_FUNC(DIR, DEPTH, OPT) \
      35             : void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, int *tc, uint8_t *no_p, uint8_t *no_q);
      36             : /* LFC_FUNCS: emits the h and v LFC_FUNC prototypes for one depth/opt pair; `type` is accepted but not used by the expansion. */
      37             : #define LFC_FUNCS(type, depth, opt) \
      38             :     LFC_FUNC(h, depth, opt)  \
      39             :     LFC_FUNC(v, depth, opt)
      40             : /* LFL_FUNCS: emits the h and v LFL_FUNC prototypes for one depth/opt pair; `type` is accepted but not used by the expansion. */
      41             : #define LFL_FUNCS(type, depth, opt) \
      42             :     LFL_FUNC(h, depth, opt)  \
      43             :     LFL_FUNC(v, depth, opt)
      44             : /* Prototypes: chroma loop filter for sse2/avx, luma loop filter for sse2/ssse3/avx; each at 8, 10 and 12 bits, h and v. */
      45             : LFC_FUNCS(uint8_t,   8, sse2)
      46             : LFC_FUNCS(uint8_t,  10, sse2)
      47             : LFC_FUNCS(uint8_t,  12, sse2)
      48             : LFC_FUNCS(uint8_t,   8, avx)
      49             : LFC_FUNCS(uint8_t,  10, avx)
      50             : LFC_FUNCS(uint8_t,  12, avx)
      51             : LFL_FUNCS(uint8_t,   8, sse2)
      52             : LFL_FUNCS(uint8_t,  10, sse2)
      53             : LFL_FUNCS(uint8_t,  12, sse2)
      54             : LFL_FUNCS(uint8_t,   8, ssse3)
      55             : LFL_FUNCS(uint8_t,  10, ssse3)
      56             : LFL_FUNCS(uint8_t,  12, ssse3)
      57             : LFL_FUNCS(uint8_t,   8, avx)
      58             : LFL_FUNCS(uint8_t,  10, avx)
      59             : LFL_FUNCS(uint8_t,  12, avx)
      60             : /* IDCT_DC_FUNCS: declares ff_hevc_idct_W_dc_{8,10,12}_opt(int16_t *coeffs); the last declaration has no ';', so each use supplies it. */
      61             : #define IDCT_DC_FUNCS(W, opt) \
      62             : void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
      63             : void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
      64             : void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)
      65             : /* DC-only IDCT prototypes: 4x4 and 8x8 for mmxext; 8x8, 16x16 and 32x32 for sse2; 16x16 and 32x32 for avx2. */
      66             : IDCT_DC_FUNCS(4x4,   mmxext);
      67             : IDCT_DC_FUNCS(8x8,   mmxext);
      68             : IDCT_DC_FUNCS(8x8,   sse2);
      69             : IDCT_DC_FUNCS(16x16, sse2);
      70             : IDCT_DC_FUNCS(32x32, sse2);
      71             : IDCT_DC_FUNCS(16x16, avx2);
      72             : IDCT_DC_FUNCS(32x32, avx2);
      73             : /* IDCT_FUNCS: declares ff_hevc_idct_{4x4,8x8,16x16,32x32}_{8,10}_opt(coeffs, col_limit); the expansion already ends in ';'. */
      74             : #define IDCT_FUNCS(opt)                                             \
      75             : void ff_hevc_idct_4x4_8_    ## opt(int16_t *coeffs, int col_limit); \
      76             : void ff_hevc_idct_4x4_10_   ## opt(int16_t *coeffs, int col_limit); \
      77             : void ff_hevc_idct_8x8_8_    ## opt(int16_t *coeffs, int col_limit); \
      78             : void ff_hevc_idct_8x8_10_   ## opt(int16_t *coeffs, int col_limit); \
      79             : void ff_hevc_idct_16x16_8_  ## opt(int16_t *coeffs, int col_limit); \
      80             : void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
      81             : void ff_hevc_idct_32x32_8_  ## opt(int16_t *coeffs, int col_limit); \
      82             : void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);
      83             : /* Full IDCT prototypes (8- and 10-bit, 4x4 through 32x32) for the sse2 and avx variants. */
      84             : IDCT_FUNCS(sse2)
      85             : IDCT_FUNCS(avx)
      86             : /* mc_rep_func: defines ff_hevc_put_hevc_NAME W _BITD_OPT() by calling the STEP-wide routine once per STEP-column slice of a W-wide block. */
      87             : #define mc_rep_func(name, bitd, step, W, opt) \
      88             : void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst,                                                 \
      89             :                                                 uint8_t *_src, ptrdiff_t _srcstride, int height,                \
      90             :                                                 intptr_t mx, intptr_t my, int width)                            \
      91             : {                                                                                                               \
      92             :     int i;                                                                                                      \
      93             :     uint8_t *src;                                                                                               \
      94             :     int16_t *dst;                                                                                               \
      95             :     for (i = 0; i < W; i += step) {                                                                             \
      96             :         src  = _src + (i * ((bitd + 7) / 8)); /* (bitd + 7) / 8 = bytes per source sample */                    \
      97             :         dst = _dst + i; /* _dst is int16_t *, so i counts elements */                                           \
      98             :         ff_hevc_put_hevc_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width);            \
      99             :     }                                                                                                           \
     100             : }
     101             : #define mc_rep_uni_func(name, bitd, step, W, opt) /* defines ff_hevc_put_hevc_uni_NAME W _BITD_OPT(): one STEP-wide call per slice */ \
     102             : void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride,                        \
     103             :                                                     uint8_t *_src, ptrdiff_t _srcstride, int height,            \
     104             :                                                     intptr_t mx, intptr_t my, int width)                        \
     105             : {                                                                                                               \
     106             :     int i;                                                                                                      \
     107             :     uint8_t *src;                                                                                               \
     108             :     uint8_t *dst;                                                                                               \
     109             :     for (i = 0; i < W; i += step) {                                                                             \
     110             :         src = _src + (i * ((bitd + 7) / 8)); /* byte offset of slice i in the source row */                     \
     111             :         dst = _dst + (i * ((bitd + 7) / 8)); /* dst is uint8_t *, scaled the same way as src */                 \
     112             :         ff_hevc_put_hevc_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride,                     \
     113             :                                                           height, mx, my, width);                               \
     114             :     }                                                                                                           \
     115             : }
     116             : #define mc_rep_bi_func(name, bitd, step, W, opt) /* defines ff_hevc_put_hevc_bi_NAME W _BITD_OPT(): one STEP-wide call per slice */ \
     117             : void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, uint8_t *_src,          \
     118             :                                                    ptrdiff_t _srcstride, int16_t* _src2,                        \
     119             :                                                    int height, intptr_t mx, intptr_t my, int width)             \
     120             : {                                                                                                               \
     121             :     int i;                                                                                                      \
     122             :     uint8_t  *src;                                                                                              \
     123             :     uint8_t  *dst;                                                                                              \
     124             :     int16_t  *src2;                                                                                             \
     125             :     for (i = 0; i < W ; i += step) {                                                                            \
     126             :         src  = _src + (i * ((bitd + 7) / 8)); /* byte offset of slice i */                                      \
     127             :         dst  = _dst + (i * ((bitd + 7) / 8)); /* same byte scaling for the uint8_t * destination */             \
     128             :         src2 = _src2 + i; /* _src2 is int16_t *: element offset */                                              \
     129             :         ff_hevc_put_hevc_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2,                \
     130             :                                                           height, mx, my, width);                               \
     131             :     }                                                                                                           \
     132             : }
     133             : /* mc_rep_funcs: instantiates the plain, uni and bi repeating wrappers for one (name, bitd, step, W, opt) tuple. */
     134             : #define mc_rep_funcs(name, bitd, step, W, opt)        \
     135             :     mc_rep_func(name, bitd, step, W, opt)            \
     136             :     mc_rep_uni_func(name, bitd, step, W, opt)        \
     137             :     mc_rep_bi_func(name, bitd, step, W, opt)
     138             : /* mc_rep_func2: W-wide wrapper built from one STEP1-wide call followed by one STEP2-wide call; W only appears in the generated name. */
     139             : #define mc_rep_func2(name, bitd, step1, step2, W, opt) \
     140             : void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *dst,                                                  \
     141             :                                                  uint8_t *src, ptrdiff_t _srcstride, int height,                \
     142             :                                                  intptr_t mx, intptr_t my, int width)                           \
     143             : {                                                                                                               \
     144             :     ff_hevc_put_hevc_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width);               \
     145             :     ff_hevc_put_hevc_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)),              \
     146             :                                                     _srcstride, height, mx, my, width);                         \
     147             : }
     148             : #define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) /* uni wrapper: STEP1-wide call, then STEP2-wide call offset by step1 samples (in bytes) */ \
     149             : void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride,                         \
     150             :                                                      uint8_t *src, ptrdiff_t _srcstride, int height,            \
     151             :                                                      intptr_t mx, intptr_t my, int width)                       \
     152             : {                                                                                                               \
     153             :     ff_hevc_put_hevc_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width);\
     154             :     ff_hevc_put_hevc_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride,            \
     155             :                                                         src + (step1 * ((bitd + 7) / 8)), _srcstride,           \
     156             :                                                         height, mx, my, width);                                 \
     157             : }
     158             : #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) /* bi wrapper: STEP1-wide call, then STEP2-wide call; src2 advances by step1 int16_t elements */ \
     159             : void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,            \
     160             :                                                     ptrdiff_t _srcstride, int16_t* src2,                        \
     161             :                                                     int height, intptr_t mx, intptr_t my, int width)            \
     162             : {                                                                                                               \
     163             :     ff_hevc_put_hevc_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width);\
     164             :     ff_hevc_put_hevc_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride,             \
     165             :                                                        src + (step1 * ((bitd + 7) / 8)), _srcstride,            \
     166             :                                                        src2 + step1, height, mx, my, width);                    \
     167             : }
     168             : /* mc_rep_funcs2: instantiates the plain, uni and bi two-call wrappers for one (name, bitd, step1, step2, W, opt) tuple. */
     169             : #define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \
     170             :     mc_rep_func2(name, bitd, step1, step2, W, opt)      \
     171             :     mc_rep_uni_func2(name, bitd, step1, step2, W, opt)  \
     172             :     mc_rep_bi_func2(name, bitd, step1, step2, W, opt)
     173             : 
     174             : #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
     175             : /* mc_rep_mix_10: 10-bit WIDTH1-wide wrapper named after opt1; calls the WIDTH2-wide opt1 routine, then the WIDTH3-wide opt2 routine. */
     176             : #define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)                                       \
     177             : void ff_hevc_put_hevc_##name##width1##_10_##opt1(int16_t *dst, uint8_t *src, ptrdiff_t _srcstride,            \
     178             :                                                  int height, intptr_t mx, intptr_t my, int width)             \
     179             :                                                                                                               \
     180             : {                                                                                                             \
     181             :     ff_hevc_put_hevc_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width);                 \
     182             :     ff_hevc_put_hevc_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width); \
     183             : }
     184             : /* mc_bi_rep_mix_10: bi form of the 10-bit mixed wrapper; dst and src advance by width4 (uint8_t * arithmetic), src2 by width2 int16_t elements. */
     185             : #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)                                    \
     186             : void ff_hevc_put_hevc_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,          \
     187             :                                                     ptrdiff_t _srcstride, int16_t *src2,                      \
     188             :                                                     int height, intptr_t mx, intptr_t my, int width)          \
     189             : {                                                                                                             \
     190             :     ff_hevc_put_hevc_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2,                     \
     191             :                                                    height, mx, my, width);                                    \
     192             :     ff_hevc_put_hevc_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2,\
     193             :                                                    height, mx, my, width);                                    \
     194             : }
     195             : /* mc_uni_rep_mix_10: uni form of the 10-bit mixed wrapper; the second call offsets both dst and src by width4 (uint8_t * arithmetic). */
     196             : #define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)                                   \
     197             : void ff_hevc_put_hevc_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride,                       \
     198             :                                                      uint8_t *src, ptrdiff_t _srcstride, int height,          \
     199             :                                                      intptr_t mx, intptr_t my, int width)                     \
     200             : {                                                                                                             \
     201             :     ff_hevc_put_hevc_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride,                          \
     202             :                                                       height, mx, my, width);                                 \
     203             :     ff_hevc_put_hevc_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride,            \
     204             :                                                       height, mx, my, width);                                 \
     205             : }
     206             : /* mc_rep_mixs_10: instantiates the plain, bi and uni 10-bit mixed wrappers with identical arguments. */
     207             : #define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4)   \
     208             : mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)            \
     209             : mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)         \
     210             : mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)
     211             : /* mc_rep_mix_8: 8-bit WIDTH1-wide wrapper named after opt1; WIDTH2-wide opt1 call, then WIDTH3-wide opt2 call with dst and src both offset by width2. */
     212             : #define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2)                                                \
     213             : void ff_hevc_put_hevc_##name##width1##_8_##opt1(int16_t *dst, uint8_t *src, ptrdiff_t _srcstride,             \
     214             :                                                 int height, intptr_t mx, intptr_t my, int width)              \
     215             :                                                                                                               \
     216             : {                                                                                                             \
     217             :     ff_hevc_put_hevc_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width);                  \
     218             :     ff_hevc_put_hevc_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width);  \
     219             : }
     220             : /* mc_bi_rep_mix_8: bi form of the 8-bit mixed wrapper; dst, src and src2 are all offset by width2 for the second call. */
     221             : #define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2)                                             \
     222             : void ff_hevc_put_hevc_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,           \
     223             :                                                    ptrdiff_t _srcstride, int16_t* src2,                       \
     224             :                                                    int height, intptr_t mx, intptr_t my, int width)           \
     225             : {                                                                                                             \
     226             :     ff_hevc_put_hevc_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride,                            \
     227             :                                                   src2, height, mx, my, width);                               \
     228             :     ff_hevc_put_hevc_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride,              \
     229             :                                                   src2+width2, height, mx, my, width);                        \
     230             : }
     231             : /* mc_uni_rep_mix_8: uni form of the 8-bit mixed wrapper; the second call offsets both dst and src by width2. */
     232             : #define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)                                            \
     233             : void ff_hevc_put_hevc_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride,                        \
     234             :                                                     uint8_t *src, ptrdiff_t _srcstride, int height,           \
     235             :                                                     intptr_t mx, intptr_t my, int width)                      \
     236             : {                                                                                                             \
     237             :     ff_hevc_put_hevc_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride,                           \
     238             :                                                    height, mx, my, width);                                    \
     239             :     ff_hevc_put_hevc_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride,             \
     240             :                                                    height, mx, my, width);                                    \
     241             : }
     242             : /* mc_rep_mixs_8: instantiates the plain, bi and uni 8-bit mixed wrappers with identical arguments. */
     243             : #define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2)   \
     244             : mc_rep_mix_8(name, width1, width2, width3, opt1, opt2)            \
     245             : mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2)         \
     246             : mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)
     247             : 
     248             : #if HAVE_AVX2_EXTERNAL
     249             : /* AVX2 wrapper instantiations. NOTE(review): the number before ':' on each row is presumably this report's hit count (0 = not executed). */
     250           0 : mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
     251           0 : mc_rep_mixs_8(epel_hv,    48, 32, 16, avx2, sse4)
     252           0 : mc_rep_mixs_8(epel_h ,    48, 32, 16, avx2, sse4)
     253           0 : mc_rep_mixs_8(epel_v ,    48, 32, 16, avx2, sse4)
     254             : /* 10-bit 24-wide: 16-wide avx2 call + 8-wide sse4 call; pel_pixels gets only the plain and bi wrappers in this group. */
     255           0 : mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
     256           0 : mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32)
     257           0 : mc_rep_mixs_10(epel_hv,   24, 16, 8, avx2, sse4, 32)
     258           0 : mc_rep_mixs_10(epel_h ,   24, 16, 8, avx2, sse4, 32)
     259           0 : mc_rep_mixs_10(epel_v ,   24, 16, 8, avx2, sse4, 32)
     260             : 
     261             : 
     262           0 : mc_rep_mixs_10(qpel_h ,   24, 16, 8, avx2, sse4, 32)
     263           0 : mc_rep_mixs_10(qpel_v ,   24, 16, 8, avx2, sse4, 32)
     264           0 : mc_rep_mixs_10(qpel_hv,   24, 16, 8, avx2, sse4, 32)
     265             : 
     266             : /* From here on each wrapper repeats a single avx2 kernel of width `step` across W columns. */
     267          77 : mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2)//used for 10bit
     268           0 : mc_rep_uni_func(pel_pixels, 8, 32, 96, avx2) //used for 10bit
     269             : 
     270         300 : mc_rep_funcs(pel_pixels, 8, 32, 64, avx2)
     271             : 
     272          28 : mc_rep_func(pel_pixels, 10, 16, 32, avx2)
     273           0 : mc_rep_func(pel_pixels, 10, 16, 48, avx2)
     274           7 : mc_rep_func(pel_pixels, 10, 32, 64, avx2)
     275             : 
     276          85 : mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2)
     277           0 : mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2)
     278          20 : mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2)
     279             : 
     280           0 : mc_rep_funcs(epel_h, 8, 32, 64, avx2)
     281             : 
     282           0 : mc_rep_funcs(epel_v, 8, 32, 64, avx2)
     283             : 
     284         488 : mc_rep_funcs(epel_h, 10, 16, 32, avx2)
     285           0 : mc_rep_funcs(epel_h, 10, 16, 48, avx2)
     286           0 : mc_rep_funcs(epel_h, 10, 32, 64, avx2)
     287             : 
     288          60 : mc_rep_funcs(epel_v, 10, 16, 32, avx2)
     289           0 : mc_rep_funcs(epel_v, 10, 16, 48, avx2)
     290           0 : mc_rep_funcs(epel_v, 10, 32, 64, avx2)
     291             : 
     292             : 
     293           0 : mc_rep_funcs(epel_hv,  8, 32, 64, avx2)
     294             : 
     295         754 : mc_rep_funcs(epel_hv, 10, 16, 32, avx2)
     296           0 : mc_rep_funcs(epel_hv, 10, 16, 48, avx2)
     297           0 : mc_rep_funcs(epel_hv, 10, 32, 64, avx2)
     298             : 
     299           0 : mc_rep_funcs(qpel_h, 8, 32, 64, avx2)
     300           0 : mc_rep_mixs_8(qpel_h ,  48, 32, 16, avx2, sse4)
     301             : 
     302           0 : mc_rep_funcs(qpel_v, 8, 32, 64, avx2)
     303           0 : mc_rep_mixs_8(qpel_v,  48, 32, 16, avx2, sse4)
     304             : 
     305         545 : mc_rep_funcs(qpel_h, 10, 16, 32, avx2)
     306           0 : mc_rep_funcs(qpel_h, 10, 16, 48, avx2)
     307         179 : mc_rep_funcs(qpel_h, 10, 32, 64, avx2)
     308             : 
     309         294 : mc_rep_funcs(qpel_v, 10, 16, 32, avx2)
     310           0 : mc_rep_funcs(qpel_v, 10, 16, 48, avx2)
     311          94 : mc_rep_funcs(qpel_v, 10, 32, 64, avx2)
     312             : 
     313        1040 : mc_rep_funcs(qpel_hv, 10, 16, 32, avx2)
     314           0 : mc_rep_funcs(qpel_hv, 10, 16, 48, avx2)
     315         313 : mc_rep_funcs(qpel_hv, 10, 32, 64, avx2)
     316             : 
     317             : #endif //AVX2
     318             : /* SSE4 pel_pixels wrappers: each row defines plain, uni and bi W-wide functions that repeat the `step`-wide sse4 routine (args: name, bitd, step, W, opt). */
     319           0 : mc_rep_funcs(pel_pixels, 8, 16, 64, sse4)
     320           0 : mc_rep_funcs(pel_pixels, 8, 16, 48, sse4)
     321           0 : mc_rep_funcs(pel_pixels, 8, 16, 32, sse4)
     322           0 : mc_rep_funcs(pel_pixels, 8,  8, 24, sse4)
     323           0 : mc_rep_funcs(pel_pixels,10,  8, 64, sse4)
     324           0 : mc_rep_funcs(pel_pixels,10,  8, 48, sse4)
     325           0 : mc_rep_funcs(pel_pixels,10,  8, 32, sse4)
     326           0 : mc_rep_funcs(pel_pixels,10,  8, 24, sse4)
     327           0 : mc_rep_funcs(pel_pixels,10,  8, 16, sse4)
     328           0 : mc_rep_funcs(pel_pixels,10,  4, 12, sse4)
     329           0 : mc_rep_funcs(pel_pixels,12,  8, 64, sse4)
     330           0 : mc_rep_funcs(pel_pixels,12,  8, 48, sse4)
     331           0 : mc_rep_funcs(pel_pixels,12,  8, 32, sse4)
     332           0 : mc_rep_funcs(pel_pixels,12,  8, 24, sse4)
     333           0 : mc_rep_funcs(pel_pixels,12,  8, 16, sse4)
     334           0 : mc_rep_funcs(pel_pixels,12,  4, 12, sse4)
     335             : /* SSE4 epel wrappers (h, then v, then hv) at 8, 10 and 12 bits; every row shows a hit count of 0 in this report. */
     336           0 : mc_rep_funcs(epel_h, 8, 16, 64, sse4)
     337           0 : mc_rep_funcs(epel_h, 8, 16, 48, sse4)
     338           0 : mc_rep_funcs(epel_h, 8, 16, 32, sse4)
     339           0 : mc_rep_funcs(epel_h, 8,  8, 24, sse4)
     340           0 : mc_rep_funcs(epel_h,10,  8, 64, sse4)
     341           0 : mc_rep_funcs(epel_h,10,  8, 48, sse4)
     342           0 : mc_rep_funcs(epel_h,10,  8, 32, sse4)
     343           0 : mc_rep_funcs(epel_h,10,  8, 24, sse4)
     344           0 : mc_rep_funcs(epel_h,10,  8, 16, sse4)
     345           0 : mc_rep_funcs(epel_h,10,  4, 12, sse4)
     346           0 : mc_rep_funcs(epel_h,12,  8, 64, sse4)
     347           0 : mc_rep_funcs(epel_h,12,  8, 48, sse4)
     348           0 : mc_rep_funcs(epel_h,12,  8, 32, sse4)
     349           0 : mc_rep_funcs(epel_h,12,  8, 24, sse4)
     350           0 : mc_rep_funcs(epel_h,12,  8, 16, sse4)
     351           0 : mc_rep_funcs(epel_h,12,  4, 12, sse4)
     352           0 : mc_rep_funcs(epel_v, 8, 16, 64, sse4) /* epel_v group starts here */
     353           0 : mc_rep_funcs(epel_v, 8, 16, 48, sse4)
     354           0 : mc_rep_funcs(epel_v, 8, 16, 32, sse4)
     355           0 : mc_rep_funcs(epel_v, 8,  8, 24, sse4)
     356           0 : mc_rep_funcs(epel_v,10,  8, 64, sse4)
     357           0 : mc_rep_funcs(epel_v,10,  8, 48, sse4)
     358           0 : mc_rep_funcs(epel_v,10,  8, 32, sse4)
     359           0 : mc_rep_funcs(epel_v,10,  8, 24, sse4)
     360           0 : mc_rep_funcs(epel_v,10,  8, 16, sse4)
     361           0 : mc_rep_funcs(epel_v,10,  4, 12, sse4)
     362           0 : mc_rep_funcs(epel_v,12,  8, 64, sse4)
     363           0 : mc_rep_funcs(epel_v,12,  8, 48, sse4)
     364           0 : mc_rep_funcs(epel_v,12,  8, 32, sse4)
     365           0 : mc_rep_funcs(epel_v,12,  8, 24, sse4)
     366           0 : mc_rep_funcs(epel_v,12,  8, 16, sse4)
     367           0 : mc_rep_funcs(epel_v,12,  4, 12, sse4)
     368           0 : mc_rep_funcs(epel_hv, 8, 16, 64, sse4) /* epel_hv group starts here */
     369           0 : mc_rep_funcs(epel_hv, 8, 16, 48, sse4)
     370           0 : mc_rep_funcs(epel_hv, 8, 16, 32, sse4)
     371           0 : mc_rep_funcs(epel_hv, 8,  8, 24, sse4)
     372           0 : mc_rep_funcs2(epel_hv,8,  8,  4, 12, sse4) /* 12-wide = one 8-wide call + one 4-wide call */
     373           0 : mc_rep_funcs(epel_hv,10,  8, 64, sse4)
     374           0 : mc_rep_funcs(epel_hv,10,  8, 48, sse4)
     375           0 : mc_rep_funcs(epel_hv,10,  8, 32, sse4)
     376           0 : mc_rep_funcs(epel_hv,10,  8, 24, sse4)
     377           0 : mc_rep_funcs(epel_hv,10,  8, 16, sse4)
     378           0 : mc_rep_funcs(epel_hv,10,  4, 12, sse4)
     379           0 : mc_rep_funcs(epel_hv,12,  8, 64, sse4)
     380           0 : mc_rep_funcs(epel_hv,12,  8, 48, sse4)
     381           0 : mc_rep_funcs(epel_hv,12,  8, 32, sse4)
     382           0 : mc_rep_funcs(epel_hv,12,  8, 24, sse4)
     383           0 : mc_rep_funcs(epel_hv,12,  8, 16, sse4)
     384           0 : mc_rep_funcs(epel_hv,12,  4, 12, sse4)
     385             : 
     /* qpel_h, qpel_v and qpel_hv instantiations of mc_rep_funcs for 8-, 10- and
      * 12-bit content (macro defined earlier in the file, outside this excerpt;
      * arguments presumably (name, bitdepth, step, width, opt) -- TODO confirm).
      * For 8-bit, qpel_h and qpel_v use 16 as the third argument for widths
      * 32/48/64, whereas qpel_hv uses 8 for every width from 16 to 64.
      * The 8-bit, width-12 qpel_hv case goes through mc_rep_funcs2 instead. */
     386           0 : mc_rep_funcs(qpel_h, 8, 16, 64, sse4)
     387           0 : mc_rep_funcs(qpel_h, 8, 16, 48, sse4)
     388           0 : mc_rep_funcs(qpel_h, 8, 16, 32, sse4)
     389           0 : mc_rep_funcs(qpel_h, 8,  8, 24, sse4)
     390           0 : mc_rep_funcs(qpel_h,10,  8, 64, sse4)
     391           0 : mc_rep_funcs(qpel_h,10,  8, 48, sse4)
     392           0 : mc_rep_funcs(qpel_h,10,  8, 32, sse4)
     393           0 : mc_rep_funcs(qpel_h,10,  8, 24, sse4)
     394           0 : mc_rep_funcs(qpel_h,10,  8, 16, sse4)
     395           0 : mc_rep_funcs(qpel_h,10,  4, 12, sse4)
     396           0 : mc_rep_funcs(qpel_h,12,  8, 64, sse4)
     397           0 : mc_rep_funcs(qpel_h,12,  8, 48, sse4)
     398           0 : mc_rep_funcs(qpel_h,12,  8, 32, sse4)
     399           0 : mc_rep_funcs(qpel_h,12,  8, 24, sse4)
     400           0 : mc_rep_funcs(qpel_h,12,  8, 16, sse4)
     401           0 : mc_rep_funcs(qpel_h,12,  4, 12, sse4)
     402           0 : mc_rep_funcs(qpel_v, 8, 16, 64, sse4)
     403           0 : mc_rep_funcs(qpel_v, 8, 16, 48, sse4)
     404           0 : mc_rep_funcs(qpel_v, 8, 16, 32, sse4)
     405           0 : mc_rep_funcs(qpel_v, 8,  8, 24, sse4)
     406           0 : mc_rep_funcs(qpel_v,10,  8, 64, sse4)
     407           0 : mc_rep_funcs(qpel_v,10,  8, 48, sse4)
     408           0 : mc_rep_funcs(qpel_v,10,  8, 32, sse4)
     409           0 : mc_rep_funcs(qpel_v,10,  8, 24, sse4)
     410           0 : mc_rep_funcs(qpel_v,10,  8, 16, sse4)
     411           0 : mc_rep_funcs(qpel_v,10,  4, 12, sse4)
     412           0 : mc_rep_funcs(qpel_v,12,  8, 64, sse4)
     413           0 : mc_rep_funcs(qpel_v,12,  8, 48, sse4)
     414           0 : mc_rep_funcs(qpel_v,12,  8, 32, sse4)
     415           0 : mc_rep_funcs(qpel_v,12,  8, 24, sse4)
     416           0 : mc_rep_funcs(qpel_v,12,  8, 16, sse4)
     417           0 : mc_rep_funcs(qpel_v,12,  4, 12, sse4)
     418           0 : mc_rep_funcs(qpel_hv, 8,  8, 64, sse4)
     419           0 : mc_rep_funcs(qpel_hv, 8,  8, 48, sse4)
     420           0 : mc_rep_funcs(qpel_hv, 8,  8, 32, sse4)
     421           0 : mc_rep_funcs(qpel_hv, 8,  8, 24, sse4)
     422           0 : mc_rep_funcs(qpel_hv, 8,  8, 16, sse4)
     423           0 : mc_rep_funcs2(qpel_hv,8,  8,  4, 12, sse4)
     424           0 : mc_rep_funcs(qpel_hv,10,  8, 64, sse4)
     425           0 : mc_rep_funcs(qpel_hv,10,  8, 48, sse4)
     426           0 : mc_rep_funcs(qpel_hv,10,  8, 32, sse4)
     427           0 : mc_rep_funcs(qpel_hv,10,  8, 24, sse4)
     428           0 : mc_rep_funcs(qpel_hv,10,  8, 16, sse4)
     429           0 : mc_rep_funcs(qpel_hv,10,  4, 12, sse4)
     430           0 : mc_rep_funcs(qpel_hv,12,  8, 64, sse4)
     431           0 : mc_rep_funcs(qpel_hv,12,  8, 48, sse4)
     432           0 : mc_rep_funcs(qpel_hv,12,  8, 32, sse4)
     433           0 : mc_rep_funcs(qpel_hv,12,  8, 24, sse4)
     434           0 : mc_rep_funcs(qpel_hv,12,  8, 16, sse4)
     435           0 : mc_rep_funcs(qpel_hv,12,  4, 12, sse4)
     436             : 
     /* mc_rep_uni_w(bitd, step, W, opt)
      *
      * Defines ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt>() as a column-tiling
      * wrapper around the narrower ff_hevc_put_hevc_uni_w<step>_<bitd>_<opt>().
      * The generated function walks i = 0, step, 2*step, ... while i < W and
      * calls the narrow routine once per tile with
      *   - src advanced by i int16_t elements, and
      *   - dst advanced by i * ((bitd + 7) / 8) bytes, i.e. 1 byte per sample
      *     for bitd == 8 and 2 bytes per sample for bitd == 10 or 12.
      * dststride, height, denom, _wx and _ox are forwarded unchanged.
      * The narrow routine is not declared in this excerpt. */
     437             : #define mc_rep_uni_w(bitd, step, W, opt) \
     438             : void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \
     439             :                                                int height, int denom,  int _wx, int _ox)                                \
     440             : {                                                                                                                       \
     441             :     int i;                                                                                                              \
     442             :     int16_t *src;                                                                                                       \
     443             :     uint8_t *dst;                                                                                                       \
     444             :     for (i = 0; i < W; i += step) {                                                                                     \
     445             :         src= _src + i;                                                                                                  \
     446             :         dst= _dst + (i * ((bitd + 7) / 8));                                                                             \
     447             :         ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src,                                   \
     448             :                                                      height, denom, _wx, _ox);                                          \
     449             :     }                                                                                                                   \
     450             : }
     451             : 
     /* Instantiations for 8-, 10- and 12-bit content: width 12 is built from
      * two 6-wide tiles; widths 16, 24, 32, 48 and 64 are built from 8-wide
      * tiles. */
     452           0 : mc_rep_uni_w(8, 6, 12, sse4)
     453           0 : mc_rep_uni_w(8, 8, 16, sse4)
     454           0 : mc_rep_uni_w(8, 8, 24, sse4)
     455           0 : mc_rep_uni_w(8, 8, 32, sse4)
     456           0 : mc_rep_uni_w(8, 8, 48, sse4)
     457           0 : mc_rep_uni_w(8, 8, 64, sse4)
     458             : 
     459           0 : mc_rep_uni_w(10, 6, 12, sse4)
     460           0 : mc_rep_uni_w(10, 8, 16, sse4)
     461           0 : mc_rep_uni_w(10, 8, 24, sse4)
     462           0 : mc_rep_uni_w(10, 8, 32, sse4)
     463           0 : mc_rep_uni_w(10, 8, 48, sse4)
     464           0 : mc_rep_uni_w(10, 8, 64, sse4)
     465             : 
     466           0 : mc_rep_uni_w(12, 6, 12, sse4)
     467           0 : mc_rep_uni_w(12, 8, 16, sse4)
     468           0 : mc_rep_uni_w(12, 8, 24, sse4)
     469           0 : mc_rep_uni_w(12, 8, 32, sse4)
     470           0 : mc_rep_uni_w(12, 8, 48, sse4)
     471           0 : mc_rep_uni_w(12, 8, 64, sse4)
     472             : 
     /* mc_rep_bi_w(bitd, step, W, opt)
      *
      * Defines ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() as a column-tiling
      * wrapper around the narrower ff_hevc_put_hevc_bi_w<step>_<bitd>_<opt>().
      * For i = 0, step, 2*step, ... while i < W, the narrow routine is called
      * with both int16_t inputs (_src and _src2) advanced by i elements and
      * the destination advanced by i * ((bitd + 7) / 8) bytes (1 byte per
      * sample for bitd == 8, 2 bytes for bitd == 10 or 12).
      * dststride, height, denom, _wx0, _wx1, _ox0 and _ox1 are forwarded
      * unchanged. The narrow routine is not declared in this excerpt. */
     473             : #define mc_rep_bi_w(bitd, step, W, opt) \
     474             : void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \
     475             :                                               int16_t *_src2, int height,                                               \
     476             :                                               int denom,  int _wx0,  int _wx1, int _ox0, int _ox1)                      \
     477             : {                                                                                                                       \
     478             :     int i;                                                                                                              \
     479             :     int16_t *src;                                                                                                       \
     480             :     int16_t *src2;                                                                                                      \
     481             :     uint8_t *dst;                                                                                                       \
     482             :     for (i = 0; i < W; i += step) {                                                                                     \
     483             :         src  = _src  + i;                                                                                               \
     484             :         src2 = _src2 + i;                                                                                               \
     485             :         dst  = _dst  + (i * ((bitd + 7) / 8));                                                                          \
     486             :         ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2,                             \
     487             :                                                      height, denom, _wx0, _wx1, _ox0, _ox1);                             \
     488             :     }                                                                                                                   \
     489             : }
     490             : 
     /* Instantiations for 8-, 10- and 12-bit content: width 12 is built from
      * two 6-wide tiles; widths 16, 24, 32, 48 and 64 are built from 8-wide
      * tiles. */
     491           0 : mc_rep_bi_w(8, 6, 12, sse4)
     492           0 : mc_rep_bi_w(8, 8, 16, sse4)
     493           0 : mc_rep_bi_w(8, 8, 24, sse4)
     494           0 : mc_rep_bi_w(8, 8, 32, sse4)
     495           0 : mc_rep_bi_w(8, 8, 48, sse4)
     496           0 : mc_rep_bi_w(8, 8, 64, sse4)
     497             : 
     498           0 : mc_rep_bi_w(10, 6, 12, sse4)
     499           0 : mc_rep_bi_w(10, 8, 16, sse4)
     500           0 : mc_rep_bi_w(10, 8, 24, sse4)
     501           0 : mc_rep_bi_w(10, 8, 32, sse4)
     502           0 : mc_rep_bi_w(10, 8, 48, sse4)
     503           0 : mc_rep_bi_w(10, 8, 64, sse4)
     504             : 
     505           0 : mc_rep_bi_w(12, 6, 12, sse4)
     506           0 : mc_rep_bi_w(12, 8, 16, sse4)
     507           0 : mc_rep_bi_w(12, 8, 24, sse4)
     508           0 : mc_rep_bi_w(12, 8, 32, sse4)
     509           0 : mc_rep_bi_w(12, 8, 48, sse4)
     510           0 : mc_rep_bi_w(12, 8, 64, sse4)
     511             : 
     /* mc_uni_w_func(name, bitd, W, opt)
      *
      * Defines ff_hevc_put_hevc_uni_w_<name><W>_<bitd>_<opt>(), a two-pass
      * routine:
      *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() writes its int16_t output
      *      for the source block into the local buffer `temp`;
      *   2. ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt>() reads `temp` and writes
      *      _dst, using denom, _wx and _ox.
      * `temp` holds 71 * MAX_PB_SIZE int16_t elements and is declared through
      * LOCAL_ALIGNED_16 (defined elsewhere; presumably a 16-byte-aligned
      * stack buffer).
      * NOTE(review): 71 is presumably the maximum block height plus 7 extra
      * rows needed by the 8-tap qpel filter -- confirm against hevcdsp.h. */
     512             : #define mc_uni_w_func(name, bitd, W, opt) \
     513             : void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride,         \
     514             :                                                       uint8_t *_src, ptrdiff_t _srcstride,          \
     515             :                                                       int height, int denom,                        \
     516             :                                                       int _wx, int _ox,                             \
     517             :                                                       intptr_t mx, intptr_t my, int width)          \
     518             : {                                                                                                   \
     519             :     LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]);                                            \
     520             :     ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width);     \
     521             :     ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox);\
     522             : }
     523             : 
     /* mc_uni_w_funcs(name, bitd, opt)
      *
      * Expands mc_uni_w_func for the widths 4, 8, 12, 16, 24, 32, 48 and 64.
      * Width 6 is deliberately not part of this list; it is instantiated
      * separately below for the kernels that need it. */
     524             : #define mc_uni_w_funcs(name, bitd, opt)      \
     525             :         mc_uni_w_func(name, bitd, 4, opt)    \
     526             :         mc_uni_w_func(name, bitd, 8, opt)    \
     527             :         mc_uni_w_func(name, bitd, 12, opt)   \
     528             :         mc_uni_w_func(name, bitd, 16, opt)   \
     529             :         mc_uni_w_func(name, bitd, 24, opt)   \
     530             :         mc_uni_w_func(name, bitd, 32, opt)   \
     531             :         mc_uni_w_func(name, bitd, 48, opt)   \
     532             :         mc_uni_w_func(name, bitd, 64, opt)
     533             : 
     /* Instantiations per bit depth (8, 10, 12). pel_pixels and the epel_*
      * kernels additionally get a width-6 variant; the qpel_* kernels do not,
      * which matches QPEL_LINKS skipping the width-6 table slot. */
     534           0 : mc_uni_w_funcs(pel_pixels, 8, sse4)
     535           0 : mc_uni_w_func(pel_pixels, 8, 6, sse4)
     536           0 : mc_uni_w_funcs(epel_h, 8, sse4)
     537           0 : mc_uni_w_func(epel_h, 8, 6, sse4)
     538           0 : mc_uni_w_funcs(epel_v, 8, sse4)
     539           0 : mc_uni_w_func(epel_v, 8, 6, sse4)
     540           0 : mc_uni_w_funcs(epel_hv, 8, sse4)
     541           0 : mc_uni_w_func(epel_hv, 8, 6, sse4)
     542           0 : mc_uni_w_funcs(qpel_h, 8, sse4)
     543           0 : mc_uni_w_funcs(qpel_v, 8, sse4)
     544           0 : mc_uni_w_funcs(qpel_hv, 8, sse4)
     545             : 
     546           0 : mc_uni_w_funcs(pel_pixels, 10, sse4)
     547           0 : mc_uni_w_func(pel_pixels, 10, 6, sse4)
     548           0 : mc_uni_w_funcs(epel_h, 10, sse4)
     549           0 : mc_uni_w_func(epel_h, 10, 6, sse4)
     550           0 : mc_uni_w_funcs(epel_v, 10, sse4)
     551           0 : mc_uni_w_func(epel_v, 10, 6, sse4)
     552           0 : mc_uni_w_funcs(epel_hv, 10, sse4)
     553           0 : mc_uni_w_func(epel_hv, 10, 6, sse4)
     554           0 : mc_uni_w_funcs(qpel_h, 10, sse4)
     555           0 : mc_uni_w_funcs(qpel_v, 10, sse4)
     556           0 : mc_uni_w_funcs(qpel_hv, 10, sse4)
     557             : 
     558           0 : mc_uni_w_funcs(pel_pixels, 12, sse4)
     559           0 : mc_uni_w_func(pel_pixels, 12, 6, sse4)
     560           0 : mc_uni_w_funcs(epel_h, 12, sse4)
     561           0 : mc_uni_w_func(epel_h, 12, 6, sse4)
     562           0 : mc_uni_w_funcs(epel_v, 12, sse4)
     563           0 : mc_uni_w_func(epel_v, 12, 6, sse4)
     564           0 : mc_uni_w_funcs(epel_hv, 12, sse4)
     565           0 : mc_uni_w_func(epel_hv, 12, 6, sse4)
     566           0 : mc_uni_w_funcs(qpel_h, 12, sse4)
     567           0 : mc_uni_w_funcs(qpel_v, 12, sse4)
     568           0 : mc_uni_w_funcs(qpel_hv, 12, sse4)
     569             : 
     /* mc_bi_w_func(name, bitd, W, opt)
      *
      * Defines ff_hevc_put_hevc_bi_w_<name><W>_<bitd>_<opt>(), a two-pass
      * routine:
      *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() writes its int16_t output
      *      for _src into the local buffer `temp`;
      *   2. ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() combines `temp` with the
      *      caller-supplied int16_t block _src2 and writes _dst, using denom,
      *      _wx0/_wx1 and _ox0/_ox1.
      * `temp` holds 71 * MAX_PB_SIZE int16_t elements and is declared through
      * LOCAL_ALIGNED_16 (defined elsewhere; presumably a 16-byte-aligned
      * stack buffer).
      * NOTE(review): 71 is presumably the maximum block height plus 7 extra
      * rows needed by the 8-tap qpel filter -- confirm against hevcdsp.h. */
     570             : #define mc_bi_w_func(name, bitd, W, opt) \
     571             : void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride,           \
     572             :                                                      uint8_t *_src, ptrdiff_t _srcstride,            \
     573             :                                                      int16_t *_src2,                                 \
     574             :                                                      int height, int denom,                          \
     575             :                                                      int _wx0, int _wx1, int _ox0, int _ox1,         \
     576             :                                                      intptr_t mx, intptr_t my, int width)            \
     577             : {                                                                                                    \
     578             :     LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]);                                             \
     579             :     ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width);      \
     580             :     ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2,                         \
     581             :                                               height, denom, _wx0, _wx1, _ox0, _ox1);                \
     582             : }
     583             : 
     /* mc_bi_w_funcs(name, bitd, opt)
      *
      * Expands mc_bi_w_func for the widths 4, 8, 12, 16, 24, 32, 48 and 64.
      * Width 6 is deliberately not part of this list; it is instantiated
      * separately below for the kernels that need it. */
     584             : #define mc_bi_w_funcs(name, bitd, opt)      \
     585             :         mc_bi_w_func(name, bitd, 4, opt)    \
     586             :         mc_bi_w_func(name, bitd, 8, opt)    \
     587             :         mc_bi_w_func(name, bitd, 12, opt)   \
     588             :         mc_bi_w_func(name, bitd, 16, opt)   \
     589             :         mc_bi_w_func(name, bitd, 24, opt)   \
     590             :         mc_bi_w_func(name, bitd, 32, opt)   \
     591             :         mc_bi_w_func(name, bitd, 48, opt)   \
     592             :         mc_bi_w_func(name, bitd, 64, opt)
     593             : 
     /* Instantiations per bit depth (8, 10, 12). pel_pixels and the epel_*
      * kernels additionally get a width-6 variant; the qpel_* kernels do not,
      * which matches QPEL_LINKS skipping the width-6 table slot. */
     594           0 : mc_bi_w_funcs(pel_pixels, 8, sse4)
     595           0 : mc_bi_w_func(pel_pixels, 8, 6, sse4)
     596           0 : mc_bi_w_funcs(epel_h, 8, sse4)
     597           0 : mc_bi_w_func(epel_h, 8, 6, sse4)
     598           0 : mc_bi_w_funcs(epel_v, 8, sse4)
     599           0 : mc_bi_w_func(epel_v, 8, 6, sse4)
     600           0 : mc_bi_w_funcs(epel_hv, 8, sse4)
     601           0 : mc_bi_w_func(epel_hv, 8, 6, sse4)
     602           0 : mc_bi_w_funcs(qpel_h, 8, sse4)
     603           0 : mc_bi_w_funcs(qpel_v, 8, sse4)
     604           0 : mc_bi_w_funcs(qpel_hv, 8, sse4)
     605             : 
     606           0 : mc_bi_w_funcs(pel_pixels, 10, sse4)
     607           0 : mc_bi_w_func(pel_pixels, 10, 6, sse4)
     608           0 : mc_bi_w_funcs(epel_h, 10, sse4)
     609           0 : mc_bi_w_func(epel_h, 10, 6, sse4)
     610           0 : mc_bi_w_funcs(epel_v, 10, sse4)
     611           0 : mc_bi_w_func(epel_v, 10, 6, sse4)
     612           0 : mc_bi_w_funcs(epel_hv, 10, sse4)
     613           0 : mc_bi_w_func(epel_hv, 10, 6, sse4)
     614           0 : mc_bi_w_funcs(qpel_h, 10, sse4)
     615           0 : mc_bi_w_funcs(qpel_v, 10, sse4)
     616           0 : mc_bi_w_funcs(qpel_hv, 10, sse4)
     617             : 
     618           0 : mc_bi_w_funcs(pel_pixels, 12, sse4)
     619           0 : mc_bi_w_func(pel_pixels, 12, 6, sse4)
     620           0 : mc_bi_w_funcs(epel_h, 12, sse4)
     621           0 : mc_bi_w_func(epel_h, 12, 6, sse4)
     622           0 : mc_bi_w_funcs(epel_v, 12, sse4)
     623           0 : mc_bi_w_func(epel_v, 12, 6, sse4)
     624           0 : mc_bi_w_funcs(epel_hv, 12, sse4)
     625           0 : mc_bi_w_func(epel_hv, 12, 6, sse4)
     626           0 : mc_bi_w_funcs(qpel_h, 12, sse4)
     627           0 : mc_bi_w_funcs(qpel_v, 12, sse4)
     628           0 : mc_bi_w_funcs(qpel_hv, 12, sse4)
     629             : #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
     630             : 
     /* SAO_BAND_FILTER_FUNCS(bitd, opt)
      *
      * Declares (prototypes only, no bodies) the five SAO band-filter routines
      * ff_hevc_sao_band_filter_{8,16,32,48,64}_<bitd>_<opt>(). All five share
      * one signature: destination and source pointers with separate strides,
      * the offset table, the left band class, and the block width/height.
      * The definitions live outside this file excerpt (presumably in the x86
      * assembly sources -- confirm). */
     631             : #define SAO_BAND_FILTER_FUNCS(bitd, opt)                                                                                   \
     632             : void ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,  \
     633             :                                             int16_t *sao_offset_val, int sao_left_class, int width, int height);           \
     634             : void ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
     635             :                                             int16_t *sao_offset_val, int sao_left_class, int width, int height);           \
     636             : void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
     637             :                                             int16_t *sao_offset_val, int sao_left_class, int width, int height);           \
     638             : void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
     639             :                                             int16_t *sao_offset_val, int sao_left_class, int width, int height);           \
     640             : void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
     641             :                                              int16_t *sao_offset_val, int sao_left_class, int width, int height);
     642             : 
     /* Prototypes for every (bit depth, instruction set) pair: 8/10/12-bit
      * crossed with sse2, avx and avx2. */
     643             : SAO_BAND_FILTER_FUNCS(8,  sse2)
     644             : SAO_BAND_FILTER_FUNCS(10, sse2)
     645             : SAO_BAND_FILTER_FUNCS(12, sse2)
     646             : SAO_BAND_FILTER_FUNCS(8,   avx)
     647             : SAO_BAND_FILTER_FUNCS(10,  avx)
     648             : SAO_BAND_FILTER_FUNCS(12,  avx)
     649             : SAO_BAND_FILTER_FUNCS(8,  avx2)
     650             : SAO_BAND_FILTER_FUNCS(10, avx2)
     651             : SAO_BAND_FILTER_FUNCS(12, avx2)
     652             : 
     /* SAO_BAND_INIT(bitd, opt)
      *
      * Fills c->sao_band_filter[0..4] with the <bitd>/<opt> band-filter
      * routines for widths 8, 16, 32, 48 and 64, in that order.
      * Relies on a variable named `c` being in scope at the expansion site.
      * Wrapped in do { } while (0) so it behaves as a single statement. */
     653             : #define SAO_BAND_INIT(bitd, opt) do {                                       \
     654             :     c->sao_band_filter[0]      = ff_hevc_sao_band_filter_8_##bitd##_##opt;  \
     655             :     c->sao_band_filter[1]      = ff_hevc_sao_band_filter_16_##bitd##_##opt; \
     656             :     c->sao_band_filter[2]      = ff_hevc_sao_band_filter_32_##bitd##_##opt; \
     657             :     c->sao_band_filter[3]      = ff_hevc_sao_band_filter_48_##bitd##_##opt; \
     658             :     c->sao_band_filter[4]      = ff_hevc_sao_band_filter_64_##bitd##_##opt; \
     659             : } while (0)
     660             : 
     /* SAO_EDGE_FILTER_FUNCS(bitd, opt)
      *
      * Declares (prototypes only, no bodies) the five SAO edge-filter routines
      * ff_hevc_sao_edge_filter_{8,16,32,48,64}_<bitd>_<opt>(). All five share
      * one signature: destination and source pointers, a single destination
      * stride, the offset table, the edge-offset selector `eo`, and the block
      * width/height. Unlike the band filter there is no source-stride
      * parameter.
      * NOTE(review): the last declaration line ends with a line-continuation
      * backslash, so the macro body extends onto the following (empty) source
      * line. Harmless as long as that line stays empty; SAO_BAND_FILTER_FUNCS
      * does not have this trailing backslash. */
     661             : #define SAO_EDGE_FILTER_FUNCS(bitd, opt)                                                                                    \
     662             : void ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,  \
     663             :                                               int eo, int width, int height);                                               \
     664             : void ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
     665             :                                                int eo, int width, int height);                                              \
     666             : void ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
     667             :                                                int eo, int width, int height);                                              \
     668             : void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
     669             :                                                int eo, int width, int height);                                              \
     670             : void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
     671             :                                                int eo, int width, int height);                                              \
     672             : 
     /* Prototypes: the 8-bit variants are declared for ssse3 and avx2, the
      * 10- and 12-bit variants for sse2 and avx2. */
     673             : SAO_EDGE_FILTER_FUNCS(8, ssse3)
     674             : SAO_EDGE_FILTER_FUNCS(8, avx2)
     675             : SAO_EDGE_FILTER_FUNCS(10, sse2)
     676             : SAO_EDGE_FILTER_FUNCS(10, avx2)
     677             : SAO_EDGE_FILTER_FUNCS(12, sse2)
     678             : SAO_EDGE_FILTER_FUNCS(12, avx2)
     679             : 
     /* SAO_EDGE_INIT(bitd, opt)
      *
      * Fills c->sao_edge_filter[0..4] with the <bitd>/<opt> edge-filter
      * routines for widths 8, 16, 32, 48 and 64, in that order.
      * Relies on a variable named `c` being in scope at the expansion site.
      * Wrapped in do { } while (0) so it behaves as a single statement. */
     680             : #define SAO_EDGE_INIT(bitd, opt) do {                                       \
     681             :     c->sao_edge_filter[0]      = ff_hevc_sao_edge_filter_8_##bitd##_##opt;  \
     682             :     c->sao_edge_filter[1]      = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \
     683             :     c->sao_edge_filter[2]      = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \
     684             :     c->sao_edge_filter[3]      = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \
     685             :     c->sao_edge_filter[4]      = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \
     686             : } while (0)
     687             : 
     /* EPEL_LINKS(pointer, my, mx, fname, bitd, opt)
      *
      * Expands to nine PEL_LINK() invocations (PEL_LINK is defined elsewhere
      * in the file), one per block width. The second PEL_LINK argument is the
      * width slot: 1..9 correspond to widths 4, 6, 8, 12, 16, 24, 32, 48, 64,
      * and the width is appended to `fname` by token pasting.
      * pointer, my, mx, bitd and opt are passed through unchanged.
      * The last invocation has no trailing semicolon; the caller supplies it. */
     688             : #define EPEL_LINKS(pointer, my, mx, fname, bitd, opt )           \
     689             :         PEL_LINK(pointer, 1, my , mx , fname##4 ,  bitd, opt ); \
     690             :         PEL_LINK(pointer, 2, my , mx , fname##6 ,  bitd, opt ); \
     691             :         PEL_LINK(pointer, 3, my , mx , fname##8 ,  bitd, opt ); \
     692             :         PEL_LINK(pointer, 4, my , mx , fname##12,  bitd, opt ); \
     693             :         PEL_LINK(pointer, 5, my , mx , fname##16,  bitd, opt ); \
     694             :         PEL_LINK(pointer, 6, my , mx , fname##24,  bitd, opt ); \
     695             :         PEL_LINK(pointer, 7, my , mx , fname##32,  bitd, opt ); \
     696             :         PEL_LINK(pointer, 8, my , mx , fname##48,  bitd, opt ); \
     697             :         PEL_LINK(pointer, 9, my , mx , fname##64,  bitd, opt )
     /* QPEL_LINKS(pointer, my, mx, fname, bitd, opt)
      *
      * Same as EPEL_LINKS but without the width-6 entry: expands to eight
      * PEL_LINK() invocations for slots 1 and 3..9 (widths 4, 8, 12, 16, 24,
      * 32, 48, 64). Slot 2 is left untouched by this macro.
      * The last invocation has no trailing semicolon; the caller supplies it. */
     698             : #define QPEL_LINKS(pointer, my, mx, fname, bitd, opt)           \
     699             :         PEL_LINK(pointer, 1, my , mx , fname##4 ,  bitd, opt ); \
     700             :         PEL_LINK(pointer, 3, my , mx , fname##8 ,  bitd, opt ); \
     701             :         PEL_LINK(pointer, 4, my , mx , fname##12,  bitd, opt ); \
     702             :         PEL_LINK(pointer, 5, my , mx , fname##16,  bitd, opt ); \
     703             :         PEL_LINK(pointer, 6, my , mx , fname##24,  bitd, opt ); \
     704             :         PEL_LINK(pointer, 7, my , mx , fname##32,  bitd, opt ); \
     705             :         PEL_LINK(pointer, 8, my , mx , fname##48,  bitd, opt ); \
     706             :         PEL_LINK(pointer, 9, my , mx , fname##64,  bitd, opt )
     707             : 
     708         474 : void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
     709             : {
     710         474 :     int cpu_flags = av_get_cpu_flags();
     711             : 
     712         474 :     if (bit_depth == 8) {
     713         357 :         if (EXTERNAL_MMXEXT(cpu_flags)) {
     714          34 :             c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
     715          34 :             c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_mmxext;
     716             : 
     717          34 :             c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext;
     718             :         }
     719         357 :         if (EXTERNAL_SSE2(cpu_flags)) {
     720          28 :             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
     721          28 :             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
     722             :             if (ARCH_X86_64) {
     723          28 :                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
     724          28 :                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
     725             : 
     726          28 :                 c->idct[2] = ff_hevc_idct_16x16_8_sse2;
     727          28 :                 c->idct[3] = ff_hevc_idct_32x32_8_sse2;
     728             :             }
     729          28 :             SAO_BAND_INIT(8, sse2);
     730             : 
     731          28 :             c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
     732          28 :             c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
     733          28 :             c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
     734             : 
     735          28 :             c->idct[0]    = ff_hevc_idct_4x4_8_sse2;
     736          28 :             c->idct[1]    = ff_hevc_idct_8x8_8_sse2;
     737             : 
     738          28 :             c->add_residual[1] = ff_hevc_add_residual_8_8_sse2;
     739          28 :             c->add_residual[2] = ff_hevc_add_residual_16_8_sse2;
     740          28 :             c->add_residual[3] = ff_hevc_add_residual_32_8_sse2;
     741             :         }
     742         357 :         if (EXTERNAL_SSSE3(cpu_flags)) {
     743             :             if(ARCH_X86_64) {
     744          22 :                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
     745          22 :                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
     746             :             }
     747          22 :             SAO_EDGE_INIT(8, ssse3);
     748             :         }
     749         357 :         if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
     750             : 
     751          19 :             EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels,  8, sse4);
     752          19 :             EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,      8, sse4);
     753          19 :             EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,      8, sse4);
     754          19 :             EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,     8, sse4);
     755             : 
     756          19 :             QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
     757          19 :             QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
     758          19 :             QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
     759          19 :             QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
     760             :         }
     761         357 :         if (EXTERNAL_AVX(cpu_flags)) {
     762          10 :             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
     763          10 :             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
     764             :             if (ARCH_X86_64) {
     765          10 :                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
     766          10 :                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
     767             : 
     768          10 :                 c->idct[2] = ff_hevc_idct_16x16_8_avx;
     769          10 :                 c->idct[3] = ff_hevc_idct_32x32_8_avx;
     770             :             }
     771          10 :             SAO_BAND_INIT(8, avx);
     772             : 
     773          10 :             c->idct[0] = ff_hevc_idct_4x4_8_avx;
     774          10 :             c->idct[1] = ff_hevc_idct_8x8_8_avx;
     775             : 
     776          10 :             c->add_residual[1] = ff_hevc_add_residual_8_8_avx;
     777          10 :             c->add_residual[2] = ff_hevc_add_residual_16_8_avx;
     778          10 :             c->add_residual[3] = ff_hevc_add_residual_32_8_avx;
     779             :         }
     780         357 :         if (EXTERNAL_AVX2(cpu_flags)) {
     781           4 :             c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
     782           4 :             c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
     783             :         }
     784         357 :         if (EXTERNAL_AVX2_FAST(cpu_flags)) {
     785           4 :             c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
     786           4 :             c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
     787             :             if (ARCH_X86_64) {
     788           4 :                 c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2;
     789           4 :                 c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2;
     790           4 :                 c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2;
     791             : 
     792           4 :                 c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2;
     793           4 :                 c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2;
     794           4 :                 c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2;
     795             : 
     796           4 :                 c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
     797           4 :                 c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
     798           4 :                 c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
     799             : 
     800           4 :                 c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
     801           4 :                 c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
     802           4 :                 c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
     803             : 
     804           4 :                 c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2;
     805           4 :                 c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2;
     806           4 :                 c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2;
     807             : 
     808           4 :                 c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2;
     809           4 :                 c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2;
     810           4 :                 c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2;
     811             : 
     812           4 :                 c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2;
     813           4 :                 c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2;
     814           4 :                 c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2;
     815             : 
     816           4 :                 c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2;
     817           4 :                 c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2;
     818           4 :                 c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2;
     819             : 
     820           4 :                 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2;
     821           4 :                 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2;
     822           4 :                 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2;
     823             : 
     824           4 :                 c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2;
     825           4 :                 c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2;
     826           4 :                 c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2;
     827             : 
     828           4 :                 c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2;
     829           4 :                 c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2;
     830           4 :                 c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2;
     831             : 
     832           4 :                 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2;
     833           4 :                 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2;
     834           4 :                 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2;
     835             : 
     836           4 :                 c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2;
     837           4 :                 c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2;
     838           4 :                 c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2;
     839             : 
     840           4 :                 c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2;
     841           4 :                 c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2;
     842           4 :                 c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2;
     843             : 
     844           4 :                 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2;
     845           4 :                 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2;
     846           4 :                 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2;
     847             : 
     848           4 :                 c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2;
     849           4 :                 c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2;
     850           4 :                 c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2;
     851             : 
     852           4 :                 c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2;
     853           4 :                 c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2;
     854           4 :                 c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2;
     855             : 
     856           4 :                 c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2;
     857           4 :                 c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2;
     858           4 :                 c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2;
     859             : 
     860           4 :                 c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2;
     861           4 :                 c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2;
     862           4 :                 c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2;
     863             : 
     864           4 :                 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2;
     865           4 :                 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2;
     866           4 :                 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2;
     867             : 
     868           4 :                 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2;
     869           4 :                 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2;
     870           4 :                 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2;
     871             :             }
     872           4 :             SAO_BAND_INIT(8, avx2);
     873             : 
     874           4 :             c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2;
     875           4 :             c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
     876           4 :             c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
     877             : 
     878           4 :             c->add_residual[3] = ff_hevc_add_residual_32_8_avx2;
     879             :         }
     880         117 :     } else if (bit_depth == 10) {
     881          71 :         if (EXTERNAL_MMXEXT(cpu_flags)) {
     882          36 :             c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext;
     883          36 :             c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
     884          36 :             c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_mmxext;
     885             :         }
     886          71 :         if (EXTERNAL_SSE2(cpu_flags)) {
     887          30 :             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
     888          30 :             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
     889             :             if (ARCH_X86_64) {
     890          30 :                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
     891          30 :                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
     892             : 
     893          30 :                 c->idct[2] = ff_hevc_idct_16x16_10_sse2;
     894          30 :                 c->idct[3] = ff_hevc_idct_32x32_10_sse2;
     895             :             }
     896          30 :             SAO_BAND_INIT(10, sse2);
     897          30 :             SAO_EDGE_INIT(10, sse2);
     898             : 
     899          30 :             c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
     900          30 :             c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
     901          30 :             c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
     902             : 
     903          30 :             c->idct[0]    = ff_hevc_idct_4x4_10_sse2;
     904          30 :             c->idct[1]    = ff_hevc_idct_8x8_10_sse2;
     905             : 
     906          30 :             c->add_residual[1] = ff_hevc_add_residual_8_10_sse2;
     907          30 :             c->add_residual[2] = ff_hevc_add_residual_16_10_sse2;
     908          30 :             c->add_residual[3] = ff_hevc_add_residual_32_10_sse2;
     909             :         }
     910          71 :         if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
     911          24 :             c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
     912          24 :             c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
     913             :         }
     914          71 :         if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
     915          21 :             EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
     916          21 :             EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
     917          21 :             EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
     918          21 :             EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);
     919             : 
     920          21 :             QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
     921          21 :             QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
     922          21 :             QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
     923          21 :             QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
     924             :         }
     925          71 :         if (EXTERNAL_AVX(cpu_flags)) {
     926          12 :             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
     927          12 :             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
     928             :             if (ARCH_X86_64) {
     929          12 :                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
     930          12 :                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
     931             : 
     932          12 :                 c->idct[2] = ff_hevc_idct_16x16_10_avx;
     933          12 :                 c->idct[3] = ff_hevc_idct_32x32_10_avx;
     934             :             }
     935             : 
     936          12 :             c->idct[0] = ff_hevc_idct_4x4_10_avx;
     937          12 :             c->idct[1] = ff_hevc_idct_8x8_10_avx;
     938             : 
     939          12 :             SAO_BAND_INIT(10, avx);
     940             :         }
     941          71 :         if (EXTERNAL_AVX2(cpu_flags)) {
     942           6 :             c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
     943             :         }
     944          71 :         if (EXTERNAL_AVX2_FAST(cpu_flags)) {
     945           6 :             c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
     946           6 :             c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
     947             :             if (ARCH_X86_64) {
     948           6 :                 c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2;
     949           6 :                 c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2;
     950           6 :                 c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2;
     951           6 :                 c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2;
     952           6 :                 c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2;
     953             : 
     954           6 :                 c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2;
     955           6 :                 c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2;
     956           6 :                 c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2;
     957           6 :                 c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2;
     958           6 :                 c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2;
     959             : 
     960           6 :                 c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
     961           6 :                 c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
     962           6 :                 c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
     963           6 :                 c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2;
     964           6 :                 c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2;
     965             : 
     966           6 :                 c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
     967           6 :                 c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
     968           6 :                 c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
     969           6 :                 c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2;
     970           6 :                 c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2;
     971             : 
     972           6 :                 c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2;
     973           6 :                 c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2;
     974           6 :                 c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2;
     975           6 :                 c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2;
     976           6 :                 c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2;
     977           6 :                 c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2;
     978           6 :                 c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2;
     979           6 :                 c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2;
     980           6 :                 c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2;
     981           6 :                 c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2;
     982             : 
     983           6 :                 c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2;
     984           6 :                 c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2;
     985           6 :                 c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2;
     986           6 :                 c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2;
     987           6 :                 c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2;
     988             : 
     989           6 :                 c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2;
     990           6 :                 c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2;
     991           6 :                 c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2;
     992           6 :                 c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2;
     993           6 :                 c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2;
     994             : 
     995           6 :                 c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2;
     996           6 :                 c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2;
     997           6 :                 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2;
     998           6 :                 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2;
     999           6 :                 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2;
    1000             : 
    1001           6 :                 c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2;
    1002           6 :                 c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2;
    1003           6 :                 c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2;
    1004           6 :                 c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2;
    1005           6 :                 c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2;
    1006             : 
    1007           6 :                 c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2;
    1008           6 :                 c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2;
    1009           6 :                 c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2;
    1010           6 :                 c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2;
    1011           6 :                 c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2;
    1012             : 
    1013           6 :                 c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2;
    1014           6 :                 c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2;
    1015           6 :                 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2;
    1016           6 :                 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2;
    1017           6 :                 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2;
    1018             : 
    1019           6 :                 c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2;
    1020           6 :                 c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2;
    1021           6 :                 c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2;
    1022           6 :                 c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2;
    1023           6 :                 c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2;
    1024             : 
    1025           6 :                 c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2;
    1026           6 :                 c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2;
    1027           6 :                 c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2;
    1028           6 :                 c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2;
    1029           6 :                 c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2;
    1030             : 
    1031           6 :                 c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2;
    1032           6 :                 c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2;
    1033           6 :                 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2;
    1034           6 :                 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2;
    1035           6 :                 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2;
    1036             : 
    1037           6 :                 c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2;
    1038           6 :                 c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2;
    1039           6 :                 c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2;
    1040           6 :                 c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2;
    1041           6 :                 c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2;
    1042             : 
    1043           6 :                 c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2;
    1044           6 :                 c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2;
    1045           6 :                 c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2;
    1046           6 :                 c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2;
    1047           6 :                 c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2;
    1048             : 
    1049           6 :                 c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2;
    1050           6 :                 c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2;
    1051           6 :                 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2;
    1052           6 :                 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2;
    1053           6 :                 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2;
    1054             : 
    1055           6 :                 c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2;
    1056           6 :                 c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2;
    1057           6 :                 c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2;
    1058           6 :                 c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2;
    1059           6 :                 c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2;
    1060             : 
    1061           6 :                 c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2;
    1062           6 :                 c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2;
    1063           6 :                 c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2;
    1064           6 :                 c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2;
    1065           6 :                 c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2;
    1066             : 
    1067           6 :                 c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2;
    1068           6 :                 c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2;
    1069           6 :                 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2;
    1070           6 :                 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2;
    1071           6 :                 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2;
    1072             : 
    1073           6 :                 c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2;
    1074           6 :                 c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2;
    1075           6 :                 c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2;
    1076           6 :                 c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2;
    1077           6 :                 c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2;
    1078             : 
    1079           6 :                 c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2;
    1080           6 :                 c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2;
    1081           6 :                 c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2;
    1082           6 :                 c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2;
    1083           6 :                 c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2;
    1084             : 
    1085           6 :                 c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2;
    1086           6 :                 c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2;
    1087           6 :                 c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2;
    1088           6 :                 c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2;
    1089           6 :                 c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2;
    1090             :             }
    1091           6 :             SAO_BAND_INIT(10, avx2);
    1092           6 :             SAO_EDGE_INIT(10, avx2);
    1093             : 
    1094           6 :             c->add_residual[2] = ff_hevc_add_residual_16_10_avx2;
    1095           6 :             c->add_residual[3] = ff_hevc_add_residual_32_10_avx2;
    1096             :         }
    1097          46 :     } else if (bit_depth == 12) {
    1098           7 :         if (EXTERNAL_MMXEXT(cpu_flags)) {
    1099           0 :             c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext;
    1100           0 :             c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_mmxext;
    1101             :         }
    1102           7 :         if (EXTERNAL_SSE2(cpu_flags)) {
    1103           0 :             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
    1104           0 :             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
    1105             :             if (ARCH_X86_64) {
    1106           0 :                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
    1107           0 :                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
    1108             :             }
    1109           0 :             SAO_BAND_INIT(12, sse2);
    1110           0 :             SAO_EDGE_INIT(12, sse2);
    1111             : 
    1112           0 :             c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2;
    1113           0 :             c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2;
    1114           0 :             c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2;
    1115             :         }
    1116           7 :         if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
    1117           0 :             c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
    1118           0 :             c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
    1119             :         }
    1120           7 :         if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
    1121           0 :             EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
    1122           0 :             EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     12, sse4);
    1123           0 :             EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     12, sse4);
    1124           0 :             EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    12, sse4);
    1125             : 
    1126           0 :             QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
    1127           0 :             QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     12, sse4);
    1128           0 :             QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     12, sse4);
    1129           0 :             QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    12, sse4);
    1130             :         }
    1131           7 :         if (EXTERNAL_AVX(cpu_flags)) {
    1132           0 :             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
    1133           0 :             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
    1134             :             if (ARCH_X86_64) {
    1135           0 :                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
    1136           0 :                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
    1137             :             }
    1138           0 :             SAO_BAND_INIT(12, avx);
    1139             :         }
    1140           7 :         if (EXTERNAL_AVX2(cpu_flags)) {
    1141           0 :             c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
    1142             :         }
    1143           7 :         if (EXTERNAL_AVX2_FAST(cpu_flags)) {
    1144           0 :             c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2;
    1145           0 :             c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2;
    1146             : 
    1147           0 :             SAO_BAND_INIT(12, avx2);
    1148           0 :             SAO_EDGE_INIT(12, avx2);
    1149             :         }
    1150             :     }
    1151         474 : }

Generated by: LCOV version 1.13