LCOV - code coverage report
Current view: top level - libavcodec/x86 - h264_qpel.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 100 100 100.0 %
Date: 2017-12-17 23:02:56 Functions: 237 237 100.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
       3             :  * Copyright (c) 2011 Daniel Kang
       4             :  *
       5             :  * This file is part of FFmpeg.
       6             :  *
       7             :  * FFmpeg is free software; you can redistribute it and/or
       8             :  * modify it under the terms of the GNU Lesser General Public
       9             :  * License as published by the Free Software Foundation; either
      10             :  * version 2.1 of the License, or (at your option) any later version.
      11             :  *
      12             :  * FFmpeg is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :  * Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with FFmpeg; if not, write to the Free Software
      19             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      20             :  */
      21             : 
      22             : #include "libavutil/attributes.h"
      23             : #include "libavutil/cpu.h"
      24             : #include "libavutil/x86/asm.h"
      25             : #include "libavutil/x86/cpu.h"
      26             : #include "libavcodec/h264dec.h"
      27             : #include "libavcodec/h264qpel.h"
      28             : #include "libavcodec/pixels.h"
      29             : #include "fpel.h"
      30             : /*
                     :  * Prototypes of the put/avg "l2" pixel routines (each takes two sources,
                     :  * src1 and src2) for 4-, 8- and 16-wide blocks. They are only declared
                     :  * here, under HAVE_X86ASM; no bodies appear in this listing.
                     :  * The #defines that follow map the sse2 names of the 8/16-wide l2 routines
                     :  * onto the mmxext symbols, and the mmxext names of ff_put_pixels{4,8,16}
                     :  * onto the mmx symbols, so that the names pasted together by the H264_MC_C
                     :  * and H264_MC_V macros with those suffixes resolve.
                     :  */
      31             : #if HAVE_X86ASM
      32             : void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
      33             :                               int dstStride, int src1Stride, int h);
      34             : void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
      35             :                               int dstStride, int src1Stride, int h);
      36             : void ff_put_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
      37             :                               int dstStride, int src1Stride, int h);
      38             : void ff_avg_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
      39             :                               int dstStride, int src1Stride, int h);
      40             : void ff_put_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
      41             :                                int dstStride, int src1Stride, int h);
      42             : void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
      43             :                                int dstStride, int src1Stride, int h);
      44             : #define ff_put_pixels8_l2_sse2  ff_put_pixels8_l2_mmxext
      45             : #define ff_avg_pixels8_l2_sse2  ff_avg_pixels8_l2_mmxext
      46             : #define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext
      47             : #define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext
      48             : #define ff_put_pixels16_mmxext  ff_put_pixels16_mmx
      49             : #define ff_put_pixels8_mmxext   ff_put_pixels8_mmx
      50             : #define ff_put_pixels4_mmxext   ff_put_pixels4_mmx
      51             : /*
                     :  * DEF_QPEL(OPNAME) only declares prototypes (no function bodies) for the
                     :  * low-level lowpass primitives of one operation; it is expanded once for
                     :  * avg and once for put right below. Going by the names, the set covers the
                     :  * horizontal (h, h_l2), vertical (v) and two-stage hv (hv1/hv2, hv_v/hv_h)
                     :  * primitives plus the l2_shift5 helpers, which take an int16_t source
                     :  * (src16) next to an 8-bit one (src8).
                     :  */
      52             : #define DEF_QPEL(OPNAME)\
      53             : void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
      54             : void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
      55             : void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
      56             : void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
      57             : void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
      58             : void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
      59             : void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
      60             : void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_op_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h);\
      61             : void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h);\
      62             : void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_t *tmp, int srcStride);\
      63             : void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, int dstStride);\
      64             : void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_mmxext(const uint8_t *src, int16_t *tmp, int srcStride, int size);\
      65             : void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, int srcStride, int size);\
      66             : void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext(uint8_t *dst, int16_t *tmp, int dstStride, int unused, int h);\
      67             : void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size);\
      68             : void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h);\
      69             : void ff_ ## OPNAME ## _pixels8_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h);
      70             : 
      71             : DEF_QPEL(avg)
      72             : DEF_QPEL(put)
      73             : /*
                     :  * First stage of the 8/16-wide hv filter, mmxext flavour: calls the hv1
                     :  * primitive once per strip, advancing both src and tmp by 4 between calls.
                     :  * tmpStride is accepted but never read in this function.
                     :  */
      74          40 : static av_always_inline void ff_put_h264_qpel8or16_hv1_lowpass_mmxext(int16_t *tmp, const uint8_t *src, int tmpStride, int srcStride, int size)
      75             : {
      76          40 :     int w = (size + 8) >> 2; /* strip count: 4 when size is 8, 6 when size is 16 */
      77          40 :     src -= 2 * srcStride + 2; /* back up by two strides plus two bytes before filtering */
      78         280 :     while (w--) {
      79         200 :         ff_put_h264_qpel8or16_hv1_lowpass_op_mmxext(src, tmp, srcStride, size);
      80         200 :         tmp += 4;
      81         200 :         src += 4;
      82             :     }
      83          40 : }
      84             : /*
                     :  * QPEL_H264(OPNAME, OP, MMX) generates static inline wrappers named
                     :  * ff_<OPNAME>h264_qpel..._<MMX> on top of the *_mmxext primitives declared
                     :  * by DEF_QPEL. OPNAME is pasted directly in front of "h264", so it is
                     :  * passed with its trailing underscore (put_, avg_). The OP argument is not
                     :  * referenced anywhere in the body.
                     :  * Generated wrappers: qpel4 hv, qpel8or16 v / hv2 / hv, the 8- and 16-wide
                     :  * v and hv front ends, the 16-wide h and h_l2 (each made of four 8-wide
                     :  * calls) and the 16-wide pixels_l2_shift5 (two 8-wide calls).
                     :  */
      85             : #define QPEL_H264(OPNAME, OP, MMX)\
      86             : static av_always_inline void ff_ ## OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
      87             :     int w=3; /* three calls of the v primitive, 4 apart in both src and tmp */\
      88             :     src -= 2*srcStride+2;\
      89             :     while(w--){\
      90             :         ff_ ## OPNAME ## h264_qpel4_hv_lowpass_v_mmxext(src, tmp, srcStride);\
      91             :         tmp += 4;\
      92             :         src += 4;\
      93             :     }\
      94             :     tmp -= 3*4; /* rewind tmp to where the first strip was written */\
      95             :     ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\
      96             : }\
      97             : \
      98             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h){\
      99             :     src -= 2*srcStride;\
     100             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_op_mmxext(dst, src, dstStride, srcStride, h);\
     101             :     src += 4; /* second call covers the next 4 bytes to the right */\
     102             :     dst += 4;\
     103             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_op_mmxext(dst, src, dstStride, srcStride, h);\
     104             : }\
     105             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
     106             :     int w = size>>4; /* 0 for size 8, 1 for size 16; the do-while below runs w+1 times */\
     107             :     do{\
     108             :     ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_op_mmxext(dst, tmp, dstStride, 0, size); /* the 0 fills the parameter the prototype names "unused" */\
     109             :     tmp += 8;\
     110             :     dst += 8;\
     111             :     }while(w--);\
     112             : }\
     113             : \
     114             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
     115             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
     116             : }\
     117             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
     118             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
     119             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
     120             : }\
     121             : \
     122             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
     123             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
     124             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
     125             :     src += 8*srcStride;\
     126             :     dst += 8*dstStride;\
     127             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
     128             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
     129             : }\
     130             : \
     131             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\
     132             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
     133             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
     134             :     src += 8*dstStride; /* src is stepped with dstStride: this signature has no separate srcStride */\
     135             :     dst += 8*dstStride;\
     136             :     src2 += 8*src2Stride;\
     137             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
     138             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
     139             : }\
     140             : \
     141             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
     142             :     ff_put_h264_qpel8or16_hv1_lowpass_ ## MMX(tmp, src, tmpStride, srcStride, size); /* first stage is the put_ variant for every OPNAME */\
     143             :     ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
     144             : }\
     145             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
     146             :     ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst  , tmp  , src  , dstStride, tmpStride, srcStride, 8);\
     147             : }\
     148             : \
     149             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
     150             :     ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst  , tmp  , src  , dstStride, tmpStride, srcStride, 16);\
     151             : }\
     152             : \
     153             : static av_always_inline void ff_ ## OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h)\
     154             : {\
     155             :     ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst  , src16  , src8  , dstStride, src8Stride, h);\
     156             :     ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\
     157             : }\
     158             : 
     159             : /*
                     :  * QPEL_H264_H16_XMM supplies the 16-wide h_lowpass_l2 wrapper.
                     :  * With ARCH_X86_64 the macro expands to nothing and the ssse3 16-wide
                     :  * functions are only declared here (their bodies are not in this listing).
                     :  * Otherwise the macro builds the 16-wide version out of four 8-wide calls,
                     :  * stepping src by dstStride because the signature has no srcStride.
                     :  */
     160             : #if ARCH_X86_64
     161             : #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
     162             : 
     163             : void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);
     164             : void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);
     165             : 
     166             : #else // ARCH_X86_64
     167             : #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
     168             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\
     169             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
     170             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
     171             :     src += 8*dstStride;\
     172             :     dst += 8*dstStride;\
     173             :     src2 += 8*src2Stride;\
     174             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
     175             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
     176             : }
     177             : #endif // ARCH_X86_64
     178             : /*
                     :  * QPEL_H264_H_XMM emits whatever QPEL_H264_H16_XMM expands to, followed by
                     :  * a 16-wide h_lowpass wrapper made of four 8-wide calls: offsets 0 and 8,
                     :  * then the same pair again after advancing src by 8*srcStride and dst by
                     :  * 8*dstStride. The OP argument is only forwarded, never used directly.
                     :  */
     179             : #define QPEL_H264_H_XMM(OPNAME, OP, MMX)\
     180             : QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
     181             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
     182             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
     183             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
     184             :     src += 8*srcStride;\
     185             :     dst += 8*dstStride;\
     186             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
     187             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
     188             : }\
     189             : 
     190             : #define QPEL_H264_V_XMM(OPNAME, OP, MMX) /* 8- and 16-wide vertical front ends over ff_<OPNAME>h264_qpel8or16_v_lowpass_<MMX>; OP is not referenced */\
     191             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
     192             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
     193             : }\
     194             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
     195             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 16); /* offset 0, h = 16 */\
     196             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16); /* offset 8, h = 16 */\
     197             : }
     198             : /*
                     :  * First stage of the 8/16-wide hv filter, sse2 flavour: calls the sse2 hv1
                     :  * primitive once per strip, advancing both src and tmp by 8 between calls.
                     :  * tmpStride is accepted but never read in this function.
                     :  */
     199        9142 : static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp,
     200             :                                                                  const uint8_t *src,
     201             :                                                                  int tmpStride,
     202             :                                                                  int srcStride,
     203             :                                                                  int size)
     204             : {
     205        9142 :     int w = (size+8)>>3; /* strip count: 2 when size is 8, 3 when size is 16 */
     206        9142 :     src -= 2*srcStride+2; /* back up by two strides plus two bytes before filtering */
     207       39200 :     while(w--){
     208       20916 :         ff_put_h264_qpel8or16_hv1_lowpass_op_sse2(src, tmp, srcStride, size);
     209       20916 :         tmp += 8;
     210       20916 :         src += 8;
     211             :     }
     212        9142 : }
     213             : /*
                     :  * QPEL_H264_HV_XMM generates the hv wrappers for an XMM flavour. The first
                     :  * stage is always put_h264_qpel8or16_hv1_lowpass_sse2, whatever MMX is;
                     :  * only the second stage (hv2) is selected by OPNAME and MMX. The 8- and
                     :  * 16-wide front ends just fix the size argument. OP is not referenced.
                     :  */
     214             : #define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\
     215             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
     216             :     put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\
     217             :     ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
     218             : }\
     219             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
     220             :     ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 8);\
     221             : }\
     222             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
     223             :     ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\
     224             : }\
     225             : 
     226             : #define ff_put_h264_qpel8_h_lowpass_l2_sse2  ff_put_h264_qpel8_h_lowpass_l2_mmxext /* sse2 names of h_lowpass_l2 resolve to the mmxext versions */
     227             : #define ff_avg_h264_qpel8_h_lowpass_l2_sse2  ff_avg_h264_qpel8_h_lowpass_l2_mmxext
     228             : #define ff_put_h264_qpel16_h_lowpass_l2_sse2 ff_put_h264_qpel16_h_lowpass_l2_mmxext
     229             : #define ff_avg_h264_qpel16_h_lowpass_l2_sse2 ff_avg_h264_qpel16_h_lowpass_l2_mmxext
     230             : 
     231             : #define ff_put_h264_qpel8_v_lowpass_ssse3  ff_put_h264_qpel8_v_lowpass_sse2 /* ssse3 names of v_lowpass resolve to the sse2 versions */
     232             : #define ff_avg_h264_qpel8_v_lowpass_ssse3  ff_avg_h264_qpel8_v_lowpass_sse2
     233             : #define ff_put_h264_qpel16_v_lowpass_ssse3 ff_put_h264_qpel16_v_lowpass_sse2
     234             : #define ff_avg_h264_qpel16_v_lowpass_ssse3 ff_avg_h264_qpel16_v_lowpass_sse2
     235             : 
     236             : #define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext /* sse2 names of hv2_lowpass resolve to the mmxext versions */
     237             : #define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext
     238             : /*
                     :  * H264_MC expands to all four groups of mcXY entry points (H264_MC_C, _V,
                     :  * _H and _HV) for one OPNAME / SIZE / MMX / ALIGN combination.
                     :  */
     239             : #define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
     240             : H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
     241             : H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\
     242             : H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
     243             : H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\
     244             : 
     245       30493 : static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src, /* 16-wide put mc00 entry point for sse2 */
     246             :                                        ptrdiff_t stride)
     247             : {
     248       30493 :     ff_put_pixels16_sse2(dst, src, stride, 16); /* plain forward; the trailing 16 is presumably the row count - confirm against fpel.h */
     249       30493 : }
     250       13426 : static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src, /* 16-wide avg mc00 entry point for sse2 */
     251             :                                        ptrdiff_t stride)
     252             : {
     253       13426 :     ff_avg_pixels16_sse2(dst, src, stride, 16); /* plain forward; the trailing 16 is presumably the row count - confirm against fpel.h */
     254       13426 : }
     255             : #define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext /* the 8-wide sse2 mc00 names reuse the mmxext entry points */
     256             : #define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext
     257             : /*
                     :  * H264_MC_C generates the mc00 entry point <OPNAME>h264_qpel<SIZE>_mc00_<MMX>,
                     :  * which forwards to ff_<OPNAME>pixels<SIZE>_<MMX> with SIZE as the last
                     :  * argument. ALIGN is not referenced.
                     :  */
     258             : #define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
     259             : static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     260             : {\
     261             :     ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\
     262             : }\
     263             : 
     264             : #define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) /* generates mc10, mc20 and mc30; ALIGN is not referenced */ \
     265             : static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     266             : {\
     267             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride); /* second source is src itself */\
     268             : }\
     269             : \
     270             : static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     271             : {\
     272             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride); /* plain h_lowpass, no second source */\
     273             : }\
     274             : \
     275             : static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     276             : {\
     277             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride); /* second source is src shifted by one byte */\
     278             : }\
     279             : 
     280             : #define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) /* generates mc01, mc02 and mc03 */ \
     281             : static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     282             : {\
     283             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
     284             :     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride); /* v_lowpass result goes to temp with stride SIZE */\
     285             :     ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE); /* l2 combine of src and temp */\
     286             : }\
     287             : \
     288             : static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     289             : {\
     290             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride); /* plain v_lowpass straight into dst */\
     291             : }\
     292             : \
     293             : static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     294             : {\
     295             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
     296             :     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
     297             :     ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE); /* same as mc01 but the first source is src+stride */\
     298             : }\
     299             : 
     300             : #define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) /* generates the remaining nine entry points: mc11 mc31 mc13 mc33 mc22 mc21 mc23 mc12 mc32 */ \
     301             : static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     302             : {\
     303             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
     304             :     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride); /* v_lowpass of src into temp */\
     305             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE); /* h_lowpass_l2 of src with temp as second source */\
     306             : }\
     307             : \
     308             : static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     309             : {\
     310             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
     311             :     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride); /* like mc11, but v_lowpass starts at src+1 */\
     312             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
     313             : }\
     314             : \
     315             : static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     316             : {\
     317             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
     318             :     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
     319             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE); /* like mc11, but h_lowpass_l2 starts at src+stride */\
     320             : }\
     321             : \
     322             : static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     323             : {\
     324             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
     325             :     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride); /* combines both offsets: src+1 here, src+stride below */\
     326             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
     327             : }\
     328             : \
     329             : static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     330             : {\
     331             :     LOCAL_ALIGNED(ALIGN, uint16_t, temp, [SIZE*(SIZE<8?12:24)]); /* scratch for the hv filter; declared uint16_t but received as int16_t *tmp by the callee */\
     332             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\
     333             : }\
     334             : \
     335             : static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     336             : {\
     337             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]); /* one buffer holding halfHV (SIZE*SIZE bytes) followed by the int16_t scratch */\
     338             :     uint8_t * const halfHV= temp;\
     339             :     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE); /* int16_t area starts right after the halfHV bytes */\
     340             :     av_assert2(((int)temp & 7) == 0); /* NOTE(review): pointer is cast to int; only the low 3 bits are tested, but uintptr_t would avoid a pointer-to-int size mismatch on 64-bit - confirm */\
     341             :     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     342             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
     343             : }\
     344             : \
     345             : static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     346             : {\
     347             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
     348             :     uint8_t * const halfHV= temp;\
     349             :     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
     350             :     av_assert2(((int)temp & 7) == 0);\
     351             :     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     352             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE); /* same as mc21 except for the src+stride start */\
     353             : }\
     354             : \
     355             : static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     356             : {\
     357             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
     358             :     uint8_t * const halfHV= temp;\
     359             :     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
     360             :     av_assert2(((int)temp & 7) == 0);\
     361             :     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     362             :     ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE); /* always the mmxext shift5 helper; mc12 and mc32 differ only in the halfV offset (+2 vs +3) */\
     363             : }\
     364             : \
     365             : static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     366             : {\
     367             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
     368             :     uint8_t * const halfHV= temp;\
     369             :     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
     370             :     av_assert2(((int)temp & 7) == 0);\
     371             :     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     372             :     ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
     373             : }\
     374             : 
     375             : #define H264_MC_4816(MMX) /* full H264_MC set for put_ and avg_ at sizes 4, 8 and 16, all with ALIGN = 8 */\
     376             : H264_MC(put_, 4, MMX, 8)\
     377             : H264_MC(put_, 8, MMX, 8)\
     378             : H264_MC(put_, 16,MMX, 8)\
     379             : H264_MC(avg_, 4, MMX, 8)\
     380             : H264_MC(avg_, 8, MMX, 8)\
     381             : H264_MC(avg_, 16,MMX, 8)\
     382             : 
     383             : #define H264_MC_816(QPEL, XMM) /* applies the generator macro passed as QPEL to put_ and avg_ at sizes 8 and 16 with ALIGN = 16 */\
     384             : QPEL(put_, 8, XMM, 16)\
     385             : QPEL(put_, 16,XMM, 16)\
     386             : QPEL(avg_, 8, XMM, 16)\
     387             : QPEL(avg_, 16,XMM, 16)\
     388             : 
     389        1617 : QPEL_H264(put_,        PUT_OP, mmxext) /* mmxext wrappers; the OP argument (PUT_OP / AVG_MMXEXT_OP) is not referenced by any of these macro bodies */
     390         200 : QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext)
     391       25032 : QPEL_H264_V_XMM(put_,       PUT_OP, sse2) /* sse2: vertical front ends */
     392         208 : QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2)
     393          72 : QPEL_H264_HV_XMM(put_,       PUT_OP, sse2) /* sse2: hv front ends */
     394           8 : QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, sse2)
     395        2256 : QPEL_H264_H_XMM(put_,       PUT_OP, ssse3) /* ssse3: horizontal front ends */
     396         624 : QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3)
     397       18024 : QPEL_H264_HV_XMM(put_,       PUT_OP, ssse3) /* ssse3: hv front ends */
     398         180 : QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
     399             : /*
                     :  * Instantiate the mcXY entry points: the complete set for mmxext at sizes
                     :  * 4, 8 and 16; for sse2 only the V and HV groups, and for ssse3 only the H
                     :  * and HV groups, each at sizes 8 and 16.
                     :  */
     400       13644 : H264_MC_4816(mmxext)
     401       10790 : H264_MC_816(H264_MC_V, sse2)
     402          72 : H264_MC_816(H264_MC_HV, sse2)
     403       21643 : H264_MC_816(H264_MC_H, ssse3)
     404       23520 : H264_MC_816(H264_MC_HV, ssse3)
     405             : 
     406             : /*
                     :  * LUMA_MC_OP declares (it does not define) a single function
                     :  * ff_<OP>_h264_qpel<NUM>_<TYPE>_<DEPTH>_<OPT> taking (dst, src, stride).
                     :  * LUMA_MC_ALL repeats that for put/avg at sizes 4, 8 and 16;
                     :  * LUMA_MC_816 repeats it for put/avg at sizes 8 and 16 only.
                     :  */
     407             : //10bit
     408             : #define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
     409             : void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
     410             :     (uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
     411             : 
     412             : #define LUMA_MC_ALL(DEPTH, TYPE, OPT) \
     413             :     LUMA_MC_OP(put,  4, DEPTH, TYPE, OPT) \
     414             :     LUMA_MC_OP(avg,  4, DEPTH, TYPE, OPT) \
     415             :     LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT) \
     416             :     LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT) \
     417             :     LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
     418             :     LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
     419             : 
     420             : #define LUMA_MC_816(DEPTH, TYPE, OPT) \
     421             :     LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT) \
     422             :     LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT) \
     423             :     LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
     424             :     LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
     425             : // 10-bit mmxext prototypes: all sixteen mcXY variants (X and Y each 0..3), sizes 4, 8 and 16.
     426             : LUMA_MC_ALL(10, mc00, mmxext)
     427             : LUMA_MC_ALL(10, mc10, mmxext)
     428             : LUMA_MC_ALL(10, mc20, mmxext)
     429             : LUMA_MC_ALL(10, mc30, mmxext)
     430             : LUMA_MC_ALL(10, mc01, mmxext)
     431             : LUMA_MC_ALL(10, mc11, mmxext)
     432             : LUMA_MC_ALL(10, mc21, mmxext)
     433             : LUMA_MC_ALL(10, mc31, mmxext)
     434             : LUMA_MC_ALL(10, mc02, mmxext)
     435             : LUMA_MC_ALL(10, mc12, mmxext)
     436             : LUMA_MC_ALL(10, mc22, mmxext)
     437             : LUMA_MC_ALL(10, mc32, mmxext)
     438             : LUMA_MC_ALL(10, mc03, mmxext)
     439             : LUMA_MC_ALL(10, mc13, mmxext)
     440             : LUMA_MC_ALL(10, mc23, mmxext)
     441             : LUMA_MC_ALL(10, mc33, mmxext)
     442             : // 10-bit sse2 prototypes for sizes 8 and 16, all sixteen mcXY variants.
                       // mc10, mc20 and mc30 additionally get sse2_cache64 and ssse3_cache64 variants;
                       // ff_h264qpel_init_x86 below selects between them by CPU flags.
     443             : LUMA_MC_816(10, mc00, sse2)
     444             : LUMA_MC_816(10, mc10, sse2)
     445             : LUMA_MC_816(10, mc10, sse2_cache64)
     446             : LUMA_MC_816(10, mc10, ssse3_cache64)
     447             : LUMA_MC_816(10, mc20, sse2)
     448             : LUMA_MC_816(10, mc20, sse2_cache64)
     449             : LUMA_MC_816(10, mc20, ssse3_cache64)
     450             : LUMA_MC_816(10, mc30, sse2)
     451             : LUMA_MC_816(10, mc30, sse2_cache64)
     452             : LUMA_MC_816(10, mc30, ssse3_cache64)
     453             : LUMA_MC_816(10, mc01, sse2)
     454             : LUMA_MC_816(10, mc11, sse2)
     455             : LUMA_MC_816(10, mc21, sse2)
     456             : LUMA_MC_816(10, mc31, sse2)
     457             : LUMA_MC_816(10, mc02, sse2)
     458             : LUMA_MC_816(10, mc12, sse2)
     459             : LUMA_MC_816(10, mc22, sse2)
     460             : LUMA_MC_816(10, mc32, sse2)
     461             : LUMA_MC_816(10, mc03, sse2)
     462             : LUMA_MC_816(10, mc13, sse2)
     463             : LUMA_MC_816(10, mc23, sse2)
     464             : LUMA_MC_816(10, mc33, sse2)
     465             : // QPEL16_OPMC: define ff_<OP>_h264_qpel16_<MC>_10_<MMX> in C on top of the qpel8 function
                       // with the same OP, MC and MMX. The qpel8 function is called four times: at (dst, src),
                       // at a +16 byte offset, and at the same two offsets after both pointers have been
                       // advanced by 8*stride.
                       // NOTE(review): +16 bytes presumably equals 8 samples at two bytes per 10-bit sample,
                       // i.e. the four calls cover the four 8x8 quadrants of a 16x16 block -- confirm.
     466             : #define QPEL16_OPMC(OP, MC, MMX)\
     467             : void ff_ ## OP ## _h264_qpel16_ ## MC ## _10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride){\
     468             :     ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst   , src   , stride);\
     469             :     ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
     470             :     src += 8*stride;\
     471             :     dst += 8*stride;\
     472             :     ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst   , src   , stride);\
     473             :     ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
     474             : }
     475             : // QPEL16_OP: emit both the put and the avg 16-wide function for one MC variant.
     476             : #define QPEL16_OP(MC, MMX)\
     477             : QPEL16_OPMC(put, MC, MMX)\
     478             : QPEL16_OPMC(avg, MC, MMX)
     479             : // QPEL16: emit the put and avg 16-wide functions for all sixteen mcXY variants of one
                       // instruction-set suffix (32 function definitions per expansion).
     480             : #define QPEL16(MMX)\
     481             : QPEL16_OP(mc00, MMX)\
     482             : QPEL16_OP(mc01, MMX)\
     483             : QPEL16_OP(mc02, MMX)\
     484             : QPEL16_OP(mc03, MMX)\
     485             : QPEL16_OP(mc10, MMX)\
     486             : QPEL16_OP(mc11, MMX)\
     487             : QPEL16_OP(mc12, MMX)\
     488             : QPEL16_OP(mc13, MMX)\
     489             : QPEL16_OP(mc20, MMX)\
     490             : QPEL16_OP(mc21, MMX)\
     491             : QPEL16_OP(mc22, MMX)\
     492             : QPEL16_OP(mc23, MMX)\
     493             : QPEL16_OP(mc30, MMX)\
     494             : QPEL16_OP(mc31, MMX)\
     495             : QPEL16_OP(mc32, MMX)\
     496             : QPEL16_OP(mc33, MMX)
     497             : // The C-composed 16-wide 10-bit mmxext functions are only built for 32-bit x86; the same
                       // ARCH_X86_32 guard protects the code in ff_h264qpel_init_x86 that installs them.
     498             : #if ARCH_X86_32 // ARCH_X86_64 implies SSE2+
     499             : QPEL16(mmxext)
     500             : #endif
     501             : 
     502             : #endif /* HAVE_X86ASM */
     503             : // SET_QPEL_FUNCS: fill all sixteen entries of c-><PFX>_pixels_tab[IDX] with the functions
                       // named <PREFIX><PFX><SIZE>_mcXY_<CPU>. The entry for mcXY is stored at index X + 4*Y
                       // (mc00 at 0, mc10 at 1, ... mc33 at 15). PREFIX may be empty. Expects a variable
                       // named c to be in scope at the expansion site.
     504             : #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                          \
     505             :     do {                                                                     \
     506             :     c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
     507             :     c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
     508             :     c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
     509             :     c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
     510             :     c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
     511             :     c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
     512             :     c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
     513             :     c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
     514             :     c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
     515             :     c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
     516             :     c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
     517             :     c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
     518             :     c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
     519             :     c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
     520             :     c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
     521             :     c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
     522             :     } while (0)
     523             : // H264_QPEL_FUNCS: for a single mcXY position, store the size-16 and size-8 put and avg
                       // functions (names without ff_ prefix or depth infix) at index x + y*4 of tables [0]
                       // and [1]. x and y are token-pasted into the function names, so they must be literal
                       // digits. Used by the init function only on the !high_bit_depth path.
     524             : #define H264_QPEL_FUNCS(x, y, CPU)                                                            \
     525             :     do {                                                                                      \
     526             :         c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \
     527             :         c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc  ## x ## y ## _ ## CPU; \
     528             :         c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \
     529             :         c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc  ## x ## y ## _ ## CPU; \
     530             :     } while (0)
     531             : // H264_QPEL_FUNCS_10: same table slots as H264_QPEL_FUNCS, but installs the functions named
                       // ff_{put,avg}_h264_qpel{16,8}_mcXY_10_<CPU>. x and y must again be literal digits.
     532             : #define H264_QPEL_FUNCS_10(x, y, CPU)                                                               \
     533             :     do {                                                                                            \
     534             :         c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
     535             :         c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc  ## x ## y ## _10_ ## CPU; \
     536             :         c->avg_h264_qpel_pixels_tab[0][x + y * 4] = ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
     537             :         c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc  ## x ## y ## _10_ ## CPU; \
     538             :     } while (0)
     539             : // Install x86 qpel function pointers into the put/avg tables of c, chosen by the runtime
                       // CPU flags and by bit_depth.
                       //   c         - context whose put_h264_qpel_pixels_tab / avg_h264_qpel_pixels_tab are written
                       //   bit_depth - assignments are made only for bit_depth <= 8 and for bit_depth == 10;
                       //               any other depth leaves the tables untouched
                       // The CPU-flag blocks run in sequence and later blocks overwrite entries written by
                       // earlier ones, so their order matters. When HAVE_X86ASM is 0 the function is empty.
     540         620 : av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
     541             : {
     542             : #if HAVE_X86ASM
     543         620 :     int high_bit_depth = bit_depth > 8;
     544         620 :     int cpu_flags = av_get_cpu_flags();
     545             : // MMXEXT: complete tables (sizes 16, 8, 4) for bit_depth <= 8; for 10-bit, size 4 always and sizes 16/8 only on ARCH_X86_32.
     546         620 :     if (EXTERNAL_MMXEXT(cpu_flags)) {
     547          83 :         if (!high_bit_depth) {
     548          39 :             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
     549          39 :             SET_QPEL_FUNCS(put_h264_qpel, 1,  8, mmxext, );
     550          39 :             SET_QPEL_FUNCS(put_h264_qpel, 2,  4, mmxext, );
     551          39 :             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, );
     552          39 :             SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, mmxext, );
     553          39 :             SET_QPEL_FUNCS(avg_h264_qpel, 2,  4, mmxext, );
     554          44 :         } else if (bit_depth == 10) {
     555             : #if ARCH_X86_32
     556             :             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
     557             :             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
     558             :             SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 10_mmxext, ff_);
     559             :             SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 10_mmxext, ff_);
     560             : #endif
     561          22 :             SET_QPEL_FUNCS(put_h264_qpel, 2, 4,  10_mmxext, ff_);
     562          22 :             SET_QPEL_FUNCS(avg_h264_qpel, 2, 4,  10_mmxext, ff_);
     563             :         }
     564             :     }
     565             : // SSE2: for bit_depth <= 8, replace every size-16/8 entry with y != 0 (the x = 0..3, y = 0 entries keep their earlier value).
     566         620 :     if (EXTERNAL_SSE2(cpu_flags)) {
     567          71 :         if (!high_bit_depth) {
     568          35 :             H264_QPEL_FUNCS(0, 1, sse2);
     569          35 :             H264_QPEL_FUNCS(0, 2, sse2);
     570          35 :             H264_QPEL_FUNCS(0, 3, sse2);
     571          35 :             H264_QPEL_FUNCS(1, 1, sse2);
     572          35 :             H264_QPEL_FUNCS(1, 2, sse2);
     573          35 :             H264_QPEL_FUNCS(1, 3, sse2);
     574          35 :             H264_QPEL_FUNCS(2, 1, sse2);
     575          35 :             H264_QPEL_FUNCS(2, 2, sse2);
     576          35 :             H264_QPEL_FUNCS(2, 3, sse2);
     577          35 :             H264_QPEL_FUNCS(3, 1, sse2);
     578          35 :             H264_QPEL_FUNCS(3, 2, sse2);
     579          35 :             H264_QPEL_FUNCS(3, 3, sse2);
     580             :         }
     581             : // 10-bit SSE2: fill the whole size-16/8 tables with the sse2 functions, then switch mc10/mc20/mc30 to the sse2_cache64 variants.
     582          71 :         if (bit_depth == 10) {
     583          18 :             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
     584          18 :             SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 10_sse2, ff_);
     585          18 :             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
     586          18 :             SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 10_sse2, ff_);
     587          18 :             H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
     588          18 :             H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
     589          18 :             H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
     590             :         }
     591             :     }
     592             : // Fast SSE2 only: the mc00 entry for bit_depth <= 8 is switched to the sse2 version.
     593         620 :     if (EXTERNAL_SSE2_FAST(cpu_flags)) {
     594          71 :         if (!high_bit_depth) {
     595          35 :             H264_QPEL_FUNCS(0, 0, sse2);
     596             :         }
     597             :     }
     598             : // SSSE3: for bit_depth <= 8, replace every size-16/8 entry with x != 0; for 10-bit, mc10/mc20/mc30 use the ssse3_cache64 variants.
     599         620 :     if (EXTERNAL_SSSE3(cpu_flags)) {
     600          59 :         if (!high_bit_depth) {
     601          31 :             H264_QPEL_FUNCS(1, 0, ssse3);
     602          31 :             H264_QPEL_FUNCS(1, 1, ssse3);
     603          31 :             H264_QPEL_FUNCS(1, 2, ssse3);
     604          31 :             H264_QPEL_FUNCS(1, 3, ssse3);
     605          31 :             H264_QPEL_FUNCS(2, 0, ssse3);
     606          31 :             H264_QPEL_FUNCS(2, 1, ssse3);
     607          31 :             H264_QPEL_FUNCS(2, 2, ssse3);
     608          31 :             H264_QPEL_FUNCS(2, 3, ssse3);
     609          31 :             H264_QPEL_FUNCS(3, 0, ssse3);
     610          31 :             H264_QPEL_FUNCS(3, 1, ssse3);
     611          31 :             H264_QPEL_FUNCS(3, 2, ssse3);
     612          31 :             H264_QPEL_FUNCS(3, 3, ssse3);
     613             :         }
     614             : 
     615          59 :         if (bit_depth == 10) {
     616          14 :             H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
     617          14 :             H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
     618          14 :             H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
     619             :         }
     620             :     }
     621             : // AVX: for 10-bit, mc10/mc20/mc30 go back to the plain sse2 functions, overriding both cache64 choices made above.
     622         620 :     if (EXTERNAL_AVX(cpu_flags)) {
     623             :         /* AVX implies 64 byte cache lines without the need to avoid unaligned
     624             :          * memory accesses that cross the boundary between two cache lines.
     625             :          * TODO: Port X264_CPU_CACHELINE_32/64 detection from x264 to avoid
     626             :          * having to treat SSE2 functions with such properties as AVX. */
     627          35 :         if (bit_depth == 10) {
     628           6 :             H264_QPEL_FUNCS_10(1, 0, sse2);
     629           6 :             H264_QPEL_FUNCS_10(2, 0, sse2);
     630           6 :             H264_QPEL_FUNCS_10(3, 0, sse2);
     631             :         }
     632             :     }
     633             : #endif
     634         620 : }

Generated by: LCOV version 1.13