LCOV - code coverage report
Current view: top level - src/libavcodec/x86 - h264_qpel.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 91 91 100.0 %
Date: 2017-01-22 02:20:28 Functions: 237 237 100.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
       3             :  * Copyright (c) 2011 Daniel Kang
       4             :  *
       5             :  * This file is part of FFmpeg.
       6             :  *
       7             :  * FFmpeg is free software; you can redistribute it and/or
       8             :  * modify it under the terms of the GNU Lesser General Public
       9             :  * License as published by the Free Software Foundation; either
      10             :  * version 2.1 of the License, or (at your option) any later version.
      11             :  *
      12             :  * FFmpeg is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :  * Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with FFmpeg; if not, write to the Free Software
      19             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      20             :  */
      21             : 
      22             : #include "libavutil/attributes.h"
      23             : #include "libavutil/cpu.h"
      24             : #include "libavutil/x86/asm.h"
      25             : #include "libavutil/x86/cpu.h"
      26             : #include "libavcodec/h264dec.h"
      27             : #include "libavcodec/h264qpel.h"
      28             : #include "libavcodec/pixels.h"
      29             : #include "fpel.h"
      30             : 
      31             : #if HAVE_YASM
      /*
       * Prototypes of the external two-source ("l2") pixel helpers for 4-, 8-
       * and 16-pixel-wide blocks, in put and avg flavours. Only the declarations
       * are visible here; they sit under HAVE_YASM, so they are presumably
       * implemented in assembly (confirm). Each takes a destination, two
       * sources, the destination stride, the stride of src1 and a row count h;
       * no stride is passed for src2.
       * NOTE(review): presumably dst receives the (put/avg) average of src1 and
       * src2 - verify against the implementation.
       */
      32             : void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
      33             :                               int dstStride, int src1Stride, int h);
      34             : void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
      35             :                               int dstStride, int src1Stride, int h);
      36             : void ff_put_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
      37             :                               int dstStride, int src1Stride, int h);
      38             : void ff_avg_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
      39             :                               int dstStride, int src1Stride, int h);
      40             : void ff_put_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
      41             :                                int dstStride, int src1Stride, int h);
      42             : void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
      43             :                                int dstStride, int src1Stride, int h);
      /*
       * Suffix aliases. The generator macros further down paste an
       * instruction-set suffix onto helper names, so a suffix that has no
       * dedicated routine is mapped onto an existing one: the sse2 "l2" helpers
       * resolve to the mmxext versions, and the mmxext put_pixels helpers
       * resolve to the mmx versions (declared outside this view, presumably in
       * fpel.h).
       */
      44             : #define ff_put_pixels8_l2_sse2  ff_put_pixels8_l2_mmxext
      45             : #define ff_avg_pixels8_l2_sse2  ff_avg_pixels8_l2_mmxext
      46             : #define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext
      47             : #define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext
      48             : #define ff_put_pixels16_mmxext  ff_put_pixels16_mmx
      49             : #define ff_put_pixels8_mmxext   ff_put_pixels8_mmx
      50             : #define ff_put_pixels4_mmxext   ff_put_pixels4_mmx
      51             : 
      /*
       * DEF_QPEL(OPNAME) declares the prototypes of the external low-pass
       * primitives for one operation. OPNAME is given WITHOUT a trailing
       * underscore here (avg, put) because the macro inserts it itself; the
       * wrapper macros below take "put_" / "avg_" instead.
       *
       * Declared groups:
       *   - h_lowpass (4/8 wide, mmxext and ssse3) and h_lowpass_l2, which takes
       *     an extra source src2 with its own stride;
       *   - v_lowpass for width 4, plus 8or16 variants that take a row count h;
       *   - the two-stage hv primitives: a first stage reading src and writing
       *     an int16_t tmp buffer (hv_lowpass_v / hv1), and a second stage
       *     reading tmp and writing dst (hv_lowpass_h / hv2);
       *   - pixels{4,8}_l2_shift5, which take one int16_t source and one
       *     uint8_t source.
       * The macro is instantiated for avg and put right after its definition.
       */
      52             : #define DEF_QPEL(OPNAME)\
      53             : void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
      54             : void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
      55             : void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
      56             : void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
      57             : void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
      58             : void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
      59             : void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
      60             : void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_op_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h);\
      61             : void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h);\
      62             : void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_t *tmp, int srcStride);\
      63             : void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, int dstStride);\
      64             : void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_mmxext(const uint8_t *src, int16_t *tmp, int srcStride, int size);\
      65             : void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, int srcStride, int size);\
      66             : void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext(uint8_t *dst, int16_t *tmp, int dstStride, int unused, int h);\
      67             : void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size);\
      68             : void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h);\
      69             : void ff_ ## OPNAME ## _pixels8_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h);
      70             : 
      71             : DEF_QPEL(avg)
      72             : DEF_QPEL(put)
      73             : 
      /*
       * QPEL_H264(OPNAME, OP, MMX) generates the always-inline C wrappers that
       * tile the external primitives over larger blocks. OPNAME carries its
       * trailing underscore ("put_" / "avg_"); MMX is the suffix pasted onto the
       * generated names. OP is accepted but never referenced in the body.
       *
       * Generated wrappers, in order:
       *   - qpel4_hv_lowpass: rewinds src by two rows and two columns, runs the
       *     vertical stage on three 4-column strips (advancing src and tmp by 4
       *     each time), rewinds tmp by 3*4 and runs the horizontal stage once.
       *     tmpStride is not used.
       *   - qpel8or16_v_lowpass: rewinds src by two rows, then calls the
       *     primitive twice, the second time 4 columns to the right.
       *   - qpel8or16_hv1_lowpass: first hv stage over (size+8)>>2 strips of 4
       *     columns (4 strips for size 8, 6 for size 16). tmpStride is not used.
       *   - qpel8or16_hv2_lowpass: second hv stage in 8-column steps; the
       *     do/while runs (size>>4)+1 times, i.e. once for size 8 and twice for
       *     size 16. A literal 0 is passed for the primitive's "unused" argument.
       *   - qpel8_v / qpel16_v: the 16-wide form is two 8-wide columns, h = 16.
       *   - qpel16_h_lowpass: four 8x8 quadrants.
       *   - qpel16_h_lowpass_l2: four 8x8 quadrants; src is advanced with
       *     dstStride because the signature carries no separate src stride (the
       *     visible callers pass the same stride for src and dst).
       *   - qpel8or16_hv_lowpass (+ the 8 and 16 shorthands): stage one always
       *     uses the put_ variant regardless of OPNAME; only stage two is
       *     OPNAME-specific.
       *   - pixels16_l2_shift5: two 8-wide halves.
       */
      74             : #define QPEL_H264(OPNAME, OP, MMX)\
      75             : static av_always_inline void ff_ ## OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
      76             :     int w=3;\
      77             :     src -= 2*srcStride+2;\
      78             :     while(w--){\
      79             :         ff_ ## OPNAME ## h264_qpel4_hv_lowpass_v_mmxext(src, tmp, srcStride);\
      80             :         tmp += 4;\
      81             :         src += 4;\
      82             :     }\
      83             :     tmp -= 3*4;\
      84             :     ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\
      85             : }\
      86             : \
      87             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h){\
      88             :     src -= 2*srcStride;\
      89             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_op_mmxext(dst, src, dstStride, srcStride, h);\
      90             :     src += 4;\
      91             :     dst += 4;\
      92             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_op_mmxext(dst, src, dstStride, srcStride, h);\
      93             : }\
      94             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_t *tmp, const uint8_t *src, int tmpStride, int srcStride, int size){\
      95             :     int w = (size+8)>>2;\
      96             :     src -= 2*srcStride+2;\
      97             :     while(w--){\
      98             :         ff_ ## OPNAME ## h264_qpel8or16_hv1_lowpass_op_mmxext(src, tmp, srcStride, size);\
      99             :         tmp += 4;\
     100             :         src += 4;\
     101             :     }\
     102             : }\
     103             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
     104             :     int w = size>>4;\
     105             :     do{\
     106             :     ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_op_mmxext(dst, tmp, dstStride, 0, size);\
     107             :     tmp += 8;\
     108             :     dst += 8;\
     109             :     }while(w--);\
     110             : }\
     111             : \
     112             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
     113             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
     114             : }\
     115             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
     116             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
     117             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
     118             : }\
     119             : \
     120             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
     121             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
     122             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
     123             :     src += 8*srcStride;\
     124             :     dst += 8*dstStride;\
     125             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
     126             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
     127             : }\
     128             : \
     129             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\
     130             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
     131             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
     132             :     src += 8*dstStride;\
     133             :     dst += 8*dstStride;\
     134             :     src2 += 8*src2Stride;\
     135             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
     136             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
     137             : }\
     138             : \
     139             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
     140             :     ff_put_h264_qpel8or16_hv1_lowpass_ ## MMX(tmp, src, tmpStride, srcStride, size);\
     141             :     ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
     142             : }\
     143             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
     144             :     ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst  , tmp  , src  , dstStride, tmpStride, srcStride, 8);\
     145             : }\
     146             : \
     147             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
     148             :     ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst  , tmp  , src  , dstStride, tmpStride, srcStride, 16);\
     149             : }\
     150             : \
     151             : static av_always_inline void ff_ ## OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h)\
     152             : {\
     153             :     ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst  , src16  , src8  , dstStride, src8Stride, h);\
     154             :     ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\
     155             : }\
     156             : 
     157             : 
     /*
      * QPEL_H264_H16_XMM supplies the 16-wide h_lowpass_l2 routine.
      *   - ARCH_X86_64: the macro expands to nothing (its continuation joins an
      *     empty line) and the ssse3 16-wide put/avg routines are declared as
      *     external functions instead.
      *   - otherwise: the macro generates an inline wrapper that covers the
      *     16x16 block with four 8x8 calls; src is advanced with dstStride
      *     because the signature has no separate src stride.
      */
     158             : #if ARCH_X86_64
     159             : #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
     160             : 
     161             : void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);
     162             : void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);
     163             : 
     164             : #else // ARCH_X86_64
     165             : #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
     166             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\
     167             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
     168             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
     169             :     src += 8*dstStride;\
     170             :     dst += 8*dstStride;\
     171             :     src2 += 8*src2Stride;\
     172             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
     173             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
     174             : }
     175             : #endif // ARCH_X86_64
     176             : 
     /*
      * QPEL_H264_H_XMM(OPNAME, OP, MMX): horizontal wrappers for an XMM-class
      * suffix. It first expands QPEL_H264_H16_XMM (the 16-wide h_lowpass_l2,
      * which may be empty depending on the architecture) and then generates the
      * 16-wide h_lowpass as four 8x8 calls of the 8-wide primitive.
      * OP is not referenced.
      */
     177             : #define QPEL_H264_H_XMM(OPNAME, OP, MMX)\
     178             : QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
     179             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
     180             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
     181             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
     182             :     src += 8*srcStride;\
     183             :     dst += 8*dstStride;\
     184             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
     185             :     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
     186             : }\
     187             : 
     /*
      * QPEL_H264_V_XMM(OPNAME, OP, MMX): vertical wrappers for an XMM-class
      * suffix. The 8-wide form forwards to the 8or16 primitive with h = 8; the
      * 16-wide form calls it twice with h = 16, the second call 8 columns to
      * the right. Unlike the QPEL_H264 version, no src rewind or 4-column
      * split is done here. OP is not referenced.
      */
     188             : #define QPEL_H264_V_XMM(OPNAME, OP, MMX)\
     189             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
     190             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
     191             : }\
     192             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
     193             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
     194             :     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
     195             : }
     196             : 
     /*
      * First stage of the hv low-pass for the SSE2 path.
      *
      * Rewinds src by two rows and two columns, then runs the external sse2
      * primitive over (size+8)>>3 strips of 8 columns (2 strips for size 8,
      * 3 for size 16), advancing src and tmp by 8 per strip.
      *
      * tmp       - int16_t buffer receiving the intermediate values
      * src       - source pixels (the pointer is rewound internally)
      * tmpStride - not used by this function
      * srcStride - source stride, forwarded to the primitive
      * size      - block size (the visible callers pass 8 or 16)
      */
     197        8904 : static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp,
     198             :                                                                  const uint8_t *src,
     199             :                                                                  int tmpStride,
     200             :                                                                  int srcStride,
     201             :                                                                  int size)
     202             : {
     203        8904 :     int w = (size+8)>>3;
     204        8904 :     src -= 2*srcStride+2;
     205       38214 :     while(w--){
     206       20406 :         ff_put_h264_qpel8or16_hv1_lowpass_op_sse2(src, tmp, srcStride, size);
     207       20406 :         tmp += 8;
     208       20406 :         src += 8;
     209             :     }
     210        8904 : }
     211             : 
     /*
      * QPEL_H264_HV_XMM(OPNAME, OP, MMX): two-stage hv low-pass wrappers for an
      * XMM-class suffix. Stage one is always the put-only sse2 helper
      * put_h264_qpel8or16_hv1_lowpass_sse2, whatever OPNAME and MMX are; stage
      * two is the OPNAME- and MMX-specific hv2 routine. The qpel8 and qpel16
      * shorthands just fix size to 8 and 16. OP is not referenced.
      */
     212             : #define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\
     213             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
     214             :     put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\
     215             :     ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
     216             : }\
     217             : static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
     218             :     ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 8);\
     219             : }\
     220             : static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
     221             :     ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\
     222             : }\
     223             : 
     /*
      * Suffix aliases needed by the H264_MC_* generators, which paste the same
      * suffix onto every helper they call:
      *   - sse2 h_lowpass_l2 (8 and 16 wide) resolves to the mmxext versions;
      *   - ssse3 v_lowpass (8 and 16 wide) resolves to the sse2 wrappers;
      *   - sse2 hv2_lowpass resolves to the mmxext wrapper.
      */
     224             : #define ff_put_h264_qpel8_h_lowpass_l2_sse2  ff_put_h264_qpel8_h_lowpass_l2_mmxext
     225             : #define ff_avg_h264_qpel8_h_lowpass_l2_sse2  ff_avg_h264_qpel8_h_lowpass_l2_mmxext
     226             : #define ff_put_h264_qpel16_h_lowpass_l2_sse2 ff_put_h264_qpel16_h_lowpass_l2_mmxext
     227             : #define ff_avg_h264_qpel16_h_lowpass_l2_sse2 ff_avg_h264_qpel16_h_lowpass_l2_mmxext
     228             : 
     229             : #define ff_put_h264_qpel8_v_lowpass_ssse3  ff_put_h264_qpel8_v_lowpass_sse2
     230             : #define ff_avg_h264_qpel8_v_lowpass_ssse3  ff_avg_h264_qpel8_v_lowpass_sse2
     231             : #define ff_put_h264_qpel16_v_lowpass_ssse3 ff_put_h264_qpel16_v_lowpass_sse2
     232             : #define ff_avg_h264_qpel16_v_lowpass_ssse3 ff_avg_h264_qpel16_v_lowpass_sse2
     233             : 
     234             : #define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext
     235             : #define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext
     236             : 
     /*
      * H264_MC(OPNAME, SIZE, MMX, ALIGN) emits the complete set of mcXY entry
      * points for one operation, block size and suffix by expanding the four
      * family macros (C, V, H and HV), which are defined further down.
      */
     237             : #define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
     238             : H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
     239             : H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\
     240             : H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
     241             : H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\
     242             : 
     /*
      * mc00 entry point for 16x16 put on SSE2: forwards to
      * ff_put_pixels16_sse2 with the same stride and a height of 16. Written
      * by hand because H264_MC_C is never instantiated for the sse2 suffix in
      * the visible code.
      */
     243       30475 : static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src,
     244             :                                        ptrdiff_t stride)
     245             : {
     246       30475 :     ff_put_pixels16_sse2(dst, src, stride, 16);
     247       30475 : }
     /*
      * mc00 entry point for 16x16 avg on SSE2: forwards to
      * ff_avg_pixels16_sse2 with the same stride and a height of 16.
      */
     248       13426 : static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src,
     249             :                                        ptrdiff_t stride)
     250             : {
     251       13426 :     ff_avg_pixels16_sse2(dst, src, stride, 16);
     252       13426 : }
     /* The 8x8 mc00 entry points have no sse2 version of their own; the sse2
      * names resolve to the mmxext functions generated by H264_MC_C. */
     253             : #define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext
     254             : #define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext
     255             : 
     /*
      * H264_MC_C: generates the mc00 entry point, which forwards to the
      * SIZE-wide pixels helper of the given suffix with height SIZE and applies
      * no low-pass filter. ALIGN is not used by this family.
      */
     256             : #define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
     257             : static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     258             : {\
     259             :     ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\
     260             : }\
     261             : 
     /*
      * H264_MC_H: generates the horizontal-only entry points.
      *   - mc20 applies the horizontal low-pass alone.
      *   - mc10 and mc30 call h_lowpass_l2 with the unfiltered source as second
      *     input: src for mc10 and src+1 for mc30, both with the picture stride.
      * NOTE(review): presumably mcXY encodes a quarter-pel offset of X
      * horizontally and Y vertically - confirm against the caller's table.
      * ALIGN is not used by this family.
      */
     262             : #define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \
     263             : static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     264             : {\
     265             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\
     266             : }\
     267             : \
     268             : static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     269             : {\
     270             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\
     271             : }\
     272             : \
     273             : static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     274             : {\
     275             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
     276             : }\
     277             : 
     /*
      * H264_MC_V: generates the vertical-only entry points.
      *   - mc02 applies the vertical low-pass straight into dst.
      *   - mc01 and mc03 first write the put-variant vertical low-pass into a
      *     SIZE*SIZE byte temp buffer (stride SIZE, alignment ALIGN), then
      *     combine it with the unfiltered source through the pixels "l2"
      *     helper: src for mc01 and src+stride (one row down) for mc03.
      * The intermediate pass is always put_; only the final combine uses OPNAME.
      */
     278             : #define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \
     279             : static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     280             : {\
     281             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
     282             :     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
     283             :     ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\
     284             : }\
     285             : \
     286             : static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     287             : {\
     288             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
     289             : }\
     290             : \
     291             : static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     292             : {\
     293             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
     294             :     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
     295             :     ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\
     296             : }\
     297             : 
     /*
      * H264_MC_HV: generates the entry points that need both a horizontal and
      * a vertical pass.
      *   - mc11/mc31/mc13/mc33: put-variant vertical low-pass into a SIZE*SIZE
      *     byte temp buffer (from src for x1, src+1 for x3), then h_lowpass_l2
      *     on src (y1) or src+stride (y3) with temp as second source.
      *   - mc22: two-stage hv low-pass straight into dst, using a 16-bit
      *     scratch buffer of SIZE*(SIZE<8?12:24) elements.
      *   - mc21/mc23/mc12/mc32: one byte buffer holds both results of a put hv
      *     low-pass: halfHV is its first SIZE*SIZE bytes, halfV is the 16-bit
      *     intermediate stored right after it. mc21/mc23 then run h_lowpass_l2
      *     on src / src+stride with halfHV; mc12/mc32 combine halfV+2 / halfV+3
      *     with halfHV through the l2_shift5 helper, which is always the mmxext
      *     one regardless of MMX.
      * The alignment asserts cast the pointer to uintptr_t rather than int, so
      * the check stays well defined where pointers are wider than int.
      */
     298             : #define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \
     299             : static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     300             : {\
     301             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
     302             :     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
     303             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
     304             : }\
     305             : \
     306             : static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     307             : {\
     308             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
     309             :     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
     310             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
     311             : }\
     312             : \
     313             : static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     314             : {\
     315             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
     316             :     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
     317             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
     318             : }\
     319             : \
     320             : static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     321             : {\
     322             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
     323             :     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
     324             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
     325             : }\
     326             : \
     327             : static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     328             : {\
     329             :     LOCAL_ALIGNED(ALIGN, uint16_t, temp, [SIZE*(SIZE<8?12:24)]);\
     330             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\
     331             : }\
     332             : \
     333             : static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     334             : {\
     335             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
     336             :     uint8_t * const halfHV= temp;\
     337             :     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
     338             :     av_assert2(((uintptr_t)temp & 7) == 0);\
     339             :     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     340             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
     341             : }\
     342             : \
     343             : static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     344             : {\
     345             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
     346             :     uint8_t * const halfHV= temp;\
     347             :     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
     348             :     av_assert2(((uintptr_t)temp & 7) == 0);\
     349             :     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     350             :     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
     351             : }\
     352             : \
     353             : static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     354             : {\
     355             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
     356             :     uint8_t * const halfHV= temp;\
     357             :     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
     358             :     av_assert2(((uintptr_t)temp & 7) == 0);\
     359             :     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     360             :     ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
     361             : }\
     362             : \
     363             : static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     364             : {\
     365             :     LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
     366             :     uint8_t * const halfHV= temp;\
     367             :     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
     368             :     av_assert2(((uintptr_t)temp & 7) == 0);\
     369             :     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     370             :     ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
     371             : }\
     372             : 
     /*
      * H264_MC_4816(MMX): expands H264_MC (all four families) for put_ and
      * avg_ at block sizes 4, 8 and 16, passing 8 as the ALIGN argument that
      * the families hand to LOCAL_ALIGNED for their temp buffers.
      */
     373             : #define H264_MC_4816(MMX)\
     374             : H264_MC(put_, 4, MMX, 8)\
     375             : H264_MC(put_, 8, MMX, 8)\
     376             : H264_MC(put_, 16,MMX, 8)\
     377             : H264_MC(avg_, 4, MMX, 8)\
     378             : H264_MC(avg_, 8, MMX, 8)\
     379             : H264_MC(avg_, 16,MMX, 8)\
     380             : 
     /*
      * H264_MC_816(QPEL, XMM): expands a single family macro QPEL (for example
      * H264_MC_V) for put_ and avg_ at block sizes 8 and 16, passing 16 as the
      * ALIGN argument. Size 4 is not generated by this macro.
      */
     381             : #define H264_MC_816(QPEL, XMM)\
     382             : QPEL(put_, 8, XMM, 16)\
     383             : QPEL(put_, 16,XMM, 16)\
     384             : QPEL(avg_, 8, XMM, 16)\
     385             : QPEL(avg_, 16,XMM, 16)\
     386             : 
     /*
      * Instantiate the inline wrapper families: the full QPEL_H264 set for
      * mmxext, the vertical and hv wrappers for sse2, and the horizontal and hv
      * wrappers for ssse3, each for put_ and avg_. The second argument
      * (PUT_OP / AVG_MMXEXT_OP) is accepted by every macro, but none of the
      * visible macro bodies reference it.
      */
     387        1627 : QPEL_H264(put_,        PUT_OP, mmxext)
     388         200 : QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext)
     389       24626 : QPEL_H264_V_XMM(put_,       PUT_OP, sse2)
     390         208 : QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2)
     391          72 : QPEL_H264_HV_XMM(put_,       PUT_OP, sse2)
     392           8 : QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, sse2)
     393        2256 : QPEL_H264_H_XMM(put_,       PUT_OP, ssse3)
     394         624 : QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3)
     395       17548 : QPEL_H264_HV_XMM(put_,       PUT_OP, ssse3)
     396         180 : QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
     397             : 
     /*
      * Generate the static mcXY entry points: every family at sizes 4, 8 and
      * 16 for mmxext; at sizes 8 and 16 only, the V and HV families for sse2
      * and the H and HV families for ssse3. The sse2 mc00 entry points are not
      * generated here; they are written by hand or aliased earlier in the file.
      */
     398       13618 : H264_MC_4816(mmxext)
     399       10572 : H264_MC_816(H264_MC_V, sse2)
     400          72 : H264_MC_816(H264_MC_HV, sse2)
     401       21601 : H264_MC_816(H264_MC_H, ssse3)
     402       23094 : H264_MC_816(H264_MC_HV, ssse3)
     403             : 
     404             : 
     405             : //10bit
     /*
      * LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) declares one external routine named
      * ff_<OP>_h264_qpel<NUM>_<TYPE>_<DEPTH>_<OPT> taking (dst, src, stride).
      * Only the prototype is produced; the definition is not in this file view.
      */
     406             : #define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
     407             : void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
     408             :     (uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
     409             : 
     /* LUMA_MC_ALL: declares the put and avg routines of one TYPE/OPT pair for
      * block sizes 4, 8 and 16. */
     410             : #define LUMA_MC_ALL(DEPTH, TYPE, OPT) \
     411             :     LUMA_MC_OP(put,  4, DEPTH, TYPE, OPT) \
     412             :     LUMA_MC_OP(avg,  4, DEPTH, TYPE, OPT) \
     413             :     LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT) \
     414             :     LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT) \
     415             :     LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
     416             :     LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
     417             : 
     /* LUMA_MC_816: same as LUMA_MC_ALL but for block sizes 8 and 16 only. */
     418             : #define LUMA_MC_816(DEPTH, TYPE, OPT) \
     419             :     LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT) \
     420             :     LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT) \
     421             :     LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
     422             :     LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
     423             : 
     /* Declare the depth-10 mmxext routines for all sixteen mcXY positions
      * (mc00 through mc33), each at sizes 4, 8 and 16, put and avg. */
     424             : LUMA_MC_ALL(10, mc00, mmxext)
     425             : LUMA_MC_ALL(10, mc10, mmxext)
     426             : LUMA_MC_ALL(10, mc20, mmxext)
     427             : LUMA_MC_ALL(10, mc30, mmxext)
     428             : LUMA_MC_ALL(10, mc01, mmxext)
     429             : LUMA_MC_ALL(10, mc11, mmxext)
     430             : LUMA_MC_ALL(10, mc21, mmxext)
     431             : LUMA_MC_ALL(10, mc31, mmxext)
     432             : LUMA_MC_ALL(10, mc02, mmxext)
     433             : LUMA_MC_ALL(10, mc12, mmxext)
     434             : LUMA_MC_ALL(10, mc22, mmxext)
     435             : LUMA_MC_ALL(10, mc32, mmxext)
     436             : LUMA_MC_ALL(10, mc03, mmxext)
     437             : LUMA_MC_ALL(10, mc13, mmxext)
     438             : LUMA_MC_ALL(10, mc23, mmxext)
     439             : LUMA_MC_ALL(10, mc33, mmxext)
     440             : 
     /*
      * Prototypes for the DEPTH=10 sse2 functions, sizes 8 and 16, for all 16
      * mcXY positions. The mc10, mc20 and mc30 positions are additionally
      * declared with the sse2_cache64 and ssse3_cache64 suffixes; those are the
      * variants ff_h264qpel_init_x86() selects for these three positions on
      * SSE2 / SSSE3 CPUs.
      */
     441             : LUMA_MC_816(10, mc00, sse2)
     442             : LUMA_MC_816(10, mc10, sse2)
     443             : LUMA_MC_816(10, mc10, sse2_cache64)
     444             : LUMA_MC_816(10, mc10, ssse3_cache64)
     445             : LUMA_MC_816(10, mc20, sse2)
     446             : LUMA_MC_816(10, mc20, sse2_cache64)
     447             : LUMA_MC_816(10, mc20, ssse3_cache64)
     448             : LUMA_MC_816(10, mc30, sse2)
     449             : LUMA_MC_816(10, mc30, sse2_cache64)
     450             : LUMA_MC_816(10, mc30, ssse3_cache64)
     451             : LUMA_MC_816(10, mc01, sse2)
     452             : LUMA_MC_816(10, mc11, sse2)
     453             : LUMA_MC_816(10, mc21, sse2)
     454             : LUMA_MC_816(10, mc31, sse2)
     455             : LUMA_MC_816(10, mc02, sse2)
     456             : LUMA_MC_816(10, mc12, sse2)
     457             : LUMA_MC_816(10, mc22, sse2)
     458             : LUMA_MC_816(10, mc32, sse2)
     459             : LUMA_MC_816(10, mc03, sse2)
     460             : LUMA_MC_816(10, mc13, sse2)
     461             : LUMA_MC_816(10, mc23, sse2)
     462             : LUMA_MC_816(10, mc33, sse2)
     463             : 
     /*
      * QPEL16_OPMC defines ff_<OP>_h264_qpel16_<MC>_10_<MMX> in terms of the
      * matching qpel8 function, called four times: at (dst, src) and at
      * (dst+16, src+16) for the upper half, then again at both offsets after
      * advancing dst and src by 8*stride for the lower half.
      * NOTE(review): the +16 is a byte offset on uint8_t pointers; presumably
      * 8 samples of 2 bytes each for 10-bit data - confirm.
      */
     464             : #define QPEL16_OPMC(OP, MC, MMX)\
     465             : void ff_ ## OP ## _h264_qpel16_ ## MC ## _10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride){\
     466             :     ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst   , src   , stride);\
     467             :     ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
     468             :     src += 8*stride;\
     469             :     dst += 8*stride;\
     470             :     ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst   , src   , stride);\
     471             :     ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
     472             : }
     473             : 
     /* Define both the put and the avg qpel16 wrapper for one MC position. */
     474             : #define QPEL16_OP(MC, MMX)\
     475             : QPEL16_OPMC(put, MC, MMX)\
     476             : QPEL16_OPMC(avg, MC, MMX)
     477             : 
     /*
      * Define the put/avg qpel16 wrappers for all 16 mcXY positions
      * (mc00 .. mc33) for the instruction-set suffix MMX.
      */
     478             : #define QPEL16(MMX)\
     479             : QPEL16_OP(mc00, MMX)\
     480             : QPEL16_OP(mc01, MMX)\
     481             : QPEL16_OP(mc02, MMX)\
     482             : QPEL16_OP(mc03, MMX)\
     483             : QPEL16_OP(mc10, MMX)\
     484             : QPEL16_OP(mc11, MMX)\
     485             : QPEL16_OP(mc12, MMX)\
     486             : QPEL16_OP(mc13, MMX)\
     487             : QPEL16_OP(mc20, MMX)\
     488             : QPEL16_OP(mc21, MMX)\
     489             : QPEL16_OP(mc22, MMX)\
     490             : QPEL16_OP(mc23, MMX)\
     491             : QPEL16_OP(mc30, MMX)\
     492             : QPEL16_OP(mc31, MMX)\
     493             : QPEL16_OP(mc32, MMX)\
     494             : QPEL16_OP(mc33, MMX)
     495             : 
     /*
      * The 16-wide 10-bit mmxext wrappers are only built for 32-bit x86.
      * ff_h264qpel_init_x86() likewise installs the 16- and 8-wide 10_mmxext
      * table entries only under ARCH_X86_32.
      */
     496             : #if ARCH_X86_32 // ARCH_X86_64 implies SSE2+
     497             : QPEL16(mmxext)
     498             : #endif
     499             : 
     500             : #endif /* HAVE_YASM */
     501             : 
     /*
      * Fill all 16 entries of c-><PFX>_pixels_tab[IDX] with the functions named
      * <PREFIX><PFX><SIZE>_mcXY_<CPU>. Entry index is X + 4*Y (entry 1 is mc10,
      * entry 4 is mc01). PREFIX may be empty. Requires a variable named `c` in
      * the expanding scope.
      */
     502             : #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                          \
     503             :     do {                                                                     \
     504             :     c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
     505             :     c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
     506             :     c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
     507             :     c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
     508             :     c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
     509             :     c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
     510             :     c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
     511             :     c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
     512             :     c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
     513             :     c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
     514             :     c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
     515             :     c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
     516             :     c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
     517             :     c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
     518             :     c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
     519             :     c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
     520             :     } while (0)
     521             : 
     /*
      * Set the single table entry for position (x, y), index x + y*4, in both
      * the put and avg tables, for row 0 (qpel16 functions) and row 1 (qpel8
      * functions), using the names {put,avg}_h264_qpel{16,8}_mcXY_<CPU>.
      * Row 2 (the size-4 functions) is not touched. x and y must be literal
      * digits because they are token-pasted into the function name. Requires
      * a variable named `c` in the expanding scope.
      */
     522             : #define H264_QPEL_FUNCS(x, y, CPU)                                                            \
     523             :     do {                                                                                      \
     524             :         c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \
     525             :         c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc  ## x ## y ## _ ## CPU; \
     526             :         c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \
     527             :         c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc  ## x ## y ## _ ## CPU; \
     528             :     } while (0)
     529             : 
     /*
      * Counterpart of H264_QPEL_FUNCS for the ff_-prefixed functions carrying
      * the _10_ infix: sets entry x + y*4 of rows 0 (qpel16) and 1 (qpel8) in
      * the put and avg tables to ff_{put,avg}_h264_qpel{16,8}_mcXY_10_<CPU>.
      * x and y must be literal digits; requires `c` in the expanding scope.
      */
     530             : #define H264_QPEL_FUNCS_10(x, y, CPU)                                                               \
     531             :     do {                                                                                            \
     532             :         c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
     533             :         c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc  ## x ## y ## _10_ ## CPU; \
     534             :         c->avg_h264_qpel_pixels_tab[0][x + y * 4] = ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
     535             :         c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc  ## x ## y ## _10_ ## CPU; \
     536             :     } while (0)
     537             : 
     /*
      * Install x86 SIMD implementations into the put/avg_h264_qpel_pixels_tab
      * tables of `c`.
      *
      * c         - context whose function tables are updated in place.
      * bit_depth - sample bit depth; only values <= 8 and exactly 10 lead to
      *             any assignment, every other depth leaves `c` untouched.
      *
      * The whole body is compiled only when HAVE_YASM is set; otherwise the
      * function does nothing. The CPU-feature blocks run in the order MMXEXT,
      * SSE2, SSSE3, AVX and each one overwrites entries written by the blocks
      * before it, so the order of the blocks is significant.
      */
     538         583 : av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
     539             : {
     540             : #if HAVE_YASM
     541         583 :     int high_bit_depth = bit_depth > 8;
     542         583 :     int cpu_flags = av_get_cpu_flags();
     543             : 
                           /* MMXEXT fills complete table rows; the later blocks
                            * replace individual entries. */
     544         583 :     if (EXTERNAL_MMXEXT(cpu_flags)) {
     545          70 :         if (!high_bit_depth) {
     546          34 :             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
     547          34 :             SET_QPEL_FUNCS(put_h264_qpel, 1,  8, mmxext, );
     548          34 :             SET_QPEL_FUNCS(put_h264_qpel, 2,  4, mmxext, );
     549          34 :             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, );
     550          34 :             SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, mmxext, );
     551          34 :             SET_QPEL_FUNCS(avg_h264_qpel, 2,  4, mmxext, );
     552          36 :         } else if (bit_depth == 10) {
                               /* The 16- and 8-wide 10-bit mmxext rows are set
                                * on x86-32 only; the 4-wide row is always set. */
     553             : #if ARCH_X86_32
     554             :             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
     555             :             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
     556             :             SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 10_mmxext, ff_);
     557             :             SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 10_mmxext, ff_);
     558             : #endif
     559          18 :             SET_QPEL_FUNCS(put_h264_qpel, 2, 4,  10_mmxext, ff_);
     560          18 :             SET_QPEL_FUNCS(avg_h264_qpel, 2, 4,  10_mmxext, ff_);
     561             :         }
     562             :     }
     563             : 
     564         583 :     if (EXTERNAL_SSE2(cpu_flags)) {
     565          58 :         if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
     566             :             // these functions are slower than mmx on AMD, but faster on Intel
     567          30 :             H264_QPEL_FUNCS(0, 0, sse2);
     568             :         }
     569             : 
                               /* 8-bit SSE2: every position except mc00 (handled
                                * above) and mc10/mc20/mc30, which keep whatever
                                * was installed before. */
     570          58 :         if (!high_bit_depth) {
     571          30 :             H264_QPEL_FUNCS(0, 1, sse2);
     572          30 :             H264_QPEL_FUNCS(0, 2, sse2);
     573          30 :             H264_QPEL_FUNCS(0, 3, sse2);
     574          30 :             H264_QPEL_FUNCS(1, 1, sse2);
     575          30 :             H264_QPEL_FUNCS(1, 2, sse2);
     576          30 :             H264_QPEL_FUNCS(1, 3, sse2);
     577          30 :             H264_QPEL_FUNCS(2, 1, sse2);
     578          30 :             H264_QPEL_FUNCS(2, 2, sse2);
     579          30 :             H264_QPEL_FUNCS(2, 3, sse2);
     580          30 :             H264_QPEL_FUNCS(3, 1, sse2);
     581          30 :             H264_QPEL_FUNCS(3, 2, sse2);
     582          30 :             H264_QPEL_FUNCS(3, 3, sse2);
     583             :         }
     584             : 
                               /* 10-bit SSE2: fill the 16- and 8-wide rows, then
                                * replace mc10/mc20/mc30 with the sse2_cache64
                                * variants. */
     585          58 :         if (bit_depth == 10) {
     586          14 :             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
     587          14 :             SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 10_sse2, ff_);
     588          14 :             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
     589          14 :             SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 10_sse2, ff_);
     590          14 :             H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
     591          14 :             H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
     592          14 :             H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
     593             :         }
     594             :     }
     595             : 
     596         583 :     if (EXTERNAL_SSSE3(cpu_flags)) {
                               /* 8-bit SSSE3: every position with x != 0. */
     597          46 :         if (!high_bit_depth) {
     598          26 :             H264_QPEL_FUNCS(1, 0, ssse3);
     599          26 :             H264_QPEL_FUNCS(1, 1, ssse3);
     600          26 :             H264_QPEL_FUNCS(1, 2, ssse3);
     601          26 :             H264_QPEL_FUNCS(1, 3, ssse3);
     602          26 :             H264_QPEL_FUNCS(2, 0, ssse3);
     603          26 :             H264_QPEL_FUNCS(2, 1, ssse3);
     604          26 :             H264_QPEL_FUNCS(2, 2, ssse3);
     605          26 :             H264_QPEL_FUNCS(2, 3, ssse3);
     606          26 :             H264_QPEL_FUNCS(3, 0, ssse3);
     607          26 :             H264_QPEL_FUNCS(3, 1, ssse3);
     608          26 :             H264_QPEL_FUNCS(3, 2, ssse3);
     609          26 :             H264_QPEL_FUNCS(3, 3, ssse3);
     610             :         }
     611             : 
                               /* 10-bit SSSE3: only mc10/mc20/mc30 change, to the
                                * ssse3_cache64 variants. */
     612          46 :         if (bit_depth == 10) {
     613          10 :             H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
     614          10 :             H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
     615          10 :             H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
     616             :         }
     617             :     }
     618             : 
     619         583 :     if (EXTERNAL_AVX(cpu_flags)) {
     620             :         /* AVX implies 64 byte cache lines without the need to avoid unaligned
     621             :          * memory accesses that cross the boundary between two cache lines.
     622             :          * TODO: Port X264_CPU_CACHELINE_32/64 detection from x264 to avoid
     623             :          * having to treat SSE2 functions with such properties as AVX. */
     624          22 :         if (bit_depth == 10) {
     625           2 :             H264_QPEL_FUNCS_10(1, 0, sse2);
     626           2 :             H264_QPEL_FUNCS_10(2, 0, sse2);
     627           2 :             H264_QPEL_FUNCS_10(3, 0, sse2);
     628             :         }
     629             :     }
     630             : #endif
     631         583 : }

Generated by: LCOV version 1.12