LCOV - code coverage report
Current view: top level - libavcodec/x86 - vp8dsp_init.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 90 90 100.0 %
Date: 2017-10-18 21:45:51 Functions: 34 34 100.0 %

          Line data    Source code
       1             : /*
       2             :  * VP8 DSP functions x86-optimized
       3             :  * Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
       4             :  * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
       5             :  *
       6             :  * This file is part of FFmpeg.
       7             :  *
       8             :  * FFmpeg is free software; you can redistribute it and/or
       9             :  * modify it under the terms of the GNU Lesser General Public
      10             :  * License as published by the Free Software Foundation; either
      11             :  * version 2.1 of the License, or (at your option) any later version.
      12             :  *
      13             :  * FFmpeg is distributed in the hope that it will be useful,
      14             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16             :  * Lesser General Public License for more details.
      17             :  *
      18             :  * You should have received a copy of the GNU Lesser General Public
      19             :  * License along with FFmpeg; if not, write to the Free Software
      20             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      21             :  */
      22             : 
      23             : #include "libavutil/attributes.h"
      24             : #include "libavutil/cpu.h"
      25             : #include "libavutil/mem.h"
      26             : #include "libavutil/x86/cpu.h"
      27             : #include "libavcodec/vp8dsp.h"
      28             : 
      29             : #if HAVE_X86ASM
      30             : 
      31             : /*
      32             :  * MC functions
      33             :  */
      34             : void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, ptrdiff_t dststride,
      35             :                                 uint8_t *src, ptrdiff_t srcstride,
      36             :                                 int height, int mx, int my);
      37             : void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, ptrdiff_t dststride,
      38             :                                 uint8_t *src, ptrdiff_t srcstride,
      39             :                                 int height, int mx, int my);
      40             : void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, ptrdiff_t dststride,
      41             :                                 uint8_t *src, ptrdiff_t srcstride,
      42             :                                 int height, int mx, int my);
      43             : void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, ptrdiff_t dststride,
      44             :                                 uint8_t *src, ptrdiff_t srcstride,
      45             :                                 int height, int mx, int my);
      46             : 
      47             : void ff_put_vp8_epel8_h4_sse2  (uint8_t *dst, ptrdiff_t dststride,
      48             :                                 uint8_t *src, ptrdiff_t srcstride,
      49             :                                 int height, int mx, int my);
      50             : void ff_put_vp8_epel8_h6_sse2  (uint8_t *dst, ptrdiff_t dststride,
      51             :                                 uint8_t *src, ptrdiff_t srcstride,
      52             :                                 int height, int mx, int my);
      53             : void ff_put_vp8_epel8_v4_sse2  (uint8_t *dst, ptrdiff_t dststride,
      54             :                                 uint8_t *src, ptrdiff_t srcstride,
      55             :                                 int height, int mx, int my);
      56             : void ff_put_vp8_epel8_v6_sse2  (uint8_t *dst, ptrdiff_t dststride,
      57             :                                 uint8_t *src, ptrdiff_t srcstride,
      58             :                                 int height, int mx, int my);
      59             : 
      60             : void ff_put_vp8_epel4_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      61             :                                 uint8_t *src, ptrdiff_t srcstride,
      62             :                                 int height, int mx, int my);
      63             : void ff_put_vp8_epel4_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      64             :                                 uint8_t *src, ptrdiff_t srcstride,
      65             :                                 int height, int mx, int my);
      66             : void ff_put_vp8_epel4_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      67             :                                 uint8_t *src, ptrdiff_t srcstride,
      68             :                                 int height, int mx, int my);
      69             : void ff_put_vp8_epel4_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      70             :                                 uint8_t *src, ptrdiff_t srcstride,
      71             :                                 int height, int mx, int my);
      72             : void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      73             :                                 uint8_t *src, ptrdiff_t srcstride,
      74             :                                 int height, int mx, int my);
      75             : void ff_put_vp8_epel8_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      76             :                                 uint8_t *src, ptrdiff_t srcstride,
      77             :                                 int height, int mx, int my);
      78             : void ff_put_vp8_epel8_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      79             :                                 uint8_t *src, ptrdiff_t srcstride,
      80             :                                 int height, int mx, int my);
      81             : void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      82             :                                 uint8_t *src, ptrdiff_t srcstride,
      83             :                                 int height, int mx, int my);
      84             : 
      85             : void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, ptrdiff_t dststride,
      86             :                                    uint8_t *src, ptrdiff_t srcstride,
      87             :                                    int height, int mx, int my);
      88             : void ff_put_vp8_bilinear8_h_sse2  (uint8_t *dst, ptrdiff_t dststride,
      89             :                                    uint8_t *src, ptrdiff_t srcstride,
      90             :                                    int height, int mx, int my);
      91             : void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      92             :                                    uint8_t *src, ptrdiff_t srcstride,
      93             :                                    int height, int mx, int my);
      94             : void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      95             :                                    uint8_t *src, ptrdiff_t srcstride,
      96             :                                    int height, int mx, int my);
      97             : 
      98             : void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, ptrdiff_t dststride,
      99             :                                    uint8_t *src, ptrdiff_t srcstride,
     100             :                                    int height, int mx, int my);
     101             : void ff_put_vp8_bilinear8_v_sse2  (uint8_t *dst, ptrdiff_t dststride,
     102             :                                    uint8_t *src, ptrdiff_t srcstride,
     103             :                                    int height, int mx, int my);
     104             : void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, ptrdiff_t dststride,
     105             :                                    uint8_t *src, ptrdiff_t srcstride,
     106             :                                    int height, int mx, int my);
     107             : void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, ptrdiff_t dststride,
     108             :                                    uint8_t *src, ptrdiff_t srcstride,
     109             :                                    int height, int mx, int my);
     110             : 
     111             : 
     112             : void ff_put_vp8_pixels8_mmx (uint8_t *dst, ptrdiff_t dststride,
     113             :                              uint8_t *src, ptrdiff_t srcstride,
     114             :                              int height, int mx, int my);
     115             : void ff_put_vp8_pixels16_mmx(uint8_t *dst, ptrdiff_t dststride,
     116             :                              uint8_t *src, ptrdiff_t srcstride,
     117             :                              int height, int mx, int my);
     118             : void ff_put_vp8_pixels16_sse(uint8_t *dst, ptrdiff_t dststride,
     119             :                              uint8_t *src, ptrdiff_t srcstride,
     120             :                              int height, int mx, int my);
     121             : /*
                     :  * TAP_W16 / TAP_W8: generators for double-width wrapper functions.
                     :  *
                     :  * TAP_W16(OPT, FILTERTYPE, TAPTYPE) defines the static function
                     :  * ff_put_vp8_<FILTERTYPE>16_<TAPTYPE>_<OPT>(), which calls
                     :  * ff_put_vp8_<FILTERTYPE>8_<TAPTYPE>_<OPT>() twice: once on dst/src
                     :  * and once on dst + 8 / src + 8. Strides, height, mx and my are
                     :  * forwarded unchanged to both calls.
                     :  *
                     :  * TAP_W8 is the same one size down: it defines the 8 variant from
                     :  * two calls to the 4 variant at offsets 0 and 4.
                     :  *
                     :  * The narrower function has to be declared before the expansion.
                     :  */
     122             : #define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \
     123             : static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \
     124             :     uint8_t *dst,  ptrdiff_t dststride, uint8_t *src, \
     125             :     ptrdiff_t srcstride, int height, int mx, int my) \
     126             : { \
     127             :     ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
     128             :         dst,     dststride, src,     srcstride, height, mx, my); \
     129             :     ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
     130             :         dst + 8, dststride, src + 8, srcstride, height, mx, my); \
     131             : }
     132             : #define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \
     133             : static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
     134             :     uint8_t *dst,  ptrdiff_t dststride, uint8_t *src, \
     135             :     ptrdiff_t srcstride, int height, int mx, int my) \
     136             : { \
     137             :     ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
     138             :         dst,     dststride, src,     srcstride, height, mx, my); \
     139             :     ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
     140             :         dst + 4, dststride, src + 4, srcstride, height, mx, my); \
     141             : }
     142             : /*
                     :  * Wrapper instantiations. The mmxext 8- and 16-wide wrappers are
                     :  * only generated when ARCH_X86_32 is set (8-wide from the 4-wide
                     :  * mmxext functions, 16-wide from those 8-wide wrappers). The sse2
                     :  * and ssse3 16-wide wrappers are generated unconditionally within
                     :  * the enclosing HAVE_X86ASM section.
                     :  */
     143             : #if ARCH_X86_32
     144             : TAP_W8 (mmxext, epel, h4)
     145             : TAP_W8 (mmxext, epel, h6)
     146             : TAP_W16(mmxext, epel, h6)
     147             : TAP_W8 (mmxext, epel, v4)
     148             : TAP_W8 (mmxext, epel, v6)
     149             : TAP_W16(mmxext, epel, v6)
     150             : TAP_W8 (mmxext, bilinear, h)
     151             : TAP_W16(mmxext, bilinear, h)
     152             : TAP_W8 (mmxext, bilinear, v)
     153             : TAP_W16(mmxext, bilinear, v)
     154             : #endif
     155             : 
     156           4 : TAP_W16(sse2,  epel, h6)
     157           4 : TAP_W16(sse2,  epel, v6)
     158           4 : TAP_W16(sse2,  bilinear, h)
     159           4 : TAP_W16(sse2,  bilinear, v)
     160             : 
     161           2 : TAP_W16(ssse3, epel, h6)
     162           2 : TAP_W16(ssse3, epel, v6)
     163           2 : TAP_W16(ssse3, bilinear, h)
     164           2 : TAP_W16(ssse3, bilinear, v)
     165             : /*
                     :  * HVTAP: generate the combined horizontal+vertical epel function
                     :  * ff_put_vp8_epel<SIZE>_h<TAPNUMX>v<TAPNUMY>_<OPT>() from the
                     :  * separate _h<TAPNUMX>_ and _v<TAPNUMY>_ functions of the same OPT.
                     :  *
                     :  * The horizontal pass runs first, into an ALIGN-aligned local
                     :  * buffer with stride SIZE. It starts TAPNUMY / 2 - 1 rows above src
                     :  * and covers height + TAPNUMY - 1 rows. The vertical pass then
                     :  * reads from tmpptr, which skips those leading rows, and writes
                     :  * height rows to dst.
                     :  *
                     :  * MAXHEIGHT only sizes the buffer; height is not checked against
                     :  * it here.
                     :  * NOTE(review): this relies on callers passing height <= MAXHEIGHT
                     :  * and on the h function writing SIZE bytes per row - confirm.
                     :  */
     166             : #define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \
     167             : static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \
     168             :     uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
     169             :     ptrdiff_t srcstride, int height, int mx, int my) \
     170             : { \
     171             :     LOCAL_ALIGNED(ALIGN, uint8_t, tmp, [SIZE * (MAXHEIGHT + TAPNUMY - 1)]); \
     172             :     uint8_t *tmpptr = tmp + SIZE * (TAPNUMY / 2 - 1); \
     173             :     src -= srcstride * (TAPNUMY / 2 - 1); \
     174             :     ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \
     175             :         tmp, SIZE,      src,    srcstride, height + TAPNUMY - 1, mx, my); \
     176             :     ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \
     177             :         dst, dststride, tmpptr, SIZE,      height,               mx, my); \
     178             : }
     179             : /*
                     :  * HVTAP instantiations.
                     :  *
                     :  * HVTAPMMX(x, y) always emits the 4-wide mmxext variant (buffer
                     :  * sized for a height of 8). On ARCH_X86_32 it also emits the 8-wide
                     :  * one, and a single 16-wide h6v6 mmxext variant is emitted next to
                     :  * the macro definition.
                     :  *
                     :  * HVTAPSSE2(x, y, w) emits both the sse2 and the ssse3 variant of
                     :  * width w with a 16-byte aligned buffer. The 4-wide ssse3 variants
                     :  * are emitted separately at the end; no 4-wide sse2 variants are
                     :  * generated.
                     :  */
     180             : #if ARCH_X86_32
     181             : #define HVTAPMMX(x, y) \
     182             : HVTAP(mmxext, 8, x, y,  4,  8) \
     183             : HVTAP(mmxext, 8, x, y,  8, 16)
     184             : 
     185             : HVTAP(mmxext, 8, 6, 6, 16, 16)
     186             : #else
     187             : #define HVTAPMMX(x, y) \
     188             : HVTAP(mmxext, 8, x, y,  4,  8)
     189             : #endif
     190             : 
     191           2 : HVTAPMMX(4, 4)
     192           2 : HVTAPMMX(4, 6)
     193           2 : HVTAPMMX(6, 4)
     194           2 : HVTAPMMX(6, 6)
     195             : 
     196             : #define HVTAPSSE2(x, y, w) \
     197             : HVTAP(sse2,  16, x, y, w, 16) \
     198             : HVTAP(ssse3, 16, x, y, w, 16)
     199             : 
     200           3 : HVTAPSSE2(4, 4, 8)
     201           3 : HVTAPSSE2(4, 6, 8)
     202           3 : HVTAPSSE2(6, 4, 8)
     203           3 : HVTAPSSE2(6, 6, 8)
     204           3 : HVTAPSSE2(6, 6, 16)
     205             : 
     206           1 : HVTAP(ssse3, 16, 4, 4, 4, 8)
     207           1 : HVTAP(ssse3, 16, 4, 6, 4, 8)
     208           1 : HVTAP(ssse3, 16, 6, 4, 4, 8)
     209           1 : HVTAP(ssse3, 16, 6, 6, 4, 8)
     210             : /*
                     :  * HVBILIN: generate ff_put_vp8_bilinear<SIZE>_hv_<OPT>() from the
                     :  * matching _h_ and _v_ bilinear functions.
                     :  *
                     :  * The horizontal pass writes height + 1 rows into an ALIGN-aligned
                     :  * local buffer with stride SIZE, starting at src itself (unlike
                     :  * HVTAP, src is not moved up). The vertical pass then produces
                     :  * height rows in dst from the start of that buffer.
                     :  *
                     :  * The buffer holds MAXHEIGHT + 2 rows; height is not checked
                     :  * against MAXHEIGHT here.
                     :  */
     211             : #define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
     212             : static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
     213             :     uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
     214             :     ptrdiff_t srcstride, int height, int mx, int my) \
     215             : { \
     216             :     LOCAL_ALIGNED(ALIGN, uint8_t, tmp, [SIZE * (MAXHEIGHT + 2)]); \
     217             :     ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \
     218             :         tmp, SIZE,      src, srcstride, height + 1, mx, my); \
     219             :     ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \
     220             :         dst, dststride, tmp, SIZE,      height,     mx, my); \
     221             : }
     222             : /*
                     :  * HVBILIN instantiations. The 8- and 16-wide mmxext variants are
                     :  * only generated on ARCH_X86_32. Every instantiation, including the
                     :  * sse2 and ssse3 ones, requests an 8-byte aligned buffer (the epel
                     :  * HVTAPSSE2 variants request 16).
                     :  * NOTE(review): confirm the sse2/ssse3 bilinear h/v functions do
                     :  * not need 16-byte alignment of tmp.
                     :  */
     223           2 : HVBILIN(mmxext,  8,  4,  8)
     224             : #if ARCH_X86_32
     225             : HVBILIN(mmxext,  8,  8, 16)
     226             : HVBILIN(mmxext,  8, 16, 16)
     227             : #endif
     228           2 : HVBILIN(sse2,  8,  8, 16)
     229           2 : HVBILIN(sse2,  8, 16, 16)
     230           1 : HVBILIN(ssse3, 8,  4,  8)
     231           1 : HVBILIN(ssse3, 8,  8, 16)
     232           1 : HVBILIN(ssse3, 8, 16, 16)
     233             : 
     234             : void ff_vp8_idct_dc_add_mmx(uint8_t *dst, int16_t block[16],
     235             :                             ptrdiff_t stride);
     236             : void ff_vp8_idct_dc_add_sse2(uint8_t *dst, int16_t block[16],
     237             :                              ptrdiff_t stride);
     238             : void ff_vp8_idct_dc_add_sse4(uint8_t *dst, int16_t block[16],
     239             :                              ptrdiff_t stride);
     240             : void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, int16_t block[4][16],
     241             :                                ptrdiff_t stride);
     242             : void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, int16_t block[4][16],
     243             :                                ptrdiff_t stride);
     244             : void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, int16_t block[2][16],
     245             :                                ptrdiff_t stride);
     246             : void ff_vp8_luma_dc_wht_mmx(int16_t block[4][4][16], int16_t dc[16]);
     247             : void ff_vp8_luma_dc_wht_sse(int16_t block[4][4][16], int16_t dc[16]);
     248             : void ff_vp8_idct_add_mmx(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
     249             : void ff_vp8_idct_add_sse(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
     250             : /*
                     :  * DECLARE_LOOP_FILTER(NAME): declare the ten loop-filter entry
                     :  * points carrying the suffix NAME:
                     :  *   - v/h "simple" filters:        (dst, stride, flim)
                     :  *   - v/h 16y "inner" and "mbedge": (dst, stride, e, i, hvt)
                     :  *   - v/h 8uv "inner" and "mbedge": (dstU, dstV, s, e, i, hvt)
                     :  * Only prototypes are emitted; no definitions appear in this file.
                     :  * The macro is expanded below for mmx, mmxext, sse2, ssse3 and sse4.
                     :  */
     251             : #define DECLARE_LOOP_FILTER(NAME)                                       \
     252             : void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst,                 \
     253             :                                           ptrdiff_t stride,             \
     254             :                                           int flim);                    \
     255             : void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst,                 \
     256             :                                           ptrdiff_t stride,             \
     257             :                                           int flim);                    \
     258             : void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst,              \
     259             :                                              ptrdiff_t stride,          \
     260             :                                              int e, int i, int hvt);    \
     261             : void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst,              \
     262             :                                              ptrdiff_t stride,          \
     263             :                                              int e, int i, int hvt);    \
     264             : void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU,             \
     265             :                                              uint8_t *dstV,             \
     266             :                                              ptrdiff_t s,               \
     267             :                                              int e, int i, int hvt);    \
     268             : void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU,             \
     269             :                                              uint8_t *dstV,             \
     270             :                                              ptrdiff_t s,               \
     271             :                                              int e, int i, int hvt);    \
     272             : void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst,              \
     273             :                                              ptrdiff_t stride,          \
     274             :                                              int e, int i, int hvt);    \
     275             : void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst,              \
     276             :                                              ptrdiff_t stride,          \
     277             :                                              int e, int i, int hvt);    \
     278             : void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU,             \
     279             :                                              uint8_t *dstV,             \
     280             :                                              ptrdiff_t s,               \
     281             :                                              int e, int i, int hvt);    \
     282             : void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU,             \
     283             :                                              uint8_t *dstV,             \
     284             :                                              ptrdiff_t s,               \
     285             :                                              int e, int i, int hvt);
     286             : 
     287             : DECLARE_LOOP_FILTER(mmx)
     288             : DECLARE_LOOP_FILTER(mmxext)
     289             : DECLARE_LOOP_FILTER(sse2)
     290             : DECLARE_LOOP_FILTER(ssse3)
     291             : DECLARE_LOOP_FILTER(sse4)
     292             : 
     293             : #endif /* HAVE_X86ASM */
     294             : /*
                     :  * Table-filling helpers. Each expands to several plain assignment
                     :  * statements through a variable named c, so it must be used inside
                     :  * a braced block; the final assignment has no trailing semicolon
                     :  * and takes the one written at the use site.
                     :  *
                     :  * VP8_LUMA_MC_FUNC(IDX, SIZE, OPT): six-tap slots only:
                     :  *   [IDX][0][2] = h6, [IDX][2][0] = v6, [IDX][2][2] = h6v6.
                     :  * VP8_MC_FUNC(IDX, SIZE, OPT): the slots involving a four-tap
                     :  *   filter ([0][1] = h4, [1][0] = v4, [1][1] = h4v4, [1][2] = h6v4,
                     :  *   [2][1] = h4v6), followed by VP8_LUMA_MC_FUNC.
                     :  * VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT): the eight bilinear slots
                     :  *   other than [0][0], using _h_ for [0][1] and [0][2], _v_ for
                     :  *   [1][0] and [2][0], and _hv_ for the remaining four.
                     :  *
                     :  * As the assignments show, the second index selects the vertical
                     :  * filter and the third the horizontal one. None of these macros
                     :  * assigns [IDX][0][0].
                     :  */
     295             : #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
     296             :     c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
     297             :     c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
     298             :     c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT
     299             : 
     300             : #define VP8_MC_FUNC(IDX, SIZE, OPT) \
     301             :     c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \
     302             :     c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \
     303             :     c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \
     304             :     c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \
     305             :     c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \
     306             :     VP8_LUMA_MC_FUNC(IDX, SIZE, OPT)
     307             : 
     308             : #define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \
     309             :     c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
     310             :     c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
     311             :     c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
     312             :     c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
     313             :     c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
     314             :     c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
     315             :     c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
     316             :     c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT
     317             : 
     318             : /*
                     :  * Install x86 SIMD implementations into the motion-compensation
                     :  * tables of c (put_vp8_epel_pixels_tab and
                     :  * put_vp8_bilinear_pixels_tab), selected from av_get_cpu_flags().
                     :  *
                     :  * The checks run in the order MMX, MMXEXT, SSE, SSE2, SSSE3 and
                     :  * later blocks overwrite slots set by earlier ones, so the last
                     :  * matching block decides each slot. Slots that no block assigns
                     :  * keep whatever value c held on entry. The body is empty when
                     :  * HAVE_X86ASM is not set.
                     :  *
                     :  * c: DSP context whose tables are updated in place.
                     :  */
     319          73 : av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c)
     320             : {
     321             : #if HAVE_X86ASM
     322          73 :     int cpu_flags = av_get_cpu_flags();
     323             : 
     324          73 :     if (EXTERNAL_MMX(cpu_flags)) {
     325             : #if ARCH_X86_32
     326             :         c->put_vp8_epel_pixels_tab[0][0][0]     =
     327             :         c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
     328             : #endif
     329          20 :         c->put_vp8_epel_pixels_tab[1][0][0]     =
     330          20 :         c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
     331             :     }
     332             : 
     333             :     /* note that 4-tap width=16 functions are missing because w=16
     334             :      * is only used for luma, and luma is always a copy or sixtap. */
     335          73 :     if (EXTERNAL_MMXEXT(cpu_flags)) {
     336          19 :         VP8_MC_FUNC(2, 4, mmxext);
     337          19 :         VP8_BILINEAR_MC_FUNC(2, 4, mmxext);
     338             : #if ARCH_X86_32
     339             :         VP8_LUMA_MC_FUNC(0, 16, mmxext);
     340             :         VP8_MC_FUNC(1, 8, mmxext);
     341             :         VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
     342             :         VP8_BILINEAR_MC_FUNC(1,  8, mmxext);
     343             : #endif
     344             :     }
     345             :     /* Replaces the [0][0][0] entries the MMX block sets on ARCH_X86_32. */
     346          73 :     if (EXTERNAL_SSE(cpu_flags)) {
     347          18 :         c->put_vp8_epel_pixels_tab[0][0][0]     =
     348          18 :         c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
     349             :     }
     350             : 
     351          73 :     if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) {
     352          17 :         VP8_LUMA_MC_FUNC(0, 16, sse2);
     353          17 :         VP8_MC_FUNC(1, 8, sse2);
     354          17 :         VP8_BILINEAR_MC_FUNC(0, 16, sse2);
     355          17 :         VP8_BILINEAR_MC_FUNC(1, 8, sse2);
     356             :     }
     357             : 
     358          73 :     if (EXTERNAL_SSSE3(cpu_flags)) {
     359          15 :         VP8_LUMA_MC_FUNC(0, 16, ssse3);
     360          15 :         VP8_MC_FUNC(1, 8, ssse3);
     361          15 :         VP8_MC_FUNC(2, 4, ssse3);
     362          15 :         VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
     363          15 :         VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
     364          15 :         VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
     365             :     }
     366             : #endif /* HAVE_X86ASM */
     367          73 : }
     368             : /*
                     :  * Install x86 SIMD implementations of the VP8 IDCT, luma DC WHT and
                     :  * loop-filter function pointers in c, selected from
                     :  * av_get_cpu_flags().
                     :  *
                     :  * The checks run in the order MMX, MMXEXT, SSE, SSE2, SSSE3, SSE4
                     :  * and later blocks overwrite pointers set by earlier ones. Apart
                     :  * from vp8_idct_dc_add4uv, the MMX and MMXEXT assignments are only
                     :  * compiled on ARCH_X86_32. The body is empty when HAVE_X86ASM is
                     :  * not set.
                     :  *
                     :  * c: DSP context whose function pointers are updated in place.
                     :  */
     369         135 : av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
     370             : {
     371             : #if HAVE_X86ASM
     372         135 :     int cpu_flags = av_get_cpu_flags();
     373             : 
     374         135 :     if (EXTERNAL_MMX(cpu_flags)) {
     375          80 :         c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
     376             : #if ARCH_X86_32
     377             :         c->vp8_idct_dc_add    = ff_vp8_idct_dc_add_mmx;
     378             :         c->vp8_idct_dc_add4y  = ff_vp8_idct_dc_add4y_mmx;
     379             :         c->vp8_idct_add       = ff_vp8_idct_add_mmx;
     380             :         c->vp8_luma_dc_wht    = ff_vp8_luma_dc_wht_mmx;
     381             : 
     382             :         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
     383             :         c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
     384             : 
     385             :         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
     386             :         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
     387             :         c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
     388             :         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
     389             : 
     390             :         c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmx;
     391             :         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmx;
     392             :         c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmx;
     393             :         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmx;
     394             : #endif
     395             :     }
     396             : 
     397             :     /* note that 4-tap width=16 functions are missing because w=16
     398             :      * is only used for luma, and luma is always a copy or sixtap.
                     :      * NOTE(review): this remark is about the MC tables, not the
                     :      * loop filters assigned below; it appears to be a leftover. */
     399         135 :     if (EXTERNAL_MMXEXT(cpu_flags)) {
     400             : #if ARCH_X86_32
     401             :         c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_mmxext;
     402             :         c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_mmxext;
     403             : 
     404             :         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
     405             :         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
     406             :         c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
     407             :         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
     408             : 
     409             :         c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmxext;
     410             :         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmxext;
     411             :         c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
     412             :         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
     413             : #endif
     414             :     }
     415             : 
     416         135 :     if (EXTERNAL_SSE(cpu_flags)) {
     417          68 :         c->vp8_idct_add                         = ff_vp8_idct_add_sse;
     418          68 :         c->vp8_luma_dc_wht                      = ff_vp8_luma_dc_wht_sse;
     419             :     }
     420             :     /* Vertical loop filters: installed for SSE2 and for SSE2_SLOW. */
     421         135 :     if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) {
     422          62 :         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
     423             : 
     424          62 :         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
     425          62 :         c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
     426             : 
     427          62 :         c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_sse2;
     428          62 :         c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_sse2;
     429             :     }
     430             :     /* DC-only IDCTs and horizontal loop filters: EXTERNAL_SSE2 only. */
     431         135 :     if (EXTERNAL_SSE2(cpu_flags)) {
     432          62 :         c->vp8_idct_dc_add            = ff_vp8_idct_dc_add_sse2;
     433          62 :         c->vp8_idct_dc_add4y          = ff_vp8_idct_dc_add4y_sse2;
     434             : 
     435          62 :         c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse2;
     436             : 
     437          62 :         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
     438          62 :         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
     439             : 
     440          62 :         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse2;
     441          62 :         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse2;
     442             :     }
     443             : 
     444         135 :     if (EXTERNAL_SSSE3(cpu_flags)) {
     445          50 :         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
     446          50 :         c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
     447             : 
     448          50 :         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
     449          50 :         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
     450          50 :         c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
     451          50 :         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
     452             : 
     453          50 :         c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_ssse3;
     454          50 :         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_ssse3;
     455          50 :         c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
     456          50 :         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
     457             :     }
     458             :     /* SSE4 replaces only idct_dc_add and the h simple/mbedge filters. */
     459         135 :     if (EXTERNAL_SSE4(cpu_flags)) {
     460          44 :         c->vp8_idct_dc_add            = ff_vp8_idct_dc_add_sse4;
     461             : 
     462          44 :         c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse4;
     463          44 :         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse4;
     464          44 :         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse4;
     465             :     }
     466             : #endif /* HAVE_X86ASM */
     467         135 : }

Generated by: LCOV version 1.13