LCOV - code coverage report
Current view: top level - src/libavcodec/x86 - vp8dsp_init.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 90 90 100.0 %
Date: 2017-01-22 02:20:28 Functions: 34 34 100.0 %

          Line data    Source code
       1             : /*
       2             :  * VP8 DSP functions x86-optimized
       3             :  * Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
       4             :  * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
       5             :  *
       6             :  * This file is part of FFmpeg.
       7             :  *
       8             :  * FFmpeg is free software; you can redistribute it and/or
       9             :  * modify it under the terms of the GNU Lesser General Public
      10             :  * License as published by the Free Software Foundation; either
      11             :  * version 2.1 of the License, or (at your option) any later version.
      12             :  *
      13             :  * FFmpeg is distributed in the hope that it will be useful,
      14             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16             :  * Lesser General Public License for more details.
      17             :  *
      18             :  * You should have received a copy of the GNU Lesser General Public
      19             :  * License along with FFmpeg; if not, write to the Free Software
      20             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      21             :  */
      22             : 
      23             : #include "libavutil/attributes.h"
      24             : #include "libavutil/cpu.h"
      25             : #include "libavutil/mem.h"
      26             : #include "libavutil/x86/cpu.h"
      27             : #include "libavcodec/vp8dsp.h"
      28             : 
      29             : #if HAVE_YASM
      30             : 
      31             : /*
      32             :  * MC functions
      33             :  */
      34             : void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, ptrdiff_t dststride,
      35             :                                 uint8_t *src, ptrdiff_t srcstride,
      36             :                                 int height, int mx, int my);
      37             : void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, ptrdiff_t dststride,
      38             :                                 uint8_t *src, ptrdiff_t srcstride,
      39             :                                 int height, int mx, int my);
      40             : void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, ptrdiff_t dststride,
      41             :                                 uint8_t *src, ptrdiff_t srcstride,
      42             :                                 int height, int mx, int my);
      43             : void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, ptrdiff_t dststride,
      44             :                                 uint8_t *src, ptrdiff_t srcstride,
      45             :                                 int height, int mx, int my);
      46             : 
      47             : void ff_put_vp8_epel8_h4_sse2  (uint8_t *dst, ptrdiff_t dststride,
      48             :                                 uint8_t *src, ptrdiff_t srcstride,
      49             :                                 int height, int mx, int my);
      50             : void ff_put_vp8_epel8_h6_sse2  (uint8_t *dst, ptrdiff_t dststride,
      51             :                                 uint8_t *src, ptrdiff_t srcstride,
      52             :                                 int height, int mx, int my);
      53             : void ff_put_vp8_epel8_v4_sse2  (uint8_t *dst, ptrdiff_t dststride,
      54             :                                 uint8_t *src, ptrdiff_t srcstride,
      55             :                                 int height, int mx, int my);
      56             : void ff_put_vp8_epel8_v6_sse2  (uint8_t *dst, ptrdiff_t dststride,
      57             :                                 uint8_t *src, ptrdiff_t srcstride,
      58             :                                 int height, int mx, int my);
      59             : 
      60             : void ff_put_vp8_epel4_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      61             :                                 uint8_t *src, ptrdiff_t srcstride,
      62             :                                 int height, int mx, int my);
      63             : void ff_put_vp8_epel4_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      64             :                                 uint8_t *src, ptrdiff_t srcstride,
      65             :                                 int height, int mx, int my);
      66             : void ff_put_vp8_epel4_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      67             :                                 uint8_t *src, ptrdiff_t srcstride,
      68             :                                 int height, int mx, int my);
      69             : void ff_put_vp8_epel4_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      70             :                                 uint8_t *src, ptrdiff_t srcstride,
      71             :                                 int height, int mx, int my);
      72             : void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      73             :                                 uint8_t *src, ptrdiff_t srcstride,
      74             :                                 int height, int mx, int my);
      75             : void ff_put_vp8_epel8_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      76             :                                 uint8_t *src, ptrdiff_t srcstride,
      77             :                                 int height, int mx, int my);
      78             : void ff_put_vp8_epel8_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      79             :                                 uint8_t *src, ptrdiff_t srcstride,
      80             :                                 int height, int mx, int my);
      81             : void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      82             :                                 uint8_t *src, ptrdiff_t srcstride,
      83             :                                 int height, int mx, int my);
      84             : 
      85             : void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, ptrdiff_t dststride,
      86             :                                    uint8_t *src, ptrdiff_t srcstride,
      87             :                                    int height, int mx, int my);
      88             : void ff_put_vp8_bilinear8_h_sse2  (uint8_t *dst, ptrdiff_t dststride,
      89             :                                    uint8_t *src, ptrdiff_t srcstride,
      90             :                                    int height, int mx, int my);
      91             : void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      92             :                                    uint8_t *src, ptrdiff_t srcstride,
      93             :                                    int height, int mx, int my);
      94             : void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, ptrdiff_t dststride,
      95             :                                    uint8_t *src, ptrdiff_t srcstride,
      96             :                                    int height, int mx, int my);
      97             : 
      98             : void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, ptrdiff_t dststride,
      99             :                                    uint8_t *src, ptrdiff_t srcstride,
     100             :                                    int height, int mx, int my);
     101             : void ff_put_vp8_bilinear8_v_sse2  (uint8_t *dst, ptrdiff_t dststride,
     102             :                                    uint8_t *src, ptrdiff_t srcstride,
     103             :                                    int height, int mx, int my);
     104             : void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, ptrdiff_t dststride,
     105             :                                    uint8_t *src, ptrdiff_t srcstride,
     106             :                                    int height, int mx, int my);
     107             : void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, ptrdiff_t dststride,
     108             :                                    uint8_t *src, ptrdiff_t srcstride,
     109             :                                    int height, int mx, int my);
     110             : 
     111             : 
     112             : void ff_put_vp8_pixels8_mmx (uint8_t *dst, ptrdiff_t dststride,
     113             :                              uint8_t *src, ptrdiff_t srcstride,
     114             :                              int height, int mx, int my);
     115             : void ff_put_vp8_pixels16_mmx(uint8_t *dst, ptrdiff_t dststride,
     116             :                              uint8_t *src, ptrdiff_t srcstride,
     117             :                              int height, int mx, int my);
     118             : void ff_put_vp8_pixels16_sse(uint8_t *dst, ptrdiff_t dststride,
     119             :                              uint8_t *src, ptrdiff_t srcstride,
     120             :                              int height, int mx, int my);
     121             : /* TAP_W16: defines static ff_put_vp8_<FILTERTYPE>16_<TAPTYPE>_<OPT>(), which runs the 8-wide function of the same name on columns 0-7 and then on columns 8-15. */
     122             : #define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \
     123             : static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \
     124             :     uint8_t *dst,  ptrdiff_t dststride, uint8_t *src, \
     125             :     ptrdiff_t srcstride, int height, int mx, int my) \
     126             : { /* first call: left half */ \
     127             :     ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
     128             :         dst,     dststride, src,     srcstride, height, mx, my); \
     129             :     ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( /* second call: dst and src advanced by 8 bytes */ \
     130             :         dst + 8, dststride, src + 8, srcstride, height, mx, my); \
     131             : }
     132             : #define TAP_W8(OPT, FILTERTYPE, TAPTYPE) /* defines static ff_put_vp8_<FILTERTYPE>8_<TAPTYPE>_<OPT>() from two 4-wide calls */ \
     133             : static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
     134             :     uint8_t *dst,  ptrdiff_t dststride, uint8_t *src, \
     135             :     ptrdiff_t srcstride, int height, int mx, int my) \
     136             : { /* first call: left half */ \
     137             :     ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
     138             :         dst,     dststride, src,     srcstride, height, mx, my); \
     139             :     ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( /* second call: dst and src advanced by 4 bytes */ \
     140             :         dst + 4, dststride, src + 4, srcstride, height, mx, my); \
     141             : }
     142             : /* Instantiate the width-doubling wrappers. The mmxext 8- and 16-wide wrappers exist only when ARCH_X86_32 is set; the sse2/ssse3 16-wide wrappers are always built. */
     143             : #if ARCH_X86_32
     144             : TAP_W8 (mmxext, epel, h4)
     145             : TAP_W8 (mmxext, epel, h6)
     146             : TAP_W16(mmxext, epel, h6)
     147             : TAP_W8 (mmxext, epel, v4)
     148             : TAP_W8 (mmxext, epel, v6)
     149             : TAP_W16(mmxext, epel, v6)
     150             : TAP_W8 (mmxext, bilinear, h)
     151             : TAP_W16(mmxext, bilinear, h)
     152             : TAP_W8 (mmxext, bilinear, v)
     153             : TAP_W16(mmxext, bilinear, v)
     154             : #endif
     155             : 
     156           4 : TAP_W16(sse2,  epel, h6)
     157           4 : TAP_W16(sse2,  epel, v6)
     158           4 : TAP_W16(sse2,  bilinear, h)
     159           4 : TAP_W16(sse2,  bilinear, v)
     160             : 
     161           2 : TAP_W16(ssse3, epel, h6)
     162           2 : TAP_W16(ssse3, epel, v6)
     163           2 : TAP_W16(ssse3, bilinear, h)
     164           2 : TAP_W16(ssse3, bilinear, v)
     165             : /* HVTAP: defines static ff_put_vp8_epel<SIZE>_h<TAPNUMX>v<TAPNUMY>_<OPT>(), a two-pass filter: the h<TAPNUMX> function writes into an ALIGN-aligned temporary, then the v<TAPNUMY> function reads that temporary and writes dst. */
     166             : #define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \
     167             : static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \
     168             :     uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
     169             :     ptrdiff_t srcstride, int height, int mx, int my) \
     170             : { \
     171             :     LOCAL_ALIGNED(ALIGN, uint8_t, tmp, [SIZE * (MAXHEIGHT + TAPNUMY - 1)]); /* SIZE bytes per row, MAXHEIGHT + TAPNUMY - 1 rows */ \
     172             :     uint8_t *tmpptr = tmp + SIZE * (TAPNUMY / 2 - 1); /* vertical pass starts TAPNUMY/2 - 1 rows into tmp */ \
     173             :     src -= srcstride * (TAPNUMY / 2 - 1); /* horizontal pass starts the same number of rows above src */ \
     174             :     ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \
     175             :         tmp, SIZE,      src,    srcstride, height + TAPNUMY - 1, mx, my); \
     176             :     ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \
     177             :         dst, dststride, tmpptr, SIZE,      height,               mx, my); \
     178             : }
     179             : /* Instantiate the two-pass epel functions. HVTAPMMX always emits the 4-wide mmxext variant (MAXHEIGHT 8); the 8-wide and the 16-wide h6v6 mmxext variants are emitted only when ARCH_X86_32 is set. */
     180             : #if ARCH_X86_32
     181             : #define HVTAPMMX(x, y) \
     182             : HVTAP(mmxext, 8, x, y,  4,  8) \
     183             : HVTAP(mmxext, 8, x, y,  8, 16)
     184             : 
     185             : HVTAP(mmxext, 8, 6, 6, 16, 16)
     186             : #else
     187             : #define HVTAPMMX(x, y) \
     188             : HVTAP(mmxext, 8, x, y,  4,  8)
     189             : #endif
     190             : 
     191           2 : HVTAPMMX(4, 4)
     192           2 : HVTAPMMX(4, 6)
     193           2 : HVTAPMMX(6, 4)
     194           2 : HVTAPMMX(6, 6)
     195             : 
     196             : #define HVTAPSSE2(x, y, w) /* emits both the sse2 and the ssse3 variant, 16-byte aligned tmp, MAXHEIGHT 16 */ \
     197             : HVTAP(sse2,  16, x, y, w, 16) \
     198             : HVTAP(ssse3, 16, x, y, w, 16)
     199             : 
     200           3 : HVTAPSSE2(4, 4, 8)
     201           3 : HVTAPSSE2(4, 6, 8)
     202           3 : HVTAPSSE2(6, 4, 8)
     203           3 : HVTAPSSE2(6, 6, 8)
     204           3 : HVTAPSSE2(6, 6, 16)
     205             : 
     206           1 : HVTAP(ssse3, 16, 4, 4, 4, 8)
     207           1 : HVTAP(ssse3, 16, 4, 6, 4, 8)
     208           1 : HVTAP(ssse3, 16, 6, 4, 4, 8)
     209           1 : HVTAP(ssse3, 16, 6, 6, 4, 8)
     210             : /* HVBILIN: defines static ff_put_vp8_bilinear<SIZE>_hv_<OPT>(), which runs the _h_ function into a temporary for height + 1 rows and then the _v_ function from that temporary into dst. */
     211             : #define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
     212             : static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
     213             :     uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
     214             :     ptrdiff_t srcstride, int height, int mx, int my) \
     215             : { \
     216             :     LOCAL_ALIGNED(ALIGN, uint8_t, tmp, [SIZE * (MAXHEIGHT + 2)]); /* SIZE bytes per row; only height + 1 rows are written below */ \
     217             :     ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \
     218             :         tmp, SIZE,      src, srcstride, height + 1, mx, my); \
     219             :     ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \
     220             :         dst, dststride, tmp, SIZE,      height,     mx, my); \
     221             : }
     222             : /* Instantiate the bilinear hv functions; every instance here requests 8-byte alignment for tmp. The 8- and 16-wide mmxext variants are built only when ARCH_X86_32 is set. */
     223           2 : HVBILIN(mmxext,  8,  4,  8)
     224             : #if ARCH_X86_32
     225             : HVBILIN(mmxext,  8,  8, 16)
     226             : HVBILIN(mmxext,  8, 16, 16)
     227             : #endif
     228           2 : HVBILIN(sse2,  8,  8, 16)
     229           2 : HVBILIN(sse2,  8, 16, 16)
     230           1 : HVBILIN(ssse3, 8,  4,  8)
     231           1 : HVBILIN(ssse3, 8,  8, 16)
     232           1 : HVBILIN(ssse3, 8, 16, 16)
     233             : 
     234             : void ff_vp8_idct_dc_add_mmx(uint8_t *dst, int16_t block[16],
     235             :                             ptrdiff_t stride);
     236             : void ff_vp8_idct_dc_add_sse4(uint8_t *dst, int16_t block[16],
     237             :                              ptrdiff_t stride);
     238             : void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, int16_t block[4][16],
     239             :                                ptrdiff_t stride);
     240             : void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, int16_t block[4][16],
     241             :                                ptrdiff_t stride);
     242             : void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, int16_t block[2][16],
     243             :                                ptrdiff_t stride);
     244             : void ff_vp8_luma_dc_wht_mmx(int16_t block[4][4][16], int16_t dc[16]);
     245             : void ff_vp8_luma_dc_wht_sse(int16_t block[4][4][16], int16_t dc[16]);
     246             : void ff_vp8_idct_add_mmx(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
     247             : void ff_vp8_idct_add_sse(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
     248             : /* DECLARE_LOOP_FILTER(NAME): declares (does not define) the v/h prototypes with suffix NAME for the simple, 16y/8uv inner and 16y/8uv mbedge loop filters; used below for mmx, mmxext, sse2, ssse3 and sse4. */
     249             : #define DECLARE_LOOP_FILTER(NAME)                                       \
     250             : void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst,                 \
     251             :                                           ptrdiff_t stride,             \
     252             :                                           int flim);                    \
     253             : void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst,                 \
     254             :                                           ptrdiff_t stride,             \
     255             :                                           int flim);                    \
     256             : void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst,              \
     257             :                                              ptrdiff_t stride,          \
     258             :                                              int e, int i, int hvt);    \
     259             : void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst,              \
     260             :                                              ptrdiff_t stride,          \
     261             :                                              int e, int i, int hvt);    \
     262             : void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU,             \
     263             :                                              uint8_t *dstV,             \
     264             :                                              ptrdiff_t s,               \
     265             :                                              int e, int i, int hvt);    \
     266             : void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU,             \
     267             :                                              uint8_t *dstV,             \
     268             :                                              ptrdiff_t s,               \
     269             :                                              int e, int i, int hvt);    \
     270             : void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst,              \
     271             :                                              ptrdiff_t stride,          \
     272             :                                              int e, int i, int hvt);    \
     273             : void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst,              \
     274             :                                              ptrdiff_t stride,          \
     275             :                                              int e, int i, int hvt);    \
     276             : void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU,             \
     277             :                                              uint8_t *dstV,             \
     278             :                                              ptrdiff_t s,               \
     279             :                                              int e, int i, int hvt);    \
     280             : void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU,             \
     281             :                                              uint8_t *dstV,             \
     282             :                                              ptrdiff_t s,               \
     283             :                                              int e, int i, int hvt);
     284             : 
     285             : DECLARE_LOOP_FILTER(mmx)
     286             : DECLARE_LOOP_FILTER(mmxext)
     287             : DECLARE_LOOP_FILTER(sse2)
     288             : DECLARE_LOOP_FILTER(ssse3)
     289             : DECLARE_LOOP_FILTER(sse4)
     290             : 
     291             : #endif /* HAVE_YASM */
     292             : /* VP8_LUMA_MC_FUNC: stores the h6, v6 and h6v6 epel functions for SIZE/OPT into c->put_vp8_epel_pixels_tab[IDX] at [0][2], [2][0] and [2][2]; expects a pointer named c in scope and a trailing ';' from the caller. */
     293             : #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
     294             :     c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
     295             :     c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
     296             :     c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT
     297             : /* VP8_MC_FUNC: stores the 4-tap and mixed 4/6-tap epel functions at [0][1], [1][0], [1][1], [1][2] and [2][1], then adds the three 6-tap entries via VP8_LUMA_MC_FUNC; entry [0][0] is not written by this macro. */
     298             : #define VP8_MC_FUNC(IDX, SIZE, OPT) \
     299             :     c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \
     300             :     c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \
     301             :     c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \
     302             :     c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \
     303             :     c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \
     304             :     VP8_LUMA_MC_FUNC(IDX, SIZE, OPT)
     305             : /* VP8_BILINEAR_MC_FUNC: fills the eight non-[0][0] entries of c->put_vp8_bilinear_pixels_tab[IDX]; index values 1 and 2 receive the same function, so only the _h_, _v_ and _hv_ variants are used. */
     306             : #define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \
     307             :     c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
     308             :     c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
     309             :     c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
     310             :     c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
     311             :     c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
     312             :     c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
     313             :     c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
     314             :     c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT
     315             : /* Install x86 motion-compensation functions into c->put_vp8_epel_pixels_tab and c->put_vp8_bilinear_pixels_tab according to av_get_cpu_flags(). */
     316             : /* The checks run from MMX up to SSSE3 and later assignments overwrite earlier ones; without HAVE_YASM the function does nothing. */
     317          71 : av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c)
     318             : {
     319             : #if HAVE_YASM
     320          71 :     int cpu_flags = av_get_cpu_flags();
     321             : 
     322          71 :     if (EXTERNAL_MMX(cpu_flags)) {
     323             : #if ARCH_X86_32
     324             :         c->put_vp8_epel_pixels_tab[0][0][0]     =
     325             :         c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
     326             : #endif
     327          18 :         c->put_vp8_epel_pixels_tab[1][0][0]     = /* the [0][0] entry of both tables gets the same 8-wide put function */
     328          18 :         c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
     329             :     }
     330             : 
     331             :     /* note that 4-tap width=16 functions are missing because w=16
     332             :      * is only used for luma, and luma is always a copy or sixtap. */
     333          71 :     if (EXTERNAL_MMXEXT(cpu_flags)) {
     334          17 :         VP8_MC_FUNC(2, 4, mmxext);
     335          17 :         VP8_BILINEAR_MC_FUNC(2, 4, mmxext);
     336             : #if ARCH_X86_32
     337             :         VP8_LUMA_MC_FUNC(0, 16, mmxext);
     338             :         VP8_MC_FUNC(1, 8, mmxext);
     339             :         VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
     340             :         VP8_BILINEAR_MC_FUNC(1,  8, mmxext);
     341             : #endif
     342             :     }
     343             : 
     344          71 :     if (EXTERNAL_SSE(cpu_flags)) {
     345          16 :         c->put_vp8_epel_pixels_tab[0][0][0]     =
     346          16 :         c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
     347             :     }
     348             : 
     349          71 :     if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) { /* taken when either flag is set, so SSE2SLOW alone is enough */
     350          15 :         VP8_LUMA_MC_FUNC(0, 16, sse2);
     351          15 :         VP8_MC_FUNC(1, 8, sse2);
     352          15 :         VP8_BILINEAR_MC_FUNC(0, 16, sse2);
     353          15 :         VP8_BILINEAR_MC_FUNC(1, 8, sse2);
     354             :     }
     355             : 
     356          71 :     if (EXTERNAL_SSSE3(cpu_flags)) {
     357          13 :         VP8_LUMA_MC_FUNC(0, 16, ssse3);
     358          13 :         VP8_MC_FUNC(1, 8, ssse3);
     359          13 :         VP8_MC_FUNC(2, 4, ssse3);
     360          13 :         VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
     361          13 :         VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
     362          13 :         VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
     363             :     }
     364             : #endif /* HAVE_YASM */
     365          71 : }
     366             : /* Install x86 IDCT, DC-add, luma DC WHT and loop-filter functions into c according to av_get_cpu_flags(); later checks overwrite pointers set by earlier ones, and without HAVE_YASM the function does nothing. */
     367         123 : av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
     368             : {
     369             : #if HAVE_YASM
     370         123 :     int cpu_flags = av_get_cpu_flags();
     371             : 
     372         123 :     if (EXTERNAL_MMX(cpu_flags)) {
     373          68 :         c->vp8_idct_dc_add    = ff_vp8_idct_dc_add_mmx;
     374          68 :         c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
     375             : #if ARCH_X86_32
     376             :         c->vp8_idct_dc_add4y  = ff_vp8_idct_dc_add4y_mmx;
     377             :         c->vp8_idct_add       = ff_vp8_idct_add_mmx;
     378             :         c->vp8_luma_dc_wht    = ff_vp8_luma_dc_wht_mmx;
     379             : 
     380             :         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
     381             :         c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
     382             : 
     383             :         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
     384             :         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
     385             :         c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
     386             :         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
     387             : 
     388             :         c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmx;
     389             :         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmx;
     390             :         c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmx;
     391             :         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmx;
     392             : #endif
     393             :     }
     394             : 
     395             :     /* The MMXEXT loop filters are installed only when ARCH_X86_32 is
     396             :      * set; on other builds this branch assigns nothing. */
     397         123 :     if (EXTERNAL_MMXEXT(cpu_flags)) {
     398             : #if ARCH_X86_32
     399             :         c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_mmxext;
     400             :         c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_mmxext;
     401             : 
     402             :         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
     403             :         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
     404             :         c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
     405             :         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
     406             : 
     407             :         c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmxext;
     408             :         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmxext;
     409             :         c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
     410             :         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
     411             : #endif
     412             :     }
     413             : 
     414         123 :     if (EXTERNAL_SSE(cpu_flags)) {
     415          56 :         c->vp8_idct_add                         = ff_vp8_idct_add_sse;
     416          56 :         c->vp8_luma_dc_wht                      = ff_vp8_luma_dc_wht_sse;
     417             :     }
     418             : 
     419         123 :     if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) { /* vertical SSE2 filters: taken when either SSE2 or SSE2SLOW is set */
     420          50 :         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
     421             : 
     422          50 :         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
     423          50 :         c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
     424             : 
     425          50 :         c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_sse2;
     426          50 :         c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_sse2;
     427             :     }
     428             : 
     429         123 :     if (EXTERNAL_SSE2(cpu_flags)) { /* horizontal SSE2 filters and dc_add4y; NOTE(review): presumably EXTERNAL_SSE2 rejects SSE2SLOW CPUs - confirm in libavutil/x86/cpu.h */
     430          50 :         c->vp8_idct_dc_add4y          = ff_vp8_idct_dc_add4y_sse2;
     431             : 
     432          50 :         c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse2;
     433             : 
     434          50 :         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
     435          50 :         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
     436             : 
     437          50 :         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse2;
     438          50 :         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse2;
     439             :     }
     440             : 
     441         123 :     if (EXTERNAL_SSSE3(cpu_flags)) {
     442          38 :         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
     443          38 :         c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
     444             : 
     445          38 :         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
     446          38 :         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
     447          38 :         c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
     448          38 :         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
     449             : 
     450          38 :         c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_ssse3;
     451          38 :         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_ssse3;
     452          38 :         c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
     453          38 :         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
     454             :     }
     455             : 
     456         123 :     if (EXTERNAL_SSE4(cpu_flags)) { /* SSE4 replaces only dc_add and three horizontal filters; all other pointers keep their earlier values */
     457          32 :         c->vp8_idct_dc_add            = ff_vp8_idct_dc_add_sse4;
     458             : 
     459          32 :         c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse4;
     460          32 :         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse4;
     461          32 :         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse4;
     462             :     }
     463             : #endif /* HAVE_YASM */
     464         123 : }

Generated by: LCOV version 1.12