LCOV - code coverage report
Current view: top level - src/libavcodec/x86 - hpeldsp_init.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 69 101 68.3 %
Date: 2017-01-22 02:20:28 Functions: 9 30 30.0 %

          Line data    Source code
       1             : /*
       2             :  * SIMD-optimized halfpel functions
       3             :  * Copyright (c) 2000, 2001 Fabrice Bellard
       4             :  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
       5             :  *
       6             :  * This file is part of FFmpeg.
       7             :  *
       8             :  * FFmpeg is free software; you can redistribute it and/or
       9             :  * modify it under the terms of the GNU Lesser General Public
      10             :  * License as published by the Free Software Foundation; either
      11             :  * version 2.1 of the License, or (at your option) any later version.
      12             :  *
      13             :  * FFmpeg is distributed in the hope that it will be useful,
      14             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16             :  * Lesser General Public License for more details.
      17             :  *
      18             :  * You should have received a copy of the GNU Lesser General Public
      19             :  * License along with FFmpeg; if not, write to the Free Software
      20             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      21             :  *
      22             :  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
      23             :  */
      24             : 
      25             : #include "libavutil/attributes.h"
      26             : #include "libavutil/cpu.h"
      27             : #include "libavutil/x86/cpu.h"
      28             : #include "libavcodec/avcodec.h"
      29             : #include "libavcodec/hpeldsp.h"
      30             : #include "libavcodec/pixels.h"
      31             : #include "fpel.h"
      32             : #include "hpeldsp.h"
      33             : 
      34             : void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
      35             :                               ptrdiff_t line_size, int h);
      36             : void ff_put_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
      37             :                              ptrdiff_t line_size, int h);
      38             : void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels,
      39             :                                ptrdiff_t line_size, int h);
      40             : void ff_put_pixels16_x2_3dnow(uint8_t *block, const uint8_t *pixels,
      41             :                               ptrdiff_t line_size, int h);
      42             : void ff_put_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
      43             :                              ptrdiff_t line_size, int h);
      44             : void ff_avg_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
      45             :                              ptrdiff_t line_size, int h);
      46             : void ff_put_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
      47             :                              ptrdiff_t line_size, int h);
      48             : void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
      49             :                              ptrdiff_t line_size, int h);
      50             : void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
      51             :                                      ptrdiff_t line_size, int h);
      52             : void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
      53             :                                     ptrdiff_t line_size, int h);
      54             : void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
      55             :                                            const uint8_t *pixels,
      56             :                                            ptrdiff_t line_size, int h);
      57             : void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
      58             :                                           const uint8_t *pixels,
      59             :                                           ptrdiff_t line_size, int h);
      60             : void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
      61             :                               ptrdiff_t line_size, int h);
      62             : void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
      63             :                              ptrdiff_t line_size, int h);
      64             : void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
      65             :                                      ptrdiff_t line_size, int h);
      66             : void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
      67             :                                     ptrdiff_t line_size, int h);
      68             : void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
      69             :                                            const uint8_t *pixels,
      70             :                                            ptrdiff_t line_size, int h);
      71             : void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
      72             :                                           const uint8_t *pixels,
      73             :                                           ptrdiff_t line_size, int h);
      74             : void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels,
      75             :                           ptrdiff_t line_size, int h);
      76             : void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
      77             :                               ptrdiff_t line_size, int h);
      78             : void ff_avg_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
      79             :                              ptrdiff_t line_size, int h);
      80             : void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
      81             :                               ptrdiff_t line_size, int h);
      82             : void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
      83             :                              ptrdiff_t line_size, int h);
      84             : void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
      85             :                               ptrdiff_t line_size, int h);
      86             : void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
      87             :                                       ptrdiff_t line_size, int h);
      88             : void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
      89             :                                      ptrdiff_t line_size, int h);
      90             : 
      91             : #define avg_pixels8_mmx         ff_avg_pixels8_mmx
      92             : #define avg_pixels8_x2_mmx      ff_avg_pixels8_x2_mmx
      93             : #define avg_pixels16_mmx        ff_avg_pixels16_mmx
      94             : #define avg_pixels8_xy2_mmx     ff_avg_pixels8_xy2_mmx
      95             : #define avg_pixels16_xy2_mmx    ff_avg_pixels16_xy2_mmx
      96             : #define put_pixels8_mmx         ff_put_pixels8_mmx
      97             : #define put_pixels16_mmx        ff_put_pixels16_mmx
      98             : #define put_pixels8_xy2_mmx     ff_put_pixels8_xy2_mmx
      99             : #define put_pixels16_xy2_mmx    ff_put_pixels16_xy2_mmx
     100             : #define avg_no_rnd_pixels16_mmx ff_avg_pixels16_mmx
     101             : #define put_no_rnd_pixels8_mmx  ff_put_pixels8_mmx
     102             : #define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx
     103             : 
     104             : #if HAVE_INLINE_ASM
     105             : 
     106             : /***********************************/
     107             : /* MMX no rounding */
     108             : #define DEF(x, y) x ## _no_rnd_ ## y ## _mmx
     109             : #define SET_RND  MOVQ_WONE
     110             : #define PAVGBP(a, b, c, d, e, f)        PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
     111             : #define PAVGB(a, b, c, e)               PAVGB_MMX_NO_RND(a, b, c, e)
     112             : #define STATIC static
     113             : 
     114             : #include "rnd_template.c"
     115             : #include "hpeldsp_rnd_template.c"
     116             : 
     117             : #undef DEF
     118             : #undef SET_RND
     119             : #undef PAVGBP
     120             : #undef PAVGB
     121             : #undef STATIC
     122             : 
     123             : #if HAVE_MMX
     124           0 : CALL_2X_PIXELS(avg_no_rnd_pixels16_y2_mmx, avg_no_rnd_pixels8_y2_mmx, 8)
     125          74 : CALL_2X_PIXELS(put_no_rnd_pixels16_y2_mmx, put_no_rnd_pixels8_y2_mmx, 8)
     126             : 
     127           0 : CALL_2X_PIXELS(avg_no_rnd_pixels16_xy2_mmx, avg_no_rnd_pixels8_xy2_mmx, 8)
     128        7956 : CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_mmx, put_no_rnd_pixels8_xy2_mmx, 8)
     129             : #endif
     130             : 
     131             : /***********************************/
     132             : /* MMX rounding */
     133             : 
     134             : #define DEF(x, y) x ## _ ## y ## _mmx
     135             : #define SET_RND  MOVQ_WTWO
     136             : #define PAVGBP(a, b, c, d, e, f)        PAVGBP_MMX(a, b, c, d, e, f)
     137             : #define PAVGB(a, b, c, e)               PAVGB_MMX(a, b, c, e)
     138             : 
     139             : #include "hpeldsp_rnd_template.c"
     140             : 
     141             : #undef DEF
     142             : #define DEF(x, y) ff_ ## x ## _ ## y ## _mmx
     143             : #define STATIC
     144             : 
     145             : #include "rnd_template.c"
     146             : 
     147             : #undef DEF
     148             : #undef SET_RND
     149             : #undef PAVGBP
     150             : #undef PAVGB
     151             : 
     152             : #if HAVE_MMX
     153           0 : CALL_2X_PIXELS(avg_pixels16_y2_mmx, avg_pixels8_y2_mmx, 8)
     154           0 : CALL_2X_PIXELS(put_pixels16_y2_mmx, put_pixels8_y2_mmx, 8)
     155             : 
     156           0 : CALL_2X_PIXELS_EXPORT(ff_avg_pixels16_xy2_mmx, ff_avg_pixels8_xy2_mmx, 8)
     157           0 : CALL_2X_PIXELS_EXPORT(ff_put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8)
     158             : #endif
     159             : 
     160             : #endif /* HAVE_INLINE_ASM */
     161             : 
     162             : 
     163             : #if HAVE_YASM
     164             : 
     165             : #define HPELDSP_AVG_PIXELS16(CPUEXT)                      \
     166             :     CALL_2X_PIXELS(put_no_rnd_pixels16_x2 ## CPUEXT, ff_put_no_rnd_pixels8_x2 ## CPUEXT, 8) \
     167             :     CALL_2X_PIXELS(put_pixels16_y2        ## CPUEXT, ff_put_pixels8_y2        ## CPUEXT, 8) \
     168             :     CALL_2X_PIXELS(put_no_rnd_pixels16_y2 ## CPUEXT, ff_put_no_rnd_pixels8_y2 ## CPUEXT, 8) \
     169             :     CALL_2X_PIXELS(avg_pixels16           ## CPUEXT, ff_avg_pixels8           ## CPUEXT, 8) \
     170             :     CALL_2X_PIXELS(avg_pixels16_x2        ## CPUEXT, ff_avg_pixels8_x2        ## CPUEXT, 8) \
     171             :     CALL_2X_PIXELS(avg_pixels16_y2        ## CPUEXT, ff_avg_pixels8_y2        ## CPUEXT, 8) \
     172             :     CALL_2X_PIXELS(avg_pixels16_xy2       ## CPUEXT, ff_avg_pixels8_xy2       ## CPUEXT, 8) \
     173             :     CALL_2X_PIXELS(avg_approx_pixels16_xy2## CPUEXT, ff_avg_approx_pixels8_xy2## CPUEXT, 8)
     174             : 
     175           0 : HPELDSP_AVG_PIXELS16(_3dnow)
     176        7897 : HPELDSP_AVG_PIXELS16(_mmxext)
     177             : 
     178             : #endif /* HAVE_YASM */
     179             : 
     180             : #define SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)                             \
     181             :     if (HAVE_MMX_EXTERNAL)                                                  \
     182             :     c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _     ## CPU;
     183             : 
     184             : #if HAVE_MMX_INLINE
     185             : #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU)                                     \
     186             :     do {                                                                        \
     187             :         SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)                                 \
     188             :         c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_  ## CPU; \
     189             :         c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_  ## CPU; \
     190             :         c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \
     191             :     } while (0)
     192             : #else
     193             : #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU)                                     \
     194             :     do {                                                                        \
     195             :         SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)                                 \
     196             :     } while (0)
     197             : #endif
     198             : 
     199         140 : static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int cpu_flags)
     200             : {
     201         140 :     SET_HPEL_FUNCS(put,        [0], 16, mmx);
     202         140 :     SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx);
     203         140 :     SET_HPEL_FUNCS(avg,        [0], 16, mmx);
     204         140 :     SET_HPEL_FUNCS(avg_no_rnd,    , 16, mmx);
     205         140 :     SET_HPEL_FUNCS(put,        [1],  8, mmx);
     206         140 :     SET_HPEL_FUNCS(put_no_rnd, [1],  8, mmx);
     207             :     if (HAVE_MMX_EXTERNAL) {
     208         140 :         c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmx;
     209         140 :         c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmx;
     210             :     }
     211             : #if HAVE_MMX_INLINE
     212         140 :     c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
     213         140 :     c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmx;
     214             : #endif
     215         140 : }
     216             : 
     217         140 : static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
     218             : {
     219             : #if HAVE_MMXEXT_EXTERNAL
     220         140 :     c->put_pixels_tab[0][1] = ff_put_pixels16_x2_mmxext;
     221         140 :     c->put_pixels_tab[0][2] = put_pixels16_y2_mmxext;
     222             : 
     223         140 :     c->avg_pixels_tab[0][0] = avg_pixels16_mmxext;
     224         140 :     c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext;
     225         140 :     c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext;
     226         140 :     c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
     227             : 
     228         140 :     c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext;
     229         140 :     c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext;
     230             : 
     231         140 :     c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext;
     232         140 :     c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext;
     233         140 :     c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
     234         140 :     c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
     235             : 
     236         140 :     if (!(flags & AV_CODEC_FLAG_BITEXACT)) {
     237         137 :         c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
     238         137 :         c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
     239         137 :         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext;
     240         137 :         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext;
     241             : 
     242         137 :         c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext;
     243         137 :         c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext;
     244             :     }
     245             : 
     246         140 :     if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
     247           3 :         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
     248           3 :         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
     249             :     }
     250             : #endif /* HAVE_MMXEXT_EXTERNAL */
     251         140 : }
     252             : 
     253           0 : static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
     254             : {
     255             : #if HAVE_AMD3DNOW_EXTERNAL
     256           0 :     c->put_pixels_tab[0][1] = ff_put_pixels16_x2_3dnow;
     257           0 :     c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
     258             : 
     259           0 :     c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
     260           0 :     c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
     261           0 :     c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
     262           0 :     c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
     263             : 
     264           0 :     c->put_pixels_tab[1][1] = ff_put_pixels8_x2_3dnow;
     265           0 :     c->put_pixels_tab[1][2] = ff_put_pixels8_y2_3dnow;
     266             : 
     267           0 :     c->avg_pixels_tab[1][0] = ff_avg_pixels8_3dnow;
     268           0 :     c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_3dnow;
     269           0 :     c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_3dnow;
     270           0 :     c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
     271             : 
     272           0 :     if (!(flags & AV_CODEC_FLAG_BITEXACT)){
     273           0 :         c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
     274           0 :         c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
     275           0 :         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_3dnow;
     276           0 :         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_3dnow;
     277             : 
     278           0 :         c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow;
     279           0 :         c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow;
     280             :     }
     281             : 
     282           0 :     if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
     283           0 :         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
     284           0 :         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
     285             :     }
     286             : #endif /* HAVE_AMD3DNOW_EXTERNAL */
     287           0 : }
     288             : 
     289         140 : static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int cpu_flags)
     290             : {
     291             : #if HAVE_SSE2_EXTERNAL
     292         140 :     if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
     293             :         // these functions are slower than mmx on AMD, but faster on Intel
     294         140 :         c->put_pixels_tab[0][0]        = ff_put_pixels16_sse2;
     295         140 :         c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2;
     296         140 :         c->put_pixels_tab[0][1]        = ff_put_pixels16_x2_sse2;
     297         140 :         c->put_pixels_tab[0][2]        = ff_put_pixels16_y2_sse2;
     298         140 :         c->put_pixels_tab[0][3]        = ff_put_pixels16_xy2_sse2;
     299         140 :         c->avg_pixels_tab[0][0]        = ff_avg_pixels16_sse2;
     300         140 :         c->avg_pixels_tab[0][1]        = ff_avg_pixels16_x2_sse2;
     301         140 :         c->avg_pixels_tab[0][2]        = ff_avg_pixels16_y2_sse2;
     302         140 :         c->avg_pixels_tab[0][3]        = ff_avg_pixels16_xy2_sse2;
     303             :     }
     304             : #endif /* HAVE_SSE2_EXTERNAL */
     305         140 : }
     306             : 
     307         140 : static void hpeldsp_init_ssse3(HpelDSPContext *c, int flags, int cpu_flags)
     308             : {
     309             : #if HAVE_SSSE3_EXTERNAL
     310         140 :     c->put_pixels_tab[0][3]            = ff_put_pixels16_xy2_ssse3;
     311         140 :     c->avg_pixels_tab[0][3]            = ff_avg_pixels16_xy2_ssse3;
     312         140 :     c->put_pixels_tab[1][3]            = ff_put_pixels8_xy2_ssse3;
     313         140 :     c->avg_pixels_tab[1][3]            = ff_avg_pixels8_xy2_ssse3;
     314             : #endif
     315         140 : }
     316             : 
     317         932 : av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
     318             : {
     319         932 :     int cpu_flags = av_get_cpu_flags();
     320             : 
     321         932 :     if (INLINE_MMX(cpu_flags))
     322         140 :         hpeldsp_init_mmx(c, flags, cpu_flags);
     323             : 
     324         932 :     if (EXTERNAL_AMD3DNOW(cpu_flags))
     325           0 :         hpeldsp_init_3dnow(c, flags, cpu_flags);
     326             : 
     327         932 :     if (EXTERNAL_MMXEXT(cpu_flags))
     328         140 :         hpeldsp_init_mmxext(c, flags, cpu_flags);
     329             : 
     330         932 :     if (EXTERNAL_SSE2(cpu_flags))
     331         140 :         hpeldsp_init_sse2(c, flags, cpu_flags);
     332             : 
     333         932 :     if (EXTERNAL_SSSE3(cpu_flags))
     334         140 :         hpeldsp_init_ssse3(c, flags, cpu_flags);
     335         932 : }

Generated by: LCOV version 1.12