LCOV - code coverage report
Current view: top level - libavcodec/x86 - hpeldsp_init.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 66 95 69.5 %
Date: 2017-10-18 21:45:51 Functions: 9 30 30.0 %

          Line data    Source code
       1             : /*
       2             :  * SIMD-optimized halfpel functions
       3             :  * Copyright (c) 2000, 2001 Fabrice Bellard
       4             :  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
       5             :  *
       6             :  * This file is part of FFmpeg.
       7             :  *
       8             :  * FFmpeg is free software; you can redistribute it and/or
       9             :  * modify it under the terms of the GNU Lesser General Public
      10             :  * License as published by the Free Software Foundation; either
      11             :  * version 2.1 of the License, or (at your option) any later version.
      12             :  *
      13             :  * FFmpeg is distributed in the hope that it will be useful,
      14             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16             :  * Lesser General Public License for more details.
      17             :  *
      18             :  * You should have received a copy of the GNU Lesser General Public
      19             :  * License along with FFmpeg; if not, write to the Free Software
      20             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      21             :  *
      22             :  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
      23             :  */
      24             : 
      25             : #include "libavutil/attributes.h"
      26             : #include "libavutil/cpu.h"
      27             : #include "libavutil/x86/cpu.h"
      28             : #include "libavcodec/avcodec.h"
      29             : #include "libavcodec/hpeldsp.h"
      30             : #include "libavcodec/pixels.h"
      31             : #include "fpel.h"
      32             : #include "hpeldsp.h"
      33             : 
      34             : void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
      35             :                               ptrdiff_t line_size, int h);
      36             : void ff_put_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
      37             :                              ptrdiff_t line_size, int h);
      38             : void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels,
      39             :                                ptrdiff_t line_size, int h);
      40             : void ff_put_pixels16_x2_3dnow(uint8_t *block, const uint8_t *pixels,
      41             :                               ptrdiff_t line_size, int h);
      42             : void ff_put_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
      43             :                              ptrdiff_t line_size, int h);
      44             : void ff_avg_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
      45             :                              ptrdiff_t line_size, int h);
      46             : void ff_put_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
      47             :                              ptrdiff_t line_size, int h);
      48             : void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
      49             :                              ptrdiff_t line_size, int h);
      50             : void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
      51             :                                      ptrdiff_t line_size, int h);
      52             : void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
      53             :                                     ptrdiff_t line_size, int h);
      54             : void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
      55             :                               ptrdiff_t line_size, int h);
      56             : void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
      57             :                              ptrdiff_t line_size, int h);
      58             : void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
      59             :                                      ptrdiff_t line_size, int h);
      60             : void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
      61             :                                     ptrdiff_t line_size, int h);
      62             : void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels,
      63             :                           ptrdiff_t line_size, int h);
      64             : void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
      65             :                               ptrdiff_t line_size, int h);
      66             : void ff_avg_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
      67             :                              ptrdiff_t line_size, int h);
      68             : void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
      69             :                               ptrdiff_t line_size, int h);
      70             : void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
      71             :                              ptrdiff_t line_size, int h);
      72             : void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
      73             :                               ptrdiff_t line_size, int h);
      74             : void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
      75             :                                       ptrdiff_t line_size, int h);
      76             : void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
      77             :                                      ptrdiff_t line_size, int h);
      78             : 
      79             : #define avg_pixels8_mmx         ff_avg_pixels8_mmx
      80             : #define avg_pixels8_x2_mmx      ff_avg_pixels8_x2_mmx
      81             : #define avg_pixels16_mmx        ff_avg_pixels16_mmx
      82             : #define avg_pixels8_xy2_mmx     ff_avg_pixels8_xy2_mmx
      83             : #define avg_pixels16_xy2_mmx    ff_avg_pixels16_xy2_mmx
      84             : #define put_pixels8_mmx         ff_put_pixels8_mmx
      85             : #define put_pixels16_mmx        ff_put_pixels16_mmx
      86             : #define put_pixels8_xy2_mmx     ff_put_pixels8_xy2_mmx
      87             : #define put_pixels16_xy2_mmx    ff_put_pixels16_xy2_mmx
      88             : #define avg_no_rnd_pixels16_mmx ff_avg_pixels16_mmx
      89             : #define put_no_rnd_pixels8_mmx  ff_put_pixels8_mmx
      90             : #define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx
      91             : 
      92             : #if HAVE_INLINE_ASM
      93             : 
      94             : /***********************************/
      95             : /* MMX no rounding */
      96             : #define DEF(x, y) x ## _no_rnd_ ## y ## _mmx
      97             : #define SET_RND  MOVQ_WONE
      98             : #define PAVGBP(a, b, c, d, e, f)        PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
      99             : #define PAVGB(a, b, c, e)               PAVGB_MMX_NO_RND(a, b, c, e)
     100             : #define STATIC static
     101             : 
     102             : #include "rnd_template.c"
     103             : #include "hpeldsp_rnd_template.c"
     104             : 
     105             : #undef DEF
     106             : #undef SET_RND
     107             : #undef PAVGBP
     108             : #undef PAVGB
     109             : #undef STATIC
     110             : 
     111             : #if HAVE_MMX
     112           0 : CALL_2X_PIXELS(avg_no_rnd_pixels16_y2_mmx, avg_no_rnd_pixels8_y2_mmx, 8)
     113          74 : CALL_2X_PIXELS(put_no_rnd_pixels16_y2_mmx, put_no_rnd_pixels8_y2_mmx, 8)
     114             : 
     115           0 : CALL_2X_PIXELS(avg_no_rnd_pixels16_xy2_mmx, avg_no_rnd_pixels8_xy2_mmx, 8)
     116        7956 : CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_mmx, put_no_rnd_pixels8_xy2_mmx, 8)
     117             : #endif
     118             : 
     119             : /***********************************/
     120             : /* MMX rounding */
     121             : 
     122             : #define DEF(x, y) x ## _ ## y ## _mmx
     123             : #define SET_RND  MOVQ_WTWO
     124             : #define PAVGBP(a, b, c, d, e, f)        PAVGBP_MMX(a, b, c, d, e, f)
     125             : #define PAVGB(a, b, c, e)               PAVGB_MMX(a, b, c, e)
     126             : 
     127             : #include "hpeldsp_rnd_template.c"
     128             : 
     129             : #undef DEF
     130             : #define DEF(x, y) ff_ ## x ## _ ## y ## _mmx
     131             : #define STATIC
     132             : 
     133             : #include "rnd_template.c"
     134             : 
     135             : #undef DEF
     136             : #undef SET_RND
     137             : #undef PAVGBP
     138             : #undef PAVGB
     139             : 
     140             : #if HAVE_MMX
     141           0 : CALL_2X_PIXELS(avg_pixels16_y2_mmx, avg_pixels8_y2_mmx, 8)
     142           0 : CALL_2X_PIXELS(put_pixels16_y2_mmx, put_pixels8_y2_mmx, 8)
     143             : 
     144           0 : CALL_2X_PIXELS_EXPORT(ff_avg_pixels16_xy2_mmx, ff_avg_pixels8_xy2_mmx, 8)
     145           0 : CALL_2X_PIXELS_EXPORT(ff_put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8)
     146             : #endif
     147             : 
     148             : #endif /* HAVE_INLINE_ASM */
     149             : 
     150             : 
     151             : #if HAVE_X86ASM
     152             : 
     153             : #define HPELDSP_AVG_PIXELS16(CPUEXT)                      \
     154             :     CALL_2X_PIXELS(put_no_rnd_pixels16_x2 ## CPUEXT, ff_put_no_rnd_pixels8_x2 ## CPUEXT, 8) \
     155             :     CALL_2X_PIXELS(put_pixels16_y2        ## CPUEXT, ff_put_pixels8_y2        ## CPUEXT, 8) \
     156             :     CALL_2X_PIXELS(put_no_rnd_pixels16_y2 ## CPUEXT, ff_put_no_rnd_pixels8_y2 ## CPUEXT, 8) \
     157             :     CALL_2X_PIXELS(avg_pixels16           ## CPUEXT, ff_avg_pixels8           ## CPUEXT, 8) \
     158             :     CALL_2X_PIXELS(avg_pixels16_x2        ## CPUEXT, ff_avg_pixels8_x2        ## CPUEXT, 8) \
     159             :     CALL_2X_PIXELS(avg_pixels16_y2        ## CPUEXT, ff_avg_pixels8_y2        ## CPUEXT, 8) \
     160             :     CALL_2X_PIXELS(avg_pixels16_xy2       ## CPUEXT, ff_avg_pixels8_xy2       ## CPUEXT, 8) \
     161             :     CALL_2X_PIXELS(avg_approx_pixels16_xy2## CPUEXT, ff_avg_approx_pixels8_xy2## CPUEXT, 8)
     162             : 
     163           0 : HPELDSP_AVG_PIXELS16(_3dnow)
     164        7897 : HPELDSP_AVG_PIXELS16(_mmxext)
     165             : 
     166             : #endif /* HAVE_X86ASM */
     167             : 
     168             : #define SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)                             \
     169             :     if (HAVE_MMX_EXTERNAL)                                                  \
     170             :     c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _     ## CPU;
     171             : 
     172             : #if HAVE_MMX_INLINE
     173             : #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU)                                     \
     174             :     do {                                                                        \
     175             :         SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)                                 \
     176             :         c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_  ## CPU; \
     177             :         c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_  ## CPU; \
     178             :         c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \
     179             :     } while (0)
     180             : #else
     181             : #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU)                                     \
     182             :     do {                                                                        \
     183             :         SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)                                 \
     184             :     } while (0)
     185             : #endif
     186             : 
     187         140 : static void hpeldsp_init_mmx(HpelDSPContext *c, int flags)
     188             : {
     189         140 :     SET_HPEL_FUNCS(put,        [0], 16, mmx);
     190         140 :     SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx);
     191         140 :     SET_HPEL_FUNCS(avg,        [0], 16, mmx);
     192         140 :     SET_HPEL_FUNCS(avg_no_rnd,    , 16, mmx);
     193         140 :     SET_HPEL_FUNCS(put,        [1],  8, mmx);
     194         140 :     SET_HPEL_FUNCS(put_no_rnd, [1],  8, mmx);
     195             :     if (HAVE_MMX_EXTERNAL) {
     196         140 :         c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmx;
     197         140 :         c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmx;
     198             :     }
     199             : #if HAVE_MMX_INLINE
     200         140 :     c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
     201         140 :     c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmx;
     202             : #endif
     203         140 : }
     204             : 
     205         140 : static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags)
     206             : {
     207             : #if HAVE_MMXEXT_EXTERNAL
     208         140 :     c->put_pixels_tab[0][1] = ff_put_pixels16_x2_mmxext;
     209         140 :     c->put_pixels_tab[0][2] = put_pixels16_y2_mmxext;
     210             : 
     211         140 :     c->avg_pixels_tab[0][0] = avg_pixels16_mmxext;
     212         140 :     c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext;
     213         140 :     c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext;
     214         140 :     c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
     215             : 
     216         140 :     c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext;
     217         140 :     c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext;
     218             : 
     219         140 :     c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext;
     220         140 :     c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext;
     221         140 :     c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
     222         140 :     c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
     223             : 
     224         140 :     if (!(flags & AV_CODEC_FLAG_BITEXACT)) {
     225         137 :         c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
     226         137 :         c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
     227         137 :         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext;
     228         137 :         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext;
     229             : 
     230         137 :         c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext;
     231         137 :         c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext;
     232             :     }
     233             : #endif /* HAVE_MMXEXT_EXTERNAL */
     234         140 : }
     235             : 
     236           0 : static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags)
     237             : {
     238             : #if HAVE_AMD3DNOW_EXTERNAL
     239           0 :     c->put_pixels_tab[0][1] = ff_put_pixels16_x2_3dnow;
     240           0 :     c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
     241             : 
     242           0 :     c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
     243           0 :     c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
     244           0 :     c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
     245           0 :     c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
     246             : 
     247           0 :     c->put_pixels_tab[1][1] = ff_put_pixels8_x2_3dnow;
     248           0 :     c->put_pixels_tab[1][2] = ff_put_pixels8_y2_3dnow;
     249             : 
     250           0 :     c->avg_pixels_tab[1][0] = ff_avg_pixels8_3dnow;
     251           0 :     c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_3dnow;
     252           0 :     c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_3dnow;
     253           0 :     c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
     254             : 
     255           0 :     if (!(flags & AV_CODEC_FLAG_BITEXACT)){
     256           0 :         c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
     257           0 :         c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
     258           0 :         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_3dnow;
     259           0 :         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_3dnow;
     260             : 
     261           0 :         c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow;
     262           0 :         c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow;
     263             :     }
     264             : #endif /* HAVE_AMD3DNOW_EXTERNAL */
     265           0 : }
     266             : 
     267         140 : static void hpeldsp_init_sse2_fast(HpelDSPContext *c, int flags)
     268             : {
     269             : #if HAVE_SSE2_EXTERNAL
     270         140 :     c->put_pixels_tab[0][0]        = ff_put_pixels16_sse2;
     271         140 :     c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2;
     272         140 :     c->put_pixels_tab[0][1]        = ff_put_pixels16_x2_sse2;
     273         140 :     c->put_pixels_tab[0][2]        = ff_put_pixels16_y2_sse2;
     274         140 :     c->put_pixels_tab[0][3]        = ff_put_pixels16_xy2_sse2;
     275         140 :     c->avg_pixels_tab[0][0]        = ff_avg_pixels16_sse2;
     276         140 :     c->avg_pixels_tab[0][1]        = ff_avg_pixels16_x2_sse2;
     277         140 :     c->avg_pixels_tab[0][2]        = ff_avg_pixels16_y2_sse2;
     278         140 :     c->avg_pixels_tab[0][3]        = ff_avg_pixels16_xy2_sse2;
     279             : #endif /* HAVE_SSE2_EXTERNAL */
     280         140 : }
     281             : 
     282         140 : static void hpeldsp_init_ssse3(HpelDSPContext *c, int flags)
     283             : {
     284             : #if HAVE_SSSE3_EXTERNAL
     285         140 :     c->put_pixels_tab[0][3]            = ff_put_pixels16_xy2_ssse3;
     286         140 :     c->avg_pixels_tab[0][3]            = ff_avg_pixels16_xy2_ssse3;
     287         140 :     c->put_pixels_tab[1][3]            = ff_put_pixels8_xy2_ssse3;
     288         140 :     c->avg_pixels_tab[1][3]            = ff_avg_pixels8_xy2_ssse3;
     289             : #endif
     290         140 : }
     291             : 
     292         962 : av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
     293             : {
     294         962 :     int cpu_flags = av_get_cpu_flags();
     295             : 
     296         962 :     if (INLINE_MMX(cpu_flags))
     297         140 :         hpeldsp_init_mmx(c, flags);
     298             : 
     299         962 :     if (EXTERNAL_AMD3DNOW(cpu_flags))
     300           0 :         hpeldsp_init_3dnow(c, flags);
     301             : 
     302         962 :     if (EXTERNAL_MMXEXT(cpu_flags))
     303         140 :         hpeldsp_init_mmxext(c, flags);
     304             : 
     305         962 :     if (EXTERNAL_SSE2_FAST(cpu_flags))
     306         140 :         hpeldsp_init_sse2_fast(c, flags);
     307             : 
     308         962 :     if (EXTERNAL_SSSE3(cpu_flags))
     309         140 :         hpeldsp_init_ssse3(c, flags);
     310             : 
     311             :     if (CONFIG_VP3_DECODER)
     312         962 :         ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
     313         962 : }

Generated by: LCOV version 1.13