LCOV - code coverage report
Current view: top level - src/libavcodec/x86 - dirac_dwt_init.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 4 36 11.1 %
Date: 2017-01-21 09:32:20 Functions: 1 9 11.1 %

          Line data    Source code
       1             : /*
       2             :  * x86 optimized discrete wavelet transform
       3             :  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
       4             :  * Copyright (c) 2010 David Conrad
       5             :  *
       6             :  * This file is part of FFmpeg.
       7             :  *
       8             :  * FFmpeg is free software; you can redistribute it and/or
       9             :  * modify it under the terms of the GNU Lesser General Public
      10             :  * License as published by the Free Software Foundation; either
      11             :  * version 2.1 of the License, or (at your option) any later version.
      12             :  *
      13             :  * FFmpeg is distributed in the hope that it will be useful,
      14             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16             :  * Lesser General Public License for more details.
      17             :  *
      18             :  * You should have received a copy of the GNU Lesser General Public
      19             :  * License along with FFmpeg; if not, write to the Free Software
      20             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      21             :  */
      22             : 
      23             : #include "libavutil/x86/asm.h"
      24             : #include "libavutil/x86/cpu.h"
      25             : #include "libavcodec/dirac_dwt.h"
      26             : 
      27             : #define COMPOSE_VERTICAL(ext, align) \
      28             : void ff_vertical_compose53iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
      29             : void ff_vertical_compose_dirac53iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
      30             : void ff_vertical_compose_dd137iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
      31             : void ff_vertical_compose_dd97iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
      32             : void ff_vertical_compose_haar##ext(int16_t *b0, int16_t *b1, int width); \
      33             : void ff_horizontal_compose_haar0i##ext(int16_t *b, int16_t *tmp, int w);\
      34             : void ff_horizontal_compose_haar1i##ext(int16_t *b, int16_t *tmp, int w);\
      35             : \
      36             : static void vertical_compose53iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
      37             : { \
      38             :     int i, width_align = width&~(align-1); \
      39             :     int16_t *b0 = (int16_t *)_b0; \
      40             :     int16_t *b1 = (int16_t *)_b1; \
      41             :     int16_t *b2 = (int16_t *)_b2; \
      42             : \
      43             :     for(i=width_align; i<width; i++) \
      44             :         b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \
      45             : \
      46             :     ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \
      47             : } \
      48             : \
      49             : static void vertical_compose_dirac53iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
      50             : { \
      51             :     int i, width_align = width&~(align-1); \
      52             :     int16_t *b0 = (int16_t *)_b0; \
      53             :     int16_t *b1 = (int16_t *)_b1; \
      54             :     int16_t *b2 = (int16_t *)_b2; \
      55             : \
      56             :     for(i=width_align; i<width; i++) \
      57             :         b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \
      58             : \
      59             :     ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \
      60             : } \
      61             : \
      62             : static void vertical_compose_dd137iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
      63             :                                            uint8_t *_b3, uint8_t *_b4, int width) \
      64             : { \
      65             :     int i, width_align = width&~(align-1); \
      66             :     int16_t *b0 = (int16_t *)_b0; \
      67             :     int16_t *b1 = (int16_t *)_b1; \
      68             :     int16_t *b2 = (int16_t *)_b2; \
      69             :     int16_t *b3 = (int16_t *)_b3; \
      70             :     int16_t *b4 = (int16_t *)_b4; \
      71             : \
      72             :     for(i=width_align; i<width; i++) \
      73             :         b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
      74             : \
      75             :     ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \
      76             : } \
      77             : \
      78             : static void vertical_compose_dd97iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
      79             :                                           uint8_t *_b3, uint8_t *_b4, int width) \
      80             : { \
      81             :     int i, width_align = width&~(align-1); \
      82             :     int16_t *b0 = (int16_t *)_b0; \
      83             :     int16_t *b1 = (int16_t *)_b1; \
      84             :     int16_t *b2 = (int16_t *)_b2; \
      85             :     int16_t *b3 = (int16_t *)_b3; \
      86             :     int16_t *b4 = (int16_t *)_b4; \
      87             : \
      88             :     for(i=width_align; i<width; i++) \
      89             :         b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
      90             : \
      91             :     ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \
      92             : } \
      93             : static void vertical_compose_haar##ext(uint8_t *_b0, uint8_t *_b1, int width) \
      94             : { \
      95             :     int i, width_align = width&~(align-1); \
      96             :     int16_t *b0 = (int16_t *)_b0; \
      97             :     int16_t *b1 = (int16_t *)_b1; \
      98             : \
      99             :     for(i=width_align; i<width; i++) { \
     100             :         b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \
     101             :         b1[i] = COMPOSE_HAARiH0(b1[i], b0[i]); \
     102             :     } \
     103             : \
     104             :     ff_vertical_compose_haar##ext(b0, b1, width_align); \
     105             : } \
     106             : static void horizontal_compose_haar0i##ext(uint8_t *_b, uint8_t *_tmp, int w)\
     107             : {\
     108             :     int w2= w>>1;\
     109             :     int x= w2 - (w2&(align-1));\
     110             :     int16_t *b = (int16_t *)_b; \
     111             :     int16_t *tmp = (int16_t *)_tmp; \
     112             : \
     113             :     ff_horizontal_compose_haar0i##ext(b, tmp, w);\
     114             : \
     115             :     for (; x < w2; x++) {\
     116             :         b[2*x  ] = tmp[x];\
     117             :         b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);\
     118             :     }\
     119             : }\
     120             : static void horizontal_compose_haar1i##ext(uint8_t *_b, uint8_t *_tmp, int w)\
     121             : {\
     122             :     int w2= w>>1;\
     123             :     int x= w2 - (w2&(align-1));\
     124             :     int16_t *b = (int16_t *)_b; \
     125             :     int16_t *tmp = (int16_t *)_tmp; \
     126             : \
     127             :     ff_horizontal_compose_haar1i##ext(b, tmp, w);\
     128             : \
     129             :     for (; x < w2; x++) {\
     130             :         b[2*x  ] = (tmp[x] + 1)>>1;\
     131             :         b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1)>>1;\
     132             :     }\
     133             : }\
     134             : \
     135             : 
     136             : #if HAVE_YASM
     137             : #if !ARCH_X86_64
     138             : COMPOSE_VERTICAL(_mmx, 4)
     139             : #endif
     140           0 : COMPOSE_VERTICAL(_sse2, 8)
     141             : 
     142             : 
     143             : void ff_horizontal_compose_dd97i_ssse3(int16_t *_b, int16_t *_tmp, int w);
     144             : 
     145           0 : static void horizontal_compose_dd97i_ssse3(uint8_t *_b, uint8_t *_tmp, int w)
     146             : {
     147           0 :     int w2= w>>1;
     148           0 :     int x= w2 - (w2&7);
     149           0 :     int16_t *b = (int16_t *)_b;
     150           0 :     int16_t *tmp = (int16_t *)_tmp;
     151             : 
     152           0 :     ff_horizontal_compose_dd97i_ssse3(b, tmp, w);
     153             : 
     154           0 :     for (; x < w2; x++) {
     155           0 :         b[2*x  ] = (tmp[x] + 1)>>1;
     156           0 :         b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1;
     157             :     }
     158           0 : }
     159             : #endif
     160             : 
     161         255 : void ff_spatial_idwt_init_x86(DWTContext *d, enum dwt_type type)
     162             : {
     163             : #if HAVE_YASM
     164         255 :   int mm_flags = av_get_cpu_flags();
     165             : 
     166             : #if !ARCH_X86_64
     167             :     if (!(mm_flags & AV_CPU_FLAG_MMX))
     168             :         return;
     169             : 
     170             :     switch (type) {
     171             :     case DWT_DIRAC_DD9_7:
     172             :         d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
     173             :         d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
     174             :         break;
     175             :     case DWT_DIRAC_LEGALL5_3:
     176             :         d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
     177             :         d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_mmx;
     178             :         break;
     179             :     case DWT_DIRAC_DD13_7:
     180             :         d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_mmx;
     181             :         d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
     182             :         break;
     183             :     case DWT_DIRAC_HAAR0:
     184             :         d->vertical_compose   = (void*)vertical_compose_haar_mmx;
     185             :         d->horizontal_compose = horizontal_compose_haar0i_mmx;
     186             :         break;
     187             :     case DWT_DIRAC_HAAR1:
     188             :         d->vertical_compose   = (void*)vertical_compose_haar_mmx;
     189             :         d->horizontal_compose = horizontal_compose_haar1i_mmx;
     190             :         break;
     191             :     }
     192             : #endif
     193             : 
     194         255 :     if (!(mm_flags & AV_CPU_FLAG_SSE2))
     195         255 :         return;
     196             : 
     197           0 :     switch (type) {
     198             :     case DWT_DIRAC_DD9_7:
     199           0 :         d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
     200           0 :         d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
     201           0 :         break;
     202             :     case DWT_DIRAC_LEGALL5_3:
     203           0 :         d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
     204           0 :         d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_sse2;
     205           0 :         break;
     206             :     case DWT_DIRAC_DD13_7:
     207           0 :         d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_sse2;
     208           0 :         d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
     209           0 :         break;
     210             :     case DWT_DIRAC_HAAR0:
     211           0 :         d->vertical_compose   = (void*)vertical_compose_haar_sse2;
     212           0 :         d->horizontal_compose = horizontal_compose_haar0i_sse2;
     213           0 :         break;
     214             :     case DWT_DIRAC_HAAR1:
     215           0 :         d->vertical_compose   = (void*)vertical_compose_haar_sse2;
     216           0 :         d->horizontal_compose = horizontal_compose_haar1i_sse2;
     217           0 :         break;
     218             :     }
     219             : 
     220           0 :     if (!(mm_flags & AV_CPU_FLAG_SSSE3))
     221           0 :         return;
     222             : 
     223           0 :     switch (type) {
     224             :     case DWT_DIRAC_DD9_7:
     225           0 :         d->horizontal_compose = horizontal_compose_dd97i_ssse3;
     226           0 :         break;
     227             :     }
     228             : #endif // HAVE_YASM
     229             : }

Generated by: LCOV version 1.12