LCOV - code coverage report
Current view: top level - src/libavcodec/x86 - mdct15_init.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 46 46 100.0 %
Date: 2017-09-20 05:29:53 Functions: 2 2 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SIMD optimized non-power-of-two MDCT functions
       3             :  *
       4             :  * Copyright (C) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
       5             :  *
       6             :  * This file is part of FFmpeg.
       7             :  *
       8             :  * FFmpeg is free software; you can redistribute it and/or
       9             :  * modify it under the terms of the GNU Lesser General Public
      10             :  * License as published by the Free Software Foundation; either
      11             :  * version 2.1 of the License, or (at your option) any later version.
      12             :  *
      13             :  * FFmpeg is distributed in the hope that it will be useful,
      14             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16             :  * Lesser General Public License for more details.
      17             :  *
      18             :  * You should have received a copy of the GNU Lesser General Public
      19             :  * License along with FFmpeg; if not, write to the Free Software
      20             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      21             :  */
      22             : 
      23             : #include "config.h"
      24             : 
      25             : #include "libavutil/x86/cpu.h"
      26             : #include "libavcodec/mdct15.h"
      27             : 
      28             : void ff_mdct15_postreindex_sse3(FFTComplex *out, FFTComplex *in, FFTComplex *exp, int *lut, ptrdiff_t len8);
      29             : void ff_mdct15_postreindex_avx2(FFTComplex *out, FFTComplex *in, FFTComplex *exp, int *lut, ptrdiff_t len8);
      30             : 
      31             : void ff_fft15_avx(FFTComplex *out, FFTComplex *in, FFTComplex *exptab, ptrdiff_t stride);
      32             : 
      33         100 : static void perm_twiddles(MDCT15Context *s)
      34             : {
      35             :     int k;
      36             :     FFTComplex exp_5point[4];
      37             : 
      38             :     FFTComplex tmp[21], tmp2[30];
      39         100 :     memcpy(tmp, s->exptab, sizeof(FFTComplex)*21);
      40             : 
      41             :     /* 15-point FFT twiddles */
      42         600 :     for (k = 0; k < 5; k++) {
      43         500 :         tmp2[6*k + 0] = tmp[k +  0];
      44         500 :         tmp2[6*k + 2] = tmp[k +  5];
      45         500 :         tmp2[6*k + 4] = tmp[k + 10];
      46             : 
      47         500 :         tmp2[6*k + 1] = tmp[2 * (k + 0)];
      48         500 :         tmp2[6*k + 3] = tmp[2 * (k + 5)];
      49         500 :         tmp2[6*k + 5] = tmp[2 *  k + 5 ];
      50             :     }
      51             : 
      52         700 :     for (k = 0; k < 6; k++) {
      53        9600 :         FFTComplex ac_exp[] = {
      54        1200 :             { tmp2[6*1 + k].re,  tmp2[6*1 + k].re },
      55        1200 :             { tmp2[6*2 + k].re,  tmp2[6*2 + k].re },
      56        1200 :             { tmp2[6*3 + k].re,  tmp2[6*3 + k].re },
      57        1200 :             { tmp2[6*4 + k].re,  tmp2[6*4 + k].re },
      58        1200 :             { tmp2[6*1 + k].im, -tmp2[6*1 + k].im },
      59        1200 :             { tmp2[6*2 + k].im, -tmp2[6*2 + k].im },
      60        1200 :             { tmp2[6*3 + k].im, -tmp2[6*3 + k].im },
      61        1200 :             { tmp2[6*4 + k].im, -tmp2[6*4 + k].im },
      62             :         };
      63         600 :         memcpy(s->exptab + 8*k, ac_exp, 8*sizeof(FFTComplex));
      64             :     }
      65             : 
      66             :     /* Specialcase when k = 0 */
      67         400 :     for (k = 0; k < 3; k++) {
      68        2400 :         FFTComplex dc_exp[] = {
      69         600 :             { tmp2[2*k + 0].re, -tmp2[2*k + 0].im },
      70         600 :             { tmp2[2*k + 0].im,  tmp2[2*k + 0].re },
      71         600 :             { tmp2[2*k + 1].re, -tmp2[2*k + 1].im },
      72         600 :             { tmp2[2*k + 1].im,  tmp2[2*k + 1].re },
      73             :         };
      74         300 :         memcpy(s->exptab + 8*6 + 4*k, dc_exp, 4*sizeof(FFTComplex));
      75             :     }
      76             : 
      77             :     /* 5-point FFT twiddles */
      78         100 :     exp_5point[0].re = exp_5point[0].im = tmp[19].re;
      79         100 :     exp_5point[1].re = exp_5point[1].im = tmp[19].im;
      80         100 :     exp_5point[2].re = exp_5point[2].im = tmp[20].re;
      81         100 :     exp_5point[3].re = exp_5point[3].im = tmp[20].im;
      82             : 
      83         100 :     memcpy(s->exptab + 8*6 + 4*3, exp_5point, 4*sizeof(FFTComplex));
      84         100 : }
      85             : 
      86         890 : av_cold void ff_mdct15_init_x86(MDCT15Context *s)
      87             : {
      88         890 :     int adjust_twiddles = 0;
      89         890 :     int cpu_flags = av_get_cpu_flags();
      90             : 
      91         890 :     if (EXTERNAL_SSE3(cpu_flags))
      92         100 :         s->postreindex = ff_mdct15_postreindex_sse3;
      93             : 
      94         890 :     if (ARCH_X86_64 && EXTERNAL_AVX(cpu_flags)) {
      95         100 :         s->fft15 = ff_fft15_avx;
      96         100 :         adjust_twiddles = 1;
      97             :     }
      98             : 
      99         890 :     if (ARCH_X86_64 && EXTERNAL_AVX2_FAST(cpu_flags))
     100         100 :         s->postreindex = ff_mdct15_postreindex_avx2;
     101             : 
     102         890 :     if (adjust_twiddles)
     103         100 :         perm_twiddles(s);
     104         890 : }

Generated by: LCOV version 1.13