FFmpeg coverage

Directory:	../../../ffmpeg/
File:	src/libavcodec/x86/mpegaudiodsp.c
Date:	2025-10-15 07:48:07

	Exec	Total	Coverage
Lines:	58	70	82.9%
Functions:	5	8	62.5%
Branches:	24	70	34.3%

  
      Line
      Branch
      Exec
      Source
    
      /*
    
       * SIMD-optimized MP3 decoding functions
    
       * Copyright (c) 2010 Vitor Sessak
    
       *
    
       * This file is part of FFmpeg.
    
       *
    
       * FFmpeg is free software; you can redistribute it and/or
    
       * modify it under the terms of the GNU Lesser General Public
    
       * License as published by the Free Software Foundation; either
    
       * version 2.1 of the License, or (at your option) any later version.
    
       *
    
       * FFmpeg is distributed in the hope that it will be useful,
    
       * but WITHOUT ANY WARRANTY; without even the implied warranty of
    
       * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    
       * Lesser General Public License for more details.
    
       *
    
       * You should have received a copy of the GNU Lesser General Public
    
       * License along with FFmpeg; if not, write to the Free Software
    
       * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
    
       */
    
      #include <stddef.h>
    
      #include "config.h"
    
      #include "libavutil/attributes.h"
    
      #include "libavutil/cpu.h"
    
      #include "libavutil/mem_internal.h"
    
      #include "libavutil/x86/asm.h"
    
      #include "libavutil/x86/cpu.h"
    
      #include "libavcodec/mpegaudiodsp.h"
    
      #define DECL(CPU)\
    
      static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, int switch_point, int block_type);\
    
      void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);
    
      #if HAVE_X86ASM
    
      DECL(sse2)
    
      DECL(sse3)
    
      DECL(ssse3)
    
      DECL(avx)
    
      #endif /* HAVE_X86ASM */
    
      void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win,
    
                                     float *tmpbuf);
    
      void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
    
                                     float *tmpbuf);
    
      void ff_dct32_float_sse2(float *out, const float *in);
    
      void ff_dct32_float_avx (float *out, const float *in);
    
      DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];
    
      #if HAVE_6REGS && HAVE_SSE_INLINE
    
      #define MACS(rt, ra, rb) rt+=(ra)*(rb)
    
      #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
    
      #define SUM8(op, sum, w, p)               \
    
      {                                         \
    
          op(sum, (w)[0 * 64], (p)[0 * 64]);    \
    
          op(sum, (w)[1 * 64], (p)[1 * 64]);    \
    
          op(sum, (w)[2 * 64], (p)[2 * 64]);    \
    
          op(sum, (w)[3 * 64], (p)[3 * 64]);    \
    
          op(sum, (w)[4 * 64], (p)[4 * 64]);    \
    
          op(sum, (w)[5 * 64], (p)[5 * 64]);    \
    
          op(sum, (w)[6 * 64], (p)[6 * 64]);    \
    
          op(sum, (w)[7 * 64], (p)[7 * 64]);    \
    
      }
    
      4536
      static void apply_window(const float *buf, const float *win1,
    
                               const float *win2, float *sum1, float *sum2, int len)
    
      {
    
      4536
          x86_reg count = - 4*len;
    
      4536
          const float *win1a = win1+len;
    
      4536
          const float *win2a = win2+len;
    
      4536
          const float *bufa  = buf+len;
    
      4536
          float *sum1a = sum1+len;
    
      4536
          float *sum2a = sum2+len;
    
      #define MULT(a, b)                                 \
    
          "movaps " #a "(%1,%0), %%xmm1           \n\t"  \
    
          "movaps " #a "(%3,%0), %%xmm2           \n\t"  \
    
          "mulps         %%xmm2, %%xmm1           \n\t"  \
    
          "subps         %%xmm1, %%xmm0           \n\t"  \
    
          "mulps  " #b "(%2,%0), %%xmm2           \n\t"  \
    
          "subps         %%xmm2, %%xmm4           \n\t"  \
    
      4536
          __asm__ volatile(
    
                  "1:                                   \n\t"
    
                  "xorps       %%xmm0, %%xmm0           \n\t"
    
                  "xorps       %%xmm4, %%xmm4           \n\t"
    
                  MULT(   0,   0)
    
                  MULT( 256,  64)
    
                  MULT( 512, 128)
    
                  MULT( 768, 192)
    
                  MULT(1024, 256)
    
                  MULT(1280, 320)
    
                  MULT(1536, 384)
    
                  MULT(1792, 448)
    
                  "movaps      %%xmm0, (%4,%0)          \n\t"
    
                  "movaps      %%xmm4, (%5,%0)          \n\t"
    
                  "add            $16,  %0              \n\t"
    
                  "jl              1b                   \n\t"
    
                  :"+&r"(count)
    
                  :"r"(win1a), "r"(win2a), "r"(bufa), "r"(sum1a), "r"(sum2a)
    
                  );
    
      #undef MULT
    
      4536
      }
    
      2268
      static void apply_window_mp3(float *in, float *win, int *unused, float *out,
    
                                   ptrdiff_t incr)
    
      {
    
      2268
          LOCAL_ALIGNED_16(float, suma, [17]);
    
      2268
          LOCAL_ALIGNED_16(float, sumb, [17]);
    
      2268
          LOCAL_ALIGNED_16(float, sumc, [17]);
    
      2268
          LOCAL_ALIGNED_16(float, sumd, [17]);
    
          float sum;
    
          /* copy to avoid wrap */
    
      2268
          __asm__ volatile(
    
                  "movaps    0(%0), %%xmm0   \n\t" \
    
                  "movaps   16(%0), %%xmm1   \n\t" \
    
                  "movaps   32(%0), %%xmm2   \n\t" \
    
                  "movaps   48(%0), %%xmm3   \n\t" \
    
                  "movaps   %%xmm0,   0(%1) \n\t" \
    
                  "movaps   %%xmm1,  16(%1) \n\t" \
    
                  "movaps   %%xmm2,  32(%1) \n\t" \
    
                  "movaps   %%xmm3,  48(%1) \n\t" \
    
                  "movaps   64(%0), %%xmm0   \n\t" \
    
                  "movaps   80(%0), %%xmm1   \n\t" \
    
                  "movaps   96(%0), %%xmm2   \n\t" \
    
                  "movaps  112(%0), %%xmm3   \n\t" \
    
                  "movaps   %%xmm0,  64(%1) \n\t" \
    
                  "movaps   %%xmm1,  80(%1) \n\t" \
    
                  "movaps   %%xmm2,  96(%1) \n\t" \
    
                  "movaps   %%xmm3, 112(%1) \n\t"
    
      2268
                  ::"r"(in), "r"(in+512)
    
                  :"memory"
    
                  );
    
      2268
          apply_window(in + 16, win     , win + 512, suma, sumc, 16);
    
      2268
          apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16);
    
      2268
          SUM8(MACS, suma[0], win + 32, in + 48);
    
      2268
          sumc[ 0] = 0;
    
      2268
          sumb[16] = 0;
    
      2268
          sumd[16] = 0;
    
      #define SUMS(suma, sumb, sumc, sumd, out1, out2)               \
    
                  "movups " #sumd "(%4),       %%xmm0          \n\t" \
    
                  "shufps         $0x1b,       %%xmm0, %%xmm0  \n\t" \
    
                  "subps  " #suma "(%1),       %%xmm0          \n\t" \
    
                  "movaps        %%xmm0," #out1 "(%0)          \n\t" \
    
      \
    
                  "movups " #sumc "(%3),       %%xmm0          \n\t" \
    
                  "shufps         $0x1b,       %%xmm0, %%xmm0  \n\t" \
    
                  "addps  " #sumb "(%2),       %%xmm0          \n\t" \
    
                  "movaps        %%xmm0," #out2 "(%0)          \n\t"
    
        1/2✓ Branch 0 taken 2268 times.
✗ Branch 1 not taken.

      2268
          if (incr == 1) {
    
      2268
              __asm__ volatile(
    
                  SUMS( 0, 48,  4, 52,  0, 112)
    
                  SUMS(16, 32, 20, 36, 16,  96)
    
                  SUMS(32, 16, 36, 20, 32,  80)
    
                  SUMS(48,  0, 52,  4, 48,  64)
    
                  :"+&r"(out)
    
                  :"r"(&suma[0]), "r"(&sumb[0]), "r"(&sumc[0]), "r"(&sumd[0])
    
                  :"memory"
    
                  );
    
      2268
              out += 16*incr;
    
          } else {
    
              int j;
    
      ✗
              float *out2 = out + 32 * incr;
    
      ✗
              out[0  ]  = -suma[   0];
    
      ✗
              out += incr;
    
      ✗
              out2 -= incr;
    
      ✗
              for(j=1;j<16;j++) {
    
      ✗
                  *out  = -suma[   j] + sumd[16-j];
    
      ✗
                  *out2 =  sumb[16-j] + sumc[   j];
    
      ✗
                  out  += incr;
    
      ✗
                  out2 -= incr;
    
              }
    
          }
    
      2268
          sum = 0;
    
      2268
          SUM8(MLSS, sum, win + 16 + 32, in + 32);
    
      2268
          *out = sum;
    
      2268
      }
    
      #endif /* HAVE_6REGS && HAVE_SSE_INLINE */
    
      #if HAVE_X86ASM
    
      #define DECL_IMDCT_BLOCKS(CPU1, CPU2)                                       \
    
      static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in,      \
    
                                     int count, int switch_point, int block_type) \
    
      {                                                                           \
    
          int align_end = count - (count & 3);                                \
    
          int j;                                                              \
    
          for (j = 0; j < align_end; j+= 4) {                                 \
    
              LOCAL_ALIGNED_16(float, tmpbuf, [1024]);                        \
    
              float *win = mdct_win_sse[switch_point && j < 4][block_type];   \
    
              /* apply window & overlap with previous buffer */               \
    
                                                                              \
    
              /* select window */                                             \
    
              ff_four_imdct36_float_ ## CPU2(out, buf, in, win, tmpbuf);      \
    
              in      += 4*18;                                                \
    
              buf     += 4*18;                                                \
    
              out     += 4;                                                   \
    
          }                                                                   \
    
          for (; j < count; j++) {                                            \
    
              /* apply window & overlap with previous buffer */               \
    
                                                                              \
    
              /* select window */                                             \
    
              int win_idx = (switch_point && j < 2) ? 0 : block_type;         \
    
              float *win = ff_mdct_win_float[win_idx + (4 & -(j & 1))];       \
    
                                                                              \
    
              ff_imdct36_float_ ## CPU1(out, buf, in, win);                   \
    
                                                                              \
    
              in  += 18;                                                      \
    
              buf++;                                                          \
    
              out++;                                                          \
    
          }                                                                   \
    
      }
    
      #if HAVE_SSE
    
      ✗
      DECL_IMDCT_BLOCKS(sse2,sse)
    
      ✗
      DECL_IMDCT_BLOCKS(sse3,sse)
    
      ✗
      DECL_IMDCT_BLOCKS(ssse3,sse)
    
      #endif
    
      #if HAVE_AVX_EXTERNAL
    
        6/12✗ Branch 0 not taken.
✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 5 taken 40 times.
✓ Branch 6 taken 112 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 202 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 12 taken 202 times.
✓ Branch 13 taken 112 times.

      354
      DECL_IMDCT_BLOCKS(avx,avx)
    
      #endif
    
      #endif /* HAVE_X86ASM */
    
      97
      av_cold void ff_mpadsp_init_x86_tabs(void)
    
      {
    
          int i, j;
    
        2/2✓ Branch 0 taken 388 times.
✓ Branch 1 taken 97 times.

      485
          for (j = 0; j < 4; j++) {
    
        2/2✓ Branch 0 taken 15520 times.
✓ Branch 1 taken 388 times.

      15908
              for (i = 0; i < 40; i ++) {
    
      15520
                  mdct_win_sse[0][j][4*i    ] = ff_mdct_win_float[j    ][i];
    
      15520
                  mdct_win_sse[0][j][4*i + 1] = ff_mdct_win_float[j + 4][i];
    
      15520
                  mdct_win_sse[0][j][4*i + 2] = ff_mdct_win_float[j    ][i];
    
      15520
                  mdct_win_sse[0][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
    
      15520
                  mdct_win_sse[1][j][4*i    ] = ff_mdct_win_float[0    ][i];
    
      15520
                  mdct_win_sse[1][j][4*i + 1] = ff_mdct_win_float[4    ][i];
    
      15520
                  mdct_win_sse[1][j][4*i + 2] = ff_mdct_win_float[j    ][i];
    
      15520
                  mdct_win_sse[1][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
    
              }
    
          }
    
      97
      }
    
      166
      av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
    
      {
    
      166
          av_unused int cpu_flags = av_get_cpu_flags();
    
      #if HAVE_6REGS && HAVE_SSE_INLINE
    
        2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 134 times.

      166
          if (INLINE_SSE(cpu_flags)) {
    
      32
              s->apply_window_float = apply_window_mp3;
    
          }
    
      #endif /* HAVE_SSE_INLINE */
    
      #if HAVE_X86ASM
    
      #if HAVE_SSE
    
        2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 134 times.

      166
          if (EXTERNAL_SSE2(cpu_flags)) {
    
      32
              s->imdct36_blocks_float = imdct36_blocks_sse2;
    
      32
              s->dct32_float          = ff_dct32_float_sse2;
    
          }
    
        2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 134 times.

      166
          if (EXTERNAL_SSE3(cpu_flags)) {
    
      32
              s->imdct36_blocks_float = imdct36_blocks_sse3;
    
          }
    
        2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 134 times.

      166
          if (EXTERNAL_SSSE3(cpu_flags)) {
    
      32
              s->imdct36_blocks_float = imdct36_blocks_ssse3;
    
          }
    
      #endif
    
      #if HAVE_AVX_EXTERNAL
    
        2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 134 times.

      166
          if (EXTERNAL_AVX(cpu_flags)) {
    
      32
              s->imdct36_blocks_float = imdct36_blocks_avx;
    
          }
    
        3/4✓ Branch 0 taken 32 times.
✓ Branch 1 taken 134 times.
✓ Branch 2 taken 32 times.
✗ Branch 3 not taken.

      166
          if (EXTERNAL_AVX_FAST(cpu_flags))
    
      32
              s->dct32_float          = ff_dct32_float_avx;
    
      #endif
    
      #endif /* HAVE_X86ASM */
    
      166
      }

Line	Branch	Exec	Source
1			/*
2			* SIMD-optimized MP3 decoding functions
3			* Copyright (c) 2010 Vitor Sessak
4			*
5			* This file is part of FFmpeg.
6			*
7			* FFmpeg is free software; you can redistribute it and/or
8			* modify it under the terms of the GNU Lesser General Public
9			* License as published by the Free Software Foundation; either
10			* version 2.1 of the License, or (at your option) any later version.
11			*
12			* FFmpeg is distributed in the hope that it will be useful,
13			* but WITHOUT ANY WARRANTY; without even the implied warranty of
14			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15			* Lesser General Public License for more details.
16			*
17			* You should have received a copy of the GNU Lesser General Public
18			* License along with FFmpeg; if not, write to the Free Software
19			* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20			*/
21
22			#include <stddef.h>
23
24			#include "config.h"
25			#include "libavutil/attributes.h"
26			#include "libavutil/cpu.h"
27			#include "libavutil/mem_internal.h"
28			#include "libavutil/x86/asm.h"
29			#include "libavutil/x86/cpu.h"
30			#include "libavcodec/mpegaudiodsp.h"
31
32			#define DECL(CPU)\
33			static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, int switch_point, int block_type);\
34			void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);
35
36			#if HAVE_X86ASM
37			DECL(sse2)
38			DECL(sse3)
39			DECL(ssse3)
40			DECL(avx)
41			#endif /* HAVE_X86ASM */
42
43			void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win,
44			float *tmpbuf);
45			void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
46			float *tmpbuf);
47
48			void ff_dct32_float_sse2(float *out, const float *in);
49			void ff_dct32_float_avx (float *out, const float *in);
50
51			DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];
52
53			#if HAVE_6REGS && HAVE_SSE_INLINE
54
55			#define MACS(rt, ra, rb) rt+=(ra)*(rb)
56			#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
57
58			#define SUM8(op, sum, w, p) \
59			{ \
60			op(sum, (w)[0 * 64], (p)[0 * 64]); \
61			op(sum, (w)[1 * 64], (p)[1 * 64]); \
62			op(sum, (w)[2 * 64], (p)[2 * 64]); \
63			op(sum, (w)[3 * 64], (p)[3 * 64]); \
64			op(sum, (w)[4 * 64], (p)[4 * 64]); \
65			op(sum, (w)[5 * 64], (p)[5 * 64]); \
66			op(sum, (w)[6 * 64], (p)[6 * 64]); \
67			op(sum, (w)[7 * 64], (p)[7 * 64]); \
68			}
69
70		4536	static void apply_window(const float *buf, const float *win1,
71			const float *win2, float *sum1, float *sum2, int len)
72			{
73		4536	x86_reg count = - 4*len;
74		4536	const float *win1a = win1+len;
75		4536	const float *win2a = win2+len;
76		4536	const float *bufa = buf+len;
77		4536	float *sum1a = sum1+len;
78		4536	float *sum2a = sum2+len;
79
80
81			#define MULT(a, b) \
82			"movaps " #a "(%1,%0), %%xmm1 \n\t" \
83			"movaps " #a "(%3,%0), %%xmm2 \n\t" \
84			"mulps %%xmm2, %%xmm1 \n\t" \
85			"subps %%xmm1, %%xmm0 \n\t" \
86			"mulps " #b "(%2,%0), %%xmm2 \n\t" \
87			"subps %%xmm2, %%xmm4 \n\t" \
88
89		4536	__asm__ volatile(
90			"1: \n\t"
91			"xorps %%xmm0, %%xmm0 \n\t"
92			"xorps %%xmm4, %%xmm4 \n\t"
93
94			MULT( 0, 0)
95			MULT( 256, 64)
96			MULT( 512, 128)
97			MULT( 768, 192)
98			MULT(1024, 256)
99			MULT(1280, 320)
100			MULT(1536, 384)
101			MULT(1792, 448)
102
103			"movaps %%xmm0, (%4,%0) \n\t"
104			"movaps %%xmm4, (%5,%0) \n\t"
105			"add $16, %0 \n\t"
106			"jl 1b \n\t"
107			:"+&r"(count)
108			:"r"(win1a), "r"(win2a), "r"(bufa), "r"(sum1a), "r"(sum2a)
109			);
110
111			#undef MULT
112		4536	}
113
114		2268	static void apply_window_mp3(float *in, float *win, int *unused, float *out,
115			ptrdiff_t incr)
116			{
117		2268	LOCAL_ALIGNED_16(float, suma, [17]);
118		2268	LOCAL_ALIGNED_16(float, sumb, [17]);
119		2268	LOCAL_ALIGNED_16(float, sumc, [17]);
120		2268	LOCAL_ALIGNED_16(float, sumd, [17]);
121
122			float sum;
123
124			/* copy to avoid wrap */
125		2268	__asm__ volatile(
126			"movaps 0(%0), %%xmm0 \n\t" \
127			"movaps 16(%0), %%xmm1 \n\t" \
128			"movaps 32(%0), %%xmm2 \n\t" \
129			"movaps 48(%0), %%xmm3 \n\t" \
130			"movaps %%xmm0, 0(%1) \n\t" \
131			"movaps %%xmm1, 16(%1) \n\t" \
132			"movaps %%xmm2, 32(%1) \n\t" \
133			"movaps %%xmm3, 48(%1) \n\t" \
134			"movaps 64(%0), %%xmm0 \n\t" \
135			"movaps 80(%0), %%xmm1 \n\t" \
136			"movaps 96(%0), %%xmm2 \n\t" \
137			"movaps 112(%0), %%xmm3 \n\t" \
138			"movaps %%xmm0, 64(%1) \n\t" \
139			"movaps %%xmm1, 80(%1) \n\t" \
140			"movaps %%xmm2, 96(%1) \n\t" \
141			"movaps %%xmm3, 112(%1) \n\t"
142		2268	::"r"(in), "r"(in+512)
143			:"memory"
144			);
145
146		2268	apply_window(in + 16, win , win + 512, suma, sumc, 16);
147		2268	apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16);
148
149		2268	SUM8(MACS, suma[0], win + 32, in + 48);
150
151		2268	sumc[ 0] = 0;
152		2268	sumb[16] = 0;
153		2268	sumd[16] = 0;
154
155			#define SUMS(suma, sumb, sumc, sumd, out1, out2) \
156			"movups " #sumd "(%4), %%xmm0 \n\t" \
157			"shufps $0x1b, %%xmm0, %%xmm0 \n\t" \
158			"subps " #suma "(%1), %%xmm0 \n\t" \
159			"movaps %%xmm0," #out1 "(%0) \n\t" \
160			\
161			"movups " #sumc "(%3), %%xmm0 \n\t" \
162			"shufps $0x1b, %%xmm0, %%xmm0 \n\t" \
163			"addps " #sumb "(%2), %%xmm0 \n\t" \
164			"movaps %%xmm0," #out2 "(%0) \n\t"
165
166	1/2 ✓ Branch 0 taken 2268 times. ✗ Branch 1 not taken.	2268	if (incr == 1) {
167		2268	__asm__ volatile(
168			SUMS( 0, 48, 4, 52, 0, 112)
169			SUMS(16, 32, 20, 36, 16, 96)
170			SUMS(32, 16, 36, 20, 32, 80)
171			SUMS(48, 0, 52, 4, 48, 64)
172
173			:"+&r"(out)
174			:"r"(&suma[0]), "r"(&sumb[0]), "r"(&sumc[0]), "r"(&sumd[0])
175			:"memory"
176			);
177		2268	out += 16*incr;
178			} else {
179			int j;
180		✗	float *out2 = out + 32 * incr;
181		✗	out[0 ] = -suma[ 0];
182		✗	out += incr;
183		✗	out2 -= incr;
184		✗	for(j=1;j<16;j++) {
185		✗	*out = -suma[ j] + sumd[16-j];
186		✗	*out2 = sumb[16-j] + sumc[ j];
187		✗	out += incr;
188		✗	out2 -= incr;
189			}
190			}
191
192		2268	sum = 0;
193		2268	SUM8(MLSS, sum, win + 16 + 32, in + 32);
194		2268	*out = sum;
195		2268	}
196
197			#endif /* HAVE_6REGS && HAVE_SSE_INLINE */
198
199			#if HAVE_X86ASM
200			#define DECL_IMDCT_BLOCKS(CPU1, CPU2) \
201			static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in, \
202			int count, int switch_point, int block_type) \
203			{ \
204			int align_end = count - (count & 3); \
205			int j; \
206			for (j = 0; j < align_end; j+= 4) { \
207			LOCAL_ALIGNED_16(float, tmpbuf, [1024]); \
208			float *win = mdct_win_sse[switch_point && j < 4][block_type]; \
209			/* apply window & overlap with previous buffer */ \
210			\
211			/* select window */ \
212			ff_four_imdct36_float_ ## CPU2(out, buf, in, win, tmpbuf); \
213			in += 4*18; \
214			buf += 4*18; \
215			out += 4; \
216			} \
217			for (; j < count; j++) { \
218			/* apply window & overlap with previous buffer */ \
219			\
220			/* select window */ \
221			int win_idx = (switch_point && j < 2) ? 0 : block_type; \
222			float *win = ff_mdct_win_float[win_idx + (4 & -(j & 1))]; \
223			\
224			ff_imdct36_float_ ## CPU1(out, buf, in, win); \
225			\
226			in += 18; \
227			buf++; \
228			out++; \
229			} \
230			}
231
232			#if HAVE_SSE
233		✗	DECL_IMDCT_BLOCKS(sse2,sse)
234		✗	DECL_IMDCT_BLOCKS(sse3,sse)
235		✗	DECL_IMDCT_BLOCKS(ssse3,sse)
236			#endif
237			#if HAVE_AVX_EXTERNAL
238	6/12 ✗ Branch 0 not taken. ✓ Branch 1 taken 40 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✓ Branch 5 taken 40 times. ✓ Branch 6 taken 112 times. ✗ Branch 7 not taken. ✓ Branch 8 taken 202 times. ✗ Branch 9 not taken. ✗ Branch 10 not taken. ✓ Branch 12 taken 202 times. ✓ Branch 13 taken 112 times.	354	DECL_IMDCT_BLOCKS(avx,avx)
239			#endif
240			#endif /* HAVE_X86ASM */
241
242		97	av_cold void ff_mpadsp_init_x86_tabs(void)
243			{
244			int i, j;
245	2/2 ✓ Branch 0 taken 388 times. ✓ Branch 1 taken 97 times.	485	for (j = 0; j < 4; j++) {
246	2/2 ✓ Branch 0 taken 15520 times. ✓ Branch 1 taken 388 times.	15908	for (i = 0; i < 40; i ++) {
247		15520	mdct_win_sse[0][j][4*i ] = ff_mdct_win_float[j ][i];
248		15520	mdct_win_sse[0][j][4*i + 1] = ff_mdct_win_float[j + 4][i];
249		15520	mdct_win_sse[0][j][4*i + 2] = ff_mdct_win_float[j ][i];
250		15520	mdct_win_sse[0][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
251		15520	mdct_win_sse[1][j][4*i ] = ff_mdct_win_float[0 ][i];
252		15520	mdct_win_sse[1][j][4*i + 1] = ff_mdct_win_float[4 ][i];
253		15520	mdct_win_sse[1][j][4*i + 2] = ff_mdct_win_float[j ][i];
254		15520	mdct_win_sse[1][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
255			}
256			}
257		97	}
258
259		166	av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
260			{
261		166	av_unused int cpu_flags = av_get_cpu_flags();
262
263			#if HAVE_6REGS && HAVE_SSE_INLINE
264	2/2 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 134 times.	166	if (INLINE_SSE(cpu_flags)) {
265		32	s->apply_window_float = apply_window_mp3;
266			}
267			#endif /* HAVE_SSE_INLINE */
268
269			#if HAVE_X86ASM
270			#if HAVE_SSE
271	2/2 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 134 times.	166	if (EXTERNAL_SSE2(cpu_flags)) {
272		32	s->imdct36_blocks_float = imdct36_blocks_sse2;
273		32	s->dct32_float = ff_dct32_float_sse2;
274			}
275	2/2 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 134 times.	166	if (EXTERNAL_SSE3(cpu_flags)) {
276		32	s->imdct36_blocks_float = imdct36_blocks_sse3;
277			}
278	2/2 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 134 times.	166	if (EXTERNAL_SSSE3(cpu_flags)) {
279		32	s->imdct36_blocks_float = imdct36_blocks_ssse3;
280			}
281			#endif
282			#if HAVE_AVX_EXTERNAL
283	2/2 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 134 times.	166	if (EXTERNAL_AVX(cpu_flags)) {
284		32	s->imdct36_blocks_float = imdct36_blocks_avx;
285			}
286	3/4 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 134 times. ✓ Branch 2 taken 32 times. ✗ Branch 3 not taken.	166	if (EXTERNAL_AVX_FAST(cpu_flags))
287		32	s->dct32_float = ff_dct32_float_avx;
288			#endif
289			#endif /* HAVE_X86ASM */
290		166	}
291

Function (Line)	Call count	Block coverage
apply_window (line 70)	called 4536 times, returned 4536 times	100.0%
apply_window_mp3 (line 114)	called 2268 times, returned 2268 times	70.0%
ff_mpadsp_init_x86 (line 259)	called 166 times, returned 166 times	100.0%
ff_mpadsp_init_x86_tabs (line 242)	called 97 times, returned 97 times	100.0%
imdct36_blocks_avx (line 238)	called 112 times, returned 112 times	76.0%
imdct36_blocks_sse2 (line 233)	not called	0.0%
imdct36_blocks_sse3 (line 234)	not called	0.0%
imdct36_blocks_ssse3 (line 235)	not called	0.0%