FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/mpegaudiodsp.c
Date: 2024-04-19 07:31:02
Exec Total Coverage
Lines: 58 70 82.9%
Functions: 5 8 62.5%
Branches: 24 70 34.3%

Line Branch Exec Source
1 /*
2 * SIMD-optimized MP3 decoding functions
3 * Copyright (c) 2010 Vitor Sessak
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include <stddef.h>
23
24 #include "config.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/cpu.h"
27 #include "libavutil/mem_internal.h"
28 #include "libavutil/x86/asm.h"
29 #include "libavutil/x86/cpu.h"
30 #include "libavcodec/mpegaudiodsp.h"
31
32 #define DECL(CPU)\
33 static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, int switch_point, int block_type);\
34 void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);
35
36 #if HAVE_X86ASM
37 DECL(sse2)
38 DECL(sse3)
39 DECL(ssse3)
40 DECL(avx)
41 #endif /* HAVE_X86ASM */
42
43 void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win,
44 float *tmpbuf);
45 void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
46 float *tmpbuf);
47
48 void ff_dct32_float_sse2(float *out, const float *in);
49 void ff_dct32_float_avx (float *out, const float *in);
50
51 DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];
52
53 #if HAVE_6REGS && HAVE_SSE_INLINE
54
/* Multiply-accumulate: rt += ra * rb. */
#define MACS(rt, ra, rb) ((rt) += (ra) * (rb))
/* Multiply-subtract: rt -= ra * rb. */
#define MLSS(rt, ra, rb) ((rt) -= (ra) * (rb))

/*
 * Apply op (MACS or MLSS) to sum for the eight element pairs
 * (w[k * 64], p[k * 64]), k = 0..7.
 *
 * Wrapped in do { } while (0) so that "SUM8(...);" is exactly one statement
 * and stays valid as the body of an unbraced if/else.
 */
#define SUM8(op, sum, w, p)                 \
    do {                                    \
        op(sum, (w)[0 * 64], (p)[0 * 64]);  \
        op(sum, (w)[1 * 64], (p)[1 * 64]);  \
        op(sum, (w)[2 * 64], (p)[2 * 64]);  \
        op(sum, (w)[3 * 64], (p)[3 * 64]);  \
        op(sum, (w)[4 * 64], (p)[4 * 64]);  \
        op(sum, (w)[5 * 64], (p)[5 * 64]);  \
        op(sum, (w)[6 * 64], (p)[6 * 64]);  \
        op(sum, (w)[7 * 64], (p)[7 * 64]);  \
    } while (0)
69
70 4536 static void apply_window(const float *buf, const float *win1,
71 const float *win2, float *sum1, float *sum2, int len)
72 {
73 4536 x86_reg count = - 4*len;
74 4536 const float *win1a = win1+len;
75 4536 const float *win2a = win2+len;
76 4536 const float *bufa = buf+len;
77 4536 float *sum1a = sum1+len;
78 4536 float *sum2a = sum2+len;
79
80
81 #define MULT(a, b) \
82 "movaps " #a "(%1,%0), %%xmm1 \n\t" \
83 "movaps " #a "(%3,%0), %%xmm2 \n\t" \
84 "mulps %%xmm2, %%xmm1 \n\t" \
85 "subps %%xmm1, %%xmm0 \n\t" \
86 "mulps " #b "(%2,%0), %%xmm2 \n\t" \
87 "subps %%xmm2, %%xmm4 \n\t" \
88
89 4536 __asm__ volatile(
90 "1: \n\t"
91 "xorps %%xmm0, %%xmm0 \n\t"
92 "xorps %%xmm4, %%xmm4 \n\t"
93
94 MULT( 0, 0)
95 MULT( 256, 64)
96 MULT( 512, 128)
97 MULT( 768, 192)
98 MULT(1024, 256)
99 MULT(1280, 320)
100 MULT(1536, 384)
101 MULT(1792, 448)
102
103 "movaps %%xmm0, (%4,%0) \n\t"
104 "movaps %%xmm4, (%5,%0) \n\t"
105 "add $16, %0 \n\t"
106 "jl 1b \n\t"
107 :"+&r"(count)
108 :"r"(win1a), "r"(win2a), "r"(bufa), "r"(sum1a), "r"(sum2a)
109 );
110
111 #undef MULT
112 4536 }
113
114 2268 static void apply_window_mp3(float *in, float *win, int *unused, float *out,
115 ptrdiff_t incr)
116 {
117 2268 LOCAL_ALIGNED_16(float, suma, [17]);
118 2268 LOCAL_ALIGNED_16(float, sumb, [17]);
119 2268 LOCAL_ALIGNED_16(float, sumc, [17]);
120 2268 LOCAL_ALIGNED_16(float, sumd, [17]);
121
122 float sum;
123
124 /* copy to avoid wrap */
125 2268 __asm__ volatile(
126 "movaps 0(%0), %%xmm0 \n\t" \
127 "movaps 16(%0), %%xmm1 \n\t" \
128 "movaps 32(%0), %%xmm2 \n\t" \
129 "movaps 48(%0), %%xmm3 \n\t" \
130 "movaps %%xmm0, 0(%1) \n\t" \
131 "movaps %%xmm1, 16(%1) \n\t" \
132 "movaps %%xmm2, 32(%1) \n\t" \
133 "movaps %%xmm3, 48(%1) \n\t" \
134 "movaps 64(%0), %%xmm0 \n\t" \
135 "movaps 80(%0), %%xmm1 \n\t" \
136 "movaps 96(%0), %%xmm2 \n\t" \
137 "movaps 112(%0), %%xmm3 \n\t" \
138 "movaps %%xmm0, 64(%1) \n\t" \
139 "movaps %%xmm1, 80(%1) \n\t" \
140 "movaps %%xmm2, 96(%1) \n\t" \
141 "movaps %%xmm3, 112(%1) \n\t"
142 2268 ::"r"(in), "r"(in+512)
143 :"memory"
144 );
145
146 2268 apply_window(in + 16, win , win + 512, suma, sumc, 16);
147 2268 apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16);
148
149 2268 SUM8(MACS, suma[0], win + 32, in + 48);
150
151 2268 sumc[ 0] = 0;
152 2268 sumb[16] = 0;
153 2268 sumd[16] = 0;
154
155 #define SUMS(suma, sumb, sumc, sumd, out1, out2) \
156 "movups " #sumd "(%4), %%xmm0 \n\t" \
157 "shufps $0x1b, %%xmm0, %%xmm0 \n\t" \
158 "subps " #suma "(%1), %%xmm0 \n\t" \
159 "movaps %%xmm0," #out1 "(%0) \n\t" \
160 \
161 "movups " #sumc "(%3), %%xmm0 \n\t" \
162 "shufps $0x1b, %%xmm0, %%xmm0 \n\t" \
163 "addps " #sumb "(%2), %%xmm0 \n\t" \
164 "movaps %%xmm0," #out2 "(%0) \n\t"
165
166
1/2
✓ Branch 0 taken 2268 times.
✗ Branch 1 not taken.
2268 if (incr == 1) {
167 2268 __asm__ volatile(
168 SUMS( 0, 48, 4, 52, 0, 112)
169 SUMS(16, 32, 20, 36, 16, 96)
170 SUMS(32, 16, 36, 20, 32, 80)
171 SUMS(48, 0, 52, 4, 48, 64)
172
173 :"+&r"(out)
174 :"r"(&suma[0]), "r"(&sumb[0]), "r"(&sumc[0]), "r"(&sumd[0])
175 :"memory"
176 );
177 2268 out += 16*incr;
178 } else {
179 int j;
180 float *out2 = out + 32 * incr;
181 out[0 ] = -suma[ 0];
182 out += incr;
183 out2 -= incr;
184 for(j=1;j<16;j++) {
185 *out = -suma[ j] + sumd[16-j];
186 *out2 = sumb[16-j] + sumc[ j];
187 out += incr;
188 out2 -= incr;
189 }
190 }
191
192 2268 sum = 0;
193 2268 SUM8(MLSS, sum, win + 16 + 32, in + 32);
194 2268 *out = sum;
195 2268 }
196
197 #endif /* HAVE_6REGS && HAVE_SSE_INLINE */
198
199 #if HAVE_X86ASM
/*
 * Define imdct36_blocks_<CPU1>(): run the IMDCT over count consecutive
 * blocks. Groups of four blocks go through ff_four_imdct36_float_<CPU2>()
 * with the interleaved mdct_win_sse table; the remaining count % 4 blocks
 * are handled one at a time by ff_imdct36_float_<CPU1>() with the plain
 * ff_mdct_win_float table.
 */
#define DECL_IMDCT_BLOCKS(CPU1, CPU2)                                       \
static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in,      \
                                    int count, int switch_point,            \
                                    int block_type)                         \
{                                                                           \
    const int quad_end = count & ~3;                                        \
    int blk = 0;                                                            \
                                                                            \
    while (blk < quad_end) {                                                \
        LOCAL_ALIGNED_16(float, tmpbuf, [1024]);                            \
        /* select window: with a switch point, the first group of four */   \
        /* uses the second table variant */                                 \
        float *win = mdct_win_sse[switch_point && blk < 4][block_type];     \
                                                                            \
        /* apply window & overlap with previous buffer */                   \
        ff_four_imdct36_float_ ## CPU2(out, buf, in, win, tmpbuf);          \
                                                                            \
        in  += 4 * 18;                                                      \
        buf += 4 * 18;                                                      \
        out += 4;                                                           \
        blk += 4;                                                           \
    }                                                                       \
                                                                            \
    while (blk < count) {                                                   \
        /* select window: window 0 for the first two blocks when a */       \
        /* switch point is set; odd blocks use the row 4 entries further */ \
        int row = (switch_point && blk < 2) ? 0 : block_type;               \
        float *win;                                                         \
                                                                            \
        if (blk & 1)                                                        \
            row += 4;                                                       \
        win = ff_mdct_win_float[row];                                       \
                                                                            \
        /* apply window & overlap with previous buffer */                   \
        ff_imdct36_float_ ## CPU1(out, buf, in, win);                       \
                                                                            \
        in  += 18;                                                          \
        buf += 1;                                                           \
        out += 1;                                                           \
        blk += 1;                                                           \
    }                                                                       \
}

#if HAVE_SSE
DECL_IMDCT_BLOCKS(sse2,  sse)
DECL_IMDCT_BLOCKS(sse3,  sse)
DECL_IMDCT_BLOCKS(ssse3, sse)
#endif
#if HAVE_AVX_EXTERNAL
DECL_IMDCT_BLOCKS(avx,   avx)
#endif
240 #endif /* HAVE_X86ASM */
241
242 97 av_cold void ff_mpadsp_init_x86_tabs(void)
243 {
244 int i, j;
245
2/2
✓ Branch 0 taken 388 times.
✓ Branch 1 taken 97 times.
485 for (j = 0; j < 4; j++) {
246
2/2
✓ Branch 0 taken 15520 times.
✓ Branch 1 taken 388 times.
15908 for (i = 0; i < 40; i ++) {
247 15520 mdct_win_sse[0][j][4*i ] = ff_mdct_win_float[j ][i];
248 15520 mdct_win_sse[0][j][4*i + 1] = ff_mdct_win_float[j + 4][i];
249 15520 mdct_win_sse[0][j][4*i + 2] = ff_mdct_win_float[j ][i];
250 15520 mdct_win_sse[0][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
251 15520 mdct_win_sse[1][j][4*i ] = ff_mdct_win_float[0 ][i];
252 15520 mdct_win_sse[1][j][4*i + 1] = ff_mdct_win_float[4 ][i];
253 15520 mdct_win_sse[1][j][4*i + 2] = ff_mdct_win_float[j ][i];
254 15520 mdct_win_sse[1][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
255 }
256 }
257 97 }
258
259 156 av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
260 {
261 156 av_unused int cpu_flags = av_get_cpu_flags();
262
263 #if HAVE_6REGS && HAVE_SSE_INLINE
264
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 124 times.
156 if (INLINE_SSE(cpu_flags)) {
265 32 s->apply_window_float = apply_window_mp3;
266 }
267 #endif /* HAVE_SSE_INLINE */
268
269 #if HAVE_X86ASM
270 #if HAVE_SSE
271
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 124 times.
156 if (EXTERNAL_SSE2(cpu_flags)) {
272 32 s->imdct36_blocks_float = imdct36_blocks_sse2;
273 32 s->dct32_float = ff_dct32_float_sse2;
274 }
275
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 124 times.
156 if (EXTERNAL_SSE3(cpu_flags)) {
276 32 s->imdct36_blocks_float = imdct36_blocks_sse3;
277 }
278
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 124 times.
156 if (EXTERNAL_SSSE3(cpu_flags)) {
279 32 s->imdct36_blocks_float = imdct36_blocks_ssse3;
280 }
281 #endif
282 #if HAVE_AVX_EXTERNAL
283
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 124 times.
156 if (EXTERNAL_AVX(cpu_flags)) {
284 32 s->imdct36_blocks_float = imdct36_blocks_avx;
285 }
286
3/4
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 124 times.
✓ Branch 2 taken 32 times.
✗ Branch 3 not taken.
156 if (EXTERNAL_AVX_FAST(cpu_flags))
287 32 s->dct32_float = ff_dct32_float_avx;
288 #endif
289 #endif /* HAVE_X86ASM */
290 156 }
291