FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/mpegaudiodsp.c
Date: 2022-12-05 03:11:11
Exec Total Coverage
Lines: 55 67 82.1%
Functions: 5 8 62.5%
Branches: 21 66 31.8%

Line Branch Exec Source
1 /*
2 * SIMD-optimized MP3 decoding functions
3 * Copyright (c) 2010 Vitor Sessak
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include <stddef.h>
23
24 #include "config.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/cpu.h"
27 #include "libavutil/mem_internal.h"
28 #include "libavutil/x86/asm.h"
29 #include "libavutil/x86/cpu.h"
30 #include "libavcodec/mpegaudiodsp.h"
31
/* DECL(CPU) emits two declarations for one CPU flavour:
 *  - a forward declaration of the static wrapper imdct36_blocks_<CPU>,
 *    whose body is generated further down by DECL_IMDCT_BLOCKS;
 *  - a prototype for ff_imdct36_float_<CPU>, which is only declared here
 *    (its definition is not part of the code shown in this file). */
32 #define DECL(CPU)\
33 static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, int switch_point, int block_type);\
34 void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);
35
36 #if HAVE_X86ASM
37 DECL(sse2)
38 DECL(sse3)
39 DECL(ssse3)
40 DECL(avx)
41 #endif /* HAVE_X86ASM */
42
/* Routines that handle four blocks per call; they take an extra scratch
 * buffer (tmpbuf). Only prototypes appear here; the wrappers generated by
 * DECL_IMDCT_BLOCKS call them for each full group of four blocks. */
43 void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win,
44 float *tmpbuf);
45 void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
46 float *tmpbuf);
47
/* 16-byte aligned, lane-interleaved copies of ff_mdct_win_float, filled by
 * ff_mpadsp_init_x86_tabs(). Indexed as
 * [switch_point && j < 4][block_type][4*i + lane], with i in 0..39. */
48 DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];
49
50 #if HAVE_6REGS && HAVE_SSE_INLINE
51
/* Multiply-accumulate (MACS) and multiply-subtract (MLSS) on an lvalue rt.
 * ra and rb are parenthesised in the expansion; rt is not. */
52 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
53 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
54
/* SUM8(op, sum, w, p): apply op(sum, w[64*k], p[64*k]) for k = 0..7, i.e.
 * fold eight products taken at a stride of 64 elements into sum.
 * The expansion is a bare { ... } block, not a do { } while (0). */
55 #define SUM8(op, sum, w, p) \
56 { \
57 op(sum, (w)[0 * 64], (p)[0 * 64]); \
58 op(sum, (w)[1 * 64], (p)[1 * 64]); \
59 op(sum, (w)[2 * 64], (p)[2 * 64]); \
60 op(sum, (w)[3 * 64], (p)[3 * 64]); \
61 op(sum, (w)[4 * 64], (p)[4 * 64]); \
62 op(sum, (w)[5 * 64], (p)[5 * 64]); \
63 op(sum, (w)[6 * 64], (p)[6 * 64]); \
64 op(sum, (w)[7 * 64], (p)[7 * 64]); \
65 }
66
/*
 * Windowed accumulation with SSE inline asm, four floats per loop pass.
 *
 * For every i in [0, len) the loop stores
 *   sum1[i] = -( win1[i + 64*k] * buf[i + 64*k], summed over k = 0..7 )
 *   sum2[i] = -( win2[i + 16*k] * buf[i + 64*k], summed over k = 0..7 )
 * The MULT displacements are in bytes: 256 bytes = 64 floats for win1/buf,
 * 64 bytes = 16 floats for win2.
 *
 * All loads and stores are movaps, so every address touched must be
 * 16-byte aligned. The loop body runs before the counter is tested and the
 * counter advances 16 bytes per pass; both callers in this file pass
 * len = 16.
 *
 * NOTE(review): the asm statement has no clobber list although it
 * overwrites xmm0, xmm1, xmm2 and xmm4 and stores through %4 / %5 (sum1a,
 * sum2a). Confirm that omitting the XMM and "memory" clobbers is
 * intentional.
 */
67 4536 static void apply_window(const float *buf, const float *win1,
68 const float *win2, float *sum1, float *sum2, int len)
69 {
70 4536 x86_reg count = - 4*len; /* negative byte offset; counts up towards 0 */
/* Each base pointer is advanced by len elements so that base + count
 * starts at the original address and stops when count reaches 0. */
71 4536 const float *win1a = win1+len;
72 4536 const float *win2a = win2+len;
73 4536 const float *bufa = buf+len;
74 4536 float *sum1a = sum1+len;
75 4536 float *sum2a = sum2+len;
76
77
/* MULT(a, b): one tap of both running sums.
 *   xmm1 = win1a[a + count]; xmm2 = bufa[a + count];
 *   xmm0 -= xmm1 * xmm2;     xmm4 -= xmm2 * win2a[b + count];
 * Operands: %0 = count, %1 = win1a, %2 = win2a, %3 = bufa. */
78 #define MULT(a, b) \
79 "movaps " #a "(%1,%0), %%xmm1 \n\t" \
80 "movaps " #a "(%3,%0), %%xmm2 \n\t" \
81 "mulps %%xmm2, %%xmm1 \n\t" \
82 "subps %%xmm1, %%xmm0 \n\t" \
83 "mulps " #b "(%2,%0), %%xmm2 \n\t" \
84 "subps %%xmm2, %%xmm4 \n\t" \
85
/* Per pass: clear xmm0/xmm4, run the eight taps, store xmm0 to sum1a+count
 * and xmm4 to sum2a+count, then add 16 to count and loop while it is
 * still negative. */
86 4536 __asm__ volatile(
87 "1: \n\t"
88 "xorps %%xmm0, %%xmm0 \n\t"
89 "xorps %%xmm4, %%xmm4 \n\t"
90
91 MULT( 0, 0)
92 MULT( 256, 64)
93 MULT( 512, 128)
94 MULT( 768, 192)
95 MULT(1024, 256)
96 MULT(1280, 320)
97 MULT(1536, 384)
98 MULT(1792, 448)
99
100 "movaps %%xmm0, (%4,%0) \n\t"
101 "movaps %%xmm4, (%5,%0) \n\t"
102 "add $16, %0 \n\t"
103 "jl 1b \n\t"
104 :"+&r"(count)
105 :"r"(win1a), "r"(win2a), "r"(bufa), "r"(sum1a), "r"(sum2a)
106 );
107
108 #undef MULT
109 4536 }
110
/*
 * SSE windowing routine; ff_mpadsp_init_x86() installs it as
 * s->apply_window_float.
 *
 * in     - sample buffer, read with movaps (16-byte alignment required).
 *          in[512..543] is overwritten with a copy of in[0..31].
 * win    - window coefficients.
 * unused - never referenced in this function.
 * out    - receives 32 values at out[k * incr], k = 0..31.
 * incr   - output stride in floats; incr == 1 takes the asm fast path,
 *          which stores with movaps (so out must be 16-byte aligned there).
 *
 * Results, as computed by both paths:
 *   out[0]          = -suma[0]  (sumd[16] is forced to 0)
 *   out[j]          = sumd[16-j] - suma[j],   j = 1..15
 *   out[32-j]       = sumb[16-j] + sumc[j],   j = 1..15
 *   out[16]         = -( win[48 + 64*k] * in[32 + 64*k], summed over k = 0..7 )
 * (indices above are in units of incr).
 */
111 2268 static void apply_window_mp3(float *in, float *win, int *unused, float *out,
112 ptrdiff_t incr)
113 {
/* 17 floats each: apply_window() fills indices 0..15, index 16 is an
 * extra slot that the reversed 4-wide loads below can reach. */
114 2268 LOCAL_ALIGNED_16(float, suma, [17]);
115 2268 LOCAL_ALIGNED_16(float, sumb, [17]);
116 2268 LOCAL_ALIGNED_16(float, sumc, [17]);
117 2268 LOCAL_ALIGNED_16(float, sumd, [17]);
118
119 float sum;
120
/* Copy 128 bytes (32 floats) from in to in + 512 floats, four XMM
 * registers at a time.
 * NOTE(review): xmm0..xmm3 are overwritten but only "memory" is listed
 * as a clobber - confirm this is intentional. */
121 /* copy to avoid wrap */
122 2268 __asm__ volatile(
123 "movaps 0(%0), %%xmm0 \n\t" \
124 "movaps 16(%0), %%xmm1 \n\t" \
125 "movaps 32(%0), %%xmm2 \n\t" \
126 "movaps 48(%0), %%xmm3 \n\t" \
127 "movaps %%xmm0, 0(%1) \n\t" \
128 "movaps %%xmm1, 16(%1) \n\t" \
129 "movaps %%xmm2, 32(%1) \n\t" \
130 "movaps %%xmm3, 48(%1) \n\t" \
131 "movaps 64(%0), %%xmm0 \n\t" \
132 "movaps 80(%0), %%xmm1 \n\t" \
133 "movaps 96(%0), %%xmm2 \n\t" \
134 "movaps 112(%0), %%xmm3 \n\t" \
135 "movaps %%xmm0, 64(%1) \n\t" \
136 "movaps %%xmm1, 80(%1) \n\t" \
137 "movaps %%xmm2, 96(%1) \n\t" \
138 "movaps %%xmm3, 112(%1) \n\t"
139 2268 ::"r"(in), "r"(in+512)
140 :"memory"
141 );
142
/* Two passes of 16 outputs each: (suma, sumc) from in + 16 and
 * (sumb, sumd) from in + 32, each with its own pair of window offsets. */
143 2268 apply_window(in + 16, win , win + 512, suma, sumc, 16);
144 2268 apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16);
145
146 2268 SUM8(MACS, suma[0], win + 32, in + 48); /* extra eight taps added to suma[0] only */
147
148 2268 sumc[ 0] = 0;
149 2268 sumb[16] = 0;
150 2268 sumd[16] = 0; /* makes the reversed load below yield out[0] = -suma[0] */
151
/* SUMS: two 4-float results per invocation; all arguments are byte
 * offsets. Operands: %0 = out, %1 = suma, %2 = sumb, %3 = sumc, %4 = sumd.
 *   out[out1 ..] = reverse(sumd[sumd ..]) - suma[suma ..]
 *   out[out2 ..] = reverse(sumc[sumc ..]) + sumb[sumb ..]
 * shufps $0x1b reverses the four lanes; the sumc/sumd loads are movups
 * because their offsets (4, 20, 36, 52) are not 16-byte multiples. */
152 #define SUMS(suma, sumb, sumc, sumd, out1, out2) \
153 "movups " #sumd "(%4), %%xmm0 \n\t" \
154 "shufps $0x1b, %%xmm0, %%xmm0 \n\t" \
155 "subps " #suma "(%1), %%xmm0 \n\t" \
156 "movaps %%xmm0," #out1 "(%0) \n\t" \
157 \
158 "movups " #sumc "(%3), %%xmm0 \n\t" \
159 "shufps $0x1b, %%xmm0, %%xmm0 \n\t" \
160 "addps " #sumb "(%2), %%xmm0 \n\t" \
161 "movaps %%xmm0," #out2 "(%0) \n\t"
162
163
1/2
✓ Branch 0 taken 2268 times.
✗ Branch 1 not taken.
2268 if (incr == 1) {
/* Contiguous output: the four SUMS lines write out[0..15] and
 * out[16..31]. The last line loads sumc[13..16]; sumc[16] is not
 * assigned anywhere in this function, but it only feeds out[16], which
 * is overwritten by the final "*out = sum" below. */
164 2268 __asm__ volatile(
165 SUMS( 0, 48, 4, 52, 0, 112)
166 SUMS(16, 32, 20, 36, 16, 96)
167 SUMS(32, 16, 36, 20, 32, 80)
168 SUMS(48, 0, 52, 4, 48, 64)
169
170 :"+&r"(out)
171 :"r"(&suma[0]), "r"(&sumb[0]), "r"(&sumc[0]), "r"(&sumd[0])
172 :"memory"
173 );
174 2268 out += 16*incr; /* leave out pointing at the middle sample, as the scalar path does */
175 } else {
/* Strided output: scalar equivalent of the asm path, walking out
 * forwards from index 0 and out2 backwards from index 32. */
176 int j;
177 float *out2 = out + 32 * incr;
178 out[0 ] = -suma[ 0];
179 out += incr;
180 out2 -= incr;
181 for(j=1;j<16;j++) {
182 *out = -suma[ j] + sumd[16-j];
183 *out2 = sumb[16-j] + sumc[ j];
184 out += incr;
185 out2 -= incr;
186 }
187 }
188
/* Middle sample (index 16 * incr): negated sum of eight products. */
189 2268 sum = 0;
190 2268 SUM8(MLSS, sum, win + 16 + 32, in + 32);
191 2268 *out = sum;
192 2268 }
193
194 #endif /* HAVE_6REGS && HAVE_SSE_INLINE */
195
196 #if HAVE_X86ASM
/*
 * DECL_IMDCT_BLOCKS(CPU1, CPU2) defines imdct36_blocks_<CPU1>(), which
 * processes `count` blocks in two phases:
 *
 *  1. Full groups of four (j < count - (count & 3)): one call to
 *     ff_four_imdct36_float_<CPU2> per group, with a 16-byte aligned
 *     1024-float scratch buffer and the interleaved window
 *     mdct_win_sse[switch_point && j < 4][block_type]. Afterwards `in` and
 *     `buf` advance by 4*18 floats and `out` by 4.
 *
 *  2. Remaining 0..3 blocks: one call to ff_imdct36_float_<CPU1> per block
 *     with ff_mdct_win_float[win_idx + (4 & -(j & 1))], where win_idx is 0
 *     when switch_point is set and j < 2, otherwise block_type; the
 *     (4 & -(j & 1)) term adds 4 for odd j. Afterwards `in` advances by 18
 *     floats while `buf` and `out` advance by 1.
 *
 * NOTE(review): `buf` advances by 4*18 per group but by 1 per single block;
 * presumably the overlap buffer is laid out in interleaved groups of four -
 * confirm against the caller.
 */
197 #define DECL_IMDCT_BLOCKS(CPU1, CPU2) \
198 static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in, \
199 int count, int switch_point, int block_type) \
200 { \
201 int align_end = count - (count & 3); \
202 int j; \
203 for (j = 0; j < align_end; j+= 4) { \
204 LOCAL_ALIGNED_16(float, tmpbuf, [1024]); \
205 float *win = mdct_win_sse[switch_point && j < 4][block_type]; \
206 /* apply window & overlap with previous buffer */ \
207 \
208 /* select window */ \
209 ff_four_imdct36_float_ ## CPU2(out, buf, in, win, tmpbuf); \
210 in += 4*18; \
211 buf += 4*18; \
212 out += 4; \
213 } \
214 for (; j < count; j++) { \
215 /* apply window & overlap with previous buffer */ \
216 \
217 /* select window */ \
218 int win_idx = (switch_point && j < 2) ? 0 : block_type; \
219 float *win = ff_mdct_win_float[win_idx + (4 & -(j & 1))]; \
220 \
221 ff_imdct36_float_ ## CPU1(out, buf, in, win); \
222 \
223 in += 18; \
224 buf++; \
225 out++; \
226 } \
227 }
228
/* Instantiate the block-loop wrappers. The sse2, sse3 and ssse3 wrappers
 * all use ff_four_imdct36_float_sse for groups of four and differ only in
 * the single-block routine they call; the avx wrapper uses
 * ff_four_imdct36_float_avx and ff_imdct36_float_avx. */
229 #if HAVE_SSE
230 DECL_IMDCT_BLOCKS(sse2,sse)
231 DECL_IMDCT_BLOCKS(sse3,sse)
232 DECL_IMDCT_BLOCKS(ssse3,sse)
233 #endif
234 #if HAVE_AVX_EXTERNAL
235
6/12
✗ Branch 0 not taken.
✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 5 taken 40 times.
✓ Branch 6 taken 112 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 202 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 12 taken 202 times.
✓ Branch 13 taken 112 times.
354 DECL_IMDCT_BLOCKS(avx,avx)
236 #endif
237 #endif /* HAVE_X86ASM */
238
/*
 * Fill mdct_win_sse from ff_mdct_win_float. For every window type j (0..3)
 * and coefficient i (0..39), four lanes are written at index 4*i:
 *
 *   mdct_win_sse[0][j]: { win[j][i], win[j+4][i], win[j][i], win[j+4][i] }
 *   mdct_win_sse[1][j]: { win[0][i], win[4][i],   win[j][i], win[j+4][i] }
 *
 * where win = ff_mdct_win_float. Table [1] therefore uses window type 0 for
 * its first two lanes; imdct36_blocks_* selects it when switch_point is set
 * and j < 4, matching the scalar tail loop's choice of window 0 for j < 2.
 */
239 97 av_cold void ff_mpadsp_init_x86_tabs(void)
240 {
241 int i, j;
242
2/2
✓ Branch 0 taken 388 times.
✓ Branch 1 taken 97 times.
485 for (j = 0; j < 4; j++) {
243
2/2
✓ Branch 0 taken 15520 times.
✓ Branch 1 taken 388 times.
15908 for (i = 0; i < 40; i ++) {
/* table 0: same window type in both lane pairs */
244 15520 mdct_win_sse[0][j][4*i ] = ff_mdct_win_float[j ][i];
245 15520 mdct_win_sse[0][j][4*i + 1] = ff_mdct_win_float[j + 4][i];
246 15520 mdct_win_sse[0][j][4*i + 2] = ff_mdct_win_float[j ][i];
247 15520 mdct_win_sse[0][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
/* table 1: lanes 0-1 from window type 0, lanes 2-3 from type j */
248 15520 mdct_win_sse[1][j][4*i ] = ff_mdct_win_float[0 ][i];
249 15520 mdct_win_sse[1][j][4*i + 1] = ff_mdct_win_float[4 ][i];
250 15520 mdct_win_sse[1][j][4*i + 2] = ff_mdct_win_float[j ][i];
251 15520 mdct_win_sse[1][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
252 }
253 }
254 97 }
255
/*
 * Install the x86 implementations into *s according to the CPU flags
 * returned by av_get_cpu_flags().
 *
 *  - apply_window_float is set to apply_window_mp3 when INLINE_SSE() holds
 *    (only compiled in with HAVE_6REGS && HAVE_SSE_INLINE).
 *  - imdct36_blocks_float is assigned by a series of independent checks,
 *    so the last one that matches wins: SSE2, then SSE3, then SSSE3, then
 *    AVX.
 *
 * Fields are left untouched when no check matches. cpu_flags is marked
 * av_unused because every use can be compiled out.
 */
256 166 av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
257 {
258 166 av_unused int cpu_flags = av_get_cpu_flags();
259
260 #if HAVE_6REGS && HAVE_SSE_INLINE
261
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 134 times.
166 if (INLINE_SSE(cpu_flags)) {
262 32 s->apply_window_float = apply_window_mp3;
263 }
264 #endif /* HAVE_6REGS && HAVE_SSE_INLINE */
265
266 #if HAVE_X86ASM
267 #if HAVE_SSE
268
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 134 times.
166 if (EXTERNAL_SSE2(cpu_flags)) {
269 32 s->imdct36_blocks_float = imdct36_blocks_sse2;
270 }
271
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 134 times.
166 if (EXTERNAL_SSE3(cpu_flags)) {
272 32 s->imdct36_blocks_float = imdct36_blocks_sse3;
273 }
274
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 134 times.
166 if (EXTERNAL_SSSE3(cpu_flags)) {
275 32 s->imdct36_blocks_float = imdct36_blocks_ssse3;
276 }
277 #endif
278 #if HAVE_AVX_EXTERNAL
279
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 134 times.
166 if (EXTERNAL_AVX(cpu_flags)) {
280 32 s->imdct36_blocks_float = imdct36_blocks_avx;
281 }
282 #endif
283 #endif /* HAVE_X86ASM */
284 166 }
285