GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/x86/mpegaudiodsp.c Lines: 55 67 82.1 %
Date: 2021-01-20 23:14:43 Branches: 21 66 31.8 %

Line Branch Exec Source
1
/*
2
 * SIMD-optimized MP3 decoding functions
3
 * Copyright (c) 2010 Vitor Sessak
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
22
#include "libavutil/attributes.h"
23
#include "libavutil/cpu.h"
24
#include "libavutil/internal.h"
25
#include "libavutil/mem_internal.h"
26
#include "libavutil/x86/asm.h"
27
#include "libavutil/x86/cpu.h"
28
#include "libavcodec/mpegaudiodsp.h"
29
30
/*
 * For one instruction-set suffix CPU, forward-declare:
 *  - the file-local block driver imdct36_blocks_<CPU>(), and
 *  - the non-static single-block kernel ff_imdct36_float_<CPU>(), which is
 *    not defined in this file.
 */
#define DECL(CPU)\
31
static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, int switch_point, int block_type);\
32
void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);
33
34
#if HAVE_X86ASM
35
#if ARCH_X86_32
36
DECL(sse)
37
#endif
38
DECL(sse2)
39
DECL(sse3)
40
DECL(ssse3)
41
DECL(avx)
42
#endif /* HAVE_X86ASM */
43
44
void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win,
45
                               float *tmpbuf);
46
void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
47
                               float *tmpbuf);
48
49
DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];
50
51
#if HAVE_6REGS && HAVE_SSE_INLINE
52
53
/* Multiply-accumulate: rt += ra * rb. rt is used unparenthesised, so pass an lvalue. */
#define MACS(rt, ra, rb) rt+=(ra)*(rb)
54
/* Multiply-subtract: rt -= ra * rb. rt is used unparenthesised, so pass an lvalue. */
#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
55
56
/*
 * Apply op (MACS or MLSS) to sum for eight taps of w and p, taken at element
 * offsets 0, 64, 128, ..., 448 from each pointer.
 * Expands to a braced block, not do { } while (0), so it is meant to be used
 * as a standalone statement.
 */
#define SUM8(op, sum, w, p)               \
57
{                                         \
58
    op(sum, (w)[0 * 64], (p)[0 * 64]);    \
59
    op(sum, (w)[1 * 64], (p)[1 * 64]);    \
60
    op(sum, (w)[2 * 64], (p)[2 * 64]);    \
61
    op(sum, (w)[3 * 64], (p)[3 * 64]);    \
62
    op(sum, (w)[4 * 64], (p)[4 * 64]);    \
63
    op(sum, (w)[5 * 64], (p)[5 * 64]);    \
64
    op(sum, (w)[6 * 64], (p)[6 * 64]);    \
65
    op(sum, (w)[7 * 64], (p)[7 * 64]);    \
66
}
67
68
4176
static void apply_window(const float *buf, const float *win1,
69
                         const float *win2, float *sum1, float *sum2, int len)
70
{
71
4176
    x86_reg count = - 4*len;
72
4176
    const float *win1a = win1+len;
73
4176
    const float *win2a = win2+len;
74
4176
    const float *bufa  = buf+len;
75
4176
    float *sum1a = sum1+len;
76
4176
    float *sum2a = sum2+len;
77
78
79
#define MULT(a, b)                                 \
80
    "movaps " #a "(%1,%0), %%xmm1           \n\t"  \
81
    "movaps " #a "(%3,%0), %%xmm2           \n\t"  \
82
    "mulps         %%xmm2, %%xmm1           \n\t"  \
83
    "subps         %%xmm1, %%xmm0           \n\t"  \
84
    "mulps  " #b "(%2,%0), %%xmm2           \n\t"  \
85
    "subps         %%xmm2, %%xmm4           \n\t"  \
86
87
4176
    __asm__ volatile(
88
            "1:                                   \n\t"
89
            "xorps       %%xmm0, %%xmm0           \n\t"
90
            "xorps       %%xmm4, %%xmm4           \n\t"
91
92
            MULT(   0,   0)
93
            MULT( 256,  64)
94
            MULT( 512, 128)
95
            MULT( 768, 192)
96
            MULT(1024, 256)
97
            MULT(1280, 320)
98
            MULT(1536, 384)
99
            MULT(1792, 448)
100
101
            "movaps      %%xmm0, (%4,%0)          \n\t"
102
            "movaps      %%xmm4, (%5,%0)          \n\t"
103
            "add            $16,  %0              \n\t"
104
            "jl              1b                   \n\t"
105
            :"+&r"(count)
106
            :"r"(win1a), "r"(win2a), "r"(bufa), "r"(sum1a), "r"(sum2a)
107
            );
108
109
#undef MULT
110
4176
}
111
112
2088
static void apply_window_mp3(float *in, float *win, int *unused, float *out,
113
                             ptrdiff_t incr)
114
{
115
2088
    LOCAL_ALIGNED_16(float, suma, [17]);
116
2088
    LOCAL_ALIGNED_16(float, sumb, [17]);
117
2088
    LOCAL_ALIGNED_16(float, sumc, [17]);
118
2088
    LOCAL_ALIGNED_16(float, sumd, [17]);
119
120
    float sum;
121
122
    /* copy to avoid wrap */
123
2088
    __asm__ volatile(
124
            "movaps    0(%0), %%xmm0   \n\t" \
125
            "movaps   16(%0), %%xmm1   \n\t" \
126
            "movaps   32(%0), %%xmm2   \n\t" \
127
            "movaps   48(%0), %%xmm3   \n\t" \
128
            "movaps   %%xmm0,   0(%1) \n\t" \
129
            "movaps   %%xmm1,  16(%1) \n\t" \
130
            "movaps   %%xmm2,  32(%1) \n\t" \
131
            "movaps   %%xmm3,  48(%1) \n\t" \
132
            "movaps   64(%0), %%xmm0   \n\t" \
133
            "movaps   80(%0), %%xmm1   \n\t" \
134
            "movaps   96(%0), %%xmm2   \n\t" \
135
            "movaps  112(%0), %%xmm3   \n\t" \
136
            "movaps   %%xmm0,  64(%1) \n\t" \
137
            "movaps   %%xmm1,  80(%1) \n\t" \
138
            "movaps   %%xmm2,  96(%1) \n\t" \
139
            "movaps   %%xmm3, 112(%1) \n\t"
140
2088
            ::"r"(in), "r"(in+512)
141
            :"memory"
142
            );
143
144
2088
    apply_window(in + 16, win     , win + 512, suma, sumc, 16);
145
2088
    apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16);
146
147
2088
    SUM8(MACS, suma[0], win + 32, in + 48);
148
149
2088
    sumc[ 0] = 0;
150
2088
    sumb[16] = 0;
151
2088
    sumd[16] = 0;
152
153
#define SUMS(suma, sumb, sumc, sumd, out1, out2)               \
154
            "movups " #sumd "(%4),       %%xmm0          \n\t" \
155
            "shufps         $0x1b,       %%xmm0, %%xmm0  \n\t" \
156
            "subps  " #suma "(%1),       %%xmm0          \n\t" \
157
            "movaps        %%xmm0," #out1 "(%0)          \n\t" \
158
\
159
            "movups " #sumc "(%3),       %%xmm0          \n\t" \
160
            "shufps         $0x1b,       %%xmm0, %%xmm0  \n\t" \
161
            "addps  " #sumb "(%2),       %%xmm0          \n\t" \
162
            "movaps        %%xmm0," #out2 "(%0)          \n\t"
163
164
2088
    if (incr == 1) {
165
2088
        __asm__ volatile(
166
            SUMS( 0, 48,  4, 52,  0, 112)
167
            SUMS(16, 32, 20, 36, 16,  96)
168
            SUMS(32, 16, 36, 20, 32,  80)
169
            SUMS(48,  0, 52,  4, 48,  64)
170
171
            :"+&r"(out)
172
            :"r"(&suma[0]), "r"(&sumb[0]), "r"(&sumc[0]), "r"(&sumd[0])
173
            :"memory"
174
            );
175
2088
        out += 16*incr;
176
    } else {
177
        int j;
178
        float *out2 = out + 32 * incr;
179
        out[0  ]  = -suma[   0];
180
        out += incr;
181
        out2 -= incr;
182
        for(j=1;j<16;j++) {
183
            *out  = -suma[   j] + sumd[16-j];
184
            *out2 =  sumb[16-j] + sumc[   j];
185
            out  += incr;
186
            out2 -= incr;
187
        }
188
    }
189
190
2088
    sum = 0;
191
2088
    SUM8(MLSS, sum, win + 16 + 32, in + 32);
192
2088
    *out = sum;
193
2088
}
194
195
#endif /* HAVE_6REGS && HAVE_SSE_INLINE */
196
197
#if HAVE_X86ASM
198
/*
 * Define imdct36_blocks_<CPU1>(), which runs the 36-point IMDCT over count
 * consecutive blocks.
 *
 * Groups of four blocks go through ff_four_imdct36_float_<CPU2>() using the
 * interleaved windows in mdct_win_sse; the remaining (count & 3) blocks are
 * handled one at a time by ff_imdct36_float_<CPU1>() using ff_mdct_win_float.
 */
#define DECL_IMDCT_BLOCKS(CPU1, CPU2)                                       \
static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in,      \
                               int count, int switch_point, int block_type) \
{                                                                           \
    /* Largest multiple of four that does not exceed count. */              \
    const int quad_end = count & ~3;                                        \
    int blk = 0;                                                            \
                                                                            \
    while (blk < quad_end) {                                                \
        LOCAL_ALIGNED_16(float, tmpbuf, [1024]);                            \
        /* Table [1] is used only for the first group when switch_point */  \
        /* is set; every other group uses table [0]. */                     \
        float *win = mdct_win_sse[switch_point && blk < 4][block_type];     \
                                                                            \
        ff_four_imdct36_float_ ## CPU2(out, buf, in, win, tmpbuf);          \
                                                                            \
        in  += 4 * 18;                                                      \
        buf += 4 * 18;                                                      \
        out += 4;                                                           \
        blk += 4;                                                           \
    }                                                                       \
                                                                            \
    for (; blk < count; blk++) {                                            \
        /* Odd-numbered blocks use window rows 4..7. */                     \
        const int odd_row = (blk & 1) ? 4 : 0;                              \
        /* With switch_point set, the first two blocks use window 0. */     \
        const int win_idx = (switch_point && blk < 2) ? 0 : block_type;     \
        float *win = ff_mdct_win_float[win_idx + odd_row];                  \
                                                                            \
        ff_imdct36_float_ ## CPU1(out, buf, in, win);                       \
                                                                            \
        in += 18;                                                           \
        buf++;                                                              \
        out++;                                                              \
    }                                                                       \
}
229
230
#if HAVE_SSE
231
#if ARCH_X86_32
232
DECL_IMDCT_BLOCKS(sse,sse)
233
#endif
234
DECL_IMDCT_BLOCKS(sse2,sse)
235
DECL_IMDCT_BLOCKS(sse3,sse)
236
DECL_IMDCT_BLOCKS(ssse3,sse)
237
#endif
238
#if HAVE_AVX_EXTERNAL
239



315
DECL_IMDCT_BLOCKS(avx,avx)
240
#endif
241
#endif /* HAVE_X86ASM */
242
243
78
av_cold void ff_mpadsp_init_x86_tabs(void)
244
{
245
    int i, j;
246
390
    for (j = 0; j < 4; j++) {
247
12792
        for (i = 0; i < 40; i ++) {
248
12480
            mdct_win_sse[0][j][4*i    ] = ff_mdct_win_float[j    ][i];
249
12480
            mdct_win_sse[0][j][4*i + 1] = ff_mdct_win_float[j + 4][i];
250
12480
            mdct_win_sse[0][j][4*i + 2] = ff_mdct_win_float[j    ][i];
251
12480
            mdct_win_sse[0][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
252
12480
            mdct_win_sse[1][j][4*i    ] = ff_mdct_win_float[0    ][i];
253
12480
            mdct_win_sse[1][j][4*i + 1] = ff_mdct_win_float[4    ][i];
254
12480
            mdct_win_sse[1][j][4*i + 2] = ff_mdct_win_float[j    ][i];
255
12480
            mdct_win_sse[1][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
256
        }
257
    }
258
78
}
259
260
140
/**
 * Install the x86 implementations into the DSP context s.
 *
 * s->apply_window_float is set to the inline-SSE routine when INLINE_SSE()
 * accepts the CPU flags. s->imdct36_blocks_float is assigned by a sequence of
 * independent checks (SSE, SSE2, SSE3, SSSE3, AVX); each later check that
 * passes overwrites the earlier choice, so the last matching one wins.
 * Fields are left untouched when no check passes or the corresponding
 * build-time option is disabled.
 */
av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
261
{
262
140
    /* av_unused: every use below may be compiled out. */
    av_unused int cpu_flags = av_get_cpu_flags();
263
264
#if HAVE_6REGS && HAVE_SSE_INLINE
265
140
    if (INLINE_SSE(cpu_flags)) {
266
24
        s->apply_window_float = apply_window_mp3;
267
    }
268
#endif /* HAVE_6REGS && HAVE_SSE_INLINE */
269
270
#if HAVE_X86ASM
271
#if HAVE_SSE
272
#if ARCH_X86_32
273
    /* imdct36_blocks_sse is only declared and defined for 32-bit builds. */
    if (EXTERNAL_SSE(cpu_flags)) {
274
        s->imdct36_blocks_float = imdct36_blocks_sse;
275
    }
276
#endif /* ARCH_X86_32 */
277
140
    if (EXTERNAL_SSE2(cpu_flags)) {
278
24
        s->imdct36_blocks_float = imdct36_blocks_sse2;
279
    }
280
140
    if (EXTERNAL_SSE3(cpu_flags)) {
281
24
        s->imdct36_blocks_float = imdct36_blocks_sse3;
282
    }
283
140
    if (EXTERNAL_SSSE3(cpu_flags)) {
284
24
        s->imdct36_blocks_float = imdct36_blocks_ssse3;
285
    }
286
#endif /* HAVE_SSE */
287
#if HAVE_AVX_EXTERNAL
288
140
    if (EXTERNAL_AVX(cpu_flags)) {
289
24
        s->imdct36_blocks_float = imdct36_blocks_avx;
290
    }
291
#endif /* HAVE_AVX_EXTERNAL */
292
#endif /* HAVE_X86ASM */
293
140
}