GCC Code Coverage Report
Directory: ../../../ffmpeg/
File:      src/libavcodec/x86/mpegaudiodsp.c
Date:      2020-08-14 10:39:37

            Exec   Total   Coverage
Lines:        53      65     81.5 %
Branches:     21      66     31.8 %

/*
 * SIMD-optimized MP3 decoding functions
 * Copyright (c) 2010 Vitor Sessak
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/internal.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/mpegaudiodsp.h"

#define DECL(CPU)\
static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, int switch_point, int block_type);\
void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);

#if HAVE_X86ASM
#if ARCH_X86_32
DECL(sse)
#endif
DECL(sse2)
DECL(sse3)
DECL(ssse3)
DECL(avx)
#endif /* HAVE_X86ASM */
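
/*
 * Illustrative note (editorial, not part of the original source): each
 * DECL(CPU) above expands to two prototypes, e.g. DECL(avx) declares
 *
 *   static void imdct36_blocks_avx(float *out, float *buf, float *in,
 *                                  int count, int switch_point, int block_type);
 *   void ff_imdct36_float_avx(float *out, float *buf, float *in, float *win);
 *
 * i.e. the dispatch wrapper defined later in this file and the external
 * assembly routine it calls.
 */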

void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win,
                               float *tmpbuf);
void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
                               float *tmpbuf);

DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];
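/*
 * Editorial note: mdct_win_sse caches SIMD-friendly copies of the MDCT
 * windows: [2] selects the normal vs. switch-point variant, [4] the block
 * type, and each window holds 40 coefficients interleaved four-wide
 * (4*40 floats).  The table is filled in ff_mpadsp_init_x86() below.
 */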

#if HAVE_6REGS && HAVE_SSE_INLINE

#define MACS(rt, ra, rb) rt+=(ra)*(rb)
#define MLSS(rt, ra, rb) rt-=(ra)*(rb)

#define SUM8(op, sum, w, p)               \
{                                         \
    op(sum, (w)[0 * 64], (p)[0 * 64]);    \
    op(sum, (w)[1 * 64], (p)[1 * 64]);    \
    op(sum, (w)[2 * 64], (p)[2 * 64]);    \
    op(sum, (w)[3 * 64], (p)[3 * 64]);    \
    op(sum, (w)[4 * 64], (p)[4 * 64]);    \
    op(sum, (w)[5 * 64], (p)[5 * 64]);    \
    op(sum, (w)[6 * 64], (p)[6 * 64]);    \
    op(sum, (w)[7 * 64], (p)[7 * 64]);    \
}
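
/*
 * Illustrative sketch (editorial, not part of the original source):
 * SUM8(MACS, sum, w, p) unrolls the loop below, an 8-tap multiply-accumulate
 * over window and sample values spaced 64 floats apart.  sum8_ref is a
 * hypothetical helper shown only to make the access pattern explicit.
 */
static inline float sum8_ref(const float *w, const float *p)
{
    float sum = 0.0f;
    for (int k = 0; k < 8; k++)
        sum += w[k * 64] * p[k * 64];
    return sum;
}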

static void apply_window(const float *buf, const float *win1,
                         const float *win2, float *sum1, float *sum2, int len)
{
    x86_reg count = - 4*len;
    const float *win1a = win1+len;
    const float *win2a = win2+len;
    const float *bufa  = buf+len;
    float *sum1a = sum1+len;
    float *sum2a = sum2+len;

#define MULT(a, b)                                 \
    "movaps " #a "(%1,%0), %%xmm1           \n\t"  \
    "movaps " #a "(%3,%0), %%xmm2           \n\t"  \
    "mulps         %%xmm2, %%xmm1           \n\t"  \
    "subps         %%xmm1, %%xmm0           \n\t"  \
    "mulps  " #b "(%2,%0), %%xmm2           \n\t"  \
    "subps         %%xmm2, %%xmm4           \n\t"  \

    __asm__ volatile(
            "1:                                   \n\t"
            "xorps       %%xmm0, %%xmm0           \n\t"
            "xorps       %%xmm4, %%xmm4           \n\t"

            MULT(   0,   0)
            MULT( 256,  64)
            MULT( 512, 128)
            MULT( 768, 192)
            MULT(1024, 256)
            MULT(1280, 320)
            MULT(1536, 384)
            MULT(1792, 448)

            "movaps      %%xmm0, (%4,%0)          \n\t"
            "movaps      %%xmm4, (%5,%0)          \n\t"
            "add            $16,  %0              \n\t"
            "jl              1b                   \n\t"
            :"+&r"(count)
            :"r"(win1a), "r"(win2a), "r"(bufa), "r"(sum1a), "r"(sum2a)
            );

#undef MULT
}
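
/*
 * Illustrative sketch (editorial, not part of the original source): a scalar
 * reading of the SSE loop above.  For each of the len output slots (len is a
 * multiple of 4; the callers pass 16) it accumulates eight window/sample
 * products, stepping win1 and buf by 64 floats and win2 by 16 floats, and
 * stores the negated sums.  apply_window_ref is a hypothetical helper.
 */
static void apply_window_ref(const float *buf, const float *win1,
                             const float *win2, float *sum1, float *sum2,
                             int len)
{
    for (int i = 0; i < len; i++) {
        float s1 = 0.0f, s2 = 0.0f;
        for (int k = 0; k < 8; k++) {
            s1 += win1[i + 64 * k] * buf[i + 64 * k];
            s2 += win2[i + 16 * k] * buf[i + 64 * k];
        }
        sum1[i] = -s1;
        sum2[i] = -s2;
    }
}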

static void apply_window_mp3(float *in, float *win, int *unused, float *out,
                             ptrdiff_t incr)
{
    LOCAL_ALIGNED_16(float, suma, [17]);
    LOCAL_ALIGNED_16(float, sumb, [17]);
    LOCAL_ALIGNED_16(float, sumc, [17]);
    LOCAL_ALIGNED_16(float, sumd, [17]);

    float sum;

    /* copy to avoid wrap */
    __asm__ volatile(
            "movaps    0(%0), %%xmm0   \n\t" \
            "movaps   16(%0), %%xmm1   \n\t" \
            "movaps   32(%0), %%xmm2   \n\t" \
            "movaps   48(%0), %%xmm3   \n\t" \
            "movaps   %%xmm0,   0(%1) \n\t" \
            "movaps   %%xmm1,  16(%1) \n\t" \
            "movaps   %%xmm2,  32(%1) \n\t" \
            "movaps   %%xmm3,  48(%1) \n\t" \
            "movaps   64(%0), %%xmm0   \n\t" \
            "movaps   80(%0), %%xmm1   \n\t" \
            "movaps   96(%0), %%xmm2   \n\t" \
            "movaps  112(%0), %%xmm3   \n\t" \
            "movaps   %%xmm0,  64(%1) \n\t" \
            "movaps   %%xmm1,  80(%1) \n\t" \
            "movaps   %%xmm2,  96(%1) \n\t" \
            "movaps   %%xmm3, 112(%1) \n\t"
            ::"r"(in), "r"(in+512)
            :"memory"
            );
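
    /*
     * Editorial note: the block above copies the first 32 floats of "in" to
     * in + 512, roughly memcpy(in + 512, in, 32 * sizeof(*in)), so the
     * windowing below can read past the nominal end of the ring buffer
     * without wrapping.
     */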

    apply_window(in + 16, win     , win + 512, suma, sumc, 16);
    apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16);

    SUM8(MACS, suma[0], win + 32, in + 48);

    sumc[ 0] = 0;
    sumb[16] = 0;
    sumd[16] = 0;

#define SUMS(suma, sumb, sumc, sumd, out1, out2)               \
            "movups " #sumd "(%4),       %%xmm0          \n\t" \
            "shufps         $0x1b,       %%xmm0, %%xmm0  \n\t" \
            "subps  " #suma "(%1),       %%xmm0          \n\t" \
            "movaps        %%xmm0," #out1 "(%0)          \n\t" \
\
            "movups " #sumc "(%3),       %%xmm0          \n\t" \
            "shufps         $0x1b,       %%xmm0, %%xmm0  \n\t" \
            "addps  " #sumb "(%2),       %%xmm0          \n\t" \
            "movaps        %%xmm0," #out2 "(%0)          \n\t"

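    /*
     * Editorial note: each SUMS(...) above emits SSE code for four output
     * samples: it reverses four floats of sumd (shufps $0x1b), subtracts the
     * matching suma values and stores them at out1, then adds the reversed
     * sumc to sumb for out2; this is the vector counterpart of the scalar
     * loop in the else branch below.
     */
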
    if (incr == 1) {
        __asm__ volatile(
            SUMS( 0, 48,  4, 52,  0, 112)
            SUMS(16, 32, 20, 36, 16,  96)
            SUMS(32, 16, 36, 20, 32,  80)
            SUMS(48,  0, 52,  4, 48,  64)

            :"+&r"(out)
            :"r"(&suma[0]), "r"(&sumb[0]), "r"(&sumc[0]), "r"(&sumd[0])
            :"memory"
            );
        out += 16*incr;
    } else {
        int j;
        float *out2 = out + 32 * incr;
        out[0  ]  = -suma[   0];
        out += incr;
        out2 -= incr;
        for(j=1;j<16;j++) {
            *out  = -suma[   j] + sumd[16-j];
            *out2 =  sumb[16-j] + sumc[   j];
            out  += incr;
            out2 -= incr;
        }
    }

    sum = 0;
    SUM8(MLSS, sum, win + 16 + 32, in + 32);
    *out = sum;
}

#endif /* HAVE_6REGS && HAVE_SSE_INLINE */

#if HAVE_X86ASM
#define DECL_IMDCT_BLOCKS(CPU1, CPU2)                                       \
static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in,      \
                               int count, int switch_point, int block_type) \
{                                                                           \
    int align_end = count - (count & 3);                                \
    int j;                                                              \
    for (j = 0; j < align_end; j+= 4) {                                 \
        LOCAL_ALIGNED_16(float, tmpbuf, [1024]);                        \
        float *win = mdct_win_sse[switch_point && j < 4][block_type];   \
        /* apply window & overlap with previous buffer */               \
                                                                        \
        /* select window */                                             \
        ff_four_imdct36_float_ ## CPU2(out, buf, in, win, tmpbuf);      \
        in      += 4*18;                                                \
        buf     += 4*18;                                                \
        out     += 4;                                                   \
    }                                                                   \
    for (; j < count; j++) {                                            \
        /* apply window & overlap with previous buffer */               \
                                                                        \
        /* select window */                                             \
        int win_idx = (switch_point && j < 2) ? 0 : block_type;         \
        float *win = ff_mdct_win_float[win_idx + (4 & -(j & 1))];       \
                                                                        \
        ff_imdct36_float_ ## CPU1(out, buf, in, win);                   \
                                                                        \
        in  += 18;                                                      \
        buf++;                                                          \
        out++;                                                          \
    }                                                                   \
}
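
/*
 * Editorial note: each generated imdct36_blocks_* wrapper processes groups of
 * four 18-sample blocks with the four-wide ff_four_imdct36_float_* routine
 * (using the interleaved mdct_win_sse table), then falls back to the single
 * ff_imdct36_float_* routine for the remainder.  In that scalar tail,
 * (4 & -(j & 1)) evaluates to 4 for odd j and 0 for even j, so odd blocks
 * pick the companion window ff_mdct_win_float[win_idx + 4].
 */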

#if HAVE_SSE
#if ARCH_X86_32
DECL_IMDCT_BLOCKS(sse,sse)
#endif
DECL_IMDCT_BLOCKS(sse2,sse)
DECL_IMDCT_BLOCKS(sse3,sse)
DECL_IMDCT_BLOCKS(ssse3,sse)
#endif
#if HAVE_AVX_EXTERNAL
DECL_IMDCT_BLOCKS(avx,avx)
#endif
#endif /* HAVE_X86ASM */

av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
{
    av_unused int cpu_flags = av_get_cpu_flags();

    int i, j;
    for (j = 0; j < 4; j++) {
        for (i = 0; i < 40; i ++) {
            mdct_win_sse[0][j][4*i    ] = ff_mdct_win_float[j    ][i];
            mdct_win_sse[0][j][4*i + 1] = ff_mdct_win_float[j + 4][i];
            mdct_win_sse[0][j][4*i + 2] = ff_mdct_win_float[j    ][i];
            mdct_win_sse[0][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
            mdct_win_sse[1][j][4*i    ] = ff_mdct_win_float[0    ][i];
            mdct_win_sse[1][j][4*i + 1] = ff_mdct_win_float[4    ][i];
            mdct_win_sse[1][j][4*i + 2] = ff_mdct_win_float[j    ][i];
            mdct_win_sse[1][j][4*i + 3] = ff_mdct_win_float[j + 4][i];
        }
    }
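
    /*
     * Editorial note: each group of four consecutive floats in mdct_win_sse
     * packs the window values consumed by one four-wide IMDCT pass,
     * alternating ff_mdct_win_float[j] and its companion ff_mdct_win_float[j + 4].
     * mdct_win_sse[1] is the switch-point variant: its first two lanes always
     * take windows 0 and 4, mirroring the scalar path's
     * "(switch_point && j < 2) ? 0 : block_type" selection.
     */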

#if HAVE_6REGS && HAVE_SSE_INLINE
    if (INLINE_SSE(cpu_flags)) {
        s->apply_window_float = apply_window_mp3;
    }
#endif /* HAVE_SSE_INLINE */

#if HAVE_X86ASM
#if HAVE_SSE
#if ARCH_X86_32
    if (EXTERNAL_SSE(cpu_flags)) {
        s->imdct36_blocks_float = imdct36_blocks_sse;
    }
#endif
    if (EXTERNAL_SSE2(cpu_flags)) {
        s->imdct36_blocks_float = imdct36_blocks_sse2;
    }
    if (EXTERNAL_SSE3(cpu_flags)) {
        s->imdct36_blocks_float = imdct36_blocks_sse3;
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        s->imdct36_blocks_float = imdct36_blocks_ssse3;
    }
#endif
#if HAVE_AVX_EXTERNAL
    if (EXTERNAL_AVX(cpu_flags)) {
        s->imdct36_blocks_float = imdct36_blocks_avx;
    }
#endif
#endif /* HAVE_X86ASM */
}
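
/*
 * Editorial usage sketch (hypothetical, not part of the original source):
 * decoders do not call this initializer directly; the generic
 * ff_mpadsp_init() fills MPADSPContext with C implementations and then lets
 * the per-arch init override the function pointers, roughly:
 *
 *   MPADSPContext dsp;
 *   ff_mpadsp_init(&dsp);   // on x86 this ends up calling ff_mpadsp_init_x86()
 *   // the MP3 decoder then runs dsp.apply_window_float(...) and
 *   // dsp.imdct36_blocks_float(...) through whichever pointers were selected.
 */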