/*
 * SIMD optimized MPEG-4 Parametric Stereo decoding functions
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20 |
|
|
|
21 |
|
|
#include <stddef.h> |
22 |
|
|
|
23 |
|
|
#include "config.h" |
24 |
|
|
|
25 |
|
|
#include "libavutil/x86/cpu.h" |
26 |
|
|
#include "libavutil/attributes.h" |
27 |
|
|
#include "libavcodec/aacpsdsp.h" |
28 |
|
|
|
29 |
|
|
/*
 * Prototypes of the SIMD routines that ff_psdsp_init_x86() stores into the
 * PSDSPContext.  Only declarations appear here; no definitions are visible
 * in this file (NOTE(review): presumably provided by a separate assembly
 * object -- confirm against the build).
 *
 * The groups below follow the CPU check under which each routine is
 * installed by ff_psdsp_init_x86().
 */

/* Installed under the EXTERNAL_SSE() check. */
void ff_ps_add_squares_sse(float *dst, const float (*src)[2], int n);
void ff_ps_mul_pair_single_sse(float (*dst)[2], float (*src0)[2],
                               float *src1, int n);
void ff_ps_hybrid_analysis_sse(float (*out)[2], float (*in)[2],
                               const float (*filter)[8][2],
                               ptrdiff_t stride, int n);
void ff_ps_hybrid_synthesis_deint_sse(float out[2][38][64],
                                      float (*in)[32][2],
                                      int i, int len);
void ff_ps_hybrid_analysis_ileave_sse(float (*out)[32][2],
                                      float L[2][38][64],
                                      int i, int len);

/* Installed under the EXTERNAL_SSE3() check. */
void ff_ps_add_squares_sse3(float *dst, const float (*src)[2], int n);
void ff_ps_hybrid_analysis_sse3(float (*out)[2], float (*in)[2],
                                const float (*filter)[8][2],
                                ptrdiff_t stride, int n);
void ff_ps_stereo_interpolate_sse3(float (*l)[2], float (*r)[2],
                                   float h[2][4], float h_step[2][4],
                                   int len);
void ff_ps_stereo_interpolate_ipdopd_sse3(float (*l)[2], float (*r)[2],
                                          float h[2][4], float h_step[2][4],
                                          int len);

/* Installed under the EXTERNAL_SSE4() check. */
void ff_ps_hybrid_synthesis_deint_sse4(float out[2][38][64],
                                       float (*in)[32][2],
                                       int i, int len);
51 |
|
|
|
52 |
|
329 |
/**
 * Fill a PSDSPContext with the x86 SIMD routines declared in this file.
 *
 * The CPU flags are read once through av_get_cpu_flags() and tested with
 * the EXTERNAL_SSE / EXTERNAL_SSE3 / EXTERNAL_SSE4 macros (defined outside
 * this file).  The three checks are independent of each other and run in
 * that order, so a pointer assigned by an earlier check is overwritten when
 * a later check also assigns it (add_squares and hybrid_analysis by SSE3,
 * hybrid_synthesis_deint by SSE4).
 *
 * Members of *s that are not assigned below are left untouched.
 *
 * @param s context whose function pointers are updated in place
 */
av_cold void ff_psdsp_init_x86(PSDSPContext *s)
{
    const int flags = av_get_cpu_flags();

    /* SSE tier. */
    if (EXTERNAL_SSE(flags)) {
        s->hybrid_analysis        = ff_ps_hybrid_analysis_sse;
        s->hybrid_analysis_ileave = ff_ps_hybrid_analysis_ileave_sse;
        s->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_sse;
        s->mul_pair_single        = ff_ps_mul_pair_single_sse;
        s->add_squares            = ff_ps_add_squares_sse;
    }

    /* SSE3 tier: replaces two of the SSE entries and adds both
     * stereo_interpolate slots. */
    if (EXTERNAL_SSE3(flags)) {
        s->stereo_interpolate[0] = ff_ps_stereo_interpolate_sse3;
        s->stereo_interpolate[1] = ff_ps_stereo_interpolate_ipdopd_sse3;
        s->hybrid_analysis       = ff_ps_hybrid_analysis_sse3;
        s->add_squares           = ff_ps_add_squares_sse3;
    }

    /* SSE4 tier: only the synthesis de-interleave routine is replaced. */
    if (EXTERNAL_SSE4(flags)) {
        s->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_sse4;
    }
}