Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * SIMD-optimized LPC functions | ||
3 | * Copyright (c) 2007 Loren Merritt | ||
4 | * | ||
5 | * This file is part of FFmpeg. | ||
6 | * | ||
7 | * FFmpeg is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU Lesser General Public | ||
9 | * License as published by the Free Software Foundation; either | ||
10 | * version 2.1 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * FFmpeg is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * Lesser General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU Lesser General Public | ||
18 | * License along with FFmpeg; if not, write to the Free Software | ||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
20 | */ | ||
21 | |||
22 | #include "libavutil/attributes.h" | ||
23 | #include "libavutil/x86/asm.h" | ||
24 | #include "libavutil/x86/cpu.h" | ||
25 | #include "libavcodec/lpc.h" | ||
26 | |||
27 | void ff_lpc_apply_welch_window_sse2(const int32_t *data, ptrdiff_t len, | ||
28 | double *w_data); | ||
29 | void ff_lpc_apply_welch_window_avx2(const int32_t *data, ptrdiff_t len, | ||
30 | double *w_data); | ||
31 | |||
32 | DECLARE_ASM_CONST(16, double, pd_1)[2] = { 1.0, 1.0 }; | ||
33 | |||
34 | #if HAVE_SSE2_INLINE | ||
35 | |||
/*
 * Computes autocorrelation sums of data[] with SSE2 inline assembly and
 * stores them in autoc[]. When lag > 0, indices up to autoc[lag] are written.
 *
 * Each pass of the outer loop handles two lags (j and j+1). On the pass where
 * j == lag-2 a third accumulator also produces lag j+2. For every lag k the
 * stored value is the accumulator seed (the low element of pd_1, i.e. 1.0)
 * plus the sum of data[n] * data[n-k] over the inner loop.
 *
 * data  : input samples. The inner loops load data[n-k] starting at n = 0,
 *         so memory in front of data[0] is read.
 *         NOTE(review): presumably callers guarantee at least lag readable
 *         samples before data -- confirm against callers.
 * len   : sample count. The inner loops consume two doubles per iteration.
 *         NOTE(review): with an odd len the last iteration would touch one
 *         element past data[len-1] -- confirm callers pass an even len or
 *         padded buffers.
 * lag   : highest lag to compute.
 * autoc : output array, see above for the range that is written.
 */
36 | 5202 | static void lpc_compute_autocorr_sse2(const double *data, ptrdiff_t len, int lag, | |
37 | double *autoc) | ||
38 | { | ||
39 | int j; | ||
40 | |||
/*
 * The aligned accesses below (movapd and the mulpd memory operands) use
 * addresses that are data plus a multiple of 16 bytes, so data itself has to
 * be 16-byte aligned. If it is not, step forward by one double.
 * NOTE(review): this shifts the processed window by one sample -- confirm
 * that callers account for it.
 */
41 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5202 times.
|
5202 | if((x86_reg)data & 15) |
42 | ✗ | data++; | |
43 | |||
/* Two lags per pass: j stays even, so data+len-j keeps the alignment of data. */
44 |
2/2✓ Branch 0 taken 20810 times.
✓ Branch 1 taken 5202 times.
|
26012 | for(j=0; j<lag; j+=2){ |
/*
 * i is a negative byte offset relative to the end pointers (data+len and
 * data+len-j); the asm loops add 16 to it until it is no longer negative.
 */
45 | 20810 | x86_reg i = -len*sizeof(double); | |
/* Only reachable for even lag: this pass additionally computes lag j+2. */
46 |
2/2✓ Branch 0 taken 5202 times.
✓ Branch 1 taken 15608 times.
|
20810 | if(j == lag-2) { |
/*
 * Operands: %0 = i, %1 = autoc+j, %2 = data+len, %3 = data+len-j.
 * xmm0, xmm1 and xmm2 accumulate the products for lags j, j+1 and j+2; each
 * is seeded by a movsd load from pd_1.
 * Loop body: xmm3 holds two samples from (%2,%0) and is multiplied with the
 * pairs at (%3,%0), -8(%3,%0) (not 16-byte aligned, hence movupd) and
 * -16(%3,%0).
 * After the loop the high half of each accumulator is added to its low half
 * and the three scalars are stored through %1. These stores are not declared
 * as output operands, which is what the memory clobber covers.
 * NOTE(review): the xmm registers used here are not listed as clobbers --
 * confirm this is intentional.
 */
47 | 5202 | __asm__ volatile( | |
48 | "movsd "MANGLE(pd_1)", %%xmm0 \n\t" | ||
49 | "movsd "MANGLE(pd_1)", %%xmm1 \n\t" | ||
50 | "movsd "MANGLE(pd_1)", %%xmm2 \n\t" | ||
51 | "1: \n\t" | ||
52 | "movapd (%2,%0), %%xmm3 \n\t" | ||
53 | "movupd -8(%3,%0), %%xmm4 \n\t" | ||
54 | "movapd (%3,%0), %%xmm5 \n\t" | ||
55 | "mulpd %%xmm3, %%xmm4 \n\t" | ||
56 | "mulpd %%xmm3, %%xmm5 \n\t" | ||
57 | "mulpd -16(%3,%0), %%xmm3 \n\t" | ||
58 | "addpd %%xmm4, %%xmm1 \n\t" | ||
59 | "addpd %%xmm5, %%xmm0 \n\t" | ||
60 | "addpd %%xmm3, %%xmm2 \n\t" | ||
61 | "add $16, %0 \n\t" | ||
62 | "jl 1b \n\t" | ||
63 | "movhlps %%xmm0, %%xmm3 \n\t" | ||
64 | "movhlps %%xmm1, %%xmm4 \n\t" | ||
65 | "movhlps %%xmm2, %%xmm5 \n\t" | ||
66 | "addsd %%xmm3, %%xmm0 \n\t" | ||
67 | "addsd %%xmm4, %%xmm1 \n\t" | ||
68 | "addsd %%xmm5, %%xmm2 \n\t" | ||
69 | "movsd %%xmm0, (%1) \n\t" | ||
70 | "movsd %%xmm1, 8(%1) \n\t" | ||
71 | "movsd %%xmm2, 16(%1) \n\t" | ||
72 | :"+&r"(i) | ||
73 | 5202 | :"r"(autoc+j), "r"(data+len), "r"(data+len-j) | |
74 | NAMED_CONSTRAINTS_ARRAY_ADD(pd_1) | ||
75 | :"memory" | ||
76 | ); | ||
77 | } else { | ||
/*
 * Operands: %0 = i, %1 = autoc[j] and %2 = autoc[j+1] (memory outputs),
 * %3 = data+len, %4 = data+len-j.
 * Two accumulators only: xmm0 collects lag j from the pairs at (%4,%0),
 * xmm1 collects lag j+1 from the pairs at -8(%4,%0). Both are seeded from
 * pd_1, reduced to a scalar after the loop and written to the two outputs.
 * NOTE(review): data[] is read through pointer registers without a memory
 * input operand or memory clobber, and the xmm registers are not listed as
 * clobbers -- confirm the compiler cannot reorder writes to data[] across
 * this statement.
 */
78 | 15608 | __asm__ volatile( | |
79 | "movsd "MANGLE(pd_1)", %%xmm0 \n\t" | ||
80 | "movsd "MANGLE(pd_1)", %%xmm1 \n\t" | ||
81 | "1: \n\t" | ||
82 | "movapd (%3,%0), %%xmm3 \n\t" | ||
83 | "movupd -8(%4,%0), %%xmm4 \n\t" | ||
84 | "mulpd %%xmm3, %%xmm4 \n\t" | ||
85 | "mulpd (%4,%0), %%xmm3 \n\t" | ||
86 | "addpd %%xmm4, %%xmm1 \n\t" | ||
87 | "addpd %%xmm3, %%xmm0 \n\t" | ||
88 | "add $16, %0 \n\t" | ||
89 | "jl 1b \n\t" | ||
90 | "movhlps %%xmm0, %%xmm3 \n\t" | ||
91 | "movhlps %%xmm1, %%xmm4 \n\t" | ||
92 | "addsd %%xmm3, %%xmm0 \n\t" | ||
93 | "addsd %%xmm4, %%xmm1 \n\t" | ||
94 | "movsd %%xmm0, %1 \n\t" | ||
95 | "movsd %%xmm1, %2 \n\t" | ||
96 | 15608 | :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]) | |
97 | 15608 | :"r"(data+len), "r"(data+len-j) | |
98 | NAMED_CONSTRAINTS_ARRAY_ADD(pd_1) | ||
99 | ); | ||
100 | } | ||
101 | } | ||
102 | 5202 | } | |
103 | |||
104 | #endif /* HAVE_SSE2_INLINE */ | ||
105 | |||
/*
 * Installs x86 SIMD implementations into the LPC context c, chosen from the
 * flags returned by av_get_cpu_flags(). A function pointer is only assigned
 * when its check succeeds; otherwise the field keeps whatever value it had
 * on entry.
 */
106 | 195 | av_cold void ff_lpc_init_x86(LPCContext *c) | |
107 | { | ||
108 | 195 | int cpu_flags = av_get_cpu_flags(); | |
109 | |||
/*
 * The inline-asm autocorrelation routine is only compiled when
 * HAVE_SSE2_INLINE is set, so the assignment is guarded the same way.
 * NOTE(review): INLINE_SSE2_SLOW is defined outside this file; its exact
 * condition should be checked in the cpu header.
 */
110 | #if HAVE_SSE2_INLINE | ||
111 |
2/2✓ Branch 0 taken 43 times.
✓ Branch 1 taken 152 times.
|
195 | if (INLINE_SSE2_SLOW(cpu_flags)) |
112 | 43 | c->lpc_compute_autocorr = lpc_compute_autocorr_sse2; | |
113 | #endif | ||
114 | |||
/*
 * Welch window routines. The AVX2 check runs after the SSE2 one, so when
 * both succeed the AVX2 pointer overwrites the SSE2 pointer.
 */
115 |
2/2✓ Branch 0 taken 43 times.
✓ Branch 1 taken 152 times.
|
195 | if (EXTERNAL_SSE2(cpu_flags)) |
116 | 43 | c->lpc_apply_welch_window = ff_lpc_apply_welch_window_sse2; | |
117 | |||
118 |
2/2✓ Branch 0 taken 19 times.
✓ Branch 1 taken 176 times.
|
195 | if (EXTERNAL_AVX2(cpu_flags)) |
119 | 19 | c->lpc_apply_welch_window = ff_lpc_apply_welch_window_avx2; | |
120 | 195 | } | |
121 |