Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * This file is part of FFmpeg. | ||
3 | * | ||
4 | * FFmpeg is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * FFmpeg is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along | ||
15 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
16 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
17 | */ | ||
18 | |||
19 | #include "libavcodec/aacpsdsp.h" | ||
20 | #include "libavutil/intfloat.h" | ||
21 | #include "libavutil/mem_internal.h" | ||
22 | |||
23 | #include "checkasm.h" | ||
24 | |||
/* N and STRIDE are the "n" and "stride" arguments handed to the
 * hybrid_analysis function under test; BUF_SIZE is the element count
 * used for most scratch buffers in this file. */
#define N 32
#define STRIDE 128
#define BUF_SIZE (N * STRIDE)

/*
 * Fill the first len elements of buf with rnd() / UINT_MAX converted to
 * INTFLOAT.
 *
 * The loop counter and the temporary use names that cannot collide with
 * identifiers appearing in the caller's arguments (a plain "i" would
 * silently capture an "i" used inside buf or len), and len is
 * parenthesized so that any expression can be passed.  Note that len is
 * still evaluated once per iteration, so it must not have side effects.
 */
#define randomize(buf, len) do {                                          \
    int randomize_idx_;                                                   \
    for (randomize_idx_ = 0; randomize_idx_ < (len); randomize_idx_++) {  \
        const INTFLOAT randomize_val_ = (INTFLOAT)rnd() / UINT_MAX;       \
        (buf)[randomize_idx_] = randomize_val_;                           \
    }                                                                     \
} while (0)

/* Absolute tolerance used when comparing floating-point results. */
#define EPS 0.005
38 | |||
39 | 4 | static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits) | |
40 | { | ||
41 | int i; | ||
42 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 4 times.
|
36 | for (i = 0; i < len; i++) { |
43 | 32 | union av_intfloat32 u = { .f = buf[i] }; | |
44 | 32 | u.i &= (0xffffffff << bits); | |
45 | 32 | buf[i] = u.f; | |
46 | } | ||
47 | 4 | } | |
48 | |||
49 | 3 | static void test_add_squares(void) | |
50 | { | ||
51 | 3 | LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]); | |
52 | 3 | LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]); | |
53 | 3 | LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]); | |
54 | |||
55 | 3 | declare_func(void, INTFLOAT *dst, | |
56 | const INTFLOAT (*src)[2], int n); | ||
57 | |||
58 |
2/2✓ Branch 1 taken 24576 times.
✓ Branch 2 taken 3 times.
|
24579 | randomize((INTFLOAT *)src, BUF_SIZE * 2); |
59 |
2/2✓ Branch 1 taken 12288 times.
✓ Branch 2 taken 3 times.
|
12291 | randomize(dst0, BUF_SIZE); |
60 | 3 | memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT)); | |
61 | 3 | call_ref(dst0, src, BUF_SIZE); | |
62 | 3 | call_new(dst1, src, BUF_SIZE); | |
63 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
|
3 | if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE)) |
64 | ✗ | fail(); | |
65 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
3 | bench_new(dst1, src, BUF_SIZE); |
66 | 3 | } | |
67 | |||
68 | 2 | static void test_mul_pair_single(void) | |
69 | { | ||
70 | 2 | LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]); | |
71 | 2 | LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]); | |
72 | 2 | LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]); | |
73 | 2 | LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]); | |
74 | |||
75 | 2 | declare_func(void, INTFLOAT (*dst)[2], | |
76 | INTFLOAT (*src0)[2], INTFLOAT *src1, int n); | ||
77 | |||
78 |
2/2✓ Branch 1 taken 16384 times.
✓ Branch 2 taken 2 times.
|
16386 | randomize((INTFLOAT *)src0, BUF_SIZE * 2); |
79 |
2/2✓ Branch 1 taken 8192 times.
✓ Branch 2 taken 2 times.
|
8194 | randomize(src1, BUF_SIZE); |
80 | 2 | call_ref(dst0, src0, src1, BUF_SIZE); | |
81 | 2 | call_new(dst1, src0, src1, BUF_SIZE); | |
82 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
|
2 | if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2)) |
83 | ✗ | fail(); | |
84 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
2 | bench_new(dst1, src0, src1, BUF_SIZE); |
85 | 2 | } | |
86 | |||
87 | 3 | static void test_hybrid_analysis(void) | |
88 | { | ||
89 | 3 | LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]); | |
90 | 3 | LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]); | |
91 | 3 | LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]); | |
92 | 3 | LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]); | |
93 | |||
94 | 3 | declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2], | |
95 | const INTFLOAT (*filter)[8][2], | ||
96 | ptrdiff_t stride, int n); | ||
97 | |||
98 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 3 times.
|
81 | randomize((INTFLOAT *)in, 13 * 2); |
99 |
2/2✓ Branch 1 taken 1536 times.
✓ Branch 2 taken 3 times.
|
1539 | randomize((INTFLOAT *)filter, N * 8 * 2); |
100 | |||
101 |
2/2✓ Branch 1 taken 24576 times.
✓ Branch 2 taken 3 times.
|
24579 | randomize((INTFLOAT *)dst0, BUF_SIZE * 2); |
102 | 3 | memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT)); | |
103 | |||
104 | 3 | call_ref(dst0, in, filter, STRIDE, N); | |
105 | 3 | call_new(dst1, in, filter, STRIDE, N); | |
106 | |||
107 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
|
3 | if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2)) |
108 | ✗ | fail(); | |
109 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
3 | bench_new(dst1, in, filter, STRIDE, N); |
110 | 3 | } | |
111 | |||
112 | 2 | static void test_hybrid_analysis_ileave(void) | |
113 | { | ||
114 | 2 | LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]); | |
115 | 2 | LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]); | |
116 | 2 | LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]); | |
117 | |||
118 | 2 | declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64], | |
119 | int i, int len); | ||
120 | |||
121 |
2/2✓ Branch 1 taken 11648 times.
✓ Branch 2 taken 2 times.
|
11650 | randomize((INTFLOAT *)out0, 91 * 32 * 2); |
122 |
2/2✓ Branch 1 taken 9728 times.
✓ Branch 2 taken 2 times.
|
9730 | randomize((INTFLOAT *)in, 2 * 38 * 64); |
123 | 2 | memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT)); | |
124 | |||
125 | /* len is hardcoded to 32 as that's the only value used in | ||
126 | libavcodec. asm functions are likely to be optimized | ||
127 | hardcoding this value in their loops and could fail with | ||
128 | anything else. | ||
129 | i is hardcoded to the two values currently used by the | ||
130 | aac decoder because the arm neon implementation is | ||
131 | micro-optimized for them and will fail for almost every | ||
132 | other value. */ | ||
133 | 2 | call_ref(out0, in, 3, 32); | |
134 | 2 | call_new(out1, in, 3, 32); | |
135 | |||
136 | /* the function just moves data around, so memcmp is enough */ | ||
137 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT))) |
138 | ✗ | fail(); | |
139 | |||
140 | 2 | call_ref(out0, in, 5, 32); | |
141 | 2 | call_new(out1, in, 5, 32); | |
142 | |||
143 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT))) |
144 | ✗ | fail(); | |
145 | |||
146 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
2 | bench_new(out1, in, 3, 32); |
147 | 2 | } | |
148 | |||
149 | 3 | static void test_hybrid_synthesis_deint(void) | |
150 | { | ||
151 | 3 | LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]); | |
152 | 3 | LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]); | |
153 | 3 | LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]); | |
154 | |||
155 | 3 | declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2], | |
156 | int i, int len); | ||
157 | |||
158 |
2/2✓ Branch 1 taken 17472 times.
✓ Branch 2 taken 3 times.
|
17475 | randomize((INTFLOAT *)in, 91 * 32 * 2); |
159 |
2/2✓ Branch 1 taken 14592 times.
✓ Branch 2 taken 3 times.
|
14595 | randomize((INTFLOAT *)out0, 2 * 38 * 64); |
160 | 3 | memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT)); | |
161 | |||
162 | /* len is hardcoded to 32 as that's the only value used in | ||
163 | libavcodec. asm functions are likely to be optimized | ||
164 | hardcoding this value in their loops and could fail with | ||
165 | anything else. | ||
166 | i is hardcoded to the two values currently used by the | ||
167 | aac decoder because the arm neon implementation is | ||
168 | micro-optimized for them and will fail for almost every | ||
169 | other value. */ | ||
170 | 3 | call_ref(out0, in, 3, 32); | |
171 | 3 | call_new(out1, in, 3, 32); | |
172 | |||
173 | /* the function just moves data around, so memcmp is enough */ | ||
174 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT))) |
175 | ✗ | fail(); | |
176 | |||
177 | 3 | call_ref(out0, in, 5, 32); | |
178 | 3 | call_new(out1, in, 5, 32); | |
179 | |||
180 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT))) |
181 | ✗ | fail(); | |
182 | |||
183 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
3 | bench_new(out1, in, 3, 32); |
184 | 3 | } | |
185 | |||
186 | 13 | static void test_stereo_interpolate(PSDSPContext *psdsp) | |
187 | { | ||
188 | int i; | ||
189 | 13 | LOCAL_ALIGNED_16(INTFLOAT, l, [BUF_SIZE], [2]); | |
190 | 13 | LOCAL_ALIGNED_16(INTFLOAT, r, [BUF_SIZE], [2]); | |
191 | 13 | LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]); | |
192 | 13 | LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]); | |
193 | 13 | LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]); | |
194 | 13 | LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]); | |
195 | 13 | LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]); | |
196 | 13 | LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]); | |
197 | |||
198 | 13 | declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2], | |
199 | INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len); | ||
200 | |||
201 |
2/2✓ Branch 1 taken 106496 times.
✓ Branch 2 taken 13 times.
|
106509 | randomize((INTFLOAT *)l, BUF_SIZE * 2); |
202 |
2/2✓ Branch 1 taken 106496 times.
✓ Branch 2 taken 13 times.
|
106509 | randomize((INTFLOAT *)r, BUF_SIZE * 2); |
203 | |||
204 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 13 times.
|
39 | for (i = 0; i < 2; i++) { |
205 |
4/4✓ Branch 2 taken 13 times.
✓ Branch 3 taken 13 times.
✓ Branch 5 taken 4 times.
✓ Branch 6 taken 22 times.
|
26 | if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) { |
206 | 4 | memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT)); | |
207 | 4 | memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT)); | |
208 | 4 | memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT)); | |
209 | 4 | memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT)); | |
210 | |||
211 |
2/2✓ Branch 1 taken 32 times.
✓ Branch 2 taken 4 times.
|
36 | randomize((INTFLOAT *)h, 2 * 4); |
212 |
2/2✓ Branch 1 taken 32 times.
✓ Branch 2 taken 4 times.
|
36 | randomize((INTFLOAT *)h_step, 2 * 4); |
213 | // Clear the least significant 14 bits of h_step, to avoid | ||
214 | // divergence when accumulating h_step BUF_SIZE times into | ||
215 | // a float variable which may or may not have extra intermediate | ||
216 | // precision. Therefore clear roughly log2(BUF_SIZE) less | ||
217 | // significant bits, to get the same result regardless of any | ||
218 | // extra precision in the accumulator. | ||
219 | 4 | clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14); | |
220 | |||
221 | 4 | call_ref(l0, r0, h, h_step, BUF_SIZE); | |
222 | 4 | call_new(l1, r1, h, h_step, BUF_SIZE); | |
223 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) || |
224 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
|
4 | !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2)) |
225 | ✗ | fail(); | |
226 | |||
227 | 4 | memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT)); | |
228 | 4 | memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT)); | |
229 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
4 | bench_new(l1, r1, h, h_step, BUF_SIZE); |
230 | } | ||
231 | } | ||
232 | 13 | } | |
233 | |||
234 | 13 | void checkasm_check_aacpsdsp(void) | |
235 | { | ||
236 | PSDSPContext psdsp; | ||
237 | |||
238 | 13 | ff_psdsp_init(&psdsp); | |
239 | |||
240 |
2/2✓ Branch 3 taken 3 times.
✓ Branch 4 taken 10 times.
|
13 | if (check_func(psdsp.add_squares, "ps_add_squares")) |
241 | 3 | test_add_squares(); | |
242 | 13 | report("add_squares"); | |
243 | |||
244 |
2/2✓ Branch 3 taken 2 times.
✓ Branch 4 taken 11 times.
|
13 | if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single")) |
245 | 2 | test_mul_pair_single(); | |
246 | 13 | report("mul_pair_single"); | |
247 | |||
248 |
2/2✓ Branch 3 taken 3 times.
✓ Branch 4 taken 10 times.
|
13 | if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis")) |
249 | 3 | test_hybrid_analysis(); | |
250 | 13 | report("hybrid_analysis"); | |
251 | |||
252 |
2/2✓ Branch 3 taken 2 times.
✓ Branch 4 taken 11 times.
|
13 | if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave")) |
253 | 2 | test_hybrid_analysis_ileave(); | |
254 | 13 | report("hybrid_analysis_ileave"); | |
255 | |||
256 |
2/2✓ Branch 3 taken 3 times.
✓ Branch 4 taken 10 times.
|
13 | if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint")) |
257 | 3 | test_hybrid_synthesis_deint(); | |
258 | 13 | report("hybrid_synthesis_deint"); | |
259 | |||
260 | 13 | test_stereo_interpolate(&psdsp); | |
261 | 13 | report("stereo_interpolate"); | |
262 | 13 | } | |
263 |