| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * This file is part of FFmpeg. | ||
| 3 | * | ||
| 4 | * FFmpeg is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License along | ||
| 15 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
| 16 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include "libavcodec/aacpsdsp.h" | ||
| 20 | #include "libavutil/intfloat.h" | ||
| 21 | #include "libavutil/mem_internal.h" | ||
| 22 | |||
| 23 | #include "checkasm.h" | ||
| 24 | |||
| 25 | #define N 32 /* passed as the n argument of hybrid_analysis; also the outer dimension of its filter array */ | ||
| 26 | #define STRIDE 128 /* passed as the stride argument of hybrid_analysis */ | ||
| 27 | #define BUF_SIZE (N * STRIDE) /* element count used for most test buffers in this file */ | ||
| 28 | |||
| 29 | #define randomize(buf, len) do { /* fill buf[0] .. buf[len - 1] with rnd() scaled by 1 / UINT_MAX */ \ | ||
| 30 | int i; /* local to the macro body; shadows any i in the calling scope */ \ | ||
| 31 | for (i = 0; i < len; i++) { /* len is expanded unparenthesized, so callers should pass a simple expression */ \ | ||
| 32 | const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX; /* presumably lands in [0, 1] if rnd() spans the unsigned range -- TODO confirm against checkasm.h */ \ | ||
| 33 | (buf)[i] = f; \ | ||
| 34 | } \ | ||
| 35 | } while (0) | ||
| 36 | |||
| 37 | #define EPS 0.005 /* absolute tolerance handed to float_near_abs_eps_array() by the tests below */ | ||
| 38 | |||
| 39 | 4 | /* Zero the lowest `bits` bits of the integer view (av_intfloat32.i) of each of the first len elements of buf, in place. */ static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits) | |
| 40 | { | ||
| 41 | int i; | ||
| 42 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 4 times.
|
36 | for (i = 0; i < len; i++) { |
| 43 | 32 | union av_intfloat32 u = { .f = buf[i] }; /* view the value through the union instead of a pointer cast */ | |
| 44 | 32 | u.i &= (0xffffffff << bits); /* NOTE(review): needs 0 <= bits < 32, a full-width shift is undefined in C; the only caller visible here passes 14 */ | |
| 45 | 32 | buf[i] = u.f; | |
| 46 | } | ||
| 47 | 4 | } | |
| 48 | |||
| 49 | 3 | /* Runs the add_squares function selected by check_func() via call_ref and call_new on identical inputs, requires the two dst buffers to agree within EPS, then benchmarks it. */ static void test_add_squares(void) | |
| 50 | { | ||
| 51 | 3 | LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]); /* output of call_ref */ | |
| 52 | 3 | LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]); /* output of call_new */ | |
| 53 | 3 | LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]); /* BUF_SIZE input pairs shared by both calls */ | |
| 54 | |||
| 55 | 3 | declare_func(void, INTFLOAT *dst, | |
| 56 | const INTFLOAT (*src)[2], int n); | ||
| 57 | |||
| 58 |
2/2✓ Branch 1 taken 24576 times.
✓ Branch 2 taken 3 times.
|
24579 | randomize((INTFLOAT *)src, BUF_SIZE * 2); |
| 59 |
2/2✓ Branch 1 taken 12288 times.
✓ Branch 2 taken 3 times.
|
12291 | randomize(dst0, BUF_SIZE); /* dst starts non-zero; presumably because the function adds into dst, as its name suggests -- TODO confirm */ |
| 60 | 3 | memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT)); /* both calls must start from the same dst contents */ | |
| 61 | 3 | call_ref(dst0, src, BUF_SIZE); | |
| 62 | 3 | call_new(dst1, src, BUF_SIZE); | |
| 63 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
|
3 | if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE)) |
| 64 | ✗ | fail(); | |
| 65 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
3 | bench_new(dst1, src, BUF_SIZE); |
| 66 | 3 | } | |
| 67 | |||
| 68 | 2 | /* Runs mul_pair_single via call_ref and call_new on the same random src0 (pairs) and src1 (one value per pair), requires the outputs to agree within EPS, then benchmarks it. */ static void test_mul_pair_single(void) | |
| 69 | { | ||
| 70 | 2 | LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]); /* not pre-initialised: the comparison below relies on the function writing all BUF_SIZE * 2 outputs */ | |
| 71 | 2 | LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]); | |
| 72 | 2 | LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]); | |
| 73 | 2 | LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]); | |
| 74 | |||
| 75 | 2 | declare_func(void, INTFLOAT (*dst)[2], | |
| 76 | INTFLOAT (*src0)[2], INTFLOAT *src1, int n); | ||
| 77 | |||
| 78 |
2/2✓ Branch 1 taken 16384 times.
✓ Branch 2 taken 2 times.
|
16386 | randomize((INTFLOAT *)src0, BUF_SIZE * 2); |
| 79 |
2/2✓ Branch 1 taken 8192 times.
✓ Branch 2 taken 2 times.
|
8194 | randomize(src1, BUF_SIZE); |
| 80 | 2 | call_ref(dst0, src0, src1, BUF_SIZE); | |
| 81 | 2 | call_new(dst1, src0, src1, BUF_SIZE); | |
| 82 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
|
2 | if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2)) /* the casts assume INTFLOAT is float in this build -- TODO confirm */ |
| 83 | ✗ | fail(); | |
| 84 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
2 | bench_new(dst1, src0, src1, BUF_SIZE); |
| 85 | 2 | } | |
| 86 | |||
| 87 | 3 | /* Runs hybrid_analysis via call_ref and call_new with the same random input and filter, using stride STRIDE and n = N, requires the outputs to agree within EPS, then benchmarks it. */ static void test_hybrid_analysis(void) | |
| 88 | { | ||
| 89 | 3 | LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]); /* BUF_SIZE = N * STRIDE pairs, matching the n and stride arguments passed below */ | |
| 90 | 3 | LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]); | |
| 91 | 3 | LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]); /* 13 input pairs */ | |
| 92 | 3 | LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]); /* N rows of 8 coefficient pairs */ | |
| 93 | |||
| 94 | 3 | declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2], | |
| 95 | const INTFLOAT (*filter)[8][2], | ||
| 96 | ptrdiff_t stride, int n); | ||
| 97 | |||
| 98 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 3 times.
|
81 | randomize((INTFLOAT *)in, 13 * 2); |
| 99 |
2/2✓ Branch 1 taken 1536 times.
✓ Branch 2 taken 3 times.
|
1539 | randomize((INTFLOAT *)filter, N * 8 * 2); |
| 100 | |||
| 101 |
2/2✓ Branch 1 taken 24576 times.
✓ Branch 2 taken 3 times.
|
24579 | randomize((INTFLOAT *)dst0, BUF_SIZE * 2); /* give both outputs identical contents so any element the function leaves unwritten still compares equal */ |
| 102 | 3 | memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT)); | |
| 103 | |||
| 104 | 3 | call_ref(dst0, in, filter, STRIDE, N); | |
| 105 | 3 | call_new(dst1, in, filter, STRIDE, N); | |
| 106 | |||
| 107 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
|
3 | if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2)) /* whole buffer is compared, not only the elements written */ |
| 108 | ✗ | fail(); | |
| 109 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
3 | bench_new(dst1, in, filter, STRIDE, N); |
| 110 | 3 | } | |
| 111 | |||
| 112 | 2 | /* Runs hybrid_analysis_ileave via call_ref and call_new for i = 3 and i = 5 with len = 32, requires bit-identical output buffers after each run, then benchmarks the i = 3 case. */ static void test_hybrid_analysis_ileave(void) | |
| 113 | { | ||
| 114 | 2 | LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]); | |
| 115 | 2 | LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]); /* output of call_ref */ | |
| 116 | 2 | LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]); /* output of call_new */ | |
| 117 | |||
| 118 | 2 | declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64], | |
| 119 | int i, int len); | ||
| 120 | |||
| 121 |
2/2✓ Branch 1 taken 11648 times.
✓ Branch 2 taken 2 times.
|
11650 | randomize((INTFLOAT *)out0, 91 * 32 * 2); /* identical random starting contents so unwritten output elements still compare equal */ |
| 122 |
2/2✓ Branch 1 taken 9728 times.
✓ Branch 2 taken 2 times.
|
9730 | randomize((INTFLOAT *)in, 2 * 38 * 64); |
| 123 | 2 | memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT)); | |
| 124 | |||
| 125 | /* len is hardcoded to 32 as that's the only value used in | ||
| 126 | libavcodec. asm functions are likely to be optimized by | ||
| 127 | hardcoding this value in their loops and could fail with | ||
| 128 | anything else. | ||
| 129 | i is hardcoded to the two values currently used by the | ||
| 130 | aac decoder because the arm neon implementation is | ||
| 131 | micro-optimized for them and will fail for almost every | ||
| 132 | other value. */ | ||
| 133 | 2 | call_ref(out0, in, 3, 32); | |
| 134 | 2 | call_new(out1, in, 3, 32); | |
| 135 | |||
| 136 | /* the function just moves data around, so memcmp is enough */ | ||
| 137 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT))) |
| 138 | ✗ | fail(); | |
| 139 | |||
| 140 | 2 | call_ref(out0, in, 5, 32); /* outputs are not reset between runs, so the next memcmp also covers what the i = 3 run wrote */ | |
| 141 | 2 | call_new(out1, in, 5, 32); | |
| 142 | |||
| 143 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT))) |
| 144 | ✗ | fail(); | |
| 145 | |||
| 146 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
2 | bench_new(out1, in, 3, 32); |
| 147 | 2 | } | |
| 148 | |||
| 149 | 3 | /* Runs hybrid_synthesis_deint via call_ref and call_new for i = 3 and i = 5 with len = 32, requires bit-identical output buffers after each run, then benchmarks the i = 3 case. */ static void test_hybrid_synthesis_deint(void) | |
| 150 | { | ||
| 151 | 3 | LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]); /* output of call_ref */ | |
| 152 | 3 | LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]); /* output of call_new */ | |
| 153 | 3 | LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]); | |
| 154 | |||
| 155 | 3 | declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2], | |
| 156 | int i, int len); | ||
| 157 | |||
| 158 |
2/2✓ Branch 1 taken 17472 times.
✓ Branch 2 taken 3 times.
|
17475 | randomize((INTFLOAT *)in, 91 * 32 * 2); |
| 159 |
2/2✓ Branch 1 taken 14592 times.
✓ Branch 2 taken 3 times.
|
14595 | randomize((INTFLOAT *)out0, 2 * 38 * 64); /* identical random starting contents so unwritten output elements still compare equal */ |
| 160 | 3 | memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT)); | |
| 161 | |||
| 162 | /* len is hardcoded to 32 as that's the only value used in | ||
| 163 | libavcodec. asm functions are likely to be optimized by | ||
| 164 | hardcoding this value in their loops and could fail with | ||
| 165 | anything else. | ||
| 166 | i is hardcoded to the two values currently used by the | ||
| 167 | aac decoder because the arm neon implementation is | ||
| 168 | micro-optimized for them and will fail for almost every | ||
| 169 | other value. */ | ||
| 170 | 3 | call_ref(out0, in, 3, 32); | |
| 171 | 3 | call_new(out1, in, 3, 32); | |
| 172 | |||
| 173 | /* the function just moves data around, so memcmp is enough */ | ||
| 174 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT))) |
| 175 | ✗ | fail(); | |
| 176 | |||
| 177 | 3 | call_ref(out0, in, 5, 32); /* outputs are not reset between runs, so the next memcmp also covers what the i = 3 run wrote */ | |
| 178 | 3 | call_new(out1, in, 5, 32); | |
| 179 | |||
| 180 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT))) |
| 181 | ✗ | fail(); | |
| 182 | |||
| 183 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
3 | bench_new(out1, in, 3, 32); |
| 184 | 3 | } | |
| 185 | |||
| 186 | 14 | /* For each of the two psdsp->stereo_interpolate entries that check_func() selects, runs call_ref and call_new on copies of the same random l and r data, requires both outputs to agree within EPS, then benchmarks the entry. */ static void test_stereo_interpolate(PSDSPContext *psdsp) | |
| 187 | { | ||
| 188 | int i; | ||
| 189 | 14 | LOCAL_ALIGNED_16(INTFLOAT, l, [BUF_SIZE], [2]); /* l and r hold the pristine random input and are never passed to the tested function */ | |
| 190 | 14 | LOCAL_ALIGNED_16(INTFLOAT, r, [BUF_SIZE], [2]); | |
| 191 | 14 | LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]); /* l0 and r0 are the working copies given to call_ref */ | |
| 192 | 14 | LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]); | |
| 193 | 14 | LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]); /* l1 and r1 are the working copies given to call_new and bench_new */ | |
| 194 | 14 | LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]); | |
| 195 | 14 | LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]); | |
| 196 | 14 | LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]); | |
| 197 | |||
| 198 | 14 | declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2], | |
| 199 | INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len); | ||
| 200 | |||
| 201 |
2/2✓ Branch 1 taken 114688 times.
✓ Branch 2 taken 14 times.
|
114702 | randomize((INTFLOAT *)l, BUF_SIZE * 2); |
| 202 |
2/2✓ Branch 1 taken 114688 times.
✓ Branch 2 taken 14 times.
|
114702 | randomize((INTFLOAT *)r, BUF_SIZE * 2); |
| 203 | |||
| 204 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
|
42 | for (i = 0; i < 2; i++) { |
| 205 |
4/4✓ Branch 2 taken 14 times.
✓ Branch 3 taken 14 times.
✓ Branch 5 taken 4 times.
✓ Branch 6 taken 24 times.
|
28 | if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) { /* index 0 is reported under the plain name, index 1 with the _ipdopd suffix */ |
| 206 | 4 | memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT)); /* l and r are passed as non-const and compared afterwards, so each call gets a fresh copy of the input */ | |
| 207 | 4 | memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT)); | |
| 208 | 4 | memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT)); | |
| 209 | 4 | memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT)); | |
| 210 | |||
| 211 |
2/2✓ Branch 1 taken 32 times.
✓ Branch 2 taken 4 times.
|
36 | randomize((INTFLOAT *)h, 2 * 4); /* the macro declares its own i, so the loop counter above is not disturbed */ |
| 212 |
2/2✓ Branch 1 taken 32 times.
✓ Branch 2 taken 4 times.
|
36 | randomize((INTFLOAT *)h_step, 2 * 4); |
| 213 | // Clear the least significant 14 bits of h_step, to avoid | ||
| 214 | // divergence when accumulating h_step BUF_SIZE times into | ||
| 215 | // a float variable which may or may not have extra intermediate | ||
| 216 | // precision. Therefore clear roughly log2(BUF_SIZE) less | ||
| 217 | // significant bits, to get the same result regardless of any | ||
| 218 | // extra precision in the accumulator. | ||
| 219 | 4 | clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14); | |
| 220 | |||
| 221 | 4 | call_ref(l0, r0, h, h_step, BUF_SIZE); | |
| 222 | 4 | call_new(l1, r1, h, h_step, BUF_SIZE); | |
| 223 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) || |
| 224 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
|
4 | !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2)) |
| 225 | ✗ | fail(); | |
| 226 | |||
| 227 | 4 | memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT)); /* restore the pristine input so the benchmark does not start from already-processed data */ | |
| 228 | 4 | memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT)); | |
| 229 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
4 | bench_new(l1, r1, h, h_step, BUF_SIZE); |
| 230 | } | ||
| 231 | } | ||
| 232 | 14 | } | |
| 233 | |||
| 234 | 14 | /* Entry point for the aacpsdsp checks: fills a PSDSPContext with ff_psdsp_init(), runs each per-function test for which check_func() returns nonzero, and calls report() once per function group. */ void checkasm_check_aacpsdsp(void) | |
| 235 | { | ||
| 236 | PSDSPContext psdsp; | ||
| 237 | |||
| 238 | 14 | ff_psdsp_init(&psdsp); | |
| 239 | |||
| 240 |
2/2✓ Branch 3 taken 3 times.
✓ Branch 4 taken 11 times.
|
14 | if (check_func(psdsp.add_squares, "ps_add_squares")) |
| 241 | 3 | test_add_squares(); | |
| 242 | 14 | report("add_squares"); /* report() is called whether or not the test above ran */ | |
| 243 | |||
| 244 |
2/2✓ Branch 3 taken 2 times.
✓ Branch 4 taken 12 times.
|
14 | if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single")) |
| 245 | 2 | test_mul_pair_single(); | |
| 246 | 14 | report("mul_pair_single"); | |
| 247 | |||
| 248 |
2/2✓ Branch 3 taken 3 times.
✓ Branch 4 taken 11 times.
|
14 | if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis")) |
| 249 | 3 | test_hybrid_analysis(); | |
| 250 | 14 | report("hybrid_analysis"); | |
| 251 | |||
| 252 |
2/2✓ Branch 3 taken 2 times.
✓ Branch 4 taken 12 times.
|
14 | if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave")) |
| 253 | 2 | test_hybrid_analysis_ileave(); | |
| 254 | 14 | report("hybrid_analysis_ileave"); | |
| 255 | |||
| 256 |
2/2✓ Branch 3 taken 3 times.
✓ Branch 4 taken 11 times.
|
14 | if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint")) |
| 257 | 3 | test_hybrid_synthesis_deint(); | |
| 258 | 14 | report("hybrid_synthesis_deint"); | |
| 259 | |||
| 260 | 14 | test_stereo_interpolate(&psdsp); /* called unconditionally: it runs check_func() itself for each of its two variants */ | |
| 261 | 14 | report("stereo_interpolate"); | |
| 262 | 14 | } | |
| 263 |