| Line | Branch | Exec | Source | 
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (c) 2022 Ben Avison | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | * GNU General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU General Public License along | ||
| 17 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
| 18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include <string.h> | ||
| 22 | |||
| 23 | #include "checkasm.h" | ||
| 24 | |||
| 25 | #include "libavcodec/vc1dsp.h" | ||
| 26 | |||
| 27 | #include "libavutil/common.h" | ||
| 28 | #include "libavutil/internal.h" | ||
| 29 | #include "libavutil/intreadwrite.h" | ||
| 30 | #include "libavutil/mem.h" | ||
| 31 | #include "libavutil/mem_internal.h" | ||
| 32 | |||
/* Builders for entries of a `test` table.  Each entry stores the member's
 * name as a string and its byte offset inside VC1DSPContext; the sized
 * variant additionally records a block width and height.  The trailing comma
 * is part of the expansion, so entries are listed without separators. */
#define VC1DSP_TEST(func) \
    { .name = #func, .offset = offsetof(VC1DSPContext, func) },
#define VC1DSP_SIZED_TEST(func, w_, h_) \
    { .name = #func, .offset = offsetof(VC1DSPContext, func), .width = (w_), .height = (h_) },
| 35 | |||
/* One entry in a table of VC1DSPContext members to exercise. */
typedef struct {
    const char *name;   /* member name, used to form the checkasm label */
    size_t      offset; /* byte offset of the function pointer in VC1DSPContext */
    int         width;  /* block width given to VC1DSP_SIZED_TEST (0 if unsized) */
    int         height; /* block height given to VC1DSP_SIZED_TEST (0 if unsized) */
} test;
| 42 | |||
/* Dense matrix of floats with its element storage appended to the header.
 * Elements are addressed row-major: element (row, col) is d[row * width + col]. */
typedef struct matrix {
    size_t width;   /* number of columns */
    size_t height;  /* number of rows */
    float  d[];     /* width * height elements */
} matrix;
| 48 | |||
| 49 | static const matrix T8 = { 8, 8, { | ||
| 50 | 12, 12, 12, 12, 12, 12, 12, 12, | ||
| 51 | 16, 15, 9, 4, -4, -9, -15, -16, | ||
| 52 | 16, 6, -6, -16, -16, -6, 6, 16, | ||
| 53 | 15, -4, -16, -9, 9, 16, 4, -15, | ||
| 54 | 12, -12, -12, 12, 12, -12, -12, 12, | ||
| 55 | 9, -16, 4, 15, -15, -4, 16, -9, | ||
| 56 | 6, -16, 16, -6, -6, 16, -16, 6, | ||
| 57 | 4, -9, 15, -16, 16, -15, 9, -4 | ||
| 58 | } }; | ||
| 59 | |||
| 60 | static const matrix T4 = { 4, 4, { | ||
| 61 | 17, 17, 17, 17, | ||
| 62 | 22, 10, -10, -22, | ||
| 63 | 17, -17, -17, 17, | ||
| 64 | 10, -22, 22, -10 | ||
| 65 | } }; | ||
| 66 | |||
| 67 | static const matrix T8t = { 8, 8, { | ||
| 68 | 12, 16, 16, 15, 12, 9, 6, 4, | ||
| 69 | 12, 15, 6, -4, -12, -16, -16, -9, | ||
| 70 | 12, 9, -6, -16, -12, 4, 16, 15, | ||
| 71 | 12, 4, -16, -9, 12, 15, -6, -16, | ||
| 72 | 12, -4, -16, 9, 12, -15, -6, 16, | ||
| 73 | 12, -9, -6, 16, -12, -4, 16, -15, | ||
| 74 | 12, -15, 6, 4, -12, 16, -16, 9, | ||
| 75 | 12, -16, 16, -15, 12, -9, 6, -4 | ||
| 76 | } }; | ||
| 77 | |||
| 78 | static const matrix T4t = { 4, 4, { | ||
| 79 | 17, 22, 17, 10, | ||
| 80 | 17, 10, -17, -22, | ||
| 81 | 17, -10, -17, 22, | ||
| 82 | 17, -22, 17, -10 | ||
| 83 | } }; | ||
| 84 | |||
| 85 | 60 | static matrix *new_matrix(size_t width, size_t height) | |
| 86 | { | ||
| 87 | 60 | matrix *out = av_mallocz(sizeof (matrix) + height * width * sizeof (float)); | |
| 88 | 
        1/2✗ Branch 0 not taken. 
          ✓ Branch 1 taken 60 times. 
         | 
      60 | if (out == NULL) { | 
| 89 | ✗ | fprintf(stderr, "Memory allocation failure\n"); | |
| 90 | ✗ | exit(EXIT_FAILURE); | |
| 91 | } | ||
| 92 | 60 | out->width = width; | |
| 93 | 60 | out->height = height; | |
| 94 | 60 | return out; | |
| 95 | } | ||
| 96 | |||
| 97 | 48 | static matrix *multiply(const matrix *a, const matrix *b) | |
| 98 | { | ||
| 99 | matrix *out; | ||
| 100 | 
        1/2✗ Branch 0 not taken. 
          ✓ Branch 1 taken 48 times. 
         | 
      48 | if (a->width != b->height) { | 
| 101 | ✗ | fprintf(stderr, "Incompatible multiplication\n"); | |
| 102 | ✗ | exit(EXIT_FAILURE); | |
| 103 | } | ||
| 104 | 48 | out = new_matrix(b->width, a->height); | |
| 105 | 
        2/2✓ Branch 0 taken 288 times. 
          ✓ Branch 1 taken 48 times. 
         | 
      336 | for (int j = 0; j < out->height; ++j) | 
| 106 | 
        2/2✓ Branch 0 taken 1728 times. 
          ✓ Branch 1 taken 288 times. 
         | 
      2016 | for (int i = 0; i < out->width; ++i) { | 
| 107 | 1728 | float sum = 0; | |
| 108 | 
        2/2✓ Branch 0 taken 11520 times. 
          ✓ Branch 1 taken 1728 times. 
         | 
      13248 | for (int k = 0; k < a->width; ++k) | 
| 109 | 11520 | sum += a->d[j * a->width + k] * b->d[k * b->width + i]; | |
| 110 | 1728 | out->d[j * out->width + i] = sum; | |
| 111 | } | ||
| 112 | 48 | return out; | |
| 113 | } | ||
| 114 | |||
| 115 | 12 | static void normalise(matrix *a) | |
| 116 | { | ||
| 117 | 
        2/2✓ Branch 0 taken 72 times. 
          ✓ Branch 1 taken 12 times. 
         | 
      84 | for (int j = 0; j < a->height; ++j) | 
| 118 | 
        2/2✓ Branch 0 taken 432 times. 
          ✓ Branch 1 taken 72 times. 
         | 
      504 | for (int i = 0; i < a->width; ++i) { | 
| 119 | 432 | float *p = a->d + j * a->width + i; | |
| 120 | 432 | *p *= 64; | |
| 121 | 
        2/2✓ Branch 0 taken 144 times. 
          ✓ Branch 1 taken 288 times. 
         | 
      432 | if (a->height == 4) | 
| 122 | 144 | *p /= (const unsigned[]) { 289, 292, 289, 292 } [j]; | |
| 123 | else | ||
| 124 | 288 | *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [j]; | |
| 125 | 
        2/2✓ Branch 0 taken 144 times. 
          ✓ Branch 1 taken 288 times. 
         | 
      432 | if (a->width == 4) | 
| 126 | 144 | *p /= (const unsigned[]) { 289, 292, 289, 292 } [i]; | |
| 127 | else | ||
| 128 | 288 | *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [i]; | |
| 129 | } | ||
| 130 | 12 | } | |
| 131 | |||
| 132 | 36 | static void divide_and_round_nearest(matrix *a, float by) | |
| 133 | { | ||
| 134 | 
        2/2✓ Branch 0 taken 216 times. 
          ✓ Branch 1 taken 36 times. 
         | 
      252 | for (int j = 0; j < a->height; ++j) | 
| 135 | 
        2/2✓ Branch 0 taken 1296 times. 
          ✓ Branch 1 taken 216 times. 
         | 
      1512 | for (int i = 0; i < a->width; ++i) { | 
| 136 | 1296 | float *p = a->d + j * a->width + i; | |
| 137 | 1296 | *p = rintf(*p / by); | |
| 138 | } | ||
| 139 | 36 | } | |
| 140 | |||
| 141 | 12 | static void tweak(matrix *a) | |
| 142 | { | ||
| 143 | 
        2/2✓ Branch 0 taken 24 times. 
          ✓ Branch 1 taken 12 times. 
         | 
      36 | for (int j = 4; j < a->height; ++j) | 
| 144 | 
        2/2✓ Branch 0 taken 144 times. 
          ✓ Branch 1 taken 24 times. 
         | 
      168 | for (int i = 0; i < a->width; ++i) { | 
| 145 | 144 | float *p = a->d + j * a->width + i; | |
| 146 | 144 | *p += 1; | |
| 147 | } | ||
| 148 | 12 | } | |
| 149 | |||
/* The VC-1 spec restricts the permitted values at three different stages:
 * - D: the input coefficients in the frequency domain
 * - E: the intermediate coefficients, inverse-transformed horizontally only
 * - R: the fully inverse-transformed coefficients
 *
 * Covering the full specified ranges requires some intermediate values to be
 * held to 17-bit precision, yet real-world streams do not appear to make use
 * of this.  At least some assembly implementations limit these values to
 * 16-bit precision, trading strict spec adherence for faster decoding of
 * real-world streams.  So that the test does not flag those implementations
 * as failures, the random inputs and range limits are divided by this factor.
 */
#define ATTENUATION 4
| 165 | |||
| 166 | 12 | static matrix *generate_inverse_quantized_transform_coefficients(size_t width, size_t height) | |
| 167 | { | ||
| 168 | matrix *raw, *tmp, *D, *E, *R; | ||
| 169 | 12 | raw = new_matrix(width, height); | |
| 170 | 
        2/2✓ Branch 0 taken 432 times. 
          ✓ Branch 1 taken 12 times. 
         | 
      444 | for (int i = 0; i < width * height; ++i) | 
| 171 | 432 | raw->d[i] = (int) (rnd() % (1024/ATTENUATION)) - 512/ATTENUATION; | |
| 172 | 
        2/2✓ Branch 0 taken 6 times. 
          ✓ Branch 1 taken 6 times. 
         | 
      12 | tmp = multiply(height == 8 ? &T8 : &T4, raw); | 
| 173 | 
        2/2✓ Branch 0 taken 6 times. 
          ✓ Branch 1 taken 6 times. 
         | 
      12 | D = multiply(tmp, width == 8 ? &T8t : &T4t); | 
| 174 | 12 | normalise(D); | |
| 175 | 12 | divide_and_round_nearest(D, 1); | |
| 176 | 
        2/2✓ Branch 0 taken 432 times. 
          ✓ Branch 1 taken 12 times. 
         | 
      444 | for (int i = 0; i < width * height; ++i) { | 
| 177 | 
        2/4✓ Branch 0 taken 432 times. 
          ✗ Branch 1 not taken. 
          ✗ Branch 2 not taken. 
          ✓ Branch 3 taken 432 times. 
         | 
      432 | if (D->d[i] < -2048/ATTENUATION || D->d[i] > 2048/ATTENUATION-1) { | 
| 178 | /* Rare, so simply try again */ | ||
| 179 | ✗ | av_free(raw); | |
| 180 | ✗ | av_free(tmp); | |
| 181 | ✗ | av_free(D); | |
| 182 | ✗ | return generate_inverse_quantized_transform_coefficients(width, height); | |
| 183 | } | ||
| 184 | } | ||
| 185 | 
        2/2✓ Branch 0 taken 6 times. 
          ✓ Branch 1 taken 6 times. 
         | 
      12 | E = multiply(D, width == 8 ? &T8 : &T4); | 
| 186 | 12 | divide_and_round_nearest(E, 8); | |
| 187 | 
        2/2✓ Branch 0 taken 432 times. 
          ✓ Branch 1 taken 12 times. 
         | 
      444 | for (int i = 0; i < width * height; ++i) | 
| 188 | 
        2/4✓ Branch 0 taken 432 times. 
          ✗ Branch 1 not taken. 
          ✗ Branch 2 not taken. 
          ✓ Branch 3 taken 432 times. 
         | 
      432 | if (E->d[i] < -4096/ATTENUATION || E->d[i] > 4096/ATTENUATION-1) { | 
| 189 | /* Rare, so simply try again */ | ||
| 190 | ✗ | av_free(raw); | |
| 191 | ✗ | av_free(tmp); | |
| 192 | ✗ | av_free(D); | |
| 193 | ✗ | av_free(E); | |
| 194 | ✗ | return generate_inverse_quantized_transform_coefficients(width, height); | |
| 195 | } | ||
| 196 | 
        2/2✓ Branch 0 taken 6 times. 
          ✓ Branch 1 taken 6 times. 
         | 
      12 | R = multiply(height == 8 ? &T8t : &T4t, E); | 
| 197 | 12 | tweak(R); | |
| 198 | 12 | divide_and_round_nearest(R, 128); | |
| 199 | 
        2/2✓ Branch 0 taken 432 times. 
          ✓ Branch 1 taken 12 times. 
         | 
      444 | for (int i = 0; i < width * height; ++i) | 
| 200 | 
        2/4✓ Branch 0 taken 432 times. 
          ✗ Branch 1 not taken. 
          ✗ Branch 2 not taken. 
          ✓ Branch 3 taken 432 times. 
         | 
      432 | if (R->d[i] < -512/ATTENUATION || R->d[i] > 512/ATTENUATION-1) { | 
| 201 | /* Rare, so simply try again */ | ||
| 202 | ✗ | av_free(raw); | |
| 203 | ✗ | av_free(tmp); | |
| 204 | ✗ | av_free(D); | |
| 205 | ✗ | av_free(E); | |
| 206 | ✗ | av_free(R); | |
| 207 | ✗ | return generate_inverse_quantized_transform_coefficients(width, height); | |
| 208 | } | ||
| 209 | 12 | av_free(raw); | |
| 210 | 12 | av_free(tmp); | |
| 211 | 12 | av_free(E); | |
| 212 | 12 | av_free(R); | |
| 213 | 12 | return D; | |
| 214 | } | ||
| 215 | |||
/* Fill the first `size` 16-bit elements of both name##0 and name##1 with the
 * same sequence of random values (one rnd() call per element). */
#define RANDOMIZE_BUFFER16(name, size)                  \
    do {                                                \
        for (int rb_n = 0; rb_n < (size); rb_n++) {     \
            const uint16_t rb_val = rnd();              \
            AV_WN16A(name##0 + rb_n, rb_val);           \
            AV_WN16A(name##1 + rb_n, rb_val);           \
        }                                               \
    } while (0)
| 225 | |||
/* Fill the first `size` bytes of both name##0 and name##1 with the same
 * sequence of random values (one rnd() call per byte). */
#define RANDOMIZE_BUFFER8(name, size)                   \
    do {                                                \
        for (int rb_n = 0; rb_n < (size); rb_n++) {     \
            const uint8_t rb_val = rnd();               \
            name##0[rb_n] = rb_val;                     \
            name##1[rb_n] = rb_val;                     \
        }                                               \
    } while (0)
| 235 | |||
/* Fill the first `size` bytes of both name##0 and name##1 with identical
 * random values clustered around 0x80: a 7-bit random magnitude with the top
 * bit forced on is shifted right by 0..8 places, given a random sign, and
 * added to 0x80 before being stored as a byte. */
#define RANDOMIZE_BUFFER8_MID_WEIGHTED(name, size)          \
    do {                                                    \
        const int rb_total = (size);                        \
        for (int rb_n = 0; rb_n < rb_total; rb_n++) {       \
            int rb_dev = 0x80 | (rnd() & 0x7F);             \
            rb_dev >>= rnd() % 9;                           \
            if (rnd() & 1)                                  \
                rb_dev = -rb_dev;                           \
            name##1[rb_n] = name##0[rb_n] = 0x80 + rb_dev;  \
        }                                                   \
    } while (0)
| 248 | |||
| 249 | 13 | static void check_inv_trans_inplace(void) | |
| 250 | { | ||
| 251 | /* Inverse transform input coefficients are stored in a 16-bit buffer | ||
| 252 | * with row stride of 8 coefficients irrespective of transform size. | ||
| 253 | * vc1_inv_trans_8x8 differs from the others in two ways: coefficients | ||
| 254 | * are stored in column-major order, and the outputs are written back | ||
| 255 | * to the input buffer, so we oversize it slightly to catch overruns. */ | ||
| 256 | 13 | LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [10 * 8]); | |
| 257 | 13 | LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [10 * 8]); | |
| 258 | |||
| 259 | VC1DSPContext h; | ||
| 260 | |||
| 261 | 13 | ff_vc1dsp_init(&h); | |
| 262 | |||
| 263 | 
        2/2✓ Branch 3 taken 1 times. 
          ✓ Branch 4 taken 12 times. 
         | 
      13 | if (check_func(h.vc1_inv_trans_8x8, "vc1dsp.vc1_inv_trans_8x8")) { | 
| 264 | matrix *coeffs; | ||
| 265 | 1 | declare_func(void, int16_t *); | |
| 266 | 
        2/2✓ Branch 1 taken 80 times. 
          ✓ Branch 2 taken 1 times. 
         | 
      81 | RANDOMIZE_BUFFER16(inv_trans_in, 10 * 8); | 
| 267 | 1 | coeffs = generate_inverse_quantized_transform_coefficients(8, 8); | |
| 268 | 
        2/2✓ Branch 0 taken 8 times. 
          ✓ Branch 1 taken 1 times. 
         | 
      9 | for (int j = 0; j < 8; ++j) | 
| 269 | 
        2/2✓ Branch 0 taken 64 times. 
          ✓ Branch 1 taken 8 times. 
         | 
      72 | for (int i = 0; i < 8; ++i) { | 
| 270 | 64 | int idx = 8 + i * 8 + j; | |
| 271 | 64 | inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * 8 + i]; | |
| 272 | } | ||
| 273 | 1 | call_ref(inv_trans_in0 + 8); | |
| 274 | 1 | call_new(inv_trans_in1 + 8); | |
| 275 | 
        1/2✗ Branch 0 not taken. 
          ✓ Branch 1 taken 1 times. 
         | 
      1 | if (memcmp(inv_trans_in0, inv_trans_in1, 10 * 8 * sizeof (int16_t))) | 
| 276 | ✗ | fail(); | |
| 277 | 
        1/8✗ Branch 1 not taken. 
          ✓ Branch 2 taken 1 times. 
          ✗ Branch 39 not taken. 
          ✗ Branch 40 not taken. 
          ✗ Branch 41 not taken. 
          ✗ Branch 42 not taken. 
          ✗ Branch 43 not taken. 
          ✗ Branch 44 not taken. 
         | 
      1 | bench_new(inv_trans_in1 + 8); | 
| 278 | 1 | av_free(coeffs); | |
| 279 | } | ||
| 280 | 13 | } | |
| 281 | |||
| 282 | 13 | static void check_inv_trans_adding(void) | |
| 283 | { | ||
| 284 | /* Inverse transform input coefficients are stored in a 16-bit buffer | ||
| 285 | * with row stride of 8 coefficients irrespective of transform size. */ | ||
| 286 | 13 | LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [8 * 8]); | |
| 287 | 13 | LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [8 * 8]); | |
| 288 | |||
| 289 | /* For all but vc1_inv_trans_8x8, the inverse transform is narrowed and | ||
| 290 | * added with saturation to an array of unsigned 8-bit values. Oversize | ||
| 291 | * this by 8 samples left and right and one row above and below. */ | ||
| 292 | 13 | LOCAL_ALIGNED_8(uint8_t, inv_trans_out0, [10 * 24]); | |
| 293 | 13 | LOCAL_ALIGNED_8(uint8_t, inv_trans_out1, [10 * 24]); | |
| 294 | |||
| 295 | VC1DSPContext h; | ||
| 296 | |||
| 297 | 13 | const test tests[] = { | |
| 298 | VC1DSP_SIZED_TEST(vc1_inv_trans_8x4, 8, 4) | ||
| 299 | VC1DSP_SIZED_TEST(vc1_inv_trans_4x8, 4, 8) | ||
| 300 | VC1DSP_SIZED_TEST(vc1_inv_trans_4x4, 4, 4) | ||
| 301 | VC1DSP_SIZED_TEST(vc1_inv_trans_8x8_dc, 8, 8) | ||
| 302 | VC1DSP_SIZED_TEST(vc1_inv_trans_8x4_dc, 8, 4) | ||
| 303 | VC1DSP_SIZED_TEST(vc1_inv_trans_4x8_dc, 4, 8) | ||
| 304 | VC1DSP_SIZED_TEST(vc1_inv_trans_4x4_dc, 4, 4) | ||
| 305 | }; | ||
| 306 | |||
| 307 | 13 | ff_vc1dsp_init(&h); | |
| 308 | |||
| 309 | 
        2/2✓ Branch 0 taken 91 times. 
          ✓ Branch 1 taken 13 times. 
         | 
      104 | for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { | 
| 310 | 91 | void (*func)(uint8_t *, ptrdiff_t, int16_t *) = *(void **)((intptr_t) &h + tests[t].offset); | |
| 311 | 
        2/2✓ Branch 3 taken 11 times. 
          ✓ Branch 4 taken 80 times. 
         | 
      91 | if (check_func(func, "vc1dsp.%s", tests[t].name)) { | 
| 312 | matrix *coeffs; | ||
| 313 | 
        2/2✓ Branch 1 taken 4 times. 
          ✓ Branch 2 taken 7 times. 
         | 
      11 | declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int16_t *); | 
| 314 | 
        2/2✓ Branch 1 taken 704 times. 
          ✓ Branch 2 taken 11 times. 
         | 
      715 | RANDOMIZE_BUFFER16(inv_trans_in, 8 * 8); | 
| 315 | 
        2/2✓ Branch 1 taken 2640 times. 
          ✓ Branch 2 taken 11 times. 
         | 
      2651 | RANDOMIZE_BUFFER8(inv_trans_out, 10 * 24); | 
| 316 | 11 | coeffs = generate_inverse_quantized_transform_coefficients(tests[t].width, tests[t].height); | |
| 317 | 
        2/2✓ Branch 0 taken 64 times. 
          ✓ Branch 1 taken 11 times. 
         | 
      75 | for (int j = 0; j < tests[t].height; ++j) | 
| 318 | 
        2/2✓ Branch 0 taken 368 times. 
          ✓ Branch 1 taken 64 times. 
         | 
      432 | for (int i = 0; i < tests[t].width; ++i) { | 
| 319 | 368 | int idx = j * 8 + i; | |
| 320 | 368 | inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * tests[t].width + i]; | |
| 321 | } | ||
| 322 | 11 | call_ref(inv_trans_out0 + 24 + 8, 24, inv_trans_in0); | |
| 323 | 11 | call_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1); | |
| 324 | 
        1/2✗ Branch 0 not taken. 
          ✓ Branch 1 taken 11 times. 
         | 
      11 | if (memcmp(inv_trans_out0, inv_trans_out1, 10 * 24)) | 
| 325 | ✗ | fail(); | |
| 326 | 
        1/8✗ Branch 1 not taken. 
          ✓ Branch 2 taken 11 times. 
          ✗ Branch 39 not taken. 
          ✗ Branch 40 not taken. 
          ✗ Branch 41 not taken. 
          ✗ Branch 42 not taken. 
          ✗ Branch 43 not taken. 
          ✗ Branch 44 not taken. 
         | 
      11 | bench_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1 + 8); | 
| 327 | 11 | av_free(coeffs); | |
| 328 | } | ||
| 329 | } | ||
| 330 | 13 | } | |
| 331 | |||
| 332 | 13 | static void check_loop_filter(void) | |
| 333 | { | ||
| 334 | /* Deblocking filter buffers are big enough to hold a 16x16 block, | ||
| 335 | * plus 16 columns left and 4 rows above to hold filter inputs | ||
| 336 | * (depending on whether v or h neighbouring block edge, oversized | ||
| 337 | * horizontally to maintain 16-byte alignment) plus 16 columns and | ||
| 338 | * 4 rows below to catch write overflows */ | ||
| 339 | 13 | LOCAL_ALIGNED_16(uint8_t, filter_buf0, [24 * 48]); | |
| 340 | 13 | LOCAL_ALIGNED_16(uint8_t, filter_buf1, [24 * 48]); | |
| 341 | |||
| 342 | VC1DSPContext h; | ||
| 343 | |||
| 344 | 13 | const test tests[] = { | |
| 345 | VC1DSP_TEST(vc1_v_loop_filter4) | ||
| 346 | VC1DSP_TEST(vc1_h_loop_filter4) | ||
| 347 | VC1DSP_TEST(vc1_v_loop_filter8) | ||
| 348 | VC1DSP_TEST(vc1_h_loop_filter8) | ||
| 349 | VC1DSP_TEST(vc1_v_loop_filter16) | ||
| 350 | VC1DSP_TEST(vc1_h_loop_filter16) | ||
| 351 | }; | ||
| 352 | |||
| 353 | 13 | ff_vc1dsp_init(&h); | |
| 354 | |||
| 355 | 
        2/2✓ Branch 0 taken 78 times. 
          ✓ Branch 1 taken 13 times. 
         | 
      91 | for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { | 
| 356 | 78 | void (*func)(uint8_t *, ptrdiff_t, int) = *(void **)((intptr_t) &h + tests[t].offset); | |
| 357 | 
        2/2✓ Branch 1 taken 72 times. 
          ✓ Branch 2 taken 6 times. 
         | 
      78 | declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int); | 
| 358 | 
        2/2✓ Branch 3 taken 20 times. 
          ✓ Branch 4 taken 58 times. 
         | 
      78 | if (check_func(func, "vc1dsp.%s", tests[t].name)) { | 
| 359 | 
        2/2✓ Branch 0 taken 20000 times. 
          ✓ Branch 1 taken 20 times. 
         | 
      20020 | for (int count = 1000; count > 0; --count) { | 
| 360 | 20000 | int pq = rnd() % 31 + 1; | |
| 361 | 
        4/4✓ Branch 3 taken 11518009 times. 
          ✓ Branch 4 taken 11521991 times. 
          ✓ Branch 5 taken 23040000 times. 
          ✓ Branch 6 taken 20000 times. 
         | 
      23060000 | RANDOMIZE_BUFFER8_MID_WEIGHTED(filter_buf, 24 * 48); | 
| 362 | 20000 | call_ref(filter_buf0 + 4 * 48 + 16, 48, pq); | |
| 363 | 20000 | call_new(filter_buf1 + 4 * 48 + 16, 48, pq); | |
| 364 | 
        1/2✗ Branch 0 not taken. 
          ✓ Branch 1 taken 20000 times. 
         | 
      20000 | if (memcmp(filter_buf0, filter_buf1, 24 * 48)) | 
| 365 | ✗ | fail(); | |
| 366 | } | ||
| 367 | } | ||
| 368 | 
        2/2✓ Branch 0 taken 1872 times. 
          ✓ Branch 1 taken 78 times. 
         | 
      1950 | for (int j = 0; j < 24; ++j) | 
| 369 | 
        2/2✓ Branch 0 taken 89856 times. 
          ✓ Branch 1 taken 1872 times. 
         | 
      91728 | for (int i = 0; i < 48; ++i) | 
| 370 | 
        4/4✓ Branch 0 taken 59904 times. 
          ✓ Branch 1 taken 29952 times. 
          ✓ Branch 2 taken 49920 times. 
          ✓ Branch 3 taken 9984 times. 
         | 
      89856 | filter_buf1[j * 48 + i] = 0x60 + 0x40 * (i >= 16 && j >= 4); | 
| 371 | 
        2/2✓ Branch 3 taken 20 times. 
          ✓ Branch 4 taken 58 times. 
         | 
      78 | if (check_func(func, "vc1dsp.%s_bestcase", tests[t].name)) | 
| 372 | 
        1/8✗ Branch 1 not taken. 
          ✓ Branch 2 taken 20 times. 
          ✗ Branch 39 not taken. 
          ✗ Branch 40 not taken. 
          ✗ Branch 41 not taken. 
          ✗ Branch 42 not taken. 
          ✗ Branch 43 not taken. 
          ✗ Branch 44 not taken. 
         | 
      20 | bench_new(filter_buf1 + 4 * 48 + 16, 48, 1); | 
| 373 | 
        2/2✓ Branch 3 taken 20 times. 
          ✓ Branch 4 taken 58 times. 
         | 
      78 | if (check_func(func, "vc1dsp.%s_worstcase", tests[t].name)) | 
| 374 | 
        1/8✗ Branch 1 not taken. 
          ✓ Branch 2 taken 20 times. 
          ✗ Branch 39 not taken. 
          ✗ Branch 40 not taken. 
          ✗ Branch 41 not taken. 
          ✗ Branch 42 not taken. 
          ✗ Branch 43 not taken. 
          ✗ Branch 44 not taken. 
         | 
      20 | bench_new(filter_buf1 + 4 * 48 + 16, 48, 31); | 
| 375 | } | ||
| 376 | 13 | } | |
| 377 | |||
/* Run 100 unescape comparisons on the current contents of escaped0/escaped1.
 * Each round picks random source and destination offsets in 0..7 and a length
 * of (8 << 0..7) minus 0..7, refills both output buffers with identical
 * random bytes, and requires the reference and candidate to return the same
 * length and leave identical output buffers.
 * The length is deliberately kept as one expression: its two rnd() calls are
 * evaluated in whichever order the compiler chooses, and splitting them could
 * change the generated sequence. */
#define TEST_UNESCAPE                                                                               \
    do {                                                                                            \
        int rounds_left = 100;                                                                      \
        while (rounds_left-- > 0) {                                                                 \
            escaped_offset   = rnd() & 7;                                                           \
            unescaped_offset = rnd() & 7;                                                           \
            escaped_len      = (1u << ((rnd() % 8) + 3)) - (rnd() & 7);                             \
            RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE);                                        \
            len0 = call_ref(escaped0 + escaped_offset, escaped_len, unescaped0 + unescaped_offset); \
            len1 = call_new(escaped1 + escaped_offset, escaped_len, unescaped1 + unescaped_offset); \
            if (len0 != len1 || memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE))                  \
                fail();                                                                             \
        }                                                                                           \
    } while (0)
| 391 | |||
| 392 | 13 | static void check_unescape(void) | |
| 393 | { | ||
| 394 | /* This appears to be a typical length of buffer in use */ | ||
| 395 | #define LOG2_UNESCAPE_BUF_SIZE 17 | ||
| 396 | #define UNESCAPE_BUF_SIZE (1u<<LOG2_UNESCAPE_BUF_SIZE) | ||
| 397 | 13 | LOCAL_ALIGNED_8(uint8_t, escaped0, [UNESCAPE_BUF_SIZE]); | |
| 398 | 13 | LOCAL_ALIGNED_8(uint8_t, escaped1, [UNESCAPE_BUF_SIZE]); | |
| 399 | 13 | LOCAL_ALIGNED_8(uint8_t, unescaped0, [UNESCAPE_BUF_SIZE]); | |
| 400 | 13 | LOCAL_ALIGNED_8(uint8_t, unescaped1, [UNESCAPE_BUF_SIZE]); | |
| 401 | |||
| 402 | VC1DSPContext h; | ||
| 403 | |||
| 404 | 13 | ff_vc1dsp_init(&h); | |
| 405 | |||
| 406 | 
        2/2✓ Branch 3 taken 1 times. 
          ✓ Branch 4 taken 12 times. 
         | 
      13 | if (check_func(h.vc1_unescape_buffer, "vc1dsp.vc1_unescape_buffer")) { | 
| 407 | int len0, len1, escaped_offset, unescaped_offset, escaped_len; | ||
| 408 | 1 | declare_func(int, const uint8_t *, int, uint8_t *); | |
| 409 | |||
| 410 | /* Test data which consists of escapes sequences packed as tightly as possible */ | ||
| 411 | 
        2/2✓ Branch 0 taken 131072 times. 
          ✓ Branch 1 taken 1 times. 
         | 
      131073 | for (int x = 0; x < UNESCAPE_BUF_SIZE; ++x) | 
| 412 | 
        2/2✓ Branch 0 taken 43691 times. 
          ✓ Branch 1 taken 87381 times. 
         | 
      131072 | escaped1[x] = escaped0[x] = 3 * (x % 3 == 0); | 
| 413 | 
        6/8✓ Branch 5 taken 13107200 times. 
          ✓ Branch 6 taken 100 times. 
          ✓ Branch 14 taken 100 times. 
          ✗ Branch 15 not taken. 
          ✗ Branch 16 not taken. 
          ✓ Branch 17 taken 100 times. 
          ✓ Branch 20 taken 100 times. 
          ✓ Branch 21 taken 1 times. 
         | 
      13107301 | TEST_UNESCAPE; | 
| 414 | |||
| 415 | /* Test random data */ | ||
| 416 | 
        2/2✓ Branch 1 taken 131072 times. 
          ✓ Branch 2 taken 1 times. 
         | 
      131073 | RANDOMIZE_BUFFER8(escaped, UNESCAPE_BUF_SIZE); | 
| 417 | 
        6/8✓ Branch 5 taken 13107200 times. 
          ✓ Branch 6 taken 100 times. 
          ✓ Branch 14 taken 100 times. 
          ✗ Branch 15 not taken. 
          ✗ Branch 16 not taken. 
          ✓ Branch 17 taken 100 times. 
          ✓ Branch 20 taken 100 times. 
          ✓ Branch 21 taken 1 times. 
         | 
      13107301 | TEST_UNESCAPE; | 
| 418 | |||
| 419 | /* Test data with escape sequences at random intervals */ | ||
| 420 | 
        2/2✓ Branch 0 taken 1367 times. 
          ✓ Branch 1 taken 1 times. 
         | 
      1368 | for (int x = 0; x <= UNESCAPE_BUF_SIZE - 4;) { | 
| 421 | int gap, gap_msb; | ||
| 422 | 1367 | escaped1[x+0] = escaped0[x+0] = 0; | |
| 423 | 1367 | escaped1[x+1] = escaped0[x+1] = 0; | |
| 424 | 1367 | escaped1[x+2] = escaped0[x+2] = 3; | |
| 425 | 1367 | escaped1[x+3] = escaped0[x+3] = rnd() & 3; | |
| 426 | 1367 | gap_msb = 2u << (rnd() % 8); | |
| 427 | 1367 | gap = (rnd() &~ -gap_msb) | gap_msb; | |
| 428 | 1367 | x += gap; | |
| 429 | } | ||
| 430 | 
        6/8✓ Branch 5 taken 13107200 times. 
          ✓ Branch 6 taken 100 times. 
          ✓ Branch 14 taken 100 times. 
          ✗ Branch 15 not taken. 
          ✗ Branch 16 not taken. 
          ✓ Branch 17 taken 100 times. 
          ✓ Branch 20 taken 100 times. 
          ✓ Branch 21 taken 1 times. 
         | 
      13107301 | TEST_UNESCAPE; | 
| 431 | |||
| 432 | /* Test data which is known to contain no escape sequences */ | ||
| 433 | 1 | memset(escaped0, 0xFF, UNESCAPE_BUF_SIZE); | |
| 434 | 1 | memset(escaped1, 0xFF, UNESCAPE_BUF_SIZE); | |
| 435 | 
        6/8✓ Branch 5 taken 13107200 times. 
          ✓ Branch 6 taken 100 times. 
          ✓ Branch 14 taken 100 times. 
          ✗ Branch 15 not taken. 
          ✗ Branch 16 not taken. 
          ✓ Branch 17 taken 100 times. 
          ✓ Branch 20 taken 100 times. 
          ✓ Branch 21 taken 1 times. 
         | 
      13107301 | TEST_UNESCAPE; | 
| 436 | |||
| 437 | /* Benchmark the no-escape-sequences case */ | ||
| 438 | 
        1/8✗ Branch 1 not taken. 
          ✓ Branch 2 taken 1 times. 
          ✗ Branch 39 not taken. 
          ✗ Branch 40 not taken. 
          ✗ Branch 41 not taken. 
          ✗ Branch 42 not taken. 
          ✗ Branch 43 not taken. 
          ✗ Branch 44 not taken. 
         | 
      1 | bench_new(escaped1, UNESCAPE_BUF_SIZE, unescaped1); | 
| 439 | } | ||
| 440 | 13 | } | |
| 441 | |||
| 442 | 13 | static void check_mspel_pixels(void) | |
| 443 | { | ||
| 444 | 13 | LOCAL_ALIGNED_16(uint8_t, src0, [32 * 32]); | |
| 445 | 13 | LOCAL_ALIGNED_16(uint8_t, src1, [32 * 32]); | |
| 446 | 13 | LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 32]); | |
| 447 | 13 | LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 32]); | |
| 448 | |||
| 449 | VC1DSPContext h; | ||
| 450 | |||
| 451 | 13 | const test tests[] = { | |
| 452 | VC1DSP_SIZED_TEST(put_vc1_mspel_pixels_tab[0][0], 16, 16) | ||
| 453 | VC1DSP_SIZED_TEST(put_vc1_mspel_pixels_tab[1][0], 8, 8) | ||
| 454 | VC1DSP_SIZED_TEST(avg_vc1_mspel_pixels_tab[0][0], 16, 16) | ||
| 455 | VC1DSP_SIZED_TEST(avg_vc1_mspel_pixels_tab[1][0], 8, 8) | ||
| 456 | }; | ||
| 457 | |||
| 458 | 13 | ff_vc1dsp_init(&h); | |
| 459 | |||
| 460 | 
        2/2✓ Branch 0 taken 52 times. 
          ✓ Branch 1 taken 13 times. 
         | 
      65 | for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { | 
| 461 | 52 | void (*func)(uint8_t *, const uint8_t*, ptrdiff_t, int) = *(void **)((intptr_t) &h + tests[t].offset); | |
| 462 | 
        2/2✓ Branch 3 taken 8 times. 
          ✓ Branch 4 taken 44 times. 
         | 
      52 | if (check_func(func, "vc1dsp.%s", tests[t].name)) { | 
| 463 | 
        2/2✓ Branch 1 taken 4 times. 
          ✓ Branch 2 taken 4 times. 
         | 
      8 | declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, const uint8_t*, ptrdiff_t, int); | 
| 464 | 
        2/2✓ Branch 1 taken 8192 times. 
          ✓ Branch 2 taken 8 times. 
         | 
      8200 | RANDOMIZE_BUFFER8(dst, 32 * 32); | 
| 465 | 
        2/2✓ Branch 1 taken 8192 times. 
          ✓ Branch 2 taken 8 times. 
         | 
      8200 | RANDOMIZE_BUFFER8(src, 32 * 32); | 
| 466 | 8 | call_ref(dst0, src0, 32, 0); | |
| 467 | 8 | call_new(dst1, src1, 32, 0); | |
| 468 | 
        1/2✗ Branch 0 not taken. 
          ✓ Branch 1 taken 8 times. 
         | 
      8 | if (memcmp(dst0, dst1, 32 * 32)) { | 
| 469 | ✗ | fail(); | |
| 470 | } | ||
| 471 | 
        1/8✗ Branch 1 not taken. 
          ✓ Branch 2 taken 8 times. 
          ✗ Branch 39 not taken. 
          ✗ Branch 40 not taken. 
          ✗ Branch 41 not taken. 
          ✗ Branch 42 not taken. 
          ✗ Branch 43 not taken. 
          ✗ Branch 44 not taken. 
         | 
      8 | bench_new(dst1, src0, 32, 0); | 
| 472 | } | ||
| 473 | } | ||
| 474 | 13 | } | |
| 475 | |||
/**
 * Entry point for the VC-1 DSP checks.  Runs each group of checks and emits
 * one report line per group; both inverse-transform checks are reported
 * together under "inv_trans".
 */
void checkasm_check_vc1dsp(void)
{
    /* Inverse transforms: in-place 8x8 plus the adding variants. */
    check_inv_trans_inplace();
    check_inv_trans_adding();
    report("inv_trans");

    /* Deblocking filters. */
    check_loop_filter();
    report("loop_filter");

    /* Bitstream unescaping. */
    check_unescape();
    report("unescape_buffer");

    /* Motion-compensation pixel copies/averages. */
    check_mspel_pixels();
    report("mspel_pixels");
}
| 491 |