| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * | ||
| 3 | * This file is part of FFmpeg. | ||
| 4 | * | ||
| 5 | * FFmpeg is free software; you can redistribute it and/or modify | ||
| 6 | * it under the terms of the GNU General Public License as published by | ||
| 7 | * the Free Software Foundation; either version 2 of the License, or | ||
| 8 | * (at your option) any later version. | ||
| 9 | * | ||
| 10 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 13 | * GNU General Public License for more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along | ||
| 16 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
| 17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 18 | */ | ||
| 19 | |||
| 20 | #include <string.h> | ||
| 21 | |||
| 22 | #include "libavutil/common.h" | ||
| 23 | #include "libavutil/intreadwrite.h" | ||
| 24 | #include "libavutil/mem.h" | ||
| 25 | #include "libavutil/mem_internal.h" | ||
| 26 | |||
| 27 | #include "libswscale/swscale.h" | ||
| 28 | #include "libswscale/swscale_internal.h" | ||
| 29 | |||
| 30 | #include "checkasm.h" | ||
| 31 | |||
/*
 * Fill `size` bytes starting at `buf` with values from rnd(), written as
 * one 32-bit store (AV_WN32) every 4 bytes.
 *
 * The loop steps by 4, so when `size` is not a multiple of 4 the final store
 * extends past `size`; callers must pad the buffer accordingly.
 *
 * Both arguments are parenthesized so that compound expressions (ternaries,
 * sums, casts) bind as the caller intended, and the loop counter carries a
 * name unlikely to collide with identifiers used inside the arguments.
 */
#define randomize_buffers(buf, size)                                      \
    do {                                                                  \
        int rnd_buf_pos_;                                                 \
        for (rnd_buf_pos_ = 0; rnd_buf_pos_ < (size); rnd_buf_pos_ += 4)  \
            AV_WN32((buf) + rnd_buf_pos_, rnd());                         \
    } while (0)
| 38 | |||
/*
 * Plain C reference for the 8-bit vertical filter (noted in the original as
 * corresponding to yuv2planeX_8_c).
 *
 * For every output position x in [0, dstW):
 *   - start from the dither byte selected by (x + offset) & 7, shifted left
 *     by 12;
 *   - accumulate src[tap][x] * filter[tap] over all filterSize taps;
 *   - shift the sum right by 19 and saturate it to 0..255 before storing.
 */
static void yuv2planeX_8_ref(const int16_t *filter, int filterSize,
                             const int16_t **src, uint8_t *dest, int dstW,
                             const uint8_t *dither, int offset)
{
    for (int x = 0; x < dstW; x++) {
        int acc = dither[(x + offset) & 7] << 12;
        int sample;

        for (int tap = 0; tap < filterSize; tap++)
            acc += src[tap][x] * filter[tap];

        /* Saturate to the unsigned 8-bit range. */
        sample = acc >> 19;
        if (sample < 0)
            sample = 0;
        else if (sample > 255)
            sample = 255;

        dest[x] = (uint8_t)sample;
    }
}
| 54 | |||
/*
 * Generates cmp_off_by_n_<bits>(): walks two arrays of uint<bits>_t with n
 * elements each and returns 1 as soon as a pair of elements differs by more
 * than `accuracy` (absolute difference), or 0 when every pair is within the
 * tolerance. An `accuracy` of 0 therefore demands exact equality.
 */
#define CMP_FUNC(bits)                                                    \
static int cmp_off_by_n_##bits(const uint##bits##_t *ref,                 \
                               const uint##bits##_t *test,                \
                               size_t n, int accuracy)                    \
{                                                                         \
    const uint##bits##_t *const ref_end = ref + n;                        \
                                                                          \
    while (ref < ref_end) {                                               \
        const int delta = (int)*ref++ - (int)*test++;                     \
        if (abs(delta) > accuracy)                                        \
            return 1;                                                     \
    }                                                                     \
    return 0;                                                             \
}

CMP_FUNC(8)
CMP_FUNC(16)
| 68 | |||
/*
 * Generates two diagnostic helpers for arrays of uint<bits>_t:
 *
 * print_data_<bits>(p, len, offset)
 *     Prints `len` elements of `p` to stdout in hex, eight per row. Each row
 *     is prefixed with the index of its first element plus `offset`.
 *
 * show_differences_<bits>(a, b, len)
 *     Finds the first index where a and b differ, dumps a window of both
 *     arrays around it and returns that index. Returns `len` when the arrays
 *     are identical. The window starts at the 8-aligned position at or before
 *     (index - 8) and spans up to 32 elements; it is clamped to `len` so the
 *     dump never reads past the end of either array.
 */
#define SHOW_DIFF_FUNC(bits)                                                            \
static void print_data_##bits(const uint##bits##_t *p, size_t len, size_t offset)       \
{                                                                                       \
    size_t i = 0;                                                                       \
    for (; i < len; i++) {                                                              \
        if (i % 8 == 0) {                                                               \
            printf("0x%04zx: ", i + offset);                                            \
        }                                                                               \
        printf("0x%02x ", (unsigned) p[i]);                                             \
        if (i % 8 == 7) {                                                               \
            printf("\n");                                                               \
        }                                                                               \
    }                                                                                   \
    if (i % 8 != 0) {                                                                   \
        printf("\n");                                                                   \
    }                                                                                   \
}                                                                                       \
static size_t show_differences_##bits(const uint##bits##_t *a, const uint##bits##_t *b, \
                                      size_t len)                                       \
{                                                                                       \
    for (size_t i = 0; i < len; i++) {                                                  \
        if (a[i] != b[i]) {                                                             \
            const size_t window_start = (i >= 8 ? i - 8 : i) & ~(size_t)7;              \
            size_t window_len = len - window_start;                                     \
            if (window_len > 32)                                                        \
                window_len = 32;                                                        \
            printf("test a:\n");                                                        \
            print_data_##bits(&a[window_start], window_len, window_start);              \
            printf("\ntest b:\n");                                                      \
            print_data_##bits(&b[window_start], window_len, window_start);              \
            printf("\n");                                                               \
            return i;                                                                   \
        }                                                                               \
    }                                                                                   \
    return len;                                                                         \
}

SHOW_DIFF_FUNC(8)
SHOW_DIFF_FUNC(16)
| 108 | |||
| 109 | 26 | static void check_yuv2yuv1(int accurate) | |
| 110 | { | ||
| 111 | SwsContext *sws; | ||
| 112 | SwsInternal *c; | ||
| 113 | int osi, isi; | ||
| 114 | int dstW, offset; | ||
| 115 | size_t fail_offset; | ||
| 116 | 26 | const int input_sizes[] = {8, 24, 128, 144, 256, 512}; | |
| 117 | #define LARGEST_INPUT_SIZE 512 | ||
| 118 | |||
| 119 | 26 | const int offsets[] = {0, 3, 8, 11, 16, 19}; | |
| 120 | 26 | const int OFFSET_SIZES = sizeof(offsets)/sizeof(offsets[0]); | |
| 121 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
|
26 | const char *accurate_str = (accurate) ? "accurate" : "approximate"; |
| 122 | |||
| 123 | 26 | declare_func(void, | |
| 124 | const int16_t *src, uint8_t *dest, | ||
| 125 | int dstW, const uint8_t *dither, int offset); | ||
| 126 | |||
| 127 | 26 | LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_INPUT_SIZE]); | |
| 128 | 26 | LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE]); | |
| 129 | 26 | LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE]); | |
| 130 | 26 | LOCAL_ALIGNED_8(uint8_t, dither, [8]); | |
| 131 | |||
| 132 |
2/2✓ Branch 1 taken 52 times.
✓ Branch 2 taken 26 times.
|
78 | randomize_buffers((uint8_t*)dither, 8); |
| 133 |
2/2✓ Branch 1 taken 6656 times.
✓ Branch 2 taken 26 times.
|
6682 | randomize_buffers((uint8_t*)src_pixels, LARGEST_INPUT_SIZE * sizeof(int16_t)); |
| 134 | 26 | sws = sws_alloc_context(); | |
| 135 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
|
26 | if (accurate) |
| 136 | 13 | sws->flags |= SWS_ACCURATE_RND; | |
| 137 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 26 times.
|
26 | if (sws_init_context(sws, NULL, NULL) < 0) |
| 138 | ✗ | fail(); | |
| 139 | |||
| 140 | 26 | c = sws_internal(sws); | |
| 141 | 26 | ff_sws_init_scale(c); | |
| 142 |
2/2✓ Branch 0 taken 156 times.
✓ Branch 1 taken 26 times.
|
182 | for (isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); ++isi) { |
| 143 | 156 | dstW = input_sizes[isi]; | |
| 144 |
2/2✓ Branch 0 taken 936 times.
✓ Branch 1 taken 156 times.
|
1092 | for (osi = 0; osi < OFFSET_SIZES; osi++) { |
| 145 | 936 | offset = offsets[osi]; | |
| 146 |
2/2✓ Branch 3 taken 144 times.
✓ Branch 4 taken 792 times.
|
936 | if (check_func(c->yuv2plane1, "yuv2yuv1_%d_%d_%s", offset, dstW, accurate_str)){ |
| 147 | 144 | memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0])); | |
| 148 | 144 | memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0])); | |
| 149 | |||
| 150 | 144 | call_ref(src_pixels, dst0, dstW, dither, offset); | |
| 151 | 144 | call_new(src_pixels, dst1, dstW, dither, offset); | |
| 152 |
3/4✓ Branch 0 taken 36 times.
✓ Branch 1 taken 108 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 144 times.
|
144 | if (cmp_off_by_n_8(dst0, dst1, dstW * sizeof(dst0[0]), accurate ? 0 : 2)) { |
| 153 | ✗ | fail(); | |
| 154 | ✗ | printf("failed: yuv2yuv1_%d_%di_%s\n", offset, dstW, accurate_str); | |
| 155 | ✗ | fail_offset = show_differences_8(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0])); | |
| 156 | ✗ | printf("failing values: src: 0x%04x dither: 0x%02x dst-c: %02x dst-asm: %02x\n", | |
| 157 | ✗ | (int) src_pixels[fail_offset], | |
| 158 | ✗ | (int) dither[(fail_offset + fail_offset) & 7], | |
| 159 | ✗ | (int) dst0[fail_offset], | |
| 160 | ✗ | (int) dst1[fail_offset]); | |
| 161 | } | ||
| 162 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 120 times.
|
144 | if (dstW == LARGEST_INPUT_SIZE) |
| 163 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 24 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
24 | bench_new(src_pixels, dst1, dstW, dither, offset); |
| 164 | } | ||
| 165 | } | ||
| 166 | } | ||
| 167 | 26 | sws_freeContext(sws); | |
| 168 | 26 | } | |
| 169 | |||
| 170 | 234 | static void check_yuv2yuvX(int accurate, int bit_depth, int dst_pix_format) | |
| 171 | { | ||
| 172 | SwsContext *sws; | ||
| 173 | SwsInternal *c; | ||
| 174 | int fsi, osi, isi, i, j; | ||
| 175 | int dstW; | ||
| 176 | #define LARGEST_FILTER 16 | ||
| 177 | // ff_yuv2planeX_8_sse2 can't handle odd filter sizes | ||
| 178 | 234 | const int filter_sizes[] = {2, 4, 8, 16}; | |
| 179 | 234 | const int FILTER_SIZES = sizeof(filter_sizes)/sizeof(filter_sizes[0]); | |
| 180 | #define LARGEST_INPUT_SIZE 512 | ||
| 181 | static const int input_sizes[] = {8, 24, 128, 144, 256, 512}; | ||
| 182 |
2/2✓ Branch 0 taken 117 times.
✓ Branch 1 taken 117 times.
|
234 | const char *accurate_str = (accurate) ? "accurate" : "approximate"; |
| 183 | |||
| 184 |
2/2✓ Branch 1 taken 216 times.
✓ Branch 2 taken 18 times.
|
234 | declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *filter, |
| 185 | int filterSize, const int16_t **src, uint8_t *dest, | ||
| 186 | int dstW, const uint8_t *dither, int offset); | ||
| 187 | |||
| 188 | const int16_t **src; | ||
| 189 | 234 | LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]); | |
| 190 | 234 | LOCAL_ALIGNED_16(int16_t, filter_coeff, [LARGEST_FILTER]); | |
| 191 | 234 | LOCAL_ALIGNED_16(uint16_t, dst0, [LARGEST_INPUT_SIZE]); | |
| 192 | 234 | LOCAL_ALIGNED_16(uint16_t, dst1, [LARGEST_INPUT_SIZE]); | |
| 193 | 234 | LOCAL_ALIGNED_16(uint8_t, dither, [LARGEST_INPUT_SIZE]); | |
| 194 | union VFilterData{ | ||
| 195 | const int16_t *src; | ||
| 196 | uint16_t coeff[8]; | ||
| 197 | } *vFilterData; | ||
| 198 | 234 | uint8_t d_val = rnd(); | |
| 199 | 234 | memset(dither, d_val, LARGEST_INPUT_SIZE); | |
| 200 |
2/2✓ Branch 1 taken 958464 times.
✓ Branch 2 taken 234 times.
|
958698 | randomize_buffers((uint8_t*)src_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t)); |
| 201 | 234 | sws = sws_alloc_context(); | |
| 202 | 234 | sws->dst_format = dst_pix_format; | |
| 203 |
2/2✓ Branch 0 taken 117 times.
✓ Branch 1 taken 117 times.
|
234 | if (accurate) |
| 204 | 117 | sws->flags |= SWS_ACCURATE_RND; | |
| 205 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 234 times.
|
234 | if (sws_init_context(sws, NULL, NULL) < 0) |
| 206 | ✗ | fail(); | |
| 207 | |||
| 208 | 234 | c = sws_internal(sws); | |
| 209 | 234 | c->dstBpc = bit_depth; | |
| 210 | 234 | ff_sws_init_scale(c); | |
| 211 |
2/2✓ Branch 0 taken 1404 times.
✓ Branch 1 taken 234 times.
|
1638 | for(isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); ++isi){ |
| 212 | 1404 | dstW = input_sizes[isi]; | |
| 213 |
2/2✓ Branch 0 taken 5616 times.
✓ Branch 1 taken 1404 times.
|
7020 | for(osi = 0; osi < 64; osi += 16){ |
| 214 |
2/2✓ Branch 0 taken 1170 times.
✓ Branch 1 taken 4446 times.
|
5616 | if (dstW <= osi) |
| 215 | 1170 | continue; | |
| 216 |
2/2✓ Branch 0 taken 17784 times.
✓ Branch 1 taken 4446 times.
|
22230 | for (fsi = 0; fsi < FILTER_SIZES; ++fsi) { |
| 217 | // Generate filter coefficients for the given filter size, | ||
| 218 | // with some properties: | ||
| 219 | // - The coefficients add up to the intended sum (4096, 1<<12) | ||
| 220 | // - The coefficients contain negative values | ||
| 221 | // - The filter intermediates don't overflow for worst case | ||
| 222 | // inputs (all positive coefficients are coupled with | ||
| 223 | // input_max and all negative coefficients with input_min, | ||
| 224 | // or vice versa). | ||
| 225 | // Produce a filter with all coefficients set to | ||
| 226 | // -((1<<12)/(filter_size-1)) except for one (randomly chosen) | ||
| 227 | // which is set to ((1<<13)-1). | ||
| 228 |
2/2✓ Branch 0 taken 133380 times.
✓ Branch 1 taken 17784 times.
|
151164 | for (i = 0; i < filter_sizes[fsi]; ++i) |
| 229 | 133380 | filter_coeff[i] = -((1 << 12) / (filter_sizes[fsi] - 1)); | |
| 230 | 17784 | filter_coeff[rnd() % filter_sizes[fsi]] = (1 << 13) - 1; | |
| 231 | |||
| 232 | 17784 | src = av_malloc(sizeof(int16_t*) * filter_sizes[fsi]); | |
| 233 | 17784 | vFilterData = av_malloc((filter_sizes[fsi] + 2) * sizeof(union VFilterData)); | |
| 234 | 17784 | memset(vFilterData, 0, (filter_sizes[fsi] + 2) * sizeof(union VFilterData)); | |
| 235 |
2/2✓ Branch 0 taken 133380 times.
✓ Branch 1 taken 17784 times.
|
151164 | for (i = 0; i < filter_sizes[fsi]; ++i) { |
| 236 | 133380 | src[i] = &src_pixels[i * LARGEST_INPUT_SIZE]; | |
| 237 | 133380 | vFilterData[i].src = src[i] - osi; | |
| 238 |
2/2✓ Branch 0 taken 533520 times.
✓ Branch 1 taken 133380 times.
|
666900 | for(j = 0; j < 4; ++j) |
| 239 | 533520 | vFilterData[i].coeff[j + 4] = filter_coeff[i]; | |
| 240 | } | ||
| 241 |
6/6✓ Branch 2 taken 15808 times.
✓ Branch 3 taken 1976 times.
✓ Branch 5 taken 7904 times.
✓ Branch 6 taken 7904 times.
✓ Branch 8 taken 2736 times.
✓ Branch 9 taken 15048 times.
|
17784 | if (check_func(c->yuv2planeX, "yuv2yuvX_%d%s_%d_%d_%d_%s", bit_depth, (bit_depth == 8) ? "" : (isBE(dst_pix_format) ? "BE" : "LE"), filter_sizes[fsi], osi, dstW, accurate_str)) { |
| 242 | // use vFilterData for the mmx function | ||
| 243 |
2/2✓ Branch 0 taken 228 times.
✓ Branch 1 taken 2508 times.
|
2736 | const int16_t *filter = c->use_mmx_vfilter ? (const int16_t*)vFilterData : &filter_coeff[0]; |
| 244 | 2736 | memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0])); | |
| 245 | 2736 | memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0])); | |
| 246 | |||
| 247 |
2/2✓ Branch 0 taken 608 times.
✓ Branch 1 taken 2128 times.
|
2736 | if (c->dstBpc == 8) { |
| 248 | // We can't use call_ref here, because we don't know if use_mmx_vfilter was set for that | ||
| 249 | // function or not, so we can't pass it the parameters correctly. | ||
| 250 | |||
| 251 | 608 | yuv2planeX_8_ref(&filter_coeff[0], filter_sizes[fsi], src, (uint8_t*)dst0, dstW - osi, dither, osi); | |
| 252 | 608 | call_new(filter, filter_sizes[fsi], src, (uint8_t*)dst1, dstW - osi, dither, osi); | |
| 253 | |||
| 254 |
3/4✓ Branch 0 taken 304 times.
✓ Branch 1 taken 304 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 608 times.
|
608 | if (cmp_off_by_n_8((uint8_t*)dst0, (uint8_t*)dst1, LARGEST_INPUT_SIZE, accurate ? 0 : 2)) { |
| 255 | ✗ | fail(); | |
| 256 | ✗ | printf("failed: yuv2yuvX_%d_%d_%d_%d_%s\n", bit_depth, filter_sizes[fsi], osi, dstW, accurate_str); | |
| 257 | ✗ | show_differences_8((uint8_t*)dst0, (uint8_t*)dst1, LARGEST_INPUT_SIZE); | |
| 258 | } | ||
| 259 | } else { | ||
| 260 | 2128 | call_ref(&filter_coeff[0], filter_sizes[fsi], src, (uint8_t*)dst0, dstW - osi, dither, osi); | |
| 261 | 2128 | call_new(&filter_coeff[0], filter_sizes[fsi], src, (uint8_t*)dst1, dstW - osi, dither, osi); | |
| 262 | |||
| 263 |
3/4✓ Branch 0 taken 1064 times.
✓ Branch 1 taken 1064 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2128 times.
|
2128 | if (cmp_off_by_n_16(dst0, dst1, LARGEST_INPUT_SIZE, accurate ? 0 : 2)) { |
| 264 | ✗ | fail(); | |
| 265 | ✗ | printf("failed: yuv2yuvX_%d%s_%d_%d_%d_%s\n", bit_depth, isBE(dst_pix_format) ? "BE" : "LE", filter_sizes[fsi], osi, dstW, accurate_str); | |
| 266 | ✗ | show_differences_16(dst0, dst1, LARGEST_INPUT_SIZE); | |
| 267 | } | ||
| 268 | } | ||
| 269 |
2/2✓ Branch 0 taken 576 times.
✓ Branch 1 taken 2160 times.
|
2736 | if (dstW == LARGEST_INPUT_SIZE) |
| 270 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 576 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
576 | bench_new(filter, filter_sizes[fsi], src, (uint8_t*)dst1, dstW - osi, dither, osi); |
| 271 | |||
| 272 | } | ||
| 273 | 17784 | av_freep(&src); | |
| 274 | 17784 | av_freep(&vFilterData); | |
| 275 | } | ||
| 276 | } | ||
| 277 | } | ||
| 278 | 234 | sws_freeContext(sws); | |
| 279 | #undef FILTER_SIZES | ||
| 280 | 234 | } | |
| 281 | |||
| 282 | 26 | static void check_yuv2nv12cX(int accurate) | |
| 283 | { | ||
| 284 | SwsContext *sws; | ||
| 285 | SwsInternal *c; | ||
| 286 | #define LARGEST_FILTER 16 | ||
| 287 | 26 | const int filter_sizes[] = {2, 4, 8, 16}; | |
| 288 | #define LARGEST_INPUT_SIZE 512 | ||
| 289 | static const int input_sizes[] = {8, 24, 128, 144, 256, 512}; | ||
| 290 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
|
26 | const char *accurate_str = (accurate) ? "accurate" : "approximate"; |
| 291 | |||
| 292 |
2/2✓ Branch 1 taken 24 times.
✓ Branch 2 taken 2 times.
|
26 | declare_func_emms(AV_CPU_FLAG_MMX, void, enum AVPixelFormat dstFormat, |
| 293 | const uint8_t *chrDither, const int16_t *chrFilter, | ||
| 294 | int chrFilterSize, const int16_t **chrUSrc, | ||
| 295 | const int16_t **chrVSrc, uint8_t *dest, int dstW); | ||
| 296 | |||
| 297 | const int16_t *srcU[LARGEST_FILTER], *srcV[LARGEST_FILTER]; | ||
| 298 | 26 | LOCAL_ALIGNED_16(int16_t, srcU_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]); | |
| 299 | 26 | LOCAL_ALIGNED_16(int16_t, srcV_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]); | |
| 300 | 26 | LOCAL_ALIGNED_16(int16_t, filter_coeff, [LARGEST_FILTER]); | |
| 301 | 26 | LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE * 2]); | |
| 302 | 26 | LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE * 2]); | |
| 303 | 26 | LOCAL_ALIGNED_16(uint8_t, dither, [LARGEST_INPUT_SIZE]); | |
| 304 | 26 | uint8_t d_val = rnd(); | |
| 305 | 26 | memset(dither, d_val, LARGEST_INPUT_SIZE); | |
| 306 |
2/2✓ Branch 1 taken 106496 times.
✓ Branch 2 taken 26 times.
|
106522 | randomize_buffers((uint8_t*)srcU_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t)); |
| 307 |
2/2✓ Branch 1 taken 106496 times.
✓ Branch 2 taken 26 times.
|
106522 | randomize_buffers((uint8_t*)srcV_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t)); |
| 308 |
2/2✓ Branch 0 taken 416 times.
✓ Branch 1 taken 26 times.
|
442 | for (int i = 0; i < LARGEST_FILTER; i++) { |
| 309 | 416 | srcU[i] = &srcU_pixels[i * LARGEST_INPUT_SIZE]; | |
| 310 | 416 | srcV[i] = &srcV_pixels[i * LARGEST_INPUT_SIZE]; | |
| 311 | } | ||
| 312 | |||
| 313 | 26 | sws = sws_alloc_context(); | |
| 314 | 26 | sws->dst_format = AV_PIX_FMT_NV12; | |
| 315 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
|
26 | if (accurate) |
| 316 | 13 | sws->flags |= SWS_ACCURATE_RND; | |
| 317 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 26 times.
|
26 | if (sws_init_context(sws, NULL, NULL) < 0) |
| 318 | ✗ | fail(); | |
| 319 | |||
| 320 | 26 | c = sws_internal(sws); | |
| 321 | 26 | ff_sws_init_scale(c); | |
| 322 |
2/2✓ Branch 0 taken 156 times.
✓ Branch 1 taken 26 times.
|
182 | for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++){ |
| 323 | 156 | const int dstW = input_sizes[isi]; | |
| 324 |
2/2✓ Branch 0 taken 624 times.
✓ Branch 1 taken 156 times.
|
780 | for (int fsi = 0; fsi < FF_ARRAY_ELEMS(filter_sizes); fsi++) { |
| 325 | 624 | const int filter_size = filter_sizes[fsi]; | |
| 326 |
2/2✓ Branch 0 taken 4680 times.
✓ Branch 1 taken 624 times.
|
5304 | for (int i = 0; i < filter_size; i++) |
| 327 | 4680 | filter_coeff[i] = -((1 << 12) / (filter_size - 1)); | |
| 328 | 624 | filter_coeff[rnd() % filter_size] = (1 << 13) - 1; | |
| 329 | |||
| 330 |
2/2✓ Branch 3 taken 72 times.
✓ Branch 4 taken 552 times.
|
624 | if (check_func(c->yuv2nv12cX, "yuv2nv12cX_%d_%d_%s", filter_size, dstW, accurate_str)){ |
| 331 | 72 | memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0])); | |
| 332 | 72 | memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0])); | |
| 333 | |||
| 334 | 72 | call_ref(sws->dst_format, dither, &filter_coeff[0], filter_size, srcU, srcV, dst0, dstW); | |
| 335 | 72 | call_new(sws->dst_format, dither, &filter_coeff[0], filter_size, srcU, srcV, dst1, dstW); | |
| 336 | |||
| 337 |
3/4✓ Branch 0 taken 24 times.
✓ Branch 1 taken 48 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 72 times.
|
72 | if (cmp_off_by_n_8(dst0, dst1, dstW * 2 * sizeof(dst0[0]), accurate ? 0 : 2)) { |
| 338 | ✗ | fail(); | |
| 339 | ✗ | printf("failed: yuv2nv12wX_%d_%d_%s\n", filter_size, dstW, accurate_str); | |
| 340 | ✗ | show_differences_8(dst0, dst1, dstW * 2 * sizeof(dst0[0])); | |
| 341 | } | ||
| 342 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 60 times.
|
72 | if (dstW == LARGEST_INPUT_SIZE) |
| 343 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 12 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
12 | bench_new(sws->dst_format, dither, &filter_coeff[0], filter_size, srcU, srcV, dst1, dstW); |
| 344 | |||
| 345 | } | ||
| 346 | } | ||
| 347 | } | ||
| 348 | 26 | sws_freeContext(sws); | |
| 349 | 26 | } | |
| 350 | #undef LARGEST_FILTER | ||
| 351 | #undef LARGEST_INPUT_SIZE | ||
| 352 | |||
| 353 | #undef SRC_PIXELS | ||
| 354 | #define SRC_PIXELS 512 | ||
| 355 | |||
| 356 | 13 | static void check_hscale(void) | |
| 357 | { | ||
| 358 | #define MAX_FILTER_WIDTH 40 | ||
| 359 | #define FILTER_SIZES 6 | ||
| 360 | static const int filter_sizes[FILTER_SIZES] = { 4, 8, 12, 16, 32, 40 }; | ||
| 361 | |||
| 362 | #define HSCALE_PAIRS 2 | ||
| 363 | static const int hscale_pairs[HSCALE_PAIRS][2] = { | ||
| 364 | { 8, 14 }, | ||
| 365 | { 8, 18 }, | ||
| 366 | }; | ||
| 367 | |||
| 368 | #define LARGEST_INPUT_SIZE 512 | ||
| 369 | static const int input_sizes[] = {8, 24, 128, 144, 256, 512}; | ||
| 370 | |||
| 371 | int i, j, fsi, hpi, width, dstWi; | ||
| 372 | SwsContext *sws; | ||
| 373 | SwsInternal *c; | ||
| 374 | |||
| 375 | // padded | ||
| 376 | 13 | LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]); | |
| 377 | 13 | LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]); | |
| 378 | 13 | LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]); | |
| 379 | |||
| 380 | // padded | ||
| 381 | 13 | LOCAL_ALIGNED_32(int16_t, filter, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]); | |
| 382 | 13 | LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]); | |
| 383 | 13 | LOCAL_ALIGNED_32(int16_t, filterAvx2, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]); | |
| 384 | 13 | LOCAL_ALIGNED_32(int32_t, filterPosAvx, [SRC_PIXELS]); | |
| 385 | |||
| 386 | // The dst parameter here is either int16_t or int32_t but we use void* to | ||
| 387 | // just cover both cases. | ||
| 388 | 13 | declare_func(void, SwsInternal *c, int16_t *dst, int dstW, | |
| 389 | const uint8_t *src, const int16_t *filter, | ||
| 390 | const int32_t *filterPos, int filterSize); | ||
| 391 | |||
| 392 | 13 | sws = sws_alloc_context(); | |
| 393 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 13 times.
|
13 | if (sws_init_context(sws, NULL, NULL) < 0) |
| 394 | ✗ | fail(); | |
| 395 | |||
| 396 | 13 | c = sws_internal(sws); | |
| 397 |
2/2✓ Branch 1 taken 1794 times.
✓ Branch 2 taken 13 times.
|
1807 | randomize_buffers(src, SRC_PIXELS + MAX_FILTER_WIDTH - 1); |
| 398 | |||
| 399 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 13 times.
|
39 | for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) { |
| 400 |
2/2✓ Branch 0 taken 156 times.
✓ Branch 1 taken 26 times.
|
182 | for (fsi = 0; fsi < FILTER_SIZES; fsi++) { |
| 401 |
2/2✓ Branch 0 taken 936 times.
✓ Branch 1 taken 156 times.
|
1092 | for (dstWi = 0; dstWi < FF_ARRAY_ELEMS(input_sizes); dstWi++) { |
| 402 | 936 | width = filter_sizes[fsi]; | |
| 403 | |||
| 404 | 936 | c->srcBpc = hscale_pairs[hpi][0]; | |
| 405 | 936 | c->dstBpc = hscale_pairs[hpi][1]; | |
| 406 | 936 | c->hLumFilterSize = c->hChrFilterSize = width; | |
| 407 | |||
| 408 |
2/2✓ Branch 0 taken 479232 times.
✓ Branch 1 taken 936 times.
|
480168 | for (i = 0; i < SRC_PIXELS; i++) { |
| 409 | 479232 | filterPos[i] = i; | |
| 410 | 479232 | filterPosAvx[i] = i; | |
| 411 | |||
| 412 | // These filter coefficients are chosen to try break two corner | ||
| 413 | // cases, namely: | ||
| 414 | // | ||
| 415 | // - Negative filter coefficients. The filters output signed | ||
| 416 | // values, and it should be possible to end up with negative | ||
| 417 | // output values. | ||
| 418 | // | ||
| 419 | // - Positive clipping. The hscale filter function has clipping | ||
| 420 | // at (1<<15) - 1 | ||
| 421 | // | ||
| 422 | // The coefficients sum to the 1.0 point for the hscale | ||
| 423 | // functions (1 << 14). | ||
| 424 | |||
| 425 |
2/2✓ Branch 0 taken 8945664 times.
✓ Branch 1 taken 479232 times.
|
9424896 | for (j = 0; j < width; j++) { |
| 426 | 8945664 | filter[i * width + j] = -((1 << 14) / (width - 1)); | |
| 427 | } | ||
| 428 | 479232 | filter[i * width + (rnd() % width)] = ((1 << 15) - 1); | |
| 429 | } | ||
| 430 | |||
| 431 |
2/2✓ Branch 0 taken 37440 times.
✓ Branch 1 taken 936 times.
|
38376 | for (i = 0; i < MAX_FILTER_WIDTH; i++) { |
| 432 | // These values should be unused in SIMD implementations but | ||
| 433 | // may still be read, random coefficients here should help show | ||
| 434 | // issues where they are used in error. | ||
| 435 | |||
| 436 | 37440 | filter[SRC_PIXELS * width + i] = rnd(); | |
| 437 | } | ||
| 438 | 936 | sws->dst_w = c->chrDstW = input_sizes[dstWi]; | |
| 439 | 936 | ff_sws_init_scale(c); | |
| 440 | 936 | memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH)); | |
| 441 | 936 | ff_shuffle_filter_coefficients(c, filterPosAvx, width, filterAvx2, sws->dst_w); | |
| 442 | |||
| 443 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 936 times.
|
936 | av_assert0(c->hyScale == c->hcScale); |
| 444 |
2/2✓ Branch 3 taken 288 times.
✓ Branch 4 taken 648 times.
|
936 | if (check_func(c->hcScale, "hscale_%d_to_%d__fs_%d_dstW_%d", c->srcBpc, c->dstBpc + 1, width, sws->dst_w)) { |
| 445 | 288 | memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0])); | |
| 446 | 288 | memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0])); | |
| 447 | |||
| 448 | 288 | call_ref(NULL, (int16_t *)dst0, sws->dst_w, src, filter, filterPos, width); | |
| 449 | 288 | call_new(NULL, (int16_t *)dst1, sws->dst_w, src, filterAvx2, filterPosAvx, width); | |
| 450 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 288 times.
|
288 | if (memcmp(dst0, dst1, sws->dst_w * sizeof(dst0[0]))) |
| 451 | ✗ | fail(); | |
| 452 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 288 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
288 | bench_new(NULL, (int16_t *)dst0, sws->dst_w, src, filter, filterPosAvx, width); |
| 453 | } | ||
| 454 | } | ||
| 455 | } | ||
| 456 | } | ||
| 457 | 13 | sws_freeContext(sws); | |
| 458 | 13 | } | |
| 459 | |||
| 460 | 13 | void checkasm_check_sw_scale(void) | |
| 461 | { | ||
| 462 | 13 | check_hscale(); | |
| 463 | 13 | report("hscale"); | |
| 464 | 13 | check_yuv2yuv1(0); | |
| 465 | 13 | check_yuv2yuv1(1); | |
| 466 | 13 | report("yuv2yuv1"); | |
| 467 | 13 | check_yuv2yuvX(0, 8, AV_PIX_FMT_YUV420P); | |
| 468 | 13 | check_yuv2yuvX(1, 8, AV_PIX_FMT_YUV420P); | |
| 469 | 13 | report("yuv2yuvX_8"); | |
| 470 | 13 | check_yuv2yuvX(0, 9, AV_PIX_FMT_YUV420P9LE); | |
| 471 | 13 | check_yuv2yuvX(1, 9, AV_PIX_FMT_YUV420P9LE); | |
| 472 | 13 | report("yuv2yuvX_9LE"); | |
| 473 | 13 | check_yuv2yuvX(0, 9, AV_PIX_FMT_YUV420P9BE); | |
| 474 | 13 | check_yuv2yuvX(1, 9, AV_PIX_FMT_YUV420P9BE); | |
| 475 | 13 | report("yuv2yuvX_9BE"); | |
| 476 | 13 | check_yuv2yuvX(0, 10, AV_PIX_FMT_YUV420P10LE); | |
| 477 | 13 | check_yuv2yuvX(1, 10, AV_PIX_FMT_YUV420P10LE); | |
| 478 | 13 | report("yuv2yuvX_10LE"); | |
| 479 | 13 | check_yuv2yuvX(0, 10, AV_PIX_FMT_YUV420P10BE); | |
| 480 | 13 | check_yuv2yuvX(1, 10, AV_PIX_FMT_YUV420P10BE); | |
| 481 | 13 | report("yuv2yuvX_10BE"); | |
| 482 | 13 | check_yuv2yuvX(0, 12, AV_PIX_FMT_YUV420P12LE); | |
| 483 | 13 | check_yuv2yuvX(1, 12, AV_PIX_FMT_YUV420P12LE); | |
| 484 | 13 | report("yuv2yuvX_12LE"); | |
| 485 | 13 | check_yuv2yuvX(0, 12, AV_PIX_FMT_YUV420P12BE); | |
| 486 | 13 | check_yuv2yuvX(1, 12, AV_PIX_FMT_YUV420P12BE); | |
| 487 | 13 | report("yuv2yuvX_12BE"); | |
| 488 | 13 | check_yuv2yuvX(0, 14, AV_PIX_FMT_YUV420P14LE); | |
| 489 | 13 | check_yuv2yuvX(1, 14, AV_PIX_FMT_YUV420P14LE); | |
| 490 | 13 | report("yuv2yuvX_14LE"); | |
| 491 | 13 | check_yuv2yuvX(0, 14, AV_PIX_FMT_YUV420P14BE); | |
| 492 | 13 | check_yuv2yuvX(1, 14, AV_PIX_FMT_YUV420P14BE); | |
| 493 | 13 | report("yuv2yuvX_14BE"); | |
| 494 | 13 | check_yuv2nv12cX(0); | |
| 495 | 13 | check_yuv2nv12cX(1); | |
| 496 | 13 | report("yuv2nv12cX"); | |
| 497 | 13 | } | |
| 498 |