| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * | ||
| 3 | * This file is part of FFmpeg. | ||
| 4 | * | ||
| 5 | * FFmpeg is free software; you can redistribute it and/or modify | ||
| 6 | * it under the terms of the GNU General Public License as published by | ||
| 7 | * the Free Software Foundation; either version 2 of the License, or | ||
| 8 | * (at your option) any later version. | ||
| 9 | * | ||
| 10 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 13 | * GNU General Public License for more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along | ||
| 16 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
| 17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 18 | */ | ||
| 19 | |||
| 20 | #include <string.h> | ||
| 21 | |||
| 22 | #include "libavutil/common.h" | ||
| 23 | #include "libavutil/intreadwrite.h" | ||
| 24 | #include "libavutil/mem_internal.h" | ||
| 25 | #include "libavutil/pixdesc.h" | ||
| 26 | |||
| 27 | #include "libswscale/rgb2rgb.h" | ||
| 28 | #include "libswscale/swscale.h" | ||
| 29 | #include "libswscale/swscale_internal.h" | ||
| 30 | |||
| 31 | #include "checkasm.h" | ||
| 32 | |||
| 33 | #define randomize_buffers(buf, size) \ | ||
| 34 | do { \ | ||
| 35 | int j; \ | ||
| 36 | for (j = 0; j < size; j+=4) \ | ||
| 37 | AV_WN32(buf + j, rnd()); \ | ||
| 38 | } while (0) | ||
| 39 | |||
| 40 | static const uint8_t width[] = {12, 16, 20, 32, 36, 128}; | ||
| 41 | static const struct {uint8_t w, h, s;} planes[] = { | ||
| 42 | {12,16,12}, {16,16,16}, {20,23,25}, {32,18,48}, {8,128,16}, {128,128,128} | ||
| 43 | }; | ||
| 44 | |||
| 45 | #define MAX_STRIDE 128 | ||
| 46 | #define MAX_HEIGHT 128 | ||
| 47 | |||
| 48 | 126 | static void check_shuffle_bytes(void * func, const char * report) | |
| 49 | { | ||
| 50 | int i; | ||
| 51 | 126 | LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE]); | |
| 52 | 126 | LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE]); | |
| 53 | 126 | LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE]); | |
| 54 | 126 | LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE]); | |
| 55 | |||
| 56 | 126 | declare_func(void, const uint8_t *src, uint8_t *dst, int src_size); | |
| 57 | |||
| 58 | 126 | memset(dst0, 0, MAX_STRIDE); | |
| 59 | 126 | memset(dst1, 0, MAX_STRIDE); | |
| 60 |
2/2✓ Branch 1 taken 4032 times.
✓ Branch 2 taken 126 times.
|
4158 | randomize_buffers(src0, MAX_STRIDE); |
| 61 | 126 | memcpy(src1, src0, MAX_STRIDE); | |
| 62 | |||
| 63 |
2/2✓ Branch 3 taken 27 times.
✓ Branch 4 taken 99 times.
|
126 | if (check_func(func, "%s", report)) { |
| 64 |
2/2✓ Branch 0 taken 162 times.
✓ Branch 1 taken 27 times.
|
189 | for (i = 0; i < 6; i ++) { |
| 65 | 162 | call_ref(src0, dst0, width[i]); | |
| 66 | 162 | call_new(src1, dst1, width[i]); | |
| 67 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 162 times.
|
162 | if (memcmp(dst0, dst1, MAX_STRIDE)) |
| 68 | ✗ | fail(); | |
| 69 | } | ||
| 70 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 27 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
27 | bench_new(src0, dst0, width[5]); |
| 71 | } | ||
| 72 | 126 | } | |
| 73 | |||
| 74 | 14 | static void check_uyvy_to_422p(void) | |
| 75 | { | ||
| 76 | int i; | ||
| 77 | |||
| 78 | 14 | LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE * MAX_HEIGHT * 2]); | |
| 79 | 14 | LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE * MAX_HEIGHT * 2]); | |
| 80 | 14 | LOCAL_ALIGNED_32(uint8_t, dst_y_0, [MAX_STRIDE * MAX_HEIGHT]); | |
| 81 | 14 | LOCAL_ALIGNED_32(uint8_t, dst_y_1, [MAX_STRIDE * MAX_HEIGHT]); | |
| 82 | 14 | LOCAL_ALIGNED_32(uint8_t, dst_u_0, [(MAX_STRIDE/2) * MAX_HEIGHT]); | |
| 83 | 14 | LOCAL_ALIGNED_32(uint8_t, dst_u_1, [(MAX_STRIDE/2) * MAX_HEIGHT]); | |
| 84 | 14 | LOCAL_ALIGNED_32(uint8_t, dst_v_0, [(MAX_STRIDE/2) * MAX_HEIGHT]); | |
| 85 | 14 | LOCAL_ALIGNED_32(uint8_t, dst_v_1, [(MAX_STRIDE/2) * MAX_HEIGHT]); | |
| 86 | |||
| 87 | 14 | declare_func(void, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
| 88 | const uint8_t *src, int width, int height, | ||
| 89 | int lumStride, int chromStride, int srcStride); | ||
| 90 | |||
| 91 |
2/2✓ Branch 1 taken 114688 times.
✓ Branch 2 taken 14 times.
|
114702 | randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT * 2); |
| 92 | 14 | memcpy(src1, src0, MAX_STRIDE * MAX_HEIGHT * 2); | |
| 93 | |||
| 94 |
2/2✓ Branch 3 taken 4 times.
✓ Branch 4 taken 10 times.
|
14 | if (check_func(uyvytoyuv422, "uyvytoyuv422")) { |
| 95 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 4 times.
|
28 | for (i = 0; i < 6; i ++) { |
| 96 | 24 | memset(dst_y_0, 0, MAX_STRIDE * MAX_HEIGHT); | |
| 97 | 24 | memset(dst_y_1, 0, MAX_STRIDE * MAX_HEIGHT); | |
| 98 | 24 | memset(dst_u_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT); | |
| 99 | 24 | memset(dst_u_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT); | |
| 100 | 24 | memset(dst_v_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT); | |
| 101 | 24 | memset(dst_v_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT); | |
| 102 | |||
| 103 | 24 | call_ref(dst_y_0, dst_u_0, dst_v_0, src0, planes[i].w, planes[i].h, | |
| 104 | MAX_STRIDE, MAX_STRIDE / 2, planes[i].s); | ||
| 105 | 24 | call_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[i].w, planes[i].h, | |
| 106 | MAX_STRIDE, MAX_STRIDE / 2, planes[i].s); | ||
| 107 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (memcmp(dst_y_0, dst_y_1, MAX_STRIDE * MAX_HEIGHT) || |
| 108 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | memcmp(dst_u_0, dst_u_1, (MAX_STRIDE/2) * MAX_HEIGHT) || |
| 109 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | memcmp(dst_v_0, dst_v_1, (MAX_STRIDE/2) * MAX_HEIGHT)) |
| 110 | ✗ | fail(); | |
| 111 | } | ||
| 112 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
4 | bench_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[5].w, planes[5].h, |
| 113 | MAX_STRIDE, MAX_STRIDE / 2, planes[5].s); | ||
| 114 | } | ||
| 115 | 14 | } | |
| 116 | |||
| 117 | #define NUM_LINES 5 | ||
| 118 | #define MAX_LINE_SIZE 1920 | ||
| 119 | #define BUFSIZE (NUM_LINES * MAX_LINE_SIZE) | ||
| 120 | |||
| 121 | 15 | static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy) | |
| 122 | { | ||
| 123 |
2/2✓ Branch 0 taken 72000 times.
✓ Branch 1 taken 15 times.
|
72015 | for (size_t i = 0; i < n; i++) { |
| 124 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 72000 times.
|
72000 | if (abs(ref[i] - test[i]) > accuracy) |
| 125 | ✗ | return 1; | |
| 126 | } | ||
| 127 | 15 | return 0; | |
| 128 | } | ||
| 129 | |||
| 130 | 14 | static void check_rgb24toyv12(SwsContext *sws) | |
| 131 | { | ||
| 132 | static const int input_sizes[] = {16, 128, 512, MAX_LINE_SIZE, -MAX_LINE_SIZE}; | ||
| 133 | 14 | SwsInternal *ctx = sws_internal(sws); | |
| 134 | |||
| 135 | 14 | LOCAL_ALIGNED_32(uint8_t, src, [BUFSIZE * 3]); | |
| 136 | 14 | LOCAL_ALIGNED_32(uint8_t, buf_y_0, [BUFSIZE]); | |
| 137 | 14 | LOCAL_ALIGNED_32(uint8_t, buf_y_1, [BUFSIZE]); | |
| 138 | 14 | LOCAL_ALIGNED_32(uint8_t, buf_u_0, [BUFSIZE / 4]); | |
| 139 | 14 | LOCAL_ALIGNED_32(uint8_t, buf_u_1, [BUFSIZE / 4]); | |
| 140 | 14 | LOCAL_ALIGNED_32(uint8_t, buf_v_0, [BUFSIZE / 4]); | |
| 141 | 14 | LOCAL_ALIGNED_32(uint8_t, buf_v_1, [BUFSIZE / 4]); | |
| 142 | |||
| 143 | 14 | declare_func(void, const uint8_t *src, uint8_t *ydst, uint8_t *udst, | |
| 144 | uint8_t *vdst, int width, int height, int lumStride, | ||
| 145 | int chromStride, int srcStride, const int32_t *rgb2yuv); | ||
| 146 | |||
| 147 |
2/2✓ Branch 1 taken 100800 times.
✓ Branch 2 taken 14 times.
|
100814 | randomize_buffers(src, BUFSIZE * 3); |
| 148 | |||
| 149 |
2/2✓ Branch 0 taken 70 times.
✓ Branch 1 taken 14 times.
|
84 | for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++) { |
| 150 | 70 | int input_size = input_sizes[isi]; | |
| 151 | 70 | int negstride = input_size < 0; | |
| 152 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 56 times.
|
70 | const char *negstride_str = negstride ? "_negstride" : ""; |
| 153 | 70 | int width = FFABS(input_size); | |
| 154 | 70 | int linesize = width + 32; | |
| 155 | /* calculate height based on specified width to use the entire buffer. */ | ||
| 156 | 70 | int height = (BUFSIZE / linesize) & ~1; | |
| 157 | 70 | uint8_t *src0 = src; | |
| 158 | 70 | uint8_t *src1 = src; | |
| 159 | 70 | uint8_t *dst_y_0 = buf_y_0; | |
| 160 | 70 | uint8_t *dst_y_1 = buf_y_1; | |
| 161 | 70 | uint8_t *dst_u_0 = buf_u_0; | |
| 162 | 70 | uint8_t *dst_u_1 = buf_u_1; | |
| 163 | 70 | uint8_t *dst_v_0 = buf_v_0; | |
| 164 | 70 | uint8_t *dst_v_1 = buf_v_1; | |
| 165 | |||
| 166 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 56 times.
|
70 | if (negstride) { |
| 167 | 14 | src0 += (height - 1) * (linesize * 3); | |
| 168 | 14 | src1 += (height - 1) * (linesize * 3); | |
| 169 | 14 | dst_y_0 += (height - 1) * linesize; | |
| 170 | 14 | dst_y_1 += (height - 1) * linesize; | |
| 171 | 14 | dst_u_0 += ((height / 2) - 1) * (linesize / 2); | |
| 172 | 14 | dst_u_1 += ((height / 2) - 1) * (linesize / 2); | |
| 173 | 14 | dst_v_0 += ((height / 2) - 1) * (linesize / 2); | |
| 174 | 14 | dst_v_1 += ((height / 2) - 1) * (linesize / 2); | |
| 175 | 14 | linesize *= -1; | |
| 176 | } | ||
| 177 | |||
| 178 |
2/2✓ Branch 3 taken 5 times.
✓ Branch 4 taken 65 times.
|
70 | if (check_func(ff_rgb24toyv12, "rgb24toyv12_%d_%d%s", width, height, negstride_str)) { |
| 179 | 5 | memset(buf_y_0, 0xFF, BUFSIZE); | |
| 180 | 5 | memset(buf_y_1, 0xFF, BUFSIZE); | |
| 181 | 5 | memset(buf_u_0, 0xFF, BUFSIZE / 4); | |
| 182 | 5 | memset(buf_u_1, 0xFF, BUFSIZE / 4); | |
| 183 | 5 | memset(buf_v_0, 0xFF, BUFSIZE / 4); | |
| 184 | 5 | memset(buf_v_1, 0xFF, BUFSIZE / 4); | |
| 185 | |||
| 186 | 5 | call_ref(src0, dst_y_0, dst_u_0, dst_v_0, width, height, | |
| 187 | linesize, linesize / 2, linesize * 3, ctx->input_rgb2yuv_table); | ||
| 188 | 5 | call_new(src1, dst_y_1, dst_u_1, dst_v_1, width, height, | |
| 189 | linesize, linesize / 2, linesize * 3, ctx->input_rgb2yuv_table); | ||
| 190 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | if (cmp_off_by_n(buf_y_0, buf_y_1, BUFSIZE, 1) || |
| 191 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | cmp_off_by_n(buf_u_0, buf_u_1, BUFSIZE / 4, 1) || |
| 192 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 5 times.
|
5 | cmp_off_by_n(buf_v_0, buf_v_1, BUFSIZE / 4, 1)) |
| 193 | ✗ | fail(); | |
| 194 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 5 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
5 | bench_new(src1, dst_y_1, dst_u_1, dst_v_1, width, height, |
| 195 | linesize, linesize / 2, linesize * 3, ctx->input_rgb2yuv_table); | ||
| 196 | } | ||
| 197 | } | ||
| 198 | 14 | } | |
| 199 | |||
| 200 | #undef NUM_LINES | ||
| 201 | #undef MAX_LINE_SIZE | ||
| 202 | #undef BUFSIZE | ||
| 203 | |||
| 204 | 14 | static void check_interleave_bytes(void) | |
| 205 | { | ||
| 206 | 14 | LOCAL_ALIGNED_16(uint8_t, src0_buf, [MAX_STRIDE*MAX_HEIGHT+1]); | |
| 207 | 14 | LOCAL_ALIGNED_16(uint8_t, src1_buf, [MAX_STRIDE*MAX_HEIGHT+1]); | |
| 208 | 14 | LOCAL_ALIGNED_16(uint8_t, dst0_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]); | |
| 209 | 14 | LOCAL_ALIGNED_16(uint8_t, dst1_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]); | |
| 210 | // Intentionally using unaligned buffers, as this function doesn't have | ||
| 211 | // any alignment requirements. | ||
| 212 | 14 | uint8_t *src0 = src0_buf + 1; | |
| 213 | 14 | uint8_t *src1 = src1_buf + 1; | |
| 214 | 14 | uint8_t *dst0 = dst0_buf + 2; | |
| 215 | 14 | uint8_t *dst1 = dst1_buf + 2; | |
| 216 | |||
| 217 | 14 | declare_func(void, const uint8_t *, const uint8_t *, | |
| 218 | uint8_t *, int, int, int, int, int); | ||
| 219 | |||
| 220 |
2/2✓ Branch 1 taken 57344 times.
✓ Branch 2 taken 14 times.
|
57358 | randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT); |
| 221 |
2/2✓ Branch 1 taken 57344 times.
✓ Branch 2 taken 14 times.
|
57358 | randomize_buffers(src1, MAX_STRIDE * MAX_HEIGHT); |
| 222 | |||
| 223 |
2/2✓ Branch 3 taken 2 times.
✓ Branch 4 taken 12 times.
|
14 | if (check_func(interleaveBytes, "interleave_bytes")) { |
| 224 |
2/2✓ Branch 0 taken 34 times.
✓ Branch 1 taken 2 times.
|
36 | for (int i = 0; i <= 16; i++) { |
| 225 | // Try all widths [1,16], and try one random width. | ||
| 226 | |||
| 227 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 32 times.
|
34 | int w = i > 0 ? i : (1 + (rnd() % (MAX_STRIDE-2))); |
| 228 | 34 | int h = 1 + (rnd() % (MAX_HEIGHT-2)); | |
| 229 | |||
| 230 | 34 | int src0_offset = 0, src0_stride = MAX_STRIDE; | |
| 231 | 34 | int src1_offset = 0, src1_stride = MAX_STRIDE; | |
| 232 | 34 | int dst_offset = 0, dst_stride = 2 * MAX_STRIDE; | |
| 233 | |||
| 234 | 34 | memset(dst0, 0, 2 * MAX_STRIDE * MAX_HEIGHT); | |
| 235 | 34 | memset(dst1, 0, 2 * MAX_STRIDE * MAX_HEIGHT); | |
| 236 | |||
| 237 | // Try different combinations of negative strides | ||
| 238 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 18 times.
|
34 | if (i & 1) { |
| 239 | 16 | src0_offset = (h-1)*src0_stride; | |
| 240 | 16 | src0_stride = -src0_stride; | |
| 241 | } | ||
| 242 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 18 times.
|
34 | if (i & 2) { |
| 243 | 16 | src1_offset = (h-1)*src1_stride; | |
| 244 | 16 | src1_stride = -src1_stride; | |
| 245 | } | ||
| 246 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 18 times.
|
34 | if (i & 4) { |
| 247 | 16 | dst_offset = (h-1)*dst_stride; | |
| 248 | 16 | dst_stride = -dst_stride; | |
| 249 | } | ||
| 250 | |||
| 251 | 34 | call_ref(src0 + src0_offset, src1 + src1_offset, dst0 + dst_offset, | |
| 252 | w, h, src0_stride, src1_stride, dst_stride); | ||
| 253 | 34 | call_new(src0 + src0_offset, src1 + src1_offset, dst1 + dst_offset, | |
| 254 | w, h, src0_stride, src1_stride, dst_stride); | ||
| 255 | // Check a one pixel-pair edge around the destination area, | ||
| 256 | // to catch overwrites past the end. | ||
| 257 | 34 | checkasm_check(uint8_t, dst0, 2*MAX_STRIDE, dst1, 2*MAX_STRIDE, | |
| 258 | 2 * w + 2, h + 1, "dst"); | ||
| 259 | } | ||
| 260 | |||
| 261 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
2 | bench_new(src0, src1, dst1, 127, MAX_HEIGHT, |
| 262 | MAX_STRIDE, MAX_STRIDE, 2*MAX_STRIDE); | ||
| 263 | } | ||
| 264 |
2/2✓ Branch 3 taken 2 times.
✓ Branch 4 taken 12 times.
|
14 | if (check_func(interleaveBytes, "interleave_bytes_aligned")) { |
| 265 | // Bench the function in a more typical case, with aligned | ||
| 266 | // buffers and widths. | ||
| 267 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
2 | bench_new(src0_buf, src1_buf, dst1_buf, 128, MAX_HEIGHT, |
| 268 | MAX_STRIDE, MAX_STRIDE, 2*MAX_STRIDE); | ||
| 269 | } | ||
| 270 | 14 | } | |
| 271 | |||
| 272 | 14 | static void check_deinterleave_bytes(void) | |
| 273 | { | ||
| 274 | 14 | LOCAL_ALIGNED_16(uint8_t, src_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]); | |
| 275 | 14 | LOCAL_ALIGNED_16(uint8_t, dst0_u_buf, [MAX_STRIDE*MAX_HEIGHT+1]); | |
| 276 | 14 | LOCAL_ALIGNED_16(uint8_t, dst0_v_buf, [MAX_STRIDE*MAX_HEIGHT+1]); | |
| 277 | 14 | LOCAL_ALIGNED_16(uint8_t, dst1_u_buf, [MAX_STRIDE*MAX_HEIGHT+1]); | |
| 278 | 14 | LOCAL_ALIGNED_16(uint8_t, dst1_v_buf, [MAX_STRIDE*MAX_HEIGHT+1]); | |
| 279 | // Intentionally using unaligned buffers, as this function doesn't have | ||
| 280 | // any alignment requirements. | ||
| 281 | 14 | uint8_t *src = src_buf + 2; | |
| 282 | 14 | uint8_t *dst0_u = dst0_u_buf + 1; | |
| 283 | 14 | uint8_t *dst0_v = dst0_v_buf + 1; | |
| 284 | 14 | uint8_t *dst1_u = dst1_u_buf + 1; | |
| 285 | 14 | uint8_t *dst1_v = dst1_v_buf + 1; | |
| 286 | |||
| 287 | 14 | declare_func(void, const uint8_t *src, uint8_t *dst1, uint8_t *dst2, | |
| 288 | int width, int height, int srcStride, | ||
| 289 | int dst1Stride, int dst2Stride); | ||
| 290 | |||
| 291 |
2/2✓ Branch 1 taken 114688 times.
✓ Branch 2 taken 14 times.
|
114702 | randomize_buffers(src, 2*MAX_STRIDE*MAX_HEIGHT); |
| 292 | |||
| 293 |
2/2✓ Branch 3 taken 3 times.
✓ Branch 4 taken 11 times.
|
14 | if (check_func(deinterleaveBytes, "deinterleave_bytes")) { |
| 294 |
2/2✓ Branch 0 taken 51 times.
✓ Branch 1 taken 3 times.
|
54 | for (int i = 0; i <= 16; i++) { |
| 295 | // Try all widths [1,16], and try one random width. | ||
| 296 | |||
| 297 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 48 times.
|
51 | int w = i > 0 ? i : (1 + (rnd() % (MAX_STRIDE-2))); |
| 298 | 51 | int h = 1 + (rnd() % (MAX_HEIGHT-2)); | |
| 299 | |||
| 300 | 51 | int src_offset = 0, src_stride = 2 * MAX_STRIDE; | |
| 301 | 51 | int dst_u_offset = 0, dst_u_stride = MAX_STRIDE; | |
| 302 | 51 | int dst_v_offset = 0, dst_v_stride = MAX_STRIDE; | |
| 303 | |||
| 304 | 51 | memset(dst0_u, 0, MAX_STRIDE * MAX_HEIGHT); | |
| 305 | 51 | memset(dst0_v, 0, MAX_STRIDE * MAX_HEIGHT); | |
| 306 | 51 | memset(dst1_u, 0, MAX_STRIDE * MAX_HEIGHT); | |
| 307 | 51 | memset(dst1_v, 0, MAX_STRIDE * MAX_HEIGHT); | |
| 308 | |||
| 309 | // Try different combinations of negative strides | ||
| 310 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 27 times.
|
51 | if (i & 1) { |
| 311 | 24 | src_offset = (h-1)*src_stride; | |
| 312 | 24 | src_stride = -src_stride; | |
| 313 | } | ||
| 314 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 27 times.
|
51 | if (i & 2) { |
| 315 | 24 | dst_u_offset = (h-1)*dst_u_stride; | |
| 316 | 24 | dst_u_stride = -dst_u_stride; | |
| 317 | } | ||
| 318 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 27 times.
|
51 | if (i & 4) { |
| 319 | 24 | dst_v_offset = (h-1)*dst_v_stride; | |
| 320 | 24 | dst_v_stride = -dst_v_stride; | |
| 321 | } | ||
| 322 | |||
| 323 | 51 | call_ref(src + src_offset, dst0_u + dst_u_offset, dst0_v + dst_v_offset, | |
| 324 | w, h, src_stride, dst_u_stride, dst_v_stride); | ||
| 325 | 51 | call_new(src + src_offset, dst1_u + dst_u_offset, dst1_v + dst_v_offset, | |
| 326 | w, h, src_stride, dst_u_stride, dst_v_stride); | ||
| 327 | // Check a one pixel-pair edge around the destination area, | ||
| 328 | // to catch overwrites past the end. | ||
| 329 | 51 | checkasm_check(uint8_t, dst0_u, MAX_STRIDE, dst1_u, MAX_STRIDE, | |
| 330 | w + 1, h + 1, "dst_u"); | ||
| 331 | 51 | checkasm_check(uint8_t, dst0_v, MAX_STRIDE, dst1_v, MAX_STRIDE, | |
| 332 | w + 1, h + 1, "dst_v"); | ||
| 333 | } | ||
| 334 | |||
| 335 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
3 | bench_new(src, dst1_u, dst1_v, 127, MAX_HEIGHT, |
| 336 | 2*MAX_STRIDE, MAX_STRIDE, MAX_STRIDE); | ||
| 337 | } | ||
| 338 |
2/2✓ Branch 3 taken 3 times.
✓ Branch 4 taken 11 times.
|
14 | if (check_func(deinterleaveBytes, "deinterleave_bytes_aligned")) { |
| 339 | // Bench the function in a more typical case, with aligned | ||
| 340 | // buffers and widths. | ||
| 341 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
3 | bench_new(src_buf, dst1_u_buf, dst1_v_buf, 128, MAX_HEIGHT, |
| 342 | 2*MAX_STRIDE, MAX_STRIDE, MAX_STRIDE); | ||
| 343 | } | ||
| 344 | 14 | } | |
| 345 | |||
| 346 | #define MAX_LINE_SIZE 1920 | ||
| 347 | static const int input_sizes[] = {8, 128, 1080, MAX_LINE_SIZE}; | ||
| 348 | static const enum AVPixelFormat rgb_formats[] = { | ||
| 349 | AV_PIX_FMT_RGB24, | ||
| 350 | AV_PIX_FMT_BGR24, | ||
| 351 | AV_PIX_FMT_RGBA, | ||
| 352 | AV_PIX_FMT_BGRA, | ||
| 353 | AV_PIX_FMT_ABGR, | ||
| 354 | AV_PIX_FMT_ARGB, | ||
| 355 | }; | ||
| 356 | |||
| 357 | 14 | static void check_rgb_to_y(SwsContext *sws) | |
| 358 | { | ||
| 359 | 14 | SwsInternal *ctx = sws_internal(sws); | |
| 360 | |||
| 361 | 14 | LOCAL_ALIGNED_16(uint8_t, src24, [MAX_LINE_SIZE * 3]); | |
| 362 | 14 | LOCAL_ALIGNED_16(uint8_t, src32, [MAX_LINE_SIZE * 4]); | |
| 363 | 14 | LOCAL_ALIGNED_32(uint8_t, dst0_y, [MAX_LINE_SIZE * 2]); | |
| 364 | 14 | LOCAL_ALIGNED_32(uint8_t, dst1_y, [MAX_LINE_SIZE * 2]); | |
| 365 | |||
| 366 | 14 | declare_func(void, uint8_t *dst, const uint8_t *src, | |
| 367 | const uint8_t *unused1, const uint8_t *unused2, int width, | ||
| 368 | uint32_t *rgb2yuv, void *opq); | ||
| 369 | |||
| 370 |
2/2✓ Branch 1 taken 20160 times.
✓ Branch 2 taken 14 times.
|
20174 | randomize_buffers(src24, MAX_LINE_SIZE * 3); |
| 371 |
2/2✓ Branch 1 taken 26880 times.
✓ Branch 2 taken 14 times.
|
26894 | randomize_buffers(src32, MAX_LINE_SIZE * 4); |
| 372 | |||
| 373 |
2/2✓ Branch 0 taken 84 times.
✓ Branch 1 taken 14 times.
|
98 | for (int i = 0; i < FF_ARRAY_ELEMS(rgb_formats); i++) { |
| 374 | 84 | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(rgb_formats[i]); | |
| 375 | |||
| 376 | 84 | sws->src_format = rgb_formats[i]; | |
| 377 | 84 | ff_sws_init_scale(ctx); | |
| 378 | |||
| 379 |
2/2✓ Branch 0 taken 336 times.
✓ Branch 1 taken 84 times.
|
420 | for (int j = 0; j < FF_ARRAY_ELEMS(input_sizes); j++) { |
| 380 | 336 | int w = input_sizes[j]; | |
| 381 | |||
| 382 |
2/2✓ Branch 3 taken 104 times.
✓ Branch 4 taken 232 times.
|
336 | if (check_func(ctx->lumToYV12, "%s_to_y_%d", desc->name, w)) { |
| 383 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 64 times.
|
104 | const uint8_t *src = desc->nb_components == 3 ? src24 : src32; |
| 384 | 104 | memset(dst0_y, 0xFA, MAX_LINE_SIZE * 2); | |
| 385 | 104 | memset(dst1_y, 0xFA, MAX_LINE_SIZE * 2); | |
| 386 | |||
| 387 | 104 | call_ref(dst0_y, src, NULL, NULL, w, ctx->input_rgb2yuv_table, NULL); | |
| 388 | 104 | call_new(dst1_y, src, NULL, NULL, w, ctx->input_rgb2yuv_table, NULL); | |
| 389 | |||
| 390 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 104 times.
|
104 | if (memcmp(dst0_y, dst1_y, w * 2)) |
| 391 | ✗ | fail(); | |
| 392 | |||
| 393 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 40 times.
|
104 | if (desc->nb_components == 3 || |
| 394 | // only bench native endian formats | ||
| 395 |
4/4✓ Branch 0 taken 48 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 32 times.
|
64 | (sws->src_format == AV_PIX_FMT_RGB32 || sws->src_format == AV_PIX_FMT_RGB32_1)) |
| 396 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 72 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
72 | bench_new(dst1_y, src, NULL, NULL, w, ctx->input_rgb2yuv_table, NULL); |
| 397 | } | ||
| 398 | } | ||
| 399 | } | ||
| 400 | 14 | } | |
| 401 | |||
| 402 | 14 | static void check_rgb_to_uv(SwsContext *sws) | |
| 403 | { | ||
| 404 | 14 | SwsInternal *ctx = sws_internal(sws); | |
| 405 | |||
| 406 | 14 | LOCAL_ALIGNED_16(uint8_t, src24, [MAX_LINE_SIZE * 3]); | |
| 407 | 14 | LOCAL_ALIGNED_16(uint8_t, src32, [MAX_LINE_SIZE * 4]); | |
| 408 | 14 | LOCAL_ALIGNED_16(uint8_t, dst0_u, [MAX_LINE_SIZE * 2]); | |
| 409 | 14 | LOCAL_ALIGNED_16(uint8_t, dst0_v, [MAX_LINE_SIZE * 2]); | |
| 410 | 14 | LOCAL_ALIGNED_16(uint8_t, dst1_u, [MAX_LINE_SIZE * 2]); | |
| 411 | 14 | LOCAL_ALIGNED_16(uint8_t, dst1_v, [MAX_LINE_SIZE * 2]); | |
| 412 | |||
| 413 | 14 | declare_func(void, uint8_t *dstU, uint8_t *dstV, | |
| 414 | const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, | ||
| 415 | int width, uint32_t *pal, void *opq); | ||
| 416 | |||
| 417 |
2/2✓ Branch 1 taken 20160 times.
✓ Branch 2 taken 14 times.
|
20174 | randomize_buffers(src24, MAX_LINE_SIZE * 3); |
| 418 |
2/2✓ Branch 1 taken 26880 times.
✓ Branch 2 taken 14 times.
|
26894 | randomize_buffers(src32, MAX_LINE_SIZE * 4); |
| 419 | |||
| 420 |
2/2✓ Branch 0 taken 168 times.
✓ Branch 1 taken 14 times.
|
182 | for (int i = 0; i < 2 * FF_ARRAY_ELEMS(rgb_formats); i++) { |
| 421 | 168 | enum AVPixelFormat src_fmt = rgb_formats[i / 2]; | |
| 422 | 168 | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(src_fmt); | |
| 423 | |||
| 424 | 168 | ctx->chrSrcHSubSample = (i % 2) ? 0 : 1; | |
| 425 | 168 | sws->src_format = src_fmt; | |
| 426 |
2/2✓ Branch 0 taken 84 times.
✓ Branch 1 taken 84 times.
|
168 | sws->dst_format = ctx->chrSrcHSubSample ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_YUV444P; |
| 427 | 168 | ff_sws_init_scale(ctx); | |
| 428 | |||
| 429 |
2/2✓ Branch 0 taken 672 times.
✓ Branch 1 taken 168 times.
|
840 | for (int j = 0; j < FF_ARRAY_ELEMS(input_sizes); j++) { |
| 430 | 672 | int w = input_sizes[j] >> ctx->chrSrcHSubSample; | |
| 431 | |||
| 432 |
4/4✓ Branch 2 taken 336 times.
✓ Branch 3 taken 336 times.
✓ Branch 5 taken 128 times.
✓ Branch 6 taken 544 times.
|
672 | if (check_func(ctx->chrToYV12, "%s_to_uv%s_%d", desc->name, |
| 433 | ctx->chrSrcHSubSample ? "_half" : "", | ||
| 434 | input_sizes[j])) { | ||
| 435 |
2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 80 times.
|
128 | const uint8_t *src = desc->nb_components == 3 ? src24 : src32; |
| 436 | 128 | memset(dst0_u, 0xFF, MAX_LINE_SIZE * 2); | |
| 437 | 128 | memset(dst0_v, 0xFF, MAX_LINE_SIZE * 2); | |
| 438 | 128 | memset(dst1_u, 0xFF, MAX_LINE_SIZE * 2); | |
| 439 | 128 | memset(dst1_v, 0xFF, MAX_LINE_SIZE * 2); | |
| 440 | |||
| 441 | 128 | call_ref(dst0_u, dst0_v, NULL, src, src, w, ctx->input_rgb2yuv_table, NULL); | |
| 442 | 128 | call_new(dst1_u, dst1_v, NULL, src, src, w, ctx->input_rgb2yuv_table, NULL); | |
| 443 | |||
| 444 |
2/4✓ Branch 0 taken 128 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 128 times.
|
128 | if (memcmp(dst0_u, dst1_u, w * 2) || memcmp(dst0_v, dst1_v, w * 2)) |
| 445 | ✗ | fail(); | |
| 446 | |||
| 447 |
2/2✓ Branch 0 taken 80 times.
✓ Branch 1 taken 48 times.
|
128 | if (desc->nb_components == 3 || |
| 448 | // only bench native endian formats | ||
| 449 |
4/4✓ Branch 0 taken 60 times.
✓ Branch 1 taken 20 times.
✓ Branch 2 taken 20 times.
✓ Branch 3 taken 40 times.
|
80 | (sws->src_format == AV_PIX_FMT_RGB32 || sws->src_format == AV_PIX_FMT_RGB32_1)) |
| 450 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 88 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
88 | bench_new(dst1_u, dst1_v, NULL, src, src, w, ctx->input_rgb2yuv_table, NULL); |
| 451 | } | ||
| 452 | } | ||
| 453 | } | ||
| 454 | 14 | } | |
| 455 | |||
| 456 | 14 | static void check_rgba_to_a(SwsContext *sws) | |
| 457 | { | ||
| 458 | 14 | SwsInternal *ctx = sws_internal(sws); | |
| 459 | |||
| 460 | 14 | LOCAL_ALIGNED_16(uint8_t, src, [MAX_LINE_SIZE * 4]); | |
| 461 | 14 | LOCAL_ALIGNED_32(uint8_t, dst0_y, [MAX_LINE_SIZE * 2]); | |
| 462 | 14 | LOCAL_ALIGNED_32(uint8_t, dst1_y, [MAX_LINE_SIZE * 2]); | |
| 463 | |||
| 464 | 14 | declare_func(void, uint8_t *dst, const uint8_t *src1, | |
| 465 | const uint8_t *src2, const uint8_t *src3, int width, | ||
| 466 | uint32_t *rgb2yuv, void *opq); | ||
| 467 | |||
| 468 |
2/2✓ Branch 1 taken 26880 times.
✓ Branch 2 taken 14 times.
|
26894 | randomize_buffers(src, MAX_LINE_SIZE * 4); |
| 469 | |||
| 470 |
2/2✓ Branch 0 taken 84 times.
✓ Branch 1 taken 14 times.
|
98 | for (int i = 0; i < FF_ARRAY_ELEMS(rgb_formats); i++) { |
| 471 | 84 | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(rgb_formats[i]); | |
| 472 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 56 times.
|
84 | if (desc->nb_components < 4) |
| 473 | 28 | continue; | |
| 474 | |||
| 475 | 56 | sws->src_format = rgb_formats[i]; | |
| 476 | 56 | ff_sws_init_scale(ctx); | |
| 477 | |||
| 478 |
2/2✓ Branch 0 taken 224 times.
✓ Branch 1 taken 56 times.
|
280 | for (int j = 0; j < FF_ARRAY_ELEMS(input_sizes); j++) { |
| 479 | 224 | int w = input_sizes[j]; | |
| 480 | |||
| 481 |
1/2✗ Branch 3 not taken.
✓ Branch 4 taken 224 times.
|
224 | if (check_func(ctx->alpToYV12, "%s_to_y_%d", desc->name, w)) { |
| 482 | ✗ | memset(dst0_y, 0xFA, MAX_LINE_SIZE * 2); | |
| 483 | ✗ | memset(dst1_y, 0xFA, MAX_LINE_SIZE * 2); | |
| 484 | |||
| 485 | ✗ | call_ref(dst0_y, NULL, NULL, src, w, ctx->input_rgb2yuv_table, NULL); | |
| 486 | ✗ | call_new(dst1_y, NULL, NULL, src, w, ctx->input_rgb2yuv_table, NULL); | |
| 487 | |||
| 488 | ✗ | if (memcmp(dst0_y, dst1_y, w * 2)) | |
| 489 | ✗ | fail(); | |
| 490 | |||
| 491 | // only bench native endian formats | ||
| 492 | ✗ | if (sws->src_format == AV_PIX_FMT_RGB32 || sws->src_format == AV_PIX_FMT_RGB32_1) | |
| 493 | ✗ | bench_new(dst1_y, NULL, NULL, src, w, ctx->input_rgb2yuv_table, NULL); | |
| 494 | } | ||
| 495 | } | ||
| 496 | } | ||
| 497 | 14 | } | |
| 498 | |||
| 499 | |||
| 500 | static const int packed_rgb_fmts[] = { | ||
| 501 | AV_PIX_FMT_RGB24, | ||
| 502 | AV_PIX_FMT_BGR24, | ||
| 503 | AV_PIX_FMT_ARGB, | ||
| 504 | AV_PIX_FMT_RGBA, | ||
| 505 | AV_PIX_FMT_ABGR, | ||
| 506 | AV_PIX_FMT_BGRA, | ||
| 507 | AV_PIX_FMT_RGB48BE, | ||
| 508 | AV_PIX_FMT_RGB48LE, | ||
| 509 | AV_PIX_FMT_RGB565BE, | ||
| 510 | AV_PIX_FMT_RGB565LE, | ||
| 511 | AV_PIX_FMT_RGB555BE, | ||
| 512 | AV_PIX_FMT_RGB555LE, | ||
| 513 | AV_PIX_FMT_BGR565BE, | ||
| 514 | AV_PIX_FMT_BGR565LE, | ||
| 515 | AV_PIX_FMT_BGR555BE, | ||
| 516 | AV_PIX_FMT_BGR555LE, | ||
| 517 | AV_PIX_FMT_RGB444LE, | ||
| 518 | AV_PIX_FMT_RGB444BE, | ||
| 519 | AV_PIX_FMT_BGR444LE, | ||
| 520 | AV_PIX_FMT_BGR444BE, | ||
| 521 | AV_PIX_FMT_BGR48BE, | ||
| 522 | AV_PIX_FMT_BGR48LE, | ||
| 523 | AV_PIX_FMT_RGBA64BE, | ||
| 524 | AV_PIX_FMT_RGBA64LE, | ||
| 525 | AV_PIX_FMT_BGRA64BE, | ||
| 526 | AV_PIX_FMT_BGRA64LE, | ||
| 527 | AV_PIX_FMT_RGB8, | ||
| 528 | AV_PIX_FMT_BGR8, | ||
| 529 | AV_PIX_FMT_RGB4, | ||
| 530 | AV_PIX_FMT_BGR4, | ||
| 531 | AV_PIX_FMT_RGB4_BYTE, | ||
| 532 | AV_PIX_FMT_BGR4_BYTE, | ||
| 533 | }; | ||
| 534 | |||
| 535 | #define INPUT_SIZE 512 | ||
| 536 | |||
| 537 | 14 | static void check_yuv2packed1(void) | |
| 538 | { | ||
| 539 | static const int alpha_values[] = {0, 2048, 4096}; | ||
| 540 | |||
| 541 |
2/2✓ Branch 1 taken 13 times.
✓ Branch 2 taken 1 times.
|
14 | declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, |
| 542 | void, SwsInternal *c, const int16_t *lumSrc, | ||
| 543 | const int16_t *chrUSrc[2], const int16_t *chrVSrc[2], | ||
| 544 | const int16_t *alpSrc, uint8_t *dest, | ||
| 545 | int dstW, int uvalpha, int y); | ||
| 546 | |||
| 547 | const int16_t *luma; | ||
| 548 | const int16_t *chru[2]; | ||
| 549 | const int16_t *chrv[2]; | ||
| 550 | const int16_t *alpha; | ||
| 551 | |||
| 552 | 14 | LOCAL_ALIGNED_8(int32_t, src_y, [2 * INPUT_SIZE]); | |
| 553 | 14 | LOCAL_ALIGNED_8(int32_t, src_u, [2 * INPUT_SIZE]); | |
| 554 | 14 | LOCAL_ALIGNED_8(int32_t, src_v, [2 * INPUT_SIZE]); | |
| 555 | 14 | LOCAL_ALIGNED_8(int32_t, src_a, [2 * INPUT_SIZE]); | |
| 556 | |||
| 557 | 14 | LOCAL_ALIGNED_8(uint8_t, dst0, [INPUT_SIZE * sizeof(int32_t[4])]); | |
| 558 | 14 | LOCAL_ALIGNED_8(uint8_t, dst1, [INPUT_SIZE * sizeof(int32_t[4])]); | |
| 559 | |||
| 560 |
2/2✓ Branch 1 taken 14336 times.
✓ Branch 2 taken 14 times.
|
14350 | randomize_buffers((uint8_t*)src_y, 2 * INPUT_SIZE * sizeof(int32_t)); |
| 561 |
2/2✓ Branch 1 taken 14336 times.
✓ Branch 2 taken 14 times.
|
14350 | randomize_buffers((uint8_t*)src_u, 2 * INPUT_SIZE * sizeof(int32_t)); |
| 562 |
2/2✓ Branch 1 taken 14336 times.
✓ Branch 2 taken 14 times.
|
14350 | randomize_buffers((uint8_t*)src_v, 2 * INPUT_SIZE * sizeof(int32_t)); |
| 563 |
2/2✓ Branch 1 taken 14336 times.
✓ Branch 2 taken 14 times.
|
14350 | randomize_buffers((uint8_t*)src_a, 2 * INPUT_SIZE * sizeof(int32_t)); |
| 564 | |||
| 565 | /* Limit to 14 bit input range */ | ||
| 566 |
2/2✓ Branch 0 taken 14336 times.
✓ Branch 1 taken 14 times.
|
14350 | for (int i = 0; i < 2 * INPUT_SIZE; i++) { |
| 567 | 14336 | src_y[i] &= 0x3FFF3FFF; | |
| 568 | 14336 | src_a[i] &= 0x3FFF3FFF; | |
| 569 | 14336 | src_u[i] &= 0x3FFF3FFF; | |
| 570 | 14336 | src_v[i] &= 0x3FFF3FFF; | |
| 571 | } | ||
| 572 | |||
| 573 | 14 | luma = (int16_t *)src_y; | |
| 574 | 14 | alpha = (int16_t *)src_a; | |
| 575 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
|
42 | for (int i = 0; i < 2; i++) { |
| 576 | 28 | chru[i] = (int16_t *)(src_u + i*INPUT_SIZE); | |
| 577 | 28 | chrv[i] = (int16_t *)(src_v + i*INPUT_SIZE); | |
| 578 | } | ||
| 579 | |||
| 580 |
2/2✓ Branch 0 taken 448 times.
✓ Branch 1 taken 14 times.
|
462 | for (int fmi = 0; fmi < FF_ARRAY_ELEMS(packed_rgb_fmts); fmi++) { |
| 581 | 448 | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(packed_rgb_fmts[fmi]); | |
| 582 | 448 | int line_size = INPUT_SIZE * desc->comp[0].step; | |
| 583 | SwsContext *sws; | ||
| 584 | SwsInternal *c; | ||
| 585 | |||
| 586 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 420 times.
|
448 | if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM) |
| 587 | 28 | line_size = AV_CEIL_RSHIFT(line_size, 3); | |
| 588 | |||
| 589 | 896 | sws = sws_getContext(MAX_LINE_SIZE, MAX_LINE_SIZE, AV_PIX_FMT_YUV420P, | |
| 590 | 448 | MAX_LINE_SIZE, MAX_LINE_SIZE, packed_rgb_fmts[fmi], | |
| 591 | SWS_ACCURATE_RND | SWS_BITEXACT, NULL, NULL, NULL); | ||
| 592 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 448 times.
|
448 | if (!sws) |
| 593 | ✗ | fail(); | |
| 594 | |||
| 595 | 448 | c = sws_internal(sws); | |
| 596 | |||
| 597 |
2/2✓ Branch 0 taken 1344 times.
✓ Branch 1 taken 448 times.
|
1792 | for (int ai = 0; ai < FF_ARRAY_ELEMS(alpha_values); ai++) { |
| 598 | 1344 | const int chr_alpha = alpha_values[ai]; | |
| 599 |
2/2✓ Branch 3 taken 96 times.
✓ Branch 4 taken 1248 times.
|
1344 | if (check_func(c->yuv2packed1, "yuv2%s_1_%d_%d", desc->name, chr_alpha, INPUT_SIZE)) { |
| 600 | 96 | memset(dst0, 0xFF, INPUT_SIZE * sizeof(int32_t[4])); | |
| 601 | 96 | memset(dst1, 0xFF, INPUT_SIZE * sizeof(int32_t[4])); | |
| 602 | |||
| 603 | 96 | call_ref(c, luma, chru, chrv, alpha, dst0, INPUT_SIZE, chr_alpha, 0); | |
| 604 | 96 | call_new(c, luma, chru, chrv, alpha, dst1, INPUT_SIZE, chr_alpha, 0); | |
| 605 | |||
| 606 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 96 times.
|
96 | if (memcmp(dst0, dst1, line_size)) |
| 607 | ✗ | fail(); | |
| 608 | |||
| 609 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 96 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
96 | bench_new(c, luma, chru, chrv, alpha, dst1, INPUT_SIZE, chr_alpha, 0); |
| 610 | } | ||
| 611 | } | ||
| 612 | |||
| 613 | 448 | sws_freeContext(sws); | |
| 614 | } | ||
| 615 | 14 | } | |
| 616 | |||
| 617 | 14 | static void check_yuv2packed2(void) | |
| 618 | { | ||
| 619 | static const int alpha_values[] = {0, 2048, 4096}; | ||
| 620 | |||
| 621 |
2/2✓ Branch 1 taken 13 times.
✓ Branch 2 taken 1 times.
|
14 | declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, |
| 622 | void, SwsInternal *c, const int16_t *lumSrc[2], | ||
| 623 | const int16_t *chrUSrc[2], const int16_t *chrVSrc[2], | ||
| 624 | const int16_t *alpSrc[2], uint8_t *dest, | ||
| 625 | int dstW, int yalpha, int uvalpha, int y); | ||
| 626 | |||
| 627 | const int16_t *luma[2]; | ||
| 628 | const int16_t *chru[2]; | ||
| 629 | const int16_t *chrv[2]; | ||
| 630 | const int16_t *alpha[2]; | ||
| 631 | |||
| 632 | 14 | LOCAL_ALIGNED_8(int32_t, src_y, [2 * INPUT_SIZE]); | |
| 633 | 14 | LOCAL_ALIGNED_8(int32_t, src_u, [2 * INPUT_SIZE]); | |
| 634 | 14 | LOCAL_ALIGNED_8(int32_t, src_v, [2 * INPUT_SIZE]); | |
| 635 | 14 | LOCAL_ALIGNED_8(int32_t, src_a, [2 * INPUT_SIZE]); | |
| 636 | |||
| 637 | 14 | LOCAL_ALIGNED_8(uint8_t, dst0, [INPUT_SIZE * sizeof(int32_t[4])]); | |
| 638 | 14 | LOCAL_ALIGNED_8(uint8_t, dst1, [INPUT_SIZE * sizeof(int32_t[4])]); | |
| 639 | |||
| 640 |
2/2✓ Branch 1 taken 14336 times.
✓ Branch 2 taken 14 times.
|
14350 | randomize_buffers((uint8_t*)src_y, 2 * INPUT_SIZE * sizeof(int32_t)); |
| 641 |
2/2✓ Branch 1 taken 14336 times.
✓ Branch 2 taken 14 times.
|
14350 | randomize_buffers((uint8_t*)src_u, 2 * INPUT_SIZE * sizeof(int32_t)); |
| 642 |
2/2✓ Branch 1 taken 14336 times.
✓ Branch 2 taken 14 times.
|
14350 | randomize_buffers((uint8_t*)src_v, 2 * INPUT_SIZE * sizeof(int32_t)); |
| 643 |
2/2✓ Branch 1 taken 14336 times.
✓ Branch 2 taken 14 times.
|
14350 | randomize_buffers((uint8_t*)src_a, 2 * INPUT_SIZE * sizeof(int32_t)); |
| 644 | |||
| 645 | /* Limit to 14 bit input range */ | ||
| 646 |
2/2✓ Branch 0 taken 14336 times.
✓ Branch 1 taken 14 times.
|
14350 | for (int i = 0; i < 2 * INPUT_SIZE; i++) { |
| 647 | 14336 | src_y[i] &= 0x3FFF3FFF; | |
| 648 | 14336 | src_u[i] &= 0x3FFF3FFF; | |
| 649 | 14336 | src_v[i] &= 0x3FFF3FFF; | |
| 650 | 14336 | src_a[i] &= 0x3FFF3FFF; | |
| 651 | } | ||
| 652 | |||
| 653 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
|
42 | for (int i = 0; i < 2; i++) { |
| 654 | 28 | luma[i] = (int16_t *)(src_y + i*INPUT_SIZE); | |
| 655 | 28 | chru[i] = (int16_t *)(src_u + i*INPUT_SIZE); | |
| 656 | 28 | chrv[i] = (int16_t *)(src_v + i*INPUT_SIZE); | |
| 657 | 28 | alpha[i] = (int16_t *)(src_a + i*INPUT_SIZE); | |
| 658 | } | ||
| 659 | |||
| 660 |
2/2✓ Branch 0 taken 448 times.
✓ Branch 1 taken 14 times.
|
462 | for (int fmi = 0; fmi < FF_ARRAY_ELEMS(packed_rgb_fmts); fmi++) { |
| 661 | 448 | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(packed_rgb_fmts[fmi]); | |
| 662 | 448 | int line_size = INPUT_SIZE * desc->comp[0].step; | |
| 663 | SwsContext *sws; | ||
| 664 | SwsInternal *c; | ||
| 665 | |||
| 666 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 420 times.
|
448 | if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM) |
| 667 | 28 | line_size = AV_CEIL_RSHIFT(line_size, 3); | |
| 668 | |||
| 669 | 896 | sws = sws_getContext(MAX_LINE_SIZE, MAX_LINE_SIZE, AV_PIX_FMT_YUV420P, | |
| 670 | 448 | MAX_LINE_SIZE, MAX_LINE_SIZE, packed_rgb_fmts[fmi], | |
| 671 | SWS_ACCURATE_RND | SWS_BITEXACT, NULL, NULL, NULL); | ||
| 672 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 448 times.
|
448 | if (!sws) |
| 673 | ✗ | fail(); | |
| 674 | |||
| 675 | 448 | c = sws_internal(sws); | |
| 676 | |||
| 677 |
2/2✓ Branch 0 taken 1344 times.
✓ Branch 1 taken 448 times.
|
1792 | for (int ai = 0; ai < FF_ARRAY_ELEMS(alpha_values); ai++) { |
| 678 | 1344 | const int lum_alpha = alpha_values[ai]; | |
| 679 | 1344 | const int chr_alpha = alpha_values[ai]; | |
| 680 |
2/2✓ Branch 3 taken 96 times.
✓ Branch 4 taken 1248 times.
|
1344 | if (check_func(c->yuv2packed2, "yuv2%s_2_%d_%d", desc->name, lum_alpha, INPUT_SIZE)) { |
| 681 | 96 | memset(dst0, 0xFF, INPUT_SIZE * sizeof(int32_t[4])); | |
| 682 | 96 | memset(dst1, 0xFF, INPUT_SIZE * sizeof(int32_t[4])); | |
| 683 | |||
| 684 | 96 | call_ref(c, luma, chru, chrv, alpha, dst0, INPUT_SIZE, lum_alpha, chr_alpha, 0); | |
| 685 | 96 | call_new(c, luma, chru, chrv, alpha, dst1, INPUT_SIZE, lum_alpha, chr_alpha, 0); | |
| 686 | |||
| 687 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 96 times.
|
96 | if (memcmp(dst0, dst1, line_size)) |
| 688 | ✗ | fail(); | |
| 689 | |||
| 690 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 96 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
96 | bench_new(c, luma, chru, chrv, alpha, dst1, INPUT_SIZE, lum_alpha, chr_alpha, 0); |
| 691 | } | ||
| 692 | } | ||
| 693 | |||
| 694 | 448 | sws_freeContext(sws); | |
| 695 | } | ||
| 696 | 14 | } | |
| 697 | |||
| 698 | 14 | static void check_yuv2packedX(void) | |
| 699 | { | ||
| 700 | #define LARGEST_FILTER 16 | ||
| 701 | static const int filter_sizes[] = {2, 16}; | ||
| 702 | |||
| 703 |
2/2✓ Branch 1 taken 13 times.
✓ Branch 2 taken 1 times.
|
14 | declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, |
| 704 | void, SwsInternal *c, const int16_t *lumFilter, | ||
| 705 | const int16_t **lumSrcx, int lumFilterSize, | ||
| 706 | const int16_t *chrFilter, const int16_t **chrUSrcx, | ||
| 707 | const int16_t **chrVSrcx, int chrFilterSize, | ||
| 708 | const int16_t **alpSrcx, uint8_t *dest, | ||
| 709 | int dstW, int y); | ||
| 710 | |||
| 711 | const int16_t *luma[LARGEST_FILTER]; | ||
| 712 | const int16_t *chru[LARGEST_FILTER]; | ||
| 713 | const int16_t *chrv[LARGEST_FILTER]; | ||
| 714 | const int16_t *alpha[LARGEST_FILTER]; | ||
| 715 | |||
| 716 | 14 | LOCAL_ALIGNED_8(int16_t, luma_filter, [LARGEST_FILTER]); | |
| 717 | 14 | LOCAL_ALIGNED_8(int16_t, chr_filter, [LARGEST_FILTER]); | |
| 718 | |||
| 719 | 14 | LOCAL_ALIGNED_8(int32_t, src_y, [LARGEST_FILTER * INPUT_SIZE]); | |
| 720 | 14 | LOCAL_ALIGNED_8(int32_t, src_u, [LARGEST_FILTER * INPUT_SIZE]); | |
| 721 | 14 | LOCAL_ALIGNED_8(int32_t, src_v, [LARGEST_FILTER * INPUT_SIZE]); | |
| 722 | 14 | LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_FILTER * INPUT_SIZE]); | |
| 723 | |||
| 724 | 14 | LOCAL_ALIGNED_8(uint8_t, dst0, [INPUT_SIZE * sizeof(int32_t[4])]); | |
| 725 | 14 | LOCAL_ALIGNED_8(uint8_t, dst1, [INPUT_SIZE * sizeof(int32_t[4])]); | |
| 726 | |||
| 727 |
2/2✓ Branch 1 taken 114688 times.
✓ Branch 2 taken 14 times.
|
114702 | randomize_buffers((uint8_t*)src_y, LARGEST_FILTER * INPUT_SIZE * sizeof(int32_t)); |
| 728 |
2/2✓ Branch 1 taken 114688 times.
✓ Branch 2 taken 14 times.
|
114702 | randomize_buffers((uint8_t*)src_u, LARGEST_FILTER * INPUT_SIZE * sizeof(int32_t)); |
| 729 |
2/2✓ Branch 1 taken 114688 times.
✓ Branch 2 taken 14 times.
|
114702 | randomize_buffers((uint8_t*)src_v, LARGEST_FILTER * INPUT_SIZE * sizeof(int32_t)); |
| 730 |
2/2✓ Branch 1 taken 114688 times.
✓ Branch 2 taken 14 times.
|
114702 | randomize_buffers((uint8_t*)src_a, LARGEST_FILTER * INPUT_SIZE * sizeof(int32_t)); |
| 731 | |||
| 732 | /* Limit to 14 bit input range */ | ||
| 733 |
2/2✓ Branch 0 taken 114688 times.
✓ Branch 1 taken 14 times.
|
114702 | for (int i = 0; i < LARGEST_FILTER * INPUT_SIZE; i++) { |
| 734 | 114688 | src_y[i] &= 0x3FFF3FFF; | |
| 735 | 114688 | src_u[i] &= 0x3FFF3FFF; | |
| 736 | 114688 | src_v[i] &= 0x3FFF3FFF; | |
| 737 | 114688 | src_a[i] &= 0x3FFF3FFF; | |
| 738 | } | ||
| 739 | |||
| 740 |
2/2✓ Branch 0 taken 224 times.
✓ Branch 1 taken 14 times.
|
238 | for (int i = 0; i < LARGEST_FILTER; i++) { |
| 741 | 224 | luma[i] = (int16_t *)(src_y + i*INPUT_SIZE); | |
| 742 | 224 | chru[i] = (int16_t *)(src_u + i*INPUT_SIZE); | |
| 743 | 224 | chrv[i] = (int16_t *)(src_v + i*INPUT_SIZE); | |
| 744 | 224 | alpha[i] = (int16_t *)(src_a + i*INPUT_SIZE); | |
| 745 | } | ||
| 746 | |||
| 747 |
2/2✓ Branch 0 taken 448 times.
✓ Branch 1 taken 14 times.
|
462 | for (int fmi = 0; fmi < FF_ARRAY_ELEMS(packed_rgb_fmts); fmi++) { |
| 748 | 448 | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(packed_rgb_fmts[fmi]); | |
| 749 | 448 | int line_size = INPUT_SIZE * desc->comp[0].step; | |
| 750 | SwsContext *sws; | ||
| 751 | SwsInternal *c; | ||
| 752 | |||
| 753 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 420 times.
|
448 | if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM) |
| 754 | 28 | line_size = AV_CEIL_RSHIFT(line_size, 3); | |
| 755 | |||
| 756 | 896 | sws = sws_getContext(MAX_LINE_SIZE, MAX_LINE_SIZE, AV_PIX_FMT_YUV420P, | |
| 757 | 448 | MAX_LINE_SIZE, MAX_LINE_SIZE, packed_rgb_fmts[fmi], | |
| 758 | SWS_ACCURATE_RND | SWS_BITEXACT, NULL, NULL, NULL); | ||
| 759 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 448 times.
|
448 | if (!sws) |
| 760 | ✗ | fail(); | |
| 761 | |||
| 762 | 448 | c = sws_internal(sws); | |
| 763 | |||
| 764 |
2/2✓ Branch 0 taken 896 times.
✓ Branch 1 taken 448 times.
|
1344 | for (int fsi = 0; fsi < FF_ARRAY_ELEMS(filter_sizes); fsi++) { |
| 765 | 896 | const int luma_filter_size = filter_sizes[fsi]; | |
| 766 | 896 | const int chr_filter_size = filter_sizes[fsi]; | |
| 767 | |||
| 768 |
2/2✓ Branch 0 taken 8064 times.
✓ Branch 1 taken 896 times.
|
8960 | for (int i = 0; i < luma_filter_size; i++) |
| 769 | 8064 | luma_filter[i] = -((1 << 12) / (luma_filter_size - 1)); | |
| 770 | 896 | luma_filter[rnd() % luma_filter_size] = (1 << 13) - 1; | |
| 771 | |||
| 772 |
2/2✓ Branch 0 taken 8064 times.
✓ Branch 1 taken 896 times.
|
8960 | for (int i = 0; i < chr_filter_size; i++) |
| 773 | 8064 | chr_filter[i] = -((1 << 12) / (chr_filter_size - 1)); | |
| 774 | 896 | chr_filter[rnd() % chr_filter_size] = (1 << 13) - 1; | |
| 775 | |||
| 776 |
2/2✓ Branch 3 taken 64 times.
✓ Branch 4 taken 832 times.
|
896 | if (check_func(c->yuv2packedX, "yuv2%s_X_%d_%d", desc->name, luma_filter_size, INPUT_SIZE)) { |
| 777 | 64 | memset(dst0, 0xFF, INPUT_SIZE * sizeof(int32_t[4])); | |
| 778 | 64 | memset(dst1, 0xFF, INPUT_SIZE * sizeof(int32_t[4])); | |
| 779 | |||
| 780 | 64 | call_ref(c, luma_filter, luma, luma_filter_size, | |
| 781 | chr_filter, chru, chrv, chr_filter_size, | ||
| 782 | alpha, dst0, INPUT_SIZE, 0); | ||
| 783 | |||
| 784 | 64 | call_new(c, luma_filter, luma, luma_filter_size, | |
| 785 | chr_filter, chru, chrv, chr_filter_size, | ||
| 786 | alpha, dst1, INPUT_SIZE, 0); | ||
| 787 | |||
| 788 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
|
64 | if (memcmp(dst0, dst1, line_size)) |
| 789 | ✗ | fail(); | |
| 790 | |||
| 791 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 64 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
64 | bench_new(c, luma_filter, luma, luma_filter_size, |
| 792 | chr_filter, chru, chrv, chr_filter_size, | ||
| 793 | alpha, dst1, INPUT_SIZE, 0); | ||
| 794 | } | ||
| 795 | } | ||
| 796 | |||
| 797 | 448 | sws_freeContext(sws); | |
| 798 | } | ||
| 799 | 14 | } | |
| 800 | |||
| 801 | #undef INPUT_SIZE | ||
| 802 | #undef LARGEST_FILTER | ||
| 803 | |||
| 804 | 14 | void checkasm_check_sw_rgb(void) | |
| 805 | { | ||
| 806 | SwsContext *sws; | ||
| 807 | |||
| 808 | 14 | ff_sws_rgb2rgb_init(); | |
| 809 | |||
| 810 | 14 | check_shuffle_bytes(shuffle_bytes_2103, "shuffle_bytes_2103"); | |
| 811 | 14 | report("shuffle_bytes_2103"); | |
| 812 | |||
| 813 | 14 | check_shuffle_bytes(shuffle_bytes_0321, "shuffle_bytes_0321"); | |
| 814 | 14 | report("shuffle_bytes_0321"); | |
| 815 | |||
| 816 | 14 | check_shuffle_bytes(shuffle_bytes_1230, "shuffle_bytes_1230"); | |
| 817 | 14 | report("shuffle_bytes_1230"); | |
| 818 | |||
| 819 | 14 | check_shuffle_bytes(shuffle_bytes_3012, "shuffle_bytes_3012"); | |
| 820 | 14 | report("shuffle_bytes_3012"); | |
| 821 | |||
| 822 | 14 | check_shuffle_bytes(shuffle_bytes_3210, "shuffle_bytes_3210"); | |
| 823 | 14 | report("shuffle_bytes_3210"); | |
| 824 | |||
| 825 | 14 | check_shuffle_bytes(shuffle_bytes_3102, "shuffle_bytes_3102"); | |
| 826 | 14 | report("shuffle_bytes_3102"); | |
| 827 | |||
| 828 | 14 | check_shuffle_bytes(shuffle_bytes_2013, "shuffle_bytes_2013"); | |
| 829 | 14 | report("shuffle_bytes_2013"); | |
| 830 | |||
| 831 | 14 | check_shuffle_bytes(shuffle_bytes_1203, "shuffle_bytes_1203"); | |
| 832 | 14 | report("shuffle_bytes_1203"); | |
| 833 | |||
| 834 | 14 | check_shuffle_bytes(shuffle_bytes_2130, "shuffle_bytes_2130"); | |
| 835 | 14 | report("shuffle_bytes_2130"); | |
| 836 | |||
| 837 | { | ||
| 838 | /* rgb24tobgr24 operates on 3-byte pixels, so test widths must be | ||
| 839 | * multiples of 3 to avoid reading past the source buffer. */ | ||
| 840 | static const int rgb24_width[] = {3, 12, 24, 36, 48, 126, 1920 * 3}; | ||
| 841 | int i; | ||
| 842 | #define RGB24_BENCH_WIDTH (1920 * 3) | ||
| 843 | 14 | LOCAL_ALIGNED_32(uint8_t, src0, [RGB24_BENCH_WIDTH]); | |
| 844 | 14 | LOCAL_ALIGNED_32(uint8_t, src1, [RGB24_BENCH_WIDTH]); | |
| 845 | 14 | LOCAL_ALIGNED_32(uint8_t, dst0, [RGB24_BENCH_WIDTH]); | |
| 846 | 14 | LOCAL_ALIGNED_32(uint8_t, dst1, [RGB24_BENCH_WIDTH]); | |
| 847 | |||
| 848 | 14 | declare_func(void, const uint8_t *src, uint8_t *dst, int src_size); | |
| 849 | |||
| 850 | 14 | memset(dst0, 0, RGB24_BENCH_WIDTH); | |
| 851 | 14 | memset(dst1, 0, RGB24_BENCH_WIDTH); | |
| 852 |
2/2✓ Branch 1 taken 20160 times.
✓ Branch 2 taken 14 times.
|
20174 | randomize_buffers(src0, RGB24_BENCH_WIDTH); |
| 853 | 14 | memcpy(src1, src0, RGB24_BENCH_WIDTH); | |
| 854 | |||
| 855 |
2/2✓ Branch 3 taken 2 times.
✓ Branch 4 taken 12 times.
|
14 | if (check_func(rgb24tobgr24, "rgb24tobgr24")) { |
| 856 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (i = 0; i < FF_ARRAY_ELEMS(rgb24_width); i++) { |
| 857 | 14 | call_ref(src0, dst0, rgb24_width[i]); | |
| 858 | 14 | call_new(src1, dst1, rgb24_width[i]); | |
| 859 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
|
14 | if (memcmp(dst0, dst1, rgb24_width[i])) |
| 860 | ✗ | fail(); | |
| 861 | } | ||
| 862 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
2 | bench_new(src0, dst0, RGB24_BENCH_WIDTH); |
| 863 | } | ||
| 864 | #undef RGB24_BENCH_WIDTH | ||
| 865 | } | ||
| 866 | 14 | report("rgb24tobgr24"); | |
| 867 | |||
| 868 | { | ||
| 869 | /* rgb32tobgr24: 4-byte pixels → 3-byte pixels. | ||
| 870 | * Test widths must be multiples of 4 (one pixel). | ||
| 871 | * Sizes chosen to exercise each codepath tier: | ||
| 872 | * 4 = scalar only (1 pixel) | ||
| 873 | * 16 = scalar only (4 pixels, loop iteration) | ||
| 874 | * 32 = medium only | ||
| 875 | * 48 = medium + scalar | ||
| 876 | * 64 = fast only (exact) | ||
| 877 | * 68 = fast + scalar (skip medium) | ||
| 878 | * 100 = fast + medium + scalar (all tiers) | ||
| 879 | * 128 = fast only (multi-iteration) | ||
| 880 | * 1920*4 = fast only (benchmark width) | ||
| 881 | */ | ||
| 882 | static const int rgb32_widths[] = {4, 16, 32, 48, 64, 68, 100, 128, 1920 * 4}; | ||
| 883 | #define RGB32_BENCH_WIDTH (1920 * 4) | ||
| 884 | #define RGB32_DST_SIZE (RGB32_BENCH_WIDTH * 3 / 4 + 8) | ||
| 885 | 14 | LOCAL_ALIGNED_32(uint8_t, src0, [RGB32_BENCH_WIDTH]); | |
| 886 | 14 | LOCAL_ALIGNED_32(uint8_t, src1, [RGB32_BENCH_WIDTH]); | |
| 887 | 14 | LOCAL_ALIGNED_32(uint8_t, dst0, [RGB32_DST_SIZE]); | |
| 888 | 14 | LOCAL_ALIGNED_32(uint8_t, dst1, [RGB32_DST_SIZE]); | |
| 889 | |||
| 890 | 14 | declare_func(void, const uint8_t *src, uint8_t *dst, int src_size); | |
| 891 | |||
| 892 |
2/2✓ Branch 1 taken 26880 times.
✓ Branch 2 taken 14 times.
|
26894 | randomize_buffers(src0, RGB32_BENCH_WIDTH); |
| 893 | 14 | memcpy(src1, src0, RGB32_BENCH_WIDTH); | |
| 894 | |||
| 895 |
2/2✓ Branch 3 taken 2 times.
✓ Branch 4 taken 12 times.
|
14 | if (check_func(rgb32tobgr24, "rgb32tobgr24")) { |
| 896 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 2 times.
|
20 | for (int i = 0; i < FF_ARRAY_ELEMS(rgb32_widths); i++) { |
| 897 | 18 | int out_size = rgb32_widths[i] * 3 / 4; | |
| 898 | 18 | memset(dst0, 0xAA, RGB32_DST_SIZE); | |
| 899 | 18 | memset(dst1, 0xAA, RGB32_DST_SIZE); | |
| 900 | 18 | call_ref(src0, dst0, rgb32_widths[i]); | |
| 901 | 18 | call_new(src1, dst1, rgb32_widths[i]); | |
| 902 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | if (memcmp(dst0, dst1, out_size) || |
| 903 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | dst0[out_size] != 0xAA || |
| 904 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
|
18 | dst1[out_size] != 0xAA) |
| 905 | ✗ | fail(); | |
| 906 | } | ||
| 907 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
2 | bench_new(src0, dst0, RGB32_BENCH_WIDTH); |
| 908 | } | ||
| 909 | #undef RGB32_DST_SIZE | ||
| 910 | #undef RGB32_BENCH_WIDTH | ||
| 911 | } | ||
| 912 | 14 | report("rgb32tobgr24"); | |
| 913 | |||
| 914 | { | ||
| 915 | /* rgb24tobgr32: 3-byte pixels → 4-byte pixels. | ||
| 916 | * Test widths must be multiples of 3 (one pixel). | ||
| 917 | * Sizes chosen to exercise each codepath tier: | ||
| 918 | * 3 = scalar only (1 pixel) | ||
| 919 | * 12 = scalar only (4 pixels, loop iteration) | ||
| 920 | * 24 = medium only | ||
| 921 | * 36 = medium + scalar | ||
| 922 | * 48 = fast only (exact) | ||
| 923 | * 51 = fast + scalar (skip medium) | ||
| 924 | * 126 = fast + medium + scalar (all tiers) | ||
| 925 | * 1920*3 = fast only (benchmark width) | ||
| 926 | */ | ||
| 927 | static const int rgb24to32_widths[] = {3, 12, 24, 36, 48, 51, 126, 1920 * 3}; | ||
| 928 | #define RGB24TO32_BENCH_WIDTH (1920 * 3) | ||
| 929 | #define RGB24TO32_DST_SIZE (RGB24TO32_BENCH_WIDTH * 4 / 3 + 8) | ||
| 930 | 14 | LOCAL_ALIGNED_32(uint8_t, src0, [RGB24TO32_BENCH_WIDTH]); | |
| 931 | 14 | LOCAL_ALIGNED_32(uint8_t, src1, [RGB24TO32_BENCH_WIDTH]); | |
| 932 | 14 | LOCAL_ALIGNED_32(uint8_t, dst0, [RGB24TO32_DST_SIZE]); | |
| 933 | 14 | LOCAL_ALIGNED_32(uint8_t, dst1, [RGB24TO32_DST_SIZE]); | |
| 934 | |||
| 935 | 14 | declare_func(void, const uint8_t *src, uint8_t *dst, int src_size); | |
| 936 | |||
| 937 |
2/2✓ Branch 1 taken 20160 times.
✓ Branch 2 taken 14 times.
|
20174 | randomize_buffers(src0, RGB24TO32_BENCH_WIDTH); |
| 938 | 14 | memcpy(src1, src0, RGB24TO32_BENCH_WIDTH); | |
| 939 | |||
| 940 |
2/2✓ Branch 3 taken 2 times.
✓ Branch 4 taken 12 times.
|
14 | if (check_func(rgb24tobgr32, "rgb24tobgr32")) { |
| 941 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 2 times.
|
18 | for (int i = 0; i < FF_ARRAY_ELEMS(rgb24to32_widths); i++) { |
| 942 | 16 | int out_size = rgb24to32_widths[i] * 4 / 3; | |
| 943 | 16 | memset(dst0, 0xAA, RGB24TO32_DST_SIZE); | |
| 944 | 16 | memset(dst1, 0xAA, RGB24TO32_DST_SIZE); | |
| 945 | 16 | call_ref(src0, dst0, rgb24to32_widths[i]); | |
| 946 | 16 | call_new(src1, dst1, rgb24to32_widths[i]); | |
| 947 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | if (memcmp(dst0, dst1, out_size) || |
| 948 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | dst0[out_size] != 0xAA || |
| 949 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | dst1[out_size] != 0xAA) |
| 950 | ✗ | fail(); | |
| 951 | } | ||
| 952 |
1/8✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
|
2 | bench_new(src0, dst0, RGB24TO32_BENCH_WIDTH); |
| 953 | } | ||
| 954 | #undef RGB24TO32_DST_SIZE | ||
| 955 | #undef RGB24TO32_BENCH_WIDTH | ||
| 956 | } | ||
| 957 | 14 | report("rgb24tobgr32"); | |
| 958 | |||
| 959 | 14 | check_uyvy_to_422p(); | |
| 960 | 14 | report("uyvytoyuv422"); | |
| 961 | |||
| 962 | 14 | check_interleave_bytes(); | |
| 963 | 14 | report("interleave_bytes"); | |
| 964 | |||
| 965 | 14 | check_deinterleave_bytes(); | |
| 966 | 14 | report("deinterleave_bytes"); | |
| 967 | |||
| 968 | 14 | sws = sws_getContext(MAX_LINE_SIZE, MAX_LINE_SIZE, AV_PIX_FMT_RGB24, | |
| 969 | MAX_LINE_SIZE, MAX_LINE_SIZE, AV_PIX_FMT_YUV420P, | ||
| 970 | SWS_ACCURATE_RND | SWS_BITEXACT, NULL, NULL, NULL); | ||
| 971 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
|
14 | if (!sws) |
| 972 | ✗ | fail(); | |
| 973 | |||
| 974 | 14 | check_rgb_to_y(sws); | |
| 975 | 14 | report("rgb_to_y"); | |
| 976 | |||
| 977 | 14 | check_rgb_to_uv(sws); | |
| 978 | 14 | report("rgb_to_uv"); | |
| 979 | |||
| 980 | 14 | check_rgba_to_a(sws); | |
| 981 | 14 | report("rgba_to_a"); | |
| 982 | |||
| 983 | 14 | check_rgb24toyv12(sws); | |
| 984 | 14 | report("rgb24toyv12"); | |
| 985 | |||
| 986 | 14 | sws_freeContext(sws); | |
| 987 | |||
| 988 | 14 | check_yuv2packed1(); | |
| 989 | 14 | report("yuv2packed1"); | |
| 990 | |||
| 991 | 14 | check_yuv2packed2(); | |
| 992 | 14 | report("yuv2packed2"); | |
| 993 | |||
| 994 | 14 | check_yuv2packedX(); | |
| 995 | 14 | report("yuv2packedX"); | |
| 996 | 14 | } | |
| 997 |