| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /** | ||
| 2 | * Copyright (C) 2025 Niklas Haas | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public | ||
| 17 | * License along with FFmpeg; if not, write to the Free Software | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include "libavutil/avassert.h" | ||
| 22 | #include "libavutil/bswap.h" | ||
| 23 | #include "libavutil/rational.h" | ||
| 24 | |||
| 25 | #include "ops.h" | ||
| 26 | #include "ops_internal.h" | ||
| 27 | |||
/* Store the result of `x` in the enclosing function's `ret` variable and
 * return it to the caller right away if it is negative (an error code). */
#define RET(x)                                                                  \
    do {                                                                        \
        ret = (x);                                                              \
        if (ret < 0)                                                            \
            return ret;                                                         \
    } while (0)
| 33 | |||
| 34 | /** | ||
| 35 | * Try to commute a clear op with the next operation. Makes any adjustments | ||
| 36 | * to the operations as needed, but does not perform the actual commutation. | ||
| 37 | * | ||
| 38 | * Returns whether successful. | ||
| 39 | */ | ||
| 40 | 51904 | static bool op_commute_clear(SwsOp *op, SwsOp *next) | |
| 41 | { | ||
| 42 | SwsOp tmp; | ||
| 43 | |||
| 44 | av_assert1(op->op == SWS_OP_CLEAR); | ||
| 45 |
4/6✓ Branch 0 taken 9120 times.
✓ Branch 1 taken 15310 times.
✓ Branch 2 taken 3529 times.
✓ Branch 3 taken 23945 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
51904 | switch (next->op) { |
| 46 | 9120 | case SWS_OP_CONVERT: | |
| 47 | 9120 | op->type = next->convert.to; | |
| 48 | /* fall through */ | ||
| 49 | 24430 | case SWS_OP_LSHIFT: | |
| 50 | case SWS_OP_RSHIFT: | ||
| 51 | case SWS_OP_DITHER: | ||
| 52 | case SWS_OP_MIN: | ||
| 53 | case SWS_OP_MAX: | ||
| 54 | case SWS_OP_SCALE: | ||
| 55 | case SWS_OP_READ: | ||
| 56 | case SWS_OP_SWIZZLE: | ||
| 57 | 24430 | ff_sws_apply_op_q(next, op->c.q4); | |
| 58 | 24430 | return true; | |
| 59 | 3529 | case SWS_OP_SWAP_BYTES: | |
| 60 |
2/3✓ Branch 0 taken 2920 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 609 times.
|
3529 | switch (next->type) { |
| 61 | 2920 | case SWS_PIXEL_U16: | |
| 62 | 2920 | ff_sws_apply_op_q(next, op->c.q4); /* always works */ | |
| 63 | 2920 | return true; | |
| 64 | ✗ | case SWS_PIXEL_U32: | |
| 65 | ✗ | for (int i = 0; i < 4; i++) { | |
| 66 | ✗ | uint32_t v = av_bswap32(op->c.q4[i].num); | |
| 67 | ✗ | if (v > INT_MAX) | |
| 68 | ✗ | return false; /* can't represent as AVRational anymore */ | |
| 69 | ✗ | tmp.c.q4[i] = Q(v); | |
| 70 | } | ||
| 71 | ✗ | op->c = tmp.c; | |
| 72 | ✗ | return true; | |
| 73 | 609 | default: | |
| 74 | 609 | return false; | |
| 75 | } | ||
| 76 | 23945 | case SWS_OP_INVALID: | |
| 77 | case SWS_OP_WRITE: | ||
| 78 | case SWS_OP_LINEAR: | ||
| 79 | case SWS_OP_PACK: | ||
| 80 | case SWS_OP_UNPACK: | ||
| 81 | case SWS_OP_CLEAR: | ||
| 82 | 23945 | return false; | |
| 83 | ✗ | case SWS_OP_TYPE_NB: | |
| 84 | ✗ | break; | |
| 85 | } | ||
| 86 | |||
| 87 | ✗ | av_unreachable("Invalid operation type!"); | |
| 88 | return false; | ||
| 89 | } | ||
| 90 | |||
| 91 | /** | ||
| 92 | * Try to commute a swizzle op with the next operation. Makes any adjustments | ||
| 93 | * to the operations as needed, but does not perform the actual commutation. | ||
| 94 | * | ||
| 95 | * Returns whether successful. | ||
| 96 | */ | ||
| 97 | 60367 | static bool op_commute_swizzle(SwsOp *op, SwsOp *next) | |
| 98 | { | ||
| 99 | 60367 | bool seen[4] = {0}; | |
| 100 | |||
| 101 | av_assert1(op->op == SWS_OP_SWIZZLE); | ||
| 102 |
5/7✓ Branch 0 taken 19578 times.
✓ Branch 1 taken 11779 times.
✓ Branch 2 taken 3178 times.
✓ Branch 3 taken 2814 times.
✓ Branch 4 taken 23018 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
|
60367 | switch (next->op) { |
| 103 | 19578 | case SWS_OP_CONVERT: | |
| 104 | 19578 | op->type = next->convert.to; | |
| 105 | /* fall through */ | ||
| 106 | 31357 | case SWS_OP_SWAP_BYTES: | |
| 107 | case SWS_OP_LSHIFT: | ||
| 108 | case SWS_OP_RSHIFT: | ||
| 109 | case SWS_OP_SCALE: | ||
| 110 | 31357 | return true; | |
| 111 | |||
| 112 | /** | ||
| 113 | * We can commute per-channel ops only if the per-channel constants are the | ||
| 114 | * same for all duplicated channels; e.g.: | ||
| 115 | * SWIZZLE {0, 0, 0, 3} | ||
| 116 | * NEXT {x, x, x, w} | ||
| 117 | * -> | ||
| 118 | * NEXT {x, _, _, w} | ||
| 119 | * SWIZZLE {0, 0, 0, 3} | ||
| 120 | */ | ||
| 121 | 3178 | case SWS_OP_MIN: | |
| 122 | case SWS_OP_MAX: { | ||
| 123 | 3178 | const SwsConst c = next->c; | |
| 124 |
2/2✓ Branch 0 taken 12712 times.
✓ Branch 1 taken 3178 times.
|
15890 | for (int i = 0; i < 4; i++) { |
| 125 |
2/2✓ Branch 0 taken 2934 times.
✓ Branch 1 taken 9778 times.
|
12712 | if (next->comps.unused[i]) |
| 126 | 2934 | continue; | |
| 127 | 9778 | const int j = op->swizzle.in[i]; | |
| 128 |
3/4✓ Branch 0 taken 2124 times.
✓ Branch 1 taken 7654 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2124 times.
|
9778 | if (seen[j] && av_cmp_q(next->c.q4[j], c.q4[i])) |
| 129 | ✗ | return false; | |
| 130 | 9778 | next->c.q4[j] = c.q4[i]; | |
| 131 | 9778 | seen[j] = true; | |
| 132 | } | ||
| 133 | 3178 | return true; | |
| 134 | } | ||
| 135 | |||
| 136 | 2814 | case SWS_OP_DITHER: { | |
| 137 | 2814 | const SwsDitherOp d = next->dither; | |
| 138 |
2/2✓ Branch 0 taken 10440 times.
✓ Branch 1 taken 2406 times.
|
12846 | for (int i = 0; i < 4; i++) { |
| 139 |
2/2✓ Branch 0 taken 2260 times.
✓ Branch 1 taken 8180 times.
|
10440 | if (next->comps.unused[i]) |
| 140 | 2260 | continue; | |
| 141 | 8180 | const int j = op->swizzle.in[i]; | |
| 142 |
4/4✓ Branch 0 taken 1740 times.
✓ Branch 1 taken 6440 times.
✓ Branch 2 taken 408 times.
✓ Branch 3 taken 1332 times.
|
8180 | if (seen[j] && next->dither.y_offset[j] != d.y_offset[i]) |
| 143 | 408 | return false; | |
| 144 | 7772 | next->dither.y_offset[j] = d.y_offset[i]; | |
| 145 | 7772 | seen[j] = true; | |
| 146 | } | ||
| 147 | 2406 | return true; | |
| 148 | } | ||
| 149 | |||
| 150 | 23018 | case SWS_OP_INVALID: | |
| 151 | case SWS_OP_READ: | ||
| 152 | case SWS_OP_WRITE: | ||
| 153 | case SWS_OP_SWIZZLE: | ||
| 154 | case SWS_OP_CLEAR: | ||
| 155 | case SWS_OP_LINEAR: | ||
| 156 | case SWS_OP_PACK: | ||
| 157 | case SWS_OP_UNPACK: | ||
| 158 | 23018 | return false; | |
| 159 | ✗ | case SWS_OP_TYPE_NB: | |
| 160 | ✗ | break; | |
| 161 | } | ||
| 162 | |||
| 163 | ✗ | av_unreachable("Invalid operation type!"); | |
| 164 | return false; | ||
| 165 | } | ||
| 166 | |||
/* Returns log2(x) when x is an exact power of two, and 0 in every other
 * case (x <= 0, x not a power of two, and x == 1 whose logarithm is 0). */
static int exact_log2(const int x)
{
    if (x > 0) {
        const int bit = av_log2(x);
        if (x == (1 << bit))
            return bit;
    }

    return 0;
}
| 176 | |||
| 177 | 77966 | static int exact_log2_q(const AVRational x) | |
| 178 | { | ||
| 179 |
2/2✓ Branch 0 taken 21513 times.
✓ Branch 1 taken 56453 times.
|
77966 | if (x.den == 1) |
| 180 | 21513 | return exact_log2(x.num); | |
| 181 |
2/2✓ Branch 0 taken 17594 times.
✓ Branch 1 taken 38859 times.
|
56453 | else if (x.num == 1) |
| 182 | 17594 | return -exact_log2(x.den); | |
| 183 | else | ||
| 184 | 38859 | return 0; | |
| 185 | } | ||
| 186 | |||
| 187 | /** | ||
| 188 | * If a linear operation can be reduced to a scalar multiplication, returns | ||
| 189 | * the corresponding scaling factor, or 0 otherwise. | ||
| 190 | */ | ||
| 191 | 110683 | static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next, | |
| 192 | SwsConst *out_scale) | ||
| 193 | { | ||
| 194 | 110683 | SwsConst scale = {0}; | |
| 195 | |||
| 196 | /* There are components not on the main diagonal */ | ||
| 197 |
2/2✓ Branch 0 taken 84166 times.
✓ Branch 1 taken 26517 times.
|
110683 | if (c->mask & ~SWS_MASK_DIAG4) |
| 198 | 84166 | return false; | |
| 199 | |||
| 200 |
2/2✓ Branch 0 taken 81106 times.
✓ Branch 1 taken 10262 times.
|
91368 | for (int i = 0; i < 4; i++) { |
| 201 | 81106 | const AVRational s = c->m[i][i]; | |
| 202 |
3/4✓ Branch 0 taken 81106 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10702 times.
✓ Branch 3 taken 70404 times.
|
81106 | if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i]) |
| 203 | 10702 | continue; | |
| 204 |
4/4✓ Branch 0 taken 43887 times.
✓ Branch 1 taken 26517 times.
✓ Branch 3 taken 16255 times.
✓ Branch 4 taken 27632 times.
|
70404 | if (scale.q.den && av_cmp_q(s, scale.q)) |
| 205 | 16255 | return false; | |
| 206 | 54149 | scale.q = s; | |
| 207 | } | ||
| 208 | |||
| 209 |
1/2✓ Branch 0 taken 10262 times.
✗ Branch 1 not taken.
|
10262 | if (scale.q.den) |
| 210 | 10262 | *out_scale = scale; | |
| 211 | 10262 | return scale.q.den; | |
| 212 | } | ||
| 213 | |||
| 214 | /* Extracts an integer clear operation (subset) from the given linear op. */ | ||
| 215 | 119429 | static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev, | |
| 216 | SwsConst *out_clear) | ||
| 217 | { | ||
| 218 | 119429 | SwsConst clear = {0}; | |
| 219 | 119429 | bool ret = false; | |
| 220 | |||
| 221 |
2/2✓ Branch 0 taken 477716 times.
✓ Branch 1 taken 119429 times.
|
597145 | for (int i = 0; i < 4; i++) { |
| 222 | 477716 | bool const_row = c->m[i][4].den == 1; /* offset is integer */ | |
| 223 |
2/2✓ Branch 0 taken 1910864 times.
✓ Branch 1 taken 477716 times.
|
2388580 | for (int j = 0; j < 4; j++) { |
| 224 |
2/2✓ Branch 0 taken 788502 times.
✓ Branch 1 taken 1122362 times.
|
2699366 | const_row &= c->m[i][j].num == 0 || /* scalar is zero */ |
| 225 |
2/2✓ Branch 0 taken 17994 times.
✓ Branch 1 taken 770508 times.
|
788502 | (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */ |
| 226 | } | ||
| 227 |
4/4✓ Branch 0 taken 11430 times.
✓ Branch 1 taken 466286 times.
✓ Branch 2 taken 10740 times.
✓ Branch 3 taken 690 times.
|
477716 | if (const_row && (c->mask & SWS_MASK_ROW(i))) { |
| 228 | 10740 | clear.q4[i] = c->m[i][4]; | |
| 229 |
2/2✓ Branch 0 taken 53700 times.
✓ Branch 1 taken 10740 times.
|
64440 | for (int j = 0; j < 5; j++) |
| 230 | 53700 | c->m[i][j] = Q(i == j); | |
| 231 | 10740 | c->mask &= ~SWS_MASK_ROW(i); | |
| 232 | 10740 | ret = true; | |
| 233 | } | ||
| 234 | } | ||
| 235 | |||
| 236 |
2/2✓ Branch 0 taken 8746 times.
✓ Branch 1 taken 110683 times.
|
119429 | if (ret) |
| 237 | 8746 | *out_clear = clear; | |
| 238 | 119429 | return ret; | |
| 239 | } | ||
| 240 | |||
| 241 | /* Unswizzle a linear operation by aligning single-input rows with | ||
| 242 | * their corresponding diagonal */ | ||
| 243 | 100421 | static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz) | |
| 244 | { | ||
| 245 | 100421 | SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3); | |
| 246 | 100421 | SwsLinearOp c = *op; | |
| 247 | |||
| 248 | /* Find non-zero coefficients in the main 4x4 matrix */ | ||
| 249 | 100421 | uint32_t nonzero = 0; | |
| 250 |
2/2✓ Branch 0 taken 401684 times.
✓ Branch 1 taken 100421 times.
|
502105 | for (int i = 0; i < 4; i++) { |
| 251 |
2/2✓ Branch 0 taken 1606736 times.
✓ Branch 1 taken 401684 times.
|
2008420 | for (int j = 0; j < 4; j++) { |
| 252 |
4/4✓ Branch 0 taken 695866 times.
✓ Branch 1 taken 910870 times.
✓ Branch 2 taken 5874 times.
✓ Branch 3 taken 689992 times.
|
1606736 | if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO)) |
| 253 | 916744 | continue; | |
| 254 | 689992 | nonzero |= SWS_MASK(i, j); | |
| 255 | } | ||
| 256 | } | ||
| 257 | |||
| 258 | /* If a value is unique in its row and the target column is | ||
| 259 | * empty, move it there and update the input swizzle */ | ||
| 260 |
2/2✓ Branch 0 taken 401684 times.
✓ Branch 1 taken 100421 times.
|
502105 | for (int i = 0; i < 4; i++) { |
| 261 |
2/2✓ Branch 0 taken 395810 times.
✓ Branch 1 taken 5874 times.
|
401684 | if (nonzero & SWS_MASK_COL(i)) |
| 262 | 395810 | continue; /* target column is not empty */ | |
| 263 |
2/2✓ Branch 0 taken 7944 times.
✓ Branch 1 taken 690 times.
|
8634 | for (int j = 0; j < 4; j++) { |
| 264 |
2/2✓ Branch 0 taken 5184 times.
✓ Branch 1 taken 2760 times.
|
7944 | if ((nonzero & SWS_MASK_ROW(i)) == SWS_MASK(i, j)) { |
| 265 | /* Move coefficient to the diagonal */ | ||
| 266 | 5184 | c.m[i][i] = c.m[i][j]; | |
| 267 | 5184 | c.m[i][j] = Q(0); | |
| 268 | 5184 | swiz.in[i] = j; | |
| 269 | 5184 | break; | |
| 270 | } | ||
| 271 | } | ||
| 272 | } | ||
| 273 | |||
| 274 |
2/2✓ Branch 0 taken 97829 times.
✓ Branch 1 taken 2592 times.
|
100421 | if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask) |
| 275 | 97829 | return false; /* no swizzle was identified */ | |
| 276 | |||
| 277 | 2592 | c.mask = ff_sws_linear_mask(c); | |
| 278 | 2592 | *out_swiz = swiz; | |
| 279 | 2592 | *op = c; | |
| 280 | 2592 | return true; | |
| 281 | } | ||
| 282 | |||
| 283 | 31704 | int ff_sws_op_list_optimize(SwsOpList *ops) | |
| 284 | { | ||
| 285 | int ret; | ||
| 286 | |||
| 287 | 369071 | retry: | |
| 288 | 400775 | ff_sws_op_list_update_comps(ops); | |
| 289 | |||
| 290 | /* Apply all in-place optimizations (that do not re-order the list) */ | ||
| 291 |
2/2✓ Branch 0 taken 2295687 times.
✓ Branch 1 taken 97819 times.
|
2393506 | for (int n = 0; n < ops->num_ops; n++) { |
| 292 | 2295687 | SwsOp dummy = {0}; | |
| 293 | 2295687 | SwsOp *op = &ops->ops[n]; | |
| 294 |
2/2✓ Branch 0 taken 1894912 times.
✓ Branch 1 taken 400775 times.
|
2295687 | SwsOp *prev = n ? &ops->ops[n - 1] : &dummy; |
| 295 |
2/2✓ Branch 0 taken 2197868 times.
✓ Branch 1 taken 97819 times.
|
2295687 | SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy; |
| 296 | |||
| 297 | /* common helper variable */ | ||
| 298 | 2295687 | bool noop = true; | |
| 299 | |||
| 300 |
4/4✓ Branch 0 taken 76349 times.
✓ Branch 1 taken 2219338 times.
✓ Branch 2 taken 2030 times.
✓ Branch 3 taken 74319 times.
|
2295687 | if (next->comps.unused[0] && next->comps.unused[1] && |
| 301 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2030 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
2030 | next->comps.unused[2] && next->comps.unused[3]) |
| 302 | { | ||
| 303 | /* Remove completely unused operations */ | ||
| 304 | ✗ | ff_sws_op_list_remove_at(ops, n, 1); | |
| 305 | 302956 | goto retry; | |
| 306 | } | ||
| 307 | |||
| 308 |
13/13✓ Branch 0 taken 400775 times.
✓ Branch 1 taken 191954 times.
✓ Branch 2 taken 74731 times.
✓ Branch 3 taken 42882 times.
✓ Branch 4 taken 163399 times.
✓ Branch 5 taken 279539 times.
✓ Branch 6 taken 479592 times.
✓ Branch 7 taken 88800 times.
✓ Branch 8 taken 56308 times.
✓ Branch 9 taken 98471 times.
✓ Branch 10 taken 224401 times.
✓ Branch 11 taken 77966 times.
✓ Branch 12 taken 116869 times.
|
2295687 | switch (op->op) { |
| 309 | 400775 | case SWS_OP_READ: | |
| 310 | /* "Compress" planar reads where not all components are needed */ | ||
| 311 |
2/2✓ Branch 0 taken 221889 times.
✓ Branch 1 taken 178886 times.
|
400775 | if (!op->rw.packed) { |
| 312 | 221889 | SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3); | |
| 313 | 221889 | int nb_planes = 0; | |
| 314 |
2/2✓ Branch 0 taken 546443 times.
✓ Branch 1 taken 221889 times.
|
768332 | for (int i = 0; i < op->rw.elems; i++) { |
| 315 |
2/2✓ Branch 0 taken 5180 times.
✓ Branch 1 taken 541263 times.
|
546443 | if (next->comps.unused[i]) { |
| 316 | 5180 | swiz.in[i] = 3 - (i - nb_planes); /* map to unused plane */ | |
| 317 | 5180 | continue; | |
| 318 | } | ||
| 319 | |||
| 320 | 541263 | const int idx = nb_planes++; | |
| 321 | av_assert1(idx <= i); | ||
| 322 | 541263 | ops->order_src.in[idx] = ops->order_src.in[i]; | |
| 323 | 541263 | swiz.in[i] = idx; | |
| 324 | } | ||
| 325 | |||
| 326 |
2/2✓ Branch 0 taken 4230 times.
✓ Branch 1 taken 217659 times.
|
221889 | if (nb_planes < op->rw.elems) { |
| 327 | 4230 | op->rw.elems = nb_planes; | |
| 328 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 4230 times.
|
4230 | RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) { |
| 329 | .op = SWS_OP_SWIZZLE, | ||
| 330 | .type = op->type, | ||
| 331 | .swizzle = swiz, | ||
| 332 | })); | ||
| 333 | 4230 | goto retry; | |
| 334 | } | ||
| 335 | } | ||
| 336 | 396545 | break; | |
| 337 | |||
| 338 | 191954 | case SWS_OP_SWAP_BYTES: | |
| 339 | /* Redundant (double) swap */ | ||
| 340 |
2/2✓ Branch 0 taken 209 times.
✓ Branch 1 taken 191745 times.
|
191954 | if (next->op == SWS_OP_SWAP_BYTES) { |
| 341 | 209 | ff_sws_op_list_remove_at(ops, n, 2); | |
| 342 | 209 | goto retry; | |
| 343 | } | ||
| 344 | 191745 | break; | |
| 345 | |||
| 346 | 74731 | case SWS_OP_UNPACK: | |
| 347 | /* Redundant unpack+pack */ | ||
| 348 |
3/4✓ Branch 0 taken 46 times.
✓ Branch 1 taken 74685 times.
✓ Branch 2 taken 46 times.
✗ Branch 3 not taken.
|
74731 | if (next->op == SWS_OP_PACK && next->type == op->type && |
| 349 |
1/2✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
|
46 | next->pack.pattern[0] == op->pack.pattern[0] && |
| 350 |
1/2✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
|
46 | next->pack.pattern[1] == op->pack.pattern[1] && |
| 351 |
1/2✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
|
46 | next->pack.pattern[2] == op->pack.pattern[2] && |
| 352 |
1/2✓ Branch 0 taken 46 times.
✗ Branch 1 not taken.
|
46 | next->pack.pattern[3] == op->pack.pattern[3]) |
| 353 | { | ||
| 354 | 46 | ff_sws_op_list_remove_at(ops, n, 2); | |
| 355 | 46 | goto retry; | |
| 356 | } | ||
| 357 | 74685 | break; | |
| 358 | |||
| 359 | 42882 | case SWS_OP_LSHIFT: | |
| 360 | case SWS_OP_RSHIFT: | ||
| 361 | /* Two shifts in the same direction */ | ||
| 362 |
2/2✓ Branch 0 taken 204 times.
✓ Branch 1 taken 42678 times.
|
42882 | if (next->op == op->op) { |
| 363 | 204 | op->c.u += next->c.u; | |
| 364 | 204 | ff_sws_op_list_remove_at(ops, n + 1, 1); | |
| 365 | 204 | goto retry; | |
| 366 | } | ||
| 367 | |||
| 368 | /* No-op shift */ | ||
| 369 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 42678 times.
|
42678 | if (!op->c.u) { |
| 370 | ✗ | ff_sws_op_list_remove_at(ops, n, 1); | |
| 371 | ✗ | goto retry; | |
| 372 | } | ||
| 373 | 42678 | break; | |
| 374 | |||
| 375 | 163399 | case SWS_OP_CLEAR: | |
| 376 |
2/2✓ Branch 0 taken 653596 times.
✓ Branch 1 taken 163399 times.
|
816995 | for (int i = 0; i < 4; i++) { |
| 377 |
2/2✓ Branch 0 taken 437815 times.
✓ Branch 1 taken 215781 times.
|
653596 | if (!op->c.q4[i].den) |
| 378 | 437815 | continue; | |
| 379 | |||
| 380 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 215781 times.
|
215781 | if ((prev->comps.flags[i] & SWS_COMP_ZERO) && |
| 381 | ✗ | !(prev->comps.flags[i] & SWS_COMP_GARBAGE) && | |
| 382 | ✗ | op->c.q4[i].num == 0) | |
| 383 | { | ||
| 384 | /* Redundant clear-to-zero of zero component */ | ||
| 385 | ✗ | op->c.q4[i].den = 0; | |
| 386 |
2/2✓ Branch 0 taken 31704 times.
✓ Branch 1 taken 184077 times.
|
215781 | } else if (next->comps.unused[i]) { |
| 387 | /* Unnecessary clear of unused component */ | ||
| 388 | 31704 | op->c.q4[i] = (AVRational) {0, 0}; | |
| 389 |
1/2✓ Branch 0 taken 184077 times.
✗ Branch 1 not taken.
|
184077 | } else if (op->c.q4[i].den) { |
| 390 | 184077 | noop = false; | |
| 391 | } | ||
| 392 | } | ||
| 393 | |||
| 394 |
2/2✓ Branch 0 taken 31704 times.
✓ Branch 1 taken 131695 times.
|
163399 | if (noop) { |
| 395 | 31704 | ff_sws_op_list_remove_at(ops, n, 1); | |
| 396 | 31704 | goto retry; | |
| 397 | } | ||
| 398 | |||
| 399 | /* Transitive clear */ | ||
| 400 |
2/2✓ Branch 0 taken 190 times.
✓ Branch 1 taken 131505 times.
|
131695 | if (next->op == SWS_OP_CLEAR) { |
| 401 |
2/2✓ Branch 0 taken 760 times.
✓ Branch 1 taken 190 times.
|
950 | for (int i = 0; i < 4; i++) { |
| 402 |
2/2✓ Branch 0 taken 190 times.
✓ Branch 1 taken 570 times.
|
760 | if (next->c.q4[i].den) |
| 403 | 190 | op->c.q4[i] = next->c.q4[i]; | |
| 404 | } | ||
| 405 | 190 | ff_sws_op_list_remove_at(ops, n + 1, 1); | |
| 406 | 190 | goto retry; | |
| 407 | } | ||
| 408 | 131505 | break; | |
| 409 | |||
| 410 | 279539 | case SWS_OP_SWIZZLE: | |
| 411 |
2/2✓ Branch 0 taken 1118156 times.
✓ Branch 1 taken 279539 times.
|
1397695 | for (int i = 0; i < 4; i++) { |
| 412 |
2/2✓ Branch 0 taken 274964 times.
✓ Branch 1 taken 843192 times.
|
1118156 | if (next->comps.unused[i]) |
| 413 | 274964 | continue; | |
| 414 |
2/2✓ Branch 0 taken 577097 times.
✓ Branch 1 taken 266095 times.
|
843192 | if (op->swizzle.in[i] != i) |
| 415 | 577097 | noop = false; | |
| 416 | } | ||
| 417 | |||
| 418 | /* Identity swizzle */ | ||
| 419 |
2/2✓ Branch 0 taken 45109 times.
✓ Branch 1 taken 234430 times.
|
279539 | if (noop) { |
| 420 | 45109 | ff_sws_op_list_remove_at(ops, n, 1); | |
| 421 | 45109 | goto retry; | |
| 422 | } | ||
| 423 | |||
| 424 | /* Transitive swizzle */ | ||
| 425 |
2/2✓ Branch 0 taken 3586 times.
✓ Branch 1 taken 230844 times.
|
234430 | if (next->op == SWS_OP_SWIZZLE) { |
| 426 | 3586 | const SwsSwizzleOp orig = op->swizzle; | |
| 427 |
2/2✓ Branch 0 taken 14344 times.
✓ Branch 1 taken 3586 times.
|
17930 | for (int i = 0; i < 4; i++) |
| 428 | 14344 | op->swizzle.in[i] = orig.in[next->swizzle.in[i]]; | |
| 429 | 3586 | ff_sws_op_list_remove_at(ops, n + 1, 1); | |
| 430 | 3586 | goto retry; | |
| 431 | } | ||
| 432 | |||
| 433 | /* Swizzle planes instead of components, if possible */ | ||
| 434 |
4/4✓ Branch 0 taken 49443 times.
✓ Branch 1 taken 181401 times.
✓ Branch 2 taken 8896 times.
✓ Branch 3 taken 40547 times.
|
230844 | if (prev->op == SWS_OP_READ && !prev->rw.packed) { |
| 435 |
2/2✓ Branch 0 taken 13178 times.
✓ Branch 1 taken 1090 times.
|
14268 | for (int dst = 0; dst < prev->rw.elems; dst++) { |
| 436 | 13178 | const int src = op->swizzle.in[dst]; | |
| 437 |
4/4✓ Branch 0 taken 8180 times.
✓ Branch 1 taken 4998 times.
✓ Branch 2 taken 7806 times.
✓ Branch 3 taken 374 times.
|
13178 | if (src > dst && src < prev->rw.elems) { |
| 438 | 7806 | FFSWAP(int, ops->order_src.in[dst], ops->order_src.in[src]); | |
| 439 |
2/2✓ Branch 0 taken 27330 times.
✓ Branch 1 taken 7806 times.
|
35136 | for (int i = dst; i < 4; i++) { |
| 440 |
2/2✓ Branch 0 taken 7806 times.
✓ Branch 1 taken 19524 times.
|
27330 | if (op->swizzle.in[i] == dst) |
| 441 | 7806 | op->swizzle.in[i] = src; | |
| 442 |
2/2✓ Branch 0 taken 7806 times.
✓ Branch 1 taken 11718 times.
|
19524 | else if (op->swizzle.in[i] == src) |
| 443 | 7806 | op->swizzle.in[i] = dst; | |
| 444 | } | ||
| 445 | 7806 | goto retry; | |
| 446 | } | ||
| 447 | } | ||
| 448 | } | ||
| 449 | |||
| 450 |
4/4✓ Branch 0 taken 25297 times.
✓ Branch 1 taken 197741 times.
✓ Branch 2 taken 13413 times.
✓ Branch 3 taken 11884 times.
|
223038 | if (next->op == SWS_OP_WRITE && !next->rw.packed) { |
| 451 |
2/2✓ Branch 0 taken 15805 times.
✓ Branch 1 taken 1118 times.
|
16923 | for (int dst = 0; dst < next->rw.elems; dst++) { |
| 452 | 15805 | const int src = op->swizzle.in[dst]; | |
| 453 |
4/4✓ Branch 0 taken 12751 times.
✓ Branch 1 taken 3054 times.
✓ Branch 2 taken 12295 times.
✓ Branch 3 taken 456 times.
|
15805 | if (src > dst && src < next->rw.elems) { |
| 454 | 12295 | FFSWAP(int, ops->order_dst.in[dst], ops->order_dst.in[src]); | |
| 455 | 12295 | FFSWAP(int, op->swizzle.in[dst], op->swizzle.in[src]); | |
| 456 | 12295 | goto retry; | |
| 457 | } | ||
| 458 | } | ||
| 459 | } | ||
| 460 | 210743 | break; | |
| 461 | |||
| 462 | 479592 | case SWS_OP_CONVERT: | |
| 463 | /* No-op conversion */ | ||
| 464 |
2/2✓ Branch 0 taken 9088 times.
✓ Branch 1 taken 470504 times.
|
479592 | if (op->type == op->convert.to) { |
| 465 | 9088 | ff_sws_op_list_remove_at(ops, n, 1); | |
| 466 | 9088 | goto retry; | |
| 467 | } | ||
| 468 | |||
| 469 | /* Transitive conversion */ | ||
| 470 |
2/2✓ Branch 0 taken 12941 times.
✓ Branch 1 taken 457563 times.
|
470504 | if (next->op == SWS_OP_CONVERT && |
| 471 |
1/2✓ Branch 0 taken 12941 times.
✗ Branch 1 not taken.
|
12941 | op->convert.expand == next->convert.expand) |
| 472 | { | ||
| 473 | av_assert1(op->convert.to == next->type); | ||
| 474 | 12941 | op->convert.to = next->convert.to; | |
| 475 | 12941 | ff_sws_op_list_remove_at(ops, n + 1, 1); | |
| 476 | 12941 | goto retry; | |
| 477 | } | ||
| 478 | |||
| 479 | /* Conversion followed by integer expansion */ | ||
| 480 |
3/4✓ Branch 0 taken 60026 times.
✓ Branch 1 taken 397537 times.
✓ Branch 2 taken 60026 times.
✗ Branch 3 not taken.
|
457563 | if (next->op == SWS_OP_SCALE && !op->convert.expand && |
| 481 |
1/2✓ Branch 0 taken 60026 times.
✗ Branch 1 not taken.
|
60026 | ff_sws_pixel_type_is_int(op->type) && |
| 482 |
4/4✓ Branch 0 taken 1078 times.
✓ Branch 1 taken 58948 times.
✓ Branch 2 taken 360 times.
✓ Branch 3 taken 718 times.
|
61104 | ff_sws_pixel_type_is_int(op->convert.to) && |
| 483 | 1078 | !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to))) | |
| 484 | { | ||
| 485 | 360 | op->convert.expand = true; | |
| 486 | 360 | ff_sws_op_list_remove_at(ops, n + 1, 1); | |
| 487 | 360 | goto retry; | |
| 488 | } | ||
| 489 | 457203 | break; | |
| 490 | |||
| 491 | 88800 | case SWS_OP_MIN: | |
| 492 |
2/2✓ Branch 0 taken 355200 times.
✓ Branch 1 taken 88800 times.
|
444000 | for (int i = 0; i < 4; i++) { |
| 493 |
4/4✓ Branch 0 taken 261646 times.
✓ Branch 1 taken 93554 times.
✓ Branch 2 taken 3000 times.
✓ Branch 3 taken 258646 times.
|
355200 | if (next->comps.unused[i] || !op->c.q4[i].den) |
| 494 | 96554 | continue; | |
| 495 |
2/2✓ Branch 1 taken 201974 times.
✓ Branch 2 taken 56672 times.
|
258646 | if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0) |
| 496 | 201974 | noop = false; | |
| 497 | } | ||
| 498 | |||
| 499 |
2/2✓ Branch 0 taken 13260 times.
✓ Branch 1 taken 75540 times.
|
88800 | if (noop) { |
| 500 | 13260 | ff_sws_op_list_remove_at(ops, n, 1); | |
| 501 | 13260 | goto retry; | |
| 502 | } | ||
| 503 | 75540 | break; | |
| 504 | |||
| 505 | 56308 | case SWS_OP_MAX: | |
| 506 |
2/2✓ Branch 0 taken 225232 times.
✓ Branch 1 taken 56308 times.
|
281540 | for (int i = 0; i < 4; i++) { |
| 507 |
3/4✓ Branch 0 taken 165892 times.
✓ Branch 1 taken 59340 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 165892 times.
|
225232 | if (next->comps.unused[i] || !op->c.q4[i].den) |
| 508 | 59340 | continue; | |
| 509 |
2/2✓ Branch 1 taken 93120 times.
✓ Branch 2 taken 72772 times.
|
165892 | if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0) |
| 510 | 93120 | noop = false; | |
| 511 | } | ||
| 512 | |||
| 513 |
2/2✓ Branch 0 taken 21302 times.
✓ Branch 1 taken 35006 times.
|
56308 | if (noop) { |
| 514 | 21302 | ff_sws_op_list_remove_at(ops, n, 1); | |
| 515 | 21302 | goto retry; | |
| 516 | } | ||
| 517 | 35006 | break; | |
| 518 | |||
| 519 | 98471 | case SWS_OP_DITHER: | |
| 520 |
2/2✓ Branch 0 taken 380124 times.
✓ Branch 1 taken 87107 times.
|
467231 | for (int i = 0; i < 4; i++) { |
| 521 |
4/4✓ Branch 0 taken 284215 times.
✓ Branch 1 taken 95909 times.
✓ Branch 2 taken 32716 times.
✓ Branch 3 taken 251499 times.
|
380124 | if (next->comps.unused[i] || op->dither.y_offset[i] < 0) |
| 522 | 128625 | continue; | |
| 523 |
2/2✓ Branch 0 taken 11364 times.
✓ Branch 1 taken 240135 times.
|
251499 | if (prev->comps.flags[i] & SWS_COMP_EXACT) { |
| 524 | 11364 | op->dither.y_offset[i] = -1; /* unnecessary dither */ | |
| 525 | 11364 | goto retry; | |
| 526 | } else { | ||
| 527 | 240135 | noop = false; | |
| 528 | } | ||
| 529 | } | ||
| 530 | |||
| 531 |
2/2✓ Branch 0 taken 1712 times.
✓ Branch 1 taken 85395 times.
|
87107 | if (noop) { |
| 532 | 1712 | ff_sws_op_list_remove_at(ops, n, 1); | |
| 533 | 1712 | goto retry; | |
| 534 | } | ||
| 535 | 85395 | break; | |
| 536 | |||
| 537 | 224401 | case SWS_OP_LINEAR: { | |
| 538 | SwsSwizzleOp swizzle; | ||
| 539 | SwsConst c; | ||
| 540 | |||
| 541 | /* No-op (identity) linear operation */ | ||
| 542 |
2/2✓ Branch 0 taken 3018 times.
✓ Branch 1 taken 221383 times.
|
224401 | if (!op->lin.mask) { |
| 543 | 3018 | ff_sws_op_list_remove_at(ops, n, 1); | |
| 544 | 126572 | goto retry; | |
| 545 | } | ||
| 546 | |||
| 547 |
2/2✓ Branch 0 taken 59722 times.
✓ Branch 1 taken 161661 times.
|
221383 | if (next->op == SWS_OP_LINEAR) { |
| 548 | /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */ | ||
| 549 | 59722 | const SwsLinearOp m1 = op->lin; | |
| 550 | 59722 | const SwsLinearOp m2 = next->lin; | |
| 551 |
2/2✓ Branch 0 taken 238888 times.
✓ Branch 1 taken 59722 times.
|
298610 | for (int i = 0; i < 4; i++) { |
| 552 |
2/2✓ Branch 0 taken 1194440 times.
✓ Branch 1 taken 238888 times.
|
1433328 | for (int j = 0; j < 5; j++) { |
| 553 | 1194440 | AVRational sum = Q(0); | |
| 554 |
2/2✓ Branch 0 taken 4777760 times.
✓ Branch 1 taken 1194440 times.
|
5972200 | for (int k = 0; k < 4; k++) |
| 555 | 4777760 | sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j])); | |
| 556 |
2/2✓ Branch 0 taken 238888 times.
✓ Branch 1 taken 955552 times.
|
1194440 | if (j == 4) /* m1.m[4][j] == 1 */ |
| 557 | 238888 | sum = av_add_q(sum, m2.m[i][4]); | |
| 558 | 1194440 | op->lin.m[i][j] = sum; | |
| 559 | } | ||
| 560 | } | ||
| 561 | 59722 | op->lin.mask = ff_sws_linear_mask(op->lin); | |
| 562 | 59722 | ff_sws_op_list_remove_at(ops, n + 1, 1); | |
| 563 | 59722 | goto retry; | |
| 564 | } | ||
| 565 | |||
| 566 | /* Optimize away zero columns */ | ||
| 567 |
2/2✓ Branch 0 taken 634416 times.
✓ Branch 1 taken 147525 times.
|
781941 | for (int j = 0; j < 4; j++) { |
| 568 | 634416 | const uint32_t col = SWS_MASK_COL(j); | |
| 569 |
4/4✓ Branch 0 taken 44074 times.
✓ Branch 1 taken 590342 times.
✓ Branch 2 taken 29938 times.
✓ Branch 3 taken 14136 times.
|
634416 | if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col)) |
| 570 | 620280 | continue; | |
| 571 |
2/2✓ Branch 0 taken 56544 times.
✓ Branch 1 taken 14136 times.
|
70680 | for (int i = 0; i < 4; i++) |
| 572 | 56544 | op->lin.m[i][j] = Q(i == j); | |
| 573 | 14136 | op->lin.mask &= ~col; | |
| 574 | 14136 | goto retry; | |
| 575 | } | ||
| 576 | |||
| 577 | /* Optimize away unused rows */ | ||
| 578 |
2/2✓ Branch 0 taken 577872 times.
✓ Branch 1 taken 119429 times.
|
697301 | for (int i = 0; i < 4; i++) { |
| 579 | 577872 | const uint32_t row = SWS_MASK_ROW(i); | |
| 580 |
4/4✓ Branch 0 taken 197992 times.
✓ Branch 1 taken 379880 times.
✓ Branch 2 taken 169896 times.
✓ Branch 3 taken 28096 times.
|
577872 | if (!next->comps.unused[i] || !(op->lin.mask & row)) |
| 581 | 549776 | continue; | |
| 582 |
2/2✓ Branch 0 taken 140480 times.
✓ Branch 1 taken 28096 times.
|
168576 | for (int j = 0; j < 5; j++) |
| 583 | 140480 | op->lin.m[i][j] = Q(i == j); | |
| 584 | 28096 | op->lin.mask &= ~row; | |
| 585 | 28096 | goto retry; | |
| 586 | } | ||
| 587 | |||
| 588 | /* Convert constant rows to explicit clear instruction */ | ||
| 589 |
2/2✓ Branch 1 taken 8746 times.
✓ Branch 2 taken 110683 times.
|
119429 | if (extract_constant_rows(&op->lin, prev->comps, &c)) { |
| 590 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 8746 times.
|
8746 | RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) { |
| 591 | .op = SWS_OP_CLEAR, | ||
| 592 | .type = op->type, | ||
| 593 | .comps = op->comps, | ||
| 594 | .c = c, | ||
| 595 | })); | ||
| 596 | 8746 | goto retry; | |
| 597 | } | ||
| 598 | |||
| 599 | /* Multiplication by scalar constant */ | ||
| 600 |
2/2✓ Branch 1 taken 10262 times.
✓ Branch 2 taken 100421 times.
|
110683 | if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) { |
| 601 | 10262 | op->op = SWS_OP_SCALE; | |
| 602 | 10262 | op->c = c; | |
| 603 | 10262 | goto retry; | |
| 604 | } | ||
| 605 | |||
| 606 | /* Swizzle by fixed pattern */ | ||
| 607 |
2/2✓ Branch 1 taken 2592 times.
✓ Branch 2 taken 97829 times.
|
100421 | if (extract_swizzle(&op->lin, prev->comps, &swizzle)) { |
| 608 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2592 times.
|
2592 | RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) { |
| 609 | .op = SWS_OP_SWIZZLE, | ||
| 610 | .type = op->type, | ||
| 611 | .swizzle = swizzle, | ||
| 612 | })); | ||
| 613 | 2592 | goto retry; | |
| 614 | } | ||
| 615 | 97829 | break; | |
| 616 | } | ||
| 617 | |||
| 618 | 77966 | case SWS_OP_SCALE: { | |
| 619 | 77966 | const int factor2 = exact_log2_q(op->c.q); | |
| 620 | |||
| 621 | /* No-op scaling */ | ||
| 622 |
3/4✓ Branch 0 taken 17594 times.
✓ Branch 1 taken 60372 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 17594 times.
|
77966 | if (op->c.q.num == 1 && op->c.q.den == 1) { |
| 623 | ✗ | ff_sws_op_list_remove_at(ops, n, 1); | |
| 624 | ✗ | goto retry; | |
| 625 | } | ||
| 626 | |||
| 627 | /* Scaling by exact power of two */ | ||
| 628 |
4/4✓ Branch 0 taken 14607 times.
✓ Branch 1 taken 63359 times.
✓ Branch 2 taken 978 times.
✓ Branch 3 taken 13629 times.
|
77966 | if (factor2 && ff_sws_pixel_type_is_int(op->type)) { |
| 629 |
1/2✓ Branch 0 taken 978 times.
✗ Branch 1 not taken.
|
978 | op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT; |
| 630 | 978 | op->c.u = FFABS(factor2); | |
| 631 | 978 | goto retry; | |
| 632 | } | ||
| 633 | 76988 | break; | |
| 634 | } | ||
| 635 | } | ||
| 636 | } | ||
| 637 | |||
| 638 | /* Push clears to the back to void any unused components */ | ||
| 639 |
2/2✓ Branch 0 taken 735517 times.
✓ Branch 1 taken 70469 times.
|
805986 | for (int n = 0; n < ops->num_ops - 1; n++) { |
| 640 | 735517 | SwsOp *op = &ops->ops[n]; | |
| 641 | 735517 | SwsOp *next = &ops->ops[n + 1]; | |
| 642 | |||
| 643 |
2/2✓ Branch 0 taken 51904 times.
✓ Branch 1 taken 683613 times.
|
735517 | switch (op->op) { |
| 644 | 51904 | case SWS_OP_CLEAR: | |
| 645 |
2/2✓ Branch 1 taken 27350 times.
✓ Branch 2 taken 24554 times.
|
51904 | if (op_commute_clear(op, next)) { |
| 646 | 27350 | FFSWAP(SwsOp, *op, *next); | |
| 647 | 27350 | goto retry; | |
| 648 | } | ||
| 649 | 24554 | break; | |
| 650 | } | ||
| 651 | } | ||
| 652 | |||
| 653 | /* Apply any remaining preferential re-ordering optimizations; do these | ||
| 654 | * last because they are more likely to block other optimizations if done | ||
| 655 | * too aggressively */ | ||
| 656 |
2/2✓ Branch 0 taken 403812 times.
✓ Branch 1 taken 31704 times.
|
435516 | for (int n = 0; n < ops->num_ops - 1; n++) { |
| 657 | 403812 | SwsOp *op = &ops->ops[n]; | |
| 658 | 403812 | SwsOp *next = &ops->ops[n + 1]; | |
| 659 | |||
| 660 |
3/3✓ Branch 0 taken 60367 times.
✓ Branch 1 taken 24204 times.
✓ Branch 2 taken 319241 times.
|
403812 | switch (op->op) { |
| 661 | 60367 | case SWS_OP_SWIZZLE: { | |
| 662 | /* Try to push swizzles towards the output */ | ||
| 663 |
2/2✓ Branch 1 taken 36941 times.
✓ Branch 2 taken 23426 times.
|
60367 | if (op_commute_swizzle(op, next)) { |
| 664 | 36941 | FFSWAP(SwsOp, *op, *next); | |
| 665 | 36941 | goto retry; | |
| 666 | } | ||
| 667 | 23426 | break; | |
| 668 | } | ||
| 669 | |||
| 670 | 24204 | case SWS_OP_SCALE: | |
| 671 | /* Scaling by integer before conversion to int */ | ||
| 672 |
4/4✓ Branch 0 taken 6362 times.
✓ Branch 1 taken 17842 times.
✓ Branch 2 taken 1824 times.
✓ Branch 3 taken 4538 times.
|
24204 | if (op->c.q.den == 1 && next->op == SWS_OP_CONVERT && |
| 673 |
1/2✓ Branch 0 taken 1824 times.
✗ Branch 1 not taken.
|
1824 | ff_sws_pixel_type_is_int(next->convert.to)) |
| 674 | { | ||
| 675 | 1824 | op->type = next->convert.to; | |
| 676 | 1824 | FFSWAP(SwsOp, *op, *next); | |
| 677 | 1824 | goto retry; | |
| 678 | } | ||
| 679 | 22380 | break; | |
| 680 | } | ||
| 681 | } | ||
| 682 | |||
| 683 | 31704 | return 0; | |
| 684 | } | ||
| 685 | |||
| 686 | 16765 | int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[], | |
| 687 | int size, uint8_t clear_val, | ||
| 688 | int *read_bytes, int *write_bytes) | ||
| 689 | { | ||
| 690 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16765 times.
|
16765 | if (!ops->num_ops) |
| 691 | ✗ | return AVERROR(EINVAL); | |
| 692 | |||
| 693 | 16765 | const SwsOp *read = ff_sws_op_list_input(ops); | |
| 694 |
7/8✓ Branch 0 taken 16765 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16513 times.
✓ Branch 3 taken 252 times.
✓ Branch 4 taken 10241 times.
✓ Branch 5 taken 6272 times.
✓ Branch 6 taken 7741 times.
✓ Branch 7 taken 2500 times.
|
16765 | if (!read || read->rw.frac || (!read->rw.packed && read->rw.elems > 1)) |
| 695 | 7993 | return AVERROR(ENOTSUP); | |
| 696 | |||
| 697 | 8772 | const int read_size = ff_sws_pixel_type_size(read->type); | |
| 698 | 8772 | uint32_t mask[4] = {0}; | |
| 699 |
2/2✓ Branch 0 taken 18817 times.
✓ Branch 1 taken 8772 times.
|
27589 | for (int i = 0; i < read->rw.elems; i++) |
| 700 | 18817 | mask[i] = 0x01010101 * i * read_size + 0x03020100; | |
| 701 | |||
| 702 |
1/2✓ Branch 0 taken 12052 times.
✗ Branch 1 not taken.
|
12052 | for (int opidx = 1; opidx < ops->num_ops; opidx++) { |
| 703 | 12052 | const SwsOp *op = &ops->ops[opidx]; | |
| 704 |
6/6✓ Branch 0 taken 379 times.
✓ Branch 1 taken 2677 times.
✓ Branch 2 taken 210 times.
✓ Branch 3 taken 4938 times.
✓ Branch 4 taken 528 times.
✓ Branch 5 taken 3320 times.
|
12052 | switch (op->op) { |
| 705 | 379 | case SWS_OP_SWIZZLE: { | |
| 706 | 379 | uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] }; | |
| 707 |
2/2✓ Branch 0 taken 1516 times.
✓ Branch 1 taken 379 times.
|
1895 | for (int i = 0; i < 4; i++) |
| 708 | 1516 | mask[i] = orig[op->swizzle.in[i]]; | |
| 709 | 379 | break; | |
| 710 | } | ||
| 711 | |||
| 712 | 2677 | case SWS_OP_SWAP_BYTES: | |
| 713 |
2/2✓ Branch 0 taken 10708 times.
✓ Branch 1 taken 2677 times.
|
13385 | for (int i = 0; i < 4; i++) { |
| 714 |
2/3✓ Branch 0 taken 10200 times.
✓ Branch 1 taken 508 times.
✗ Branch 2 not taken.
|
10708 | switch (ff_sws_pixel_type_size(op->type)) { |
| 715 | 10200 | case 2: mask[i] = av_bswap16(mask[i]); break; | |
| 716 | 508 | case 4: mask[i] = av_bswap32(mask[i]); break; | |
| 717 | } | ||
| 718 | } | ||
| 719 | 2677 | break; | |
| 720 | |||
| 721 | 210 | case SWS_OP_CLEAR: | |
| 722 |
2/2✓ Branch 0 taken 758 times.
✓ Branch 1 taken 54 times.
|
812 | for (int i = 0; i < 4; i++) { |
| 723 |
2/2✓ Branch 0 taken 548 times.
✓ Branch 1 taken 210 times.
|
758 | if (!op->c.q4[i].den) |
| 724 | 548 | continue; | |
| 725 |
3/4✓ Branch 0 taken 54 times.
✓ Branch 1 taken 156 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 54 times.
|
210 | if (op->c.q4[i].num != 0 || !clear_val) |
| 726 | 156 | return AVERROR(ENOTSUP); | |
| 727 | 54 | mask[i] = 0x1010101ul * clear_val; | |
| 728 | } | ||
| 729 | 54 | break; | |
| 730 | |||
| 731 | 4938 | case SWS_OP_CONVERT: { | |
| 732 |
2/2✓ Branch 0 taken 4768 times.
✓ Branch 1 taken 170 times.
|
4938 | if (!op->convert.expand) |
| 733 | 4768 | return AVERROR(ENOTSUP); | |
| 734 |
2/2✓ Branch 0 taken 680 times.
✓ Branch 1 taken 170 times.
|
850 | for (int i = 0; i < 4; i++) { |
| 735 |
1/3✓ Branch 0 taken 680 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
|
680 | switch (ff_sws_pixel_type_size(op->type)) { |
| 736 | 680 | case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break; | |
| 737 | ✗ | case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break; | |
| 738 | } | ||
| 739 | } | ||
| 740 | 170 | break; | |
| 741 | } | ||
| 742 | |||
| 743 | 528 | case SWS_OP_WRITE: { | |
| 744 |
6/6✓ Branch 0 taken 514 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 262 times.
✓ Branch 3 taken 252 times.
✓ Branch 4 taken 211 times.
✓ Branch 5 taken 51 times.
|
528 | if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1)) |
| 745 | 225 | return AVERROR(ENOTSUP); | |
| 746 | |||
| 747 | /* Initialize to no-op */ | ||
| 748 | 303 | memset(shuffle, clear_val, size); | |
| 749 | |||
| 750 | 303 | const int write_size = ff_sws_pixel_type_size(op->type); | |
| 751 | 303 | const int read_chunk = read->rw.elems * read_size; | |
| 752 | 303 | const int write_chunk = op->rw.elems * write_size; | |
| 753 | 303 | const int num_groups = size / FFMAX(read_chunk, write_chunk); | |
| 754 |
2/2✓ Branch 0 taken 1110 times.
✓ Branch 1 taken 303 times.
|
1413 | for (int n = 0; n < num_groups; n++) { |
| 755 | 1110 | const int base_in = n * read_chunk; | |
| 756 | 1110 | const int base_out = n * write_chunk; | |
| 757 |
2/2✓ Branch 0 taken 2842 times.
✓ Branch 1 taken 1110 times.
|
3952 | for (int i = 0; i < op->rw.elems; i++) { |
| 758 | 2842 | const int offset = base_out + i * write_size; | |
| 759 |
2/2✓ Branch 0 taken 4354 times.
✓ Branch 1 taken 2842 times.
|
7196 | for (int b = 0; b < write_size; b++) { |
| 760 | 4354 | const uint8_t idx = mask[i] >> (b * 8); | |
| 761 |
2/2✓ Branch 0 taken 4138 times.
✓ Branch 1 taken 216 times.
|
4354 | if (idx != clear_val) |
| 762 | 4138 | shuffle[offset + b] = base_in + idx; | |
| 763 | } | ||
| 764 | } | ||
| 765 | } | ||
| 766 | |||
| 767 | 303 | *read_bytes = num_groups * read_chunk; | |
| 768 | 303 | *write_bytes = num_groups * write_chunk; | |
| 769 | 303 | return num_groups; | |
| 770 | } | ||
| 771 | |||
| 772 | 3320 | default: | |
| 773 | 3320 | return AVERROR(ENOTSUP); | |
| 774 | } | ||
| 775 | } | ||
| 776 | |||
| 777 | ✗ | return AVERROR(EINVAL); | |
| 778 | } | ||
| 779 |