| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /** | ||
| 2 | * Copyright (C) 2025 Niklas Haas | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public | ||
| 17 | * License along with FFmpeg; if not, write to the Free Software | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include "libavutil/avassert.h" | ||
| 22 | |||
| 23 | #include "ops_backend.h" | ||
| 24 | |||
/* Default to 32-bit float pixels unless the including file chose a depth. */
#if !defined(BIT_DEPTH)
#  define BIT_DEPTH 32
#endif

/* Only single precision floating point is implemented by this template. */
#if BIT_DEPTH != 32
#  error Invalid BIT_DEPTH
#endif

/* Type and naming parameters for the 32-bit float instantiation. */
#define PIXEL_TYPE SWS_PIXEL_F32
#define PIXEL_MAX  FLT_MAX
#define pixel_t    float
#define inter_t    float
#define block_t    f32block_t
#define px         f32

/* Template switches read by the common template included after this block. */
#define IS_FLOAT 1
#define FMT_CHAR f
| 42 | #include "ops_tmpl_common.c" | ||
| 43 | |||
| 44 | 1150 | DECL_SETUP(setup_dither, params, out) | |
| 45 | { | ||
| 46 | 1150 | const SwsOp *op = params->op; | |
| 47 | 1150 | const int size = 1 << op->dither.size_log2; | |
| 48 |
2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 1094 times.
|
1150 | if (size == 1) { |
| 49 | /* We special case this value */ | ||
| 50 | av_assert1(!av_cmp_q(op->dither.matrix[0], av_make_q(1, 2))); | ||
| 51 | 56 | out->priv.ptr = NULL; | |
| 52 | 56 | return 0; | |
| 53 | } | ||
| 54 | |||
| 55 | 1094 | const int width = FFMAX(size, SWS_BLOCK_SIZE); | |
| 56 | 1094 | pixel_t *matrix = out->priv.ptr = av_malloc(sizeof(pixel_t) * size * width); | |
| 57 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1094 times.
|
1094 | if (!matrix) |
| 58 | ✗ | return AVERROR(ENOMEM); | |
| 59 | 1094 | out->free = ff_op_priv_free; | |
| 60 | |||
| 61 | static_assert(sizeof(out->priv.ptr) <= sizeof(uint8_t[8]), | ||
| 62 | ">8 byte pointers not supported"); | ||
| 63 | |||
| 64 | 1094 | int8_t *offset = &out->priv.i8[8]; | |
| 65 |
2/2✓ Branch 0 taken 4376 times.
✓ Branch 1 taken 1094 times.
|
5470 | for (int i = 0; i < 4; i++) |
| 66 | 4376 | offset[i] = op->dither.y_offset[i]; | |
| 67 | |||
| 68 |
2/2✓ Branch 0 taken 38896 times.
✓ Branch 1 taken 1094 times.
|
39990 | for (int y = 0; y < size; y++) { |
| 69 |
2/2✓ Branch 0 taken 5058656 times.
✓ Branch 1 taken 38896 times.
|
5097552 | for (int x = 0; x < size; x++) |
| 70 |
1/2✓ Branch 0 taken 5058656 times.
✗ Branch 1 not taken.
|
5058656 | matrix[y * width + x] = av_q2pixel(op->dither.matrix[y * size + x]); |
| 71 |
2/2✓ Branch 0 taken 200096 times.
✓ Branch 1 taken 38896 times.
|
238992 | for (int x = size; x < width; x++) /* pad to block size */ |
| 72 | 200096 | matrix[y * width + x] = matrix[y * width + (x % size)]; | |
| 73 | } | ||
| 74 | |||
| 75 | 1094 | return 0; | |
| 76 | } | ||
| 77 | |||
| 78 | 191808 | DECL_FUNC(dither, const int size_log2) | |
| 79 | { | ||
| 80 | 191808 | const pixel_t *restrict matrix = impl->priv.ptr; | |
| 81 | 191808 | const int8_t *restrict offset = &impl->priv.i8[8]; | |
| 82 | 191808 | const int mask = (1 << size_log2) - 1; | |
| 83 | 191808 | const int y_line = iter->y; | |
| 84 | 191808 | const int size = 1 << size_log2; | |
| 85 | 191808 | const int width = FFMAX(size, SWS_BLOCK_SIZE); | |
| 86 | 191808 | const int base = iter->x & ~(SWS_BLOCK_SIZE - 1) & (size - 1); | |
| 87 | |||
| 88 | #define DITHER_COMP(VAR, IDX) \ | ||
| 89 | if (offset[IDX] >= 0) { \ | ||
| 90 | const int row = (y_line + offset[IDX]) & mask; \ | ||
| 91 | SWS_LOOP \ | ||
| 92 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) \ | ||
| 93 | VAR[i] += size_log2 ? matrix[row * width + base + i] : (pixel_t) 0.5; \ | ||
| 94 | } | ||
| 95 | |||
| 96 |
6/6✓ Branch 0 taken 166752 times.
✓ Branch 1 taken 25056 times.
✓ Branch 2 taken 5315584 times.
✓ Branch 3 taken 20480 times.
✓ Branch 4 taken 5336064 times.
✓ Branch 5 taken 166752 times.
|
5527872 | DITHER_COMP(x, 0) |
| 97 |
6/6✓ Branch 0 taken 161280 times.
✓ Branch 1 taken 30528 times.
✓ Branch 2 taken 5140480 times.
✓ Branch 3 taken 20480 times.
✓ Branch 4 taken 5160960 times.
✓ Branch 5 taken 161280 times.
|
5352768 | DITHER_COMP(y, 1) |
| 98 |
6/6✓ Branch 0 taken 157824 times.
✓ Branch 1 taken 33984 times.
✓ Branch 2 taken 5029888 times.
✓ Branch 3 taken 20480 times.
✓ Branch 4 taken 5050368 times.
✓ Branch 5 taken 157824 times.
|
5242176 | DITHER_COMP(z, 2) |
| 99 |
6/6✓ Branch 0 taken 64224 times.
✓ Branch 1 taken 127584 times.
✓ Branch 2 taken 2034688 times.
✓ Branch 3 taken 20480 times.
✓ Branch 4 taken 2055168 times.
✓ Branch 5 taken 64224 times.
|
2246976 | DITHER_COMP(w, 3) |
| 100 | |||
| 101 | 191808 | CONTINUE(x, y, z, w); | |
| 102 | 191808 | } | |
| 103 | |||
| 104 | #define WRAP_DITHER(N) \ | ||
| 105 | DECL_IMPL(dither, dither##N, N) \ | ||
| 106 | \ | ||
| 107 | DECL_ENTRY(dither##N, SWS_COMP_ALL, \ | ||
| 108 | .op = SWS_OP_DITHER, \ | ||
| 109 | .dither_size = N, \ | ||
| 110 | .setup = fn(setup_dither), \ | ||
| 111 | ); | ||
| 112 | |||
| 113 | 640 | WRAP_DITHER(0) | |
| 114 | 640 | WRAP_DITHER(1) | |
| 115 | 640 | WRAP_DITHER(2) | |
| 116 | 640 | WRAP_DITHER(3) | |
| 117 | 186688 | WRAP_DITHER(4) | |
| 118 | 640 | WRAP_DITHER(5) | |
| 119 | 640 | WRAP_DITHER(6) | |
| 120 | 640 | WRAP_DITHER(7) | |
| 121 | 640 | WRAP_DITHER(8) | |
| 122 | |||
| 123 | typedef struct { | ||
| 124 | /* Stored in split form for convenience */ | ||
| 125 | pixel_t m[4][4]; | ||
| 126 | pixel_t k[4]; | ||
| 127 | } fn(LinCoeffs); | ||
| 128 | |||
| 129 | 772 | DECL_SETUP(setup_linear, params, out) | |
| 130 | { | ||
| 131 | 772 | const SwsOp *op = params->op; | |
| 132 | fn(LinCoeffs) c; | ||
| 133 | |||
| 134 |
2/2✓ Branch 0 taken 3088 times.
✓ Branch 1 taken 772 times.
|
3860 | for (int i = 0; i < 4; i++) { |
| 135 |
2/2✓ Branch 0 taken 12352 times.
✓ Branch 1 taken 3088 times.
|
15440 | for (int j = 0; j < 4; j++) |
| 136 |
1/2✓ Branch 0 taken 12352 times.
✗ Branch 1 not taken.
|
12352 | c.m[i][j] = av_q2pixel(op->lin.m[i][j]); |
| 137 |
1/2✓ Branch 0 taken 3088 times.
✗ Branch 1 not taken.
|
3088 | c.k[i] = av_q2pixel(op->lin.m[i][4]); |
| 138 | } | ||
| 139 | |||
| 140 | 772 | return SETUP_MEMDUP(c, out); | |
| 141 | } | ||
| 142 | |||
| 143 | /** | ||
| 144 | * Fully general case for a 5x5 linear affine transformation. Should never be | ||
| 145 | * called without constant `mask`. This function will compile down to the | ||
| 146 | * appropriately optimized version for the required subset of operations when | ||
| 147 | * called with a constant mask. | ||
| 148 | */ | ||
| 149 | 179744 | DECL_FUNC(linear_mask, const uint32_t mask) | |
| 150 | { | ||
| 151 | 179744 | const fn(LinCoeffs) c = *(const fn(LinCoeffs) *) impl->priv.ptr; | |
| 152 | |||
| 153 | SWS_LOOP | ||
| 154 |
2/2✓ Branch 0 taken 5751808 times.
✓ Branch 1 taken 179744 times.
|
5931552 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
| 155 | 5751808 | const pixel_t xx = x[i]; | |
| 156 | 5751808 | const pixel_t yy = y[i]; | |
| 157 | 5751808 | const pixel_t zz = z[i]; | |
| 158 | 5751808 | const pixel_t ww = w[i]; | |
| 159 | |||
| 160 |
2/2✓ Branch 0 taken 4435968 times.
✓ Branch 1 taken 1315840 times.
|
5751808 | x[i] = (mask & SWS_MASK_OFF(0)) ? c.k[0] : 0; |
| 161 |
2/2✓ Branch 0 taken 5672960 times.
✓ Branch 1 taken 78848 times.
|
5751808 | x[i] += (mask & SWS_MASK(0, 0)) ? c.m[0][0] * xx : xx; |
| 162 |
2/2✓ Branch 0 taken 2195456 times.
✓ Branch 1 taken 3556352 times.
|
5751808 | x[i] += (mask & SWS_MASK(0, 1)) ? c.m[0][1] * yy : 0; |
| 163 |
2/2✓ Branch 0 taken 4038656 times.
✓ Branch 1 taken 1713152 times.
|
5751808 | x[i] += (mask & SWS_MASK(0, 2)) ? c.m[0][2] * zz : 0; |
| 164 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5751808 times.
|
5751808 | x[i] += (mask & SWS_MASK(0, 3)) ? c.m[0][3] * ww : 0; |
| 165 | |||
| 166 |
2/2✓ Branch 0 taken 3757056 times.
✓ Branch 1 taken 1994752 times.
|
5751808 | y[i] = (mask & SWS_MASK_OFF(1)) ? c.k[1] : 0; |
| 167 |
2/2✓ Branch 0 taken 3696640 times.
✓ Branch 1 taken 2055168 times.
|
5751808 | y[i] += (mask & SWS_MASK(1, 0)) ? c.m[1][0] * xx : 0; |
| 168 |
2/2✓ Branch 0 taken 4657152 times.
✓ Branch 1 taken 1094656 times.
|
5751808 | y[i] += (mask & SWS_MASK(1, 1)) ? c.m[1][1] * yy : yy; |
| 169 |
2/2✓ Branch 0 taken 3696640 times.
✓ Branch 1 taken 2055168 times.
|
5751808 | y[i] += (mask & SWS_MASK(1, 2)) ? c.m[1][2] * zz : 0; |
| 170 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5751808 times.
|
5751808 | y[i] += (mask & SWS_MASK(1, 3)) ? c.m[1][3] * ww : 0; |
| 171 | |||
| 172 |
2/2✓ Branch 0 taken 3757056 times.
✓ Branch 1 taken 1994752 times.
|
5751808 | z[i] = (mask & SWS_MASK_OFF(2)) ? c.k[2] : 0; |
| 173 |
2/2✓ Branch 0 taken 3696640 times.
✓ Branch 1 taken 2055168 times.
|
5751808 | z[i] += (mask & SWS_MASK(2, 0)) ? c.m[2][0] * xx : 0; |
| 174 |
2/2✓ Branch 0 taken 3696640 times.
✓ Branch 1 taken 2055168 times.
|
5751808 | z[i] += (mask & SWS_MASK(2, 1)) ? c.m[2][1] * yy : 0; |
| 175 |
2/2✓ Branch 0 taken 4657152 times.
✓ Branch 1 taken 1094656 times.
|
5751808 | z[i] += (mask & SWS_MASK(2, 2)) ? c.m[2][2] * zz : zz; |
| 176 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5751808 times.
|
5751808 | z[i] += (mask & SWS_MASK(2, 3)) ? c.m[2][3] * ww : 0; |
| 177 | |||
| 178 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5751808 times.
|
5751808 | w[i] = (mask & SWS_MASK_OFF(3)) ? c.k[3] : 0; |
| 179 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5751808 times.
|
5751808 | w[i] += (mask & SWS_MASK(3, 0)) ? c.m[3][0] * xx : 0; |
| 180 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5751808 times.
|
5751808 | w[i] += (mask & SWS_MASK(3, 1)) ? c.m[3][1] * yy : 0; |
| 181 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5751808 times.
|
5751808 | w[i] += (mask & SWS_MASK(3, 2)) ? c.m[3][2] * zz : 0; |
| 182 |
2/2✓ Branch 0 taken 1278976 times.
✓ Branch 1 taken 4472832 times.
|
5751808 | w[i] += (mask & SWS_MASK(3, 3)) ? c.m[3][3] * ww : ww; |
| 183 | } | ||
| 184 | |||
| 185 | 179744 | CONTINUE(x, y, z, w); | |
| 186 | 179744 | } | |
| 187 | |||
| 188 | #define WRAP_LINEAR(NAME, MASK) \ | ||
| 189 | DECL_IMPL(linear_mask, linear_##NAME, MASK) \ | ||
| 190 | \ | ||
| 191 | DECL_ENTRY(linear_##NAME, SWS_COMP_ALL, \ | ||
| 192 | .op = SWS_OP_LINEAR, \ | ||
| 193 | .setup = fn(setup_linear), \ | ||
| 194 | .linear_mask = (MASK), \ | ||
| 195 | ); | ||
| 196 | |||
| 197 | 19168 | WRAP_LINEAR(luma, SWS_MASK_LUMA) | |
| 198 | 2464 | WRAP_LINEAR(alpha, SWS_MASK_ALPHA) | |
| 199 | 1888 | WRAP_LINEAR(lumalpha, SWS_MASK_LUMA | SWS_MASK_ALPHA) | |
| 200 | ✗ | WRAP_LINEAR(yalpha, SWS_MASK(1, 1)) /* ya alpha */ | |
| 201 | 9952 | WRAP_LINEAR(dot3, 0x7) | |
| 202 | ✗ | WRAP_LINEAR(dot3a, 0x7 | SWS_MASK_ALPHA) | |
| 203 | 736 | WRAP_LINEAR(row0, SWS_MASK_ROW(0) ^ SWS_MASK(0, 3)) /* row0 sans alpha */ | |
| 204 | 10528 | WRAP_LINEAR(diag3, SWS_MASK_DIAG3) | |
| 205 | 17600 | WRAP_LINEAR(diag4, SWS_MASK_DIAG4) | |
| 206 | 1888 | WRAP_LINEAR(diagoff3, SWS_MASK_DIAG3 | SWS_MASK_OFF3) | |
| 207 | 47392 | WRAP_LINEAR(affine3, SWS_MASK_MAT3 | SWS_MASK_OFF3) | |
| 208 | 576 | WRAP_LINEAR(affine3uv, SWS_MASK_MAT3 | SWS_MASK_OFF(1) | SWS_MASK_OFF(2)) | |
| 209 | 49536 | WRAP_LINEAR(affine3x, SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3) | |
| 210 | 8064 | WRAP_LINEAR(affine3xa, SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3 | SWS_MASK_ALPHA) | |
| 211 | ✗ | WRAP_LINEAR(affine3xy, SWS_MASK_MAT3 ^ SWS_MASK(0, 0) ^ SWS_MASK(0, 1) | SWS_MASK_OFF3) | |
| 212 | 9952 | WRAP_LINEAR(affine3a, SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA) | |
| 213 | |||
| 214 | static const SwsOpTable fn(op_table_float) = { | ||
| 215 | .block_size = SWS_BLOCK_SIZE, | ||
| 216 | .entries = { | ||
| 217 | REF_COMMON_PATTERNS(convert_uint8), | ||
| 218 | REF_COMMON_PATTERNS(convert_uint16), | ||
| 219 | REF_COMMON_PATTERNS(convert_uint32), | ||
| 220 | |||
| 221 | &fn(op_clear_0001), | ||
| 222 | REF_COMMON_PATTERNS(min), | ||
| 223 | REF_COMMON_PATTERNS(max), | ||
| 224 | REF_COMMON_PATTERNS(scale), | ||
| 225 | |||
| 226 | &fn(op_dither0), | ||
| 227 | &fn(op_dither1), | ||
| 228 | &fn(op_dither2), | ||
| 229 | &fn(op_dither3), | ||
| 230 | &fn(op_dither4), | ||
| 231 | &fn(op_dither5), | ||
| 232 | &fn(op_dither6), | ||
| 233 | &fn(op_dither7), | ||
| 234 | &fn(op_dither8), | ||
| 235 | |||
| 236 | &fn(op_clear_0001), | ||
| 237 | &fn(op_clear_1000), | ||
| 238 | &fn(op_clear_1100), | ||
| 239 | |||
| 240 | &fn(op_linear_luma), | ||
| 241 | &fn(op_linear_alpha), | ||
| 242 | &fn(op_linear_lumalpha), | ||
| 243 | &fn(op_linear_yalpha), | ||
| 244 | &fn(op_linear_dot3), | ||
| 245 | &fn(op_linear_dot3a), | ||
| 246 | &fn(op_linear_row0), | ||
| 247 | &fn(op_linear_diag3), | ||
| 248 | &fn(op_linear_diag4), | ||
| 249 | &fn(op_linear_diagoff3), | ||
| 250 | &fn(op_linear_affine3), | ||
| 251 | &fn(op_linear_affine3uv), | ||
| 252 | &fn(op_linear_affine3x), | ||
| 253 | &fn(op_linear_affine3xa), | ||
| 254 | &fn(op_linear_affine3xy), | ||
| 255 | &fn(op_linear_affine3a), | ||
| 256 | |||
| 257 | &fn(op_filter1_v), | ||
| 258 | &fn(op_filter2_v), | ||
| 259 | &fn(op_filter3_v), | ||
| 260 | &fn(op_filter4_v), | ||
| 261 | |||
| 262 | &fn(op_filter1_h), | ||
| 263 | &fn(op_filter2_h), | ||
| 264 | &fn(op_filter3_h), | ||
| 265 | &fn(op_filter4_h), | ||
| 266 | |||
| 267 | NULL | ||
| 268 | }, | ||
| 269 | }; | ||
| 270 | |||
/* Drop the template switches defined at the top of this file ... */
#undef IS_FLOAT
#undef FMT_CHAR

/* ... and the pixel type parameters of this instantiation. */
#undef px
#undef block_t
#undef inter_t
#undef pixel_t
#undef PIXEL_MAX
#undef PIXEL_TYPE
| 280 |