Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * Copyright (C) 2025 Niklas Haas | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | #include "libavutil/avassert.h" | ||
22 | |||
23 | #include "ops_backend.h" | ||
24 | |||
25 | #ifndef BIT_DEPTH | ||
26 | # define BIT_DEPTH 32 | ||
27 | #endif | ||
28 | |||
29 | #if BIT_DEPTH == 32 | ||
30 | # define PIXEL_TYPE SWS_PIXEL_F32 | ||
31 | # define PIXEL_MAX FLT_MAX | ||
32 | # define PIXEL_MIN FLT_MIN | ||
33 | # define pixel_t float | ||
34 | # define block_t f32block_t | ||
35 | # define px f32 | ||
36 | #else | ||
37 | # error Invalid BIT_DEPTH | ||
38 | #endif | ||
39 | |||
40 | #define IS_FLOAT 1 | ||
41 | #define FMT_CHAR f | ||
42 | #include "ops_tmpl_common.c" | ||
43 | |||
44 | 468 | DECL_SETUP(setup_dither) | |
45 | { | ||
46 | 468 | const int size = 1 << op->dither.size_log2; | |
47 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 468 times.
|
468 | if (!size) { |
48 | /* We special case this value */ | ||
49 | av_assert1(!av_cmp_q(op->dither.matrix[0], av_make_q(1, 2))); | ||
50 | ✗ | out->ptr = NULL; | |
51 | ✗ | return 0; | |
52 | } | ||
53 | |||
54 | 468 | const int width = FFMAX(size, SWS_BLOCK_SIZE); | |
55 | 468 | pixel_t *matrix = out->ptr = av_malloc(sizeof(pixel_t) * size * width); | |
56 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 468 times.
|
468 | if (!matrix) |
57 | ✗ | return AVERROR(ENOMEM); | |
58 | |||
59 |
2/2✓ Branch 0 taken 26572 times.
✓ Branch 1 taken 468 times.
|
27040 | for (int y = 0; y < size; y++) { |
60 |
2/2✓ Branch 0 taken 4543812 times.
✓ Branch 1 taken 26572 times.
|
4570384 | for (int x = 0; x < size; x++) |
61 |
1/2✓ Branch 0 taken 4543812 times.
✗ Branch 1 not taken.
|
4543812 | matrix[y * width + x] = av_q2pixel(op->dither.matrix[y * size + x]); |
62 |
2/2✓ Branch 0 taken 33852 times.
✓ Branch 1 taken 26572 times.
|
60424 | for (int x = size; x < width; x++) /* pad to block size */ |
63 | 33852 | matrix[y * width + x] = matrix[y * width + (x % size)]; | |
64 | } | ||
65 | |||
66 | 468 | return 0; | |
67 | } | ||
68 | |||
69 | 432 | DECL_FUNC(dither, const int size_log2) | |
70 | { | ||
71 | 432 | const pixel_t *restrict matrix = impl->priv.ptr; | |
72 | 432 | const int mask = (1 << size_log2) - 1; | |
73 | 432 | const int y_line = iter->y; | |
74 | 432 | const int row0 = (y_line + 0) & mask; | |
75 | 432 | const int row1 = (y_line + 3) & mask; | |
76 | 432 | const int row2 = (y_line + 2) & mask; | |
77 | 432 | const int row3 = (y_line + 5) & mask; | |
78 | 432 | const int size = 1 << size_log2; | |
79 | 432 | const int width = FFMAX(size, SWS_BLOCK_SIZE); | |
80 | 432 | const int base = iter->x & ~(SWS_BLOCK_SIZE - 1) & (size - 1); | |
81 | |||
82 | SWS_LOOP | ||
83 |
2/2✓ Branch 0 taken 13824 times.
✓ Branch 1 taken 432 times.
|
14256 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
84 |
2/2✓ Branch 0 taken 12288 times.
✓ Branch 1 taken 1536 times.
|
13824 | x[i] += size_log2 ? matrix[row0 * width + base + i] : (pixel_t) 0.5; |
85 |
2/2✓ Branch 0 taken 12288 times.
✓ Branch 1 taken 1536 times.
|
13824 | y[i] += size_log2 ? matrix[row1 * width + base + i] : (pixel_t) 0.5; |
86 |
2/2✓ Branch 0 taken 12288 times.
✓ Branch 1 taken 1536 times.
|
13824 | z[i] += size_log2 ? matrix[row2 * width + base + i] : (pixel_t) 0.5; |
87 |
2/2✓ Branch 0 taken 12288 times.
✓ Branch 1 taken 1536 times.
|
13824 | w[i] += size_log2 ? matrix[row3 * width + base + i] : (pixel_t) 0.5; |
88 | } | ||
89 | |||
90 | 432 | CONTINUE(block_t, x, y, z, w); | |
91 | 432 | } | |
92 | |||
93 | #define WRAP_DITHER(N) \ | ||
94 | DECL_IMPL(dither##N) \ | ||
95 | { \ | ||
96 | CALL(dither, N); \ | ||
97 | } \ | ||
98 | \ | ||
99 | DECL_ENTRY(dither##N, \ | ||
100 | .op = SWS_OP_DITHER, \ | ||
101 | .dither_size = N, \ | ||
102 | .setup = fn(setup_dither), \ | ||
103 | .free = av_free, \ | ||
104 | ); | ||
105 | |||
106 | 48 | WRAP_DITHER(0) | |
107 | 48 | WRAP_DITHER(1) | |
108 | 48 | WRAP_DITHER(2) | |
109 | 48 | WRAP_DITHER(3) | |
110 | 48 | WRAP_DITHER(4) | |
111 | 48 | WRAP_DITHER(5) | |
112 | 48 | WRAP_DITHER(6) | |
113 | 48 | WRAP_DITHER(7) | |
114 | 48 | WRAP_DITHER(8) | |
115 | |||
116 | typedef struct { | ||
117 | /* Stored in split form for convenience */ | ||
118 | pixel_t m[4][4]; | ||
119 | pixel_t k[4]; | ||
120 | } fn(LinCoeffs); | ||
121 | |||
122 | 273 | DECL_SETUP(setup_linear) | |
123 | { | ||
124 | fn(LinCoeffs) c; | ||
125 | |||
126 |
2/2✓ Branch 0 taken 1092 times.
✓ Branch 1 taken 273 times.
|
1365 | for (int i = 0; i < 4; i++) { |
127 |
2/2✓ Branch 0 taken 4368 times.
✓ Branch 1 taken 1092 times.
|
5460 | for (int j = 0; j < 4; j++) |
128 |
1/2✓ Branch 0 taken 4368 times.
✗ Branch 1 not taken.
|
4368 | c.m[i][j] = av_q2pixel(op->lin.m[i][j]); |
129 |
1/2✓ Branch 0 taken 1092 times.
✗ Branch 1 not taken.
|
1092 | c.k[i] = av_q2pixel(op->lin.m[i][4]); |
130 | } | ||
131 | |||
132 | 273 | return SETUP_MEMDUP(c); | |
133 | } | ||
134 | |||
135 | /** | ||
136 | * Fully general case for a 5x5 linear affine transformation. Should never be | ||
137 | * called without constant `mask`. This function will compile down to the | ||
138 | * appropriately optimized version for the required subset of operations when | ||
139 | * called with a constant mask. | ||
140 | */ | ||
141 | 252 | DECL_FUNC(linear_mask, const uint32_t mask) | |
142 | { | ||
143 | 252 | const fn(LinCoeffs) c = *(const fn(LinCoeffs) *) impl->priv.ptr; | |
144 | |||
145 | SWS_LOOP | ||
146 |
2/2✓ Branch 0 taken 8064 times.
✓ Branch 1 taken 252 times.
|
8316 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
147 | 8064 | const pixel_t xx = x[i]; | |
148 | 8064 | const pixel_t yy = y[i]; | |
149 | 8064 | const pixel_t zz = z[i]; | |
150 | 8064 | const pixel_t ww = w[i]; | |
151 | |||
152 |
2/2✓ Branch 0 taken 5760 times.
✓ Branch 1 taken 2304 times.
|
8064 | x[i] = (mask & SWS_MASK_OFF(0)) ? c.k[0] : 0; |
153 |
2/2✓ Branch 0 taken 7680 times.
✓ Branch 1 taken 384 times.
|
8064 | x[i] += (mask & SWS_MASK(0, 0)) ? c.m[0][0] * xx : xx; |
154 |
2/2✓ Branch 0 taken 4992 times.
✓ Branch 1 taken 3072 times.
|
8064 | x[i] += (mask & SWS_MASK(0, 1)) ? c.m[0][1] * yy : 0; |
155 |
2/2✓ Branch 0 taken 4992 times.
✓ Branch 1 taken 3072 times.
|
8064 | x[i] += (mask & SWS_MASK(0, 2)) ? c.m[0][2] * zz : 0; |
156 |
2/2✓ Branch 0 taken 1920 times.
✓ Branch 1 taken 6144 times.
|
8064 | x[i] += (mask & SWS_MASK(0, 3)) ? c.m[0][3] * ww : 0; |
157 | |||
158 |
2/2✓ Branch 0 taken 3456 times.
✓ Branch 1 taken 4608 times.
|
8064 | y[i] = (mask & SWS_MASK_OFF(1)) ? c.k[1] : 0; |
159 |
2/2✓ Branch 0 taken 3456 times.
✓ Branch 1 taken 4608 times.
|
8064 | y[i] += (mask & SWS_MASK(1, 0)) ? c.m[1][0] * xx : 0; |
160 |
2/2✓ Branch 0 taken 4992 times.
✓ Branch 1 taken 3072 times.
|
8064 | y[i] += (mask & SWS_MASK(1, 1)) ? c.m[1][1] * yy : yy; |
161 |
2/2✓ Branch 0 taken 3456 times.
✓ Branch 1 taken 4608 times.
|
8064 | y[i] += (mask & SWS_MASK(1, 2)) ? c.m[1][2] * zz : 0; |
162 |
2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 7296 times.
|
8064 | y[i] += (mask & SWS_MASK(1, 3)) ? c.m[1][3] * ww : 0; |
163 | |||
164 |
2/2✓ Branch 0 taken 3456 times.
✓ Branch 1 taken 4608 times.
|
8064 | z[i] = (mask & SWS_MASK_OFF(2)) ? c.k[2] : 0; |
165 |
2/2✓ Branch 0 taken 3456 times.
✓ Branch 1 taken 4608 times.
|
8064 | z[i] += (mask & SWS_MASK(2, 0)) ? c.m[2][0] * xx : 0; |
166 |
2/2✓ Branch 0 taken 3456 times.
✓ Branch 1 taken 4608 times.
|
8064 | z[i] += (mask & SWS_MASK(2, 1)) ? c.m[2][1] * yy : 0; |
167 |
2/2✓ Branch 0 taken 4992 times.
✓ Branch 1 taken 3072 times.
|
8064 | z[i] += (mask & SWS_MASK(2, 2)) ? c.m[2][2] * zz : zz; |
168 |
2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 7296 times.
|
8064 | z[i] += (mask & SWS_MASK(2, 3)) ? c.m[2][3] * ww : 0; |
169 | |||
170 |
2/2✓ Branch 0 taken 3456 times.
✓ Branch 1 taken 4608 times.
|
8064 | w[i] = (mask & SWS_MASK_OFF(3)) ? c.k[3] : 0; |
171 |
2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 7296 times.
|
8064 | w[i] += (mask & SWS_MASK(3, 0)) ? c.m[3][0] * xx : 0; |
172 |
2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 7296 times.
|
8064 | w[i] += (mask & SWS_MASK(3, 1)) ? c.m[3][1] * yy : 0; |
173 |
2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 7296 times.
|
8064 | w[i] += (mask & SWS_MASK(3, 2)) ? c.m[3][2] * zz : 0; |
174 |
2/2✓ Branch 0 taken 4224 times.
✓ Branch 1 taken 3840 times.
|
8064 | w[i] += (mask & SWS_MASK(3, 3)) ? c.m[3][3] * ww : ww; |
175 | } | ||
176 | |||
177 | 252 | CONTINUE(block_t, x, y, z, w); | |
178 | 252 | } | |
179 | |||
180 | #define WRAP_LINEAR(NAME, MASK) \ | ||
181 | DECL_IMPL(linear_##NAME) \ | ||
182 | { \ | ||
183 | CALL(linear_mask, MASK); \ | ||
184 | } \ | ||
185 | \ | ||
186 | DECL_ENTRY(linear_##NAME, \ | ||
187 | .op = SWS_OP_LINEAR, \ | ||
188 | .setup = fn(setup_linear), \ | ||
189 | .free = av_free, \ | ||
190 | .linear_mask = (MASK), \ | ||
191 | ); | ||
192 | |||
193 | 24 | WRAP_LINEAR(luma, SWS_MASK_LUMA) | |
194 | 12 | WRAP_LINEAR(alpha, SWS_MASK_ALPHA) | |
195 | 12 | WRAP_LINEAR(lumalpha, SWS_MASK_LUMA | SWS_MASK_ALPHA) | |
196 | 12 | WRAP_LINEAR(dot3, 0x7) | |
197 | 24 | WRAP_LINEAR(row0, SWS_MASK_ROW(0)) | |
198 | 12 | WRAP_LINEAR(row0a, SWS_MASK_ROW(0) | SWS_MASK_ALPHA) | |
199 | 12 | WRAP_LINEAR(diag3, SWS_MASK_DIAG3) | |
200 | 12 | WRAP_LINEAR(diag4, SWS_MASK_DIAG4) | |
201 | 24 | WRAP_LINEAR(diagoff3, SWS_MASK_DIAG3 | SWS_MASK_OFF3) | |
202 | 12 | WRAP_LINEAR(matrix3, SWS_MASK_MAT3) | |
203 | 12 | WRAP_LINEAR(affine3, SWS_MASK_MAT3 | SWS_MASK_OFF3) | |
204 | 60 | WRAP_LINEAR(affine3a, SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA) | |
205 | 12 | WRAP_LINEAR(matrix4, SWS_MASK_MAT4) | |
206 | 12 | WRAP_LINEAR(affine4, SWS_MASK_MAT4 | SWS_MASK_OFF4) | |
207 | |||
208 | static const SwsOpTable fn(op_table_float) = { | ||
209 | .block_size = SWS_BLOCK_SIZE, | ||
210 | .entries = { | ||
211 | REF_COMMON_PATTERNS(convert_uint8), | ||
212 | REF_COMMON_PATTERNS(convert_uint16), | ||
213 | REF_COMMON_PATTERNS(convert_uint32), | ||
214 | |||
215 | &fn(op_clear_1110), | ||
216 | REF_COMMON_PATTERNS(min), | ||
217 | REF_COMMON_PATTERNS(max), | ||
218 | REF_COMMON_PATTERNS(scale), | ||
219 | |||
220 | &fn(op_dither0), | ||
221 | &fn(op_dither1), | ||
222 | &fn(op_dither2), | ||
223 | &fn(op_dither3), | ||
224 | &fn(op_dither4), | ||
225 | &fn(op_dither5), | ||
226 | &fn(op_dither6), | ||
227 | &fn(op_dither7), | ||
228 | &fn(op_dither8), | ||
229 | |||
230 | &fn(op_linear_luma), | ||
231 | &fn(op_linear_alpha), | ||
232 | &fn(op_linear_lumalpha), | ||
233 | &fn(op_linear_dot3), | ||
234 | &fn(op_linear_row0), | ||
235 | &fn(op_linear_row0a), | ||
236 | &fn(op_linear_diag3), | ||
237 | &fn(op_linear_diag4), | ||
238 | &fn(op_linear_diagoff3), | ||
239 | &fn(op_linear_matrix3), | ||
240 | &fn(op_linear_affine3), | ||
241 | &fn(op_linear_affine3a), | ||
242 | &fn(op_linear_matrix4), | ||
243 | &fn(op_linear_affine4), | ||
244 | |||
245 | NULL | ||
246 | }, | ||
247 | }; | ||
248 | |||
/* Drop the per-instantiation macros so they do not leak out of the template */
#undef PIXEL_TYPE
#undef PIXEL_MAX
#undef PIXEL_MIN
#undef pixel_t
#undef block_t
#undef px

#undef FMT_CHAR
#undef IS_FLOAT
258 |