Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * Copyright (C) 2025 Niklas Haas | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | #include "libavutil/avassert.h" | ||
22 | #include "libavutil/bswap.h" | ||
23 | |||
24 | #include "ops_backend.h" | ||
25 | |||
/*
 * Template configuration.
 *
 * BIT_DEPTH selects the unsigned integer pixel type this file is compiled
 * for (8, 16 or 32; any other value is a compile error). It defaults to 8
 * when the including file does not define it.
 */
#ifndef BIT_DEPTH
# define BIT_DEPTH 8
#endif

/*
 * Per-depth aliases used by the rest of the file. SWAP_BYTES is only defined
 * for the 16- and 32-bit variants; the swap_bytes operation further down is
 * compiled only when it is defined.
 */
#if BIT_DEPTH == 32
# define PIXEL_TYPE SWS_PIXEL_U32
# define PIXEL_MAX 0xFFFFFFFFu
# define SWAP_BYTES av_bswap32
# define pixel_t uint32_t
# define block_t u32block_t
# define px u32
#elif BIT_DEPTH == 16
# define PIXEL_TYPE SWS_PIXEL_U16
# define PIXEL_MAX 0xFFFFu
# define SWAP_BYTES av_bswap16
# define pixel_t uint16_t
# define block_t u16block_t
# define px u16
#elif BIT_DEPTH == 8
# define PIXEL_TYPE SWS_PIXEL_U8
# define PIXEL_MAX 0xFFu
# define pixel_t uint8_t
# define block_t u8block_t
# define px u8
#else
# error Invalid BIT_DEPTH
#endif

/* Integer-specific settings; presumably consumed by the shared template
 * included right below (its contents are not visible from this file). */
#define IS_FLOAT 0
#define FMT_CHAR u
#define PIXEL_MIN 0
#include "ops_tmpl_common.c"
58 | |||
59 | 8920 | DECL_READ(read_planar, const int elems) | |
60 | { | ||
61 | block_t x, y, z, w; | ||
62 | |||
63 | SWS_LOOP | ||
64 |
2/2✓ Branch 0 taken 142720 times.
✓ Branch 1 taken 4460 times.
|
294360 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
65 | 285440 | x[i] = in0[i]; | |
66 |
2/2✓ Branch 0 taken 115456 times.
✓ Branch 1 taken 27264 times.
|
285440 | if (elems > 1) |
67 | 230912 | y[i] = in1[i]; | |
68 |
2/2✓ Branch 0 taken 112768 times.
✓ Branch 1 taken 29952 times.
|
285440 | if (elems > 2) |
69 | 225536 | z[i] = in2[i]; | |
70 |
2/2✓ Branch 0 taken 84608 times.
✓ Branch 1 taken 58112 times.
|
285440 | if (elems > 3) |
71 | 169216 | w[i] = in3[i]; | |
72 | } | ||
73 | |||
74 | 8920 | CONTINUE(block_t, x, y, z, w); | |
75 | 8920 | } | |
76 | |||
77 | 312 | DECL_READ(read_packed, const int elems) | |
78 | { | ||
79 | block_t x, y, z, w; | ||
80 | |||
81 | SWS_LOOP | ||
82 |
2/2✓ Branch 0 taken 4992 times.
✓ Branch 1 taken 156 times.
|
10296 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
83 | 9984 | x[i] = in0[elems * i + 0]; | |
84 |
1/2✓ Branch 0 taken 4992 times.
✗ Branch 1 not taken.
|
9984 | if (elems > 1) |
85 | 9984 | y[i] = in0[elems * i + 1]; | |
86 |
2/2✓ Branch 0 taken 3328 times.
✓ Branch 1 taken 1664 times.
|
9984 | if (elems > 2) |
87 | 6656 | z[i] = in0[elems * i + 2]; | |
88 |
2/2✓ Branch 0 taken 1664 times.
✓ Branch 1 taken 3328 times.
|
9984 | if (elems > 3) |
89 | 3328 | w[i] = in0[elems * i + 3]; | |
90 | } | ||
91 | |||
92 | 312 | CONTINUE(block_t, x, y, z, w); | |
93 | 312 | } | |
94 | |||
95 | 8936 | DECL_WRITE(write_planar, const int elems) | |
96 | { | ||
97 | SWS_LOOP | ||
98 |
2/2✓ Branch 0 taken 142976 times.
✓ Branch 1 taken 4468 times.
|
294888 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
99 | 285952 | out0[i] = x[i]; | |
100 |
2/2✓ Branch 0 taken 113920 times.
✓ Branch 1 taken 29056 times.
|
285952 | if (elems > 1) |
101 | 227840 | out1[i] = y[i]; | |
102 |
2/2✓ Branch 0 taken 88448 times.
✓ Branch 1 taken 54528 times.
|
285952 | if (elems > 2) |
103 | 176896 | out2[i] = z[i]; | |
104 |
2/2✓ Branch 0 taken 60800 times.
✓ Branch 1 taken 82176 times.
|
285952 | if (elems > 3) |
105 | 121600 | out3[i] = w[i]; | |
106 | } | ||
107 | 8936 | } | |
108 | |||
109 | 312 | DECL_WRITE(write_packed, const int elems) | |
110 | { | ||
111 | SWS_LOOP | ||
112 |
2/2✓ Branch 0 taken 4992 times.
✓ Branch 1 taken 156 times.
|
10296 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
113 | 9984 | out0[elems * i + 0] = x[i]; | |
114 |
1/2✓ Branch 0 taken 4992 times.
✗ Branch 1 not taken.
|
9984 | if (elems > 1) |
115 | 9984 | out0[elems * i + 1] = y[i]; | |
116 |
2/2✓ Branch 0 taken 3328 times.
✓ Branch 1 taken 1664 times.
|
9984 | if (elems > 2) |
117 | 6656 | out0[elems * i + 2] = z[i]; | |
118 |
2/2✓ Branch 0 taken 1664 times.
✓ Branch 1 taken 3328 times.
|
9984 | if (elems > 3) |
119 | 3328 | out0[elems * i + 3] = w[i]; | |
120 | } | ||
121 | 312 | } | |
122 | |||
123 | #define WRAP_READ(FUNC, ELEMS, FRAC, PACKED) \ | ||
124 | DECL_IMPL_READ(FUNC##ELEMS) \ | ||
125 | { \ | ||
126 | CALL_READ(FUNC, ELEMS); \ | ||
127 | for (int i = 0; i < (PACKED ? 1 : ELEMS); i++) \ | ||
128 | iter->in[i] += sizeof(block_t) * (PACKED ? ELEMS : 1) >> FRAC; \ | ||
129 | } \ | ||
130 | \ | ||
131 | DECL_ENTRY(FUNC##ELEMS, \ | ||
132 | .op = SWS_OP_READ, \ | ||
133 | .rw = { \ | ||
134 | .elems = ELEMS, \ | ||
135 | .packed = PACKED, \ | ||
136 | .frac = FRAC, \ | ||
137 | }, \ | ||
138 | ); | ||
139 | |||
140 |
2/2✓ Branch 1 taken 852 times.
✓ Branch 2 taken 852 times.
|
3408 | WRAP_READ(read_planar, 1, 0, false) |
141 |
2/2✓ Branch 1 taken 168 times.
✓ Branch 2 taken 84 times.
|
504 | WRAP_READ(read_planar, 2, 0, false) |
142 |
2/2✓ Branch 1 taken 2640 times.
✓ Branch 2 taken 880 times.
|
7040 | WRAP_READ(read_planar, 3, 0, false) |
143 |
2/2✓ Branch 1 taken 10576 times.
✓ Branch 2 taken 2644 times.
|
26440 | WRAP_READ(read_planar, 4, 0, false) |
144 |
2/2✓ Branch 1 taken 52 times.
✓ Branch 2 taken 52 times.
|
208 | WRAP_READ(read_packed, 2, 0, true) |
145 |
2/2✓ Branch 1 taken 52 times.
✓ Branch 2 taken 52 times.
|
208 | WRAP_READ(read_packed, 3, 0, true) |
146 |
2/2✓ Branch 1 taken 52 times.
✓ Branch 2 taken 52 times.
|
208 | WRAP_READ(read_packed, 4, 0, true) |
147 | |||
148 | #define WRAP_WRITE(FUNC, ELEMS, FRAC, PACKED) \ | ||
149 | DECL_IMPL(FUNC##ELEMS) \ | ||
150 | { \ | ||
151 | CALL_WRITE(FUNC, ELEMS); \ | ||
152 | for (int i = 0; i < (PACKED ? 1 : ELEMS); i++) \ | ||
153 | iter->out[i] += sizeof(block_t) * (PACKED ? ELEMS : 1) >> FRAC; \ | ||
154 | } \ | ||
155 | \ | ||
156 | DECL_ENTRY(FUNC##ELEMS, \ | ||
157 | .op = SWS_OP_WRITE, \ | ||
158 | .rw = { \ | ||
159 | .elems = ELEMS, \ | ||
160 | .packed = PACKED, \ | ||
161 | .frac = FRAC, \ | ||
162 | }, \ | ||
163 | ); | ||
164 | |||
165 |
2/2✓ Branch 1 taken 908 times.
✓ Branch 2 taken 908 times.
|
3632 | WRAP_WRITE(write_planar, 1, 0, false) |
166 |
2/2✓ Branch 1 taken 1592 times.
✓ Branch 2 taken 796 times.
|
4776 | WRAP_WRITE(write_planar, 2, 0, false) |
167 |
2/2✓ Branch 1 taken 2592 times.
✓ Branch 2 taken 864 times.
|
6912 | WRAP_WRITE(write_planar, 3, 0, false) |
168 |
2/2✓ Branch 1 taken 7600 times.
✓ Branch 2 taken 1900 times.
|
19000 | WRAP_WRITE(write_planar, 4, 0, false) |
169 |
2/2✓ Branch 1 taken 52 times.
✓ Branch 2 taken 52 times.
|
208 | WRAP_WRITE(write_packed, 2, 0, true) |
170 |
2/2✓ Branch 1 taken 52 times.
✓ Branch 2 taken 52 times.
|
208 | WRAP_WRITE(write_packed, 3, 0, true) |
171 |
2/2✓ Branch 1 taken 52 times.
✓ Branch 2 taken 52 times.
|
208 | WRAP_WRITE(write_packed, 4, 0, true) |
172 | |||
173 | #if BIT_DEPTH == 8 | ||
174 | 16 | DECL_READ(read_nibbles, const int elems) | |
175 | { | ||
176 | block_t x, y, z, w; | ||
177 | |||
178 | SWS_LOOP | ||
179 |
2/2✓ Branch 0 taken 256 times.
✓ Branch 1 taken 16 times.
|
272 | for (int i = 0; i < SWS_BLOCK_SIZE; i += 2) { |
180 | 256 | const pixel_t val = ((const pixel_t *) in0)[i >> 1]; | |
181 | 256 | x[i + 0] = val >> 4; /* high nibble */ | |
182 | 256 | x[i + 1] = val & 0xF; /* low nibble */ | |
183 | } | ||
184 | |||
185 | 16 | CONTINUE(block_t, x, y, z, w); | |
186 | 16 | } | |
187 | |||
188 | 16 | DECL_READ(read_bits, const int elems) | |
189 | { | ||
190 | block_t x, y, z, w; | ||
191 | |||
192 | SWS_LOOP | ||
193 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 16 times.
|
80 | for (int i = 0; i < SWS_BLOCK_SIZE; i += 8) { |
194 | 64 | const pixel_t val = ((const pixel_t *) in0)[i >> 3]; | |
195 | 64 | x[i + 0] = (val >> 7) & 1; | |
196 | 64 | x[i + 1] = (val >> 6) & 1; | |
197 | 64 | x[i + 2] = (val >> 5) & 1; | |
198 | 64 | x[i + 3] = (val >> 4) & 1; | |
199 | 64 | x[i + 4] = (val >> 3) & 1; | |
200 | 64 | x[i + 5] = (val >> 2) & 1; | |
201 | 64 | x[i + 6] = (val >> 1) & 1; | |
202 | 64 | x[i + 7] = (val >> 0) & 1; | |
203 | } | ||
204 | |||
205 | 16 | CONTINUE(block_t, x, y, z, w); | |
206 | 16 | } | |
207 | |||
208 |
2/2✓ Branch 1 taken 16 times.
✓ Branch 2 taken 16 times.
|
32 | WRAP_READ(read_nibbles, 1, 1, false) |
209 |
2/2✓ Branch 1 taken 16 times.
✓ Branch 2 taken 16 times.
|
32 | WRAP_READ(read_bits, 1, 3, false) |
210 | |||
211 | 8 | DECL_WRITE(write_nibbles, const int elems) | |
212 | { | ||
213 | SWS_LOOP | ||
214 |
2/2✓ Branch 0 taken 128 times.
✓ Branch 1 taken 8 times.
|
136 | for (int i = 0; i < SWS_BLOCK_SIZE; i += 2) |
215 | 128 | out0[i >> 1] = x[i] << 4 | x[i + 1]; | |
216 | 8 | } | |
217 | |||
218 | 16 | DECL_WRITE(write_bits, const int elems) | |
219 | { | ||
220 | SWS_LOOP | ||
221 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 16 times.
|
80 | for (int i = 0; i < SWS_BLOCK_SIZE; i += 8) { |
222 | 64 | out0[i >> 3] = x[i + 0] << 7 | | |
223 | 64 | x[i + 1] << 6 | | |
224 | 64 | x[i + 2] << 5 | | |
225 | 64 | x[i + 3] << 4 | | |
226 | 64 | x[i + 4] << 3 | | |
227 | 64 | x[i + 5] << 2 | | |
228 | 64 | x[i + 6] << 1 | | |
229 | 64 | x[i + 7]; | |
230 | } | ||
231 | 16 | } | |
232 | |||
233 |
2/2✓ Branch 1 taken 8 times.
✓ Branch 2 taken 8 times.
|
16 | WRAP_WRITE(write_nibbles, 1, 1, false) |
234 |
2/2✓ Branch 1 taken 16 times.
✓ Branch 2 taken 16 times.
|
32 | WRAP_WRITE(write_bits, 1, 3, false) |
235 | #endif /* BIT_DEPTH == 8 */ | ||
236 | |||
237 | #ifdef SWAP_BYTES | ||
238 | 208 | DECL_PATTERN(swap_bytes) | |
239 | { | ||
240 | SWS_LOOP | ||
241 |
2/2✓ Branch 0 taken 3328 times.
✓ Branch 1 taken 104 times.
|
6864 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
242 |
1/2✓ Branch 0 taken 3328 times.
✗ Branch 1 not taken.
|
6656 | if (X) |
243 | 6656 | x[i] = SWAP_BYTES(x[i]); | |
244 |
2/2✓ Branch 0 taken 1536 times.
✓ Branch 1 taken 1792 times.
|
6656 | if (Y) |
245 | 3072 | y[i] = SWAP_BYTES(y[i]); | |
246 |
2/2✓ Branch 0 taken 1536 times.
✓ Branch 1 taken 1792 times.
|
6656 | if (Z) |
247 | 3072 | z[i] = SWAP_BYTES(z[i]); | |
248 |
2/2✓ Branch 0 taken 1536 times.
✓ Branch 1 taken 1792 times.
|
6656 | if (W) |
249 | 3072 | w[i] = SWAP_BYTES(w[i]); | |
250 | } | ||
251 | |||
252 | 208 | CONTINUE(block_t, x, y, z, w); | |
253 | 208 | } | |
254 | |||
255 | 208 | WRAP_COMMON_PATTERNS(swap_bytes, .op = SWS_OP_SWAP_BYTES); | |
256 | #endif /* SWAP_BYTES */ | ||
257 | |||
258 | #if BIT_DEPTH == 8 | ||
259 | 48 | DECL_PATTERN(expand16) | |
260 | { | ||
261 | u16block_t x16, y16, z16, w16; | ||
262 | |||
263 | SWS_LOOP | ||
264 |
2/2✓ Branch 0 taken 1536 times.
✓ Branch 1 taken 48 times.
|
1584 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
265 |
1/2✓ Branch 0 taken 1536 times.
✗ Branch 1 not taken.
|
1536 | if (X) |
266 | 1536 | x16[i] = x[i] << 8 | x[i]; | |
267 |
2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 768 times.
|
1536 | if (Y) |
268 | 768 | y16[i] = y[i] << 8 | y[i]; | |
269 |
2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 768 times.
|
1536 | if (Z) |
270 | 768 | z16[i] = z[i] << 8 | z[i]; | |
271 |
2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 768 times.
|
1536 | if (W) |
272 | 768 | w16[i] = w[i] << 8 | w[i]; | |
273 | } | ||
274 | |||
275 | 48 | CONTINUE(u16block_t, x16, y16, z16, w16); | |
276 | 48 | } | |
277 | |||
278 | 96 | WRAP_COMMON_PATTERNS(expand16, | |
279 | .op = SWS_OP_CONVERT, | ||
280 | .convert.to = SWS_PIXEL_U16, | ||
281 | .convert.expand = true, | ||
282 | ); | ||
283 | |||
284 | 48 | DECL_PATTERN(expand32) | |
285 | { | ||
286 | u32block_t x32, y32, z32, w32; | ||
287 | |||
288 | SWS_LOOP | ||
289 |
2/2✓ Branch 0 taken 1536 times.
✓ Branch 1 taken 48 times.
|
1584 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
290 | 1536 | x32[i] = x[i] << 24 | x[i] << 16 | x[i] << 8 | x[i]; | |
291 | 1536 | y32[i] = y[i] << 24 | y[i] << 16 | y[i] << 8 | y[i]; | |
292 | 1536 | z32[i] = z[i] << 24 | z[i] << 16 | z[i] << 8 | z[i]; | |
293 | 1536 | w32[i] = w[i] << 24 | w[i] << 16 | w[i] << 8 | w[i]; | |
294 | } | ||
295 | |||
296 | 48 | CONTINUE(u32block_t, x32, y32, z32, w32); | |
297 | 48 | } | |
298 | |||
299 | 96 | WRAP_COMMON_PATTERNS(expand32, | |
300 | .op = SWS_OP_CONVERT, | ||
301 | .convert.to = SWS_PIXEL_U32, | ||
302 | .convert.expand = true, | ||
303 | ); | ||
304 | #endif | ||
305 | |||
306 | #define WRAP_PACK_UNPACK(X, Y, Z, W) \ | ||
307 | inline DECL_IMPL(pack_##X##Y##Z##W) \ | ||
308 | { \ | ||
309 | SWS_LOOP \ | ||
310 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { \ | ||
311 | x[i] = x[i] << (Y+Z+W); \ | ||
312 | if (Y) \ | ||
313 | x[i] |= y[i] << (Z+W); \ | ||
314 | if (Z) \ | ||
315 | x[i] |= z[i] << W; \ | ||
316 | if (W) \ | ||
317 | x[i] |= w[i]; \ | ||
318 | } \ | ||
319 | \ | ||
320 | CONTINUE(block_t, x, y, z, w); \ | ||
321 | } \ | ||
322 | \ | ||
323 | DECL_ENTRY(pack_##X##Y##Z##W, \ | ||
324 | .op = SWS_OP_PACK, \ | ||
325 | .pack.pattern = { X, Y, Z, W }, \ | ||
326 | ); \ | ||
327 | \ | ||
328 | inline DECL_IMPL(unpack_##X##Y##Z##W) \ | ||
329 | { \ | ||
330 | SWS_LOOP \ | ||
331 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { \ | ||
332 | const pixel_t val = x[i]; \ | ||
333 | x[i] = val >> (Y+Z+W); \ | ||
334 | if (Y) \ | ||
335 | y[i] = (val >> (Z+W)) & ((1 << Y) - 1); \ | ||
336 | if (Z) \ | ||
337 | z[i] = (val >> W) & ((1 << Z) - 1); \ | ||
338 | if (W) \ | ||
339 | w[i] = val & ((1 << W) - 1); \ | ||
340 | } \ | ||
341 | \ | ||
342 | CONTINUE(block_t, x, y, z, w); \ | ||
343 | } \ | ||
344 | \ | ||
345 | DECL_ENTRY(unpack_##X##Y##Z##W, \ | ||
346 | .op = SWS_OP_UNPACK, \ | ||
347 | .pack.pattern = { X, Y, Z, W }, \ | ||
348 | ); | ||
349 | |||
350 |
2/2✓ Branch 0 taken 1024 times.
✓ Branch 1 taken 32 times.
|
2112 | WRAP_PACK_UNPACK( 3, 3, 2, 0) |
351 |
2/2✓ Branch 0 taken 1024 times.
✓ Branch 1 taken 32 times.
|
2112 | WRAP_PACK_UNPACK( 2, 3, 3, 0) |
352 |
2/2✓ Branch 0 taken 1024 times.
✓ Branch 1 taken 32 times.
|
2112 | WRAP_PACK_UNPACK( 1, 2, 1, 0) |
353 |
2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 24 times.
|
1584 | WRAP_PACK_UNPACK( 5, 6, 5, 0) |
354 |
2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 24 times.
|
1584 | WRAP_PACK_UNPACK( 5, 5, 5, 0) |
355 |
2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 24 times.
|
1584 | WRAP_PACK_UNPACK( 4, 4, 4, 0) |
356 |
2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 24 times.
|
1584 | WRAP_PACK_UNPACK( 2, 10, 10, 10) |
357 |
2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 24 times.
|
1584 | WRAP_PACK_UNPACK(10, 10, 10, 2) |
358 | |||
359 | #if BIT_DEPTH != 8 | ||
360 | 1280 | DECL_PATTERN(lshift) | |
361 | { | ||
362 | 1280 | const uint8_t amount = impl->priv.u8[0]; | |
363 | |||
364 | SWS_LOOP | ||
365 |
2/2✓ Branch 0 taken 20480 times.
✓ Branch 1 taken 640 times.
|
42240 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
366 | 40960 | x[i] <<= amount; | |
367 | 40960 | y[i] <<= amount; | |
368 | 40960 | z[i] <<= amount; | |
369 | 40960 | w[i] <<= amount; | |
370 | } | ||
371 | |||
372 | 1280 | CONTINUE(block_t, x, y, z, w); | |
373 | 1280 | } | |
374 | |||
375 | 1280 | DECL_PATTERN(rshift) | |
376 | { | ||
377 | 1280 | const uint8_t amount = impl->priv.u8[0]; | |
378 | |||
379 | SWS_LOOP | ||
380 |
2/2✓ Branch 0 taken 20480 times.
✓ Branch 1 taken 640 times.
|
42240 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
381 | 40960 | x[i] >>= amount; | |
382 | 40960 | y[i] >>= amount; | |
383 | 40960 | z[i] >>= amount; | |
384 | 40960 | w[i] >>= amount; | |
385 | } | ||
386 | |||
387 | 1280 | CONTINUE(block_t, x, y, z, w); | |
388 | 1280 | } | |
389 | |||
/*
 * Table entries for the shift operations. Both use ff_sws_setup_u8 as their
 * setup callback; the kernels read the shift amount from impl->priv.u8[0],
 * so the callback presumably stores it there (it is not defined in this
 * file).
 */
WRAP_COMMON_PATTERNS(lshift,
    .op = SWS_OP_LSHIFT,
    .setup = ff_sws_setup_u8,
    .flexible = true,
);

WRAP_COMMON_PATTERNS(rshift,
    .op = SWS_OP_RSHIFT,
    .setup = ff_sws_setup_u8,
    .flexible = true,
);
401 | #endif /* BIT_DEPTH != 8 */ | ||
402 | |||
403 | 256 | DECL_PATTERN(convert_float) | |
404 | { | ||
405 | f32block_t xf, yf, zf, wf; | ||
406 | |||
407 | SWS_LOOP | ||
408 |
2/2✓ Branch 0 taken 4096 times.
✓ Branch 1 taken 128 times.
|
8448 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
409 | 8192 | xf[i] = x[i]; | |
410 | 8192 | yf[i] = y[i]; | |
411 | 8192 | zf[i] = z[i]; | |
412 | 8192 | wf[i] = w[i]; | |
413 | } | ||
414 | |||
415 | 256 | CONTINUE(f32block_t, xf, yf, zf, wf); | |
416 | 256 | } | |
417 | |||
418 | 256 | WRAP_COMMON_PATTERNS(convert_float, | |
419 | .op = SWS_OP_CONVERT, | ||
420 | .convert.to = SWS_PIXEL_F32, | ||
421 | ); | ||
422 | |||
423 | /** | ||
424 | * Swizzle by directly swapping the order of arguments to the continuation. | ||
425 | * Note that this is only safe to do if no arguments are duplicated. | ||
426 | */ | ||
427 | #define DECL_SWIZZLE(X, Y, Z, W) \ | ||
428 | static SWS_FUNC void \ | ||
429 | fn(swizzle_##X##Y##Z##W)(SwsOpIter *restrict iter, \ | ||
430 | const SwsOpImpl *restrict impl, \ | ||
431 | block_t c0, block_t c1, block_t c2, block_t c3) \ | ||
432 | { \ | ||
433 | CONTINUE(block_t, c##X, c##Y, c##Z, c##W); \ | ||
434 | } \ | ||
435 | \ | ||
436 | DECL_ENTRY(swizzle_##X##Y##Z##W, \ | ||
437 | .op = SWS_OP_SWIZZLE, \ | ||
438 | .swizzle.in = { X, Y, Z, W }, \ | ||
439 | ); | ||
440 | |||
441 | 32 | DECL_SWIZZLE(3, 0, 1, 2) | |
442 | 32 | DECL_SWIZZLE(3, 0, 2, 1) | |
443 | 32 | DECL_SWIZZLE(2, 1, 0, 3) | |
444 | 32 | DECL_SWIZZLE(3, 2, 1, 0) | |
445 | 32 | DECL_SWIZZLE(3, 1, 0, 2) | |
446 | 32 | DECL_SWIZZLE(3, 2, 0, 1) | |
447 | 32 | DECL_SWIZZLE(1, 2, 0, 3) | |
448 | 32 | DECL_SWIZZLE(1, 0, 2, 3) | |
449 | 32 | DECL_SWIZZLE(2, 0, 1, 3) | |
450 | 32 | DECL_SWIZZLE(2, 3, 1, 0) | |
451 | 32 | DECL_SWIZZLE(2, 1, 3, 0) | |
452 | 32 | DECL_SWIZZLE(1, 2, 3, 0) | |
453 | 32 | DECL_SWIZZLE(1, 3, 2, 0) | |
454 | 32 | DECL_SWIZZLE(0, 2, 1, 3) | |
455 | 32 | DECL_SWIZZLE(0, 2, 3, 1) | |
456 | 1424 | DECL_SWIZZLE(0, 3, 1, 2) | |
457 | 32 | DECL_SWIZZLE(3, 1, 2, 0) | |
458 | 32 | DECL_SWIZZLE(0, 3, 2, 1) | |
459 | |||
460 | /* Broadcast luma -> rgb (only used for y(a) -> rgb(a)) */ | ||
461 | #define DECL_EXPAND_LUMA(X, W, T0, T1) \ | ||
462 | static SWS_FUNC void \ | ||
463 | fn(expand_luma_##X##W)(SwsOpIter *restrict iter, \ | ||
464 | const SwsOpImpl *restrict impl, \ | ||
465 | block_t c0, block_t c1, block_t c2, block_t c3) \ | ||
466 | { \ | ||
467 | SWS_LOOP \ | ||
468 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) \ | ||
469 | T0[i] = T1[i] = c0[i]; \ | ||
470 | \ | ||
471 | CONTINUE(block_t, c##X, T0, T1, c##W); \ | ||
472 | } \ | ||
473 | \ | ||
474 | DECL_ENTRY(expand_luma_##X##W, \ | ||
475 | .op = SWS_OP_SWIZZLE, \ | ||
476 | .swizzle.in = { X, 0, 0, W }, \ | ||
477 | ); | ||
478 | |||
479 |
2/2✓ Branch 0 taken 2048 times.
✓ Branch 1 taken 64 times.
|
4224 | DECL_EXPAND_LUMA(0, 3, c1, c2) |
480 |
2/2✓ Branch 0 taken 2048 times.
✓ Branch 1 taken 64 times.
|
4224 | DECL_EXPAND_LUMA(3, 0, c1, c2) |
481 |
2/2✓ Branch 0 taken 2048 times.
✓ Branch 1 taken 64 times.
|
4224 | DECL_EXPAND_LUMA(1, 0, c2, c3) |
482 |
2/2✓ Branch 0 taken 2048 times.
✓ Branch 1 taken 64 times.
|
4224 | DECL_EXPAND_LUMA(0, 1, c2, c3) |
483 | |||
/*
 * Operation table for the integer backend at the current BIT_DEPTH.
 *
 * The entry list is NULL-terminated. Which entries are present depends on
 * BIT_DEPTH: sub-byte reads/writes and the expand conversions exist only for
 * 8 bits, each pack/unpack layout is listed for the depth it fits in, and
 * the conversion to the current depth itself is omitted. The min, max,
 * scale, clear_* and convert_uint* entries are not defined in this file
 * (presumably they come from the included common template).
 */
static const SwsOpTable fn(op_table_int) = {
    .block_size = SWS_BLOCK_SIZE,
    .entries = {
        /* Reads */
        &fn(op_read_planar1),
        &fn(op_read_planar2),
        &fn(op_read_planar3),
        &fn(op_read_planar4),
        &fn(op_read_packed2),
        &fn(op_read_packed3),
        &fn(op_read_packed4),

        /* Writes */
        &fn(op_write_planar1),
        &fn(op_write_planar2),
        &fn(op_write_planar3),
        &fn(op_write_planar4),
        &fn(op_write_packed2),
        &fn(op_write_packed3),
        &fn(op_write_packed4),

        /* Depth-specific operations */
#if BIT_DEPTH == 8
        &fn(op_read_bits1),
        &fn(op_read_nibbles1),
        &fn(op_write_bits1),
        &fn(op_write_nibbles1),

        &fn(op_pack_1210),
        &fn(op_pack_2330),
        &fn(op_pack_3320),

        &fn(op_unpack_1210),
        &fn(op_unpack_2330),
        &fn(op_unpack_3320),

        REF_COMMON_PATTERNS(expand16),
        REF_COMMON_PATTERNS(expand32),
#elif BIT_DEPTH == 16
        &fn(op_pack_4440),
        &fn(op_pack_5550),
        &fn(op_pack_5650),
        &fn(op_unpack_4440),
        &fn(op_unpack_5550),
        &fn(op_unpack_5650),
#elif BIT_DEPTH == 32
        &fn(op_pack_2101010),
        &fn(op_pack_1010102),
        &fn(op_unpack_2101010),
        &fn(op_unpack_1010102),
#endif

        /* Byte swapping is only available where SWAP_BYTES is defined. */
#ifdef SWAP_BYTES
        REF_COMMON_PATTERNS(swap_bytes),
#endif

        REF_COMMON_PATTERNS(min),
        REF_COMMON_PATTERNS(max),
        REF_COMMON_PATTERNS(scale),
        REF_COMMON_PATTERNS(convert_float),

        &fn(op_clear_1110),
        &fn(op_clear_0111),
        &fn(op_clear_0011),
        &fn(op_clear_1001),
        &fn(op_clear_1100),
        &fn(op_clear_0101),
        &fn(op_clear_1010),
        &fn(op_clear_1000),
        &fn(op_clear_0100),
        &fn(op_clear_0010),

        /* Pure argument-reordering swizzles */
        &fn(op_swizzle_3012),
        &fn(op_swizzle_3021),
        &fn(op_swizzle_2103),
        &fn(op_swizzle_3210),
        &fn(op_swizzle_3102),
        &fn(op_swizzle_3201),
        &fn(op_swizzle_1203),
        &fn(op_swizzle_1023),
        &fn(op_swizzle_2013),
        &fn(op_swizzle_2310),
        &fn(op_swizzle_2130),
        &fn(op_swizzle_1230),
        &fn(op_swizzle_1320),
        &fn(op_swizzle_0213),
        &fn(op_swizzle_0231),
        &fn(op_swizzle_0312),
        &fn(op_swizzle_3120),
        &fn(op_swizzle_0321),

        /* Luma broadcast swizzles */
        &fn(op_expand_luma_03),
        &fn(op_expand_luma_30),
        &fn(op_expand_luma_10),
        &fn(op_expand_luma_01),

#if BIT_DEPTH != 8
        REF_COMMON_PATTERNS(lshift),
        REF_COMMON_PATTERNS(rshift),
        REF_COMMON_PATTERNS(convert_uint8),
#endif /* BIT_DEPTH != 8 */

#if BIT_DEPTH != 16
        REF_COMMON_PATTERNS(convert_uint16),
#endif
#if BIT_DEPTH != 32
        REF_COMMON_PATTERNS(convert_uint32),
#endif

        NULL
    },
};
593 | |||
/*
 * Remove the per-depth aliases and template settings defined at the top of
 * this file, presumably so it can be included again with another BIT_DEPTH.
 * BIT_DEPTH itself is not undefined here.
 */
#undef PIXEL_TYPE
#undef PIXEL_MAX
#undef PIXEL_MIN
#undef SWAP_BYTES
#undef pixel_t
#undef block_t
#undef px

#undef FMT_CHAR
#undef IS_FLOAT
604 |