FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/vf_fsppdsp.c
Date: 2026-05-03 08:24:11
Summary (executed / total, coverage):
Lines: 112 / 184 (60.9%)
Functions: 4 / 6 (66.7%)
Branches: 30 / 34 (88.2%)

Line Branch Exec Source
1 /*
2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include <stdint.h>
24
25 #include "vf_fsppdsp.h"
26
27 #include "libavutil/common.h"
28 #include "libavutil/mathematics.h"
29 #include "libavutil/mem_internal.h"
30
/* Edge length of the square blocks handled by the kernels in this file. */
#define DCTSIZE 8

/*
 * Convert the real constant x to fixed point with s fractional bits:
 * scale by 2^s, add 0.5 and truncate towards zero.
 */
#define FIX(x, s) ((int)((x) * (1 << (s)) + 0.5))

/* Fixed-point multiply: the product of x and k shifted right by 16 bits. */
#define MULTIPLY16H(x, k) (((x) * (k)) >> 16)

/*
 * r = x when (unsigned)(x + t) >= t * 2, otherwise r = 0.
 * For a non-negative t this keeps x when x >= t or x < -t and zeroes it
 * otherwise. The comparison is deliberately performed in unsigned arithmetic.
 * x and t are evaluated more than once, so they must be free of side effects.
 * Wrapped in do/while (0) so that the macro behaves as a single statement,
 * including inside an enclosing if/else.
 */
#define THRESHOLD(r, x, t)                        \
    do {                                          \
        if ((unsigned)((x) + (t)) >= (t) * 2)     \
            (r) = (x);                            \
        else                                      \
            (r) = 0;                              \
    } while (0)

/* Shift x right by n bits after adding half of 2^n (n must be >= 1). */
#define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n))

/* Type of the intermediate values used by the DCT kernels. */
typedef int32_t int_simd16_t;
42
43 enum {
44 FIX_0_382683433 = FIX(0.382683433, 14),
45 FIX_0_541196100 = FIX(0.541196100, 14),
46 FIX_0_707106781 = FIX(M_SQRT1_2 , 14),
47 FIX_1_306562965 = FIX(1.306562965, 14),
48 FIX_1_414213562_A = FIX(M_SQRT2 , 14),
49 FIX_1_847759065 = FIX(1.847759065, 13),
50 FIX_2_613125930 = FIX(-2.613125930, 13),
51 FIX_1_414213562 = FIX(M_SQRT2 , 13),
52 FIX_1_082392200 = FIX(1.082392200, 13),
53 };
54
55 DECLARE_ALIGNED(8, const uint8_t, ff_fspp_dither)[8][8] = {
56 { 0, 48, 12, 60, 3, 51, 15, 63, },
57 { 32, 16, 44, 28, 35, 19, 47, 31, },
58 { 8, 56, 4, 52, 11, 59, 7, 55, },
59 { 40, 24, 36, 20, 43, 27, 39, 23, },
60 { 2, 50, 14, 62, 1, 49, 13, 61, },
61 { 34, 18, 46, 30, 33, 17, 45, 29, },
62 { 10, 58, 6, 54, 9, 57, 5, 53, },
63 { 42, 26, 38, 22, 41, 25, 37, 21, },
64 };
65
66 //This func reads from 1 slice, 1 and clears 0 & 1
67 3 void ff_store_slice_c(uint8_t *restrict dst, int16_t *restrict src,
68 ptrdiff_t dst_stride, ptrdiff_t src_stride,
69 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
70 {
71 #define STORE(pos) \
72 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
73 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
74 temp = av_clip_uint8(temp); \
75 dst[x + pos] = temp;
76
77
2/2
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 3 times.
18 for (int y = 0; y < height; y++) {
78 15 const uint8_t *d = ff_fspp_dither[y];
79
2/2
✓ Branch 0 taken 310 times.
✓ Branch 1 taken 15 times.
325 for (int x = 0; x < width; x += 8) {
80 int temp;
81 310 STORE(0);
82 310 STORE(1);
83 310 STORE(2);
84 310 STORE(3);
85 310 STORE(4);
86 310 STORE(5);
87 310 STORE(6);
88 310 STORE(7);
89 }
90 15 src += src_stride;
91 15 dst += dst_stride;
92 }
93 3 }
94
95 //This func reads from 2 slices, 0 & 2 and clears 2-nd
96 3 void ff_store_slice2_c(uint8_t *restrict dst, int16_t *restrict src,
97 ptrdiff_t dst_stride, ptrdiff_t src_stride,
98 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
99 {
100 #define STORE2(pos) \
101 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
102 src[x + pos + 16 * src_stride] = 0; \
103 temp = av_clip_uint8(temp); \
104 dst[x + pos] = temp;
105
106
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 3 times.
9 for (int y = 0; y < height; y++) {
107 6 const uint8_t *d = ff_fspp_dither[y];
108
2/2
✓ Branch 0 taken 156 times.
✓ Branch 1 taken 6 times.
162 for (int x = 0; x < width; x += 8) {
109 int temp;
110 156 STORE2(0);
111 156 STORE2(1);
112 156 STORE2(2);
113 156 STORE2(3);
114 156 STORE2(4);
115 156 STORE2(5);
116 156 STORE2(6);
117 156 STORE2(7);
118 }
119 6 src += src_stride;
120 6 dst += dst_stride;
121 }
122 3 }
123
/**
 * Scale a 64-entry threshold matrix by a constant factor.
 *
 * Writes thr_adr[i] = q * thr_adr_noq[i] for i = 0..63. The product is
 * computed as int and then converted to int16_t, so the caller has to keep
 * q * thr_adr_noq[i] within the int16_t range.
 *
 * @param thr_adr_noq unscaled thresholds, 64 entries (read only)
 * @param thr_adr     destination for the scaled thresholds, 64 entries
 * @param q           scale factor
 */
void ff_mul_thrmat_c(const int16_t *restrict thr_adr_noq, int16_t *restrict thr_adr, int q)
{
    enum { THR_ENTRIES = 64 };

    for (int i = 0; i < THR_ENTRIES; i++)
        thr_adr[i] = (int16_t)(q * thr_adr_noq[i]);
}
129
130 3 void ff_column_fidct_c(const int16_t *restrict thr_adr, const int16_t *restrict data,
131 int16_t *restrict output, int cnt)
132 {
133 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
134 int_simd16_t tmp10, tmp11, tmp12, tmp13;
135 int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
136 int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
137
138 int16_t *wsptr;
139
140 3 wsptr = output;
141
142
2/2
✓ Branch 0 taken 96 times.
✓ Branch 1 taken 3 times.
99 for (; cnt > 0; cnt -= 2) { //start positions
143 96 const int16_t *threshold = thr_adr;//threshold_mtx
144
2/2
✓ Branch 0 taken 768 times.
✓ Branch 1 taken 96 times.
864 for (int ctr = DCTSIZE; ctr > 0; ctr--) {
145 // Process columns from input, add to output.
146 768 tmp0 = data[DCTSIZE * 0] + data[DCTSIZE * 7];
147 768 tmp7 = data[DCTSIZE * 0] - data[DCTSIZE * 7];
148
149 768 tmp1 = data[DCTSIZE * 1] + data[DCTSIZE * 6];
150 768 tmp6 = data[DCTSIZE * 1] - data[DCTSIZE * 6];
151
152 768 tmp2 = data[DCTSIZE * 2] + data[DCTSIZE * 5];
153 768 tmp5 = data[DCTSIZE * 2] - data[DCTSIZE * 5];
154
155 768 tmp3 = data[DCTSIZE * 3] + data[DCTSIZE * 4];
156 768 tmp4 = data[DCTSIZE * 3] - data[DCTSIZE * 4];
157
158 // Even part of FDCT
159
160 768 tmp10 = tmp0 + tmp3;
161 768 tmp13 = tmp0 - tmp3;
162 768 tmp11 = tmp1 + tmp2;
163 768 tmp12 = tmp1 - tmp2;
164
165 768 d0 = tmp10 + tmp11;
166 768 d4 = tmp10 - tmp11;
167
168 768 z1 = MULTIPLY16H(tmp12 + tmp13, FIX_0_707106781 << 2);
169 768 d2 = tmp13 + z1;
170 768 d6 = tmp13 - z1;
171
172 // Even part of IDCT
173
174
2/2
✓ Branch 0 taken 64 times.
✓ Branch 1 taken 704 times.
768 THRESHOLD(tmp0, d0, threshold[0 * 8]);
175
2/2
✓ Branch 0 taken 41 times.
✓ Branch 1 taken 727 times.
768 THRESHOLD(tmp1, d2, threshold[2 * 8]);
176
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 754 times.
768 THRESHOLD(tmp2, d4, threshold[4 * 8]);
177
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 712 times.
768 THRESHOLD(tmp3, d6, threshold[6 * 8]);
178 768 tmp0 += 2;
179 768 tmp10 = (tmp0 + tmp2) >> 2;
180 768 tmp11 = (tmp0 - tmp2) >> 2;
181
182 768 tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
183 768 tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
184
185 768 tmp0 = tmp10 + tmp13; //->temps
186 768 tmp3 = tmp10 - tmp13; //->temps
187 768 tmp1 = tmp11 + tmp12; //->temps
188 768 tmp2 = tmp11 - tmp12; //->temps
189
190 // Odd part of FDCT
191
192 768 tmp10 = tmp4 + tmp5;
193 768 tmp11 = tmp5 + tmp6;
194 768 tmp12 = tmp6 + tmp7;
195
196 768 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433 << 2);
197 768 z2 = MULTIPLY16H(tmp10, FIX_0_541196100 << 2) + z5;
198 768 z4 = MULTIPLY16H(tmp12, FIX_1_306562965 << 2) + z5;
199 768 z3 = MULTIPLY16H(tmp11, FIX_0_707106781 << 2);
200
201 768 z11 = tmp7 + z3;
202 768 z13 = tmp7 - z3;
203
204 768 d5 = z13 + z2;
205 768 d3 = z13 - z2;
206 768 d1 = z11 + z4;
207 768 d7 = z11 - z4;
208
209 // Odd part of IDCT
210
211
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 744 times.
768 THRESHOLD(tmp4, d1, threshold[1 * 8]);
212
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 740 times.
768 THRESHOLD(tmp5, d3, threshold[3 * 8]);
213
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 744 times.
768 THRESHOLD(tmp6, d5, threshold[5 * 8]);
214
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 757 times.
768 THRESHOLD(tmp7, d7, threshold[7 * 8]);
215
216 //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
217 768 z13 = tmp6 + tmp5;
218 768 z10 = (tmp6 - tmp5) * 2;
219 768 z11 = tmp4 + tmp7;
220 768 z12 = (tmp4 - tmp7) * 2;
221
222 768 tmp7 = (z11 + z13) >> 2; //+2 !
223 768 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562 << 1);
224 768 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
225 768 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
226 768 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
227
228 768 tmp6 = tmp12 - tmp7;
229 768 tmp5 = tmp11 - tmp6;
230 768 tmp4 = tmp10 + tmp5;
231
232 768 wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
233 768 wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
234 768 wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
235 768 wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
236 768 wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
237 768 wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
238 768 wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
239 768 wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
240 //
241 768 data++; //next column
242 768 wsptr++;
243 768 threshold++;
244 }
245 96 data += 8; //skip each second start pos
246 96 wsptr += 8;
247 }
248 3 }
249
250 void ff_row_idct_c(const int16_t *restrict wsptr, int16_t *restrict output_adr,
251 ptrdiff_t output_stride, int cnt)
252 {
253 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
254 int_simd16_t tmp10, tmp11, tmp12, tmp13;
255 int_simd16_t z5, z10, z11, z12, z13;
256 int16_t *outptr;
257
258 cnt *= 4;
259 outptr = output_adr;
260 for (; cnt > 0; cnt--) {
261 // Even part
262 //Simd version reads 4x4 block and transposes it
263 tmp10 = wsptr[2] + wsptr[3];
264 tmp11 = wsptr[2] - wsptr[3];
265
266 tmp13 = wsptr[0] + wsptr[1];
267 tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) * 4) - tmp13;//this shift order to avoid overflow
268
269 tmp0 = tmp10 + tmp13; //->temps
270 tmp3 = tmp10 - tmp13; //->temps
271 tmp1 = tmp11 + tmp12;
272 tmp2 = tmp11 - tmp12;
273
274 // Odd part
275 //Also transpose, with previous:
276 // ---- ---- ||||
277 // ---- ---- idct ||||
278 // ---- ---- ---> ||||
279 // ---- ---- ||||
280 z13 = wsptr[4] + wsptr[5];
281 z10 = wsptr[4] - wsptr[5];
282 z11 = wsptr[6] + wsptr[7];
283 z12 = wsptr[6] - wsptr[7];
284
285 tmp7 = z11 + z13;
286 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
287
288 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
289 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
290 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
291
292 tmp6 = tmp12 * 8 - tmp7;
293 tmp5 = tmp11 * 8 - tmp6;
294 tmp4 = tmp10 * 8 + tmp5;
295
296 // Final output stage: descale and write column
297 outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
298 outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
299 outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
300 outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
301 outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
302 outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
303 outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
304 outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
305 outptr++;
306
307 wsptr += DCTSIZE; // advance pointer to next row
308 }
309 }
310
311 void ff_row_fdct_c(int16_t *restrict data, const uint8_t *restrict pixels,
312 ptrdiff_t line_size, int cnt)
313 {
314 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
315 int_simd16_t tmp10, tmp11, tmp12, tmp13;
316 int_simd16_t z1, z2, z3, z4, z5, z11, z13;
317 int16_t *dataptr;
318
319 cnt *= 4;
320 // Pass 1: process rows.
321
322 dataptr = data;
323 for (; cnt > 0; cnt--) {
324 tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
325 tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
326 tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
327 tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
328 tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
329 tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
330 tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
331 tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
332
333 // Even part
334
335 tmp10 = tmp0 + tmp3;
336 tmp13 = tmp0 - tmp3;
337 tmp11 = tmp1 + tmp2;
338 tmp12 = tmp1 - tmp2;
339 //Even columns are written first, this leads to different order of columns
340 //in column_fidct(), but they are processed independently, so all ok.
341 //Later in the row_idct() columns are read in the same order.
342 dataptr[2] = tmp10 + tmp11;
343 dataptr[3] = tmp10 - tmp11;
344
345 z1 = MULTIPLY16H(tmp12 + tmp13, FIX_0_707106781 << 2);
346 dataptr[0] = tmp13 + z1;
347 dataptr[1] = tmp13 - z1;
348
349 // Odd part
350
351 tmp10 = tmp4 + tmp5;
352 tmp11 = tmp5 + tmp6;
353 tmp12 = tmp6 + tmp7;
354
355 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433 << 2);
356 z2 = MULTIPLY16H(tmp10, FIX_0_541196100 << 2) + z5;
357 z4 = MULTIPLY16H(tmp12, FIX_1_306562965 << 2) + z5;
358 z3 = MULTIPLY16H(tmp11, FIX_0_707106781 << 2);
359
360 z11 = tmp7 + z3;
361 z13 = tmp7 - z3;
362
363 dataptr[4] = z13 + z2;
364 dataptr[5] = z13 - z2;
365 dataptr[6] = z11 + z4;
366 dataptr[7] = z11 - z4;
367
368 pixels++; // advance pointer to next column
369 dataptr += DCTSIZE;
370 }
371 }
372