FFmpeg coverage

Directory: ../../../ffmpeg/
File:      src/libavfilter/vf_fspp.c
Date:      2023-09-24 13:02:57

              Exec    Total    Coverage
Lines:           0      334        0.0%
Functions:       0       10        0.0%
Branches:        0      158        0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 /**
24 * @file
25 * Fast Simple Post-processing filter
26 * This implementation is based on an algorithm described in
27 * "Aria Nosratinia Embedded Post-Processing for
28 * Enhancement of Compressed Images (1999)"
29 * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30 * Further, by splitting the (I)DCT into horizontal and vertical passes, one of
31 * them can be performed once per block rather than once per pixel. This allows
32 * for much higher speed.
33 *
34 * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35 * project, and ported by Arwa Arif for FFmpeg.
36 */
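
In other words, each output pixel is (up to dithering) an average over several shifted 8x8 block grids of DCT-thresholded reconstructions, and the per-block DCT work is what the split row/column passes below accelerate. A minimal, self-contained sketch of that scheme (floating point and purely illustrative: the helper names, shift pattern and border handling are assumptions, while the real filter works in fixed point, splits the DCT so part of the work is done once per block rather than per pixel, and dithers the result before storing) could look like this:

    /* Illustrative reference only; not part of vf_fspp.c. */
    #include <math.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define BS 8
    #define PI 3.14159265358979323846f

    static int clampi(int v, int lo, int hi)
    {
        return v < lo ? lo : v > hi ? hi : v;
    }

    /* Orthonormal 1-D DCT-II (inverse = 0) or its inverse, DCT-III (inverse = 1). */
    static void dct1d(const float *in, float *out, int inverse)
    {
        for (int i = 0; i < BS; i++) {
            float sum = 0.0f;
            for (int j = 0; j < BS; j++) {
                int n = inverse ? i : j;                 /* spatial index   */
                int k = inverse ? j : i;                 /* frequency index */
                float a = k ? sqrtf(2.0f / BS) : sqrtf(1.0f / BS);
                sum += a * in[j] * cosf(PI / BS * (n + 0.5f) * k);
            }
            out[i] = sum;
        }
    }

    /* Forward 2-D DCT, hard-threshold the coefficients, inverse 2-D DCT. */
    static void denoise_block(float blk[BS][BS], const float thr[BS][BS])
    {
        float tmp[BS][BS], col[BS], out[BS];

        for (int y = 0; y < BS; y++)
            dct1d(blk[y], tmp[y], 0);                    /* row DCT */
        for (int x = 0; x < BS; x++) {
            for (int y = 0; y < BS; y++)
                col[y] = tmp[y][x];
            dct1d(col, out, 0);                          /* column DCT */
            for (int y = 0; y < BS; y++)                 /* keep only large coefficients */
                col[y] = fabsf(out[y]) > thr[y][x] ? out[y] : 0.0f;
            dct1d(col, out, 1);                          /* column IDCT */
            for (int y = 0; y < BS; y++)
                tmp[y][x] = out[y];
        }
        for (int y = 0; y < BS; y++)
            dct1d(tmp[y], blk[y], 1);                    /* row IDCT */
    }

    /* Average the per-block reconstructions over `count` shifted block grids. */
    static void fspp_reference(const uint8_t *src, uint8_t *dst,
                               int w, int h, int stride,
                               const float thr[BS][BS], int count)
    {
        float *acc = calloc((size_t)w * h, sizeof(*acc));
        if (!acc)
            return;
        for (int s = 0; s < count; s++) {
            int dy = s % BS, dx = (s * 3) % BS;          /* arbitrary shift pattern */
            for (int by = -dy; by < h; by += BS)
                for (int bx = -dx; bx < w; bx += BS) {
                    float blk[BS][BS];
                    for (int y = 0; y < BS; y++)         /* gather, replicating edges */
                        for (int x = 0; x < BS; x++)
                            blk[y][x] = src[clampi(by + y, 0, h - 1) * stride +
                                            clampi(bx + x, 0, w - 1)];
                    denoise_block(blk, thr);
                    for (int y = 0; y < BS; y++)         /* accumulate */
                        for (int x = 0; x < BS; x++)
                            if (by + y >= 0 && by + y < h &&
                                bx + x >= 0 && bx + x < w)
                                acc[(by + y) * w + bx + x] += blk[y][x];
                }
        }
        for (int y = 0; y < h; y++)
            for (int x = 0; x < w; x++)
                dst[y * stride + x] =
                    (uint8_t)clampi((int)(acc[y * w + x] / count + 0.5f), 0, 255);
        free(acc);
    }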
37
38 #include "libavutil/emms.h"
39 #include "libavutil/imgutils.h"
40 #include "libavutil/mem_internal.h"
41 #include "libavutil/opt.h"
42 #include "libavutil/pixdesc.h"
43 #include "internal.h"
44 #include "qp_table.h"
45 #include "vf_fspp.h"
46 #include "video.h"
47
48 #define OFFSET(x) offsetof(FSPPContext, x)
49 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
50 static const AVOption fspp_options[] = {
51 { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
52 { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
53 { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
54 { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS },
55 { NULL }
56 };
57
58 AVFILTER_DEFINE_CLASS(fspp);
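
The filter and option names registered here are what both the libavfilter API and the command line use. A minimal sketch of creating an fspp instance programmatically (the surrounding graph setup and all error handling are omitted; create_fspp is a hypothetical helper, while the filter name, the option names and avfilter_graph_create_filter() itself are real):

    #include "libavfilter/avfilter.h"

    /* Hypothetical helper: instantiate fspp with the options listed above. */
    static int create_fspp(AVFilterGraph *graph, AVFilterContext **fspp_ctx)
    {
        return avfilter_graph_create_filter(fspp_ctx,
                                            avfilter_get_by_name("fspp"),
                                            "postproc",      /* instance name */
                                            "quality=5:strength=0:use_bframe_qp=1",
                                            NULL, graph);
    }

The same option string works on the command line, e.g. -vf fspp=quality=5:strength=0:use_bframe_qp=1.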
59
60 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
61 { 0, 48, 12, 60, 3, 51, 15, 63, },
62 { 32, 16, 44, 28, 35, 19, 47, 31, },
63 { 8, 56, 4, 52, 11, 59, 7, 55, },
64 { 40, 24, 36, 20, 43, 27, 39, 23, },
65 { 2, 50, 14, 62, 1, 49, 13, 61, },
66 { 34, 18, 46, 30, 33, 17, 45, 29, },
67 { 10, 58, 6, 54, 9, 57, 5, 53, },
68 { 42, 26, 38, 22, 41, 25, 37, 21, },
69 };
70
71 static const short custom_threshold[64] = {
72 // values (296) can't be too high
73 // - too-high values cause too strong a dependence on the quantizer,
74 // or possibly overflow (check), which results in some flashing
75 71, 296, 295, 237, 71, 40, 38, 19,
76 245, 193, 185, 121, 102, 73, 53, 27,
77 158, 129, 141, 107, 97, 73, 50, 26,
78 102, 116, 109, 98, 82, 66, 45, 23,
79 71, 94, 95, 81, 70, 56, 38, 20,
80 56, 77, 74, 66, 56, 44, 30, 15,
81 38, 53, 50, 45, 38, 30, 21, 11,
82 20, 27, 26, 23, 20, 15, 11, 5
83 };
84
85 //This func reads from 1 slice (slice 1) and clears slices 0 & 1
86 static void store_slice_c(uint8_t *dst, int16_t *src,
87 ptrdiff_t dst_stride, ptrdiff_t src_stride,
88 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
89 {
90 int y, x;
91 #define STORE(pos) \
92 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
93 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
94 if (temp & 0x100) temp = ~(temp >> 31); \
95 dst[x + pos] = temp;
96
97 for (y = 0; y < height; y++) {
98 const uint8_t *d = dither[y];
99 for (x = 0; x < width; x += 8) {
100 int temp;
101 STORE(0);
102 STORE(1);
103 STORE(2);
104 STORE(3);
105 STORE(4);
106 STORE(5);
107 STORE(6);
108 STORE(7);
109 }
110 src += src_stride;
111 dst += dst_stride;
112 }
113 }
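
The STORE() macro above does three things per pixel: it adds the corresponding dither[][] entry (scaled by log2_scale), shifts the accumulated sum back down to pixel range, and clamps to 0..255 without comparing against both bounds explicitly. `temp & 0x100` fires both for negative results and for results in 256..511, and `~(temp >> 31)` then yields 0 for negatives and an all-ones value, which the byte store truncates to 255. A standalone check of that clamp (store_clamp is a hypothetical helper; an arithmetic right shift of negative ints is assumed, as the filter itself assumes):

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical helper mirroring the clamp in STORE(); valid as long as
     * temp stays within -256..511, which holds for dithered 8-bit data here. */
    static uint8_t store_clamp(int temp)
    {
        if (temp & 0x100)
            temp = ~(temp >> 31);   /* negatives -> 0, 256..511 -> all ones  */
        return (uint8_t)temp;       /* the byte store turns all ones into 255 */
    }

    static void store_clamp_check(void)
    {
        assert(store_clamp(-5)  == 0);
        assert(store_clamp(0)   == 0);
        assert(store_clamp(255) == 255);
        assert(store_clamp(300) == 255);
    }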
114
115 //This func reads from 2 slices (0 & 2) and clears the 2nd
116 static void store_slice2_c(uint8_t *dst, int16_t *src,
117 ptrdiff_t dst_stride, ptrdiff_t src_stride,
118 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
119 {
120 int y, x;
121 #define STORE2(pos) \
122 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
123 src[x + pos + 16 * src_stride] = 0; \
124 if (temp & 0x100) temp = ~(temp >> 31); \
125 dst[x + pos] = temp;
126
127 for (y = 0; y < height; y++) {
128 const uint8_t *d = dither[y];
129 for (x = 0; x < width; x += 8) {
130 int temp;
131 STORE2(0);
132 STORE2(1);
133 STORE2(2);
134 STORE2(3);
135 STORE2(4);
136 STORE2(5);
137 STORE2(6);
138 STORE2(7);
139 }
140 src += src_stride;
141 dst += dst_stride;
142 }
143 }
144
145 static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
146 {
147 int a;
148 for (a = 0; a < 64; a++)
149 thr_adr[a] = q * thr_adr_noq[a];
150 }
151
152 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
153 int dst_stride, int src_stride,
154 int width, int height,
155 uint8_t *qp_store, int qp_stride, int is_luma)
156 {
157 int x, x0, y, es, qy, t;
158
159 const int stride = is_luma ? p->temp_stride : (width + 16);
160 const int step = 6 - p->log2_count;
161 const int qpsh = 4 - p->hsub * !is_luma;
162 const int qpsv = 4 - p->vsub * !is_luma;
163
164 DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
165 int16_t *block = (int16_t *)block_align;
166 int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
167
168 memset(block3, 0, 4 * 8 * BLOCKSZ);
169
170 if (!src || !dst) return;
171
172 for (y = 0; y < height; y++) {
173 int index = 8 + 8 * stride + y * stride;
174 memcpy(p->src + index, src + y * src_stride, width);
175 for (x = 0; x < 8; x++) {
176 p->src[index - x - 1] = p->src[index + x ];
177 p->src[index + width + x ] = p->src[index + width - x - 1];
178 }
179 }
180
181 for (y = 0; y < 8; y++) {
182 memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
183 memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
184 }
185 //FIXME (try edge emu)
186
187 for (y = 8; y < 24; y++)
188 memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
189
190 for (y = step; y < height + 8; y += step) { //step= 1,2
191 const int y1 = y - 8 + step; //l5-7 l4-6;
192 qy = y - 4;
193
194 if (qy > height - 1) qy = height - 1;
195 if (qy < 0) qy = 0;
196
197 qy = (qy >> qpsv) * qp_stride;
198 p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
199
200 for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
201 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
202
203 if (p->qp)
204 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
205 else
206 for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
207 t = x + x0 - 2; //correct t=x+x0-2-(y&1), but it's the same
208
209 if (t < 0) t = 0; //t always < width-2
210
211 t = qp_store[qy + (t >> qpsh)];
212 t = ff_norm_qscale(t, p->qscale_type);
213
214 if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
215 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
216 }
217 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
218 memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
219 memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
220 }
221
222 es = width + 8 - x0; // 8, ...
223 if (es > 8)
224 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
225
226 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
227 if (es > 3)
228 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
229
230 if (!(y1 & 7) && y1) {
231 if (y1 & 8)
232 p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
233 dst_stride, stride, width, 8, 5 - p->log2_count);
234 else
235 p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
236 dst_stride, stride, width, 8, 5 - p->log2_count);
237 }
238 }
239
240 if (y & 7) { // height % 8 != 0
241 if (y & 8)
242 p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
243 dst_stride, stride, width, y&7, 5 - p->log2_count);
244 else
245 p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
246 dst_stride, stride, width, y&7, 5 - p->log2_count);
247 }
248 }
249
250 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
251 {
252 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
253 int_simd16_t tmp10, tmp11, tmp12, tmp13;
254 int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
255 int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
256
257 int16_t *dataptr;
258 int16_t *wsptr;
259 int16_t *threshold;
260 int ctr;
261
262 dataptr = data;
263 wsptr = output;
264
265 for (; cnt > 0; cnt -= 2) { //start positions
266 threshold = (int16_t *)thr_adr;//threshold_mtx
267 for (ctr = DCTSIZE; ctr > 0; ctr--) {
268 // Process columns from input, add to output.
269 tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
270 tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
271
272 tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
273 tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
274
275 tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
276 tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
277
278 tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
279 tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
280
281 // Even part of FDCT
282
283 tmp10 = tmp0 + tmp3;
284 tmp13 = tmp0 - tmp3;
285 tmp11 = tmp1 + tmp2;
286 tmp12 = tmp1 - tmp2;
287
288 d0 = tmp10 + tmp11;
289 d4 = tmp10 - tmp11;
290
291 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
292 d2 = tmp13 + z1;
293 d6 = tmp13 - z1;
294
295 // Even part of IDCT
296
297 THRESHOLD(tmp0, d0, threshold[0 * 8]);
298 THRESHOLD(tmp1, d2, threshold[2 * 8]);
299 THRESHOLD(tmp2, d4, threshold[4 * 8]);
300 THRESHOLD(tmp3, d6, threshold[6 * 8]);
301 tmp0 += 2;
302 tmp10 = (tmp0 + tmp2) >> 2;
303 tmp11 = (tmp0 - tmp2) >> 2;
304
305 tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
306 tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
307
308 tmp0 = tmp10 + tmp13; //->temps
309 tmp3 = tmp10 - tmp13; //->temps
310 tmp1 = tmp11 + tmp12; //->temps
311 tmp2 = tmp11 - tmp12; //->temps
312
313 // Odd part of FDCT
314
315 tmp10 = tmp4 + tmp5;
316 tmp11 = tmp5 + tmp6;
317 tmp12 = tmp6 + tmp7;
318
319 z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
320 z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
321 z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
322 z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
323
324 z11 = tmp7 + z3;
325 z13 = tmp7 - z3;
326
327 d5 = z13 + z2;
328 d3 = z13 - z2;
329 d1 = z11 + z4;
330 d7 = z11 - z4;
331
332 // Odd part of IDCT
333
334 THRESHOLD(tmp4, d1, threshold[1 * 8]);
335 THRESHOLD(tmp5, d3, threshold[3 * 8]);
336 THRESHOLD(tmp6, d5, threshold[5 * 8]);
337 THRESHOLD(tmp7, d7, threshold[7 * 8]);
338
339 //The SIMD version uses a shortcut here for tmp5, tmp6, tmp7 == 0
340 z13 = tmp6 + tmp5;
341 z10 = (tmp6 - tmp5) << 1;
342 z11 = tmp4 + tmp7;
343 z12 = (tmp4 - tmp7) << 1;
344
345 tmp7 = (z11 + z13) >> 2; //+2 !
346 tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
347 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
348 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
349 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
350
351 tmp6 = tmp12 - tmp7;
352 tmp5 = tmp11 - tmp6;
353 tmp4 = tmp10 + tmp5;
354
355 wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
356 wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
357 wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
358 wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
359 wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
360 wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
361 wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
362 wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
363 //
364 dataptr++; //next column
365 wsptr++;
366 threshold++;
367 }
368 dataptr += 8; //skip each second start pos
369 wsptr += 8;
370 }
371 }
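
column_fidct_c() fuses the vertical forward DCT, the thresholding of the (now fully transformed) coefficients, and the vertical inverse DCT into a single pass over each pair of columns, so the 2-D frequency-domain data only exists transiently inside the loop. The THRESHOLD() macro it relies on is defined in vf_fspp.h; conceptually it is a hard threshold, which stripped of the fixed-point details amounts to something like the following (illustrative form only, not the actual macro):

    /* Illustrative hard threshold: keep a coefficient only if its magnitude
     * exceeds the per-frequency threshold, otherwise zero it. */
    static int hard_threshold(int coeff, int thr)
    {
        return (coeff > thr || coeff < -thr) ? coeff : 0;
    }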
372
373 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
374 {
375 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
376 int_simd16_t tmp10, tmp11, tmp12, tmp13;
377 int_simd16_t z5, z10, z11, z12, z13;
378 int16_t *outptr;
379 int16_t *wsptr;
380
381 cnt *= 4;
382 wsptr = workspace;
383 outptr = output_adr;
384 for (; cnt > 0; cnt--) {
385 // Even part
386 //The SIMD version reads a 4x4 block and transposes it
387 tmp10 = wsptr[2] + wsptr[3];
388 tmp11 = wsptr[2] - wsptr[3];
389
390 tmp13 = wsptr[0] + wsptr[1];
391 tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
392
393 tmp0 = tmp10 + tmp13; //->temps
394 tmp3 = tmp10 - tmp13; //->temps
395 tmp1 = tmp11 + tmp12;
396 tmp2 = tmp11 - tmp12;
397
398 // Odd part
399 //Also transpose, with previous:
400 // ---- ---- ||||
401 // ---- ---- idct ||||
402 // ---- ---- ---> ||||
403 // ---- ---- ||||
404 z13 = wsptr[4] + wsptr[5];
405 z10 = wsptr[4] - wsptr[5];
406 z11 = wsptr[6] + wsptr[7];
407 z12 = wsptr[6] - wsptr[7];
408
409 tmp7 = z11 + z13;
410 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
411
412 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
413 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
414 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
415
416 tmp6 = (tmp12 << 3) - tmp7;
417 tmp5 = (tmp11 << 3) - tmp6;
418 tmp4 = (tmp10 << 3) + tmp5;
419
420 // Final output stage: descale and write column
421 outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
422 outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
423 outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
424 outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
425 outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
426 outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
427 outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
428 outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
429 outptr++;
430
431 wsptr += DCTSIZE; // advance pointer to next row
432 }
433 }
434
435 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
436 {
437 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
438 int_simd16_t tmp10, tmp11, tmp12, tmp13;
439 int_simd16_t z1, z2, z3, z4, z5, z11, z13;
440 int16_t *dataptr;
441
442 cnt *= 4;
443 // Pass 1: process rows.
444
445 dataptr = data;
446 for (; cnt > 0; cnt--) {
447 tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
448 tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
449 tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
450 tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
451 tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
452 tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
453 tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
454 tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
455
456 // Even part
457
458 tmp10 = tmp0 + tmp3;
459 tmp13 = tmp0 - tmp3;
460 tmp11 = tmp1 + tmp2;
461 tmp12 = tmp1 - tmp2;
462 //Even columns are written first; this leads to a different order of columns
463 //in column_fidct(), but they are processed independently, so all is ok.
464 //Later, in row_idct(), the columns are read in the same order.
465 dataptr[2] = tmp10 + tmp11;
466 dataptr[3] = tmp10 - tmp11;
467
468 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
469 dataptr[0] = tmp13 + z1;
470 dataptr[1] = tmp13 - z1;
471
472 // Odd part
473
474 tmp10 = (tmp4 + tmp5) << 2;
475 tmp11 = (tmp5 + tmp6) << 2;
476 tmp12 = (tmp6 + tmp7) << 2;
477
478 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
479 z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
480 z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
481 z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
482
483 z11 = tmp7 + z3;
484 z13 = tmp7 - z3;
485
486 dataptr[4] = z13 + z2;
487 dataptr[5] = z13 - z2;
488 dataptr[6] = z11 + z4;
489 dataptr[7] = z11 - z4;
490
491 pixels++; // advance pointer to next column
492 dataptr += DCTSIZE;
493 }
494 }
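
Both the forward and inverse passes replace multiplications by the AAN DCT factors (0.707106781, 0.541196100, 1.306562965, 1.414213562, 1.847759065, 2.613125930, 1.082392200, 0.382683433) with integer multiplications through MULTIPLY16H() and the pre-scaled FIX_* constants from vf_fspp.h. A small sketch of that general fixed-point technique (the 2^16 scaling and the macros below are assumptions for the example, not the definitions actually used):

    #include <stdint.h>

    /* Assumed format for the example: constant = round(factor * 2^16) and the
     * product keeps only the high half, so x * factor ~= (x * k) >> 16. */
    #define EX_FIX(f)            ((int32_t)((f) * 65536.0 + 0.5))
    #define EX_MULTIPLY16H(x, k) (((x) * (k)) >> 16)

    static int16_t scale_by_0_707(int16_t x)
    {
        return (int16_t)EX_MULTIPLY16H((int32_t)x, EX_FIX(0.707106781));
    }
    /* e.g. scale_by_0_707(1000) == 707, within rounding */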
495
496 static const enum AVPixelFormat pix_fmts[] = {
497 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
498 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P,
499 AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P,
500 AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
501 AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P,
502 AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8,
503 AV_PIX_FMT_NONE
504 };
505
506 static int config_input(AVFilterLink *inlink)
507 {
508 AVFilterContext *ctx = inlink->dst;
509 FSPPContext *fspp = ctx->priv;
510 const int h = FFALIGN(inlink->h + 16, 16);
511 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
512
513 fspp->hsub = desc->log2_chroma_w;
514 fspp->vsub = desc->log2_chroma_h;
515
516 fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
517 fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
518 fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
519
520 if (!fspp->temp || !fspp->src)
521 return AVERROR(ENOMEM);
522
523 fspp->store_slice = store_slice_c;
524 fspp->store_slice2 = store_slice2_c;
525 fspp->mul_thrmat = mul_thrmat_c;
526 fspp->column_fidct = column_fidct_c;
527 fspp->row_idct = row_idct_c;
528 fspp->row_fdct = row_fdct_c;
529
530 #if ARCH_X86
531 ff_fspp_init_x86(fspp);
532 #endif
533
534 return 0;
535 }
536
537 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
538 {
539 AVFilterContext *ctx = inlink->dst;
540 FSPPContext *fspp = ctx->priv;
541 AVFilterLink *outlink = ctx->outputs[0];
542 AVFrame *out = in;
543
544 int qp_stride = 0;
545 int8_t *qp_table = NULL;
546 int i, bias;
547 int ret = 0;
548 int custom_threshold_m[64];
549
550 bias = (1 << 4) + fspp->strength;
551
552 for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
553 custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
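
    /* Worked example of the scaling above (illustrative, not part of the
     * filter): with the default strength = 0,
     *     bias = (1 << 4) + 0 = 16,
     * and since custom_threshold[0] == 71, every entry is scaled by 16/71,
     * so the leading entry maps to (int)(71 * (16 / 71.0) + 0.5) == 16.
     * Positive strength raises all thresholds proportionally (stronger
     * filtering); negative strength lowers them. */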
554
555 for (i = 0; i < 8; i++) {
556 fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
557 |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
558 |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
559 |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
560
561 fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
562 |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
563 |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
564 |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
565 }
566
567 if (fspp->qp)
568 fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
569
570 /* if we are not in a constant user quantizer mode and we don't want to use
571 * the quantizers from the B-frames (B-frames often have a higher QP), we
572 * need to save the qp table from the last non B-frame; this is what the
573 * following code block does */
574 if (!fspp->qp && (fspp->use_bframe_qp || in->pict_type != AV_PICTURE_TYPE_B)) {
575 ret = ff_qp_table_extract(in, &qp_table, &qp_stride, NULL, &fspp->qscale_type);
576 if (ret < 0) {
577 av_frame_free(&in);
578 return ret;
579 }
580
581 if (!fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
582 av_freep(&fspp->non_b_qp_table);
583 fspp->non_b_qp_table = qp_table;
584 fspp->non_b_qp_stride = qp_stride;
585 }
586 }
587
588 if (fspp->log2_count && !ctx->is_disabled) {
589 if (!fspp->use_bframe_qp && fspp->non_b_qp_table) {
590 qp_table = fspp->non_b_qp_table;
591 qp_stride = fspp->non_b_qp_stride;
592 }
593
594 if (qp_table || fspp->qp) {
595 const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
596 const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);
597
598 /* get a new frame if in-place is not possible or if the dimensions
599 * are not multiples of 8 */
600 if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
601 const int aligned_w = FFALIGN(inlink->w, 8);
602 const int aligned_h = FFALIGN(inlink->h, 8);
603
604 out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
605 if (!out) {
606 av_frame_free(&in);
607 ret = AVERROR(ENOMEM);
608 goto finish;
609 }
610 av_frame_copy_props(out, in);
611 out->width = in->width;
612 out->height = in->height;
613 }
614
615 filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
616 inlink->w, inlink->h, qp_table, qp_stride, 1);
617 filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
618 cw, ch, qp_table, qp_stride, 0);
619 filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
620 cw, ch, qp_table, qp_stride, 0);
621 emms_c();
622 }
623 }
624
625 if (in != out) {
626 if (in->data[3])
627 av_image_copy_plane(out->data[3], out->linesize[3],
628 in ->data[3], in ->linesize[3],
629 inlink->w, inlink->h);
630 av_frame_free(&in);
631 }
632 ret = ff_filter_frame(outlink, out);
633 finish:
634 if (qp_table != fspp->non_b_qp_table)
635 av_freep(&qp_table);
636 return ret;
637 }
638
639 static av_cold void uninit(AVFilterContext *ctx)
640 {
641 FSPPContext *fspp = ctx->priv;
642 av_freep(&fspp->temp);
643 av_freep(&fspp->src);
644 av_freep(&fspp->non_b_qp_table);
645 }
646
647 static const AVFilterPad fspp_inputs[] = {
648 {
649 .name = "default",
650 .type = AVMEDIA_TYPE_VIDEO,
651 .config_props = config_input,
652 .filter_frame = filter_frame,
653 },
654 };
655
656 const AVFilter ff_vf_fspp = {
657 .name = "fspp",
658 .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
659 .priv_size = sizeof(FSPPContext),
660 .uninit = uninit,
661 FILTER_INPUTS(fspp_inputs),
662 FILTER_OUTPUTS(ff_video_default_filterpad),
663 FILTER_PIXFMTS_ARRAY(pix_fmts),
664 .priv_class = &fspp_class,
665 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
666 };
667