FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/vf_fspp.c
Date: 2024-11-20 23:03:26
Exec Total Coverage
Lines: 0 334 0.0%
Functions: 0 10 0.0%
Branches: 0 158 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 /**
24 * @file
25 * Fast Simple Post-processing filter
26 * This implementation is based on an algorithm described in
27 * "Aria Nosratinia Embedded Post-Processing for
28 * Enhancement of Compressed Images (1999)"
29 * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30 * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31 * them can be performed once per block, not per pixel. This allows for much
32 * higher speed.
33 *
34 * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35 * project, and ported by Arwa Arif for FFmpeg.
36 */
37
38 #include "libavutil/emms.h"
39 #include "libavutil/imgutils.h"
40 #include "libavutil/mem.h"
41 #include "libavutil/mem_internal.h"
42 #include "libavutil/opt.h"
43 #include "libavutil/pixdesc.h"
44
45 #include "filters.h"
46 #include "qp_table.h"
47 #include "vf_fspp.h"
48 #include "video.h"
49
50 #define OFFSET(x) offsetof(FSPPContext, x)
51 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
52 static const AVOption fspp_options[] = {
53 { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
54 { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
55 { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
56 { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS },
57 { NULL }
58 };
59
60 AVFILTER_DEFINE_CLASS(fspp);
61
62 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
63 { 0, 48, 12, 60, 3, 51, 15, 63, },
64 { 32, 16, 44, 28, 35, 19, 47, 31, },
65 { 8, 56, 4, 52, 11, 59, 7, 55, },
66 { 40, 24, 36, 20, 43, 27, 39, 23, },
67 { 2, 50, 14, 62, 1, 49, 13, 61, },
68 { 34, 18, 46, 30, 33, 17, 45, 29, },
69 { 10, 58, 6, 54, 9, 57, 5, 53, },
70 { 42, 26, 38, 22, 41, 25, 37, 21, },
71 };
72
/*
 * Base threshold for each of the 64 coefficient positions, before scaling by
 * the strength-derived bias and the quantizer.
 *
 * The entries (the largest is 296) must not be too high: that causes too
 * strong a dependence on the quantizer, or possibly an overflow (to be
 * checked), which shows up as flashing.
 */
static const short custom_threshold[64] = {
     71, 296, 295, 237,  71,  40,  38,  19,
    245, 193, 185, 121, 102,  73,  53,  27,
    158, 129, 141, 107,  97,  73,  50,  26,
    102, 116, 109,  98,  82,  66,  45,  23,
     71,  94,  95,  81,  70,  56,  38,  20,
     56,  77,  74,  66,  56,  44,  30,  15,
     38,  53,  50,  45,  38,  30,  21,  11,
     20,  27,  26,  23,  20,  15,  11,   5
};
86
87 //This func reads from 1 slice, 1 and clears 0 & 1
88 static void store_slice_c(uint8_t *dst, int16_t *src,
89 ptrdiff_t dst_stride, ptrdiff_t src_stride,
90 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
91 {
92 int y, x;
93 #define STORE(pos) \
94 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
95 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
96 if (temp & 0x100) temp = ~(temp >> 31); \
97 dst[x + pos] = temp;
98
99 for (y = 0; y < height; y++) {
100 const uint8_t *d = dither[y];
101 for (x = 0; x < width; x += 8) {
102 int temp;
103 STORE(0);
104 STORE(1);
105 STORE(2);
106 STORE(3);
107 STORE(4);
108 STORE(5);
109 STORE(6);
110 STORE(7);
111 }
112 src += src_stride;
113 dst += dst_stride;
114 }
115 }
116
117 //This func reads from 2 slices, 0 & 2 and clears 2-nd
118 static void store_slice2_c(uint8_t *dst, int16_t *src,
119 ptrdiff_t dst_stride, ptrdiff_t src_stride,
120 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
121 {
122 int y, x;
123 #define STORE2(pos) \
124 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
125 src[x + pos + 16 * src_stride] = 0; \
126 if (temp & 0x100) temp = ~(temp >> 31); \
127 dst[x + pos] = temp;
128
129 for (y = 0; y < height; y++) {
130 const uint8_t *d = dither[y];
131 for (x = 0; x < width; x += 8) {
132 int temp;
133 STORE2(0);
134 STORE2(1);
135 STORE2(2);
136 STORE2(3);
137 STORE2(4);
138 STORE2(5);
139 STORE2(6);
140 STORE2(7);
141 }
142 src += src_stride;
143 dst += dst_stride;
144 }
145 }
146
/*
 * Scale the 64-entry threshold matrix by a quantizer.
 *
 * thr_adr_noq: unscaled thresholds (read only)
 * thr_adr:     receives q * thr_adr_noq[i] for i in 0..63, truncated to 16 bits
 * q:           multiplier
 */
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
    const int16_t *in  = thr_adr_noq;
    int16_t *out       = thr_adr;
    int16_t *const end = thr_adr + 64;

    while (out < end)
        *out++ = q * *in++;
}
153
154 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
155 int dst_stride, int src_stride,
156 int width, int height,
157 uint8_t *qp_store, int qp_stride, int is_luma)
158 {
159 int x, x0, y, es, qy, t;
160
161 const int stride = is_luma ? p->temp_stride : (width + 16);
162 const int step = 6 - p->log2_count;
163 const int qpsh = 4 - p->hsub * !is_luma;
164 const int qpsv = 4 - p->vsub * !is_luma;
165
166 DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
167 int16_t *block = (int16_t *)block_align;
168 int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
169
170 memset(block3, 0, 4 * 8 * BLOCKSZ);
171
172 if (!src || !dst) return;
173
174 for (y = 0; y < height; y++) {
175 int index = 8 + 8 * stride + y * stride;
176 memcpy(p->src + index, src + y * src_stride, width);
177 for (x = 0; x < 8; x++) {
178 p->src[index - x - 1] = p->src[index + x ];
179 p->src[index + width + x ] = p->src[index + width - x - 1];
180 }
181 }
182
183 for (y = 0; y < 8; y++) {
184 memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
185 memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
186 }
187 //FIXME (try edge emu)
188
189 for (y = 8; y < 24; y++)
190 memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
191
192 for (y = step; y < height + 8; y += step) { //step= 1,2
193 const int y1 = y - 8 + step; //l5-7 l4-6;
194 qy = y - 4;
195
196 if (qy > height - 1) qy = height - 1;
197 if (qy < 0) qy = 0;
198
199 qy = (qy >> qpsv) * qp_stride;
200 p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
201
202 for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
203 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
204
205 if (p->qp)
206 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
207 else
208 for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
209 t = x + x0 - 2; //correct t=x+x0-2-(y&1), but its the same
210
211 if (t < 0) t = 0; //t always < width-2
212
213 t = qp_store[qy + (t >> qpsh)];
214 t = ff_norm_qscale(t, p->qscale_type);
215
216 if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
217 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
218 }
219 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
220 memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
221 memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
222 }
223
224 es = width + 8 - x0; // 8, ...
225 if (es > 8)
226 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
227
228 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
229 if (es > 3)
230 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
231
232 if (!(y1 & 7) && y1) {
233 if (y1 & 8)
234 p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
235 dst_stride, stride, width, 8, 5 - p->log2_count);
236 else
237 p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
238 dst_stride, stride, width, 8, 5 - p->log2_count);
239 }
240 }
241
242 if (y & 7) { // height % 8 != 0
243 if (y & 8)
244 p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
245 dst_stride, stride, width, y&7, 5 - p->log2_count);
246 else
247 p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
248 dst_stride, stride, width, y&7, 5 - p->log2_count);
249 }
250 }
251
252 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
253 {
254 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
255 int_simd16_t tmp10, tmp11, tmp12, tmp13;
256 int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
257 int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
258
259 int16_t *dataptr;
260 int16_t *wsptr;
261 int16_t *threshold;
262 int ctr;
263
264 dataptr = data;
265 wsptr = output;
266
267 for (; cnt > 0; cnt -= 2) { //start positions
268 threshold = (int16_t *)thr_adr;//threshold_mtx
269 for (ctr = DCTSIZE; ctr > 0; ctr--) {
270 // Process columns from input, add to output.
271 tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
272 tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
273
274 tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
275 tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
276
277 tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
278 tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
279
280 tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
281 tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
282
283 // Even part of FDCT
284
285 tmp10 = tmp0 + tmp3;
286 tmp13 = tmp0 - tmp3;
287 tmp11 = tmp1 + tmp2;
288 tmp12 = tmp1 - tmp2;
289
290 d0 = tmp10 + tmp11;
291 d4 = tmp10 - tmp11;
292
293 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
294 d2 = tmp13 + z1;
295 d6 = tmp13 - z1;
296
297 // Even part of IDCT
298
299 THRESHOLD(tmp0, d0, threshold[0 * 8]);
300 THRESHOLD(tmp1, d2, threshold[2 * 8]);
301 THRESHOLD(tmp2, d4, threshold[4 * 8]);
302 THRESHOLD(tmp3, d6, threshold[6 * 8]);
303 tmp0 += 2;
304 tmp10 = (tmp0 + tmp2) >> 2;
305 tmp11 = (tmp0 - tmp2) >> 2;
306
307 tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
308 tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
309
310 tmp0 = tmp10 + tmp13; //->temps
311 tmp3 = tmp10 - tmp13; //->temps
312 tmp1 = tmp11 + tmp12; //->temps
313 tmp2 = tmp11 - tmp12; //->temps
314
315 // Odd part of FDCT
316
317 tmp10 = tmp4 + tmp5;
318 tmp11 = tmp5 + tmp6;
319 tmp12 = tmp6 + tmp7;
320
321 z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
322 z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
323 z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
324 z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
325
326 z11 = tmp7 + z3;
327 z13 = tmp7 - z3;
328
329 d5 = z13 + z2;
330 d3 = z13 - z2;
331 d1 = z11 + z4;
332 d7 = z11 - z4;
333
334 // Odd part of IDCT
335
336 THRESHOLD(tmp4, d1, threshold[1 * 8]);
337 THRESHOLD(tmp5, d3, threshold[3 * 8]);
338 THRESHOLD(tmp6, d5, threshold[5 * 8]);
339 THRESHOLD(tmp7, d7, threshold[7 * 8]);
340
341 //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
342 z13 = tmp6 + tmp5;
343 z10 = (tmp6 - tmp5) << 1;
344 z11 = tmp4 + tmp7;
345 z12 = (tmp4 - tmp7) << 1;
346
347 tmp7 = (z11 + z13) >> 2; //+2 !
348 tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
349 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
350 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
351 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
352
353 tmp6 = tmp12 - tmp7;
354 tmp5 = tmp11 - tmp6;
355 tmp4 = tmp10 + tmp5;
356
357 wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
358 wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
359 wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
360 wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
361 wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
362 wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
363 wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
364 wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
365 //
366 dataptr++; //next column
367 wsptr++;
368 threshold++;
369 }
370 dataptr += 8; //skip each second start pos
371 wsptr += 8;
372 }
373 }
374
375 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
376 {
377 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
378 int_simd16_t tmp10, tmp11, tmp12, tmp13;
379 int_simd16_t z5, z10, z11, z12, z13;
380 int16_t *outptr;
381 int16_t *wsptr;
382
383 cnt *= 4;
384 wsptr = workspace;
385 outptr = output_adr;
386 for (; cnt > 0; cnt--) {
387 // Even part
388 //Simd version reads 4x4 block and transposes it
389 tmp10 = wsptr[2] + wsptr[3];
390 tmp11 = wsptr[2] - wsptr[3];
391
392 tmp13 = wsptr[0] + wsptr[1];
393 tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
394
395 tmp0 = tmp10 + tmp13; //->temps
396 tmp3 = tmp10 - tmp13; //->temps
397 tmp1 = tmp11 + tmp12;
398 tmp2 = tmp11 - tmp12;
399
400 // Odd part
401 //Also transpose, with previous:
402 // ---- ---- ||||
403 // ---- ---- idct ||||
404 // ---- ---- ---> ||||
405 // ---- ---- ||||
406 z13 = wsptr[4] + wsptr[5];
407 z10 = wsptr[4] - wsptr[5];
408 z11 = wsptr[6] + wsptr[7];
409 z12 = wsptr[6] - wsptr[7];
410
411 tmp7 = z11 + z13;
412 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
413
414 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
415 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
416 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
417
418 tmp6 = (tmp12 << 3) - tmp7;
419 tmp5 = (tmp11 << 3) - tmp6;
420 tmp4 = (tmp10 << 3) + tmp5;
421
422 // Final output stage: descale and write column
423 outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
424 outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
425 outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
426 outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
427 outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
428 outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
429 outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
430 outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
431 outptr++;
432
433 wsptr += DCTSIZE; // advance pointer to next row
434 }
435 }
436
437 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
438 {
439 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
440 int_simd16_t tmp10, tmp11, tmp12, tmp13;
441 int_simd16_t z1, z2, z3, z4, z5, z11, z13;
442 int16_t *dataptr;
443
444 cnt *= 4;
445 // Pass 1: process rows.
446
447 dataptr = data;
448 for (; cnt > 0; cnt--) {
449 tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
450 tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
451 tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
452 tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
453 tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
454 tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
455 tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
456 tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
457
458 // Even part
459
460 tmp10 = tmp0 + tmp3;
461 tmp13 = tmp0 - tmp3;
462 tmp11 = tmp1 + tmp2;
463 tmp12 = tmp1 - tmp2;
464 //Even columns are written first, this leads to different order of columns
465 //in column_fidct(), but they are processed independently, so all ok.
466 //Later in the row_idct() columns readed at the same order.
467 dataptr[2] = tmp10 + tmp11;
468 dataptr[3] = tmp10 - tmp11;
469
470 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
471 dataptr[0] = tmp13 + z1;
472 dataptr[1] = tmp13 - z1;
473
474 // Odd part
475
476 tmp10 = (tmp4 + tmp5) << 2;
477 tmp11 = (tmp5 + tmp6) << 2;
478 tmp12 = (tmp6 + tmp7) << 2;
479
480 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
481 z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
482 z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
483 z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
484
485 z11 = tmp7 + z3;
486 z13 = tmp7 - z3;
487
488 dataptr[4] = z13 + z2;
489 dataptr[5] = z13 - z2;
490 dataptr[6] = z11 + z4;
491 dataptr[7] = z11 - z4;
492
493 pixels++; // advance pointer to next column
494 dataptr += DCTSIZE;
495 }
496 }
497
498 static const enum AVPixelFormat pix_fmts[] = {
499 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
500 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P,
501 AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P,
502 AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
503 AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P,
504 AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8,
505 AV_PIX_FMT_NONE
506 };
507
508 static int config_input(AVFilterLink *inlink)
509 {
510 AVFilterContext *ctx = inlink->dst;
511 FSPPContext *fspp = ctx->priv;
512 const int h = FFALIGN(inlink->h + 16, 16);
513 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
514
515 fspp->hsub = desc->log2_chroma_w;
516 fspp->vsub = desc->log2_chroma_h;
517
518 fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
519 fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
520 fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
521
522 if (!fspp->temp || !fspp->src)
523 return AVERROR(ENOMEM);
524
525 fspp->store_slice = store_slice_c;
526 fspp->store_slice2 = store_slice2_c;
527 fspp->mul_thrmat = mul_thrmat_c;
528 fspp->column_fidct = column_fidct_c;
529 fspp->row_idct = row_idct_c;
530 fspp->row_fdct = row_fdct_c;
531
532 #if ARCH_X86
533 ff_fspp_init_x86(fspp);
534 #endif
535
536 return 0;
537 }
538
539 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
540 {
541 AVFilterContext *ctx = inlink->dst;
542 FSPPContext *fspp = ctx->priv;
543 AVFilterLink *outlink = ctx->outputs[0];
544 AVFrame *out = in;
545
546 int qp_stride = 0;
547 int8_t *qp_table = NULL;
548 int i, bias;
549 int ret = 0;
550 int custom_threshold_m[64];
551
552 bias = (1 << 4) + fspp->strength;
553
554 for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
555 custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
556
557 for (i = 0; i < 8; i++) {
558 fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
559 |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
560 |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
561 |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
562
563 fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
564 |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
565 |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
566 |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
567 }
568
569 if (fspp->qp)
570 fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
571
572 /* if we are not in a constant user quantizer mode and we don't want to use
573 * the quantizers from the B-frames (B-frames often have a higher QP), we
574 * need to save the qp table from the last non B-frame; this is what the
575 * following code block does */
576 if (!fspp->qp && (fspp->use_bframe_qp || in->pict_type != AV_PICTURE_TYPE_B)) {
577 ret = ff_qp_table_extract(in, &qp_table, &qp_stride, NULL, &fspp->qscale_type);
578 if (ret < 0) {
579 av_frame_free(&in);
580 return ret;
581 }
582
583 if (!fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
584 av_freep(&fspp->non_b_qp_table);
585 fspp->non_b_qp_table = qp_table;
586 fspp->non_b_qp_stride = qp_stride;
587 }
588 }
589
590 if (fspp->log2_count && !ctx->is_disabled) {
591 if (!fspp->use_bframe_qp && fspp->non_b_qp_table) {
592 qp_table = fspp->non_b_qp_table;
593 qp_stride = fspp->non_b_qp_stride;
594 }
595
596 if (qp_table || fspp->qp) {
597 const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
598 const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);
599
600 /* get a new frame if in-place is not possible or if the dimensions
601 * are not multiple of 8 */
602 if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
603 const int aligned_w = FFALIGN(inlink->w, 8);
604 const int aligned_h = FFALIGN(inlink->h, 8);
605
606 out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
607 if (!out) {
608 av_frame_free(&in);
609 ret = AVERROR(ENOMEM);
610 goto finish;
611 }
612 av_frame_copy_props(out, in);
613 out->width = in->width;
614 out->height = in->height;
615 }
616
617 filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
618 inlink->w, inlink->h, qp_table, qp_stride, 1);
619 filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
620 cw, ch, qp_table, qp_stride, 0);
621 filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
622 cw, ch, qp_table, qp_stride, 0);
623 emms_c();
624 }
625 }
626
627 if (in != out) {
628 if (in->data[3])
629 av_image_copy_plane(out->data[3], out->linesize[3],
630 in ->data[3], in ->linesize[3],
631 inlink->w, inlink->h);
632 av_frame_free(&in);
633 }
634 ret = ff_filter_frame(outlink, out);
635 finish:
636 if (qp_table != fspp->non_b_qp_table)
637 av_freep(&qp_table);
638 return ret;
639 }
640
641 static av_cold void uninit(AVFilterContext *ctx)
642 {
643 FSPPContext *fspp = ctx->priv;
644 av_freep(&fspp->temp);
645 av_freep(&fspp->src);
646 av_freep(&fspp->non_b_qp_table);
647 }
648
649 static const AVFilterPad fspp_inputs[] = {
650 {
651 .name = "default",
652 .type = AVMEDIA_TYPE_VIDEO,
653 .config_props = config_input,
654 .filter_frame = filter_frame,
655 },
656 };
657
658 const AVFilter ff_vf_fspp = {
659 .name = "fspp",
660 .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
661 .priv_size = sizeof(FSPPContext),
662 .uninit = uninit,
663 FILTER_INPUTS(fspp_inputs),
664 FILTER_OUTPUTS(ff_video_default_filterpad),
665 FILTER_PIXFMTS_ARRAY(pix_fmts),
666 .priv_class = &fspp_class,
667 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
668 };
669