FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/vf_fspp.c
Date: 2022-12-05 03:11:11
Exec Total Coverage
Lines: 0 327 0.0%
Functions: 0 10 0.0%
Branches: 0 158 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 /**
24 * @file
25 * Fast Simple Post-processing filter
26 * This implementation is based on an algorithm described in
27 * "Aria Nosratinia Embedded Post-Processing for
28 * Enhancement of Compressed Images (1999)"
29 * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30 * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31 * them can be performed once per block, not per pixel. This allows for much
32 * higher speed.
33 *
34 * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35 * project, and ported by Arwa Arif for FFmpeg.
36 */
37
38 #include "libavutil/imgutils.h"
39 #include "libavutil/mem_internal.h"
40 #include "libavutil/opt.h"
41 #include "libavutil/pixdesc.h"
42 #include "internal.h"
43 #include "qp_table.h"
44 #include "vf_fspp.h"
45
46 #define OFFSET(x) offsetof(FSPPContext, x)
47 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
48 static const AVOption fspp_options[] = {
49 { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
50 { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
51 { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
52 { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS },
53 { NULL }
54 };
55
56 AVFILTER_DEFINE_CLASS(fspp);
57
58 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
59 { 0, 48, 12, 60, 3, 51, 15, 63, },
60 { 32, 16, 44, 28, 35, 19, 47, 31, },
61 { 8, 56, 4, 52, 11, 59, 7, 55, },
62 { 40, 24, 36, 20, 43, 27, 39, 23, },
63 { 2, 50, 14, 62, 1, 49, 13, 61, },
64 { 34, 18, 46, 30, 33, 17, 45, 29, },
65 { 10, 58, 6, 54, 9, 57, 5, 53, },
66 { 42, 26, 38, 22, 41, 25, 37, 21, },
67 };
68
/*
 * Base threshold matrix (8 rows of 8 entries), scaled per frame by the
 * "strength" option in filter_frame().
 *
 * The values (e.g. 296) must not be too high: that causes too strong a
 * dependence on the quantizer, or possibly an overflow (to be checked),
 * which shows up as flashing.
 */
static const short custom_threshold[64] = {
     71, 296, 295, 237,  71,  40,  38,  19,
    245, 193, 185, 121, 102,  73,  53,  27,
    158, 129, 141, 107,  97,  73,  50,  26,
    102, 116, 109,  98,  82,  66,  45,  23,
     71,  94,  95,  81,  70,  56,  38,  20,
     56,  77,  74,  66,  56,  44,  30,  15,
     38,  53,  50,  45,  38,  30,  21,  11,
     20,  27,  26,  23,  20,  15,  11,   5
};
82
83 //This func reads from 1 slice, 1 and clears 0 & 1
84 static void store_slice_c(uint8_t *dst, int16_t *src,
85 ptrdiff_t dst_stride, ptrdiff_t src_stride,
86 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
87 {
88 int y, x;
89 #define STORE(pos) \
90 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
91 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
92 if (temp & 0x100) temp = ~(temp >> 31); \
93 dst[x + pos] = temp;
94
95 for (y = 0; y < height; y++) {
96 const uint8_t *d = dither[y];
97 for (x = 0; x < width; x += 8) {
98 int temp;
99 STORE(0);
100 STORE(1);
101 STORE(2);
102 STORE(3);
103 STORE(4);
104 STORE(5);
105 STORE(6);
106 STORE(7);
107 }
108 src += src_stride;
109 dst += dst_stride;
110 }
111 }
112
113 //This func reads from 2 slices, 0 & 2 and clears 2-nd
114 static void store_slice2_c(uint8_t *dst, int16_t *src,
115 ptrdiff_t dst_stride, ptrdiff_t src_stride,
116 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
117 {
118 int y, x;
119 #define STORE2(pos) \
120 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
121 src[x + pos + 16 * src_stride] = 0; \
122 if (temp & 0x100) temp = ~(temp >> 31); \
123 dst[x + pos] = temp;
124
125 for (y = 0; y < height; y++) {
126 const uint8_t *d = dither[y];
127 for (x = 0; x < width; x += 8) {
128 int temp;
129 STORE2(0);
130 STORE2(1);
131 STORE2(2);
132 STORE2(3);
133 STORE2(4);
134 STORE2(5);
135 STORE2(6);
136 STORE2(7);
137 }
138 src += src_stride;
139 dst += dst_stride;
140 }
141 }
142
/*
 * Scale a 64-entry threshold matrix by the quantizer q.
 *
 * thr_adr_noq: source matrix (64 entries, not modified)
 * thr_adr:     destination matrix (64 entries); entry n becomes
 *              q * thr_adr_noq[n], truncated to 16 bits on store
 */
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
    const int16_t *in  = thr_adr_noq;
    int16_t       *out = thr_adr;
    int remaining;

    for (remaining = 64; remaining > 0; remaining--)
        *out++ = *in++ * q;
}
149
150 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
151 int dst_stride, int src_stride,
152 int width, int height,
153 uint8_t *qp_store, int qp_stride, int is_luma)
154 {
155 int x, x0, y, es, qy, t;
156
157 const int stride = is_luma ? p->temp_stride : (width + 16);
158 const int step = 6 - p->log2_count;
159 const int qpsh = 4 - p->hsub * !is_luma;
160 const int qpsv = 4 - p->vsub * !is_luma;
161
162 DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
163 int16_t *block = (int16_t *)block_align;
164 int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
165
166 memset(block3, 0, 4 * 8 * BLOCKSZ);
167
168 if (!src || !dst) return;
169
170 for (y = 0; y < height; y++) {
171 int index = 8 + 8 * stride + y * stride;
172 memcpy(p->src + index, src + y * src_stride, width);
173 for (x = 0; x < 8; x++) {
174 p->src[index - x - 1] = p->src[index + x ];
175 p->src[index + width + x ] = p->src[index + width - x - 1];
176 }
177 }
178
179 for (y = 0; y < 8; y++) {
180 memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
181 memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
182 }
183 //FIXME (try edge emu)
184
185 for (y = 8; y < 24; y++)
186 memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
187
188 for (y = step; y < height + 8; y += step) { //step= 1,2
189 const int y1 = y - 8 + step; //l5-7 l4-6;
190 qy = y - 4;
191
192 if (qy > height - 1) qy = height - 1;
193 if (qy < 0) qy = 0;
194
195 qy = (qy >> qpsv) * qp_stride;
196 p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
197
198 for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
199 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
200
201 if (p->qp)
202 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
203 else
204 for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
205 t = x + x0 - 2; //correct t=x+x0-2-(y&1), but its the same
206
207 if (t < 0) t = 0; //t always < width-2
208
209 t = qp_store[qy + (t >> qpsh)];
210 t = ff_norm_qscale(t, p->qscale_type);
211
212 if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
213 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
214 }
215 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
216 memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
217 memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
218 }
219
220 es = width + 8 - x0; // 8, ...
221 if (es > 8)
222 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
223
224 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
225 if (es > 3)
226 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
227
228 if (!(y1 & 7) && y1) {
229 if (y1 & 8)
230 p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
231 dst_stride, stride, width, 8, 5 - p->log2_count);
232 else
233 p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
234 dst_stride, stride, width, 8, 5 - p->log2_count);
235 }
236 }
237
238 if (y & 7) { // height % 8 != 0
239 if (y & 8)
240 p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
241 dst_stride, stride, width, y&7, 5 - p->log2_count);
242 else
243 p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
244 dst_stride, stride, width, y&7, 5 - p->log2_count);
245 }
246 }
247
248 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
249 {
250 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
251 int_simd16_t tmp10, tmp11, tmp12, tmp13;
252 int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
253 int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
254
255 int16_t *dataptr;
256 int16_t *wsptr;
257 int16_t *threshold;
258 int ctr;
259
260 dataptr = data;
261 wsptr = output;
262
263 for (; cnt > 0; cnt -= 2) { //start positions
264 threshold = (int16_t *)thr_adr;//threshold_mtx
265 for (ctr = DCTSIZE; ctr > 0; ctr--) {
266 // Process columns from input, add to output.
267 tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
268 tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
269
270 tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
271 tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
272
273 tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
274 tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
275
276 tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
277 tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
278
279 // Even part of FDCT
280
281 tmp10 = tmp0 + tmp3;
282 tmp13 = tmp0 - tmp3;
283 tmp11 = tmp1 + tmp2;
284 tmp12 = tmp1 - tmp2;
285
286 d0 = tmp10 + tmp11;
287 d4 = tmp10 - tmp11;
288
289 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
290 d2 = tmp13 + z1;
291 d6 = tmp13 - z1;
292
293 // Even part of IDCT
294
295 THRESHOLD(tmp0, d0, threshold[0 * 8]);
296 THRESHOLD(tmp1, d2, threshold[2 * 8]);
297 THRESHOLD(tmp2, d4, threshold[4 * 8]);
298 THRESHOLD(tmp3, d6, threshold[6 * 8]);
299 tmp0 += 2;
300 tmp10 = (tmp0 + tmp2) >> 2;
301 tmp11 = (tmp0 - tmp2) >> 2;
302
303 tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
304 tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
305
306 tmp0 = tmp10 + tmp13; //->temps
307 tmp3 = tmp10 - tmp13; //->temps
308 tmp1 = tmp11 + tmp12; //->temps
309 tmp2 = tmp11 - tmp12; //->temps
310
311 // Odd part of FDCT
312
313 tmp10 = tmp4 + tmp5;
314 tmp11 = tmp5 + tmp6;
315 tmp12 = tmp6 + tmp7;
316
317 z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
318 z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
319 z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
320 z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
321
322 z11 = tmp7 + z3;
323 z13 = tmp7 - z3;
324
325 d5 = z13 + z2;
326 d3 = z13 - z2;
327 d1 = z11 + z4;
328 d7 = z11 - z4;
329
330 // Odd part of IDCT
331
332 THRESHOLD(tmp4, d1, threshold[1 * 8]);
333 THRESHOLD(tmp5, d3, threshold[3 * 8]);
334 THRESHOLD(tmp6, d5, threshold[5 * 8]);
335 THRESHOLD(tmp7, d7, threshold[7 * 8]);
336
337 //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
338 z13 = tmp6 + tmp5;
339 z10 = (tmp6 - tmp5) << 1;
340 z11 = tmp4 + tmp7;
341 z12 = (tmp4 - tmp7) << 1;
342
343 tmp7 = (z11 + z13) >> 2; //+2 !
344 tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
345 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
346 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
347 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
348
349 tmp6 = tmp12 - tmp7;
350 tmp5 = tmp11 - tmp6;
351 tmp4 = tmp10 + tmp5;
352
353 wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
354 wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
355 wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
356 wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
357 wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
358 wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
359 wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
360 wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
361 //
362 dataptr++; //next column
363 wsptr++;
364 threshold++;
365 }
366 dataptr += 8; //skip each second start pos
367 wsptr += 8;
368 }
369 }
370
371 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
372 {
373 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
374 int_simd16_t tmp10, tmp11, tmp12, tmp13;
375 int_simd16_t z5, z10, z11, z12, z13;
376 int16_t *outptr;
377 int16_t *wsptr;
378
379 cnt *= 4;
380 wsptr = workspace;
381 outptr = output_adr;
382 for (; cnt > 0; cnt--) {
383 // Even part
384 //Simd version reads 4x4 block and transposes it
385 tmp10 = wsptr[2] + wsptr[3];
386 tmp11 = wsptr[2] - wsptr[3];
387
388 tmp13 = wsptr[0] + wsptr[1];
389 tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
390
391 tmp0 = tmp10 + tmp13; //->temps
392 tmp3 = tmp10 - tmp13; //->temps
393 tmp1 = tmp11 + tmp12;
394 tmp2 = tmp11 - tmp12;
395
396 // Odd part
397 //Also transpose, with previous:
398 // ---- ---- ||||
399 // ---- ---- idct ||||
400 // ---- ---- ---> ||||
401 // ---- ---- ||||
402 z13 = wsptr[4] + wsptr[5];
403 z10 = wsptr[4] - wsptr[5];
404 z11 = wsptr[6] + wsptr[7];
405 z12 = wsptr[6] - wsptr[7];
406
407 tmp7 = z11 + z13;
408 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
409
410 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
411 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
412 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
413
414 tmp6 = (tmp12 << 3) - tmp7;
415 tmp5 = (tmp11 << 3) - tmp6;
416 tmp4 = (tmp10 << 3) + tmp5;
417
418 // Final output stage: descale and write column
419 outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
420 outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
421 outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
422 outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
423 outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
424 outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
425 outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
426 outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
427 outptr++;
428
429 wsptr += DCTSIZE; // advance pointer to next row
430 }
431 }
432
433 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
434 {
435 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
436 int_simd16_t tmp10, tmp11, tmp12, tmp13;
437 int_simd16_t z1, z2, z3, z4, z5, z11, z13;
438 int16_t *dataptr;
439
440 cnt *= 4;
441 // Pass 1: process rows.
442
443 dataptr = data;
444 for (; cnt > 0; cnt--) {
445 tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
446 tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
447 tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
448 tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
449 tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
450 tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
451 tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
452 tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
453
454 // Even part
455
456 tmp10 = tmp0 + tmp3;
457 tmp13 = tmp0 - tmp3;
458 tmp11 = tmp1 + tmp2;
459 tmp12 = tmp1 - tmp2;
460 //Even columns are written first, this leads to different order of columns
461 //in column_fidct(), but they are processed independently, so all ok.
462 //Later in the row_idct() columns readed at the same order.
463 dataptr[2] = tmp10 + tmp11;
464 dataptr[3] = tmp10 - tmp11;
465
466 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
467 dataptr[0] = tmp13 + z1;
468 dataptr[1] = tmp13 - z1;
469
470 // Odd part
471
472 tmp10 = (tmp4 + tmp5) << 2;
473 tmp11 = (tmp5 + tmp6) << 2;
474 tmp12 = (tmp6 + tmp7) << 2;
475
476 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
477 z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
478 z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
479 z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
480
481 z11 = tmp7 + z3;
482 z13 = tmp7 - z3;
483
484 dataptr[4] = z13 + z2;
485 dataptr[5] = z13 - z2;
486 dataptr[6] = z11 + z4;
487 dataptr[7] = z11 - z4;
488
489 pixels++; // advance pointer to next column
490 dataptr += DCTSIZE;
491 }
492 }
493
494 static const enum AVPixelFormat pix_fmts[] = {
495 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
496 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P,
497 AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P,
498 AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
499 AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P,
500 AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8,
501 AV_PIX_FMT_NONE
502 };
503
504 static int config_input(AVFilterLink *inlink)
505 {
506 AVFilterContext *ctx = inlink->dst;
507 FSPPContext *fspp = ctx->priv;
508 const int h = FFALIGN(inlink->h + 16, 16);
509 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
510
511 fspp->hsub = desc->log2_chroma_w;
512 fspp->vsub = desc->log2_chroma_h;
513
514 fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
515 fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
516 fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
517
518 if (!fspp->temp || !fspp->src)
519 return AVERROR(ENOMEM);
520
521 fspp->store_slice = store_slice_c;
522 fspp->store_slice2 = store_slice2_c;
523 fspp->mul_thrmat = mul_thrmat_c;
524 fspp->column_fidct = column_fidct_c;
525 fspp->row_idct = row_idct_c;
526 fspp->row_fdct = row_fdct_c;
527
528 #if ARCH_X86
529 ff_fspp_init_x86(fspp);
530 #endif
531
532 return 0;
533 }
534
535 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
536 {
537 AVFilterContext *ctx = inlink->dst;
538 FSPPContext *fspp = ctx->priv;
539 AVFilterLink *outlink = ctx->outputs[0];
540 AVFrame *out = in;
541
542 int qp_stride = 0;
543 int8_t *qp_table = NULL;
544 int i, bias;
545 int ret = 0;
546 int custom_threshold_m[64];
547
548 bias = (1 << 4) + fspp->strength;
549
550 for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
551 custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
552
553 for (i = 0; i < 8; i++) {
554 fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
555 |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
556 |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
557 |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
558
559 fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
560 |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
561 |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
562 |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
563 }
564
565 if (fspp->qp)
566 fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
567
568 /* if we are not in a constant user quantizer mode and we don't want to use
569 * the quantizers from the B-frames (B-frames often have a higher QP), we
570 * need to save the qp table from the last non B-frame; this is what the
571 * following code block does */
572 if (!fspp->qp && (fspp->use_bframe_qp || in->pict_type != AV_PICTURE_TYPE_B)) {
573 ret = ff_qp_table_extract(in, &qp_table, &qp_stride, NULL, &fspp->qscale_type);
574 if (ret < 0) {
575 av_frame_free(&in);
576 return ret;
577 }
578
579 if (!fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
580 av_freep(&fspp->non_b_qp_table);
581 fspp->non_b_qp_table = qp_table;
582 fspp->non_b_qp_stride = qp_stride;
583 }
584 }
585
586 if (fspp->log2_count && !ctx->is_disabled) {
587 if (!fspp->use_bframe_qp && fspp->non_b_qp_table) {
588 qp_table = fspp->non_b_qp_table;
589 qp_stride = fspp->non_b_qp_stride;
590 }
591
592 if (qp_table || fspp->qp) {
593 const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
594 const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);
595
596 /* get a new frame if in-place is not possible or if the dimensions
597 * are not multiple of 8 */
598 if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
599 const int aligned_w = FFALIGN(inlink->w, 8);
600 const int aligned_h = FFALIGN(inlink->h, 8);
601
602 out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
603 if (!out) {
604 av_frame_free(&in);
605 ret = AVERROR(ENOMEM);
606 goto finish;
607 }
608 av_frame_copy_props(out, in);
609 out->width = in->width;
610 out->height = in->height;
611 }
612
613 filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
614 inlink->w, inlink->h, qp_table, qp_stride, 1);
615 filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
616 cw, ch, qp_table, qp_stride, 0);
617 filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
618 cw, ch, qp_table, qp_stride, 0);
619 emms_c();
620 }
621 }
622
623 if (in != out) {
624 if (in->data[3])
625 av_image_copy_plane(out->data[3], out->linesize[3],
626 in ->data[3], in ->linesize[3],
627 inlink->w, inlink->h);
628 av_frame_free(&in);
629 }
630 ret = ff_filter_frame(outlink, out);
631 finish:
632 if (qp_table != fspp->non_b_qp_table)
633 av_freep(&qp_table);
634 return ret;
635 }
636
637 static av_cold void uninit(AVFilterContext *ctx)
638 {
639 FSPPContext *fspp = ctx->priv;
640 av_freep(&fspp->temp);
641 av_freep(&fspp->src);
642 av_freep(&fspp->non_b_qp_table);
643 }
644
645 static const AVFilterPad fspp_inputs[] = {
646 {
647 .name = "default",
648 .type = AVMEDIA_TYPE_VIDEO,
649 .config_props = config_input,
650 .filter_frame = filter_frame,
651 },
652 };
653
654 static const AVFilterPad fspp_outputs[] = {
655 {
656 .name = "default",
657 .type = AVMEDIA_TYPE_VIDEO,
658 },
659 };
660
661 const AVFilter ff_vf_fspp = {
662 .name = "fspp",
663 .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
664 .priv_size = sizeof(FSPPContext),
665 .uninit = uninit,
666 FILTER_INPUTS(fspp_inputs),
667 FILTER_OUTPUTS(fspp_outputs),
668 FILTER_PIXFMTS_ARRAY(pix_fmts),
669 .priv_class = &fspp_class,
670 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
671 };
672