FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/vf_fspp.c
Date: 2024-04-19 17:50:32
Exec Total Coverage
Lines: 0 334 0.0%
Functions: 0 10 0.0%
Branches: 0 158 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 /**
24 * @file
25 * Fast Simple Post-processing filter
26 * This implementation is based on an algorithm described in
27 * "Aria Nosratinia Embedded Post-Processing for
28 * Enhancement of Compressed Images (1999)"
29 * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30 * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31 * them can be performed once per block, not per pixel. This allows for much
32 * higher speed.
33 *
34 * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35 * project, and ported by Arwa Arif for FFmpeg.
36 */
37
38 #include "libavutil/emms.h"
39 #include "libavutil/imgutils.h"
40 #include "libavutil/mem.h"
41 #include "libavutil/mem_internal.h"
42 #include "libavutil/opt.h"
43 #include "libavutil/pixdesc.h"
44 #include "internal.h"
45 #include "qp_table.h"
46 #include "vf_fspp.h"
47 #include "video.h"
48
#define OFFSET(x) offsetof(FSPPContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
/* User-visible options:
 *  quality       - number of filtering levels (log2_count), 4..MAX_LEVEL
 *  qp            - force a constant quantizer (0 = use the frame's QP table)
 *  strength      - threshold bias added to the base of 16, -15..32
 *  use_bframe_qp - also take QP tables from B-frames (often higher QP) */
static const AVOption fspp_options[] = {
    { "quality",       "set quality",                          OFFSET(log2_count),    AV_OPT_TYPE_INT,  {.i64 = 4},   4, MAX_LEVEL, FLAGS },
    { "qp",            "force a constant quantizer parameter", OFFSET(qp),            AV_OPT_TYPE_INT,  {.i64 = 0},   0, 64,        FLAGS },
    { "strength",      "set filter strength",                  OFFSET(strength),      AV_OPT_TYPE_INT,  {.i64 = 0}, -15, 32,        FLAGS },
    { "use_bframe_qp", "use B-frames' QP",                     OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL, {.i64 = 0},   0, 1,         FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(fspp);
60
/* 8x8 ordered-dither matrix (values 0..63), added (shifted by log2_scale)
 * before the final rounding in the store_slice functions. */
DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
    {  0,  48,  12,  60,   3,  51,  15,  63, },
    { 32,  16,  44,  28,  35,  19,  47,  31, },
    {  8,  56,   4,  52,  11,  59,   7,  55, },
    { 40,  24,  36,  20,  43,  27,  39,  23, },
    {  2,  50,  14,  62,   1,  49,  13,  61, },
    { 34,  18,  46,  30,  33,  17,  45,  29, },
    { 10,  58,   6,  54,   9,  57,   5,  53, },
    { 42,  26,  38,  22,  41,  25,  37,  21, },
};
71
/* Base per-coefficient threshold matrix (DCT frequency domain); scaled by
 * (16 + strength)/71 and by the quantizer before use (see filter_frame /
 * mul_thrmat).  Empirically tuned — see the FIXME in filter_frame. */
static const short custom_threshold[64] = {
// values (296) can't be too high
// -it causes too big quant dependence
// or maybe overflow(check), which results in some flashing
     71, 296, 295, 237,  71,  40,  38,  19,
    245, 193, 185, 121, 102,  73,  53,  27,
    158, 129, 141, 107,  97,  73,  50,  26,
    102, 116, 109,  98,  82,  66,  45,  23,
     71,  94,  95,  81,  70,  56,  38,  20,
     56,  77,  74,  66,  56,  44,  30,  15,
     38,  53,  50,  45,  38,  30,  21,  11,
     20,  27,  26,  23,  20,  15,  11,   5
};
85
86 //This func reads from 1 slice, 1 and clears 0 & 1
87 static void store_slice_c(uint8_t *dst, int16_t *src,
88 ptrdiff_t dst_stride, ptrdiff_t src_stride,
89 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
90 {
91 int y, x;
92 #define STORE(pos) \
93 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
94 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
95 if (temp & 0x100) temp = ~(temp >> 31); \
96 dst[x + pos] = temp;
97
98 for (y = 0; y < height; y++) {
99 const uint8_t *d = dither[y];
100 for (x = 0; x < width; x += 8) {
101 int temp;
102 STORE(0);
103 STORE(1);
104 STORE(2);
105 STORE(3);
106 STORE(4);
107 STORE(5);
108 STORE(6);
109 STORE(7);
110 }
111 src += src_stride;
112 dst += dst_stride;
113 }
114 }
115
116 //This func reads from 2 slices, 0 & 2 and clears 2-nd
117 static void store_slice2_c(uint8_t *dst, int16_t *src,
118 ptrdiff_t dst_stride, ptrdiff_t src_stride,
119 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
120 {
121 int y, x;
122 #define STORE2(pos) \
123 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
124 src[x + pos + 16 * src_stride] = 0; \
125 if (temp & 0x100) temp = ~(temp >> 31); \
126 dst[x + pos] = temp;
127
128 for (y = 0; y < height; y++) {
129 const uint8_t *d = dither[y];
130 for (x = 0; x < width; x += 8) {
131 int temp;
132 STORE2(0);
133 STORE2(1);
134 STORE2(2);
135 STORE2(3);
136 STORE2(4);
137 STORE2(5);
138 STORE2(6);
139 STORE2(7);
140 }
141 src += src_stride;
142 dst += dst_stride;
143 }
144 }
145
/* Scale the quantizer-independent threshold matrix by quantizer q,
 * producing the per-frame threshold matrix used by column_fidct. */
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
    for (int i = 0; i < 64; i++)
        thr_adr[i] = q * thr_adr_noq[i];
}
152
/* Apply the FSPP algorithm to one plane.
 *
 * The plane is copied into p->src with an 8-pixel mirrored border on all
 * sides.  Rows are then processed in shifted passes (step = 6 - log2_count,
 * so higher quality means more overlapped passes): row FDCT -> per-block
 * thresholding + column (I)DCT (column_fidct) -> row IDCT accumulated into
 * the 16-row ring buffer p->temp, which store_slice/store_slice2 finally
 * dither down into dst.
 *
 * qp_store/qp_stride give the per-macroblock quantizers (ignored when a
 * constant p->qp is forced); is_luma selects plane geometry: chroma QP
 * lookups are additionally shifted by hsub/vsub and a local stride of
 * width + 16 is used. */
static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
                   int dst_stride, int src_stride,
                   int width, int height,
                   uint8_t *qp_store, int qp_stride, int is_luma)
{
    int x, x0, y, es, qy, t;

    const int stride = is_luma ? p->temp_stride : (width + 16);
    const int step = 6 - p->log2_count;
    const int qpsh = 4 - p->hsub * !is_luma;
    const int qpsv = 4 - p->vsub * !is_luma;

    /* two scratch areas: 'block' holds FDCT rows, 'block3' the filtered
     * output rows being cycled toward the row IDCT */
    DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
    int16_t *block = (int16_t *)block_align;
    int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);

    /* NOTE(review): clears 4*8*BLOCKSZ bytes, i.e. only a quarter of
     * block3's int16 extent — matches upstream; verify the remainder is
     * always overwritten before being read. */
    memset(block3, 0, 4 * 8 * BLOCKSZ);

    if (!src || !dst) return;

    /* copy the plane into p->src and mirror 8 columns on each side */
    for (y = 0; y < height; y++) {
        int index = 8 + 8 * stride + y * stride;
        memcpy(p->src + index, src + y * src_stride, width);
        for (x = 0; x < 8; x++) {
            p->src[index         - x - 1] = p->src[index +         x    ];
            p->src[index + width + x    ] = p->src[index + width - x - 1];
        }
    }

    /* mirror 8 rows above and below the plane */
    for (y = 0; y < 8; y++) {
        memcpy(p->src + (       7 - y) * stride, p->src + (       y + 8) * stride, stride);
        memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
    }
    //FIXME (try edge emu)

    /* zero the accumulator rows that will receive the first passes */
    for (y = 8; y < 24; y++)
        memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));

    for (y = step; y < height + 8; y += step) {    //step= 1,2
        const int y1 = y - 8 + step;                 //l5-7  l4-6;
        qy = y - 4;

        /* clamp the QP-table row index to the plane */
        if (qy > height - 1) qy = height - 1;
        if (qy < 0) qy = 0;

        qy = (qy >> qpsv) * qp_stride;
        p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);

        /* main loop: BLOCKSZ-1 blocks per iteration, with one block of
         * overlap carried over via the memmove cycling below */
        for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
            p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));

            if (p->qp)
                /* constant quantizer: threshold matrix already prepared */
                p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
            else
                /* per-macroblock quantizer: rebuild the threshold matrix
                 * whenever the QP changes along the row */
                for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
                    t = x + x0 - 2; //correct t=x+x0-2-(y&1), but its the same

                    if (t < 0) t = 0;   //t always < width-2

                    t = qp_store[qy + (t >> qpsh)];
                    t = ff_norm_qscale(t, p->qscale_type);

                    if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
                    p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
                }
            p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
            memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
            memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
        }

        /* tail: remaining columns that didn't fill a whole iteration */
        es = width + 8 - x0; // 8, ...
        if (es > 8)
            p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);

        p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
        if (es > 3)
            p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);

        /* every 8 finished rows, flush one slice of the ring buffer to dst */
        if (!(y1 & 7) && y1) {
            if (y1 & 8)
                p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
                               dst_stride, stride, width, 8, 5 - p->log2_count);
            else
                p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
                                dst_stride, stride, width, 8, 5 - p->log2_count);
        }
    }

    /* flush the final partial slice */
    if (y & 7) { // height % 8 != 0
        if (y & 8)
            p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
                           dst_stride, stride, width, y&7, 5 - p->log2_count);
        else
            p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
                            dst_stride, stride, width, y&7, 5 - p->log2_count);
    }
}
250
/* Combined column transform: forward DCT, soft-threshold every coefficient
 * against thr_adr (the quantizer-scaled threshold matrix), then inverse
 * DCT, accumulating the result into 'output'.
 *
 * Uses the AAN fast (I)DCT factorization; MULTIPLY16H, THRESHOLD and the
 * FIX_* constants come from vf_fspp.h.  'cnt' counts column start
 * positions: each outer iteration processes one block of 8 columns and
 * then skips the next 8 (every second start position, matching the
 * interleaved layout produced by row_fdct). */
static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
{
    int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    int_simd16_t tmp10, tmp11, tmp12, tmp13;
    int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
    int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;

    int16_t *dataptr;
    int16_t *wsptr;
    int16_t *threshold;
    int ctr;

    dataptr = data;
    wsptr = output;

    for (; cnt > 0; cnt -= 2) { //start positions
        threshold = (int16_t *)thr_adr;//threshold_mtx
        for (ctr = DCTSIZE; ctr > 0; ctr--) {
            // Process columns from input, add to output.
            tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
            tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];

            tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
            tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];

            tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
            tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];

            tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
            tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];

            // Even part of FDCT

            tmp10 = tmp0 + tmp3;
            tmp13 = tmp0 - tmp3;
            tmp11 = tmp1 + tmp2;
            tmp12 = tmp1 - tmp2;

            d0 = tmp10 + tmp11;
            d4 = tmp10 - tmp11;

            z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
            d2 = tmp13 + z1;
            d6 = tmp13 - z1;

            // Even part of IDCT
            // (thresholding happens on the freshly computed coefficients
            //  d0..d7, then the inverse transform is applied immediately)

            THRESHOLD(tmp0, d0, threshold[0 * 8]);
            THRESHOLD(tmp1, d2, threshold[2 * 8]);
            THRESHOLD(tmp2, d4, threshold[4 * 8]);
            THRESHOLD(tmp3, d6, threshold[6 * 8]);
            tmp0 += 2;  // rounding bias before the >> 2 descale below
            tmp10 = (tmp0 + tmp2) >> 2;
            tmp11 = (tmp0 - tmp2) >> 2;

            tmp13 = (tmp1 + tmp3) >>2; //+2 !  (psnr decides)
            tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2

            tmp0 = tmp10 + tmp13; //->temps
            tmp3 = tmp10 - tmp13; //->temps
            tmp1 = tmp11 + tmp12; //->temps
            tmp2 = tmp11 - tmp12; //->temps

            // Odd part of FDCT

            tmp10 = tmp4 + tmp5;
            tmp11 = tmp5 + tmp6;
            tmp12 = tmp6 + tmp7;

            z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
            z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
            z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
            z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);

            z11 = tmp7 + z3;
            z13 = tmp7 - z3;

            d5 = z13 + z2;
            d3 = z13 - z2;
            d1 = z11 + z4;
            d7 = z11 - z4;

            // Odd part of IDCT

            THRESHOLD(tmp4, d1, threshold[1 * 8]);
            THRESHOLD(tmp5, d3, threshold[3 * 8]);
            THRESHOLD(tmp6, d5, threshold[5 * 8]);
            THRESHOLD(tmp7, d7, threshold[7 * 8]);

            //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
            z13 = tmp6 + tmp5;
            z10 = (tmp6 - tmp5) << 1;
            z11 = tmp4 + tmp7;
            z12 = (tmp4 - tmp7) << 1;

            tmp7 = (z11 + z13) >> 2; //+2 !
            tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
            z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
            tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
            tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!

            tmp6 = tmp12 - tmp7;
            tmp5 = tmp11 - tmp6;
            tmp4 = tmp10 + tmp5;

            // rows 0-5 accumulate into the overlap buffer; rows 6-7 are
            // written fresh ('=' not '+=') — presumably they start the
            // next overlapped slice; confirm against the SIMD version.
            wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
            wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
            wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
            wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
            wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
            wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
            wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
            wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
            //
            dataptr++; //next column
            wsptr++;
            threshold++;
        }
        dataptr += 8; //skip each second start pos
        wsptr += 8;
    }
}
373
/* Row inverse DCT (AAN factorization) over the column-interleaved
 * coefficient layout produced by row_fdct_c (even coefficients first —
 * see the comment there).  Processes cnt*4 workspace columns; results are
 * descaled by 3 bits and accumulated ('+=') into output_adr, writing one
 * strided column of 8 output samples per iteration. */
static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
{
    int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    int_simd16_t tmp10, tmp11, tmp12, tmp13;
    int_simd16_t z5, z10, z11, z12, z13;
    int16_t *outptr;
    int16_t *wsptr;

    cnt *= 4;  // cnt arrives in units of 4 columns
    wsptr = workspace;
    outptr = output_adr;
    for (; cnt > 0; cnt--) {
        // Even part
        //Simd version reads 4x4 block and transposes it
        tmp10 = wsptr[2] + wsptr[3];
        tmp11 = wsptr[2] - wsptr[3];

        tmp13 = wsptr[0] + wsptr[1];
        tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow

        tmp0 = tmp10 + tmp13; //->temps
        tmp3 = tmp10 - tmp13; //->temps
        tmp1 = tmp11 + tmp12;
        tmp2 = tmp11 - tmp12;

        // Odd part
        //Also transpose, with previous:
        // ---- ----      ||||
        // ---- ---- idct ||||
        // ---- ---- ---> ||||
        // ---- ----      ||||
        z13 = wsptr[4] + wsptr[5];
        z10 = wsptr[4] - wsptr[5];
        z11 = wsptr[6] + wsptr[7];
        z12 = wsptr[6] - wsptr[7];

        tmp7 = z11 + z13;
        tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);

        z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
        tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
        tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_

        tmp6 = (tmp12 << 3) - tmp7;
        tmp5 = (tmp11 << 3) - tmp6;
        tmp4 = (tmp10 << 3) + tmp5;

        // Final output stage: descale and write column
        outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
        outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
        outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
        outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
        outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
        outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
        outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
        outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
        outptr++;

        wsptr += DCTSIZE; // advance pointer to next row
    }
}
435
/* Row forward DCT (AAN factorization).  Reads 8-sample columns from
 * 'pixels' (strided by line_size) and writes one 8-coefficient row of
 * 'data' per input column; cnt arrives in units of 4 columns.  The
 * even-frequency coefficients are stored first (permuted order) — see the
 * inline comment below. */
static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
{
    int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    int_simd16_t tmp10, tmp11, tmp12, tmp13;
    int_simd16_t z1, z2, z3, z4, z5, z11, z13;
    int16_t *dataptr;

    cnt *= 4;
    // Pass 1: process rows.

    dataptr = data;
    for (; cnt > 0; cnt--) {
        // butterfly stage: sums and differences of mirrored sample pairs
        tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
        tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
        tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
        tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
        tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
        tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
        tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
        tmp4 = pixels[line_size * 3] - pixels[line_size * 4];

        // Even part

        tmp10 = tmp0 + tmp3;
        tmp13 = tmp0 - tmp3;
        tmp11 = tmp1 + tmp2;
        tmp12 = tmp1 - tmp2;
        //Even columns are written first, this leads to different order of columns
        //in column_fidct(), but they are processed independently, so all ok.
        //Later in the row_idct() columns readed at the same order.
        dataptr[2] = tmp10 + tmp11;
        dataptr[3] = tmp10 - tmp11;

        z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
        dataptr[0] = tmp13 + z1;
        dataptr[1] = tmp13 - z1;

        // Odd part

        tmp10 = (tmp4 + tmp5) << 2;
        tmp11 = (tmp5 + tmp6) << 2;
        tmp12 = (tmp6 + tmp7) << 2;

        z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
        z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
        z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
        z3 = MULTIPLY16H(tmp11, FIX_0_707106781);

        z11 = tmp7 + z3;
        z13 = tmp7 - z3;

        dataptr[4] = z13 + z2;
        dataptr[5] = z13 - z2;
        dataptr[6] = z11 + z4;
        dataptr[7] = z11 - z4;

        pixels++; // advance pointer to next column
        dataptr += DCTSIZE;
    }
}
496
/* Supported input formats: 8-bit planar only — the filter processes one
 * 8-bit plane at a time (see filter()). */
static const enum AVPixelFormat pix_fmts[] = {
    AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV422P,
    AV_PIX_FMT_YUV420P,  AV_PIX_FMT_YUV411P,
    AV_PIX_FMT_YUV410P,  AV_PIX_FMT_YUV440P,
    AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
    AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P,
    AV_PIX_FMT_GBRP,     AV_PIX_FMT_GRAY8,
    AV_PIX_FMT_NONE
};
506
507 static int config_input(AVFilterLink *inlink)
508 {
509 AVFilterContext *ctx = inlink->dst;
510 FSPPContext *fspp = ctx->priv;
511 const int h = FFALIGN(inlink->h + 16, 16);
512 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
513
514 fspp->hsub = desc->log2_chroma_w;
515 fspp->vsub = desc->log2_chroma_h;
516
517 fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
518 fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
519 fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
520
521 if (!fspp->temp || !fspp->src)
522 return AVERROR(ENOMEM);
523
524 fspp->store_slice = store_slice_c;
525 fspp->store_slice2 = store_slice2_c;
526 fspp->mul_thrmat = mul_thrmat_c;
527 fspp->column_fidct = column_fidct_c;
528 fspp->row_idct = row_idct_c;
529 fspp->row_fdct = row_fdct_c;
530
531 #if ARCH_X86
532 ff_fspp_init_x86(fspp);
533 #endif
534
535 return 0;
536 }
537
/* Per-frame entry point: rebuild the threshold matrix from the current
 * strength, fetch or reuse a QP table, then run filter() on every plane.
 * Filters in place when the input is writable and its dimensions are
 * multiples of 8; otherwise allocates an 8-aligned output frame. */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    FSPPContext *fspp = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *out = in;

    int qp_stride = 0;
    int8_t *qp_table = NULL;
    int i, bias;
    int ret = 0;
    int custom_threshold_m[64];

    /* 'strength' offsets the base scaling bias of 16 applied to the
     * threshold table (scaled by bias/71 below) */
    bias = (1 << 4) + fspp->strength;

    for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
        custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);

    /* Pack four 16-bit thresholds per 64-bit word.  The column permutation
     * (2,6,0,4 / 5,3,1,7) presumably matches the interleaved coefficient
     * order produced by row_fdct — confirm against the x86 SIMD code. */
    for (i = 0; i < 8; i++) {
        fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
            |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
            |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
            |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);

        fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
            |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
            |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
            |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
    }

    /* constant-QP mode: prepare the quantized threshold matrix once */
    if (fspp->qp)
        fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);

    /* if we are not in a constant user quantizer mode and we don't want to use
     * the quantizers from the B-frames (B-frames often have a higher QP), we
     * need to save the qp table from the last non B-frame; this is what the
     * following code block does */
    if (!fspp->qp && (fspp->use_bframe_qp || in->pict_type != AV_PICTURE_TYPE_B)) {
        ret = ff_qp_table_extract(in, &qp_table, &qp_stride, NULL, &fspp->qscale_type);
        if (ret < 0) {
            av_frame_free(&in);
            return ret;
        }

        /* cache the table of the latest non-B frame for reuse on B-frames */
        if (!fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
            av_freep(&fspp->non_b_qp_table);
            fspp->non_b_qp_table = qp_table;
            fspp->non_b_qp_stride = qp_stride;
        }
    }

    if (fspp->log2_count && !ctx->is_disabled) {
        if (!fspp->use_bframe_qp && fspp->non_b_qp_table) {
            qp_table = fspp->non_b_qp_table;
            qp_stride = fspp->non_b_qp_stride;
        }

        if (qp_table || fspp->qp) {
            const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
            const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);

            /* get a new frame if in-place is not possible or if the dimensions
             * are not multiple of 8 */
            if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
                const int aligned_w = FFALIGN(inlink->w, 8);
                const int aligned_h = FFALIGN(inlink->h, 8);

                out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
                if (!out) {
                    av_frame_free(&in);
                    ret = AVERROR(ENOMEM);
                    goto finish;
                }
                av_frame_copy_props(out, in);
                out->width = in->width;
                out->height = in->height;
            }

            /* luma plane, then the two (possibly subsampled) chroma planes */
            filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
                   inlink->w, inlink->h, qp_table, qp_stride, 1);
            filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
                   cw, ch, qp_table, qp_stride, 0);
            filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
                   cw, ch, qp_table, qp_stride, 0);
            emms_c();
        }
    }

    /* when a new buffer was allocated, carry over the alpha plane and
     * release the input frame */
    if (in != out) {
        if (in->data[3])
            av_image_copy_plane(out->data[3], out->linesize[3],
                                in ->data[3], in ->linesize[3],
                                inlink->w, inlink->h);
        av_frame_free(&in);
    }
    ret = ff_filter_frame(outlink, out);
finish:
    /* only free a freshly-extracted table — never the cached non-B one */
    if (qp_table != fspp->non_b_qp_table)
        av_freep(&qp_table);
    return ret;
}
639
640 static av_cold void uninit(AVFilterContext *ctx)
641 {
642 FSPPContext *fspp = ctx->priv;
643 av_freep(&fspp->temp);
644 av_freep(&fspp->src);
645 av_freep(&fspp->non_b_qp_table);
646 }
647
/* Single video input pad; per-link setup in config_input, frames handled
 * by filter_frame. */
static const AVFilterPad fspp_inputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
        .config_props = config_input,
        .filter_frame = filter_frame,
    },
};
656
/* Filter definition; supports timeline editing internally (is_disabled is
 * checked in filter_frame so QP-table caching keeps working while
 * disabled). */
const AVFilter ff_vf_fspp = {
    .name = "fspp",
    .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
    .priv_size = sizeof(FSPPContext),
    .uninit = uninit,
    FILTER_INPUTS(fspp_inputs),
    FILTER_OUTPUTS(ff_video_default_filterpad),
    FILTER_PIXFMTS_ARRAY(pix_fmts),
    .priv_class = &fspp_class,
    .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
};
668