| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | ||
| 3 | * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru> | ||
| 4 | * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com> | ||
| 5 | * | ||
| 6 | * This file is part of FFmpeg. | ||
| 7 | * | ||
| 8 | * FFmpeg is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License as published by | ||
| 10 | * the Free Software Foundation; either version 2 of the License, or | ||
| 11 | * (at your option) any later version. | ||
| 12 | * | ||
| 13 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 16 | * GNU General Public License for more details. | ||
| 17 | * | ||
| 18 | * You should have received a copy of the GNU General Public License along | ||
| 19 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||
| 20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 21 | */ | ||
| 22 | |||
| 23 | /** | ||
| 24 | * @file | ||
| 25 | * Fast Simple Post-processing filter | ||
| 26 | * This implementation is based on an algorithm described in | ||
| 27 | * Aria Nosratinia, "Embedded Post-Processing for | ||
| 28 | * Enhancement of Compressed Images" (1999) | ||
| 29 | * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf). | ||
| 30 | * Furthermore, by splitting the (I)DCT into horizontal/vertical passes, one of | ||
| 31 | * them can be performed once per block instead of once per pixel, which allows | ||
| 32 | * for much higher speed. | ||
| 33 | * | ||
| 34 | * Originally written by Michael Niedermayer and Nikolaj for the MPlayer | ||
| 35 | * project, and ported by Arwa Arif for FFmpeg. | ||
| 36 | */ | ||
| 37 | |||
| 38 | #include "libavutil/emms.h" | ||
| 39 | #include "libavutil/imgutils.h" | ||
| 40 | #include "libavutil/mem.h" | ||
| 41 | #include "libavutil/mem_internal.h" | ||
| 42 | #include "libavutil/opt.h" | ||
| 43 | #include "libavutil/pixdesc.h" | ||
| 44 | #include "libavutil/video_enc_params.h" | ||
| 45 | |||
| 46 | #include "avfilter.h" | ||
| 47 | #include "filters.h" | ||
| 48 | #include "qp_table.h" | ||
| 49 | #include "vf_fsppdsp.h" | ||
| 50 | #include "video.h" | ||
| 51 | |||
| 52 | #define BLOCKSZ 12 /* number of 8x8 coefficient blocks in each of the two work buffers of filter() */ | ||
| 53 | #define MAX_LEVEL 5 /* upper bound of the "quality" option */ | ||
| 54 | |||
| 55 | typedef struct FSPPContext { /* private state of one fspp filter instance */ | ||
| 56 | const struct AVClass *class; /* AVClass of this context */ | ||
| 57 | |||
| 58 | int log2_count; /* "quality" option; filter() uses 6 - log2_count as its row step */ | ||
| 59 | int strength; /* "strength" option; added to the bias that scales custom_threshold[] */ | ||
| 60 | int hsub; /* log2 horizontal chroma subsampling, taken from the pixel format descriptor */ | ||
| 61 | int vsub; /* log2 vertical chroma subsampling, taken from the pixel format descriptor */ | ||
| 62 | int temp_stride; /* line stride (in elements) used for temp and src when filtering luma */ | ||
| 63 | int qp; /* "qp" option; a non-zero value forces a constant quantizer */ | ||
| 64 | enum AVVideoEncParamsType qscale_type; /* filled in by ff_qp_table_extract(), passed to ff_norm_qscale() */ | ||
| 65 | int prev_q; /* quantizer for which threshold_mtx was last computed */ | ||
| 66 | uint8_t *src; /* padded copy of the plane being filtered, allocated in config_input() */ | ||
| 67 | int16_t *temp; /* 16-bit buffer written by dsp.row_idct and read by dsp.store_slice/store_slice2 */ | ||
| 68 | int8_t *non_b_qp_table; /* QP table kept from the last non-B frame; owned by the context, freed in uninit() */ | ||
| 69 | int non_b_qp_stride; /* stride belonging to non_b_qp_table */ | ||
| 70 | int use_bframe_qp; /* "use_bframe_qp" option; when set, the QP table of every frame is used directly */ | ||
| 71 | |||
| 72 | FSPPDSPContext dsp; /* DSP function pointers, set up by ff_fsppdsp_init() */ | ||
| 73 | |||
| 74 | DECLARE_ALIGNED(16, int16_t, threshold_mtx_noq)[8 * 8]; /* custom_threshold[] scaled by the strength bias, before the quantizer is applied */ | ||
| 75 | DECLARE_ALIGNED(16, int16_t, threshold_mtx)[8 * 8]; /* thresholds for the current quantizer, produced by dsp.mul_thrmat() */ | ||
| 76 | } FSPPContext; | ||
| 77 | |||
| 78 | |||
| 79 | #define OFFSET(x) offsetof(FSPPContext, x) /* byte offset of an option's backing field */ | ||
| 80 | #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM | ||
| 81 | static const AVOption fspp_options[] = { /* user options; each entry is backed by an FSPPContext field via OFFSET() */ | ||
| 82 | { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS }, /* default 4, range 4..MAX_LEVEL */ | ||
| 83 | { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS }, /* 0 (default) means: take the QP from the frame */ | ||
| 84 | { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS }, /* default 0, range -15..32 */ | ||
| 85 | { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS }, /* default off */ | ||
| 86 | { NULL } /* list terminator */ | ||
| 87 | }; | ||
| 88 | |||
| 89 | AVFILTER_DEFINE_CLASS(fspp); /* provides fspp_class, referenced by ff_vf_fspp */ | ||
| 90 | |||
| 91 | static const short custom_threshold[64] = { /* base 8x8 threshold matrix; filter_frame() scales it into threshold_mtx_noq */ | ||
| 92 | // values (296) can't be too high | ||
| 93 | // -it causes too big quant dependence | ||
| 94 | // or maybe overflow(check), which results in some flashing | ||
| 95 | // reorder coefficients to the order in which columns are processed | ||
| 96 | #define REORDER(a,b,c,d,e,f,g,h) c, g, a, e, f, d, b, h | ||
| 97 | REORDER( 71, 296, 295, 237, 71, 40, 38, 19), | ||
| 98 | REORDER(245, 193, 185, 121, 102, 73, 53, 27), | ||
| 99 | REORDER(158, 129, 141, 107, 97, 73, 50, 26), | ||
| 100 | REORDER(102, 116, 109, 98, 82, 66, 45, 23), | ||
| 101 | REORDER( 71, 94, 95, 81, 70, 56, 38, 20), | ||
| 102 | REORDER( 56, 77, 74, 66, 56, 44, 30, 15), | ||
| 103 | REORDER( 38, 53, 50, 45, 38, 30, 21, 11), | ||
| 104 | REORDER( 20, 27, 26, 23, 20, 15, 11, 5) | ||
| 105 | }; | ||
| 106 | |||
| 107 | ✗ | static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src, /* p: filter state; dst/src: output/input plane, either may be NULL */ | |
| 108 | int dst_stride, int src_stride, /* line strides of dst and src */ | ||
| 109 | int width, int height, /* dimensions of the plane */ | ||
| 110 | uint8_t *qp_store, int qp_stride, int is_luma) /* QP table and its stride (only read when p->qp is 0); is_luma selects stride and QP addressing */ | ||
| 111 | { /* Filters one plane: pads it into p->src, runs row_fdct / column_fidct / row_idct over it every `step` rows, writes into p->temp and stores finished bands of rows to dst. */ | ||
| 112 | int x, x0, y, es, qy, t; | ||
| 113 | |||
| 114 | ✗ | const int stride = is_luma ? p->temp_stride : (width + 16); /* chroma planes use their own, narrower stride inside the shared buffers */ | |
| 115 | ✗ | const int step = 6 - p->log2_count; /* 2 or 1 for the allowed quality values 4 and 5 */ | |
| 116 | ✗ | const int qpsh = 4 - p->hsub * !is_luma; /* right shift turning a plane x coordinate into a QP-table column */ | |
| 117 | ✗ | const int qpsv = 4 - p->vsub * !is_luma; /* right shift turning a plane y coordinate into a QP-table row */ | |
| 118 | |||
| 119 | DECLARE_ALIGNED(16, int16_t, block_align)[8 * 8 * BLOCKSZ + 8 * 8 * BLOCKSZ]; /* backing storage for the two work buffers below */ | ||
| 120 | ✗ | int16_t *block = block_align; /* output of row_fdct, input of column_fidct */ | |
| 121 | ✗ | int16_t *block3 = block_align + 8 * 8 * BLOCKSZ; /* output of column_fidct, input of row_idct */ | |
| 122 | |||
| 123 | ✗ | memset(block3, 0, 4 * 8 * BLOCKSZ); /* NOTE(review): the size is in bytes, so only the first 2 * 8 * BLOCKSZ int16_t elements of block3 are cleared -- confirm this is intended */ | |
| 124 | |||
| 125 | ✗ | if (!src || !dst) return; /* absent planes (NULL data pointers) are skipped */ | |
| 126 | |||
| 127 | ✗ | for (y = 0; y < height; y++) { /* copy the plane into p->src at an offset of 8 rows and 8 columns */ | |
| 128 | ✗ | int index = 8 + 8 * stride + y * stride; | |
| 129 | ✗ | memcpy(p->src + index, src + y * src_stride, width); | |
| 130 | ✗ | for (x = 0; x < 8; x++) { /* mirror 8 pixels beyond the left and right edges */ | |
| 131 | ✗ | p->src[index - x - 1] = p->src[index + x ]; | |
| 132 | ✗ | p->src[index + width + x ] = p->src[index + width - x - 1]; | |
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | ✗ | for (y = 0; y < 8; y++) { /* mirror 8 rows above the top and below the bottom edge */ | |
| 137 | ✗ | memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride); | |
| 138 | ✗ | memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride); | |
| 139 | } | ||
| 140 | //FIXME (try edge emu) | ||
| 141 | |||
| 142 | ✗ | for (y = 8; y < 24; y++) /* clear rows 8..23 of the temp buffer, width elements each */ | |
| 143 | ✗ | memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t)); | |
| 144 | |||
| 145 | ✗ | for (y = step; y < height + 8; y += step) { //step= 1,2 | |
| 146 | ✗ | const int y1 = y - 8 + step; //l5-7 l4-6; | |
| 147 | ✗ | qy = y - 4; | |
| 148 | |||
| 149 | ✗ | if (qy > height - 1) qy = height - 1; /* clamp the QP lookup row to the plane */ | |
| 150 | ✗ | if (qy < 0) qy = 0; | |
| 151 | |||
| 152 | ✗ | qy = (qy >> qpsv) * qp_stride; /* qy now is the offset of the QP-table row */ | |
| 153 | ✗ | p->dsp.row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2); | |
| 154 | |||
| 155 | ✗ | for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) { /* walk the row in chunks of 8 * (BLOCKSZ - 1) pixels */ | |
| 156 | ✗ | p->dsp.row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1)); | |
| 157 | |||
| 158 | ✗ | if (p->qp) /* constant quantizer: threshold_mtx was prepared by filter_frame() */ | |
| 159 | ✗ | p->dsp.column_fidct(p->threshold_mtx, block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT | |
| 160 | else | ||
| 161 | ✗ | for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) { /* quantizer from the QP table: look it up for every group of 8 columns */ | |
| 162 | ✗ | t = x + x0 - 2; //correct t=x+x0-2-(y&1), but it's the same | |
| 163 | |||
| 164 | ✗ | if (t < 0) t = 0; //t always < width-2 | |
| 165 | |||
| 166 | ✗ | t = qp_store[qy + (t >> qpsh)]; | |
| 167 | ✗ | t = ff_norm_qscale(t, p->qscale_type); | |
| 168 | |||
| 169 | ✗ | if (t != p->prev_q) { /* recompute the thresholds only when the quantizer changes */ | |
| 170 | ✗ | p->prev_q = t; | |
| 171 | ✗ | p->dsp.mul_thrmat(p->threshold_mtx_noq, p->threshold_mtx, t); | |
| 172 | } | ||
| 173 | ✗ | p->dsp.column_fidct(p->threshold_mtx, block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT | |
| 174 | } | ||
| 175 | ✗ | p->dsp.row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1)); | |
| 176 | ✗ | memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling | |
| 177 | ✗ | memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t)); /* carry the tail of block3 over to the start for the next chunk */ | |
| 178 | } | ||
| 179 | |||
| 180 | ✗ | es = width + 8 - x0; // 8, ... | |
| 181 | ✗ | if (es > 8) /* remaining columns of the row that did not fill a whole chunk */ | |
| 182 | ✗ | p->dsp.row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2); | |
| 183 | |||
| 184 | ✗ | p->dsp.column_fidct(p->threshold_mtx, block, block3, es&(~1)); /* uses whatever thresholds are currently in threshold_mtx; no QP lookup is done for the tail */ | |
| 185 | ✗ | if (es > 3) | |
| 186 | ✗ | p->dsp.row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2); | |
| 187 | |||
| 188 | ✗ | if (!(y1 & 7) && y1) { /* y1 is a non-zero multiple of 8: store a band of 8 rows to dst */ | |
| 189 | ✗ | if (y1 & 8) /* bit 3 of y1 selects store_slice vs store_slice2 and the temp offset they read from */ | |
| 190 | ✗ | p->dsp.store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride, | |
| 191 | ✗ | dst_stride, stride, width, 8, 5 - p->log2_count); | |
| 192 | else | ||
| 193 | ✗ | p->dsp.store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride, | |
| 194 | ✗ | dst_stride, stride, width, 8, 5 - p->log2_count); | |
| 195 | } | ||
| 196 | } | ||
| 197 | |||
| 198 | ✗ | if (y & 7) { // height % 8 != 0 | |
| 199 | ✗ | if (y & 8) /* store the remaining y & 7 rows of the last, partial band */ | |
| 200 | ✗ | p->dsp.store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride, | |
| 201 | ✗ | dst_stride, stride, width, y&7, 5 - p->log2_count); | |
| 202 | else | ||
| 203 | ✗ | p->dsp.store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride, | |
| 204 | ✗ | dst_stride, stride, width, y&7, 5 - p->log2_count); | |
| 205 | } | ||
| 206 | } | ||
| 207 | |||
| 208 | static const enum AVPixelFormat pix_fmts[] = { /* pixel formats accepted by the filter */ | ||
| 209 | AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, | ||
| 210 | AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P, | ||
| 211 | AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P, | ||
| 212 | AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P, | ||
| 213 | AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P, | ||
| 214 | AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8, | ||
| 215 | AV_PIX_FMT_NONE /* list terminator */ | ||
| 216 | }; | ||
| 217 | |||
| 218 | ✗ | static int config_input(AVFilterLink *inlink) /* Input setup: records the chroma subsampling, allocates the work buffers and initialises the DSP functions. Returns 0 on success, AVERROR(ENOMEM) if an allocation fails. */ | |
| 219 | { | ||
| 220 | ✗ | AVFilterContext *ctx = inlink->dst; | |
| 221 | ✗ | FSPPContext *fspp = ctx->priv; | |
| 222 | ✗ | const int h = FFALIGN(inlink->h + 16, 16); /* buffer height: input height plus 16 rows, aligned to 16 */ | |
| 223 | ✗ | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); /* NOTE(review): desc is dereferenced below without a NULL check */ | |
| 224 | |||
| 225 | ✗ | fspp->hsub = desc->log2_chroma_w; | |
| 226 | ✗ | fspp->vsub = desc->log2_chroma_h; | |
| 227 | |||
| 228 | ✗ | fspp->temp_stride = FFALIGN(inlink->w + 16, 16); /* buffer stride: input width plus 16 columns, aligned to 16 */ | |
| 229 | ✗ | fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp)); /* NOTE(review): a buffer from an earlier call would not be freed here -- confirm this runs only once per instance */ | |
| 230 | ✗ | fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src)); | |
| 231 | |||
| 232 | ✗ | if (!fspp->temp || !fspp->src) /* a buffer that did get allocated stays in the context; uninit() frees both */ | |
| 233 | ✗ | return AVERROR(ENOMEM); | |
| 234 | |||
| 235 | ✗ | ff_fsppdsp_init(&fspp->dsp); | |
| 236 | |||
| 237 | ✗ | return 0; | |
| 238 | } | ||
| 239 | |||
| 240 | ✗ | static int filter_frame(AVFilterLink *inlink, AVFrame *in) /* Per-frame entry point: picks the QP source, filters the three planes and passes the result on. Takes ownership of in. Returns the result of ff_filter_frame() or a negative error code. */ | |
| 241 | { | ||
| 242 | ✗ | AVFilterContext *ctx = inlink->dst; | |
| 243 | ✗ | FSPPContext *fspp = ctx->priv; | |
| 244 | ✗ | AVFilterLink *outlink = ctx->outputs[0]; | |
| 245 | ✗ | AVFrame *out = in; /* filter in place unless a separate output frame turns out to be needed */ | |
| 246 | |||
| 247 | ✗ | int qp_stride = 0; | |
| 248 | ✗ | int8_t *qp_table = NULL; | |
| 249 | ✗ | int ret = 0; | |
| 250 | |||
| 251 | //FIXME: tune custom_threshold[] and remove this ! | ||
| 252 | ✗ | for (int i = 0, bias = (1 << 4) + fspp->strength; i < 64; ++i) /* rebuild the quantizer-independent thresholds; bias is 16 + strength */ | |
| 253 | ✗ | fspp->threshold_mtx_noq[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5); | |
| 254 | |||
| 255 | ✗ | if (fspp->qp) { /* constant quantizer: compute threshold_mtx once per frame */ | |
| 256 | ✗ | fspp->prev_q = fspp->qp; | |
| 257 | ✗ | fspp->dsp.mul_thrmat(fspp->threshold_mtx_noq, fspp->threshold_mtx, fspp->qp); | |
| 258 | } | ||
| 259 | |||
| 260 | /* if we are not in a constant user quantizer mode and we don't want to use | ||
| 261 | * the quantizers from the B-frames (B-frames often have a higher QP), we | ||
| 262 | * need to save the qp table from the last non B-frame; this is what the | ||
| 263 | * following code block does */ | ||
| 264 | ✗ | if (!fspp->qp && (fspp->use_bframe_qp || in->pict_type != AV_PICTURE_TYPE_B)) { | |
| 265 | ✗ | ret = ff_qp_table_extract(in, &qp_table, &qp_stride, NULL, &fspp->qscale_type); /* on success qp_table belongs to this function until it is freed or handed to the context below */ | |
| 266 | ✗ | if (ret < 0) { | |
| 267 | ✗ | av_frame_free(&in); | |
| 268 | ✗ | return ret; | |
| 269 | } | ||
| 270 | |||
| 271 | ✗ | if (!fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) { /* keep this table for later B-frames; the context takes ownership */ | |
| 272 | ✗ | av_freep(&fspp->non_b_qp_table); | |
| 273 | ✗ | fspp->non_b_qp_table = qp_table; | |
| 274 | ✗ | fspp->non_b_qp_stride = qp_stride; | |
| 275 | } | ||
| 276 | } | ||
| 277 | |||
| 278 | ✗ | if (fspp->log2_count && !ctx->is_disabled) { /* no filtering while ctx->is_disabled is set */ | |
| 279 | ✗ | if (!fspp->use_bframe_qp && fspp->non_b_qp_table) { /* prefer the table saved from the last non-B frame */ | |
| 280 | ✗ | qp_table = fspp->non_b_qp_table; | |
| 281 | ✗ | qp_stride = fspp->non_b_qp_stride; | |
| 282 | } | ||
| 283 | |||
| 284 | ✗ | if (qp_table || fspp->qp) { /* without any QP source the frame is passed through unfiltered */ | |
| 285 | ✗ | const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub); /* chroma plane width */ | |
| 286 | ✗ | const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub); /* chroma plane height */ | |
| 287 | |||
| 288 | /* get a new frame if in-place is not possible or if the dimensions | ||
| 289 | * are not multiple of 8 */ | ||
| 290 | ✗ | if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) { | |
| 291 | ✗ | const int aligned_w = FFALIGN(inlink->w, 8); | |
| 292 | ✗ | const int aligned_h = FFALIGN(inlink->h, 8); | |
| 293 | |||
| 294 | ✗ | out = ff_get_video_buffer(outlink, aligned_w, aligned_h); /* buffer dimensions aligned to 8 */ | |
| 295 | ✗ | if (!out) { | |
| 296 | ✗ | av_frame_free(&in); | |
| 297 | ✗ | ret = AVERROR(ENOMEM); | |
| 298 | ✗ | goto finish; | |
| 299 | } | ||
| 300 | ✗ | av_frame_copy_props(out, in); /* NOTE(review): the return value is not checked */ | |
| 301 | ✗ | out->width = in->width; /* report the input's dimensions, not the aligned buffer size */ | |
| 302 | ✗ | out->height = in->height; | |
| 303 | } | ||
| 304 | |||
| 305 | ✗ | filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0], | |
| 306 | inlink->w, inlink->h, qp_table, qp_stride, 1); | ||
| 307 | ✗ | filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1], /* filter() returns early if a plane pointer is NULL */ | |
| 308 | cw, ch, qp_table, qp_stride, 0); | ||
| 309 | ✗ | filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2], | |
| 310 | cw, ch, qp_table, qp_stride, 0); | ||
| 311 | ✗ | emms_c(); /* presumably resets MMX state after the dsp calls -- confirm */ | |
| 312 | } | ||
| 313 | } | ||
| 314 | |||
| 315 | ✗ | if (in != out) { /* a separate output frame was allocated */ | |
| 316 | ✗ | if (in->data[3]) /* copy a fourth plane over unfiltered, if present */ | |
| 317 | ✗ | av_image_copy_plane(out->data[3], out->linesize[3], | |
| 318 | ✗ | in ->data[3], in ->linesize[3], | |
| 319 | inlink->w, inlink->h); | ||
| 320 | ✗ | av_frame_free(&in); | |
| 321 | } | ||
| 322 | ✗ | ret = ff_filter_frame(outlink, out); | |
| 323 | ✗ | finish: /* free qp_table unless it is the table owned by the context */ | |
| 324 | ✗ | if (qp_table != fspp->non_b_qp_table) | |
| 325 | ✗ | av_freep(&qp_table); | |
| 326 | ✗ | return ret; | |
| 327 | } | ||
| 328 | |||
| 329 | ✗ | static av_cold void uninit(AVFilterContext *ctx) /* Frees the work buffers and the saved non-B QP table; av_freep() also resets the pointers. */ | |
| 330 | { | ||
| 331 | ✗ | FSPPContext *fspp = ctx->priv; | |
| 332 | ✗ | av_freep(&fspp->temp); | |
| 333 | ✗ | av_freep(&fspp->src); | |
| 334 | ✗ | av_freep(&fspp->non_b_qp_table); | |
| 335 | ✗ | } | |
| 336 | |||
| 337 | static const AVFilterPad fspp_inputs[] = { /* the single video input pad, wired to config_input() and filter_frame() */ | ||
| 338 | { | ||
| 339 | .name = "default", | ||
| 340 | .type = AVMEDIA_TYPE_VIDEO, | ||
| 341 | .config_props = config_input, | ||
| 342 | .filter_frame = filter_frame, | ||
| 343 | }, | ||
| 344 | }; | ||
| 345 | |||
| 346 | const FFFilter ff_vf_fspp = { /* definition of the "fspp" filter */ | ||
| 347 | .p.name = "fspp", | ||
| 348 | .p.description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."), | ||
| 349 | .p.priv_class = &fspp_class, | ||
| 350 | .p.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, /* filter_frame() checks ctx->is_disabled itself */ | ||
| 351 | .priv_size = sizeof(FSPPContext), | ||
| 352 | .uninit = uninit, | ||
| 353 | FILTER_INPUTS(fspp_inputs), | ||
| 354 | FILTER_OUTPUTS(ff_video_default_filterpad), | ||
| 355 | FILTER_PIXFMTS_ARRAY(pix_fmts), | ||
| 356 | }; | ||
| 357 |