/*
 * Copyright (c) 2017 Richard Ling
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * Normalize RGB video (aka histogram stretching, contrast stretching).
 * See: https://en.wikipedia.org/wiki/Normalization_(image_processing)
 *
 * For each channel of each frame, the filter computes the input range and maps
 * it linearly to the user-specified output range. The output range defaults
 * to the full dynamic range from pure black to pure white.
 *
 * Naively maximising the dynamic range of each frame of video in isolation
 * may cause flickering (rapid changes in brightness of static objects in the
 * scene) when small dark or bright objects enter or leave the scene. This
 * filter can apply temporal smoothing to the input range to reduce flickering.
 * Temporal smoothing is similar to the auto-exposure (automatic gain control)
 * on a video camera, and, like a video camera, it may cause a period of over-
 * or under-exposure of the video.
 *
 * The filter can normalize the R,G,B channels independently, which may cause
 * color shifting, or link them together as a single channel, which prevents
 * color shifting. More precisely, linked normalization preserves hue (as it's
 * defined in HSV/HSL color spaces) while independent normalization does not.
 * Independent normalization can be used to remove color casts, such as the
 * blue cast from underwater video, restoring more natural colors. The filter
 * can also combine independent and linked normalization in any ratio.
 *
 * Finally the overall strength of the filter can be adjusted, from no effect
 * to full normalization.
 *
 * The 5 AVOptions are:
 *   blackpt,   Colors which define the output range. The minimum input value
 *   whitept    is mapped to the blackpt. The maximum input value is mapped to
 *              the whitept. The defaults are black and white respectively.
 *              Specifying white for blackpt and black for whitept will give
 *              color-inverted, normalized video. Shades of grey can be used
 *              to reduce the dynamic range (contrast). Specifying saturated
 *              colors here can create some interesting effects.
 *
 *   smoothing  The amount of temporal smoothing, expressed in frames (>=0).
 *              The minimum and maximum input values of each channel are
 *              smoothed using a rolling average over the current frame and
 *              that many previous frames of video.  Defaults to 0 (no temporal
 *              smoothing).
 *
 *   independence
 *              Controls the ratio of independent (color shifting) channel
 *              normalization to linked (color preserving) normalization. 0.0
 *              is fully linked, 1.0 is fully independent. Defaults to fully
 *              independent.
 *
 *   strength   Overall strength of the filter. 1.0 is full strength. 0.0 is
 *              a rather expensive no-op. Values in between can give a gentle
 *              boost to low-contrast video without creating an artificial
 *              over-processed look. The default is full strength.
 */
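
/*
 * Example usage (an illustrative command line; the file names are
 * placeholders):
 *
 *   ffmpeg -i input.mp4 -vf "normalize=blackpt=black:whitept=white:smoothing=30" output.mp4
 *
 * This maps each frame's input range to the full black-to-white output range,
 * smoothing the per-channel extremes over the current frame and the 30
 * previous frames.
 */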

#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "formats.h"
#include "internal.h"
#include "video.h"

typedef struct NormalizeContext {
    const AVClass *class;

    // Storage for the corresponding AVOptions
    uint8_t blackpt[4];
    uint8_t whitept[4];
    int smoothing;
    float independence;
    float strength;

    int co[4];          // Offsets to R,G,B,A bytes respectively in each pixel
    int num_components; // Number of components in the pixel format
    int history_len;    // Number of frames to average; based on smoothing factor
    int frame_num;      // Increments on each frame, starting from 0.

    // Per-extremum, per-channel history, for temporal smoothing.
    struct {
        uint8_t *history;       // History entries.
        uint32_t history_sum;   // Sum of history entries.
    } min[3], max[3];           // Min and max for each channel in {R,G,B}.
    uint8_t *history_mem;       // Single allocation for above history entries

} NormalizeContext;

#define OFFSET(x) offsetof(NormalizeContext, x)
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

static const AVOption normalize_options[] = {
    { "blackpt",  "output color to which darkest input color is mapped",  OFFSET(blackpt), AV_OPT_TYPE_COLOR, { .str = "black" }, CHAR_MIN, CHAR_MAX, FLAGS },
    { "whitept",  "output color to which brightest input color is mapped",  OFFSET(whitept), AV_OPT_TYPE_COLOR, { .str = "white" }, CHAR_MIN, CHAR_MAX, FLAGS },
    { "smoothing",  "amount of temporal smoothing of the input range, to reduce flicker", OFFSET(smoothing), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX/8, FLAGS },
    { "independence", "proportion of independent to linked channel normalization", OFFSET(independence), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGS },
    { "strength", "strength of filter, from no effect to full normalization", OFFSET(strength), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(normalize);

// This function is the main guts of the filter. Normalizes the input frame
// into the output frame. The frames are known to have the same dimensions
// and pixel format.
static void normalize(NormalizeContext *s, AVFrame *in, AVFrame *out)
{
    // Per-extremum, per-channel local variables.
    struct {
        uint8_t in;     // Original input byte value for this frame.
        float smoothed; // Smoothed input value [0,255].
        float out;      // Output value [0,255].
    } min[3], max[3];   // Min and max for each channel in {R,G,B}.

    float rgb_min_smoothed; // Min input range for linked normalization
    float rgb_max_smoothed; // Max input range for linked normalization
    uint8_t lut[3][256];    // Lookup table
    int x, y, c;

    // First, scan the input frame to find, for each channel, the minimum
    // (min.in) and maximum (max.in) values present in the channel.
    for (c = 0; c < 3; c++)
        min[c].in = max[c].in = in->data[0][s->co[c]];
    for (y = 0; y < in->height; y++) {
        uint8_t *inp = in->data[0] + y * in->linesize[0];
        uint8_t *outp = out->data[0] + y * out->linesize[0];
        for (x = 0; x < in->width; x++) {
            for (c = 0; c < 3; c++) {
                min[c].in = FFMIN(min[c].in, inp[s->co[c]]);
                max[c].in = FFMAX(max[c].in, inp[s->co[c]]);
            }
            inp += s->num_components;
            outp += s->num_components;
        }
    }

    // Next, for each channel, push min.in and max.in into their respective
    // histories, to determine the min.smoothed and max.smoothed for this frame.
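    // For example (illustrative numbers): with smoothing=2 (history_len=3)
    // and per-frame channel minimums of 10, 12 and 20, the smoothed minimum
    // for the third frame is (10 + 12 + 20) / 3 = 14.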
    {
        int history_idx = s->frame_num % s->history_len;
        // Assume the history is not yet full; num_history_vals is the number
        // of frames received so far including the current frame.
        int num_history_vals = s->frame_num + 1;
        if (s->frame_num >= s->history_len) {
            // The history is full; drop the oldest value and cap num_history_vals.
            for (c = 0; c < 3; c++) {
                s->min[c].history_sum -= s->min[c].history[history_idx];
                s->max[c].history_sum -= s->max[c].history[history_idx];
            }
            num_history_vals = s->history_len;
        }
        // For each extremum, update history_sum and calculate smoothed value
        // as the rolling average of the history entries.
        for (c = 0; c < 3; c++) {
            s->min[c].history_sum += (s->min[c].history[history_idx] = min[c].in);
            min[c].smoothed = s->min[c].history_sum / (float)num_history_vals;
            s->max[c].history_sum += (s->max[c].history[history_idx] = max[c].in);
            max[c].smoothed = s->max[c].history_sum / (float)num_history_vals;
        }
    }

    // Determine the input range for linked normalization. This is simply the
    // minimum of the per-channel minimums, and the maximum of the per-channel
    // maximums.
    rgb_min_smoothed = FFMIN3(min[0].smoothed, min[1].smoothed, min[2].smoothed);
    rgb_max_smoothed = FFMAX3(max[0].smoothed, max[1].smoothed, max[2].smoothed);

    // Now, process each channel to determine the input and output range and
    // build the lookup tables.
    for (c = 0; c < 3; c++) {
        int in_val;
        // Adjust the input range for this channel [min.smoothed,max.smoothed]
        // by mixing in the correct proportion of the linked normalization
        // input range [rgb_min_smoothed,rgb_max_smoothed].
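        // For example, independence=0.25 (illustrative value) would weight
        // this channel's own extremes by 25% and the linked extremes by 75%.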
        min[c].smoothed = (min[c].smoothed  *         s->independence)
                        + (rgb_min_smoothed * (1.0f - s->independence));
        max[c].smoothed = (max[c].smoothed  *         s->independence)
                        + (rgb_max_smoothed * (1.0f - s->independence));

        // Calculate the output range [min.out,max.out] as a ratio of the full-
        // strength output range [blackpt,whitept] and the original input range
        // [min.in,max.in], based on the user-specified filter strength.
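        // For example, strength=0.5 (illustrative value) would place the
        // output range halfway between the original input range and
        // [blackpt,whitept].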
        min[c].out = (s->blackpt[c] *         s->strength)
                   + (min[c].in     * (1.0f - s->strength));
        max[c].out = (s->whitept[c] *         s->strength)
                   + (max[c].in     * (1.0f - s->strength));

        // Now, build a lookup table which linearly maps the adjusted input range
        // [min.smoothed,max.smoothed] to the output range [min.out,max.out].
        // Perform the linear interpolation for each x:
        //     lut[x] = (int)((x - min.smoothed) * scale + min.out + 0.5f)
        // where scale = (max.out - min.out) / (max.smoothed - min.smoothed)
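        // Worked example (illustrative numbers): with min.smoothed = 16,
        // max.smoothed = 235, min.out = 0 and max.out = 255, we get
        // scale = 255 / 219 ~= 1.164, so lut[16] = 0 and lut[235] = 255.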
        if (min[c].smoothed == max[c].smoothed) {
            // There is no dynamic range to expand. No mapping for this channel.
            for (in_val = min[c].in; in_val <= max[c].in; in_val++)
                lut[c][in_val] = min[c].out;
        } else {
            // We must set lookup values for all values in the original input
            // range [min.in,max.in]. Since the original input range may be
            // larger than [min.smoothed,max.smoothed], some output values may
            // fall outside the [0,255] dynamic range. We need to clamp them.
            float scale = (max[c].out - min[c].out) / (max[c].smoothed - min[c].smoothed);
            for (in_val = min[c].in; in_val <= max[c].in; in_val++) {
                int out_val = (in_val - min[c].smoothed) * scale + min[c].out + 0.5f;
                out_val = FFMAX(out_val, 0);
                out_val = FFMIN(out_val, 255);
                lut[c][in_val] = out_val;
            }
        }
    }

    // Finally, process the pixels of the input frame using the lookup tables.
    for (y = 0; y < in->height; y++) {
        uint8_t *inp = in->data[0] + y * in->linesize[0];
        uint8_t *outp = out->data[0] + y * out->linesize[0];
        for (x = 0; x < in->width; x++) {
            for (c = 0; c < 3; c++)
                outp[s->co[c]] = lut[c][inp[s->co[c]]];
            if (s->num_components == 4)
                // Copy alpha as-is.
                outp[s->co[3]] = inp[s->co[3]];
            inp += s->num_components;
            outp += s->num_components;
        }
    }

    s->frame_num++;
}

// Now we define all the functions accessible from the ff_vf_normalize class,
// which is FFmpeg's interface to our filter.  See doc/filter_design.txt and
// doc/writing_filters.txt for descriptions of what these interface functions
// are expected to do.

// Set the pixel formats that our filter supports. We should be able to process
// any 8-bit RGB format. 16-bit support might be useful one day.
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat pixel_fmts[] = {
        AV_PIX_FMT_RGB24,
        AV_PIX_FMT_BGR24,
        AV_PIX_FMT_ARGB,
        AV_PIX_FMT_RGBA,
        AV_PIX_FMT_ABGR,
        AV_PIX_FMT_BGRA,
        AV_PIX_FMT_0RGB,
        AV_PIX_FMT_RGB0,
        AV_PIX_FMT_0BGR,
        AV_PIX_FMT_BGR0,
        AV_PIX_FMT_NONE
    };
    // According to filter_design.txt, using ff_set_common_formats() this way
    // ensures that the input and output pixel formats will be the same, which
    // saves us the effort of possibly having to handle format conversions.
    AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
    if (!formats)
        return AVERROR(ENOMEM);
    return ff_set_common_formats(ctx, formats);
}

// At this point we know the pixel format used for both input and output. We
// can also access the frame rate of the input video and allocate memory
// appropriately.
static int config_input(AVFilterLink *inlink)
{
    NormalizeContext *s = inlink->dst->priv;
    // Store offsets to R,G,B,A bytes respectively in each pixel
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
    int c;

    for (c = 0; c < 4; ++c)
        s->co[c] = desc->comp[c].offset;
    s->num_components = desc->nb_components;
    // Convert smoothing value to history_len (a count of frames to average,
    // must be at least 1).  Currently this is a direct assignment, but the
    // smoothing value was originally envisaged as a number of seconds.  In
    // future it would be nice to set history_len using a number of seconds,
    // but VFR video is currently an obstacle to doing so.
    s->history_len = s->smoothing + 1;
    // Allocate the history buffers -- there are 6: one per extremum per
    // channel. s->smoothing is limited to INT_MAX/8, so that
    // (s->history_len * 6) can't overflow on 32bit, causing a too-small
    // allocation.
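    // Layout of history_mem (each block is history_len bytes long):
    //   [min R][max R][min G][max G][min B][max B]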
    s->history_mem = av_malloc(s->history_len * 6);
    if (s->history_mem == NULL)
        return AVERROR(ENOMEM);

    for (c = 0; c < 3; c++) {
        s->min[c].history = s->history_mem + (c*2)   * s->history_len;
        s->max[c].history = s->history_mem + (c*2+1) * s->history_len;
    }
    return 0;
}

// Free any memory allocations here
static av_cold void uninit(AVFilterContext *ctx)
{
    NormalizeContext *s = ctx->priv;

    av_freep(&s->history_mem);
}

// This function is pretty much standard from doc/writing_filters.txt.  It
// tries to do in-place filtering where possible, only allocating a new output
// frame when absolutely necessary.
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    AVFilterLink *outlink = ctx->outputs[0];
    NormalizeContext *s = ctx->priv;
    AVFrame *out;
    // Set 'direct' if we can modify the input frame in-place.  Otherwise we
    // need to retrieve a new frame from the output link.
    int direct = av_frame_is_writable(in) && !ctx->is_disabled;

    if (direct) {
        out = in;
    } else {
        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
        if (!out) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }
        av_frame_copy_props(out, in);
    }

    // Now that we've got the input and output frames (which may be the same
    // frame), perform the filtering with our custom function.
    normalize(s, in, out);

    if (ctx->is_disabled) {
        av_frame_free(&out);
        return ff_filter_frame(outlink, in);
    }

    if (!direct)
        av_frame_free(&in);

    return ff_filter_frame(outlink, out);
}

static const AVFilterPad inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input,
    },
    { NULL }
};

static const AVFilterPad outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
    },
    { NULL }
};

AVFilter ff_vf_normalize = {
    .name          = "normalize",
    .description   = NULL_IF_CONFIG_SMALL("Normalize RGB video."),
    .priv_size     = sizeof(NormalizeContext),
    .priv_class    = &normalize_class,
    .uninit        = uninit,
    .query_formats = query_formats,
    .inputs        = inputs,
    .outputs       = outputs,
};
