Line data Source code
1 : /*
2 : * Copyright (c) 2011 Smartjog S.A.S, Clément Bœsch <clement.boesch@smartjog.com>
3 : *
4 : * This file is part of FFmpeg.
5 : *
6 : * FFmpeg is free software; you can redistribute it and/or
7 : * modify it under the terms of the GNU Lesser General Public
8 : * License as published by the Free Software Foundation; either
9 : * version 2.1 of the License, or (at your option) any later version.
10 : *
11 : * FFmpeg is distributed in the hope that it will be useful,
12 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 : * Lesser General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU Lesser General Public
17 : * License along with FFmpeg; if not, write to the Free Software
18 : * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 : */
20 :
21 : /**
22 : * @file
23 : * Potential thumbnail lookup filter to reduce the risk of an inappropriate
24 : * selection (such as a black frame) we could get with an absolute seek.
25 : *
26 : * Simplified version of algorithm by Vadim Zaliva <lord@crocodile.org>.
27 : * @see http://notbrainsurgery.livejournal.com/29773.html
28 : */
29 :
30 : #include "libavutil/opt.h"
31 : #include "avfilter.h"
32 : #include "internal.h"
33 :
// number of histogram bins per frame: 256 levels for each of the 3 color channels
#define HIST_SIZE (3 * 256)
35 :
36 : struct thumb_frame {
37 : AVFrame *buf; ///< cached frame
38 : int histogram[HIST_SIZE]; ///< RGB color distribution histogram of the frame
39 : };
40 :
41 : typedef struct ThumbContext {
42 : const AVClass *class;
43 : int n; ///< current frame
44 : int n_frames; ///< number of frames for analysis
45 : struct thumb_frame *frames; ///< the n_frames frames
46 : AVRational tb; ///< copy of the input timebase to ease access
47 : } ThumbContext;
48 :
// byte offset of an option's backing field inside ThumbContext
#define OFFSET(x) offsetof(ThumbContext, x)
// option flags shared by every entry of the option table; the expansion is
// parenthesized so that it stays a single operand wherever it is used
#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
51 :
52 : static const AVOption thumbnail_options[] = {
53 : { "n", "set the frames batch size", OFFSET(n_frames), AV_OPT_TYPE_INT, {.i64=100}, 2, INT_MAX, FLAGS },
54 : { NULL }
55 : };
56 :
57 : AVFILTER_DEFINE_CLASS(thumbnail);
58 :
59 1 : static av_cold int init(AVFilterContext *ctx)
60 : {
61 1 : ThumbContext *s = ctx->priv;
62 :
63 1 : s->frames = av_calloc(s->n_frames, sizeof(*s->frames));
64 1 : if (!s->frames) {
65 0 : av_log(ctx, AV_LOG_ERROR,
66 : "Allocation failure, try to lower the number of frames\n");
67 0 : return AVERROR(ENOMEM);
68 : }
69 1 : av_log(ctx, AV_LOG_VERBOSE, "batch size: %d frames\n", s->n_frames);
70 1 : return 0;
71 : }
72 :
73 : /**
74 : * @brief Compute Sum-square deviation to estimate "closeness".
75 : * @param hist color distribution histogram
76 : * @param median average color distribution histogram
77 : * @return sum of squared errors
78 : */
79 50 : static double frame_sum_square_err(const int *hist, const double *median)
80 : {
81 : int i;
82 50 : double err, sum_sq_err = 0;
83 :
84 38450 : for (i = 0; i < HIST_SIZE; i++) {
85 38400 : err = median[i] - (double)hist[i];
86 38400 : sum_sq_err += err*err;
87 : }
88 50 : return sum_sq_err;
89 : }
90 :
91 5 : static AVFrame *get_best_frame(AVFilterContext *ctx)
92 : {
93 : AVFrame *picref;
94 5 : ThumbContext *s = ctx->priv;
95 5 : int i, j, best_frame_idx = 0;
96 5 : int nb_frames = s->n;
97 5 : double avg_hist[HIST_SIZE] = {0}, sq_err, min_sq_err = -1;
98 :
99 : // average histogram of the N frames
100 3845 : for (j = 0; j < FF_ARRAY_ELEMS(avg_hist); j++) {
101 42240 : for (i = 0; i < nb_frames; i++)
102 38400 : avg_hist[j] += (double)s->frames[i].histogram[j];
103 3840 : avg_hist[j] /= nb_frames;
104 : }
105 :
106 : // find the frame closer to the average using the sum of squared errors
107 55 : for (i = 0; i < nb_frames; i++) {
108 50 : sq_err = frame_sum_square_err(s->frames[i].histogram, avg_hist);
109 50 : if (i == 0 || sq_err < min_sq_err)
110 11 : best_frame_idx = i, min_sq_err = sq_err;
111 : }
112 :
113 : // free and reset everything (except the best frame buffer)
114 55 : for (i = 0; i < nb_frames; i++) {
115 50 : memset(s->frames[i].histogram, 0, sizeof(s->frames[i].histogram));
116 50 : if (i != best_frame_idx)
117 45 : av_frame_free(&s->frames[i].buf);
118 : }
119 5 : s->n = 0;
120 :
121 : // raise the chosen one
122 5 : picref = s->frames[best_frame_idx].buf;
123 10 : av_log(ctx, AV_LOG_INFO, "frame id #%d (pts_time=%f) selected "
124 : "from a set of %d images\n", best_frame_idx,
125 10 : picref->pts * av_q2d(s->tb), nb_frames);
126 5 : s->frames[best_frame_idx].buf = NULL;
127 :
128 5 : return picref;
129 : }
130 :
131 50 : static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
132 : {
133 : int i, j;
134 50 : AVFilterContext *ctx = inlink->dst;
135 50 : ThumbContext *s = ctx->priv;
136 50 : AVFilterLink *outlink = ctx->outputs[0];
137 50 : int *hist = s->frames[s->n].histogram;
138 50 : const uint8_t *p = frame->data[0];
139 :
140 : // keep a reference of each frame
141 50 : s->frames[s->n].buf = frame;
142 :
143 : // update current frame RGB histogram
144 14450 : for (j = 0; j < inlink->h; j++) {
145 5083200 : for (i = 0; i < inlink->w; i++) {
146 5068800 : hist[0*256 + p[i*3 ]]++;
147 5068800 : hist[1*256 + p[i*3 + 1]]++;
148 5068800 : hist[2*256 + p[i*3 + 2]]++;
149 : }
150 14400 : p += frame->linesize[0];
151 : }
152 :
153 : // no selection until the buffer of N frames is filled up
154 50 : s->n++;
155 50 : if (s->n < s->n_frames)
156 45 : return 0;
157 :
158 5 : return ff_filter_frame(outlink, get_best_frame(ctx));
159 : }
160 :
161 1 : static av_cold void uninit(AVFilterContext *ctx)
162 : {
163 : int i;
164 1 : ThumbContext *s = ctx->priv;
165 1 : for (i = 0; i < s->n_frames && s->frames[i].buf; i++)
166 0 : av_frame_free(&s->frames[i].buf);
167 1 : av_freep(&s->frames);
168 1 : }
169 :
170 50 : static int request_frame(AVFilterLink *link)
171 : {
172 50 : AVFilterContext *ctx = link->src;
173 50 : ThumbContext *s = ctx->priv;
174 50 : int ret = ff_request_frame(ctx->inputs[0]);
175 :
176 50 : if (ret == AVERROR_EOF && s->n) {
177 0 : ret = ff_filter_frame(link, get_best_frame(ctx));
178 0 : if (ret < 0)
179 0 : return ret;
180 0 : ret = AVERROR_EOF;
181 : }
182 50 : if (ret < 0)
183 1 : return ret;
184 49 : return 0;
185 : }
186 :
187 1 : static int config_props(AVFilterLink *inlink)
188 : {
189 1 : AVFilterContext *ctx = inlink->dst;
190 1 : ThumbContext *s = ctx->priv;
191 :
192 1 : s->tb = inlink->time_base;
193 1 : return 0;
194 : }
195 :
196 1 : static int query_formats(AVFilterContext *ctx)
197 : {
198 : static const enum AVPixelFormat pix_fmts[] = {
199 : AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
200 : AV_PIX_FMT_NONE
201 : };
202 1 : AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
203 1 : if (!fmts_list)
204 0 : return AVERROR(ENOMEM);
205 1 : return ff_set_common_formats(ctx, fmts_list);
206 : }
207 :
208 : static const AVFilterPad thumbnail_inputs[] = {
209 : {
210 : .name = "default",
211 : .type = AVMEDIA_TYPE_VIDEO,
212 : .config_props = config_props,
213 : .filter_frame = filter_frame,
214 : },
215 : { NULL }
216 : };
217 :
218 : static const AVFilterPad thumbnail_outputs[] = {
219 : {
220 : .name = "default",
221 : .type = AVMEDIA_TYPE_VIDEO,
222 : .request_frame = request_frame,
223 : },
224 : { NULL }
225 : };
226 :
227 : AVFilter ff_vf_thumbnail = {
228 : .name = "thumbnail",
229 : .description = NULL_IF_CONFIG_SMALL("Select the most representative frame in a given sequence of consecutive frames."),
230 : .priv_size = sizeof(ThumbContext),
231 : .init = init,
232 : .uninit = uninit,
233 : .query_formats = query_formats,
234 : .inputs = thumbnail_inputs,
235 : .outputs = thumbnail_outputs,
236 : .priv_class = &thumbnail_class,
237 : };
|