FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/vf_signature.c
Date: 2025-01-20 09:27:23
Exec Total Coverage
Lines: 0 398 0.0%
Functions: 0 13 0.0%
Branches: 0 206 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2017 Gerion Entrup
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21 /**
22 * @file
23 * MPEG-7 video signature calculation and lookup filter
24 * @see http://epubs.surrey.ac.uk/531590/1/MPEG-7%20Video%20Signature%20Author%27s%20Copy.pdf
25 */
26
27 #include "libavcodec/put_bits.h"
28 #include "libavformat/avformat.h"
29 #include "libavutil/mem.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/avstring.h"
32 #include "libavutil/file_open.h"
33 #include "avfilter.h"
34 #include "filters.h"
35 #include "signature.h"
36 #include "signature_lookup.c"
37
/* Byte offset of option field x inside the filter's private context. */
#define OFFSET(x) offsetof(SignatureContext, x)
/* Flag set shared by every option of this filter. Parenthesized so the
 * expansion stays one operand when used inside a larger expression. */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Fixed-point scale used for the summed area table when full precision is
 * possible (presumably the least common multiple of the block sizes - confirm).
 * Parenthesized so that e.g. "sizeof BLOCK_LCM" or "x / BLOCK_LCM" parse as intended. */
#define BLOCK_LCM ((int64_t) 476985600)
41
42 static const AVOption signature_options[] = {
43 { "detectmode", "set the detectmode",
44 OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_OFF}, 0, NB_LOOKUP_MODE-1, FLAGS, .unit = "mode" },
45 { "off", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_OFF}, 0, 0, .flags = FLAGS, .unit = "mode" },
46 { "full", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FULL}, 0, 0, .flags = FLAGS, .unit = "mode" },
47 { "fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FAST}, 0, 0, .flags = FLAGS, .unit = "mode" },
48 { "nb_inputs", "number of inputs",
49 OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, FLAGS },
50 { "filename", "filename for output files",
51 OFFSET(filename), AV_OPT_TYPE_STRING, {.str = ""}, 0, NB_FORMATS-1, FLAGS },
52 { "format", "set output format",
53 OFFSET(format), AV_OPT_TYPE_INT, {.i64 = FORMAT_BINARY}, 0, 1, FLAGS , .unit = "format" },
54 { "binary", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_BINARY}, 0, 0, FLAGS, .unit = "format" },
55 { "xml", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_XML}, 0, 0, FLAGS, .unit = "format" },
56 { "th_d", "threshold to detect one word as similar",
57 OFFSET(thworddist), AV_OPT_TYPE_INT, {.i64 = 9000}, 1, INT_MAX, FLAGS },
58 { "th_dc", "threshold to detect all words as similar",
59 OFFSET(thcomposdist), AV_OPT_TYPE_INT, {.i64 = 60000}, 1, INT_MAX, FLAGS },
60 { "th_xh", "threshold to detect frames as similar",
61 OFFSET(thl1), AV_OPT_TYPE_INT, {.i64 = 116}, 1, INT_MAX, FLAGS },
62 { "th_di", "minimum length of matching sequence in frames",
63 OFFSET(thdi), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
64 { "th_it", "threshold for relation of good to all frames",
65 OFFSET(thit), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0.0, 1.0, FLAGS },
66 { NULL }
67 };
68
69 AVFILTER_DEFINE_CLASS(signature);
70
71 /* all formats with a separate gray value */
72 static const enum AVPixelFormat pix_fmts[] = {
73 AV_PIX_FMT_GRAY8,
74 AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
75 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
76 AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
77 AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P,
78 AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P,
79 AV_PIX_FMT_YUVJ440P,
80 AV_PIX_FMT_NV12, AV_PIX_FMT_NV21,
81 AV_PIX_FMT_NONE
82 };
83
84 static int config_input(AVFilterLink *inlink)
85 {
86 AVFilterContext *ctx = inlink->dst;
87 SignatureContext *sic = ctx->priv;
88 StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
89
90 sc->time_base = inlink->time_base;
91 /* test for overflow */
92 sc->divide = (((uint64_t) inlink->w/32) * (inlink->w/32 + 1) * (inlink->h/32 * inlink->h/32 + 1) > INT64_MAX / (BLOCK_LCM * 255));
93 if (sc->divide) {
94 av_log(ctx, AV_LOG_WARNING, "Input dimension too high for precise calculation, numbers will be rounded.\n");
95 }
96 sc->w = inlink->w;
97 sc->h = inlink->h;
98 return 0;
99 }
100
101 static int get_block_size(const Block *b)
102 {
103 return (b->to.y - b->up.y + 1) * (b->to.x - b->up.x + 1);
104 }
105
106 static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
107 {
108 uint64_t sum = 0;
109
110 int x0, y0, x1, y1;
111
112 x0 = b->up.x;
113 y0 = b->up.y;
114 x1 = b->to.x;
115 y1 = b->to.y;
116
117 if (x0-1 >= 0 && y0-1 >= 0) {
118 sum = intpic[y1][x1] + intpic[y0-1][x0-1] - intpic[y1][x0-1] - intpic[y0-1][x1];
119 } else if (x0-1 >= 0) {
120 sum = intpic[y1][x1] - intpic[y1][x0-1];
121 } else if (y0-1 >= 0) {
122 sum = intpic[y1][x1] - intpic[y0-1][x1];
123 } else {
124 sum = intpic[y1][x1];
125 }
126 return sum;
127 }
128
/**
 * qsort() comparison callback ordering uint64_t values ascending.
 *
 * @return -1, 0 or 1 if *x is smaller than, equal to or greater than *y
 */
static int cmp(const void *x, const void *y)
{
    const uint64_t lhs = *(const uint64_t *)x;
    const uint64_t rhs = *(const uint64_t *)y;

    return (lhs > rhs) - (lhs < rhs);
}
134
/**
 * Set one bit of a bit field to 1.
 * Bits are counted from the most significant bit of data[0].
 *
 * @param data bit field to modify
 * @param pos  index of the bit to set
 */
static void set_bit(uint8_t* data, size_t pos)
{
    const size_t byte_idx = pos >> 3;
    const unsigned shift  = 7 - (unsigned)(pos & 7);

    data[byte_idx] |= (uint8_t)(1u << shift);
}
143
144 static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
145 {
146 AVFilterContext *ctx = inlink->dst;
147 SignatureContext *sic = ctx->priv;
148 StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
149 FineSignature* fs;
150
151 static const uint8_t pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
152 /* indexes of words : 210,217,219,274,334 44,175,233,270,273 57,70,103,237,269 100,285,295,337,354 101,102,111,275,296
153 s2usw = sorted to unsorted wordvec: 44 is at index 5, 57 at index 10...
154 */
155 static const unsigned int wordvec[25] = {44,57,70,100,101,102,103,111,175,210,217,219,233,237,269,270,273,274,275,285,295,296,334,337,354};
156 static const uint8_t s2usw[25] = { 5,10,11, 15, 20, 21, 12, 22, 6, 0, 1, 2, 7, 13, 14, 8, 9, 3, 23, 16, 17, 24, 4, 18, 19};
157
158 uint8_t wordt2b[5] = { 0, 0, 0, 0, 0 }; /* word ternary to binary */
159 uint64_t intpic[32][32];
160 uint64_t rowcount;
161 uint8_t *p = picref->data[0];
162 int inti, intj;
163 int *intjlut;
164
165 uint64_t conflist[DIFFELEM_SIZE];
166 int f = 0, g = 0, w = 0;
167 int32_t dh1 = 1, dh2 = 1, dw1 = 1, dw2 = 1, a, b;
168 int64_t denom;
169 int i, j, k, ternary;
170 uint64_t blocksum;
171 int blocksize;
172 int64_t th; /* threshold */
173 int64_t sum;
174
175 int64_t precfactor = (sc->divide) ? 65536 : BLOCK_LCM;
176
177 /* initialize fs */
178 if (sc->curfinesig) {
179 fs = av_mallocz(sizeof(FineSignature));
180 if (!fs)
181 return AVERROR(ENOMEM);
182 sc->curfinesig->next = fs;
183 fs->prev = sc->curfinesig;
184 sc->curfinesig = fs;
185 } else {
186 fs = sc->curfinesig = sc->finesiglist;
187 sc->curcoarsesig1->first = fs;
188 }
189
190 fs->pts = picref->pts;
191 fs->index = sc->lastindex++;
192
193 memset(intpic, 0, sizeof(uint64_t)*32*32);
194 intjlut = av_malloc_array(inlink->w, sizeof(int));
195 if (!intjlut)
196 return AVERROR(ENOMEM);
197 for (i = 0; i < inlink->w; i++) {
198 intjlut[i] = (i*32)/inlink->w;
199 }
200
201 for (i = 0; i < inlink->h; i++) {
202 inti = (i*32)/inlink->h;
203 for (j = 0; j < inlink->w; j++) {
204 intj = intjlut[j];
205 intpic[inti][intj] += p[j];
206 }
207 p += picref->linesize[0];
208 }
209 av_freep(&intjlut);
210
211 /* The following calculates a summed area table (intpic) and brings the numbers
212 * in intpic to the same denominator.
213 * So you only have to handle the numinator in the following sections.
214 */
215 dh1 = inlink->h / 32;
216 if (inlink->h % 32)
217 dh2 = dh1 + 1;
218 dw1 = inlink->w / 32;
219 if (inlink->w % 32)
220 dw2 = dw1 + 1;
221 denom = (sc->divide) ? dh1 * (int64_t)dh2 * dw1 * dw2 : 1;
222
223 for (i = 0; i < 32; i++) {
224 rowcount = 0;
225 a = 1;
226 if (dh2 > 1) {
227 a = ((inlink->h*(i+1))%32 == 0) ? (inlink->h*(i+1))/32 - 1 : (inlink->h*(i+1))/32;
228 a -= ((inlink->h*i)%32 == 0) ? (inlink->h*i)/32 - 1 : (inlink->h*i)/32;
229 a = (a == dh1)? dh2 : dh1;
230 }
231 for (j = 0; j < 32; j++) {
232 b = 1;
233 if (dw2 > 1) {
234 b = ((inlink->w*(j+1))%32 == 0) ? (inlink->w*(j+1))/32 - 1 : (inlink->w*(j+1))/32;
235 b -= ((inlink->w*j)%32 == 0) ? (inlink->w*j)/32 - 1 : (inlink->w*j)/32;
236 b = (b == dw1)? dw2 : dw1;
237 }
238 rowcount += intpic[i][j] * a * b * precfactor / denom;
239 if (i > 0) {
240 intpic[i][j] = intpic[i-1][j] + rowcount;
241 } else {
242 intpic[i][j] = rowcount;
243 }
244 }
245 }
246
247 denom = (sc->divide) ? 1 : dh1 * (int64_t)dh2 * dw1 * dw2;
248
249 for (i = 0; i < ELEMENT_COUNT; i++) {
250 const ElemCat* elemcat = elements[i];
251 int64_t* elemsignature;
252 uint64_t* sortsignature;
253
254 elemsignature = av_malloc_array(elemcat->elem_count, 2 * sizeof(int64_t));
255 if (!elemsignature)
256 return AVERROR(ENOMEM);
257 sortsignature = elemsignature + elemcat->elem_count;
258
259 for (j = 0; j < elemcat->elem_count; j++) {
260 blocksum = 0;
261 blocksize = 0;
262 for (k = 0; k < elemcat->left_count; k++) {
263 blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
264 blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
265 }
266 sum = blocksum / blocksize;
267 if (elemcat->av_elem) {
268 sum -= 128 * precfactor * denom;
269 } else {
270 blocksum = 0;
271 blocksize = 0;
272 for (; k < elemcat->block_count; k++) {
273 blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
274 blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
275 }
276 sum -= blocksum / blocksize;
277 conflist[g++] = FFABS(sum * 8 / (precfactor * denom));
278 }
279
280 elemsignature[j] = sum;
281 sortsignature[j] = FFABS(sum);
282 }
283
284 /* get threshold */
285 qsort(sortsignature, elemcat->elem_count, sizeof(uint64_t), cmp);
286 th = sortsignature[(int) (elemcat->elem_count*0.333)];
287
288 /* ternarize */
289 for (j = 0; j < elemcat->elem_count; j++) {
290 if (elemsignature[j] < -th) {
291 ternary = 0;
292 } else if (elemsignature[j] <= th) {
293 ternary = 1;
294 } else {
295 ternary = 2;
296 }
297 fs->framesig[f/5] += ternary * pot3[f%5];
298
299 if (f == wordvec[w]) {
300 fs->words[s2usw[w]/5] += ternary * pot3[wordt2b[s2usw[w]/5]++];
301 if (w < 24)
302 w++;
303 }
304 f++;
305 }
306 av_freep(&elemsignature);
307 }
308
309 /* confidence */
310 qsort(conflist, DIFFELEM_SIZE, sizeof(uint64_t), cmp);
311 fs->confidence = FFMIN(conflist[DIFFELEM_SIZE/2], 255);
312
313 /* coarsesignature */
314 if (sc->coarsecount == 0) {
315 if (sc->curcoarsesig2) {
316 sc->curcoarsesig1 = av_mallocz(sizeof(CoarseSignature));
317 if (!sc->curcoarsesig1)
318 return AVERROR(ENOMEM);
319 sc->curcoarsesig1->first = fs;
320 sc->curcoarsesig2->next = sc->curcoarsesig1;
321 sc->coarseend = sc->curcoarsesig1;
322 }
323 }
324 if (sc->coarsecount == 45) {
325 sc->midcoarse = 1;
326 sc->curcoarsesig2 = av_mallocz(sizeof(CoarseSignature));
327 if (!sc->curcoarsesig2)
328 return AVERROR(ENOMEM);
329 sc->curcoarsesig2->first = fs;
330 sc->curcoarsesig1->next = sc->curcoarsesig2;
331 sc->coarseend = sc->curcoarsesig2;
332 }
333 for (i = 0; i < 5; i++) {
334 set_bit(sc->curcoarsesig1->data[i], fs->words[i]);
335 }
336 /* assuming the actual frame is the last */
337 sc->curcoarsesig1->last = fs;
338 if (sc->midcoarse) {
339 for (i = 0; i < 5; i++) {
340 set_bit(sc->curcoarsesig2->data[i], fs->words[i]);
341 }
342 sc->curcoarsesig2->last = fs;
343 }
344
345 sc->coarsecount = (sc->coarsecount+1)%90;
346
347 /* debug printing finesignature */
348 if (av_log_get_level() == AV_LOG_DEBUG) {
349 av_log(ctx, AV_LOG_DEBUG, "input %d, confidence: %d\n", FF_INLINK_IDX(inlink), fs->confidence);
350
351 av_log(ctx, AV_LOG_DEBUG, "words:");
352 for (i = 0; i < 5; i++) {
353 av_log(ctx, AV_LOG_DEBUG, " %d:", fs->words[i] );
354 av_log(ctx, AV_LOG_DEBUG, " %d", fs->words[i] / pot3[0] );
355 for (j = 1; j < 5; j++)
356 av_log(ctx, AV_LOG_DEBUG, ",%d", fs->words[i] % pot3[j-1] / pot3[j] );
357 av_log(ctx, AV_LOG_DEBUG, ";");
358 }
359 av_log(ctx, AV_LOG_DEBUG, "\n");
360
361 av_log(ctx, AV_LOG_DEBUG, "framesignature:");
362 for (i = 0; i < SIGELEM_SIZE/5; i++) {
363 av_log(ctx, AV_LOG_DEBUG, " %d", fs->framesig[i] / pot3[0] );
364 for (j = 1; j < 5; j++)
365 av_log(ctx, AV_LOG_DEBUG, ",%d", fs->framesig[i] % pot3[j-1] / pot3[j] );
366 }
367 av_log(ctx, AV_LOG_DEBUG, "\n");
368 }
369
370 if (FF_INLINK_IDX(inlink) == 0)
371 return ff_filter_frame(inlink->dst->outputs[0], picref);
372 return 1;
373 }
374
375 static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
376 {
377 FineSignature* fs;
378 CoarseSignature* cs;
379 int i, j;
380 FILE* f;
381 unsigned int pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
382
383 if (!sc->coarseend->last)
384 return AVERROR(EINVAL); // No frames ?
385
386 f = avpriv_fopen_utf8(filename, "w");
387 if (!f) {
388 int err = AVERROR(EINVAL);
389 av_log(ctx, AV_LOG_ERROR, "cannot open xml file %s: %s\n", filename, av_err2str(err));
390 return err;
391 }
392
393 /* header */
394 fprintf(f, "<?xml version='1.0' encoding='ASCII' ?>\n");
395 fprintf(f, "<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 schema/Mpeg7-2001.xsd\">\n");
396 fprintf(f, " <DescriptionUnit xsi:type=\"DescriptorCollectionType\">\n");
397 fprintf(f, " <Descriptor xsi:type=\"VideoSignatureType\">\n");
398 fprintf(f, " <VideoSignatureRegion>\n");
399 fprintf(f, " <VideoSignatureSpatialRegion>\n");
400 fprintf(f, " <Pixel>0 0 </Pixel>\n");
401 fprintf(f, " <Pixel>%d %d </Pixel>\n", sc->w - 1, sc->h - 1);
402 fprintf(f, " </VideoSignatureSpatialRegion>\n");
403 fprintf(f, " <StartFrameOfSpatialRegion>0</StartFrameOfSpatialRegion>\n");
404 /* hoping num is 1, other values are vague */
405 fprintf(f, " <MediaTimeUnit>%d</MediaTimeUnit>\n", sc->time_base.den / sc->time_base.num);
406 fprintf(f, " <MediaTimeOfSpatialRegion>\n");
407 fprintf(f, " <StartMediaTimeOfSpatialRegion>0</StartMediaTimeOfSpatialRegion>\n");
408 fprintf(f, " <EndMediaTimeOfSpatialRegion>%" PRIu64 "</EndMediaTimeOfSpatialRegion>\n", sc->coarseend->last->pts);
409 fprintf(f, " </MediaTimeOfSpatialRegion>\n");
410
411 /* coarsesignatures */
412 for (cs = sc->coarsesiglist; cs; cs = cs->next) {
413 fprintf(f, " <VSVideoSegment>\n");
414 fprintf(f, " <StartFrameOfSegment>%" PRIu32 "</StartFrameOfSegment>\n", cs->first->index);
415 fprintf(f, " <EndFrameOfSegment>%" PRIu32 "</EndFrameOfSegment>\n", cs->last->index);
416 fprintf(f, " <MediaTimeOfSegment>\n");
417 fprintf(f, " <StartMediaTimeOfSegment>%" PRIu64 "</StartMediaTimeOfSegment>\n", cs->first->pts);
418 fprintf(f, " <EndMediaTimeOfSegment>%" PRIu64 "</EndMediaTimeOfSegment>\n", cs->last->pts);
419 fprintf(f, " </MediaTimeOfSegment>\n");
420 for (i = 0; i < 5; i++) {
421 fprintf(f, " <BagOfWords>");
422 for (j = 0; j < 31; j++) {
423 uint8_t n = cs->data[i][j];
424 if (j < 30) {
425 fprintf(f, "%d %d %d %d %d %d %d %d ", (n & 0x80) >> 7,
426 (n & 0x40) >> 6,
427 (n & 0x20) >> 5,
428 (n & 0x10) >> 4,
429 (n & 0x08) >> 3,
430 (n & 0x04) >> 2,
431 (n & 0x02) >> 1,
432 (n & 0x01));
433 } else {
434 /* print only 3 bit in last byte */
435 fprintf(f, "%d %d %d ", (n & 0x80) >> 7,
436 (n & 0x40) >> 6,
437 (n & 0x20) >> 5);
438 }
439 }
440 fprintf(f, "</BagOfWords>\n");
441 }
442 fprintf(f, " </VSVideoSegment>\n");
443 }
444
445 /* finesignatures */
446 for (fs = sc->finesiglist; fs; fs = fs->next) {
447 fprintf(f, " <VideoFrame>\n");
448 fprintf(f, " <MediaTimeOfFrame>%" PRIu64 "</MediaTimeOfFrame>\n", fs->pts);
449 /* confidence */
450 fprintf(f, " <FrameConfidence>%d</FrameConfidence>\n", fs->confidence);
451 /* words */
452 fprintf(f, " <Word>");
453 for (i = 0; i < 5; i++) {
454 fprintf(f, "%d ", fs->words[i]);
455 if (i < 4) {
456 fprintf(f, " ");
457 }
458 }
459 fprintf(f, "</Word>\n");
460 /* framesignature */
461 fprintf(f, " <FrameSignature>");
462 for (i = 0; i< SIGELEM_SIZE/5; i++) {
463 if (i > 0) {
464 fprintf(f, " ");
465 }
466 fprintf(f, "%d ", fs->framesig[i] / pot3[0]);
467 for (j = 1; j < 5; j++)
468 fprintf(f, " %d ", fs->framesig[i] % pot3[j-1] / pot3[j] );
469 }
470 fprintf(f, "</FrameSignature>\n");
471 fprintf(f, " </VideoFrame>\n");
472 }
473 fprintf(f, " </VideoSignatureRegion>\n");
474 fprintf(f, " </Descriptor>\n");
475 fprintf(f, " </DescriptionUnit>\n");
476 fprintf(f, "</Mpeg7>\n");
477
478 fclose(f);
479 return 0;
480 }
481
482 static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
483 {
484 FILE* f;
485 FineSignature* fs;
486 CoarseSignature* cs;
487 uint32_t numofsegments = (sc->lastindex + 44)/45;
488 int i, j;
489 PutBitContext buf;
490 /* buffer + header + coarsesignatures + finesignature */
491 int len = (512 + 6 * 32 + 3*16 + 2 +
492 numofsegments * (4*32 + 1 + 5*243) +
493 sc->lastindex * (2 + 32 + 6*8 + 608)) / 8;
494 uint8_t* buffer = av_malloc_array(len, sizeof(uint8_t));
495 if (!buffer)
496 return AVERROR(ENOMEM);
497
498 f = avpriv_fopen_utf8(filename, "wb");
499 if (!f) {
500 int err = AVERROR(EINVAL);
501 av_log(ctx, AV_LOG_ERROR, "cannot open file %s: %s\n", filename, av_err2str(err));
502 av_freep(&buffer);
503 return err;
504 }
505 init_put_bits(&buf, buffer, len);
506
507 put_bits32(&buf, 1); /* NumOfSpatial Regions, only 1 supported */
508 put_bits(&buf, 1, 1); /* SpatialLocationFlag, always the whole image */
509 put_bits32(&buf, 0); /* PixelX,1 PixelY,1, 0,0 */
510 put_bits(&buf, 16, sc->w-1 & 0xFFFF); /* PixelX,2 */
511 put_bits(&buf, 16, sc->h-1 & 0xFFFF); /* PixelY,2 */
512 put_bits32(&buf, 0); /* StartFrameOfSpatialRegion */
513 put_bits32(&buf, sc->lastindex); /* NumOfFrames */
514 /* hoping num is 1, other values are vague */
515 /* den/num might be greater than 16 bit, so cutting it */
516 put_bits(&buf, 16, 0xFFFF & (sc->time_base.den / sc->time_base.num)); /* MediaTimeUnit */
517 put_bits(&buf, 1, 1); /* MediaTimeFlagOfSpatialRegion */
518 put_bits32(&buf, 0); /* StartMediaTimeOfSpatialRegion */
519 put_bits32(&buf, 0xFFFFFFFF & sc->coarseend->last->pts); /* EndMediaTimeOfSpatialRegion */
520 put_bits32(&buf, numofsegments); /* NumOfSegments */
521 /* coarsesignatures */
522 for (cs = sc->coarsesiglist; cs; cs = cs->next) {
523 put_bits32(&buf, cs->first->index); /* StartFrameOfSegment */
524 put_bits32(&buf, cs->last->index); /* EndFrameOfSegment */
525 put_bits(&buf, 1, 1); /* MediaTimeFlagOfSegment */
526 put_bits32(&buf, 0xFFFFFFFF & cs->first->pts); /* StartMediaTimeOfSegment */
527 put_bits32(&buf, 0xFFFFFFFF & cs->last->pts); /* EndMediaTimeOfSegment */
528 for (i = 0; i < 5; i++) {
529 /* put 243 bits ( = 7 * 32 + 19 = 8 * 28 + 19) into buffer */
530 for (j = 0; j < 30; j++) {
531 put_bits(&buf, 8, cs->data[i][j]);
532 }
533 put_bits(&buf, 3, cs->data[i][30] >> 5);
534 }
535 }
536 /* finesignatures */
537 put_bits(&buf, 1, 0); /* CompressionFlag, only 0 supported */
538 for (fs = sc->finesiglist; fs; fs = fs->next) {
539 put_bits(&buf, 1, 1); /* MediaTimeFlagOfFrame */
540 put_bits32(&buf, 0xFFFFFFFF & fs->pts); /* MediaTimeOfFrame */
541 put_bits(&buf, 8, fs->confidence); /* FrameConfidence */
542 for (i = 0; i < 5; i++) {
543 put_bits(&buf, 8, fs->words[i]); /* Words */
544 }
545 /* framesignature */
546 for (i = 0; i < SIGELEM_SIZE/5; i++) {
547 put_bits(&buf, 8, fs->framesig[i]);
548 }
549 }
550
551 flush_put_bits(&buf);
552 fwrite(buffer, 1, put_bytes_output(&buf), f);
553 fclose(f);
554 av_freep(&buffer);
555 return 0;
556 }
557
558 static int export(AVFilterContext *ctx, StreamContext *sc, int input)
559 {
560 SignatureContext* sic = ctx->priv;
561 char filename[1024];
562
563 if (sic->nb_inputs > 1) {
564 /* error already handled */
565 av_assert0(av_get_frame_filename(filename, sizeof(filename), sic->filename, input) == 0);
566 } else {
567 if (av_strlcpy(filename, sic->filename, sizeof(filename)) >= sizeof(filename))
568 return AVERROR(EINVAL);
569 }
570 if (sic->format == FORMAT_XML) {
571 return xml_export(ctx, sc, filename);
572 } else {
573 return binary_export(ctx, sc, filename);
574 }
575 }
576
577 static int request_frame(AVFilterLink *outlink)
578 {
579 AVFilterContext *ctx = outlink->src;
580 SignatureContext *sic = ctx->priv;
581 StreamContext *sc, *sc2;
582 MatchingInfo match;
583 int i, j, ret;
584 int lookup = 1; /* indicates wheather EOF of all files is reached */
585
586 /* process all inputs */
587 for (i = 0; i < sic->nb_inputs; i++){
588 sc = &(sic->streamcontexts[i]);
589
590 ret = ff_request_frame(ctx->inputs[i]);
591
592 /* return if unexpected error occurs in input stream */
593 if (ret < 0 && ret != AVERROR_EOF)
594 return ret;
595
596 /* export signature at EOF */
597 if (ret == AVERROR_EOF && !sc->exported) {
598 /* export if wanted */
599 if (strlen(sic->filename) > 0) {
600 if (export(ctx, sc, i) < 0)
601 return ret;
602 }
603 sc->exported = 1;
604 }
605 lookup &= sc->exported;
606 }
607
608 /* signature lookup */
609 if (lookup && sic->mode != MODE_OFF) {
610 /* iterate over every pair */
611 for (i = 0; i < sic->nb_inputs; i++) {
612 sc = &(sic->streamcontexts[i]);
613 for (j = i+1; j < sic->nb_inputs; j++) {
614 sc2 = &(sic->streamcontexts[j]);
615 match = lookup_signatures(ctx, sic, sc, sc2, sic->mode);
616 if (match.score != 0) {
617 av_log(ctx, AV_LOG_INFO, "matching of video %d at %f and %d at %f, %d frames matching\n",
618 i, ((double) match.first->pts * sc->time_base.num) / sc->time_base.den,
619 j, ((double) match.second->pts * sc2->time_base.num) / sc2->time_base.den,
620 match.matchframes);
621 if (match.whole)
622 av_log(ctx, AV_LOG_INFO, "whole video matching\n");
623 } else {
624 av_log(ctx, AV_LOG_INFO, "no matching of video %d and %d\n", i, j);
625 }
626 }
627 }
628 }
629
630 return ret;
631 }
632
633 static av_cold int init(AVFilterContext *ctx)
634 {
635
636 SignatureContext *sic = ctx->priv;
637 StreamContext *sc;
638 int i, ret;
639 char tmp[1024];
640
641 sic->streamcontexts = av_mallocz(sic->nb_inputs * sizeof(StreamContext));
642 if (!sic->streamcontexts)
643 return AVERROR(ENOMEM);
644
645 for (i = 0; i < sic->nb_inputs; i++) {
646 AVFilterPad pad = {
647 .type = AVMEDIA_TYPE_VIDEO,
648 .name = av_asprintf("in%d", i),
649 .config_props = config_input,
650 .filter_frame = filter_frame,
651 };
652
653 if (!pad.name)
654 return AVERROR(ENOMEM);
655 if ((ret = ff_append_inpad_free_name(ctx, &pad)) < 0)
656 return ret;
657
658 sc = &(sic->streamcontexts[i]);
659
660 sc->lastindex = 0;
661 sc->finesiglist = av_mallocz(sizeof(FineSignature));
662 if (!sc->finesiglist)
663 return AVERROR(ENOMEM);
664 sc->curfinesig = NULL;
665
666 sc->coarsesiglist = av_mallocz(sizeof(CoarseSignature));
667 if (!sc->coarsesiglist)
668 return AVERROR(ENOMEM);
669 sc->curcoarsesig1 = sc->coarsesiglist;
670 sc->coarseend = sc->coarsesiglist;
671 sc->coarsecount = 0;
672 sc->midcoarse = 0;
673 }
674
675 /* check filename */
676 if (sic->nb_inputs > 1 && strlen(sic->filename) > 0 && av_get_frame_filename(tmp, sizeof(tmp), sic->filename, 0) == -1) {
677 av_log(ctx, AV_LOG_ERROR, "The filename must contain %%d or %%0nd, if you have more than one input.\n");
678 return AVERROR(EINVAL);
679 }
680
681 return 0;
682 }
683
684
685
686 static av_cold void uninit(AVFilterContext *ctx)
687 {
688 SignatureContext *sic = ctx->priv;
689 StreamContext *sc;
690 void* tmp;
691 FineSignature* finsig;
692 CoarseSignature* cousig;
693 int i;
694
695
696 /* free the lists */
697 if (sic->streamcontexts != NULL) {
698 for (i = 0; i < sic->nb_inputs; i++) {
699 sc = &(sic->streamcontexts[i]);
700 finsig = sc->finesiglist;
701 cousig = sc->coarsesiglist;
702
703 while (finsig) {
704 tmp = finsig;
705 finsig = finsig->next;
706 av_freep(&tmp);
707 }
708 sc->finesiglist = NULL;
709
710 while (cousig) {
711 tmp = cousig;
712 cousig = cousig->next;
713 av_freep(&tmp);
714 }
715 sc->coarsesiglist = NULL;
716 }
717 av_freep(&sic->streamcontexts);
718 }
719 }
720
721 static int config_output(AVFilterLink *outlink)
722 {
723 AVFilterContext *ctx = outlink->src;
724 AVFilterLink *inlink = ctx->inputs[0];
725 FilterLink *il = ff_filter_link(inlink);
726 FilterLink *ol = ff_filter_link(outlink);
727
728 outlink->time_base = inlink->time_base;
729 ol->frame_rate = il->frame_rate;
730 outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
731 outlink->w = inlink->w;
732 outlink->h = inlink->h;
733
734 return 0;
735 }
736
737 static const AVFilterPad signature_outputs[] = {
738 {
739 .name = "default",
740 .type = AVMEDIA_TYPE_VIDEO,
741 .request_frame = request_frame,
742 .config_props = config_output,
743 },
744 };
745
746 const FFFilter ff_vf_signature = {
747 .p.name = "signature",
748 .p.description = NULL_IF_CONFIG_SMALL("Calculate the MPEG-7 video signature"),
749 .p.priv_class = &signature_class,
750 .p.inputs = NULL,
751 .p.flags = AVFILTER_FLAG_DYNAMIC_INPUTS,
752 .priv_size = sizeof(SignatureContext),
753 .init = init,
754 .uninit = uninit,
755 FILTER_OUTPUTS(signature_outputs),
756 FILTER_PIXFMTS_ARRAY(pix_fmts),
757 };
758