LCOV - code coverage report
Current view: top level - libavfilter - vf_signature.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 402 0.0 %
Date: 2017-12-15 18:13:28 Functions: 0 14 0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2017 Gerion Entrup
       3             :  *
       4             :  * This file is part of FFmpeg.
       5             :  *
       6             :  * FFmpeg is free software; you can redistribute it and/or modify
       7             :  * it under the terms of the GNU General Public License as published by
       8             :  * the Free Software Foundation; either version 2 of the License, or
       9             :  * (at your option) any later version.
      10             :  *
      11             :  * FFmpeg is distributed in the hope that it will be useful,
      12             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14             :  * GNU General Public License for more details.
      15             :  *
      16             :  * You should have received a copy of the GNU General Public License along
      17             :  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
      18             :  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      19             :  */
      20             : 
      21             : /**
      22             :  * @file
      23             :  * MPEG-7 video signature calculation and lookup filter
      24             :  * @see http://epubs.surrey.ac.uk/531590/1/MPEG-7%20Video%20Signature%20Author%27s%20Copy.pdf
      25             :  */
      26             : 
      27             : #include <float.h>
      28             : #include "libavcodec/put_bits.h"
      29             : #include "libavformat/avformat.h"
      30             : #include "libavutil/opt.h"
      31             : #include "libavutil/avstring.h"
      32             : #include "libavutil/intreadwrite.h"
      33             : #include "libavutil/timestamp.h"
      34             : #include "avfilter.h"
      35             : #include "internal.h"
      36             : #include "signature.h"
      37             : #include "signature_lookup.c"
      38             : 
      39             : #define OFFSET(x) offsetof(SignatureContext, x)
      40             : #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
      41             : #define BLOCK_LCM (int64_t) 476985600
      42             : 
      43             : static const AVOption signature_options[] = {
      44             :     { "detectmode", "set the detectmode",
      45             :         OFFSET(mode),         AV_OPT_TYPE_INT,    {.i64 = MODE_OFF}, 0, NB_LOOKUP_MODE-1, FLAGS, "mode" },
      46             :         { "off",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_OFF},  0, 0, .flags = FLAGS, "mode" },
      47             :         { "full", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FULL}, 0, 0, .flags = FLAGS, "mode" },
      48             :         { "fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FAST}, 0, 0, .flags = FLAGS, "mode" },
      49             :     { "nb_inputs",  "number of inputs",
      50             :         OFFSET(nb_inputs),    AV_OPT_TYPE_INT,    {.i64 = 1},        1, INT_MAX,          FLAGS },
      51             :     { "filename",   "filename for output files",
      52             :         OFFSET(filename),     AV_OPT_TYPE_STRING, {.str = ""},       0, NB_FORMATS-1,     FLAGS },
      53             :     { "format",     "set output format",
      54             :         OFFSET(format),       AV_OPT_TYPE_INT,    {.i64 = FORMAT_BINARY}, 0, 1,           FLAGS , "format" },
      55             :         { "binary", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_BINARY}, 0, 0, FLAGS, "format" },
      56             :         { "xml",    0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_XML},    0, 0, FLAGS, "format" },
      57             :     { "th_d",       "threshold to detect one word as similar",
      58             :         OFFSET(thworddist),   AV_OPT_TYPE_INT,    {.i64 = 9000},     1, INT_MAX,          FLAGS },
      59             :     { "th_dc",      "threshold to detect all words as similar",
      60             :         OFFSET(thcomposdist), AV_OPT_TYPE_INT,    {.i64 = 60000},    1, INT_MAX,          FLAGS },
      61             :     { "th_xh",      "threshold to detect frames as similar",
      62             :         OFFSET(thl1),         AV_OPT_TYPE_INT,    {.i64 = 116},      1, INT_MAX,          FLAGS },
      63             :     { "th_di",      "minimum length of matching sequence in frames",
      64             :         OFFSET(thdi),         AV_OPT_TYPE_INT,    {.i64 = 0},        0, INT_MAX,          FLAGS },
      65             :     { "th_it",      "threshold for relation of good to all frames",
      66             :         OFFSET(thit),         AV_OPT_TYPE_DOUBLE, {.dbl = 0.5},    0.0, 1.0,              FLAGS },
      67             :     { NULL }
      68             : };
      69             : 
      70             : AVFILTER_DEFINE_CLASS(signature);
      71             : 
      72           0 : static int query_formats(AVFilterContext *ctx)
      73             : {
      74             :     /* all formats with a separate gray value */
      75             :     static const enum AVPixelFormat pix_fmts[] = {
      76             :         AV_PIX_FMT_GRAY8,
      77             :         AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
      78             :         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
      79             :         AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
      80             :         AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P,
      81             :         AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P,
      82             :         AV_PIX_FMT_YUVJ440P,
      83             :         AV_PIX_FMT_NV12, AV_PIX_FMT_NV21,
      84             :         AV_PIX_FMT_NONE
      85             :     };
      86             : 
      87           0 :     return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
      88             : }
      89             : 
      90           0 : static int config_input(AVFilterLink *inlink)
      91             : {
      92           0 :     AVFilterContext *ctx = inlink->dst;
      93           0 :     SignatureContext *sic = ctx->priv;
      94           0 :     StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
      95             : 
      96           0 :     sc->time_base = inlink->time_base;
      97             :     /* test for overflow */
      98           0 :     sc->divide = (((uint64_t) inlink->w/32) * (inlink->w/32 + 1) * (inlink->h/32 * inlink->h/32 + 1) > INT64_MAX / (BLOCK_LCM * 255));
      99           0 :     if (sc->divide) {
     100           0 :         av_log(ctx, AV_LOG_WARNING, "Input dimension too high for precise calculation, numbers will be rounded.\n");
     101             :     }
     102           0 :     sc->w = inlink->w;
     103           0 :     sc->h = inlink->h;
     104           0 :     return 0;
     105             : }
     106             : 
     107           0 : static int get_block_size(const Block *b)
     108             : {
     109           0 :     return (b->to.y - b->up.y + 1) * (b->to.x - b->up.x + 1);
     110             : }
     111             : 
     112           0 : static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
     113             : {
     114           0 :     uint64_t sum = 0;
     115             : 
     116             :     int x0, y0, x1, y1;
     117             : 
     118           0 :     x0 = b->up.x;
     119           0 :     y0 = b->up.y;
     120           0 :     x1 = b->to.x;
     121           0 :     y1 = b->to.y;
     122             : 
     123           0 :     if (x0-1 >= 0 && y0-1 >= 0) {
     124           0 :         sum = intpic[y1][x1] + intpic[y0-1][x0-1] - intpic[y1][x0-1] - intpic[y0-1][x1];
     125           0 :     } else if (x0-1 >= 0) {
     126           0 :         sum = intpic[y1][x1] - intpic[y1][x0-1];
     127           0 :     } else if (y0-1 >= 0) {
     128           0 :         sum = intpic[y1][x1] - intpic[y0-1][x1];
     129             :     } else {
     130           0 :         sum = intpic[y1][x1];
     131             :     }
     132           0 :     return sum;
     133             : }
     134             : 
     135           0 : static int cmp(const uint64_t *a, const uint64_t *b)
     136             : {
     137           0 :     return *a < *b ? -1 : ( *a > *b ? 1 : 0 );
     138             : }
     139             : 
     140             : /**
     141             :  * sets the bit at position pos to 1 in data
     142             :  */
     143           0 : static void set_bit(uint8_t* data, size_t pos)
     144             : {
     145           0 :     uint8_t mask = 1 << 7-(pos%8);
     146           0 :     data[pos/8] |= mask;
     147           0 : }
     148             : 
     149           0 : static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
     150             : {
     151           0 :     AVFilterContext *ctx = inlink->dst;
     152           0 :     SignatureContext *sic = ctx->priv;
     153           0 :     StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
     154             :     FineSignature* fs;
     155             : 
     156             :     static const uint8_t pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
     157             :     /* indexes of words : 210,217,219,274,334  44,175,233,270,273  57,70,103,237,269  100,285,295,337,354  101,102,111,275,296
     158             :     s2usw = sorted to unsorted wordvec: 44 is at index 5, 57 at index 10...
     159             :     */
     160             :     static const unsigned int wordvec[25] = {44,57,70,100,101,102,103,111,175,210,217,219,233,237,269,270,273,274,275,285,295,296,334,337,354};
     161             :     static const uint8_t      s2usw[25]   = { 5,10,11, 15, 20, 21, 12, 22,  6,  0,  1,  2,  7, 13, 14,  8,  9,  3, 23, 16, 17, 24,  4, 18, 19};
     162             : 
     163           0 :     uint8_t wordt2b[5] = { 0, 0, 0, 0, 0 }; /* word ternary to binary */
     164             :     uint64_t intpic[32][32];
     165             :     uint64_t rowcount;
     166           0 :     uint8_t *p = picref->data[0];
     167             :     int inti, intj;
     168             :     int *intjlut;
     169             : 
     170             :     uint64_t conflist[DIFFELEM_SIZE];
     171           0 :     int f = 0, g = 0, w = 0;
     172           0 :     int32_t dh1 = 1, dh2 = 1, dw1 = 1, dw2 = 1, a, b;
     173             :     int64_t denom;
     174             :     int i, j, k, ternary;
     175             :     uint64_t blocksum;
     176             :     int blocksize;
     177             :     int64_t th; /* threshold */
     178             :     int64_t sum;
     179             : 
     180           0 :     int64_t precfactor = (sc->divide) ? 65536 : BLOCK_LCM;
     181             : 
     182             :     /* initialize fs */
     183           0 :     if (sc->curfinesig) {
     184           0 :         fs = av_mallocz(sizeof(FineSignature));
     185           0 :         if (!fs)
     186           0 :             return AVERROR(ENOMEM);
     187           0 :         sc->curfinesig->next = fs;
     188           0 :         fs->prev = sc->curfinesig;
     189           0 :         sc->curfinesig = fs;
     190             :     } else {
     191           0 :         fs = sc->curfinesig = sc->finesiglist;
     192           0 :         sc->curcoarsesig1->first = fs;
     193             :     }
     194             : 
     195           0 :     fs->pts = picref->pts;
     196           0 :     fs->index = sc->lastindex++;
     197             : 
     198           0 :     memset(intpic, 0, sizeof(uint64_t)*32*32);
     199           0 :     intjlut = av_malloc_array(inlink->w, sizeof(int));
     200           0 :     if (!intjlut)
     201           0 :         return AVERROR(ENOMEM);
     202           0 :     for (i = 0; i < inlink->w; i++) {
     203           0 :         intjlut[i] = (i*32)/inlink->w;
     204             :     }
     205             : 
     206           0 :     for (i = 0; i < inlink->h; i++) {
     207           0 :         inti = (i*32)/inlink->h;
     208           0 :         for (j = 0; j < inlink->w; j++) {
     209           0 :             intj = intjlut[j];
     210           0 :             intpic[inti][intj] += p[j];
     211             :         }
     212           0 :         p += picref->linesize[0];
     213             :     }
     214           0 :     av_freep(&intjlut);
     215             : 
     216             :     /* The following calculates a summed area table (intpic) and brings the numbers
     217             :      * in intpic to the same denominator.
     218             :      * So you only have to handle the numinator in the following sections.
     219             :      */
     220           0 :     dh1 = inlink->h / 32;
     221           0 :     if (inlink->h % 32)
     222           0 :         dh2 = dh1 + 1;
     223           0 :     dw1 = inlink->w / 32;
     224           0 :     if (inlink->w % 32)
     225           0 :         dw2 = dw1 + 1;
     226           0 :     denom = (sc->divide) ? dh1 * dh2 * dw1 * dw2 : 1;
     227             : 
     228           0 :     for (i = 0; i < 32; i++) {
     229           0 :         rowcount = 0;
     230           0 :         a = 1;
     231           0 :         if (dh2 > 1) {
     232           0 :             a = ((inlink->h*(i+1))%32 == 0) ? (inlink->h*(i+1))/32 - 1 : (inlink->h*(i+1))/32;
     233           0 :             a -= ((inlink->h*i)%32 == 0) ? (inlink->h*i)/32 - 1 : (inlink->h*i)/32;
     234           0 :             a = (a == dh1)? dh2 : dh1;
     235             :         }
     236           0 :         for (j = 0; j < 32; j++) {
     237           0 :             b = 1;
     238           0 :             if (dw2 > 1) {
     239           0 :                 b = ((inlink->w*(j+1))%32 == 0) ? (inlink->w*(j+1))/32 - 1 : (inlink->w*(j+1))/32;
     240           0 :                 b -= ((inlink->w*j)%32 == 0) ? (inlink->w*j)/32 - 1 : (inlink->w*j)/32;
     241           0 :                 b = (b == dw1)? dw2 : dw1;
     242             :             }
     243           0 :             rowcount += intpic[i][j] * a * b * precfactor / denom;
     244           0 :             if (i > 0) {
     245           0 :                 intpic[i][j] = intpic[i-1][j] + rowcount;
     246             :             } else {
     247           0 :                 intpic[i][j] = rowcount;
     248             :             }
     249             :         }
     250             :     }
     251             : 
     252           0 :     denom = (sc->divide) ? 1 : dh1 * dh2 * dw1 * dw2;
     253             : 
     254           0 :     for (i = 0; i < ELEMENT_COUNT; i++) {
     255           0 :         const ElemCat* elemcat = elements[i];
     256             :         int64_t* elemsignature;
     257             :         uint64_t* sortsignature;
     258             : 
     259           0 :         elemsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));
     260           0 :         if (!elemsignature)
     261           0 :             return AVERROR(ENOMEM);
     262           0 :         sortsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));
     263           0 :         if (!sortsignature) {
     264           0 :             av_freep(&elemsignature);
     265           0 :             return AVERROR(ENOMEM);
     266             :         }
     267             : 
     268           0 :         for (j = 0; j < elemcat->elem_count; j++) {
     269           0 :             blocksum = 0;
     270           0 :             blocksize = 0;
     271           0 :             for (k = 0; k < elemcat->left_count; k++) {
     272           0 :                 blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
     273           0 :                 blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
     274             :             }
     275           0 :             sum = blocksum / blocksize;
     276           0 :             if (elemcat->av_elem) {
     277           0 :                 sum -= 128 * precfactor * denom;
     278             :             } else {
     279           0 :                 blocksum = 0;
     280           0 :                 blocksize = 0;
     281           0 :                 for (; k < elemcat->block_count; k++) {
     282           0 :                     blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
     283           0 :                     blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
     284             :                 }
     285           0 :                 sum -= blocksum / blocksize;
     286           0 :                 conflist[g++] = FFABS(sum * 8 / (precfactor * denom));
     287             :             }
     288             : 
     289           0 :             elemsignature[j] = sum;
     290           0 :             sortsignature[j] = FFABS(sum);
     291             :         }
     292             : 
     293             :         /* get threshold */
     294           0 :         qsort(sortsignature, elemcat->elem_count, sizeof(uint64_t), (void*) cmp);
     295           0 :         th = sortsignature[(int) (elemcat->elem_count*0.333)];
     296             : 
     297             :         /* ternarize */
     298           0 :         for (j = 0; j < elemcat->elem_count; j++) {
     299           0 :             if (elemsignature[j] < -th) {
     300           0 :                 ternary = 0;
     301           0 :             } else if (elemsignature[j] <= th) {
     302           0 :                 ternary = 1;
     303             :             } else {
     304           0 :                 ternary = 2;
     305             :             }
     306           0 :             fs->framesig[f/5] += ternary * pot3[f%5];
     307             : 
     308           0 :             if (f == wordvec[w]) {
     309           0 :                 fs->words[s2usw[w]/5] += ternary * pot3[wordt2b[s2usw[w]/5]++];
     310           0 :                 if (w < 24)
     311           0 :                     w++;
     312             :             }
     313           0 :             f++;
     314             :         }
     315           0 :         av_freep(&elemsignature);
     316           0 :         av_freep(&sortsignature);
     317             :     }
     318             : 
     319             :     /* confidence */
     320           0 :     qsort(conflist, DIFFELEM_SIZE, sizeof(uint64_t), (void*) cmp);
     321           0 :     fs->confidence = FFMIN(conflist[DIFFELEM_SIZE/2], 255);
     322             : 
     323             :     /* coarsesignature */
     324           0 :     if (sc->coarsecount == 0) {
     325           0 :         if (sc->curcoarsesig2) {
     326           0 :             sc->curcoarsesig1 = av_mallocz(sizeof(CoarseSignature));
     327           0 :             if (!sc->curcoarsesig1)
     328           0 :                 return AVERROR(ENOMEM);
     329           0 :             sc->curcoarsesig1->first = fs;
     330           0 :             sc->curcoarsesig2->next = sc->curcoarsesig1;
     331           0 :             sc->coarseend = sc->curcoarsesig1;
     332             :         }
     333             :     }
     334           0 :     if (sc->coarsecount == 45) {
     335           0 :         sc->midcoarse = 1;
     336           0 :         sc->curcoarsesig2 = av_mallocz(sizeof(CoarseSignature));
     337           0 :         if (!sc->curcoarsesig2)
     338           0 :             return AVERROR(ENOMEM);
     339           0 :         sc->curcoarsesig2->first = fs;
     340           0 :         sc->curcoarsesig1->next = sc->curcoarsesig2;
     341           0 :         sc->coarseend = sc->curcoarsesig2;
     342             :     }
     343           0 :     for (i = 0; i < 5; i++) {
     344           0 :         set_bit(sc->curcoarsesig1->data[i], fs->words[i]);
     345             :     }
     346             :     /* assuming the actual frame is the last */
     347           0 :     sc->curcoarsesig1->last = fs;
     348           0 :     if (sc->midcoarse) {
     349           0 :         for (i = 0; i < 5; i++) {
     350           0 :             set_bit(sc->curcoarsesig2->data[i], fs->words[i]);
     351             :         }
     352           0 :         sc->curcoarsesig2->last = fs;
     353             :     }
     354             : 
     355           0 :     sc->coarsecount = (sc->coarsecount+1)%90;
     356             : 
     357             :     /* debug printing finesignature */
     358           0 :     if (av_log_get_level() == AV_LOG_DEBUG) {
     359           0 :         av_log(ctx, AV_LOG_DEBUG, "input %d, confidence: %d\n", FF_INLINK_IDX(inlink), fs->confidence);
     360             : 
     361           0 :         av_log(ctx, AV_LOG_DEBUG, "words:");
     362           0 :         for (i = 0; i < 5; i++) {
     363           0 :             av_log(ctx, AV_LOG_DEBUG, " %d:", fs->words[i] );
     364           0 :             av_log(ctx, AV_LOG_DEBUG, " %d", fs->words[i] / pot3[0] );
     365           0 :             for (j = 1; j < 5; j++)
     366           0 :                 av_log(ctx, AV_LOG_DEBUG, ",%d", fs->words[i] % pot3[j-1] / pot3[j] );
     367           0 :             av_log(ctx, AV_LOG_DEBUG, ";");
     368             :         }
     369           0 :         av_log(ctx, AV_LOG_DEBUG, "\n");
     370             : 
     371           0 :         av_log(ctx, AV_LOG_DEBUG, "framesignature:");
     372           0 :         for (i = 0; i < SIGELEM_SIZE/5; i++) {
     373           0 :             av_log(ctx, AV_LOG_DEBUG, " %d", fs->framesig[i] / pot3[0] );
     374           0 :             for (j = 1; j < 5; j++)
     375           0 :                 av_log(ctx, AV_LOG_DEBUG, ",%d", fs->framesig[i] % pot3[j-1] / pot3[j] );
     376             :         }
     377           0 :         av_log(ctx, AV_LOG_DEBUG, "\n");
     378             :     }
     379             : 
     380           0 :     if (FF_INLINK_IDX(inlink) == 0)
     381           0 :         return ff_filter_frame(inlink->dst->outputs[0], picref);
     382           0 :     return 1;
     383             : }
     384             : 
     385           0 : static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
     386             : {
     387             :     FineSignature* fs;
     388             :     CoarseSignature* cs;
     389             :     int i, j;
     390             :     FILE* f;
     391           0 :     unsigned int pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
     392             : 
     393           0 :     f = fopen(filename, "w");
     394           0 :     if (!f) {
     395           0 :         int err = AVERROR(EINVAL);
     396             :         char buf[128];
     397           0 :         av_strerror(err, buf, sizeof(buf));
     398           0 :         av_log(ctx, AV_LOG_ERROR, "cannot open xml file %s: %s\n", filename, buf);
     399           0 :         return err;
     400             :     }
     401             : 
     402             :     /* header */
     403           0 :     fprintf(f, "<?xml version='1.0' encoding='ASCII' ?>\n");
     404           0 :     fprintf(f, "<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 schema/Mpeg7-2001.xsd\">\n");
     405           0 :     fprintf(f, "  <DescriptionUnit xsi:type=\"DescriptorCollectionType\">\n");
     406           0 :     fprintf(f, "    <Descriptor xsi:type=\"VideoSignatureType\">\n");
     407           0 :     fprintf(f, "      <VideoSignatureRegion>\n");
     408           0 :     fprintf(f, "        <VideoSignatureSpatialRegion>\n");
     409           0 :     fprintf(f, "          <Pixel>0 0 </Pixel>\n");
     410           0 :     fprintf(f, "          <Pixel>%d %d </Pixel>\n", sc->w - 1, sc->h - 1);
     411           0 :     fprintf(f, "        </VideoSignatureSpatialRegion>\n");
     412           0 :     fprintf(f, "        <StartFrameOfSpatialRegion>0</StartFrameOfSpatialRegion>\n");
     413             :     /* hoping num is 1, other values are vague */
     414           0 :     fprintf(f, "        <MediaTimeUnit>%d</MediaTimeUnit>\n", sc->time_base.den / sc->time_base.num);
     415           0 :     fprintf(f, "        <MediaTimeOfSpatialRegion>\n");
     416           0 :     fprintf(f, "          <StartMediaTimeOfSpatialRegion>0</StartMediaTimeOfSpatialRegion>\n");
     417           0 :     fprintf(f, "          <EndMediaTimeOfSpatialRegion>%" PRIu64 "</EndMediaTimeOfSpatialRegion>\n", sc->coarseend->last->pts);
     418           0 :     fprintf(f, "        </MediaTimeOfSpatialRegion>\n");
     419             : 
     420             :     /* coarsesignatures */
     421           0 :     for (cs = sc->coarsesiglist; cs; cs = cs->next) {
     422           0 :         fprintf(f, "        <VSVideoSegment>\n");
     423           0 :         fprintf(f, "          <StartFrameOfSegment>%" PRIu32 "</StartFrameOfSegment>\n", cs->first->index);
     424           0 :         fprintf(f, "          <EndFrameOfSegment>%" PRIu32 "</EndFrameOfSegment>\n", cs->last->index);
     425           0 :         fprintf(f, "          <MediaTimeOfSegment>\n");
     426           0 :         fprintf(f, "            <StartMediaTimeOfSegment>%" PRIu64 "</StartMediaTimeOfSegment>\n", cs->first->pts);
     427           0 :         fprintf(f, "            <EndMediaTimeOfSegment>%" PRIu64 "</EndMediaTimeOfSegment>\n", cs->last->pts);
     428           0 :         fprintf(f, "          </MediaTimeOfSegment>\n");
     429           0 :         for (i = 0; i < 5; i++) {
     430           0 :             fprintf(f, "          <BagOfWords>");
     431           0 :             for (j = 0; j < 31; j++) {
     432           0 :                 uint8_t n = cs->data[i][j];
     433           0 :                 if (j < 30) {
     434           0 :                     fprintf(f, "%d  %d  %d  %d  %d  %d  %d  %d  ", (n & 0x80) >> 7,
     435           0 :                                                                    (n & 0x40) >> 6,
     436           0 :                                                                    (n & 0x20) >> 5,
     437           0 :                                                                    (n & 0x10) >> 4,
     438           0 :                                                                    (n & 0x08) >> 3,
     439           0 :                                                                    (n & 0x04) >> 2,
     440           0 :                                                                    (n & 0x02) >> 1,
     441             :                                                                    (n & 0x01));
     442             :                 } else {
     443             :                     /* print only 3 bit in last byte */
     444           0 :                     fprintf(f, "%d  %d  %d ", (n & 0x80) >> 7,
     445           0 :                                               (n & 0x40) >> 6,
     446           0 :                                               (n & 0x20) >> 5);
     447             :                 }
     448             :             }
     449           0 :             fprintf(f, "</BagOfWords>\n");
     450             :         }
     451           0 :         fprintf(f, "        </VSVideoSegment>\n");
     452             :     }
     453             : 
     454             :     /* finesignatures */
     455           0 :     for (fs = sc->finesiglist; fs; fs = fs->next) {
     456           0 :         fprintf(f, "        <VideoFrame>\n");
     457           0 :         fprintf(f, "          <MediaTimeOfFrame>%" PRIu64 "</MediaTimeOfFrame>\n", fs->pts);
     458             :         /* confidence */
     459           0 :         fprintf(f, "          <FrameConfidence>%d</FrameConfidence>\n", fs->confidence);
     460             :         /* words */
     461           0 :         fprintf(f, "          <Word>");
     462           0 :         for (i = 0; i < 5; i++) {
     463           0 :             fprintf(f, "%d ", fs->words[i]);
     464           0 :             if (i < 4) {
     465           0 :                 fprintf(f, " ");
     466             :             }
     467             :         }
     468           0 :         fprintf(f, "</Word>\n");
     469             :         /* framesignature */
     470           0 :         fprintf(f, "          <FrameSignature>");
     471           0 :         for (i = 0; i< SIGELEM_SIZE/5; i++) {
     472           0 :             if (i > 0) {
     473           0 :                 fprintf(f, " ");
     474             :             }
     475           0 :             fprintf(f, "%d ", fs->framesig[i] / pot3[0]);
     476           0 :             for (j = 1; j < 5; j++)
     477           0 :                 fprintf(f, " %d ", fs->framesig[i] % pot3[j-1] / pot3[j] );
     478             :         }
     479           0 :         fprintf(f, "</FrameSignature>\n");
     480           0 :         fprintf(f, "        </VideoFrame>\n");
     481             :     }
     482           0 :     fprintf(f, "      </VideoSignatureRegion>\n");
     483           0 :     fprintf(f, "    </Descriptor>\n");
     484           0 :     fprintf(f, "  </DescriptionUnit>\n");
     485           0 :     fprintf(f, "</Mpeg7>\n");
     486             : 
     487           0 :     fclose(f);
     488           0 :     return 0;
     489             : }
     490             : 
     491           0 : static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
     492             : {
     493             :     FILE* f;
     494             :     FineSignature* fs;
     495             :     CoarseSignature* cs;
     496           0 :     uint32_t numofsegments = (sc->lastindex + 44)/45;
     497             :     int i, j;
     498             :     PutBitContext buf;
     499             :     /* buffer + header + coarsesignatures + finesignature */
     500           0 :     int len = (512 + 6 * 32 + 3*16 + 2 +
     501           0 :         numofsegments * (4*32 + 1 + 5*243) +
     502           0 :         sc->lastindex * (2 + 32 + 6*8 + 608)) / 8;
     503           0 :     uint8_t* buffer = av_malloc_array(len, sizeof(uint8_t));
     504           0 :     if (!buffer)
     505           0 :         return AVERROR(ENOMEM);
     506             : 
     507           0 :     f = fopen(filename, "wb");
     508           0 :     if (!f) {
     509           0 :         int err = AVERROR(EINVAL);
     510             :         char buf[128];
     511           0 :         av_strerror(err, buf, sizeof(buf));
     512           0 :         av_log(ctx, AV_LOG_ERROR, "cannot open file %s: %s\n", filename, buf);
     513           0 :         av_freep(&buffer);
     514           0 :         return err;
     515             :     }
     516           0 :     init_put_bits(&buf, buffer, len);
     517             : 
     518           0 :     put_bits32(&buf, 1); /* NumOfSpatial Regions, only 1 supported */
     519           0 :     put_bits(&buf, 1, 1); /* SpatialLocationFlag, always the whole image */
     520           0 :     put_bits32(&buf, 0); /* PixelX,1 PixelY,1, 0,0 */
     521           0 :     put_bits(&buf, 16, sc->w-1 & 0xFFFF); /* PixelX,2 */
     522           0 :     put_bits(&buf, 16, sc->h-1 & 0xFFFF); /* PixelY,2 */
     523           0 :     put_bits32(&buf, 0); /* StartFrameOfSpatialRegion */
     524           0 :     put_bits32(&buf, sc->lastindex); /* NumOfFrames */
     525             :     /* hoping num is 1, other values are vague */
     526             :     /* den/num might be greater than 16 bit, so cutting it */
     527           0 :     put_bits(&buf, 16, 0xFFFF & (sc->time_base.den / sc->time_base.num)); /* MediaTimeUnit */
     528           0 :     put_bits(&buf, 1, 1); /* MediaTimeFlagOfSpatialRegion */
     529           0 :     put_bits32(&buf, 0); /* StartMediaTimeOfSpatialRegion */
     530           0 :     put_bits32(&buf, 0xFFFFFFFF & sc->coarseend->last->pts); /* EndMediaTimeOfSpatialRegion */
     531           0 :     put_bits32(&buf, numofsegments); /* NumOfSegments */
     532             :     /* coarsesignatures */
     533           0 :     for (cs = sc->coarsesiglist; cs; cs = cs->next) {
     534           0 :         put_bits32(&buf, cs->first->index); /* StartFrameOfSegment */
     535           0 :         put_bits32(&buf, cs->last->index); /* EndFrameOfSegment */
     536           0 :         put_bits(&buf, 1, 1); /* MediaTimeFlagOfSegment */
     537           0 :         put_bits32(&buf, 0xFFFFFFFF & cs->first->pts); /* StartMediaTimeOfSegment */
     538           0 :         put_bits32(&buf, 0xFFFFFFFF & cs->last->pts); /* EndMediaTimeOfSegment */
     539           0 :         for (i = 0; i < 5; i++) {
     540             :             /* put 243 bits ( = 7 * 32 + 19 = 8 * 28 + 19) into buffer */
     541           0 :             for (j = 0; j < 30; j++) {
     542           0 :                 put_bits(&buf, 8, cs->data[i][j]);
     543             :             }
     544           0 :             put_bits(&buf, 3, cs->data[i][30] >> 5);
     545             :         }
     546             :     }
     547             :     /* finesignatures */
     548           0 :     put_bits(&buf, 1, 0); /* CompressionFlag, only 0 supported */
     549           0 :     for (fs = sc->finesiglist; fs; fs = fs->next) {
     550           0 :         put_bits(&buf, 1, 1); /* MediaTimeFlagOfFrame */
     551           0 :         put_bits32(&buf, 0xFFFFFFFF & fs->pts); /* MediaTimeOfFrame */
     552           0 :         put_bits(&buf, 8, fs->confidence); /* FrameConfidence */
     553           0 :         for (i = 0; i < 5; i++) {
     554           0 :             put_bits(&buf, 8, fs->words[i]); /* Words */
     555             :         }
     556             :         /* framesignature */
     557           0 :         for (i = 0; i < SIGELEM_SIZE/5; i++) {
     558           0 :             put_bits(&buf, 8, fs->framesig[i]);
     559             :         }
     560             :     }
     561             : 
     562           0 :     avpriv_align_put_bits(&buf);
     563           0 :     flush_put_bits(&buf);
     564           0 :     fwrite(buffer, 1, put_bits_count(&buf)/8, f);
     565           0 :     fclose(f);
     566           0 :     av_freep(&buffer);
     567           0 :     return 0;
     568             : }
     569             : 
     570           0 : static int export(AVFilterContext *ctx, StreamContext *sc, int input)
     571             : {
     572           0 :     SignatureContext* sic = ctx->priv;
     573             :     char filename[1024];
     574             : 
     575           0 :     if (sic->nb_inputs > 1) {
     576             :         /* error already handled */
     577           0 :         av_assert0(av_get_frame_filename(filename, sizeof(filename), sic->filename, input) == 0);
     578             :     } else {
     579           0 :         strcpy(filename, sic->filename);
     580             :     }
     581           0 :     if (sic->format == FORMAT_XML) {
     582           0 :         return xml_export(ctx, sc, filename);
     583             :     } else {
     584           0 :         return binary_export(ctx, sc, filename);
     585             :     }
     586             : }
     587             : 
     588           0 : static int request_frame(AVFilterLink *outlink)
     589             : {
     590           0 :     AVFilterContext *ctx = outlink->src;
     591           0 :     SignatureContext *sic = ctx->priv;
     592             :     StreamContext *sc, *sc2;
     593             :     MatchingInfo match;
     594             :     int i, j, ret;
     595           0 :     int lookup = 1; /* indicates wheather EOF of all files is reached */
     596             : 
     597             :     /* process all inputs */
     598           0 :     for (i = 0; i < sic->nb_inputs; i++){
     599           0 :         sc = &(sic->streamcontexts[i]);
     600             : 
     601           0 :         ret = ff_request_frame(ctx->inputs[i]);
     602             : 
     603             :         /* return if unexpected error occurs in input stream */
     604           0 :         if (ret < 0 && ret != AVERROR_EOF)
     605           0 :             return ret;
     606             : 
     607             :         /* export signature at EOF */
     608           0 :         if (ret == AVERROR_EOF && !sc->exported) {
     609             :             /* export if wanted */
     610           0 :             if (strlen(sic->filename) > 0) {
     611           0 :                 if (export(ctx, sc, i) < 0)
     612           0 :                     return ret;
     613             :             }
     614           0 :             sc->exported = 1;
     615             :         }
     616           0 :         lookup &= sc->exported;
     617             :     }
     618             : 
     619             :     /* signature lookup */
     620           0 :     if (lookup && sic->mode != MODE_OFF) {
     621             :         /* iterate over every pair */
     622           0 :         for (i = 0; i < sic->nb_inputs; i++) {
     623           0 :             sc = &(sic->streamcontexts[i]);
     624           0 :             for (j = i+1; j < sic->nb_inputs; j++) {
     625           0 :                 sc2 = &(sic->streamcontexts[j]);
     626           0 :                 match = lookup_signatures(ctx, sic, sc, sc2, sic->mode);
     627           0 :                 if (match.score != 0) {
     628           0 :                     av_log(ctx, AV_LOG_INFO, "matching of video %d at %f and %d at %f, %d frames matching\n",
     629           0 :                             i, ((double) match.first->pts * sc->time_base.num) / sc->time_base.den,
     630           0 :                             j, ((double) match.second->pts * sc2->time_base.num) / sc2->time_base.den,
     631             :                             match.matchframes);
     632           0 :                     if (match.whole)
     633           0 :                         av_log(ctx, AV_LOG_INFO, "whole video matching\n");
     634             :                 } else {
     635           0 :                     av_log(ctx, AV_LOG_INFO, "no matching of video %d and %d\n", i, j);
     636             :                 }
     637             :             }
     638             :         }
     639             :     }
     640             : 
     641           0 :     return ret;
     642             : }
     643             : 
     644           0 : static av_cold int init(AVFilterContext *ctx)
     645             : {
     646             : 
     647           0 :     SignatureContext *sic = ctx->priv;
     648             :     StreamContext *sc;
     649             :     int i, ret;
     650             :     char tmp[1024];
     651             : 
     652           0 :     sic->streamcontexts = av_mallocz(sic->nb_inputs * sizeof(StreamContext));
     653           0 :     if (!sic->streamcontexts)
     654           0 :         return AVERROR(ENOMEM);
     655             : 
     656           0 :     for (i = 0; i < sic->nb_inputs; i++) {
     657           0 :         AVFilterPad pad = {
     658             :             .type = AVMEDIA_TYPE_VIDEO,
     659           0 :             .name = av_asprintf("in%d", i),
     660             :             .config_props = config_input,
     661             :             .filter_frame = filter_frame,
     662             :         };
     663             : 
     664           0 :         if (!pad.name)
     665           0 :             return AVERROR(ENOMEM);
     666             : 
     667           0 :         sc = &(sic->streamcontexts[i]);
     668             : 
     669           0 :         sc->lastindex = 0;
     670           0 :         sc->finesiglist = av_mallocz(sizeof(FineSignature));
     671           0 :         if (!sc->finesiglist)
     672           0 :             return AVERROR(ENOMEM);
     673           0 :         sc->curfinesig = NULL;
     674             : 
     675           0 :         sc->coarsesiglist = av_mallocz(sizeof(CoarseSignature));
     676           0 :         if (!sc->coarsesiglist)
     677           0 :             return AVERROR(ENOMEM);
     678           0 :         sc->curcoarsesig1 = sc->coarsesiglist;
     679           0 :         sc->coarseend = sc->coarsesiglist;
     680           0 :         sc->coarsecount = 0;
     681           0 :         sc->midcoarse = 0;
     682             : 
     683           0 :         if ((ret = ff_insert_inpad(ctx, i, &pad)) < 0) {
     684           0 :             av_freep(&pad.name);
     685           0 :             return ret;
     686             :         }
     687             :     }
     688             : 
     689             :     /* check filename */
     690           0 :     if (sic->nb_inputs > 1 && strlen(sic->filename) > 0 && av_get_frame_filename(tmp, sizeof(tmp), sic->filename, 0) == -1) {
     691           0 :         av_log(ctx, AV_LOG_ERROR, "The filename must contain %%d or %%0nd, if you have more than one input.\n");
     692           0 :         return AVERROR(EINVAL);
     693             :     }
     694             : 
     695           0 :     return 0;
     696             : }
     697             : 
     698             : 
     699             : 
     700           0 : static av_cold void uninit(AVFilterContext *ctx)
     701             : {
     702           0 :     SignatureContext *sic = ctx->priv;
     703             :     StreamContext *sc;
     704             :     void* tmp;
     705             :     FineSignature* finsig;
     706             :     CoarseSignature* cousig;
     707             :     int i;
     708             : 
     709             : 
     710             :     /* free the lists */
     711           0 :     if (sic->streamcontexts != NULL) {
     712           0 :         for (i = 0; i < sic->nb_inputs; i++) {
     713           0 :             sc = &(sic->streamcontexts[i]);
     714           0 :             finsig = sc->finesiglist;
     715           0 :             cousig = sc->coarsesiglist;
     716             : 
     717           0 :             while (finsig) {
     718           0 :                 tmp = finsig;
     719           0 :                 finsig = finsig->next;
     720           0 :                 av_freep(&tmp);
     721             :             }
     722           0 :             sc->finesiglist = NULL;
     723             : 
     724           0 :             while (cousig) {
     725           0 :                 tmp = cousig;
     726           0 :                 cousig = cousig->next;
     727           0 :                 av_freep(&tmp);
     728             :             }
     729           0 :             sc->coarsesiglist = NULL;
     730             :         }
     731           0 :         av_freep(&sic->streamcontexts);
     732             :     }
     733           0 : }
     734             : 
     735           0 : static int config_output(AVFilterLink *outlink)
     736             : {
     737           0 :     AVFilterContext *ctx = outlink->src;
     738           0 :     AVFilterLink *inlink = ctx->inputs[0];
     739             : 
     740           0 :     outlink->time_base = inlink->time_base;
     741           0 :     outlink->frame_rate = inlink->frame_rate;
     742           0 :     outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
     743           0 :     outlink->w = inlink->w;
     744           0 :     outlink->h = inlink->h;
     745             : 
     746           0 :     return 0;
     747             : }
     748             : 
     749             : static const AVFilterPad signature_outputs[] = {
     750             :     {
     751             :         .name          = "default",
     752             :         .type          = AVMEDIA_TYPE_VIDEO,
     753             :         .request_frame = request_frame,
     754             :         .config_props  = config_output,
     755             :     },
     756             :     { NULL }
     757             : };
     758             : 
     759             : AVFilter ff_vf_signature = {
     760             :     .name          = "signature",
     761             :     .description   = NULL_IF_CONFIG_SMALL("Calculate the MPEG-7 video signature"),
     762             :     .priv_size     = sizeof(SignatureContext),
     763             :     .priv_class    = &signature_class,
     764             :     .init          = init,
     765             :     .uninit        = uninit,
     766             :     .query_formats = query_formats,
     767             :     .outputs       = signature_outputs,
     768             :     .inputs        = NULL,
     769             :     .flags         = AVFILTER_FLAG_DYNAMIC_INPUTS,
     770             : };

Generated by: LCOV version 1.13