LCOV - code coverage report
Current view: top level - libavformat - tedcaptionsdec.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 9 202 4.5 %
Date: 2017-12-17 16:07:53 Functions: 1 14 7.1 %

          Line data    Source code
       1             : /*
       2             :  * TED Talks captions format decoder
       3             :  * Copyright (c) 2012 Nicolas George
       4             :  *
       5             :  * This file is part of FFmpeg.
       6             :  *
       7             :  * FFmpeg is free software; you can redistribute it and/or
       8             :  * modify it under the terms of the GNU Lesser General Public
       9             :  * License as published by the Free Software Foundation; either
      10             :  * version 2.1 of the License, or (at your option) any later version.
      11             :  *
      12             :  * FFmpeg is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :  * Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with FFmpeg; if not, write to the Free Software
      19             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      20             :  */
      21             : 
      22             : #include "libavutil/bprint.h"
      23             : #include "libavutil/log.h"
      24             : #include "libavutil/opt.h"
      25             : #include "avformat.h"
      26             : #include "internal.h"
      27             : #include "subtitles.h"
      28             : 
      29             : typedef struct {
      30             :     AVClass *class;
      31             :     int64_t start_time;
      32             :     FFDemuxSubtitlesQueue subs;
      33             : } TEDCaptionsDemuxer;
      34             : 
      35             : static const AVOption tedcaptions_options[] = {
      36             :     { "start_time", "set the start time (offset) of the subtitles, in ms",
      37             :       offsetof(TEDCaptionsDemuxer, start_time), AV_OPT_TYPE_INT64,
      38             :       { .i64 = 15000 }, INT64_MIN, INT64_MAX,
      39             :       AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM },
      40             :     { NULL },
      41             : };
      42             : 
      43             : static const AVClass tedcaptions_demuxer_class = {
      44             :     .class_name = "tedcaptions_demuxer",
      45             :     .item_name  = av_default_item_name,
      46             :     .option     = tedcaptions_options,
      47             :     .version    = LIBAVUTIL_VERSION_INT,
      48             : };
      49             : 
      50             : #define BETWEEN(a, amin, amax) ((unsigned)((a) - (amin)) <= (amax) - (amin))
      51             : 
      52             : #define HEX_DIGIT_TEST(c) (BETWEEN(c, '0', '9') || BETWEEN((c) | 32, 'a', 'z'))
      53             : #define HEX_DIGIT_VAL(c) ((c) <= '9' ? (c) - '0' : ((c) | 32) - 'a' + 10)
      54             : #define ERR_CODE(c) ((c) < 0 ? (c) : AVERROR_INVALIDDATA)
      55             : 
      56           0 : static void av_bprint_utf8(AVBPrint *bp, unsigned c)
      57             : {
      58             :     int bytes, i;
      59             : 
      60           0 :     if (c <= 0x7F) {
      61           0 :         av_bprint_chars(bp, c, 1);
      62           0 :         return;
      63             :     }
      64           0 :     bytes = (av_log2(c) - 2) / 5;
      65           0 :     av_bprint_chars(bp, (c >> (bytes * 6)) | ((0xFF80 >> bytes) & 0xFF), 1);
      66           0 :     for (i = bytes - 1; i >= 0; i--)
      67           0 :         av_bprint_chars(bp, ((c >> (i * 6)) & 0x3F) | 0x80, 1);
      68             : }
      69             : 
      70           0 : static void next_byte(AVIOContext *pb, int *cur_byte)
      71             : {
      72             :     uint8_t b;
      73           0 :     int ret = avio_read(pb, &b, 1);
      74           0 :     *cur_byte = ret > 0 ? b : ret == 0 ? AVERROR_EOF : ret;
      75           0 : }
      76             : 
      77           0 : static void skip_spaces(AVIOContext *pb, int *cur_byte)
      78             : {
      79           0 :     while (*cur_byte == ' '  || *cur_byte == '\t' ||
      80           0 :            *cur_byte == '\n' || *cur_byte == '\r')
      81           0 :         next_byte(pb, cur_byte);
      82           0 : }
      83             : 
      84           0 : static int expect_byte(AVIOContext *pb, int *cur_byte, uint8_t c)
      85             : {
      86           0 :     skip_spaces(pb, cur_byte);
      87           0 :     if (*cur_byte != c)
      88           0 :         return ERR_CODE(*cur_byte);
      89           0 :     next_byte(pb, cur_byte);
      90           0 :     return 0;
      91             : }
      92             : 
      93           0 : static int parse_string(AVIOContext *pb, int *cur_byte, AVBPrint *bp, int full)
      94             : {
      95             :     int ret;
      96             : 
      97           0 :     av_bprint_init(bp, 0, full ? -1 : 1);
      98           0 :     ret = expect_byte(pb, cur_byte, '"');
      99           0 :     if (ret < 0)
     100           0 :         goto fail;
     101           0 :     while (*cur_byte > 0 && *cur_byte != '"') {
     102           0 :         if (*cur_byte == '\\') {
     103           0 :             next_byte(pb, cur_byte);
     104           0 :             if (*cur_byte < 0) {
     105           0 :                 ret = AVERROR_INVALIDDATA;
     106           0 :                 goto fail;
     107             :             }
     108           0 :             if ((*cur_byte | 32) == 'u') {
     109           0 :                 unsigned chr = 0, i;
     110           0 :                 for (i = 0; i < 4; i++) {
     111           0 :                     next_byte(pb, cur_byte);
     112           0 :                     if (!HEX_DIGIT_TEST(*cur_byte)) {
     113           0 :                         ret = ERR_CODE(*cur_byte);
     114           0 :                         goto fail;
     115             :                     }
     116           0 :                     chr = chr * 16 + HEX_DIGIT_VAL(*cur_byte);
     117             :                 }
     118           0 :                 av_bprint_utf8(bp, chr);
     119             :             } else {
     120           0 :                 av_bprint_chars(bp, *cur_byte, 1);
     121             :             }
     122             :         } else {
     123           0 :             av_bprint_chars(bp, *cur_byte, 1);
     124             :         }
     125           0 :         next_byte(pb, cur_byte);
     126             :     }
     127           0 :     ret = expect_byte(pb, cur_byte, '"');
     128           0 :     if (ret < 0)
     129           0 :         goto fail;
     130           0 :     if (full && !av_bprint_is_complete(bp)) {
     131           0 :         ret = AVERROR(ENOMEM);
     132           0 :         goto fail;
     133             :     }
     134           0 :     return 0;
     135             : 
     136           0 : fail:
     137           0 :     av_bprint_finalize(bp, NULL);
     138           0 :     return ret;
     139             : }
     140             : 
     141           0 : static int parse_label(AVIOContext *pb, int *cur_byte, AVBPrint *bp)
     142             : {
     143             :     int ret;
     144             : 
     145           0 :     ret = parse_string(pb, cur_byte, bp, 0);
     146           0 :     if (ret < 0)
     147           0 :         return ret;
     148           0 :     ret = expect_byte(pb, cur_byte, ':');
     149           0 :     if (ret < 0)
     150           0 :         return ret;
     151           0 :     return 0;
     152             : }
     153             : 
     154           0 : static int parse_boolean(AVIOContext *pb, int *cur_byte, int *result)
     155             : {
     156             :     static const char * const text[] = { "false", "true" };
     157             :     const char *p;
     158             :     int i;
     159             : 
     160           0 :     skip_spaces(pb, cur_byte);
     161           0 :     for (i = 0; i < 2; i++) {
     162           0 :         p = text[i];
     163           0 :         if (*cur_byte != *p)
     164           0 :             continue;
     165           0 :         for (; *p; p++, next_byte(pb, cur_byte))
     166           0 :             if (*cur_byte != *p)
     167           0 :                 return AVERROR_INVALIDDATA;
     168           0 :         if (BETWEEN(*cur_byte | 32, 'a', 'z'))
     169           0 :             return AVERROR_INVALIDDATA;
     170           0 :         *result = i;
     171           0 :         return 0;
     172             :     }
     173           0 :     return AVERROR_INVALIDDATA;
     174             : }
     175             : 
     176           0 : static int parse_int(AVIOContext *pb, int *cur_byte, int64_t *result)
     177             : {
     178           0 :     int64_t val = 0;
     179             : 
     180           0 :     skip_spaces(pb, cur_byte);
     181           0 :     if ((unsigned)*cur_byte - '0' > 9)
     182           0 :         return AVERROR_INVALIDDATA;
     183           0 :     while (BETWEEN(*cur_byte, '0', '9')) {
     184           0 :         val = val * 10 + (*cur_byte - '0');
     185           0 :         next_byte(pb, cur_byte);
     186             :     }
     187           0 :     *result = val;
     188           0 :     return 0;
     189             : }
     190             : 
     191           0 : static int parse_file(AVIOContext *pb, FFDemuxSubtitlesQueue *subs)
     192             : {
     193             :     int ret, cur_byte, start_of_par;
     194             :     AVBPrint label, content;
     195             :     int64_t pos, start, duration;
     196             :     AVPacket *pkt;
     197             : 
     198           0 :     next_byte(pb, &cur_byte);
     199           0 :     ret = expect_byte(pb, &cur_byte, '{');
     200           0 :     if (ret < 0)
     201           0 :         return AVERROR_INVALIDDATA;
     202           0 :     ret = parse_label(pb, &cur_byte, &label);
     203           0 :     if (ret < 0 || strcmp(label.str, "captions"))
     204           0 :         return AVERROR_INVALIDDATA;
     205           0 :     ret = expect_byte(pb, &cur_byte, '[');
     206           0 :     if (ret < 0)
     207           0 :         return AVERROR_INVALIDDATA;
     208             :     while (1) {
     209           0 :         content.size = 0;
     210           0 :         start = duration = AV_NOPTS_VALUE;
     211           0 :         ret = expect_byte(pb, &cur_byte, '{');
     212           0 :         if (ret < 0)
     213           0 :             return ret;
     214           0 :         pos = avio_tell(pb) - 1;
     215             :         while (1) {
     216           0 :             ret = parse_label(pb, &cur_byte, &label);
     217           0 :             if (ret < 0)
     218           0 :                 return ret;
     219           0 :             if (!strcmp(label.str, "startOfParagraph")) {
     220           0 :                 ret = parse_boolean(pb, &cur_byte, &start_of_par);
     221           0 :                 if (ret < 0)
     222           0 :                     return ret;
     223           0 :             } else if (!strcmp(label.str, "content")) {
     224           0 :                 ret = parse_string(pb, &cur_byte, &content, 1);
     225           0 :                 if (ret < 0)
     226           0 :                     return ret;
     227           0 :             } else if (!strcmp(label.str, "startTime")) {
     228           0 :                 ret = parse_int(pb, &cur_byte, &start);
     229           0 :                 if (ret < 0)
     230           0 :                     return ret;
     231           0 :             } else if (!strcmp(label.str, "duration")) {
     232           0 :                 ret = parse_int(pb, &cur_byte, &duration);
     233           0 :                 if (ret < 0)
     234           0 :                     return ret;
     235             :             } else {
     236           0 :                 return AVERROR_INVALIDDATA;
     237             :             }
     238           0 :             skip_spaces(pb, &cur_byte);
     239           0 :             if (cur_byte != ',')
     240           0 :                 break;
     241           0 :             next_byte(pb, &cur_byte);
     242             :         }
     243           0 :         ret = expect_byte(pb, &cur_byte, '}');
     244           0 :         if (ret < 0)
     245           0 :             return ret;
     246             : 
     247           0 :         if (!content.size || start == AV_NOPTS_VALUE ||
     248           0 :             duration == AV_NOPTS_VALUE)
     249           0 :             return AVERROR_INVALIDDATA;
     250           0 :         pkt = ff_subtitles_queue_insert(subs, content.str, content.len, 0);
     251           0 :         if (!pkt)
     252           0 :             return AVERROR(ENOMEM);
     253           0 :         pkt->pos      = pos;
     254           0 :         pkt->pts      = start;
     255           0 :         pkt->duration = duration;
     256           0 :         av_bprint_finalize(&content, NULL);
     257             : 
     258           0 :         skip_spaces(pb, &cur_byte);
     259           0 :         if (cur_byte != ',')
     260           0 :             break;
     261           0 :         next_byte(pb, &cur_byte);
     262             :     }
     263           0 :     ret = expect_byte(pb, &cur_byte, ']');
     264           0 :     if (ret < 0)
     265           0 :         return ret;
     266           0 :     ret = expect_byte(pb, &cur_byte, '}');
     267           0 :     if (ret < 0)
     268           0 :         return ret;
     269           0 :     skip_spaces(pb, &cur_byte);
     270           0 :     if (cur_byte != AVERROR_EOF)
     271           0 :         return ERR_CODE(cur_byte);
     272           0 :     return 0;
     273             : }
     274             : 
     275           0 : static av_cold int tedcaptions_read_header(AVFormatContext *avf)
     276             : {
     277           0 :     TEDCaptionsDemuxer *tc = avf->priv_data;
     278             :     AVStream *st;
     279             :     int ret, i;
     280             :     AVPacket *last;
     281             : 
     282           0 :     ret = parse_file(avf->pb, &tc->subs);
     283           0 :     if (ret < 0) {
     284           0 :         if (ret == AVERROR_INVALIDDATA)
     285           0 :             av_log(avf, AV_LOG_ERROR, "Syntax error near offset %"PRId64".\n",
     286             :                    avio_tell(avf->pb));
     287           0 :         ff_subtitles_queue_clean(&tc->subs);
     288           0 :         return ret;
     289             :     }
     290           0 :     ff_subtitles_queue_finalize(avf, &tc->subs);
     291           0 :     for (i = 0; i < tc->subs.nb_subs; i++)
     292           0 :         tc->subs.subs[i].pts += tc->start_time;
     293             : 
     294           0 :     last = &tc->subs.subs[tc->subs.nb_subs - 1];
     295           0 :     st = avformat_new_stream(avf, NULL);
     296           0 :     if (!st)
     297           0 :         return AVERROR(ENOMEM);
     298           0 :     st->codecpar->codec_type     = AVMEDIA_TYPE_SUBTITLE;
     299           0 :     st->codecpar->codec_id       = AV_CODEC_ID_TEXT;
     300           0 :     avpriv_set_pts_info(st, 64, 1, 1000);
     301           0 :     st->probe_packets = 0;
     302           0 :     st->start_time    = 0;
     303           0 :     st->duration      = last->pts + last->duration;
     304           0 :     st->cur_dts       = 0;
     305             : 
     306           0 :     return 0;
     307             : }
     308             : 
     309           0 : static int tedcaptions_read_packet(AVFormatContext *avf, AVPacket *packet)
     310             : {
     311           0 :     TEDCaptionsDemuxer *tc = avf->priv_data;
     312             : 
     313           0 :     return ff_subtitles_queue_read_packet(&tc->subs, packet);
     314             : }
     315             : 
     316           0 : static int tedcaptions_read_close(AVFormatContext *avf)
     317             : {
     318           0 :     TEDCaptionsDemuxer *tc = avf->priv_data;
     319             : 
     320           0 :     ff_subtitles_queue_clean(&tc->subs);
     321           0 :     return 0;
     322             : }
     323             : 
     324        6130 : static av_cold int tedcaptions_read_probe(AVProbeData *p)
     325             : {
     326             :     static const char *const tags[] = {
     327             :         "\"captions\"", "\"duration\"", "\"content\"",
     328             :         "\"startOfParagraph\"", "\"startTime\"",
     329             :     };
     330        6130 :     unsigned i, count = 0;
     331             :     const char *t;
     332             : 
     333        6130 :     if (p->buf[strspn(p->buf, " \t\r\n")] != '{')
     334        6127 :         return 0;
     335          18 :     for (i = 0; i < FF_ARRAY_ELEMS(tags); i++) {
     336          15 :         if (!(t = strstr(p->buf, tags[i])))
     337          15 :             continue;
     338           0 :         t += strlen(tags[i]);
     339           0 :         t += strspn(t, " \t\r\n");
     340           0 :         if (*t == ':')
     341           0 :             count++;
     342             :     }
     343           6 :     return count == FF_ARRAY_ELEMS(tags) ? AVPROBE_SCORE_MAX :
     344           3 :            count                         ? AVPROBE_SCORE_EXTENSION : 0;
     345             : }
     346             : 
     347           0 : static int tedcaptions_read_seek(AVFormatContext *avf, int stream_index,
     348             :                                  int64_t min_ts, int64_t ts, int64_t max_ts,
     349             :                                  int flags)
     350             : {
     351           0 :     TEDCaptionsDemuxer *tc = avf->priv_data;
     352           0 :     return ff_subtitles_queue_seek(&tc->subs, avf, stream_index,
     353             :                                    min_ts, ts, max_ts, flags);
     354             : }
     355             : 
     356             : AVInputFormat ff_tedcaptions_demuxer = {
     357             :     .name           = "tedcaptions",
     358             :     .long_name      = NULL_IF_CONFIG_SMALL("TED Talks captions"),
     359             :     .priv_data_size = sizeof(TEDCaptionsDemuxer),
     360             :     .priv_class     = &tedcaptions_demuxer_class,
     361             :     .read_header    = tedcaptions_read_header,
     362             :     .read_packet    = tedcaptions_read_packet,
     363             :     .read_close     = tedcaptions_read_close,
     364             :     .read_probe     = tedcaptions_read_probe,
     365             :     .read_seek2     = tedcaptions_read_seek,
     366             : };

Generated by: LCOV version 1.13