LCOV - code coverage report
Current view: top level - libavcodec - metasound.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 203 0.0 %
Date: 2017-12-14 01:15:32 Functions: 0 6 0.0 %

          Line data    Source code
       1             : /*
       2             :  * Voxware MetaSound decoder
       3             :  * Copyright (c) 2013 Konstantin Shishkov
       4             :  * based on TwinVQ decoder
       5             :  * Copyright (c) 2009 Vitor Sessak
       6             :  *
       7             :  * This file is part of FFmpeg.
       8             :  *
       9             :  * FFmpeg is free software; you can redistribute it and/or
      10             :  * modify it under the terms of the GNU Lesser General Public
      11             :  * License as published by the Free Software Foundation; either
      12             :  * version 2.1 of the License, or (at your option) any later version.
      13             :  *
      14             :  * FFmpeg is distributed in the hope that it will be useful,
      15             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      17             :  * Lesser General Public License for more details.
      18             :  *
      19             :  * You should have received a copy of the GNU Lesser General Public
      20             :  * License along with FFmpeg; if not, write to the Free Software
      21             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      22             :  */
      23             : 
      24             : #include <inttypes.h>
      25             : #include <math.h>
      26             : #include <stdint.h>
      27             : 
      28             : #include "libavutil/channel_layout.h"
      29             : #include "libavutil/float_dsp.h"
      30             : 
      31             : #define BITSTREAM_READER_LE
      32             : #include "avcodec.h"
      33             : #include "fft.h"
      34             : #include "get_bits.h"
      35             : #include "internal.h"
      36             : #include "lsp.h"
      37             : #include "sinewin.h"
      38             : 
      39             : #include "twinvq.h"
      40             : #include "metasound_data.h"
      41             : 
      42           0 : static void add_peak(float period, int width, const float *shape,
      43             :                      float ppc_gain, float *speech, int len)
      44             : {
                           /*
                            * Accumulate the samples of `shape`, scaled by `ppc_gain`, into
                            * `speech` as a train of peaks:
                            *   - a first half-peak written to speech[0 .. width/2),
                            *   - full peaks of `width` samples centred on round(i * period),
                            *   - a last peak that stops once `len` shape samples were read.
                            *
                            * period    distance between peak centres, in samples of `speech`
                            * width     number of samples written per full peak
                            * shape     source samples; `len` of them are readable
                            * ppc_gain  scale factor applied to every shape sample
                            * speech    destination, modified in place; it is indexed without
                            *           any bounds check in this function
                            * len       number of samples available in `shape`
                            *
                            * NOTE(review): `width` is the second argument of ROUNDED_DIV
                            * (macro defined elsewhere, presumably a rounding integer
                            * division), so a zero width looks unsafe - confirm callers
                            * never pass 0.
                            */
      45             :     int i, j, center;
      46           0 :     const float *shape_end = shape + len; /* one past the last readable shape sample */
      47             : 
      48             :     // First peak centered around zero
      49           0 :     for (i = 0; i < width / 2; i++)
      50           0 :         speech[i] += ppc_gain * *shape++;
      51             : 
                           /* Full peaks: j runs over -width/2 .. (width+1)/2 - 1, i.e. `width`
                            * samples around `center`. */
      52           0 :     for (i = 1; i < ROUNDED_DIV(len, width); i++) {
      53           0 :         center = (int)(i * period + 0.5); /* + 0.5 then truncate: nearest integer */
      54           0 :         for (j = -width / 2; j < (width + 1) / 2; j++)
      55           0 :             speech[j + center] += ppc_gain * *shape++;
      56             :     }
      57             : 
      58             :     // For the last block, be careful not to go beyond the end of the buffer
                           /* `i` still holds the value that ended the loop above; the extra
                            * `shape < shape_end` test only limits reads from `shape`. */
      59           0 :     center = (int)(i * period + 0.5);
      60           0 :     for (j = -width / 2; j < (width + 1) / 2 && shape < shape_end; j++)
      61           0 :         speech[j + center] += ppc_gain * *shape++;
      62           0 : }
      63             : 
      64           0 : static void decode_ppc(TwinVQContext *tctx, int period_coef, int g_coef,
      65             :                        const float *shape, float *speech)
      66             : {
                           /*
                            * Turn the quantised period index (`period_coef`) and gain index
                            * (`g_coef`) into a peak period, peak width and gain, then add the
                            * resulting peak train to `speech` through add_peak().
                            *
                            * tctx         decoder context; the mode table and the codec
                            *              context (sample rate, bit rate, channels) are read
                            * period_coef  index scaled over (1 << ppc_period_bit) - 1 steps
                            * g_coef       index scaled over (1 << pgain_bit) - 1 steps
                            * shape        peak shape, mtab->ppc_shape_len samples are used
                            * speech       buffer the peaks are accumulated into
                            */
      67           0 :     const TwinVQModeTab *mtab = tctx->mtab;
      68           0 :     int isampf       = tctx->avctx->sample_rate / 1000; /* integer division, e.g. 11025 -> 11 */
      69           0 :     int ibps         = tctx->avctx->bit_rate / (1000 * tctx->avctx->channels); /* per channel, in units of 1000 bit/s */
      70             :     int width;
      71             : 
      72           0 :     float ratio = (float)mtab->size / isampf;
      73             :     float min_period, max_period, period_range, period;
      74             :     float some_mult;
      75             : 
      76             :     float pgain_base, pgain_step, ppc_gain;
      77             : 
                           /* One channel: the period range is built in the log2 domain
                            * (max = min * 6 once converted back). Otherwise the bounds are
                            * linear and rounded to a multiple of 1/400. */
      78           0 :     if (tctx->avctx->channels == 1) {
      79           0 :         min_period = log2(ratio * 0.2);
      80           0 :         max_period = min_period + log2(6);
      81             :     } else {
      82           0 :         min_period = (int)(ratio * 0.2 * 400     + 0.5) / 400.0;
      83           0 :         max_period = (int)(ratio * 0.2 * 400 * 6 + 0.5) / 400.0;
      84             :     }
      85           0 :     period_range = max_period - min_period;
                           /* Linear interpolation between the bounds by period_coef. */
      86           0 :     period       = min_period + period_coef * period_range /
      87           0 :                    ((1 << mtab->ppc_period_bit) - 1);
      88           0 :     if (tctx->avctx->channels == 1)
      89           0 :         period = powf(2.0, period); /* leave the log2 domain */
      90             :     else
      91           0 :         period = (int)(period * 400 + 0.5) / 400.0; /* round to a multiple of 1/400 */
      92             : 
                           /* Width multiplier selected from the truncated sample rate (and
                            * the per-channel bit rate for 22 kHz). */
      93           0 :     switch (isampf) {
      94           0 :     case  8: some_mult = 2.0; break;
      95           0 :     case 11: some_mult = 3.0; break;
      96           0 :     case 16: some_mult = 3.0; break;
      97           0 :     case 22: some_mult = ibps == 32 ? 2.0 : 4.0; break;
      98           0 :     case 44: some_mult = 8.0; break;
      99           0 :     default: some_mult = 4.0;
     100             :     }
     101             : 
     102           0 :     width = (int)(some_mult / (mtab->size / period) * mtab->ppc_shape_len);
                           /* Extra widening applied only for isampf == 22 with ibps == 32. */
     103           0 :     if (isampf == 22 && ibps == 32)
     104           0 :         width = (int)((2.0 / period + 1) * width + 0.5);
     105             : 
                           /* Gain: g_coef selects the middle of one of the pgain_step wide
                            * intervals below pgain_base; the result goes through
                            * twinvq_mulawinv() (defined elsewhere, presumably an inverse
                            * mu-law expansion - confirm) and is scaled by 1/8192. */
     106           0 :     pgain_base = tctx->avctx->channels == 2 ? 25000.0 : 20000.0;
     107           0 :     pgain_step = pgain_base / ((1 << mtab->pgain_bit) - 1);
     108           0 :     ppc_gain   = 1.0 / 8192 *
     109           0 :                  twinvq_mulawinv(pgain_step * g_coef + pgain_step / 2,
     110             :                                  pgain_base, TWINVQ_PGAIN_MU);
     111             : 
     112           0 :     add_peak(period, width, shape, ppc_gain, speech, mtab->ppc_shape_len);
     113           0 : }
     114             : 
     115           0 : static void dec_bark_env(TwinVQContext *tctx, const uint8_t *in, int use_hist,
     116             :                          int ch, float *out, float gain,
     117             :                          enum TwinVQFrameType ftype)
     118             : {
                           /*
                            * Expand codebook indices into an envelope written to `out` as
                            * consecutive runs of a constant value; each run is
                            * bark_tab[idx] floats long and holds st * gain.
                            *
                            * tctx      decoder context (mode table, per-channel history)
                            * in        codebook indices, bark_n_coef entries are read
                            * use_hist  non-zero to mix in the value stored by the previous
                            *           call for the same frame type and channel
                            * ch        channel index into the history buffer
                            * out       destination, advanced by bark_tab[idx] per run
                            * gain      multiplier applied to every envelope value
                            * ftype     frame type; selects tables, history and weight
                            */
     119           0 :     const TwinVQModeTab *mtab = tctx->mtab;
     120             :     int i, j;
     121           0 :     float *hist     = tctx->bark_hist[ftype][ch];
     122           0 :     float val       = ((const float []) { 0.4, 0.35, 0.28 })[ftype]; /* history weight; the literal has 3 entries */
     123           0 :     int bark_n_coef = mtab->fmode[ftype].bark_n_coef;
     124           0 :     int fw_cb_len   = mtab->fmode[ftype].bark_env_size / bark_n_coef;
     125           0 :     int idx         = 0; /* running index over all (i, j) pairs */
     126             : 
     127           0 :     if (tctx->avctx->channels == 1)
     128           0 :         val = 0.5; /* single-channel streams use a fixed weight */
     129           0 :     for (i = 0; i < fw_cb_len; i++)
     130           0 :         for (j = 0; j < bark_n_coef; j++, idx++) {
                                   /* Codebook entry scaled by 1/2048. */
     131           0 :             float tmp2 = mtab->fmode[ftype].bark_cb[fw_cb_len * in[j] + i] *
     132             :                          (1.0 / 2048);
     133             :             float st;
     134             : 
                                   /* One channel: history is added on top of tmp2.
                                    * Otherwise tmp2 and history are blended with weights
                                    * (1 - val) and val. Without history: tmp2 + 1. */
     135           0 :             if (tctx->avctx->channels == 1)
     136           0 :                 st = use_hist ?
     137           0 :                     tmp2 + val * hist[idx] + 1.0 : tmp2 + 1.0;
     138             :             else
     139           0 :                 st = use_hist ? (1.0 - val) * tmp2 + val * hist[idx] + 1.0
     140           0 :                               : tmp2 + 1.0;
     141             : 
     142           0 :             hist[idx] = tmp2; /* stored even when use_hist is 0 */
     143           0 :             if (st < 0.1)
     144           0 :                 st = 0.1; /* lower clamp */
     145             : 
                                   /* twinvq_memset_float() is defined elsewhere; presumably
                                    * it stores the value bark_tab[idx] times - confirm. */
     146           0 :             twinvq_memset_float(out, st * gain,
     147           0 :                                 mtab->fmode[ftype].bark_tab[idx]);
     148           0 :             out += mtab->fmode[ftype].bark_tab[idx];
     149             :         }
     150           0 : }
     151             : 
     152           0 : static void read_cb_data(TwinVQContext *tctx, GetBitContext *gb,
     153             :                          uint8_t *dst, enum TwinVQFrameType ftype)
     154             : {
                           /*
                            * Read n_div[ftype] pairs of values from the bit reader and store
                            * them in `dst`, so 2 * n_div[ftype] bytes are written.
                            *
                            * tctx   decoder context providing n_div, bits_main_spec and
                            *        bits_main_spec_change
                            * gb     bit reader, advanced by the bits consumed
                            * dst    output buffer; each value is stored in a uint8_t
                            * ftype  frame type used to index the tables above
                            */
     155             :     int i;
     156             : 
     157           0 :     for (i = 0; i < tctx->n_div[ftype]; i++) {
                               /* 0 before the change point, 1 from it onwards; selects which
                                * of the two bit widths is used for this pair. */
     158           0 :         int bs_second_part = (i >= tctx->bits_main_spec_change[ftype]);
     159             : 
     160           0 :         *dst++ = get_bits(gb, tctx->bits_main_spec[0][ftype][bs_second_part]);
     161           0 :         *dst++ = get_bits(gb, tctx->bits_main_spec[1][ftype][bs_second_part]);
     162             :     }
     163           0 : }
     164             : 
     165           0 : static int metasound_read_bitstream(AVCodecContext *avctx, TwinVQContext *tctx,
     166             :                                     const uint8_t *buf, int buf_size)
     167             : {
                           /*
                            * Parse tctx->frames_per_packet frames from `buf` into
                            * tctx->bits[0 .. frames_per_packet).
                            *
                            * avctx     codec context, used here for logging
                            * tctx      decoder context; tctx->cur_frame is used as the loop
                            *           counter and tctx->bits[] receives the parsed fields
                            * buf       packet data
                            * buf_size  size of `buf` in bytes
                            *
                            * Returns the number of bytes covered by the bits read (rounded
                            * up), the negative value returned by init_get_bits8() if it
                            * fails, or AVERROR_INVALIDDATA for a window type above 8.
                            */
     168             :     TwinVQFrameData     *bits;
     169           0 :     const TwinVQModeTab *mtab = tctx->mtab;
     170           0 :     int channels              = tctx->avctx->channels;
     171             :     int sub;
     172             :     GetBitContext gb;
     173             :     int i, j, k, ret;
     174             : 
     175           0 :     if ((ret = init_get_bits8(&gb, buf, buf_size)) < 0)
     176           0 :         return ret;
     177             : 
     178           0 :     for (tctx->cur_frame = 0; tctx->cur_frame < tctx->frames_per_packet;
     179           0 :          tctx->cur_frame++) {
     180           0 :         bits = tctx->bits + tctx->cur_frame;
     181             : 
     182           0 :         bits->window_type = get_bits(&gb, TWINVQ_WINDOW_TYPE_BITS);
     183             : 
                               /* Checked before window_type is used as a table index below. */
     184           0 :         if (bits->window_type > 8) {
     185           0 :             av_log(avctx, AV_LOG_ERROR, "Invalid window type, broken sample?\n");
     186           0 :             return AVERROR_INVALIDDATA;
     187             :         }
     188             : 
                               /* tctx->bits[tctx->cur_frame] is the same element as *bits. */
     189           0 :         bits->ftype = ff_twinvq_wtype_to_ftype_table[tctx->bits[tctx->cur_frame].window_type];
     190             : 
     191           0 :         sub = mtab->fmode[bits->ftype].sub;
     192             : 
                               /* Two bits are read and discarded for non-short frames unless
                                * the stream is flagged as 6 kbps. */
     193           0 :         if (bits->ftype != TWINVQ_FT_SHORT && !tctx->is_6kbps)
     194           0 :             get_bits(&gb, 2);
     195             : 
     196           0 :         read_cb_data(tctx, &gb, bits->main_coeffs, bits->ftype);
     197             : 
                               /* Envelope indices: channels x sub x bark_n_coef values. */
     198           0 :         for (i = 0; i < channels; i++)
     199           0 :             for (j = 0; j < sub; j++)
     200           0 :                 for (k = 0; k < mtab->fmode[bits->ftype].bark_n_coef; k++)
     201           0 :                     bits->bark1[i][j][k] =
     202           0 :                         get_bits(&gb, mtab->fmode[bits->ftype].bark_n_bit);
     203             : 
                               /* One history flag bit per channel and sub-block. */
     204           0 :         for (i = 0; i < channels; i++)
     205           0 :             for (j = 0; j < sub; j++)
     206           0 :                 bits->bark_use_hist[i][j] = get_bits1(&gb);
     207             : 
                               /* Long frames carry only one gain per channel; other frame
                                * types also carry `sub` sub-gains after each channel gain. */
     208           0 :         if (bits->ftype == TWINVQ_FT_LONG) {
     209           0 :             for (i = 0; i < channels; i++)
     210           0 :                 bits->gain_bits[i] = get_bits(&gb, TWINVQ_GAIN_BITS);
     211             :         } else {
     212           0 :             for (i = 0; i < channels; i++) {
     213           0 :                 bits->gain_bits[i] = get_bits(&gb, TWINVQ_GAIN_BITS);
     214           0 :                 for (j = 0; j < sub; j++)
     215           0 :                     bits->sub_gain_bits[i * sub + j] =
     216           0 :                         get_bits(&gb, TWINVQ_SUB_GAIN_BITS);
     217             :             }
     218             :         }
     219             : 
                               /* Per channel: three groups of LSP indices. */
     220           0 :         for (i = 0; i < channels; i++) {
     221           0 :             bits->lpc_hist_idx[i] = get_bits(&gb, mtab->lsp_bit0);
     222           0 :             bits->lpc_idx1[i]     = get_bits(&gb, mtab->lsp_bit1);
     223             : 
     224           0 :             for (j = 0; j < mtab->lsp_split; j++)
     225           0 :                 bits->lpc_idx2[i][j] = get_bits(&gb, mtab->lsp_bit2);
     226             :         }
     227             : 
                               /* Long frames additionally carry PPC data.
                                * NOTE(review): the literal 3 is passed as the frame type;
                                * presumably it matches a PPC entry of enum TwinVQFrameType -
                                * confirm against the enum declaration. */
     228           0 :         if (bits->ftype == TWINVQ_FT_LONG) {
     229           0 :             read_cb_data(tctx, &gb, bits->ppc_coeffs, 3);
     230           0 :             for (i = 0; i < channels; i++) {
     231           0 :                 bits->p_coef[i] = get_bits(&gb, mtab->ppc_period_bit);
     232           0 :                 bits->g_coef[i] = get_bits(&gb, mtab->pgain_bit);
     233             :             }
     234             :         }
     235             : 
     236             :         // subframes are aligned to nibbles
     237           0 :         if (get_bits_count(&gb) & 3)
     238           0 :             skip_bits(&gb, 4 - (get_bits_count(&gb) & 3));
     239             :     }
     240             : 
     241           0 :     return (get_bits_count(&gb) + 7) / 8; /* bits consumed, rounded up to whole bytes */
     242             : }
     243             : 
     244             : typedef struct MetasoundProps {
                           /* Stream parameters associated with one format tag; looked up by
                            * metasound_decode_init(). */
     245             :     uint32_t tag;         /* compared with the 32-bit value read from the extradata */
     246             :     int      bit_rate;    /* multiplied by 1000 before being stored in avctx->bit_rate */
     247             :     int      channels;    /* copied to avctx->channels */
     248             :     int      sample_rate; /* copied to avctx->sample_rate */
     249             : } MetasoundProps;
     250             : 
     251             : static const MetasoundProps codec_props[] = {
                           /* Columns: tag, bit rate, channels, sample rate.
                            * The table is scanned linearly and ends at the all-zero entry. */
     252             :     { MKTAG('V','X','0','3'),  6, 1,  8000 },
     253             :     { MKTAG('V','X','0','4'), 12, 2,  8000 },
     254             : 
     255             :     { MKTAG('V','O','X','i'),  8, 1,  8000 },
     256             :     { MKTAG('V','O','X','j'), 10, 1, 11025 },
     257             :     { MKTAG('V','O','X','k'), 16, 1, 16000 },
     258             :     { MKTAG('V','O','X','L'), 24, 1, 22050 },
     259             :     { MKTAG('V','O','X','q'), 32, 1, 44100 },
     260             :     { MKTAG('V','O','X','r'), 40, 1, 44100 },
     261             :     { MKTAG('V','O','X','s'), 48, 1, 44100 },
     262             :     { MKTAG('V','O','X','t'), 16, 2,  8000 },
     263             :     { MKTAG('V','O','X','u'), 20, 2, 11025 },
     264             :     { MKTAG('V','O','X','v'), 32, 2, 16000 },
     265             :     { MKTAG('V','O','X','w'), 48, 2, 22050 },
     266             :     { MKTAG('V','O','X','x'), 64, 2, 44100 },
     267             :     { MKTAG('V','O','X','y'), 80, 2, 44100 },
     268             :     { MKTAG('V','O','X','z'), 96, 2, 44100 },
     269             : 
     270             :     { 0, 0, 0, 0 } /* terminator: a zero tag stops the lookup */
     271             : };
     272             : 
     273           0 : static av_cold int metasound_decode_init(AVCodecContext *avctx)
     274             : {
                           /*
                            * Decoder init callback: identify the stream from the tag stored
                            * in the extradata, set sample rate / channels / bit rate on
                            * `avctx`, select the matching mode table and hand over to
                            * ff_twinvq_decode_init().
                            *
                            * Returns the value of ff_twinvq_decode_init() on success,
                            * AVERROR_INVALIDDATA for missing/short extradata, an unknown tag
                            * or an unsupported channel count, and AVERROR(ENOSYS) when no
                            * mode table exists for the resulting combination.
                            */
     275             :     int isampf, ibps;
     276           0 :     TwinVQContext *tctx = avctx->priv_data;
     277             :     uint32_t tag;
     278           0 :     const MetasoundProps *props = codec_props;
     279             : 
                           /* At least 16 bytes are needed: the tag is read from bytes 12..15. */
     280           0 :     if (!avctx->extradata || avctx->extradata_size < 16) {
     281           0 :         av_log(avctx, AV_LOG_ERROR, "Missing or incomplete extradata\n");
     282           0 :         return AVERROR_INVALIDDATA;
     283             :     }
     284             : 
     285           0 :     tag = AV_RL32(avctx->extradata + 12);
     286             : 
                           /* Linear search of codec_props; the zero-tag entry ends it. */
     287             :     for (;;) {
     288           0 :         if (!props->tag) {
     289           0 :             av_log(avctx, AV_LOG_ERROR, "Could not find tag %08"PRIX32"\n", tag);
     290           0 :             return AVERROR_INVALIDDATA;
     291             :         }
     292           0 :         if (props->tag == tag) {
     293           0 :             avctx->sample_rate = props->sample_rate;
     294           0 :             avctx->channels    = props->channels;
     295           0 :             avctx->bit_rate    = props->bit_rate * 1000;
     296           0 :             isampf             = avctx->sample_rate / 1000; /* integer division, e.g. 22050 -> 22 */
     297           0 :             break;
     298             :         }
     299           0 :         props++;
     300             :     }
     301             : 
     302           0 :     if (avctx->channels <= 0 || avctx->channels > TWINVQ_CHANNELS_MAX) {
     303           0 :         av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %i\n",
     304             :                avctx->channels);
     305           0 :         return AVERROR_INVALIDDATA;
     306             :     }
     307           0 :     avctx->channel_layout = avctx->channels == 1 ? AV_CH_LAYOUT_MONO
     308           0 :                                                  : AV_CH_LAYOUT_STEREO;
     309             : 
     310           0 :     ibps = avctx->bit_rate / (1000 * avctx->channels); /* per channel, in units of 1000 bit/s */
     311             : 
                           /* Switch key: channels in bits 16 and up, isampf in bits 8..15,
                            * ibps in the low bits. */
     312           0 :     switch ((avctx->channels << 16) + (isampf << 8) + ibps) {
     313           0 :     case (1 << 16) + ( 8 << 8) +  6:
     314           0 :         tctx->mtab = &ff_metasound_mode0806;
     315           0 :         break;
     316           0 :     case (2 << 16) + ( 8 << 8) +  6:
     317           0 :         tctx->mtab = &ff_metasound_mode0806s;
     318           0 :         break;
     319           0 :     case (1 << 16) + ( 8 << 8) +  8:
     320           0 :         tctx->mtab = &ff_metasound_mode0808;
     321           0 :         break;
     322           0 :     case (2 << 16) + ( 8 << 8) +  8:
     323           0 :         tctx->mtab = &ff_metasound_mode0808s;
     324           0 :         break;
     325           0 :     case (1 << 16) + (11 << 8) + 10:
     326           0 :         tctx->mtab = &ff_metasound_mode1110;
     327           0 :         break;
     328           0 :     case (2 << 16) + (11 << 8) + 10:
     329           0 :         tctx->mtab = &ff_metasound_mode1110s;
     330           0 :         break;
     331           0 :     case (1 << 16) + (16 << 8) + 16:
     332           0 :         tctx->mtab = &ff_metasound_mode1616;
     333           0 :         break;
     334           0 :     case (2 << 16) + (16 << 8) + 16:
     335           0 :         tctx->mtab = &ff_metasound_mode1616s;
     336           0 :         break;
     337           0 :     case (1 << 16) + (22 << 8) + 24:
     338           0 :         tctx->mtab = &ff_metasound_mode2224;
     339           0 :         break;
     340           0 :     case (2 << 16) + (22 << 8) + 24:
     341           0 :         tctx->mtab = &ff_metasound_mode2224s;
     342           0 :         break;
     343           0 :     case (1 << 16) + (44 << 8) + 32:
     344           0 :         tctx->mtab = &ff_metasound_mode4432;
     345           0 :         break;
     346           0 :     case (2 << 16) + (44 << 8) + 32:
     347           0 :         tctx->mtab = &ff_metasound_mode4432s;
     348           0 :         break;
     349           0 :     case (1 << 16) + (44 << 8) + 40:
     350           0 :         tctx->mtab = &ff_metasound_mode4440;
     351           0 :         break;
     352           0 :     case (2 << 16) + (44 << 8) + 40:
     353           0 :         tctx->mtab = &ff_metasound_mode4440s;
     354           0 :         break;
     355           0 :     case (1 << 16) + (44 << 8) + 48:
     356           0 :         tctx->mtab = &ff_metasound_mode4448;
     357           0 :         break;
     358           0 :     case (2 << 16) + (44 << 8) + 48:
     359           0 :         tctx->mtab = &ff_metasound_mode4448s;
     360           0 :         break;
     361           0 :     default:
     362           0 :         av_log(avctx, AV_LOG_ERROR,
     363             :                "This version does not support %d kHz - %d kbit/s/ch mode.\n",
     364             :                isampf, ibps);
     365           0 :         return AVERROR(ENOSYS);
     366             :     }
     367             : 
                           /* Install the MetaSound-specific callbacks defined in this file
                            * before the shared init routine runs. */
     368           0 :     tctx->codec          = TWINVQ_CODEC_METASOUND;
     369           0 :     tctx->read_bitstream = metasound_read_bitstream;
     370           0 :     tctx->dec_bark_env   = dec_bark_env;
     371           0 :     tctx->decode_ppc     = decode_ppc;
     372           0 :     tctx->frame_size     = avctx->bit_rate * tctx->mtab->size
     373           0 :                                            / avctx->sample_rate;
     374           0 :     tctx->is_6kbps       = ibps == 6; /* read by metasound_read_bitstream() */
     375             : 
     376           0 :     return ff_twinvq_decode_init(avctx);
     377             : }
     378             : 
     379             : AVCodec ff_metasound_decoder = {
                           /* Codec descriptor for the MetaSound decoder: only the init
                            * callback is defined in this file, close and decode are the
                            * ff_twinvq_* functions declared elsewhere. */
     380             :     .name           = "metasound",
     381             :     .long_name      = NULL_IF_CONFIG_SMALL("Voxware MetaSound"),
     382             :     .type           = AVMEDIA_TYPE_AUDIO,
     383             :     .id             = AV_CODEC_ID_METASOUND,
     384             :     .priv_data_size = sizeof(TwinVQContext), /* metasound_decode_init() reads priv_data as a TwinVQContext */
     385             :     .init           = metasound_decode_init,
     386             :     .close          = ff_twinvq_decode_close,
     387             :     .decode         = ff_twinvq_decode_frame,
     388             :     .capabilities   = AV_CODEC_CAP_DR1,
     389             :     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
     390             :                                                       AV_SAMPLE_FMT_NONE }, /* list ends with AV_SAMPLE_FMT_NONE */
     391             : };

Generated by: LCOV version 1.13