LCOV - code coverage report
Current view: top level - libavcodec - g722enc.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 70 161 43.5 %
Date: 2017-12-16 13:57:32 Functions: 8 9 88.9 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) CMU 1993 Computer Science, Speech Group
       3             :  *                        Chengxiang Lu and Alex Hauptmann
       4             :  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
       5             :  * Copyright (c) 2009 Kenan Gillet
       6             :  * Copyright (c) 2010 Martin Storsjo
       7             :  *
       8             :  * This file is part of FFmpeg.
       9             :  *
      10             :  * FFmpeg is free software; you can redistribute it and/or
      11             :  * modify it under the terms of the GNU Lesser General Public
      12             :  * License as published by the Free Software Foundation; either
      13             :  * version 2.1 of the License, or (at your option) any later version.
      14             :  *
      15             :  * FFmpeg is distributed in the hope that it will be useful,
      16             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      17             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      18             :  * Lesser General Public License for more details.
      19             :  *
      20             :  * You should have received a copy of the GNU Lesser General Public
      21             :  * License along with FFmpeg; if not, write to the Free Software
      22             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      23             :  */
      24             : 
      25             : /**
      26             :  * @file
      27             :  * G.722 ADPCM audio encoder
      28             :  */
      29             : 
      30             : #include "libavutil/avassert.h"
      31             : #include "avcodec.h"
      32             : #include "internal.h"
      33             : #include "g722.h"
      34             : #include "libavutil/common.h"
      35             : 
/* Number of encoded bytes after which the trellis search commits the best
   path found so far to the output and restarts its path bookkeeping. The
   per-band path buffers allocated at init time hold
   (1 << trellis) * FREEZE_INTERVAL entries. */
#define FREEZE_INTERVAL 128

/* This is an arbitrary value. Allowing insanely large values leads to strange
   problems, so we limit it to a reasonable value */
#define MAX_FRAME_SIZE 32768

/* We clip the value of avctx->trellis to prevent data type overflows and
   undefined behavior. Using larger values is insanely slow anyway. */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16
      46             : 
      47           1 : static av_cold int g722_encode_close(AVCodecContext *avctx)
      48             : {
      49           1 :     G722Context *c = avctx->priv_data;
      50             :     int i;
      51           3 :     for (i = 0; i < 2; i++) {
      52           2 :         av_freep(&c->paths[i]);
      53           2 :         av_freep(&c->node_buf[i]);
      54           2 :         av_freep(&c->nodep_buf[i]);
      55             :     }
      56           1 :     return 0;
      57             : }
      58             : 
      59           1 : static av_cold int g722_encode_init(AVCodecContext * avctx)
      60             : {
      61           1 :     G722Context *c = avctx->priv_data;
      62             :     int ret;
      63             : 
      64           1 :     c->band[0].scale_factor = 8;
      65           1 :     c->band[1].scale_factor = 2;
      66           1 :     c->prev_samples_pos = 22;
      67             : 
      68           1 :     if (avctx->trellis) {
      69           0 :         int frontier = 1 << avctx->trellis;
      70           0 :         int max_paths = frontier * FREEZE_INTERVAL;
      71             :         int i;
      72           0 :         for (i = 0; i < 2; i++) {
      73           0 :             c->paths[i] = av_mallocz_array(max_paths, sizeof(**c->paths));
      74           0 :             c->node_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->node_buf));
      75           0 :             c->nodep_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->nodep_buf));
      76           0 :             if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
      77           0 :                 ret = AVERROR(ENOMEM);
      78           0 :                 goto error;
      79             :             }
      80             :         }
      81             :     }
      82             : 
      83           1 :     if (avctx->frame_size) {
      84             :         /* validate frame size */
      85           0 :         if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
      86             :             int new_frame_size;
      87             : 
      88           0 :             if (avctx->frame_size == 1)
      89           0 :                 new_frame_size = 2;
      90           0 :             else if (avctx->frame_size > MAX_FRAME_SIZE)
      91           0 :                 new_frame_size = MAX_FRAME_SIZE;
      92             :             else
      93           0 :                 new_frame_size = avctx->frame_size - 1;
      94             : 
      95           0 :             av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
      96             :                    "allowed. Using %d instead of %d\n", new_frame_size,
      97             :                    avctx->frame_size);
      98           0 :             avctx->frame_size = new_frame_size;
      99             :         }
     100             :     } else {
     101             :         /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
     102             :            a common packet size for VoIP applications */
     103           1 :         avctx->frame_size = 320;
     104             :     }
     105           1 :     avctx->initial_padding = 22;
     106             : 
     107           1 :     if (avctx->trellis) {
     108             :         /* validate trellis */
     109           0 :         if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
     110           0 :             int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
     111           0 :             av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
     112             :                    "allowed. Using %d instead of %d\n", new_trellis,
     113             :                    avctx->trellis);
     114           0 :             avctx->trellis = new_trellis;
     115             :         }
     116             :     }
     117             : 
     118           1 :     ff_g722dsp_init(&c->dsp);
     119             : 
     120           1 :     return 0;
     121           0 : error:
     122           0 :     g722_encode_close(avctx);
     123           0 :     return ret;
     124             : }
     125             : 
     126             : static const int16_t low_quant[33] = {
     127             :       35,   72,  110,  150,  190,  233,  276,  323,
     128             :      370,  422,  473,  530,  587,  650,  714,  786,
     129             :      858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
     130             :     1765, 1980, 2195, 2557, 2919
     131             : };
     132             : 
     133       48000 : static inline void filter_samples(G722Context *c, const int16_t *samples,
     134             :                                   int *xlow, int *xhigh)
     135             : {
     136             :     int xout[2];
     137       48000 :     c->prev_samples[c->prev_samples_pos++] = samples[0];
     138       48000 :     c->prev_samples[c->prev_samples_pos++] = samples[1];
     139       48000 :     c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
     140       48000 :     *xlow  = xout[0] + xout[1] >> 14;
     141       48000 :     *xhigh = xout[0] - xout[1] >> 14;
     142       48000 :     if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
     143          95 :         memmove(c->prev_samples,
     144          95 :                 c->prev_samples + c->prev_samples_pos - 22,
     145             :                 22 * sizeof(c->prev_samples[0]));
     146          95 :         c->prev_samples_pos = 22;
     147             :     }
     148       48000 : }
     149             : 
     150       48000 : static inline int encode_high(const struct G722Band *state, int xhigh)
     151             : {
     152       48000 :     int diff = av_clip_int16(xhigh - state->s_predictor);
     153       48000 :     int pred = 141 * state->scale_factor >> 8;
     154             :            /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
     155       48000 :     return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
     156             : }
     157             : 
     158       48000 : static inline int encode_low(const struct G722Band* state, int xlow)
     159             : {
     160       48000 :     int diff  = av_clip_int16(xlow - state->s_predictor);
     161             :            /* = diff >= 0 ? diff : -(diff + 1) */
     162       48000 :     int limit = diff ^ (diff >> (sizeof(diff)*8-1));
     163       48000 :     int i = 0;
     164       48000 :     limit = limit + 1 << 10;
     165       48000 :     if (limit > low_quant[8] * state->scale_factor)
     166       21162 :         i = 9;
     167      314274 :     while (i < 29 && limit > low_quant[i] * state->scale_factor)
     168      218274 :         i++;
     169       48000 :     return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
     170             : }
     171             : 
     172           0 : static void g722_encode_trellis(G722Context *c, int trellis,
     173             :                                 uint8_t *dst, int nb_samples,
     174             :                                 const int16_t *samples)
     175             : {
     176             :     int i, j, k;
     177           0 :     int frontier = 1 << trellis;
     178             :     struct TrellisNode **nodes[2];
     179             :     struct TrellisNode **nodes_next[2];
     180           0 :     int pathn[2] = {0, 0}, froze = -1;
     181             :     struct TrellisPath *p[2];
     182             : 
     183           0 :     for (i = 0; i < 2; i++) {
     184           0 :         nodes[i] = c->nodep_buf[i];
     185           0 :         nodes_next[i] = c->nodep_buf[i] + frontier;
     186           0 :         memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
     187           0 :         nodes[i][0] = c->node_buf[i] + frontier;
     188           0 :         nodes[i][0]->ssd = 0;
     189           0 :         nodes[i][0]->path = 0;
     190           0 :         nodes[i][0]->state = c->band[i];
     191             :     }
     192             : 
     193           0 :     for (i = 0; i < nb_samples >> 1; i++) {
     194             :         int xlow, xhigh;
     195             :         struct TrellisNode *next[2];
     196           0 :         int heap_pos[2] = {0, 0};
     197             : 
     198           0 :         for (j = 0; j < 2; j++) {
     199           0 :             next[j] = c->node_buf[j] + frontier*(i & 1);
     200           0 :             memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
     201             :         }
     202             : 
     203           0 :         filter_samples(c, &samples[2*i], &xlow, &xhigh);
     204             : 
     205           0 :         for (j = 0; j < frontier && nodes[0][j]; j++) {
     206             :             /* Only k >> 2 affects the future adaptive state, therefore testing
     207             :              * small steps that don't change k >> 2 is useless, the original
     208             :              * value from encode_low is better than them. Since we step k
     209             :              * in steps of 4, make sure range is a multiple of 4, so that
     210             :              * we don't miss the original value from encode_low. */
     211           0 :             int range = j < frontier/2 ? 4 : 0;
     212           0 :             struct TrellisNode *cur_node = nodes[0][j];
     213             : 
     214           0 :             int ilow = encode_low(&cur_node->state, xlow);
     215             : 
     216           0 :             for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
     217             :                 int decoded, dec_diff, pos;
     218             :                 uint32_t ssd;
     219             :                 struct TrellisNode* node;
     220             : 
     221           0 :                 if (k < 0)
     222           0 :                     continue;
     223             : 
     224           0 :                 decoded = av_clip_intp2((cur_node->state.scale_factor *
     225           0 :                                   ff_g722_low_inv_quant6[k] >> 10)
     226           0 :                                 + cur_node->state.s_predictor, 14);
     227           0 :                 dec_diff = xlow - decoded;
     228             : 
     229             : #define STORE_NODE(index, UPDATE, VALUE)\
     230             :                 ssd = cur_node->ssd + dec_diff*dec_diff;\
     231             :                 /* Check for wraparound. Using 64 bit ssd counters would \
     232             :                  * be simpler, but is slower on x86 32 bit. */\
     233             :                 if (ssd < cur_node->ssd)\
     234             :                     continue;\
     235             :                 if (heap_pos[index] < frontier) {\
     236             :                     pos = heap_pos[index]++;\
     237             :                     av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
     238             :                     node = nodes_next[index][pos] = next[index]++;\
     239             :                     node->path = pathn[index]++;\
     240             :                 } else {\
     241             :                     /* Try to replace one of the leaf nodes with the new \
     242             :                      * one, but not always testing the same leaf position */\
     243             :                     pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
     244             :                     if (ssd >= nodes_next[index][pos]->ssd)\
     245             :                         continue;\
     246             :                     heap_pos[index]++;\
     247             :                     node = nodes_next[index][pos];\
     248             :                 }\
     249             :                 node->ssd = ssd;\
     250             :                 node->state = cur_node->state;\
     251             :                 UPDATE;\
     252             :                 c->paths[index][node->path].value = VALUE;\
     253             :                 c->paths[index][node->path].prev = cur_node->path;\
     254             :                 /* Sift the newly inserted node up in the heap to restore \
     255             :                  * the heap property */\
     256             :                 while (pos > 0) {\
     257             :                     int parent = (pos - 1) >> 1;\
     258             :                     if (nodes_next[index][parent]->ssd <= ssd)\
     259             :                         break;\
     260             :                     FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
     261             :                                                 nodes_next[index][pos]);\
     262             :                     pos = parent;\
     263             :                 }
     264           0 :                 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
     265             :             }
     266             :         }
     267             : 
     268           0 :         for (j = 0; j < frontier && nodes[1][j]; j++) {
     269             :             int ihigh;
     270           0 :             struct TrellisNode *cur_node = nodes[1][j];
     271             : 
     272             :             /* We don't try to get any initial guess for ihigh via
     273             :              * encode_high - since there's only 4 possible values, test
     274             :              * them all. Testing all of these gives a much, much larger
     275             :              * gain than testing a larger range around ilow. */
     276           0 :             for (ihigh = 0; ihigh < 4; ihigh++) {
     277             :                 int dhigh, decoded, dec_diff, pos;
     278             :                 uint32_t ssd;
     279             :                 struct TrellisNode* node;
     280             : 
     281           0 :                 dhigh = cur_node->state.scale_factor *
     282           0 :                         ff_g722_high_inv_quant[ihigh] >> 10;
     283           0 :                 decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
     284           0 :                 dec_diff = xhigh - decoded;
     285             : 
     286           0 :                 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
     287             :             }
     288             :         }
     289             : 
     290           0 :         for (j = 0; j < 2; j++) {
     291           0 :             FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
     292             : 
     293           0 :             if (nodes[j][0]->ssd > (1 << 16)) {
     294           0 :                 for (k = 1; k < frontier && nodes[j][k]; k++)
     295           0 :                     nodes[j][k]->ssd -= nodes[j][0]->ssd;
     296           0 :                 nodes[j][0]->ssd = 0;
     297             :             }
     298             :         }
     299             : 
     300           0 :         if (i == froze + FREEZE_INTERVAL) {
     301           0 :             p[0] = &c->paths[0][nodes[0][0]->path];
     302           0 :             p[1] = &c->paths[1][nodes[1][0]->path];
     303           0 :             for (j = i; j > froze; j--) {
     304           0 :                 dst[j] = p[1]->value << 6 | p[0]->value;
     305           0 :                 p[0] = &c->paths[0][p[0]->prev];
     306           0 :                 p[1] = &c->paths[1][p[1]->prev];
     307             :             }
     308           0 :             froze = i;
     309           0 :             pathn[0] = pathn[1] = 0;
     310           0 :             memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
     311           0 :             memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
     312             :         }
     313             :     }
     314             : 
     315           0 :     p[0] = &c->paths[0][nodes[0][0]->path];
     316           0 :     p[1] = &c->paths[1][nodes[1][0]->path];
     317           0 :     for (j = i; j > froze; j--) {
     318           0 :         dst[j] = p[1]->value << 6 | p[0]->value;
     319           0 :         p[0] = &c->paths[0][p[0]->prev];
     320           0 :         p[1] = &c->paths[1][p[1]->prev];
     321             :     }
     322           0 :     c->band[0] = nodes[0][0]->state;
     323           0 :     c->band[1] = nodes[1][0]->state;
     324           0 : }
     325             : 
     326       48000 : static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
     327             :                                          const int16_t *samples)
     328             : {
     329             :     int xlow, xhigh, ilow, ihigh;
     330       48000 :     filter_samples(c, samples, &xlow, &xhigh);
     331       48000 :     ihigh = encode_high(&c->band[1], xhigh);
     332       48000 :     ilow  = encode_low (&c->band[0], xlow);
     333       96000 :     ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
     334       48000 :                                 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
     335       48000 :     ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
     336       48000 :     *dst = ihigh << 6 | ilow;
     337       48000 : }
     338             : 
     339         300 : static void g722_encode_no_trellis(G722Context *c,
     340             :                                    uint8_t *dst, int nb_samples,
     341             :                                    const int16_t *samples)
     342             : {
     343             :     int i;
     344       48300 :     for (i = 0; i < nb_samples; i += 2)
     345       48000 :         encode_byte(c, dst++, &samples[i]);
     346         300 : }
     347             : 
     348         300 : static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     349             :                              const AVFrame *frame, int *got_packet_ptr)
     350             : {
     351         300 :     G722Context *c = avctx->priv_data;
     352         300 :     const int16_t *samples = (const int16_t *)frame->data[0];
     353             :     int nb_samples, out_size, ret;
     354             : 
     355         300 :     out_size = (frame->nb_samples + 1) / 2;
     356         300 :     if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
     357           0 :         return ret;
     358             : 
     359         300 :     nb_samples = frame->nb_samples - (frame->nb_samples & 1);
     360             : 
     361         300 :     if (avctx->trellis)
     362           0 :         g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
     363             :     else
     364         300 :         g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
     365             : 
     366             :     /* handle last frame with odd frame_size */
     367         300 :     if (nb_samples < frame->nb_samples) {
     368           0 :         int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
     369           0 :         encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
     370             :     }
     371             : 
     372         300 :     if (frame->pts != AV_NOPTS_VALUE)
     373         300 :         avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
     374         300 :     *got_packet_ptr = 1;
     375         300 :     return 0;
     376             : }
     377             : 
/* Codec descriptor for the G.722 ADPCM encoder. It wires up the init, close
 * and encode callbacks defined in this file and advertises signed 16-bit
 * samples and a mono channel layout as the only supported input. */
AVCodec ff_adpcm_g722_encoder = {
    .name            = "g722",
    .long_name       = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
    .type            = AVMEDIA_TYPE_AUDIO,
    .id              = AV_CODEC_ID_ADPCM_G722,
    .priv_data_size  = sizeof(G722Context),
    .init            = g722_encode_init,
    .close           = g722_encode_close,
    .encode2         = g722_encode_frame,
    .capabilities    = AV_CODEC_CAP_SMALL_LAST_FRAME,
    .sample_fmts     = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
    .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO, 0 },
};

Generated by: LCOV version 1.13