LCOV - code coverage report
Current view: top level - libavcodec - aacenc.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 573 663 86.4 %
Date: 2018-05-20 11:54:08 Functions: 26 27 96.3 %

          Line data    Source code
       1             : /*
       2             :  * AAC encoder
       3             :  * Copyright (C) 2008 Konstantin Shishkov
       4             :  *
       5             :  * This file is part of FFmpeg.
       6             :  *
       7             :  * FFmpeg is free software; you can redistribute it and/or
       8             :  * modify it under the terms of the GNU Lesser General Public
       9             :  * License as published by the Free Software Foundation; either
      10             :  * version 2.1 of the License, or (at your option) any later version.
      11             :  *
      12             :  * FFmpeg is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :  * Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with FFmpeg; if not, write to the Free Software
      19             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      20             :  */
      21             : 
      22             : /**
      23             :  * @file
      24             :  * AAC encoder
      25             :  */
      26             : 
      27             : /***********************************
      28             :  *              TODOs:
      29             :  * add sane pulse detection
      30             :  ***********************************/
      31             : 
      32             : #include "libavutil/libm.h"
      33             : #include "libavutil/thread.h"
      34             : #include "libavutil/float_dsp.h"
      35             : #include "libavutil/opt.h"
      36             : #include "avcodec.h"
      37             : #include "put_bits.h"
      38             : #include "internal.h"
      39             : #include "mpeg4audio.h"
      40             : #include "kbdwin.h"
      41             : #include "sinewin.h"
      42             : 
      43             : #include "aac.h"
      44             : #include "aactab.h"
      45             : #include "aacenc.h"
      46             : #include "aacenctab.h"
      47             : #include "aacenc_utils.h"
      48             : 
      49             : #include "psymodel.h"
      50             : 
      51             : static AVOnce aac_table_init = AV_ONCE_INIT; /* once-control object; presumably guards one-time table setup, but its use is outside this excerpt */
      52             : 
      53           0 : static void put_pce(PutBitContext *pb, AVCodecContext *avctx) /* write the PCE fields held in s->pce, followed by an ident string, to pb */
      54             : {
      55             :     int i, j;
      56           0 :     AACEncContext *s = avctx->priv_data;
      57           0 :     AACPCEInfo *pce = &s->pce;
      58           0 :     const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT; /* nonzero when the bitexact flag is set */
      59           0 :     const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT; /* bitexact mode writes the fixed tag "Lavc" instead of LIBAVCODEC_IDENT */
      60             : 
      61           0 :     put_bits(pb, 4, 0); /* 4-bit field, always 0 (presumably the element instance tag - confirm) */
      62             : 
      63           0 :     put_bits(pb, 2, avctx->profile); /* 2-bit profile, taken from avctx rather than s */
      64           0 :     put_bits(pb, 4, s->samplerate_index); /* 4-bit sample rate index */
      65             : 
      66           0 :     put_bits(pb, 4, pce->num_ele[0]); /* Front */
      67           0 :     put_bits(pb, 4, pce->num_ele[1]); /* Side */
      68           0 :     put_bits(pb, 4, pce->num_ele[2]); /* Back */
      69           0 :     put_bits(pb, 2, pce->num_ele[3]); /* LFE */
      70           0 :     put_bits(pb, 3, 0); /* Assoc data */
      71           0 :     put_bits(pb, 4, 0); /* CCs */
      72             : 
      73           0 :     put_bits(pb, 1, 0); /* Stereo mixdown */
      74           0 :     put_bits(pb, 1, 0); /* Mono mixdown */
      75           0 :     put_bits(pb, 1, 0); /* Something else */
      76             : 
      77           0 :     for (i = 0; i < 4; i++) { /* i walks the four num_ele groups in the order written above: front, side, back, LFE */
      78           0 :         for (j = 0; j < pce->num_ele[i]; j++) {
      79           0 :             if (i < 3) /* the 1-bit pairing flag is written for the first three groups only, not for i == 3 */
      80           0 :                 put_bits(pb, 1, pce->pairing[i][j]);
      81           0 :             put_bits(pb, 4, pce->index[i][j]); /* 4-bit index of element j in group i */
      82             :         }
      83             :     }
      84             : 
      85           0 :     avpriv_align_put_bits(pb); /* align the writer before the length-prefixed string */
      86           0 :     put_bits(pb, 8, strlen(aux_data)); /* 8-bit length of aux_data */
      87           0 :     avpriv_put_string(pb, aux_data, 0); /* the string itself; the last argument is 0 (presumably "no terminator" - confirm) */
      88           0 : }
      89             : 
      90             : /**
      91             :  * Make AAC audio config object in newly allocated avctx->extradata; returns 0 or AVERROR(ENOMEM).
      92             :  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
      93             :  */
      94          11 : static int put_audio_specific_config(AVCodecContext *avctx)
      95             : {
      96             :     PutBitContext pb;
      97          11 :     AACEncContext *s = avctx->priv_data;
      98          11 :     int channels = (!s->needs_pce)*(s->channels - (s->channels == 8 ? 1 : 0)); /* 0 when a PCE is written instead; otherwise the channel count, with 8 written as 7 */
      99          11 :     const int max_size = 32; /* size in bytes of the extradata buffer; NOTE(review): no padding is added and the PCE path is not size-checked here - confirm 32 is enough */
     100             : 
     101          11 :     avctx->extradata = av_mallocz(max_size); /* NOTE(review): overwrites avctx->extradata without freeing a prior value - presumably NULL at this point; confirm */
     102          11 :     if (!avctx->extradata)
     103           0 :         return AVERROR(ENOMEM);
     104             : 
     105          11 :     init_put_bits(&pb, avctx->extradata, max_size);
     106          11 :     put_bits(&pb, 5, s->profile+1); //profile
     107          11 :     put_bits(&pb, 4, s->samplerate_index); //sample rate index
     108          11 :     put_bits(&pb, 4, channels); /* 4-bit channel field computed above */
     109             :     //GASpecificConfig
     110          11 :     put_bits(&pb, 1, 0); //frame length - 1024 samples
     111          11 :     put_bits(&pb, 1, 0); //does not depend on core coder
     112          11 :     put_bits(&pb, 1, 0); //is not extension
     113          11 :     if (s->needs_pce)
     114           0 :         put_pce(&pb, avctx); /* the PCE follows the three flag bits when one is needed */
     115             : 
     116             :     //Explicitly Mark SBR absent
     117          11 :     put_bits(&pb, 11, 0x2b7); //sync extension
     118          11 :     put_bits(&pb, 5,  AOT_SBR);
     119          11 :     put_bits(&pb, 1,  0);
     120          11 :     flush_put_bits(&pb);
     121          11 :     avctx->extradata_size = put_bits_count(&pb) >> 3; /* bit count converted to bytes */
     122             : 
     123          11 :     return 0;
     124             : }
     125             : 
     126       11643 : void ff_quantize_band_cost_cache_init(struct AACEncContext *s) /* advance the cache generation counter of s; the cache array is cleared only when the counter wraps */
     127             : {
     128       11643 :     ++s->quantize_band_cost_cache_generation;
     129       11643 :     if (s->quantize_band_cost_cache_generation == 0) { /* counter wrapped around to 0 */
     130           0 :         memset(s->quantize_band_cost_cache, 0, sizeof(s->quantize_band_cost_cache)); /* zero the whole cache array */
     131           0 :         s->quantize_band_cost_cache_generation = 1; /* restart at 1, so the counter is never left at 0 after this call */
     132             :     }
     133       11643 : }
     134             : 
     135             : #define WINDOW_FUNC(type) \
     136             : static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
     137             :                                     SingleChannelElement *sce, \
     138             :                                     const float *audio) /* expands to the signature of apply_<type>_window(); the function body follows each use of the macro */
     139             : 
     140        6899 : WINDOW_FUNC(only_long) /* window 2048 samples of audio into sce->ret_buf using two 1024-point windows */
     141             : {
     142        6899 :     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; /* window for the first half: KBD if use_kb_window[0], else sine */
     143        6899 :     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; /* window for the second half, chosen by use_kb_window[1] */
     144        6899 :     float *out = sce->ret_buf;
     145             : 
     146        6899 :     fdsp->vector_fmul        (out,        audio,        lwindow, 1024); /* samples 0..1023 */
     147        6899 :     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024); /* samples 1024..2047, through the _reverse variant */
     148        6899 : }
     149             : 
     150         119 : WINDOW_FUNC(long_start) /* fill sce->ret_buf (2048 samples) for LONG_START_SEQUENCE, per the apply_window table */
     151             : {
     152         119 :     const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; /* long window is chosen by use_kb_window[1] */
     153         119 :     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; /* short window is chosen by use_kb_window[0] */
     154         119 :     float *out = sce->ret_buf;
     155             : 
     156         119 :     fdsp->vector_fmul(out, audio, lwindow, 1024); /* samples 0..1023: long window */
     157         119 :     memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448); /* samples 1024..1471: copied unchanged */
     158         119 :     fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128); /* samples 1472..1599: short window via the _reverse variant */
     159         119 :     memset(out + 1024 + 576, 0, sizeof(out[0]) * 448); /* samples 1600..2047: zeroed */
     160         119 : }
     161             : 
     162         102 : WINDOW_FUNC(long_stop) /* fill sce->ret_buf (2048 samples) for LONG_STOP_SEQUENCE, per the apply_window table */
     163             : {
     164         102 :     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; /* long window is chosen by use_kb_window[0] */
     165         102 :     const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; /* short window is chosen by use_kb_window[1] */
     166         102 :     float *out = sce->ret_buf;
     167             : 
     168         102 :     memset(out, 0, sizeof(out[0]) * 448); /* samples 0..447: zeroed */
     169         102 :     fdsp->vector_fmul(out + 448, audio + 448, swindow, 128); /* samples 448..575: short window */
     170         102 :     memcpy(out + 576, audio + 576, sizeof(out[0]) * 448); /* samples 576..1023: copied unchanged */
     171         102 :     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024); /* samples 1024..2047: long window via the _reverse variant */
     172         102 : }
     173             : 
     174         164 : WINDOW_FUNC(eight_short) /* write eight 256-sample windowed blocks to sce->ret_buf, reading audio from sample 448 */
     175             : {
     176         164 :     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
     177         164 :     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
     178         164 :     const float *in = audio + 448; /* input is read starting at sample 448 */
     179         164 :     float *out = sce->ret_buf;
     180             :     int w;
     181             : 
     182        1476 :     for (w = 0; w < 8; w++) { /* each pass writes 256 output samples while the input pointer advances by only 128 */
     183        1312 :         fdsp->vector_fmul        (out, in, w ? pwindow : swindow, 128); /* first pass uses swindow, later passes use pwindow */
     184        1312 :         out += 128;
     185        1312 :         in  += 128;
     186        1312 :         fdsp->vector_fmul_reverse(out, in, swindow, 128); /* second half reads the 128 samples that the next pass starts from */
     187        1312 :         out += 128;
     188             :     }
     189         164 : }
     190             : 
     191             : static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
     192             :                                      SingleChannelElement *sce,
     193             :                                      const float *audio) = {
     194             :     [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
     195             :     [LONG_START_SEQUENCE]  = apply_long_start_window,
     196             :     [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
     197             :     [LONG_STOP_SEQUENCE]   = apply_long_stop_window
     198             : }; /* windowing routine per window sequence value; indexed with ics.window_sequence[0] in apply_window_and_mdct() */
     199             : 
     200        7284 : static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
     201             :                                   float *audio) /* window audio into sce->ret_buf, MDCT it into sce->coeffs, then shift audio down by 1024 samples */
     202             : {
     203             :     int i;
     204        7284 :     const float *output = sce->ret_buf; /* the windowing routines below write into this same buffer */
     205             : 
     206        7284 :     apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio); /* dispatch on the current window sequence */
     207             : 
     208        7284 :     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
     209        7120 :         s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output); /* one call on the mdct1024 context */
     210             :     else
     211        1476 :         for (i = 0; i < 1024; i += 128) /* eight calls on the mdct128 context */
     212        1312 :             s->mdct128.mdct_calc(&s->mdct128, &sce->coeffs[i], output + i*2); /* coeffs advance by 128 per call, windowed input by 256 */
     213        7284 :     memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024); /* move audio[1024..2047] down to audio[0..1023] */
     214        7284 :     memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs)); /* keep a copy of the fresh coefficients in pcoeffs */
     215        7284 : }
     216             : 
     217             : /**
     218             :  * Encode ics_info element: window sequence, window shape, max_sfb and prediction flag or grouping bits.
     219             :  * @see Table 4.6 (syntax of ics_info)
     220             :  */
     221        6711 : static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
     222             : {
     223             :     int w;
     224             : 
     225        6711 :     put_bits(&s->pb, 1, 0);                // ics_reserved bit
     226        6711 :     put_bits(&s->pb, 2, info->window_sequence[0]); /* 2-bit window sequence */
     227        6711 :     put_bits(&s->pb, 1, info->use_kb_window[0]); /* 1-bit window shape flag */
     228        6711 :     if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
     229        6526 :         put_bits(&s->pb, 6, info->max_sfb); /* max_sfb takes 6 bits for non-short sequences */
     230        6526 :         put_bits(&s->pb, 1, !!info->predictor_present); /* normalised to 0 or 1 */
     231             :     } else {
     232         185 :         put_bits(&s->pb, 4, info->max_sfb); /* max_sfb takes 4 bits for EIGHT_SHORT_SEQUENCE */
     233        1480 :         for (w = 1; w < 8; w++) /* seven flags, one each for windows 1..7 */
     234        1295 :             put_bits(&s->pb, 1, !info->group_len[w]); /* 1 when group_len[w] is 0, else 0 */
     235             :     }
     236        6711 : }
     237             : 
     238             : /**
     239             :  * Encode MS data: the 2-bit ms_mode, then one mask bit per band per window group when ms_mode is 1.
     240             :  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
     241             :  */
     242        5028 : static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
     243             : {
     244             :     int i, w;
     245             : 
     246        5028 :     put_bits(pb, 2, cpe->ms_mode);
     247        5028 :     if (cpe->ms_mode == 1) /* other ms_mode values carry no per-band bits */
     248        1826 :         for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w]) /* step by group_len: one pass per window group, using channel 0's grouping */
     249       41222 :             for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
     250       40291 :                 put_bits(pb, 1, cpe->ms_mask[w*16 + i]); /* ms_mask is indexed with 16 entries per window */
     251        5028 : }
     252             : 
     253             : /**
     254             :  * Set max_sfb, merge zero flags per window group and, for a common-window pair, choose ms_mode.
     255             :  */
     256        6467 : static void adjust_frame_information(ChannelElement *cpe, int chans)
     257             : {
     258             :     int i, w, w2, g, ch;
     259             :     int maxsfb, cmaxsfb;
     260             : 
     261       18206 :     for (ch = 0; ch < chans; ch++) {
     262       11739 :         IndividualChannelStream *ics = &cpe->ch[ch].ics;
     263       11739 :         maxsfb = 0;
     264       11739 :         cpe->ch[ch].pulse.num_pulse = 0; /* the pulse count is cleared for every channel */
     265       24159 :         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
     266       26021 :             for (w2 =  0; w2 < ics->group_len[w]; w2++) { /* NOTE(review): the scan below indexes row w, not w+w2, so every w2 pass gives the same result */
     267       13601 :                 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--) /* scan down from num_swb past bands whose zero flag is set */
     268             :                     ;
     269       13601 :                 maxsfb = FFMAX(maxsfb, cmaxsfb); /* keep the largest count seen over all groups */
     270             :             }
     271             :         }
     272       11739 :         ics->max_sfb = maxsfb;
     273             : 
     274             :         //adjust zero bands for window groups
     275       24159 :         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
     276      563749 :             for (g = 0; g < ics->max_sfb; g++) {
     277      551329 :                 i = 1; /* i is used as a flag here: stays 1 only if band g is zero in every window of the group */
     278      566819 :                 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
     279      551669 :                     if (!cpe->ch[ch].zeroes[w2*16 + g]) {
     280      536179 :                         i = 0;
     281      536179 :                         break;
     282             :                     }
     283             :                 }
     284      551329 :                 cpe->ch[ch].zeroes[w*16 + g] = i; /* result is stored in the first window of the group */
     285             :             }
     286             :         }
     287             :     }
     288             : 
     289        6467 :     if (chans > 1 && cpe->common_window) {
     290        5028 :         IndividualChannelStream *ics0 = &cpe->ch[0].ics;
     291        5028 :         IndividualChannelStream *ics1 = &cpe->ch[1].ics;
     292        5028 :         int msc = 0; /* number of set ms_mask entries below max_sfb */
     293        5028 :         ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb); /* both channels end up with the larger max_sfb */
     294        5028 :         ics1->max_sfb = ics0->max_sfb;
     295       10623 :         for (w = 0; w < ics0->num_windows*16; w += 16) /* here w is the row offset into ms_mask, visiting every window */
     296      247484 :             for (i = 0; i < ics0->max_sfb; i++)
     297      241889 :                 if (cpe->ms_mask[w+i])
     298       30094 :                     msc++;
     299        5028 :         if (msc == 0 || ics0->max_sfb == 0)
     300        3741 :             cpe->ms_mode = 0; /* no mask entry set, or no bands at all */
     301             :         else
     302        1287 :             cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2; /* 1 when only some entries are set, 2 when the count reaches max_sfb * num_windows */
     303             :     }
     304        6467 : }
     305             : 
     306        2355 : static void apply_intensity_stereo(ChannelElement *cpe) /* in bands flagged by is_mask, fold ch[1] coeffs into ch[0] (scaled) and zero ch[1] */
     307             : {
     308             :     int w, w2, g, i;
     309        2355 :     IndividualChannelStream *ics = &cpe->ch[0].ics; /* channel 0's window layout drives the loops */
     310        2355 :     if (!cpe->common_window) /* nothing is done unless the pair shares a common window */
     311        1237 :         return;
     312        2289 :     for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
     313        2443 :         for (w2 =  0; w2 < ics->group_len[w]; w2++) {
     314        1272 :             int start = (w+w2) * 128; /* coefficient offset of window w+w2 */
     315       57440 :             for (g = 0; g < ics->num_swb; g++) {
     316       56168 :                 int p  = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14); /* evaluates to -1 for band_type 14 and +1 for band_type 15; per-band data is read from the group's first window */
     317       56168 :                 float scale = cpe->ch[0].is_ener[w*16+g];
     318       56168 :                 if (!cpe->is_mask[w*16 + g]) { /* band not flagged: skip it but still advance start */
     319       47945 :                     start += ics->swb_sizes[g];
     320       47945 :                     continue;
     321             :                 }
     322        8223 :                 if (cpe->ms_mask[w*16 + g]) /* a set ms_mask entry flips the sign of p */
     323        2441 :                     p *= -1;
     324      290175 :                 for (i = 0; i < ics->swb_sizes[g]; i++) {
     325      281952 :                     float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale; /* (ch0 + p*ch1) * is_ener */
     326      281952 :                     cpe->ch[0].coeffs[start+i] = sum;
     327      281952 :                     cpe->ch[1].coeffs[start+i] = 0.0f;
     328             :                 }
     329        8223 :                 start += ics->swb_sizes[g];
     330             :             }
     331             :         }
     332             :     }
     333             : }
     334             : 
     335        2069 : static void apply_mid_side_stereo(ChannelElement *cpe) /* in eligible bands, replace ch[0]/ch[1] coeffs with (ch0+ch1)/2 and (ch0-ch1)/2 */
     336             : {
     337             :     int w, w2, g, i;
     338        2069 :     IndividualChannelStream *ics = &cpe->ch[0].ics; /* channel 0's window layout drives the loops */
     339        2069 :     if (!cpe->common_window) /* nothing is done unless the pair shares a common window */
     340        1077 :         return;
     341        2037 :     for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
     342        2191 :         for (w2 =  0; w2 < ics->group_len[w]; w2++) {
     343        1146 :             int start = (w+w2) * 128; /* coefficient offset of window w+w2 */
     344       51140 :             for (g = 0; g < ics->num_swb; g++) {
     345             :                 /* ms_mask can be used for other purposes in PNS and I/S,
     346             :                  * so must not apply M/S if any band uses either, even if
     347             :                  * ms_mask is set.
     348             :                  */
     349       49994 :                 if (!cpe->ms_mask[w*16 + g] || cpe->is_mask[w*16 + g]
     350       28861 :                     || cpe->ch[0].band_type[w*16 + g] >= NOISE_BT
     351       28861 :                     || cpe->ch[1].band_type[w*16 + g] >= NOISE_BT) {
     352       21133 :                     start += ics->swb_sizes[g]; /* band skipped, but the offset still advances */
     353       21133 :                     continue;
     354             :                 }
     355      649873 :                 for (i = 0; i < ics->swb_sizes[g]; i++) {
     356      621012 :                     float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f; /* (ch0 + ch1) / 2 */
     357      621012 :                     float R = L - cpe->ch[1].coeffs[start+i]; /* equals (ch0 - ch1) / 2 */
     358      621012 :                     cpe->ch[0].coeffs[start+i] = L;
     359      621012 :                     cpe->ch[1].coeffs[start+i] = R;
     360             :                 }
     361       28861 :                 start += ics->swb_sizes[g];
     362             :             }
     363             :         }
     364             :     }
     365             : }
     366             : 
     367             : /**
     368             :  * Encode scalefactor band coding type: run the coder's optional special-band hook, then encode once per window group.
     369             :  */
     370       11739 : static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
     371             : {
     372             :     int w;
     373             : 
     374       11739 :     if (s->coder->set_special_band_scalefactors) /* optional callback: called only when the coder provides it */
     375       11739 :         s->coder->set_special_band_scalefactors(s, sce);
     376             : 
     377       24159 :     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) /* step by group_len: one call per window group */
     378       12420 :         s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda); /* called without a NULL check, unlike the hook above */
     379       11739 : }
     380             : 
     381             : /**
     382             :  * Encode scalefactors as differences from the previous value of the same kind (regular, noise, intensity).
     383             :  */
     384       11739 : static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
     385             :                                  SingleChannelElement *sce) /* avctx is not used in the body */
     386             : {
     387       11739 :     int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET; /* running previous values for regular and noise bands */
     388       11739 :     int off_is = 0, noise_flag = 1; /* intensity differences start from 0; noise_flag marks the first noise band */
     389             :     int i, w;
     390             : 
     391       24159 :     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     392      563754 :         for (i = 0; i < sce->ics.max_sfb; i++) {
     393      551334 :             if (!sce->zeroes[w*16 + i]) { /* bands flagged zero emit nothing */
     394      521193 :                 if (sce->band_type[w*16 + i] == NOISE_BT) {
     395       17340 :                     diff = sce->sf_idx[w*16 + i] - off_pns;
     396       17340 :                     off_pns = sce->sf_idx[w*16 + i];
     397       17340 :                     if (noise_flag-- > 0) { /* true only for the first noise band; the flag is non-positive afterwards */
     398        2242 :                         put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE); /* first noise value is written as a raw field, not via the code table */
     399        2242 :                         continue;
     400             :                     }
     401     1001862 :                 } else if (sce->band_type[w*16 + i] == INTENSITY_BT  ||
     402      498009 :                            sce->band_type[w*16 + i] == INTENSITY_BT2) {
     403        8132 :                     diff = sce->sf_idx[w*16 + i] - off_is;
     404        8132 :                     off_is = sce->sf_idx[w*16 + i];
     405             :                 } else {
     406      495721 :                     diff = sce->sf_idx[w*16 + i] - off_sf;
     407      495721 :                     off_sf = sce->sf_idx[w*16 + i];
     408             :                 }
     409      518951 :                 diff += SCALE_DIFF_ZERO; /* shift the difference so it can index the code tables */
     410      518951 :                 av_assert0(diff >= 0 && diff <= 120); /* the index must lie in 0..120 */
     411      518951 :                 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]); /* table-driven code: length from _bits, value from _code */
     412             :             }
     413             :         }
     414             :     }
     415       11739 : }
     416             : 
     417             : /**
     418             :  * Encode pulse data: a presence bit, then (only if pulses exist) count, start and a pos/amp pair per pulse.
     419             :  */
     420       11739 : static void encode_pulses(AACEncContext *s, Pulse *pulse)
     421             : {
     422             :     int i;
     423             : 
     424       11739 :     put_bits(&s->pb, 1, !!pulse->num_pulse); /* 1-bit presence flag */
     425       11739 :     if (!pulse->num_pulse) /* nothing more is written when there are no pulses */
     426       11739 :         return;
     427             : 
     428           0 :     put_bits(&s->pb, 2, pulse->num_pulse - 1); /* 2-bit count, stored minus one */
     429           0 :     put_bits(&s->pb, 6, pulse->start); /* 6-bit start field */
     430           0 :     for (i = 0; i < pulse->num_pulse; i++) {
     431           0 :         put_bits(&s->pb, 5, pulse->pos[i]); /* 5-bit pos */
     432           0 :         put_bits(&s->pb, 4, pulse->amp[i]); /* 4-bit amp */
     433             :     }
     434             : }
     435             : 
     436             : /**
     437             :  * Encode spectral coefficients band by band for each window group, skipping bands flagged zero.
     438             :  */
     439       11739 : static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
     440             : {
     441             :     int start, i, w, w2;
     442             : 
     443       24159 :     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     444       12420 :         start = 0; /* band offset restarts for every window group */
     445      563754 :         for (i = 0; i < sce->ics.max_sfb; i++) {
     446      551334 :             if (sce->zeroes[w*16 + i]) { /* zero band: nothing encoded, offset still advances */
     447       30141 :                 start += sce->ics.swb_sizes[i];
     448       30141 :                 continue;
     449             :             }
     450     1050921 :             for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) { /* every window of the group is encoded for this band */
     451     3178368 :                 s->coder->quantize_and_encode_band(s, &s->pb,
     452      529728 :                                                    &sce->coeffs[start + w2*128], /* coefficients of window w2 */
     453      529728 :                                                    NULL, sce->ics.swb_sizes[i],
     454      529728 :                                                    sce->sf_idx[w*16 + i], /* sf_idx and band_type come from the group's first window */
     455      529728 :                                                    sce->band_type[w*16 + i],
     456             :                                                    s->lambda,
     457      529728 :                                                    sce->ics.window_clipping[w]); /* indexed by w, not w2 */
     458             :             }
     459      521193 :             start += sce->ics.swb_sizes[i];
     460             :         }
     461             :     }
     462       11739 : }
     463             : 
     464             : /**
     465             :  * Downscale spectral coefficients below max_sfb by clip_avoidance_factor when that factor is below 1.
     466             :  */
     467        7284 : static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce) /* s is not used in the body */
     468             : {
     469             :     int start, i, j, w;
     470             : 
     471        7284 :     if (sce->ics.clip_avoidance_factor < 1.0f) { /* factors of 1 or more leave the coefficients untouched */
     472         392 :         for (w = 0; w < sce->ics.num_windows; w++) { /* every window is visited, not one per group */
     473         224 :             start = 0;
     474        8645 :             for (i = 0; i < sce->ics.max_sfb; i++) {
     475        8421 :                 float *swb_coeffs = &sce->coeffs[start + w*128]; /* first coefficient of band i in window w */
     476      167173 :                 for (j = 0; j < sce->ics.swb_sizes[i]; j++)
     477      158752 :                     swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
     478        8421 :                 start += sce->ics.swb_sizes[i];
     479             :             }
     480             :         }
     481             :     }
     482        7284 : }
     483             : 
     484             : /**
     485             :  * Encode one channel of audio data into s->pb; always returns 0.
     486             :  */
     487       11739 : static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
     488             :                                      SingleChannelElement *sce,
     489             :                                      int common_window)
     490             : {
     491       11739 :     put_bits(&s->pb, 8, sce->sf_idx[0]); /* 8-bit field holding the first scalefactor index (presumably the global gain - confirm) */
     492       11739 :     if (!common_window) { /* ics_info and prediction data are written here only without a common window; presumably the caller writes them otherwise - not visible here */
     493        1683 :         put_ics_info(s, &sce->ics);
     494        1683 :         if (s->coder->encode_main_pred) /* optional coder callback */
     495        1683 :             s->coder->encode_main_pred(s, sce);
     496        1683 :         if (s->coder->encode_ltp_info) /* optional coder callback */
     497        1683 :             s->coder->encode_ltp_info(s, sce, 0);
     498             :     }
     499       11739 :     encode_band_info(s, sce);
     500       11739 :     encode_scale_factors(avctx, s, sce);
     501       11739 :     encode_pulses(s, &sce->pulse);
     502       11739 :     put_bits(&s->pb, 1, !!sce->tns.present); /* 1-bit TNS presence flag */
     503       11739 :     if (s->coder->encode_tns_info) /* optional coder callback; invoked whether or not tns.present is set */
     504       11739 :         s->coder->encode_tns_info(s, sce);
     505       11739 :     put_bits(&s->pb, 1, 0); //ssr
     506       11739 :     encode_spectral_coeffs(s, sce);
     507       11739 :     return 0;
     508             : }
     509             : 
     510             : /**
     511             :  * Write some auxiliary information about the created AAC file: a TYPE_FIL element carrying the bytes of name.
     512             :  */
     513          17 : static void put_bitstream_info(AACEncContext *s, const char *name)
     514             : {
     515             :     int i, namelen, padbits;
     516             : 
     517          17 :     namelen = strlen(name) + 2; /* count written below: string length plus 2 */
     518          17 :     put_bits(&s->pb, 3, TYPE_FIL);
     519          17 :     put_bits(&s->pb, 4, FFMIN(namelen, 15)); /* 4-bit count, capped at 15 */
     520          17 :     if (namelen >= 15) /* at 15 or more, an extra 8-bit field carries the remainder */
     521          17 :         put_bits(&s->pb, 8, namelen - 14);
     522          17 :     put_bits(&s->pb, 4, 0); //extension type - filler
     523          17 :     padbits = -put_bits_count(&s->pb) & 7; /* bits needed to reach the next byte boundary (0..7) */
     524          17 :     avpriv_align_put_bits(&s->pb);
     525         238 :     for (i = 0; i < namelen - 2; i++) /* the strlen(name) characters, without a terminator */
     526         221 :         put_bits(&s->pb, 8, name[i]);
     527          17 :     put_bits(&s->pb, 12 - padbits, 0); /* zero tail: 4 + padbits + 8*(namelen-2) + (12-padbits) = 8*namelen bits after the count */
     528          17 : }
     529             : 
     530             : /*
     531             :  * Copy input samples into s->planar_samples; a NULL frame contributes only zeros.
     532             :  * Channels are reordered from libavcodec's default order to AAC order.
     533             :  */
     534        3816 : static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
     535             : {
     536             :     int ch;
     537        3816 :     int end = 2048 + (frame ? frame->nb_samples : 0); /* index one past the last new sample; 2048 when frame is NULL */
     538        3816 :     const uint8_t *channel_map = s->reorder_map; /* maps encoder channel ch to the frame's channel index */
     539             : 
     540             :     /* copy and remap input samples */
     541       11125 :     for (ch = 0; ch < s->channels; ch++) {
     542             :         /* copy last 1024 samples of previous frame to the start of the current frame */
     543        7309 :         memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0])); /* samples [2048..3071] move to [1024..2047] */
     544             : 
     545             :         /* copy new samples and zero any remaining samples */
     546        7309 :         if (frame) {
     547       14518 :             memcpy(&s->planar_samples[ch][2048],
     548        7259 :                    frame->extended_data[channel_map[ch]],
     549        7259 :                    frame->nb_samples * sizeof(s->planar_samples[0][0])); /* NOTE(review): the source plane is copied as raw bytes of the planar_samples element type - confirm the frame's sample format matches */
     550             :         }
     551        7309 :         memset(&s->planar_samples[ch][end], 0,
     552        7309 :                (3072 - end) * sizeof(s->planar_samples[0][0])); /* zero from end up to index 3072; NOTE(review): assumes nb_samples <= 1024, otherwise this count goes negative - confirm */
     553             :     }
     554        3816 : }
     555             : /*
                     :  * Encode one frame of audio into an AAC packet.
                     :  *
                     :  * Steps, in order:
                     :  *  1. Add the frame to the audio frame queue (or, when frame is NULL,
                     :  *     return early if the queue has nothing left) and shift the new
                     :  *     samples into the per-channel buffers.
                     :  *  2. Return without a packet while avctx->frame_number is still 0.
                     :  *  3. For every channel: obtain the window configuration, measure the
                     :  *     input peak per window to derive a clip avoidance factor, run
                     :  *     windowing + MDCT and reject coefficients that are not below 1E16
                     :  *     in magnitude.
                     :  *  4. Write the bitstream for all channel elements, then adjust
                     :  *     s->lambda from the resulting frame size and repeat until the
                     :  *     rate-control conditions below are satisfied. Before a retry,
                     :  *     pcoeffs is copied back into coeffs if IS, M/S, TNS or prediction
                     :  *     was used during the pass.
                     :  *  5. Write the END element, flush, and fill in packet size, pts and
                     :  *     duration.
                     :  *
                     :  * Returns 0 on success or a negative error code. *got_packet_ptr is set
                     :  * to 1 only when a packet was produced; it is not written otherwise.
                     :  */
     556        3827 : static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     557             :                             const AVFrame *frame, int *got_packet_ptr)
     558             : {
     559        3827 :     AACEncContext *s = avctx->priv_data;
     560        3827 :     float **samples = s->planar_samples, *samples2, *la, *overlap;
     561             :     ChannelElement *cpe;
     562             :     SingleChannelElement *sce;
     563             :     IndividualChannelStream *ics;
     564             :     int i, its, ch, w, chans, tag, start_ch, ret, frame_bits;
     565             :     int target_bits, rate_bits, too_many_bits, too_few_bits;
     566        3827 :     int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
     567             :     int chan_el_counter[4];
     568             :     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
     569             : 
     570             :     /* add current frame to queue */
     571        3827 :     if (frame) {
     572        3794 :         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
     573           0 :             return ret;
     574             :     } else {
                     :         /* flushing: stop once the queue reports nothing left to output */
     575          33 :         if (!s->afq.remaining_samples || (!s->afq.frame_alloc && !s->afq.frame_count))
     576          11 :             return 0;
     577             :     }
     578             : 
     579        3816 :     copy_input_samples(s, frame);
     580        3816 :     if (s->psypp)
     581        3816 :         ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
     582             : 
                     :     /* no packet is produced while frame_number is still 0 */
     583        3816 :     if (!avctx->frame_number)
     584          11 :         return 0;
     585             : 
                     :     /* Pass 1: per-channel windowing decisions, clipping analysis and MDCT */
     586        3805 :     start_ch = 0;
     587        7754 :     for (i = 0; i < s->chan_map[0]; i++) {
     588        3949 :         FFPsyWindowInfo* wi = windows + start_ch;
     589        3949 :         tag      = s->chan_map[i+1];
     590        3949 :         chans    = tag == TYPE_CPE ? 2 : 1;
     591        3949 :         cpe      = &s->cpe[i];
     592       11233 :         for (ch = 0; ch < chans; ch++) {
     593             :             int k;
     594             :             float clip_avoidance_factor;
     595        7284 :             sce = &cpe->ch[ch];
     596        7284 :             ics = &sce->ics;
     597        7284 :             s->cur_channel = start_ch + ch;
     598        7284 :             overlap  = &samples[s->cur_channel][0];
     599        7284 :             samples2 = overlap + 1024;
     600        7284 :             la       = samples2 + (448+64);
     601        7284 :             if (!frame)
     602          50 :                 la = NULL; /* no look-ahead pointer is passed to the psy model when flushing */
     603        7284 :             if (tag == TYPE_LFE) {
     604          48 :                 wi[ch].window_type[0] = wi[ch].window_type[1] = ONLY_LONG_SEQUENCE;
     605          48 :                 wi[ch].window_shape   = 0;
     606          48 :                 wi[ch].num_windows    = 1;
     607          48 :                 wi[ch].grouping[0]    = 1;
     608          48 :                 wi[ch].clipping[0]    = 0;
     609             : 
     610             :                 /* Only the lowest 12 coefficients are used in a LFE channel.
     611             :                  * The expression below results in only the bottom 8 coefficients
     612             :                  * being used for 11.025kHz to 16kHz sample rates.
     613             :                  */
     614          48 :                 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
     615             :             } else {
     616       14472 :                 wi[ch] = s->psy.model->window(&s->psy, samples2, la, s->cur_channel,
     617        7236 :                                               ics->window_sequence[0]);
     618             :             }
                     :             /* index 1 keeps the previous frame's value, index 0 the current one */
     619        7284 :             ics->window_sequence[1] = ics->window_sequence[0];
     620        7284 :             ics->window_sequence[0] = wi[ch].window_type[0];
     621        7284 :             ics->use_kb_window[1]   = ics->use_kb_window[0];
     622        7284 :             ics->use_kb_window[0]   = wi[ch].window_shape;
     623        7284 :             ics->num_windows        = wi[ch].num_windows;
     624        7284 :             ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
     625        7284 :             ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
     626        7284 :             ics->max_sfb            = FFMIN(ics->max_sfb, ics->num_swb);
     627       14568 :             ics->swb_offset         = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
     628       14404 :                                         ff_swb_offset_128 [s->samplerate_index]:
     629        7120 :                                         ff_swb_offset_1024[s->samplerate_index];
     630       14568 :             ics->tns_max_bands      = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
     631       14404 :                                         ff_tns_max_bands_128 [s->samplerate_index]:
     632        7120 :                                         ff_tns_max_bands_1024[s->samplerate_index];
     633             : 
     634       15716 :             for (w = 0; w < ics->num_windows; w++)
     635        8432 :                 ics->group_len[w] = wi[ch].grouping[w];
     636             : 
     637             :             /* Calculate input sample maximums and evaluate clipping risk */
     638        7284 :             clip_avoidance_factor = 0.0f;
     639       15716 :             for (w = 0; w < ics->num_windows; w++) {
     640        8432 :                 const float *wbuf = overlap + w * 128;
     641        8432 :                 const int wlen = 2048 / ics->num_windows;
     642        8432 :                 float max = 0;
     643             :                 int j;
     644             :                 /* mdct input is 2 * output */
     645    14926064 :                 for (j = 0; j < wlen; j++)
     646    14917632 :                     max = FFMAX(max, fabsf(wbuf[j]));
     647        8432 :                 wi[ch].clipping[w] = max;
     648             :             }
                     :             /* flag windows whose peak exceeds the threshold and keep the largest peak */
     649       15716 :             for (w = 0; w < ics->num_windows; w++) {
     650        8432 :                 if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
     651         176 :                     ics->window_clipping[w] = 1;
     652         176 :                     clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
     653             :                 } else {
     654        8256 :                     ics->window_clipping[w] = 0;
     655             :                 }
     656             :             }
     657        7284 :             if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
     658         168 :                 ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
     659             :             } else {
     660        7116 :                 ics->clip_avoidance_factor = 1.0f;
     661             :             }
     662             : 
     663        7284 :             apply_window_and_mdct(s, sce, overlap);
     664             : 
     665        7284 :             if (s->options.ltp && s->coder->update_ltp) {
     666           0 :                 s->coder->update_ltp(s, sce);
     667           0 :                 apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, &sce->ltp_state[0]);
     668           0 :                 s->mdct1024.mdct_calc(&s->mdct1024, sce->lcoeffs, sce->ret_buf);
     669             :             }
     670             : 
                     :             /* the negated comparison also rejects NaN, which fails every '<' test */
     671     7466100 :             for (k = 0; k < 1024; k++) {
     672     7458816 :                 if (!(fabs(cpe->ch[ch].coeffs[k]) < 1E16)) { // Ensure headroom for energy calculation
     673           0 :                     av_log(avctx, AV_LOG_ERROR, "Input contains (near) NaN/+-Inf\n");
     674           0 :                     return AVERROR(EINVAL);
     675             :                 }
     676             :             }
     677        7284 :             avoid_clipping(s, sce);
     678             :         }
     679        3949 :         start_ch += chans;
     680             :     }
                     :     /* the output packet is requested with 8192 bytes per channel */
     681        3805 :     if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels, 0)) < 0)
     682           0 :         return ret;
     683        3805 :     frame_bits = its = 0;
                     :     /* Pass 2: rate-control loop; every iteration rewrites the packet from its start */
     684             :     do {
     685        8697 :         init_put_bits(&s->pb, avpkt->data, avpkt->size);
     686             : 
     687        6251 :         if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
     688          17 :             put_bitstream_info(s, LIBAVCODEC_IDENT);
     689        6251 :         start_ch = 0;
     690        6251 :         target_bits = 0;
     691        6251 :         memset(chan_el_counter, 0, sizeof(chan_el_counter));
     692       12718 :         for (i = 0; i < s->chan_map[0]; i++) {
     693        6467 :             FFPsyWindowInfo* wi = windows + start_ch;
     694             :             const float *coeffs[2];
     695        6467 :             tag      = s->chan_map[i+1];
     696        6467 :             chans    = tag == TYPE_CPE ? 2 : 1;
     697        6467 :             cpe      = &s->cpe[i];
     698        6467 :             cpe->common_window = 0;
     699        6467 :             memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
     700        6467 :             memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
                     :             /* element header: 3-bit element type followed by a 4-bit per-type counter */
     701        6467 :             put_bits(&s->pb, 3, tag);
     702        6467 :             put_bits(&s->pb, 4, chan_el_counter[tag]++);
                     :             /* reset per-channel coding-tool state left over from a previous pass */
     703       18206 :             for (ch = 0; ch < chans; ch++) {
     704       11739 :                 sce = &cpe->ch[ch];
     705       11739 :                 coeffs[ch] = sce->coeffs;
     706       11739 :                 sce->ics.predictor_present = 0;
     707       11739 :                 sce->ics.ltp.present = 0;
     708       11739 :                 memset(sce->ics.ltp.used, 0, sizeof(sce->ics.ltp.used));
     709       11739 :                 memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
     710       11739 :                 memset(&sce->tns, 0, sizeof(TemporalNoiseShaping));
     711     1514331 :                 for (w = 0; w < 128; w++)
     712     1502592 :                     if (sce->band_type[w] > RESERVED_BT)
     713       25410 :                         sce->band_type[w] = 0;
     714             :             }
     715        6467 :             s->psy.bitres.alloc = -1;
     716        6467 :             s->psy.bitres.bits = s->last_frame_pb_count / s->channels;
     717        6467 :             s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
     718        6467 :             if (s->psy.bitres.alloc > 0) {
     719             :                 /* Lambda unused here on purpose, we need to take psy's unscaled allocation */
     720       12934 :                 target_bits += s->psy.bitres.alloc
     721        6467 :                     * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120));
     722        6467 :                 s->psy.bitres.alloc /= chans;
     723             :             }
     724        6467 :             s->cur_type = tag;
     725       18206 :             for (ch = 0; ch < chans; ch++) {
     726       11739 :                 s->cur_channel = start_ch + ch;
     727       11739 :                 if (s->options.pns && s->coder->mark_pns)
     728        3515 :                     s->coder->mark_pns(s, avctx, &cpe->ch[ch]);
     729       11739 :                 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
     730             :             }
                     :             /* a pair uses a common window only if type, shape and grouping all match */
     731        6467 :             if (chans > 1
     732        5272 :                 && wi[0].window_type[0] == wi[1].window_type[0]
     733        5036 :                 && wi[0].window_shape   == wi[1].window_shape) {
     734             : 
     735        5036 :                 cpe->common_window = 1;
     736       10631 :                 for (w = 0; w < wi[0].num_windows; w++) {
     737        5603 :                     if (wi[0].grouping[w] != wi[1].grouping[w]) {
     738           8 :                         cpe->common_window = 0;
     739           8 :                         break;
     740             :                     }
     741             :                 }
     742             :             }
     743       18206 :             for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
     744       11739 :                 sce = &cpe->ch[ch];
     745       11739 :                 s->cur_channel = start_ch + ch;
     746       11739 :                 if (s->options.tns && s->coder->search_for_tns)
     747        3515 :                     s->coder->search_for_tns(s, sce);
     748       11739 :                 if (s->options.tns && s->coder->apply_tns_filt)
     749        3515 :                     s->coder->apply_tns_filt(s, sce);
     750       11739 :                 if (sce->tns.present)
     751          46 :                     tns_mode = 1;
     752       11739 :                 if (s->options.pns && s->coder->search_for_pns)
     753        3515 :                     s->coder->search_for_pns(s, avctx, sce);
     754             :             }
     755        6467 :             s->cur_channel = start_ch;
     756        6467 :             if (s->options.intensity_stereo) { /* Intensity Stereo */
     757        2355 :                 if (s->coder->search_for_is)
     758        2355 :                     s->coder->search_for_is(s, avctx, cpe);
     759        2355 :                 if (cpe->is_mode) is_mode = 1;
     760        2355 :                 apply_intensity_stereo(cpe);
     761             :             }
     762        6467 :             if (s->options.pred) { /* Prediction */
     763        1248 :                 for (ch = 0; ch < chans; ch++) {
     764         832 :                     sce = &cpe->ch[ch];
     765         832 :                     s->cur_channel = start_ch + ch;
     766         832 :                     if (s->options.pred && s->coder->search_for_pred)
     767         832 :                         s->coder->search_for_pred(s, sce);
     768         832 :                     if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
     769             :                 }
     770         416 :                 if (s->coder->adjust_common_pred)
     771         416 :                     s->coder->adjust_common_pred(s, cpe);
     772        1248 :                 for (ch = 0; ch < chans; ch++) {
     773         832 :                     sce = &cpe->ch[ch];
     774         832 :                     s->cur_channel = start_ch + ch;
     775         832 :                     if (s->options.pred && s->coder->apply_main_pred)
     776         832 :                         s->coder->apply_main_pred(s, sce);
     777             :                 }
     778         416 :                 s->cur_channel = start_ch;
     779             :             }
     780        6467 :             if (s->options.mid_side) { /* Mid/Side stereo */
     781        2069 :                 if (s->options.mid_side == -1 && s->coder->search_for_ms)
     782        1651 :                     s->coder->search_for_ms(s, cpe);
     783         418 :                 else if (cpe->common_window)
     784         392 :                     memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
     785        2069 :                 apply_mid_side_stereo(cpe);
     786             :             }
     787        6467 :             adjust_frame_information(cpe, chans);
     788        6467 :             if (s->options.ltp) { /* LTP */
     789           0 :                 for (ch = 0; ch < chans; ch++) {
     790           0 :                     sce = &cpe->ch[ch];
     791           0 :                     s->cur_channel = start_ch + ch;
     792           0 :                     if (s->coder->search_for_ltp)
     793           0 :                         s->coder->search_for_ltp(s, sce, cpe->common_window);
     794           0 :                     if (sce->ics.ltp.present) pred_mode = 1;
     795             :                 }
     796           0 :                 s->cur_channel = start_ch;
     797           0 :                 if (s->coder->adjust_common_ltp)
     798           0 :                     s->coder->adjust_common_ltp(s, cpe);
     799             :             }
                     :             /* for a pair, the shared ICS info is written once ahead of both channels */
     800        6467 :             if (chans == 2) {
     801        5272 :                 put_bits(&s->pb, 1, cpe->common_window);
     802        5272 :                 if (cpe->common_window) {
     803        5028 :                     put_ics_info(s, &cpe->ch[0].ics);
     804        5028 :                     if (s->coder->encode_main_pred)
     805        5028 :                         s->coder->encode_main_pred(s, &cpe->ch[0]);
     806        5028 :                     if (s->coder->encode_ltp_info)
     807        5028 :                         s->coder->encode_ltp_info(s, &cpe->ch[0], 1);
     808        5028 :                     encode_ms_info(&s->pb, cpe);
     809        5028 :                     if (cpe->ms_mode) ms_mode = 1;
     810             :                 }
     811             :             }
     812       18206 :             for (ch = 0; ch < chans; ch++) {
     813       11739 :                 s->cur_channel = start_ch + ch;
     814       11739 :                 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
     815             :             }
     816        6467 :             start_ch += chans;
     817             :         }
     818             : 
     819        6251 :         if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
     820             :             /* When using a constant Q-scale, don't mess with lambda */
     821           0 :             break;
     822             :         }
     823             : 
     824             :         /* rate control stuff
     825             :          * allow between the nominal bitrate, and what psy's bit reservoir says to target
     826             :          * but drift towards the nominal bitrate always
     827             :          */
     828        6251 :         frame_bits = put_bits_count(&s->pb);
     829        6251 :         rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
     830        6251 :         rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
     831        6251 :         too_many_bits = FFMAX(target_bits, rate_bits);
     832        6251 :         too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
     833        6251 :         too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);
     834             : 
     835             :         /* When using ABR, be strict (but only for increasing) */
     836        6251 :         too_few_bits = too_few_bits - too_few_bits/8;
     837        6251 :         too_many_bits = too_many_bits + too_many_bits/2;
     838             : 
     839        6251 :         if (   its == 0 /* for steady-state Q-scale tracking */
     840        2446 :             || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
     841         495 :             || frame_bits >= 6144 * s->channels - 3  )
     842             :         {
     843        5756 :             float ratio = ((float)rate_bits) / frame_bits;
     844             : 
     845        5756 :             if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
     846             :                 /*
     847             :                  * This path is for steady-state Q-scale tracking
     848             :                  * When frame bits fall within the stable range, we still need to adjust
     849             :                  * lambda to maintain it like so in a stable fashion (large jumps in lambda
     850             :                  * create artifacts and should be avoided), but slowly
     851             :                  */
     852        2861 :                 ratio = sqrtf(sqrtf(ratio));
     853        2861 :                 ratio = av_clipf(ratio, 0.9f, 1.1f);
     854             :             } else {
     855             :                 /* Not so fast though */
     856        2895 :                 ratio = sqrtf(ratio);
     857             :             }
     858        5756 :             s->lambda = FFMIN(s->lambda * ratio, 65536.f);
     859             : 
     860             :             /* Keep iterating if we must reduce and lambda is in the sky */
     861        5756 :             if (ratio > 0.9f && ratio < 1.1f) {
     862             :                 break;
     863             :             } else {
     864        2446 :                 if (is_mode || ms_mode || tns_mode || pred_mode) {
     865        1025 :                     for (i = 0; i < s->chan_map[0]; i++) {
     866             :                         // Must restore coeffs
                     :                         /* Look up this element's own type: 'tag' still holds
                     :                          * the type of the last element written above, which
                     :                          * gives the wrong channel count for layouts that mix
                     :                          * channel pairs with single channels. */
     867         520 :                         chans = s->chan_map[i+1] == TYPE_CPE ? 2 : 1;
     868         520 :                         cpe = &s->cpe[i];
     869        1540 :                         for (ch = 0; ch < chans; ch++)
     870        1020 :                             memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
     871             :                     }
     872             :                 }
     873        2446 :                 its++;
     874             :             }
     875             :         } else {
     876             :             break;
     877             :         }
     878             :     } while (1);
     879             : 
     880        3805 :     if (s->options.ltp && s->coder->ltp_insert_new_frame)
     881           0 :         s->coder->ltp_insert_new_frame(s);
     882             : 
                     :     /* terminate the raw data block and flush the bit writer */
     883        3805 :     put_bits(&s->pb, 3, TYPE_END);
     884        3805 :     flush_put_bits(&s->pb);
     885             : 
                     :     /* remembered so the next frame can seed s->psy.bitres.bits from it */
     886        3805 :     s->last_frame_pb_count = put_bits_count(&s->pb);
     887             : 
                     :     /* running totals used for the "Qavg" log line in aac_encode_end() */
     888        3805 :     s->lambda_sum += s->lambda;
     889        3805 :     s->lambda_count++;
     890             : 
     891        3805 :     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
     892             :                        &avpkt->duration);
     893             : 
     894        3805 :     avpkt->size = put_bits_count(&s->pb) >> 3; /* bits -> bytes */
     895        3805 :     *got_packet_ptr = 1;
     896        3805 :     return 0;
     897             : }
     898             : /*
                     :  * Encoder close callback: log the average lambda and release everything
                     :  * held by the encoder context.
                     :  *
                     :  * Logs "Qavg" as s->lambda_sum / s->lambda_count, then ends both MDCT
                     :  * contexts, the psychoacoustic model and the LPC context, ends the
                     :  * preprocessing context if one exists, frees the sample buffer, the
                     :  * channel element array and the float DSP context, and closes the audio
                     :  * frame queue. Always returns 0.
                     :  *
                     :  * NOTE(review): if no packet was ever produced, lambda_count is
                     :  * presumably still 0 and the Qavg division has a zero divisor - confirm
                     :  * the field types and whether that case needs a guard.
                     :  */
     899          11 : static av_cold int aac_encode_end(AVCodecContext *avctx)
     900             : {
     901          11 :     AACEncContext *s = avctx->priv_data;
     902             : 
     903          11 :     av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
     904             : 
     905          11 :     ff_mdct_end(&s->mdct1024);
     906          11 :     ff_mdct_end(&s->mdct128);
     907          11 :     ff_psy_end(&s->psy);
     908          11 :     ff_lpc_end(&s->lpc);
     909          11 :     if (s->psypp)
     910          11 :         ff_psy_preprocess_end(s->psypp);
     911          11 :     av_freep(&s->buffer.samples);
     912          11 :     av_freep(&s->cpe);
     913          11 :     av_freep(&s->fdsp);
     914          11 :     ff_af_queue_close(&s->afq);
     915          11 :     return 0;
     916             : }
     917             : /*
                     :  * Set up the signal-processing helpers used by the encoder.
                     :  *
                     :  * Allocates the float DSP context (honouring AV_CODEC_FLAG_BITEXACT),
                     :  * initializes the long (1024) and short (128) KBD windows and the sine
                     :  * windows, then initializes the two MDCT contexts s->mdct1024 and
                     :  * s->mdct128 with a scale of 32768.0.
                     :  *
                     :  * Returns 0 on success, AVERROR(ENOMEM) if the DSP context cannot be
                     :  * allocated, or the negative error code returned by ff_mdct_init().
                     :  * Nothing is released here on failure.
                     :  *
                     :  * NOTE(review): the arguments 10/7 and 11/8 are presumably log2 sizes
                     :  * (1024/128-sample windows, 2048/256-point transforms) - confirm against
                     :  * the ff_init_ff_sine_windows() and ff_mdct_init() declarations.
                     :  */
     918          11 : static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
     919             : {
     920          11 :     int ret = 0;
     921             : 
     922          11 :     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     923          11 :     if (!s->fdsp)
     924           0 :         return AVERROR(ENOMEM);
     925             : 
     926             :     // window init
     927          11 :     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
     928          11 :     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
     929          11 :     ff_init_ff_sine_windows(10);
     930          11 :     ff_init_ff_sine_windows(7);
     931             : 
     932          11 :     if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
     933           0 :         return ret;
     934          11 :     if ((ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0)) < 0)
     935           0 :         return ret;
     936             : 
     937          11 :     return 0;
     938             : }
     939             : /*
                     :  * Allocate the sample storage and the channel element array.
                     :  *
                     :  * s->buffer.samples receives one zero-initialized slice of 3 * 1024
                     :  * samples per channel, s->cpe receives s->chan_map[0] zero-initialized
                     :  * ChannelElement entries, and s->planar_samples[ch] is pointed at the
                     :  * start of channel ch's slice inside s->buffer.samples.
                     :  *
                     :  * Returns 0 on success or AVERROR(ENOMEM) if either allocation fails.
                     :  *
                     :  * NOTE(review): nothing is freed on the failure path; presumably the
                     :  * caller's cleanup (see aac_encode_end) releases a partially allocated
                     :  * state - confirm.
                     :  */
     940          11 : static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
     941             : {
     942             :     int ch;
     943          11 :     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
     944          11 :     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
     945             : 
     946          36 :     for(ch = 0; ch < s->channels; ch++)
     947          25 :         s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
     948             : 
     949          11 :     return 0;
     950           0 : alloc_fail:
     951           0 :     return AVERROR(ENOMEM);
     952             : }
     953             : /*
                     :  * Initialize the shared AAC tables by calling ff_aac_tableinit().
                     :  *
                     :  * Takes no arguments and returns nothing.
                     :  * NOTE(review): presumably meant to be used as a run-once init callback
                     :  * (libavutil/thread.h is included by this file) - confirm at the call
                     :  * site.
                     :  */
     954          11 : static av_cold void aac_encode_init_tables(void)
     955             : {
     956          11 :     ff_aac_tableinit();
     957          11 : }
     958             : 
     959          11 : static av_cold int aac_encode_init(AVCodecContext *avctx)
     960             : {
     961          11 :     AACEncContext *s = avctx->priv_data;
     962          11 :     int i, ret = 0;
     963             :     const uint8_t *sizes[2];
     964             :     uint8_t grouping[AAC_MAX_CHANNELS];
     965             :     int lengths[2];
     966             : 
     967             :     /* Constants */
     968          11 :     s->last_frame_pb_count = 0;
     969          11 :     avctx->frame_size = 1024;
     970          11 :     avctx->initial_padding = 1024;
     971          11 :     s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
     972             : 
     973             :     /* Channel map and unspecified bitrate guessing */
     974          11 :     s->channels = avctx->channels;
     975             : 
     976          11 :     s->needs_pce = 1;
     977          25 :     for (i = 0; i < FF_ARRAY_ELEMS(aac_normal_chan_layouts); i++) {
     978          25 :         if (avctx->channel_layout == aac_normal_chan_layouts[i]) {
     979          11 :             s->needs_pce = s->options.pce;
     980          11 :             break;
     981             :         }
     982             :     }
     983             : 
     984          11 :     if (s->needs_pce) {
     985           0 :         for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++)
     986           0 :             if (avctx->channel_layout == aac_pce_configs[i].layout)
     987           0 :                 break;
     988           0 :         ERROR_IF(i == FF_ARRAY_ELEMS(aac_pce_configs), "Unsupported channel layout\n");
     989           0 :         av_log(avctx, AV_LOG_INFO, "Using a PCE to encode channel layout\n");
     990           0 :         s->pce = aac_pce_configs[i];
     991           0 :         s->reorder_map = s->pce.reorder_map;
     992           0 :         s->chan_map = s->pce.config_map;
     993             :     } else {
     994          11 :         s->reorder_map = aac_chan_maps[s->channels - 1];
     995          11 :         s->chan_map = aac_chan_configs[s->channels - 1];
     996             :     }
     997             : 
     998          11 :     if (!avctx->bit_rate) {
     999           9 :         for (i = 1; i <= s->chan_map[0]; i++) {
    1000           9 :             avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
    1001           3 :                                s->chan_map[i] == TYPE_LFE ? 16000  : /* LFE  */
    1002             :                                                             69000  ; /* SCE  */
    1003             :         }
    1004             :     }
    1005             : 
    1006             :     /* Samplerate */
    1007          54 :     for (i = 0; i < 16; i++)
    1008          54 :         if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
    1009          11 :             break;
    1010          11 :     s->samplerate_index = i;
    1011          11 :     ERROR_IF(s->samplerate_index == 16 ||
    1012             :              s->samplerate_index >= ff_aac_swb_size_1024_len ||
    1013             :              s->samplerate_index >= ff_aac_swb_size_128_len,
    1014             :              "Unsupported sample rate %d\n", avctx->sample_rate);
    1015             : 
    1016             :     /* Bitrate limiting */
    1017          11 :     WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
    1018             :              "Too many bits %f > %d per frame requested, clamping to max\n",
    1019             :              1024.0 * avctx->bit_rate / avctx->sample_rate,
    1020             :              6144 * s->channels);
    1021          11 :     avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate,
    1022             :                                      avctx->bit_rate);
    1023             : 
    1024             :     /* Profile and option setting */
    1025          11 :     avctx->profile = avctx->profile == FF_PROFILE_UNKNOWN ? FF_PROFILE_AAC_LOW :
    1026             :                      avctx->profile;
    1027          21 :     for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++)
    1028          21 :         if (avctx->profile == aacenc_profiles[i])
    1029          11 :             break;
    1030          11 :     if (avctx->profile == FF_PROFILE_MPEG2_AAC_LOW) {
    1031           0 :         avctx->profile = FF_PROFILE_AAC_LOW;
    1032           0 :         ERROR_IF(s->options.pred,
    1033             :                  "Main prediction unavailable in the \"mpeg2_aac_low\" profile\n");
    1034           0 :         ERROR_IF(s->options.ltp,
    1035             :                  "LTP prediction unavailable in the \"mpeg2_aac_low\" profile\n");
    1036           0 :         WARN_IF(s->options.pns,
    1037             :                 "PNS unavailable in the \"mpeg2_aac_low\" profile, turning off\n");
    1038           0 :         s->options.pns = 0;
    1039          11 :     } else if (avctx->profile == FF_PROFILE_AAC_LTP) {
    1040           0 :         s->options.ltp = 1;
    1041           0 :         ERROR_IF(s->options.pred,
    1042             :                  "Main prediction unavailable in the \"aac_ltp\" profile\n");
    1043          11 :     } else if (avctx->profile == FF_PROFILE_AAC_MAIN) {
    1044           1 :         s->options.pred = 1;
    1045           1 :         ERROR_IF(s->options.ltp,
    1046             :                  "LTP prediction unavailable in the \"aac_main\" profile\n");
    1047          10 :     } else if (s->options.ltp) {
    1048           0 :         avctx->profile = FF_PROFILE_AAC_LTP;
    1049           0 :         WARN_IF(1,
    1050             :                 "Chainging profile to \"aac_ltp\"\n");
    1051           0 :         ERROR_IF(s->options.pred,
    1052             :                  "Main prediction unavailable in the \"aac_ltp\" profile\n");
    1053          10 :     } else if (s->options.pred) {
    1054           0 :         avctx->profile = FF_PROFILE_AAC_MAIN;
    1055           0 :         WARN_IF(1,
    1056             :                 "Chainging profile to \"aac_main\"\n");
    1057           0 :         ERROR_IF(s->options.ltp,
    1058             :                  "LTP prediction unavailable in the \"aac_main\" profile\n");
    1059             :     }
    1060          11 :     s->profile = avctx->profile;
    1061             : 
    1062             :     /* Coder limitations */
    1063          11 :     s->coder = &ff_aac_coders[s->options.coder];
    1064          11 :     if (s->options.coder == AAC_CODER_ANMR) {
    1065           0 :         ERROR_IF(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
    1066             :                  "The ANMR coder is considered experimental, add -strict -2 to enable!\n");
    1067           0 :         s->options.intensity_stereo = 0;
    1068           0 :         s->options.pns = 0;
    1069             :     }
    1070          11 :     ERROR_IF(s->options.ltp && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
    1071             :              "The LPT profile requires experimental compliance, add -strict -2 to enable!\n");
    1072             : 
    1073             :     /* M/S introduces horrible artifacts with multichannel files, this is temporary */
    1074          11 :     if (s->channels > 3)
    1075           1 :         s->options.mid_side = 0;
    1076             : 
    1077          11 :     if ((ret = dsp_init(avctx, s)) < 0)
    1078           0 :         goto fail;
    1079             : 
    1080          11 :     if ((ret = alloc_buffers(avctx, s)) < 0)
    1081           0 :         goto fail;
    1082             : 
    1083          11 :     if ((ret = put_audio_specific_config(avctx)))
    1084           0 :         goto fail;
    1085             : 
    1086          11 :     sizes[0]   = ff_aac_swb_size_1024[s->samplerate_index];
    1087          11 :     sizes[1]   = ff_aac_swb_size_128[s->samplerate_index];
    1088          11 :     lengths[0] = ff_aac_num_swb_1024[s->samplerate_index];
    1089          11 :     lengths[1] = ff_aac_num_swb_128[s->samplerate_index];
    1090          25 :     for (i = 0; i < s->chan_map[0]; i++)
    1091          14 :         grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
    1092          11 :     if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
    1093          11 :                            s->chan_map[0], grouping)) < 0)
    1094           0 :         goto fail;
    1095          11 :     s->psypp = ff_psy_preprocess_init(avctx);
    1096          11 :     ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON);
    1097          11 :     s->random_state = 0x1f2e3d4c;
    1098             : 
    1099          11 :     s->abs_pow34   = abs_pow34_v;
    1100          11 :     s->quant_bands = quantize_bands;
    1101             : 
    1102             :     if (ARCH_X86)
    1103          11 :         ff_aac_dsp_init_x86(s);
    1104             : 
    1105             :     if (HAVE_MIPSDSP)
    1106             :         ff_aac_coder_init_mips(s);
    1107             : 
    1108          11 :     if ((ret = ff_thread_once(&aac_table_init, &aac_encode_init_tables)) != 0)
    1109           0 :         return AVERROR_UNKNOWN;
    1110             : 
    1111          11 :     ff_af_queue_init(avctx, &s->afq);
    1112             : 
    1113          11 :     return 0;
    1114           0 : fail:
    1115           0 :     aac_encode_end(avctx);
    1116           0 :     return ret;
    1117             : }
    1118             : 
    1119             : #define AACENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM) /* flag set passed with every entry of aacenc_options; parenthesized so the expansion stays a single operand inside any larger expression */
    1120             : static const AVOption aacenc_options[] = { /* Private option table of the AAC encoder (referenced by aacenc_class.option); each non-constant entry targets a field of AACEncContext.options. */
    1121             :     {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_FAST}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "coder"}, /* value is used by aac_encode_init as an index into ff_aac_coders */
    1122             :         {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"}, /* named constant of the coder unit; aac_encode_init rejects it unless strict compliance is experimental and turns off intensity stereo and PNS */
    1123             :         {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"}, /* named constant of the coder unit */
    1124             :         {"fast",     "Default fast search",       0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"}, /* named constant of the coder unit; same value as the aac_coder initializer above */
    1125             :     {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS}, /* aac_encode_init forces this to 0 when s->channels > 3; NOTE(review): the -1 initializer presumably means automatic selection - confirm */
    1126             :     {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS}, /* cleared by aac_encode_init when the ANMR coder is selected */
    1127             :     {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS}, /* cleared by aac_encode_init for the mpeg2_aac_low profile and for the ANMR coder */
    1128             :     {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
    1129             :     {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS}, /* set to 1 by aac_encode_init for FF_PROFILE_AAC_LTP; when enabled, init errors out unless strict compliance is experimental */
    1130             :     {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS}, /* set to 1 by aac_encode_init for FF_PROFILE_AAC_MAIN; init reports an error if it is combined with aac_ltp */
    1131             :     {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
    1132             :     {NULL} /* terminating entry */
    1133             : };
    1134             : 
    1135             : static const AVClass aacenc_class = { /* AVClass of the encoder private context; installed as ff_aac_encoder.priv_class. */
    1136             :     .class_name = "AAC encoder",
    1137             :     .item_name  = av_default_item_name,
    1138             :     .option     = aacenc_options, /* exposes the aac_* private options defined above */
    1139             :     .version    = LIBAVUTIL_VERSION_INT,
    1140             : };
    1141             : 
    1142             : static const AVCodecDefault aac_encode_defaults[] = { /* Codec-specific default overrides, installed as ff_aac_encoder.defaults. */
    1143             :     { "b", "0" }, /* key/value pair given as strings; NOTE(review): b is presumably the bitrate option - confirm */
    1144             :     { NULL } /* terminating entry */
    1145             : };
    1146             : 
    1147             : AVCodec ff_aac_encoder = { /* Codec descriptor of the aac encoder: ties the callbacks, option class and defaults of this file together. */
    1148             :     .name           = "aac",
    1149             :     .long_name      = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
    1150             :     .type           = AVMEDIA_TYPE_AUDIO,
    1151             :     .id             = AV_CODEC_ID_AAC,
    1152             :     .priv_data_size = sizeof(AACEncContext), /* private context is an AACEncContext */
    1153             :     .init           = aac_encode_init,
    1154             :     .encode2        = aac_encode_frame,
    1155             :     .close          = aac_encode_end, /* aac_encode_init also calls this itself on its fail path */
    1156             :     .defaults       = aac_encode_defaults,
    1157             :     .supported_samplerates = mpeg4audio_sample_rates,
    1158             :     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
    1159             :     .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
    1160             :     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
    1161             :                                                      AV_SAMPLE_FMT_NONE }, /* AV_SAMPLE_FMT_FLTP is the only format listed; AV_SAMPLE_FMT_NONE ends the list */
    1162             :     .priv_class     = &aacenc_class,
    1163             : };

Generated by: LCOV version 1.13