LCOV - code coverage report
Current view: top level - libavcodec - aacenc.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 587 663 88.5 %
Date: 2017-12-18 06:23:41 Functions: 26 27 96.3 %

          Line data    Source code
       1             : /*
       2             :  * AAC encoder
       3             :  * Copyright (C) 2008 Konstantin Shishkov
       4             :  *
       5             :  * This file is part of FFmpeg.
       6             :  *
       7             :  * FFmpeg is free software; you can redistribute it and/or
       8             :  * modify it under the terms of the GNU Lesser General Public
       9             :  * License as published by the Free Software Foundation; either
      10             :  * version 2.1 of the License, or (at your option) any later version.
      11             :  *
      12             :  * FFmpeg is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :  * Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with FFmpeg; if not, write to the Free Software
      19             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      20             :  */
      21             : 
      22             : /**
      23             :  * @file
      24             :  * AAC encoder
      25             :  */
      26             : 
      27             : /***********************************
      28             :  *              TODOs:
      29             :  * add sane pulse detection
      30             :  ***********************************/
      31             : 
      32             : #include "libavutil/libm.h"
      33             : #include "libavutil/thread.h"
      34             : #include "libavutil/float_dsp.h"
      35             : #include "libavutil/opt.h"
      36             : #include "avcodec.h"
      37             : #include "put_bits.h"
      38             : #include "internal.h"
      39             : #include "mpeg4audio.h"
      40             : #include "kbdwin.h"
      41             : #include "sinewin.h"
      42             : 
      43             : #include "aac.h"
      44             : #include "aactab.h"
      45             : #include "aacenc.h"
      46             : #include "aacenctab.h"
      47             : #include "aacenc_utils.h"
      48             : 
      49             : #include "psymodel.h"
      50             : 
      51             : static AVOnce aac_table_init = AV_ONCE_INIT;
      52             : 
      53           0 : static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
      54             : {
      55             :     int i, j;
      56           0 :     AACEncContext *s = avctx->priv_data;
      57           0 :     AACPCEInfo *pce = &s->pce;
      58           0 :     const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT;
      59           0 :     const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT;
      60             : 
      61           0 :     put_bits(pb, 4, 0);
      62             : 
      63           0 :     put_bits(pb, 2, avctx->profile);
      64           0 :     put_bits(pb, 4, s->samplerate_index);
      65             : 
      66           0 :     put_bits(pb, 4, pce->num_ele[0]); /* Front */
      67           0 :     put_bits(pb, 4, pce->num_ele[1]); /* Side */
      68           0 :     put_bits(pb, 4, pce->num_ele[2]); /* Back */
      69           0 :     put_bits(pb, 2, pce->num_ele[3]); /* LFE */
      70           0 :     put_bits(pb, 3, 0); /* Assoc data */
      71           0 :     put_bits(pb, 4, 0); /* CCs */
      72             : 
      73           0 :     put_bits(pb, 1, 0); /* Stereo mixdown */
      74           0 :     put_bits(pb, 1, 0); /* Mono mixdown */
      75           0 :     put_bits(pb, 1, 0); /* Something else */
      76             : 
      77           0 :     for (i = 0; i < 4; i++) {
      78           0 :         for (j = 0; j < pce->num_ele[i]; j++) {
      79           0 :             if (i < 3)
      80           0 :                 put_bits(pb, 1, pce->pairing[i][j]);
      81           0 :             put_bits(pb, 4, pce->index[i][j]);
      82             :         }
      83             :     }
      84             : 
      85           0 :     avpriv_align_put_bits(pb);
      86           0 :     put_bits(pb, 8, strlen(aux_data));
      87           0 :     avpriv_put_string(pb, aux_data, 0);
      88           0 : }
      89             : 
      90             : /**
      91             :  * Make AAC audio config object.
      92             :  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
      93             :  */
      94          12 : static int put_audio_specific_config(AVCodecContext *avctx)
      95             : {
      96             :     PutBitContext pb;
      97          12 :     AACEncContext *s = avctx->priv_data;
      98          12 :     int channels = (!s->needs_pce)*(s->channels - (s->channels == 8 ? 1 : 0));
      99          12 :     const int max_size = 32;
     100             : 
     101          12 :     avctx->extradata = av_mallocz(max_size);
     102          12 :     if (!avctx->extradata)
     103           0 :         return AVERROR(ENOMEM);
     104             : 
     105          12 :     init_put_bits(&pb, avctx->extradata, max_size);
     106          12 :     put_bits(&pb, 5, s->profile+1); //profile
     107          12 :     put_bits(&pb, 4, s->samplerate_index); //sample rate index
     108          12 :     put_bits(&pb, 4, channels);
     109             :     //GASpecificConfig
     110          12 :     put_bits(&pb, 1, 0); //frame length - 1024 samples
     111          12 :     put_bits(&pb, 1, 0); //does not depend on core coder
     112          12 :     put_bits(&pb, 1, 0); //is not extension
     113          12 :     if (s->needs_pce)
     114           0 :         put_pce(&pb, avctx);
     115             : 
     116             :     //Explicitly Mark SBR absent
     117          12 :     put_bits(&pb, 11, 0x2b7); //sync extension
     118          12 :     put_bits(&pb, 5,  AOT_SBR);
     119          12 :     put_bits(&pb, 1,  0);
     120          12 :     flush_put_bits(&pb);
     121          12 :     avctx->extradata_size = put_bits_count(&pb) >> 3;
     122             : 
     123          12 :     return 0;
     124             : }
     125             : 
     126       12022 : void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
     127             : {
     128       12022 :     ++s->quantize_band_cost_cache_generation;
     129       12022 :     if (s->quantize_band_cost_cache_generation == 0) {
     130           0 :         memset(s->quantize_band_cost_cache, 0, sizeof(s->quantize_band_cost_cache));
     131           0 :         s->quantize_band_cost_cache_generation = 1;
     132             :     }
     133       12022 : }
     134             : 
     135             : #define WINDOW_FUNC(type) \
     136             : static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
     137             :                                     SingleChannelElement *sce, \
     138             :                                     const float *audio)
     139             : 
     140        8467 : WINDOW_FUNC(only_long)
     141             : {
     142        8467 :     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
     143        8467 :     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
     144        8467 :     float *out = sce->ret_buf;
     145             : 
     146        8467 :     fdsp->vector_fmul        (out,        audio,        lwindow, 1024);
     147        8467 :     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
     148        8467 : }
     149             : 
     150         143 : WINDOW_FUNC(long_start)
     151             : {
     152         143 :     const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
     153         143 :     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
     154         143 :     float *out = sce->ret_buf;
     155             : 
     156         143 :     fdsp->vector_fmul(out, audio, lwindow, 1024);
     157         143 :     memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
     158         143 :     fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
     159         143 :     memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
     160         143 : }
     161             : 
     162         122 : WINDOW_FUNC(long_stop)
     163             : {
     164         122 :     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
     165         122 :     const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
     166         122 :     float *out = sce->ret_buf;
     167             : 
     168         122 :     memset(out, 0, sizeof(out[0]) * 448);
     169         122 :     fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
     170         122 :     memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
     171         122 :     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
     172         122 : }
     173             : 
     174         196 : WINDOW_FUNC(eight_short)
     175             : {
     176         196 :     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
     177         196 :     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
     178         196 :     const float *in = audio + 448;
     179         196 :     float *out = sce->ret_buf;
     180             :     int w;
     181             : 
     182        1764 :     for (w = 0; w < 8; w++) {
     183        1568 :         fdsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
     184        1568 :         out += 128;
     185        1568 :         in  += 128;
     186        1568 :         fdsp->vector_fmul_reverse(out, in, swindow, 128);
     187        1568 :         out += 128;
     188             :     }
     189         196 : }
     190             : 
     191             : static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
     192             :                                      SingleChannelElement *sce,
     193             :                                      const float *audio) = {
     194             :     [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
     195             :     [LONG_START_SEQUENCE]  = apply_long_start_window,
     196             :     [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
     197             :     [LONG_STOP_SEQUENCE]   = apply_long_stop_window
     198             : };
     199             : 
     200        8106 : static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
     201             :                                   float *audio)
     202             : {
     203             :     int i;
     204        8106 :     const float *output = sce->ret_buf;
     205             : 
     206        8106 :     apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
     207             : 
     208        8106 :     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
     209        7926 :         s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
     210             :     else
     211        1620 :         for (i = 0; i < 1024; i += 128)
     212        1440 :             s->mdct128.mdct_calc(&s->mdct128, &sce->coeffs[i], output + i*2);
     213        8106 :     memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
     214        8106 :     memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
     215        8106 : }
     216             : 
     217             : /**
     218             :  * Encode ics_info element.
     219             :  * @see Table 4.6 (syntax of ics_info)
     220             :  */
     221        6716 : static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
     222             : {
     223             :     int w;
     224             : 
     225        6716 :     put_bits(&s->pb, 1, 0);                // ics_reserved bit
     226        6716 :     put_bits(&s->pb, 2, info->window_sequence[0]);
     227        6716 :     put_bits(&s->pb, 1, info->use_kb_window[0]);
     228        6716 :     if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
     229        6500 :         put_bits(&s->pb, 6, info->max_sfb);
     230        6500 :         put_bits(&s->pb, 1, !!info->predictor_present);
     231             :     } else {
     232         216 :         put_bits(&s->pb, 4, info->max_sfb);
     233        1728 :         for (w = 1; w < 8; w++)
     234        1512 :             put_bits(&s->pb, 1, !info->group_len[w]);
     235             :     }
     236        6716 : }
     237             : 
     238             : /**
     239             :  * Encode MS data.
     240             :  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
     241             :  */
     242        5409 : static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
     243             : {
     244             :     int i, w;
     245             : 
     246        5409 :     put_bits(pb, 2, cpe->ms_mode);
     247        5409 :     if (cpe->ms_mode == 1)
     248         692 :         for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
     249       16371 :             for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
     250       16017 :                 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
     251        5409 : }
     252             : 
     253             : /**
     254             :  * Produce integer coefficients from scalefactors provided by the model.
     255             :  */
     256        6372 : static void adjust_frame_information(ChannelElement *cpe, int chans)
     257             : {
     258             :     int i, w, w2, g, ch;
     259             :     int maxsfb, cmaxsfb;
     260             : 
     261       18497 :     for (ch = 0; ch < chans; ch++) {
     262       12125 :         IndividualChannelStream *ics = &cpe->ch[ch].ics;
     263       12125 :         maxsfb = 0;
     264       12125 :         cpe->ch[ch].pulse.num_pulse = 0;
     265       25026 :         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
     266       27119 :             for (w2 =  0; w2 < ics->group_len[w]; w2++) {
     267       14218 :                 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
     268             :                     ;
     269       14218 :                 maxsfb = FFMAX(maxsfb, cmaxsfb);
     270             :             }
     271             :         }
     272       12125 :         ics->max_sfb = maxsfb;
     273             : 
     274             :         //adjust zero bands for window groups
     275       25026 :         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
     276      573642 :             for (g = 0; g < ics->max_sfb; g++) {
     277      560741 :                 i = 1;
     278      563595 :                 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
     279      560971 :                     if (!cpe->ch[ch].zeroes[w2*16 + g]) {
     280      558117 :                         i = 0;
     281      558117 :                         break;
     282             :                     }
     283             :                 }
     284      560741 :                 cpe->ch[ch].zeroes[w*16 + g] = i;
     285             :             }
     286             :         }
     287             :     }
     288             : 
     289        6372 :     if (chans > 1 && cpe->common_window) {
     290        5409 :         IndividualChannelStream *ics0 = &cpe->ch[0].ics;
     291        5409 :         IndividualChannelStream *ics1 = &cpe->ch[1].ics;
     292        5409 :         int msc = 0;
     293        5409 :         ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
     294        5409 :         ics1->max_sfb = ics0->max_sfb;
     295       11399 :         for (w = 0; w < ics0->num_windows*16; w += 16)
     296      259251 :             for (i = 0; i < ics0->max_sfb; i++)
     297      253261 :                 if (cpe->ms_mask[w+i])
     298       23288 :                     msc++;
     299        5409 :         if (msc == 0 || ics0->max_sfb == 0)
     300        4686 :             cpe->ms_mode = 0;
     301             :         else
     302         723 :             cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
     303             :     }
     304        6372 : }
     305             : 
     306        1243 : static void apply_intensity_stereo(ChannelElement *cpe)
     307             : {
     308             :     int w, w2, g, i;
     309        1243 :     IndividualChannelStream *ics = &cpe->ch[0].ics;
     310        1243 :     if (!cpe->common_window)
     311         652 :         return;
     312        1216 :     for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
     313        1314 :         for (w2 =  0; w2 < ics->group_len[w]; w2++) {
     314         689 :             int start = (w+w2) * 128;
     315       30530 :             for (g = 0; g < ics->num_swb; g++) {
     316       29841 :                 int p  = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
     317       29841 :                 float scale = cpe->ch[0].is_ener[w*16+g];
     318       29841 :                 if (!cpe->is_mask[w*16 + g]) {
     319       21424 :                     start += ics->swb_sizes[g];
     320       21424 :                     continue;
     321             :                 }
     322        8417 :                 if (cpe->ms_mask[w*16 + g])
     323        1348 :                     p *= -1;
     324      297001 :                 for (i = 0; i < ics->swb_sizes[g]; i++) {
     325      288584 :                     float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale;
     326      288584 :                     cpe->ch[0].coeffs[start+i] = sum;
     327      288584 :                     cpe->ch[1].coeffs[start+i] = 0.0f;
     328             :                 }
     329        8417 :                 start += ics->swb_sizes[g];
     330             :             }
     331             :         }
     332             :     }
     333             : }
     334             : 
     335        1047 : static void apply_mid_side_stereo(ChannelElement *cpe)
     336             : {
     337             :     int w, w2, g, i;
     338        1047 :     IndividualChannelStream *ics = &cpe->ch[0].ics;
     339        1047 :     if (!cpe->common_window)
     340         547 :         return;
     341        1034 :     for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
     342        1132 :         for (w2 =  0; w2 < ics->group_len[w]; w2++) {
     343         598 :             int start = (w+w2) * 128;
     344       25980 :             for (g = 0; g < ics->num_swb; g++) {
     345             :                 /* ms_mask can be used for other purposes in PNS and I/S,
     346             :                  * so must not apply M/S if any band uses either, even if
     347             :                  * ms_mask is set.
     348             :                  */
     349       25382 :                 if (!cpe->ms_mask[w*16 + g] || cpe->is_mask[w*16 + g]
     350       22817 :                     || cpe->ch[0].band_type[w*16 + g] >= NOISE_BT
     351       22817 :                     || cpe->ch[1].band_type[w*16 + g] >= NOISE_BT) {
     352        2565 :                     start += ics->swb_sizes[g];
     353        2565 :                     continue;
     354             :                 }
     355      482253 :                 for (i = 0; i < ics->swb_sizes[g]; i++) {
     356      459436 :                     float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f;
     357      459436 :                     float R = L - cpe->ch[1].coeffs[start+i];
     358      459436 :                     cpe->ch[0].coeffs[start+i] = L;
     359      459436 :                     cpe->ch[1].coeffs[start+i] = R;
     360             :                 }
     361       22817 :                 start += ics->swb_sizes[g];
     362             :             }
     363             :         }
     364             :     }
     365             : }
     366             : 
     367             : /**
     368             :  * Encode scalefactor band coding type.
     369             :  */
     370       12125 : static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
     371             : {
     372             :     int w;
     373             : 
     374       12125 :     if (s->coder->set_special_band_scalefactors)
     375       12125 :         s->coder->set_special_band_scalefactors(s, sce);
     376             : 
     377       25026 :     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
     378       12901 :         s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
     379       12125 : }
     380             : 
     381             : /**
     382             :  * Encode scalefactors.
     383             :  */
     384       12125 : static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
     385             :                                  SingleChannelElement *sce)
     386             : {
     387       12125 :     int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
     388       12125 :     int off_is = 0, noise_flag = 1;
     389             :     int i, w;
     390             : 
     391       25026 :     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     392      573649 :         for (i = 0; i < sce->ics.max_sfb; i++) {
     393      560748 :             if (!sce->zeroes[w*16 + i]) {
     394      558117 :                 if (sce->band_type[w*16 + i] == NOISE_BT) {
     395       15417 :                     diff = sce->sf_idx[w*16 + i] - off_pns;
     396       15417 :                     off_pns = sce->sf_idx[w*16 + i];
     397       15417 :                     if (noise_flag-- > 0) {
     398        1199 :                         put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
     399        1199 :                         continue;
     400             :                     }
     401     1078302 :                 } else if (sce->band_type[w*16 + i] == INTENSITY_BT  ||
     402      535602 :                            sce->band_type[w*16 + i] == INTENSITY_BT2) {
     403        8320 :                     diff = sce->sf_idx[w*16 + i] - off_is;
     404        8320 :                     off_is = sce->sf_idx[w*16 + i];
     405             :                 } else {
     406      534380 :                     diff = sce->sf_idx[w*16 + i] - off_sf;
     407      534380 :                     off_sf = sce->sf_idx[w*16 + i];
     408             :                 }
     409      556918 :                 diff += SCALE_DIFF_ZERO;
     410      556918 :                 av_assert0(diff >= 0 && diff <= 120);
     411      556918 :                 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
     412             :             }
     413             :         }
     414             :     }
     415       12125 : }
     416             : 
     417             : /**
     418             :  * Encode pulse data.
     419             :  */
     420       12125 : static void encode_pulses(AACEncContext *s, Pulse *pulse)
     421             : {
     422             :     int i;
     423             : 
     424       12125 :     put_bits(&s->pb, 1, !!pulse->num_pulse);
     425       12125 :     if (!pulse->num_pulse)
     426       12125 :         return;
     427             : 
     428           0 :     put_bits(&s->pb, 2, pulse->num_pulse - 1);
     429           0 :     put_bits(&s->pb, 6, pulse->start);
     430           0 :     for (i = 0; i < pulse->num_pulse; i++) {
     431           0 :         put_bits(&s->pb, 5, pulse->pos[i]);
     432           0 :         put_bits(&s->pb, 4, pulse->amp[i]);
     433             :     }
     434             : }
     435             : 
     436             : /**
     437             :  * Encode spectral coefficients processed by psychoacoustic model.
     438             :  */
     439       12125 : static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
     440             : {
     441             :     int start, i, w, w2;
     442             : 
     443       25026 :     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     444       12901 :         start = 0;
     445      573649 :         for (i = 0; i < sce->ics.max_sfb; i++) {
     446      560748 :             if (sce->zeroes[w*16 + i]) {
     447        2631 :                 start += sce->ics.swb_sizes[i];
     448        2631 :                 continue;
     449             :             }
     450     1126592 :             for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) {
     451     3410850 :                 s->coder->quantize_and_encode_band(s, &s->pb,
     452      568475 :                                                    &sce->coeffs[start + w2*128],
     453      568475 :                                                    NULL, sce->ics.swb_sizes[i],
     454      568475 :                                                    sce->sf_idx[w*16 + i],
     455      568475 :                                                    sce->band_type[w*16 + i],
     456             :                                                    s->lambda,
     457      568475 :                                                    sce->ics.window_clipping[w]);
     458             :             }
     459      558117 :             start += sce->ics.swb_sizes[i];
     460             :         }
     461             :     }
     462       12125 : }
     463             : 
     464             : /**
     465             :  * Downscale spectral coefficients for near-clipping windows to avoid artifacts
     466             :  */
     467        8106 : static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
     468             : {
     469             :     int start, i, j, w;
     470             : 
     471        8106 :     if (sce->ics.clip_avoidance_factor < 1.0f) {
     472         419 :         for (w = 0; w < sce->ics.num_windows; w++) {
     473         241 :             start = 0;
     474        9031 :             for (i = 0; i < sce->ics.max_sfb; i++) {
     475        8790 :                 float *swb_coeffs = &sce->coeffs[start + w*128];
     476      171750 :                 for (j = 0; j < sce->ics.swb_sizes[i]; j++)
     477      162960 :                     swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
     478        8790 :                 start += sce->ics.swb_sizes[i];
     479             :             }
     480             :         }
     481             :     }
     482        8106 : }
     483             : 
     484             : /**
     485             :  * Encode one channel of audio data.
     486             :  */
     487       12125 : static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
     488             :                                      SingleChannelElement *sce,
     489             :                                      int common_window)
     490             : {
     491       12125 :     put_bits(&s->pb, 8, sce->sf_idx[0]);
     492       12125 :     if (!common_window) {
     493        1307 :         put_ics_info(s, &sce->ics);
     494        1307 :         if (s->coder->encode_main_pred)
     495        1307 :             s->coder->encode_main_pred(s, sce);
     496        1307 :         if (s->coder->encode_ltp_info)
     497        1307 :             s->coder->encode_ltp_info(s, sce, 0);
     498             :     }
     499       12125 :     encode_band_info(s, sce);
     500       12125 :     encode_scale_factors(avctx, s, sce);
     501       12125 :     encode_pulses(s, &sce->pulse);
     502       12125 :     put_bits(&s->pb, 1, !!sce->tns.present);
     503       12125 :     if (s->coder->encode_tns_info)
     504       12125 :         s->coder->encode_tns_info(s, sce);
     505       12125 :     put_bits(&s->pb, 1, 0); //ssr
     506       12125 :     encode_spectral_coeffs(s, sce);
     507       12125 :     return 0;
     508             : }
     509             : 
     510             : /**
     511             :  * Write some auxiliary information about the created AAC file.
     512             :  */
     513          22 : static void put_bitstream_info(AACEncContext *s, const char *name)
     514             : {
     515             :     int i, namelen, padbits;
     516             : 
     517          22 :     namelen = strlen(name) + 2;
     518          22 :     put_bits(&s->pb, 3, TYPE_FIL);
     519          22 :     put_bits(&s->pb, 4, FFMIN(namelen, 15));
     520          22 :     if (namelen >= 15)
     521           0 :         put_bits(&s->pb, 8, namelen - 14);
     522          22 :     put_bits(&s->pb, 4, 0); //extension type - filler
     523          22 :     padbits = -put_bits_count(&s->pb) & 7;
     524          22 :     avpriv_align_put_bits(&s->pb);
     525         286 :     for (i = 0; i < namelen - 2; i++)
     526         264 :         put_bits(&s->pb, 8, name[i]);
     527          22 :     put_bits(&s->pb, 12 - padbits, 0);
     528          22 : }
     529             : 
     530             : /*
     531             :  * Copy input samples.
     532             :  * Channels are reordered from libavcodec's default order to AAC order.
     533             :  */
     534        4228 : static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
     535             : {
     536             :     int ch;
     537        4228 :     int end = 2048 + (frame ? frame->nb_samples : 0);
     538        4228 :     const uint8_t *channel_map = s->reorder_map;
     539             : 
     540             :     /* copy and remap input samples */
     541       12361 :     for (ch = 0; ch < s->channels; ch++) {
     542             :         /* copy last 1024 samples of previous frame to the start of the current frame */
     543        8133 :         memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
     544             : 
     545             :         /* copy new samples and zero any remaining samples */
     546        8133 :         if (frame) {
     547       16158 :             memcpy(&s->planar_samples[ch][2048],
     548        8079 :                    frame->extended_data[channel_map[ch]],
     549        8079 :                    frame->nb_samples * sizeof(s->planar_samples[0][0]));
     550             :         }
     551        8133 :         memset(&s->planar_samples[ch][end], 0,
     552        8133 :                (3072 - end) * sizeof(s->planar_samples[0][0]));
     553             :     }
     554        4228 : }
     555             : 
     556        4240 : static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     557             :                             const AVFrame *frame, int *got_packet_ptr)
     558             : {
     559        4240 :     AACEncContext *s = avctx->priv_data;
     560        4240 :     float **samples = s->planar_samples, *samples2, *la, *overlap;
     561             :     ChannelElement *cpe;
     562             :     SingleChannelElement *sce;
     563             :     IndividualChannelStream *ics;
     564             :     int i, its, ch, w, chans, tag, start_ch, ret, frame_bits;
     565             :     int target_bits, rate_bits, too_many_bits, too_few_bits;
     566        4240 :     int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
     567             :     int chan_el_counter[4];
     568             :     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
     569             : 
     570             :     /* add current frame to queue */
     571        4240 :     if (frame) {
     572        4204 :         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
     573           0 :             return ret;
     574             :     } else {
     575          36 :         if (!s->afq.remaining_samples || (!s->afq.frame_alloc && !s->afq.frame_count))
     576          12 :             return 0;
     577             :     }
     578             : 
     579        4228 :     copy_input_samples(s, frame);
     580        4228 :     if (s->psypp)
     581        4228 :         ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
     582             : 
     583        4228 :     if (!avctx->frame_number)
     584          12 :         return 0;
     585             : 
     586        4216 :     start_ch = 0;
     587        8576 :     for (i = 0; i < s->chan_map[0]; i++) {
     588        4360 :         FFPsyWindowInfo* wi = windows + start_ch;
     589        4360 :         tag      = s->chan_map[i+1];
     590        4360 :         chans    = tag == TYPE_CPE ? 2 : 1;
     591        4360 :         cpe      = &s->cpe[i];
     592       12466 :         for (ch = 0; ch < chans; ch++) {
     593             :             int k;
     594             :             float clip_avoidance_factor;
     595        8106 :             sce = &cpe->ch[ch];
     596        8106 :             ics = &sce->ics;
     597        8106 :             s->cur_channel = start_ch + ch;
     598        8106 :             overlap  = &samples[s->cur_channel][0];
     599        8106 :             samples2 = overlap + 1024;
     600        8106 :             la       = samples2 + (448+64);
     601        8106 :             if (!frame)
     602          54 :                 la = NULL;
     603        8106 :             if (tag == TYPE_LFE) {
     604          48 :                 wi[ch].window_type[0] = wi[ch].window_type[1] = ONLY_LONG_SEQUENCE;
     605          48 :                 wi[ch].window_shape   = 0;
     606          48 :                 wi[ch].num_windows    = 1;
     607          48 :                 wi[ch].grouping[0]    = 1;
     608          48 :                 wi[ch].clipping[0]    = 0;
     609             : 
     610             :                 /* Only the lowest 12 coefficients are used in a LFE channel.
     611             :                  * The expression below results in only the bottom 8 coefficients
     612             :                  * being used for 11.025kHz to 16kHz sample rates.
     613             :                  */
     614          48 :                 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
     615             :             } else {
     616       16116 :                 wi[ch] = s->psy.model->window(&s->psy, samples2, la, s->cur_channel,
     617        8058 :                                               ics->window_sequence[0]);
     618             :             }
     619        8106 :             ics->window_sequence[1] = ics->window_sequence[0];
     620        8106 :             ics->window_sequence[0] = wi[ch].window_type[0];
     621        8106 :             ics->use_kb_window[1]   = ics->use_kb_window[0];
     622        8106 :             ics->use_kb_window[0]   = wi[ch].window_shape;
     623        8106 :             ics->num_windows        = wi[ch].num_windows;
     624        8106 :             ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
     625        8106 :             ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
     626        8106 :             ics->max_sfb            = FFMIN(ics->max_sfb, ics->num_swb);
     627       16212 :             ics->swb_offset         = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
     628       16032 :                                         ff_swb_offset_128 [s->samplerate_index]:
     629        7926 :                                         ff_swb_offset_1024[s->samplerate_index];
     630       16212 :             ics->tns_max_bands      = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
     631       16032 :                                         ff_tns_max_bands_128 [s->samplerate_index]:
     632        7926 :                                         ff_tns_max_bands_1024[s->samplerate_index];
     633             : 
     634       17472 :             for (w = 0; w < ics->num_windows; w++)
     635        9366 :                 ics->group_len[w] = wi[ch].grouping[w];
     636             : 
     637             :             /* Calculate input sample maximums and evaluate clipping risk */
     638        8106 :             clip_avoidance_factor = 0.0f;
     639       17472 :             for (w = 0; w < ics->num_windows; w++) {
     640        9366 :                 const float *wbuf = overlap + w * 128;
     641        9366 :                 const int wlen = 2048 / ics->num_windows;
     642        9366 :                 float max = 0;
     643             :                 int j;
     644             :                 /* mdct input is 2 * output */
     645    16610454 :                 for (j = 0; j < wlen; j++)
     646    16601088 :                     max = FFMAX(max, fabsf(wbuf[j]));
     647        9366 :                 wi[ch].clipping[w] = max;
     648             :             }
     649       17472 :             for (w = 0; w < ics->num_windows; w++) {
     650        9366 :                 if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
     651         187 :                     ics->window_clipping[w] = 1;
     652         187 :                     clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
     653             :                 } else {
     654        9179 :                     ics->window_clipping[w] = 0;
     655             :                 }
     656             :             }
     657        8106 :             if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
     658         178 :                 ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
     659             :             } else {
     660        7928 :                 ics->clip_avoidance_factor = 1.0f;
     661             :             }
     662             : 
     663        8106 :             apply_window_and_mdct(s, sce, overlap);
     664             : 
     665        8106 :             if (s->options.ltp && s->coder->update_ltp) {
     666         822 :                 s->coder->update_ltp(s, sce);
     667         822 :                 apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, &sce->ltp_state[0]);
     668         822 :                 s->mdct1024.mdct_calc(&s->mdct1024, sce->lcoeffs, sce->ret_buf);
     669             :             }
     670             : 
     671     8308650 :             for (k = 0; k < 1024; k++) {
     672     8300544 :                 if (!(fabs(cpe->ch[ch].coeffs[k]) < 1E16)) { // Ensure headroom for energy calculation
     673           0 :                     av_log(avctx, AV_LOG_ERROR, "Input contains (near) NaN/+-Inf\n");
     674           0 :                     return AVERROR(EINVAL);
     675             :                 }
     676             :             }
     677        8106 :             avoid_clipping(s, sce);
     678             :         }
     679        4360 :         start_ch += chans;
     680             :     }
     681        4216 :     if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels, 0)) < 0)
     682           0 :         return ret;
     683        4216 :     frame_bits = its = 0;
     684             :     do {
     685        8234 :         init_put_bits(&s->pb, avpkt->data, avpkt->size);
     686             : 
     687        6225 :         if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
     688          22 :             put_bitstream_info(s, LIBAVCODEC_IDENT);
     689        6225 :         start_ch = 0;
     690        6225 :         target_bits = 0;
     691        6225 :         memset(chan_el_counter, 0, sizeof(chan_el_counter));
     692       12597 :         for (i = 0; i < s->chan_map[0]; i++) {
     693        6372 :             FFPsyWindowInfo* wi = windows + start_ch;
     694             :             const float *coeffs[2];
     695        6372 :             tag      = s->chan_map[i+1];
     696        6372 :             chans    = tag == TYPE_CPE ? 2 : 1;
     697        6372 :             cpe      = &s->cpe[i];
     698        6372 :             cpe->common_window = 0;
     699        6372 :             memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
     700        6372 :             memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
     701        6372 :             put_bits(&s->pb, 3, tag);
     702        6372 :             put_bits(&s->pb, 4, chan_el_counter[tag]++);
     703       18497 :             for (ch = 0; ch < chans; ch++) {
     704       12125 :                 sce = &cpe->ch[ch];
     705       12125 :                 coeffs[ch] = sce->coeffs;
     706       12125 :                 sce->ics.predictor_present = 0;
     707       12125 :                 sce->ics.ltp.present = 0;
     708       12125 :                 memset(sce->ics.ltp.used, 0, sizeof(sce->ics.ltp.used));
     709       12125 :                 memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
     710       12125 :                 memset(&sce->tns, 0, sizeof(TemporalNoiseShaping));
     711     1564125 :                 for (w = 0; w < 128; w++)
     712     1552000 :                     if (sce->band_type[w] > RESERVED_BT)
     713       23716 :                         sce->band_type[w] = 0;
     714             :             }
     715        6372 :             s->psy.bitres.alloc = -1;
     716        6372 :             s->psy.bitres.bits = s->last_frame_pb_count / s->channels;
     717        6372 :             s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
     718        6372 :             if (s->psy.bitres.alloc > 0) {
     719             :                 /* Lambda unused here on purpose, we need to take psy's unscaled allocation */
     720       12744 :                 target_bits += s->psy.bitres.alloc
     721        6372 :                     * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120));
     722        6372 :                 s->psy.bitres.alloc /= chans;
     723             :             }
     724        6372 :             s->cur_type = tag;
     725       18497 :             for (ch = 0; ch < chans; ch++) {
     726       12125 :                 s->cur_channel = start_ch + ch;
     727       12125 :                 if (s->options.pns && s->coder->mark_pns)
     728        1869 :                     s->coder->mark_pns(s, avctx, &cpe->ch[ch]);
     729       12125 :                 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
     730             :             }
     731        6372 :             if (chans > 1
     732        5753 :                 && wi[0].window_type[0] == wi[1].window_type[0]
     733        5420 :                 && wi[0].window_shape   == wi[1].window_shape) {
     734             : 
     735        5420 :                 cpe->common_window = 1;
     736       11410 :                 for (w = 0; w < wi[0].num_windows; w++) {
     737        6001 :                     if (wi[0].grouping[w] != wi[1].grouping[w]) {
     738          11 :                         cpe->common_window = 0;
     739          11 :                         break;
     740             :                     }
     741             :                 }
     742             :             }
     743       18497 :             for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
     744       12125 :                 sce = &cpe->ch[ch];
     745       12125 :                 s->cur_channel = start_ch + ch;
     746       12125 :                 if (s->options.tns && s->coder->search_for_tns)
     747        1865 :                     s->coder->search_for_tns(s, sce);
     748       12125 :                 if (s->options.tns && s->coder->apply_tns_filt)
     749        1865 :                     s->coder->apply_tns_filt(s, sce);
     750       12125 :                 if (sce->tns.present)
     751          40 :                     tns_mode = 1;
     752       12125 :                 if (s->options.pns && s->coder->search_for_pns)
     753        1869 :                     s->coder->search_for_pns(s, avctx, sce);
     754             :             }
     755        6372 :             s->cur_channel = start_ch;
     756        6372 :             if (s->options.intensity_stereo) { /* Intensity Stereo */
     757        1243 :                 if (s->coder->search_for_is)
     758        1243 :                     s->coder->search_for_is(s, avctx, cpe);
     759        1243 :                 if (cpe->is_mode) is_mode = 1;
     760        1243 :                 apply_intensity_stereo(cpe);
     761             :             }
     762        6372 :             if (s->options.pred) { /* Prediction */
     763        1248 :                 for (ch = 0; ch < chans; ch++) {
     764         832 :                     sce = &cpe->ch[ch];
     765         832 :                     s->cur_channel = start_ch + ch;
     766         832 :                     if (s->options.pred && s->coder->search_for_pred)
     767         832 :                         s->coder->search_for_pred(s, sce);
     768         832 :                     if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
     769             :                 }
     770         416 :                 if (s->coder->adjust_common_pred)
     771         416 :                     s->coder->adjust_common_pred(s, cpe);
     772        1248 :                 for (ch = 0; ch < chans; ch++) {
     773         832 :                     sce = &cpe->ch[ch];
     774         832 :                     s->cur_channel = start_ch + ch;
     775         832 :                     if (s->options.pred && s->coder->apply_main_pred)
     776         832 :                         s->coder->apply_main_pred(s, sce);
     777             :                 }
     778         416 :                 s->cur_channel = start_ch;
     779             :             }
     780        6372 :             if (s->options.mid_side) { /* Mid/Side stereo */
     781        1047 :                 if (s->options.mid_side == -1 && s->coder->search_for_ms)
     782         630 :                     s->coder->search_for_ms(s, cpe);
     783         417 :                 else if (cpe->common_window)
     784         391 :                     memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
     785        1047 :                 apply_mid_side_stereo(cpe);
     786             :             }
     787        6372 :             adjust_frame_information(cpe, chans);
     788        6372 :             if (s->options.ltp) { /* LTP */
     789        1251 :                 for (ch = 0; ch < chans; ch++) {
     790         834 :                     sce = &cpe->ch[ch];
     791         834 :                     s->cur_channel = start_ch + ch;
     792         834 :                     if (s->coder->search_for_ltp)
     793         834 :                         s->coder->search_for_ltp(s, sce, cpe->common_window);
     794         834 :                     if (sce->ics.ltp.present) pred_mode = 1;
     795             :                 }
     796         417 :                 s->cur_channel = start_ch;
     797         417 :                 if (s->coder->adjust_common_ltp)
     798         417 :                     s->coder->adjust_common_ltp(s, cpe);
     799             :             }
     800        6372 :             if (chans == 2) {
     801        5753 :                 put_bits(&s->pb, 1, cpe->common_window);
     802        5753 :                 if (cpe->common_window) {
     803        5409 :                     put_ics_info(s, &cpe->ch[0].ics);
     804        5409 :                     if (s->coder->encode_main_pred)
     805        5409 :                         s->coder->encode_main_pred(s, &cpe->ch[0]);
     806        5409 :                     if (s->coder->encode_ltp_info)
     807        5409 :                         s->coder->encode_ltp_info(s, &cpe->ch[0], 1);
     808        5409 :                     encode_ms_info(&s->pb, cpe);
     809        5409 :                     if (cpe->ms_mode) ms_mode = 1;
     810             :                 }
     811             :             }
     812       18497 :             for (ch = 0; ch < chans; ch++) {
     813       12125 :                 s->cur_channel = start_ch + ch;
     814       12125 :                 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
     815             :             }
     816        6372 :             start_ch += chans;
     817             :         }
     818             : 
     819        6225 :         if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
     820             :             /* When using a constant Q-scale, don't mess with lambda */
     821           0 :             break;
     822             :         }
     823             : 
     824             :         /* rate control stuff
     825             :          * allow between the nominal bitrate, and what psy's bit reservoir says to target
     826             :          * but drift towards the nominal bitrate always
     827             :          */
     828        6225 :         frame_bits = put_bits_count(&s->pb);
     829        6225 :         rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
     830        6225 :         rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
     831        6225 :         too_many_bits = FFMAX(target_bits, rate_bits);
     832        6225 :         too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
     833        6225 :         too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);
     834             : 
     835             :         /* When using ABR, be strict (but only for increasing) */
     836        6225 :         too_few_bits = too_few_bits - too_few_bits/8;
     837        6225 :         too_many_bits = too_many_bits + too_many_bits/2;
     838             : 
     839        6225 :         if (   its == 0 /* for steady-state Q-scale tracking */
     840        2009 :             || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
     841         409 :             || frame_bits >= 6144 * s->channels - 3  )
     842             :         {
     843        5816 :             float ratio = ((float)rate_bits) / frame_bits;
     844             : 
     845        5816 :             if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
     846             :                 /*
     847             :                  * This path is for steady-state Q-scale tracking
     848             :                  * When frame bits fall within the stable range, we still need to adjust
     849             :                  * lambda to maintain it like so in a stable fashion (large jumps in lambda
     850             :                  * create artifacts and should be avoided), but slowly
     851             :                  */
     852        3244 :                 ratio = sqrtf(sqrtf(ratio));
     853        3244 :                 ratio = av_clipf(ratio, 0.9f, 1.1f);
     854             :             } else {
     855             :                 /* Not so fast though */
     856        2572 :                 ratio = sqrtf(ratio);
     857             :             }
     858        5816 :             s->lambda = FFMIN(s->lambda * ratio, 65536.f);
     859             : 
     860             :             /* Keep iterating if we must reduce and lambda is in the sky */
     861        5816 :             if (ratio > 0.9f && ratio < 1.1f) {
     862             :                 break;
     863             :             } else {
     864        2009 :                 if (is_mode || ms_mode || tns_mode || pred_mode) {
     865          23 :                     for (i = 0; i < s->chan_map[0]; i++) {
     866             :                         // Must restore coeffs
     867          13 :                         chans = tag == TYPE_CPE ? 2 : 1;
     868          13 :                         cpe = &s->cpe[i];
     869          35 :                         for (ch = 0; ch < chans; ch++)
     870          22 :                             memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
     871             :                     }
     872             :                 }
     873        2009 :                 its++;
     874             :             }
     875             :         } else {
     876             :             break;
     877             :         }
     878             :     } while (1);
     879             : 
     880        4216 :     if (s->options.ltp && s->coder->ltp_insert_new_frame)
     881         411 :         s->coder->ltp_insert_new_frame(s);
     882             : 
     883        4216 :     put_bits(&s->pb, 3, TYPE_END);
     884        4216 :     flush_put_bits(&s->pb);
     885             : 
     886        4216 :     s->last_frame_pb_count = put_bits_count(&s->pb);
     887             : 
     888        4216 :     s->lambda_sum += s->lambda;
     889        4216 :     s->lambda_count++;
     890             : 
     891        4216 :     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
     892             :                        &avpkt->duration);
     893             : 
     894        4216 :     avpkt->size = put_bits_count(&s->pb) >> 3;
     895        4216 :     *got_packet_ptr = 1;
     896        4216 :     return 0;
     897             : }
     898             : 
     899          12 : static av_cold int aac_encode_end(AVCodecContext *avctx)
     900             : {
     901          12 :     AACEncContext *s = avctx->priv_data;
     902             : 
     903          12 :     av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
     904             : 
     905          12 :     ff_mdct_end(&s->mdct1024);
     906          12 :     ff_mdct_end(&s->mdct128);
     907          12 :     ff_psy_end(&s->psy);
     908          12 :     ff_lpc_end(&s->lpc);
     909          12 :     if (s->psypp)
     910          12 :         ff_psy_preprocess_end(s->psypp);
     911          12 :     av_freep(&s->buffer.samples);
     912          12 :     av_freep(&s->cpe);
     913          12 :     av_freep(&s->fdsp);
     914          12 :     ff_af_queue_close(&s->afq);
     915          12 :     return 0;
     916             : }
     917             : 
     918          12 : static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
     919             : {
     920          12 :     int ret = 0;
     921             : 
     922          12 :     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     923          12 :     if (!s->fdsp)
     924           0 :         return AVERROR(ENOMEM);
     925             : 
     926             :     // window init
     927          12 :     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
     928          12 :     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
     929          12 :     ff_init_ff_sine_windows(10);
     930          12 :     ff_init_ff_sine_windows(7);
     931             : 
     932          12 :     if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
     933           0 :         return ret;
     934          12 :     if ((ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0)) < 0)
     935           0 :         return ret;
     936             : 
     937          12 :     return 0;
     938             : }
     939             : 
     940          12 : static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
     941             : {
     942             :     int ch;
     943          12 :     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
     944          12 :     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
     945             : 
     946          39 :     for(ch = 0; ch < s->channels; ch++)
     947          27 :         s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
     948             : 
     949          12 :     return 0;
     950           0 : alloc_fail:
     951           0 :     return AVERROR(ENOMEM);
     952             : }
     953             : 
     954          12 : static av_cold void aac_encode_init_tables(void)
     955             : {
     956          12 :     ff_aac_tableinit();
     957          12 : }
     958             : 
     959          12 : static av_cold int aac_encode_init(AVCodecContext *avctx)
     960             : {
     961          12 :     AACEncContext *s = avctx->priv_data;
     962          12 :     int i, ret = 0;
     963             :     const uint8_t *sizes[2];
     964             :     uint8_t grouping[AAC_MAX_CHANNELS];
     965             :     int lengths[2];
     966             : 
     967             :     /* Constants */
     968          12 :     s->last_frame_pb_count = 0;
     969          12 :     avctx->frame_size = 1024;
     970          12 :     avctx->initial_padding = 1024;
     971          12 :     s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
     972             : 
     973             :     /* Channel map and unspecified bitrate guessing */
     974          12 :     s->channels = avctx->channels;
     975             : 
     976          12 :     s->needs_pce = 1;
     977          27 :     for (i = 0; i < FF_ARRAY_ELEMS(aac_normal_chan_layouts); i++) {
     978          27 :         if (avctx->channel_layout == aac_normal_chan_layouts[i]) {
     979          12 :             s->needs_pce = s->options.pce;
     980          12 :             break;
     981             :         }
     982             :     }
     983             : 
     984          12 :     if (s->needs_pce) {
     985           0 :         for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++)
     986           0 :             if (avctx->channel_layout == aac_pce_configs[i].layout)
     987           0 :                 break;
     988           0 :         ERROR_IF(i == FF_ARRAY_ELEMS(aac_pce_configs), "Unsupported channel layout\n");
     989           0 :         av_log(avctx, AV_LOG_INFO, "Using a PCE to encode channel layout\n");
     990           0 :         s->pce = aac_pce_configs[i];
     991           0 :         s->reorder_map = s->pce.reorder_map;
     992           0 :         s->chan_map = s->pce.config_map;
     993             :     } else {
     994          12 :         s->reorder_map = aac_chan_maps[s->channels - 1];
     995          12 :         s->chan_map = aac_chan_configs[s->channels - 1];
     996             :     }
     997             : 
     998          12 :     if (!avctx->bit_rate) {
     999           9 :         for (i = 1; i <= s->chan_map[0]; i++) {
    1000           9 :             avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
    1001           3 :                                s->chan_map[i] == TYPE_LFE ? 16000  : /* LFE  */
    1002             :                                                             69000  ; /* SCE  */
    1003             :         }
    1004             :     }
    1005             : 
    1006             :     /* Samplerate */
    1007          59 :     for (i = 0; i < 16; i++)
    1008          59 :         if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
    1009          12 :             break;
    1010          12 :     s->samplerate_index = i;
    1011          12 :     ERROR_IF(s->samplerate_index == 16 ||
    1012             :              s->samplerate_index >= ff_aac_swb_size_1024_len ||
    1013             :              s->samplerate_index >= ff_aac_swb_size_128_len,
    1014             :              "Unsupported sample rate %d\n", avctx->sample_rate);
    1015             : 
    1016             :     /* Bitrate limiting */
    1017          12 :     WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
    1018             :              "Too many bits %f > %d per frame requested, clamping to max\n",
    1019             :              1024.0 * avctx->bit_rate / avctx->sample_rate,
    1020             :              6144 * s->channels);
    1021          12 :     avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate,
    1022             :                                      avctx->bit_rate);
    1023             : 
    1024             :     /* Profile and option setting */
    1025          12 :     avctx->profile = avctx->profile == FF_PROFILE_UNKNOWN ? FF_PROFILE_AAC_LOW :
    1026             :                      avctx->profile;
    1027          24 :     for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++)
    1028          24 :         if (avctx->profile == aacenc_profiles[i])
    1029          12 :             break;
    1030          12 :     if (avctx->profile == FF_PROFILE_MPEG2_AAC_LOW) {
    1031           0 :         avctx->profile = FF_PROFILE_AAC_LOW;
    1032           0 :         ERROR_IF(s->options.pred,
    1033             :                  "Main prediction unavailable in the \"mpeg2_aac_low\" profile\n");
    1034           0 :         ERROR_IF(s->options.ltp,
    1035             :                  "LTP prediction unavailable in the \"mpeg2_aac_low\" profile\n");
    1036           0 :         WARN_IF(s->options.pns,
    1037             :                 "PNS unavailable in the \"mpeg2_aac_low\" profile, turning off\n");
    1038           0 :         s->options.pns = 0;
    1039          12 :     } else if (avctx->profile == FF_PROFILE_AAC_LTP) {
    1040           1 :         s->options.ltp = 1;
    1041           1 :         ERROR_IF(s->options.pred,
    1042             :                  "Main prediction unavailable in the \"aac_ltp\" profile\n");
    1043          11 :     } else if (avctx->profile == FF_PROFILE_AAC_MAIN) {
    1044           1 :         s->options.pred = 1;
    1045           1 :         ERROR_IF(s->options.ltp,
    1046             :                  "LTP prediction unavailable in the \"aac_main\" profile\n");
    1047          10 :     } else if (s->options.ltp) {
    1048           0 :         avctx->profile = FF_PROFILE_AAC_LTP;
    1049           0 :         WARN_IF(1,
    1050             :                 "Chainging profile to \"aac_ltp\"\n");
    1051           0 :         ERROR_IF(s->options.pred,
    1052             :                  "Main prediction unavailable in the \"aac_ltp\" profile\n");
    1053          10 :     } else if (s->options.pred) {
    1054           0 :         avctx->profile = FF_PROFILE_AAC_MAIN;
    1055           0 :         WARN_IF(1,
    1056             :                 "Chainging profile to \"aac_main\"\n");
    1057           0 :         ERROR_IF(s->options.ltp,
    1058             :                  "LTP prediction unavailable in the \"aac_main\" profile\n");
    1059             :     }
    1060          12 :     s->profile = avctx->profile;
    1061             : 
    1062             :     /* Coder limitations */
    1063          12 :     s->coder = &ff_aac_coders[s->options.coder];
    1064          12 :     if (s->options.coder == AAC_CODER_ANMR) {
    1065           0 :         ERROR_IF(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
    1066             :                  "The ANMR coder is considered experimental, add -strict -2 to enable!\n");
    1067           0 :         s->options.intensity_stereo = 0;
    1068           0 :         s->options.pns = 0;
    1069             :     }
    1070          12 :     ERROR_IF(s->options.ltp && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
    1071             :              "The LPT profile requires experimental compliance, add -strict -2 to enable!\n");
    1072             : 
    1073             :     /* M/S introduces horrible artifacts with multichannel files, this is temporary */
    1074          12 :     if (s->channels > 3)
    1075           1 :         s->options.mid_side = 0;
    1076             : 
    1077          12 :     if ((ret = dsp_init(avctx, s)) < 0)
    1078           0 :         goto fail;
    1079             : 
    1080          12 :     if ((ret = alloc_buffers(avctx, s)) < 0)
    1081           0 :         goto fail;
    1082             : 
    1083          12 :     if ((ret = put_audio_specific_config(avctx)))
    1084           0 :         goto fail;
    1085             : 
    1086          12 :     sizes[0]   = ff_aac_swb_size_1024[s->samplerate_index];
    1087          12 :     sizes[1]   = ff_aac_swb_size_128[s->samplerate_index];
    1088          12 :     lengths[0] = ff_aac_num_swb_1024[s->samplerate_index];
    1089          12 :     lengths[1] = ff_aac_num_swb_128[s->samplerate_index];
    1090          27 :     for (i = 0; i < s->chan_map[0]; i++)
    1091          15 :         grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
    1092          12 :     if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
    1093          12 :                            s->chan_map[0], grouping)) < 0)
    1094           0 :         goto fail;
    1095          12 :     s->psypp = ff_psy_preprocess_init(avctx);
    1096          12 :     ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON);
    1097          12 :     s->random_state = 0x1f2e3d4c;
    1098             : 
    1099          12 :     s->abs_pow34   = abs_pow34_v;
    1100          12 :     s->quant_bands = quantize_bands;
    1101             : 
    1102             :     if (ARCH_X86)
    1103          12 :         ff_aac_dsp_init_x86(s);
    1104             : 
    1105             :     if (HAVE_MIPSDSP)
    1106             :         ff_aac_coder_init_mips(s);
    1107             : 
    1108          12 :     if ((ret = ff_thread_once(&aac_table_init, &aac_encode_init_tables)) != 0)
    1109           0 :         return AVERROR_UNKNOWN;
    1110             : 
    1111          12 :     ff_af_queue_init(avctx, &s->afq);
    1112             : 
    1113          12 :     return 0;
    1114           0 : fail:
    1115           0 :     aac_encode_end(avctx);
    1116           0 :     return ret;
    1117             : }
    1118             : 
    1119             : #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM /* flag set attached to every entry of aacenc_options below; NOTE(review): expansion is not parenthesized, so it is only safe where it appears as a whole initializer expression */
    1120             : static const AVOption aacenc_options[] = { /* private option table of the AAC encoder; each entry writes into a field of AACEncContext.options */
    1121             :     {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "coder"}, /* integer selector, default AAC_CODER_TWOLOOP, range 0..AAC_CODER_NB-1; the three constants below share its "coder" unit */
    1122             :         {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
    1123             :         {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
    1124             :         {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
    1125             :     {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS}, /* the only boolean here whose default is -1; NOTE(review): -1 presumably means "decide automatically" - confirm against the coder */
    1126             :     {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS}, /* enabled by default */
    1127             :     {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS}, /* enabled by default */
    1128             :     {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS}, /* enabled by default */
    1129             :     {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS}, /* disabled by default */
    1130             :     {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS}, /* disabled by default */
    1131             :     {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS}, /* disabled by default */
    1132             :     {NULL} /* end-of-table sentinel */
    1133             : };
    1134             : 
    1135             : static const AVClass aacenc_class = { /* AVClass descriptor for the encoder; referenced by ff_aac_encoder.priv_class */
    1136             :     .class_name = "AAC encoder",
    1137             :     .item_name  = av_default_item_name,
    1138             :     .option     = aacenc_options, /* attaches the private option table defined above */
    1139             :     .version    = LIBAVUTIL_VERSION_INT,
    1140             : };
    1141             : 
    1142             : static const AVCodecDefault aac_encode_defaults[] = { /* key/value default overrides for this codec; referenced by ff_aac_encoder.defaults */
    1143             :     { "b", "0" }, /* sets option "b" to "0"; NOTE(review): "b" is presumably the generic bitrate option - confirm */
    1144             :     { NULL } /* end-of-table sentinel */
    1145             : };
    1146             : 
    1147             : AVCodec ff_aac_encoder = { /* codec descriptor for the native AAC encoder, wiring the callbacks and tables defined in this file */
    1148             :     .name           = "aac",
    1149             :     .long_name      = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
    1150             :     .type           = AVMEDIA_TYPE_AUDIO,
    1151             :     .id             = AV_CODEC_ID_AAC,
    1152             :     .priv_data_size = sizeof(AACEncContext), /* private context size; the option table's offsetof() values index into this struct */
    1153             :     .init           = aac_encode_init,
    1154             :     .encode2        = aac_encode_frame,
    1155             :     .close          = aac_encode_end, /* same routine aac_encode_init calls on its failure path */
    1156             :     .defaults       = aac_encode_defaults,
    1157             :     .supported_samplerates = mpeg4audio_sample_rates,
    1158             :     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
    1159             :     .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
    1160             :     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP, /* only AV_SAMPLE_FMT_FLTP is listed as an input format */
    1161             :                                                      AV_SAMPLE_FMT_NONE }, /* list terminator */
    1162             :     .priv_class     = &aacenc_class,
    1163             : };

Generated by: LCOV version 1.13