LCOV - code coverage report
Current view: top level - src/libavcodec - aaccoder_twoloop.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 373 384 97.1 %
Date: 2017-01-23 11:54:22 Functions: 2 2 100.0 %

          Line data    Source code
       1             : /*
       2             :  * AAC encoder twoloop coder
       3             :  * Copyright (C) 2008-2009 Konstantin Shishkov
       4             :  *
       5             :  * This file is part of FFmpeg.
       6             :  *
       7             :  * FFmpeg is free software; you can redistribute it and/or
       8             :  * modify it under the terms of the GNU Lesser General Public
       9             :  * License as published by the Free Software Foundation; either
      10             :  * version 2.1 of the License, or (at your option) any later version.
      11             :  *
      12             :  * FFmpeg is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :  * Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with FFmpeg; if not, write to the Free Software
      19             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      20             :  */
      21             : 
      22             : /**
      23             :  * @file
      24             :  * AAC encoder twoloop coder
      25             :  * @author Konstantin Shishkov, Claudio Freire
      26             :  */
      27             : 
      28             : /**
      29             :  * This file contains a template for the twoloop coder function.
      30             :  * It needs to be provided, externally, as an already included declaration,
      31             :  * the following functions from aacenc_quantization/util.h. They're not included
      32             :  * explicitly here to make it possible to provide alternative implementations:
      33             :  *  - quantize_band_cost
      34             :  *  - abs_pow34_v
      35             :  *  - find_max_val
      36             :  *  - find_min_book
      37             :  *  - find_form_factor
      38             :  */
      39             : 
      40             : #ifndef AVCODEC_AACCODER_TWOLOOP_H
      41             : #define AVCODEC_AACCODER_TWOLOOP_H
      42             : 
      43             : #include <float.h>
      44             : #include "libavutil/mathematics.h"
      45             : #include "mathops.h"
      46             : #include "avcodec.h"
      47             : #include "put_bits.h"
      48             : #include "aac.h"
      49             : #include "aacenc.h"
      50             : #include "aactab.h"
      51             : #include "aacenctab.h"
      52             : 
      53             : /** Frequency in Hz for lower limit of noise substitution **/
      54             : #define NOISE_LOW_LIMIT 4000
      55             : 
      56             : #define sclip(x) av_clip(x,60,218)
      57             : 
      58             : /* Reflects the cost to change codebooks */
      59      339855 : static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
      60             : {
      61      339855 :     return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
      62             : }
      63             : 
      64             : /**
      65             :  * two-loop quantizers search taken from ISO 13818-7 Appendix C
      66             :  */
      67       12123 : static void search_for_quantizers_twoloop(AVCodecContext *avctx,
      68             :                                           AACEncContext *s,
      69             :                                           SingleChannelElement *sce,
      70             :                                           const float lambda)
      71             : {
      72       12123 :     int start = 0, i, w, w2, g, recomprd;
      73       24246 :     int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
      74       12123 :         / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
      75       12123 :         * (lambda / 120.f);
      76       12123 :     int refbits = destbits;
      77             :     int toomanybits, toofewbits;
      78             :     char nzs[128];
      79             :     uint8_t nextband[128];
      80             :     int maxsf[128], minsf[128];
      81       12123 :     float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
      82             :     float maxvals[128], spread_thr_r[128];
      83             :     float min_spread_thr_r, max_spread_thr_r;
      84             : 
      85             :     /**
      86             :      * rdlambda controls the maximum tolerated distortion. Twoloop
      87             :      * will keep iterating until it fails to lower it or it reaches
      88             :      * ulimit * rdlambda. Keeping it low increases quality on difficult
      89             :      * signals, but lower it too much, and bits will be taken from weak
      90             :      * signals, creating "holes". A balance is necessary.
      91             :      * rdmax and rdmin specify the relative deviation from rdlambda
      92             :      * allowed for tonality compensation
      93             :      */
      94       12123 :     float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
      95       12123 :     const float nzslope = 1.5f;
      96       12123 :     float rdmin = 0.03125f;
      97       12123 :     float rdmax = 1.0f;
      98             : 
      99             :     /**
     100             :      * sfoffs controls an offset of optmium allocation that will be
     101             :      * applied based on lambda. Keep it real and modest, the loop
     102             :      * will take care of the rest, this just accelerates convergence
     103             :      */
     104       12123 :     float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
     105             : 
     106             :     int fflag, minscaler, maxscaler, nminscaler;
     107       12123 :     int its  = 0;
     108       12123 :     int maxits = 30;
     109       12123 :     int allz = 0;
     110             :     int tbits;
     111       12123 :     int cutoff = 1024;
     112             :     int pns_start_pos;
     113             :     int prev;
     114             : 
     115             :     /**
     116             :      * zeroscale controls a multiplier of the threshold, if band energy
     117             :      * is below this, a zero is forced. Keep it lower than 1, unless
     118             :      * low lambda is used, because energy < threshold doesn't mean there's
     119             :      * no audible signal outright, it's just energy. Also make it rise
     120             :      * slower than rdlambda, as rdscale has due compensation with
     121             :      * noisy band depriorization below, whereas zeroing logic is rather dumb
     122             :      */
     123             :     float zeroscale;
     124       12123 :     if (lambda > 120.f) {
     125       11353 :         zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
     126             :     } else {
     127         770 :         zeroscale = 1.f;
     128             :     }
     129             : 
     130       12123 :     if (s->psy.bitres.alloc >= 0) {
     131             :         /**
     132             :          * Psy granted us extra bits to use, from the reservoire
     133             :          * adjust for lambda except what psy already did
     134             :          */
     135       24246 :         destbits = s->psy.bitres.alloc
     136       12123 :             * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
     137             :     }
     138             : 
     139       12123 :     if (avctx->flags & CODEC_FLAG_QSCALE) {
     140             :         /**
     141             :          * Constant Q-scale doesn't compensate MS coding on its own
     142             :          * No need to be overly precise, this only controls RD
     143             :          * adjustment CB limits when going overboard
     144             :          */
     145           0 :         if (s->options.mid_side && s->cur_type == TYPE_CPE)
     146           0 :             destbits *= 2;
     147             : 
     148             :         /**
     149             :          * When using a constant Q-scale, don't adjust bits, just use RD
     150             :          * Don't let it go overboard, though... 8x psy target is enough
     151             :          */
     152           0 :         toomanybits = 5800;
     153           0 :         toofewbits = destbits / 16;
     154             : 
     155             :         /** Don't offset scalers, just RD */
     156           0 :         sfoffs = sce->ics.num_windows - 1;
     157           0 :         rdlambda = sqrtf(rdlambda);
     158             : 
     159             :         /** search further */
     160           0 :         maxits *= 2;
     161             :     } else {
     162             :         /* When using ABR, be strict, but a reasonable leeway is
     163             :          * critical to allow RC to smoothly track desired bitrate
     164             :          * without sudden quality drops that cause audible artifacts.
     165             :          * Symmetry is also desirable, to avoid systematic bias.
     166             :          */
     167       12123 :         toomanybits = destbits + destbits/8;
     168       12123 :         toofewbits = destbits - destbits/8;
     169             : 
     170       12123 :         sfoffs = 0;
     171       12123 :         rdlambda = sqrtf(rdlambda);
     172             :     }
     173             : 
     174             :     /** and zero out above cutoff frequency */
     175             :     {
     176       12123 :         int wlen = 1024 / sce->ics.num_windows;
     177             :         int bandwidth;
     178             : 
     179             :         /**
     180             :          * Scale, psy gives us constant quality, this LP only scales
     181             :          * bitrate by lambda, so we save bits on subjectively unimportant HF
     182             :          * rather than increase quantization noise. Adjust nominal bitrate
     183             :          * to effective bitrate according to encoding parameters,
     184             :          * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
     185             :          */
     186       12123 :         float rate_bandwidth_multiplier = 1.5f;
     187       24246 :         int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
     188           0 :             ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
     189       12123 :             : (avctx->bit_rate / avctx->channels);
     190             : 
     191             :         /** Compensate for extensions that increase efficiency */
     192       12123 :         if (s->options.pns || s->options.intensity_stereo)
     193        2703 :             frame_bit_rate *= 1.15f;
     194             : 
     195       12123 :         if (avctx->cutoff > 0) {
     196        4998 :             bandwidth = avctx->cutoff;
     197             :         } else {
     198        7125 :             bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
     199        7125 :             s->psy.cutoff = bandwidth;
     200             :         }
     201             : 
     202       12123 :         cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
     203       12123 :         pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
     204             :     }
     205             : 
     206             :     /**
     207             :      * for values above this the decoder might end up in an endless loop
     208             :      * due to always having more bits than what can be encoded.
     209             :      */
     210       12123 :     destbits = FFMIN(destbits, 5800);
     211       12123 :     toomanybits = FFMIN(toomanybits, 5800);
     212       12123 :     toofewbits = FFMIN(toofewbits, 5800);
     213             :     /**
     214             :      * XXX: some heuristic to determine initial quantizers will reduce search time
     215             :      * determine zero bands and upper distortion limits
     216             :      */
     217       12123 :     min_spread_thr_r = -1;
     218       12123 :     max_spread_thr_r = -1;
     219       25022 :     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     220      605071 :         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
     221      592172 :             int nz = 0;
     222      592172 :             float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
     223     1202782 :             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
     224      610610 :                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
     225      610610 :                 if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
     226       42287 :                     sce->zeroes[(w+w2)*16+g] = 1;
     227       42287 :                     continue;
     228             :                 }
     229      568323 :                 nz = 1;
     230             :             }
     231      592172 :             if (!nz) {
     232       34153 :                 uplim = 0.0f;
     233             :             } else {
     234      558019 :                 nz = 0;
     235     1126396 :                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
     236      568377 :                     FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
     237      568377 :                     if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
     238          54 :                         continue;
     239      568323 :                     uplim += band->threshold;
     240      568323 :                     energy += band->energy;
     241      568323 :                     spread += band->spread;
     242      568323 :                     nz++;
     243             :                 }
     244             :             }
     245      592172 :             uplims[w*16+g] = uplim;
     246      592172 :             energies[w*16+g] = energy;
     247      592172 :             nzs[w*16+g] = nz;
     248      592172 :             sce->zeroes[w*16+g] = !nz;
     249      592172 :             allz |= nz;
     250      592172 :             if (nz && sce->can_pns[w*16+g]) {
     251       39184 :                 spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
     252       39184 :                 if (min_spread_thr_r < 0) {
     253        1805 :                     min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
     254             :                 } else {
     255       37379 :                     min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
     256       37379 :                     max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
     257             :                 }
     258             :             }
     259             :         }
     260             :     }
     261             : 
     262             :     /** Compute initial scalers */
     263       12123 :     minscaler = 65535;
     264       25022 :     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     265      605071 :         for (g = 0;  g < sce->ics.num_swb; g++) {
     266      592172 :             if (sce->zeroes[w*16+g]) {
     267       34153 :                 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
     268       34153 :                 continue;
     269             :             }
     270             :             /**
     271             :              * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
     272             :              * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
     273             :              * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
     274             :              * more robust.
     275             :              */
     276     1116038 :             sce->sf_idx[w*16+g] = av_clip(
     277             :                 SCALE_ONE_POS
     278      558019 :                     + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
     279      558019 :                     + sfoffs,
     280             :                 60, SCALE_MAX_POS);
     281      558019 :             minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
     282             :         }
     283             :     }
     284             : 
     285             :     /** Clip */
     286       12123 :     minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
     287       25022 :     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
     288      605071 :         for (g = 0;  g < sce->ics.num_swb; g++)
     289      592172 :             if (!sce->zeroes[w*16+g])
     290      558019 :                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
     291             : 
     292       12123 :     if (!allz)
     293         103 :         return;
     294       12020 :     s->abs_pow34(s->scoefs, sce->coeffs, 1024);
     295       12020 :     ff_quantize_band_cost_cache_init(s);
     296             : 
     297     1550580 :     for (i = 0; i < sizeof(minsf) / sizeof(minsf[0]); ++i)
     298     1538560 :         minsf[i] = 0;
     299       24612 :     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     300       12592 :         start = w*128;
     301      600431 :         for (g = 0;  g < sce->ics.num_swb; g++) {
     302      587839 :             const float *scaled = s->scoefs + start;
     303             :             int minsfidx;
     304      587839 :             maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
     305      587839 :             if (maxvals[w*16+g] > 0) {
     306      587544 :                 minsfidx = coef2minsf(maxvals[w*16+g]);
     307     1186134 :                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
     308      598590 :                     minsf[(w+w2)*16+g] = minsfidx;
     309             :             }
     310      587839 :             start += sce->ics.swb_sizes[g];
     311             :         }
     312             :     }
     313             : 
     314             :     /**
     315             :      * Scale uplims to match rate distortion to quality
     316             :      * bu applying noisy band depriorization and tonal band priorization.
     317             :      * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
     318             :      * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
     319             :      * rate distortion requirements.
     320             :      */
     321       12020 :     memcpy(euplims, uplims, sizeof(euplims));
     322       24612 :     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     323             :         /** psy already priorizes transients to some extent */
     324       12592 :         float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
     325       12592 :         start = w*128;
     326      600431 :         for (g = 0;  g < sce->ics.num_swb; g++) {
     327      587839 :             if (nzs[g] > 0) {
     328      558162 :                 float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
     329     2790810 :                 float energy2uplim = find_form_factor(
     330     1116324 :                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
     331      558162 :                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
     332      558162 :                     sce->coeffs + start,
     333             :                     nzslope * cleanup_factor);
     334      558162 :                 energy2uplim *= de_psy_factor;
     335      558162 :                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
     336             :                     /** In ABR, we need to priorize less and let rate control do its thing */
     337      558162 :                     energy2uplim = sqrtf(energy2uplim);
     338             :                 }
     339      558162 :                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
     340     1116324 :                 uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
     341      558162 :                                   * sce->ics.group_len[w];
     342             : 
     343     2232648 :                 energy2uplim = find_form_factor(
     344     1116324 :                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
     345      558162 :                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
     346      558162 :                     sce->coeffs + start,
     347             :                     2.0f);
     348      558162 :                 energy2uplim *= de_psy_factor;
     349      558162 :                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
     350             :                     /** In ABR, we need to priorize less and let rate control do its thing */
     351      558162 :                     energy2uplim = sqrtf(energy2uplim);
     352             :                 }
     353      558162 :                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
     354      558162 :                 euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
     355             :                     0.5f, 1.0f);
     356             :             }
     357      587839 :             start += sce->ics.swb_sizes[g];
     358             :         }
     359             :     }
     360             : 
     361     1550580 :     for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
     362     1538560 :         maxsf[i] = SCALE_MAX_POS;
     363             : 
     364             :     //perform two-loop search
     365             :     //outer loop - improve quality
     366             :     do {
     367             :         //inner loop - quantize spectrum to fit into given number of bits
     368             :         int overdist;
     369      356784 :         int qstep = its ? 1 : 32;
     370             :         do {
     371     1533415 :             int changed = 0;
     372     1533415 :             prev = -1;
     373     1533415 :             recomprd = 0;
     374     1533415 :             tbits = 0;
     375     3207237 :             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     376     1673822 :                 start = w*128;
     377    77007885 :                 for (g = 0;  g < sce->ics.num_swb; g++) {
     378    75334063 :                     const float *coefs = &sce->coeffs[start];
     379    75334063 :                     const float *scaled = &s->scoefs[start];
     380    75334063 :                     int bits = 0;
     381             :                     int cb;
     382    75334063 :                     float dist = 0.0f;
     383    75334063 :                     float qenergy = 0.0f;
     384             : 
     385    75334063 :                     if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
     386     7053494 :                         start += sce->ics.swb_sizes[g];
     387     7053494 :                         if (sce->can_pns[w*16+g]) {
     388             :                             /** PNS isn't free */
     389      269091 :                             tbits += ff_pns_bits(sce, w, g);
     390             :                         }
     391     7053494 :                         continue;
     392             :                     }
     393    68280569 :                     cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
     394   139043316 :                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
     395             :                         int b;
     396             :                         float sqenergy;
     397   141525494 :                         dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
     398             :                                                    scaled + w2*128,
     399    70762747 :                                                    sce->ics.swb_sizes[g],
     400    70762747 :                                                    sce->sf_idx[w*16+g],
     401             :                                                    cb,
     402             :                                                    1.0f,
     403             :                                                    INFINITY,
     404             :                                                    &b, &sqenergy,
     405             :                                                    0);
     406    70762747 :                         bits += b;
     407    70762747 :                         qenergy += sqenergy;
     408             :                     }
     409    68280569 :                     dists[w*16+g] = dist - bits;
     410    68280569 :                     qenergies[w*16+g] = qenergy;
     411    68280569 :                     if (prev != -1) {
     412    66747154 :                         int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
     413    66747154 :                         bits += ff_aac_scalefactor_bits[sfdiff];
     414             :                     }
     415    68280569 :                     tbits += bits;
     416    68280569 :                     start += sce->ics.swb_sizes[g];
     417    68280569 :                     prev = sce->sf_idx[w*16+g];
     418             :                 }
     419             :             }
     420     1533415 :             if (tbits > toomanybits) {
     421     1132509 :                 recomprd = 1;
     422   146093661 :                 for (i = 0; i < 128; i++) {
     423   144961152 :                     if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
     424    66072333 :                         int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
     425    66072333 :                         int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
     426    66072333 :                         if (new_sf != sce->sf_idx[i]) {
     427    60689813 :                             sce->sf_idx[i] = new_sf;
     428    60689813 :                             changed = 1;
     429             :                         }
     430             :                     }
     431             :                 }
     432      400906 :             } else if (tbits < toofewbits) {
     433      207538 :                 recomprd = 1;
     434    26772402 :                 for (i = 0; i < 128; i++) {
     435    26564864 :                     if (sce->sf_idx[i] > SCALE_ONE_POS) {
     436     4894911 :                         int new_sf = FFMAX3(minsf[i], SCALE_ONE_POS, sce->sf_idx[i] - qstep);
     437     4894911 :                         if (new_sf != sce->sf_idx[i]) {
     438     4894911 :                             sce->sf_idx[i] = new_sf;
     439     4894911 :                             changed = 1;
     440             :                         }
     441             :                     }
     442             :                 }
     443             :             }
     444     1533415 :             qstep >>= 1;
     445     1533415 :             if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
     446     1116531 :                 qstep = 1;
     447     1533415 :         } while (qstep);
     448             : 
     449      356784 :         overdist = 1;
     450      356784 :         fflag = tbits < toofewbits;
     451     1056829 :         for (i = 0; i < 2 && (overdist || recomprd); ++i) {
     452      700045 :             if (recomprd) {
     453             :                 /** Must recompute distortion */
     454      339670 :                 prev = -1;
     455      339670 :                 tbits = 0;
     456      696068 :                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     457      356398 :                     start = w*128;
     458    16900380 :                     for (g = 0;  g < sce->ics.num_swb; g++) {
     459    16543982 :                         const float *coefs = sce->coeffs + start;
     460    16543982 :                         const float *scaled = s->scoefs + start;
     461    16543982 :                         int bits = 0;
     462             :                         int cb;
     463    16543982 :                         float dist = 0.0f;
     464    16543982 :                         float qenergy = 0.0f;
     465             : 
     466    16543982 :                         if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
     467      231820 :                             start += sce->ics.swb_sizes[g];
     468      231820 :                             if (sce->can_pns[w*16+g]) {
     469             :                                 /** PNS isn't free */
     470       70764 :                                 tbits += ff_pns_bits(sce, w, g);
     471             :                             }
     472      231820 :                             continue;
     473             :                         }
     474    16312162 :                         cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
     475    32909924 :                         for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
     476             :                             int b;
     477             :                             float sqenergy;
     478    33195524 :                             dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
     479             :                                                     scaled + w2*128,
     480    16597762 :                                                     sce->ics.swb_sizes[g],
     481    16597762 :                                                     sce->sf_idx[w*16+g],
     482             :                                                     cb,
     483             :                                                     1.0f,
     484             :                                                     INFINITY,
     485             :                                                     &b, &sqenergy,
     486             :                                                     0);
     487    16597762 :                             bits += b;
     488    16597762 :                             qenergy += sqenergy;
     489             :                         }
     490    16312162 :                         dists[w*16+g] = dist - bits;
     491    16312162 :                         qenergies[w*16+g] = qenergy;
     492    16312162 :                         if (prev != -1) {
     493    15972492 :                             int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
     494    15972492 :                             bits += ff_aac_scalefactor_bits[sfdiff];
     495             :                         }
     496    16312162 :                         tbits += bits;
     497    16312162 :                         start += sce->ics.swb_sizes[g];
     498    16312162 :                         prev = sce->sf_idx[w*16+g];
     499             :                     }
     500             :                 }
     501             :             }
     502      700045 :             if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) {
     503       21174 :                 float maxoverdist = 0.0f;
     504       21174 :                 float ovrfactor = 1.f+(maxits-its)*16.f/maxits;
     505       21174 :                 overdist = recomprd = 0;
     506       43720 :                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     507     1062620 :                     for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
     508     1040074 :                         if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
     509       48154 :                             float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
     510       48154 :                             maxoverdist = FFMAX(maxoverdist, ovrdist);
     511       48154 :                             overdist++;
     512             :                         }
     513             :                     }
     514             :                 }
     515       21174 :                 if (overdist) {
     516             :                     /* We have overdistorted bands, trade for zeroes (that can be noise)
     517             :                      * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
     518             :                      */
     519        7651 :                     float minspread = max_spread_thr_r;
     520        7651 :                     float maxspread = min_spread_thr_r;
     521             :                     float zspread;
     522        7651 :                     int zeroable = 0;
     523        7651 :                     int zeroed = 0;
     524             :                     int maxzeroed, zloop;
     525       16585 :                     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     526      386500 :                         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
     527      377566 :                             if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
     528      107775 :                                 minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
     529      107775 :                                 maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
     530      107775 :                                 zeroable++;
     531             :                             }
     532             :                         }
     533             :                     }
     534        7651 :                     zspread = (maxspread-minspread) * 0.0125f + minspread;
     535             :                     /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC,
     536             :                      * and forced the hand of the later search_for_pns step.
     537             :                      * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are,
     538             :                      * and leave further PNSing to search_for_pns if worthwhile.
     539             :                      */
     540        7651 :                     zspread = FFMIN3(min_spread_thr_r * 8.f, zspread,
     541             :                         ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1));
     542        7651 :                     maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits)));
     543       22953 :                     for (zloop = 0; zloop < 2; zloop++) {
     544             :                         /* Two passes: first distorted stuff - two birds in one shot and all that,
     545             :                          * then anything viable. Viable means not zero, but either CB=zero-able
     546             :                          * (too high SF), not SF <= 1 (that means we'd be operating at very high
     547             :                          * quality, we don't want PNS when doing VHQ), PNS allowed, and within
     548             :                          * the lowest ranking percentile.
     549             :                          */
     550       15302 :                         float loopovrfactor = (zloop) ? 1.0f : ovrfactor;
     551       15302 :                         int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS;
     552             :                         int mcb;
     553      622774 :                         for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
     554      607472 :                             if (sce->ics.swb_offset[g] < pns_start_pos)
     555      277301 :                                 continue;
     556      680349 :                             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     557      350178 :                                 if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread
     558       19672 :                                     && sce->sf_idx[w*16+g] > loopminsf
     559       17010 :                                     && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]))
     560       12438 :                                         || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) {
     561       15417 :                                     sce->zeroes[w*16+g] = 1;
     562       15417 :                                     sce->band_type[w*16+g] = 0;
     563       15417 :                                     zeroed++;
     564             :                                 }
     565             :                             }
     566             :                         }
     567             :                     }
     568        7651 :                     if (zeroed)
     569        6587 :                         recomprd = fflag = 1;
     570             :                 } else {
     571       13523 :                     overdist = 0;
     572             :                 }
     573             :             }
     574             :         }
     575             : 
     576      356784 :         minscaler = SCALE_MAX_POS;
     577      356784 :         maxscaler = 0;
     578      730638 :         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     579    17821830 :             for (g = 0;  g < sce->ics.num_swb; g++) {
     580    17447976 :                 if (!sce->zeroes[w*16+g]) {
     581    16459712 :                     minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
     582    16459712 :                     maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
     583             :                 }
     584             :             }
     585             :         }
     586             : 
     587      356784 :         minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
     588      356784 :         prev = -1;
     589      730638 :         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     590             :             /** Start with big steps, end up fine-tunning */
     591      373854 :             int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
     592      373854 :             int edepth = depth+2;
     593      373854 :             float uplmax = its / (maxits*0.25f) + 1.0f;
     594      373854 :             uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
     595      373854 :             start = w * 128;
     596    17821830 :             for (g = 0; g < sce->ics.num_swb; g++) {
     597    17447976 :                 int prevsc = sce->sf_idx[w*16+g];
     598    17447976 :                 if (prev < 0 && !sce->zeroes[w*16+g])
     599      356784 :                     prev = sce->sf_idx[0];
     600    17447976 :                 if (!sce->zeroes[w*16+g]) {
     601    16459712 :                     const float *coefs = sce->coeffs + start;
     602    16459712 :                     const float *scaled = s->scoefs + start;
     603    16459712 :                     int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
     604    16459712 :                     int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF);
     605    16459712 :                     int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF);
     606    25168582 :                     if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > FFMAX(mindeltasf, minsf[w*16+g])) {
     607             :                         /* Try to make sure there is some energy in every nonzero band
     608             :                          * NOTE: This algorithm must be forcibly imbalanced, pushing harder
     609             :                          *  on holes or more distorted bands at first, otherwise there's
     610             :                          *  no net gain (since the next iteration will offset all bands
     611             :                          *  on the opposite direction to compensate for extra bits)
     612             :                          */
     613    52622177 :                         for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) {
     614             :                             int cb, bits;
     615             :                             float dist, qenergy;
     616    49622966 :                             int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
     617    49622966 :                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
     618    49622966 :                             dist = qenergy = 0.f;
     619    49622966 :                             bits = 0;
     620    49622966 :                             if (!cb) {
     621     2168474 :                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
     622    47454492 :                             } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
     623     1462095 :                                 break;
     624             :                             }
     625             :                             /* !g is the DC band, it's important, since quantization error here
     626             :                              * applies to less than a cycle, it creates horrible intermodulation
     627             :                              * distortion if it doesn't stick to what psy requests
     628             :                              */
     629    48160871 :                             if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g])
     630       22912 :                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
     631    98157931 :                             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
     632             :                                 int b;
     633             :                                 float sqenergy;
     634    99994120 :                                 dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
     635             :                                                         scaled + w2*128,
     636    49997060 :                                                         sce->ics.swb_sizes[g],
     637    49997060 :                                                         sce->sf_idx[w*16+g]-1,
     638             :                                                         cb,
     639             :                                                         1.0f,
     640             :                                                         INFINITY,
     641             :                                                         &b, &sqenergy,
     642             :                                                         0);
     643    49997060 :                                 bits += b;
     644    49997060 :                                 qenergy += sqenergy;
     645             :                             }
     646    48160871 :                             sce->sf_idx[w*16+g]--;
     647    48160871 :                             dists[w*16+g] = dist - bits;
     648    48160871 :                             qenergies[w*16+g] = qenergy;
     649    94388441 :                             if (mb && (sce->sf_idx[w*16+g] < mindeltasf || (
     650    46227570 :                                     (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
     651    10367570 :                                     && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
     652             :                                 ) )) {
     653             :                                 break;
     654             :                             }
     655             :                         }
     656     7750842 :                     } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g])
     657      888970 :                             && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
     658      872830 :                             && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
     659             :                         ) {
     660             :                         /** Um... over target. Save bits for more important stuff. */
     661     2025191 :                         for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) {
     662             :                             int cb, bits;
     663             :                             float dist, qenergy;
     664     1536427 :                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
     665     1536427 :                             if (cb > 0) {
     666     1536402 :                                 dist = qenergy = 0.f;
     667     1536402 :                                 bits = 0;
     668     3076124 :                                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
     669             :                                     int b;
     670             :                                     float sqenergy;
     671     3079444 :                                     dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
     672             :                                                             scaled + w2*128,
     673     1539722 :                                                             sce->ics.swb_sizes[g],
     674     1539722 :                                                             sce->sf_idx[w*16+g]+1,
     675             :                                                             cb,
     676             :                                                             1.0f,
     677             :                                                             INFINITY,
     678             :                                                             &b, &sqenergy,
     679             :                                                             0);
     680     1539722 :                                     bits += b;
     681     1539722 :                                     qenergy += sqenergy;
     682             :                                 }
     683     1536402 :                                 dist -= bits;
     684     1536402 :                                 if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
     685     1455669 :                                     sce->sf_idx[w*16+g]++;
     686     1455669 :                                     dists[w*16+g] = dist;
     687     1455669 :                                     qenergies[w*16+g] = qenergy;
     688             :                                 } else {
     689       80733 :                                     break;
     690             :                                 }
     691             :                             } else {
     692          25 :                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
     693          25 :                                 break;
     694             :                             }
     695             :                         }
     696             :                     }
     697    16459712 :                     prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf);
     698    16459712 :                     if (sce->sf_idx[w*16+g] != prevsc)
     699     9239187 :                         fflag = 1;
     700    16459712 :                     nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
     701    16459712 :                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
     702             :                 }
     703    17447976 :                 start += sce->ics.swb_sizes[g];
     704             :             }
     705             :         }
     706             : 
     707             :         /** SF difference limit violation risk. Must re-clamp. */
     708      356784 :         prev = -1;
     709      730638 :         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     710    17821830 :             for (g = 0; g < sce->ics.num_swb; g++) {
     711    17447976 :                 if (!sce->zeroes[w*16+g]) {
     712    16459712 :                     int prevsf = sce->sf_idx[w*16+g];
     713    16459712 :                     if (prev < 0)
     714      356784 :                         prev = prevsf;
     715    16459712 :                     sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF);
     716    16459712 :                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
     717    16459712 :                     prev = sce->sf_idx[w*16+g];
     718    16459712 :                     if (!fflag && prevsf != sce->sf_idx[w*16+g])
     719           0 :                         fflag = 1;
     720             :                 }
     721             :             }
     722             :         }
     723             : 
     724      356784 :         its++;
     725      356784 :     } while (fflag && its < maxits);
     726             : 
     727             :     /** Scout out next nonzero bands */
     728       12020 :     ff_init_nextband_map(sce, nextband);
     729             : 
     730       12020 :     prev = -1;
     731       24612 :     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
     732             :         /** Make sure proper codebooks are set */
     733      600431 :         for (g = 0; g < sce->ics.num_swb; g++) {
     734      587839 :             if (!sce->zeroes[w*16+g]) {
     735      542602 :                 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
     736      542602 :                 if (sce->band_type[w*16+g] <= 0) {
     737          68 :                     if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) {
     738             :                         /** Cannot zero out, make sure it's not attempted */
     739          68 :                         sce->band_type[w*16+g] = 1;
     740             :                     } else {
     741           0 :                         sce->zeroes[w*16+g] = 1;
     742           0 :                         sce->band_type[w*16+g] = 0;
     743             :                     }
     744             :                 }
     745             :             } else {
     746       45237 :                 sce->band_type[w*16+g] = 0;
     747             :             }
     748             :             /** Check that there's no SF delta range violations */
     749      587839 :             if (!sce->zeroes[w*16+g]) {
     750      542602 :                 if (prev != -1) {
     751      530582 :                     av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
     752             :                     av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
     753       12020 :                 } else if (sce->zeroes[0]) {
     754             :                     /** Set global gain to something useful */
     755         196 :                     sce->sf_idx[0] = sce->sf_idx[w*16+g];
     756             :                 }
     757      542602 :                 prev = sce->sf_idx[w*16+g];
     758             :             }
     759             :         }
     760             :     }
     761             : }
     762             : 
     763             : #endif /* AVCODEC_AACCODER_TWOLOOP_H */

Generated by: LCOV version 1.12