LCOV - code coverage report
Current view: top level - libavcodec - opus_pvq.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 397 499 79.6 %
Date: 2017-12-17 04:34:43 Functions: 20 28 71.4 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2007-2008 CSIRO
       3             :  * Copyright (c) 2007-2009 Xiph.Org Foundation
       4             :  * Copyright (c) 2008-2009 Gregory Maxwell
       5             :  * Copyright (c) 2012 Andrew D'Addesio
       6             :  * Copyright (c) 2013-2014 Mozilla Corporation
       7             :  * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
       8             :  *
       9             :  * This file is part of FFmpeg.
      10             :  *
      11             :  * FFmpeg is free software; you can redistribute it and/or
      12             :  * modify it under the terms of the GNU Lesser General Public
      13             :  * License as published by the Free Software Foundation; either
      14             :  * version 2.1 of the License, or (at your option) any later version.
      15             :  *
      16             :  * FFmpeg is distributed in the hope that it will be useful,
      17             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      18             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      19             :  * Lesser General Public License for more details.
      20             :  *
      21             :  * You should have received a copy of the GNU Lesser General Public
      22             :  * License along with FFmpeg; if not, write to the Free Software
      23             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      24             :  */
      25             : 
      26             : #include "opustab.h"
      27             : #include "opus_pvq.h"
      28             : 
      29             : #define CELT_PVQ_U(n, k) (ff_celt_pvq_u_row[FFMIN(n, k)][FFMAX(n, k)])
      30             : #define CELT_PVQ_V(n, k) (CELT_PVQ_U(n, k) + CELT_PVQ_U(n, (k) + 1))
      31             : 
      32     1036522 : static inline int16_t celt_cos(int16_t x)
      33             : {
      34     1036522 :     x = (MUL16(x, x) + 4096) >> 13;
      35     1036522 :     x = (32767-x) + ROUND_MUL16(x, (-7651 + ROUND_MUL16(x, (8277 + ROUND_MUL16(-626, x)))));
      36     1036522 :     return x + 1;
      37             : }
      38             : 
      39      518261 : static inline int celt_log2tan(int isin, int icos)
      40             : {
      41             :     int lc, ls;
      42      518261 :     lc = opus_ilog(icos);
      43      518261 :     ls = opus_ilog(isin);
      44      518261 :     icos <<= 15 - lc;
      45      518261 :     isin <<= 15 - ls;
      46     1036522 :     return (ls << 11) - (lc << 11) +
      47     1036522 :            ROUND_MUL16(isin, ROUND_MUL16(isin, -2597) + 7932) -
      48      518261 :            ROUND_MUL16(icos, ROUND_MUL16(icos, -2597) + 7932);
      49             : }
      50             : 
      51     1005341 : static inline int celt_bits2pulses(const uint8_t *cache, int bits)
      52             : {
      53             :     // TODO: Find the size of cache and make it into an array in the parameters list
      54     1005341 :     int i, low = 0, high;
      55             : 
      56     1005341 :     high = cache[0];
      57     1005341 :     bits--;
      58             : 
      59     7037387 :     for (i = 0; i < 6; i++) {
      60     6032046 :         int center = (low + high + 1) >> 1;
      61     6032046 :         if (cache[center] >= bits)
      62     3415322 :             high = center;
      63             :         else
      64     2616724 :             low = center;
      65             :     }
      66             : 
      67     1005341 :     return (bits - (low == 0 ? -1 : cache[low]) <= cache[high] - bits) ? low : high;
      68             : }
      69             : 
      70     1017064 : static inline int celt_pulses2bits(const uint8_t *cache, int pulses)
      71             : {
      72             :     // TODO: Find the size of cache and make it into an array in the parameters list
      73     1017064 :    return (pulses == 0) ? 0 : cache[pulses] + 1;
      74             : }
      75             : 
      76      866967 : static inline void celt_normalize_residual(const int * av_restrict iy, float * av_restrict X,
      77             :                                            int N, float g)
      78             : {
      79             :     int i;
      80     8194287 :     for (i = 0; i < N; i++)
      81     7327320 :         X[i] = g * iy[i];
      82      866967 : }
      83             : 
      84      374344 : static void celt_exp_rotation_impl(float *X, uint32_t len, uint32_t stride,
      85             :                                    float c, float s)
      86             : {
      87             :     float *Xptr;
      88             :     int i;
      89             : 
      90      374344 :     Xptr = X;
      91     5419535 :     for (i = 0; i < len - stride; i++) {
      92     5045191 :         float x1     = Xptr[0];
      93     5045191 :         float x2     = Xptr[stride];
      94     5045191 :         Xptr[stride] = c * x2 + s * x1;
      95     5045191 :         *Xptr++      = c * x1 - s * x2;
      96             :     }
      97             : 
      98      374344 :     Xptr = &X[len - 2 * stride - 1];
      99     4578948 :     for (i = len - 2 * stride - 1; i >= 0; i--) {
     100     4204604 :         float x1     = Xptr[0];
     101     4204604 :         float x2     = Xptr[stride];
     102     4204604 :         Xptr[stride] = c * x2 + s * x1;
     103     4204604 :         *Xptr--      = c * x1 - s * x2;
     104             :     }
     105      374344 : }
     106             : 
     107      866967 : static inline void celt_exp_rotation(float *X, uint32_t len,
     108             :                                      uint32_t stride, uint32_t K,
     109             :                                      enum CeltSpread spread, const int encode)
     110             : {
     111      866967 :     uint32_t stride2 = 0;
     112             :     float c, s;
     113             :     float gain, theta;
     114             :     int i;
     115             : 
     116      866967 :     if (2*K >= len || spread == CELT_SPREAD_NONE)
     117      703721 :         return;
     118             : 
     119      163246 :     gain = (float)len / (len + (20 - 5*spread) * K);
     120      163246 :     theta = M_PI * gain * gain / 4;
     121             : 
     122      163246 :     c = cosf(theta);
     123      163246 :     s = sinf(theta);
     124             : 
     125      163246 :     if (len >= stride << 3) {
     126      138758 :         stride2 = 1;
     127             :         /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
     128             :         It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
     129      728197 :         while ((stride2 * stride2 + stride2) * stride + (stride >> 2) < len)
     130      450681 :             stride2++;
     131             :     }
     132             : 
     133      163246 :     len /= stride;
     134      384046 :     for (i = 0; i < stride; i++) {
     135      220800 :         if (encode) {
     136           0 :             celt_exp_rotation_impl(X + i * len, len, 1, c, -s);
     137           0 :             if (stride2)
     138           0 :                 celt_exp_rotation_impl(X + i * len, len, stride2, s, -c);
     139             :         } else {
     140      220800 :             if (stride2)
     141      153544 :                 celt_exp_rotation_impl(X + i * len, len, stride2, s, c);
     142      220800 :             celt_exp_rotation_impl(X + i * len, len, 1, c, s);
     143             :         }
     144             :     }
     145             : }
     146             : 
     147      866967 : static inline uint32_t celt_extract_collapse_mask(const int *iy, uint32_t N, uint32_t B)
     148             : {
     149      866967 :     int i, j, N0 = N / B;
     150      866967 :     uint32_t collapse_mask = 0;
     151             : 
     152      866967 :     if (B <= 1)
     153      740809 :         return 1;
     154             : 
     155      532170 :     for (i = 0; i < B; i++)
     156     1516724 :         for (j = 0; j < N0; j++)
     157     1110712 :             collapse_mask |= (!!iy[i*N0+j]) << i;
     158      126158 :     return collapse_mask;
     159             : }
     160             : 
     161      186032 : static inline void celt_stereo_merge(float *X, float *Y, float mid, int N)
     162             : {
     163             :     int i;
     164      186032 :     float xp = 0, side = 0;
     165             :     float E[2];
     166             :     float mid2;
     167             :     float gain[2];
     168             : 
     169             :     /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
     170     4464772 :     for (i = 0; i < N; i++) {
     171     4278740 :         xp   += X[i] * Y[i];
     172     4278740 :         side += Y[i] * Y[i];
     173             :     }
     174             : 
     175             :     /* Compensating for the mid normalization */
     176      186032 :     xp *= mid;
     177      186032 :     mid2 = mid;
     178      186032 :     E[0] = mid2 * mid2 + side - 2 * xp;
     179      186032 :     E[1] = mid2 * mid2 + side + 2 * xp;
     180      186032 :     if (E[0] < 6e-4f || E[1] < 6e-4f) {
     181        1154 :         for (i = 0; i < N; i++)
     182         998 :             Y[i] = X[i];
     183         156 :         return;
     184             :     }
     185             : 
     186      185876 :     gain[0] = 1.0f / sqrtf(E[0]);
     187      185876 :     gain[1] = 1.0f / sqrtf(E[1]);
     188             : 
     189     4463618 :     for (i = 0; i < N; i++) {
     190             :         float value[2];
     191             :         /* Apply mid scaling (side is already scaled) */
     192     4277742 :         value[0] = mid * X[i];
     193     4277742 :         value[1] = Y[i];
     194     4277742 :         X[i] = gain[0] * (value[0] - value[1]);
     195     4277742 :         Y[i] = gain[1] * (value[0] + value[1]);
     196             :     }
     197             : }
     198             : 
     199      139945 : static void celt_interleave_hadamard(float *tmp, float *X, int N0,
     200             :                                      int stride, int hadamard)
     201             : {
     202      139945 :     int i, j, N = N0*stride;
     203      139945 :     const uint8_t *order = &ff_celt_hadamard_order[hadamard ? stride - 2 : 30];
     204             : 
     205      883035 :     for (i = 0; i < stride; i++)
     206     4386146 :         for (j = 0; j < N0; j++)
     207     3643056 :             tmp[j*stride+i] = X[order[i]*N0+j];
     208             : 
     209      139945 :     memcpy(X, tmp, N*sizeof(float));
     210      139945 : }
     211             : 
     212       90681 : static void celt_deinterleave_hadamard(float *tmp, float *X, int N0,
     213             :                                        int stride, int hadamard)
     214             : {
     215       90681 :     int i, j, N = N0*stride;
     216       90681 :     const uint8_t *order = &ff_celt_hadamard_order[hadamard ? stride - 2 : 30];
     217             : 
     218      554705 :     for (i = 0; i < stride; i++)
     219     2736098 :         for (j = 0; j < N0; j++)
     220     2272074 :             tmp[order[i]*N0+j] = X[j*stride+i];
     221             : 
     222       90681 :     memcpy(X, tmp, N*sizeof(float));
     223       90681 : }
     224             : 
     225      356869 : static void celt_haar1(float *X, int N0, int stride)
     226             : {
     227             :     int i, j;
     228      356869 :     N0 >>= 1;
     229     1032244 :     for (i = 0; i < stride; i++) {
     230     5804455 :         for (j = 0; j < N0; j++) {
     231     5129080 :             float x0 = X[stride * (2 * j + 0) + i];
     232     5129080 :             float x1 = X[stride * (2 * j + 1) + i];
     233     5129080 :             X[stride * (2 * j + 0) + i] = (x0 + x1) * M_SQRT1_2;
     234     5129080 :             X[stride * (2 * j + 1) + i] = (x0 - x1) * M_SQRT1_2;
     235             :         }
     236             :     }
     237      356869 : }
     238             : 
     239      574432 : static inline int celt_compute_qn(int N, int b, int offset, int pulse_cap,
     240             :                                   int stereo)
     241             : {
     242             :     int qn, qb;
     243      574432 :     int N2 = 2 * N - 1;
     244      574432 :     if (stereo && N == 2)
     245       48653 :         N2--;
     246             : 
     247             :     /* The upper limit ensures that in a stereo split with itheta==16384, we'll
     248             :      * always have enough bits left over to code at least one pulse in the
     249             :      * side; otherwise it would collapse, since it doesn't get folded. */
     250      574432 :     qb = FFMIN3(b - pulse_cap - (4 << 3), (b + N2 * offset) / N2, 8 << 3);
     251      574432 :     qn = (qb < (1 << 3 >> 1)) ? 1 : ((ff_celt_qn_exp2[qb & 0x7] >> (14 - (qb >> 3))) + 1) >> 1 << 1;
     252      574432 :     return qn;
     253             : }
     254             : 
     255             : /* Convert the quantized vector to an index */
     256           0 : static inline uint32_t celt_icwrsi(uint32_t N, uint32_t K, const int *y)
     257             : {
     258           0 :     int i, idx = 0, sum = 0;
     259           0 :     for (i = N - 1; i >= 0; i--) {
     260           0 :         const uint32_t i_s = CELT_PVQ_U(N - i, sum + FFABS(y[i]) + 1);
     261           0 :         idx += CELT_PVQ_U(N - i, sum) + (y[i] < 0)*i_s;
     262           0 :         sum += FFABS(y[i]);
     263             :     }
     264           0 :     return idx;
     265             : }
     266             : 
     267             : // this code was adapted from libopus
     268      866967 : static inline uint64_t celt_cwrsi(uint32_t N, uint32_t K, uint32_t i, int *y)
     269             : {
     270      866967 :     uint64_t norm = 0;
     271             :     uint32_t q, p;
     272             :     int s, val;
     273             :     int k0;
     274             : 
     275     7327320 :     while (N > 2) {
     276             :         /*Lots of pulses case:*/
     277     5593386 :         if (K >= N) {
     278     1663468 :             const uint32_t *row = ff_celt_pvq_u_row[N];
     279             : 
     280             :             /* Are the pulses in this dimension negative? */
     281     1663468 :             p  = row[K + 1];
     282     1663468 :             s  = -(i >= p);
     283     1663468 :             i -= p & s;
     284             : 
     285             :             /*Count how many pulses were placed in this dimension.*/
     286     1663468 :             k0 = K;
     287     1663468 :             q = row[N];
     288     1663468 :             if (q > i) {
     289      268437 :                 K = N;
     290             :                 do {
     291      386581 :                     p = ff_celt_pvq_u_row[--K][N];
     292      386581 :                 } while (p > i);
     293             :             } else
     294     8734428 :                 for (p = row[K]; p > i; p = row[K])
     295     7339397 :                     K--;
     296             : 
     297     1663468 :             i    -= p;
     298     1663468 :             val   = (k0 - K + s) ^ s;
     299     1663468 :             norm += val * val;
     300     1663468 :             *y++  = val;
     301             :         } else { /*Lots of dimensions case:*/
     302             :             /*Are there any pulses in this dimension at all?*/
     303     3929918 :             p = ff_celt_pvq_u_row[K    ][N];
     304     3929918 :             q = ff_celt_pvq_u_row[K + 1][N];
     305             : 
     306     3929918 :             if (p <= i && i < q) {
     307     2829050 :                 i -= p;
     308     2829050 :                 *y++ = 0;
     309             :             } else {
     310             :                 /*Are the pulses in this dimension negative?*/
     311     1100868 :                 s  = -(i >= q);
     312     1100868 :                 i -= q & s;
     313             : 
     314             :                 /*Count how many pulses were placed in this dimension.*/
     315     1100868 :                 k0 = K;
     316     1282075 :                 do p = ff_celt_pvq_u_row[--K][N];
     317     1282075 :                 while (p > i);
     318             : 
     319     1100868 :                 i    -= p;
     320     1100868 :                 val   = (k0 - K + s) ^ s;
     321     1100868 :                 norm += val * val;
     322     1100868 :                 *y++  = val;
     323             :             }
     324             :         }
     325     5593386 :         N--;
     326             :     }
     327             : 
     328             :     /* N == 2 */
     329      866967 :     p  = 2 * K + 1;
     330      866967 :     s  = -(i >= p);
     331      866967 :     i -= p & s;
     332      866967 :     k0 = K;
     333      866967 :     K  = (i + 1) / 2;
     334             : 
     335      866967 :     if (K)
     336      611372 :         i -= 2 * K - 1;
     337             : 
     338      866967 :     val   = (k0 - K + s) ^ s;
     339      866967 :     norm += val * val;
     340      866967 :     *y++  = val;
     341             : 
     342             :     /* N==1 */
     343      866967 :     s     = -i;
     344      866967 :     val   = (K + s) ^ s;
     345      866967 :     norm += val * val;
     346      866967 :     *y    = val;
     347             : 
     348      866967 :     return norm;
     349             : }
     350             : 
     351           0 : static inline void celt_encode_pulses(OpusRangeCoder *rc, int *y, uint32_t N, uint32_t K)
     352             : {
     353           0 :     ff_opus_rc_enc_uint(rc, celt_icwrsi(N, K, y), CELT_PVQ_V(N, K));
     354           0 : }
     355             : 
     356      866967 : static inline float celt_decode_pulses(OpusRangeCoder *rc, int *y, uint32_t N, uint32_t K)
     357             : {
     358      866967 :     const uint32_t idx = ff_opus_rc_dec_uint(rc, CELT_PVQ_V(N, K));
     359      866967 :     return celt_cwrsi(N, K, idx, y);
     360             : }
     361             : 
     362             : /*
     363             :  * Faster than libopus's search, operates entirely in the signed domain.
     364             :  * Slightly worse/better depending on N, K and the input vector.
     365             :  */
     366           0 : static float ppp_pvq_search_c(float *X, int *y, int K, int N)
     367             : {
     368           0 :     int i, y_norm = 0;
     369           0 :     float res = 0.0f, xy_norm = 0.0f;
     370             : 
     371           0 :     for (i = 0; i < N; i++)
     372           0 :         res += FFABS(X[i]);
     373             : 
     374           0 :     res = K/(res + FLT_EPSILON);
     375             : 
     376           0 :     for (i = 0; i < N; i++) {
     377           0 :         y[i] = lrintf(res*X[i]);
     378           0 :         y_norm  += y[i]*y[i];
     379           0 :         xy_norm += y[i]*X[i];
     380           0 :         K -= FFABS(y[i]);
     381             :     }
     382             : 
     383           0 :     while (K) {
     384           0 :         int max_idx = 0, phase = FFSIGN(K);
     385           0 :         float max_num = 0.0f;
     386           0 :         float max_den = 1.0f;
     387           0 :         y_norm += 1.0f;
     388             : 
     389           0 :         for (i = 0; i < N; i++) {
     390             :             /* If the sum has been overshot and the best place has 0 pulses allocated
     391             :              * to it, attempting to decrease it further will actually increase the
     392             :              * sum. Prevent this by disregarding any 0 positions when decrementing. */
     393           0 :             const int ca = 1 ^ ((y[i] == 0) & (phase < 0));
     394           0 :             const int y_new = y_norm  + 2*phase*FFABS(y[i]);
     395           0 :             float xy_new = xy_norm + 1*phase*FFABS(X[i]);
     396           0 :             xy_new = xy_new * xy_new;
     397           0 :             if (ca && (max_den*xy_new) > (y_new*max_num)) {
     398           0 :                 max_den = y_new;
     399           0 :                 max_num = xy_new;
     400           0 :                 max_idx = i;
     401             :             }
     402             :         }
     403             : 
     404           0 :         K -= phase;
     405             : 
     406           0 :         phase *= FFSIGN(X[max_idx]);
     407           0 :         xy_norm += 1*phase*X[max_idx];
     408           0 :         y_norm  += 2*phase*y[max_idx];
     409           0 :         y[max_idx] += phase;
     410             :     }
     411             : 
     412           0 :     return (float)y_norm;
     413             : }
     414             : 
     415           0 : static uint32_t celt_alg_quant(OpusRangeCoder *rc, float *X, uint32_t N, uint32_t K,
     416             :                                enum CeltSpread spread, uint32_t blocks, float gain,
     417             :                                CeltPVQ *pvq)
     418             : {
     419           0 :     int *y = pvq->qcoeff;
     420             : 
     421           0 :     celt_exp_rotation(X, N, blocks, K, spread, 1);
     422           0 :     gain /= sqrtf(pvq->pvq_search(X, y, K, N));
     423           0 :     celt_encode_pulses(rc, y,  N, K);
     424           0 :     celt_normalize_residual(y, X, N, gain);
     425           0 :     celt_exp_rotation(X, N, blocks, K, spread, 0);
     426           0 :     return celt_extract_collapse_mask(y, N, blocks);
     427             : }
     428             : 
     429             : /** Decode pulse vector and combine the result with the pitch vector to produce
     430             :     the final normalised signal in the current band. */
     431      866967 : static uint32_t celt_alg_unquant(OpusRangeCoder *rc, float *X, uint32_t N, uint32_t K,
     432             :                                  enum CeltSpread spread, uint32_t blocks, float gain,
     433             :                                  CeltPVQ *pvq)
     434             : {
     435      866967 :     int *y = pvq->qcoeff;
     436             : 
     437      866967 :     gain /= sqrtf(celt_decode_pulses(rc, y, N, K));
     438      866967 :     celt_normalize_residual(y, X, N, gain);
     439      866967 :     celt_exp_rotation(X, N, blocks, K, spread, 0);
     440      866967 :     return celt_extract_collapse_mask(y, N, blocks);
     441             : }
     442             : 
     443           0 : static int celt_calc_theta(const float *X, const float *Y, int coupling, int N)
     444             : {
     445             :     int i;
     446           0 :     float e[2] = { 0.0f, 0.0f };
     447           0 :     if (coupling) { /* Coupling case */
     448           0 :         for (i = 0; i < N; i++) {
     449           0 :             e[0] += (X[i] + Y[i])*(X[i] + Y[i]);
     450           0 :             e[1] += (X[i] - Y[i])*(X[i] - Y[i]);
     451             :         }
     452             :     } else {
     453           0 :         for (i = 0; i < N; i++) {
     454           0 :             e[0] += X[i]*X[i];
     455           0 :             e[1] += Y[i]*Y[i];
     456             :         }
     457             :     }
     458           0 :     return lrintf(32768.0f*atan2f(sqrtf(e[1]), sqrtf(e[0]))/M_PI);
     459             : }
     460             : 
     461           0 : static void celt_stereo_is_decouple(float *X, float *Y, float e_l, float e_r, int N)
     462             : {
     463             :     int i;
     464           0 :     const float energy_n = 1.0f/(sqrtf(e_l*e_l + e_r*e_r) + FLT_EPSILON);
     465           0 :     e_l *= energy_n;
     466           0 :     e_r *= energy_n;
     467           0 :     for (i = 0; i < N; i++)
     468           0 :         X[i] = e_l*X[i] + e_r*Y[i];
     469           0 : }
     470             : 
     471           0 : static void celt_stereo_ms_decouple(float *X, float *Y, int N)
     472             : {
     473             :     int i;
     474           0 :     for (i = 0; i < N; i++) {
     475           0 :         const float Xret = X[i];
     476           0 :         X[i] = (X[i] + Y[i])*M_SQRT1_2;
     477           0 :         Y[i] = (Y[i] - Xret)*M_SQRT1_2;
     478             :     }
     479           0 : }
     480             : 
     481     1740988 : static av_always_inline uint32_t quant_band_template(CeltPVQ *pvq, CeltFrame *f,
     482             :                                                      OpusRangeCoder *rc,
     483             :                                                      const int band, float *X,
     484             :                                                      float *Y, int N, int b,
     485             :                                                      uint32_t blocks, float *lowband,
     486             :                                                      int duration, float *lowband_out,
     487             :                                                      int level, float gain,
     488             :                                                      float *lowband_scratch,
     489             :                                                      int fill, int quant)
     490             : {
     491             :     int i;
     492             :     const uint8_t *cache;
     493     1740988 :     int stereo = !!Y, split = stereo;
     494     1740988 :     int imid = 0, iside = 0;
     495     1740988 :     uint32_t N0 = N;
     496     1740988 :     int N_B = N / blocks;
     497     1740988 :     int N_B0 = N_B;
     498     1740988 :     int B0 = blocks;
     499     1740988 :     int time_divide = 0;
     500     1740988 :     int recombine = 0;
     501     1740988 :     int inv = 0;
     502     1740988 :     float mid = 0, side = 0;
     503     1740988 :     int longblocks = (B0 == 1);
     504     1740988 :     uint32_t cm = 0;
     505             : 
     506     1740988 :     if (N == 1) {
     507      109616 :         float *x = X;
     508      285592 :         for (i = 0; i <= stereo; i++) {
     509      175976 :             int sign = 0;
     510      175976 :             if (f->remaining2 >= 1 << 3) {
     511      169165 :                 if (quant) {
     512           0 :                     sign = x[0] < 0;
     513           0 :                     ff_opus_rc_put_raw(rc, sign, 1);
     514             :                 } else {
     515      169165 :                     sign = ff_opus_rc_get_raw(rc, 1);
     516             :                 }
     517      169165 :                 f->remaining2 -= 1 << 3;
     518             :             }
     519      175976 :             x[0] = 1.0f - 2.0f*sign;
     520      175976 :             x = Y;
     521             :         }
     522      109616 :         if (lowband_out)
     523      109616 :             lowband_out[0] = X[0];
     524      109616 :         return 1;
     525             :     }
     526             : 
     527     1631372 :     if (!stereo && level == 0) {
     528      619529 :         int tf_change = f->tf_change[band];
     529             :         int k;
     530      619529 :         if (tf_change > 0)
     531       40217 :             recombine = tf_change;
     532             :         /* Band recombining to increase frequency resolution */
     533             : 
     534      619529 :         if (lowband &&
     535      345051 :             (recombine || ((N_B & 1) == 0 && tf_change < 0) || B0 > 1)) {
     536     2776730 :             for (i = 0; i < N; i++)
     537     2668594 :                 lowband_scratch[i] = lowband[i];
     538      108136 :             lowband = lowband_scratch;
     539             :         }
     540             : 
     541      687329 :         for (k = 0; k < recombine; k++) {
     542       67800 :             if (quant || lowband)
     543       42647 :                 celt_haar1(quant ? X : lowband, N >> k, 1 << k);
     544       67800 :             fill = ff_celt_bit_interleave[fill & 0xF] | ff_celt_bit_interleave[fill >> 4] << 2;
     545             :         }
     546      619529 :         blocks >>= recombine;
     547      619529 :         N_B <<= recombine;
     548             : 
     549             :         /* Increasing the time resolution */
     550     1388667 :         while ((N_B & 1) == 0 && tf_change < 0) {
     551      149609 :             if (quant || lowband)
     552       96813 :                 celt_haar1(quant ? X : lowband, N_B, blocks);
     553      149609 :             fill |= fill << blocks;
     554      149609 :             blocks <<= 1;
     555      149609 :             N_B >>= 1;
     556      149609 :             time_divide++;
     557      149609 :             tf_change++;
     558             :         }
     559      619529 :         B0 = blocks;
     560      619529 :         N_B0 = N_B;
     561             : 
     562             :         /* Reorganize the samples in time order instead of frequency order */
     563      619529 :         if (B0 > 1 && (quant || lowband))
     564       90681 :             celt_deinterleave_hadamard(pvq->hadamard_tmp, quant ? X : lowband,
     565             :                                        N_B >> recombine, B0 << recombine,
     566             :                                        longblocks);
     567             :     }
     568             : 
     569             :     /* If we need 1.5 more bit than we can produce, split the band in two. */
     570     1631372 :     cache = ff_celt_cache_bits +
     571     1631372 :             ff_celt_cache_index[(duration + 1) * CELT_MAX_BANDS + band];
     572     1631372 :     if (!stereo && duration >= 0 && b > cache[cache[0]] + 12 && N > 2) {
     573      385812 :         N >>= 1;
     574      385812 :         Y = X + N;
     575      385812 :         split = 1;
     576      385812 :         duration -= 1;
     577      385812 :         if (blocks == 1)
     578      245445 :             fill = (fill & 1) | (fill << 1);
     579      385812 :         blocks = (blocks + 1) >> 1;
     580             :     }
     581             : 
     582     1631372 :     if (split) {
     583             :         int qn;
     584      626031 :         int itheta = quant ? celt_calc_theta(X, Y, stereo, N) : 0;
     585             :         int mbits, sbits, delta;
     586             :         int qalloc;
     587             :         int pulse_cap;
     588             :         int offset;
     589             :         int orig_fill;
     590             :         int tell;
     591             : 
     592             :         /* Decide on the resolution to give to the split parameter theta */
     593      626031 :         pulse_cap = ff_celt_log_freq_range[band] + duration * 8;
     594      626031 :         offset = (pulse_cap >> 1) - (stereo && N == 2 ? CELT_QTHETA_OFFSET_TWOPHASE :
     595             :                                                           CELT_QTHETA_OFFSET);
     596      626031 :         qn = (stereo && band >= f->intensity_stereo) ? 1 :
     597             :              celt_compute_qn(N, b, offset, pulse_cap, stereo);
     598      626031 :         tell = opus_rc_tell_frac(rc);
     599      626031 :         if (qn != 1) {
     600      572717 :             if (quant)
     601           0 :                 itheta = (itheta*qn + 8192) >> 14;
     602             :             /* Entropy coding of the angle. We use a uniform pdf for the
     603             :              * time split, a step for stereo, and a triangular one for the rest. */
     604      572717 :             if (quant) {
     605           0 :                 if (stereo && N > 2)
     606           0 :                     ff_opus_rc_enc_uint_step(rc, itheta, qn / 2);
     607           0 :                 else if (stereo || B0 > 1)
     608           0 :                     ff_opus_rc_enc_uint(rc, itheta, qn + 1);
     609             :                 else
     610           0 :                     ff_opus_rc_enc_uint_tri(rc, itheta, qn);
     611           0 :                 itheta = itheta * 16384 / qn;
     612           0 :                 if (stereo) {
     613           0 :                     if (itheta == 0)
     614           0 :                         celt_stereo_is_decouple(X, Y, f->block[0].lin_energy[band],
     615             :                                                 f->block[1].lin_energy[band], N);
     616             :                     else
     617           0 :                         celt_stereo_ms_decouple(X, Y, N);
     618             :                 }
     619             :             } else {
     620      572717 :                 if (stereo && N > 2)
     621      138754 :                     itheta = ff_opus_rc_dec_uint_step(rc, qn / 2);
     622      433963 :                 else if (stereo || B0 > 1)
     623      188518 :                     itheta = ff_opus_rc_dec_uint(rc, qn+1);
     624             :                 else
     625      245445 :                     itheta = ff_opus_rc_dec_uint_tri(rc, qn);
     626      572717 :                 itheta = itheta * 16384 / qn;
     627             :             }
     628       53314 :         } else if (stereo) {
     629       53314 :             if (quant) {
     630           0 :                 inv = itheta > 8192;
     631           0 :                  if (inv) {
     632           0 :                     for (i = 0; i < N; i++)
     633           0 :                        Y[i] *= -1;
     634             :                  }
     635           0 :                  celt_stereo_is_decouple(X, Y, f->block[0].lin_energy[band],
     636             :                                          f->block[1].lin_energy[band], N);
     637             : 
     638           0 :                 if (b > 2 << 3 && f->remaining2 > 2 << 3) {
     639           0 :                     ff_opus_rc_enc_log(rc, inv, 2);
     640             :                 } else {
     641           0 :                     inv = 0;
     642             :                 }
     643             :             } else {
     644       53314 :                 inv = (b > 2 << 3 && f->remaining2 > 2 << 3) ? ff_opus_rc_dec_log(rc, 2) : 0;
     645       53314 :                 inv = f->apply_phase_inv ? inv : 0;
     646             :             }
     647       53314 :             itheta = 0;
     648             :         }
     649      626031 :         qalloc = opus_rc_tell_frac(rc) - tell;
     650      626031 :         b -= qalloc;
     651             : 
     652      626031 :         orig_fill = fill;
     653      626031 :         if (itheta == 0) {
     654      105589 :             imid = 32767;
     655      105589 :             iside = 0;
     656      105589 :             fill = av_mod_uintp2(fill, blocks);
     657      105589 :             delta = -16384;
     658      520442 :         } else if (itheta == 16384) {
     659        2181 :             imid = 0;
     660        2181 :             iside = 32767;
     661        2181 :             fill &= ((1 << blocks) - 1) << blocks;
     662        2181 :             delta = 16384;
     663             :         } else {
     664      518261 :             imid = celt_cos(itheta);
     665      518261 :             iside = celt_cos(16384-itheta);
     666             :             /* This is the mid vs side allocation that minimizes squared error
     667             :             in that band. */
     668      518261 :             delta = ROUND_MUL16((N - 1) << 7, celt_log2tan(iside, imid));
     669             :         }
     670             : 
     671      626031 :         mid  = imid  / 32768.0f;
     672      626031 :         side = iside / 32768.0f;
     673             : 
     674             :         /* This is a special case for N=2 that only works for stereo and takes
     675             :         advantage of the fact that mid and side are orthogonal to encode
     676             :         the side with just one bit. */
     677      680218 :         if (N == 2 && stereo) {
     678             :             int c;
     679       54187 :             int sign = 0;
     680             :             float tmp;
     681             :             float *x2, *y2;
     682       54187 :             mbits = b;
     683             :             /* Only need one bit for the side */
     684       54187 :             sbits = (itheta != 0 && itheta != 16384) ? 1 << 3 : 0;
     685       54187 :             mbits -= sbits;
     686       54187 :             c = (itheta > 8192);
     687       54187 :             f->remaining2 -= qalloc+sbits;
     688             : 
     689       54187 :             x2 = c ? Y : X;
     690       54187 :             y2 = c ? X : Y;
     691       54187 :             if (sbits) {
     692       28836 :                 if (quant) {
     693           0 :                     sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
     694           0 :                     ff_opus_rc_put_raw(rc, sign, 1);
     695             :                 } else {
     696       28836 :                     sign = ff_opus_rc_get_raw(rc, 1);
     697             :                 }
     698             :             }
     699       54187 :             sign = 1 - 2 * sign;
     700             :             /* We use orig_fill here because we want to fold the side, but if
     701             :             itheta==16384, we'll have cleared the low bits of fill. */
     702       54187 :             cm = pvq->quant_band(pvq, f, rc, band, x2, NULL, N, mbits, blocks, lowband, duration,
     703             :                                  lowband_out, level, gain, lowband_scratch, orig_fill);
     704             :             /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
     705             :             and there's no need to worry about mixing with the other channel. */
     706       54187 :             y2[0] = -sign * x2[1];
     707       54187 :             y2[1] =  sign * x2[0];
     708       54187 :             X[0] *= mid;
     709       54187 :             X[1] *= mid;
     710       54187 :             Y[0] *= side;
     711       54187 :             Y[1] *= side;
     712       54187 :             tmp = X[0];
     713       54187 :             X[0] = tmp - Y[0];
     714       54187 :             Y[0] = tmp + Y[0];
     715       54187 :             tmp = X[1];
     716       54187 :             X[1] = tmp - Y[1];
     717       54187 :             Y[1] = tmp + Y[1];
     718             :         } else {
     719             :             /* "Normal" split code */
     720      571844 :             float *next_lowband2     = NULL;
     721      571844 :             float *next_lowband_out1 = NULL;
     722      571844 :             int next_level = 0;
     723             :             int rebalance;
     724             :             uint32_t cmt;
     725             : 
     726             :             /* Give more bits to low-energy MDCTs than they would
     727             :              * otherwise deserve */
     728      571844 :             if (B0 > 1 && !stereo && (itheta & 0x3fff)) {
     729      139586 :                 if (itheta > 8192)
     730             :                     /* Rough approximation for pre-echo masking */
     731       60897 :                     delta -= delta >> (4 - duration);
     732             :                 else
     733             :                     /* Corresponds to a forward-masking slope of
     734             :                      * 1.5 dB per 10 ms */
     735       78689 :                     delta = FFMIN(0, delta + (N << 3 >> (5 - duration)));
     736             :             }
     737      571844 :             mbits = av_clip((b - delta) / 2, 0, b);
     738      571844 :             sbits = b - mbits;
     739      571844 :             f->remaining2 -= qalloc;
     740             : 
     741      571844 :             if (lowband && !stereo)
     742      291653 :                 next_lowband2 = lowband + N; /* >32-bit split case */
     743             : 
     744             :             /* Only stereo needs to pass on lowband_out.
     745             :              * Otherwise, it's handled at the end */
     746      571844 :             if (stereo)
     747      186032 :                 next_lowband_out1 = lowband_out;
     748             :             else
     749      385812 :                 next_level = level + 1;
     750             : 
     751      571844 :             rebalance = f->remaining2;
     752      571844 :             if (mbits >= sbits) {
     753             :                 /* In stereo mode, we do not apply a scaling to the mid
     754             :                  * because we need the normalized mid for folding later */
     755      351165 :                 cm = pvq->quant_band(pvq, f, rc, band, X, NULL, N, mbits, blocks,
     756             :                                      lowband, duration, next_lowband_out1, next_level,
     757             :                                      stereo ? 1.0f : (gain * mid), lowband_scratch, fill);
     758      351165 :                 rebalance = mbits - (rebalance - f->remaining2);
     759      351165 :                 if (rebalance > 3 << 3 && itheta != 0)
     760       86508 :                     sbits += rebalance - (3 << 3);
     761             : 
     762             :                 /* For a stereo split, the high bits of fill are always zero,
     763             :                  * so no folding will be done to the side. */
     764      351165 :                 cmt = pvq->quant_band(pvq, f, rc, band, Y, NULL, N, sbits, blocks,
     765             :                                       next_lowband2, duration, NULL, next_level,
     766             :                                       gain * side, NULL, fill >> blocks);
     767      351165 :                 cm |= cmt << ((B0 >> 1) & (stereo - 1));
     768             :             } else {
     769             :                 /* For a stereo split, the high bits of fill are always zero,
     770             :                  * so no folding will be done to the side. */
     771      220679 :                 cm = pvq->quant_band(pvq, f, rc, band, Y, NULL, N, sbits, blocks,
     772             :                                      next_lowband2, duration, NULL, next_level,
     773             :                                      gain * side, NULL, fill >> blocks);
     774      220679 :                 cm <<= ((B0 >> 1) & (stereo - 1));
     775      220679 :                 rebalance = sbits - (rebalance - f->remaining2);
     776      220679 :                 if (rebalance > 3 << 3 && itheta != 16384)
     777       80909 :                     mbits += rebalance - (3 << 3);
     778             : 
     779             :                 /* In stereo mode, we do not apply a scaling to the mid because
     780             :                  * we need the normalized mid for folding later */
     781      220679 :                 cm |= pvq->quant_band(pvq, f, rc, band, X, NULL, N, mbits, blocks,
     782             :                                       lowband, duration, next_lowband_out1, next_level,
     783             :                                       stereo ? 1.0f : (gain * mid), lowband_scratch, fill);
     784             :             }
     785             :         }
     786             :     } else {
     787             :         /* This is the basic no-split case */
     788     1005341 :         uint32_t q         = celt_bits2pulses(cache, b);
     789     1005341 :         uint32_t curr_bits = celt_pulses2bits(cache, q);
     790     1005341 :         f->remaining2 -= curr_bits;
     791             : 
     792             :         /* Ensures we can never bust the budget */
     793     2022405 :         while (f->remaining2 < 0 && q > 0) {
     794       11723 :             f->remaining2 += curr_bits;
     795       11723 :             curr_bits      = celt_pulses2bits(cache, --q);
     796       11723 :             f->remaining2 -= curr_bits;
     797             :         }
     798             : 
     799     1005341 :         if (q != 0) {
     800             :             /* Finally do the actual (de)quantization */
     801      866967 :             if (quant) {
     802           0 :                 cm = celt_alg_quant(rc, X, N, (q < 8) ? q : (8 + (q & 7)) << ((q >> 3) - 1),
     803             :                                     f->spread, blocks, gain, pvq);
     804             :             } else {
     805      866967 :                 cm = celt_alg_unquant(rc, X, N, (q < 8) ? q : (8 + (q & 7)) << ((q >> 3) - 1),
     806             :                                       f->spread, blocks, gain, pvq);
     807             :             }
     808             :         } else {
     809             :             /* If there's no pulse, fill the band anyway */
     810      138374 :             uint32_t cm_mask = (1 << blocks) - 1;
     811      138374 :             fill &= cm_mask;
     812      138374 :             if (fill) {
     813       54115 :                 if (!lowband) {
     814             :                     /* Noise */
     815      220826 :                     for (i = 0; i < N; i++)
     816      211640 :                         X[i] = (((int32_t)celt_rng(f)) >> 20);
     817        9186 :                     cm = cm_mask;
     818             :                 } else {
     819             :                     /* Folded spectrum */
     820     1813793 :                     for (i = 0; i < N; i++) {
     821             :                         /* About 48 dB below the "normal" folding level */
     822     1768864 :                         X[i] = lowband[i] + (((celt_rng(f)) & 0x8000) ? 1.0f / 256 : -1.0f / 256);
     823             :                     }
     824       44929 :                     cm = fill;
     825             :                 }
     826       54115 :                 celt_renormalize_vector(X, N, gain);
     827             :             } else {
     828       84259 :                 memset(X, 0, N*sizeof(float));
     829             :             }
     830             :         }
     831             :     }
     832             : 
     833             :     /* This code is used by the decoder and by the resynthesis-enabled encoder */
     834     1631372 :     if (stereo) {
     835      240219 :         if (N > 2)
     836      186032 :             celt_stereo_merge(X, Y, mid, N);
     837      240219 :         if (inv) {
     838      396805 :             for (i = 0; i < N; i++)
     839      389838 :                 Y[i] *= -1;
     840             :         }
     841     1391153 :     } else if (level == 0) {
     842             :         int k;
     843             : 
     844             :         /* Undo the sample reorganization going from time order to frequency order */
     845      619529 :         if (B0 > 1)
     846      139945 :             celt_interleave_hadamard(pvq->hadamard_tmp, X, N_B >> recombine,
     847             :                                      B0 << recombine, longblocks);
     848             : 
     849             :         /* Undo time-freq changes that we did earlier */
     850      619529 :         N_B = N_B0;
     851      619529 :         blocks = B0;
     852      769138 :         for (k = 0; k < time_divide; k++) {
     853      149609 :             blocks >>= 1;
     854      149609 :             N_B <<= 1;
     855      149609 :             cm |= cm >> blocks;
     856      149609 :             celt_haar1(X, N_B, blocks);
     857             :         }
     858             : 
     859      687329 :         for (k = 0; k < recombine; k++) {
     860       67800 :             cm = ff_celt_bit_deinterleave[cm];
     861       67800 :             celt_haar1(X, N0>>k, 1<<k);
     862             :         }
     863      619529 :         blocks <<= recombine;
     864             : 
     865             :         /* Scale output for later folding */
     866      619529 :         if (lowband_out) {
     867      433497 :             float n = sqrtf(N0);
     868     8175267 :             for (i = 0; i < N0; i++)
     869     7741770 :                 lowband_out[i] = n * X[i];
     870             :         }
     871      619529 :         cm = av_mod_uintp2(cm, blocks);
     872             :     }
     873             : 
     874     1631372 :     return cm;
     875             : }
     876             : 
     877     1740988 : static QUANT_FN(pvq_decode_band)
     878             : {
     879             : #if CONFIG_OPUS_DECODER
     880     1740988 :     return quant_band_template(pvq, f, rc, band, X, Y, N, b, blocks, lowband, duration,
     881             :                                lowband_out, level, gain, lowband_scratch, fill, 0);
     882             : #else
     883             :     return 0;
     884             : #endif
     885             : }
     886             : 
     887           0 : static QUANT_FN(pvq_encode_band)
     888             : {
     889             : #if CONFIG_OPUS_ENCODER
     890           0 :     return quant_band_template(pvq, f, rc, band, X, Y, N, b, blocks, lowband, duration,
     891             :                                lowband_out, level, gain, lowband_scratch, fill, 1);
     892             : #else
     893             :     return 0;
     894             : #endif
     895             : }
     896             : 
     897          41 : int av_cold ff_celt_pvq_init(CeltPVQ **pvq, int encode)
     898             : {
     899          41 :     CeltPVQ *s = av_malloc(sizeof(CeltPVQ));
     900          41 :     if (!s)
     901           0 :         return AVERROR(ENOMEM);
     902             : 
     903          41 :     s->pvq_search = ppp_pvq_search_c;
     904          41 :     s->quant_band = encode ? pvq_encode_band : pvq_decode_band;
     905             : 
     906             :     if (ARCH_X86)
     907          41 :         ff_opus_dsp_init_x86(s);
     908             : 
     909          41 :     *pvq = s;
     910             : 
     911          41 :     return 0;
     912             : }
     913             : 
     914          41 : void av_cold ff_celt_pvq_uninit(CeltPVQ **pvq)
     915             : {
     916          41 :     av_freep(pvq);
     917          41 : }

Generated by: LCOV version 1.13