LCOV - code coverage report
Current view: top level - libavcodec - dcadsp.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 171 236 72.5 %
Date: 2017-12-18 06:23:41 Functions: 19 24 79.2 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2016 foo86
       3             :  *
       4             :  * This file is part of FFmpeg.
       5             :  *
       6             :  * FFmpeg is free software; you can redistribute it and/or
       7             :  * modify it under the terms of the GNU Lesser General Public
       8             :  * License as published by the Free Software Foundation; either
       9             :  * version 2.1 of the License, or (at your option) any later version.
      10             :  *
      11             :  * FFmpeg is distributed in the hope that it will be useful,
      12             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14             :  * Lesser General Public License for more details.
      15             :  *
      16             :  * You should have received a copy of the GNU Lesser General Public
      17             :  * License along with FFmpeg; if not, write to the Free Software
      18             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      19             :  */
      20             : 
      21             : #include "libavutil/mem.h"
      22             : 
      23             : #include "dcadsp.h"
      24             : #include "dcamath.h"
      25             : 
      26        9731 : static void decode_hf_c(int32_t **dst,
      27             :                         const int32_t *vq_index,
      28             :                         const int8_t hf_vq[1024][32],
      29             :                         int32_t scale_factors[32][2],
      30             :                         ptrdiff_t sb_start, ptrdiff_t sb_end,
      31             :                         ptrdiff_t ofs, ptrdiff_t len)
      32             : {
      33             :     int i, j;
      34             : 
      35       53015 :     for (i = sb_start; i < sb_end; i++) {
      36       43284 :         const int8_t *coeff = hf_vq[vq_index[i]];
      37       43284 :         int32_t scale = scale_factors[i][0];
      38      735828 :         for (j = 0; j < len; j++)
      39      692544 :             dst[i][j + ofs] = clip23(coeff[j] * scale + (1 << 3) >> 4);
      40             :     }
      41        9731 : }
      42             : 
      43          91 : static void decode_joint_c(int32_t **dst, int32_t **src,
      44             :                            const int32_t *scale_factors,
      45             :                            ptrdiff_t sb_start, ptrdiff_t sb_end,
      46             :                            ptrdiff_t ofs, ptrdiff_t len)
      47             : {
      48             :     int i, j;
      49             : 
      50        2632 :     for (i = sb_start; i < sb_end; i++) {
      51        2541 :         int32_t scale = scale_factors[i];
      52       43197 :         for (j = 0; j < len; j++)
      53       40656 :             dst[i][j + ofs] = clip23(mul17(src[i][j + ofs], scale));
      54             :     }
      55          91 : }
      56             : 
      57         603 : static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
      58             :                             const float *filter_coeff, ptrdiff_t npcmblocks,
      59             :                             int dec_select)
      60             : {
      61             :     // Select decimation factor
      62         603 :     int factor = 64 << dec_select;
      63         603 :     int ncoeffs = 8 >> dec_select;
      64         603 :     int nlfesamples = npcmblocks >> (dec_select + 1);
      65             :     int i, j, k;
      66             : 
      67        5427 :     for (i = 0; i < nlfesamples; i++) {
      68             :         // One decimated sample generates 64 or 128 interpolated ones
      69      159192 :         for (j = 0; j < factor / 2; j++) {
      70      154368 :             float a = 0;
      71      154368 :             float b = 0;
      72             : 
      73     1389312 :             for (k = 0; k < ncoeffs; k++) {
      74     1234944 :                 a += filter_coeff[      j * ncoeffs + k] * lfe_samples[-k];
      75     1234944 :                 b += filter_coeff[255 - j * ncoeffs - k] * lfe_samples[-k];
      76             :             }
      77             : 
      78      154368 :             pcm_samples[             j] = a;
      79      154368 :             pcm_samples[factor / 2 + j] = b;
      80             :         }
      81             : 
      82        4824 :         lfe_samples++;
      83        4824 :         pcm_samples += factor;
      84             :     }
      85         603 : }
      86             : 
      87         603 : static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
      88             :                              const float *filter_coeff, ptrdiff_t npcmblocks)
      89             : {
      90         603 :     lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 0);
      91         603 : }
      92             : 
      93           0 : static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
      94             :                              const float *filter_coeff, ptrdiff_t npcmblocks)
      95             : {
      96           0 :     lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 1);
      97           0 : }
      98             : 
      99          35 : static void lfe_x96_float_c(float *dst, const float *src,
     100             :                             float *hist, ptrdiff_t len)
     101             : {
     102          35 :     float prev = *hist;
     103             :     int i;
     104             : 
     105       17955 :     for (i = 0; i < len; i++) {
     106       17920 :         float a = 0.25f * src[i] + 0.75f * prev;
     107       17920 :         float b = 0.75f * src[i] + 0.25f * prev;
     108       17920 :         prev = src[i];
     109       17920 :         *dst++ = a;
     110       17920 :         *dst++ = b;
     111             :     }
     112             : 
     113          35 :     *hist = prev;
     114          35 : }
     115             : 
     116        3135 : static void sub_qmf32_float_c(SynthFilterContext *synth,
     117             :                               FFTContext *imdct,
     118             :                               float *pcm_samples,
     119             :                               int32_t **subband_samples_lo,
     120             :                               int32_t **subband_samples_hi,
     121             :                               float *hist1, int *offset, float *hist2,
     122             :                               const float *filter_coeff, ptrdiff_t npcmblocks,
     123             :                               float scale)
     124             : {
     125        3135 :     LOCAL_ALIGNED_32(float, input, [32]);
     126             :     int i, j;
     127             : 
     128       53295 :     for (j = 0; j < npcmblocks; j++) {
     129             :         // Load in one sample from each subband
     130     1655280 :         for (i = 0; i < 32; i++) {
     131     1605120 :             if ((i - 1) & 2)
     132      802560 :                 input[i] = -subband_samples_lo[i][j];
     133             :             else
     134      802560 :                 input[i] =  subband_samples_lo[i][j];
     135             :         }
     136             : 
     137             :         // One subband sample generates 32 interpolated ones
     138       50160 :         synth->synth_filter_float(imdct, hist1, offset,
     139             :                                   hist2, filter_coeff,
     140             :                                   pcm_samples, input, scale);
     141       50160 :         pcm_samples += 32;
     142             :     }
     143        3135 : }
     144             : 
     145         196 : static void sub_qmf64_float_c(SynthFilterContext *synth,
     146             :                               FFTContext *imdct,
     147             :                               float *pcm_samples,
     148             :                               int32_t **subband_samples_lo,
     149             :                               int32_t **subband_samples_hi,
     150             :                               float *hist1, int *offset, float *hist2,
     151             :                               const float *filter_coeff, ptrdiff_t npcmblocks,
     152             :                               float scale)
     153             : {
     154         196 :     LOCAL_ALIGNED_32(float, input, [64]);
     155             :     int i, j;
     156             : 
     157         196 :     if (!subband_samples_hi)
     158           0 :         memset(&input[32], 0, sizeof(input[0]) * 32);
     159             : 
     160        3332 :     for (j = 0; j < npcmblocks; j++) {
     161             :         // Load in one sample from each subband
     162        3136 :         if (subband_samples_hi) {
     163             :             // Full 64 subbands, first 32 are residual coded
     164      103488 :             for (i =  0; i < 32; i++) {
     165      100352 :                 if ((i - 1) & 2)
     166       50176 :                     input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j];
     167             :                 else
     168       50176 :                     input[i] =  subband_samples_lo[i][j] + subband_samples_hi[i][j];
     169             :             }
     170      103488 :             for (i = 32; i < 64; i++) {
     171      100352 :                 if ((i - 1) & 2)
     172       50176 :                     input[i] = -subband_samples_hi[i][j];
     173             :                 else
     174       50176 :                     input[i] =  subband_samples_hi[i][j];
     175             :             }
     176             :         } else {
     177             :             // Only first 32 subbands
     178           0 :             for (i =  0; i < 32; i++) {
     179           0 :                 if ((i - 1) & 2)
     180           0 :                     input[i] = -subband_samples_lo[i][j];
     181             :                 else
     182           0 :                     input[i] =  subband_samples_lo[i][j];
     183             :             }
     184             :         }
     185             : 
     186             :         // One subband sample generates 64 interpolated ones
     187        3136 :         synth->synth_filter_float_64(imdct, hist1, offset,
     188             :                                      hist2, filter_coeff,
     189             :                                      pcm_samples, input, scale);
     190        3136 :         pcm_samples += 64;
     191             :     }
     192         196 : }
     193             : 
     194        1342 : static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
     195             :                             const int32_t *filter_coeff, ptrdiff_t npcmblocks)
     196             : {
     197             :     // Select decimation factor
     198        1342 :     int nlfesamples = npcmblocks >> 1;
     199             :     int i, j, k;
     200             : 
     201       12078 :     for (i = 0; i < nlfesamples; i++) {
     202             :         // One decimated sample generates 64 interpolated ones
     203      354288 :         for (j = 0; j < 32; j++) {
     204      343552 :             int64_t a = 0;
     205      343552 :             int64_t b = 0;
     206             : 
     207     3091968 :             for (k = 0; k < 8; k++) {
     208     2748416 :                 a += (int64_t)filter_coeff[      j * 8 + k] * lfe_samples[-k];
     209     2748416 :                 b += (int64_t)filter_coeff[255 - j * 8 - k] * lfe_samples[-k];
     210             :             }
     211             : 
     212      343552 :             pcm_samples[     j] = clip23(norm23(a));
     213      343552 :             pcm_samples[32 + j] = clip23(norm23(b));
     214             :         }
     215             : 
     216       10736 :         lfe_samples++;
     217       10736 :         pcm_samples += 64;
     218             :     }
     219        1342 : }
     220             : 
     221          84 : static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
     222             :                             int32_t *hist, ptrdiff_t len)
     223             : {
     224          84 :     int32_t prev = *hist;
     225             :     int i;
     226             : 
     227       43092 :     for (i = 0; i < len; i++) {
     228       43008 :         int64_t a = INT64_C(2097471) * src[i] + INT64_C(6291137) * prev;
     229       43008 :         int64_t b = INT64_C(6291137) * src[i] + INT64_C(2097471) * prev;
     230       43008 :         prev = src[i];
     231       43008 :         *dst++ = clip23(norm23(a));
     232       43008 :         *dst++ = clip23(norm23(b));
     233             :     }
     234             : 
     235          84 :     *hist = prev;
     236          84 : }
     237             : 
     238        7331 : static void sub_qmf32_fixed_c(SynthFilterContext *synth,
     239             :                               DCADCTContext *imdct,
     240             :                               int32_t *pcm_samples,
     241             :                               int32_t **subband_samples_lo,
     242             :                               int32_t **subband_samples_hi,
     243             :                               int32_t *hist1, int *offset, int32_t *hist2,
     244             :                               const int32_t *filter_coeff, ptrdiff_t npcmblocks)
     245             : {
     246        7331 :     LOCAL_ALIGNED_32(int32_t, input, [32]);
     247             :     int i, j;
     248             : 
     249      124627 :     for (j = 0; j < npcmblocks; j++) {
     250             :         // Load in one sample from each subband
     251     3870768 :         for (i = 0; i < 32; i++)
     252     3753472 :             input[i] = subband_samples_lo[i][j];
     253             : 
     254             :         // One subband sample generates 32 interpolated ones
     255      117296 :         synth->synth_filter_fixed(imdct, hist1, offset,
     256             :                                   hist2, filter_coeff,
     257             :                                   pcm_samples, input);
     258      117296 :         pcm_samples += 32;
     259             :     }
     260        7331 : }
     261             : 
     262         420 : static void sub_qmf64_fixed_c(SynthFilterContext *synth,
     263             :                               DCADCTContext *imdct,
     264             :                               int32_t *pcm_samples,
     265             :                               int32_t **subband_samples_lo,
     266             :                               int32_t **subband_samples_hi,
     267             :                               int32_t *hist1, int *offset, int32_t *hist2,
     268             :                               const int32_t *filter_coeff, ptrdiff_t npcmblocks)
     269             : {
     270         420 :     LOCAL_ALIGNED_32(int32_t, input, [64]);
     271             :     int i, j;
     272             : 
     273         420 :     if (!subband_samples_hi)
     274         420 :         memset(&input[32], 0, sizeof(input[0]) * 32);
     275             : 
     276        7140 :     for (j = 0; j < npcmblocks; j++) {
     277             :         // Load in one sample from each subband
     278        6720 :         if (subband_samples_hi) {
     279             :             // Full 64 subbands, first 32 are residual coded
     280           0 :             for (i =  0; i < 32; i++)
     281           0 :                 input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
     282           0 :             for (i = 32; i < 64; i++)
     283           0 :                 input[i] = subband_samples_hi[i][j];
     284             :         } else {
     285             :             // Only first 32 subbands
     286      221760 :             for (i =  0; i < 32; i++)
     287      215040 :                 input[i] = subband_samples_lo[i][j];
     288             :         }
     289             : 
     290             :         // One subband sample generates 64 interpolated ones
     291        6720 :         synth->synth_filter_fixed_64(imdct, hist1, offset,
     292             :                                      hist2, filter_coeff,
     293             :                                      pcm_samples, input);
     294        6720 :         pcm_samples += 64;
     295             :     }
     296         420 : }
     297             : 
     298        1799 : static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
     299             : {
     300             :     int i;
     301             : 
     302     1023751 :     for (i = 0; i < len; i++)
     303     1021952 :         dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3);
     304        1799 : }
     305             : 
     306           0 : static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
     307             :                            const int32_t *src, ptrdiff_t len)
     308             : {
     309             :     int i;
     310             : 
     311           0 :     for (i = 0; i < len; i++) {
     312           0 :         int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);
     313           0 :         dst1[i] -= cs;
     314           0 :         dst2[i] -= cs;
     315             :     }
     316           0 : }
     317             : 
     318         120 : static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
     319             : {
     320             :     int i;
     321             : 
     322       77080 :     for (i = 0; i < len; i++)
     323       76960 :         dst[i] -= (unsigned)mul15(src[i], coeff);
     324         120 : }
     325             : 
     326          14 : static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
     327             : {
     328             :     int i;
     329             : 
     330        7182 :     for (i = 0; i < len; i++)
     331        7168 :         dst[i] += mul15(src[i], coeff);
     332          14 : }
     333             : 
     334         204 : static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
     335             : {
     336             :     int i;
     337             : 
     338      140572 :     for (i = 0; i < len; i++)
     339      140368 :         dst[i] = mul15(dst[i], scale);
     340         204 : }
     341             : 
     342           0 : static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
     343             : {
     344             :     int i;
     345             : 
     346           0 :     for (i = 0; i < len; i++)
     347           0 :         dst[i] = mul16(dst[i], scale_inv);
     348           0 : }
     349             : 
     350         896 : static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
     351             : {
     352             :     int i;
     353             : 
     354      918400 :     for (i = 0; i < len; i++)
     355      917504 :         dst[i] -= mul22(src[i], coeff);
     356         896 : }
     357             : 
     358        5376 : static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
     359             : {
     360             :     int i;
     361             : 
     362     5510400 :     for (i = 0; i < len; i++)
     363     5505024 :         dst[i] -= mul23(src[i], coeff);
     364        5376 : }
     365             : 
     366         224 : static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
     367             :                                   const int32_t *coeff, ptrdiff_t len)
     368             : {
     369             :     int i;
     370             : 
     371         224 :     filter0(src0, src1, coeff[0], len);
     372         224 :     filter0(src1, src0, coeff[1], len);
     373         224 :     filter0(src0, src1, coeff[2], len);
     374         224 :     filter0(src1, src0, coeff[3], len);
     375             : 
     376        2016 :     for (i = 0; i < 8; i++, src0--) {
     377        1792 :         filter1(src0, src1, coeff[i +  4], len);
     378        1792 :         filter1(src1, src0, coeff[i + 12], len);
     379        1792 :         filter1(src0, src1, coeff[i +  4], len);
     380             :     }
     381             : 
     382      229600 :     for (i = 0; i < len; i++) {
     383      229376 :         *dst++ = *src1++;
     384      229376 :         *dst++ = *++src0;
     385             :     }
     386         224 : }
     387             : 
     388           0 : static void lbr_bank_c(float output[32][4], float **input,
     389             :                        const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
     390             : {
     391           0 :     float SW0 = coeff[0];
     392           0 :     float SW1 = coeff[1];
     393           0 :     float SW2 = coeff[2];
     394           0 :     float SW3 = coeff[3];
     395             : 
     396           0 :     float C1  = coeff[4];
     397           0 :     float C2  = coeff[5];
     398           0 :     float C3  = coeff[6];
     399           0 :     float C4  = coeff[7];
     400             : 
     401           0 :     float AL1 = coeff[8];
     402           0 :     float AL2 = coeff[9];
     403             : 
     404             :     int i;
     405             : 
     406             :     // Short window and 8 point forward MDCT
     407           0 :     for (i = 0; i < len; i++) {
     408           0 :         float *src = input[i] + ofs;
     409             : 
     410           0 :         float a = src[-4] * SW0 - src[-1] * SW3;
     411           0 :         float b = src[-3] * SW1 - src[-2] * SW2;
     412           0 :         float c = src[ 2] * SW1 + src[ 1] * SW2;
     413           0 :         float d = src[ 3] * SW0 + src[ 0] * SW3;
     414             : 
     415           0 :         output[i][0] = C1 * b - C2 * c + C4 * a - C3 * d;
     416           0 :         output[i][1] = C1 * d - C2 * a - C4 * b - C3 * c;
     417           0 :         output[i][2] = C3 * b + C2 * d - C4 * c + C1 * a;
     418           0 :         output[i][3] = C3 * a - C2 * b + C4 * d - C1 * c;
     419             :     }
     420             : 
     421             :     // Aliasing cancellation for high frequencies
     422           0 :     for (i = 12; i < len - 1; i++) {
     423           0 :         float a = output[i  ][3] * AL1;
     424           0 :         float b = output[i+1][0] * AL1;
     425           0 :         output[i  ][3] += b - a;
     426           0 :         output[i+1][0] -= b + a;
     427           0 :         a = output[i  ][2] * AL2;
     428           0 :         b = output[i+1][1] * AL2;
     429           0 :         output[i  ][2] += b - a;
     430           0 :         output[i+1][1] -= b + a;
     431             :     }
     432           0 : }
     433             : 
     434           0 : static void lfe_iir_c(float *output, const float *input,
     435             :                       const float iir[5][4], float hist[5][2],
     436             :                       ptrdiff_t factor)
     437             : {
     438             :     float res, tmp;
     439             :     int i, j, k;
     440             : 
     441           0 :     for (i = 0; i < 64; i++) {
     442           0 :         res = *input++;
     443             : 
     444           0 :         for (j = 0; j < factor; j++) {
     445           0 :             for (k = 0; k < 5; k++) {
     446           0 :                 tmp = hist[k][0] * iir[k][0] + hist[k][1] * iir[k][1] + res;
     447           0 :                 res = hist[k][0] * iir[k][2] + hist[k][1] * iir[k][3] + tmp;
     448             : 
     449           0 :                 hist[k][0] = hist[k][1];
     450           0 :                 hist[k][1] = tmp;
     451             :             }
     452             : 
     453           0 :             *output++ = res;
     454           0 :             res = 0;
     455             :         }
     456             :     }
     457           0 : }
     458             : 
     459          91 : av_cold void ff_dcadsp_init(DCADSPContext *s)
     460             : {
     461          91 :     s->decode_hf     = decode_hf_c;
     462          91 :     s->decode_joint  = decode_joint_c;
     463             : 
     464          91 :     s->lfe_fir_float[0] = lfe_fir0_float_c;
     465          91 :     s->lfe_fir_float[1] = lfe_fir1_float_c;
     466          91 :     s->lfe_x96_float    = lfe_x96_float_c;
     467          91 :     s->sub_qmf_float[0] = sub_qmf32_float_c;
     468          91 :     s->sub_qmf_float[1] = sub_qmf64_float_c;
     469             : 
     470          91 :     s->lfe_fir_fixed    = lfe_fir_fixed_c;
     471          91 :     s->lfe_x96_fixed    = lfe_x96_fixed_c;
     472          91 :     s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;
     473          91 :     s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;
     474             : 
     475          91 :     s->decor   = decor_c;
     476             : 
     477          91 :     s->dmix_sub_xch   = dmix_sub_xch_c;
     478          91 :     s->dmix_sub       = dmix_sub_c;
     479          91 :     s->dmix_add       = dmix_add_c;
     480          91 :     s->dmix_scale     = dmix_scale_c;
     481          91 :     s->dmix_scale_inv = dmix_scale_inv_c;
     482             : 
     483          91 :     s->assemble_freq_bands = assemble_freq_bands_c;
     484             : 
     485          91 :     s->lbr_bank = lbr_bank_c;
     486          91 :     s->lfe_iir = lfe_iir_c;
     487             : 
     488             :     if (ARCH_X86)
     489          91 :         ff_dcadsp_init_x86(s);
     490          91 : }

Generated by: LCOV version 1.13