GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/dcadsp.c Lines: 171 231 74.0 %
Date: 2021-04-20 04:37:23 Branches: 68 94 72.3 %

Line Branch Exec Source
1
/*
2
 * Copyright (C) 2016 foo86
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20
21
#include "libavutil/mem.h"
22
#include "libavutil/mem_internal.h"
23
24
#include "dcadsp.h"
25
#include "dcamath.h"
26
27
9736
/**
 * Reconstruct vector-quantised subband samples.
 *
 * For every subband i in [sb_start, sb_end) the codebook row
 * hf_vq[vq_index[i]] is selected and its first len entries are multiplied
 * by scale_factors[i][0]. Each product is rounded by adding 8 and shifting
 * right by 4 (the addition binds tighter than the shift), passed through
 * clip23() and written to dst[i][ofs] .. dst[i][ofs + len - 1].
 *
 * @param dst           per-subband output buffers
 * @param vq_index      codebook row index for each subband
 * @param hf_vq         codebook, 1024 rows of 32 entries
 * @param scale_factors per-subband scale factors; only element [0] is read
 * @param sb_start      first subband to process
 * @param sb_end        one past the last subband to process
 * @param ofs           offset of the first written sample in each subband
 * @param len           number of samples written per subband
 *
 * NOTE(review): clip23() comes from dcamath.h and is not visible here; it
 * presumably saturates to the 24-bit sample range - confirm.
 */
static void decode_hf_c(int32_t **dst,
28
                        const int32_t *vq_index,
29
                        const int8_t hf_vq[1024][32],
30
                        int32_t scale_factors[32][2],
31
                        ptrdiff_t sb_start, ptrdiff_t sb_end,
32
                        ptrdiff_t ofs, ptrdiff_t len)
33
{
34
    int i, j;
35
36
53040
    for (i = sb_start; i < sb_end; i++) {
37
43304
        const int8_t *coeff = hf_vq[vq_index[i]];
38
43304
        int32_t scale = scale_factors[i][0];
39
736168
        for (j = 0; j < len; j++)
40
692864
            dst[i][j + ofs] = clip23(coeff[j] * scale + (1 << 3) >> 4);
41
    }
42
9736
}
43
44
91
/**
 * Derive subband samples of one channel from another channel's samples.
 *
 * For every subband i in [sb_start, sb_end) and every j in [0, len), the
 * source sample src[i][ofs + j] is combined with scale_factors[i] through
 * mul17(), passed through clip23() and stored at dst[i][ofs + j].
 *
 * @param dst           per-subband output buffers
 * @param src           per-subband input buffers
 * @param scale_factors one scale factor per subband
 * @param sb_start      first subband to process
 * @param sb_end        one past the last subband to process
 * @param ofs           offset of the first sample in each subband
 * @param len           number of samples processed per subband
 *
 * NOTE(review): mul17() and clip23() are dcamath.h helpers not visible
 * here; presumably a 17-bit fixed-point multiply and a 24-bit clamp.
 */
static void decode_joint_c(int32_t **dst, int32_t **src,
45
                           const int32_t *scale_factors,
46
                           ptrdiff_t sb_start, ptrdiff_t sb_end,
47
                           ptrdiff_t ofs, ptrdiff_t len)
48
{
49
    int i, j;
50
51
2632
    for (i = sb_start; i < sb_end; i++) {
52
2541
        int32_t scale = scale_factors[i];
53
43197
        for (j = 0; j < len; j++)
54
40656
            dst[i][j + ofs] = clip23(mul17(src[i][j + ofs], scale));
55
    }
56
91
}
57
58
603
/**
 * Floating-point FIR interpolation of decimated LFE samples.
 *
 * dec_select picks the geometry: factor = 64 << dec_select output samples
 * per input sample, ncoeffs = 8 >> dec_select taps per output sample, and
 * npcmblocks >> (dec_select + 1) input samples consumed.
 *
 * For each input sample the first factor / 2 outputs use the coefficient
 * table in forward order (index j * ncoeffs + k) and the second factor / 2
 * outputs use it mirrored (index 255 - j * ncoeffs - k). The table must
 * therefore hold at least 256 entries.
 *
 * The taps read lfe_samples[0], lfe_samples[-1], ... lfe_samples[-(ncoeffs - 1)],
 * so the caller must supply ncoeffs - 1 valid history samples in front of
 * the lfe_samples pointer.
 *
 * @param pcm_samples  output buffer
 * @param lfe_samples  decimated input samples, preceded by history
 * @param filter_coeff interpolation filter coefficients
 * @param npcmblocks   number of PCM blocks to produce
 * @param dec_select   decimation selector (the wrappers in this file pass 0 or 1)
 */
static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
59
                            const float *filter_coeff, ptrdiff_t npcmblocks,
60
                            int dec_select)
61
{
62
    // Select decimation factor
63
603
    int factor = 64 << dec_select;
64
603
    int ncoeffs = 8 >> dec_select;
65
603
    int nlfesamples = npcmblocks >> (dec_select + 1);
66
    int i, j, k;
67
68
5427
    for (i = 0; i < nlfesamples; i++) {
69
        // One decimated sample generates 64 or 128 interpolated ones
70
159192
        for (j = 0; j < factor / 2; j++) {
71
154368
            float a = 0;
72
154368
            float b = 0;
73
74
1389312
            for (k = 0; k < ncoeffs; k++) {
75
1234944
                a += filter_coeff[      j * ncoeffs + k] * lfe_samples[-k];
76
1234944
                b += filter_coeff[255 - j * ncoeffs - k] * lfe_samples[-k];
77
            }
78
79
154368
            pcm_samples[             j] = a;
80
154368
            pcm_samples[factor / 2 + j] = b;
81
        }
82
83
4824
        lfe_samples++;
84
4824
        pcm_samples += factor;
85
    }
86
603
}
87
88
603
/**
 * LFE FIR interpolation with dec_select = 0: forwards to lfe_fir_float_c(),
 * which then produces 64 output samples per input sample using 8 taps.
 */
static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
89
                             const float *filter_coeff, ptrdiff_t npcmblocks)
90
{
91
603
    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 0);
92
603
}
93
94
/**
 * LFE FIR interpolation with dec_select = 1: forwards to lfe_fir_float_c(),
 * which then produces 128 output samples per input sample using 4 taps.
 */
static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
95
                             const float *filter_coeff, ptrdiff_t npcmblocks)
96
{
97
    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 1);
98
}
99
100
35
/**
 * Double the sample count of a float signal by linear interpolation.
 *
 * Each input sample src[i] yields two outputs blended with the previous
 * input sample: 0.25 * src[i] + 0.75 * prev, then 0.75 * src[i] + 0.25 * prev.
 * The previous sample for i = 0 is read from *hist, and the last input
 * sample is written back to *hist on return so consecutive calls chain.
 *
 * @param dst  output buffer, receives 2 * len samples
 * @param src  input buffer of len samples
 * @param hist single-sample state carried between calls
 * @param len  number of input samples
 */
static void lfe_x96_float_c(float *dst, const float *src,
101
                            float *hist, ptrdiff_t len)
102
{
103
35
    float prev = *hist;
104
    int i;
105
106
17955
    for (i = 0; i < len; i++) {
107
17920
        float a = 0.25f * src[i] + 0.75f * prev;
108
17920
        float b = 0.75f * src[i] + 0.25f * prev;
109
17920
        prev = src[i];
110
17920
        *dst++ = a;
111
17920
        *dst++ = b;
112
    }
113
114
35
    *hist = prev;
115
35
}
116
117
3135
/**
 * Floating-point synthesis of 32 subbands into PCM.
 *
 * For each of the npcmblocks time slots j, sample j of every one of the 32
 * low subbands is converted to float, negated when (i - 1) & 2 is non-zero
 * (subbands with i % 4 equal to 0 or 3), and the resulting 32-element
 * vector is handed to synth->synth_filter_float(), after which pcm_samples
 * advances by 32.
 *
 * subband_samples_hi is not read by this function.
 *
 * hist1, offset, hist2, filter_coeff, imdct and scale are passed through to
 * the synthesis filter unchanged.
 */
static void sub_qmf32_float_c(SynthFilterContext *synth,
118
                              FFTContext *imdct,
119
                              float *pcm_samples,
120
                              int32_t **subband_samples_lo,
121
                              int32_t **subband_samples_hi,
122
                              float *hist1, int *offset, float *hist2,
123
                              const float *filter_coeff, ptrdiff_t npcmblocks,
124
                              float scale)
125
{
126
3135
    LOCAL_ALIGNED_32(float, input, [32]);
127
    int i, j;
128
129
53295
    for (j = 0; j < npcmblocks; j++) {
130
        // Load in one sample from each subband
131
1655280
        for (i = 0; i < 32; i++) {
132
1605120
            if ((i - 1) & 2)
133
802560
                input[i] = -subband_samples_lo[i][j];
134
            else
135
802560
                input[i] =  subband_samples_lo[i][j];
136
        }
137
138
        // One subband sample generates 32 interpolated ones
139
50160
        synth->synth_filter_float(imdct, hist1, offset,
140
                                  hist2, filter_coeff,
141
                                  pcm_samples, input, scale);
142
50160
        pcm_samples += 32;
143
    }
144
3135
}
145
146
196
/**
 * Floating-point synthesis of up to 64 subbands into PCM.
 *
 * When subband_samples_hi is non-NULL, subbands 0..31 are the sum of the
 * lo and hi samples and subbands 32..63 come from hi alone. When it is
 * NULL, only subbands 0..31 are loaded from lo and the upper half of the
 * input vector is zeroed once before the loop.
 *
 * In every case a sample is negated when (i - 1) & 2 is non-zero
 * (subbands with i % 4 equal to 0 or 3). Each time slot is handed to
 * synth->synth_filter_float_64(), after which pcm_samples advances by 64.
 *
 * hist1, offset, hist2, filter_coeff, imdct and scale are passed through to
 * the synthesis filter unchanged.
 */
static void sub_qmf64_float_c(SynthFilterContext *synth,
147
                              FFTContext *imdct,
148
                              float *pcm_samples,
149
                              int32_t **subband_samples_lo,
150
                              int32_t **subband_samples_hi,
151
                              float *hist1, int *offset, float *hist2,
152
                              const float *filter_coeff, ptrdiff_t npcmblocks,
153
                              float scale)
154
{
155
196
    LOCAL_ALIGNED_32(float, input, [64]);
156
    int i, j;
157
158
196
    if (!subband_samples_hi)
159
        memset(&input[32], 0, sizeof(input[0]) * 32);
160
161
3332
    for (j = 0; j < npcmblocks; j++) {
162
        // Load in one sample from each subband
163
3136
        if (subband_samples_hi) {
164
            // Full 64 subbands, first 32 are residual coded
165
103488
            for (i =  0; i < 32; i++) {
166
100352
                if ((i - 1) & 2)
167
50176
                    input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j];
168
                else
169
50176
                    input[i] =  subband_samples_lo[i][j] + subband_samples_hi[i][j];
170
            }
171
103488
            for (i = 32; i < 64; i++) {
172
100352
                if ((i - 1) & 2)
173
50176
                    input[i] = -subband_samples_hi[i][j];
174
                else
175
50176
                    input[i] =  subband_samples_hi[i][j];
176
            }
177
        } else {
178
            // Only first 32 subbands
179
            for (i =  0; i < 32; i++) {
180
                if ((i - 1) & 2)
181
                    input[i] = -subband_samples_lo[i][j];
182
                else
183
                    input[i] =  subband_samples_lo[i][j];
184
            }
185
        }
186
187
        // One subband sample generates 64 interpolated ones
188
3136
        synth->synth_filter_float_64(imdct, hist1, offset,
189
                                     hist2, filter_coeff,
190
                                     pcm_samples, input, scale);
191
3136
        pcm_samples += 64;
192
    }
193
196
}
194
195
1343
/**
 * Fixed-point FIR interpolation of decimated LFE samples.
 *
 * Consumes npcmblocks >> 1 input samples and produces 64 output samples for
 * each, using 8 taps per output. The first 32 outputs use the coefficient
 * table in forward order (index j * 8 + k) and the last 32 use it mirrored
 * (index 255 - j * 8 - k), so the table must hold at least 256 entries.
 *
 * Products are accumulated in 64 bits, then reduced with norm23() and
 * clip23() before being stored.
 *
 * The taps read lfe_samples[0] down to lfe_samples[-7], so the caller must
 * supply 7 valid history samples in front of the lfe_samples pointer.
 *
 * @param pcm_samples  output buffer
 * @param lfe_samples  decimated input samples, preceded by history
 * @param filter_coeff interpolation filter coefficients
 * @param npcmblocks   number of PCM blocks to produce
 *
 * NOTE(review): norm23() and clip23() are dcamath.h helpers not visible
 * here; presumably a rounding shift by 23 bits and a 24-bit clamp.
 */
static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
196
                            const int32_t *filter_coeff, ptrdiff_t npcmblocks)
197
{
198
    // Fixed decimation: one LFE sample per two PCM blocks
199
1343
    int nlfesamples = npcmblocks >> 1;
200
    int i, j, k;
201
202
12087
    for (i = 0; i < nlfesamples; i++) {
203
        // One decimated sample generates 64 interpolated ones
204
354552
        for (j = 0; j < 32; j++) {
205
343808
            int64_t a = 0;
206
343808
            int64_t b = 0;
207
208
3094272
            for (k = 0; k < 8; k++) {
209
2750464
                a += (int64_t)filter_coeff[      j * 8 + k] * lfe_samples[-k];
210
2750464
                b += (int64_t)filter_coeff[255 - j * 8 - k] * lfe_samples[-k];
211
            }
212
213
343808
            pcm_samples[     j] = clip23(norm23(a));
214
343808
            pcm_samples[32 + j] = clip23(norm23(b));
215
        }
216
217
10744
        lfe_samples++;
218
10744
        pcm_samples += 64;
219
    }
220
1343
}
221
222
84
/**
 * Double the sample count of a fixed-point signal by interpolation.
 *
 * Each input sample src[i] yields two outputs that blend it with the
 * previous input sample using the integer weights 2097471 and 6291137,
 * swapped between the first and second output. The two weights sum to
 * exactly 1 << 23. The 64-bit sums are reduced with norm23() and clip23().
 *
 * The previous sample for i = 0 is read from *hist, and the last input
 * sample is written back to *hist on return so consecutive calls chain.
 *
 * @param dst  output buffer, receives 2 * len samples
 * @param src  input buffer of len samples
 * @param hist single-sample state carried between calls
 * @param len  number of input samples
 *
 * NOTE(review): the weights are close to, but not exactly, 0.25 and 0.75
 * scaled by 1 << 23; presumably they are mandated by the format
 * specification - confirm before changing.
 */
static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
223
                            int32_t *hist, ptrdiff_t len)
224
{
225
84
    int32_t prev = *hist;
226
    int i;
227
228
43092
    for (i = 0; i < len; i++) {
229
43008
        int64_t a = INT64_C(2097471) * src[i] + INT64_C(6291137) * prev;
230
43008
        int64_t b = INT64_C(6291137) * src[i] + INT64_C(2097471) * prev;
231
43008
        prev = src[i];
232
43008
        *dst++ = clip23(norm23(a));
233
43008
        *dst++ = clip23(norm23(b));
234
    }
235
236
84
    *hist = prev;
237
84
}
238
239
7336
/**
 * Fixed-point synthesis of 32 subbands into PCM.
 *
 * For each of the npcmblocks time slots j, sample j of every one of the 32
 * low subbands is gathered into a contiguous vector (no sign changes, in
 * contrast to the float variant) and handed to synth->synth_filter_fixed(),
 * after which pcm_samples advances by 32.
 *
 * subband_samples_hi is not read by this function.
 *
 * hist1, offset, hist2, filter_coeff and imdct are passed through to the
 * synthesis filter unchanged.
 */
static void sub_qmf32_fixed_c(SynthFilterContext *synth,
240
                              DCADCTContext *imdct,
241
                              int32_t *pcm_samples,
242
                              int32_t **subband_samples_lo,
243
                              int32_t **subband_samples_hi,
244
                              int32_t *hist1, int *offset, int32_t *hist2,
245
                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
246
{
247
7336
    LOCAL_ALIGNED_32(int32_t, input, [32]);
248
    int i, j;
249
250
124712
    for (j = 0; j < npcmblocks; j++) {
251
        // Load in one sample from each subband
252
3873408
        for (i = 0; i < 32; i++)
253
3756032
            input[i] = subband_samples_lo[i][j];
254
255
        // One subband sample generates 32 interpolated ones
256
117376
        synth->synth_filter_fixed(imdct, hist1, offset,
257
                                  hist2, filter_coeff,
258
                                  pcm_samples, input);
259
117376
        pcm_samples += 32;
260
    }
261
7336
}
262
263
420
/**
 * Fixed-point synthesis of up to 64 subbands into PCM.
 *
 * When subband_samples_hi is non-NULL, subbands 0..31 are the sum of the
 * lo and hi samples and subbands 32..63 come from hi alone. When it is
 * NULL, only subbands 0..31 are loaded from lo and the upper half of the
 * input vector is zeroed once before the loop.
 *
 * Each time slot is handed to synth->synth_filter_fixed_64(), after which
 * pcm_samples advances by 64.
 *
 * hist1, offset, hist2, filter_coeff and imdct are passed through to the
 * synthesis filter unchanged.
 */
static void sub_qmf64_fixed_c(SynthFilterContext *synth,
264
                              DCADCTContext *imdct,
265
                              int32_t *pcm_samples,
266
                              int32_t **subband_samples_lo,
267
                              int32_t **subband_samples_hi,
268
                              int32_t *hist1, int *offset, int32_t *hist2,
269
                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
270
{
271
420
    LOCAL_ALIGNED_32(int32_t, input, [64]);
272
    int i, j;
273
274
420
    if (!subband_samples_hi)
275
420
        memset(&input[32], 0, sizeof(input[0]) * 32);
276
277
7140
    for (j = 0; j < npcmblocks; j++) {
278
        // Load in one sample from each subband
279
6720
        if (subband_samples_hi) {
280
            // Full 64 subbands, first 32 are residual coded
281
            for (i =  0; i < 32; i++)
282
                input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
283
            for (i = 32; i < 64; i++)
284
                input[i] = subband_samples_hi[i][j];
285
        } else {
286
            // Only first 32 subbands
287
221760
            for (i =  0; i < 32; i++)
288
215040
                input[i] = subband_samples_lo[i][j];
289
        }
290
291
        // One subband sample generates 64 interpolated ones
292
6720
        synth->synth_filter_fixed_64(imdct, hist1, offset,
293
                                     hist2, filter_coeff,
294
                                     pcm_samples, input);
295
6720
        pcm_samples += 64;
296
    }
297
420
}
298
299
1803
/**
 * Add a scaled copy of src to dst, element by element.
 *
 * For each i in [0, len): src[i] is multiplied by coeff, 4 is added for
 * rounding, the result is shifted right by 3 and added to dst[i].
 *
 * @param dst   buffer updated in place
 * @param src   input buffer
 * @param coeff multiplier applied to src
 * @param len   number of elements
 *
 * NOTE(review): the SUINT casts presumably exist so the multiply and the
 * accumulation wrap instead of triggering signed-overflow UB; SUINT is
 * defined outside this file - confirm.
 */
static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
300
{
301
    int i;
302
303
1025803
    for (i = 0; i < len; i++)
304
1024000
        dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3);
305
1803
}
306
307
/**
 * Subtract a scaled copy of src from two destination buffers.
 *
 * For each i in [0, len): cs = mul23(src[i], 5931520) is computed once and
 * subtracted from both dst1[i] and dst2[i].
 *
 * @param dst1 first buffer updated in place
 * @param dst2 second buffer updated in place
 * @param src  input buffer
 * @param len  number of elements
 *
 * NOTE(review): the literal 5931520 equals 23170 << 8, i.e. the constant
 * named in the inline comment carried at 15 fractional bits and widened to
 * 23; it is not the full-precision product. Presumably intentional -
 * confirm before "correcting" it.
 */
static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
308
                           const int32_t *src, ptrdiff_t len)
309
{
310
    int i;
311
312
    for (i = 0; i < len; i++) {
313
        int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);
314
        dst1[i] -= cs;
315
        dst2[i] -= cs;
316
    }
317
}
318
319
120
/**
 * Subtract mul15(src[i], coeff) from dst[i] for each i in [0, len).
 *
 * @param dst   buffer updated in place
 * @param src   input buffer
 * @param coeff multiplier passed to mul15()
 * @param len   number of elements
 *
 * NOTE(review): the (unsigned) cast presumably makes the subtraction wrap
 * rather than overflow a signed type; mul15() is a dcamath.h helper not
 * visible here.
 */
static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
320
{
321
    int i;
322
323
77080
    for (i = 0; i < len; i++)
324
76960
        dst[i] -= (unsigned)mul15(src[i], coeff);
325
120
}
326
327
14
/**
 * Add mul15(src[i], coeff) to dst[i] for each i in [0, len).
 *
 * @param dst   buffer updated in place
 * @param src   input buffer
 * @param coeff multiplier passed to mul15()
 * @param len   number of elements
 *
 * NOTE(review): the (unsigned) cast presumably makes the addition wrap
 * rather than overflow a signed type; mul15() is a dcamath.h helper not
 * visible here.
 */
static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
328
{
329
    int i;
330
331
7182
    for (i = 0; i < len; i++)
332
7168
        dst[i] += (unsigned)mul15(src[i], coeff);
333
14
}
334
335
204
/**
 * Replace each dst[i], i in [0, len), with mul15(dst[i], scale).
 *
 * @param dst   buffer scaled in place
 * @param scale multiplier passed to mul15()
 * @param len   number of elements
 */
static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
336
{
337
    int i;
338
339
140572
    for (i = 0; i < len; i++)
340
140368
        dst[i] = mul15(dst[i], scale);
341
204
}
342
343
/**
 * Replace each dst[i], i in [0, len), with mul16(dst[i], scale_inv).
 *
 * @param dst       buffer scaled in place
 * @param scale_inv multiplier passed to mul16()
 * @param len       number of elements
 */
static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
344
{
345
    int i;
346
347
    for (i = 0; i < len; i++)
348
        dst[i] = mul16(dst[i], scale_inv);
349
}
350
351
896
/**
 * Subtract mul22(src[i], coeff) from dst[i] for each i in [0, len).
 *
 * Used by assemble_freq_bands_c() for its first four filtering passes.
 *
 * NOTE(review): dst is declared SUINT32 although callers pass int32_t
 * buffers; presumably so the subtraction wraps instead of overflowing -
 * SUINT32 is defined outside this file.
 */
static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
352
{
353
    int i;
354
355
918400
    for (i = 0; i < len; i++)
356
917504
        dst[i] -= mul22(src[i], coeff);
357
896
}
358
359
5376
/**
 * Subtract mul23(src[i], coeff) from dst[i] for each i in [0, len).
 *
 * Identical in shape to filter0() but uses mul23() instead of mul22().
 * Used by assemble_freq_bands_c() for its eight-step filtering loop.
 */
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
360
{
361
    int i;
362
363
5510400
    for (i = 0; i < len; i++)
364
5505024
        dst[i] -= mul23(src[i], coeff);
365
5376
}
366
367
224
/**
 * Combine two band signals into one interleaved output of 2 * len samples.
 *
 * Both inputs are modified in place:
 *  1. Four filter0() passes alternate between updating src0 from src1 and
 *     src1 from src0, using coeff[0] .. coeff[3].
 *  2. Eight iterations each run filter1() three times (src0 from src1 with
 *     coeff[i + 4], src1 from src0 with coeff[i + 12], then src0 from src1
 *     with coeff[i + 4] again). src0 is moved back by one element after
 *     every iteration, so the passes touch elements down to src0[-7].
 *  3. The result is interleaved into dst: even positions take src1[0..len-1],
 *     odd positions take the len elements starting at the original src0[-7].
 *
 * The caller must therefore provide at least 7 valid elements in front of
 * src0, and coeff must hold at least 20 entries.
 *
 * @param dst   output buffer, receives 2 * len samples
 * @param src0  first band, modified in place, preceded by history
 * @param src1  second band, modified in place
 * @param coeff filter coefficients
 * @param len   number of samples per band
 */
static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
368
                                  const int32_t *coeff, ptrdiff_t len)
369
{
370
    int i;
371
372
224
    filter0(src0, src1, coeff[0], len);
373
224
    filter0(src1, src0, coeff[1], len);
374
224
    filter0(src0, src1, coeff[2], len);
375
224
    filter0(src1, src0, coeff[3], len);
376
377
2016
    for (i = 0; i < 8; i++, src0--) {
378
1792
        filter1(src0, src1, coeff[i +  4], len);
379
1792
        filter1(src1, src0, coeff[i + 12], len);
380
1792
        filter1(src0, src1, coeff[i +  4], len);
381
    }
382
383
229600
    for (i = 0; i < len; i++) {
384
229376
        *dst++ = *src1++;
385
229376
        *dst++ = *++src0;
386
    }
387
224
}
388
389
/**
 * Windowed 8-to-4 transform over a set of input rows, followed by a
 * cross-row correction step.
 *
 * coeff layout: [0..3] window weights SW0..SW3, [4..7] transform weights
 * C1..C4, [8..9] correction weights AL1 and AL2.
 *
 * First pass, for each row i in [0, len): the eight samples
 * input[i][ofs - 4] .. input[i][ofs + 3] are folded into four windowed
 * terms a..d, which are then combined into output[i][0..3].
 *
 * Second pass, for each i in [12, len - 1): output[i][3] is cross-mixed
 * with output[i + 1][0] using AL1, and output[i][2] with output[i + 1][1]
 * using AL2. Rows below 12 are left as produced by the first pass.
 *
 * @param output receives four values per row
 * @param input  one pointer per row; 4 samples before and 4 from ofs are read
 * @param coeff  ten coefficients as described above
 * @param ofs    sample offset applied to every input row
 * @param len    number of rows to process (output has room for 32)
 */
static void lbr_bank_c(float output[32][4], float **input,
390
                       const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
391
{
392
    float SW0 = coeff[0];
393
    float SW1 = coeff[1];
394
    float SW2 = coeff[2];
395
    float SW3 = coeff[3];
396
397
    float C1  = coeff[4];
398
    float C2  = coeff[5];
399
    float C3  = coeff[6];
400
    float C4  = coeff[7];
401
402
    float AL1 = coeff[8];
403
    float AL2 = coeff[9];
404
405
    int i;
406
407
    // Short window and 8 point forward MDCT
408
    for (i = 0; i < len; i++) {
409
        float *src = input[i] + ofs;
410
411
        float a = src[-4] * SW0 - src[-1] * SW3;
412
        float b = src[-3] * SW1 - src[-2] * SW2;
413
        float c = src[ 2] * SW1 + src[ 1] * SW2;
414
        float d = src[ 3] * SW0 + src[ 0] * SW3;
415
416
        output[i][0] = C1 * b - C2 * c + C4 * a - C3 * d;
417
        output[i][1] = C1 * d - C2 * a - C4 * b - C3 * c;
418
        output[i][2] = C3 * b + C2 * d - C4 * c + C1 * a;
419
        output[i][3] = C3 * a - C2 * b + C4 * d - C1 * c;
420
    }
421
422
    // Aliasing cancellation for high frequencies
423
    for (i = 12; i < len - 1; i++) {
424
        float a = output[i  ][3] * AL1;
425
        float b = output[i+1][0] * AL1;
426
        output[i  ][3] += b - a;
427
        output[i+1][0] -= b + a;
428
        a = output[i  ][2] * AL2;
429
        b = output[i+1][1] * AL2;
430
        output[i  ][2] += b - a;
431
        output[i+1][1] -= b + a;
432
    }
433
}
434
435
/**
 * Interpolate 64 input samples through a cascade of five second-order
 * recursive sections.
 *
 * Every input sample yields `factor` output samples: the first is driven by
 * the input value, the remaining factor - 1 are driven by zero (res is
 * reset to 0 after each output). For each output the value runs through
 * sections k = 0..4 in order; section k combines its two history values
 * with iir[k][0..1] to form the new state and with iir[k][2..3] to form the
 * value passed to the next section, then shifts its history.
 *
 * @param output receives 64 * factor samples
 * @param input  64 input samples
 * @param iir    four coefficients for each of the five sections
 * @param hist   two state values per section, updated in place
 * @param factor number of output samples per input sample
 */
static void lfe_iir_c(float *output, const float *input,
436
                      const float iir[5][4], float hist[5][2],
437
                      ptrdiff_t factor)
438
{
439
    float res, tmp;
440
    int i, j, k;
441
442
    for (i = 0; i < 64; i++) {
443
        res = *input++;
444
445
        for (j = 0; j < factor; j++) {
446
            for (k = 0; k < 5; k++) {
447
                tmp = hist[k][0] * iir[k][0] + hist[k][1] * iir[k][1] + res;
448
                res = hist[k][0] * iir[k][2] + hist[k][1] * iir[k][3] + tmp;
449
450
                hist[k][0] = hist[k][1];
451
                hist[k][1] = tmp;
452
            }
453
454
            *output++ = res;
455
            res = 0;
456
        }
457
    }
458
}
459
460
92
/**
 * Fill a DCADSPContext with the portable C implementations defined in this
 * file, then, when ARCH_X86 is non-zero, call ff_dcadsp_init_x86() on the
 * same context.
 *
 * NOTE(review): ff_dcadsp_init_x86() is defined elsewhere; presumably it
 * replaces some of these pointers with optimised versions - confirm.
 *
 * @param s context whose function pointers are initialised
 */
av_cold void ff_dcadsp_init(DCADSPContext *s)
461
{
462
92
    s->decode_hf     = decode_hf_c;
463
92
    s->decode_joint  = decode_joint_c;
464
465
92
    s->lfe_fir_float[0] = lfe_fir0_float_c;
466
92
    s->lfe_fir_float[1] = lfe_fir1_float_c;
467
92
    s->lfe_x96_float    = lfe_x96_float_c;
468
92
    s->sub_qmf_float[0] = sub_qmf32_float_c;
469
92
    s->sub_qmf_float[1] = sub_qmf64_float_c;
470
471
92
    s->lfe_fir_fixed    = lfe_fir_fixed_c;
472
92
    s->lfe_x96_fixed    = lfe_x96_fixed_c;
473
92
    s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;
474
92
    s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;
475
476
92
    s->decor   = decor_c;
477
478
92
    s->dmix_sub_xch   = dmix_sub_xch_c;
479
92
    s->dmix_sub       = dmix_sub_c;
480
92
    s->dmix_add       = dmix_add_c;
481
92
    s->dmix_scale     = dmix_scale_c;
482
92
    s->dmix_scale_inv = dmix_scale_inv_c;
483
484
92
    s->assemble_freq_bands = assemble_freq_bands_c;
485
486
92
    s->lbr_bank = lbr_bank_c;
487
92
    s->lfe_iir = lfe_iir_c;
488
489
    if (ARCH_X86)
490
92
        ff_dcadsp_init_x86(s);
491
92
}