GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/dcadsp.c Lines: 171 231 74.0 %
Date: 2019-11-20 04:07:19 Branches: 68 94 72.3 %

Line Branch Exec Source
1
/*
2
 * Copyright (C) 2016 foo86
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20
21
#include "libavutil/mem.h"
22
23
#include "dcadsp.h"
24
#include "dcamath.h"
25
26
9736
/**
 * Fill a range of subbands from a signed 8-bit lookup table.
 *
 * For every subband i in [sb_start, sb_end) the table row
 * hf_vq[vq_index[i]] is multiplied element-wise by scale_factors[i][0],
 * rounded with (x + 8) >> 4, passed through clip23() and stored to
 * dst[i][ofs] .. dst[i][ofs + len - 1].
 *
 * @param dst           per-subband output buffers
 * @param vq_index      per-subband row index into hf_vq
 * @param hf_vq         lookup table, 32 entries per row
 * @param scale_factors per-subband scale; only element [i][0] is read
 * @param sb_start      first subband to process
 * @param sb_end        one past the last subband to process
 * @param ofs           offset of the first written sample in each subband
 * @param len           number of samples written per subband
 *                      (NOTE(review): a row holds 32 entries, so len is
 *                      presumably <= 32 -- confirm against callers)
 */
static void decode_hf_c(int32_t **dst,
                        const int32_t *vq_index,
                        const int8_t hf_vq[1024][32],
                        int32_t scale_factors[32][2],
                        ptrdiff_t sb_start, ptrdiff_t sb_end,
                        ptrdiff_t ofs, ptrdiff_t len)
{
    // Counters share the type of their bounds so no narrowing can occur.
    ptrdiff_t i, j;

    for (i = sb_start; i < sb_end; i++) {
        const int8_t *coeff = hf_vq[vq_index[i]];
        int32_t scale = scale_factors[i][0];

        for (j = 0; j < len; j++)
            dst[i][j + ofs] = clip23(coeff[j] * scale + (1 << 3) >> 4);
    }
}
42
43
91
/**
 * Derive a range of subbands from a source channel by per-subband scaling.
 *
 * For every subband i in [sb_start, sb_end) and every sample position
 * p in [ofs, ofs + len):
 *     dst[i][p] = clip23(mul17(src[i][p], scale_factors[i]))
 *
 * @param dst           per-subband output buffers
 * @param src           per-subband input buffers
 * @param scale_factors one scale per subband
 * @param sb_start      first subband to process
 * @param sb_end        one past the last subband to process
 * @param ofs           offset of the first processed sample
 * @param len           number of samples processed per subband
 */
static void decode_joint_c(int32_t **dst, int32_t **src,
                           const int32_t *scale_factors,
                           ptrdiff_t sb_start, ptrdiff_t sb_end,
                           ptrdiff_t ofs, ptrdiff_t len)
{
    // Counters share the type of their bounds so no narrowing can occur.
    ptrdiff_t i, j;

    for (i = sb_start; i < sb_end; i++) {
        int32_t scale = scale_factors[i];

        for (j = 0; j < len; j++)
            dst[i][j + ofs] = clip23(mul17(src[i][j + ofs], scale));
    }
}
56
57
603
/**
 * Interpolate decimated LFE samples with a FIR filter (float output).
 *
 * dec_select picks the interpolation layout:
 *   - factor  = 64 << dec_select  output samples per input sample
 *   - ncoeffs =  8 >> dec_select  filter taps per output sample
 * and npcmblocks >> (dec_select + 1) input samples are consumed.
 *
 * For each input sample the first factor/2 outputs walk the coefficient
 * table forwards from index 0, the second factor/2 outputs walk it
 * backwards from index 255; indices 0..255 of filter_coeff are read.
 *
 * @param pcm_samples  output, factor floats written per input sample
 * @param lfe_samples  points at the current input sample; the ncoeffs - 1
 *                     samples stored before it are read as history
 * @param filter_coeff coefficient table
 * @param npcmblocks   block count that determines how many inputs are used
 * @param dec_select   layout selector (the wrappers in this file pass 0 or 1)
 */
static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
                            const float *filter_coeff, ptrdiff_t npcmblocks,
                            int dec_select)
{
    // Select decimation factor
    int factor = 64 << dec_select;
    int ncoeffs = 8 >> dec_select;
    // Kept as ptrdiff_t so the sample count is never narrowed.
    ptrdiff_t nlfesamples = npcmblocks >> (dec_select + 1);
    ptrdiff_t i;
    int j, k;

    for (i = 0; i < nlfesamples; i++) {
        // One decimated sample generates 64 or 128 interpolated ones
        for (j = 0; j < factor / 2; j++) {
            float a = 0;
            float b = 0;

            for (k = 0; k < ncoeffs; k++) {
                a += filter_coeff[      j * ncoeffs + k] * lfe_samples[-k];
                b += filter_coeff[255 - j * ncoeffs - k] * lfe_samples[-k];
            }

            pcm_samples[             j] = a;
            pcm_samples[factor / 2 + j] = b;
        }

        lfe_samples++;
        pcm_samples += factor;
    }
}
86
87
603
/**
 * LFE FIR interpolation with dec_select fixed to 0
 * (64 outputs per input sample, 8 taps each).
 *
 * All arguments are forwarded unchanged to lfe_fir_float_c().
 */
static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
                             const float *filter_coeff, ptrdiff_t npcmblocks)
{
    const int dec_select = 0;

    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks,
                    dec_select);
}
92
93
/**
 * LFE FIR interpolation with dec_select fixed to 1
 * (128 outputs per input sample, 4 taps each).
 *
 * All arguments are forwarded unchanged to lfe_fir_float_c().
 */
static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
                             const float *filter_coeff, ptrdiff_t npcmblocks)
{
    const int dec_select = 1;

    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks,
                    dec_select);
}
98
99
35
/**
 * Double the sample count of a float signal by linear interpolation.
 *
 * Each input sample src[i] produces two outputs that blend it with the
 * previous input sample:
 *     0.25 * src[i] + 0.75 * prev
 *     0.75 * src[i] + 0.25 * prev
 *
 * @param dst  output, 2 * len floats are written
 * @param src  input, len floats are read
 * @param hist in: sample preceding src[0]; out: last sample consumed
 *             (left unchanged when len is 0)
 * @param len  number of input samples
 */
static void lfe_x96_float_c(float *dst, const float *src,
                            float *hist, ptrdiff_t len)
{
    float prev = *hist;
    // Counter shares the type of len so no narrowing can occur.
    ptrdiff_t i;

    for (i = 0; i < len; i++) {
        float a = 0.25f * src[i] + 0.75f * prev;
        float b = 0.75f * src[i] + 0.25f * prev;
        prev = src[i];
        *dst++ = a;
        *dst++ = b;
    }

    *hist = prev;
}
115
116
3135
static void sub_qmf32_float_c(SynthFilterContext *synth,
117
                              FFTContext *imdct,
118
                              float *pcm_samples,
119
                              int32_t **subband_samples_lo,
120
                              int32_t **subband_samples_hi,
121
                              float *hist1, int *offset, float *hist2,
122
                              const float *filter_coeff, ptrdiff_t npcmblocks,
123
                              float scale)
124
{
125
3135
    LOCAL_ALIGNED_32(float, input, [32]);
126
    int i, j;
127
128
53295
    for (j = 0; j < npcmblocks; j++) {
129
        // Load in one sample from each subband
130
1655280
        for (i = 0; i < 32; i++) {
131
1605120
            if ((i - 1) & 2)
132
802560
                input[i] = -subband_samples_lo[i][j];
133
            else
134
802560
                input[i] =  subband_samples_lo[i][j];
135
        }
136
137
        // One subband sample generates 32 interpolated ones
138
50160
        synth->synth_filter_float(imdct, hist1, offset,
139
                                  hist2, filter_coeff,
140
                                  pcm_samples, input, scale);
141
50160
        pcm_samples += 32;
142
    }
143
3135
}
144
145
196
static void sub_qmf64_float_c(SynthFilterContext *synth,
146
                              FFTContext *imdct,
147
                              float *pcm_samples,
148
                              int32_t **subband_samples_lo,
149
                              int32_t **subband_samples_hi,
150
                              float *hist1, int *offset, float *hist2,
151
                              const float *filter_coeff, ptrdiff_t npcmblocks,
152
                              float scale)
153
{
154
196
    LOCAL_ALIGNED_32(float, input, [64]);
155
    int i, j;
156
157
196
    if (!subband_samples_hi)
158
        memset(&input[32], 0, sizeof(input[0]) * 32);
159
160
3332
    for (j = 0; j < npcmblocks; j++) {
161
        // Load in one sample from each subband
162
3136
        if (subband_samples_hi) {
163
            // Full 64 subbands, first 32 are residual coded
164
103488
            for (i =  0; i < 32; i++) {
165
100352
                if ((i - 1) & 2)
166
50176
                    input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j];
167
                else
168
50176
                    input[i] =  subband_samples_lo[i][j] + subband_samples_hi[i][j];
169
            }
170
103488
            for (i = 32; i < 64; i++) {
171
100352
                if ((i - 1) & 2)
172
50176
                    input[i] = -subband_samples_hi[i][j];
173
                else
174
50176
                    input[i] =  subband_samples_hi[i][j];
175
            }
176
        } else {
177
            // Only first 32 subbands
178
            for (i =  0; i < 32; i++) {
179
                if ((i - 1) & 2)
180
                    input[i] = -subband_samples_lo[i][j];
181
                else
182
                    input[i] =  subband_samples_lo[i][j];
183
            }
184
        }
185
186
        // One subband sample generates 64 interpolated ones
187
3136
        synth->synth_filter_float_64(imdct, hist1, offset,
188
                                     hist2, filter_coeff,
189
                                     pcm_samples, input, scale);
190
3136
        pcm_samples += 64;
191
    }
192
196
}
193
194
1343
/**
 * Interpolate decimated LFE samples with a FIR filter (fixed point).
 *
 * npcmblocks >> 1 input samples are consumed and each one produces 64
 * outputs from 8 taps: outputs 0..31 walk the coefficient table forwards
 * from index 0, outputs 32..63 walk it backwards from index 255.
 * Products are accumulated in 64 bits and reduced with clip23(norm23()).
 *
 * @param pcm_samples  output, 64 values written per input sample
 * @param lfe_samples  points at the current input sample; the 7 samples
 *                     stored before it are read as history
 * @param filter_coeff coefficient table, indices 0..255 are read
 * @param npcmblocks   block count that determines how many inputs are used
 */
static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
                            const int32_t *filter_coeff, ptrdiff_t npcmblocks)
{
    // Select decimation factor
    // Kept as ptrdiff_t so the sample count is never narrowed.
    ptrdiff_t nlfesamples = npcmblocks >> 1;
    ptrdiff_t i;
    int j, k;

    for (i = 0; i < nlfesamples; i++) {
        // One decimated sample generates 64 interpolated ones
        for (j = 0; j < 32; j++) {
            int64_t a = 0;
            int64_t b = 0;

            for (k = 0; k < 8; k++) {
                a += (int64_t)filter_coeff[      j * 8 + k] * lfe_samples[-k];
                b += (int64_t)filter_coeff[255 - j * 8 - k] * lfe_samples[-k];
            }

            pcm_samples[     j] = clip23(norm23(a));
            pcm_samples[32 + j] = clip23(norm23(b));
        }

        lfe_samples++;
        pcm_samples += 64;
    }
}
220
221
84
/**
 * Double the sample count of a fixed-point signal by weighted blending.
 *
 * Each input sample src[i] produces two outputs that blend it with the
 * previous input sample using the integer weights 2097471 and 6291137
 * (roughly 0.25 and 0.75 if norm23() scales by 2^-23 -- confirm in
 * dcamath.h):
 *     clip23(norm23(2097471 * src[i] + 6291137 * prev))
 *     clip23(norm23(6291137 * src[i] + 2097471 * prev))
 *
 * @param dst  output, 2 * len values are written
 * @param src  input, len values are read
 * @param hist in: sample preceding src[0]; out: last sample consumed
 * @param len  number of input samples
 */
static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
                            int32_t *hist, ptrdiff_t len)
{
    int32_t prev = *hist;
    // Counter shares the type of len so no narrowing can occur.
    ptrdiff_t i;

    for (i = 0; i < len; i++) {
        int64_t a = INT64_C(2097471) * src[i] + INT64_C(6291137) * prev;
        int64_t b = INT64_C(6291137) * src[i] + INT64_C(2097471) * prev;
        prev = src[i];
        *dst++ = clip23(norm23(a));
        *dst++ = clip23(norm23(b));
    }

    *hist = prev;
}
237
238
7336
static void sub_qmf32_fixed_c(SynthFilterContext *synth,
239
                              DCADCTContext *imdct,
240
                              int32_t *pcm_samples,
241
                              int32_t **subband_samples_lo,
242
                              int32_t **subband_samples_hi,
243
                              int32_t *hist1, int *offset, int32_t *hist2,
244
                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
245
{
246
7336
    LOCAL_ALIGNED_32(int32_t, input, [32]);
247
    int i, j;
248
249
124712
    for (j = 0; j < npcmblocks; j++) {
250
        // Load in one sample from each subband
251
3873408
        for (i = 0; i < 32; i++)
252
3756032
            input[i] = subband_samples_lo[i][j];
253
254
        // One subband sample generates 32 interpolated ones
255
117376
        synth->synth_filter_fixed(imdct, hist1, offset,
256
                                  hist2, filter_coeff,
257
                                  pcm_samples, input);
258
117376
        pcm_samples += 32;
259
    }
260
7336
}
261
262
420
static void sub_qmf64_fixed_c(SynthFilterContext *synth,
263
                              DCADCTContext *imdct,
264
                              int32_t *pcm_samples,
265
                              int32_t **subband_samples_lo,
266
                              int32_t **subband_samples_hi,
267
                              int32_t *hist1, int *offset, int32_t *hist2,
268
                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
269
{
270
420
    LOCAL_ALIGNED_32(int32_t, input, [64]);
271
    int i, j;
272
273
420
    if (!subband_samples_hi)
274
420
        memset(&input[32], 0, sizeof(input[0]) * 32);
275
276
7140
    for (j = 0; j < npcmblocks; j++) {
277
        // Load in one sample from each subband
278
6720
        if (subband_samples_hi) {
279
            // Full 64 subbands, first 32 are residual coded
280
            for (i =  0; i < 32; i++)
281
                input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
282
            for (i = 32; i < 64; i++)
283
                input[i] = subband_samples_hi[i][j];
284
        } else {
285
            // Only first 32 subbands
286
221760
            for (i =  0; i < 32; i++)
287
215040
                input[i] = subband_samples_lo[i][j];
288
        }
289
290
        // One subband sample generates 64 interpolated ones
291
6720
        synth->synth_filter_fixed_64(imdct, hist1, offset,
292
                                     hist2, filter_coeff,
293
                                     pcm_samples, input);
294
6720
        pcm_samples += 64;
295
    }
296
420
}
297
298
1803
static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
299
{
300
    int i;
301
302
1025803
    for (i = 0; i < len; i++)
303
1024000
        dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3);
304
1803
}
305
306
/**
 * Subtract src, scaled by 5931520 via mul23(), from two destinations.
 *
 * For every i in [0, len):
 *     cs = mul23(src[i], 5931520)     // M_SQRT1_2 * (1 << 23)
 *     dst1[i] -= cs;  dst2[i] -= cs;
 *
 * The subtraction is carried out in unsigned arithmetic, matching
 * dmix_sub_c(), so a result outside the int32_t range wraps instead of
 * triggering signed-overflow undefined behaviour.
 *
 * @param dst1 first buffer updated in place
 * @param dst2 second buffer updated in place
 * @param src  samples to scale and subtract
 * @param len  number of samples
 */
static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
                           const int32_t *src, ptrdiff_t len)
{
    // Counter shares the type of len so no narrowing can occur.
    ptrdiff_t i;

    for (i = 0; i < len; i++) {
        int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);
        dst1[i] -= (unsigned)cs;
        dst2[i] -= (unsigned)cs;
    }
}
317
318
120
/**
 * Subtract a scaled copy of src from dst, in place.
 *
 * For every i in [0, len):  dst[i] -= mul15(src[i], coeff)
 * The unsigned cast makes the subtraction wrap rather than overflow as a
 * signed integer.
 *
 * @param dst   samples updated in place
 * @param src   samples to scale and subtract
 * @param coeff scale factor passed to mul15()
 * @param len   number of samples
 */
static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
{
    // Counter shares the type of len so no narrowing can occur.
    ptrdiff_t i;

    for (i = 0; i < len; i++)
        dst[i] -= (unsigned)mul15(src[i], coeff);
}
325
326
14
/**
 * Add a scaled copy of src to dst, in place.
 *
 * For every i in [0, len):  dst[i] += mul15(src[i], coeff)
 *
 * The addition is carried out in unsigned arithmetic, matching
 * dmix_sub_c(), so a sum outside the int32_t range wraps instead of
 * triggering signed-overflow undefined behaviour.
 *
 * @param dst   samples updated in place
 * @param src   samples to scale and add
 * @param coeff scale factor passed to mul15()
 * @param len   number of samples
 */
static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
{
    // Counter shares the type of len so no narrowing can occur.
    ptrdiff_t i;

    for (i = 0; i < len; i++)
        dst[i] += (unsigned)mul15(src[i], coeff);
}
333
334
204
/**
 * Scale a buffer in place with mul15().
 *
 * For every i in [0, len):  dst[i] = mul15(dst[i], scale)
 *
 * @param dst   samples updated in place
 * @param scale scale factor passed to mul15()
 * @param len   number of samples
 */
static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
{
    // Counter shares the type of len so no narrowing can occur.
    ptrdiff_t i;

    for (i = 0; i < len; i++)
        dst[i] = mul15(dst[i], scale);
}
341
342
/**
 * Scale a buffer in place with mul16().
 *
 * For every i in [0, len):  dst[i] = mul16(dst[i], scale_inv)
 *
 * @param dst       samples updated in place
 * @param scale_inv scale factor passed to mul16()
 * @param len       number of samples
 */
static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
{
    // Counter shares the type of len so no narrowing can occur.
    ptrdiff_t i;

    for (i = 0; i < len; i++)
        dst[i] = mul16(dst[i], scale_inv);
}
349
350
896
/**
 * dst[i] -= mul22(src[i], coeff) for every i in [0, len).
 *
 * Helper used by assemble_freq_bands_c(). dst is typed SUINT32 (defined
 * outside this file), presumably so the subtraction wraps -- confirm.
 *
 * @param dst   samples updated in place
 * @param src   samples to scale and subtract
 * @param coeff scale factor passed to mul22()
 * @param len   number of samples
 */
static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
{
    // Counter shares the type of len so no narrowing can occur.
    ptrdiff_t i;

    for (i = 0; i < len; i++)
        dst[i] -= mul22(src[i], coeff);
}
357
358
5376
/**
 * dst[i] -= mul23(src[i], coeff) for every i in [0, len).
 *
 * Helper used by assemble_freq_bands_c(). dst is typed SUINT32 (defined
 * outside this file), presumably so the subtraction wraps -- confirm.
 *
 * @param dst   samples updated in place
 * @param src   samples to scale and subtract
 * @param coeff scale factor passed to mul23()
 * @param len   number of samples
 */
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
{
    // Counter shares the type of len so no narrowing can occur.
    ptrdiff_t i;

    for (i = 0; i < len; i++)
        dst[i] -= mul23(src[i], coeff);
}
365
366
224
/**
 * Combine two bands into one interleaved output of 2 * len samples.
 *
 * Both inputs are modified in place:
 *   1. four alternating filter0() passes using coeff[0..3];
 *   2. eight rounds of three filter1() passes using coeff[i + 4],
 *      coeff[i + 12] and coeff[i + 4] again, with src0 stepping back one
 *      sample after each round (so up to 8 samples before the original
 *      src0 are touched);
 *   3. interleave: dst receives src1[k] followed by the matching src0
 *      sample for every k in [0, len). src0 is pre-incremented before each
 *      read, so the first value comes from 7 samples before the original
 *      src0.
 *
 * The order of the filter passes is significant and must not be changed.
 *
 * @param dst   output, 2 * len samples are written
 * @param src0  first band, modified in place; needs 8 samples of history
 * @param src1  second band, modified in place
 * @param coeff 20 coefficients, indices 0..19 are read
 * @param len   number of samples per band
 */
static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
                                  const int32_t *coeff, ptrdiff_t len)
{
    int i;
    // Sample counter shares the type of len so no narrowing can occur.
    ptrdiff_t n;

    filter0(src0, src1, coeff[0], len);
    filter0(src1, src0, coeff[1], len);
    filter0(src0, src1, coeff[2], len);
    filter0(src1, src0, coeff[3], len);

    for (i = 0; i < 8; i++, src0--) {
        filter1(src0, src1, coeff[i +  4], len);
        filter1(src1, src0, coeff[i + 12], len);
        filter1(src0, src1, coeff[i +  4], len);
    }

    for (n = 0; n < len; n++) {
        *dst++ = *src1++;
        *dst++ = *++src0;
    }
}
387
388
/**
 * Transform 8 input samples per band into 4 outputs, then cross-mix
 * neighbouring bands.
 *
 * Pass 1, for every band i in [0, len): the 8 samples
 * input[i][ofs - 4 .. ofs + 3] are windowed with coeff[0..3] and combined
 * with coeff[4..7] into output[i][0..3].
 *
 * Pass 2, for every band i in [12, len - 1): output[i][3] / output[i+1][0]
 * are cross-mixed with coeff[8], and output[i][2] / output[i+1][1] with
 * coeff[9]. Nothing happens in this pass when len <= 13.
 *
 * @param output result, rows 0 .. len - 1 are written
 * @param input  per-band sample buffers
 * @param coeff  10 coefficients (window 0..3, transform 4..7, mix 8..9)
 * @param ofs    centre position inside each input buffer
 * @param len    number of bands (output has 32 rows)
 */
static void lbr_bank_c(float output[32][4], float **input,
                       const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
{
    float SW0 = coeff[0];
    float SW1 = coeff[1];
    float SW2 = coeff[2];
    float SW3 = coeff[3];

    float C1  = coeff[4];
    float C2  = coeff[5];
    float C3  = coeff[6];
    float C4  = coeff[7];

    float AL1 = coeff[8];
    float AL2 = coeff[9];

    // Counter shares the type of len so no narrowing can occur.
    ptrdiff_t i;

    // Short window and 8 point forward MDCT
    for (i = 0; i < len; i++) {
        float *src = input[i] + ofs;

        float a = src[-4] * SW0 - src[-1] * SW3;
        float b = src[-3] * SW1 - src[-2] * SW2;
        float c = src[ 2] * SW1 + src[ 1] * SW2;
        float d = src[ 3] * SW0 + src[ 0] * SW3;

        output[i][0] = C1 * b - C2 * c + C4 * a - C3 * d;
        output[i][1] = C1 * d - C2 * a - C4 * b - C3 * c;
        output[i][2] = C3 * b + C2 * d - C4 * c + C1 * a;
        output[i][3] = C3 * a - C2 * b + C4 * d - C1 * c;
    }

    // Aliasing cancellation for high frequencies
    for (i = 12; i < len - 1; i++) {
        float a = output[i  ][3] * AL1;
        float b = output[i+1][0] * AL1;
        output[i  ][3] += b - a;
        output[i+1][0] -= b + a;
        a = output[i  ][2] * AL2;
        b = output[i+1][1] * AL2;
        output[i  ][2] += b - a;
        output[i+1][1] -= b + a;
    }
}
433
434
/**
 * Interpolate 64 input samples through a cascade of 5 second-order stages.
 *
 * For each of the 64 input samples, `factor` output samples are produced:
 * the first is driven by the input sample, the remaining factor - 1 by
 * zero. Every driven value runs through stages k = 0..4 in turn:
 *     tmp = hist[k][0] * iir[k][0] + hist[k][1] * iir[k][1] + res
 *     res = hist[k][0] * iir[k][2] + hist[k][1] * iir[k][3] + tmp
 *     hist[k][0] = hist[k][1];  hist[k][1] = tmp
 *
 * @param output result, 64 * factor floats are written
 * @param input  64 floats are read
 * @param iir    four coefficients per stage
 * @param hist   two state values per stage, updated in place
 * @param factor number of outputs per input sample
 */
static void lfe_iir_c(float *output, const float *input,
                      const float iir[5][4], float hist[5][2],
                      ptrdiff_t factor)
{
    float res, tmp;
    int i, k;
    // Counter shares the type of factor so no narrowing can occur.
    ptrdiff_t j;

    for (i = 0; i < 64; i++) {
        res = *input++;

        for (j = 0; j < factor; j++) {
            for (k = 0; k < 5; k++) {
                tmp = hist[k][0] * iir[k][0] + hist[k][1] * iir[k][1] + res;
                res = hist[k][0] * iir[k][2] + hist[k][1] * iir[k][3] + tmp;

                hist[k][0] = hist[k][1];
                hist[k][1] = tmp;
            }

            *output++ = res;
            // Later outputs of this input sample are driven by zero.
            res = 0;
        }
    }
}
458
459
92
/**
 * Populate a DCADSPContext with the C implementations from this file.
 *
 * Every function pointer of the context that this file implements is set.
 * When ARCH_X86 is non-zero, ff_dcadsp_init_x86() is called afterwards
 * (presumably to replace some entries with optimised versions -- it is
 * defined outside this file).
 *
 * @param s context to initialise
 */
av_cold void ff_dcadsp_init(DCADSPContext *s)
{
    // Subband decoding
    s->decode_hf           = decode_hf_c;
    s->decode_joint        = decode_joint_c;

    // Floating-point LFE interpolation and subband synthesis
    s->lfe_fir_float[0]    = lfe_fir0_float_c;
    s->lfe_fir_float[1]    = lfe_fir1_float_c;
    s->lfe_x96_float       = lfe_x96_float_c;
    s->sub_qmf_float[0]    = sub_qmf32_float_c;
    s->sub_qmf_float[1]    = sub_qmf64_float_c;

    // Fixed-point LFE interpolation and subband synthesis
    s->lfe_fir_fixed       = lfe_fir_fixed_c;
    s->lfe_x96_fixed       = lfe_x96_fixed_c;
    s->sub_qmf_fixed[0]    = sub_qmf32_fixed_c;
    s->sub_qmf_fixed[1]    = sub_qmf64_fixed_c;

    s->decor               = decor_c;

    // Downmix helpers
    s->dmix_sub_xch        = dmix_sub_xch_c;
    s->dmix_sub            = dmix_sub_c;
    s->dmix_add            = dmix_add_c;
    s->dmix_scale          = dmix_scale_c;
    s->dmix_scale_inv      = dmix_scale_inv_c;

    s->assemble_freq_bands = assemble_freq_bands_c;

    s->lbr_bank            = lbr_bank_c;
    s->lfe_iir             = lfe_iir_c;

    if (ARCH_X86)
        ff_dcadsp_init_x86(s);
}