GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/tests/dct.c Lines: 208 250 83.2 %
Date: 2021-01-26 11:44:58 Branches: 150 178 84.3 %

Line Branch Exec Source
1
/*
2
 * (c) 2001 Fabrice Bellard
3
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
22
/**
23
 * @file
24
 * DCT test (c) 2001 Fabrice Bellard
25
 * Started from sample code by Juan J. Sierralta P.
26
 */
27
28
#include "config.h"
29
#include <stdlib.h>
30
#include <stdio.h>
31
#include <string.h>
32
#if HAVE_UNISTD_H
33
#include <unistd.h>
34
#endif
35
#include <math.h>
36
37
#include "libavutil/cpu.h"
38
#include "libavutil/common.h"
39
#include "libavutil/internal.h"
40
#include "libavutil/lfg.h"
41
#include "libavutil/mem_internal.h"
42
#include "libavutil/time.h"
43
44
#include "libavcodec/dct.h"
45
#include "libavcodec/idctdsp.h"
46
#include "libavcodec/simple_idct.h"
47
#include "libavcodec/xvididct.h"
48
#include "libavcodec/aandcttab.h"
49
#include "libavcodec/faandct.h"
50
#include "libavcodec/faanidct.h"
51
#include "libavcodec/dctref.h"
52
53
struct algo {
54
    const char *name;
55
    void (*func)(int16_t *block);
56
    enum idct_permutation_type perm_type;
57
    int cpu_flag;
58
    int nonspec;
59
};
60
61
static const struct algo fdct_tab[] = {
62
    { "REF-DBL",     ff_ref_fdct,          FF_IDCT_PERM_NONE },
63
    { "IJG-AAN-INT", ff_fdct_ifast,        FF_IDCT_PERM_NONE },
64
    { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE },
65
#if CONFIG_FAANDCT
66
    { "FAAN",        ff_faandct,           FF_IDCT_PERM_NONE },
67
#endif /* CONFIG_FAANDCT */
68
};
69
70
60000
static void ff_prores_idct_wrap(int16_t *dst){
71
60000
    LOCAL_ALIGNED(16, int16_t, qmat, [64]);
72
    int i;
73
74
3900000
    for(i=0; i<64; i++){
75
3840000
        qmat[i]=4;
76
    }
77
60000
    ff_prores_idct_10(dst, qmat);
78
3900000
    for(i=0; i<64; i++) {
79
3840000
         dst[i] -= 512;
80
    }
81
60000
}
82
83
static const struct algo idct_tab[] = {
84
    { "REF-DBL",     ff_ref_idct,          FF_IDCT_PERM_NONE },
85
    { "INT",         ff_j_rev_dct,         FF_IDCT_PERM_LIBMPEG2 },
86
    { "SIMPLE-C",    ff_simple_idct_int16_8bit,     FF_IDCT_PERM_NONE },
87
    { "SIMPLE-C10",  ff_simple_idct_int16_10bit,    FF_IDCT_PERM_NONE },
88
    { "SIMPLE-C12",  ff_simple_idct_int16_12bit,    FF_IDCT_PERM_NONE, 0, 1 },
89
    { "PR-C",        ff_prores_idct_wrap,  FF_IDCT_PERM_NONE, 0, 1 },
90
#if CONFIG_FAANIDCT
91
    { "FAANI",       ff_faanidct,          FF_IDCT_PERM_NONE },
92
#endif /* CONFIG_FAANIDCT */
93
#if CONFIG_MPEG4_DECODER
94
    { "XVID",        ff_xvid_idct,         FF_IDCT_PERM_NONE, 0, 1 },
95
#endif /* CONFIG_MPEG4_DECODER */
96
};
97
98
#if ARCH_AARCH64
99
#include "aarch64/dct.c"
100
#elif ARCH_ARM
101
#include "arm/dct.c"
102
#elif ARCH_PPC
103
#include "ppc/dct.c"
104
#elif ARCH_X86
105
#include "x86/dct.c"
106
#else
107
static const struct algo fdct_tab_arch[] = { { 0 } };
108
static const struct algo idct_tab_arch[] = { { 0 } };
109
#endif
110
111
/* Shift used when rescaling the output of the "IJG-AAN-INT" forward DCT. */
#define AANSCALE_BITS 12

/* Number of generated blocks per accuracy test. */
#define NB_ITS 20000
/* Number of transforms per timing batch in the speed test. */
#define NB_ITS_SPEED 50000
115
116
DECLARE_ALIGNED(16, static int16_t, block)[64];
117
DECLARE_ALIGNED(8,  static int16_t, block1)[64];
118
119
1220000
/**
 * Generate one block of test input.
 *
 * The block is cleared first; any test number other than 0, 1 or 2 leaves
 * it all-zero.
 *
 * @param block   destination, 64 coefficients
 * @param test    0: every coefficient random in [-vals, vals); when testing
 *                   an IDCT the block is then passed through ff_ref_fdct()
 *                   and every coefficient shifted right by 3
 *                1: 1 to 10 random positions get a random value in
 *                   [-vals, vals), the rest stays zero
 *                2: block[0] random in [-8*vals, 8*vals), block[63] set to
 *                   the inverted low bit of block[0]
 * @param is_idct non-zero when the input is meant for an inverse transform
 * @param prng    random number generator state, advanced by each draw
 * @param vals    magnitude of the generated values
 */
static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
{
    int k, count;

    memset(block, 0, 64 * sizeof(*block));

    switch (test) {
    case 0:
        for (k = 0; k < 64; k++)
            block[k] = (av_lfg_get(prng) % (2 * vals)) - vals;
        if (is_idct) {
            ff_ref_fdct(block);
            for (k = 0; k < 64; k++)
                block[k] >>= 3;
        }
        break;
    case 1:
        count = av_lfg_get(prng) % 10 + 1;
        for (k = 0; k < count; k++) {
            /* position is drawn before the value; keep that order */
            int pos = av_lfg_get(prng) % 64;
            block[pos] = av_lfg_get(prng) % (2 * vals) - vals;
        }
        break;
    case 2:
        block[0]  = av_lfg_get(prng) % (16 * vals) - (8 * vals);
        block[63] = (block[0] & 1) ^ 1;
        break;
    }
}
148
149
1220000
static void permute(int16_t dst[64], const int16_t src[64],
150
                    enum idct_permutation_type perm_type)
151
{
152
    int i;
153
154
#if ARCH_X86
155
1220000
    if (permute_x86(dst, src, perm_type))
156
120000
        return;
157
#endif
158
159

1100000
    switch (perm_type) {
160
60000
    case FF_IDCT_PERM_LIBMPEG2:
161
3900000
        for (i = 0; i < 64; i++)
162
3840000
            dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i];
163
60000
        break;
164
    case FF_IDCT_PERM_PARTTRANS:
165
        for (i = 0; i < 64; i++)
166
            dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
167
        break;
168
480000
    case FF_IDCT_PERM_TRANSPOSE:
169
31200000
        for (i = 0; i < 64; i++)
170
30720000
            dst[(i>>3) | ((i<<3)&0x38)] = src[i];
171
480000
        break;
172
560000
    default:
173
36400000
        for (i = 0; i < 64; i++)
174
35840000
            dst[i] = src[i];
175
560000
        break;
176
    }
177
}
178
179
61
static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
180
{
181
61
    void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
182
    int it, i, scale;
183
    int err_inf, v;
184
61
    int64_t err2, ti, ti1, it1, err_sum = 0;
185
61
    int64_t sysErr[64], sysErrMax = 0;
186
61
    int64_t err2_matrix[64], err2_max = 0;
187
61
    int maxout = 0;
188
61
    int blockSumErrMax = 0, blockSumErr;
189
    AVLFG prng;
190
61
    const int vals=1<<bits;
191
    double omse, ome;
192
    int spec_err;
193
194
61
    av_lfg_init(&prng, 1);
195
196
61
    err_inf = 0;
197
61
    err2 = 0;
198
3965
    for (i = 0; i < 64; i++)
199
3904
        err2_matrix[i] = sysErr[i] = 0;
200
1220061
    for (it = 0; it < NB_ITS; it++) {
201
1220000
        init_block(block1, test, is_idct, &prng, vals);
202
1220000
        permute(block, block1, dct->perm_type);
203
204
1220000
        dct->func(block);
205
1220000
        emms_c();
206
207
1220000
        if (!strcmp(dct->name, "IJG-AAN-INT")) {
208
1300000
            for (i = 0; i < 64; i++) {
209
1280000
                scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
210
1280000
                block[i] = (block[i] * scale) >> AANSCALE_BITS;
211
            }
212
        }
213
214
1220000
        ref(block1);
215
1220000
        if (!strcmp(dct->name, "PR-SSE2"))
216
3900000
            for (i = 0; i < 64; i++)
217
3840000
                block1[i] = av_clip(block1[i], 4-512, 1019-512);
218
219
1220000
        blockSumErr = 0;
220
79300000
        for (i = 0; i < 64; i++) {
221
78080000
            int err = block[i] - block1[i];
222
78080000
            err_sum += err;
223
78080000
            v = abs(err);
224
78080000
            if (v > err_inf)
225
68
                err_inf = v;
226
78080000
            err2_matrix[i] += v * v;
227
78080000
            err2 += v * v;
228
78080000
            sysErr[i] += block[i] - block1[i];
229
78080000
            blockSumErr += v;
230
78080000
            if (abs(block[i]) > maxout)
231
842
                maxout = abs(block[i]);
232
        }
233
1220000
        if (blockSumErrMax < blockSumErr)
234
271
            blockSumErrMax = blockSumErr;
235
    }
236
3965
    for (i = 0; i < 64; i++) {
237
3904
        sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
238
3904
        err2_max  = FFMAX(err2_max , FFABS(err2_matrix[i]));
239
    }
240
241
3965
    for (i = 0; i < 64; i++) {
242
3904
        if (i % 8 == 0)
243
488
            printf("\n");
244
3904
        printf("%7d ", (int) sysErr[i]);
245
    }
246
61
    printf("\n");
247
248
61
    omse = (double) err2 / NB_ITS / 64;
249
61
    ome  = (double) err_sum / NB_ITS / 64;
250
251


61
    spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
252
61
    if (test < 2)
253

43
        spec_err = is_idct && ((double) err2_max / NB_ITS > 0.06 || (double) sysErrMax / NB_ITS > 0.015);
254
255
61
    printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
256
           is_idct ? "IDCT" : "DCT", dct->name, err_inf,
257
61
           omse, ome, (double) sysErrMax / NB_ITS,
258
           maxout, blockSumErrMax);
259
260

61
    if (spec_err && !dct->nonspec) {
261
        printf("Failed!\n");
262
        return 1;
263
    }
264
265
61
    if (!speed)
266
61
        return 0;
267
268
    /* speed test */
269
270
    init_block(block, test, is_idct, &prng, vals);
271
    permute(block1, block, dct->perm_type);
272
273
    ti = av_gettime_relative();
274
    it1 = 0;
275
    do {
276
        for (it = 0; it < NB_ITS_SPEED; it++) {
277
            memcpy(block, block1, sizeof(block));
278
            dct->func(block);
279
        }
280
        emms_c();
281
        it1 += NB_ITS_SPEED;
282
        ti1 = av_gettime_relative() - ti;
283
    } while (ti1 < 1000000);
284
285
    printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
286
           (double) it1 * 1000.0 / (double) ti1);
287
288
    return 0;
289
}
290
291
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
292
DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
293
294
20000
static void idct248_ref(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
295
{
296
    static int init;
297
    static double c8[8][8];
298
    static double c4[4][4];
299
    double block1[64], block2[64], block3[64];
300
    double s, sum, v;
301
    int i, j, k;
302
303
20000
    if (!init) {
304
1
        init = 1;
305
306
9
        for (i = 0; i < 8; i++) {
307
8
            sum = 0;
308
72
            for (j = 0; j < 8; j++) {
309
64
                s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
310
64
                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
311
64
                sum += c8[i][j] * c8[i][j];
312
            }
313
        }
314
315
5
        for (i = 0; i < 4; i++) {
316
4
            sum = 0;
317
20
            for (j = 0; j < 4; j++) {
318
16
                s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
319
16
                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
320
16
                sum += c4[i][j] * c4[i][j];
321
            }
322
        }
323
    }
324
325
    /* butterfly */
326
20000
    s = 0.5 * sqrt(2.0);
327
100000
    for (i = 0; i < 4; i++) {
328
720000
        for (j = 0; j < 8; j++) {
329
640000
            block1[8 * (2 * i) + j] =
330
640000
                (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
331
640000
            block1[8 * (2 * i + 1) + j] =
332
640000
                (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
333
        }
334
    }
335
336
    /* idct8 on lines */
337
180000
    for (i = 0; i < 8; i++) {
338
1440000
        for (j = 0; j < 8; j++) {
339
1280000
            sum = 0;
340
11520000
            for (k = 0; k < 8; k++)
341
10240000
                sum += c8[k][j] * block1[8 * i + k];
342
1280000
            block2[8 * i + j] = sum;
343
        }
344
    }
345
346
    /* idct4 */
347
180000
    for (i = 0; i < 8; i++) {
348
800000
        for (j = 0; j < 4; j++) {
349
            /* top */
350
640000
            sum = 0;
351
3200000
            for (k = 0; k < 4; k++)
352
2560000
                sum += c4[k][j] * block2[8 * (2 * k) + i];
353
640000
            block3[8 * (2 * j) + i] = sum;
354
355
            /* bottom */
356
640000
            sum = 0;
357
3200000
            for (k = 0; k < 4; k++)
358
2560000
                sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
359
640000
            block3[8 * (2 * j + 1) + i] = sum;
360
        }
361
    }
362
363
    /* clamp and store the result */
364
180000
    for (i = 0; i < 8; i++) {
365
1440000
        for (j = 0; j < 8; j++) {
366
1280000
            v = block3[8 * i + j];
367
1280000
            if      (v < 0)   v = 0;
368
1226527
            else if (v > 255) v = 255;
369
1280000
            dest[i * linesize + j] = (int) rint(v);
370
        }
371
    }
372
20000
}
373
374
1
static void idct248_error(const char *name,
375
                          void (*idct248_put)(uint8_t *dest,
376
                                              ptrdiff_t line_size,
377
                                              int16_t *block),
378
                          int speed)
379
{
380
    int it, i, it1, ti, ti1, err_max, v;
381
    AVLFG prng;
382
383
1
    av_lfg_init(&prng, 1);
384
385
    /* just one test to see if code is correct (precision is less
386
       important here) */
387
1
    err_max = 0;
388
20001
    for (it = 0; it < NB_ITS; it++) {
389
        /* XXX: use forward transform to generate values */
390
1300000
        for (i = 0; i < 64; i++)
391
1280000
            block1[i] = av_lfg_get(&prng) % 256 - 128;
392
20000
        block1[0] += 1024;
393
394
1300000
        for (i = 0; i < 64; i++)
395
1280000
            block[i] = block1[i];
396
20000
        idct248_ref(img_dest1, 8, block);
397
398
1300000
        for (i = 0; i < 64; i++)
399
1280000
            block[i] = block1[i];
400
20000
        idct248_put(img_dest, 8, block);
401
402
1300000
        for (i = 0; i < 64; i++) {
403
1280000
            v = abs((int) img_dest[i] - (int) img_dest1[i]);
404
1280000
            if (v == 255)
405
                printf("%d %d\n", img_dest[i], img_dest1[i]);
406
1280000
            if (v > err_max)
407
1
                err_max = v;
408
        }
409
#if 0
410
        printf("ref=\n");
411
        for(i=0;i<8;i++) {
412
            int j;
413
            for(j=0;j<8;j++) {
414
                printf(" %3d", img_dest1[i*8+j]);
415
            }
416
            printf("\n");
417
        }
418
419
        printf("out=\n");
420
        for(i=0;i<8;i++) {
421
            int j;
422
            for(j=0;j<8;j++) {
423
                printf(" %3d", img_dest[i*8+j]);
424
            }
425
            printf("\n");
426
        }
427
#endif
428
    }
429
1
    printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
430
431
1
    if (!speed)
432
1
        return;
433
434
    ti = av_gettime_relative();
435
    it1 = 0;
436
    do {
437
        for (it = 0; it < NB_ITS_SPEED; it++) {
438
            for (i = 0; i < 64; i++)
439
                block[i] = block1[i];
440
            idct248_put(img_dest, 8, block);
441
        }
442
        emms_c();
443
        it1 += NB_ITS_SPEED;
444
        ti1 = av_gettime_relative() - ti;
445
    } while (ti1 < 1000000);
446
447
    printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
448
           (double) it1 * 1000.0 / (double) ti1);
449
}
450
451
/** Write the command-line usage summary to standard output. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>] [<bits>]\n"
        "test-number 0 -> test with random matrixes\n"
        "            1 -> test with random sparse matrixes\n"
        "            2 -> do 3. test from MPEG-4 std\n"
        "bits        Number of time domain bits to use, 8 is default\n"
        "-i          test IDCT implementations\n"
        "-4          test IDCT248 implementations\n"
        "-t          speed test\n";

    fputs(usage, stdout);
}
462
463
#if !HAVE_GETOPT
464
#include "compat/getopt.c"
465
#endif
466
467
5
int main(int argc, char **argv)
468
{
469
5
    int test_idct = 0, test_248_dct = 0;
470
    int c, i;
471
5
    int test = 1;
472
5
    int speed = 0;
473
5
    int err = 0;
474
5
    int bits=8;
475
476
5
    ff_ref_dct_init();
477
478
    for (;;) {
479
9
        c = getopt(argc, argv, "ih4t");
480
9
        if (c == -1)
481
5
            break;
482

4
        switch (c) {
483
3
        case 'i':
484
3
            test_idct = 1;
485
3
            break;
486
1
        case '4':
487
1
            test_248_dct = 1;
488
1
            break;
489
        case 't':
490
            speed = 1;
491
            break;
492
        default:
493
        case 'h':
494
            help();
495
            return 0;
496
        }
497
    }
498
499
5
    if (optind < argc)
500
3
        test = atoi(argv[optind]);
501
5
    if(optind+1 < argc) bits= atoi(argv[optind+1]);
502
503
5
    printf("ffmpeg DCT/IDCT test\n");
504
505
5
    if (test_248_dct) {
506
1
        idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
507
    } else {
508
4
        const int cpu_flags = av_get_cpu_flags();
509
4
        if (test_idct) {
510
27
            for (i = 0; i < FF_ARRAY_ELEMS(idct_tab); i++)
511
24
                err |= dct_error(&idct_tab[i], test, test_idct, speed, bits);
512
513
33
            for (i = 0; idct_tab_arch[i].name; i++)
514
30
                if (!(~cpu_flags & idct_tab_arch[i].cpu_flag))
515
30
                    err |= dct_error(&idct_tab_arch[i], test, test_idct, speed, bits);
516
        }
517
#if CONFIG_FDCTDSP
518
        else {
519
5
            for (i = 0; i < FF_ARRAY_ELEMS(fdct_tab); i++)
520
4
                err |= dct_error(&fdct_tab[i], test, test_idct, speed, bits);
521
522
4
            for (i = 0; fdct_tab_arch[i].name; i++)
523
3
                if (!(~cpu_flags & fdct_tab_arch[i].cpu_flag))
524
3
                    err |= dct_error(&fdct_tab_arch[i], test, test_idct, speed, bits);
525
        }
526
#endif /* CONFIG_FDCTDSP */
527
    }
528
529
5
    if (err)
530
        printf("Error: %d.\n", err);
531
532
5
    return !!err;
533
}