GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/simple_idct_template.c Lines: 115 116 99.1 %
Date: 2020-10-23 17:01:47 Branches: 42 44 95.5 %

Line Branch Exec Source
1
/*
2
 * Simple IDCT
3
 *
4
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22
23
/**
24
 * @file
25
 * simpleidct in C.
26
 */
27
28
/* Based upon some commented-out C code from mpeg2dec (idct_mmx.c
29
 * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>). */
30
31
#include "simple_idct.h"
32
33
#include "bit_depth_template.c"
34
35
/* Drop any earlier definitions of the fixed-point parameters before they are
 * redefined below for the current BIT_DEPTH.
 * NOTE(review): presumably needed because this template is included several
 * times with different settings - confirm against the including files. */
#undef W1
36
#undef W2
37
#undef W3
38
#undef W4
39
#undef W5
40
#undef W6
41
#undef W7
42
#undef ROW_SHIFT
43
#undef COL_SHIFT
44
#undef DC_SHIFT
45
#undef MUL
46
#undef MAC
47
48
/* 8-bit: cosine weights scaled by 1<<14 (see the per-line formula); products
 * go through the MUL16/MAC16 helpers. */
#if BIT_DEPTH == 8
49
50
#define W1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
51
#define W2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
52
#define W3  19266  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
53
/* NOTE(review): the formula in the trailing comment yields 16384 for i = 4,
 * but 16383 is used here - looks deliberate, confirm before changing. */
#define W4  16383  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
54
#define W5  12873  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
55
#define W6  8867   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
56
#define W7  4520   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
57
58
#define ROW_SHIFT 11
59
#define COL_SHIFT 20
60
#define DC_SHIFT 3
61
62
#define MUL(a, b)    MUL16(a, b)
63
#define MAC(a, b, c) MAC16(a, b, c)
64
65
#elif BIT_DEPTH == 10 || BIT_DEPTH == 12
66
67
/* 10-bit: same weights as the 8-bit set except W3 (19265) and W4 (16384).
 * NOTE(review): the trailing numbers look like the same weights at a four
 * times larger scale - confirm. */
# if BIT_DEPTH == 10
68
#define W1 22725 // 90901
69
#define W2 21407 //  85627
70
#define W3 19265 //  77062
71
#define W4 16384 //  65535
72
#define W5 12873 //  51491
73
#define W6  8867 //  35468
74
#define W7  4520 //  18081
75
76
/* The shift set depends on EXTRA_SHIFT first, then on IN_IDCT_DEPTH == 32. */
#   ifdef EXTRA_SHIFT
77
#define ROW_SHIFT 13
78
#define COL_SHIFT 18
79
#define DC_SHIFT  1
80
#   elif IN_IDCT_DEPTH == 32
81
#define ROW_SHIFT 13
82
#define COL_SHIFT 21
83
#define DC_SHIFT  2
84
#   else
85
#define ROW_SHIFT 12
86
#define COL_SHIFT 19
87
#define DC_SHIFT  2
88
#   endif
89
90
/* BIT_DEPTH == 12: weights are roughly twice the 10-bit values. DC_SHIFT is
 * negative here; the DC-only shortcut in the row pass handles a negative
 * DC_SHIFT - extra_shift with a rounding right shift. */
# else
91
#define W1 45451
92
#define W2 42813
93
#define W3 38531
94
#define W4 32767
95
#define W5 25746
96
#define W6 17734
97
#define W7 9041
98
99
#define ROW_SHIFT 16
100
#define COL_SHIFT 17
101
#define DC_SHIFT -1
102
# endif
103
104
/* 10/12-bit: products are computed after casting one operand to SUINT; MAC
 * accumulates into its first argument. */
#define MUL(a, b)    ((int)((SUINT)(a) * (b)))
105
#define MAC(a, b, c) ((a) += (SUINT)(b) * (c))
106
107
#else
108
109
#error "Unsupported bitdepth"
110
111
#endif
112
113
/**
 * One-dimensional row pass over eight coefficients, performed in place.
 *
 * Reads row[0..7] and overwrites all eight entries. Even-indexed inputs feed
 * the a0..a3 accumulators and odd-indexed inputs feed b0..b3; outputs are
 * (aN + bN) for row[N] and (aN - bN) for row[7 - N], each shifted right by
 * ROW_SHIFT + extra_shift.
 *
 * When IN_IDCT_DEPTH == 16, a row whose only non-zero coefficient can be
 * row[0] takes a shortcut that fills the whole row with one scaled value.
 *
 * The function is named idctRowCondDC_extrashift (int16_t input) when
 * EXTRA_SHIFT is defined, and idctRowCondDC (idctin input) otherwise.
 *
 * @param row         eight consecutive coefficients, updated in place
 * @param extra_shift added to ROW_SHIFT for the final shifts and subtracted
 *                    from DC_SHIFT in the DC-only shortcut
 */
#ifdef EXTRA_SHIFT
114
19850496
static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
115
#else
116
1410238896
static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift)
117
#endif
118
{
119
    // NOTE(review): SUINT is defined elsewhere; presumably an unsigned type
    // so that intermediate overflow wraps - confirm.
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
120
121
// TODO: Add DC-only support for int32_t input
122
#if IN_IDCT_DEPTH == 16
123
#if HAVE_FAST_64BIT
124
// NOTE(review): ROW0_MASK presumably selects the 16 bits holding row[0] on
// either endianness, so the test below ignores row[0] - confirm.
#define ROW0_MASK (0xffffULL << 48 * HAVE_BIGENDIAN)
125
1430089392
    if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
126
        uint64_t temp;
127
1041246657
        // Scale row[0] up when the net shift is non-negative, otherwise
        // shift it down with rounding; keep only the low 16 bits.
        if (DC_SHIFT - extra_shift >= 0) {
128
1006612704
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
129
        } else {
130
34633953
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
131
        }
132
1041246657
        // Replicate the 16-bit value into all four 16-bit lanes of temp,
        // then store it over both halves of the row.
        temp += temp * (1 << 16);
133
1041246657
        temp += temp * ((uint64_t) 1 << 32);
134
1041246657
        AV_WN64A(row, temp);
135
1041246657
        AV_WN64A(row + 4, temp);
136
1041246657
        return;
137
    }
138
#else
139
    // Same shortcut using 32-bit reads plus an explicit row[1] test.
    if (!(AV_RN32A(row+2) |
140
          AV_RN32A(row+4) |
141
          AV_RN32A(row+6) |
142
          row[1])) {
143
        uint32_t temp;
144
        if (DC_SHIFT - extra_shift >= 0) {
145
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
146
        } else {
147
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
148
        }
149
        temp += temp * (1 << 16);
150
        AV_WN32A(row, temp);
151
        AV_WN32A(row+2, temp);
152
        AV_WN32A(row+4, temp);
153
        AV_WN32A(row+6, temp);
154
        return;
155
    }
156
#endif
157
#endif
158
159
388842735
    // Even part: the row[0] term plus the rounding bias for the final shift.
    a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
160
388842735
    a1 = a0;
161
388842735
    a2 = a0;
162
388842735
    a3 = a0;
163
164
388842735
    a0 += (SUINT)W2 * row[2];
165
388842735
    a1 += (SUINT)W6 * row[2];
166
388842735
    a2 -= (SUINT)W6 * row[2];
167
388842735
    a3 -= (SUINT)W2 * row[2];
168
169
388842735
    // Odd part from row[1] and row[3].
    b0 = MUL(W1, row[1]);
170
388842735
    MAC(b0, W3, row[3]);
171
388842735
    b1 = MUL(W3, row[1]);
172
388842735
    MAC(b1, -W7, row[3]);
173
388842735
    b2 = MUL(W5, row[1]);
174
388842735
    MAC(b2, -W1, row[3]);
175
388842735
    b3 = MUL(W7, row[1]);
176
388842735
    MAC(b3, -W5, row[3]);
177
178
// Add the row[4..7] terms only when the 64-bit read(s) starting at row + 4
// are non-zero.
#if IN_IDCT_DEPTH == 32
179
    if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
180
#else
181
388842735
    if (AV_RN64A(row + 4)) {
182
#endif
183
163641256
        a0 += (SUINT)  W4*row[4] + (SUINT)W6*row[6];
184
163641256
        a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6];
185
163641256
        a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6];
186
163641256
        a3 += (SUINT)  W4*row[4] - (SUINT)W6*row[6];
187
188
163641256
        MAC(b0,  W5, row[5]);
189
163641256
        MAC(b0,  W7, row[7]);
190
191
163641256
        MAC(b1, -W1, row[5]);
192
163641256
        MAC(b1, -W5, row[7]);
193
194
163641256
        MAC(b2,  W7, row[5]);
195
163641256
        MAC(b2,  W3, row[7]);
196
197
163641256
        MAC(b3,  W3, row[5]);
198
163641256
        MAC(b3, -W1, row[7]);
199
    }
200
201
388842735
    // Combine: sums and differences are cast to int before the shift.
    row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift);
202
388842735
    row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift);
203
388842735
    row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift);
204
388842735
    row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift);
205
388842735
    row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift);
206
388842735
    row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift);
207
388842735
    row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift);
208
388842735
    row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift);
209
}
210
211
/*
 * Shared column computation used by the idctSparseCol* functions.
 *
 * Expects `col` plus the variables a0..a3 and b0..b3 to be declared by the
 * caller. Reads col[8*0] .. col[8*7] (a stride of 8 elements) and leaves the
 * even-part sums in a0..a3 and the odd-part sums in b0..b3; it stores nothing
 * itself. The rounding bias is folded into the col[8*0] term as
 * (1 << (COL_SHIFT-1)) / W4 before the multiplication by W4. Terms for
 * col[8*4] .. col[8*7] are each skipped when that coefficient is zero.
 */
#define IDCT_COLS do {                                  \
212
        a0 = (SUINT)W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \
213
        a1 = a0;                                        \
214
        a2 = a0;                                        \
215
        a3 = a0;                                        \
216
                                                        \
217
        a0 += (SUINT) W2*col[8*2];                             \
218
        a1 += (SUINT) W6*col[8*2];                             \
219
        a2 += (SUINT)-W6*col[8*2];                             \
220
        a3 += (SUINT)-W2*col[8*2];                             \
221
                                                        \
222
        b0 = MUL(W1, col[8*1]);                         \
223
        b1 = MUL(W3, col[8*1]);                         \
224
        b2 = MUL(W5, col[8*1]);                         \
225
        b3 = MUL(W7, col[8*1]);                         \
226
                                                        \
227
        MAC(b0,  W3, col[8*3]);                         \
228
        MAC(b1, -W7, col[8*3]);                         \
229
        MAC(b2, -W1, col[8*3]);                         \
230
        MAC(b3, -W5, col[8*3]);                         \
231
                                                        \
232
        if (col[8*4]) {                                 \
233
            a0 += (SUINT) W4*col[8*4];                         \
234
            a1 += (SUINT)-W4*col[8*4];                         \
235
            a2 += (SUINT)-W4*col[8*4];                         \
236
            a3 += (SUINT) W4*col[8*4];                         \
237
        }                                               \
238
                                                        \
239
        if (col[8*5]) {                                 \
240
            MAC(b0,  W5, col[8*5]);                     \
241
            MAC(b1, -W1, col[8*5]);                     \
242
            MAC(b2,  W7, col[8*5]);                     \
243
            MAC(b3,  W3, col[8*5]);                     \
244
        }                                               \
245
                                                        \
246
        if (col[8*6]) {                                 \
247
            a0 += (SUINT) W6*col[8*6];                         \
248
            a1 += (SUINT)-W2*col[8*6];                         \
249
            a2 += (SUINT) W2*col[8*6];                         \
250
            a3 += (SUINT)-W6*col[8*6];                         \
251
        }                                               \
252
                                                        \
253
        if (col[8*7]) {                                 \
254
            MAC(b0,  W7, col[8*7]);                     \
255
            MAC(b1, -W5, col[8*7]);                     \
256
            MAC(b2,  W3, col[8*7]);                     \
257
            MAC(b3, -W1, col[8*7]);                     \
258
        }                                               \
259
    } while (0)
260
261
/*
 * Column-pass functions. With EXTRA_SHIFT defined only the in-place variant
 * is built, under the name idctSparseCol_extrashift; otherwise the Put, Add
 * and in-place variants are all built. The in-place body after the #endif is
 * shared by both configurations.
 */
#ifdef EXTRA_SHIFT
262
19850496
static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
263
#else
264
1056438992
/**
 * Column pass that stores the result as pixels.
 *
 * Runs IDCT_COLS on col[0], col[8], ..., col[56] and writes eight values down
 * one column of dest, advancing dest by line_size elements after each store.
 * Each value is shifted right by COL_SHIFT and passed through av_clip_pixel
 * (defined elsewhere; presumably clamps to the valid pixel range).
 *
 * @param dest      first output pixel of the column
 * @param line_size distance between successive output rows, in pixel elements
 * @param col       first coefficient of the column; read with a stride of 8
 */
static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
265
                                          idctin *col)
266
{
267
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
268
269


1056438992
    IDCT_COLS;
270
271
1056438992
    // Rows 0..3 take the sums, rows 4..7 the differences in reverse order.
    dest[0] = av_clip_pixel((int)(a0 + b0) >> COL_SHIFT);
272
1056438992
    dest += line_size;
273
1056438992
    dest[0] = av_clip_pixel((int)(a1 + b1) >> COL_SHIFT);
274
1056438992
    dest += line_size;
275
1056438992
    dest[0] = av_clip_pixel((int)(a2 + b2) >> COL_SHIFT);
276
1056438992
    dest += line_size;
277
1056438992
    dest[0] = av_clip_pixel((int)(a3 + b3) >> COL_SHIFT);
278
1056438992
    dest += line_size;
279
1056438992
    dest[0] = av_clip_pixel((int)(a3 - b3) >> COL_SHIFT);
280
1056438992
    dest += line_size;
281
1056438992
    dest[0] = av_clip_pixel((int)(a2 - b2) >> COL_SHIFT);
282
1056438992
    dest += line_size;
283
1056438992
    dest[0] = av_clip_pixel((int)(a1 - b1) >> COL_SHIFT);
284
1056438992
    dest += line_size;
285
1056438992
    dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT);
286
}
287
288
248959472
/**
 * Column pass that adds the result onto existing pixels.
 *
 * Same computation and output order as idctSparseColPut, but each shifted
 * value is added to the pixel already in dest before av_clip_pixel is applied.
 *
 * @param dest      first pixel of the column to update
 * @param line_size distance between successive rows, in pixel elements
 * @param col       first coefficient of the column; read with a stride of 8
 */
static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
289
                                          idctin *col)
290
{
291
    unsigned a0, a1, a2, a3, b0, b1, b2, b3;
292
293


248959472
    IDCT_COLS;
294
295
248959472
    dest[0] = av_clip_pixel(dest[0] + ((int)(a0 + b0) >> COL_SHIFT));
296
248959472
    dest += line_size;
297
248959472
    dest[0] = av_clip_pixel(dest[0] + ((int)(a1 + b1) >> COL_SHIFT));
298
248959472
    dest += line_size;
299
248959472
    dest[0] = av_clip_pixel(dest[0] + ((int)(a2 + b2) >> COL_SHIFT));
300
248959472
    dest += line_size;
301
248959472
    dest[0] = av_clip_pixel(dest[0] + ((int)(a3 + b3) >> COL_SHIFT));
302
248959472
    dest += line_size;
303
248959472
    dest[0] = av_clip_pixel(dest[0] + ((int)(a3 - b3) >> COL_SHIFT));
304
248959472
    dest += line_size;
305
248959472
    dest[0] = av_clip_pixel(dest[0] + ((int)(a2 - b2) >> COL_SHIFT));
306
248959472
    dest += line_size;
307
248959472
    dest[0] = av_clip_pixel(dest[0] + ((int)(a1 - b1) >> COL_SHIFT));
308
248959472
    dest += line_size;
309
248959472
    dest[0] = av_clip_pixel(dest[0] + ((int)(a0 - b0) >> COL_SHIFT));
310
}
311
312
104492032
/**
 * In-place column pass: runs IDCT_COLS and writes the eight shifted results
 * back to col[0], col[8], ..., col[56] without clipping.
 */
static inline void FUNC6(idctSparseCol)(idctin *col)
313
#endif
314
{
315
    unsigned a0, a1, a2, a3, b0, b1, b2, b3;
316
317


124342528
    IDCT_COLS;
318
319
124342528
    col[0 ] = ((int)(a0 + b0) >> COL_SHIFT);
320
124342528
    col[8 ] = ((int)(a1 + b1) >> COL_SHIFT);
321
124342528
    col[16] = ((int)(a2 + b2) >> COL_SHIFT);
322
124342528
    col[24] = ((int)(a3 + b3) >> COL_SHIFT);
323
124342528
    col[32] = ((int)(a3 - b3) >> COL_SHIFT);
324
124342528
    col[40] = ((int)(a2 - b2) >> COL_SHIFT);
325
124342528
    col[48] = ((int)(a1 - b1) >> COL_SHIFT);
326
124342528
    col[56] = ((int)(a0 - b0) >> COL_SHIFT);
327
124342528
}
328
329
#ifndef EXTRA_SHIFT
330
132054874
/**
 * 8x8 inverse transform that stores the result as pixels.
 *
 * Runs the row pass on the eight rows of the block (extra_shift = 0), then
 * the "put" column pass on each of the eight columns. The coefficient block
 * is overwritten by the row pass.
 *
 * @param dest_     output buffer, reinterpreted as pixel *
 * @param line_size output stride; divided by sizeof(pixel) before use, so
 *                  presumably given in bytes - confirm against callers
 * @param block_    64 coefficients (8 rows of 8), reinterpreted as idctin *
 */
void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_)
331
{
332
132054874
    idctin *block = (idctin *)block_;
333
132054874
    pixel *dest = (pixel *)dest_;
334
    int i;
335
336
132054874
    // Convert the stride to pixel elements for the pointer arithmetic below.
    line_size /= sizeof(pixel);
337
338
1188493866
    for (i = 0; i < 8; i++)
339
1056438992
        FUNC6(idctRowCondDC)(block + i*8, 0);
340
341
1188493866
    for (i = 0; i < 8; i++)
342
1056438992
        FUNC6(idctSparseColPut)(dest + i, line_size, block + i);
343
}
344
345
#if IN_IDCT_DEPTH == 16
346
31089978
/**
 * 8x8 inverse transform whose result is added onto existing pixels.
 *
 * Runs the row pass on the eight rows of the block (extra_shift = 0), then
 * the "add" column pass on each of the eight columns. The coefficient block
 * is overwritten by the row pass. Only built when IN_IDCT_DEPTH == 16.
 *
 * @param dest_     pixel buffer to update, reinterpreted as pixel *
 * @param line_size stride; divided by sizeof(pixel) before use, so presumably
 *                  given in bytes - confirm against callers
 * @param block     64 coefficients (8 rows of 8)
 */
void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
347
{
348
31089978
    pixel *dest = (pixel *)dest_;
349
    int i;
350
351
31089978
    // Convert the stride to pixel elements for the pointer arithmetic below.
    line_size /= sizeof(pixel);
352
353
279809802
    for (i = 0; i < 8; i++)
354
248719824
        FUNC6(idctRowCondDC)(block + i*8, 0);
355
356
279809802
    for (i = 0; i < 8; i++)
357
248719824
        FUNC6(idctSparseColAdd)(dest + i, line_size, block + i);
358
}
359
360
8169176
/**
 * In-place 8x8 inverse transform.
 *
 * Runs the row pass on the eight rows of the block (extra_shift = 0), then
 * the in-place column pass on each of the eight columns, leaving the result
 * in the same 64-entry buffer. Only built when IN_IDCT_DEPTH == 16.
 *
 * @param block 64 coefficients (8 rows of 8), replaced by the output
 */
void FUNC6(ff_simple_idct)(int16_t *block)
361
{
362
    int i;
363
364
73522584
    for (i = 0; i < 8; i++)
365
65353408
        FUNC6(idctRowCondDC)(block + i*8, 0);
366
367
73522584
    for (i = 0; i < 8; i++)
368
65353408
        FUNC6(idctSparseCol)(block + i);
369
8169176
}
370
#endif
371
#endif