GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/vp8dsp.c Lines: 239 259 92.3 %
Date: 2020-08-14 10:39:37 Branches: 244 248 98.4 %

Line Branch Exec Source
1
/*
2
 * Copyright (C) 2010 David Conrad
3
 * Copyright (C) 2010 Ronald S. Bultje
4
 * Copyright (C) 2014 Peter Ross
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22
23
/**
24
 * @file
25
 * VP8 compatible video decoder
26
 */
27
28
#include "libavutil/common.h"
29
#include "libavutil/intreadwrite.h"
30
31
#include "mathops.h"
32
#include "vp8dsp.h"
33
34
/*
 * Generates two helpers for the codec prefix `name`. Each one applies
 * name##_idct_dc_add_c() to four 4x4 blocks, in block[0]..block[3] order:
 *   name##_idct_dc_add4uv_c - blocks arranged as a 2x2 grid
 *                             (offsets 0, 4, 4*stride, 4*stride + 4)
 *   name##_idct_dc_add4y_c  - blocks arranged side by side in one row
 *                             (offsets 0, 4, 8, 12)
 */
#define MK_IDCT_DC_ADD4_C(name)                                               \
static void name ## _idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16],     \
                                      ptrdiff_t stride)                       \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < 4; n++)                                                   \
        name ## _idct_dc_add_c(dst + stride * 4 * (n >> 1) + 4 * (n & 1),     \
                               block[n], stride);                             \
}                                                                             \
                                                                              \
static void name ## _idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16],      \
                                     ptrdiff_t stride)                        \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < 4; n++)                                                   \
        name ## _idct_dc_add_c(dst + 4 * n, block[n], stride);                \
}
52
53
#if CONFIG_VP7_DECODER
54
/**
 * VP7 luma DC transform.
 *
 * Runs a two-pass 4-point butterfly over the 16 values in dc[] (rows first,
 * then columns of the intermediate result) using the fixed-point factors
 * 23170, 12540 and 30274. Each result is stored in coefficient 0 of
 * block[0..3][0..3], and dc[] is cleared.
 *
 * @param block destination; only element [..][..][0] is written
 * @param dc    16 input coefficients, zeroed on return
 */
static void vp7_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
{
    int16_t tmp[16];
    /* Sums are formed as unsigned and cast back to int before shifting. */
    unsigned even_sum, even_diff, odd_lo, odd_hi;
    int n;

    /* Pass 1: rows of dc[] -> rows of tmp[] */
    for (n = 0; n < 4; n++) {
        const int16_t *in  = dc  + 4 * n;
        int16_t       *out = tmp + 4 * n;

        even_sum  = (in[0] + in[2]) * 23170;
        even_diff = (in[0] - in[2]) * 23170;
        odd_lo    = in[1] * 12540 - in[3] * 30274;
        odd_hi    = in[1] * 30274 + in[3] * 12540;

        out[0] = (int)(even_sum  + odd_hi) >> 14;
        out[1] = (int)(even_diff + odd_lo) >> 14;
        out[2] = (int)(even_diff - odd_lo) >> 14;
        out[3] = (int)(even_sum  - odd_hi) >> 14;
    }

    /* Pass 2: columns of tmp[] -> DC slot of each block, with rounding */
    for (n = 0; n < 4; n++) {
        even_sum  = (tmp[n] + tmp[n + 8]) * 23170;
        even_diff = (tmp[n] - tmp[n + 8]) * 23170;
        odd_lo    = tmp[n + 4] * 12540 - tmp[n + 12] * 30274;
        odd_hi    = tmp[n + 4] * 30274 + tmp[n + 12] * 12540;

        /* Clears four of the input coefficients per iteration. */
        AV_ZERO64(dc + n * 4);

        block[0][n][0] = (int)(even_sum  + odd_hi + 0x20000) >> 18;
        block[1][n][0] = (int)(even_diff + odd_lo + 0x20000) >> 18;
        block[2][n][0] = (int)(even_diff - odd_lo + 0x20000) >> 18;
        block[3][n][0] = (int)(even_sum  - odd_hi + 0x20000) >> 18;
    }
}
83
84
25
/**
 * DC-only variant of the VP7 luma DC transform.
 *
 * Scales dc[0] twice by 23170 (>> 14, then rounded >> 18), stores the
 * result in coefficient 0 of all 16 sub-blocks and clears dc[0].
 *
 * @param block destination; only element [..][..][0] is written
 * @param dc    input coefficients; only dc[0] is read and reset
 */
static void vp7_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
{
    const int first_pass = 23170 * dc[0] >> 14;
    const int val        = (23170 * first_pass + 0x20000) >> 18;
    int outer, inner;

    dc[0] = 0;

    for (outer = 0; outer < 4; outer++)
        for (inner = 0; inner < 4; inner++)
            block[outer][inner][0] = val;
}
96
97
658
/**
 * VP7 4x4 inverse transform, added to the destination pixels.
 *
 * Pass 1 transforms the rows of block[] into tmp[] and zeroes block[];
 * pass 2 transforms the columns of tmp[], rounds, and adds each value to
 * the matching dst pixel, clipping with av_clip_uint8().
 *
 * @param dst    top-left destination pixel of the 4x4 area
 * @param block  16 coefficients, cleared on return
 * @param stride distance between destination rows
 */
static void vp7_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
    int16_t tmp[16];
    /* Sums are formed as unsigned and cast back to int before shifting. */
    unsigned even_sum, even_diff, odd_lo, odd_hi;
    int n;

    for (n = 0; n < 4; n++) {
        int16_t *in  = block + 4 * n;
        int16_t *out = tmp   + 4 * n;

        even_sum  = (in[0] + in[2]) * 23170;
        even_diff = (in[0] - in[2]) * 23170;
        odd_lo    = in[1] * 12540 - in[3] * 30274;
        odd_hi    = in[1] * 30274 + in[3] * 12540;

        /* The row has been consumed; reset it. */
        AV_ZERO64(in);

        out[0] = (int)(even_sum  + odd_hi) >> 14;
        out[1] = (int)(even_diff + odd_lo) >> 14;
        out[2] = (int)(even_diff - odd_lo) >> 14;
        out[3] = (int)(even_sum  - odd_hi) >> 14;
    }

    for (n = 0; n < 4; n++) {
        uint8_t *col = dst + n;

        even_sum  = (tmp[n] + tmp[n + 8]) * 23170;
        even_diff = (tmp[n] - tmp[n + 8]) * 23170;
        odd_lo    = tmp[n + 4] * 12540 - tmp[n + 12] * 30274;
        odd_hi    = tmp[n + 4] * 30274 + tmp[n + 12] * 12540;

        col[0 * stride] = av_clip_uint8(col[0 * stride] +
                              ((int)(even_sum  + odd_hi + 0x20000) >> 18));
        col[1 * stride] = av_clip_uint8(col[1 * stride] +
                              ((int)(even_diff + odd_lo + 0x20000) >> 18));
        col[2 * stride] = av_clip_uint8(col[2 * stride] +
                              ((int)(even_diff - odd_lo + 0x20000) >> 18));
        col[3 * stride] = av_clip_uint8(col[3 * stride] +
                              ((int)(even_sum  - odd_hi + 0x20000) >> 18));
    }
}
130
131
495
/**
 * DC-only VP7 inverse transform, added to a 4x4 pixel area.
 *
 * Derives one offset from block[0] (scaled twice by 23170, with rounding),
 * clears block[0], and adds the offset to all 16 pixels, clipping each sum
 * with av_clip_uint8().
 *
 * @param dst    top-left destination pixel of the 4x4 area
 * @param block  coefficients; only block[0] is read and reset
 * @param stride distance between destination rows
 */
static void vp7_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
    const int dc = (23170 * (23170 * block[0] >> 14) + 0x20000) >> 18;
    int row, col;

    block[0] = 0;

    for (row = 0; row < 4; row++, dst += stride)
        for (col = 0; col < 4; col++)
            dst[col] = av_clip_uint8(dst[col] + dc);
}
144
145
234
/* Instantiates vp7_idct_dc_add4uv_c() and vp7_idct_dc_add4y_c(). */
MK_IDCT_DC_ADD4_C(vp7)
146
#endif /* CONFIG_VP7_DECODER */
147
148
// TODO: Maybe add dequant
149
#if CONFIG_VP8_DECODER
150
24399
/**
 * VP8 luma DC transform.
 *
 * A first butterfly pass works on the columns of dc[] in place; a second
 * pass works on its rows, adds a rounding term of 3, shifts right by 3 and
 * stores each value in coefficient 0 of block[row][0..3]. dc[] is cleared.
 *
 * @param block destination; only element [..][..][0] is written
 * @param dc    16 input coefficients, zeroed on return
 */
static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
{
    int n;

    /* Column pass, results written back into dc[] */
    for (n = 0; n < 4; n++) {
        int16_t *col = dc + n;
        const int outer_sum  = col[0] + col[12];
        const int inner_sum  = col[4] + col[8];
        const int inner_diff = col[4] - col[8];
        const int outer_diff = col[0] - col[12];

        col[0]  = outer_sum  + inner_sum;
        col[4]  = outer_diff + inner_diff;
        col[8]  = outer_sum  - inner_sum;
        col[12] = outer_diff - inner_diff;
    }

    /* Row pass; the "+ 3" is the rounding term for the final ">> 3" */
    for (n = 0; n < 4; n++) {
        int16_t *row = dc + 4 * n;
        const int outer_sum  = row[0] + row[3] + 3;
        const int inner_sum  = row[1] + row[2];
        const int inner_diff = row[1] - row[2];
        const int outer_diff = row[0] - row[3] + 3;

        /* The row has been consumed; reset it. */
        AV_ZERO64(row);

        block[n][0][0] = (outer_sum  + inner_sum)  >> 3;
        block[n][1][0] = (outer_diff + inner_diff) >> 3;
        block[n][2][0] = (outer_sum  - inner_sum)  >> 3;
        block[n][3][0] = (outer_diff - inner_diff) >> 3;
    }
}
179
180
10280
/**
 * DC-only variant of the VP8 luma DC transform.
 *
 * Computes (dc[0] + 3) >> 3, stores it in coefficient 0 of all 16
 * sub-blocks and clears dc[0].
 *
 * @param block destination; only element [..][..][0] is written
 * @param dc    input coefficients; only dc[0] is read and reset
 */
static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
{
    const int val = (dc[0] + 3) >> 3;
    int outer, inner;

    dc[0] = 0;

    for (outer = 0; outer < 4; outer++)
        for (inner = 0; inner < 4; inner++)
            block[outer][inner][0] = val;
}
192
193
/* a * (1 + 20091 / 65536) in 16.16 fixed point; note `a` is evaluated twice */
#define MUL_20091(a) ((a) + (((a) * 20091) >> 16))
/* a * 35468 / 65536 in 16.16 fixed point */
#define MUL_35468(a) (((a) * 35468) >> 16)
195
196
215530
/**
 * VP8 4x4 inverse transform, added to the destination pixels.
 *
 * Pass 1 transforms each column of block[] into a row of tmp[] (so the
 * intermediate is transposed) and zeroes block[]. Pass 2 transforms the
 * columns of tmp[], rounds with (+ 4) >> 3 and adds the result to one
 * destination row, clipping with av_clip_uint8().
 *
 * @param dst    top-left destination pixel of the 4x4 area
 * @param block  16 coefficients, cleared on return
 * @param stride distance between destination rows
 */
static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
    int16_t tmp[16];
    int n, k;

    for (n = 0; n < 4; n++) {
        int16_t *col = block + n;
        int16_t *out = tmp + 4 * n;
        const int sum    = col[0] + col[8];
        const int diff   = col[0] - col[8];
        const int rot_lo = MUL_35468(col[4]) - MUL_20091(col[12]);
        const int rot_hi = MUL_20091(col[4]) + MUL_35468(col[12]);

        /* The column has been consumed; reset its four coefficients. */
        for (k = 0; k < 4; k++)
            col[4 * k] = 0;

        out[0] = sum  + rot_hi;
        out[1] = diff + rot_lo;
        out[2] = diff - rot_lo;
        out[3] = sum  - rot_hi;
    }

    for (n = 0; n < 4; n++, dst += stride) {
        const int16_t *col = tmp + n;
        const int sum    = col[0] + col[8];
        const int diff   = col[0] - col[8];
        const int rot_lo = MUL_35468(col[4]) - MUL_20091(col[12]);
        const int rot_hi = MUL_20091(col[4]) + MUL_35468(col[12]);

        dst[0] = av_clip_uint8(dst[0] + ((sum  + rot_hi + 4) >> 3));
        dst[1] = av_clip_uint8(dst[1] + ((diff + rot_lo + 4) >> 3));
        dst[2] = av_clip_uint8(dst[2] + ((diff - rot_lo + 4) >> 3));
        dst[3] = av_clip_uint8(dst[3] + ((sum  - rot_hi + 4) >> 3));
    }
}
230
231
746520
/**
 * DC-only VP8 inverse transform, added to a 4x4 pixel area.
 *
 * Computes (block[0] + 4) >> 3, clears block[0], and adds that offset to
 * all 16 pixels, clipping each sum with av_clip_uint8().
 *
 * @param dst    top-left destination pixel of the 4x4 area
 * @param block  coefficients; only block[0] is read and reset
 * @param stride distance between destination rows
 */
static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
    const int dc = (block[0] + 4) >> 3;
    int row, col;

    block[0] = 0;

    for (row = 0; row < 4; row++, dst += stride)
        for (col = 0; col < 4; col++)
            dst[col] = av_clip_uint8(dst[col] + dc);
}
244
245
321336
/* Instantiates vp8_idct_dc_add4uv_c() and vp8_idct_dc_add4y_c(). */
MK_IDCT_DC_ADD4_C(vp8)
246
#endif /* CONFIG_VP8_DECODER */
247
248
// because I like only having two parameters to pass functions...
249
/*
 * Declares eight int locals read around the edge at `p`:
 * p3..p0 come from offsets -4*stride..-1*stride and q0..q3 from
 * 0..+3*stride. The expansion uses `p` and `stride` from the enclosing
 * scope. Listed here from the edge outwards.
 */
#define LOAD_PIXELS                                                           \
    int av_unused p0 = p[-1 * stride];                                        \
    int av_unused q0 = p[ 0 * stride];                                        \
    int av_unused p1 = p[-2 * stride];                                        \
    int av_unused q1 = p[ 1 * stride];                                        \
    int av_unused p2 = p[-3 * stride];                                        \
    int av_unused q2 = p[ 2 * stride];                                        \
    int av_unused p3 = p[-4 * stride];                                        \
    int av_unused q3 = p[ 3 * stride];
258
259
/*
 * Looks up (n + 0x80) in the table `cm` and subtracts 0x80 again.
 * The expansion uses a `cm` pointer from the enclosing scope.
 * NOTE(review): presumably this saturates n to [-128, 127], assuming cm[]
 * clamps its index to 0..255 - confirm against ff_crop_tab.
 */
#define clip_int8(n) (cm[(n) + 0x80] - 0x80)
260
261
10168990
static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride,
262
                                           int is4tap, int is_vp7)
263
{
264
10168990
    LOAD_PIXELS
265
    int a, f1, f2;
266
10168990
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
267
268
10168990
    a = 3 * (q0 - p0);
269
270
10168990
    if (is4tap)
271
5532529
        a += clip_int8(p1 - q1);
272
273
10168990
    a = clip_int8(a);
274
275
    // We deviate from the spec here with c(a+3) >> 3
276
    // since that's what libvpx does.
277
10168990
    f1 = FFMIN(a + 4, 127) >> 3;
278
279
10168990
    if (is_vp7)
280
756321
        f2 = f1 - ((a & 7) == 4);
281
    else
282
9412669
        f2 = FFMIN(a + 3, 127) >> 3;
283
284
    // Despite what the spec says, we do need to clamp here to
285
    // be bitexact with libvpx.
286
10168990
    p[-1 * stride] = cm[p0 + f2];
287
10168990
    p[ 0 * stride] = cm[q0 - f1];
288
289
    // only used for _inner on blocks without high edge variance
290
10168990
    if (!is4tap) {
291
4636461
        a              = (f1 + 1) >> 1;
292
4636461
        p[-2 * stride] = cm[p1 + a];
293
4636461
        p[ 1 * stride] = cm[q1 - a];
294
    }
295
10168990
}
296
297
756321
static av_always_inline void vp7_filter_common(uint8_t *p, ptrdiff_t stride,
298
                                               int is4tap)
299
{
300
756321
    filter_common(p, stride, is4tap, IS_VP7);
301
756321
}
302
303
9412669
static av_always_inline void vp8_filter_common(uint8_t *p, ptrdiff_t stride,
304
                                               int is4tap)
305
{
306
9412669
    filter_common(p, stride, is4tap, IS_VP8);
307
9412669
}
308
309
1237440
static av_always_inline int vp7_simple_limit(uint8_t *p, ptrdiff_t stride,
310
                                             int flim)
311
{
312
1237440
    LOAD_PIXELS
313
1237440
    return FFABS(p0 - q0) <= flim;
314
}
315
316
33226752
static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride,
317
                                             int flim)
318
{
319
33226752
    LOAD_PIXELS
320
33226752
    return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim;
321
}
322
323
/**
324
 * E - limit at the macroblock edge
325
 * I - limit for interior difference
326
 */
327
#define NORMAL_LIMIT(vpn)                                                     \
328
static av_always_inline int vp ## vpn ## _normal_limit(uint8_t *p,            \
329
                                                       ptrdiff_t stride,      \
330
                                                       int E, int I)          \
331
{                                                                             \
332
    LOAD_PIXELS                                                               \
333
    return vp ## vpn ## _simple_limit(p, stride, E) &&                        \
334
           FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I &&                      \
335
           FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I &&                      \
336
           FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I;                        \
337
}
338
339



1237440
NORMAL_LIMIT(7)
340



32675296
NORMAL_LIMIT(8)
341
342
// high edge variance
343
31012181
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
344
{
345
31012181
    LOAD_PIXELS
346

31012181
    return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh;
347
}
348
349
21334276
static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
350
{
351
    int a0, a1, a2, w;
352
21334276
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
353
354
21334276
    LOAD_PIXELS
355
356
21334276
    w = clip_int8(p1 - q1);
357
21334276
    w = clip_int8(w + 3 * (q0 - p0));
358
359
21334276
    a0 = (27 * w + 63) >> 7;
360
21334276
    a1 = (18 * w + 63) >> 7;
361
21334276
    a2 =  (9 * w + 63) >> 7;
362
363
21334276
    p[-3 * stride] = cm[p2 + a2];
364
21334276
    p[-2 * stride] = cm[p1 + a1];
365
21334276
    p[-1 * stride] = cm[p0 + a0];
366
21334276
    p[ 0 * stride] = cm[q0 - a0];
367
21334276
    p[ 1 * stride] = cm[q1 - a1];
368
21334276
    p[ 2 * stride] = cm[q2 - a2];
369
21334276
}
370
371
/*
 * Generates the normal loop filter pair for one codec/direction/size:
 *   <vpn>_<dir>_loop_filter<size>_c       - edges passing the normal limit
 *       get filter_common(.., 1) when hev() is set, else filter_mbedge()
 *   <vpn>_<dir>_loop_filter<size>_inner_c - edges passing the normal limit
 *       get filter_common(.., 1) when hev() is set, else filter_common(.., 0)
 * stridea steps from one edge position to the next; strideb steps across
 * the edge. maybe_inline is an optional qualifier placed after `static`.
 */
#define LOOP_FILTER(vpn, dir, size, stridea, strideb, maybe_inline)           \
static maybe_inline                                                           \
void vpn ## _ ## dir ## _loop_filter ## size ## _c(uint8_t *dst,              \
                                                   ptrdiff_t stride,          \
                                                   int flim_E, int flim_I,    \
                                                   int hev_thresh)            \
{                                                                             \
    int i;                                                                    \
    for (i = 0; i < size; i++) {                                              \
        uint8_t *edge = dst + i * stridea;                                    \
        if (!vpn ## _normal_limit(edge, strideb, flim_E, flim_I))             \
            continue;                                                         \
        if (hev(edge, strideb, hev_thresh))                                   \
            vpn ## _filter_common(edge, strideb, 1);                          \
        else                                                                  \
            filter_mbedge(edge, strideb);                                     \
    }                                                                         \
}                                                                             \
                                                                              \
static maybe_inline                                                           \
void vpn ## _ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst,        \
                                                         ptrdiff_t stride,    \
                                                         int flim_E,          \
                                                         int flim_I,          \
                                                         int hev_thresh)      \
{                                                                             \
    int i;                                                                    \
    for (i = 0; i < size; i++) {                                              \
        uint8_t *edge = dst + i * stridea;                                    \
        if (!vpn ## _normal_limit(edge, strideb, flim_E, flim_I))             \
            continue;                                                         \
        if (hev(edge, strideb, hev_thresh))                                   \
            vpn ## _filter_common(edge, strideb, 1);                          \
        else                                                                  \
            vpn ## _filter_common(edge, strideb, 0);                          \
    }                                                                         \
}
407
408
/*
 * Generates the 8-wide loop filters for one codec/direction (always
 * inlined) plus two wrappers that run them on a U plane and then a V plane
 * with identical limits:
 *   <vpn>_<dir>_loop_filter8uv_c
 *   <vpn>_<dir>_loop_filter8uv_inner_c
 */
#define UV_LOOP_FILTER(vpn, dir, stridea, strideb)                            \
LOOP_FILTER(vpn, dir, 8, stridea, strideb, av_always_inline)                  \
static void vpn ## _ ## dir ## _loop_filter8uv_c(uint8_t *dstU,               \
                                                 uint8_t *dstV,               \
                                                 ptrdiff_t stride,            \
                                                 int fE, int fI,              \
                                                 int hev_thresh)              \
{                                                                             \
    vpn ## _ ## dir ## _loop_filter8_c(dstU, stride,                          \
                                       fE, fI, hev_thresh);                   \
    vpn ## _ ## dir ## _loop_filter8_c(dstV, stride,                          \
                                       fE, fI, hev_thresh);                   \
}                                                                             \
                                                                              \
static void vpn ## _ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU,         \
                                                       uint8_t *dstV,         \
                                                       ptrdiff_t stride,      \
                                                       int fE, int fI,        \
                                                       int hev_thresh)        \
{                                                                             \
    vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride,                    \
                                             fE, fI, hev_thresh);             \
    vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride,                    \
                                             fE, fI, hev_thresh);             \
}
430
431
/*
 * Generates the simple loop filters for one codec. Both walk 16 edge
 * positions and call <vpn>_filter_common(.., 1) wherever
 * <vpn>_simple_limit() holds:
 *   <vpn>_v_loop_filter_simple_c - positions dst + i, filtering across
 *                                  `stride`
 *   <vpn>_h_loop_filter_simple_c - positions dst + i * stride, filtering
 *                                  across a step of 1
 */
#define LOOP_FILTER_SIMPLE(vpn)                                               \
static void vpn ## _v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride,    \
                                           int flim)                          \
{                                                                             \
    int i;                                                                    \
    for (i = 0; i < 16; i++) {                                                \
        uint8_t *edge = dst + i;                                              \
        if (vpn ## _simple_limit(edge, stride, flim))                         \
            vpn ## _filter_common(edge, stride, 1);                           \
    }                                                                         \
}                                                                             \
                                                                              \
static void vpn ## _h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride,    \
                                           int flim)                          \
{                                                                             \
    int i;                                                                    \
    for (i = 0; i < 16; i++) {                                                \
        uint8_t *edge = dst + i * stride;                                     \
        if (vpn ## _simple_limit(edge, 1, flim))                              \
            vpn ## _filter_common(edge, 1, 1);                                \
    }                                                                         \
}
449
450
/*
 * Emits every C loop-filter entry point for the codec prefix `vpn`:
 * the 16-wide vertical and horizontal filters, the 8-wide U/V filters for
 * both directions, and the two simple filters.
 *
 * The final line deliberately carries no continuation backslash, so the
 * macro body ends here regardless of what the following line contains.
 */
#define LOOP_FILTERS(vpn)                \
    LOOP_FILTER(vpn, v, 16, 1, stride, ) \
    LOOP_FILTER(vpn, h, 16, stride, 1, ) \
    UV_LOOP_FILTER(vpn, v, 1, stride)    \
    UV_LOOP_FILTER(vpn, h, stride, 1)    \
    LOOP_FILTER_SIMPLE(vpn)
456
457
/*
 * Sub-pixel filter tap magnitudes; the EPEL functions pick a row with
 * subpel_filters[mx - 1] or subpel_filters[my - 1].
 * Values are stored unsigned: FILTER_6TAP/FILTER_4TAP subtract taps 1 and 4
 * and add the rest. With those signs every row sums to 128, which matches
 * the ">> 7" normalisation in those macros.
 */
static const uint8_t subpel_filters[7][6] = {
    [0] = { 0,  6, 123,  12,  1, 0 },
    [1] = { 2, 11, 108,  36,  8, 1 },
    [2] = { 0,  9,  93,  50,  6, 0 },
    [3] = { 3, 16,  77,  77, 16, 3 },
    [4] = { 0,  6,  50,  93,  9, 0 },
    [5] = { 1,  8,  36, 108, 11, 2 },
    [6] = { 0,  1,  12, 123,  6, 0 },
};
466
467
/*
 * Generates put_vp8_pixels<WIDTH>_c(): copies h rows of WIDTH bytes from
 * src to dst with memcpy(), advancing the pointers by srcstride and
 * dststride after each row. The x and y parameters are not used.
 */
#define PUT_PIXELS(WIDTH)                                                     \
static void put_vp8_pixels ## WIDTH ## _c(uint8_t *dst, ptrdiff_t dststride,  \
                                          uint8_t *src, ptrdiff_t srcstride,  \
                                          int h, int x, int y)                \
{                                                                             \
    int rows_left;                                                            \
    for (rows_left = h; rows_left > 0; rows_left--) {                         \
        memcpy(dst, src, WIDTH);                                              \
        dst += dststride;                                                     \
        src += srcstride;                                                     \
    }                                                                         \
}

/* put_vp8_pixels16_c(), put_vp8_pixels8_c(), put_vp8_pixels4_c() */
PUT_PIXELS(16)
PUT_PIXELS(8)
PUT_PIXELS(4)
480
481
/*
 * Six-tap filter at index x of src, written in tap order. Taps 1 and 4 are
 * subtracted and the others added; the sum is rounded with + 64, shifted
 * right by 7 and looked up in cm[]. The expansion uses `x` and `cm` from
 * the enclosing scope.
 */
#define FILTER_6TAP(src, F, stride)                                           \
    cm[(F[0] * src[x - 2 * stride] - F[1] * src[x - 1 * stride] +             \
        F[2] * src[x + 0 * stride] + F[3] * src[x + 1 * stride] -             \
        F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7]
485
486
/*
 * Four-tap filter at index x of src, written in tap order. Uses F[1]..F[4]
 * only: taps 1 and 4 are subtracted, taps 2 and 3 added; the sum is rounded
 * with + 64, shifted right by 7 and looked up in cm[]. The expansion uses
 * `x` and `cm` from the enclosing scope.
 */
#define FILTER_4TAP(src, F, stride)                                           \
    cm[(F[3] * src[x + 1 * stride] - F[1] * src[x - 1 * stride] +             \
        F[2] * src[x + 0 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7]
489
490
/*
 * Generates put_vp8_epel<SIZE>_h<TAPS>_c(): for each of h rows, writes SIZE
 * pixels produced by FILTER_<TAPS>TAP along the row (step 1), using the
 * coefficients in subpel_filters[mx - 1]. `my` is not used.
 * The locals `x` and `cm` are referenced by the FILTER_* macros.
 */
#define VP8_EPEL_H(SIZE, TAPS)                                                \
static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst,            \
                                                     ptrdiff_t dststride,     \
                                                     uint8_t *src,            \
                                                     ptrdiff_t srcstride,     \
                                                     int h, int mx, int my)   \
{                                                                             \
    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
    const uint8_t *filter = subpel_filters[mx - 1];                           \
    int x, row;                                                               \
    for (row = 0; row < h; row++, dst += dststride, src += srcstride)         \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1);                  \
}
507
508
/*
 * Generates put_vp8_epel<SIZE>_v<TAPS>_c(): for each of h rows, writes SIZE
 * pixels produced by FILTER_<TAPS>TAP down the column (step srcstride),
 * using the coefficients in subpel_filters[my - 1]. `mx` is not used.
 * The locals `x` and `cm` are referenced by the FILTER_* macros.
 */
#define VP8_EPEL_V(SIZE, TAPS)                                                \
static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst,            \
                                                     ptrdiff_t dststride,     \
                                                     uint8_t *src,            \
                                                     ptrdiff_t srcstride,     \
                                                     int h, int mx, int my)   \
{                                                                             \
    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
    const uint8_t *filter = subpel_filters[my - 1];                           \
    int x, row;                                                               \
    for (row = 0; row < h; row++, dst += dststride, src += srcstride)         \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride);          \
}
525
526
/*
 * Generates put_vp8_epel<SIZE>_h<HTAPS>v<VTAPS>_c(), a two-stage filter:
 *   1. h + VTAPS - 1 source rows, starting 2 rows (6-tap vertical) or 1 row
 *      (4-tap vertical) above src, are filtered horizontally with
 *      subpel_filters[mx - 1] into a SIZE-wide scratch buffer;
 *   2. the scratch buffer is filtered vertically with
 *      subpel_filters[my - 1] into dst.
 * The scratch buffer holds 2 * SIZE + VTAPS - 1 rows.
 * The locals `x` and `cm` are referenced by the FILTER_* macros.
 */
#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS)                                       \
static void                                                                   \
put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst,         \
                                                        ptrdiff_t dststride,  \
                                                        uint8_t *src,         \
                                                        ptrdiff_t srcstride,  \
                                                        int h, int mx,        \
                                                        int my)               \
{                                                                             \
    const uint8_t *cm      = ff_crop_tab + MAX_NEG_CROP;                      \
    const uint8_t *hfilter = subpel_filters[mx - 1];                          \
    const uint8_t *vfilter = subpel_filters[my - 1];                          \
    const int lead         = 2 - (VTAPS == 4);                                \
    const int tmp_rows     = h + VTAPS - 1;                                   \
    uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE];                         \
    uint8_t *tmp = tmp_array;                                                 \
    int x, row;                                                               \
                                                                              \
    src -= lead * srcstride;                                                  \
    for (row = 0; row < tmp_rows; row++, tmp += SIZE, src += srcstride)       \
        for (x = 0; x < SIZE; x++)                                            \
            tmp[x] = FILTER_ ## HTAPS ## TAP(src, hfilter, 1);                \
                                                                              \
    tmp = tmp_array + lead * SIZE;                                            \
    for (row = 0; row < h; row++, dst += dststride, tmp += SIZE)              \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = FILTER_ ## VTAPS ## TAP(tmp, vfilter, SIZE);             \
}
558
559

546
/* Horizontal-only filters: VP8_EPEL_H(SIZE, TAPS) */
VP8_EPEL_H(16, 4)
VP8_EPEL_H(8, 4)
VP8_EPEL_H(4, 4)
VP8_EPEL_H(16, 6)
VP8_EPEL_H(8, 6)
VP8_EPEL_H(4, 6)

/* Vertical-only filters: VP8_EPEL_V(SIZE, TAPS) */
VP8_EPEL_V(16, 4)
VP8_EPEL_V(8, 4)
VP8_EPEL_V(4, 4)
VP8_EPEL_V(16, 6)
VP8_EPEL_V(8, 6)
VP8_EPEL_V(4, 6)

/* Two-stage filters: VP8_EPEL_HV(SIZE, HTAPS, VTAPS) */
VP8_EPEL_HV(16, 4, 4)
VP8_EPEL_HV(8, 4, 4)
VP8_EPEL_HV(4, 4, 4)
VP8_EPEL_HV(16, 4, 6)
VP8_EPEL_HV(8, 4, 6)
VP8_EPEL_HV(4, 4, 6)
VP8_EPEL_HV(16, 6, 4)
VP8_EPEL_HV(8, 6, 4)
VP8_EPEL_HV(4, 6, 4)
VP8_EPEL_HV(16, 6, 6)
VP8_EPEL_HV(8, 6, 6)
VP8_EPEL_HV(4, 6, 6)
584
585
#define VP8_BILINEAR(SIZE)                                                    \
586
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, \
587
                                             uint8_t *src, ptrdiff_t sstride, \
588
                                             int h, int mx, int my)           \
589
{                                                                             \
590
    int a = 8 - mx, b = mx;                                                   \
591
    int x, y;                                                                 \
592
    for (y = 0; y < h; y++) {                                                 \
593
        for (x = 0; x < SIZE; x++)                                            \
594
            dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;                  \
595
        dst += dstride;                                                       \
596
        src += sstride;                                                       \
597
    }                                                                         \
598
}                                                                             \
599
                                                                              \
600
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, \
601
                                             uint8_t *src, ptrdiff_t sstride, \
602
                                             int h, int mx, int my)           \
603
{                                                                             \
604
    int c = 8 - my, d = my;                                                   \
605
    int x, y;                                                                 \
606
    for (y = 0; y < h; y++) {                                                 \
607
        for (x = 0; x < SIZE; x++)                                            \
608
            dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;            \
609
        dst += dstride;                                                       \
610
        src += sstride;                                                       \
611
    }                                                                         \
612
}                                                                             \
613
                                                                              \
614
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst,                   \
615
                                              ptrdiff_t dstride,              \
616
                                              uint8_t *src,                   \
617
                                              ptrdiff_t sstride,              \
618
                                              int h, int mx, int my)          \
619
{                                                                             \
620
    int a = 8 - mx, b = mx;                                                   \
621
    int c = 8 - my, d = my;                                                   \
622
    int x, y;                                                                 \
623
    uint8_t tmp_array[(2 * SIZE + 1) * SIZE];                                 \
624
    uint8_t *tmp = tmp_array;                                                 \
625
    for (y = 0; y < h + 1; y++) {                                             \
626
        for (x = 0; x < SIZE; x++)                                            \
627
            tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;                  \
628
        tmp += SIZE;                                                          \
629
        src += sstride;                                                       \
630
    }                                                                         \
631
    tmp = tmp_array;                                                          \
632
    for (y = 0; y < h; y++) {                                                 \
633
        for (x = 0; x < SIZE; x++)                                            \
634
            dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3;               \
635
        dst += dstride;                                                       \
636
        tmp += SIZE;                                                          \
637
    }                                                                         \
638
}
639
640


1341280
/* Expand VP8_BILINEAR once per block width (16, 8 and 4). Each expansion
 * defines the put_vp8_bilinear<SIZE>_*_c functions for that width, which the
 * VP78_BILINEAR_MC_FUNC table setup refers to by name. */
VP8_BILINEAR(16)
641


463713
VP8_BILINEAR(8)
642


174923
VP8_BILINEAR(4)
643
644
/*
 * Fill the 3x3 sub-table dsp->put_vp8_epel_pixels_tab[IDX] with the C
 * implementations for block width SIZE.
 *
 * As the assigned function names show, the middle index selects the vertical
 * variant (0: none, 1: v4, 2: v6) and the last index selects the horizontal
 * variant (0: none, 1: h4, 2: h6); entry [0][0] is the plain
 * put_vp8_pixels<SIZE>_c function.
 *
 * The expansion uses a variable named `dsp` from the surrounding scope and
 * deliberately ends without a semicolon, so the invocation must supply it.
 */
#define VP78_MC_FUNC(IDX, SIZE)                                               \
645
    dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c;   \
646
    dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c;  \
647
    dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c;  \
648
    dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c;  \
649
    dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
650
    dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
651
    dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c;  \
652
    dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
653
    dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c
654
655
/*
 * Fill the 3x3 sub-table dsp->put_vp8_bilinear_pixels_tab[IDX] with the C
 * bilinear implementations for block width SIZE.
 *
 * The table has the same [IDX][.][.] shape as the one filled by VP78_MC_FUNC,
 * but index values 1 and 2 map to the same function in each dimension:
 * a non-zero last index selects a horizontal (_h) function, a non-zero middle
 * index selects a vertical (_v) function, and both non-zero select _hv.
 * Entry [0][0] is the plain put_vp8_pixels<SIZE>_c function.
 *
 * The expansion uses a variable named `dsp` from the surrounding scope and
 * ends without a semicolon, so the invocation must supply it.
 */
#define VP78_BILINEAR_MC_FUNC(IDX, SIZE)                                      \
656
    dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels   ## SIZE ## _c; \
657
    dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
658
    dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
659
    dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \
660
    dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
661
    dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
662
    dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \
663
    dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
664
    dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c
665
666
73
/**
 * Install the C motion-compensation functions into a VP8DSPContext.
 *
 * Fills both put_vp8_epel_pixels_tab and put_vp8_bilinear_pixels_tab for
 * table indices 0, 1 and 2 (block widths 16, 8 and 4 respectively), then
 * calls the architecture-specific init functions guarded by the ARCH_* flags.
 *
 * @param dsp context whose function-pointer tables are written
 */
av_cold void ff_vp78dsp_init(VP8DSPContext *dsp)
667
{
668
73
    /* Six-tap/four-tap ("epel") table: index 0 -> 16, 1 -> 8, 2 -> 4. */
    VP78_MC_FUNC(0, 16);
669
73
    VP78_MC_FUNC(1, 8);
670
73
    VP78_MC_FUNC(2, 4);
671
672
73
    /* Bilinear table, same index-to-width mapping. */
    VP78_BILINEAR_MC_FUNC(0, 16);
673
73
    VP78_BILINEAR_MC_FUNC(1, 8);
674
73
    VP78_BILINEAR_MC_FUNC(2, 4);
675
676
    /* Arch hooks run after the C assignments above; presumably they replace
     * entries with optimized versions -- their bodies are not in this file
     * excerpt. NOTE(review): ARCH_* look like build-time 0/1 constants;
     * confirm against the build configuration. */
    if (ARCH_AARCH64)
677
        ff_vp78dsp_init_aarch64(dsp);
678
    if (ARCH_ARM)
679
        ff_vp78dsp_init_arm(dsp);
680
    if (ARCH_PPC)
681
        ff_vp78dsp_init_ppc(dsp);
682
    if (ARCH_X86)
683
73
        ff_vp78dsp_init_x86(dsp);
684
73
}
685
686
#if CONFIG_VP7_DECODER
687

2731440
/* NOTE(review): LOOP_FILTERS is defined outside this excerpt; presumably this
 * expansion defines the vp7_*_loop_filter*_c functions that ff_vp7dsp_init
 * assigns -- confirm against the macro definition. */
LOOP_FILTERS(vp7)
688
689
3
/**
 * Install the VP7 C implementations into a VP8DSPContext.
 *
 * The context fields keep their vp8_* names; this function points them at the
 * vp7_*_c functions instead. No architecture-specific init is called here.
 * Only compiled when CONFIG_VP7_DECODER is enabled.
 *
 * @param dsp context whose function pointers are written
 */
av_cold void ff_vp7dsp_init(VP8DSPContext *dsp)
690
{
691
3
    /* Luma DC WHT and IDCT slots. */
    dsp->vp8_luma_dc_wht    = vp7_luma_dc_wht_c;
692
3
    dsp->vp8_luma_dc_wht_dc = vp7_luma_dc_wht_dc_c;
693
3
    dsp->vp8_idct_add       = vp7_idct_add_c;
694
3
    dsp->vp8_idct_dc_add    = vp7_idct_dc_add_c;
695
3
    dsp->vp8_idct_dc_add4y  = vp7_idct_dc_add4y_c;
696
3
    dsp->vp8_idct_dc_add4uv = vp7_idct_dc_add4uv_c;
697
698
3
    /* Loop filter slots (16y and 8uv, vertical and horizontal). */
    dsp->vp8_v_loop_filter16y = vp7_v_loop_filter16_c;
699
3
    dsp->vp8_h_loop_filter16y = vp7_h_loop_filter16_c;
700
3
    dsp->vp8_v_loop_filter8uv = vp7_v_loop_filter8uv_c;
701
3
    dsp->vp8_h_loop_filter8uv = vp7_h_loop_filter8uv_c;
702
703
3
    /* "Inner" loop filter slots. */
    dsp->vp8_v_loop_filter16y_inner = vp7_v_loop_filter16_inner_c;
704
3
    dsp->vp8_h_loop_filter16y_inner = vp7_h_loop_filter16_inner_c;
705
3
    dsp->vp8_v_loop_filter8uv_inner = vp7_v_loop_filter8uv_inner_c;
706
3
    dsp->vp8_h_loop_filter8uv_inner = vp7_h_loop_filter8uv_inner_c;
707
708
3
    /* "Simple" loop filter slots. */
    dsp->vp8_v_loop_filter_simple = vp7_v_loop_filter_simple_c;
709
3
    dsp->vp8_h_loop_filter_simple = vp7_h_loop_filter_simple_c;
710
3
}
711
#endif /* CONFIG_VP7_DECODER */
712
713
#if CONFIG_VP8_DECODER
714

74194844
/* NOTE(review): LOOP_FILTERS is defined outside this excerpt; presumably this
 * expansion defines the vp8_*_loop_filter*_c functions that ff_vp8dsp_init
 * assigns -- confirm against the macro definition. */
LOOP_FILTERS(vp8)
715
716
135
/**
 * Install the VP8 C implementations into a VP8DSPContext.
 *
 * Assigns the vp8_*_c functions to the WHT, IDCT and loop-filter slots, then
 * calls the architecture-specific init functions guarded by the ARCH_* flags.
 * Only compiled when CONFIG_VP8_DECODER is enabled.
 *
 * @param dsp context whose function pointers are written
 */
av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
717
{
718
135
    /* Luma DC WHT and IDCT slots. */
    dsp->vp8_luma_dc_wht    = vp8_luma_dc_wht_c;
719
135
    dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c;
720
135
    dsp->vp8_idct_add       = vp8_idct_add_c;
721
135
    dsp->vp8_idct_dc_add    = vp8_idct_dc_add_c;
722
135
    dsp->vp8_idct_dc_add4y  = vp8_idct_dc_add4y_c;
723
135
    dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c;
724
725
135
    /* Loop filter slots (16y and 8uv, vertical and horizontal). */
    dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c;
726
135
    dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c;
727
135
    dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c;
728
135
    dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c;
729
730
135
    /* "Inner" loop filter slots. */
    dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c;
731
135
    dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c;
732
135
    dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c;
733
135
    dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c;
734
735
135
    /* "Simple" loop filter slots. */
    dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c;
736
135
    dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c;
737
738
    /* Arch hooks run after the C assignments above; presumably they replace
     * entries with optimized versions -- their bodies are not in this file
     * excerpt. NOTE(review): ARCH_* look like build-time 0/1 constants;
     * confirm against the build configuration. */
    if (ARCH_AARCH64)
739
        ff_vp8dsp_init_aarch64(dsp);
740
    if (ARCH_ARM)
741
        ff_vp8dsp_init_arm(dsp);
742
    if (ARCH_X86)
743
135
        ff_vp8dsp_init_x86(dsp);
744
    if (ARCH_MIPS)
745
        ff_vp8dsp_init_mips(dsp);
746
135
}
747
#endif /* CONFIG_VP8_DECODER */