GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/vp8.c Lines: 1349 1584 85.2 %
Date: 2020-08-14 10:39:37 Branches: 805 1161 69.3 %

Line Branch Exec Source
1
/*
2
 * VP7/VP8 compatible video decoder
3
 *
4
 * Copyright (C) 2010 David Conrad
5
 * Copyright (C) 2010 Ronald S. Bultje
6
 * Copyright (C) 2010 Fiona Glaser
7
 * Copyright (C) 2012 Daniel Kang
8
 * Copyright (C) 2014 Peter Ross
9
 *
10
 * This file is part of FFmpeg.
11
 *
12
 * FFmpeg is free software; you can redistribute it and/or
13
 * modify it under the terms of the GNU Lesser General Public
14
 * License as published by the Free Software Foundation; either
15
 * version 2.1 of the License, or (at your option) any later version.
16
 *
17
 * FFmpeg is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
 * Lesser General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Lesser General Public
23
 * License along with FFmpeg; if not, write to the Free Software
24
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25
 */
26
27
#include "libavutil/imgutils.h"
28
29
#include "avcodec.h"
30
#include "hwconfig.h"
31
#include "internal.h"
32
#include "mathops.h"
33
#include "rectangle.h"
34
#include "thread.h"
35
#include "vp8.h"
36
#include "vp8data.h"
37
38
#if ARCH_ARM
39
#   include "arm/vp8.h"
40
#endif
41
42
/*
 * VPX(vp7, f) names the VP7 or VP8 flavour of symbol f via token pasting.
 * When both decoders are compiled in, the selection is made at the point of
 * use from the vp7 expression; when only one decoder is compiled in, the vp7
 * argument is ignored and the only available flavour is used.
 * The vp7 argument is parenthesised so that any expression can be passed.
 */
#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) ((vp7) ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
49
50
73
/**
 * Release all per-stream working buffers held by the decoder context.
 *
 * If per-thread data exists, each of the MAX_THREADS slots has its
 * condition variable and mutex destroyed (only when threads are compiled in)
 * and its filter_strength array freed. Afterwards the thread data array and
 * the macroblock, intra 4x4 prediction mode, top non-zero and top border
 * arrays are freed, and s->macroblocks is reset to NULL.
 */
static void free_buffers(VP8Context *s)
{
    if (s->thread_data) {
        int t;

        for (t = 0; t < MAX_THREADS; t++) {
            VP8ThreadData *td = &s->thread_data[t];

#if HAVE_THREADS
            pthread_cond_destroy(&td->cond);
            pthread_mutex_destroy(&td->lock);
#endif
            av_freep(&td->filter_strength);
        }
    }

    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    /* macroblocks pointed into macroblocks_base, which is gone now */
    s->macroblocks = NULL;
}
69
70
1142
/**
 * Allocate everything a VP8Frame needs before it can be decoded into.
 *
 * @param s   decoder context
 * @param f   frame slot to populate
 * @param ref non-zero to request the buffer with AV_GET_BUFFER_FLAG_REF
 * @return 0 on success, the error from ff_thread_get_buffer() if the picture
 *         buffer could not be obtained, or AVERROR(ENOMEM) if the
 *         segmentation map or hwaccel private buffer could not be allocated
 *         (in which case the picture buffer is released again)
 */
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    AVCodecContext *avctx = s->avctx;
    const AVHWAccel *hwaccel;
    int err;

    err = ff_thread_get_buffer(avctx, &f->tf, ref ? AV_GET_BUFFER_FLAG_REF : 0);
    if (err < 0)
        return err;

    /* one zero-initialised byte per macroblock */
    f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height);
    if (!f->seg_map)
        goto fail;

    hwaccel = avctx->hwaccel;
    if (hwaccel && hwaccel->frame_priv_data_size) {
        f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
        if (!f->hwaccel_priv_buf)
            goto fail;
        f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(avctx, &f->tf);
    return AVERROR(ENOMEM);
}
94
95
1398
/**
 * Drop every reference a VP8Frame holds: the segmentation map, the hwaccel
 * private buffer (and the raw pointer into it) and the picture buffer.
 *
 * @param s decoder context (supplies the AVCodecContext for the release call)
 * @param f frame slot to empty
 */
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    AVCodecContext *avctx = s->avctx;

    av_buffer_unref(&f->seg_map);

    /* the private pointer aliases the buffer's data, so clear both */
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->hwaccel_picture_private = NULL;

    ff_thread_release_buffer(avctx, &f->tf);
}
102
103
#if CONFIG_VP8_DECODER
104
/**
 * Make dst a new reference to the same frame as src.
 *
 * Whatever dst held before is released first. On any failure dst is left
 * fully released, so it never ends up holding only part of src's buffers.
 *
 * @param s   decoder context
 * @param dst frame slot that receives the references
 * @param src frame to reference
 * @return 0 on success, the error from ff_thread_ref_frame(), or
 *         AVERROR(ENOMEM) if a buffer reference could not be created
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }
    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf) {
            /* undo the picture and seg_map references taken above, the same
             * way the seg_map failure path does */
            vp8_release_frame(s, dst);
            return AVERROR(ENOMEM);
        }
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;
}
126
#endif /* CONFIG_VP8_DECODER */
127
128
73
/**
 * Release every frame slot and forget all reference-frame pointers.
 *
 * @param avctx    codec context whose priv_data is the VP8Context
 * @param free_mem when non-zero, the per-stream working buffers are freed
 *                 as well (see free_buffers())
 */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    VP8Frame *slot;

    for (slot = s->frames; slot < s->frames + FF_ARRAY_ELEMS(s->frames); slot++) {
        vp8_release_frame(s, slot);
    }
    memset(s->framep, 0, sizeof(s->framep));

    if (!free_mem)
        return;

    free_buffers(s);
}
140
141
/**
 * Flush entry point: release all frames and reference pointers while keeping
 * the per-stream working buffers allocated (free_mem = 0).
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    const int keep_buffers = 0;

    vp8_decode_flush_impl(avctx, keep_buffers);
}
145
146
1142
/**
 * Pick a frame slot that is not referenced by any of the current, previous,
 * golden or altref (GOLDEN2) frame pointers.
 *
 * If the chosen slot still carries a picture buffer, it is released before
 * the slot is returned. The scan covers the whole s->frames array, so the
 * bound follows the array size instead of repeating it as a literal.
 *
 * @return the free slot; never NULL (the process is aborted if no slot is
 *         free)
 */
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (!frame) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->buf[0])
        vp8_release_frame(s, frame);

    return frame;
}
169
170
30
/**
 * Negotiate the output pixel format through ff_get_format().
 *
 * The candidate list offers the hardware formats that were enabled at build
 * time (VAAPI, then CUDA) followed by software YUV 4:2:0, and is terminated
 * by AV_PIX_FMT_NONE.
 *
 * @return whatever ff_get_format() returns for that list
 */
static enum AVPixelFormat get_pixel_format(VP8Context *s)
{
    enum AVPixelFormat candidates[] = {
#if CONFIG_VP8_VAAPI_HWACCEL
        AV_PIX_FMT_VAAPI,
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
        AV_PIX_FMT_CUDA,
#endif
        AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_NONE,
    };
    enum AVPixelFormat chosen = ff_get_format(s->avctx, candidates);

    return chosen;
}
185
186
static av_always_inline
187
37
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
188
{
189
37
    AVCodecContext *avctx = s->avctx;
190
    int i, ret;
191
192


37
    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
193
24
        height != s->avctx->height) {
194
13
        vp8_decode_flush_impl(s->avctx, 1);
195
196
13
        ret = ff_set_dimensions(s->avctx, width, height);
197
13
        if (ret < 0)
198
            return ret;
199
    }
200
201

37
    if (!s->actually_webp && !is_vp7) {
202
30
        s->pix_fmt = get_pixel_format(s);
203
30
        if (s->pix_fmt < 0)
204
            return AVERROR(EINVAL);
205
30
        avctx->pix_fmt = s->pix_fmt;
206
    }
207
208
37
    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
209
37
    s->mb_height = (s->avctx->coded_height + 15) / 16;
210
211

37
    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
212
                   avctx->thread_count > 1;
213
37
    if (!s->mb_layout) { // Frame threading and one thread
214
36
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
215
                                               sizeof(*s->macroblocks));
216
36
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
217
    } else // Sliced threading
218
1
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
219
                                         sizeof(*s->macroblocks));
220
37
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
221
37
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
222
37
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
223
224

37
    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
225

37
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
226
        free_buffers(s);
227
        return AVERROR(ENOMEM);
228
    }
229
230
333
    for (i = 0; i < MAX_THREADS; i++) {
231
592
        s->thread_data[i].filter_strength =
232
296
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
233
296
        if (!s->thread_data[i].filter_strength) {
234
            free_buffers(s);
235
            return AVERROR(ENOMEM);
236
        }
237
#if HAVE_THREADS
238
296
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
239
296
        pthread_cond_init(&s->thread_data[i].cond, NULL);
240
#endif
241
    }
242
243
37
    s->macroblocks = s->macroblocks_base + 1;
244
245
37
    return 0;
246
}
247
248
1
/**
 * VP7 front end for update_dimensions(): forwards the new picture size with
 * the codec selector fixed to IS_VP7.
 *
 * @return the result of update_dimensions()
 */
static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    int status = update_dimensions(s, width, height, IS_VP7);

    return status;
}
252
253
36
/**
 * VP8 front end for update_dimensions(): forwards the new picture size with
 * the codec selector fixed to IS_VP8.
 *
 * @return the result of update_dimensions()
 */
static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    int status = update_dimensions(s, width, height, IS_VP8);

    return status;
}
257
258
259
433
/**
 * Read the segmentation part of the frame header from the header range
 * coder.
 *
 * Two flags are read first: whether the segment map is updated and whether
 * the per-segment feature data is updated. If feature data is updated, the
 * absolute/delta flag and four signed quantizer values (7 bits) followed by
 * four signed filter levels (6 bits) are read. If the map is updated, three
 * segment-id probabilities are read; each is either an explicit 8-bit value
 * or 255 when its presence flag is clear.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int seg, node;

    s->segmentation.update_map          = vp8_rac_get(rc);
    s->segmentation.update_feature_data = vp8_rac_get(rc);

    if (s->segmentation.update_feature_data) {
        s->segmentation.absolute_vals = vp8_rac_get(rc);

        for (seg = 0; seg < 4; seg++) {
            s->segmentation.base_quant[seg] = vp8_rac_get_sint(rc, 7);
        }

        for (seg = 0; seg < 4; seg++) {
            s->segmentation.filter_level[seg] = vp8_rac_get_sint(rc, 6);
        }
    }

    if (!s->segmentation.update_map)
        return;

    for (node = 0; node < 3; node++) {
        if (vp8_rac_get(rc))
            s->prob->segmentid[node] = vp8_rac_get_uint(rc, 8);
        else
            s->prob->segmentid[node] = 255;
    }
}
280
281
42
/**
 * Read the loop-filter delta updates from the header range coder.
 *
 * For each of the four reference-frame deltas, and then for each mode delta
 * from MODE_I4x4 through VP8_MVMODE_SPLIT, an update flag is read. When the
 * flag is set, a 6-bit magnitude and a sign bit follow; entries whose flag
 * is clear keep their previous value.
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int idx;

    for (idx = 0; idx < 4; idx++) {
        if (!vp8_rac_get(rc))
            continue;

        s->lf_delta.ref[idx] = vp8_rac_get_uint(rc, 6);

        if (vp8_rac_get(rc)) {
            s->lf_delta.ref[idx] = -s->lf_delta.ref[idx];
        }
    }

    for (idx = MODE_I4x4; idx <= VP8_MVMODE_SPLIT; idx++) {
        if (!vp8_rac_get(rc))
            continue;

        s->lf_delta.mode[idx] = vp8_rac_get_uint(rc, 6);

        if (vp8_rac_get(rc)) {
            s->lf_delta.mode[idx] = -s->lf_delta.mode[idx];
        }
    }
}
304
305
1112
/**
 * Set up one range decoder per DCT coefficient partition.
 *
 * The partition count (1, 2, 4 or 8) is read as a 2-bit exponent from the
 * header range coder. buf starts with a table of 3-byte little-endian sizes
 * for all partitions but the last, followed by the partition data; the last
 * partition takes whatever remains of the buffer.
 *
 * @param s        decoder context
 * @param buf      start of the partition size table
 * @param buf_size number of bytes available at buf
 * @return 0 on success, -1 if the size table or a partition does not fit in
 *         the buffer, or the error from ff_vp56_init_range_decoder() for any
 *         partition but the last
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *size_table = buf;
    int last, idx, ret;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
    last                    = s->num_coeff_partitions - 1;

    /* skip over the size table to reach the first partition */
    buf      += 3 * last;
    buf_size -= 3 * last;
    if (buf_size < 0)
        return -1;

    for (idx = 0; idx < last; idx++) {
        int size = AV_RL24(size_table + 3 * idx);

        if (size > buf_size)
            return -1;
        s->coeff_partition_size[idx] = size;

        ret = ff_vp56_init_range_decoder(&s->coeff_partition[idx], buf, size);
        if (ret < 0)
            return ret;

        buf      += size;
        buf_size -= size;
    }

    /* NOTE(review): the result for the final partition is not checked here;
     * presumably intentional - confirm before changing. */
    s->coeff_partition_size[last] = buf_size;
    ff_vp56_init_range_decoder(&s->coeff_partition[last], buf, buf_size);

    return 0;
}
336
337
30
/**
 * Read the VP7 dequantization indices and fill quantizer matrix 0.
 *
 * The luma AC index is always present (7 bits). Each of the five remaining
 * indices is preceded by a flag; when the flag is clear the index defaults
 * to the luma AC index. The indices are then mapped through the VP7 lookup
 * tables, with the chroma DC factor capped at 132.
 */
static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int yac_qi, ydc_qi, y2dc_qi, y2ac_qi, uvdc_qi, uvac_qi;

    yac_qi = vp8_rac_get_uint(rc, 7);

    /* every optional index falls back to the luma AC index */
    ydc_qi = y2dc_qi = y2ac_qi = uvdc_qi = uvac_qi = yac_qi;

    if (vp8_rac_get(rc))
        ydc_qi  = vp8_rac_get_uint(rc, 7);
    if (vp8_rac_get(rc))
        y2dc_qi = vp8_rac_get_uint(rc, 7);
    if (vp8_rac_get(rc))
        y2ac_qi = vp8_rac_get_uint(rc, 7);
    if (vp8_rac_get(rc))
        uvdc_qi = vp8_rac_get_uint(rc, 7);
    if (vp8_rac_get(rc))
        uvac_qi = vp8_rac_get_uint(rc, 7);

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}
355
356
1112
/**
 * Read the VP8 quantizer header fields and build the four per-segment
 * quantizer matrices.
 *
 * A 7-bit luma AC index and five signed 4-bit deltas are read. For every
 * segment the base index is the segment's own value when segmentation is
 * enabled (added to the frame index unless the values are absolute), or the
 * frame index otherwise. Each factor is looked up with the index clipped to
 * 7 bits; the second-order luma DC factor is doubled, the second-order luma
 * AC factor is scaled by about 1.55 and kept at 8 or above, and the chroma
 * DC factor is capped at 132.
 */
static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int seg;

    s->quant.yac_qi     = vp8_rac_get_uint(rc, 7);
    s->quant.ydc_delta  = vp8_rac_get_sint(rc, 4);
    s->quant.y2dc_delta = vp8_rac_get_sint(rc, 4);
    s->quant.y2ac_delta = vp8_rac_get_sint(rc, 4);
    s->quant.uvdc_delta = vp8_rac_get_sint(rc, 4);
    s->quant.uvac_delta = vp8_rac_get_sint(rc, 4);

    for (seg = 0; seg < 4; seg++) {
        int q = s->quant.yac_qi;

        if (s->segmentation.enabled) {
            q = s->segmentation.base_quant[seg];
            if (!s->segmentation.absolute_vals) {
                q += s->quant.yac_qi;
            }
        }

        s->qmat[seg].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(q + s->quant.ydc_delta,  7)];
        s->qmat[seg].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(q,                       7)];
        s->qmat[seg].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(q + s->quant.y2dc_delta, 7)] * 2;
        /* multiplying by 101581 and shifting right by 16 scales by 155/100 */
        s->qmat[seg].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(q + s->quant.y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[seg].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(q + s->quant.uvdc_delta, 7)];
        s->qmat[seg].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(q + s->quant.uvac_delta, 7)];

        /* range limits applied after the lookups */
        s->qmat[seg].luma_dc_qmul[1] = FFMAX(s->qmat[seg].luma_dc_qmul[1], 8);
        s->qmat[seg].chroma_qmul[0]  = FFMIN(s->qmat[seg].chroma_qmul[0], 132);
    }
}
388
389
/**
390
 * Determine which buffers golden and altref should be updated with after this frame.
391
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
392
 *
393
 * Intra frames update all 3 references
394
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
395
 * If the update (golden|altref) flag is set, it's updated with the current frame
396
 *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
397
 * If the flag is not set, the number read means:
398
 *      0: no update
399
 *      1: VP56_FRAME_PREVIOUS
400
 *      2: update golden with altref, or update altref with golden
401
 */
402
2128
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
403
{
404
2128
    VP56RangeCoder *c = &s->c;
405
406
2128
    if (update)
407
90
        return VP56_FRAME_CURRENT;
408
409
2038
    switch (vp8_rac_get_uint(c, 2)) {
410
31
    case 1:
411
31
        return VP56_FRAME_PREVIOUS;
412
84
    case 2:
413
84
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
414
    }
415
1923
    return VP56_FRAME_NONE;
416
}
417
418
49
/**
 * Reset the token probabilities to their defaults.
 *
 * For each of the 4 block types and each of the 16 coefficient positions,
 * the default probabilities of the band that position belongs to
 * (vp8_coeff_band) are copied into s->prob->token.
 */
static void vp78_reset_probability_tables(VP8Context *s)
{
    int type, pos;

    for (type = 0; type < 4; type++) {
        for (pos = 0; pos < 16; pos++) {
            const int band = vp8_coeff_band[pos];

            memcpy(s->prob->token[type][pos],
                   vp8_token_default_probs[type][band],
                   sizeof(s->prob->token[type][pos]));
        }
    }
}
426
427
1142
/**
 * Read the token probability updates from the header range coder.
 *
 * For every block type (4), band (8), context (3) and tree node
 * (NUM_DCT_TOKENS - 1) an update flag is decoded with the matching entry of
 * vp8_token_update_probs. When it is set, an 8-bit probability follows and
 * is stored for every coefficient position listed for that band in
 * vp8_coeff_band_indexes (each list ends with a negative entry).
 */
static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int type, band, ctx, node, n;

    for (type = 0; type < 4; type++) {
        for (band = 0; band < 8; band++) {
            for (ctx = 0; ctx < 3; ctx++) {
                for (node = 0; node < NUM_DCT_TOKENS-1; node++) {
                    int prob;

                    if (!vp56_rac_get_prob_branchy(rc, vp8_token_update_probs[type][band][ctx][node]))
                        continue;

                    prob = vp8_rac_get_uint(rc, 8);
                    for (n = 0; vp8_coeff_band_indexes[band][n] >= 0; n++) {
                        s->prob->token[type][vp8_coeff_band_indexes[band][n]][ctx][node] = prob;
                    }
                }
            }
        }
    }
}
442
443
/* Number of motion vector probabilities per component that the VP7 header
 * parser asks vp78_update_pred16x16_pred8x8_mvc_probabilities() to update. */
#define VP7_MVC_SIZE 17
444
/* Same count for the VP8 header parser. */
#define VP8_MVC_SIZE 19
445
446
1093
/**
 * Read the optional intra prediction mode probabilities and the motion
 * vector probability updates from the header range coder.
 *
 * A flag announces four new 8-bit 16x16 prediction probabilities, a second
 * flag three new 8-bit chroma 8x8 prediction probabilities. Then, for both
 * motion vector components, each of the first mvc_size probabilities is
 * replaced when its update flag (decoded with vp8_mv_update_prob) is set.
 *
 * @param s        decoder context
 * @param mvc_size number of motion vector probabilities per component
 */
static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *rc = &s->c;
    int comp, n;

    if (vp8_rac_get(rc)) {
        for (n = 0; n < 4; n++)
            s->prob->pred16x16[n] = vp8_rac_get_uint(rc, 8);
    }

    if (vp8_rac_get(rc)) {
        for (n = 0; n < 3; n++)
            s->prob->pred8x8c[n] = vp8_rac_get_uint(rc, 8);
    }

    // 17.2 MV probability update
    for (comp = 0; comp < 2; comp++) {
        for (n = 0; n < mvc_size; n++) {
            if (!vp56_rac_get_prob_branchy(rc, vp8_mv_update_prob[comp][n]))
                continue;
            s->prob->mvc[comp][n] = vp8_rac_get_nn(rc);
        }
    }
}
465
466
1064
/**
 * Read the golden and altref refresh information for an inter frame.
 *
 * Both update flags are read first, in golden/altref order; ref_to_update()
 * then turns each flag into the source frame, reading further bits from the
 * header when the flag is clear.
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int golden_flag, altref_flag;

    golden_flag = vp8_rac_get(rc);
    altref_flag = vp8_rac_get(rc);

    s->update_golden = ref_to_update(s, golden_flag, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, altref_flag, VP56_FRAME_GOLDEN2);
}
476
477
/**
 * Copy planes 1 and 2 of src into dst, row by row.
 *
 * @param dst    destination frame
 * @param src    source frame
 * @param width  width in luma samples; width / 2 bytes are copied per row
 * @param height height in luma samples; height / 2 rows are copied
 */
static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    const int rows      = height / 2;
    const int row_bytes = width / 2;
    int plane, y;

    for (plane = 1; plane < 3; plane++) {
        for (y = 0; y < rows; y++) {
            memcpy(dst->data[plane] + y * dst->linesize[plane],
                   src->data[plane] + y * src->linesize[plane], row_bytes);
        }
    }
}
487
488
/**
 * Apply a brightness/contrast fade to a plane of 8-bit samples.
 *
 * Each output sample is y + ((y * beta) >> 8) + alpha, clipped to 0..255,
 * where y is the matching source sample. dst and src may be the same plane.
 *
 * @param dst          destination plane
 * @param dst_linesize byte distance between destination rows
 * @param src          source plane
 * @param src_linesize byte distance between source rows
 * @param width        samples per row
 * @param height       number of rows
 * @param alpha        additive term
 * @param beta         multiplicative term, in 1/256 units
 */
static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *in  = src + row * src_linesize;
        uint8_t       *out = dst + row * dst_linesize;

        for (col = 0; col < width; col++) {
            uint8_t y = in[col];

            out[col] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}
503
504
30
/**
 * Apply the VP7 fade to the luma plane of the previous frame.
 *
 * Nothing happens on keyframes or when both alpha and beta are zero.
 * Otherwise the previous frame is faded in place, unless it is also the
 * golden frame: in that case a new previous frame is allocated, chroma is
 * copied over unchanged and the faded luma is written to the new frame, so
 * the golden frame stays intact.
 *
 * @param s     decoder context
 * @param alpha additive fade term
 * @param beta  multiplicative fade term
 * @return 0 on success, AVERROR_INVALIDDATA when the previous or golden
 *         frame is missing, or the error from vp8_alloc_frame()
 */
static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
{
    AVFrame *src, *dst;
    int width, height, ret;

    if (s->keyframe || (!alpha && !beta))
        return 0;

    width  = s->mb_width * 16;
    height = s->mb_height * 16;

    if (!s->framep[VP56_FRAME_PREVIOUS] ||
        !s->framep[VP56_FRAME_GOLDEN]) {
        av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        return AVERROR_INVALIDDATA;
    }

    src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
    dst = src;

    /* preserve the golden frame, write a new previous frame */
    if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
        s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
        ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1);
        if (ret < 0)
            return ret;

        dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        copy_chroma(dst, src, width, height);
    }

    fade(dst->data[0], dst->linesize[0],
         src->data[0], src->linesize[0],
         width, height, alpha, beta);

    return 0;
}
540
541
30
/**
 * Parse a VP7 frame header and prepare the decoder state for the frame.
 *
 * The first bytes carry the keyframe bit, the profile and the size of the
 * first partition; the rest of the header is read from that partition with
 * the range coder, in the order given by the section letters below. The
 * data after the first partition becomes the single coefficient partition.
 *
 * @param s        decoder context
 * @param buf      start of the frame data
 * @param buf_size number of bytes available at buf
 * @return 0 on success, AVERROR_INVALIDDATA for truncated data or an
 *         unsupported profile, or an error from the range decoder setup,
 *         the dimension update or the fade step
 */
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *rc = &s->c;
    int part1_size, hscale, vscale, idx, k, ret;
    int prefix_len;
    int width  = s->avctx->width;
    int height = s->avctx->height;
    int alpha  = 0;
    int beta   = 0;

    if (buf_size < 4)
        return AVERROR_INVALIDDATA;

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;

    /* the fixed prefix is 4 bytes for profile 0 and 3 bytes for profile 1 */
    prefix_len = 4 - s->profile;
    if (buf_size < prefix_len + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, prefix_len + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += prefix_len;
    buf_size -= prefix_len;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ret = ff_vp56_init_range_decoder(rc, buf, part1_size);
    if (ret < 0)
        return ret;
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(rc, 12);
        height = vp8_rac_get_uint(rc, 12);
        hscale = vp8_rac_get_uint(rc, 2);
        vscale = vp8_rac_get_uint(rc, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        /* a keyframe refreshes both references and resets all state that
         * persists between frames */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (idx = 0; idx < 2; idx++) {
            memcpy(s->prob->mvc[idx], vp7_mv_default_prob[idx],
                   sizeof(vp7_mv_default_prob[idx]));
        }
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (idx = 0; idx < 4; idx++) {
        s->feature_enabled[idx] = vp8_rac_get(rc);
        if (!s->feature_enabled[idx])
            continue;

        s->feature_present_prob[idx] = vp8_rac_get_uint(rc, 8);

        for (k = 0; k < 3; k++) {
            s->feature_index_prob[idx][k] =
                vp8_rac_get(rc) ? vp8_rac_get_uint(rc, 8) : 255;
        }

        if (vp7_feature_value_size[s->profile][idx]) {
            for (k = 0; k < 4; k++) {
                s->feature_value[idx][k] =
                    vp8_rac_get(rc) ? vp8_rac_get_uint(rc, vp7_feature_value_size[s->profile][idx]) : 0;
            }
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    /* everything after the first partition is one coefficient partition */
    s->num_coeff_partitions = 1;
    ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
    if (ret < 0)
        return ret;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        ret = vp7_update_dimensions(s, width, height);
        if (ret < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(rc) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(rc);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(rc);
    }

    if (vpX_rac_is_end(rc))
        return AVERROR_INVALIDDATA;

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(rc)) {
        alpha = (int8_t) vp8_rac_get_uint(rc, 8);
        beta  = (int8_t) vp8_rac_get_uint(rc, 8);
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(rc);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(rc)) {
        for (idx = 1; idx < 16; idx++)
            s->prob[0].scan[idx] = ff_zigzag_scan[vp8_rac_get_uint(rc, 4)];
    }

    /* H. Loop filter levels  */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(rc);
    s->filter.level     = vp8_rac_get_uint(rc, 6);
    s->filter.sharpness = vp8_rac_get_uint(rc, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(rc, 8);
        s->prob->last   = vp8_rac_get_uint(rc, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    if (vpX_rac_is_end(rc))
        return AVERROR_INVALIDDATA;

    ret = vp7_fade_frame(s, alpha, beta);
    if (ret < 0)
        return ret;

    return 0;
}
704
705
1112
/**
 * Parse a VP8 frame header and prepare the decoder state for the frame.
 *
 * The 3-byte frame tag carries the keyframe bit, profile, show-frame bit and
 * the size of the header partition. Keyframes additionally carry a start
 * code and the picture dimensions. The remaining header fields are read
 * from the header partition with the range coder; the data that follows the
 * header partition is split into the coefficient partitions.
 *
 * @param s        decoder context
 * @param buf      start of the frame data
 * @param buf_size number of bytes available at buf
 * @return 0 on success, AVERROR_INVALIDDATA for truncated or malformed
 *         data, or an error from the range decoder setup or the dimension
 *         update
 */
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    s->header_partition_size = header_size;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    /* keyframes carry 7 extra bytes (start code and dimensions) before the
     * header partition */
    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        /* 14-bit dimensions; the top two bits of each 16-bit field hold the
         * scaling code */
        width     = AV_RL16(buf + 3) & 0x3fff;
        height    = AV_RL16(buf + 5) & 0x3fff;
        hscale    = buf[4] >> 6;
        vscale    = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        /* a keyframe refreshes both references and resets all state that
         * persists between frames */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ret = ff_vp56_init_range_decoder(c, buf, header_size);
    if (ret < 0)
        return ret;
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
        s->lf_delta.update = vp8_rac_get(c);
        if (s->lf_delta.update)
            update_lf_deltas(s);
    }

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    // Record the entropy coder state here so that hwaccels can use it.
    s->c.code_word = vp56_rac_renorm(&s->c);
    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
    s->coder_state_at_header_end.range     = s->c.high;
    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;

    return 0;
}
844
845
static av_always_inline
846
57174
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
847
{
848
57174
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
849
                             av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
850
57174
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
851
                             av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
852
57174
}
853
854
/**
855
 * Motion vector coding, 17.1.
856
 */
857
84886
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
858
{
859
84886
    int bit, x = 0;
860
861
84886
    if (vp56_rac_get_prob_branchy(c, p[0])) {
862
        int i;
863
864
79804
        for (i = 0; i < 3; i++)
865
59853
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
866

139657
        for (i = (vp7 ? 7 : 9); i > 3; i--)
867
119706
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
868

19951
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
869
13304
            x += 8;
870
    } else {
871
        // small_mvtree
872
64935
        const uint8_t *ps = p + 2;
873
64935
        bit = vp56_rac_get_prob(c, *ps);
874
64935
        ps += 1 + 3 * bit;
875
64935
        x  += 4 * bit;
876
64935
        bit = vp56_rac_get_prob(c, *ps);
877
64935
        ps += 1 + bit;
878
64935
        x  += 2 * bit;
879
64935
        x  += vp56_rac_get_prob(c, *ps);
880
    }
881
882

84886
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
883
}
884
885
/** Read one motion vector component using the VP7 variant of the coding. */
static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    const int is_vp7 = 1;

    return read_mv_component(c, p, is_vp7);
}
889
890
28388
/** Read one motion vector component using the VP8 variant of the coding. */
static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    const int is_vp7 = 0;

    return read_mv_component(c, p, is_vp7);
}
894
895
static av_always_inline
896
136556
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
897
{
898
136556
    if (is_vp7)
899
        return vp7_submv_prob;
900
901
136556
    if (left == top)
902
58677
        return vp8_submv_prob[4 - !!left];
903
77879
    if (!top)
904
18933
        return vp8_submv_prob[2];
905
58946
    return vp8_submv_prob[1 - !!left];
906
}
907
908
/**
909
 * Split motion vector prediction, 16.4.
910
 * @returns the number of motion vectors parsed (2, 4 or 16)
911
 */
912
static av_always_inline
913
18325
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
914
                    int layout, int is_vp7)
915
{
916
    int part_idx;
917
    int n, num;
918
    VP8Macroblock *top_mb;
919
18325
    VP8Macroblock *left_mb = &mb[-1];
920
18325
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
921
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
922
    VP56mv *top_mv;
923
18325
    VP56mv *left_mv = left_mb->bmv;
924
18325
    VP56mv *cur_mv  = mb->bmv;
925
926
18325
    if (!layout) // layout is inlined, s->mb_layout is not
927
18325
        top_mb = &mb[2];
928
    else
929
        top_mb = &mb[-s->mb_width - 1];
930
18325
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
931
18325
    top_mv       = top_mb->bmv;
932
933
18325
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
934
11713
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
935
8044
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
936
        else
937
3669
            part_idx = VP8_SPLITMVMODE_8x8;
938
    } else {
939
6612
        part_idx = VP8_SPLITMVMODE_4x4;
940
    }
941
942
18325
    num              = vp8_mbsplit_count[part_idx];
943
18325
    mbsplits_cur     = vp8_mbsplits[part_idx],
944
18325
    firstidx         = vp8_mbfirstidx[part_idx];
945
18325
    mb->partitioning = part_idx;
946
947
154881
    for (n = 0; n < num; n++) {
948
136556
        int k = firstidx[n];
949
        uint32_t left, above;
950
        const uint8_t *submv_prob;
951
952
136556
        if (!(k & 3))
953
46739
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
954
        else
955
89817
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
956
136556
        if (k <= 3)
957
44965
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
958
        else
959
91591
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
960
961
136556
        submv_prob = get_submv_prob(left, above, is_vp7);
962
963
136556
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
964
50605
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
965
31737
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
966
56498
                    mb->bmv[n].y = mb->mv.y +
967
28249
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
968
28249
                    mb->bmv[n].x = mb->mv.x +
969
28249
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
970
                } else {
971
3488
                    AV_ZERO32(&mb->bmv[n]);
972
                }
973
            } else {
974
18868
                AV_WN32A(&mb->bmv[n], above);
975
            }
976
        } else {
977
85951
            AV_WN32A(&mb->bmv[n], left);
978
        }
979
    }
980
981
18325
    return num;
982
}
983
984
/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * The target position is computed as a linear index over rows that are
 * mb_width + 1 macroblocks wide. It is rejected when it is below boundary
 * or lands in the last (padding) column.
 *
 * @param mb_x     current macroblock x address
 * @param mb_y     current macroblock y address
 * @param mb_width frame width in macroblocks (without the padding column)
 * @param xoffset  horizontal offset to apply
 * @param yoffset  vertical offset to apply
 * @param boundary smallest linear index considered legal
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean); the outputs are only written
 *         when the offset is legal
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    const int padded_width = mb_width + 1;
    const int pos          = (mb_y + yoffset) * padded_width + mb_x + xoffset;

    if (pos < boundary)
        return 0;
    if (pos % padded_width == padded_width - 1)
        return 0;

    *edge_y = pos / padded_width;
    *edge_x = pos % padded_width;
    return 1;
}
1007
1008
64699
static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
1009
{
1010
64699
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
1011
}
1012
1013
static av_always_inline
1014
6380
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1015
                    int mb_x, int mb_y, int layout)
1016
{
1017
    VP8Macroblock *mb_edge[12];
1018
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
1019
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1020
6380
    int idx = CNT_ZERO;
1021
    VP56mv near_mv[3];
1022
6380
    uint8_t cnt[3] = { 0 };
1023
6380
    VP56RangeCoder *c = &s->c;
1024
    int i;
1025
1026
6380
    AV_ZERO32(&near_mv[0]);
1027
6380
    AV_ZERO32(&near_mv[1]);
1028
6380
    AV_ZERO32(&near_mv[2]);
1029
1030
82940
    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
1031
76560
        const VP7MVPred * pred = &vp7_mv_pred[i];
1032
        int edge_x, edge_y;
1033
1034
76560
        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
1035
76560
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
1036
129398
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
1037
64699
                                             ? s->macroblocks_base + 1 + edge_x +
1038
64699
                                               (s->mb_width + 1) * (edge_y + 1)
1039
64699
                                             : s->macroblocks + edge_x +
1040
                                               (s->mb_height - edge_y - 1) * 2;
1041
64699
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
1042
64699
            if (mv) {
1043
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
1044
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
1045
                        idx = CNT_NEAREST;
1046
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
1047
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
1048
                            continue;
1049
                        idx = CNT_NEAR;
1050
                    } else {
1051
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
1052
                        idx = CNT_NEAR;
1053
                    }
1054
                } else {
1055
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
1056
                    idx = CNT_NEAREST;
1057
                }
1058
            } else {
1059
64699
                idx = CNT_ZERO;
1060
            }
1061
        } else {
1062
11861
            idx = CNT_ZERO;
1063
        }
1064
76560
        cnt[idx] += vp7_mv_pred[i].score;
1065
    }
1066
1067
6380
    mb->partitioning = VP8_SPLITMVMODE_NONE;
1068
1069
6380
    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
1070
        mb->mode = VP8_MVMODE_MV;
1071
1072
        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
1073
1074
            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
1075
1076
                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
1077
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1078
                else
1079
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1080
1081
                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1082
                    mb->mode = VP8_MVMODE_SPLIT;
1083
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1084
                } else {
1085
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1086
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1087
                    mb->bmv[0] = mb->mv;
1088
                }
1089
            } else {
1090
                mb->mv = near_mv[CNT_NEAR];
1091
                mb->bmv[0] = mb->mv;
1092
            }
1093
        } else {
1094
            mb->mv = near_mv[CNT_NEAREST];
1095
            mb->bmv[0] = mb->mv;
1096
        }
1097
    } else {
1098
6380
        mb->mode = VP8_MVMODE_ZERO;
1099
6380
        AV_ZERO32(&mb->mv);
1100
6380
        mb->bmv[0] = mb->mv;
1101
    }
1102
6380
}
1103
1104
static av_always_inline
1105
379497
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
1106
                    int mb_x, int mb_y, int layout)
1107
{
1108
379497
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
1109
379497
                                  mb - 1 /* left */,
1110
                                  0      /* top-left */ };
1111
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1112
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1113
379497
    int idx = CNT_ZERO;
1114
379497
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
1115
379497
    int8_t *sign_bias = s->sign_bias;
1116
    VP56mv near_mv[4];
1117
379497
    uint8_t cnt[4] = { 0 };
1118
379497
    VP56RangeCoder *c = &s->c;
1119
1120
379497
    if (!layout) { // layout is inlined (s->mb_layout is not)
1121
379497
        mb_edge[0] = mb + 2;
1122
379497
        mb_edge[2] = mb + 1;
1123
    } else {
1124
        mb_edge[0] = mb - s->mb_width - 1;
1125
        mb_edge[2] = mb - s->mb_width - 2;
1126
    }
1127
1128
379497
    AV_ZERO32(&near_mv[0]);
1129
379497
    AV_ZERO32(&near_mv[1]);
1130
379497
    AV_ZERO32(&near_mv[2]);
1131
1132
    /* Process MB on top, left and top-left */
1133
#define MV_EDGE_CHECK(n)                                                      \
1134
    {                                                                         \
1135
        VP8Macroblock *edge = mb_edge[n];                                     \
1136
        int edge_ref = edge->ref_frame;                                       \
1137
        if (edge_ref != VP56_FRAME_CURRENT) {                                 \
1138
            uint32_t mv = AV_RN32A(&edge->mv);                                \
1139
            if (mv) {                                                         \
1140
                if (cur_sign_bias != sign_bias[edge_ref]) {                   \
1141
                    /* SWAR negate of the values in mv. */                    \
1142
                    mv = ~mv;                                                 \
1143
                    mv = ((mv & 0x7fff7fff) +                                 \
1144
                          0x00010001) ^ (mv & 0x80008000);                    \
1145
                }                                                             \
1146
                if (!n || mv != AV_RN32A(&near_mv[idx]))                      \
1147
                    AV_WN32A(&near_mv[++idx], mv);                            \
1148
                cnt[idx] += 1 + (n != 2);                                     \
1149
            } else                                                            \
1150
                cnt[CNT_ZERO] += 1 + (n != 2);                                \
1151
        }                                                                     \
1152
    }
1153
1154

379497
    MV_EDGE_CHECK(0)
1155


379497
    MV_EDGE_CHECK(1)
1156


379497
    MV_EDGE_CHECK(2)
1157
1158
379497
    mb->partitioning = VP8_SPLITMVMODE_NONE;
1159
379497
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1160
57174
        mb->mode = VP8_MVMODE_MV;
1161
1162
        /* If we have three distinct MVs, merge first and last if they're the same */
1163
57174
        if (cnt[CNT_SPLITMV] &&
1164
12759
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1165
5465
            cnt[CNT_NEAREST] += 1;
1166
1167
        /* Swap near and nearest if necessary */
1168
57174
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1169
4482
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
1170
4482
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1171
        }
1172
1173
57174
        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1174
37607
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1175
                /* Choose the best mv out of 0,0 and the nearest mv */
1176
32519
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1177
32519
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
1178
32519
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
1179
32519
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1180
1181
32519
                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1182
18325
                    mb->mode = VP8_MVMODE_SPLIT;
1183
18325
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1184
                } else {
1185
14194
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
1186
14194
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
1187
14194
                    mb->bmv[0] = mb->mv;
1188
                }
1189
            } else {
1190
5088
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
1191
5088
                mb->bmv[0] = mb->mv;
1192
            }
1193
        } else {
1194
19567
            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
1195
19567
            mb->bmv[0] = mb->mv;
1196
        }
1197
    } else {
1198
322323
        mb->mode = VP8_MVMODE_ZERO;
1199
322323
        AV_ZERO32(&mb->mv);
1200
322323
        mb->bmv[0] = mb->mv;
1201
    }
1202
379497
}
1203
1204
static av_always_inline
1205
17675
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1206
                           int mb_x, int keyframe, int layout)
1207
{
1208
17675
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1209
1210
17675
    if (layout) {
1211
33
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
1212
33
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1213
    }
1214
17675
    if (keyframe) {
1215
        int x, y;
1216
        uint8_t *top;
1217
10822
        uint8_t *const left = s->intra4x4_pred_mode_left;
1218
10822
        if (layout)
1219
33
            top = mb->intra4x4_pred_mode_top;
1220
        else
1221
10789
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
1222
54110
        for (y = 0; y < 4; y++) {
1223
216440
            for (x = 0; x < 4; x++) {
1224
                const uint8_t *ctx;
1225
173152
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
1226
173152
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1227
173152
                left[y]   = top[x] = *intra4x4;
1228
173152
                intra4x4++;
1229
            }
1230
        }
1231
    } else {
1232
        int i;
1233
116501
        for (i = 0; i < 16; i++)
1234
109648
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1235
                                           vp8_pred4x4_prob_inter);
1236
    }
1237
17675
}
1238
1239
static av_always_inline
1240
443209
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
1241
                    VP8Macroblock *mb, int mb_x, int mb_y,
1242
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1243
{
1244
443209
    VP56RangeCoder *c = &s->c;
1245
    static const char * const vp7_feature_name[] = { "q-index",
1246
                                                     "lf-delta",
1247
                                                     "partial-golden-update",
1248
                                                     "blit-pitch" };
1249
443209
    if (is_vp7) {
1250
        int i;
1251
6600
        *segment = 0;
1252
33000
        for (i = 0; i < 4; i++) {
1253
26400
            if (s->feature_enabled[i]) {
1254
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1255
                      int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1256
                                                   s->feature_index_prob[i]);
1257
                      av_log(s->avctx, AV_LOG_WARNING,
1258
                             "Feature %s present in macroblock (value 0x%x)\n",
1259
                             vp7_feature_name[i], s->feature_value[i][index]);
1260
                }
1261
           }
1262
        }
1263
436609
    } else if (s->segmentation.update_map) {
1264
8289
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1265
8289
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1266
428320
    } else if (s->segmentation.enabled)
1267
94425
        *segment = ref ? *ref : *segment;
1268
443209
    mb->segment = *segment;
1269
1270
443209
    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1271
1272
443209
    if (s->keyframe) {
1273
32872
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1274
                                    vp8_pred16x16_prob_intra);
1275
1276
32872
        if (mb->mode == MODE_I4x4) {
1277
10822
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1278
        } else {
1279
22050
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1280
22050
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1281
22050
            if (s->mb_layout)
1282
187
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1283
            else
1284
21863
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1285
22050
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
1286
        }
1287
1288
32872
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1289
                                                vp8_pred8x8c_prob_intra);
1290
32872
        mb->ref_frame        = VP56_FRAME_CURRENT;
1291
410337
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1292
        // inter MB, 16.2
1293
385877
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
1294
34528
            mb->ref_frame =
1295
17248
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1296
                                                                   : VP56_FRAME_GOLDEN;
1297
        else
1298
368597
            mb->ref_frame = VP56_FRAME_PREVIOUS;
1299
385877
        s->ref_count[mb->ref_frame - 1]++;
1300
1301
        // motion vectors, 16.3
1302
385877
        if (is_vp7)
1303
6380
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1304
        else
1305
379497
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
1306
    } else {
1307
        // intra MB, 16.1
1308
24460
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1309
1310
24460
        if (mb->mode == MODE_I4x4)
1311
6853
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1312
1313
48920
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1314
24460
                                                s->prob->pred8x8c);
1315
24460
        mb->ref_frame        = VP56_FRAME_CURRENT;
1316
24460
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
1317
24460
        AV_ZERO32(&mb->bmv[0]);
1318
    }
1319
443209
}
1320
1321
/**
1322
 * @param r     arithmetic bitstream reader context
1323
 * @param block destination for block coefficients
1324
 * @param probs probabilities to use when reading trees from the bitstream
1325
 * @param i     initial coeff index, 0 unless a separate DC block is coded
1326
 * @param qmul  array holding the dc/ac dequant factor at position 0/1
1327
 *
1328
 * @return 0 if no coeffs were decoded
1329
 *         otherwise, the index of the last coeff decoded plus one
1330
 */
1331
static av_always_inline
1332
393067
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1333
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1334
                                 int i, uint8_t *token_prob, int16_t qmul[2],
1335
                                 const uint8_t scan[16], int vp7)
1336
{
1337
393067
    VP56RangeCoder c = *r;
1338
393067
    goto skip_eob;
1339
    do {
1340
        int coeff;
1341
1101534
restart:
1342
1103653
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
1343
373414
            break;
1344
1345
730239
skip_eob:
1346
1956826
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1347
835639
            if (++i == 16)
1348
                break; // invalid input; blocks should end with EOB
1349
835639
            token_prob = probs[i][0];
1350
835639
            if (vp7)
1351
2119
                goto restart;
1352
833520
            goto skip_eob;
1353
        }
1354
1355
1121187
        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1356
719461
            coeff = 1;
1357
719461
            token_prob = probs[i + 1][1];
1358
        } else {
1359
401726
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1360
274407
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1361
274407
                if (coeff)
1362
113420
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
1363
274407
                coeff += 2;
1364
            } else {
1365
                // DCT_CAT*
1366
127319
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1367
83429
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1368
45203
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1369
                    } else {                                    // DCT_CAT2
1370
38226
                        coeff  = 7;
1371
38226
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1372
38226
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1373
                    }
1374
                } else {    // DCT_CAT3 and up
1375
43890
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
1376
43890
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
1377
43890
                    int cat = (a << 1) + b;
1378
43890
                    coeff  = 3 + (8 << cat);
1379
43890
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1380
                }
1381
            }
1382
401726
            token_prob = probs[i + 1][2];
1383
        }
1384

1121187
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1385
1121187
    } while (++i < 16);
1386
1387
393067
    *r = c;
1388
393067
    return i;
1389
}
1390
1391
static av_always_inline
1392
6380
int inter_predict_dc(int16_t block[16], int16_t pred[2])
1393
{
1394
6380
    int16_t dc = block[0];
1395
6380
    int ret = 0;
1396
1397
6380
    if (pred[1] > 3) {
1398
        dc += pred[0];
1399
        ret = 1;
1400
    }
1401
1402
6380
    if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1403
6380
        block[0] = pred[0] = dc;
1404
6380
        pred[1] = 0;
1405
    } else {
1406
        if (pred[0] == dc)
1407
            pred[1]++;
1408
        block[0] = pred[0] = dc;
1409
    }
1410
1411
6380
    return ret;
1412
}
1413
1414
734
static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1415
                                            int16_t block[16],
1416
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1417
                                            int i, uint8_t *token_prob,
1418
                                            int16_t qmul[2],
1419
                                            const uint8_t scan[16])
1420
{
1421
734
    return decode_block_coeffs_internal(r, block, probs, i,
1422
                                        token_prob, qmul, scan, IS_VP7);
1423
}
1424
1425
#ifndef vp8_decode_block_coeffs_internal
1426
392333
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1427
                                            int16_t block[16],
1428
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1429
                                            int i, uint8_t *token_prob,
1430
                                            int16_t qmul[2])
1431
{
1432
392333
    return decode_block_coeffs_internal(r, block, probs, i,
1433
                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
1434
}
1435
#endif
1436
1437
/**
1438
 * @param c          arithmetic bitstream reader context
1439
 * @param block      destination for block coefficients
1440
 * @param probs      probabilities to use when reading trees from the bitstream
1441
 * @param i          initial coeff index, 0 unless a separate DC block is coded
1442
 * @param zero_nhood the initial prediction context for number of surrounding
1443
 *                   all-zero blocks (only left/top, so 0-2)
1444
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
1445
 * @param scan       scan pattern (VP7 only)
1446
 *
1447
 * @return 0 if no coeffs were decoded
1448
 *         otherwise, the index of the last coeff decoded plus one
1449
 */
1450
static av_always_inline
1451
2050151
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1452
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1453
                        int i, int zero_nhood, int16_t qmul[2],
1454
                        const uint8_t scan[16], int vp7)
1455
{
1456
2050151
    uint8_t *token_prob = probs[i][zero_nhood];
1457
2050151
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
1458
1657084
        return 0;
1459
734
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1460
                                                  token_prob, qmul, scan)
1461
393801
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
1462
                                                  token_prob, qmul);
1463
}
1464
1465
static av_always_inline
1466
83297
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1467
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1468
                      int is_vp7)
1469
{
1470
83297
    int i, x, y, luma_start = 0, luma_ctx = 3;
1471
83297
    int nnz_pred, nnz, nnz_total = 0;
1472
83297
    int segment = mb->segment;
1473
83297
    int block_dc = 0;
1474
1475

83297
    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1476
51023
        nnz_pred = t_nnz[8] + l_nnz[8];
1477
1478
        // decode DC values and do hadamard
1479
51023
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1480
51023
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
1481
                                  ff_zigzag_scan, is_vp7);
1482
51023
        l_nnz[8] = t_nnz[8] = !!nnz;
1483
1484

51023
        if (is_vp7 && mb->mode > MODE_I4x4) {
1485
6380
            nnz |=  inter_predict_dc(td->block_dc,
1486
6380
                                     s->inter_dc_pred[mb->ref_frame - 1]);
1487
        }
1488
1489
51023
        if (nnz) {
1490
34741
            nnz_total += nnz;
1491
34741
            block_dc   = 1;
1492
34741
            if (nnz == 1)
1493
10303
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1494
            else
1495
24438
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1496
        }
1497
51023
        luma_start = 1;
1498
51023
        luma_ctx   = 0;
1499
    }
1500
1501
    // luma blocks
1502
416485
    for (y = 0; y < 4; y++)
1503
1665940
        for (x = 0; x < 4; x++) {
1504
1332752
            nnz_pred = l_nnz[y] + t_nnz[x];
1505
1332752
            nnz = decode_block_coeffs(c, td->block[y][x],
1506
1332752
                                      s->prob->token[luma_ctx],
1507
                                      luma_start, nnz_pred,
1508
1332752
                                      s->qmat[segment].luma_qmul,
1509
1332752
                                      s->prob[0].scan, is_vp7);
1510
            /* nnz+block_dc may be one more than the actual last index,
1511
             * but we don't care */
1512
1332752
            td->non_zero_count_cache[y][x] = nnz + block_dc;
1513
1332752
            t_nnz[x] = l_nnz[y] = !!nnz;
1514
1332752
            nnz_total += nnz;
1515
        }
1516
1517
    // chroma blocks
1518
    // TODO: what to do about dimensions? 2nd dim for luma is x,
1519
    // but for chroma it's (y<<1)|x
1520
249891
    for (i = 4; i < 6; i++)
1521
499782
        for (y = 0; y < 2; y++)
1522
999564
            for (x = 0; x < 2; x++) {
1523
666376
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1524
666376
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1525
666376
                                          s->prob->token[2], 0, nnz_pred,
1526
666376
                                          s->qmat[segment].chroma_qmul,
1527
666376
                                          s->prob[0].scan, is_vp7);
1528
666376
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1529
666376
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1530
666376
                nnz_total += nnz;
1531
            }
1532
1533
    // if there were no coded coeffs despite the macroblock not being marked skip,
1534
    // we MUST not do the inner loop filter and should not do IDCT
1535
    // Since skip isn't used for bitstream prediction, just manually set it.
1536
83297
    if (!nnz_total)
1537
6457
        mb->skip = 1;
1538
83297
}
1539
1540
static av_always_inline
1541
424229
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1542
                      uint8_t *src_cb, uint8_t *src_cr,
1543
                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1544
{
1545
424229
    AV_COPY128(top_border, src_y + 15 * linesize);
1546
424229
    if (!simple) {
1547
416507
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1548
416507
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1549
    }
1550
424229
}
1551
1552
static av_always_inline
1553
84130
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1554
                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1555
                    int mb_y, int mb_width, int simple, int xchg)
1556
{
1557
84130
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
1558
84130
    src_y  -= linesize;
1559
84130
    src_cb -= uvlinesize;
1560
84130
    src_cr -= uvlinesize;
1561
1562
#define XCHG(a, b, xchg)                                                      \
1563
    do {                                                                      \
1564
        if (xchg)                                                             \
1565
            AV_SWAP64(b, a);                                                  \
1566
        else                                                                  \
1567
            AV_COPY64(b, a);                                                  \
1568
    } while (0)
1569
1570
84130
    XCHG(top_border_m1 + 8, src_y - 8, xchg);
1571
84130
    XCHG(top_border, src_y, xchg);
1572
84130
    XCHG(top_border + 8, src_y + 8, 1);
1573
84130
    if (mb_x < mb_width - 1)
1574
80706
        XCHG(top_border + 32, src_y + 16, 1);
1575
1576
    // only copy chroma for normal loop filter
1577
    // or to initialize the top row to 127
1578

84130
    if (!simple || !mb_y) {
1579
81048
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1580
81048
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1581
81048
        XCHG(top_border + 16, src_cb, 1);
1582
81048
        XCHG(top_border + 24, src_cr, 1);
1583
    }
1584
84130
}
1585
1586
static av_always_inline
1587
72888
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1588
{
1589
72888
    if (!mb_x)
1590
3029
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1591
    else
1592
69859
        return mb_y ? mode : LEFT_DC_PRED8x8;
1593
}
1594
1595
static av_always_inline
1596
1809
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1597
{
1598
1809
    if (!mb_x)
1599

1
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1600
    else
1601
1808
        return mb_y ? mode : HOR_PRED8x8;
1602
}
1603
1604
static av_always_inline
1605
96989
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1606
{
1607

96989
    switch (mode) {
1608
72888
    case DC_PRED8x8:
1609
72888
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1610
5757
    case VERT_PRED8x8:
1611

5757
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1612
16535
    case HOR_PRED8x8:
1613

16535
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1614
1809
    case PLANE_PRED8x8: /* TM */
1615
1809
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1616
    }
1617
    return mode;
1618
}
1619
1620
static av_always_inline
1621
49009
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1622
{
1623
49009
    if (!mb_x) {
1624

1362
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1625
    } else {
1626
47647
        return mb_y ? mode : HOR_VP8_PRED;
1627
    }
1628
}
1629
1630
static av_always_inline
1631
282800
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1632
                                     int *copy_buf, int vp7)
1633
{
1634

282800
    switch (mode) {
1635
13608
    case VERT_PRED:
1636

13608
        if (!mb_x && mb_y) {
1637
95
            *copy_buf = 1;
1638
95
            return mode;
1639
        }
1640
        /* fall-through */
1641
    case DIAG_DOWN_LEFT_PRED:
1642
    case VERT_LEFT_PRED:
1643

34820
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1644
19464
    case HOR_PRED:
1645
19464
        if (!mb_y) {
1646
128
            *copy_buf = 1;
1647
128
            return mode;
1648
        }
1649
        /* fall-through */
1650
    case HOR_UP_PRED:
1651

31686
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1652
49009
    case TM_VP8_PRED:
1653
49009
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1654
167062
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1655
                   * as 16x16/8x8 DC */
1656
    case DIAG_DOWN_RIGHT_PRED:
1657
    case VERT_RIGHT_PRED:
1658
    case HOR_DOWN_PRED:
1659

167062
        if (!mb_y || !mb_x)
1660
1140
            *copy_buf = 1;
1661
167062
        return mode;
1662
    }
1663
    return mode;
1664
}
1665
1666
static av_always_inline
1667
57332
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1668
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1669
{
1670
    int x, y, mode, nnz;
1671
    uint32_t tr;
1672
1673
    /* for the first row, we need to run xchg_mb_border to init the top edge
1674
     * to 127; otherwise, skip it if we aren't going to deblock */
1675


57332
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1676
42065
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1677
42065
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1678
42065
                       s->filter.simple, 1);
1679
1680
57332
    if (mb->mode < MODE_I4x4) {
1681
39657
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1682
39657
        s->hpc.pred16x16[mode](dst[0], s->linesize);
1683
    } else {
1684
17675
        uint8_t *ptr = dst[0];
1685
17675
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1686
17675
        const uint8_t lo = is_vp7 ? 128 : 127;
1687
17675
        const uint8_t hi = is_vp7 ? 128 : 129;
1688
17675
        uint8_t tr_top[4] = { lo, lo, lo, lo };
1689
1690
        // all blocks on the right edge of the macroblock use bottom edge
1691
        // of the top macroblock for their topright edge
1692
17675
        uint8_t *tr_right = ptr - s->linesize + 16;
1693
1694
        // if we're on the right edge of the frame, said edge is extended
1695
        // from the top macroblock
1696

17675
        if (mb_y && mb_x == s->mb_width - 1) {
1697
635
            tr       = tr_right[-1] * 0x01010101u;
1698
635
            tr_right = (uint8_t *) &tr;
1699
        }
1700
1701
17675
        if (mb->skip)
1702
460
            AV_ZERO128(td->non_zero_count_cache);
1703
1704
88375
        for (y = 0; y < 4; y++) {
1705
70700
            uint8_t *topright = ptr + 4 - s->linesize;
1706
353500
            for (x = 0; x < 4; x++) {
1707
282800
                int copy = 0;
1708
282800
                ptrdiff_t linesize = s->linesize;
1709
282800
                uint8_t *dst = ptr + 4 * x;
1710
282800
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1711
1712

282800
                if ((y == 0 || x == 3) && mb_y == 0) {
1713
4417
                    topright = tr_top;
1714
278383
                } else if (x == 3)
1715
68176
                    topright = tr_right;
1716
1717
282800
                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1718
                                                        mb_y + y, &copy, is_vp7);
1719
282800
                if (copy) {
1720
1363
                    dst      = copy_dst + 12;
1721
1363
                    linesize = 8;
1722
1363
                    if (!(mb_y + y)) {
1723
702
                        copy_dst[3] = lo;
1724
702
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1725
                    } else {
1726
661
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1727
661
                        if (!(mb_x + x)) {
1728
661
                            copy_dst[3] = hi;
1729
                        } else {
1730
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
1731
                        }
1732
                    }
1733
1363
                    if (!(mb_x + x)) {
1734
702
                        copy_dst[11] =
1735
702
                        copy_dst[19] =
1736
702
                        copy_dst[27] =
1737
702
                        copy_dst[35] = hi;
1738
                    } else {
1739
661
                        copy_dst[11] = ptr[4 * x                   - 1];
1740
661
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
1741
661
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1742
661
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1743
                    }
1744
                }
1745
282800
                s->hpc.pred4x4[mode](dst, topright, linesize);
1746
282800
                if (copy) {
1747
1363
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
1748
1363
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
1749
1363
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1750
1363
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1751
                }
1752
1753
282800
                nnz = td->non_zero_count_cache[y][x];
1754
282800
                if (nnz) {
1755
122340
                    if (nnz == 1)
1756
35131
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1757
35131
                                                  td->block[y][x], s->linesize);
1758
                    else
1759
87209
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1760
87209
                                               td->block[y][x], s->linesize);
1761
                }
1762
282800
                topright += 4;
1763
            }
1764
1765
70700
            ptr      += 4 * s->linesize;
1766
70700
            intra4x4 += 4;
1767
        }
1768
    }
1769
1770
57332
    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1771
                                            mb_x, mb_y, is_vp7);
1772
57332
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1773
57332
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1774
1775


57332
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1776
42065
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1777
42065
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1778
42065
                       s->filter.simple, 0);
1779
57332
}
1780
1781
static const uint8_t subpel_idx[3][8] = {
1782
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1783
                                // also function pointer index
1784
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1785
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1786
};
1787
1788
/**
1789
 * luma MC function
1790
 *
1791
 * @param s        VP8 decoding context
1792
 * @param dst      target buffer for block data at block position
1793
 * @param ref      reference picture buffer at origin (0, 0)
1794
 * @param mv       motion vector (relative to block position) to get pixel data from
1795
 * @param x_off    horizontal position of block from origin (0, 0)
1796
 * @param y_off    vertical position of block from origin (0, 0)
1797
 * @param block_w  width of block (16, 8 or 4)
1798
 * @param block_h  height of block (16, 8 or 4; differs from block_w for 16x8 and 8x16 partitions)
1799
 * @param width    width of src/dst plane data
1800
 * @param height   height of src/dst plane data
1801
 * @param linesize size of a single line of plane data, including padding
1802
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
1803
 */
1804
static av_always_inline
1805
504108
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1806
                 ThreadFrame *ref, const VP56mv *mv,
1807
                 int x_off, int y_off, int block_w, int block_h,
1808
                 int width, int height, ptrdiff_t linesize,
1809
                 vp8_mc_func mc_func[3][3])
1810
{
1811
504108
    uint8_t *src = ref->f->data[0];
1812
1813
504108
    if (AV_RN32A(mv)) {
1814
124886
        ptrdiff_t src_linesize = linesize;
1815
1816
124886
        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1817
124886
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1818
1819
124886
        x_off += mv->x >> 2;
1820
124886
        y_off += mv->y >> 2;
1821
1822
        // edge emulation
1823
124886
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1824
124886
        src += y_off * linesize + x_off;
1825

124886
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
1826
118693
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1827
9236
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1828
9236
                                     src - my_idx * linesize - mx_idx,
1829
                                     EDGE_EMU_LINESIZE, linesize,
1830
9236
                                     block_w + subpel_idx[1][mx],
1831
9236
                                     block_h + subpel_idx[1][my],
1832
                                     x_off - mx_idx, y_off - my_idx,
1833
                                     width, height);
1834
9236
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1835
9236
            src_linesize = EDGE_EMU_LINESIZE;
1836
        }
1837
124886
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1838
    } else {
1839
379222
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1840
379222
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1841
                      linesize, block_h, 0, 0);
1842
    }
1843
504108
}
1844
1845
/**
1846
 * chroma MC function
1847
 *
1848
 * @param s        VP8 decoding context
1849
 * @param dst1     target buffer for block data at block position (U plane)
1850
 * @param dst2     target buffer for block data at block position (V plane)
1851
 * @param ref      reference picture buffer at origin (0, 0)
1852
 * @param mv       motion vector (relative to block position) to get pixel data from
1853
 * @param x_off    horizontal position of block from origin (0, 0)
1854
 * @param y_off    vertical position of block from origin (0, 0)
1855
 * @param block_w  width of block in chroma samples (8 or 4)
1856
 * @param block_h  height of block in chroma samples (8 or 4; differs from block_w for 16x8 and 8x16 partitions)
1857
 * @param width    width of src/dst plane data
1858
 * @param height   height of src/dst plane data
1859
 * @param linesize size of a single line of plane data, including padding
1860
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
1861
 */
1862
static av_always_inline
1863
424764
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1864
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1865
                   int x_off, int y_off, int block_w, int block_h,
1866
                   int width, int height, ptrdiff_t linesize,
1867
                   vp8_mc_func mc_func[3][3])
1868
{
1869
424764
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1870
1871
424764
    if (AV_RN32A(mv)) {
1872
79185
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1873
79185
        int my = mv->y & 7, my_idx = subpel_idx[0][my];
1874
1875
79185
        x_off += mv->x >> 3;
1876
79185
        y_off += mv->y >> 3;
1877
1878
        // edge emulation
1879
79185
        src1 += y_off * linesize + x_off;
1880
79185
        src2 += y_off * linesize + x_off;
1881
79185
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1882

79185
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
1883
73045
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1884
9157
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1885
9157
                                     src1 - my_idx * linesize - mx_idx,
1886
                                     EDGE_EMU_LINESIZE, linesize,
1887
9157
                                     block_w + subpel_idx[1][mx],
1888
9157
                                     block_h + subpel_idx[1][my],
1889
                                     x_off - mx_idx, y_off - my_idx, width, height);
1890
9157
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1891
9157
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1892
1893
9157
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1894
9157
                                     src2 - my_idx * linesize - mx_idx,
1895
                                     EDGE_EMU_LINESIZE, linesize,
1896
9157
                                     block_w + subpel_idx[1][mx],
1897
9157
                                     block_h + subpel_idx[1][my],
1898
                                     x_off - mx_idx, y_off - my_idx, width, height);
1899
9157
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1900
9157
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1901
        } else {
1902
70028
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1903
70028
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1904
        }
1905
    } else {
1906
345579
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1907
345579
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1908
345579
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1909
    }
1910
424764
}
1911
1912
static av_always_inline
1913
398316
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1914
                 ThreadFrame *ref_frame, int x_off, int y_off,
1915
                 int bx_off, int by_off, int block_w, int block_h,
1916
                 int width, int height, VP56mv *mv)
1917
{
1918
398316
    VP56mv uvmv = *mv;
1919
1920
    /* Y */
1921
398316
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1922
                ref_frame, mv, x_off + bx_off, y_off + by_off,
1923
                block_w, block_h, width, height, s->linesize,
1924
398316
                s->put_pixels_tab[block_w == 8]);
1925
1926
    /* U/V */
1927
398316
    if (s->profile == 3) {
1928
        /* this block only applies to VP8; it is safe to check
1929
         * only the profile, as VP7 profile <= 1 */
1930
4775
        uvmv.x &= ~7;
1931
4775
        uvmv.y &= ~7;
1932
    }
1933
398316
    x_off   >>= 1;
1934
398316
    y_off   >>= 1;
1935
398316
    bx_off  >>= 1;
1936
398316
    by_off  >>= 1;
1937
398316
    width   >>= 1;
1938
398316
    height  >>= 1;
1939
398316
    block_w >>= 1;
1940
398316
    block_h >>= 1;
1941
398316
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1942
398316
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1943
                  &uvmv, x_off + bx_off, y_off + by_off,
1944
                  block_w, block_h, width, height, s->uvlinesize,
1945
398316
                  s->put_pixels_tab[1 + (block_w == 4)]);
1946
398316
}
1947
1948
/* Fetch pixels for estimated mv 4 macroblocks ahead.
1949
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1950
static av_always_inline
1951
1329627
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1952
                     int mb_xy, int ref)
1953
{
1954
    /* Don't prefetch refs that haven't been used very often this frame. */
1955
1329627
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1956
478745
        int x_off = mb_x << 4, y_off = mb_y << 4;
1957
478745
        int mx = (mb->mv.x >> 2) + x_off + 8;
1958
478745
        int my = (mb->mv.y >> 2) + y_off;
1959
478745
        uint8_t **src = s->framep[ref]->tf.f->data;
1960
478745
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1961
        /* For threading, a ff_thread_await_progress here might be useful, but
1962
         * it actually slows down the decoder. Since a bad prefetch doesn't
1963
         * generate bad decoder output, we don't run it here. */
1964
478745
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1965
478745
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1966
478745
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1967
    }
1968
1329627
}
1969
1970
/**
1971
 * Apply motion vectors to prediction buffer, chapter 18.
1972
 */
1973
static av_always_inline
1974
385877
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1975
                   VP8Macroblock *mb, int mb_x, int mb_y)
1976
{
1977
385877
    int x_off = mb_x << 4, y_off = mb_y << 4;
1978
385877
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
1979
385877
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1980
385877
    VP56mv *bmv = mb->bmv;
1981
1982

385877
    switch (mb->partitioning) {
1983
367552
    case VP8_SPLITMVMODE_NONE:
1984
367552
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1985
                    0, 0, 16, 16, width, height, &mb->mv);
1986
367552
        break;
1987
6612
    case VP8_SPLITMVMODE_4x4: {
1988
        int x, y;
1989
        VP56mv uvmv;
1990
1991
        /* Y */
1992
33060
        for (y = 0; y < 4; y++) {
1993
132240
            for (x = 0; x < 4; x++) {
1994
105792
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1995
105792
                            ref, &bmv[4 * y + x],
1996
105792
                            4 * x + x_off, 4 * y + y_off, 4, 4,
1997
                            width, height, s->linesize,
1998
105792
                            s->put_pixels_tab[2]);
1999
            }
2000
        }
2001
2002
        /* U/V */
2003
6612
        x_off  >>= 1;
2004
6612
        y_off  >>= 1;
2005
6612
        width  >>= 1;
2006
6612
        height >>= 1;
2007
19836
        for (y = 0; y < 2; y++) {
2008
39672
            for (x = 0; x < 2; x++) {
2009
26448
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
2010
26448
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
2011
26448
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
2012
26448
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
2013
26448
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
2014
26448
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
2015
26448
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
2016
26448
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
2017
26448
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
2018
26448
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
2019
26448
                if (s->profile == 3) {
2020
492
                    uvmv.x &= ~7;
2021
492
                    uvmv.y &= ~7;
2022
                }
2023
26448
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
2024
26448
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
2025
26448
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
2026
                              width, height, s->uvlinesize,
2027
26448
                              s->put_pixels_tab[2]);
2028
            }
2029
        }
2030
6612
        break;
2031
    }
2032
4909
    case VP8_SPLITMVMODE_16x8:
2033
4909
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2034
                    0, 0, 16, 8, width, height, &bmv[0]);
2035
4909
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2036
                    0, 8, 16, 8, width, height, &bmv[1]);
2037
4909
        break;
2038
3135
    case VP8_SPLITMVMODE_8x16:
2039
3135
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2040
                    0, 0, 8, 16, width, height, &bmv[0]);
2041
3135
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2042
                    8, 0, 8, 16, width, height, &bmv[1]);
2043
3135
        break;
2044
3669
    case VP8_SPLITMVMODE_8x8:
2045
3669
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2046
                    0, 0, 8, 8, width, height, &bmv[0]);
2047
3669
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2048
                    8, 0, 8, 8, width, height, &bmv[1]);
2049
3669
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2050
                    0, 8, 8, 8, width, height, &bmv[2]);
2051
3669
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2052
                    8, 8, 8, 8, width, height, &bmv[3]);
2053
3669
        break;
2054
    }
2055
385877
}
2056
2057
static av_always_inline
2058
76840
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
2059
{
2060
    int x, y, ch;
2061
2062
76840
    if (mb->mode != MODE_I4x4) {
2063
59625
        uint8_t *y_dst = dst[0];
2064
298125
        for (y = 0; y < 4; y++) {
2065
238500
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
2066
238500
            if (nnz4) {
2067
175833
                if (nnz4 & ~0x01010101) {
2068
155935
                    for (x = 0; x < 4; x++) {
2069
155935
                        if ((uint8_t) nnz4 == 1)
2070
54051
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
2071
54051
                                                      td->block[y][x],
2072
                                                      s->linesize);
2073
101884
                        else if ((uint8_t) nnz4 > 1)
2074
74428
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
2075
74428
                                                   td->block[y][x],
2076
                                                   s->linesize);
2077
155935
                        nnz4 >>= 8;
2078
155935
                        if (!nnz4)
2079
44887
                            break;
2080
                    }
2081
                } else {
2082
130946
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2083
                }
2084
            }
2085
238500
            y_dst += 4 * s->linesize;
2086
        }
2087
    }
2088
2089
230520
    for (ch = 0; ch < 2; ch++) {
2090
153680
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2091
153680
        if (nnz4) {
2092
60025
            uint8_t *ch_dst = dst[1 + ch];
2093
60025
            if (nnz4 & ~0x01010101) {
2094
55530
                for (y = 0; y < 2; y++) {
2095
131913
                    for (x = 0; x < 2; x++) {
2096
106337
                        if ((uint8_t) nnz4 == 1)
2097
16054
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2098
16054
                                                      td->block[4 + ch][(y << 1) + x],
2099
                                                      s->uvlinesize);
2100
90283
                        else if ((uint8_t) nnz4 > 1)
2101
66922
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2102
66922
                                                   td->block[4 + ch][(y << 1) + x],
2103
                                                   s->uvlinesize);
2104
106337
                        nnz4 >>= 8;
2105
106337
                        if (!nnz4)
2106
29954
                            goto chroma_idct_end;
2107
                    }
2108
25576
                    ch_dst += 4 * s->uvlinesize;
2109
                }
2110
            } else {
2111
30071
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2112
            }
2113
        }
2114
153680
chroma_idct_end:
2115
        ;
2116
    }
2117
76840
}
2118
2119
static av_always_inline
2120
424229
void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2121
                         VP8FilterStrength *f, int is_vp7)
2122
{
2123
    int interior_limit, filter_level;
2124
2125
424229
    if (s->segmentation.enabled) {
2126
102219
        filter_level = s->segmentation.filter_level[mb->segment];
2127
102219
        if (!s->segmentation.absolute_vals)
2128
98895
            filter_level += s->filter.level;
2129
    } else
2130
322010
        filter_level = s->filter.level;
2131
2132
424229
    if (s->lf_delta.enabled) {
2133
416875
        filter_level += s->lf_delta.ref[mb->ref_frame];
2134
416875
        filter_level += s->lf_delta.mode[mb->mode];
2135
    }
2136
2137
424229
    filter_level = av_clip_uintp2(filter_level, 6);
2138
2139
424229
    interior_limit = filter_level;
2140
424229
    if (s->filter.sharpness) {
2141
2772
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
2142
2772
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2143
    }
2144
424229
    interior_limit = FFMAX(interior_limit, 1);
2145
2146
424229
    f->filter_level = filter_level;
2147
424229
    f->inner_limit = interior_limit;
2148

777260
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2149
353031
                      mb->mode == VP8_MVMODE_SPLIT;
2150
424229
}
2151
2152
static av_always_inline
2153
416507
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2154
               int mb_x, int mb_y, int is_vp7)
2155
{
2156
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2157
416507
    int filter_level = f->filter_level;
2158
416507
    int inner_limit = f->inner_limit;
2159
416507
    int inner_filter = f->inner_filter;
2160
416507
    ptrdiff_t linesize   = s->linesize;
2161
416507
    ptrdiff_t uvlinesize = s->uvlinesize;
2162
    static const uint8_t hev_thresh_lut[2][64] = {
2163
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2164
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2165
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2166
          3, 3, 3, 3 },
2167
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2168
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2169
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2170
          2, 2, 2, 2 }
2171
    };
2172
2173
416507
    if (!filter_level)
2174
5858
        return;
2175
2176
410649
    if (is_vp7) {
2177
6600
        bedge_lim_y  = filter_level;
2178
6600
        bedge_lim_uv = filter_level * 2;
2179
6600
        mbedge_lim   = filter_level + 2;
2180
    } else {
2181
404049
        bedge_lim_y  =
2182
404049
        bedge_lim_uv = filter_level * 2 + inner_limit;
2183
404049
        mbedge_lim   = bedge_lim_y + 4;
2184
    }
2185
2186
410649
    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2187
2188
410649
    if (mb_x) {
2189
396544
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2190
                                       mbedge_lim, inner_limit, hev_thresh);
2191
396544
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2192
                                       mbedge_lim, inner_limit, hev_thresh);
2193
    }
2194
2195
#define H_LOOP_FILTER_16Y_INNER(cond)                                         \
2196
    if (cond && inner_filter) {                                               \
2197
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  4, linesize,           \
2198
                                             bedge_lim_y, inner_limit,        \
2199
                                             hev_thresh);                     \
2200
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  8, linesize,           \
2201
                                             bedge_lim_y, inner_limit,        \
2202
                                             hev_thresh);                     \
2203
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,           \
2204
                                             bedge_lim_y, inner_limit,        \
2205
                                             hev_thresh);                     \
2206
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] +  4, dst[2] + 4,         \
2207
                                             uvlinesize,  bedge_lim_uv,       \
2208
                                             inner_limit, hev_thresh);        \
2209
    }
2210
2211

410649
    H_LOOP_FILTER_16Y_INNER(!is_vp7)
2212
2213
410649
    if (mb_y) {
2214
389635
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2215
                                       mbedge_lim, inner_limit, hev_thresh);
2216
389635
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2217
                                       mbedge_lim, inner_limit, hev_thresh);
2218
    }
2219
2220
410649
    if (inner_filter) {
2221
69204
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
2222
                                             linesize, bedge_lim_y,
2223
                                             inner_limit, hev_thresh);
2224
69204
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
2225
                                             linesize, bedge_lim_y,
2226
                                             inner_limit, hev_thresh);
2227
69204
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2228
                                             linesize, bedge_lim_y,
2229
                                             inner_limit, hev_thresh);
2230
69204
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] +  4 * uvlinesize,
2231
69204
                                             dst[2] +  4 * uvlinesize,
2232
                                             uvlinesize, bedge_lim_uv,
2233
                                             inner_limit, hev_thresh);
2234
    }
2235
2236

410649
    H_LOOP_FILTER_16Y_INNER(is_vp7)
2237
}
2238
2239
static av_always_inline
2240
7722
void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2241
                      int mb_x, int mb_y)
2242
{
2243
    int mbedge_lim, bedge_lim;
2244
7722
    int filter_level = f->filter_level;
2245
7722
    int inner_limit  = f->inner_limit;
2246
7722
    int inner_filter = f->inner_filter;
2247
7722
    ptrdiff_t linesize = s->linesize;
2248
2249
7722
    if (!filter_level)
2250
332
        return;
2251
2252
7390
    bedge_lim  = 2 * filter_level + inner_limit;
2253
7390
    mbedge_lim = bedge_lim + 4;
2254
2255
7390
    if (mb_x)
2256
6713
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2257
7390
    if (inner_filter) {
2258
3523
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
2259
3523
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
2260
3523
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2261
    }
2262
2263
7390
    if (mb_y)
2264
6609
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2265
7390
    if (inner_filter) {
2266
3523
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
2267
3523
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
2268
3523
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2269
    }
2270
}
2271
2272
#define MARGIN (16 << 2)
2273
static av_always_inline
2274
30
int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2275
                                    VP8Frame *prev_frame, int is_vp7)
2276
{
2277
30
    VP8Context *s = avctx->priv_data;
2278
    int mb_x, mb_y;
2279
2280
30
    s->mv_bounds.mv_min.y = -MARGIN;
2281
30
    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2282
360
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2283
330
        VP8Macroblock *mb = s->macroblocks_base +
2284
330
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
2285
330
        int mb_xy = mb_y * s->mb_width;
2286
2287
330
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2288
2289
330
        s->mv_bounds.mv_min.x = -MARGIN;
2290
330
        s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2291
2292
330
        if (vpX_rac_is_end(&s->c)) {
2293
            return AVERROR_INVALIDDATA;
2294
        }
2295
6930
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2296
6600
            if (mb_y == 0)
2297
600
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2298
                         DC_PRED * 0x01010101);
2299
12980
            decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2300
6380
                           prev_frame && prev_frame->seg_map ?
2301
6380
                           prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2302
6600
            s->mv_bounds.mv_min.x -= 64;
2303
6600
            s->mv_bounds.mv_max.x -= 64;
2304
        }
2305
330
        s->mv_bounds.mv_min.y -= 64;
2306
330
        s->mv_bounds.mv_max.y -= 64;
2307
    }
2308
30
    return 0;
2309
}
2310
2311
30
/** VP7 entry point: vp78_decode_mv_mb_modes() with is_vp7 = IS_VP7. */
static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    int ret = vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);

    return ret;
}
2316
2317
/** VP8 entry point: vp78_decode_mv_mb_modes() with is_vp7 = IS_VP8. */
static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    int ret = vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);

    return ret;
}
2322
2323
#if HAVE_THREADS
/*
 * Row positions are packed into one int as (mb_y << 16) | (mb_x & 0xFFFF).
 *
 * check_thread_pos(td, otd, mb_x_check, mb_y_check):
 *   Block until otd->thread_mb_pos is at least the packed position built
 *   from the two check values. While waiting, td->wait_mb_pos holds that
 *   position; it is reset to INT_MAX afterwards.
 *
 * update_pos(td, mb_y, mb_x):
 *   Publish the packed position in td->thread_mb_pos and, in sliced
 *   threading with more than one job, broadcast td->cond when a neighbour
 *   thread is waiting for a position that has now been reached.
 *   This macro reads avctx, num_jobs, next_td and prev_td from the
 *   enclosing scope.
 *
 * All macro parameters are parenthesized so that arguments containing
 * low-precedence operators (e.g. a conditional expression) or address-of
 * expressions expand as intended.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);             \
        if (atomic_load(&(otd)->thread_mb_pos) < tmp) {                       \
            pthread_mutex_lock(&(otd)->lock);                                 \
            atomic_store(&(td)->wait_mb_pos, tmp);                            \
            do {                                                              \
                if (atomic_load(&(otd)->thread_mb_pos) >= tmp)                \
                    break;                                                    \
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);                \
            } while (1);                                                      \
            atomic_store(&(td)->wait_mb_pos, INT_MAX);                        \
            pthread_mutex_unlock(&(otd)->lock);                               \
        }                                                                     \
    } while (0)

#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);            \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1 :                                \
            (next_td != (td) && pos >= atomic_load(&next_td->wait_mb_pos)) || \
            (prev_td != (td) && pos >= atomic_load(&prev_td->wait_mb_pos));   \
        atomic_store(&(td)->thread_mb_pos, pos);                              \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&(td)->lock);                                  \
            pthread_cond_broadcast(&(td)->cond);                              \
            pthread_mutex_unlock(&(td)->lock);                                \
        }                                                                     \
    } while (0)
#else
/* Without thread support both operations are no-ops. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) do { } while (0)
#define update_pos(td, mb_y, mb_x) do { } while (0)
#endif
2360
2361
16181
/**
 * Decode one macroblock row without applying the loop filter.
 *
 * The row index is taken from the upper 16 bits of the calling thread's
 * thread_mb_pos. For every macroblock the function optionally decodes the
 * mode (when s->mb_layout is 0), decodes coefficients, predicts, adds the
 * residual, records the filter strength when deblocking is enabled, and
 * publishes its progress with update_pos().
 *
 * @param avctx    codec context whose priv_data is the VP8Context
 * @param tdata    unused
 * @param jobnr    job index, used to locate the neighbouring threads' data
 * @param threadnr index into s->thread_data for this thread
 * @param is_vp7   nonzero for VP7 behaviour
 * @return 0 on success, AVERROR_INVALIDDATA when vpX_rac_is_end() reports
 *         that the coefficient partition is exhausted
 */
static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
    int mb_x, mb_xy = mb_y * s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c  = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (vpX_rac_is_end(c))
         return AVERROR_INVALIDDATA;

    /* The first and last rows have no neighbour above/below; pointing the
     * neighbour at td itself disables the corresponding waits. */
    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    if (!is_vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    td->mv_bounds.mv_min.x = -MARGIN;
    td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        if (vpX_rac_is_end(c))
            return AVERROR_INVALIDDATA;
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1),
                                 mb_y - (is_vp7 ? 2 : 1));
            } else {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
                                 mb_y - (is_vp7 ? 2 : 1));
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
                         s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
                         dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        /* mb->skip is tested again on purpose: it is re-read after
         * decode_mb_coeffs() has run. */
        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            /* Reset DC block predictors if they would exist
             * if the mb had coefficients */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);

        /* With several jobs the last thread saves the unfiltered border
         * here; with a single job filter_mb_row() does it instead. */
        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0]      += 16;
        dst[1]      += 8;
        dst[2]      += 8;
        td->mv_bounds.mv_min.x -= 64;
        td->mv_bounds.mv_max.x -= 64;

        /* The former special case for mb_x == s->mb_width + 1 could never
         * trigger inside this loop (mb_x < s->mb_width), so the position is
         * always published as-is. */
        update_pos(td, mb_y, mb_x);
    }
    return 0;
}
2488
2489
330
/** VP7 row decoder: decode_mb_row_no_filter() with is_vp7 set to 1. */
static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    const int is_vp7 = 1;

    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, is_vp7);
}
2494
2495
15851
/** VP8 row decoder: decode_mb_row_no_filter() with is_vp7 set to 0. */
static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    const int is_vp7 = 0;

    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, is_vp7);
}
2500
2501
15115
/**
 * Apply the loop filter to one macroblock row.
 *
 * The row index is taken from the upper 16 bits of the calling thread's
 * thread_mb_pos. Before filtering a macroblock the function waits for the
 * neighbouring rows' threads via check_thread_pos(); afterwards it
 * publishes its own progress, offset by s->mb_width + 3, via update_pos().
 * With a single job it also saves the unfiltered border of each macroblock
 * into s->top_border before filtering.
 *
 * @param avctx    codec context whose priv_data is the VP8Context
 * @param tdata    unused
 * @param jobnr    job index, used to locate the neighbouring threads' data
 * @param threadnr index into s->thread_data for this thread
 * @param is_vp7   forwarded to filter_mb()
 */
static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    /* The first and last rows have no neighbour above/below; pointing the
     * neighbour at td itself disables the corresponding waits. */
    prev_td = mb_y == 0 ? td
                        : &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    next_td = mb_y == s->mb_height - 1 ? td
                                       : &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];

        if (prev_td != td)
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        if (next_td != td && next_td != &s->thread_data[0])
            check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        /* With several jobs the border backup is done by the row decoder. */
        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}
2559
2560
330
/** VP7 row filter: filter_mb_row() with is_vp7 set to 1. */
static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    const int is_vp7 = 1;

    filter_mb_row(avctx, tdata, jobnr, threadnr, is_vp7);
}
2565
2566
14785
/** VP8 row filter: filter_mb_row() with is_vp7 set to 0. */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    const int is_vp7 = 0;

    filter_mb_row(avctx, tdata, jobnr, threadnr, is_vp7);
}
2571
2572
static av_always_inline
2573
1142
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2574
                              int threadnr, int is_vp7)
2575
{
2576
1142
    VP8Context *s = avctx->priv_data;
2577
1142
    VP8ThreadData *td = &s->thread_data[jobnr];
2578
1142
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
2579
1142
    VP8Frame *curframe = s->curframe;
2580
1142
    int mb_y, num_jobs = s->num_jobs;
2581
    int ret;
2582
2583
1142
    td->thread_nr = threadnr;
2584
1142
    td->mv_bounds.mv_min.y   = -MARGIN - 64 * threadnr;
2585
1142
    td->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
2586
17323
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2587
16181
        atomic_store(&td->thread_mb_pos, mb_y << 16);
2588
16181
        ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2589
16181
        if (ret < 0) {
2590
            update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
2591
            return ret;
2592
        }
2593
16181
        if (s->deblock_filter)
2594
15115
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2595





16181
        update_pos(td, mb_y, INT_MAX & 0xFFFF);
2596
2597
16181
        td->mv_bounds.mv_min.y -= 64 * num_jobs;
2598
16181
        td->mv_bounds.mv_max.y -= 64 * num_jobs;
2599
2600
16181
        if (avctx->active_thread_type == FF_THREAD_FRAME)
2601
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
2602
    }
2603
2604
1142
    return 0;
2605
}
2606
2607
30
/** VP7 job function: vp78_decode_mb_row_sliced() with is_vp7 = IS_VP7. */
static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    int ret = vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);

    return ret;
}
2612
2613
1112
/** VP8 job function: vp78_decode_mb_row_sliced() with is_vp7 = IS_VP8. */
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    int ret = vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);

    return ret;
}
2618
2619
static av_always_inline
2620
1142
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2621
                      AVPacket *avpkt, int is_vp7)
2622
{
2623
1142
    VP8Context *s = avctx->priv_data;
2624
    int ret, i, referenced, num_jobs;
2625
    enum AVDiscard skip_thresh;
2626
1142
    VP8Frame *av_uninit(curframe), *prev_frame;
2627
2628
1142
    if (is_vp7)
2629
30
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2630
    else
2631
1112
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2632
2633
1142
    if (ret < 0)
2634
        goto err;
2635
2636
1142
    if (s->actually_webp) {
2637
        // avctx->pix_fmt already set in caller.
2638

1136
    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2639
        s->pix_fmt = get_pixel_format(s);
2640
        if (s->pix_fmt < 0) {
2641
            ret = AVERROR(EINVAL);
2642
            goto err;
2643
        }
2644
        avctx->pix_fmt = s->pix_fmt;
2645
    }
2646
2647
1142
    prev_frame = s->framep[VP56_FRAME_CURRENT];
2648
2649

1153
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2650
11
                 s->update_altref == VP56_FRAME_CURRENT;
2651
2652
1142
    skip_thresh = !referenced ? AVDISCARD_NONREF
2653

1142
                              : !s->keyframe ? AVDISCARD_NONKEY
2654
                                             : AVDISCARD_ALL;
2655
2656
1142
    if (avctx->skip_frame >= skip_thresh) {
2657
        s->invisible = 1;
2658
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2659
        goto skip_decode;
2660
    }
2661

1142
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2662
2663
    // release no longer referenced frames
2664
6852
    for (i = 0; i < 5; i++)
2665
5710
        if (s->frames[i].tf.f->buf[0] &&
2666
3826
            &s->frames[i] != prev_frame &&
2667
2721
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2668
2710
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
2669
1741
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2670
1033
            vp8_release_frame(s, &s->frames[i]);
2671
2672
1142
    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2673
2674
1142
    if (!s->colorspace)
2675
1142
        avctx->colorspace = AVCOL_SPC_BT470BG;
2676
1142
    if (s->fullrange)
2677
        avctx->color_range = AVCOL_RANGE_JPEG;
2678
    else
2679
1142
        avctx->color_range = AVCOL_RANGE_MPEG;
2680
2681
    /* Given that arithmetic probabilities are updated every frame, it's quite
2682
     * likely that the values we have on a random interframe are complete
2683
     * junk if we didn't start decode on a keyframe. So just don't display
2684
     * anything rather than junk. */
2685

1142
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2686
1093
                         !s->framep[VP56_FRAME_GOLDEN]   ||
2687
1093
                         !s->framep[VP56_FRAME_GOLDEN2])) {
2688
        av_log(avctx, AV_LOG_WARNING,
2689
               "Discarding interframe without a prior keyframe!\n");
2690
        ret = AVERROR_INVALIDDATA;
2691
        goto err;
2692
    }
2693
2694
1142
    curframe->tf.f->key_frame = s->keyframe;
2695
2284
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2696
1142
                                            : AV_PICTURE_TYPE_P;
2697
1142
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2698
        goto err;
2699
2700
    // check if golden and altref are swapped
2701
1142
    if (s->update_altref != VP56_FRAME_NONE)
2702
196
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2703
    else
2704
946
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2705
2706
1142
    if (s->update_golden != VP56_FRAME_NONE)
2707
139
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2708
    else
2709
1003
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2710
2711
1142
    if (s->update_last)
2712
1131
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2713
    else
2714
11
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2715
2716
1142
    s->next_framep[VP56_FRAME_CURRENT] = curframe;
2717
2718
1142
    if (avctx->codec->update_thread_context)
2719
1106
        ff_thread_finish_setup(avctx);
2720
2721
1142
    if (avctx->hwaccel) {
2722
        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2723
        if (ret < 0)
2724
            goto err;
2725
2726
        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2727
        if (ret < 0)
2728
            goto err;
2729
2730
        ret = avctx->hwaccel->end_frame(avctx);
2731
        if (ret < 0)
2732
            goto err;
2733
2734
    } else {
2735
1142
        s->linesize   = curframe->tf.f->linesize[0];
2736
1142
        s->uvlinesize = curframe->tf.f->linesize[1];
2737
2738
1142
        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2739
        /* Zero macroblock structures for top/top-left prediction
2740
         * from outside the frame. */
2741
1142
        if (!s->mb_layout)
2742
1112
            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2743
1112
                   (s->mb_width + 1) * sizeof(*s->macroblocks));
2744

1142
        if (!s->mb_layout && s->keyframe)
2745
48
            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2746
2747
1142
        memset(s->ref_count, 0, sizeof(s->ref_count));
2748
2749
1142
        if (s->mb_layout == 1) {
2750
            // Make sure the previous frame has read its segmentation map,
2751
            // if we re-use the same map.
2752

30
            if (prev_frame && s->segmentation.enabled &&
2753
                !s->segmentation.update_map)
2754
                ff_thread_await_progress(&prev_frame->tf, 1, 0);
2755
30
            if (is_vp7)
2756
30
                ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2757
            else
2758
                ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2759
30
            if (ret < 0)
2760
                goto err;
2761
        }
2762
2763
1142
        if (avctx->active_thread_type == FF_THREAD_FRAME)
2764
            num_jobs = 1;
2765
        else
2766
1142
            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2767
1142
        s->num_jobs   = num_jobs;
2768
1142
        s->curframe   = curframe;
2769
1142
        s->prev_frame = prev_frame;
2770
1142
        s->mv_bounds.mv_min.y   = -MARGIN;
2771
1142
        s->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
2772
10278
        for (i = 0; i < MAX_THREADS; i++) {
2773
9136
            VP8ThreadData *td = &s->thread_data[i];
2774
9136
            atomic_init(&td->thread_mb_pos, 0);
2775
9136
            atomic_init(&td->wait_mb_pos, INT_MAX);
2776
        }
2777
1142
        if (is_vp7)
2778
30
            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2779
                            num_jobs);
2780
        else
2781
1112
            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2782
                            num_jobs);
2783
    }
2784
2785
1142
    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2786
1142
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2787
2788
1142
skip_decode:
2789
    // if future frames don't use the updated probabilities,
2790
    // reset them to the values we saved
2791
1142
    if (!s->update_probabilities)
2792
64
        s->prob[0] = s->prob[1];
2793
2794
1142
    if (!s->invisible) {
2795
1133
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2796
            return ret;
2797
1133
        *got_frame = 1;
2798
    }
2799
2800
1142
    return avpkt->size;
2801
err:
2802
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2803
    return ret;
2804
}
2805
2806
1112
int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2807
                        AVPacket *avpkt)
2808
{
2809
1112
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2810
}
2811
2812
#if CONFIG_VP7_DECODER
/** VP7 decode callback: vp78_decode_frame() with is_vp7 = IS_VP7. */
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    int ret = vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);

    return ret;
}
#endif /* CONFIG_VP7_DECODER */
2819
2820
60
/**
 * Close callback: flush the decoder state and free all frame structures.
 *
 * Does nothing when avctx->priv_data is NULL.
 *
 * @return always 0
 */
av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    if (s) {
        VP8Frame *frame;
        VP8Frame *const end = s->frames + FF_ARRAY_ELEMS(s->frames);

        vp8_decode_flush_impl(avctx, 1);
        for (frame = s->frames; frame < end; frame++)
            av_frame_free(&frame->tf.f);
    }

    return 0;
}
2834
2835
60
/**
 * Allocate an AVFrame for every entry of s->frames.
 *
 * Entries allocated before a failure are left in place for the caller to
 * release.
 *
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static av_cold int vp8_init_frames(VP8Context *s)
{
    VP8Frame *frame;
    VP8Frame *const end = s->frames + FF_ARRAY_ELEMS(s->frames);

    for (frame = s->frames; frame < end; frame++) {
        frame->tf.f = av_frame_alloc();
        if (frame->tf.f == NULL)
            return AVERROR(ENOMEM);
    }

    return 0;
}
2845
2846
static av_always_inline
2847
60
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2848
{
2849
60
    VP8Context *s = avctx->priv_data;
2850
    int ret;
2851
2852
60
    s->avctx = avctx;
2853
60
    s->vp7   = avctx->codec->id == AV_CODEC_ID_VP7;
2854
60
    s->pix_fmt = AV_PIX_FMT_NONE;
2855
60
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2856
2857
60
    ff_videodsp_init(&s->vdsp, 8);
2858
2859
60
    ff_vp78dsp_init(&s->vp8dsp);
2860
60
    if (CONFIG_VP7_DECODER && is_vp7) {
2861
3
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2862
3
        ff_vp7dsp_init(&s->vp8dsp);
2863
3
        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2864
3
        s->filter_mb_row           = vp7_filter_mb_row;
2865
57
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
2866
57
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2867
57
        ff_vp8dsp_init(&s->vp8dsp);
2868
57
        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2869
57
        s->filter_mb_row           = vp8_filter_mb_row;
2870
    }
2871
2872
    /* does not change for VP8 */
2873
60
    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
2874
2875
60
    if ((ret = vp8_init_frames(s)) < 0) {
2876
        ff_vp8_decode_free(avctx);
2877
        return ret;
2878
    }
2879
2880
60
    return 0;
2881
}
2882
2883
#if CONFIG_VP7_DECODER
/** VP7 init callback: vp78_decode_init() with is_vp7 = IS_VP7. */
static int vp7_decode_init(AVCodecContext *avctx)
{
    int ret = vp78_decode_init(avctx, IS_VP7);

    return ret;
}
#endif /* CONFIG_VP7_DECODER */
2889
2890
57
/** VP8 init callback: vp78_decode_init() with is_vp7 = IS_VP8. */
av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    int ret = vp78_decode_init(avctx, IS_VP8);

    return ret;
}
2894
2895
#if CONFIG_VP8_DECODER
#if HAVE_THREADS
/* Translate a frame pointer from s_src->frames[] to the entry with the same
 * index in s->frames[]; NULL stays NULL. */
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

/**
 * Copy the inter-frame decoder state from the source thread context into
 * the destination one.
 *
 * @param dst destination codec context
 * @param src source codec context
 * @return 0 on success, or the negative error code of vp8_ref_frame()
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    /* buffers sized for other dimensions must be dropped */
    if (s->macroblocks_base) {
        int size_changed = s_src->mb_width  != s->mb_width ||
                           s_src->mb_height != s->mb_height;

        if (size_changed) {
            free_buffers(s);
            s->mb_width  = s_src->mb_width;
            s->mb_height = s_src->mb_height;
        }
    }

    s->pix_fmt      = s_src->pix_fmt;
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    /* take a reference to every frame the source context holds */
    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        int ret;

        if (!s_src->frames[i].tf.f->buf[0])
            continue;

        ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
        if (ret < 0)
            return ret;
    }

    for (i = 0; i < 4; i++)
        s->framep[i] = REBASE(s_src->next_framep[i]);

    return 0;
}
#endif /* HAVE_THREADS */
#endif /* CONFIG_VP8_DECODER */
2935
2936
#if CONFIG_VP7_DECODER
/*
 * Codec descriptor for the "vp7" decoder (AV_CODEC_ID_VP7).
 * Uses vp7_decode_init / vp7_decode_frame, and shares ff_vp8_decode_free
 * and vp8_decode_flush with the VP8 descriptor. Only AV_CODEC_CAP_DR1 is
 * advertised; no update_thread_context, hw_configs or caps_internal is set.
 */
2937
AVCodec ff_vp7_decoder = {
2938
    .name                  = "vp7",
2939
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
2940
    .type                  = AVMEDIA_TYPE_VIDEO,
2941
    .id                    = AV_CODEC_ID_VP7,
2942
    .priv_data_size        = sizeof(VP8Context),
2943
    .init                  = vp7_decode_init,
2944
    .close                 = ff_vp8_decode_free,
2945
    .decode                = vp7_decode_frame,
2946
    .capabilities          = AV_CODEC_CAP_DR1,
2947
    .flush                 = vp8_decode_flush,
2948
};
2949
#endif /* CONFIG_VP7_DECODER */
2950
2951
#if CONFIG_VP8_DECODER
/*
 * Codec descriptor for the "vp8" decoder (AV_CODEC_ID_VP8).
 * Advertises DR1 plus frame and slice threading, registers
 * vp8_decode_update_thread_context through ONLY_IF_THREADS_ENABLED(),
 * and lists the VAAPI / NVDEC hwaccel configs when the corresponding
 * CONFIG_VP8_*_HWACCEL option is enabled (the list is NULL-terminated).
 */
2952
AVCodec ff_vp8_decoder = {
2953
    .name                  = "vp8",
2954
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
2955
    .type                  = AVMEDIA_TYPE_VIDEO,
2956
    .id                    = AV_CODEC_ID_VP8,
2957
    .priv_data_size        = sizeof(VP8Context),
2958
    .init                  = ff_vp8_decode_init,
2959
    .close                 = ff_vp8_decode_free,
2960
    .decode                = ff_vp8_decode_frame,
2961
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2962
                             AV_CODEC_CAP_SLICE_THREADS,
2963
    .flush                 = vp8_decode_flush,
2964
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2965
    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
2966
#if CONFIG_VP8_VAAPI_HWACCEL
2967
                               HWACCEL_VAAPI(vp8),
2968
#endif
2969
#if CONFIG_VP8_NVDEC_HWACCEL
2970
                               HWACCEL_NVDEC(vp8),
2971
#endif
2972
                               NULL
2973
                           },
2974
    .caps_internal         = FF_CODEC_CAP_ALLOCATE_PROGRESS,
2975
};
2976
#endif /* CONFIG_VP8_DECODER */