GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/vp8.c Lines: 1352 1587 85.2 %
Date: 2021-04-22 14:24:15 Branches: 808 1165 69.4 %

Line Branch Exec Source
1
/*
2
 * VP7/VP8 compatible video decoder
3
 *
4
 * Copyright (C) 2010 David Conrad
5
 * Copyright (C) 2010 Ronald S. Bultje
6
 * Copyright (C) 2010 Fiona Glaser
7
 * Copyright (C) 2012 Daniel Kang
8
 * Copyright (C) 2014 Peter Ross
9
 *
10
 * This file is part of FFmpeg.
11
 *
12
 * FFmpeg is free software; you can redistribute it and/or
13
 * modify it under the terms of the GNU Lesser General Public
14
 * License as published by the Free Software Foundation; either
15
 * version 2.1 of the License, or (at your option) any later version.
16
 *
17
 * FFmpeg is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
 * Lesser General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Lesser General Public
23
 * License along with FFmpeg; if not, write to the Free Software
24
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25
 */
26
27
#include "libavutil/imgutils.h"
28
#include "libavutil/mem_internal.h"
29
30
#include "avcodec.h"
31
#include "hwconfig.h"
32
#include "internal.h"
33
#include "mathops.h"
34
#include "rectangle.h"
35
#include "thread.h"
36
#include "vp8.h"
37
#include "vp8data.h"
38
39
#if ARCH_ARM
40
#   include "arm/vp8.h"
41
#endif
42
43
#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
44
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
45
#elif CONFIG_VP7_DECODER
46
#define VPX(vp7, f) vp7_ ## f
47
#else // CONFIG_VP8_DECODER
48
#define VPX(vp7, f) vp8_ ## f
49
#endif
50
51
77
/**
 * Free the per-context work buffers.
 *
 * This covers the per-thread data (each entry's filter_strength array and,
 * when built with HAVE_THREADS, its condition variable and mutex) as well as
 * macroblocks_base, intra4x4_pred_mode_top, top_nnz and top_border.
 * s->macroblocks is reset to NULL at the end.
 */
static void free_buffers(VP8Context *s)
{
    int t;

    if (s->thread_data) {
        for (t = 0; t < MAX_THREADS; t++) {
            VP8ThreadData *td = &s->thread_data[t];

#if HAVE_THREADS
            pthread_cond_destroy(&td->cond);
            pthread_mutex_destroy(&td->lock);
#endif
            av_freep(&td->filter_strength);
        }
    }

    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}
70
71
1142
/**
 * Allocate the buffers backing a VP8Frame.
 *
 * Obtains the picture buffer through ff_thread_get_buffer(), a zeroed
 * segmentation map of mb_width * mb_height bytes and, if the active hwaccel
 * declares a non-zero frame_priv_data_size, a zeroed hwaccel private buffer.
 *
 * @param s   decoder context
 * @param f   frame to fill in
 * @param ref non-zero to request the buffer with AV_GET_BUFFER_FLAG_REF
 * @return 0 on success, the ff_thread_get_buffer() error if that call fails,
 *         or AVERROR(ENOMEM) if one of the side buffers cannot be allocated
 *         (the segmentation map and picture buffer are released in that case)
 */
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    const AVHWAccel *hwaccel;
    int flags = ref ? AV_GET_BUFFER_FLAG_REF : 0;
    int ret   = ff_thread_get_buffer(s->avctx, &f->tf, flags);

    if (ret < 0)
        return ret;

    f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height);
    if (!f->seg_map)
        goto fail;

    hwaccel = s->avctx->hwaccel;
    if (hwaccel && hwaccel->frame_priv_data_size) {
        f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
        if (!f->hwaccel_priv_buf)
            goto fail;
        f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}
95
96
1418
/**
 * Drop everything a VP8Frame holds: its segmentation map, its hwaccel
 * private buffer and the picture buffer itself.
 */
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);

    /* hwaccel_picture_private is set from hwaccel_priv_buf->data in
     * vp8_alloc_frame(), so it is cleared together with that buffer. */
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->hwaccel_picture_private = NULL;

    ff_thread_release_buffer(s->avctx, &f->tf);
}
103
104
#if CONFIG_VP8_DECODER
105
/**
 * Make dst a new reference to the same picture, segmentation map and hwaccel
 * private data as src.
 *
 * Whatever dst held before is released first. If any of the references
 * cannot be created, dst is released again so that it never keeps a
 * partially referenced frame.
 *
 * @return 0 on success, the ff_thread_ref_frame() error if that call fails,
 *         or AVERROR(ENOMEM) if a buffer reference cannot be created
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;

    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map)))
        goto fail;

    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            goto fail;
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    /* Same cleanup for every failure after the picture was referenced. */
    vp8_release_frame(s, dst);
    return AVERROR(ENOMEM);
}
127
#endif /* CONFIG_VP8_DECODER */
128
129
77
/**
 * Release every frame in s->frames and clear the framep reference table.
 *
 * @param avctx    codec context whose priv_data is the VP8Context
 * @param free_mem if non-zero, also free the work buffers via free_buffers()
 */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int n;

    for (n = 0; n < FF_ARRAY_ELEMS(s->frames); n++) {
        VP8Frame *frame = &s->frames[n];

        vp8_release_frame(s, frame);
    }
    memset(s->framep, 0, sizeof(s->framep));

    if (!free_mem)
        return;

    free_buffers(s);
}
141
142
/**
 * Flush entry point: releases all frames but keeps the work buffers
 * (calls vp8_decode_flush_impl() with free_mem = 0).
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    const int free_mem = 0;

    vp8_decode_flush_impl(avctx, free_mem);
}
146
147
1142
/**
 * Pick the first of the five entries of s->frames that is not currently used
 * as the current, previous, golden or golden2 reference.
 *
 * If the chosen entry still holds a picture buffer it is released before
 * being returned. The process is aborted if no such entry exists.
 */
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *found = NULL;
    int idx;

    for (idx = 0; idx < 5 && !found; idx++) {
        VP8Frame *candidate = &s->frames[idx];

        if (candidate == s->framep[VP56_FRAME_CURRENT]  ||
            candidate == s->framep[VP56_FRAME_PREVIOUS] ||
            candidate == s->framep[VP56_FRAME_GOLDEN]   ||
            candidate == s->framep[VP56_FRAME_GOLDEN2])
            continue;

        found = candidate;
    }

    if (!found) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }

    if (found->tf.f->buf[0])
        vp8_release_frame(s, found);

    return found;
}
170
171
21
/**
 * Ask ff_get_format() to choose among the pixel formats this decoder offers:
 * the hwaccel formats compiled in (VAAPI, CUDA) followed by YUV420P.
 *
 * @return the value returned by ff_get_format()
 */
static enum AVPixelFormat get_pixel_format(VP8Context *s)
{
    /* AV_PIX_FMT_NONE terminates the list. */
    enum AVPixelFormat candidates[] = {
#if CONFIG_VP8_VAAPI_HWACCEL
        AV_PIX_FMT_VAAPI,
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
        AV_PIX_FMT_CUDA,
#endif
        AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_NONE,
    };

    return ff_get_format(s->avctx, candidates);
}
186
187
static av_always_inline
188
37
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
189
{
190
37
    AVCodecContext *avctx = s->avctx;
191
37
    int i, ret, dim_reset = 0;
192
193


37
    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
194
24
        height != s->avctx->height) {
195
13
        vp8_decode_flush_impl(s->avctx, 1);
196
197
13
        ret = ff_set_dimensions(s->avctx, width, height);
198
13
        if (ret < 0)
199
            return ret;
200
201
13
        dim_reset = (s->macroblocks_base != NULL);
202
    }
203
204

37
    if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) &&
205

28
         !s->actually_webp && !is_vp7) {
206
21
        s->pix_fmt = get_pixel_format(s);
207
21
        if (s->pix_fmt < 0)
208
            return AVERROR(EINVAL);
209
21
        avctx->pix_fmt = s->pix_fmt;
210
    }
211
212
37
    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
213
37
    s->mb_height = (s->avctx->coded_height + 15) / 16;
214
215

37
    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
216
                   avctx->thread_count > 1;
217
37
    if (!s->mb_layout) { // Frame threading and one thread
218
36
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
219
                                               sizeof(*s->macroblocks));
220
36
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
221
    } else // Sliced threading
222
1
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
223
                                         sizeof(*s->macroblocks));
224
37
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
225
37
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
226
37
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
227
228

37
    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
229

37
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
230
        free_buffers(s);
231
        return AVERROR(ENOMEM);
232
    }
233
234
333
    for (i = 0; i < MAX_THREADS; i++) {
235
592
        s->thread_data[i].filter_strength =
236
296
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
237
296
        if (!s->thread_data[i].filter_strength) {
238
            free_buffers(s);
239
            return AVERROR(ENOMEM);
240
        }
241
#if HAVE_THREADS
242
296
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
243
296
        pthread_cond_init(&s->thread_data[i].cond, NULL);
244
#endif
245
    }
246
247
37
    s->macroblocks = s->macroblocks_base + 1;
248
249
37
    return 0;
250
}
251
252
1
/** VP7 flavour of update_dimensions(); passes IS_VP7 as the codec selector. */
static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    int ret = update_dimensions(s, width, height, IS_VP7);

    return ret;
}
256
257
36
/** VP8 flavour of update_dimensions(); passes IS_VP8 as the codec selector. */
static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    int ret = update_dimensions(s, width, height, IS_VP8);

    return ret;
}
261
262
263
433
/**
 * Read the segmentation part of the frame header from the header range
 * coder: the two update flags, then (if flagged) the per-segment quantizer
 * and filter-level values, then (if flagged) the three segment-id tree
 * probabilities.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int n;

    s->segmentation.update_map          = vp8_rac_get(c);
    s->segmentation.update_feature_data = vp8_rac_get(c);

    if (s->segmentation.update_feature_data) {
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (n = 0; n < 4; n++)
            s->segmentation.base_quant[n] = vp8_rac_get_sint(c, 7);

        for (n = 0; n < 4; n++)
            s->segmentation.filter_level[n] = vp8_rac_get_sint(c, 6);
    }

    if (!s->segmentation.update_map)
        return;

    /* A probability that is not transmitted defaults to 255. */
    for (n = 0; n < 3; n++) {
        if (vp8_rac_get(c))
            s->prob->segmentid[n] = vp8_rac_get_uint(c, 8);
        else
            s->prob->segmentid[n] = 255;
    }
}
284
285
42
/**
 * Read loop-filter delta updates from the header range coder.
 *
 * For each of the four reference deltas and then for each mode delta
 * (MODE_I4x4 .. VP8_MVMODE_SPLIT): an update flag, and if it is set a 6-bit
 * magnitude followed by a sign flag.
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int idx;

    for (idx = 0; idx < 4; idx++) {
        int delta;

        if (!vp8_rac_get(c))
            continue;

        delta = vp8_rac_get_uint(c, 6);
        if (vp8_rac_get(c))
            delta = -delta;
        s->lf_delta.ref[idx] = delta;
    }

    for (idx = MODE_I4x4; idx <= VP8_MVMODE_SPLIT; idx++) {
        int delta;

        if (!vp8_rac_get(c))
            continue;

        delta = vp8_rac_get_uint(c, 6);
        if (vp8_rac_get(c))
            delta = -delta;
        s->lf_delta.mode[idx] = delta;
    }
}
308
309
1112
/**
 * Set up one range decoder per DCT coefficient partition.
 *
 * The partition count (1, 2, 4 or 8) is read from the header range coder.
 * buf starts with a table of 3-byte little-endian sizes for all partitions
 * but the last; the last partition takes whatever data remains.
 *
 * @param s        decoder context
 * @param buf      data following the first (header) partition
 * @param buf_size number of bytes available at buf
 * @return 0 on success, -1 if the size table or a partition does not fit in
 *         buf_size, or the negative error from ff_vp56_init_range_decoder()
 *         for any partition but the last
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *size_table = buf;
    int last, table_bytes, idx;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
    last        = s->num_coeff_partitions - 1;
    table_bytes = 3 * last;

    buf      += table_bytes;
    buf_size -= table_bytes;
    if (buf_size < 0)
        return -1;

    for (idx = 0; idx < last; idx++) {
        int part_size = AV_RL24(size_table + 3 * idx);
        int err;

        if (part_size > buf_size)
            return -1;
        s->coeff_partition_size[idx] = part_size;

        err = ff_vp56_init_range_decoder(&s->coeff_partition[idx], buf, part_size);
        if (err < 0)
            return err;

        buf      += part_size;
        buf_size -= part_size;
    }

    /* The final partition has no table entry; its result is not checked. */
    s->coeff_partition_size[last] = buf_size;
    ff_vp56_init_range_decoder(&s->coeff_partition[last], buf, buf_size);

    return 0;
}
340
341
30
/**
 * Read the VP7 quantizer indices and fill qmat[0].
 *
 * The luma AC index is always coded with 7 bits; each of the other five
 * indices is preceded by a flag and falls back to the luma AC index when the
 * flag is clear.
 */
static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int yac_qi, ydc_qi, y2dc_qi, y2ac_qi, uvdc_qi, uvac_qi;

    yac_qi = vp8_rac_get_uint(c, 7);
    ydc_qi = y2dc_qi = y2ac_qi = uvdc_qi = uvac_qi = yac_qi;

    /* The flags and values must be read in exactly this order. */
    if (vp8_rac_get(c))
        ydc_qi  = vp8_rac_get_uint(c, 7);
    if (vp8_rac_get(c))
        y2dc_qi = vp8_rac_get_uint(c, 7);
    if (vp8_rac_get(c))
        y2ac_qi = vp8_rac_get_uint(c, 7);
    if (vp8_rac_get(c))
        uvdc_qi = vp8_rac_get_uint(c, 7);
    if (vp8_rac_get(c))
        uvac_qi = vp8_rac_get_uint(c, 7);

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    /* Chroma uses the luma tables; the DC multiplier is capped at 132. */
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}
359
360
1112
/**
 * Read the VP8 quantizer header (7-bit luma AC index plus five signed 4-bit
 * deltas) and derive the dequantization multipliers for all four segments.
 */
static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int seg;

    s->quant.yac_qi     = vp8_rac_get_uint(c, 7);
    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);

    for (seg = 0; seg < 4; seg++) {
        /* Without segmentation every segment uses the frame-level index;
         * with it, the per-segment value is either absolute or an offset. */
        int base_qi = s->quant.yac_qi;

        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[seg];
            if (!s->segmentation.absolute_vals)
                base_qi += s->quant.yac_qi;
        }

        /* Every table index is clipped to 7 bits (0..127). */
        s->qmat[seg].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
        s->qmat[seg].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,                       7)];
        s->qmat[seg].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[seg].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[seg].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
        s->qmat[seg].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];

        /* Final range limits: Y2 AC is at least 8, chroma DC at most 132. */
        s->qmat[seg].luma_dc_qmul[1] = FFMAX(s->qmat[seg].luma_dc_qmul[1], 8);
        s->qmat[seg].chroma_qmul[0]  = FFMIN(s->qmat[seg].chroma_qmul[0], 132);
    }
}
392
393
/**
394
 * Determine which buffers golden and altref should be updated with after this frame.
395
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
396
 *
397
 * Intra frames update all 3 references
398
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
399
 * If the update (golden|altref) flag is set, it's updated with the current frame
400
 *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
401
 * If the flag is not set, the number read means:
402
 *      0: no update
403
 *      1: VP56_FRAME_PREVIOUS
404
 *      2: update golden with altref, or update altref with golden
405
 */
406
2128
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;
    int copy_mode;

    /* With the update flag set nothing more is read from the bitstream. */
    if (update)
        return VP56_FRAME_CURRENT;

    copy_mode = vp8_rac_get_uint(c, 2);

    if (copy_mode == 1)
        return VP56_FRAME_PREVIOUS;

    /* Mode 2 copies from the "other" one of golden / golden2 (altref). */
    if (copy_mode == 2)
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2
                                          : VP56_FRAME_GOLDEN;

    return VP56_FRAME_NONE;
}
421
422
49
/**
 * Reset the token probabilities to their defaults: for each of the 4 block
 * types and 16 coefficient positions, copy the default probabilities of the
 * band that position belongs to (vp8_coeff_band).
 */
static void vp78_reset_probability_tables(VP8Context *s)
{
    int type, pos;

    for (type = 0; type < 4; type++) {
        for (pos = 0; pos < 16; pos++) {
            const int band = vp8_coeff_band[pos];

            memcpy(s->prob->token[type][pos],
                   vp8_token_default_probs[type][band],
                   sizeof(s->prob->token[type][pos]));
        }
    }
}
430
431
1142
/**
 * Read token probability updates from the header range coder.
 *
 * For every (block type, band, context, token) entry an update flag is
 * decoded with the probability from vp8_token_update_probs; when set, a new
 * 8-bit probability follows and is stored for every coefficient position
 * listed for that band in vp8_coeff_band_indexes (list ends at a negative
 * entry).
 */
static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int type, band, ctx, tok, m;

    for (type = 0; type < 4; type++) {
        for (band = 0; band < 8; band++) {
            for (ctx = 0; ctx < 3; ctx++) {
                for (tok = 0; tok < NUM_DCT_TOKENS-1; tok++) {
                    int prob;

                    if (!vp56_rac_get_prob_branchy(c, vp8_token_update_probs[type][band][ctx][tok]))
                        continue;

                    prob = vp8_rac_get_uint(c, 8);
                    for (m = 0; vp8_coeff_band_indexes[band][m] >= 0; m++)
                        s->prob->token[type][vp8_coeff_band_indexes[band][m]][ctx][tok] = prob;
                }
            }
        }
    }
}
446
447
#define VP7_MVC_SIZE 17
448
#define VP8_MVC_SIZE 19
449
450
1093
/**
 * Read the optional updates of the 16x16 luma and 8x8 chroma intra mode
 * probabilities, followed by the motion vector probability updates.
 *
 * @param s        decoder context
 * @param mvc_size number of MV probabilities per component to consider
 *                 (VP7_MVC_SIZE or VP8_MVC_SIZE)
 */
static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int comp, n;

    if (vp8_rac_get(c)) {
        for (n = 0; n < 4; n++)
            s->prob->pred16x16[n] = vp8_rac_get_uint(c, 8);
    }

    if (vp8_rac_get(c)) {
        for (n = 0; n < 3; n++)
            s->prob->pred8x8c[n] = vp8_rac_get_uint(c, 8);
    }

    // 17.2 MV probability update
    for (comp = 0; comp < 2; comp++) {
        for (n = 0; n < mvc_size; n++) {
            if (!vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[comp][n]))
                continue;
            s->prob->mvc[comp][n] = vp8_rac_get_nn(c);
        }
    }
}
469
470
1064
/**
 * Read the golden/altref update flags and work out which frame each of the
 * two references is to be updated from (see ref_to_update()).
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int golden_flag, altref_flag;

    /* Both flags come first in the bitstream; ref_to_update() may read
     * further bits afterwards, so the order here matters. */
    golden_flag = vp8_rac_get(c);
    altref_flag = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, golden_flag, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, altref_flag, VP56_FRAME_GOLDEN2);
}
480
481
/**
 * Copy planes 1 and 2 from src to dst, row by row.
 *
 * @param width  luma width; width / 2 bytes are copied per row
 * @param height luma height; height / 2 rows are copied per plane
 */
static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    const int row_bytes = width  / 2;
    const int rows      = height / 2;
    int plane, row;

    for (plane = 1; plane < 3; plane++) {
        for (row = 0; row < rows; row++) {
            memcpy(dst->data[plane] + row * dst->linesize[plane],
                   src->data[plane] + row * src->linesize[plane], row_bytes);
        }
    }
}
491
492
/**
 * Apply a fade to a plane of 8-bit samples:
 * dst = clip_uint8(src + ((src * beta) >> 8) + alpha).
 *
 * Each source sample is read before the matching destination sample is
 * written.
 */
static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *in  = src + row * src_linesize;
        uint8_t       *out = dst + row * dst_linesize;

        for (col = 0; col < width; col++) {
            uint8_t sample = in[col];
            int     scaled = (sample * beta) >> 8;

            out[col] = av_clip_uint8(sample + scaled + alpha);
        }
    }
}
507
508
30
/**
 * Apply the VP7 fade (alpha/beta) to the luma plane of the previous frame.
 *
 * Nothing is done on keyframes or when both alpha and beta are zero. When
 * the previous frame is also the golden frame, a fresh frame is allocated as
 * the new previous frame (chroma copied, luma faded into it) so that the
 * golden frame is left untouched; otherwise the fade is done in place.
 *
 * @return 0 on success, AVERROR_INVALIDDATA if the previous or golden
 *         reference is missing, or the vp8_alloc_frame() error
 */
static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
{
    AVFrame *src, *dst;
    int width, height, ret;

    if (s->keyframe || (!alpha && !beta))
        return 0;

    if (!s->framep[VP56_FRAME_PREVIOUS] ||
        !s->framep[VP56_FRAME_GOLDEN]) {
        av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        return AVERROR_INVALIDDATA;
    }

    width  = s->mb_width * 16;
    height = s->mb_height * 16;

    src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
    dst = src;

    /* preserve the golden frame, write a new previous frame */
    if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
        s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
        ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1);
        if (ret < 0)
            return ret;

        dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        copy_chroma(dst, src, width, height);
    }

    fade(dst->data[0], dst->linesize[0],
         src->data[0], src->linesize[0],
         width, height, alpha, beta);

    return 0;
}
544
545
30
/**
 * Parse a VP7 frame header and set up the decoder state for the frame.
 *
 * The fixed header is 4 bytes minus the profile number; the first partition
 * (the header fields read through the range coder) follows it, and the rest
 * of the buffer is the single coefficient partition.
 *
 * @param s        decoder context
 * @param buf      start of the frame data
 * @param buf_size number of bytes at buf
 * @return 0 on success, a negative AVERROR code on failure
 */
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hdr_bytes, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;
    int alpha  = 0;
    int beta   = 0;

    if (buf_size < 4)
        return AVERROR_INVALIDDATA;

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;
    hdr_bytes    = 4 - s->profile;

    if (buf_size < hdr_bytes + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, hdr_bytes + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += hdr_bytes;
    buf_size -= hdr_bytes;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ret = ff_vp56_init_range_decoder(c, buf, part1_size);
    if (ret < 0)
        return ret;
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        /* A keyframe refreshes both references and resets all
         * probabilities, segmentation, filter deltas and the scan order. */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        int value_bits;

        s->feature_enabled[i] = vp8_rac_get(c);
        if (!s->feature_enabled[i])
            continue;

        s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

        for (j = 0; j < 3; j++) {
            if (vp8_rac_get(c))
                s->feature_index_prob[i][j] = vp8_rac_get_uint(c, 8);
            else
                s->feature_index_prob[i][j] = 255;
        }

        value_bits = vp7_feature_value_size[s->profile][i];
        if (!value_bits)
            continue;

        for (j = 0; j < 4; j++) {
            if (vp8_rac_get(c))
                s->feature_value[i][j] = vp8_rac_get_uint(c, value_bits);
            else
                s->feature_value[i][j] = 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    /* Everything after the first partition is one coefficient partition. */
    s->num_coeff_partitions = 1;
    ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
    if (ret < 0)
        return ret;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        ret = vp7_update_dimensions(s, width, height);
        if (ret < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        if (vp8_rac_get(c))
            s->update_golden = VP56_FRAME_CURRENT;
        else
            s->update_golden = VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        /* If this frame's probabilities are not kept, save a copy of the
         * current set in prob[1]. */
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        /* Both values are 8-bit fields reinterpreted as signed. */
        alpha = (int8_t) vp8_rac_get_uint(c, 8);
        beta  = (int8_t) vp8_rac_get_uint(c, 8);
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c)) {
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];
    }

    /* H. Loop filter levels  */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    ret = vp7_fade_frame(s, alpha, beta);
    if (ret < 0)
        return ret;

    return 0;
}
708
709
1112
/**
 * Parse a VP8 frame header and set up the decoder state for the frame.
 *
 * Layout handled here: a 3-byte frame tag (keyframe flag, profile, show
 * flag, first-partition size); on keyframes a 7-byte block holding the start
 * code 0x2a019d and the 14-bit width/height with 2-bit scale factors; then
 * the first partition, whose fields are read through the range coder; then
 * the coefficient partitions.
 *
 * @param s        decoder context
 * @param buf      start of the frame data
 * @param buf_size number of bytes at buf
 * @return 0 on success, a negative AVERROR code on failure
 */
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    s->header_partition_size = header_size;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    /* On keyframes 7 more bytes precede the first partition; this check
     * also guarantees they are present before they are read below. */
    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        /* Dimensions are 14 bits; the top 2 bits of each 16-bit field hold
         * the scale factor. */
        width     = AV_RL16(buf + 3) & 0x3fff;
        height    = AV_RL16(buf + 5) & 0x3fff;
        hscale    = buf[4] >> 6;
        vscale    = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        /* A keyframe refreshes both references and resets probabilities,
         * segmentation and loop-filter deltas. */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ret = ff_vp56_init_range_decoder(c, buf, header_size);
    if (ret < 0)
        return ret;
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
        s->lf_delta.update = vp8_rac_get(c);
        if (s->lf_delta.update)
            update_lf_deltas(s);
    }

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    /* The update_last bit is only present on inter frames. */
    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    // Record the entropy coder state here so that hwaccels can use it.
    s->c.code_word = vp56_rac_renorm(&s->c);
    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
    s->coder_state_at_header_end.range     = s->c.high;
    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;

    return 0;
}
848
849
static av_always_inline
850
57174
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
851
{
852
57174
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
853
                             av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
854
57174
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
855
                             av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
856
57174
}
857
858
/**
859
 * Motion vector coding, 17.1.
860
 */
861
84886
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
862
{
863
84886
    int bit, x = 0;
864
865
84886
    if (vp56_rac_get_prob_branchy(c, p[0])) {
866
        int i;
867
868
79804
        for (i = 0; i < 3; i++)
869
59853
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
870

139657
        for (i = (vp7 ? 7 : 9); i > 3; i--)
871
119706
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
872

19951
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
873
13304
            x += 8;
874
    } else {
875
        // small_mvtree
876
64935
        const uint8_t *ps = p + 2;
877
64935
        bit = vp56_rac_get_prob(c, *ps);
878
64935
        ps += 1 + 3 * bit;
879
64935
        x  += 4 * bit;
880
64935
        bit = vp56_rac_get_prob(c, *ps);
881
64935
        ps += 1 + bit;
882
64935
        x  += 2 * bit;
883
64935
        x  += vp56_rac_get_prob(c, *ps);
884
    }
885
886

84886
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
887
}
888
889
static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
890
{
891
    return read_mv_component(c, p, 1);
892
}
893
894
28388
static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
895
{
896
28388
    return read_mv_component(c, p, 0);
897
}
898
899
static av_always_inline
900
136556
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
901
{
902
136556
    if (is_vp7)
903
        return vp7_submv_prob;
904
905
136556
    if (left == top)
906
58677
        return vp8_submv_prob[4 - !!left];
907
77879
    if (!top)
908
18933
        return vp8_submv_prob[2];
909
58946
    return vp8_submv_prob[1 - !!left];
910
}
911
912
/**
913
 * Split motion vector prediction, 16.4.
914
 * @returns the number of motion vectors parsed (2, 4 or 16)
915
 */
916
static av_always_inline
917
18325
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
918
                    int layout, int is_vp7)
919
{
920
    int part_idx;
921
    int n, num;
922
    VP8Macroblock *top_mb;
923
18325
    VP8Macroblock *left_mb = &mb[-1];
924
18325
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
925
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
926
    VP56mv *top_mv;
927
18325
    VP56mv *left_mv = left_mb->bmv;
928
18325
    VP56mv *cur_mv  = mb->bmv;
929
930
18325
    if (!layout) // layout is inlined, s->mb_layout is not
931
18325
        top_mb = &mb[2];
932
    else
933
        top_mb = &mb[-s->mb_width - 1];
934
18325
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
935
18325
    top_mv       = top_mb->bmv;
936
937
18325
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
938
11713
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
939
8044
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
940
        else
941
3669
            part_idx = VP8_SPLITMVMODE_8x8;
942
    } else {
943
6612
        part_idx = VP8_SPLITMVMODE_4x4;
944
    }
945
946
18325
    num              = vp8_mbsplit_count[part_idx];
947
18325
    mbsplits_cur     = vp8_mbsplits[part_idx],
948
18325
    firstidx         = vp8_mbfirstidx[part_idx];
949
18325
    mb->partitioning = part_idx;
950
951
154881
    for (n = 0; n < num; n++) {
952
136556
        int k = firstidx[n];
953
        uint32_t left, above;
954
        const uint8_t *submv_prob;
955
956
136556
        if (!(k & 3))
957
46739
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
958
        else
959
89817
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
960
136556
        if (k <= 3)
961
44965
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
962
        else
963
91591
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
964
965
136556
        submv_prob = get_submv_prob(left, above, is_vp7);
966
967
136556
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
968
50605
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
969
31737
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
970
56498
                    mb->bmv[n].y = mb->mv.y +
971
28249
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
972
28249
                    mb->bmv[n].x = mb->mv.x +
973
28249
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
974
                } else {
975
3488
                    AV_ZERO32(&mb->bmv[n]);
976
                }
977
            } else {
978
18868
                AV_WN32A(&mb->bmv[n], above);
979
            }
980
        } else {
981
85951
            AV_WN32A(&mb->bmv[n], left);
982
        }
983
    }
984
985
18325
    return num;
986
}
987
988
/**
989
 * The vp7 reference decoder uses a padding macroblock column (added to right
990
 * edge of the frame) to guard against illegal macroblock offsets. The
991
 * algorithm has bugs that permit offsets to straddle the padding column.
992
 * This function replicates those bugs.
993
 *
994
 * @param[out] edge_x macroblock x address
995
 * @param[out] edge_y macroblock y address
996
 *
997
 * @return macroblock offset legal (boolean)
998
 */
999
76560
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
1000
                                   int xoffset, int yoffset, int boundary,
1001
                                   int *edge_x, int *edge_y)
1002
{
1003
76560
    int vwidth = mb_width + 1;
1004
76560
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
1005

76560
    if (new < boundary || new % vwidth == vwidth - 1)
1006
11861
        return 0;
1007
64699
    *edge_y = new / vwidth;
1008
64699
    *edge_x = new % vwidth;
1009
64699
    return 1;
1010
}
1011
1012
64699
static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
1013
{
1014
64699
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
1015
}
1016
1017
static av_always_inline
1018
6380
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1019
                    int mb_x, int mb_y, int layout)
1020
{
1021
    VP8Macroblock *mb_edge[12];
1022
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
1023
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1024
6380
    int idx = CNT_ZERO;
1025
    VP56mv near_mv[3];
1026
6380
    uint8_t cnt[3] = { 0 };
1027
6380
    VP56RangeCoder *c = &s->c;
1028
    int i;
1029
1030
6380
    AV_ZERO32(&near_mv[0]);
1031
6380
    AV_ZERO32(&near_mv[1]);
1032
6380
    AV_ZERO32(&near_mv[2]);
1033
1034
82940
    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
1035
76560
        const VP7MVPred * pred = &vp7_mv_pred[i];
1036
        int edge_x, edge_y;
1037
1038
76560
        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
1039
76560
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
1040
129398
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
1041
64699
                                             ? s->macroblocks_base + 1 + edge_x +
1042
64699
                                               (s->mb_width + 1) * (edge_y + 1)
1043
64699
                                             : s->macroblocks + edge_x +
1044
                                               (s->mb_height - edge_y - 1) * 2;
1045
64699
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
1046
64699
            if (mv) {
1047
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
1048
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
1049
                        idx = CNT_NEAREST;
1050
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
1051
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
1052
                            continue;
1053
                        idx = CNT_NEAR;
1054
                    } else {
1055
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
1056
                        idx = CNT_NEAR;
1057
                    }
1058
                } else {
1059
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
1060
                    idx = CNT_NEAREST;
1061
                }
1062
            } else {
1063
64699
                idx = CNT_ZERO;
1064
            }
1065
        } else {
1066
11861
            idx = CNT_ZERO;
1067
        }
1068
76560
        cnt[idx] += vp7_mv_pred[i].score;
1069
    }
1070
1071
6380
    mb->partitioning = VP8_SPLITMVMODE_NONE;
1072
1073
6380
    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
1074
        mb->mode = VP8_MVMODE_MV;
1075
1076
        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
1077
1078
            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
1079
1080
                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
1081
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1082
                else
1083
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1084
1085
                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1086
                    mb->mode = VP8_MVMODE_SPLIT;
1087
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1088
                } else {
1089
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1090
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1091
                    mb->bmv[0] = mb->mv;
1092
                }
1093
            } else {
1094
                mb->mv = near_mv[CNT_NEAR];
1095
                mb->bmv[0] = mb->mv;
1096
            }
1097
        } else {
1098
            mb->mv = near_mv[CNT_NEAREST];
1099
            mb->bmv[0] = mb->mv;
1100
        }
1101
    } else {
1102
6380
        mb->mode = VP8_MVMODE_ZERO;
1103
6380
        AV_ZERO32(&mb->mv);
1104
6380
        mb->bmv[0] = mb->mv;
1105
    }
1106
6380
}
1107
1108
static av_always_inline
1109
379497
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
1110
                    int mb_x, int mb_y, int layout)
1111
{
1112
379497
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
1113
379497
                                  mb - 1 /* left */,
1114
                                  0      /* top-left */ };
1115
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1116
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1117
379497
    int idx = CNT_ZERO;
1118
379497
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
1119
379497
    int8_t *sign_bias = s->sign_bias;
1120
    VP56mv near_mv[4];
1121
379497
    uint8_t cnt[4] = { 0 };
1122
379497
    VP56RangeCoder *c = &s->c;
1123
1124
379497
    if (!layout) { // layout is inlined (s->mb_layout is not)
1125
379497
        mb_edge[0] = mb + 2;
1126
379497
        mb_edge[2] = mb + 1;
1127
    } else {
1128
        mb_edge[0] = mb - s->mb_width - 1;
1129
        mb_edge[2] = mb - s->mb_width - 2;
1130
    }
1131
1132
379497
    AV_ZERO32(&near_mv[0]);
1133
379497
    AV_ZERO32(&near_mv[1]);
1134
379497
    AV_ZERO32(&near_mv[2]);
1135
1136
    /* Process MB on top, left and top-left */
1137
#define MV_EDGE_CHECK(n)                                                      \
1138
    {                                                                         \
1139
        VP8Macroblock *edge = mb_edge[n];                                     \
1140
        int edge_ref = edge->ref_frame;                                       \
1141
        if (edge_ref != VP56_FRAME_CURRENT) {                                 \
1142
            uint32_t mv = AV_RN32A(&edge->mv);                                \
1143
            if (mv) {                                                         \
1144
                if (cur_sign_bias != sign_bias[edge_ref]) {                   \
1145
                    /* SWAR negate of the values in mv. */                    \
1146
                    mv = ~mv;                                                 \
1147
                    mv = ((mv & 0x7fff7fff) +                                 \
1148
                          0x00010001) ^ (mv & 0x80008000);                    \
1149
                }                                                             \
1150
                if (!n || mv != AV_RN32A(&near_mv[idx]))                      \
1151
                    AV_WN32A(&near_mv[++idx], mv);                            \
1152
                cnt[idx] += 1 + (n != 2);                                     \
1153
            } else                                                            \
1154
                cnt[CNT_ZERO] += 1 + (n != 2);                                \
1155
        }                                                                     \
1156
    }
1157
1158

379497
    MV_EDGE_CHECK(0)
1159


379497
    MV_EDGE_CHECK(1)
1160


379497
    MV_EDGE_CHECK(2)
1161
1162
379497
    mb->partitioning = VP8_SPLITMVMODE_NONE;
1163
379497
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1164
57174
        mb->mode = VP8_MVMODE_MV;
1165
1166
        /* If we have three distinct MVs, merge first and last if they're the same */
1167
57174
        if (cnt[CNT_SPLITMV] &&
1168
12759
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1169
5465
            cnt[CNT_NEAREST] += 1;
1170
1171
        /* Swap near and nearest if necessary */
1172
57174
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1173
4482
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
1174
4482
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1175
        }
1176
1177
57174
        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1178
37607
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1179
                /* Choose the best mv out of 0,0 and the nearest mv */
1180
32519
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1181
32519
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
1182
32519
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
1183
32519
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1184
1185
32519
                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1186
18325
                    mb->mode = VP8_MVMODE_SPLIT;
1187
18325
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1188
                } else {
1189
14194
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
1190
14194
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
1191
14194
                    mb->bmv[0] = mb->mv;
1192
                }
1193
            } else {
1194
5088
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
1195
5088
                mb->bmv[0] = mb->mv;
1196
            }
1197
        } else {
1198
19567
            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
1199
19567
            mb->bmv[0] = mb->mv;
1200
        }
1201
    } else {
1202
322323
        mb->mode = VP8_MVMODE_ZERO;
1203
322323
        AV_ZERO32(&mb->mv);
1204
322323
        mb->bmv[0] = mb->mv;
1205
    }
1206
379497
}
1207
1208
static av_always_inline
1209
17675
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1210
                           int mb_x, int keyframe, int layout)
1211
{
1212
17675
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1213
1214
17675
    if (layout) {
1215
33
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
1216
33
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1217
    }
1218
17675
    if (keyframe) {
1219
        int x, y;
1220
        uint8_t *top;
1221
10822
        uint8_t *const left = s->intra4x4_pred_mode_left;
1222
10822
        if (layout)
1223
33
            top = mb->intra4x4_pred_mode_top;
1224
        else
1225
10789
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
1226
54110
        for (y = 0; y < 4; y++) {
1227
216440
            for (x = 0; x < 4; x++) {
1228
                const uint8_t *ctx;
1229
173152
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
1230
173152
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1231
173152
                left[y]   = top[x] = *intra4x4;
1232
173152
                intra4x4++;
1233
            }
1234
        }
1235
    } else {
1236
        int i;
1237
116501
        for (i = 0; i < 16; i++)
1238
109648
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1239
                                           vp8_pred4x4_prob_inter);
1240
    }
1241
17675
}
1242
1243
static av_always_inline
1244
443209
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
1245
                    VP8Macroblock *mb, int mb_x, int mb_y,
1246
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1247
{
1248
443209
    VP56RangeCoder *c = &s->c;
1249
    static const char * const vp7_feature_name[] = { "q-index",
1250
                                                     "lf-delta",
1251
                                                     "partial-golden-update",
1252
                                                     "blit-pitch" };
1253
443209
    if (is_vp7) {
1254
        int i;
1255
6600
        *segment = 0;
1256
33000
        for (i = 0; i < 4; i++) {
1257
26400
            if (s->feature_enabled[i]) {
1258
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1259
                      int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1260
                                                   s->feature_index_prob[i]);
1261
                      av_log(s->avctx, AV_LOG_WARNING,
1262
                             "Feature %s present in macroblock (value 0x%x)\n",
1263
                             vp7_feature_name[i], s->feature_value[i][index]);
1264
                }
1265
           }
1266
        }
1267
436609
    } else if (s->segmentation.update_map) {
1268
8289
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1269
8289
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1270
428320
    } else if (s->segmentation.enabled)
1271
94425
        *segment = ref ? *ref : *segment;
1272
443209
    mb->segment = *segment;
1273
1274
443209
    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1275
1276
443209
    if (s->keyframe) {
1277
32872
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1278
                                    vp8_pred16x16_prob_intra);
1279
1280
32872
        if (mb->mode == MODE_I4x4) {
1281
10822
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1282
        } else {
1283
22050
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1284
22050
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1285
22050
            if (s->mb_layout)
1286
187
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1287
            else
1288
21863
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1289
22050
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
1290
        }
1291
1292
32872
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1293
                                                vp8_pred8x8c_prob_intra);
1294
32872
        mb->ref_frame        = VP56_FRAME_CURRENT;
1295
410337
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1296
        // inter MB, 16.2
1297
385877
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
1298
34528
            mb->ref_frame =
1299
17248
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1300
                                                                   : VP56_FRAME_GOLDEN;
1301
        else
1302
368597
            mb->ref_frame = VP56_FRAME_PREVIOUS;
1303
385877
        s->ref_count[mb->ref_frame - 1]++;
1304
1305
        // motion vectors, 16.3
1306
385877
        if (is_vp7)
1307
6380
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1308
        else
1309
379497
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
1310
    } else {
1311
        // intra MB, 16.1
1312
24460
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1313
1314
24460
        if (mb->mode == MODE_I4x4)
1315
6853
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1316
1317
48920
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1318
24460
                                                s->prob->pred8x8c);
1319
24460
        mb->ref_frame        = VP56_FRAME_CURRENT;
1320
24460
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
1321
24460
        AV_ZERO32(&mb->bmv[0]);
1322
    }
1323
443209
}
1324
1325
/**
1326
 * @param r     arithmetic bitstream reader context
1327
 * @param block destination for block coefficients
1328
 * @param probs probabilities to use when reading trees from the bitstream
1329
 * @param i     initial coeff index, 0 unless a separate DC block is coded
1330
 * @param qmul  array holding the dc/ac dequant factor at position 0/1
1331
 *
1332
 * @return 0 if no coeffs were decoded
1333
 *         otherwise, the index of the last coeff decoded plus one
1334
 */
1335
static av_always_inline
1336
393067
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1337
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1338
                                 int i, uint8_t *token_prob, int16_t qmul[2],
1339
                                 const uint8_t scan[16], int vp7)
1340
{
1341
393067
    VP56RangeCoder c = *r;
1342
393067
    goto skip_eob;
1343
    do {
1344
        int coeff;
1345
1101534
restart:
1346
1103653
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
1347
373414
            break;
1348
1349
730239
skip_eob:
1350
1956826
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1351
835639
            if (++i == 16)
1352
                break; // invalid input; blocks should end with EOB
1353
835639
            token_prob = probs[i][0];
1354
835639
            if (vp7)
1355
2119
                goto restart;
1356
833520
            goto skip_eob;
1357
        }
1358
1359
1121187
        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1360
719461
            coeff = 1;
1361
719461
            token_prob = probs[i + 1][1];
1362
        } else {
1363
401726
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1364
274407
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1365
274407
                if (coeff)
1366
113420
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
1367
274407
                coeff += 2;
1368
            } else {
1369
                // DCT_CAT*
1370
127319
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1371
83429
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1372
45203
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1373
                    } else {                                    // DCT_CAT2
1374
38226
                        coeff  = 7;
1375
38226
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1376
38226
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1377
                    }
1378
                } else {    // DCT_CAT3 and up
1379
43890
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
1380
43890
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
1381
43890
                    int cat = (a << 1) + b;
1382
43890
                    coeff  = 3 + (8 << cat);
1383
43890
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1384
                }
1385
            }
1386
401726
            token_prob = probs[i + 1][2];
1387
        }
1388

1121187
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1389
1121187
    } while (++i < 16);
1390
1391
393067
    *r = c;
1392
393067
    return i;
1393
}
1394
1395
static av_always_inline
1396
6380
int inter_predict_dc(int16_t block[16], int16_t pred[2])
1397
{
1398
6380
    int16_t dc = block[0];
1399
6380
    int ret = 0;
1400
1401
6380
    if (pred[1] > 3) {
1402
        dc += pred[0];
1403
        ret = 1;
1404
    }
1405
1406
6380
    if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1407
6380
        block[0] = pred[0] = dc;
1408
6380
        pred[1] = 0;
1409
    } else {
1410
        if (pred[0] == dc)
1411
            pred[1]++;
1412
        block[0] = pred[0] = dc;
1413
    }
1414
1415
6380
    return ret;
1416
}
1417
1418
734
static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1419
                                            int16_t block[16],
1420
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1421
                                            int i, uint8_t *token_prob,
1422
                                            int16_t qmul[2],
1423
                                            const uint8_t scan[16])
1424
{
1425
734
    return decode_block_coeffs_internal(r, block, probs, i,
1426
                                        token_prob, qmul, scan, IS_VP7);
1427
}
1428
1429
#ifndef vp8_decode_block_coeffs_internal
1430
392333
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1431
                                            int16_t block[16],
1432
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1433
                                            int i, uint8_t *token_prob,
1434
                                            int16_t qmul[2])
1435
{
1436
392333
    return decode_block_coeffs_internal(r, block, probs, i,
1437
                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
1438
}
1439
#endif
1440
1441
/**
1442
 * @param c          arithmetic bitstream reader context
1443
 * @param block      destination for block coefficients
1444
 * @param probs      probabilities to use when reading trees from the bitstream
1445
 * @param i          initial coeff index, 0 unless a separate DC block is coded
1446
 * @param zero_nhood the initial prediction context for number of surrounding
1447
 *                   all-zero blocks (only left/top, so 0-2)
1448
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
1449
 * @param scan       scan pattern (VP7 only)
1450
 *
1451
 * @return 0 if no coeffs were decoded
1452
 *         otherwise, the index of the last coeff decoded plus one
1453
 */
1454
static av_always_inline
1455
2050151
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1456
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1457
                        int i, int zero_nhood, int16_t qmul[2],
1458
                        const uint8_t scan[16], int vp7)
1459
{
1460
2050151
    uint8_t *token_prob = probs[i][zero_nhood];
1461
2050151
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
1462
1657084
        return 0;
1463
734
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1464
                                                  token_prob, qmul, scan)
1465
393801
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
1466
                                                  token_prob, qmul);
1467
}
1468
1469
static av_always_inline
1470
83297
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1471
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1472
                      int is_vp7)
1473
{
1474
83297
    int i, x, y, luma_start = 0, luma_ctx = 3;
1475
83297
    int nnz_pred, nnz, nnz_total = 0;
1476
83297
    int segment = mb->segment;
1477
83297
    int block_dc = 0;
1478
1479

83297
    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1480
51023
        nnz_pred = t_nnz[8] + l_nnz[8];
1481
1482
        // decode DC values and do hadamard
1483
51023
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1484
51023
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
1485
                                  ff_zigzag_scan, is_vp7);
1486
51023
        l_nnz[8] = t_nnz[8] = !!nnz;
1487
1488

51023
        if (is_vp7 && mb->mode > MODE_I4x4) {
1489
6380
            nnz |=  inter_predict_dc(td->block_dc,
1490
6380
                                     s->inter_dc_pred[mb->ref_frame - 1]);
1491
        }
1492
1493
51023
        if (nnz) {
1494
34741
            nnz_total += nnz;
1495
34741
            block_dc   = 1;
1496
34741
            if (nnz == 1)
1497
10303
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1498
            else
1499
24438
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1500
        }
1501
51023
        luma_start = 1;
1502
51023
        luma_ctx   = 0;
1503
    }
1504
1505
    // luma blocks
1506
416485
    for (y = 0; y < 4; y++)
1507
1665940
        for (x = 0; x < 4; x++) {
1508
1332752
            nnz_pred = l_nnz[y] + t_nnz[x];
1509
1332752
            nnz = decode_block_coeffs(c, td->block[y][x],
1510
1332752
                                      s->prob->token[luma_ctx],
1511
                                      luma_start, nnz_pred,
1512
1332752
                                      s->qmat[segment].luma_qmul,
1513
1332752
                                      s->prob[0].scan, is_vp7);
1514
            /* nnz+block_dc may be one more than the actual last index,
1515
             * but we don't care */
1516
1332752
            td->non_zero_count_cache[y][x] = nnz + block_dc;
1517
1332752
            t_nnz[x] = l_nnz[y] = !!nnz;
1518
1332752
            nnz_total += nnz;
1519
        }
1520
1521
    // chroma blocks
1522
    // TODO: what to do about dimensions? 2nd dim for luma is x,
1523
    // but for chroma it's (y<<1)|x
1524
249891
    for (i = 4; i < 6; i++)
1525
499782
        for (y = 0; y < 2; y++)
1526
999564
            for (x = 0; x < 2; x++) {
1527
666376
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1528
666376
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1529
666376
                                          s->prob->token[2], 0, nnz_pred,
1530
666376
                                          s->qmat[segment].chroma_qmul,
1531
666376
                                          s->prob[0].scan, is_vp7);
1532
666376
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1533
666376
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1534
666376
                nnz_total += nnz;
1535
            }
1536
1537
    // if there were no coded coeffs despite the macroblock not being marked skip,
1538
    // we MUST not do the inner loop filter and should not do IDCT
1539
    // Since skip isn't used for bitstream prediction, just manually set it.
1540
83297
    if (!nnz_total)
1541
6457
        mb->skip = 1;
1542
83297
}
1543
1544
static av_always_inline
1545
424229
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1546
                      uint8_t *src_cb, uint8_t *src_cr,
1547
                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1548
{
1549
424229
    AV_COPY128(top_border, src_y + 15 * linesize);
1550
424229
    if (!simple) {
1551
416507
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1552
416507
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1553
    }
1554
424229
}
1555
1556
static av_always_inline
1557
84130
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1558
                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1559
                    int mb_y, int mb_width, int simple, int xchg)
1560
{
1561
84130
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
1562
84130
    src_y  -= linesize;
1563
84130
    src_cb -= uvlinesize;
1564
84130
    src_cr -= uvlinesize;
1565
1566
#define XCHG(a, b, xchg)                                                      \
1567
    do {                                                                      \
1568
        if (xchg)                                                             \
1569
            AV_SWAP64(b, a);                                                  \
1570
        else                                                                  \
1571
            AV_COPY64(b, a);                                                  \
1572
    } while (0)
1573
1574
84130
    XCHG(top_border_m1 + 8, src_y - 8, xchg);
1575
84130
    XCHG(top_border, src_y, xchg);
1576
84130
    XCHG(top_border + 8, src_y + 8, 1);
1577
84130
    if (mb_x < mb_width - 1)
1578
80706
        XCHG(top_border + 32, src_y + 16, 1);
1579
1580
    // only copy chroma for normal loop filter
1581
    // or to initialize the top row to 127
1582

84130
    if (!simple || !mb_y) {
1583
81048
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1584
81048
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1585
81048
        XCHG(top_border + 16, src_cb, 1);
1586
81048
        XCHG(top_border + 24, src_cr, 1);
1587
    }
1588
84130
}
1589
1590
static av_always_inline
1591
72888
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1592
{
1593
72888
    if (!mb_x)
1594
3029
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1595
    else
1596
69859
        return mb_y ? mode : LEFT_DC_PRED8x8;
1597
}
1598
1599
static av_always_inline
1600
1809
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1601
{
1602
1809
    if (!mb_x)
1603

1
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1604
    else
1605
1808
        return mb_y ? mode : HOR_PRED8x8;
1606
}
1607
1608
static av_always_inline
1609
96989
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1610
{
1611

96989
    switch (mode) {
1612
72888
    case DC_PRED8x8:
1613
72888
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1614
5757
    case VERT_PRED8x8:
1615

5757
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1616
16535
    case HOR_PRED8x8:
1617

16535
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1618
1809
    case PLANE_PRED8x8: /* TM */
1619
1809
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1620
    }
1621
    return mode;
1622
}
1623
1624
static av_always_inline
1625
49009
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1626
{
1627
49009
    if (!mb_x) {
1628

1362
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1629
    } else {
1630
47647
        return mb_y ? mode : HOR_VP8_PRED;
1631
    }
1632
}
1633
1634
static av_always_inline
1635
282800
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1636
                                     int *copy_buf, int vp7)
1637
{
1638

282800
    switch (mode) {
1639
13608
    case VERT_PRED:
1640

13608
        if (!mb_x && mb_y) {
1641
95
            *copy_buf = 1;
1642
95
            return mode;
1643
        }
1644
        /* fall-through */
1645
    case DIAG_DOWN_LEFT_PRED:
1646
    case VERT_LEFT_PRED:
1647

34820
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1648
19464
    case HOR_PRED:
1649
19464
        if (!mb_y) {
1650
128
            *copy_buf = 1;
1651
128
            return mode;
1652
        }
1653
        /* fall-through */
1654
    case HOR_UP_PRED:
1655

31686
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1656
49009
    case TM_VP8_PRED:
1657
49009
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1658
167062
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1659
                   * as 16x16/8x8 DC */
1660
    case DIAG_DOWN_RIGHT_PRED:
1661
    case VERT_RIGHT_PRED:
1662
    case HOR_DOWN_PRED:
1663

167062
        if (!mb_y || !mb_x)
1664
1140
            *copy_buf = 1;
1665
167062
        return mode;
1666
    }
1667
    return mode;
1668
}
1669
1670
static av_always_inline
1671
57332
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1672
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1673
{
1674
    int x, y, mode, nnz;
1675
    uint32_t tr;
1676
1677
    /* for the first row, we need to run xchg_mb_border to init the top edge
1678
     * to 127 otherwise, skip it if we aren't going to deblock */
1679


57332
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1680
42065
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1681
42065
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1682
42065
                       s->filter.simple, 1);
1683
1684
57332
    if (mb->mode < MODE_I4x4) {
1685
39657
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1686
39657
        s->hpc.pred16x16[mode](dst[0], s->linesize);
1687
    } else {
1688
17675
        uint8_t *ptr = dst[0];
1689
17675
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1690
17675
        const uint8_t lo = is_vp7 ? 128 : 127;
1691
17675
        const uint8_t hi = is_vp7 ? 128 : 129;
1692
17675
        uint8_t tr_top[4] = { lo, lo, lo, lo };
1693
1694
        // all blocks on the right edge of the macroblock use bottom edge
1695
        // the top macroblock for their topright edge
1696
17675
        uint8_t *tr_right = ptr - s->linesize + 16;
1697
1698
        // if we're on the right edge of the frame, said edge is extended
1699
        // from the top macroblock
1700

17675
        if (mb_y && mb_x == s->mb_width - 1) {
1701
635
            tr       = tr_right[-1] * 0x01010101u;
1702
635
            tr_right = (uint8_t *) &tr;
1703
        }
1704
1705
17675
        if (mb->skip)
1706
460
            AV_ZERO128(td->non_zero_count_cache);
1707
1708
88375
        for (y = 0; y < 4; y++) {
1709
70700
            uint8_t *topright = ptr + 4 - s->linesize;
1710
353500
            for (x = 0; x < 4; x++) {
1711
282800
                int copy = 0;
1712
282800
                ptrdiff_t linesize = s->linesize;
1713
282800
                uint8_t *dst = ptr + 4 * x;
1714
282800
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1715
1716

282800
                if ((y == 0 || x == 3) && mb_y == 0) {
1717
4417
                    topright = tr_top;
1718
278383
                } else if (x == 3)
1719
68176
                    topright = tr_right;
1720
1721
282800
                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1722
                                                        mb_y + y, &copy, is_vp7);
1723
282800
                if (copy) {
1724
1363
                    dst      = copy_dst + 12;
1725
1363
                    linesize = 8;
1726
1363
                    if (!(mb_y + y)) {
1727
702
                        copy_dst[3] = lo;
1728
702
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1729
                    } else {
1730
661
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1731
661
                        if (!(mb_x + x)) {
1732
661
                            copy_dst[3] = hi;
1733
                        } else {
1734
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
1735
                        }
1736
                    }
1737
1363
                    if (!(mb_x + x)) {
1738
702
                        copy_dst[11] =
1739
702
                        copy_dst[19] =
1740
702
                        copy_dst[27] =
1741
702
                        copy_dst[35] = hi;
1742
                    } else {
1743
661
                        copy_dst[11] = ptr[4 * x                   - 1];
1744
661
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
1745
661
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1746
661
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1747
                    }
1748
                }
1749
282800
                s->hpc.pred4x4[mode](dst, topright, linesize);
1750
282800
                if (copy) {
1751
1363
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
1752
1363
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
1753
1363
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1754
1363
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1755
                }
1756
1757
282800
                nnz = td->non_zero_count_cache[y][x];
1758
282800
                if (nnz) {
1759
122340
                    if (nnz == 1)
1760
35131
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1761
35131
                                                  td->block[y][x], s->linesize);
1762
                    else
1763
87209
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1764
87209
                                               td->block[y][x], s->linesize);
1765
                }
1766
282800
                topright += 4;
1767
            }
1768
1769
70700
            ptr      += 4 * s->linesize;
1770
70700
            intra4x4 += 4;
1771
        }
1772
    }
1773
1774
57332
    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1775
                                            mb_x, mb_y, is_vp7);
1776
57332
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1777
57332
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1778
1779


57332
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1780
42065
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1781
42065
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1782
42065
                       s->filter.simple, 0);
1783
57332
}
1784
1785
/* Per-axis lookup for motion compensation. The column is the 3-bit
 * fractional motion vector component (mx or my), the row selects what is
 * looked up. */
static const uint8_t subpel_idx[3][8] = {
    /* nr. of left extra pixels, also function pointer index */
    [0] = { 0, 1, 2, 1, 2, 1, 2, 1 },
    /* nr. of extra pixels required */
    [1] = { 0, 3, 5, 3, 5, 3, 5, 3 },
    /* nr. of right extra pixels */
    [2] = { 0, 2, 3, 2, 3, 2, 3, 2 },
};
1791
1792
/**
1793
 * luma MC function
1794
 *
1795
 * @param s        VP8 decoding context
1796
 * @param dst      target buffer for block data at block position
1797
 * @param ref      reference picture buffer at origin (0, 0)
1798
 * @param mv       motion vector (relative to block position) to get pixel data from
1799
 * @param x_off    horizontal position of block from origin (0, 0)
1800
 * @param y_off    vertical position of block from origin (0, 0)
1801
 * @param block_w  width of block (16, 8 or 4)
1802
 * @param block_h  height of block (always same as block_w)
1803
 * @param width    width of src/dst plane data
1804
 * @param height   height of src/dst plane data
1805
 * @param linesize size of a single line of plane data, including padding
1806
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
1807
 */
1808
static av_always_inline
1809
504108
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1810
                 ThreadFrame *ref, const VP56mv *mv,
1811
                 int x_off, int y_off, int block_w, int block_h,
1812
                 int width, int height, ptrdiff_t linesize,
1813
                 vp8_mc_func mc_func[3][3])
1814
{
1815
504108
    uint8_t *src = ref->f->data[0];
1816
1817
504108
    if (AV_RN32A(mv)) {
1818
124886
        ptrdiff_t src_linesize = linesize;
1819
1820
124886
        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1821
124886
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1822
1823
124886
        x_off += mv->x >> 2;
1824
124886
        y_off += mv->y >> 2;
1825
1826
        // edge emulation
1827
124886
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1828
124886
        src += y_off * linesize + x_off;
1829

124886
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
1830
118693
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1831
9236
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1832
9236
                                     src - my_idx * linesize - mx_idx,
1833
                                     EDGE_EMU_LINESIZE, linesize,
1834
9236
                                     block_w + subpel_idx[1][mx],
1835
9236
                                     block_h + subpel_idx[1][my],
1836
                                     x_off - mx_idx, y_off - my_idx,
1837
                                     width, height);
1838
9236
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1839
9236
            src_linesize = EDGE_EMU_LINESIZE;
1840
        }
1841
124886
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1842
    } else {
1843
379222
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1844
379222
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1845
                      linesize, block_h, 0, 0);
1846
    }
1847
504108
}
1848
1849
/**
1850
 * chroma MC function
1851
 *
1852
 * @param s        VP8 decoding context
1853
 * @param dst1     target buffer for block data at block position (U plane)
1854
 * @param dst2     target buffer for block data at block position (V plane)
1855
 * @param ref      reference picture buffer at origin (0, 0)
1856
 * @param mv       motion vector (relative to block position) to get pixel data from
1857
 * @param x_off    horizontal position of block from origin (0, 0)
1858
 * @param y_off    vertical position of block from origin (0, 0)
1859
 * @param block_w  width of block (16, 8 or 4)
1860
 * @param block_h  height of block (always same as block_w)
1861
 * @param width    width of src/dst plane data
1862
 * @param height   height of src/dst plane data
1863
 * @param linesize size of a single line of plane data, including padding
1864
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
1865
 */
1866
static av_always_inline
1867
424764
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1868
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1869
                   int x_off, int y_off, int block_w, int block_h,
1870
                   int width, int height, ptrdiff_t linesize,
1871
                   vp8_mc_func mc_func[3][3])
1872
{
1873
424764
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1874
1875
424764
    if (AV_RN32A(mv)) {
1876
79185
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1877
79185
        int my = mv->y & 7, my_idx = subpel_idx[0][my];
1878
1879
79185
        x_off += mv->x >> 3;
1880
79185
        y_off += mv->y >> 3;
1881
1882
        // edge emulation
1883
79185
        src1 += y_off * linesize + x_off;
1884
79185
        src2 += y_off * linesize + x_off;
1885
79185
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1886

79185
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
1887
73045
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1888
9157
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1889
9157
                                     src1 - my_idx * linesize - mx_idx,
1890
                                     EDGE_EMU_LINESIZE, linesize,
1891
9157
                                     block_w + subpel_idx[1][mx],
1892
9157
                                     block_h + subpel_idx[1][my],
1893
                                     x_off - mx_idx, y_off - my_idx, width, height);
1894
9157
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1895
9157
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1896
1897
9157
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1898
9157
                                     src2 - my_idx * linesize - mx_idx,
1899
                                     EDGE_EMU_LINESIZE, linesize,
1900
9157
                                     block_w + subpel_idx[1][mx],
1901
9157
                                     block_h + subpel_idx[1][my],
1902
                                     x_off - mx_idx, y_off - my_idx, width, height);
1903
9157
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1904
9157
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1905
        } else {
1906
70028
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1907
70028
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1908
        }
1909
    } else {
1910
345579
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1911
345579
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1912
345579
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1913
    }
1914
424764
}
1915
1916
static av_always_inline
1917
398316
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1918
                 ThreadFrame *ref_frame, int x_off, int y_off,
1919
                 int bx_off, int by_off, int block_w, int block_h,
1920
                 int width, int height, VP56mv *mv)
1921
{
1922
398316
    VP56mv uvmv = *mv;
1923
1924
    /* Y */
1925
398316
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1926
                ref_frame, mv, x_off + bx_off, y_off + by_off,
1927
                block_w, block_h, width, height, s->linesize,
1928
398316
                s->put_pixels_tab[block_w == 8]);
1929
1930
    /* U/V */
1931
398316
    if (s->profile == 3) {
1932
        /* this block only applies VP8; it is safe to check
1933
         * only the profile, as VP7 profile <= 1 */
1934
4775
        uvmv.x &= ~7;
1935
4775
        uvmv.y &= ~7;
1936
    }
1937
398316
    x_off   >>= 1;
1938
398316
    y_off   >>= 1;
1939
398316
    bx_off  >>= 1;
1940
398316
    by_off  >>= 1;
1941
398316
    width   >>= 1;
1942
398316
    height  >>= 1;
1943
398316
    block_w >>= 1;
1944
398316
    block_h >>= 1;
1945
398316
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1946
398316
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1947
                  &uvmv, x_off + bx_off, y_off + by_off,
1948
                  block_w, block_h, width, height, s->uvlinesize,
1949
398316
                  s->put_pixels_tab[1 + (block_w == 4)]);
1950
398316
}
1951
1952
/* Fetch pixels for estimated mv 4 macroblocks ahead.
1953
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1954
static av_always_inline
1955
1329627
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1956
                     int mb_xy, int ref)
1957
{
1958
    /* Don't prefetch refs that haven't been used very often this frame. */
1959
1329627
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1960
478745
        int x_off = mb_x << 4, y_off = mb_y << 4;
1961
478745
        int mx = (mb->mv.x >> 2) + x_off + 8;
1962
478745
        int my = (mb->mv.y >> 2) + y_off;
1963
478745
        uint8_t **src = s->framep[ref]->tf.f->data;
1964
478745
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1965
        /* For threading, a ff_thread_await_progress here might be useful, but
1966
         * it actually slows down the decoder. Since a bad prefetch doesn't
1967
         * generate bad decoder output, we don't run it here. */
1968
478745
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1969
478745
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1970
478745
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1971
    }
1972
1329627
}
1973
1974
/**
1975
 * Apply motion vectors to prediction buffer, chapter 18.
1976
 */
1977
static av_always_inline
1978
385877
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1979
                   VP8Macroblock *mb, int mb_x, int mb_y)
1980
{
1981
385877
    int x_off = mb_x << 4, y_off = mb_y << 4;
1982
385877
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
1983
385877
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1984
385877
    VP56mv *bmv = mb->bmv;
1985
1986

385877
    switch (mb->partitioning) {
1987
367552
    case VP8_SPLITMVMODE_NONE:
1988
367552
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1989
                    0, 0, 16, 16, width, height, &mb->mv);
1990
367552
        break;
1991
6612
    case VP8_SPLITMVMODE_4x4: {
1992
        int x, y;
1993
        VP56mv uvmv;
1994
1995
        /* Y */
1996
33060
        for (y = 0; y < 4; y++) {
1997
132240
            for (x = 0; x < 4; x++) {
1998
105792
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1999
105792
                            ref, &bmv[4 * y + x],
2000
105792
                            4 * x + x_off, 4 * y + y_off, 4, 4,
2001
                            width, height, s->linesize,
2002
105792
                            s->put_pixels_tab[2]);
2003
            }
2004
        }
2005
2006
        /* U/V */
2007
6612
        x_off  >>= 1;
2008
6612
        y_off  >>= 1;
2009
6612
        width  >>= 1;
2010
6612
        height >>= 1;
2011
19836
        for (y = 0; y < 2; y++) {
2012
39672
            for (x = 0; x < 2; x++) {
2013
26448
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
2014
26448
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
2015
26448
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
2016
26448
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
2017
26448
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
2018
26448
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
2019
26448
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
2020
26448
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
2021
26448
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
2022
26448
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
2023
26448
                if (s->profile == 3) {
2024
492
                    uvmv.x &= ~7;
2025
492
                    uvmv.y &= ~7;
2026
                }
2027
26448
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
2028
26448
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
2029
26448
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
2030
                              width, height, s->uvlinesize,
2031
26448
                              s->put_pixels_tab[2]);
2032
            }
2033
        }
2034
6612
        break;
2035
    }
2036
4909
    case VP8_SPLITMVMODE_16x8:
2037
4909
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2038
                    0, 0, 16, 8, width, height, &bmv[0]);
2039
4909
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2040
                    0, 8, 16, 8, width, height, &bmv[1]);
2041
4909
        break;
2042
3135
    case VP8_SPLITMVMODE_8x16:
2043
3135
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2044
                    0, 0, 8, 16, width, height, &bmv[0]);
2045
3135
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2046
                    8, 0, 8, 16, width, height, &bmv[1]);
2047
3135
        break;
2048
3669
    case VP8_SPLITMVMODE_8x8:
2049
3669
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2050
                    0, 0, 8, 8, width, height, &bmv[0]);
2051
3669
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2052
                    8, 0, 8, 8, width, height, &bmv[1]);
2053
3669
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2054
                    0, 8, 8, 8, width, height, &bmv[2]);
2055
3669
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2056
                    8, 8, 8, 8, width, height, &bmv[3]);
2057
3669
        break;
2058
    }
2059
385877
}
2060
2061
static av_always_inline
2062
76840
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
2063
{
2064
    int x, y, ch;
2065
2066
76840
    if (mb->mode != MODE_I4x4) {
2067
59625
        uint8_t *y_dst = dst[0];
2068
298125
        for (y = 0; y < 4; y++) {
2069
238500
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
2070
238500
            if (nnz4) {
2071
175833
                if (nnz4 & ~0x01010101) {
2072
155935
                    for (x = 0; x < 4; x++) {
2073
155935
                        if ((uint8_t) nnz4 == 1)
2074
54051
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
2075
54051
                                                      td->block[y][x],
2076
                                                      s->linesize);
2077
101884
                        else if ((uint8_t) nnz4 > 1)
2078
74428
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
2079
74428
                                                   td->block[y][x],
2080
                                                   s->linesize);
2081
155935
                        nnz4 >>= 8;
2082
155935
                        if (!nnz4)
2083
44887
                            break;
2084
                    }
2085
                } else {
2086
130946
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2087
                }
2088
            }
2089
238500
            y_dst += 4 * s->linesize;
2090
        }
2091
    }
2092
2093
230520
    for (ch = 0; ch < 2; ch++) {
2094
153680
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2095
153680
        if (nnz4) {
2096
60025
            uint8_t *ch_dst = dst[1 + ch];
2097
60025
            if (nnz4 & ~0x01010101) {
2098
55530
                for (y = 0; y < 2; y++) {
2099
131913
                    for (x = 0; x < 2; x++) {
2100
106337
                        if ((uint8_t) nnz4 == 1)
2101
16054
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2102
16054
                                                      td->block[4 + ch][(y << 1) + x],
2103
                                                      s->uvlinesize);
2104
90283
                        else if ((uint8_t) nnz4 > 1)
2105
66922
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2106
66922
                                                   td->block[4 + ch][(y << 1) + x],
2107
                                                   s->uvlinesize);
2108
106337
                        nnz4 >>= 8;
2109
106337
                        if (!nnz4)
2110
29954
                            goto chroma_idct_end;
2111
                    }
2112
25576
                    ch_dst += 4 * s->uvlinesize;
2113
                }
2114
            } else {
2115
30071
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2116
            }
2117
        }
2118
153680
chroma_idct_end:
2119
        ;
2120
    }
2121
76840
}
2122
2123
static av_always_inline
2124
424229
void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2125
                         VP8FilterStrength *f, int is_vp7)
2126
{
2127
    int interior_limit, filter_level;
2128
2129
424229
    if (s->segmentation.enabled) {
2130
102219
        filter_level = s->segmentation.filter_level[mb->segment];
2131
102219
        if (!s->segmentation.absolute_vals)
2132
98895
            filter_level += s->filter.level;
2133
    } else
2134
322010
        filter_level = s->filter.level;
2135
2136
424229
    if (s->lf_delta.enabled) {
2137
416875
        filter_level += s->lf_delta.ref[mb->ref_frame];
2138
416875
        filter_level += s->lf_delta.mode[mb->mode];
2139
    }
2140
2141
424229
    filter_level = av_clip_uintp2(filter_level, 6);
2142
2143
424229
    interior_limit = filter_level;
2144
424229
    if (s->filter.sharpness) {
2145
2772
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
2146
2772
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2147
    }
2148
424229
    interior_limit = FFMAX(interior_limit, 1);
2149
2150
424229
    f->filter_level = filter_level;
2151
424229
    f->inner_limit = interior_limit;
2152

777260
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2153
353031
                      mb->mode == VP8_MVMODE_SPLIT;
2154
424229
}
2155
2156
static av_always_inline
2157
416507
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2158
               int mb_x, int mb_y, int is_vp7)
2159
{
2160
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2161
416507
    int filter_level = f->filter_level;
2162
416507
    int inner_limit = f->inner_limit;
2163
416507
    int inner_filter = f->inner_filter;
2164
416507
    ptrdiff_t linesize   = s->linesize;
2165
416507
    ptrdiff_t uvlinesize = s->uvlinesize;
2166
    static const uint8_t hev_thresh_lut[2][64] = {
2167
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2168
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2169
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2170
          3, 3, 3, 3 },
2171
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2172
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2173
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2174
          2, 2, 2, 2 }
2175
    };
2176
2177
416507
    if (!filter_level)
2178
5858
        return;
2179
2180
410649
    if (is_vp7) {
2181
6600
        bedge_lim_y  = filter_level;
2182
6600
        bedge_lim_uv = filter_level * 2;
2183
6600
        mbedge_lim   = filter_level + 2;
2184
    } else {
2185
404049
        bedge_lim_y  =
2186
404049
        bedge_lim_uv = filter_level * 2 + inner_limit;
2187
404049
        mbedge_lim   = bedge_lim_y + 4;
2188
    }
2189
2190
410649
    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2191
2192
410649
    if (mb_x) {
2193
396544
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2194
                                       mbedge_lim, inner_limit, hev_thresh);
2195
396544
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2196
                                       mbedge_lim, inner_limit, hev_thresh);
2197
    }
2198
2199
#define H_LOOP_FILTER_16Y_INNER(cond)                                         \
2200
    if (cond && inner_filter) {                                               \
2201
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  4, linesize,           \
2202
                                             bedge_lim_y, inner_limit,        \
2203
                                             hev_thresh);                     \
2204
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  8, linesize,           \
2205
                                             bedge_lim_y, inner_limit,        \
2206
                                             hev_thresh);                     \
2207
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,           \
2208
                                             bedge_lim_y, inner_limit,        \
2209
                                             hev_thresh);                     \
2210
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] +  4, dst[2] + 4,         \
2211
                                             uvlinesize,  bedge_lim_uv,       \
2212
                                             inner_limit, hev_thresh);        \
2213
    }
2214
2215

410649
    H_LOOP_FILTER_16Y_INNER(!is_vp7)
2216
2217
410649
    if (mb_y) {
2218
389635
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2219
                                       mbedge_lim, inner_limit, hev_thresh);
2220
389635
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2221
                                       mbedge_lim, inner_limit, hev_thresh);
2222
    }
2223
2224
410649
    if (inner_filter) {
2225
69204
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
2226
                                             linesize, bedge_lim_y,
2227
                                             inner_limit, hev_thresh);
2228
69204
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
2229
                                             linesize, bedge_lim_y,
2230
                                             inner_limit, hev_thresh);
2231
69204
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2232
                                             linesize, bedge_lim_y,
2233
                                             inner_limit, hev_thresh);
2234
69204
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] +  4 * uvlinesize,
2235
69204
                                             dst[2] +  4 * uvlinesize,
2236
                                             uvlinesize, bedge_lim_uv,
2237
                                             inner_limit, hev_thresh);
2238
    }
2239
2240

410649
    H_LOOP_FILTER_16Y_INNER(is_vp7)
2241
}
2242
2243
static av_always_inline
2244
7722
void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2245
                      int mb_x, int mb_y)
2246
{
2247
    int mbedge_lim, bedge_lim;
2248
7722
    int filter_level = f->filter_level;
2249
7722
    int inner_limit  = f->inner_limit;
2250
7722
    int inner_filter = f->inner_filter;
2251
7722
    ptrdiff_t linesize = s->linesize;
2252
2253
7722
    if (!filter_level)
2254
332
        return;
2255
2256
7390
    bedge_lim  = 2 * filter_level + inner_limit;
2257
7390
    mbedge_lim = bedge_lim + 4;
2258
2259
7390
    if (mb_x)
2260
6713
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2261
7390
    if (inner_filter) {
2262
3523
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
2263
3523
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
2264
3523
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2265
    }
2266
2267
7390
    if (mb_y)
2268
6609
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2269
7390
    if (inner_filter) {
2270
3523
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
2271
3523
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
2272
3523
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2273
    }
2274
}
2275
2276
#define MARGIN (16 << 2)
2277
static av_always_inline
2278
30
int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2279
                                    VP8Frame *prev_frame, int is_vp7)
2280
{
2281
30
    VP8Context *s = avctx->priv_data;
2282
    int mb_x, mb_y;
2283
2284
30
    s->mv_bounds.mv_min.y = -MARGIN;
2285
30
    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2286
360
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2287
330
        VP8Macroblock *mb = s->macroblocks_base +
2288
330
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
2289
330
        int mb_xy = mb_y * s->mb_width;
2290
2291
330
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2292
2293
330
        s->mv_bounds.mv_min.x = -MARGIN;
2294
330
        s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2295
2296
6930
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2297
6600
            if (vpX_rac_is_end(&s->c)) {
2298
                return AVERROR_INVALIDDATA;
2299
            }
2300
6600
            if (mb_y == 0)
2301
600
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2302
                         DC_PRED * 0x01010101);
2303
12980
            decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2304
6380
                           prev_frame && prev_frame->seg_map ?
2305
6380
                           prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2306
6600
            s->mv_bounds.mv_min.x -= 64;
2307
6600
            s->mv_bounds.mv_max.x -= 64;
2308
        }
2309
330
        s->mv_bounds.mv_min.y -= 64;
2310
330
        s->mv_bounds.mv_max.y -= 64;
2311
    }
2312
30
    return 0;
2313
}
2314
2315
30
/* VP7 instantiation of vp78_decode_mv_mb_modes(). */
static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    const int status = vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame,
                                               IS_VP7);

    return status;
}
2320
2321
/* VP8 instantiation of vp78_decode_mv_mb_modes(). */
static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    const int status = vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame,
                                               IS_VP8);

    return status;
}
2326
2327
#if HAVE_THREADS
/*
 * Positions are packed as (mb_y << 16) | (mb_x & 0xFFFF).
 *
 * check_thread_pos(): block until otd->thread_mb_pos has reached the given
 * position. While waiting, td->wait_mb_pos holds the awaited position; it is
 * reset to INT_MAX afterwards.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);             \
        if (atomic_load(&(otd)->thread_mb_pos) < tmp) {                       \
            pthread_mutex_lock(&(otd)->lock);                                 \
            atomic_store(&(td)->wait_mb_pos, tmp);                            \
            do {                                                              \
                if (atomic_load(&(otd)->thread_mb_pos) >= tmp)                \
                    break;                                                    \
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);                \
            } while (1);                                                      \
            atomic_store(&(td)->wait_mb_pos, INT_MAX);                        \
            pthread_mutex_unlock(&(otd)->lock);                               \
        }                                                                     \
    } while (0)

/*
 * update_pos(): publish td's new position and, with slice threading and more
 * than one job, broadcast td->cond when a neighbour is waiting for a position
 * that has now been reached (or unconditionally if next_td/prev_td is NULL).
 *
 * Besides its arguments this macro reads avctx, num_jobs, next_td and
 * prev_td from the enclosing scope.
 */
#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);            \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1 :                                \
            (next_td != (td) && pos >= atomic_load(&next_td->wait_mb_pos)) || \
            (prev_td != (td) && pos >= atomic_load(&prev_td->wait_mb_pos));   \
        atomic_store(&(td)->thread_mb_pos, pos);                              \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&(td)->lock);                                  \
            pthread_cond_broadcast(&(td)->cond);                              \
            pthread_mutex_unlock(&(td)->lock);                                \
        }                                                                     \
    } while (0)
#else
/* Without thread support both macros are empty statements. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) do { } while (0)
#define update_pos(td, mb_y, mb_x) do { } while (0)
#endif
2364
2365
16181
/**
 * Decode one macroblock row, without applying the loop filter.
 *
 * The row index is taken from the upper 16 bits of the calling thread's
 * thread_mb_pos. For every macroblock this decodes the mode (only when
 * s->mb_layout is 0), the coefficients (unless the macroblock is skipped),
 * runs intra or inter prediction and the IDCT, and, when deblocking is
 * enabled, stores the filter strength in td->filter_strength[] for the
 * later filter pass.
 *
 * @param avctx     codec context; priv_data is the VP8Context
 * @param tdata     unused here
 * @param jobnr     job index, used to locate the neighbouring thread data
 * @param threadnr  index into s->thread_data for this call
 * @param is_vp7    non-zero when decoding VP7
 * @return 0 on success, AVERROR_INVALIDDATA when the coefficient range
 *         coder reports its end
 */
static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
    int mb_x, mb_xy = mb_y * s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c  = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (vpX_rac_is_end(c))
         return AVERROR_INVALIDDATA;

    /* The first and last rows have no neighbour above/below to wait for. */
    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    if (!is_vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    td->mv_bounds.mv_min.x = -MARGIN;
    td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        if (vpX_rac_is_end(c))
            return AVERROR_INVALIDDATA;
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1),
                                 mb_y - (is_vp7 ? 2 : 1));
            } else {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
                                 mb_y - (is_vp7 ? 2 : 1));
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
                         s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
                         dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            /* Reset DC block predictors if they would exist
             * if the mb had coefficients */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);

        /* With several jobs, the last thread saves the unfiltered border
         * here; with a single job this is done in the filter pass. */
        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0]      += 16;
        dst[1]      += 8;
        dst[2]      += 8;
        td->mv_bounds.mv_min.x -= 64;
        td->mv_bounds.mv_max.x -= 64;

        /* mb_x is always below s->mb_width inside this loop, so the plain
         * column index is the only position ever published from here. */
        update_pos(td, mb_y, mb_x);
    }
    return 0;
}
2492
2493
330
/* VP7 instantiation of decode_mb_row_no_filter(); uses the IS_VP7 constant
 * like the other VP7 wrappers in this file. */
static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, IS_VP7);
}
2498
2499
15851
/* VP8 instantiation of decode_mb_row_no_filter(); uses the IS_VP8 constant
 * like the other VP8 wrappers in this file. */
static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, IS_VP8);
}
2504
2505
15115
/**
 * Apply the loop filter to one macroblock row.
 *
 * The row index is taken from the upper 16 bits of the calling thread's
 * thread_mb_pos, and the per-macroblock strengths are read from
 * td->filter_strength[]. Positions published from here are offset by
 * s->mb_width + 3.
 *
 * @param avctx     codec context; priv_data is the VP8Context
 * @param tdata     unused here
 * @param jobnr     job index, used to locate the neighbouring thread data
 * @param threadnr  index into s->thread_data for this call
 * @param is_vp7    forwarded to filter_mb()
 */
static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    /* The first and last rows have no neighbour above/below to wait for. */
    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td)
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        if (next_td != td)
            if (next_td != &s->thread_data[0])
                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        /* With a single job the unfiltered border is saved here, right
         * before filtering modifies it. */
        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}
2563
2564
330
/* VP7 instantiation of filter_mb_row(); uses the IS_VP7 constant like the
 * other VP7 wrappers in this file. */
static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, IS_VP7);
}
2569
2570
14785
/* VP8 instantiation of filter_mb_row(); uses the IS_VP8 constant like the
 * other VP8 wrappers in this file. */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, IS_VP8);
}
2575
2576
static av_always_inline
2577
1142
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2578
                              int threadnr, int is_vp7)
2579
{
2580
1142
    VP8Context *s = avctx->priv_data;
2581
1142
    VP8ThreadData *td = &s->thread_data[jobnr];
2582
1142
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
2583
1142
    VP8Frame *curframe = s->curframe;
2584
1142
    int mb_y, num_jobs = s->num_jobs;
2585
    int ret;
2586
2587
1142
    td->thread_nr = threadnr;
2588
1142
    td->mv_bounds.mv_min.y   = -MARGIN - 64 * threadnr;
2589
1142
    td->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
2590
17323
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2591
16181
        atomic_store(&td->thread_mb_pos, mb_y << 16);
2592
16181
        ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2593
16181
        if (ret < 0) {
2594
            update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
2595
            return ret;
2596
        }
2597
16181
        if (s->deblock_filter)
2598
15115
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2599





16181
        update_pos(td, mb_y, INT_MAX & 0xFFFF);
2600
2601
16181
        td->mv_bounds.mv_min.y -= 64 * num_jobs;
2602
16181
        td->mv_bounds.mv_max.y -= 64 * num_jobs;
2603
2604
16181
        if (avctx->active_thread_type == FF_THREAD_FRAME)
2605
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
2606
    }
2607
2608
1142
    return 0;
2609
}
2610
2611
30
/* VP7 instantiation of vp78_decode_mb_row_sliced(). */
static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    const int status = vp78_decode_mb_row_sliced(avctx, tdata, jobnr,
                                                 threadnr, IS_VP7);

    return status;
}
2616
2617
1112
/* VP8 instantiation of vp78_decode_mb_row_sliced(). */
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    const int status = vp78_decode_mb_row_sliced(avctx, tdata, jobnr,
                                                 threadnr, IS_VP8);

    return status;
}
2622
2623
static av_always_inline
2624
1142
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2625
                      const AVPacket *avpkt, int is_vp7)
2626
{
2627
1142
    VP8Context *s = avctx->priv_data;
2628
    int ret, i, referenced, num_jobs;
2629
    enum AVDiscard skip_thresh;
2630
1142
    VP8Frame *av_uninit(curframe), *prev_frame;
2631
2632
1142
    if (is_vp7)
2633
30
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2634
    else
2635
1112
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2636
2637
1142
    if (ret < 0)
2638
        goto err;
2639
2640
1142
    if (s->actually_webp) {
2641
        // avctx->pix_fmt already set in caller.
2642

1136
    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2643
        s->pix_fmt = get_pixel_format(s);
2644
        if (s->pix_fmt < 0) {
2645
            ret = AVERROR(EINVAL);
2646
            goto err;
2647
        }
2648
        avctx->pix_fmt = s->pix_fmt;
2649
    }
2650
2651
1142
    prev_frame = s->framep[VP56_FRAME_CURRENT];
2652
2653

1153
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2654
11
                 s->update_altref == VP56_FRAME_CURRENT;
2655
2656
1142
    skip_thresh = !referenced ? AVDISCARD_NONREF
2657

1142
                              : !s->keyframe ? AVDISCARD_NONKEY
2658
                                             : AVDISCARD_ALL;
2659
2660
1142
    if (avctx->skip_frame >= skip_thresh) {
2661
        s->invisible = 1;
2662
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2663
        goto skip_decode;
2664
    }
2665

1142
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2666
2667
    // release no longer referenced frames
2668
6852
    for (i = 0; i < 5; i++)
2669
5710
        if (s->frames[i].tf.f->buf[0] &&
2670
3826
            &s->frames[i] != prev_frame &&
2671
2721
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2672
2710
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
2673
1741
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2674
1033
            vp8_release_frame(s, &s->frames[i]);
2675
2676
1142
    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2677
2678
1142
    if (!s->colorspace)
2679
1142
        avctx->colorspace = AVCOL_SPC_BT470BG;
2680
1142
    if (s->fullrange)
2681
        avctx->color_range = AVCOL_RANGE_JPEG;
2682
    else
2683
1142
        avctx->color_range = AVCOL_RANGE_MPEG;
2684
2685
    /* Given that arithmetic probabilities are updated every frame, it's quite
2686
     * likely that the values we have on a random interframe are complete
2687
     * junk if we didn't start decode on a keyframe. So just don't display
2688
     * anything rather than junk. */
2689

1142
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2690
1093
                         !s->framep[VP56_FRAME_GOLDEN]   ||
2691
1093
                         !s->framep[VP56_FRAME_GOLDEN2])) {
2692
        av_log(avctx, AV_LOG_WARNING,
2693
               "Discarding interframe without a prior keyframe!\n");
2694
        ret = AVERROR_INVALIDDATA;
2695
        goto err;
2696
    }
2697
2698
1142
    curframe->tf.f->key_frame = s->keyframe;
2699
2284
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2700
1142
                                            : AV_PICTURE_TYPE_P;
2701
1142
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2702
        goto err;
2703
2704
    // check if golden and altref are swapped
2705
1142
    if (s->update_altref != VP56_FRAME_NONE)
2706
196
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2707
    else
2708
946
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2709
2710
1142
    if (s->update_golden != VP56_FRAME_NONE)
2711
139
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2712
    else
2713
1003
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2714
2715
1142
    if (s->update_last)
2716
1131
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2717
    else
2718
11
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2719
2720
1142
    s->next_framep[VP56_FRAME_CURRENT] = curframe;
2721
2722
1142
    if (avctx->codec->update_thread_context)
2723
1106
        ff_thread_finish_setup(avctx);
2724
2725
1142
    if (avctx->hwaccel) {
2726
        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2727
        if (ret < 0)
2728
            goto err;
2729
2730
        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2731
        if (ret < 0)
2732
            goto err;
2733
2734
        ret = avctx->hwaccel->end_frame(avctx);
2735
        if (ret < 0)
2736
            goto err;
2737
2738
    } else {
2739
1142
        s->linesize   = curframe->tf.f->linesize[0];
2740
1142
        s->uvlinesize = curframe->tf.f->linesize[1];
2741
2742
1142
        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2743
        /* Zero macroblock structures for top/top-left prediction
2744
         * from outside the frame. */
2745
1142
        if (!s->mb_layout)
2746
1112
            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2747
1112
                   (s->mb_width + 1) * sizeof(*s->macroblocks));
2748

1142
        if (!s->mb_layout && s->keyframe)
2749
48
            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2750
2751
1142
        memset(s->ref_count, 0, sizeof(s->ref_count));
2752
2753
1142
        if (s->mb_layout == 1) {
2754
            // Make sure the previous frame has read its segmentation map,
2755
            // if we re-use the same map.
2756

30
            if (prev_frame && s->segmentation.enabled &&
2757
                !s->segmentation.update_map)
2758
                ff_thread_await_progress(&prev_frame->tf, 1, 0);
2759
30
            if (is_vp7)
2760
30
                ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2761
            else
2762
                ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2763
30
            if (ret < 0)
2764
                goto err;
2765
        }
2766
2767
1142
        if (avctx->active_thread_type == FF_THREAD_FRAME)
2768
            num_jobs = 1;
2769
        else
2770
1142
            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2771
1142
        s->num_jobs   = num_jobs;
2772
1142
        s->curframe   = curframe;
2773
1142
        s->prev_frame = prev_frame;
2774
1142
        s->mv_bounds.mv_min.y   = -MARGIN;
2775
1142
        s->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
2776
10278
        for (i = 0; i < MAX_THREADS; i++) {
2777
9136
            VP8ThreadData *td = &s->thread_data[i];
2778
9136
            atomic_init(&td->thread_mb_pos, 0);
2779
9136
            atomic_init(&td->wait_mb_pos, INT_MAX);
2780
        }
2781
1142
        if (is_vp7)
2782
30
            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2783
                            num_jobs);
2784
        else
2785
1112
            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2786
                            num_jobs);
2787
    }
2788
2789
1142
    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2790
1142
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2791
2792
1142
skip_decode:
2793
    // if future frames don't use the updated probabilities,
2794
    // reset them to the values we saved
2795
1142
    if (!s->update_probabilities)
2796
64
        s->prob[0] = s->prob[1];
2797
2798
1142
    if (!s->invisible) {
2799
1133
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2800
            return ret;
2801
1133
        *got_frame = 1;
2802
    }
2803
2804
1142
    return avpkt->size;
2805
err:
2806
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2807
    return ret;
2808
}
2809
2810
1112
int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2811
                        AVPacket *avpkt)
2812
{
2813
1112
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2814
}
2815
2816
#if CONFIG_VP7_DECODER
/* VP7 decode entry point: forwards to vp78_decode_frame() with IS_VP7. */
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    const int result = vp78_decode_frame(avctx, data, got_frame, avpkt,
                                         IS_VP7);

    return result;
}
#endif /* CONFIG_VP7_DECODER */
2823
2824
64
/**
 * Flush the decoder and free all frames owned by the context.
 *
 * Does nothing when avctx->priv_data is NULL.
 *
 * @return always 0
 */
av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    if (s) {
        int idx;

        vp8_decode_flush_impl(avctx, 1);
        for (idx = 0; idx < FF_ARRAY_ELEMS(s->frames); idx++)
            av_frame_free(&s->frames[idx].tf.f);
    }

    return 0;
}
2838
2839
64
/**
 * Allocate an AVFrame for every entry of s->frames.
 *
 * Stops at the first failed allocation; entries allocated before it are
 * left in place.
 *
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static av_cold int vp8_init_frames(VP8Context *s)
{
    int idx = 0;

    while (idx < FF_ARRAY_ELEMS(s->frames)) {
        s->frames[idx].tf.f = av_frame_alloc();
        if (!s->frames[idx].tf.f)
            return AVERROR(ENOMEM);
        idx++;
    }

    return 0;
}
2849
2850
static av_always_inline
2851
64
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2852
{
2853
64
    VP8Context *s = avctx->priv_data;
2854
    int ret;
2855
2856
64
    s->avctx = avctx;
2857
64
    s->vp7   = avctx->codec->id == AV_CODEC_ID_VP7;
2858
64
    s->pix_fmt = AV_PIX_FMT_NONE;
2859
64
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2860
2861
64
    ff_videodsp_init(&s->vdsp, 8);
2862
2863
64
    ff_vp78dsp_init(&s->vp8dsp);
2864
64
    if (CONFIG_VP7_DECODER && is_vp7) {
2865
3
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2866
3
        ff_vp7dsp_init(&s->vp8dsp);
2867
3
        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2868
3
        s->filter_mb_row           = vp7_filter_mb_row;
2869
61
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
2870
61
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2871
61
        ff_vp8dsp_init(&s->vp8dsp);
2872
61
        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2873
61
        s->filter_mb_row           = vp8_filter_mb_row;
2874
    }
2875
2876
    /* does not change for VP8 */
2877
64
    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
2878
2879
64
    if ((ret = vp8_init_frames(s)) < 0) {
2880
        ff_vp8_decode_free(avctx);
2881
        return ret;
2882
    }
2883
2884
64
    return 0;
2885
}
2886
2887
#if CONFIG_VP7_DECODER
/* VP7 init entry point: forwards to vp78_decode_init() with IS_VP7. */
static int vp7_decode_init(AVCodecContext *avctx)
{
    const int result = vp78_decode_init(avctx, IS_VP7);

    return result;
}
#endif /* CONFIG_VP7_DECODER */
2893
2894
61
/* VP8 init entry point: forwards to vp78_decode_init() with IS_VP8. */
av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    const int result = vp78_decode_init(avctx, IS_VP8);

    return result;
}
2898
2899
#if CONFIG_VP8_DECODER
#if HAVE_THREADS
/* Translate a frame pointer into s_src->frames[] to the same slot of
 * s->frames[]; NULL stays NULL. Reads s and s_src from the enclosing scope. */
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

/**
 * Copy the decoder state needed by the next frame from src to dst.
 *
 * Buffers of dst are freed when the macroblock dimensions differ; the
 * probabilities, segmentation, loop filter deltas and sign biases are
 * copied, every allocated source frame is referenced, and the frame
 * pointers are rebased onto dst's own frame array.
 *
 * @return 0 on success, otherwise the error from vp8_ref_frame()
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int idx;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->pix_fmt      = s_src->pix_fmt;
    /* take the saved probabilities when the source does not keep its updates */
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (idx = 0; idx < FF_ARRAY_ELEMS(s_src->frames); idx++) {
        int ret;

        if (!s_src->frames[idx].tf.f->buf[0])
            continue;

        ret = vp8_ref_frame(s, &s->frames[idx], &s_src->frames[idx]);
        if (ret < 0)
            return ret;
    }

    for (idx = 0; idx < 4; idx++)
        s->framep[idx] = REBASE(s_src->next_framep[idx]);

    return 0;
}
#endif /* HAVE_THREADS */
#endif /* CONFIG_VP8_DECODER */
2939
2940
#if CONFIG_VP7_DECODER
2941
AVCodec ff_vp7_decoder = {
2942
    .name                  = "vp7",
2943
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
2944
    .type                  = AVMEDIA_TYPE_VIDEO,
2945
    .id                    = AV_CODEC_ID_VP7,
2946
    .priv_data_size        = sizeof(VP8Context),
2947
    .init                  = vp7_decode_init,
2948
    .close                 = ff_vp8_decode_free,
2949
    .decode                = vp7_decode_frame,
2950
    .capabilities          = AV_CODEC_CAP_DR1,
2951
    .flush                 = vp8_decode_flush,
2952
};
2953
#endif /* CONFIG_VP7_DECODER */
2954
2955
#if CONFIG_VP8_DECODER
2956
AVCodec ff_vp8_decoder = {
2957
    .name                  = "vp8",
2958
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
2959
    .type                  = AVMEDIA_TYPE_VIDEO,
2960
    .id                    = AV_CODEC_ID_VP8,
2961
    .priv_data_size        = sizeof(VP8Context),
2962
    .init                  = ff_vp8_decode_init,
2963
    .close                 = ff_vp8_decode_free,
2964
    .decode                = ff_vp8_decode_frame,
2965
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2966
                             AV_CODEC_CAP_SLICE_THREADS,
2967
    .flush                 = vp8_decode_flush,
2968
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2969
    .hw_configs            = (const AVCodecHWConfigInternal *const []) {
2970
#if CONFIG_VP8_VAAPI_HWACCEL
2971
                               HWACCEL_VAAPI(vp8),
2972
#endif
2973
#if CONFIG_VP8_NVDEC_HWACCEL
2974
                               HWACCEL_NVDEC(vp8),
2975
#endif
2976
                               NULL
2977
                           },
2978
    .caps_internal         = FF_CODEC_CAP_ALLOCATE_PROGRESS,
2979
};
2980
#endif /* CONFIG_VP8_DECODER */