GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/vp8.c Lines: 1352 1587 85.2 %
Date: 2020-10-23 17:01:47 Branches: 808 1165 69.4 %

Line Branch Exec Source
1
/*
2
 * VP7/VP8 compatible video decoder
3
 *
4
 * Copyright (C) 2010 David Conrad
5
 * Copyright (C) 2010 Ronald S. Bultje
6
 * Copyright (C) 2010 Fiona Glaser
7
 * Copyright (C) 2012 Daniel Kang
8
 * Copyright (C) 2014 Peter Ross
9
 *
10
 * This file is part of FFmpeg.
11
 *
12
 * FFmpeg is free software; you can redistribute it and/or
13
 * modify it under the terms of the GNU Lesser General Public
14
 * License as published by the Free Software Foundation; either
15
 * version 2.1 of the License, or (at your option) any later version.
16
 *
17
 * FFmpeg is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
 * Lesser General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Lesser General Public
23
 * License along with FFmpeg; if not, write to the Free Software
24
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25
 */
26
27
#include "libavutil/imgutils.h"
28
29
#include "avcodec.h"
30
#include "hwconfig.h"
31
#include "internal.h"
32
#include "mathops.h"
33
#include "rectangle.h"
34
#include "thread.h"
35
#include "vp8.h"
36
#include "vp8data.h"
37
38
#if ARCH_ARM
39
#   include "arm/vp8.h"
40
#endif
41
42
/*
 * VPX(vp7, f) selects between the vp7_ and vp8_ prefixed variant of f.
 * When only one decoder is built the selector is ignored and the single
 * available variant is named directly.  The selector is parenthesised so
 * that an expression argument (e.g. "a || b") cannot re-associate with
 * the conditional operator.
 */
#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) ((vp7) ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
49
50
73
/**
 * Free the working buffers owned by the decoder context.
 *
 * If the per-thread array exists, every one of its MAX_THREADS entries has
 * its filter_strength buffer freed (and, when HAVE_THREADS is set, its
 * condition variable and mutex destroyed) before the array itself and the
 * remaining context buffers are released.  s->macroblocks is reset to NULL.
 */
static void free_buffers(VP8Context *s)
{
    if (s->thread_data) {
        int n;

        for (n = 0; n < MAX_THREADS; n++) {
            VP8ThreadData *td = &s->thread_data[n];

#if HAVE_THREADS
            pthread_cond_destroy(&td->cond);
            pthread_mutex_destroy(&td->lock);
#endif
            av_freep(&td->filter_strength);
        }
    }

    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}
69
70
1142
/**
 * Obtain the picture buffer and side buffers for a frame.
 *
 * @param s   decoder context
 * @param f   frame to populate
 * @param ref non-zero to request the buffer with AV_GET_BUFFER_FLAG_REF
 * @return 0 on success, the negative error from ff_thread_get_buffer(), or
 *         AVERROR(ENOMEM) if the segmentation map or the hwaccel private
 *         buffer cannot be allocated (the frame is released in that case)
 */
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    const int flags = ref ? AV_GET_BUFFER_FLAG_REF : 0;
    const AVHWAccel *hwaccel;
    int ret;

    ret = ff_thread_get_buffer(s->avctx, &f->tf, flags);
    if (ret < 0)
        return ret;

    /* one zero-initialised byte per macroblock */
    f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height);
    if (!f->seg_map)
        goto fail;

    hwaccel = s->avctx->hwaccel;
    if (hwaccel && hwaccel->frame_priv_data_size) {
        f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
        if (!f->hwaccel_priv_buf)
            goto fail;
        f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}
94
95
1398
/**
 * Drop every reference held by a frame: the hwaccel private buffer (and the
 * pointer into it), the segmentation map and the picture buffer itself.
 */
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    /* hwaccel state first; the data pointer must not outlive its buffer */
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->hwaccel_picture_private = NULL;

    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}
102
103
#if CONFIG_VP8_DECODER
104
/**
 * Make dst a new reference to the same picture, segmentation map and
 * hwaccel private data as src.
 *
 * Whatever dst referenced before is released first.  If any of the
 * side-buffer references cannot be created, dst is released again so that
 * it is never left half-populated.
 *
 * @return 0 on success, the negative error from ff_thread_ref_frame(), or
 *         AVERROR(ENOMEM) if a buffer reference could not be created
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;

    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf) {
            /* same clean-up as the seg_map failure path above */
            vp8_release_frame(s, dst);
            return AVERROR(ENOMEM);
        }
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;
}
126
#endif /* CONFIG_VP8_DECODER */
127
128
73
/**
 * Release every frame held by the decoder and clear the framep[] table.
 *
 * @param free_mem when non-zero, the context working buffers are freed too
 */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    VP8Frame *cur = s->frames;
    VP8Frame *const end = s->frames + FF_ARRAY_ELEMS(s->frames);

    while (cur < end)
        vp8_release_frame(s, cur++);

    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}
140
141
/**
 * Release all frames through vp8_decode_flush_impl() while leaving the
 * context working buffers allocated.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    const int free_mem = 0;

    vp8_decode_flush_impl(avctx, free_mem);
}
145
146
1142
/**
 * Pick the first of the first five entries of s->frames that is not
 * currently referenced through framep[] as the current, previous, golden
 * or golden2 frame.
 *
 * If the chosen entry still holds a picture buffer it is released before
 * being returned.  Running out of entries is treated as fatal: a message
 * is logged and the process aborts.
 */
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int idx;

    for (idx = 0; idx < 5; idx++) {
        VP8Frame *cand = &s->frames[idx];

        if (cand == s->framep[VP56_FRAME_CURRENT]  ||
            cand == s->framep[VP56_FRAME_PREVIOUS] ||
            cand == s->framep[VP56_FRAME_GOLDEN]   ||
            cand == s->framep[VP56_FRAME_GOLDEN2])
            continue;

        frame = cand;
        break;
    }

    if (!frame) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }

    if (frame->tf.f->buf[0])
        vp8_release_frame(s, frame);

    return frame;
}
169
170
21
/**
 * Negotiate the output pixel format through ff_get_format().
 *
 * The candidate list holds the hardware formats that were enabled at build
 * time (VAAPI, then CUDA for NVDEC), followed by AV_PIX_FMT_YUV420P and
 * the AV_PIX_FMT_NONE terminator.
 *
 * @return the value returned by ff_get_format()
 */
static enum AVPixelFormat get_pixel_format(VP8Context *s)
{
    enum AVPixelFormat candidates[] = {
#if CONFIG_VP8_VAAPI_HWACCEL
        AV_PIX_FMT_VAAPI,
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
        AV_PIX_FMT_CUDA,
#endif
        AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_NONE,
    };
    AVCodecContext *avctx = s->avctx;

    return ff_get_format(avctx, candidates);
}
185
186
static av_always_inline
187
37
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
188
{
189
37
    AVCodecContext *avctx = s->avctx;
190
37
    int i, ret, dim_reset = 0;
191
192


37
    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
193
24
        height != s->avctx->height) {
194
13
        vp8_decode_flush_impl(s->avctx, 1);
195
196
13
        ret = ff_set_dimensions(s->avctx, width, height);
197
13
        if (ret < 0)
198
            return ret;
199
200
13
        dim_reset = (s->macroblocks_base != NULL);
201
    }
202
203

37
    if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) &&
204

28
         !s->actually_webp && !is_vp7) {
205
21
        s->pix_fmt = get_pixel_format(s);
206
21
        if (s->pix_fmt < 0)
207
            return AVERROR(EINVAL);
208
21
        avctx->pix_fmt = s->pix_fmt;
209
    }
210
211
37
    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
212
37
    s->mb_height = (s->avctx->coded_height + 15) / 16;
213
214

37
    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
215
                   avctx->thread_count > 1;
216
37
    if (!s->mb_layout) { // Frame threading and one thread
217
36
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
218
                                               sizeof(*s->macroblocks));
219
36
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
220
    } else // Sliced threading
221
1
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
222
                                         sizeof(*s->macroblocks));
223
37
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
224
37
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
225
37
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
226
227

37
    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
228

37
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
229
        free_buffers(s);
230
        return AVERROR(ENOMEM);
231
    }
232
233
333
    for (i = 0; i < MAX_THREADS; i++) {
234
592
        s->thread_data[i].filter_strength =
235
296
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
236
296
        if (!s->thread_data[i].filter_strength) {
237
            free_buffers(s);
238
            return AVERROR(ENOMEM);
239
        }
240
#if HAVE_THREADS
241
296
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
242
296
        pthread_cond_init(&s->thread_data[i].cond, NULL);
243
#endif
244
    }
245
246
37
    s->macroblocks = s->macroblocks_base + 1;
247
248
37
    return 0;
249
}
250
251
1
/** VP7 entry point for update_dimensions(); passes IS_VP7 as the selector. */
static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    int ret = update_dimensions(s, width, height, IS_VP7);

    return ret;
}
255
256
36
/** VP8 entry point for update_dimensions(); passes IS_VP8 as the selector. */
static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    int ret = update_dimensions(s, width, height, IS_VP8);

    return ret;
}
260
261
262
433
/**
 * Read the segmentation part of the frame header from s->c.
 *
 * Two flags are read first (update_map, update_feature_data).  When feature
 * data is updated, the absolute/delta flag, four signed 7-bit quantizer
 * values and four signed 6-bit filter levels follow.  When the map is
 * updated, three segment-id probabilities follow, each either an explicit
 * 8-bit value or the default 255.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int n;

    s->segmentation.update_map          = vp8_rac_get(rc);
    s->segmentation.update_feature_data = vp8_rac_get(rc);

    if (s->segmentation.update_feature_data) {
        s->segmentation.absolute_vals = vp8_rac_get(rc);

        for (n = 0; n < 4; n++)
            s->segmentation.base_quant[n] = vp8_rac_get_sint(rc, 7);

        for (n = 0; n < 4; n++)
            s->segmentation.filter_level[n] = vp8_rac_get_sint(rc, 6);
    }

    if (!s->segmentation.update_map)
        return;

    for (n = 0; n < 3; n++) {
        if (vp8_rac_get(rc))
            s->prob->segmentid[n] = vp8_rac_get_uint(rc, 8);
        else
            s->prob->segmentid[n] = 255;
    }
}
283
284
42
/**
 * Read loop-filter delta updates from s->c.
 *
 * For each of the four reference deltas and then each mode delta from
 * MODE_I4x4 to VP8_MVMODE_SPLIT, an update flag is read; if set, a 6-bit
 * magnitude and a sign flag follow.  Entries without an update keep their
 * previous value.
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int idx;

    for (idx = 0; idx < 4; idx++) {
        int delta;

        if (!vp8_rac_get(rc))
            continue;

        delta = vp8_rac_get_uint(rc, 6);
        if (vp8_rac_get(rc))
            delta = -delta;
        s->lf_delta.ref[idx] = delta;
    }

    for (idx = MODE_I4x4; idx <= VP8_MVMODE_SPLIT; idx++) {
        int delta;

        if (!vp8_rac_get(rc))
            continue;

        delta = vp8_rac_get_uint(rc, 6);
        if (vp8_rac_get(rc))
            delta = -delta;
        s->lf_delta.mode[idx] = delta;
    }
}
307
308
1112
/**
 * Split the data following the header partition into coefficient
 * partitions and initialise one range decoder per partition.
 *
 * The partition count (1, 2, 4 or 8) is read from s->c.  buf starts with a
 * table of 3-byte little-endian sizes for all partitions but the last; the
 * last partition takes whatever data remains.
 *
 * @return 0 on success, -1 if the size table or a partition does not fit
 *         in buf_size, or the error returned by ff_vp56_init_range_decoder()
 *         for any partition but the last
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *size_table = buf;
    int last, part, ret;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
    last = s->num_coeff_partitions - 1;

    /* skip the size table */
    buf      += 3 * last;
    buf_size -= 3 * last;
    if (buf_size < 0)
        return -1;

    for (part = 0; part < last; part++) {
        int part_size = AV_RL24(size_table + 3 * part);

        if (part_size > buf_size)
            return -1;
        s->coeff_partition_size[part] = part_size;

        ret = ff_vp56_init_range_decoder(&s->coeff_partition[part], buf, part_size);
        if (ret < 0)
            return ret;

        buf      += part_size;
        buf_size -= part_size;
    }

    /* part == last here; the result of this final init is not checked */
    s->coeff_partition_size[part] = buf_size;
    ff_vp56_init_range_decoder(&s->coeff_partition[part], buf, buf_size);

    return 0;
}
339
340
30
/**
 * Read the VP7 quantizer indices from s->c and fill s->qmat[0].
 *
 * The first index (Y AC) is always coded with 7 bits.  Each of the other
 * five (Y DC, Y2 DC, Y2 AC, UV DC, UV AC, in that order) is preceded by a
 * flag; without the flag it defaults to the Y AC index.  The chroma DC
 * multiplier is capped at 132.
 */
static void vp7_get_quants(VP8Context *s)
{
    enum { QI_YAC, QI_YDC, QI_Y2DC, QI_Y2AC, QI_UVDC, QI_UVAC, QI_NB };
    VP56RangeCoder *c = &s->c;
    int qi[QI_NB];
    int k;

    qi[QI_YAC] = vp8_rac_get_uint(c, 7);
    for (k = QI_YDC; k < QI_NB; k++)
        qi[k] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : qi[QI_YAC];

    s->qmat[0].luma_qmul[0]    =       vp7_ydc_qlookup[qi[QI_YDC]];
    s->qmat[0].luma_qmul[1]    =       vp7_yac_qlookup[qi[QI_YAC]];
    s->qmat[0].luma_dc_qmul[0] =       vp7_y2dc_qlookup[qi[QI_Y2DC]];
    s->qmat[0].luma_dc_qmul[1] =       vp7_y2ac_qlookup[qi[QI_Y2AC]];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[qi[QI_UVDC]], 132);
    s->qmat[0].chroma_qmul[1]  =       vp7_yac_qlookup[qi[QI_UVAC]];
}
358
359
1112
/**
 * Read the VP8 quantizer header from s->c and derive the dequantization
 * multipliers for all four segments.
 *
 * The base index of a segment is the frame-level Y AC index, optionally
 * replaced by (absolute mode) or offset by (delta mode) the per-segment
 * value when segmentation is enabled.  Every table index is clipped to
 * 7 bits before the lookup.
 */
static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int seg, base_qi;

    s->quant.yac_qi     = vp8_rac_get_uint(c, 7);
    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);

    for (seg = 0; seg < 4; seg++) {
        int ydc, yac, y2dc, y2ac, uvdc, uvac;

        if (!s->segmentation.enabled)
            base_qi = s->quant.yac_qi;
        else if (s->segmentation.absolute_vals)
            base_qi = s->segmentation.base_quant[seg];
        else
            base_qi = s->segmentation.base_quant[seg] + s->quant.yac_qi;

        ydc  = av_clip_uintp2(base_qi + s->quant.ydc_delta,  7);
        yac  = av_clip_uintp2(base_qi,                       7);
        y2dc = av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7);
        y2ac = av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7);
        uvdc = av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7);
        uvac = av_clip_uintp2(base_qi + s->quant.uvac_delta, 7);

        s->qmat[seg].luma_qmul[0]    = vp8_dc_qlookup[ydc];
        s->qmat[seg].luma_qmul[1]    = vp8_ac_qlookup[yac];
        s->qmat[seg].luma_dc_qmul[0] = vp8_dc_qlookup[y2dc] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[seg].luma_dc_qmul[1] = vp8_ac_qlookup[y2ac] * 101581 >> 16;
        s->qmat[seg].chroma_qmul[0]  = vp8_dc_qlookup[uvdc];
        s->qmat[seg].chroma_qmul[1]  = vp8_ac_qlookup[uvac];

        /* lower bound for the Y2 AC multiplier, upper bound for chroma DC */
        s->qmat[seg].luma_dc_qmul[1] = FFMAX(s->qmat[seg].luma_dc_qmul[1], 8);
        s->qmat[seg].chroma_qmul[0]  = FFMIN(s->qmat[seg].chroma_qmul[0], 132);
    }
}
391
392
/**
393
 * Determine which buffers golden and altref should be updated with after this frame.
394
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
395
 *
396
 * Intra frames update all 3 references
397
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
398
 * If the update (golden|altref) flag is set, it's updated with the current frame
399
 *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
400
 * If the flag is not set, the number read means:
401
 *      0: no update
402
 *      1: VP56_FRAME_PREVIOUS
403
 *      2: update golden with altref, or update altref with golden
404
 */
405
2128
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
406
{
407
2128
    VP56RangeCoder *c = &s->c;
408
409
2128
    if (update)
410
90
        return VP56_FRAME_CURRENT;
411
412
2038
    switch (vp8_rac_get_uint(c, 2)) {
413
31
    case 1:
414
31
        return VP56_FRAME_PREVIOUS;
415
84
    case 2:
416
84
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
417
    }
418
1923
    return VP56_FRAME_NONE;
419
}
420
421
49
/**
 * Reset the token probabilities of s->prob to their defaults.
 *
 * The defaults are stored per coefficient band; each of the 16 coefficient
 * positions receives the defaults of the band it maps to through
 * vp8_coeff_band[].
 */
static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, pos;

    for (i = 0; i < 4; i++) {
        for (pos = 0; pos < 16; pos++) {
            const int band = vp8_coeff_band[pos];

            memcpy(s->prob->token[i][pos], vp8_token_default_probs[i][band],
                   sizeof(s->prob->token[i][pos]));
        }
    }
}
429
430
1142
/**
 * Read the token probability updates from s->c.
 *
 * For every [i][band][k][token] entry an update flag is decoded with the
 * matching probability from vp8_token_update_probs.  When set, a new 8-bit
 * probability follows and is stored for every coefficient position listed
 * for that band in vp8_coeff_band_indexes (the list ends at the first
 * negative entry).
 */
static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, band, k, tok, n;

    for (i = 0; i < 4; i++) {
        for (band = 0; band < 8; band++) {
            for (k = 0; k < 3; k++) {
                for (tok = 0; tok < NUM_DCT_TOKENS-1; tok++) {
                    int prob;

                    if (!vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][band][k][tok]))
                        continue;

                    prob = vp8_rac_get_uint(c, 8);
                    for (n = 0; vp8_coeff_band_indexes[band][n] >= 0; n++)
                        s->prob->token[i][vp8_coeff_band_indexes[band][n]][k][tok] = prob;
                }
            }
        }
    }
}
445
446
#define VP7_MVC_SIZE 17
447
#define VP8_MVC_SIZE 19
448
449
1093
/**
 * Read the optional updates of the 16x16 prediction, 8x8 chroma prediction
 * and motion vector component probabilities from s->c.
 *
 * @param mvc_size number of probabilities per motion vector component
 *                 (callers pass VP7_MVC_SIZE or VP8_MVC_SIZE)
 */
static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int comp, n;

    /* four explicit 8-bit pred16x16 probabilities, if flagged */
    if (vp8_rac_get(c)) {
        for (n = 0; n < 4; n++)
            s->prob->pred16x16[n] = vp8_rac_get_uint(c, 8);
    }

    /* three explicit 8-bit pred8x8c probabilities, if flagged */
    if (vp8_rac_get(c)) {
        for (n = 0; n < 3; n++)
            s->prob->pred8x8c[n] = vp8_rac_get_uint(c, 8);
    }

    // 17.2 MV probability update
    for (comp = 0; comp < 2; comp++) {
        for (n = 0; n < mvc_size; n++) {
            if (!vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[comp][n]))
                continue;
            s->prob->mvc[comp][n] = vp8_rac_get_nn(c);
        }
    }
}
468
469
1064
/**
 * Read the golden/altref update information from s->c.
 *
 * Both update flags are read before ref_to_update() is called, because
 * ref_to_update() may itself read further bits from the same coder.
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int golden_flag, altref_flag;

    golden_flag = vp8_rac_get(rc);
    altref_flag = vp8_rac_get(rc);

    s->update_golden = ref_to_update(s, golden_flag, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, altref_flag, VP56_FRAME_GOLDEN2);
}
479
480
/**
 * Copy planes 1 and 2 from src to dst.
 *
 * height / 2 rows of width / 2 bytes are copied per plane, each frame
 * being addressed with its own linesize.
 */
static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    const int rows      = height / 2;
    const int row_bytes = width / 2;
    int plane, y;

    for (plane = 1; plane < 3; plane++) {
        for (y = 0; y < rows; y++) {
            uint8_t *out      = dst->data[plane] + y * dst->linesize[plane];
            const uint8_t *in = src->data[plane] + y * src->linesize[plane];

            memcpy(out, in, row_bytes);
        }
    }
}
490
491
/**
 * Apply a fade to a plane: every output sample is
 * clip_uint8(y + ((y * beta) >> 8) + alpha), where y is the input sample.
 *
 * dst and src may be the same buffer; each sample is read before the
 * corresponding output sample is written.
 */
static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *in = src + row * src_linesize;
        uint8_t *out      = dst + row * dst_linesize;

        for (col = 0; col < width; col++) {
            uint8_t y = in[col];
            int scaled = (y * beta) >> 8;

            out[col] = av_clip_uint8(y + scaled + alpha);
        }
    }
}
506
507
30
/**
 * Apply the VP7 fade to the luma plane of the previous frame.
 *
 * Nothing is done for keyframes or when both alpha and beta are zero.
 * If the previous and golden frames are the same frame, a new previous
 * frame is allocated so that the golden frame stays untouched; its chroma
 * is copied over and the faded luma is written into it.  Otherwise the
 * previous frame is faded in place.
 *
 * @return 0 on success, AVERROR_INVALIDDATA if the previous or golden
 *         frame is missing, or the error from vp8_alloc_frame()
 */
static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
{
    AVFrame *src, *dst;
    int width, height, ret;

    if (s->keyframe || !(alpha || beta))
        return 0;

    width  = s->mb_width * 16;
    height = s->mb_height * 16;

    if (!s->framep[VP56_FRAME_PREVIOUS] ||
        !s->framep[VP56_FRAME_GOLDEN]) {
        av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        return AVERROR_INVALIDDATA;
    }

    src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
    dst = src;

    /* preserve the golden frame, write a new previous frame */
    if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
        s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);

        ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1);
        if (ret < 0)
            return ret;

        dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        copy_chroma(dst, src, width, height);
    }

    fade(dst->data[0], dst->linesize[0],
         src->data[0], src->linesize[0],
         width, height, alpha, beta);

    return 0;
}
543
544
30
/**
 * Parse a VP7 frame header and prepare the context for decoding the frame.
 *
 * The first byte carries the frame type and profile; a 24-bit little-endian
 * field holds the size of the first partition.  The fixed header is
 * 4 - profile bytes long.  The first partition is then read through s->c in
 * the order marked A..J below; the remaining data becomes the single
 * coefficient partition.
 *
 * @return 0 on success, AVERROR_INVALIDDATA for truncated or unsupported
 *         data, or the error of a helper that failed
 */
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int fixed_hdr_size;
    int width  = s->avctx->width;
    int height = s->avctx->height;
    int alpha = 0;
    int beta  = 0;

    if (buf_size < 4)
        return AVERROR_INVALIDDATA;

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe    = !(buf[0] & 1);
    s->invisible   = 0;
    part1_size     = AV_RL24(buf) >> 4;
    fixed_hdr_size = 4 - s->profile;

    if (buf_size < fixed_hdr_size + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, fixed_hdr_size + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += fixed_hdr_size;
    buf_size -= fixed_hdr_size;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ret = ff_vp56_init_range_decoder(c, buf, part1_size);
    if (ret < 0)
        return ret;
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        /* a keyframe resets all persistent decoding state */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (!s->feature_enabled[i])
            continue;

        s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

        for (j = 0; j < 3; j++)
            s->feature_index_prob[i][j] =
                vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

        if (vp7_feature_value_size[s->profile][i]) {
            for (j = 0; j < 4; j++)
                s->feature_value[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    /* everything after the first partition is one coefficient partition */
    s->num_coeff_partitions = 1;
    ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
    if (ret < 0)
        return ret;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        ret = vp7_update_dimensions(s, width, height);
        if (ret < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        /* keep a copy of the probabilities when this frame's updates
         * are not to be saved */
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        alpha = (int8_t) vp8_rac_get_uint(c, 8);
        beta  = (int8_t) vp8_rac_get_uint(c, 8);
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c)) {
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];
    }

    /* H. Loop filter levels  */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    ret = vp7_fade_frame(s, alpha, beta);
    if (ret < 0)
        return ret;

    return 0;
}
707
708
1112
/**
 * Parse a VP8 frame header and prepare the context for decoding the frame.
 *
 * The 3-byte frame tag carries the frame type, profile, visibility flag and
 * the size of the header partition.  Keyframes continue with a 3-byte start
 * code and 4 bytes of dimension/scaling information.  The header partition
 * is then read through s->c, and the remaining data is split into
 * coefficient partitions by setup_partitions().
 *
 * @return 0 on success, AVERROR_INVALIDDATA for truncated or malformed
 *         data, or the error of a helper that failed
 */
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    s->header_partition_size = header_size;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    /* keyframes carry 7 extra bytes before the header partition */
    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        /* 14-bit dimensions; the top two bits of each 16-bit field are
         * the scaling factors */
        width     = AV_RL16(buf + 3) & 0x3fff;
        height    = AV_RL16(buf + 5) & 0x3fff;
        hscale    = buf[4] >> 6;
        vscale    = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        /* a keyframe resets all persistent decoding state */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ret = ff_vp56_init_range_decoder(c, buf, header_size);
    if (ret < 0)
        return ret;
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
        s->lf_delta.update = vp8_rac_get(c);
        if (s->lf_delta.update)
            update_lf_deltas(s);
    }

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    /* the update_last flag is only coded for inter frames */
    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    // Record the entropy coder state here so that hwaccels can use it.
    s->c.code_word = vp56_rac_renorm(&s->c);
    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
    s->coder_state_at_header_end.range     = s->c.high;
    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;

    return 0;
}
847
848
/**
 * Clamp a motion vector into the range described by a bounds structure.
 *
 * Each component of src is clipped with av_clip() between the matching
 * mv_min / mv_max component of s. Those limits are themselves first clipped
 * to [INT16_MIN, INT16_MAX] before being used as clipping bounds.
 *
 * @param s   bounds (mv_min / mv_max) to clamp against
 * @param dst receives the clamped vector
 * @param src vector to clamp (not modified)
 */
static av_always_inline
849
57174
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
850
{
851
57174
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
852
                             av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
853
57174
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
854
                             av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
855
57174
}
856
857
/**
858
 * Motion vector coding, 17.1.
 *
 * Read one signed motion vector component from the range coder.
 *
 * p[0] selects between the long form and the short (small tree) form.
 * In the long form the magnitude is assembled bit by bit using the
 * probabilities p[9 + bit]: bits 0..2 first, then the top bit (7 for VP7,
 * 9 for VP8) down to bit 4, and finally bit 3. Bit 3 is only read from the
 * bitstream when some bit above it is already set; otherwise it is added
 * unconditionally. In the short form a three-level tree starting at p[2]
 * yields a magnitude in the range 0..7.
 * The sign is read with p[1], and only when the magnitude is non-zero.
 *
 * @param c   range coder to read from
 * @param p   probabilities for this component
 * @param vp7 non-zero to use the VP7 long-form width (bits 0..7 instead of 0..9)
 *
 * @return the decoded component (negative, zero or positive)
859
 */
860
84886
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
861
{
862
84886
    int bit, x = 0;
863
864
84886
    if (vp56_rac_get_prob_branchy(c, p[0])) {
865
        int i;
866
867
79804
        /* low three bits, least significant first */
        for (i = 0; i < 3; i++)
868
59853
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
869

139657
        /* high bits, most significant first, stopping above bit 3 */
        for (i = (vp7 ? 7 : 9); i > 3; i--)
870
119706
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
871

19951
        /* bit 3: forced on when no higher bit is set, else read with p[12] (= p[9 + 3]) */
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
872
13304
            x += 8;
873
    } else {
874
        // small_mvtree
875
64935
        const uint8_t *ps = p + 2;
876
64935
        bit = vp56_rac_get_prob(c, *ps);
877
64935
        ps += 1 + 3 * bit;
878
64935
        x  += 4 * bit;
879
64935
        bit = vp56_rac_get_prob(c, *ps);
880
64935
        ps += 1 + bit;
881
64935
        x  += 2 * bit;
882
64935
        x  += vp56_rac_get_prob(c, *ps);
883
    }
884
885

84886
    /* the sign bit is only present in the stream for a non-zero magnitude */
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
886
}
887
888
/**
 * VP7 flavour of read_mv_component(): forwards to it with vp7 = 1.
 *
 * @param c range coder to read from
 * @param p probabilities for this component
 *
 * @return the decoded motion vector component
 */
static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
889
{
890
    return read_mv_component(c, p, 1);
891
}
892
893
28388
/**
 * VP8 flavour of read_mv_component(): forwards to it with vp7 = 0.
 *
 * @param c range coder to read from
 * @param p probabilities for this component
 *
 * @return the decoded motion vector component
 */
static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
894
{
895
28388
    return read_mv_component(c, p, 0);
896
}
897
898
/**
 * Select the probability table used to decode one sub-block motion vector.
 *
 * For VP7 the single table vp7_submv_prob is always returned.
 * For VP8 a row of vp8_submv_prob is chosen from the left and top
 * neighbour values:
 *   - left == top:          row 4 when both are zero, row 3 otherwise
 *   - top == 0 (left != 0): row 2
 *   - otherwise:            row 1 when left is zero, row 0 when it is not
 *
 * @param left   left neighbour motion vector, passed as one 32-bit value
 * @param top    top neighbour motion vector, passed as one 32-bit value
 * @param is_vp7 non-zero when decoding VP7
 *
 * @return pointer to the selected probabilities
 */
static av_always_inline
899
136556
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
900
{
901
136556
    if (is_vp7)
902
        return vp7_submv_prob;
903
904
136556
    if (left == top)
905
58677
        return vp8_submv_prob[4 - !!left];
906
77879
    if (!top)
907
18933
        return vp8_submv_prob[2];
908
58946
    return vp8_submv_prob[1 - !!left];
909
}
910
911
/**
912
 * Split motion vector prediction, 16.4.
 *
 * Reads the partitioning of the macroblock, stores it in mb->partitioning,
 * then decodes one motion vector per partition into mb->bmv[0..num-1].
 * Each partition vector is either a copy of its left neighbour, a copy of
 * its above neighbour, zero, or mb->mv plus a newly read delta (y component
 * read before x). Neighbours come from the current macroblock when the
 * partition's first sub-block is not in column 0 / row 0, and from the
 * left / top macroblock otherwise.
 *
 * @param s      decoder context (mb_width and the mvc probabilities are used)
 * @param c      range coder to read from
 * @param mb     current macroblock; mb->mv must already hold the base vector
 * @param layout selects where the top macroblock is found relative to mb
 *               (0: mb + 2, otherwise: mb - s->mb_width - 1)
 * @param is_vp7 non-zero when decoding VP7
 *
913
 * @returns the number of motion vectors parsed (2, 4 or 16)
914
 */
915
static av_always_inline
916
18325
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
917
                    int layout, int is_vp7)
918
{
919
    int part_idx;
920
    int n, num;
921
    VP8Macroblock *top_mb;
922
18325
    VP8Macroblock *left_mb = &mb[-1];
923
18325
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
924
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
925
    VP56mv *top_mv;
926
18325
    VP56mv *left_mv = left_mb->bmv;
927
18325
    VP56mv *cur_mv  = mb->bmv;
928
929
18325
    if (!layout) // layout is inlined, s->mb_layout is not
930
18325
        top_mb = &mb[2];
931
    else
932
        top_mb = &mb[-s->mb_width - 1];
933
18325
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
934
18325
    top_mv       = top_mb->bmv;
935
936
18325
    /* partitioning tree: 4x4, 8x8, or 16x8 / the mode following 16x8 */
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
937
11713
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
938
8044
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
939
        else
940
3669
            part_idx = VP8_SPLITMVMODE_8x8;
941
    } else {
942
6612
        part_idx = VP8_SPLITMVMODE_4x4;
943
    }
944
945
18325
    num              = vp8_mbsplit_count[part_idx];
946
18325
    /* NOTE(review): this statement ends in ',' rather than ';'. The comma
     * operator chains it with the next assignment, so the result is the
     * same, but it looks like a typo. */
    mbsplits_cur     = vp8_mbsplits[part_idx],
947
18325
    firstidx         = vp8_mbfirstidx[part_idx];
948
18325
    mb->partitioning = part_idx;
949
950
154881
    for (n = 0; n < num; n++) {
951
136556
        /* k: index (0..15) of the first 4x4 sub-block of partition n */
        int k = firstidx[n];
952
        uint32_t left, above;
953
        const uint8_t *submv_prob;
954
955
136556
        /* column 0: the left neighbour lives in the left macroblock */
        if (!(k & 3))
956
46739
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
957
        else
958
89817
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
959
136556
        /* row 0: the above neighbour lives in the top macroblock */
        if (k <= 3)
960
44965
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
961
        else
962
91591
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
963
964
136556
        submv_prob = get_submv_prob(left, above, is_vp7);
965
966
136556
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
967
50605
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
968
31737
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
969
56498
                    mb->bmv[n].y = mb->mv.y +
970
28249
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
971
28249
                    mb->bmv[n].x = mb->mv.x +
972
28249
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
973
                } else {
974
3488
                    AV_ZERO32(&mb->bmv[n]);
975
                }
976
            } else {
977
18868
                AV_WN32A(&mb->bmv[n], above);
978
            }
979
        } else {
980
85951
            AV_WN32A(&mb->bmv[n], left);
981
        }
982
    }
983
984
18325
    return num;
985
}
986
987
/**
988
 * The vp7 reference decoder uses a padding macroblock column (added to right
989
 * edge of the frame) to guard against illegal macroblock offsets. The
990
 * algorithm has bugs that permit offsets to straddle the padding column.
991
 * This function replicates those bugs.
992
 *
 * The target position is computed as a linear index into a grid that is
 * mb_width + 1 macroblocks wide. It is rejected when the index is below
 * boundary or lands in the last (padding) column of that grid. On rejection
 * edge_x and edge_y are left untouched.
 *
 * @param mb_x     x address of the current macroblock
 * @param mb_y     y address of the current macroblock
 * @param mb_width frame width in macroblocks (without the padding column)
 * @param xoffset  horizontal offset, in macroblocks, to apply
 * @param yoffset  vertical offset, in macroblocks, to apply
 * @param boundary smallest linear index that is accepted
993
 * @param[out] edge_x macroblock x address
994
 * @param[out] edge_y macroblock y address
995
 *
996
 * @return macroblock offset legal (boolean)
997
 */
998
76560
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
999
                                   int xoffset, int yoffset, int boundary,
1000
                                   int *edge_x, int *edge_y)
1001
{
1002
76560
    int vwidth = mb_width + 1;
1003
76560
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
1004

76560
    if (new < boundary || new % vwidth == vwidth - 1)
1005
11861
        return 0;
1006
64699
    *edge_y = new / vwidth;
1007
64699
    *edge_x = new % vwidth;
1008
64699
    return 1;
1009
}
1010
1011
64699
/**
 * Return the motion vector that covers a given sub-block of a macroblock.
 *
 * For a split-mode macroblock the sub-block index is mapped through
 * vp8_mbsplits[mb->partitioning] to the partition's entry in mb->bmv;
 * for every other mode mb->bmv[0] is returned.
 *
 * @param mb       macroblock to inspect
 * @param subblock sub-block index inside the macroblock
 *
 * @return pointer into mb->bmv
 */
static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
1012
{
1013
64699
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
1014
}
1015
1016
/**
 * Decode the inter prediction mode and motion vector(s) of a VP7 macroblock.
 *
 * Walks the VP7_MV_PRED_COUNT entries of vp7_mv_pred. For each entry whose
 * target macroblock is accepted by vp7_calculate_mb_offset(), the motion
 * vector of the referenced sub-block is classified as zero, "nearest" (first
 * distinct non-zero vector seen) or "near" (second distinct one), and the
 * entry's score is added to the matching counter. A non-zero vector that
 * matches neither candidate, once both are set, is skipped without scoring.
 * The counters then index vp7_mode_contexts to decode the mode tree, which
 * sets mb->mode, mb->mv, mb->bmv[] and mb->partitioning.
 *
 * @param s      decoder context
 * @param mb     macroblock being decoded (written)
 * @param mb_x   x address of mb
 * @param mb_y   y address of mb
 * @param layout forwarded to decode_splitmvs()
 */
static av_always_inline
1017
6380
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1018
                    int mb_x, int mb_y, int layout)
1019
{
1020
    /* NOTE(review): mb_edge[] is only written in this function, never read
     * back; presumably VP7_MV_PRED_COUNT <= 12 -- confirm in vp8data.h. */
    VP8Macroblock *mb_edge[12];
1021
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
1022
    /* NOTE(review): the enum below is not referenced in this function. */
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1023
6380
    int idx = CNT_ZERO;
1024
    VP56mv near_mv[3];
1025
6380
    uint8_t cnt[3] = { 0 };
1026
6380
    VP56RangeCoder *c = &s->c;
1027
    int i;
1028
1029
6380
    AV_ZERO32(&near_mv[0]);
1030
6380
    AV_ZERO32(&near_mv[1]);
1031
6380
    AV_ZERO32(&near_mv[2]);
1032
1033
82940
    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
1034
76560
        const VP7MVPred * pred = &vp7_mv_pred[i];
1035
        int edge_x, edge_y;
1036
1037
76560
        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
1038
76560
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
1039
129398
            /* locate the neighbour macroblock; the addressing depends on s->mb_layout */
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
1040
64699
                                             ? s->macroblocks_base + 1 + edge_x +
1041
64699
                                               (s->mb_width + 1) * (edge_y + 1)
1042
64699
                                             : s->macroblocks + edge_x +
1043
                                               (s->mb_height - edge_y - 1) * 2;
1044
64699
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
1045
64699
            if (mv) {
1046
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
1047
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
1048
                        idx = CNT_NEAREST;
1049
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
1050
                        /* a third distinct vector is ignored: no score is added */
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
1051
                            continue;
1052
                        idx = CNT_NEAR;
1053
                    } else {
1054
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
1055
                        idx = CNT_NEAR;
1056
                    }
1057
                } else {
1058
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
1059
                    idx = CNT_NEAREST;
1060
                }
1061
            } else {
1062
64699
                idx = CNT_ZERO;
1063
            }
1064
        } else {
1065
11861
            /* rejected offsets are scored as if the neighbour had a zero vector */
            idx = CNT_ZERO;
1066
        }
1067
76560
        cnt[idx] += vp7_mv_pred[i].score;
1068
    }
1069
1070
6380
    mb->partitioning = VP8_SPLITMVMODE_NONE;
1071
1072
6380
    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
1073
        mb->mode = VP8_MVMODE_MV;
1074
1075
        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
1076
1077
            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
1078
1079
                /* base vector: the higher-scored candidate, or zero when
                 * the zero counter beats that candidate's counter */
                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
1080
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1081
                else
1082
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1083
1084
                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1085
                    mb->mode = VP8_MVMODE_SPLIT;
1086
                    /* the macroblock vector becomes the last partition's vector */
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1087
                } else {
1088
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1089
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1090
                    mb->bmv[0] = mb->mv;
1091
                }
1092
            } else {
1093
                mb->mv = near_mv[CNT_NEAR];
1094
                mb->bmv[0] = mb->mv;
1095
            }
1096
        } else {
1097
            mb->mv = near_mv[CNT_NEAREST];
1098
            mb->bmv[0] = mb->mv;
1099
        }
1100
    } else {
1101
6380
        mb->mode = VP8_MVMODE_ZERO;
1102
6380
        AV_ZERO32(&mb->mv);
1103
6380
        mb->bmv[0] = mb->mv;
1104
    }
1105
6380
}
1106
1107
/**
 * Decode the inter prediction mode and motion vector(s) of a VP8 macroblock.
 *
 * The top, left and top-left neighbours are examined in that order by
 * MV_EDGE_CHECK. Neighbours whose ref_frame is VP56_FRAME_CURRENT are
 * ignored. A neighbour's vector is negated when its reference frame has a
 * different sign bias than the current macroblock's. Zero vectors add to
 * cnt[CNT_ZERO]; a non-zero vector either adds to the most recent candidate
 * (when equal to it) or opens the next candidate slot in near_mv[]. The
 * top and left neighbours weigh 2, the top-left neighbour weighs 1.
 * The counters then index vp8_mode_contexts to decode the mode tree, which
 * sets mb->mode, mb->mv, mb->bmv[] and mb->partitioning.
 *
 * near_mv[3] is not zeroed up front; it is only read when cnt[CNT_SPLITMV]
 * is non-zero, which requires idx to have reached 3 and therefore
 * near_mv[3] to have been written by MV_EDGE_CHECK.
 *
 * @param s         decoder context
 * @param mv_bounds limits passed to clamp_mv() for predicted vectors
 * @param mb        macroblock being decoded; mb->ref_frame must be set
 * @param mb_x      x address of mb (not used in the visible body)
 * @param mb_y      y address of mb (not used in the visible body)
 * @param layout    selects where the top / top-left neighbours are found
 *                  relative to mb; also forwarded to decode_splitmvs()
 */
static av_always_inline
1108
379497
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
1109
                    int mb_x, int mb_y, int layout)
1110
{
1111
379497
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
1112
379497
                                  mb - 1 /* left */,
1113
                                  0      /* top-left */ };
1114
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1115
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1116
379497
    int idx = CNT_ZERO;
1117
379497
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
1118
379497
    int8_t *sign_bias = s->sign_bias;
1119
    VP56mv near_mv[4];
1120
379497
    uint8_t cnt[4] = { 0 };
1121
379497
    VP56RangeCoder *c = &s->c;
1122
1123
379497
    if (!layout) { // layout is inlined (s->mb_layout is not)
1124
379497
        mb_edge[0] = mb + 2;
1125
379497
        mb_edge[2] = mb + 1;
1126
    } else {
1127
        mb_edge[0] = mb - s->mb_width - 1;
1128
        mb_edge[2] = mb - s->mb_width - 2;
1129
    }
1130
1131
379497
    AV_ZERO32(&near_mv[0]);
1132
379497
    AV_ZERO32(&near_mv[1]);
1133
379497
    AV_ZERO32(&near_mv[2]);
1134
1135
    /* Process MB on top, left and top-left */
1136
#define MV_EDGE_CHECK(n)                                                      \
1137
    {                                                                         \
1138
        VP8Macroblock *edge = mb_edge[n];                                     \
1139
        int edge_ref = edge->ref_frame;                                       \
1140
        if (edge_ref != VP56_FRAME_CURRENT) {                                 \
1141
            uint32_t mv = AV_RN32A(&edge->mv);                                \
1142
            if (mv) {                                                         \
1143
                if (cur_sign_bias != sign_bias[edge_ref]) {                   \
1144
                    /* SWAR negate of the values in mv. */                    \
1145
                    mv = ~mv;                                                 \
1146
                    mv = ((mv & 0x7fff7fff) +                                 \
1147
                          0x00010001) ^ (mv & 0x80008000);                    \
1148
                }                                                             \
1149
                if (!n || mv != AV_RN32A(&near_mv[idx]))                      \
1150
                    AV_WN32A(&near_mv[++idx], mv);                            \
1151
                cnt[idx] += 1 + (n != 2);                                     \
1152
            } else                                                            \
1153
                cnt[CNT_ZERO] += 1 + (n != 2);                                \
1154
        }                                                                     \
1155
    }
1156
1157

379497
    MV_EDGE_CHECK(0)
1158


379497
    MV_EDGE_CHECK(1)
1159


379497
    MV_EDGE_CHECK(2)
1160
1161
379497
    mb->partitioning = VP8_SPLITMVMODE_NONE;
1162
379497
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1163
57174
        mb->mode = VP8_MVMODE_MV;
1164
1165
        /* If we have three distinct MVs, merge first and last if they're the same */
1166
57174
        if (cnt[CNT_SPLITMV] &&
1167
12759
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1168
5465
            cnt[CNT_NEAREST] += 1;
1169
1170
        /* Swap near and nearest if necessary */
1171
57174
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1172
4482
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
1173
4482
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1174
        }
1175
1176
57174
        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1177
37607
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1178
                /* Choose the best mv out of 0,0 and the nearest mv */
1179
32519
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1180
32519
                /* cnt[CNT_SPLITMV] is reused as the split-mode context:
                 * 2 per split left/top neighbour plus 1 for a split top-left */
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
1181
32519
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
1182
32519
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1183
1184
32519
                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1185
18325
                    mb->mode = VP8_MVMODE_SPLIT;
1186
18325
                    /* the macroblock vector becomes the last partition's vector */
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1187
                } else {
1188
14194
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
1189
14194
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
1190
14194
                    mb->bmv[0] = mb->mv;
1191
                }
1192
            } else {
1193
5088
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
1194
5088
                mb->bmv[0] = mb->mv;
1195
            }
1196
        } else {
1197
19567
            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
1198
19567
            mb->bmv[0] = mb->mv;
1199
        }
1200
    } else {
1201
322323
        mb->mode = VP8_MVMODE_ZERO;
1202
322323
        AV_ZERO32(&mb->mv);
1203
322323
        mb->bmv[0] = mb->mv;
1204
    }
1205
379497
}
1206
1207
/**
 * Decode the sixteen 4x4 intra prediction modes of a macroblock into
 * mb->intra4x4_pred_mode_mb.
 *
 * On keyframes each mode is read with a probability row selected by the
 * modes of the sub-blocks above and to the left, and the top / left context
 * arrays are updated as decoding proceeds (row by row, left to right).
 * On other frames all sixteen modes are read with the fixed
 * vp8_pred4x4_prob_inter probabilities and no context is touched.
 *
 * When layout is non-zero the top context is kept per macroblock: it is
 * first copied from the macroblock at mb - s->mb_width - 1. Otherwise the
 * shared row s->intra4x4_pred_mode_top (offset 4 * mb_x) is used.
 *
 * @param s        decoder context
 * @param c        range coder to read from
 * @param mb       macroblock being decoded (written)
 * @param mb_x     x address of mb, used to index the shared top context
 * @param keyframe non-zero to use context-dependent probabilities
 * @param layout   selects per-macroblock (non-zero) or shared (0) top context
 */
static av_always_inline
1208
17675
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1209
                           int mb_x, int keyframe, int layout)
1210
{
1211
17675
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1212
1213
17675
    if (layout) {
1214
33
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
1215
33
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1216
    }
1217
17675
    if (keyframe) {
1218
        int x, y;
1219
        uint8_t *top;
1220
10822
        uint8_t *const left = s->intra4x4_pred_mode_left;
1221
10822
        if (layout)
1222
33
            top = mb->intra4x4_pred_mode_top;
1223
        else
1224
10789
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
1225
54110
        for (y = 0; y < 4; y++) {
1226
216440
            for (x = 0; x < 4; x++) {
1227
                const uint8_t *ctx;
1228
173152
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
1229
173152
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1230
173152
                /* the new mode is the context for the sub-blocks below and to the right */
                left[y]   = top[x] = *intra4x4;
1231
173152
                intra4x4++;
1232
            }
1233
        }
1234
    } else {
1235
        int i;
1236
116501
        for (i = 0; i < 16; i++)
1237
109648
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1238
                                           vp8_pred4x4_prob_inter);
1239
    }
1240
17675
}
1241
1242
/**
 * Decode the per-macroblock header: segment id, skip flag, prediction mode,
 * reference frame and (for inter macroblocks) motion vectors.
 *
 * Segment handling:
 *   - VP7: *segment is set to 0; for each enabled feature that is flagged
 *     present, its index is read and a warning is logged (the value is not
 *     otherwise applied here).
 *   - VP8 with segmentation.update_map: the segment id is read as two bits.
 *   - VP8 with segmentation enabled but no map update: the id is taken from
 *     *ref when ref is non-NULL, otherwise *segment is kept.
 *
 * Mode handling:
 *   - keyframe: intra 16x16 mode, optional 4x4 modes, chroma mode.
 *   - inter macroblock: reference frame selection, s->ref_count update,
 *     then vp7_decode_mvs() / vp8_decode_mvs() which set mb->mode.
 *   - intra macroblock in an inter frame: as on keyframes but with the
 *     probabilities from s->prob; bmv[0] is zeroed and partitioning cleared.
 *
 * @param s         decoder context
 * @param mv_bounds motion vector limits, forwarded to vp8_decode_mvs()
 * @param mb        macroblock being decoded (written)
 * @param mb_x      x address of mb
 * @param mb_y      y address of mb
 * @param segment   in/out: segment id of this macroblock
 * @param ref       optional source for the segment id, may be NULL
 * @param layout    macroblock / context layout selector
 * @param is_vp7    non-zero when decoding VP7
 */
static av_always_inline
1243
443209
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
1244
                    VP8Macroblock *mb, int mb_x, int mb_y,
1245
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1246
{
1247
443209
    VP56RangeCoder *c = &s->c;
1248
    static const char * const vp7_feature_name[] = { "q-index",
1249
                                                     "lf-delta",
1250
                                                     "partial-golden-update",
1251
                                                     "blit-pitch" };
1252
443209
    if (is_vp7) {
1253
        int i;
1254
6600
        *segment = 0;
1255
33000
        for (i = 0; i < 4; i++) {
1256
26400
            if (s->feature_enabled[i]) {
1257
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1258
                      int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1259
                                                   s->feature_index_prob[i]);
1260
                      av_log(s->avctx, AV_LOG_WARNING,
1261
                             "Feature %s present in macroblock (value 0x%x)\n",
1262
                             vp7_feature_name[i], s->feature_value[i][index]);
1263
                }
1264
           }
1265
        }
1266
436609
    } else if (s->segmentation.update_map) {
1267
8289
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1268
8289
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1269
428320
    } else if (s->segmentation.enabled)
1270
94425
        *segment = ref ? *ref : *segment;
1271
443209
    mb->segment = *segment;
1272
1273
443209
    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1274
1275
443209
    if (s->keyframe) {
1276
32872
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1277
                                    vp8_pred16x16_prob_intra);
1278
1279
32872
        if (mb->mode == MODE_I4x4) {
1280
10822
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1281
        } else {
1282
22050
            /* replicate the equivalent 4x4 mode into all four context bytes */
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1283
22050
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1284
22050
            /* NOTE(review): this tests s->mb_layout, not the layout parameter
             * used elsewhere in this function -- presumably always equal. */
            if (s->mb_layout)
1285
187
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1286
            else
1287
21863
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1288
22050
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
1289
        }
1290
1291
32872
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1292
                                                vp8_pred8x8c_prob_intra);
1293
32872
        mb->ref_frame        = VP56_FRAME_CURRENT;
1294
410337
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1295
        // inter MB, 16.2
1296
385877
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
1297
34528
            /* the golden/altref bit is only read for VP8 */
            mb->ref_frame =
1298
17248
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1299
                                                                   : VP56_FRAME_GOLDEN;
1300
        else
1301
368597
            mb->ref_frame = VP56_FRAME_PREVIOUS;
1302
385877
        s->ref_count[mb->ref_frame - 1]++;
1303
1304
        // motion vectors, 16.3
1305
385877
        if (is_vp7)
1306
6380
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1307
        else
1308
379497
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
1309
    } else {
1310
        // intra MB, 16.1
1311
24460
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1312
1313
24460
        if (mb->mode == MODE_I4x4)
1314
6853
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1315
1316
48920
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1317
24460
                                                s->prob->pred8x8c);
1318
24460
        mb->ref_frame        = VP56_FRAME_CURRENT;
1319
24460
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
1320
24460
        AV_ZERO32(&mb->bmv[0]);
1321
    }
1322
443209
}
1323
1324
/**
 * Decode the coefficient tokens of one 4x4 block after the caller has
 * already consumed the first end-of-block decision (decoding therefore
 * starts at the skip_eob label, not at the EOB test).
 *
 * The range coder is copied into a local variable for the duration of the
 * loop and written back to *r on exit. Each non-zero coefficient is
 * multiplied by qmul[0] (index 0) or qmul[1] (all other indices) and stored
 * at block[scan[i]]. After a zero token VP7 re-tests for end-of-block while
 * VP8 goes straight to the next token.
 *
1325
 * @param r     arithmetic bitstream reader context
1326
 * @param block destination for block coefficients
1327
 * @param probs probabilities to use when reading trees from the bitstream
1328
 * @param i     initial coeff index, 0 unless a separate DC block is coded
 * @param token_prob probabilities for the first token (the caller's
 *                   probs[i][context] row)
1329
 * @param qmul  array holding the dc/ac dequant factor at position 0/1
 * @param scan  maps coefficient index to position in block
 * @param vp7   non-zero when decoding VP7
1330
 *
1331
 * @return 0 if no coeffs were decoded
1332
 *         otherwise, the index of the last coeff decoded plus one
1333
 */
1334
static av_always_inline
1335
393067
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1336
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1337
                                 int i, uint8_t *token_prob, int16_t qmul[2],
1338
                                 const uint8_t scan[16], int vp7)
1339
{
1340
393067
    VP56RangeCoder c = *r;
1341
393067
    goto skip_eob;
1342
    do {
1343
        int coeff;
1344
1101534
restart:
1345
1103653
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
1346
373414
            break;
1347
1348
730239
skip_eob:
1349
1956826
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1350
835639
            if (++i == 16)
1351
                break; // invalid input; blocks should end with EOB
1352
835639
            token_prob = probs[i][0];
1353
835639
            if (vp7)
1354
2119
                goto restart;
1355
833520
            goto skip_eob;
1356
        }
1357
1358
1121187
        /* NOTE(review): for i == 15 the probs[i + 1] expressions below form
         * a pointer to probs[16]; it is not dereferenced because the loop
         * condition ends the loop first. */
        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1359
719461
            coeff = 1;
1360
719461
            token_prob = probs[i + 1][1];
1361
        } else {
1362
401726
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1363
274407
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1364
274407
                if (coeff)
1365
113420
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
1366
274407
                coeff += 2;
1367
            } else {
1368
                // DCT_CAT*
1369
127319
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1370
83429
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1371
45203
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1372
                    } else {                                    // DCT_CAT2
1373
38226
                        coeff  = 7;
1374
38226
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1375
38226
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1376
                    }
1377
                } else {    // DCT_CAT3 and up
1378
43890
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
1379
43890
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
1380
43890
                    int cat = (a << 1) + b;
1381
43890
                    coeff  = 3 + (8 << cat);
1382
43890
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1383
                }
1384
            }
1385
401726
            token_prob = probs[i + 1][2];
1386
        }
1387

1121187
        /* sign bit, then dequantise: qmul[0] for index 0, qmul[1] otherwise */
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1388
1121187
    } while (++i < 16);
1389
1390
393067
    *r = c;
1391
393067
    return i;
1392
}
1393
1394
/**
 * Apply DC prediction across inter macroblocks to block[0] and update the
 * predictor state.
 *
 * pred[0] holds the previously stored DC value and pred[1] a counter of
 * consecutive repeats. When the counter exceeds 3 the stored DC is added to
 * the decoded one. Afterwards the (possibly adjusted) DC is written to both
 * block[0] and pred[0]. The counter is reset to 0 when the old predictor or
 * the new DC is zero or the two have opposite signs; otherwise it is
 * incremented only if the new DC equals the old predictor.
 *
 * @param block coefficient block; only block[0] is read and written
 * @param pred  predictor state: pred[0] = DC value, pred[1] = repeat counter
 *
 * @return 1 if the predictor was added to the DC, 0 otherwise
 */
static av_always_inline
1395
6380
int inter_predict_dc(int16_t block[16], int16_t pred[2])
1396
{
1397
6380
    int16_t dc = block[0];
1398
6380
    int ret = 0;
1399
1400
6380
    if (pred[1] > 3) {
1401
        dc += pred[0];
1402
        ret = 1;
1403
    }
1404
1405
6380
    /* '>>' binds tighter than '|': the last term is the sign of (pred[0] ^ dc),
     * non-zero when the two values have opposite signs */
    if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1406
6380
        block[0] = pred[0] = dc;
1407
6380
        pred[1] = 0;
1408
    } else {
1409
        if (pred[0] == dc)
1410
            pred[1]++;
1411
        block[0] = pred[0] = dc;
1412
    }
1413
1414
6380
    return ret;
1415
}
1416
1417
734
/**
 * VP7 instantiation of decode_block_coeffs_internal(): forwards every
 * argument unchanged and passes IS_VP7 as the codec selector.
 *
 * @param r          arithmetic bitstream reader context
 * @param block      destination for block coefficients
 * @param probs      token probabilities
 * @param i          initial coeff index
 * @param token_prob probabilities for the first token
 * @param qmul       dc/ac dequant factors at position 0/1
 * @param scan       scan pattern to use
 *
 * @return value of decode_block_coeffs_internal()
 */
static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1418
                                            int16_t block[16],
1419
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1420
                                            int i, uint8_t *token_prob,
1421
                                            int16_t qmul[2],
1422
                                            const uint8_t scan[16])
1423
{
1424
734
    return decode_block_coeffs_internal(r, block, probs, i,
1425
                                        token_prob, qmul, scan, IS_VP7);
1426
}
1427
1428
/*
 * VP8 instantiation of decode_block_coeffs_internal(): always uses
 * ff_zigzag_scan and passes IS_VP8 as the codec selector.
 * Only compiled when no macro of the same name is already defined
 * (presumably an architecture-specific replacement -- confirm in the
 * arch headers).
 */
#ifndef vp8_decode_block_coeffs_internal
1429
392333
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1430
                                            int16_t block[16],
1431
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1432
                                            int i, uint8_t *token_prob,
1433
                                            int16_t qmul[2])
1434
{
1435
392333
    return decode_block_coeffs_internal(r, block, probs, i,
1436
                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
1437
}
1438
#endif
1439
1440
/**
 * Decode one 4x4 coefficient block.
 *
 * The first end-of-block decision is read here with probs[i][zero_nhood];
 * if the block is empty 0 is returned immediately. Otherwise decoding is
 * handed to the VP7 or VP8 internal decoder. The VP8 path does not receive
 * scan.
 *
1441
 * @param c          arithmetic bitstream reader context
1442
 * @param block      destination for block coefficients
1443
 * @param probs      probabilities to use when reading trees from the bitstream
1444
 * @param i          initial coeff index, 0 unless a separate DC block is coded
1445
 * @param zero_nhood the initial prediction context for number of surrounding
1446
 *                   all-zero blocks (only left/top, so 0-2)
1447
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
1448
 * @param scan       scan pattern (VP7 only)
 * @param vp7        non-zero when decoding VP7
1449
 *
1450
 * @return 0 if no coeffs were decoded
1451
 *         otherwise, the index of the last coeff decoded plus one
1452
 */
1453
static av_always_inline
1454
2050151
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1455
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1456
                        int i, int zero_nhood, int16_t qmul[2],
1457
                        const uint8_t scan[16], int vp7)
1458
{
1459
2050151
    uint8_t *token_prob = probs[i][zero_nhood];
1460
2050151
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
1461
1657084
        return 0;
1462
734
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1463
                                                  token_prob, qmul, scan)
1464
393801
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
1465
                                                  token_prob, qmul);
1466
}
1467
1468
/**
 * Decode all residual coefficients of one macroblock.
 *
 * Order of decoding:
 *   1. An optional separate luma DC block (td->block_dc), present unless the
 *      mode is MODE_I4x4 or, for VP8 only, VP8_MVMODE_SPLIT. For VP7
 *      macroblocks with mode > MODE_I4x4 the DC goes through
 *      inter_predict_dc(). A non-empty DC block is spread into td->block
 *      with the vp8_luma_dc_wht / vp8_luma_dc_wht_dc function.
 *   2. The sixteen luma blocks, starting at coefficient 1 with token set 0
 *      when a DC block was coded, otherwise at coefficient 0 with token
 *      set 3.
 *   3. Four chroma blocks for each of td->block[4] and td->block[5].
 *
 * Per-block results are stored in td->non_zero_count_cache and the
 * top / left non-zero context arrays are updated in place. If no
 * coefficient was decoded at all, mb->skip is forced to 1.
 *
 * @param s      decoder context (probabilities, quantisers, dsp functions)
 * @param td     per-thread data holding the coefficient buffers
 * @param c      range coder to read coefficient tokens from
 * @param mb     current macroblock (mode, segment, ref_frame read; skip written)
 * @param t_nnz  top non-zero context: [0..3] luma, [4..7] chroma, [8] luma DC
 * @param l_nnz  left non-zero context, same index scheme as t_nnz
 * @param is_vp7 non-zero when decoding VP7
 */
static av_always_inline
1469
83297
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1470
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1471
                      int is_vp7)
1472
{
1473
83297
    int i, x, y, luma_start = 0, luma_ctx = 3;
1474
83297
    int nnz_pred, nnz, nnz_total = 0;
1475
83297
    int segment = mb->segment;
1476
83297
    int block_dc = 0;
1477
1478

83297
    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1479
51023
        nnz_pred = t_nnz[8] + l_nnz[8];
1480
1481
        // decode DC values and do hadamard
1482
51023
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1483
51023
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
1484
                                  ff_zigzag_scan, is_vp7);
1485
51023
        l_nnz[8] = t_nnz[8] = !!nnz;
1486
1487

51023
        if (is_vp7 && mb->mode > MODE_I4x4) {
1488
6380
            nnz |=  inter_predict_dc(td->block_dc,
1489
6380
                                     s->inter_dc_pred[mb->ref_frame - 1]);
1490
        }
1491
1492
51023
        if (nnz) {
1493
34741
            nnz_total += nnz;
1494
34741
            block_dc   = 1;
1495
34741
            /* nnz == 1: the DC-only variant of the transform is called */
            if (nnz == 1)
1496
10303
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1497
            else
1498
24438
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1499
        }
1500
51023
        /* luma blocks now start at coefficient 1 and use token set 0 */
        luma_start = 1;
1501
51023
        luma_ctx   = 0;
1502
    }
1503
1504
    // luma blocks
1505
416485
    for (y = 0; y < 4; y++)
1506
1665940
        for (x = 0; x < 4; x++) {
1507
1332752
            nnz_pred = l_nnz[y] + t_nnz[x];
1508
1332752
            nnz = decode_block_coeffs(c, td->block[y][x],
1509
1332752
                                      s->prob->token[luma_ctx],
1510
                                      luma_start, nnz_pred,
1511
1332752
                                      s->qmat[segment].luma_qmul,
1512
1332752
                                      s->prob[0].scan, is_vp7);
1513
            /* nnz+block_dc may be one more than the actual last index,
1514
             * but we don't care */
1515
1332752
            td->non_zero_count_cache[y][x] = nnz + block_dc;
1516
1332752
            t_nnz[x] = l_nnz[y] = !!nnz;
1517
1332752
            nnz_total += nnz;
1518
        }
1519
1520
    // chroma blocks
1521
    // TODO: what to do about dimensions? 2nd dim for luma is x,
1522
    // but for chroma it's (y<<1)|x
1523
249891
    for (i = 4; i < 6; i++)
1524
499782
        for (y = 0; y < 2; y++)
1525
999564
            for (x = 0; x < 2; x++) {
1526
666376
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1527
666376
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1528
666376
                                          s->prob->token[2], 0, nnz_pred,
1529
666376
                                          s->qmat[segment].chroma_qmul,
1530
666376
                                          s->prob[0].scan, is_vp7);
1531
666376
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1532
666376
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1533
666376
                nnz_total += nnz;
1534
            }
1535
1536
    // if there were no coded coeffs despite the macroblock not being marked skip,
1537
    // we MUST not do the inner loop filter and should not do IDCT
1538
    // Since skip isn't used for bitstream prediction, just manually set it.
1539
83297
    if (!nnz_total)
1540
6457
        mb->skip = 1;
1541
83297
}
1542
1543
static av_always_inline
1544
424229
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1545
                      uint8_t *src_cb, uint8_t *src_cr,
1546
                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1547
{
1548
424229
    AV_COPY128(top_border, src_y + 15 * linesize);
1549
424229
    if (!simple) {
1550
416507
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1551
416507
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1552
    }
1553
424229
}
1554
1555
static av_always_inline
1556
84130
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1557
                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1558
                    int mb_y, int mb_width, int simple, int xchg)
1559
{
1560
84130
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
1561
84130
    src_y  -= linesize;
1562
84130
    src_cb -= uvlinesize;
1563
84130
    src_cr -= uvlinesize;
1564
1565
#define XCHG(a, b, xchg)                                                      \
1566
    do {                                                                      \
1567
        if (xchg)                                                             \
1568
            AV_SWAP64(b, a);                                                  \
1569
        else                                                                  \
1570
            AV_COPY64(b, a);                                                  \
1571
    } while (0)
1572
1573
84130
    XCHG(top_border_m1 + 8, src_y - 8, xchg);
1574
84130
    XCHG(top_border, src_y, xchg);
1575
84130
    XCHG(top_border + 8, src_y + 8, 1);
1576
84130
    if (mb_x < mb_width - 1)
1577
80706
        XCHG(top_border + 32, src_y + 16, 1);
1578
1579
    // only copy chroma for normal loop filter
1580
    // or to initialize the top row to 127
1581

84130
    if (!simple || !mb_y) {
1582
81048
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1583
81048
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1584
81048
        XCHG(top_border + 16, src_cb, 1);
1585
81048
        XCHG(top_border + 24, src_cr, 1);
1586
    }
1587
84130
}
1588
1589
static av_always_inline
1590
72888
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1591
{
1592
72888
    if (!mb_x)
1593
3029
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1594
    else
1595
69859
        return mb_y ? mode : LEFT_DC_PRED8x8;
1596
}
1597
1598
static av_always_inline
1599
1809
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1600
{
1601
1809
    if (!mb_x)
1602

1
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1603
    else
1604
1808
        return mb_y ? mode : HOR_PRED8x8;
1605
}
1606
1607
static av_always_inline
1608
96989
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1609
{
1610

96989
    switch (mode) {
1611
72888
    case DC_PRED8x8:
1612
72888
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1613
5757
    case VERT_PRED8x8:
1614

5757
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1615
16535
    case HOR_PRED8x8:
1616

16535
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1617
1809
    case PLANE_PRED8x8: /* TM */
1618
1809
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1619
    }
1620
    return mode;
1621
}
1622
1623
static av_always_inline
1624
49009
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1625
{
1626
49009
    if (!mb_x) {
1627

1362
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1628
    } else {
1629
47647
        return mb_y ? mode : HOR_VP8_PRED;
1630
    }
1631
}
1632
1633
static av_always_inline
1634
282800
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1635
                                     int *copy_buf, int vp7)
1636
{
1637

282800
    switch (mode) {
1638
13608
    case VERT_PRED:
1639

13608
        if (!mb_x && mb_y) {
1640
95
            *copy_buf = 1;
1641
95
            return mode;
1642
        }
1643
        /* fall-through */
1644
    case DIAG_DOWN_LEFT_PRED:
1645
    case VERT_LEFT_PRED:
1646

34820
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1647
19464
    case HOR_PRED:
1648
19464
        if (!mb_y) {
1649
128
            *copy_buf = 1;
1650
128
            return mode;
1651
        }
1652
        /* fall-through */
1653
    case HOR_UP_PRED:
1654

31686
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1655
49009
    case TM_VP8_PRED:
1656
49009
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1657
167062
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1658
                   * as 16x16/8x8 DC */
1659
    case DIAG_DOWN_RIGHT_PRED:
1660
    case VERT_RIGHT_PRED:
1661
    case HOR_DOWN_PRED:
1662

167062
        if (!mb_y || !mb_x)
1663
1140
            *copy_buf = 1;
1664
167062
        return mode;
1665
    }
1666
    return mode;
1667
}
1668
1669
/**
 * Intra-predict one macroblock into dst[] (luma in dst[0], chroma in
 * dst[1] and dst[2]).
 *
 * For modes below MODE_I4x4 a single 16x16 predictor is run on the luma
 * plane. Otherwise each of the sixteen 4x4 luma blocks is predicted on its
 * own and its residual is added immediately afterwards (vp8_idct_dc_add or
 * vp8_idct_add, chosen by td->non_zero_count_cache). Chroma always uses one
 * 8x8 predictor per plane, with the same mode for both planes.
 *
 * Requested modes are first passed through the check_*_emuedge() helpers,
 * which substitute predictors at the top row / left column.
 *
 * @param s       decoder context (predictor tables, strides, top_border)
 * @param td      per-thread data (coefficient blocks, nnz cache, thread_nr)
 * @param dst     destination plane pointers for this macroblock
 * @param mb      macroblock being predicted
 * @param mb_x    macroblock column
 * @param mb_y    macroblock row
 * @param is_vp7  non-zero: edge fill values are 128/128 instead of 127/129
 */
static av_always_inline
1670
57332
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1671
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1672
{
1673
    int x, y, mode, nnz;
1674
    uint32_t tr;
1675
1676
    /* for the first row, we need to run xchg_mb_border to init the top edge
1677
     * to 127 otherwise, skip it if we aren't going to deblock */
1678


57332
    /* NOTE(review): the leading mb_y test makes the "|| !mb_y" alternative
     * unreachable, so as written the exchange never runs for mb_y == 0 and
     * the comment above looks stale -- confirm before relying on it. */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1679
42065
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1680
42065
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1681
42065
                       s->filter.simple, 1);
1682
1683
57332
    /* whole-macroblock luma prediction */
    if (mb->mode < MODE_I4x4) {
1684
39657
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1685
39657
        s->hpc.pred16x16[mode](dst[0], s->linesize);
1686
    } else {
1687
17675
        /* per-4x4-block luma prediction, interleaved with the IDCT */
        uint8_t *ptr = dst[0];
1688
17675
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1689
17675
        /* fill values for a missing top edge (lo) and left edge (hi) */
        const uint8_t lo = is_vp7 ? 128 : 127;
1690
17675
        const uint8_t hi = is_vp7 ? 128 : 129;
1691
17675
        uint8_t tr_top[4] = { lo, lo, lo, lo };
1692
1693
        // all blocks on the right edge of the macroblock use bottom edge
1694
        // the top macroblock for their topright edge
1695
17675
        uint8_t *tr_right = ptr - s->linesize + 16;
1696
1697
        // if we're on the right edge of the frame, said edge is extended
1698
        // from the top macroblock
1699

17675
        if (mb_y && mb_x == s->mb_width - 1) {
1700
635
            /* replicate the last pixel of the row above into all 4 bytes */
            tr       = tr_right[-1] * 0x01010101u;
1701
635
            tr_right = (uint8_t *) &tr;
1702
        }
1703
1704
17675
        /* skipped macroblock: clear the nnz cache so no residual is added */
        if (mb->skip)
1705
460
            AV_ZERO128(td->non_zero_count_cache);
1706
1707
88375
        for (y = 0; y < 4; y++) {
1708
70700
            uint8_t *topright = ptr + 4 - s->linesize;
1709
353500
            for (x = 0; x < 4; x++) {
1710
282800
                int copy = 0;
1711
282800
                ptrdiff_t linesize = s->linesize;
1712
282800
                uint8_t *dst = ptr + 4 * x;
1713
282800
                /* 8-byte-stride scratch block: the 4x4 output starts at
                 * offset 12, the top row at 4..7, the top-left pixel at 3
                 * and the left column at 11/19/27/35 */
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1714
1715

282800
                if ((y == 0 || x == 3) && mb_y == 0) {
1716
4417
                    topright = tr_top;
1717
278383
                } else if (x == 3)
1718
68176
                    topright = tr_right;
1719
1720
282800
                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1721
                                                        mb_y + y, &copy, is_vp7);
1722
282800
                /* edges are missing: predict inside copy_dst with
                 * synthesized edge pixels instead of in the frame */
                if (copy) {
1723
1363
                    dst      = copy_dst + 12;
1724
1363
                    linesize = 8;
1725
1363
                    if (!(mb_y + y)) {
1726
702
                        copy_dst[3] = lo;
1727
702
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1728
                    } else {
1729
661
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1730
661
                        if (!(mb_x + x)) {
1731
661
                            copy_dst[3] = hi;
1732
                        } else {
1733
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
1734
                        }
1735
                    }
1736
1363
                    if (!(mb_x + x)) {
1737
702
                        copy_dst[11] =
1738
702
                        copy_dst[19] =
1739
702
                        copy_dst[27] =
1740
702
                        copy_dst[35] = hi;
1741
                    } else {
1742
661
                        copy_dst[11] = ptr[4 * x                   - 1];
1743
661
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
1744
661
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1745
661
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1746
                    }
1747
                }
1748
282800
                s->hpc.pred4x4[mode](dst, topright, linesize);
1749
282800
                /* copy the four predicted rows back into the frame */
                if (copy) {
1750
1363
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
1751
1363
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
1752
1363
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1753
1363
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1754
                }
1755
1756
282800
                /* add the residual right away: a count of 1 takes the
                 * DC-only path, anything larger the full IDCT */
                nnz = td->non_zero_count_cache[y][x];
1757
282800
                if (nnz) {
1758
122340
                    if (nnz == 1)
1759
35131
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1760
35131
                                                  td->block[y][x], s->linesize);
1761
                    else
1762
87209
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1763
87209
                                               td->block[y][x], s->linesize);
1764
                }
1765
282800
                topright += 4;
1766
            }
1767
1768
70700
            ptr      += 4 * s->linesize;
1769
70700
            intra4x4 += 4;
1770
        }
1771
    }
1772
1773
57332
    /* chroma: one 8x8 predictor, same mode for both planes */
    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1774
                                            mb_x, mb_y, is_vp7);
1775
57332
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1776
57332
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1777
1778


57332
    /* second border call, same condition as at the top of the function but
     * with the final xchg argument 0 */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1779
42065
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1780
42065
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1781
42065
                       s->filter.simple, 0);
1782
57332
}
1783
1784
/* Lookup table for the motion compensation routines below. The second index
 * is the masked fractional position (0..7) of one motion vector component;
 * the first index selects the quantity:
 *   [0] extra pixels needed on the left/top, also the mc_func[][] index
 *   [1] total extra pixels, added to the block size for edge emulation
 *   [2] extra pixels needed on the right/bottom, used in the bounds checks
 */
static const uint8_t subpel_idx[3][8] = {
1785
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1786
                                // also function pointer index
1787
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1788
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1789
};
1790
1791
/**
1792
 * luma MC function
1793
 *
1794
 * @param s        VP8 decoding context
1795
 * @param dst      target buffer for block data at block position
1796
 * @param ref      reference picture buffer at origin (0, 0)
1797
 * @param mv       motion vector (relative to block position) to get pixel data from
1798
 * @param x_off    horizontal position of block from origin (0, 0)
1799
 * @param y_off    vertical position of block from origin (0, 0)
1800
 * @param block_w  width of block (16, 8 or 4)
1801
 * @param block_h  height of block (always same as block_w)
1802
 * @param width    width of src/dst plane data
1803
 * @param height   height of src/dst plane data
1804
 * @param linesize size of a single line of plane data, including padding
1805
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
1806
 */
1807
static av_always_inline
1808
504108
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1809
                 ThreadFrame *ref, const VP56mv *mv,
1810
                 int x_off, int y_off, int block_w, int block_h,
1811
                 int width, int height, ptrdiff_t linesize,
1812
                 vp8_mc_func mc_func[3][3])
1813
{
1814
504108
    uint8_t *src = ref->f->data[0];
1815
1816
504108
    if (AV_RN32A(mv)) {
1817
124886
        ptrdiff_t src_linesize = linesize;
1818
1819
124886
        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1820
124886
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1821
1822
124886
        x_off += mv->x >> 2;
1823
124886
        y_off += mv->y >> 2;
1824
1825
        // edge emulation
1826
124886
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1827
124886
        src += y_off * linesize + x_off;
1828

124886
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
1829
118693
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1830
9236
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1831
9236
                                     src - my_idx * linesize - mx_idx,
1832
                                     EDGE_EMU_LINESIZE, linesize,
1833
9236
                                     block_w + subpel_idx[1][mx],
1834
9236
                                     block_h + subpel_idx[1][my],
1835
                                     x_off - mx_idx, y_off - my_idx,
1836
                                     width, height);
1837
9236
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1838
9236
            src_linesize = EDGE_EMU_LINESIZE;
1839
        }
1840
124886
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1841
    } else {
1842
379222
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1843
379222
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1844
                      linesize, block_h, 0, 0);
1845
    }
1846
504108
}
1847
1848
/**
1849
 * chroma MC function
1850
 *
1851
 * @param s        VP8 decoding context
1852
 * @param dst1     target buffer for block data at block position (U plane)
1853
 * @param dst2     target buffer for block data at block position (V plane)
1854
 * @param ref      reference picture buffer at origin (0, 0)
1855
 * @param mv       motion vector (relative to block position) to get pixel data from
1856
 * @param x_off    horizontal position of block from origin (0, 0)
1857
 * @param y_off    vertical position of block from origin (0, 0)
1858
 * @param block_w  width of block (16, 8 or 4)
1859
 * @param block_h  height of block (always same as block_w)
1860
 * @param width    width of src/dst plane data
1861
 * @param height   height of src/dst plane data
1862
 * @param linesize size of a single line of plane data, including padding
1863
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
1864
 */
1865
static av_always_inline
1866
424764
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1867
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1868
                   int x_off, int y_off, int block_w, int block_h,
1869
                   int width, int height, ptrdiff_t linesize,
1870
                   vp8_mc_func mc_func[3][3])
1871
{
1872
424764
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1873
1874
424764
    if (AV_RN32A(mv)) {
1875
79185
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1876
79185
        int my = mv->y & 7, my_idx = subpel_idx[0][my];
1877
1878
79185
        x_off += mv->x >> 3;
1879
79185
        y_off += mv->y >> 3;
1880
1881
        // edge emulation
1882
79185
        src1 += y_off * linesize + x_off;
1883
79185
        src2 += y_off * linesize + x_off;
1884
79185
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1885

79185
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
1886
73045
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1887
9157
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1888
9157
                                     src1 - my_idx * linesize - mx_idx,
1889
                                     EDGE_EMU_LINESIZE, linesize,
1890
9157
                                     block_w + subpel_idx[1][mx],
1891
9157
                                     block_h + subpel_idx[1][my],
1892
                                     x_off - mx_idx, y_off - my_idx, width, height);
1893
9157
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1894
9157
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1895
1896
9157
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1897
9157
                                     src2 - my_idx * linesize - mx_idx,
1898
                                     EDGE_EMU_LINESIZE, linesize,
1899
9157
                                     block_w + subpel_idx[1][mx],
1900
9157
                                     block_h + subpel_idx[1][my],
1901
                                     x_off - mx_idx, y_off - my_idx, width, height);
1902
9157
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1903
9157
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1904
        } else {
1905
70028
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1906
70028
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1907
        }
1908
    } else {
1909
345579
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1910
345579
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1911
345579
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1912
    }
1913
424764
}
1914
1915
static av_always_inline
1916
398316
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1917
                 ThreadFrame *ref_frame, int x_off, int y_off,
1918
                 int bx_off, int by_off, int block_w, int block_h,
1919
                 int width, int height, VP56mv *mv)
1920
{
1921
398316
    VP56mv uvmv = *mv;
1922
1923
    /* Y */
1924
398316
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1925
                ref_frame, mv, x_off + bx_off, y_off + by_off,
1926
                block_w, block_h, width, height, s->linesize,
1927
398316
                s->put_pixels_tab[block_w == 8]);
1928
1929
    /* U/V */
1930
398316
    if (s->profile == 3) {
1931
        /* this block only applies VP8; it is safe to check
1932
         * only the profile, as VP7 profile <= 1 */
1933
4775
        uvmv.x &= ~7;
1934
4775
        uvmv.y &= ~7;
1935
    }
1936
398316
    x_off   >>= 1;
1937
398316
    y_off   >>= 1;
1938
398316
    bx_off  >>= 1;
1939
398316
    by_off  >>= 1;
1940
398316
    width   >>= 1;
1941
398316
    height  >>= 1;
1942
398316
    block_w >>= 1;
1943
398316
    block_h >>= 1;
1944
398316
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1945
398316
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1946
                  &uvmv, x_off + bx_off, y_off + by_off,
1947
                  block_w, block_h, width, height, s->uvlinesize,
1948
398316
                  s->put_pixels_tab[1 + (block_w == 4)]);
1949
398316
}
1950
1951
/* Fetch pixels for estimated mv 4 macroblocks ahead.
1952
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1953
static av_always_inline
1954
1329627
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1955
                     int mb_xy, int ref)
1956
{
1957
    /* Don't prefetch refs that haven't been used very often this frame. */
1958
1329627
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1959
478745
        int x_off = mb_x << 4, y_off = mb_y << 4;
1960
478745
        int mx = (mb->mv.x >> 2) + x_off + 8;
1961
478745
        int my = (mb->mv.y >> 2) + y_off;
1962
478745
        uint8_t **src = s->framep[ref]->tf.f->data;
1963
478745
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1964
        /* For threading, a ff_thread_await_progress here might be useful, but
1965
         * it actually slows down the decoder. Since a bad prefetch doesn't
1966
         * generate bad decoder output, we don't run it here. */
1967
478745
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1968
478745
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1969
478745
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1970
    }
1971
1329627
}
1972
1973
/**
1974
 * Apply motion vectors to prediction buffer, chapter 18.
1975
 */
1976
static av_always_inline
1977
385877
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1978
                   VP8Macroblock *mb, int mb_x, int mb_y)
1979
{
1980
385877
    int x_off = mb_x << 4, y_off = mb_y << 4;
1981
385877
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
1982
385877
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1983
385877
    VP56mv *bmv = mb->bmv;
1984
1985

385877
    switch (mb->partitioning) {
1986
367552
    case VP8_SPLITMVMODE_NONE:
1987
367552
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1988
                    0, 0, 16, 16, width, height, &mb->mv);
1989
367552
        break;
1990
6612
    case VP8_SPLITMVMODE_4x4: {
1991
        int x, y;
1992
        VP56mv uvmv;
1993
1994
        /* Y */
1995
33060
        for (y = 0; y < 4; y++) {
1996
132240
            for (x = 0; x < 4; x++) {
1997
105792
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1998
105792
                            ref, &bmv[4 * y + x],
1999
105792
                            4 * x + x_off, 4 * y + y_off, 4, 4,
2000
                            width, height, s->linesize,
2001
105792
                            s->put_pixels_tab[2]);
2002
            }
2003
        }
2004
2005
        /* U/V */
2006
6612
        x_off  >>= 1;
2007
6612
        y_off  >>= 1;
2008
6612
        width  >>= 1;
2009
6612
        height >>= 1;
2010
19836
        for (y = 0; y < 2; y++) {
2011
39672
            for (x = 0; x < 2; x++) {
2012
26448
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
2013
26448
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
2014
26448
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
2015
26448
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
2016
26448
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
2017
26448
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
2018
26448
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
2019
26448
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
2020
26448
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
2021
26448
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
2022
26448
                if (s->profile == 3) {
2023
492
                    uvmv.x &= ~7;
2024
492
                    uvmv.y &= ~7;
2025
                }
2026
26448
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
2027
26448
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
2028
26448
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
2029
                              width, height, s->uvlinesize,
2030
26448
                              s->put_pixels_tab[2]);
2031
            }
2032
        }
2033
6612
        break;
2034
    }
2035
4909
    case VP8_SPLITMVMODE_16x8:
2036
4909
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2037
                    0, 0, 16, 8, width, height, &bmv[0]);
2038
4909
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2039
                    0, 8, 16, 8, width, height, &bmv[1]);
2040
4909
        break;
2041
3135
    case VP8_SPLITMVMODE_8x16:
2042
3135
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2043
                    0, 0, 8, 16, width, height, &bmv[0]);
2044
3135
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2045
                    8, 0, 8, 16, width, height, &bmv[1]);
2046
3135
        break;
2047
3669
    case VP8_SPLITMVMODE_8x8:
2048
3669
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2049
                    0, 0, 8, 8, width, height, &bmv[0]);
2050
3669
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2051
                    8, 0, 8, 8, width, height, &bmv[1]);
2052
3669
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2053
                    0, 8, 8, 8, width, height, &bmv[2]);
2054
3669
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2055
                    8, 8, 8, 8, width, height, &bmv[3]);
2056
3669
        break;
2057
    }
2058
385877
}
2059
2060
/**
 * Add the residual of one macroblock to the already predicted pixels.
 *
 * Luma is skipped when mb->mode == MODE_I4x4; chroma is always handled.
 * Each row of td->non_zero_count_cache holds four per-block counts, read
 * here as one packed 32-bit word (AV_RL32) and consumed one byte at a time:
 *   - word is zero                 -> nothing to add for the row
 *   - every count is 0 or 1        -> one batched DC-only call for the row
 *   - otherwise, per block: count 1 -> vp8_idct_dc_add,
 *                           count >1 -> vp8_idct_add,
 *     stopping as soon as the remaining counts are all zero.
 *
 * @param s    decoder context (DSP function table, strides)
 * @param td   per-thread data holding the coefficient blocks and nnz cache
 * @param dst  destination plane pointers for this macroblock
 * @param mb   macroblock whose mode decides whether luma is processed
 */
static av_always_inline
2061
76840
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
2062
{
2063
    int x, y, ch;
2064
2065
76840
    if (mb->mode != MODE_I4x4) {
2066
59625
        uint8_t *y_dst = dst[0];
2067
298125
        for (y = 0; y < 4; y++) {
2068
238500
            /* four counts of this block row packed into one word */
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
2069
238500
            if (nnz4) {
2070
175833
                /* any byte above 1 means at least one block needs the
                 * full IDCT */
                if (nnz4 & ~0x01010101) {
2071
155935
                    for (x = 0; x < 4; x++) {
2072
155935
                        if ((uint8_t) nnz4 == 1)
2073
54051
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
2074
54051
                                                      td->block[y][x],
2075
                                                      s->linesize);
2076
101884
                        else if ((uint8_t) nnz4 > 1)
2077
74428
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
2078
74428
                                                   td->block[y][x],
2079
                                                   s->linesize);
2080
155935
                        /* move on to the next block's count */
                        nnz4 >>= 8;
2081
155935
                        if (!nnz4)
2082
44887
                            break;
2083
                    }
2084
                } else {
2085
130946
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2086
                }
2087
            }
2088
238500
            y_dst += 4 * s->linesize;
2089
        }
2090
    }
2091
2092
230520
    /* chroma: nnz cache rows 4 and 5, destination planes dst[1] and dst[2];
     * the four counts cover a 2x2 arrangement of blocks */
    for (ch = 0; ch < 2; ch++) {
2093
153680
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2094
153680
        if (nnz4) {
2095
60025
            uint8_t *ch_dst = dst[1 + ch];
2096
60025
            if (nnz4 & ~0x01010101) {
2097
55530
                for (y = 0; y < 2; y++) {
2098
131913
                    for (x = 0; x < 2; x++) {
2099
106337
                        if ((uint8_t) nnz4 == 1)
2100
16054
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2101
16054
                                                      td->block[4 + ch][(y << 1) + x],
2102
                                                      s->uvlinesize);
2103
90283
                        else if ((uint8_t) nnz4 > 1)
2104
66922
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2105
66922
                                                   td->block[4 + ch][(y << 1) + x],
2106
                                                   s->uvlinesize);
2107
106337
                        nnz4 >>= 8;
2108
106337
                        /* nothing left in this plane: leave both loops */
                        if (!nnz4)
2109
29954
                            goto chroma_idct_end;
2110
                    }
2111
25576
                    ch_dst += 4 * s->uvlinesize;
2112
                }
2113
            } else {
2114
30071
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2115
            }
2116
        }
2117
153680
chroma_idct_end:
2118
        /* empty statement so the label has something to attach to */
        ;
2119
    }
2120
76840
}
2121
2122
static av_always_inline
2123
424229
void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2124
                         VP8FilterStrength *f, int is_vp7)
2125
{
2126
    int interior_limit, filter_level;
2127
2128
424229
    if (s->segmentation.enabled) {
2129
102219
        filter_level = s->segmentation.filter_level[mb->segment];
2130
102219
        if (!s->segmentation.absolute_vals)
2131
98895
            filter_level += s->filter.level;
2132
    } else
2133
322010
        filter_level = s->filter.level;
2134
2135
424229
    if (s->lf_delta.enabled) {
2136
416875
        filter_level += s->lf_delta.ref[mb->ref_frame];
2137
416875
        filter_level += s->lf_delta.mode[mb->mode];
2138
    }
2139
2140
424229
    filter_level = av_clip_uintp2(filter_level, 6);
2141
2142
424229
    interior_limit = filter_level;
2143
424229
    if (s->filter.sharpness) {
2144
2772
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
2145
2772
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2146
    }
2147
424229
    interior_limit = FFMAX(interior_limit, 1);
2148
2149
424229
    f->filter_level = filter_level;
2150
424229
    f->inner_limit = interior_limit;
2151

777260
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2152
353031
                      mb->mode == VP8_MVMODE_SPLIT;
2153
424229
}
2154
2155
static av_always_inline
2156
416507
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2157
               int mb_x, int mb_y, int is_vp7)
2158
{
2159
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2160
416507
    int filter_level = f->filter_level;
2161
416507
    int inner_limit = f->inner_limit;
2162
416507
    int inner_filter = f->inner_filter;
2163
416507
    ptrdiff_t linesize   = s->linesize;
2164
416507
    ptrdiff_t uvlinesize = s->uvlinesize;
2165
    static const uint8_t hev_thresh_lut[2][64] = {
2166
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2167
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2168
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2169
          3, 3, 3, 3 },
2170
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2171
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2172
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2173
          2, 2, 2, 2 }
2174
    };
2175
2176
416507
    if (!filter_level)
2177
5858
        return;
2178
2179
410649
    if (is_vp7) {
2180
6600
        bedge_lim_y  = filter_level;
2181
6600
        bedge_lim_uv = filter_level * 2;
2182
6600
        mbedge_lim   = filter_level + 2;
2183
    } else {
2184
404049
        bedge_lim_y  =
2185
404049
        bedge_lim_uv = filter_level * 2 + inner_limit;
2186
404049
        mbedge_lim   = bedge_lim_y + 4;
2187
    }
2188
2189
410649
    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2190
2191
410649
    if (mb_x) {
2192
396544
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2193
                                       mbedge_lim, inner_limit, hev_thresh);
2194
396544
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2195
                                       mbedge_lim, inner_limit, hev_thresh);
2196
    }
2197
2198
#define H_LOOP_FILTER_16Y_INNER(cond)                                         \
2199
    if (cond && inner_filter) {                                               \
2200
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  4, linesize,           \
2201
                                             bedge_lim_y, inner_limit,        \
2202
                                             hev_thresh);                     \
2203
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  8, linesize,           \
2204
                                             bedge_lim_y, inner_limit,        \
2205
                                             hev_thresh);                     \
2206
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,           \
2207
                                             bedge_lim_y, inner_limit,        \
2208
                                             hev_thresh);                     \
2209
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] +  4, dst[2] + 4,         \
2210
                                             uvlinesize,  bedge_lim_uv,       \
2211
                                             inner_limit, hev_thresh);        \
2212
    }
2213
2214

410649
    H_LOOP_FILTER_16Y_INNER(!is_vp7)
2215
2216
410649
    if (mb_y) {
2217
389635
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2218
                                       mbedge_lim, inner_limit, hev_thresh);
2219
389635
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2220
                                       mbedge_lim, inner_limit, hev_thresh);
2221
    }
2222
2223
410649
    if (inner_filter) {
2224
69204
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
2225
                                             linesize, bedge_lim_y,
2226
                                             inner_limit, hev_thresh);
2227
69204
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
2228
                                             linesize, bedge_lim_y,
2229
                                             inner_limit, hev_thresh);
2230
69204
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2231
                                             linesize, bedge_lim_y,
2232
                                             inner_limit, hev_thresh);
2233
69204
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] +  4 * uvlinesize,
2234
69204
                                             dst[2] +  4 * uvlinesize,
2235
                                             uvlinesize, bedge_lim_uv,
2236
                                             inner_limit, hev_thresh);
2237
    }
2238
2239

410649
    H_LOOP_FILTER_16Y_INNER(is_vp7)
2240
}
2241
2242
static av_always_inline
2243
7722
void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2244
                      int mb_x, int mb_y)
2245
{
2246
    int mbedge_lim, bedge_lim;
2247
7722
    int filter_level = f->filter_level;
2248
7722
    int inner_limit  = f->inner_limit;
2249
7722
    int inner_filter = f->inner_filter;
2250
7722
    ptrdiff_t linesize = s->linesize;
2251
2252
7722
    if (!filter_level)
2253
332
        return;
2254
2255
7390
    bedge_lim  = 2 * filter_level + inner_limit;
2256
7390
    mbedge_lim = bedge_lim + 4;
2257
2258
7390
    if (mb_x)
2259
6713
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2260
7390
    if (inner_filter) {
2261
3523
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
2262
3523
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
2263
3523
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2264
    }
2265
2266
7390
    if (mb_y)
2267
6609
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2268
7390
    if (inner_filter) {
2269
3523
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
2270
3523
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
2271
3523
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2272
    }
2273
}
2274
2275
#define MARGIN (16 << 2)
2276
static av_always_inline
2277
30
int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2278
                                    VP8Frame *prev_frame, int is_vp7)
2279
{
2280
30
    VP8Context *s = avctx->priv_data;
2281
    int mb_x, mb_y;
2282
2283
30
    s->mv_bounds.mv_min.y = -MARGIN;
2284
30
    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2285
360
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2286
330
        VP8Macroblock *mb = s->macroblocks_base +
2287
330
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
2288
330
        int mb_xy = mb_y * s->mb_width;
2289
2290
330
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2291
2292
330
        s->mv_bounds.mv_min.x = -MARGIN;
2293
330
        s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2294
2295
330
        if (vpX_rac_is_end(&s->c)) {
2296
            return AVERROR_INVALIDDATA;
2297
        }
2298
6930
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2299
6600
            if (mb_y == 0)
2300
600
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2301
                         DC_PRED * 0x01010101);
2302
12980
            decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2303
6380
                           prev_frame && prev_frame->seg_map ?
2304
6380
                           prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2305
6600
            s->mv_bounds.mv_min.x -= 64;
2306
6600
            s->mv_bounds.mv_max.x -= 64;
2307
        }
2308
330
        s->mv_bounds.mv_min.y -= 64;
2309
330
        s->mv_bounds.mv_max.y -= 64;
2310
    }
2311
30
    return 0;
2312
}
2313
2314
30
/**
 * VP7 entry point for whole-frame mode/motion-vector decoding.
 *
 * @return the result of vp78_decode_mv_mb_modes() called with IS_VP7
 */
static int vp7_decode_mv_mb_modes(AVCodecContext *avctx,
                                  VP8Frame *cur_frame,
                                  VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame,
                                   IS_VP7);
}
2319
2320
/**
 * VP8 entry point for whole-frame mode/motion-vector decoding.
 *
 * @return the result of vp78_decode_mv_mb_modes() called with IS_VP8
 */
static int vp8_decode_mv_mb_modes(AVCodecContext *avctx,
                                  VP8Frame *cur_frame,
                                  VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame,
                                   IS_VP8);
}
2325
2326
#if HAVE_THREADS
/*
 * Positions are packed into one int as (mb_y << 16) | (mb_x & 0xFFFF) so
 * that a plain integer comparison orders them by row first, then column.
 *
 * All macro parameters are parenthesised in the expansions below so that
 * arbitrary expressions can be passed without precedence surprises.
 */

/*
 * Block the calling thread (whose data is td) until the thread owning otd
 * has published a position >= (mb_y_check, mb_x_check).
 *
 * The requested position is stored in td->wait_mb_pos while waiting and
 * reset to INT_MAX afterwards; the wait itself is done on otd's condition
 * variable with otd's mutex held.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);             \
        if (atomic_load(&(otd)->thread_mb_pos) < tmp) {                       \
            pthread_mutex_lock(&(otd)->lock);                                 \
            atomic_store(&(td)->wait_mb_pos, tmp);                            \
            do {                                                              \
                if (atomic_load(&(otd)->thread_mb_pos) >= tmp)                \
                    break;                                                    \
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);                \
            } while (1);                                                      \
            atomic_store(&(td)->wait_mb_pos, INT_MAX);                        \
            pthread_mutex_unlock(&(otd)->lock);                               \
        }                                                                     \
    } while (0)

/*
 * Publish (mb_y, mb_x) as the current position of td and, when slice
 * threading is active with more than one job, wake up waiters on td's
 * condition variable if a neighbouring thread may be waiting for it.
 *
 * Besides its arguments this macro reads the following names from the
 * expansion site: avctx, num_jobs, next_td and prev_td. If either
 * next_td or prev_td is NULL the broadcast is not suppressed by the
 * wait-position check.
 */
#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);            \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1 :                                \
            (next_td != (td) && pos >= atomic_load(&next_td->wait_mb_pos)) || \
            (prev_td != (td) && pos >= atomic_load(&prev_td->wait_mb_pos));   \
        atomic_store(&(td)->thread_mb_pos, pos);                              \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&(td)->lock);                                  \
            pthread_cond_broadcast(&(td)->cond);                              \
            pthread_mutex_unlock(&(td)->lock);                                \
        }                                                                     \
    } while (0)
#else
/* Without thread support both operations expand to an empty statement. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
#define update_pos(td, mb_y, mb_x) while(0)
#endif
2363
2364
16181
/**
 * Decode one macroblock row (modes if needed, coefficients, prediction
 * and inverse transform) without applying the loop filter.
 *
 * The row to decode is taken from the upper 16 bits of the thread's
 * thread_mb_pos. After each macroblock the thread publishes its position
 * through update_pos(); before each macroblock it may wait on the thread
 * handling the previous row through check_thread_pos().
 *
 * The locals avctx, td, prev_td, next_td and num_jobs are read by the
 * update_pos()/check_thread_pos() macros and must keep these names.
 *
 * @param avctx    codec context; priv_data is the VP8Context
 * @param tdata    unused here
 * @param jobnr    job index, used to locate neighbouring thread data
 * @param threadnr index into s->thread_data for this thread
 * @param is_vp7   non-zero when decoding VP7
 * @return 0 on success, AVERROR_INVALIDDATA when the coefficient
 *         partition's range coder reports end of data
 */
static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    VP8ThreadData *prev_td, *next_td;
    int mb_y     = atomic_load(&td->thread_mb_pos) >> 16;
    int mb_xy    = mb_y * s->mb_width;
    int num_jobs = s->num_jobs;
    int mb_x;
    VP8Frame *curframe   = s->curframe;
    VP8Frame *prev_frame = s->prev_frame;
    /* Coefficient partition for this row. */
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (vpX_rac_is_end(c))
         return AVERROR_INVALIDDATA;

    /* The first and last rows have no neighbour above / below; point the
     * neighbour at ourselves so the synchronisation is skipped. */
    prev_td = mb_y == 0 ? td
                        : &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    next_td = mb_y == s->mb_height - 1 ? td
                                       : &s->thread_data[(jobnr + 1) % num_jobs];

    if (s->mb_layout == 1) {
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    } else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    /* For VP7 the left non-zero context is only cleared on the first row. */
    if (!is_vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    td->mv_bounds.mv_min.x = -MARGIN;
    td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        if (vpX_rac_is_end(c))
            return AVERROR_INVALIDDATA;

        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr == 0) {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
                                 mb_y - (is_vp7 ? 2 : 1));
            } else {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1),
                                 mb_y - (is_vp7 ? 2 : 1));
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
                         s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
                         dst[2] - dst[1], 2);

        /* With mb_layout == 0 the modes are decoded here, interleaved
         * with the residual. */
        if (!s->mb_layout)
            decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y,
                           curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL,
                           0, is_vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);

        if (mb->mode > MODE_I4x4)
            inter_predict(s, td, dst, mb, mb_x, mb_y);
        else
            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (mb->skip) {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            /* Reset DC block predictors if they would exist
             * if the mb had coefficients */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        } else {
            idct_mb(s, td, dst, mb);
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);

        /* With several jobs, the thread with the highest index saves the
         * unfiltered border here. */
        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        td->mv_bounds.mv_min.x -= 64;
        td->mv_bounds.mv_max.x -= 64;

        /* NOTE(review): mb_x < s->mb_width inside this loop, so the first
         * branch appears unreachable; kept as-is to preserve behavior. */
        if (mb_x == s->mb_width + 1) {
            update_pos(td, mb_y, s->mb_width + 3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }

    return 0;
}
2491
2492
330
/**
 * VP7 row decoder without loop filtering.
 *
 * Uses the IS_VP7 constant, like the other vp7_* wrappers in this file,
 * instead of a bare literal.
 *
 * @return the result of decode_mb_row_no_filter()
 */
static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, IS_VP7);
}
2497
2498
15851
/**
 * VP8 row decoder without loop filtering.
 *
 * Uses the IS_VP8 constant, like the other vp8_* wrappers in this file,
 * instead of a bare literal.
 *
 * @return the result of decode_mb_row_no_filter()
 */
static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, IS_VP8);
}
2503
2504
15115
/**
 * Apply the loop filter to one macroblock row.
 *
 * The row is taken from the upper 16 bits of the thread's thread_mb_pos.
 * Filter positions are published with a column offset of (mb_width + 3)
 * so they compare greater than any decode position of the same row.
 *
 * The locals avctx, td, prev_td, next_td and num_jobs are read by the
 * update_pos()/check_thread_pos() macros and must keep these names.
 *
 * @param avctx    codec context; priv_data is the VP8Context
 * @param tdata    unused here
 * @param jobnr    job index, used to locate neighbouring thread data
 * @param threadnr index into s->thread_data for this thread
 * @param is_vp7   forwarded to filter_mb()
 */
static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s     = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    VP8ThreadData *prev_td, *next_td;
    int mb_y     = atomic_load(&td->thread_mb_pos) >> 16;
    int num_jobs = s->num_jobs;
    int mb_x;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    mb = s->mb_layout == 1
         ? s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1)
         : s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    /* First / last row: no neighbour to synchronise with. */
    prev_td = mb_y == 0 ? td
                        : &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    next_td = mb_y == s->mb_height - 1 ? td
                                       : &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];

        /* Wait until the row above has been filtered past this column. */
        if (prev_td != td)
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        /* Wait until the row below has been decoded past this column;
         * skipped when that row belongs to thread_data[0]. */
        if (next_td != td && next_td != &s->thread_data[0])
            check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        /* Single job: save the unfiltered border here, right before
         * filtering this macroblock. */
        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}
2562
2563
330
/**
 * VP7 loop filter for one macroblock row.
 *
 * Uses the IS_VP7 constant, like the other vp7_* wrappers in this file,
 * instead of a bare literal.
 */
static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, IS_VP7);
}
2568
2569
14785
/**
 * VP8 loop filter for one macroblock row.
 *
 * Uses the IS_VP8 constant, like the other vp8_* wrappers in this file,
 * instead of a bare literal.
 */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, IS_VP8);
}
2574
2575
static av_always_inline
2576
1142
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2577
                              int threadnr, int is_vp7)
2578
{
2579
1142
    VP8Context *s = avctx->priv_data;
2580
1142
    VP8ThreadData *td = &s->thread_data[jobnr];
2581
1142
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
2582
1142
    VP8Frame *curframe = s->curframe;
2583
1142
    int mb_y, num_jobs = s->num_jobs;
2584
    int ret;
2585
2586
1142
    td->thread_nr = threadnr;
2587
1142
    td->mv_bounds.mv_min.y   = -MARGIN - 64 * threadnr;
2588
1142
    td->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
2589
17323
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2590
16181
        atomic_store(&td->thread_mb_pos, mb_y << 16);
2591
16181
        ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2592
16181
        if (ret < 0) {
2593
            update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
2594
            return ret;
2595
        }
2596
16181
        if (s->deblock_filter)
2597
15115
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2598





16181
        update_pos(td, mb_y, INT_MAX & 0xFFFF);
2599
2600
16181
        td->mv_bounds.mv_min.y -= 64 * num_jobs;
2601
16181
        td->mv_bounds.mv_max.y -= 64 * num_jobs;
2602
2603
16181
        if (avctx->active_thread_type == FF_THREAD_FRAME)
2604
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
2605
    }
2606
2607
1142
    return 0;
2608
}
2609
2610
30
/**
 * VP7 worker passed to avctx->execute2().
 *
 * @return the result of vp78_decode_mb_row_sliced() called with IS_VP7
 */
static int vp7_decode_mb_row_sliced(AVCodecContext *avctx,
                                    void *tdata,
                                    int jobnr,
                                    int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr,
                                     IS_VP7);
}
2615
2616
1112
/**
 * VP8 worker passed to avctx->execute2().
 *
 * @return the result of vp78_decode_mb_row_sliced() called with IS_VP8
 */
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx,
                                    void *tdata,
                                    int jobnr,
                                    int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr,
                                     IS_VP8);
}
2621
2622
static av_always_inline
2623
1142
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2624
                      AVPacket *avpkt, int is_vp7)
2625
{
2626
1142
    VP8Context *s = avctx->priv_data;
2627
    int ret, i, referenced, num_jobs;
2628
    enum AVDiscard skip_thresh;
2629
1142
    VP8Frame *av_uninit(curframe), *prev_frame;
2630
2631
1142
    if (is_vp7)
2632
30
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2633
    else
2634
1112
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2635
2636
1142
    if (ret < 0)
2637
        goto err;
2638
2639
1142
    if (s->actually_webp) {
2640
        // avctx->pix_fmt already set in caller.
2641

1136
    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2642
        s->pix_fmt = get_pixel_format(s);
2643
        if (s->pix_fmt < 0) {
2644
            ret = AVERROR(EINVAL);
2645
            goto err;
2646
        }
2647
        avctx->pix_fmt = s->pix_fmt;
2648
    }
2649
2650
1142
    prev_frame = s->framep[VP56_FRAME_CURRENT];
2651
2652

1153
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2653
11
                 s->update_altref == VP56_FRAME_CURRENT;
2654
2655
1142
    skip_thresh = !referenced ? AVDISCARD_NONREF
2656

1142
                              : !s->keyframe ? AVDISCARD_NONKEY
2657
                                             : AVDISCARD_ALL;
2658
2659
1142
    if (avctx->skip_frame >= skip_thresh) {
2660
        s->invisible = 1;
2661
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2662
        goto skip_decode;
2663
    }
2664

1142
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2665
2666
    // release no longer referenced frames
2667
6852
    for (i = 0; i < 5; i++)
2668
5710
        if (s->frames[i].tf.f->buf[0] &&
2669
3826
            &s->frames[i] != prev_frame &&
2670
2721
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2671
2710
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
2672
1741
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2673
1033
            vp8_release_frame(s, &s->frames[i]);
2674
2675
1142
    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2676
2677
1142
    if (!s->colorspace)
2678
1142
        avctx->colorspace = AVCOL_SPC_BT470BG;
2679
1142
    if (s->fullrange)
2680
        avctx->color_range = AVCOL_RANGE_JPEG;
2681
    else
2682
1142
        avctx->color_range = AVCOL_RANGE_MPEG;
2683
2684
    /* Given that arithmetic probabilities are updated every frame, it's quite
2685
     * likely that the values we have on a random interframe are complete
2686
     * junk if we didn't start decode on a keyframe. So just don't display
2687
     * anything rather than junk. */
2688

1142
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2689
1093
                         !s->framep[VP56_FRAME_GOLDEN]   ||
2690
1093
                         !s->framep[VP56_FRAME_GOLDEN2])) {
2691
        av_log(avctx, AV_LOG_WARNING,
2692
               "Discarding interframe without a prior keyframe!\n");
2693
        ret = AVERROR_INVALIDDATA;
2694
        goto err;
2695
    }
2696
2697
1142
    curframe->tf.f->key_frame = s->keyframe;
2698
2284
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2699
1142
                                            : AV_PICTURE_TYPE_P;
2700
1142
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2701
        goto err;
2702
2703
    // check if golden and altref are swapped
2704
1142
    if (s->update_altref != VP56_FRAME_NONE)
2705
196
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2706
    else
2707
946
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2708
2709
1142
    if (s->update_golden != VP56_FRAME_NONE)
2710
139
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2711
    else
2712
1003
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2713
2714
1142
    if (s->update_last)
2715
1131
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2716
    else
2717
11
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2718
2719
1142
    s->next_framep[VP56_FRAME_CURRENT] = curframe;
2720
2721
1142
    if (avctx->codec->update_thread_context)
2722
1106
        ff_thread_finish_setup(avctx);
2723
2724
1142
    if (avctx->hwaccel) {
2725
        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2726
        if (ret < 0)
2727
            goto err;
2728
2729
        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2730
        if (ret < 0)
2731
            goto err;
2732
2733
        ret = avctx->hwaccel->end_frame(avctx);
2734
        if (ret < 0)
2735
            goto err;
2736
2737
    } else {
2738
1142
        s->linesize   = curframe->tf.f->linesize[0];
2739
1142
        s->uvlinesize = curframe->tf.f->linesize[1];
2740
2741
1142
        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2742
        /* Zero macroblock structures for top/top-left prediction
2743
         * from outside the frame. */
2744
1142
        if (!s->mb_layout)
2745
1112
            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2746
1112
                   (s->mb_width + 1) * sizeof(*s->macroblocks));
2747

1142
        if (!s->mb_layout && s->keyframe)
2748
48
            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2749
2750
1142
        memset(s->ref_count, 0, sizeof(s->ref_count));
2751
2752
1142
        if (s->mb_layout == 1) {
2753
            // Make sure the previous frame has read its segmentation map,
2754
            // if we re-use the same map.
2755

30
            if (prev_frame && s->segmentation.enabled &&
2756
                !s->segmentation.update_map)
2757
                ff_thread_await_progress(&prev_frame->tf, 1, 0);
2758
30
            if (is_vp7)
2759
30
                ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2760
            else
2761
                ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2762
30
            if (ret < 0)
2763
                goto err;
2764
        }
2765
2766
1142
        if (avctx->active_thread_type == FF_THREAD_FRAME)
2767
            num_jobs = 1;
2768
        else
2769
1142
            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2770
1142
        s->num_jobs   = num_jobs;
2771
1142
        s->curframe   = curframe;
2772
1142
        s->prev_frame = prev_frame;
2773
1142
        s->mv_bounds.mv_min.y   = -MARGIN;
2774
1142
        s->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
2775
10278
        for (i = 0; i < MAX_THREADS; i++) {
2776
9136
            VP8ThreadData *td = &s->thread_data[i];
2777
9136
            atomic_init(&td->thread_mb_pos, 0);
2778
9136
            atomic_init(&td->wait_mb_pos, INT_MAX);
2779
        }
2780
1142
        if (is_vp7)
2781
30
            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2782
                            num_jobs);
2783
        else
2784
1112
            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2785
                            num_jobs);
2786
    }
2787
2788
1142
    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2789
1142
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2790
2791
1142
skip_decode:
2792
    // if future frames don't use the updated probabilities,
2793
    // reset them to the values we saved
2794
1142
    if (!s->update_probabilities)
2795
64
        s->prob[0] = s->prob[1];
2796
2797
1142
    if (!s->invisible) {
2798
1133
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2799
            return ret;
2800
1133
        *got_frame = 1;
2801
    }
2802
2803
1142
    return avpkt->size;
2804
err:
2805
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2806
    return ret;
2807
}
2808
2809
1112
int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2810
                        AVPacket *avpkt)
2811
{
2812
1112
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2813
}
2814
2815
#if CONFIG_VP7_DECODER
/**
 * Decode callback for VP7.
 *
 * @return the result of vp78_decode_frame() called with IS_VP7
 */
static int vp7_decode_frame(AVCodecContext *avctx,
                            void *data,
                            int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt,
                             IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2822
2823
60
/**
 * Close callback: flush the decoder and free all frame structures.
 *
 * Safe to call when avctx->priv_data is NULL.
 *
 * @return always 0
 */
av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    if (s) {
        int i;

        vp8_decode_flush_impl(avctx, 1);
        for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
            av_frame_free(&s->frames[i].tf.f);
    }

    return 0;
}
2837
2838
60
/**
 * Allocate an AVFrame for every entry of s->frames.
 *
 * Stops at the first failed allocation; frames allocated before the
 * failure are left in place for the caller to free.
 *
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        AVFrame *frame = av_frame_alloc();

        s->frames[i].tf.f = frame;
        if (!frame)
            return AVERROR(ENOMEM);
    }

    return 0;
}
2848
2849
static av_always_inline
2850
60
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2851
{
2852
60
    VP8Context *s = avctx->priv_data;
2853
    int ret;
2854
2855
60
    s->avctx = avctx;
2856
60
    s->vp7   = avctx->codec->id == AV_CODEC_ID_VP7;
2857
60
    s->pix_fmt = AV_PIX_FMT_NONE;
2858
60
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2859
2860
60
    ff_videodsp_init(&s->vdsp, 8);
2861
2862
60
    ff_vp78dsp_init(&s->vp8dsp);
2863
60
    if (CONFIG_VP7_DECODER && is_vp7) {
2864
3
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2865
3
        ff_vp7dsp_init(&s->vp8dsp);
2866
3
        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2867
3
        s->filter_mb_row           = vp7_filter_mb_row;
2868
57
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
2869
57
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2870
57
        ff_vp8dsp_init(&s->vp8dsp);
2871
57
        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2872
57
        s->filter_mb_row           = vp8_filter_mb_row;
2873
    }
2874
2875
    /* does not change for VP8 */
2876
60
    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
2877
2878
60
    if ((ret = vp8_init_frames(s)) < 0) {
2879
        ff_vp8_decode_free(avctx);
2880
        return ret;
2881
    }
2882
2883
60
    return 0;
2884
}
2885
2886
#if CONFIG_VP7_DECODER
/**
 * Init callback for VP7.
 *
 * @return the result of vp78_decode_init() called with IS_VP7
 */
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx,
                            IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2892
2893
57
/**
 * Init callback for VP8.
 *
 * @return the result of vp78_decode_init() called with IS_VP8
 */
av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx,
                            IS_VP8);
}
2897
2898
#if CONFIG_VP8_DECODER
#if HAVE_THREADS
/* Translate a frame pointer into s_src->frames[] to the same slot of
 * s->frames[]; NULL stays NULL. Relies on locals named s and s_src. */
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

/**
 * Copy the decoder state that the next frame depends on from the source
 * thread's context into the destination thread's context.
 *
 * @param dst destination codec context
 * @param src source codec context
 * @return 0 on success, or the negative error from vp8_ref_frame()
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s     = dst->priv_data;
    VP8Context *s_src = src->priv_data;
    int i;

    /* Drop our buffers if they were allocated for a different size. */
    if (s->macroblocks_base) {
        const int size_changed = s_src->mb_width  != s->mb_width ||
                                 s_src->mb_height != s->mb_height;

        if (size_changed) {
            free_buffers(s);
            s->mb_width  = s_src->mb_width;
            s->mb_height = s_src->mb_height;
        }
    }

    s->pix_fmt      = s_src->pix_fmt;
    /* Take prob[1] when the source frame did not keep its updated
     * probabilities, prob[0] otherwise. */
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    /* Reference every frame the source context currently holds. */
    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        int ret;

        if (!s_src->frames[i].tf.f->buf[0])
            continue;

        ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
        if (ret < 0)
            return ret;
    }

    /* Our current references are the source's references for its
     * following frame. */
    for (i = 0; i < 4; i++)
        s->framep[i] = REBASE(s_src->next_framep[i]);

    return 0;
}
#endif /* HAVE_THREADS */
#endif /* CONFIG_VP8_DECODER */
2938
2939
#if CONFIG_VP7_DECODER
2940
AVCodec ff_vp7_decoder = {
2941
    .name                  = "vp7",
2942
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
2943
    .type                  = AVMEDIA_TYPE_VIDEO,
2944
    .id                    = AV_CODEC_ID_VP7,
2945
    .priv_data_size        = sizeof(VP8Context),
2946
    .init                  = vp7_decode_init,
2947
    .close                 = ff_vp8_decode_free,
2948
    .decode                = vp7_decode_frame,
2949
    .capabilities          = AV_CODEC_CAP_DR1,
2950
    .flush                 = vp8_decode_flush,
2951
};
2952
#endif /* CONFIG_VP7_DECODER */
2953
2954
#if CONFIG_VP8_DECODER
2955
AVCodec ff_vp8_decoder = {
2956
    .name                  = "vp8",
2957
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
2958
    .type                  = AVMEDIA_TYPE_VIDEO,
2959
    .id                    = AV_CODEC_ID_VP8,
2960
    .priv_data_size        = sizeof(VP8Context),
2961
    .init                  = ff_vp8_decode_init,
2962
    .close                 = ff_vp8_decode_free,
2963
    .decode                = ff_vp8_decode_frame,
2964
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2965
                             AV_CODEC_CAP_SLICE_THREADS,
2966
    .flush                 = vp8_decode_flush,
2967
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2968
    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
2969
#if CONFIG_VP8_VAAPI_HWACCEL
2970
                               HWACCEL_VAAPI(vp8),
2971
#endif
2972
#if CONFIG_VP8_NVDEC_HWACCEL
2973
                               HWACCEL_NVDEC(vp8),
2974
#endif
2975
                               NULL
2976
                           },
2977
    .caps_internal         = FF_CODEC_CAP_ALLOCATE_PROGRESS,
2978
};
2979
#endif /* CONFIG_VP8_DECODER */