GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/h264_mb.c Lines: 374 396 94.4 %
Date: 2019-11-22 03:34:36 Branches: 264 298 88.6 %

Line Branch Exec Source
1
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... decoder
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
22
/**
23
 * @file
24
 * H.264 / AVC / MPEG-4 part10 macroblock decoding
25
 */
26
27
#include <stdint.h>
28
29
#include "config.h"
30
31
#include "libavutil/common.h"
32
#include "libavutil/intreadwrite.h"
33
#include "avcodec.h"
34
#include "h264dec.h"
35
#include "h264_ps.h"
36
#include "qpeldsp.h"
37
#include "thread.h"
38
39
67646
static inline int get_lowest_part_list_y(H264SliceContext *sl,
40
                                         int n, int height, int y_offset, int list)
41
{
42
67646
    int raw_my             = sl->mv_cache[list][scan8[n]][1];
43
67646
    int filter_height_down = (raw_my & 3) ? 3 : 0;
44
67646
    int full_my            = (raw_my >> 2) + y_offset;
45
67646
    int bottom             = full_my + filter_height_down + height;
46
47
    av_assert2(height >= 0);
48
49
67646
    return FFMAX(0, bottom);
50
}
51
52
53762
static inline void get_lowest_part_y(const H264Context *h, H264SliceContext *sl,
53
                                     int16_t refs[2][48], int n,
54
                                     int height, int y_offset, int list0,
55
                                     int list1, int *nrefs)
56
{
57
    int my;
58
59
53762
    y_offset += 16 * (sl->mb_y >> MB_FIELD(sl));
60
61
53762
    if (list0) {
62
48825
        int ref_n = sl->ref_cache[0][scan8[n]];
63
48825
        H264Ref *ref = &sl->ref_list[0][ref_n];
64
65
        // Error resilience puts the current picture in the ref list.
66
        // Don't try to wait on these as it will cause a deadlock.
67
        // Fields can wait on each other, though.
68
48825
        if (ref->parent->tf.progress->data != h->cur_pic.tf.progress->data ||
69
            (ref->reference & 3) != h->picture_structure) {
70
48825
            my = get_lowest_part_list_y(sl, n, height, y_offset, 0);
71
48825
            if (refs[0][ref_n] < 0)
72
39009
                nrefs[0] += 1;
73
48825
            refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
74
        }
75
    }
76
77
53762
    if (list1) {
78
18821
        int ref_n    = sl->ref_cache[1][scan8[n]];
79
18821
        H264Ref *ref = &sl->ref_list[1][ref_n];
80
81
18821
        if (ref->parent->tf.progress->data != h->cur_pic.tf.progress->data ||
82
            (ref->reference & 3) != h->picture_structure) {
83
18821
            my = get_lowest_part_list_y(sl, n, height, y_offset, 1);
84
18821
            if (refs[1][ref_n] < 0)
85
16624
                nrefs[1] += 1;
86
18821
            refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
87
        }
88
    }
89
53762
}
90
91
/**
92
 * Wait until all reference frames are available for MC operations.
93
 *
94
 * @param h the H.264 context
95
 */
96
39818
static void await_references(const H264Context *h, H264SliceContext *sl)
97
{
98
39818
    const int mb_xy   = sl->mb_xy;
99
39818
    const int mb_type = h->cur_pic.mb_type[mb_xy];
100
    int16_t refs[2][48];
101
39818
    int nrefs[2] = { 0 };
102
    int ref, list;
103
104
39818
    memset(refs, -1, sizeof(refs));
105
106
39818
    if (IS_16X16(mb_type)) {
107
31704
        get_lowest_part_y(h, sl, refs, 0, 16, 0,
108
                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
109
8114
    } else if (IS_16X8(mb_type)) {
110
2608
        get_lowest_part_y(h, sl, refs, 0, 8, 0,
111
                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
112
2608
        get_lowest_part_y(h, sl, refs, 8, 8, 8,
113
                          IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
114
5506
    } else if (IS_8X16(mb_type)) {
115
2591
        get_lowest_part_y(h, sl, refs, 0, 16, 0,
116
                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
117
2591
        get_lowest_part_y(h, sl, refs, 4, 16, 0,
118
                          IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
119
    } else {
120
        int i;
121
122
        av_assert2(IS_8X8(mb_type));
123
124
14575
        for (i = 0; i < 4; i++) {
125
11660
            const int sub_mb_type = sl->sub_mb_type[i];
126
11660
            const int n           = 4 * i;
127
11660
            int y_offset          = (i & 2) << 2;
128
129
11660
            if (IS_SUB_8X8(sub_mb_type)) {
130
11660
                get_lowest_part_y(h, sl, refs, n, 8, y_offset,
131
                                  IS_DIR(sub_mb_type, 0, 0),
132
                                  IS_DIR(sub_mb_type, 0, 1),
133
                                  nrefs);
134
            } else if (IS_SUB_8X4(sub_mb_type)) {
135
                get_lowest_part_y(h, sl, refs, n, 4, y_offset,
136
                                  IS_DIR(sub_mb_type, 0, 0),
137
                                  IS_DIR(sub_mb_type, 0, 1),
138
                                  nrefs);
139
                get_lowest_part_y(h, sl, refs, n + 2, 4, y_offset + 4,
140
                                  IS_DIR(sub_mb_type, 0, 0),
141
                                  IS_DIR(sub_mb_type, 0, 1),
142
                                  nrefs);
143
            } else if (IS_SUB_4X8(sub_mb_type)) {
144
                get_lowest_part_y(h, sl, refs, n, 8, y_offset,
145
                                  IS_DIR(sub_mb_type, 0, 0),
146
                                  IS_DIR(sub_mb_type, 0, 1),
147
                                  nrefs);
148
                get_lowest_part_y(h, sl, refs, n + 1, 8, y_offset,
149
                                  IS_DIR(sub_mb_type, 0, 0),
150
                                  IS_DIR(sub_mb_type, 0, 1),
151
                                  nrefs);
152
            } else {
153
                int j;
154
                av_assert2(IS_SUB_4X4(sub_mb_type));
155
                for (j = 0; j < 4; j++) {
156
                    int sub_y_offset = y_offset + 2 * (j & 2);
157
                    get_lowest_part_y(h, sl, refs, n + j, 4, sub_y_offset,
158
                                      IS_DIR(sub_mb_type, 0, 0),
159
                                      IS_DIR(sub_mb_type, 0, 1),
160
                                      nrefs);
161
                }
162
            }
163
        }
164
    }
165
166
97600
    for (list = sl->list_count - 1; list >= 0; list--)
167

120634
        for (ref = 0; ref < 48 && nrefs[list]; ref++) {
168
62852
            int row = refs[list][ref];
169
62852
            if (row >= 0) {
170
55633
                H264Ref *ref_pic  = &sl->ref_list[list][ref];
171
55633
                int ref_field         = ref_pic->reference - 1;
172
55633
                int ref_field_picture = ref_pic->parent->field_picture;
173
55633
                int pic_height        = 16 * h->mb_height >> ref_field_picture;
174
175
55633
                row <<= MB_MBAFF(sl);
176
55633
                nrefs[list]--;
177
178

55633
                if (!FIELD_PICTURE(h) && ref_field_picture) { // frame referencing two fields
179
                    av_assert2((ref_pic->parent->reference & 3) == 3);
180
                    ff_thread_await_progress(&ref_pic->parent->tf,
181
                                             FFMIN((row >> 1) - !(row & 1),
182
                                                   pic_height - 1),
183
                                             1);
184
                    ff_thread_await_progress(&ref_pic->parent->tf,
185
                                             FFMIN((row >> 1), pic_height - 1),
186
                                             0);
187

55633
                } else if (FIELD_PICTURE(h) && !ref_field_picture) { // field referencing one field of a frame
188
                    ff_thread_await_progress(&ref_pic->parent->tf,
189
                                             FFMIN(row * 2 + ref_field,
190
                                                   pic_height - 1),
191
                                             0);
192
55633
                } else if (FIELD_PICTURE(h)) {
193
                    ff_thread_await_progress(&ref_pic->parent->tf,
194
                                             FFMIN(row, pic_height - 1),
195
                                             ref_field);
196
                } else {
197
55633
                    ff_thread_await_progress(&ref_pic->parent->tf,
198
                                             FFMIN(row, pic_height - 1),
199
                                             0);
200
                }
201
            }
202
        }
203
39818
}
204
205
24897718
static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext *sl,
206
                                         H264Ref *pic,
207
                                         int n, int square, int height,
208
                                         int delta, int list,
209
                                         uint8_t *dest_y, uint8_t *dest_cb,
210
                                         uint8_t *dest_cr,
211
                                         int src_x_offset, int src_y_offset,
212
                                         const qpel_mc_func *qpix_op,
213
                                         h264_chroma_mc_func chroma_op,
214
                                         int pixel_shift, int chroma_idc)
215
{
216
24897718
    const int mx      = sl->mv_cache[list][scan8[n]][0] + src_x_offset * 8;
217
24897718
    int my            = sl->mv_cache[list][scan8[n]][1] + src_y_offset * 8;
218
24897718
    const int luma_xy = (mx & 3) + ((my & 3) << 2);
219
24897718
    ptrdiff_t offset  = (mx >> 2) * (1 << pixel_shift) + (my >> 2) * sl->mb_linesize;
220
24897718
    uint8_t *src_y    = pic->data[0] + offset;
221
    uint8_t *src_cb, *src_cr;
222
24897718
    int extra_width  = 0;
223
24897718
    int extra_height = 0;
224
24897718
    int emu = 0;
225
24897718
    const int full_mx    = mx >> 2;
226
24897718
    const int full_my    = my >> 2;
227
24897718
    const int pic_width  = 16 * h->mb_width;
228
24897718
    const int pic_height = 16 * h->mb_height >> MB_FIELD(sl);
229
    int ysh;
230
231
24897718
    if (mx & 7)
232
18267580
        extra_width -= 3;
233
24897718
    if (my & 7)
234
15847819
        extra_height -= 3;
235
236
24897718
    if (full_mx                <          0 - extra_width  ||
237
24778755
        full_my                <          0 - extra_height ||
238
24423643
        full_mx + 16 /*FIXME*/ > pic_width  + extra_width  ||
239
23847760
        full_my + 16 /*FIXME*/ > pic_height + extra_height) {
240
2069178
        h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
241
2069178
                                 src_y - (2 << pixel_shift) - 2 * sl->mb_linesize,
242
                                 sl->mb_linesize, sl->mb_linesize,
243
                                 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
244
                                 full_my - 2, pic_width, pic_height);
245
2069178
        src_y = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
246
2069178
        emu   = 1;
247
    }
248
249
24897718
    qpix_op[luma_xy](dest_y, src_y, sl->mb_linesize); // FIXME try variable height perhaps?
250
24897718
    if (!square)
251
8656550
        qpix_op[luma_xy](dest_y + delta, src_y + delta, sl->mb_linesize);
252
253
    if (CONFIG_GRAY && h->flags & AV_CODEC_FLAG_GRAY)
254
        return;
255
256
24897718
    if (chroma_idc == 3 /* yuv444 */) {
257
139404
        src_cb = pic->data[1] + offset;
258
139404
        if (emu) {
259
5484
            h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
260
5484
                                     src_cb - (2 << pixel_shift) - 2 * sl->mb_linesize,
261
                                     sl->mb_linesize, sl->mb_linesize,
262
                                     16 + 5, 16 + 5 /*FIXME*/,
263
                                     full_mx - 2, full_my - 2,
264
                                     pic_width, pic_height);
265
5484
            src_cb = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
266
        }
267
139404
        qpix_op[luma_xy](dest_cb, src_cb, sl->mb_linesize); // FIXME try variable height perhaps?
268
139404
        if (!square)
269
12311
            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, sl->mb_linesize);
270
271
139404
        src_cr = pic->data[2] + offset;
272
139404
        if (emu) {
273
5484
            h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
274
5484
                                     src_cr - (2 << pixel_shift) - 2 * sl->mb_linesize,
275
                                     sl->mb_linesize, sl->mb_linesize,
276
                                     16 + 5, 16 + 5 /*FIXME*/,
277
                                     full_mx - 2, full_my - 2,
278
                                     pic_width, pic_height);
279
5484
            src_cr = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
280
        }
281
139404
        qpix_op[luma_xy](dest_cr, src_cr, sl->mb_linesize); // FIXME try variable height perhaps?
282
139404
        if (!square)
283
12311
            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, sl->mb_linesize);
284
139404
        return;
285
    }
286
287
24758314
    ysh = 3 - (chroma_idc == 2 /* yuv422 */);
288

24758314
    if (chroma_idc == 1 /* yuv420 */ && MB_FIELD(sl)) {
289
        // chroma offset when predicting from a field of opposite parity
290
8256799
        my  += 2 * ((sl->mb_y & 1) - (pic->reference - 1));
291

8256799
        emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
292
    }
293
294
24758314
    src_cb = pic->data[1] + ((mx >> 3) * (1 << pixel_shift)) +
295
24758314
             (my >> ysh) * sl->mb_uvlinesize;
296
24758314
    src_cr = pic->data[2] + ((mx >> 3) * (1 << pixel_shift)) +
297
24758314
             (my >> ysh) * sl->mb_uvlinesize;
298
299
24758314
    if (emu) {
300
2224998
        h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cb,
301
                                 sl->mb_uvlinesize, sl->mb_uvlinesize,
302
2224998
                                 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
303
2224998
                                 pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
304
2224998
        src_cb = sl->edge_emu_buffer;
305
    }
306
24758314
    chroma_op(dest_cb, src_cb, sl->mb_uvlinesize,
307
24758314
              height >> (chroma_idc == 1 /* yuv420 */),
308
24758314
              mx & 7, ((unsigned)my << (chroma_idc == 2 /* yuv422 */)) & 7);
309
310
24758314
    if (emu) {
311
2224998
        h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cr,
312
                                 sl->mb_uvlinesize, sl->mb_uvlinesize,
313
2224998
                                 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
314
2224998
                                 pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
315
2224998
        src_cr = sl->edge_emu_buffer;
316
    }
317
24758314
    chroma_op(dest_cr, src_cr, sl->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
318
24758314
              mx & 7, ((unsigned)my << (chroma_idc == 2 /* yuv422 */)) & 7);
319
}
320
321
18480715
static av_always_inline void mc_part_std(const H264Context *h, H264SliceContext *sl,
322
                                         int n, int square,
323
                                         int height, int delta,
324
                                         uint8_t *dest_y, uint8_t *dest_cb,
325
                                         uint8_t *dest_cr,
326
                                         int x_offset, int y_offset,
327
                                         const qpel_mc_func *qpix_put,
328
                                         h264_chroma_mc_func chroma_put,
329
                                         const qpel_mc_func *qpix_avg,
330
                                         h264_chroma_mc_func chroma_avg,
331
                                         int list0, int list1,
332
                                         int pixel_shift, int chroma_idc)
333
{
334
18480715
    const qpel_mc_func *qpix_op   = qpix_put;
335
18480715
    h264_chroma_mc_func chroma_op = chroma_put;
336
337
18480715
    dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize;
338
18480715
    if (chroma_idc == 3 /* yuv444 */) {
339
38509
        dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize;
340
38509
        dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize;
341
18442206
    } else if (chroma_idc == 2 /* yuv422 */) {
342
98093
        dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize;
343
98093
        dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize;
344
    } else { /* yuv420 */
345
18344113
        dest_cb += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize;
346
18344113
        dest_cr += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize;
347
    }
348
18480715
    x_offset += 8 * sl->mb_x;
349
18480715
    y_offset += 8 * (sl->mb_y >> MB_FIELD(sl));
350
351
18480715
    if (list0) {
352
17073903
        H264Ref *ref = &sl->ref_list[0][sl->ref_cache[0][scan8[n]]];
353
17073903
        mc_dir_part(h, sl, ref, n, square, height, delta, 0,
354
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
355
                    qpix_op, chroma_op, pixel_shift, chroma_idc);
356
357
17073903
        qpix_op   = qpix_avg;
358
17073903
        chroma_op = chroma_avg;
359
    }
360
361
18480715
    if (list1) {
362
6098297
        H264Ref *ref = &sl->ref_list[1][sl->ref_cache[1][scan8[n]]];
363
6098297
        mc_dir_part(h, sl, ref, n, square, height, delta, 1,
364
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
365
                    qpix_op, chroma_op, pixel_shift, chroma_idc);
366
    }
367
18480715
}
368
369
1453841
static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceContext *sl,
370
                                              int n, int square,
371
                                              int height, int delta,
372
                                              uint8_t *dest_y, uint8_t *dest_cb,
373
                                              uint8_t *dest_cr,
374
                                              int x_offset, int y_offset,
375
                                              const qpel_mc_func *qpix_put,
376
                                              h264_chroma_mc_func chroma_put,
377
                                              h264_weight_func luma_weight_op,
378
                                              h264_weight_func chroma_weight_op,
379
                                              h264_biweight_func luma_weight_avg,
380
                                              h264_biweight_func chroma_weight_avg,
381
                                              int list0, int list1,
382
                                              int pixel_shift, int chroma_idc)
383
{
384
    int chroma_height;
385
386
1453841
    dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize;
387
1453841
    if (chroma_idc == 3 /* yuv444 */) {
388
88196
        chroma_height     = height;
389
88196
        chroma_weight_avg = luma_weight_avg;
390
88196
        chroma_weight_op  = luma_weight_op;
391
88196
        dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize;
392
88196
        dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize;
393
1365645
    } else if (chroma_idc == 2 /* yuv422 */) {
394
1266
        chroma_height = height;
395
1266
        dest_cb      += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize;
396
1266
        dest_cr      += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize;
397
    } else { /* yuv420 */
398
1364379
        chroma_height = height >> 1;
399
1364379
        dest_cb      += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize;
400
1364379
        dest_cr      += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize;
401
    }
402
1453841
    x_offset += 8 * sl->mb_x;
403
1453841
    y_offset += 8 * (sl->mb_y >> MB_FIELD(sl));
404
405

1725518
    if (list0 && list1) {
406
        /* don't optimize for luma-only case, since B-frames usually
407
         * use implicit weights => chroma too. */
408
271677
        uint8_t *tmp_cb = sl->bipred_scratchpad;
409
271677
        uint8_t *tmp_cr = sl->bipred_scratchpad + (16 << pixel_shift);
410
271677
        uint8_t *tmp_y  = sl->bipred_scratchpad + 16 * sl->mb_uvlinesize;
411
271677
        int refn0       = sl->ref_cache[0][scan8[n]];
412
271677
        int refn1       = sl->ref_cache[1][scan8[n]];
413
414
271677
        mc_dir_part(h, sl, &sl->ref_list[0][refn0], n, square, height, delta, 0,
415
                    dest_y, dest_cb, dest_cr,
416
                    x_offset, y_offset, qpix_put, chroma_put,
417
                    pixel_shift, chroma_idc);
418
271677
        mc_dir_part(h, sl, &sl->ref_list[1][refn1], n, square, height, delta, 1,
419
                    tmp_y, tmp_cb, tmp_cr,
420
                    x_offset, y_offset, qpix_put, chroma_put,
421
                    pixel_shift, chroma_idc);
422
423
271677
        if (sl->pwt.use_weight == 2) {
424
221014
            int weight0 = sl->pwt.implicit_weight[refn0][refn1][sl->mb_y & 1];
425
221014
            int weight1 = 64 - weight0;
426
221014
            luma_weight_avg(dest_y, tmp_y, sl->mb_linesize,
427
                            height, 5, weight0, weight1, 0);
428
            if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) {
429
221014
                chroma_weight_avg(dest_cb, tmp_cb, sl->mb_uvlinesize,
430
                                  chroma_height, 5, weight0, weight1, 0);
431
221014
                chroma_weight_avg(dest_cr, tmp_cr, sl->mb_uvlinesize,
432
                                  chroma_height, 5, weight0, weight1, 0);
433
            }
434
        } else {
435
50663
            luma_weight_avg(dest_y, tmp_y, sl->mb_linesize, height,
436
                            sl->pwt.luma_log2_weight_denom,
437
                            sl->pwt.luma_weight[refn0][0][0],
438
                            sl->pwt.luma_weight[refn1][1][0],
439
50663
                            sl->pwt.luma_weight[refn0][0][1] +
440
50663
                            sl->pwt.luma_weight[refn1][1][1]);
441
            if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) {
442
50663
                chroma_weight_avg(dest_cb, tmp_cb, sl->mb_uvlinesize, chroma_height,
443
                                  sl->pwt.chroma_log2_weight_denom,
444
                                  sl->pwt.chroma_weight[refn0][0][0][0],
445
                                  sl->pwt.chroma_weight[refn1][1][0][0],
446
50663
                                  sl->pwt.chroma_weight[refn0][0][0][1] +
447
50663
                                  sl->pwt.chroma_weight[refn1][1][0][1]);
448
50663
                chroma_weight_avg(dest_cr, tmp_cr, sl->mb_uvlinesize, chroma_height,
449
                                  sl->pwt.chroma_log2_weight_denom,
450
                                  sl->pwt.chroma_weight[refn0][0][1][0],
451
                                  sl->pwt.chroma_weight[refn1][1][1][0],
452
50663
                                  sl->pwt.chroma_weight[refn0][0][1][1] +
453
50663
                                  sl->pwt.chroma_weight[refn1][1][1][1]);
454
            }
455
        }
456
    } else {
457
1182164
        int list     = list1 ? 1 : 0;
458
1182164
        int refn     = sl->ref_cache[list][scan8[n]];
459
1182164
        H264Ref *ref = &sl->ref_list[list][refn];
460
1182164
        mc_dir_part(h, sl, ref, n, square, height, delta, list,
461
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
462
                    qpix_put, chroma_put, pixel_shift, chroma_idc);
463
464
1182164
        luma_weight_op(dest_y, sl->mb_linesize, height,
465
                       sl->pwt.luma_log2_weight_denom,
466
                       sl->pwt.luma_weight[refn][list][0],
467
                       sl->pwt.luma_weight[refn][list][1]);
468
        if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) {
469
1182164
            if (sl->pwt.use_weight_chroma) {
470
542457
                chroma_weight_op(dest_cb, sl->mb_uvlinesize, chroma_height,
471
                                 sl->pwt.chroma_log2_weight_denom,
472
                                 sl->pwt.chroma_weight[refn][list][0][0],
473
                                 sl->pwt.chroma_weight[refn][list][0][1]);
474
542457
                chroma_weight_op(dest_cr, sl->mb_uvlinesize, chroma_height,
475
                                 sl->pwt.chroma_log2_weight_denom,
476
                                 sl->pwt.chroma_weight[refn][list][1][0],
477
                                 sl->pwt.chroma_weight[refn][list][1][1]);
478
            }
479
        }
480
    }
481
1453841
}
482
483
11979020
static av_always_inline void prefetch_motion(const H264Context *h, H264SliceContext *sl,
484
                                             int list, int pixel_shift,
485
                                             int chroma_idc)
486
{
487
    /* fetch pixels for estimated mv 4 macroblocks ahead
488
     * optimized for 64byte cache lines */
489
11979020
    const int refn = sl->ref_cache[list][scan8[0]];
490
11979020
    if (refn >= 0) {
491
11609824
        const int mx  = (sl->mv_cache[list][scan8[0]][0] >> 2) + 16 * sl->mb_x + 8;
492
11609824
        const int my  = (sl->mv_cache[list][scan8[0]][1] >> 2) + 16 * sl->mb_y;
493
11609824
        uint8_t **src = sl->ref_list[list][refn].data;
494
11609824
        int off       =  mx * (1<< pixel_shift) +
495
11609824
                        (my + (sl->mb_x & 3) * 4) * sl->mb_linesize +
496
11609824
                        (64 << pixel_shift);
497
11609824
        h->vdsp.prefetch(src[0] + off, sl->linesize, 4);
498
11609824
        if (chroma_idc == 3 /* yuv444 */) {
499
114677
            h->vdsp.prefetch(src[1] + off, sl->linesize, 4);
500
114677
            h->vdsp.prefetch(src[2] + off, sl->linesize, 4);
501
        } else {
502
11495147
            off= ((mx>>1)+64) * (1<<pixel_shift) + ((my>>1) + (sl->mb_x&7))*sl->uvlinesize;
503
11495147
            h->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
504
        }
505
    }
506
11979020
}
507
508
6228148
static av_always_inline void xchg_mb_border(const H264Context *h, H264SliceContext *sl,
509
                                            uint8_t *src_y,
510
                                            uint8_t *src_cb, uint8_t *src_cr,
511
                                            int linesize, int uvlinesize,
512
                                            int xchg, int chroma444,
513
                                            int simple, int pixel_shift)
514
{
515
    int deblock_topleft;
516
    int deblock_top;
517
6228148
    int top_idx = 1;
518
    uint8_t *top_border_m1;
519
    uint8_t *top_border;
520
521

6228148
    if (!simple && FRAME_MBAFF(h)) {
522
1785962
        if (sl->mb_y & 1) {
523
897060
            if (!MB_MBAFF(sl))
524
689044
                return;
525
        } else {
526
888902
            top_idx = MB_MBAFF(sl) ? 0 : 1;
527
        }
528
    }
529
530
5539104
    if (sl->deblocking_filter == 2) {
531
60068
        deblock_topleft = h->slice_table[sl->mb_xy - 1 - h->mb_stride] == sl->slice_num;
532
60068
        deblock_top     = sl->top_type;
533
    } else {
534
5479036
        deblock_topleft = (sl->mb_x > 0);
535
5479036
        deblock_top     = (sl->mb_y > !!MB_FIELD(sl));
536
    }
537
538
5539104
    src_y  -= linesize   + 1 + pixel_shift;
539
5539104
    src_cb -= uvlinesize + 1 + pixel_shift;
540
5539104
    src_cr -= uvlinesize + 1 + pixel_shift;
541
542
5539104
    top_border_m1 = sl->top_borders[top_idx][sl->mb_x - 1];
543
5539104
    top_border    = sl->top_borders[top_idx][sl->mb_x];
544
545
#define XCHG(a, b, xchg)                        \
546
    if (pixel_shift) {                          \
547
        if (xchg) {                             \
548
            AV_SWAP64(b + 0, a + 0);            \
549
            AV_SWAP64(b + 8, a + 8);            \
550
        } else {                                \
551
            AV_COPY128(b, a);                   \
552
        }                                       \
553
    } else if (xchg)                            \
554
        AV_SWAP64(b, a);                        \
555
    else                                        \
556
        AV_COPY64(b, a);
557
558
5539104
    if (deblock_top) {
559
5317360
        if (deblock_topleft) {
560
5216884
            XCHG(top_border_m1 + (8 << pixel_shift),
561
                 src_y - (7 << pixel_shift), 1);
562
        }
563

5317360
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
564
5317360
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
565
5317360
        if (sl->mb_x + 1 < h->mb_width) {
566
5197826
            XCHG(sl->top_borders[top_idx][sl->mb_x + 1],
567
                 src_y + (17 << pixel_shift), 1);
568
        }
569
        if (simple || !CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) {
570
5317360
            if (chroma444) {
571
41172
                if (deblock_topleft) {
572
40600
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
573
40600
                    XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
574
                }
575

41172
                XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
576
41172
                XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
577

41172
                XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
578
41172
                XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
579
41172
                if (sl->mb_x + 1 < h->mb_width) {
580
40584
                    XCHG(sl->top_borders[top_idx][sl->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
581
40584
                    XCHG(sl->top_borders[top_idx][sl->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
582
                }
583
            } else {
584
5276188
                if (deblock_topleft) {
585
5176284
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
586
5176284
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
587
                }
588
5276188
                XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
589
5276188
                XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
590
            }
591
        }
592
    }
593
}
594
595
4170513
/**
 * Read one coefficient from a coefficient buffer.
 *
 * @param mb             start of the coefficient buffer
 * @param high_bit_depth non-zero if the buffer holds 32-bit entries,
 *                       zero if it holds 16-bit entries
 * @param index          entry index, counted in entries of the selected width
 * @return the value read through AV_RN32A() or AV_RN16A() respectively
 */
static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth,
                                        int index)
{
    if (!high_bit_depth)
        return AV_RN16A(mb + index);

    /* same buffer, reinterpreted as an array of 32-bit entries */
    return AV_RN32A((int32_t *)mb + index);
}
603
604
9360
/**
 * Store one coefficient into a coefficient buffer.
 *
 * @param mb             start of the coefficient buffer
 * @param high_bit_depth non-zero if the buffer holds 32-bit entries,
 *                       zero if it holds 16-bit entries
 * @param index          entry index, counted in entries of the selected width
 * @param value          value to store through AV_WN32A() or AV_WN16A()
 */
static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth,
                                         int index, int value)
{
    if (!high_bit_depth) {
        AV_WN16A(mb + index, value);
        return;
    }

    /* same buffer, reinterpreted as an array of 32-bit entries */
    AV_WN32A((int32_t *)mb + index, value);
}
612
613
3788256
static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h,
614
                                                       H264SliceContext *sl,
615
                                                       int mb_type, int simple,
616
                                                       int transform_bypass,
617
                                                       int pixel_shift,
618
                                                       const int *block_offset,
619
                                                       int linesize,
620
                                                       uint8_t *dest_y, int p)
621
{
622
    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
623
    void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride);
624
    int i;
625
3788256
    int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
626
3788256
    block_offset += 16 * p;
627
3788256
    if (IS_INTRA4x4(mb_type)) {
628
2927367
        if (IS_8x8DCT(mb_type)) {
629
1483373
            if (transform_bypass) {
630
660
                idct_dc_add =
631
660
                idct_add    = h->h264dsp.h264_add_pixels8_clear;
632
            } else {
633
1482713
                idct_dc_add = h->h264dsp.h264_idct8_dc_add;
634
1482713
                idct_add    = h->h264dsp.h264_idct8_add;
635
            }
636
7416865
            for (i = 0; i < 16; i += 4) {
637
5933492
                uint8_t *const ptr = dest_y + block_offset[i];
638
5933492
                const int dir      = sl->intra4x4_pred_mode_cache[scan8[i]];
639

5933492
                if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) {
640
2488
                    if (h->x264_build < 151U) {
641
2488
                        h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
642
                    } else
643
                        h->hpc.pred8x8l_filter_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift),
644
                                                        (sl-> topleft_samples_available << i) & 0x8000,
645
                                                        (sl->topright_samples_available << i) & 0x4000, linesize);
646
                } else {
647
5931004
                    const int nnz = sl->non_zero_count_cache[scan8[i + p * 16]];
648
5931004
                    h->hpc.pred8x8l[dir](ptr, (sl->topleft_samples_available << i) & 0x8000,
649
5931004
                                         (sl->topright_samples_available << i) & 0x4000, linesize);
650
5931004
                    if (nnz) {
651

5071842
                        if (nnz == 1 && dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256))
652
243081
                            idct_dc_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
653
                        else
654
4828761
                            idct_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
655
                    }
656
                }
657
            }
658
        } else {
659
1443994
            if (transform_bypass) {
660
12974
                idct_dc_add  =
661
12974
                idct_add     = h->h264dsp.h264_add_pixels4_clear;
662
            } else {
663
1431020
                idct_dc_add = h->h264dsp.h264_idct_dc_add;
664
1431020
                idct_add    = h->h264dsp.h264_idct_add;
665
            }
666
24547898
            for (i = 0; i < 16; i++) {
667
23103904
                uint8_t *const ptr = dest_y + block_offset[i];
668
23103904
                const int dir      = sl->intra4x4_pred_mode_cache[scan8[i]];
669
670

23103904
                if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) {
671
153963
                    h->hpc.pred4x4_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
672
                } else {
673
                    uint8_t *topright;
674
                    int nnz, tr;
675
                    uint64_t tr_high;
676

25145754
                    if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
677
2195813
                        const int topright_avail = (sl->topright_samples_available << i) & 0x8000;
678
                        av_assert2(sl->mb_y || linesize <= block_offset[i]);
679
2195813
                        if (!topright_avail) {
680
610202
                            if (pixel_shift) {
681
246620
                                tr_high  = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
682
246620
                                topright = (uint8_t *)&tr_high;
683
                            } else {
684
363582
                                tr       = ptr[3 - linesize] * 0x01010101u;
685
363582
                                topright = (uint8_t *)&tr;
686
                            }
687
                        } else
688
1585611
                            topright = ptr + (4 << pixel_shift) - linesize;
689
                    } else
690
20754128
                        topright = NULL;
691
692
22949941
                    h->hpc.pred4x4[dir](ptr, topright, linesize);
693
22949941
                    nnz = sl->non_zero_count_cache[scan8[i + p * 16]];
694
22949941
                    if (nnz) {
695

15988501
                        if (nnz == 1 && dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256))
696
2036583
                            idct_dc_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
697
                        else
698
13951918
                            idct_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
699
                    }
700
                }
701
            }
702
        }
703
    } else {
704
860889
        h->hpc.pred16x16[sl->intra16x16_pred_mode](dest_y, linesize);
705
860889
        if (sl->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) {
706
576596
            if (!transform_bypass)
707
576011
                h->h264dsp.h264_luma_dc_dequant_idct(sl->mb + (p * 256 << pixel_shift),
708
576011
                                                     sl->mb_luma_dc[p],
709
576011
                                                     h->ps.pps->dequant4_coeff[p][qscale][0]);
710
            else {
711
                static const uint8_t dc_mapping[16] = {
712
                     0 * 16,  1 * 16,  4 * 16,  5 * 16,
713
                     2 * 16,  3 * 16,  6 * 16,  7 * 16,
714
                     8 * 16,  9 * 16, 12 * 16, 13 * 16,
715
                    10 * 16, 11 * 16, 14 * 16, 15 * 16
716
                };
717
9945
                for (i = 0; i < 16; i++)
718
18720
                    dctcoef_set(sl->mb + (p * 256 << pixel_shift),
719
9360
                                pixel_shift, dc_mapping[i],
720
9360
                                dctcoef_get(sl->mb_luma_dc[p],
721
                                            pixel_shift, i));
722
            }
723
        }
724
    }
725
3788256
}
726
727
13085357
/**
 * Add the residual of plane p to the destination for every macroblock
 * that is not IS_INTRA4x4(); for IS_INTRA4x4() macroblocks this function
 * does nothing.
 *
 * IS_INTRA16x16() macroblocks always get their residual added; any other
 * macroblock only when one of the low four bits of sl->cbp is set.
 *
 * @param h                decoder context (DSP and prediction function tables,
 *                         parameter sets)
 * @param sl               slice context (coefficients, cbp,
 *                         non-zero-count cache)
 * @param mb_type          macroblock type flags
 * @param simple           not used by this function
 * @param transform_bypass non-zero to add residuals without a transform
 * @param pixel_shift      0 for 8-bit samples, 1 for 16-bit samples/32-bit
 *                         coefficients
 * @param block_offset     per-block destination offsets, 16 entries per plane
 * @param linesize         destination stride in bytes
 * @param dest_y           destination of the plane's macroblock
 * @param p                plane index
 */
static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl,
                                                    int mb_type, int simple,
                                                    int transform_bypass,
                                                    int pixel_shift,
                                                    const int *block_offset,
                                                    int linesize,
                                                    uint8_t *dest_y, int p)
{
    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
    int16_t *const plane_coeffs = sl->mb + ((p * 256) << pixel_shift);
    int i, step;

    if (IS_INTRA4x4(mb_type))
        return;

    block_offset += 16 * p;

    if (IS_INTRA16x16(mb_type)) {
        if (!transform_bypass) {
            h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
                                            plane_coeffs,
                                            linesize,
                                            sl->non_zero_count_cache + p * 5 * 8);
            return;
        }

        if (h->ps.sps->profile_idc == 244 &&
            (sl->intra16x16_pred_mode == VERT_PRED8x8 ||
             sl->intra16x16_pred_mode == HOR_PRED8x8)) {
            /* Combined predict-and-add over the whole 16x16 block. */
            h->hpc.pred16x16_add[sl->intra16x16_pred_mode](dest_y, block_offset,
                                                           plane_coeffs,
                                                           linesize);
            return;
        }

        /* Plain bypass: add each 4x4 block that has coded coefficients
         * or a non-zero first (DC) coefficient. */
        for (i = 0; i < 16; i++) {
            if (sl->non_zero_count_cache[scan8[i + p * 16]] ||
                dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256))
                h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[i],
                                                  sl->mb + ((i * 16 + p * 256) << pixel_shift),
                                                  linesize);
        }
        return;
    }

    /* Remaining macroblock types: nothing to add unless cbp says so. */
    if (!(sl->cbp & 15))
        return;

    if (!transform_bypass) {
        if (IS_8x8DCT(mb_type))
            h->h264dsp.h264_idct8_add4(dest_y, block_offset,
                                       plane_coeffs,
                                       linesize,
                                       sl->non_zero_count_cache + p * 5 * 8);
        else
            h->h264dsp.h264_idct_add16(dest_y, block_offset,
                                       plane_coeffs,
                                       linesize,
                                       sl->non_zero_count_cache + p * 5 * 8);
        return;
    }

    /* Bypass: walk the blocks (8x8 blocks advance four 4x4 indices at a
     * time) and add those with coded coefficients. */
    if (IS_8x8DCT(mb_type)) {
        step     = 4;
        idct_add = h->h264dsp.h264_add_pixels8_clear;
    } else {
        step     = 1;
        idct_add = h->h264dsp.h264_add_pixels4_clear;
    }

    for (i = 0; i < 16; i += step) {
        if (sl->non_zero_count_cache[scan8[i + p * 16]])
            idct_add(dest_y + block_offset[i],
                     sl->mb + ((i * 16 + p * 256) << pixel_shift),
                     linesize);
    }
}
786
787
/*
 * h264_mb_template.c is included three times below, each time with a
 * different BITS / SIMPLE combination: (8, 1), (16, 1) and (16, 0).
 * NOTE(review): presumably each inclusion emits the hl_decode_mb_* variants
 * that ff_h264_hl_decode_mb() dispatches to -- confirm against the template.
 */
/* First inclusion: BITS = 8, SIMPLE = 1. */
#define BITS   8
788
#define SIMPLE 1
789
#include "h264_mb_template.c"
790
791
/* Second inclusion: BITS = 16, SIMPLE still 1. */
#undef  BITS
792
#define BITS   16
793
#include "h264_mb_template.c"
794
795
/* Third inclusion: SIMPLE = 0, BITS still 16. */
#undef  SIMPLE
796
#define SIMPLE 0
797
#include "h264_mb_template.c"
798
799
12842186
/**
 * Decode the current macroblock by dispatching to one of the
 * hl_decode_mb_* variants.
 *
 * The "complex" variant is used when CONFIG_SMALL is set, when the slice is
 * flagged complex, when the macroblock is IS_INTRA_PCM() or when
 * sl->qscale is 0. Otherwise a "simple" variant is chosen according to
 * h->pixel_shift. CHROMA444() content has its own pair of variants and uses
 * the complex one whenever pixel_shift is non-zero.
 *
 * @param h  decoder context
 * @param sl slice context holding the current macroblock position and state
 */
void ff_h264_hl_decode_mb(const H264Context *h, H264SliceContext *sl)
{
    const int cur_type  = h->cur_pic.mb_type[sl->mb_xy];
    const int need_full = CONFIG_SMALL || sl->is_complex ||
                          IS_INTRA_PCM(cur_type) || sl->qscale == 0;

    if (CHROMA444(h)) {
        if (!need_full && !h->pixel_shift)
            hl_decode_mb_444_simple_8(h, sl);
        else
            hl_decode_mb_444_complex(h, sl);
        return;
    }

    if (need_full)
        hl_decode_mb_complex(h, sl);
    else if (h->pixel_shift)
        hl_decode_mb_simple_16(h, sl);
    else
        hl_decode_mb_simple_8(h, sl);
}