GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/h264_direct.c Lines: 439 445 98.7 %
Date: 2019-11-22 03:34:36 Branches: 300 316 94.9 %

Line Branch Exec Source
1
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... direct mb/block decoding
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
22
/**
23
 * @file
24
 * H.264 / AVC / MPEG-4 part10 direct mb/block decoding.
25
 * @author Michael Niedermayer <michaelni@gmx.at>
26
 */
27
28
#include "internal.h"
29
#include "avcodec.h"
30
#include "h264dec.h"
31
#include "h264_ps.h"
32
#include "mpegutils.h"
33
#include "rectangle.h"
34
#include "thread.h"
35
36
#include <assert.h>
37
38
27501
/**
 * Compute the distance scale factor used by temporal direct prediction for
 * one list-0 reference.
 *
 * @param sl   slice context; sl->ref_list[0][i] supplies the POC of the
 *             list-0 reference and (through its parent) the long_ref flag
 * @param poc  POC of the current picture or field
 * @param poc1 POC of the first list-1 reference
 * @param i    index into sl->ref_list[0]
 * @return 256 when the clipped distance poc1 - poc0 is zero or the list-0
 *         reference is flagged long_ref; otherwise (tb * tx + 32) >> 6
 *         passed through av_clip_intp2(..., 10)
 */
static int get_scale_factor(H264SliceContext *sl,
                            int poc, int poc1, int i)
{
    int poc0 = sl->ref_list[0][i].poc;
    /* Differences are formed in 64 bits so that extreme POC values cannot
     * overflow before the clip to the int8 range. */
    int64_t pocdiff = poc1 - (int64_t)poc0;
    int td = av_clip_int8(pocdiff);

    /* No trailing newline: avpriv_request_sample() appends its own text
     * to the message, a newline here would split the log line. */
    if (pocdiff != (int)pocdiff)
        avpriv_request_sample(sl->h264->avctx, "pocdiff overflow");

    if (td == 0 || sl->ref_list[0][i].parent->long_ref) {
        return 256;
    } else {
        int64_t pocdiff0 = poc - (int64_t)poc0;
        int tb = av_clip_int8(pocdiff0);
        /* Rounded division 16384 / td; td is non-zero on this path. */
        int tx = (16384 + (FFABS(td) >> 1)) / td;

        if (pocdiff0 != (int)pocdiff0)
            av_log(sl->h264->avctx, AV_LOG_DEBUG, "pocdiff0 overflow\n");

        return av_clip_intp2((tb * tx + 32) >> 6, 10);
    }
}
61
62
7703
void ff_h264_direct_dist_scale_factor(const H264Context *const h,
63
                                      H264SliceContext *sl)
64
{
65
18790
    const int poc  = FIELD_PICTURE(h) ? h->cur_pic_ptr->field_poc[h->picture_structure == PICT_BOTTOM_FIELD]
66
7703
                                      : h->cur_pic_ptr->poc;
67
7703
    const int poc1 = sl->ref_list[1][0].poc;
68
    int i, field;
69
70
7703
    if (FRAME_MBAFF(h))
71
1539
        for (field = 0; field < 2; field++) {
72
1026
            const int poc  = h->cur_pic_ptr->field_poc[field];
73
1026
            const int poc1 = sl->ref_list[1][0].parent->field_poc[field];
74
7598
            for (i = 0; i < 2 * sl->ref_count[0]; i++)
75
6572
                sl->dist_scale_factor_field[field][i ^ field] =
76
6572
                    get_scale_factor(sl, poc, poc1, i + 16);
77
        }
78
79
28632
    for (i = 0; i < sl->ref_count[0]; i++)
80
20929
        sl->dist_scale_factor[i] = get_scale_factor(sl, poc, poc1, i);
81
7703
}
82
83
17458
/**
 * Build one row of the table that translates reference indices stored in
 * the first list-1 reference picture into list-0 indices of this slice.
 *
 * Each entry of the co-located picture's ref_poc[colfield][list] is compared
 * against 4 * frame_num + (reference & 3) of the list-0 references of the
 * current slice; the first match is stored in map[list].
 *
 * @param h        decoder context (read only)
 * @param sl       slice context providing ref_list[] and ref_count[]
 * @param map      table to fill; only map[list] is written
 * @param list     which list of the co-located picture is being mapped
 * @param field    field index of the current picture
 * @param colfield field index selecting the co-located ref_count/ref_poc row
 * @param mbafi    non-zero to search the list-0 entries starting at index 16
 */
static void fill_colmap(const H264Context *h, H264SliceContext *sl,
                        int map[2][16 + 32], int list,
                        int field, int colfield, int mbafi)
{
    H264Picture *const col_pic = sl->ref_list[1][0].parent;
    const int interlaced = mbafi || h->picture_structure != PICT_FRAME;
    int first, last;
    int parity, col_idx, cand;

    /* Range of list-0 entries to search. */
    if (mbafi) {
        first = 16;
        last  = 16 + 2 * sl->ref_count[0];
    } else {
        first = 0;
        last  = sl->ref_count[0];
    }

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for (parity = 0; parity < 2; parity++) {
        for (col_idx = 0; col_idx < col_pic->ref_count[colfield][list]; col_idx++) {
            int poc = col_pic->ref_poc[colfield][list][col_idx];

            if (!interlaced) {
                poc |= 3;
            } else if ((poc & 3) == 3) {
                // FIXME: store all MBAFF references so this is not needed
                poc = (poc & ~3) + parity + 1;
            }

            for (cand = first; cand < last; cand++) {
                int cur_ref;

                if (4 * sl->ref_list[0][cand].parent->frame_num +
                    (sl->ref_list[0][cand].reference & 3) != poc)
                    continue;

                /* First match wins. */
                cur_ref = mbafi ? (cand - 16) ^ field : cand;
                if (col_pic->mbaff)
                    map[list][2 * col_idx + (parity ^ field) + 16] = cur_ref;
                if (!interlaced || parity == field)
                    map[list][col_idx] = cur_ref;
                break;
            }
        }
    }
}
120
121
34013
/**
 * Per-slice setup for direct prediction.
 *
 * Stores the reference counts and the 4 * frame_num + (reference & 3) keys
 * of this slice's references in the current picture, records or checks the
 * picture's mbaff flag, chooses sl->col_parity / sl->col_fieldoff, and, for
 * B slices that do not use spatial direct prediction, fills the
 * co-located-to-list-0 maps through fill_colmap().
 *
 * @param h  decoder context (the picture behind h->cur_pic_ptr is written)
 * @param sl slice context to initialise
 */
void ff_h264_direct_ref_list_init(const H264Context *const h, H264SliceContext *sl)
{
    H264Ref *const col_ref     = &sl->ref_list[1][0];
    H264Picture *const cur_pic = h->cur_pic_ptr;
    const int is_frame         = h->picture_structure == PICT_FRAME;
    int cur_sidx = (h->picture_structure & 1) ^ 1;
    int col_sidx = (col_ref->reference   & 1) ^ 1;
    int lst, idx, parity;

    /* Record this slice's references in the current picture. */
    for (lst = 0; lst < sl->list_count; lst++) {
        cur_pic->ref_count[cur_sidx][lst] = sl->ref_count[lst];
        for (idx = 0; idx < sl->ref_count[lst]; idx++) {
            const H264Ref *const r = &sl->ref_list[lst][idx];

            cur_pic->ref_poc[cur_sidx][lst][idx] =
                4 * r->parent->frame_num + (r->reference & 3);
        }
    }

    /* Frame pictures duplicate row 0 into row 1. */
    if (is_frame) {
        memcpy(cur_pic->ref_count[1], cur_pic->ref_count[0], sizeof(cur_pic->ref_count[0]));
        memcpy(cur_pic->ref_poc[1],   cur_pic->ref_poc[0],   sizeof(cur_pic->ref_poc[0]));
    }

    /* The first slice sets the flag; later slices must agree with it. */
    if (h->current_slice != 0) {
        av_assert0(cur_pic->mbaff == FRAME_MBAFF(h));
    } else {
        cur_pic->mbaff = FRAME_MBAFF(h);
    }

    sl->col_fieldoff = 0;

    if (!(sl->list_count == 2 && sl->ref_count[1]))
        return;

    if (is_frame) {
        const int cur_poc        = h->cur_pic_ptr->poc;
        const int *const col_poc = col_ref->parent->field_poc;

        if (col_poc[0] == INT_MAX && col_poc[1] == INT_MAX) {
            av_log(h->avctx, AV_LOG_ERROR, "co located POCs unavailable\n");
            sl->col_parity = 1;
        } else {
            /* Select field 1 when it is at least as close in POC as field 0. */
            const int64_t dist0 = FFABS(col_poc[0] - (int64_t)cur_poc);
            const int64_t dist1 = FFABS(col_poc[1] - (int64_t)cur_poc);

            sl->col_parity = dist0 >= dist1;
        }
        cur_sidx = sl->col_parity;
        col_sidx = cur_sidx;
    } else if (!(h->picture_structure & col_ref->reference) &&
               !col_ref->parent->mbaff) {
        // FL -> FL & differ parity
        sl->col_fieldoff = 2 * col_ref->reference - 3;
    }

    if (sl->slice_type_nos != AV_PICTURE_TYPE_B || sl->direct_spatial_mv_pred)
        return;

    for (lst = 0; lst < 2; lst++) {
        fill_colmap(h, sl, sl->map_col_to_list0, lst, cur_sidx, col_sidx, 0);
        if (!FRAME_MBAFF(h))
            continue;
        for (parity = 0; parity < 2; parity++)
            fill_colmap(h, sl, sl->map_col_to_list0_field[parity], lst,
                        parity, parity, 1);
    }
}
180
181
3506949
/**
 * Wait until a reference picture has reported progress up to a given
 * macroblock row. Does nothing unless HAVE_THREADS is set and the codec
 * context has FF_THREAD_FRAME in active_thread_type.
 *
 * @param h    decoder context (read only)
 * @param ref  reference whose parent picture is waited on
 * @param mb_y macroblock row needed from the reference
 */
static void await_reference_mb_row(const H264Context *const h, H264Ref *ref,
                                   int mb_y)
{
    const int is_field_pic = ref->parent->field_picture;
    const int field_sel    = ref->reference - 1;
    const int last_row     = ((16 * h->mb_height) >> is_field_pic) - 1;
    int wanted_row         = (16 * mb_y) >> is_field_pic;

    if (HAVE_THREADS && (h->avctx->active_thread_type & FF_THREAD_FRAME)) {
        /* FIXME: It can be safe to access mb stuff
         * even if pixels aren't deblocked yet. */

        /* Never wait for a row beyond the end of the reference. */
        if (wanted_row > last_row)
            wanted_row = last_row;

        ff_thread_await_progress(&ref->parent->tf, wanted_row,
                                 is_field_pic && field_sel);
    }
}
199
200
2018485
/**
 * Spatial direct prediction for the current macroblock.
 *
 * For each list the reference index is the minimum of three neighbouring
 * entries of sl->ref_cache, and the motion vector is taken from the matching
 * neighbours in sl->mv_cache (mid_pred() of all three when more than one
 * matches). Results are written to sl->ref_cache / sl->mv_cache, and
 * *mb_type and sl->sub_mb_type[] are updated with the chosen partitioning.
 * Motion data of the co-located macroblock in sl->ref_list[1][0] is read to
 * zero the vectors of blocks whose co-located motion is at most 1 in both
 * components.
 *
 * @param h       decoder context (read only)
 * @param sl      slice context whose caches are filled
 * @param mb_type in/out: type flags of the current macroblock
 */
static void pred_spatial_direct_motion(const H264Context *const h, H264SliceContext *sl,
201
                                       int *mb_type)
202
{
203
2018485
    int b8_stride = 2;
204
2018485
    int b4_stride = h->b_stride;
205
2018485
    int mb_xy = sl->mb_xy, mb_y = sl->mb_y;
206
    int mb_type_col[2];
207
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
208
    const int8_t *l1ref0, *l1ref1;
209
2018485
    const int is_b8x8 = IS_8X8(*mb_type);
210
2018485
    unsigned int sub_mb_type = MB_TYPE_L0L1;
211
    int i8, i4;
212
    int ref[2];
213
    int mv[2];
214
    int list;
215
216
    assert(sl->ref_list[1][0].reference & 3);
217
218
2018485
    await_reference_mb_row(h, &sl->ref_list[1][0],
219
2018485
                           sl->mb_y + !!IS_INTERLACED(*mb_type));
220
221
#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16 | MB_TYPE_INTRA4x4 | \
222
                                MB_TYPE_INTRA16x16 | MB_TYPE_INTRA_PCM)
223
224
    /* ref = min(neighbors) */
225
6055455
    for (list = 0; list < 2; list++) {
226
4036970
        int left_ref     = sl->ref_cache[list][scan8[0] - 1];
227
4036970
        int top_ref      = sl->ref_cache[list][scan8[0] - 8];
228
4036970
        int refc         = sl->ref_cache[list][scan8[0] - 8 + 4];
229
4036970
        const int16_t *C = sl->mv_cache[list][scan8[0]  - 8 + 4];
230
4036970
        if (refc == PART_NOT_AVAILABLE) {
231
521894
            refc = sl->ref_cache[list][scan8[0] - 8 - 1];
232
521894
            C    = sl->mv_cache[list][scan8[0]  - 8 - 1];
233
        }
        /* The unsigned casts make negative indices compare as large values,
         * so the minimum is the smallest non-negative index unless all
         * three neighbours are negative. */
234
4036970
        ref[list] = FFMIN3((unsigned)left_ref,
235
                           (unsigned)top_ref,
236
                           (unsigned)refc);
237
4036970
        if (ref[list] >= 0) {
238
            /* This is just pred_motion() but with the cases removed that
239
             * cannot happen for direct blocks. */
240
3647128
            const int16_t *const A = sl->mv_cache[list][scan8[0] - 1];
241
3647128
            const int16_t *const B = sl->mv_cache[list][scan8[0] - 8];
242
243
3647128
            int match_count = (left_ref == ref[list]) +
244
3647128
                              (top_ref  == ref[list]) +
245
3647128
                              (refc     == ref[list]);
246
247
3647128
            if (match_count > 1) { // most common
248
3197595
                mv[list] = pack16to32(mid_pred(A[0], B[0], C[0]),
249
3197595
                                      mid_pred(A[1], B[1], C[1]));
250
            } else {
251
                assert(match_count == 1);
252
449533
                if (left_ref == ref[list])
253
336448
                    mv[list] = AV_RN32A(A);
254
113085
                else if (top_ref == ref[list])
255
53005
                    mv[list] = AV_RN32A(B);
256
                else
257
60080
                    mv[list] = AV_RN32A(C);
258
            }
259
            av_assert2(ref[list] < (sl->ref_count[list] << !!FRAME_MBAFF(h)));
260
        } else {
261
389842
            int mask = ~(MB_TYPE_L0 << (2 * list));
262
389842
            mv[list]  = 0;
263
389842
            ref[list] = -1;
264
389842
            if (!is_b8x8)
265
365400
                *mb_type &= mask;
266
389842
            sub_mb_type &= mask;
267
        }
268
    }
269

2018485
    if (ref[0] < 0 && ref[1] < 0) {
270
5030
        ref[0] = ref[1] = 0;
271
5030
        if (!is_b8x8)
272
4200
            *mb_type |= MB_TYPE_L0L1;
273
5030
        sub_mb_type |= MB_TYPE_L0L1;
274
    }
275
    /* Fast path: not an 8x8 macroblock and both predicted vectors are zero.
     * The whole macroblock becomes 16x16 direct with zero vectors and the
     * co-located data is never read. */
276
2018485
    if (!(is_b8x8 | mv[0] | mv[1])) {
277
1033653
        fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
278
1033653
        fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
279
1033653
        fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
280
1033653
        fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
281
1033653
        *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
282
                                 MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
283
1033653
                   MB_TYPE_16x16 | MB_TYPE_DIRECT2;
284
1033653
        return;
285
    }
286
287
984832
    if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
288
678519
        if (!IS_INTERLACED(*mb_type)) {                    //     AFR/FR    -> AFL/FL
289
39366
            mb_y  = (sl->mb_y & ~1) + sl->col_parity;
290
39366
            mb_xy = sl->mb_x +
291
39366
                    ((sl->mb_y & ~1) + sl->col_parity) * h->mb_stride;
292
39366
            b8_stride = 0;
293
        } else {
294
639153
            mb_y  += sl->col_fieldoff;
295
639153
            mb_xy += h->mb_stride * sl->col_fieldoff; // non-zero for FL -> FL & differ parity
296
        }
297
678519
        goto single_col;
298
    } else {                                             // AFL/AFR/FR/FL -> AFR/FR
299
306313
        if (IS_INTERLACED(*mb_type)) {                   // AFL       /FL -> AFR/FR
300
43711
            mb_y           =  sl->mb_y & ~1;
301
43711
            mb_xy          = (sl->mb_y & ~1) * h->mb_stride + sl->mb_x;
302
43711
            mb_type_col[0] = sl->ref_list[1][0].parent->mb_type[mb_xy];
303
43711
            mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy + h->mb_stride];
304
43711
            b8_stride      = 2 + 4 * h->mb_stride;
305
43711
            b4_stride     *= 6;
306
43711
            if (IS_INTERLACED(mb_type_col[0]) !=
307
43711
                IS_INTERLACED(mb_type_col[1])) {
308
                mb_type_col[0] &= ~MB_TYPE_INTERLACED;
309
                mb_type_col[1] &= ~MB_TYPE_INTERLACED;
310
            }
311
312
43711
            sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
313
43711
            if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
314

33897
                (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
315
                !is_b8x8) {
316
26172
                *mb_type |= MB_TYPE_16x8 | MB_TYPE_DIRECT2;  /* B_16x8 */
317
            } else {
318
17539
                *mb_type |= MB_TYPE_8x8;
319
            }
320
        } else {                                         //     AFR/FR    -> AFR/FR
321
262602
single_col:
322
941121
            mb_type_col[0] =
323
941121
            mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy];
324
325
941121
            sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
326

941121
            if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
327
784135
                *mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_16x16 */
328
156986
            } else if (!is_b8x8 &&
329
88312
                       (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
330
47987
                *mb_type |= MB_TYPE_DIRECT2 |
331
47987
                            (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
332
            } else {
333
108999
                if (!h->ps.sps->direct_8x8_inference_flag) {
334
                    /* FIXME: Save sub mb types from previous frames (or derive
335
                     * from MVs) so we know exactly what block size to use. */
336
21546
                    sub_mb_type += (MB_TYPE_8x8 - MB_TYPE_16x16); /* B_SUB_4x4 */
337
                }
338
108999
                *mb_type |= MB_TYPE_8x8;
339
            }
340
        }
341
    }
342
343
984832
    await_reference_mb_row(h, &sl->ref_list[1][0], mb_y);
344
345
984832
    l1mv0  = (void*)&sl->ref_list[1][0].parent->motion_val[0][h->mb2b_xy[mb_xy]];
346
984832
    l1mv1  = (void*)&sl->ref_list[1][0].parent->motion_val[1][h->mb2b_xy[mb_xy]];
347
984832
    l1ref0 = &sl->ref_list[1][0].parent->ref_index[0][4 * mb_xy];
348
984832
    l1ref1 = &sl->ref_list[1][0].parent->ref_index[1][4 * mb_xy];
    /* b8_stride == 0 was set above for a non-interlaced current MB with an
     * interlaced co-located MB; on odd MB rows the co-located pointers are
     * advanced by two reference entries and two rows of 4x4 vectors. */
349
984832
    if (!b8_stride) {
350
39366
        if (sl->mb_y & 1) {
351
19982
            l1ref0 += 2;
352
19982
            l1ref1 += 2;
353
19982
            l1mv0  += 2 * b4_stride;
354
19982
            l1mv1  += 2 * b4_stride;
355
        }
356
    }
357
    /* Three cases follow: current and co-located MB differ in interlacing
     * (handled per 8x8 block), a 16x16 macroblock, and everything else
     * (per 8x8 block, possibly per 4x4 block). In the first two, when the
     * co-located block is judged still, a list keeps its predicted vector
     * only if its reference index is greater than 0; in the third, a
     * list's vector is zeroed only when its reference index is 0. */
358
984832
    if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
359
83077
        int n = 0;
360
415385
        for (i8 = 0; i8 < 4; i8++) {
361
332308
            int x8  = i8 & 1;
362
332308
            int y8  = i8 >> 1;
363
332308
            int xy8 = x8     + y8 * b8_stride;
364
332308
            int xy4 = x8 * 3 + y8 * b4_stride;
365
            int a, b;
366
367

332308
            if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
368
25993
                continue;
369
306315
            sl->sub_mb_type[i8] = sub_mb_type;
370
371
306315
            fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
372
306315
                           (uint8_t)ref[0], 1);
373
306315
            fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
374
306315
                           (uint8_t)ref[1], 1);
375

306315
            if (!IS_INTRA(mb_type_col[y8]) && !sl->ref_list[1][0].parent->long_ref &&
376
173012
                ((l1ref0[xy8] == 0 &&
377
118708
                  FFABS(l1mv0[xy4][0]) <= 1 &&
378
8884
                  FFABS(l1mv0[xy4][1]) <= 1) ||
379
168083
                 (l1ref0[xy8] < 0 &&
380
8526
                  l1ref1[xy8] == 0 &&
381
6741
                  FFABS(l1mv1[xy4][0]) <= 1 &&
382
574
                  FFABS(l1mv1[xy4][1]) <= 1))) {
383
5283
                a =
384
5283
                b = 0;
385
5283
                if (ref[0] > 0)
386
459
                    a = mv[0];
387
5283
                if (ref[1] > 0)
388
149
                    b = mv[1];
389
5283
                n++;
390
            } else {
391
301032
                a = mv[0];
392
301032
                b = mv[1];
393
            }
394
306315
            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, a, 4);
395
306315
            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, b, 4);
396
        }
397

83077
        if (!is_b8x8 && !(n & 3))
398
66179
            *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
399
                                     MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
400
66179
                       MB_TYPE_16x16 | MB_TYPE_DIRECT2;
401
901755
    } else if (IS_16X16(*mb_type)) {
402
        int a, b;
403
404
759063
        fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
405
759063
        fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
406

759063
        if (!IS_INTRA(mb_type_col[0]) && !sl->ref_list[1][0].parent->long_ref &&
407
571701
            ((l1ref0[0] == 0 &&
408
307843
              FFABS(l1mv0[0][0]) <= 1 &&
409
57898
              FFABS(l1mv0[0][1]) <= 1) ||
410

547557
             (l1ref0[0] < 0 && !l1ref1[0] &&
411
15337
              FFABS(l1mv1[0][0]) <= 1 &&
412
4335
              FFABS(l1mv1[0][1]) <= 1 &&
413
3598
              h->x264_build > 33U))) {
414
27742
            a = b = 0;
415
27742
            if (ref[0] > 0)
416
5010
                a = mv[0];
417
27742
            if (ref[1] > 0)
418
3330
                b = mv[1];
419
        } else {
420
731321
            a = mv[0];
421
731321
            b = mv[1];
422
        }
423
759063
        fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
424
759063
        fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
425
    } else {
426
142692
        int n = 0;
427
713460
        for (i8 = 0; i8 < 4; i8++) {
428
570768
            const int x8 = i8 & 1;
429
570768
            const int y8 = i8 >> 1;
430
431

570768
            if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
432
112465
                continue;
433
458303
            sl->sub_mb_type[i8] = sub_mb_type;
434
435
458303
            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, mv[0], 4);
436
458303
            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, mv[1], 4);
437
458303
            fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
438
458303
                           (uint8_t)ref[0], 1);
439
458303
            fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
440
458303
                           (uint8_t)ref[1], 1);
441
442
            assert(b8_stride == 2);
443
            /* col_zero_flag */
444

458303
            if (!IS_INTRA(mb_type_col[0]) && !sl->ref_list[1][0].parent->long_ref &&
445
423203
                (l1ref0[i8] == 0 ||
446
161688
                 (l1ref0[i8] < 0 &&
447
10917
                  l1ref1[i8] == 0 &&
448
10171
                  h->x264_build > 33U))) {
449
271686
                const int16_t (*l1mv)[2] = l1ref0[i8] == 0 ? l1mv0 : l1mv1;
450
271686
                if (IS_SUB_8X8(sub_mb_type)) {
451
241447
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
452

241447
                    if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
453
18145
                        if (ref[0] == 0)
454
15055
                            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2,
455
                                           8, 0, 4);
456
18145
                        if (ref[1] == 0)
457
16136
                            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2,
458
                                           8, 0, 4);
459
18145
                        n += 4;
460
                    }
461
                } else {
462
30239
                    int m = 0;
463
151195
                    for (i4 = 0; i4 < 4; i4++) {
464
120956
                        const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
465
120956
                                                     (y8 * 2 + (i4 >> 1)) * b4_stride];
466

120956
                        if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
467
2956
                            if (ref[0] == 0)
468
1991
                                AV_ZERO32(sl->mv_cache[0][scan8[i8 * 4 + i4]]);
469
2956
                            if (ref[1] == 0)
470
2710
                                AV_ZERO32(sl->mv_cache[1][scan8[i8 * 4 + i4]]);
471
2956
                            m++;
472
                        }
473
                    }
474
30239
                    if (!(m & 3))
475
29926
                        sl->sub_mb_type[i8] += MB_TYPE_16x16 - MB_TYPE_8x8;
476
30239
                    n += m;
477
                }
478
            }
479
        }
480

142692
        if (!is_b8x8 && !(n & 15))
481
75297
            *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
482
                                     MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
483
75297
                       MB_TYPE_16x16 | MB_TYPE_DIRECT2;
484
    }
485
}
486
487
251816
/**
 * Temporal direct prediction for the current macroblock.
 *
 * The reference index of the co-located block in sl->ref_list[1][0] is
 * translated through sl->map_col_to_list0 (or the per-field table for
 * interlaced macroblocks of an MBAFF frame) into a list-0 index. The
 * co-located motion vector is scaled with the matching dist_scale_factor
 * entry as (scale * mv + 128) >> 8 to give the list-0 vector, and the
 * list-1 vector is that result minus the co-located vector. The list-1
 * reference index is always written as 0; intra co-located blocks yield
 * reference 0 and zero vectors for both lists.
 *
 * @param h       decoder context (read only)
 * @param sl      slice context whose ref_cache / mv_cache are filled
 * @param mb_type in/out: type flags of the current macroblock
 */
static void pred_temp_direct_motion(const H264Context *const h, H264SliceContext *sl,
488
                                    int *mb_type)
489
{
490
251816
    int b8_stride = 2;
491
251816
    int b4_stride = h->b_stride;
492
251816
    int mb_xy = sl->mb_xy, mb_y = sl->mb_y;
493
    int mb_type_col[2];
494
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
495
    const int8_t *l1ref0, *l1ref1;
496
251816
    const int is_b8x8 = IS_8X8(*mb_type);
497
    unsigned int sub_mb_type;
498
    int i8, i4;
499
500
    assert(sl->ref_list[1][0].reference & 3);
501
502
251816
    await_reference_mb_row(h, &sl->ref_list[1][0],
503
251816
                           sl->mb_y + !!IS_INTERLACED(*mb_type));
504
505
251816
    if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
506
88777
        if (!IS_INTERLACED(*mb_type)) {                    //     AFR/FR    -> AFL/FL
507
15611
            mb_y  = (sl->mb_y & ~1) + sl->col_parity;
508
15611
            mb_xy = sl->mb_x +
509
15611
                    ((sl->mb_y & ~1) + sl->col_parity) * h->mb_stride;
510
15611
            b8_stride = 0;
511
        } else {
512
73166
            mb_y  += sl->col_fieldoff;
513
73166
            mb_xy += h->mb_stride * sl->col_fieldoff; // non-zero for FL -> FL & differ parity
514
        }
515
88777
        goto single_col;
516
    } else {                                        // AFL/AFR/FR/FL -> AFR/FR
517
163039
        if (IS_INTERLACED(*mb_type)) {              // AFL       /FL -> AFR/FR
518
16549
            mb_y           = sl->mb_y & ~1;
519
16549
            mb_xy          = sl->mb_x + (sl->mb_y & ~1) * h->mb_stride;
520
16549
            mb_type_col[0] = sl->ref_list[1][0].parent->mb_type[mb_xy];
521
16549
            mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy + h->mb_stride];
522
16549
            b8_stride      = 2 + 4 * h->mb_stride;
523
16549
            b4_stride     *= 6;
524
16549
            if (IS_INTERLACED(mb_type_col[0]) !=
525
16549
                IS_INTERLACED(mb_type_col[1])) {
526
                mb_type_col[0] &= ~MB_TYPE_INTERLACED;
527
                mb_type_col[1] &= ~MB_TYPE_INTERLACED;
528
            }
529
530
16549
            sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
531
                          MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */
532
533
16549
            if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
534

9887
                (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
535
                !is_b8x8) {
536
5873
                *mb_type |= MB_TYPE_16x8 | MB_TYPE_L0L1 |
537
                            MB_TYPE_DIRECT2;                /* B_16x8 */
538
            } else {
539
10676
                *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
540
            }
541
        } else {                                    //     AFR/FR    -> AFR/FR
542
146490
single_col:
543
235267
            mb_type_col[0]     =
544
235267
                mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy];
545
546
235267
            sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
547
                          MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */
548

235267
            if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
549
134937
                *mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
550
                            MB_TYPE_DIRECT2;                /* B_16x16 */
551
100330
            } else if (!is_b8x8 &&
552
54757
                       (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
553
34983
                *mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 |
554
34983
                            (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
555
            } else {
556
65347
                if (!h->ps.sps->direct_8x8_inference_flag) {
557
                    /* FIXME: save sub mb types from previous frames (or derive
558
                     * from MVs) so we know exactly what block size to use */
559
10593
                    sub_mb_type = MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
560
                                  MB_TYPE_DIRECT2;          /* B_SUB_4x4 */
561
                }
562
65347
                *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
563
            }
564
        }
565
    }
566
567
251816
    await_reference_mb_row(h, &sl->ref_list[1][0], mb_y);
568
569
251816
    l1mv0  = (void*)&sl->ref_list[1][0].parent->motion_val[0][h->mb2b_xy[mb_xy]];
570
251816
    l1mv1  = (void*)&sl->ref_list[1][0].parent->motion_val[1][h->mb2b_xy[mb_xy]];
571
251816
    l1ref0 = &sl->ref_list[1][0].parent->ref_index[0][4 * mb_xy];
572
251816
    l1ref1 = &sl->ref_list[1][0].parent->ref_index[1][4 * mb_xy];
    /* b8_stride == 0 was set above for a non-interlaced current MB with an
     * interlaced co-located MB; on odd MB rows the co-located pointers are
     * advanced by two reference entries and two rows of 4x4 vectors. */
573
251816
    if (!b8_stride) {
574
15611
        if (sl->mb_y & 1) {
575
7720
            l1ref0 += 2;
576
7720
            l1ref1 += 2;
577
7720
            l1mv0  += 2 * b4_stride;
578
7720
            l1mv1  += 2 * b4_stride;
579
        }
580
    }
581
582
    {
583
251816
        const int *map_col_to_list0[2] = { sl->map_col_to_list0[0],
584
251816
                                           sl->map_col_to_list0[1] };
585
251816
        const int *dist_scale_factor = sl->dist_scale_factor;
586
        int ref_offset;
587
588

251816
        if (FRAME_MBAFF(h) && IS_INTERLACED(*mb_type)) {
589
20032
            map_col_to_list0[0] = sl->map_col_to_list0_field[sl->mb_y & 1][0];
590
20032
            map_col_to_list0[1] = sl->map_col_to_list0_field[sl->mb_y & 1][1];
591
20032
            dist_scale_factor   = sl->dist_scale_factor_field[sl->mb_y & 1];
592
        }
        /* NOTE(review): presumably yields 16 (the offset of the second half
         * of the map filled by fill_colmap) when the co-located picture is
         * MBAFF and the co-located MB is interlaced, and 0 otherwise; this
         * relies on the bit value of MB_TYPE_INTERLACED - confirm against
         * mpegutils.h. */
593
251816
        ref_offset = (sl->ref_list[1][0].parent->mbaff << 4) & (mb_type_col[0] >> 3);
594
595
251816
        if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
            /* Current and co-located MB differ in interlacing. With
             * y_shift == 2 (current MB not interlaced) the co-located
             * vertical component is doubled, with y_shift == 0 it is
             * halved, before scaling. */
596
32160
            int y_shift = 2 * !IS_INTERLACED(*mb_type);
597
            assert(h->ps.sps->direct_8x8_inference_flag);
598
599
160800
            for (i8 = 0; i8 < 4; i8++) {
600
128640
                const int x8 = i8 & 1;
601
128640
                const int y8 = i8 >> 1;
602
                int ref0, scale;
603
128640
                const int16_t (*l1mv)[2] = l1mv0;
604
605

128640
                if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
606
22674
                    continue;
607
105966
                sl->sub_mb_type[i8] = sub_mb_type;
608
609
105966
                fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
610
105966
                if (IS_INTRA(mb_type_col[y8])) {
611
25496
                    fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
612
25496
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
613
25496
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
614
25496
                    continue;
615
                }
616
617
80470
                ref0 = l1ref0[x8 + y8 * b8_stride];
618
80470
                if (ref0 >= 0)
619
80277
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
620
                else {
621
193
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8 * b8_stride] +
622
                                               ref_offset];
623
193
                    l1mv = l1mv1;
624
                }
625
80470
                scale = dist_scale_factor[ref0];
626
80470
                fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
627
                               ref0, 1);
628
629
                {
630
80470
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * b4_stride];
631
80470
                    int my_col            = (mv_col[1] * (1 << y_shift)) / 2;
632
80470
                    int mx                = (scale * mv_col[0] + 128) >> 8;
633
80470
                    int my                = (scale * my_col    + 128) >> 8;
634
80470
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
635
                                   pack16to32(mx, my), 4);
636
80470
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
637
80470
                                   pack16to32(mx - mv_col[0], my - my_col), 4);
638
                }
639
            }
640
32160
            return;
641
        }
642
643
        /* one-to-one mv scaling */
644
645
219656
        if (IS_16X16(*mb_type)) {
646
            int ref, mv0, mv1;
647
648
127333
            fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
649
127333
            if (IS_INTRA(mb_type_col[0])) {
650
60991
                ref = mv0 = mv1 = 0;
651
            } else {
652
198975
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
653
66342
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
654
66342
                const int scale = dist_scale_factor[ref0];
655
66342
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
656
                int mv_l0[2];
657
66342
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
658
66342
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
659
66342
                ref      = ref0;
660
66342
                mv0      = pack16to32(mv_l0[0], mv_l0[1]);
661
66342
                mv1      = pack16to32(mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1]);
662
            }
663
127333
            fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
664
127333
            fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
665
127333
            fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
666
        } else {
667
461615
            for (i8 = 0; i8 < 4; i8++) {
668
369292
                const int x8 = i8 & 1;
669
369292
                const int y8 = i8 >> 1;
670
                int ref0, scale;
671
369292
                const int16_t (*l1mv)[2] = l1mv0;
672
673

369292
                if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
674
92460
                    continue;
675
276832
                sl->sub_mb_type[i8] = sub_mb_type;
676
276832
                fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
677
276832
                if (IS_INTRA(mb_type_col[0])) {
678
22293
                    fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
679
22293
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
680
22293
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
681
22293
                    continue;
682
                }
683
684
                assert(b8_stride == 2);
685
254539
                ref0 = l1ref0[i8];
686
254539
                if (ref0 >= 0)
687
254519
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
688
                else {
689
20
                    ref0 = map_col_to_list0[1][l1ref1[i8] + ref_offset];
690
20
                    l1mv = l1mv1;
691
                }
692
254539
                scale = dist_scale_factor[ref0];
693
694
254539
                fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
695
                               ref0, 1);
696
254539
                if (IS_SUB_8X8(sub_mb_type)) {
697
229231
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
698
229231
                    int mx                = (scale * mv_col[0] + 128) >> 8;
699
229231
                    int my                = (scale * mv_col[1] + 128) >> 8;
700
229231
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
701
                                   pack16to32(mx, my), 4);
702
229231
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
703
229231
                                   pack16to32(mx - mv_col[0], my - mv_col[1]), 4);
704
                } else {
705
126540
                    for (i4 = 0; i4 < 4; i4++) {
706
101232
                        const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
707
101232
                                                     (y8 * 2 + (i4 >> 1)) * b4_stride];
708
101232
                        int16_t *mv_l0 = sl->mv_cache[0][scan8[i8 * 4 + i4]];
709
101232
                        mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
710
101232
                        mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
711
101232
                        AV_WN32A(sl->mv_cache[1][scan8[i8 * 4 + i4]],
712
                                 pack16to32(mv_l0[0] - mv_col[0],
713
                                            mv_l0[1] - mv_col[1]));
714
                    }
715
                }
716
            }
717
        }
718
    }
719
}
720
721
2270301
/**
 * Predict the motion of a direct macroblock, choosing the spatial or the
 * temporal method according to sl->direct_spatial_mv_pred.
 *
 * @param h       decoder context (read only)
 * @param sl      slice context whose motion caches are filled
 * @param mb_type in/out: type flags of the current macroblock
 */
void ff_h264_pred_direct_motion(const H264Context *const h, H264SliceContext *sl,
                                int *mb_type)
{
    if (!sl->direct_spatial_mv_pred) {
        pred_temp_direct_motion(h, sl, mb_type);
        return;
    }

    pred_spatial_direct_motion(h, sl, mb_type);
}