LCOV - code coverage report
Current view: top level - libavcodec - h264_direct.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 437 445 98.2 %
Date: 2017-12-13 10:57:33 Functions: 8 8 100.0 %

          Line data    Source code
       1             : /*
       2             :  * H.26L/H.264/AVC/JVT/14496-10/... direct mb/block decoding
       3             :  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
       4             :  *
       5             :  * This file is part of FFmpeg.
       6             :  *
       7             :  * FFmpeg is free software; you can redistribute it and/or
       8             :  * modify it under the terms of the GNU Lesser General Public
       9             :  * License as published by the Free Software Foundation; either
      10             :  * version 2.1 of the License, or (at your option) any later version.
      11             :  *
      12             :  * FFmpeg is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :  * Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with FFmpeg; if not, write to the Free Software
      19             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      20             :  */
      21             : 
      22             : /**
      23             :  * @file
      24             :  * H.264 / AVC / MPEG-4 part10 direct mb/block decoding.
      25             :  * @author Michael Niedermayer <michaelni@gmx.at>
      26             :  */
      27             : 
      28             : #include "internal.h"
      29             : #include "avcodec.h"
      30             : #include "h264dec.h"
      31             : #include "h264_ps.h"
      32             : #include "mpegutils.h"
      33             : #include "rectangle.h"
      34             : #include "thread.h"
      35             : 
      36             : #include <assert.h>
      37             : 
      38       27501 : static int get_scale_factor(H264SliceContext *sl,
      39             :                             int poc, int poc1, int i)
      40             : {
      41       27501 :     int poc0 = sl->ref_list[0][i].poc;
      42       27501 :     int64_t pocdiff = poc1 - (int64_t)poc0;
      43       27501 :     int td = av_clip_int8(pocdiff);
      44             : 
      45       27501 :     if (pocdiff != (int)pocdiff)
      46           0 :         avpriv_request_sample(sl->h264->avctx, "pocdiff overflow\n");
      47             : 
      48       27501 :     if (td == 0 || sl->ref_list[0][i].parent->long_ref) {
      49        8127 :         return 256;
      50             :     } else {
      51       19374 :         int64_t pocdiff0 = poc - (int64_t)poc0;
      52       19374 :         int tb = av_clip_int8(pocdiff0);
      53       19374 :         int tx = (16384 + (FFABS(td) >> 1)) / td;
      54             : 
      55       19374 :         if (pocdiff0 != (int)pocdiff0)
      56           0 :             av_log(sl->h264->avctx, AV_LOG_DEBUG, "pocdiff0 overflow\n");
      57             : 
      58       19374 :         return av_clip_intp2((tb * tx + 32) >> 6, 10);
      59             :     }
      60             : }
      61             : 
      62        7703 : void ff_h264_direct_dist_scale_factor(const H264Context *const h,
      63             :                                       H264SliceContext *sl)
      64             : {
      65       18790 :     const int poc  = FIELD_PICTURE(h) ? h->cur_pic_ptr->field_poc[h->picture_structure == PICT_BOTTOM_FIELD]
      66       11087 :                                       : h->cur_pic_ptr->poc;
      67        7703 :     const int poc1 = sl->ref_list[1][0].poc;
      68             :     int i, field;
      69             : 
      70        7703 :     if (FRAME_MBAFF(h))
      71        1539 :         for (field = 0; field < 2; field++) {
      72        1026 :             const int poc  = h->cur_pic_ptr->field_poc[field];
      73        1026 :             const int poc1 = sl->ref_list[1][0].parent->field_poc[field];
      74        7598 :             for (i = 0; i < 2 * sl->ref_count[0]; i++)
      75       13144 :                 sl->dist_scale_factor_field[field][i ^ field] =
      76       13144 :                     get_scale_factor(sl, poc, poc1, i + 16);
      77             :         }
      78             : 
      79       28632 :     for (i = 0; i < sl->ref_count[0]; i++)
      80       20929 :         sl->dist_scale_factor[i] = get_scale_factor(sl, poc, poc1, i);
      81        7703 : }
      82             : 
      83       17458 : static void fill_colmap(const H264Context *h, H264SliceContext *sl,
      84             :                         int map[2][16 + 32], int list,
      85             :                         int field, int colfield, int mbafi)
      86             : {
      87       17458 :     H264Picture *const ref1 = sl->ref_list[1][0].parent;
      88             :     int j, old_ref, rfield;
      89       17458 :     int start  = mbafi ? 16                       : 0;
      90       17458 :     int end    = mbafi ? 16 + 2 * sl->ref_count[0] : sl->ref_count[0];
      91       17458 :     int interl = mbafi || h->picture_structure != PICT_FRAME;
      92             : 
      93             :     /* bogus; fills in for missing frames */
      94       17458 :     memset(map[list], 0, sizeof(map[list]));
      95             : 
      96       52374 :     for (rfield = 0; rfield < 2; rfield++) {
      97       72084 :         for (old_ref = 0; old_ref < ref1->ref_count[colfield][list]; old_ref++) {
      98       37168 :             int poc = ref1->ref_poc[colfield][list][old_ref];
      99             : 
     100       37168 :             if (!interl)
     101       16796 :                 poc |= 3;
     102             :             // FIXME: store all MBAFF references so this is not needed
     103       20372 :             else if (interl && (poc & 3) == 3)
     104        5396 :                 poc = (poc & ~3) + rfield + 1;
     105             : 
     106      108524 :             for (j = start; j < end; j++) {
     107      202264 :                 if (4 * sl->ref_list[0][j].parent->frame_num +
     108      101132 :                     (sl->ref_list[0][j].reference & 3) == poc) {
     109       29776 :                     int cur_ref = mbafi ? (j - 16) ^ field : j;
     110       29776 :                     if (ref1->mbaff)
     111        5736 :                         map[list][2 * old_ref + (rfield ^ field) + 16] = cur_ref;
     112       29776 :                     if (rfield == field || !interl)
     113       21462 :                         map[list][old_ref] = cur_ref;
     114       29776 :                     break;
     115             :                 }
     116             :             }
     117             :         }
     118             :     }
     119       17458 : }
     120             : 
     121       31410 : void ff_h264_direct_ref_list_init(const H264Context *const h, H264SliceContext *sl)
     122             : {
     123       31410 :     H264Ref *const ref1 = &sl->ref_list[1][0];
     124       31410 :     H264Picture *const cur = h->cur_pic_ptr;
     125             :     int list, j, field;
     126       31410 :     int sidx     = (h->picture_structure & 1) ^ 1;
     127       31410 :     int ref1sidx = (ref1->reference      & 1) ^ 1;
     128             : 
     129       69725 :     for (list = 0; list < sl->list_count; list++) {
     130       38315 :         cur->ref_count[sidx][list] = sl->ref_count[list];
     131      164492 :         for (j = 0; j < sl->ref_count[list]; j++)
     132      252354 :             cur->ref_poc[sidx][list][j] = 4 * sl->ref_list[list][j].parent->frame_num +
     133      126177 :                                           (sl->ref_list[list][j].reference & 3);
     134             :     }
     135             : 
     136       31410 :     if (h->picture_structure == PICT_FRAME) {
     137       23012 :         memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
     138       23012 :         memcpy(cur->ref_poc[1],   cur->ref_poc[0],   sizeof(cur->ref_poc[0]));
     139             :     }
     140             : 
     141       31410 :     if (h->current_slice == 0) {
     142       25098 :         cur->mbaff = FRAME_MBAFF(h);
     143             :     } else {
     144        6312 :         av_assert0(cur->mbaff == FRAME_MBAFF(h));
     145             :     }
     146             : 
     147       31410 :     sl->col_fieldoff = 0;
     148             : 
     149       31410 :     if (sl->list_count != 2 || !sl->ref_count[1])
     150       20384 :         return;
     151             : 
     152       11026 :     if (h->picture_structure == PICT_FRAME) {
     153        6596 :         int cur_poc  = h->cur_pic_ptr->poc;
     154        6596 :         int *col_poc = sl->ref_list[1][0].parent->field_poc;
     155        6596 :         if (col_poc[0] == INT_MAX && col_poc[1] == INT_MAX) {
     156           3 :             av_log(h->avctx, AV_LOG_ERROR, "co located POCs unavailable\n");
     157           3 :             sl->col_parity = 1;
     158             :         } else
     159       13186 :         sl->col_parity = (FFABS(col_poc[0] - cur_poc) >=
     160        6593 :                           FFABS(col_poc[1] - cur_poc));
     161        6596 :         ref1sidx =
     162        6596 :         sidx     = sl->col_parity;
     163             :     // FL -> FL & differ parity
     164        6296 :     } else if (!(h->picture_structure & sl->ref_list[1][0].reference) &&
     165        1866 :                !sl->ref_list[1][0].parent->mbaff) {
     166        1728 :         sl->col_fieldoff = 2 * sl->ref_list[1][0].reference - 3;
     167             :     }
     168             : 
     169       11026 :     if (sl->slice_type_nos != AV_PICTURE_TYPE_B || sl->direct_spatial_mv_pred)
     170        3323 :         return;
     171             : 
     172       23109 :     for (list = 0; list < 2; list++) {
     173       15406 :         fill_colmap(h, sl, sl->map_col_to_list0, list, sidx, ref1sidx, 0);
     174       15406 :         if (FRAME_MBAFF(h))
     175        3078 :             for (field = 0; field < 2; field++)
     176        2052 :                 fill_colmap(h, sl, sl->map_col_to_list0_field[field], list, field,
     177             :                             field, 1);
     178             :     }
     179             : }
     180             : 
     181     3457899 : static void await_reference_mb_row(const H264Context *const h, H264Ref *ref,
     182             :                                    int mb_y)
     183             : {
     184     3457899 :     int ref_field         = ref->reference - 1;
     185     3457899 :     int ref_field_picture = ref->parent->field_picture;
     186     3457899 :     int ref_height        = 16 * h->mb_height >> ref_field_picture;
     187             : 
     188     3457899 :     if (!HAVE_THREADS || !(h->avctx->active_thread_type & FF_THREAD_FRAME))
     189     3457899 :         return;
     190             : 
     191             :     /* FIXME: It can be safe to access mb stuff
     192             :      * even if pixels aren't deblocked yet. */
     193             : 
     194           0 :     ff_thread_await_progress(&ref->parent->tf,
     195           0 :                              FFMIN(16 * mb_y >> ref_field_picture,
     196             :                                    ref_height - 1),
     197             :                              ref_field_picture && ref_field);
     198             : }
     199             : 
     200     1991991 : static void pred_spatial_direct_motion(const H264Context *const h, H264SliceContext *sl,
     201             :                                        int *mb_type)
     202             : {
                           /*
                            * Spatial direct prediction for the current macroblock.
                            *
                            * Derives one reference index and one motion vector per list from
                            * three neighbouring entries of sl->ref_cache / sl->mv_cache, writes
                            * them into the cache entries of this macroblock and updates *mb_type
                            * (and sl->sub_mb_type[] for the 8x8 cases) to describe the resulting
                            * partitioning. Where the co-located block of the first list-1
                            * reference passes the "col zero" test, zero vectors are stored
                            * instead of the predicted ones.
                            *
                            * h        decoder context (read only)
                            * sl       slice context; its ref_cache, mv_cache and sub_mb_type are
                            *          written
                            * mb_type  in/out: macroblock type flags of the current macroblock
                            */
     203     1991991 :     int b8_stride = 2;
     204     1991991 :     int b4_stride = h->b_stride;
     205     1991991 :     int mb_xy = sl->mb_xy, mb_y = sl->mb_y;
     206             :     int mb_type_col[2];
     207             :     const int16_t (*l1mv0)[2], (*l1mv1)[2];
     208             :     const int8_t *l1ref0, *l1ref1;
     209     1991991 :     const int is_b8x8 = IS_8X8(*mb_type);
     210     1991991 :     unsigned int sub_mb_type = MB_TYPE_L0L1;
     211             :     int i8, i4;
     212             :     int ref[2];
     213             :     int mv[2];
     214             :     int list;
     215             : 
     216             :     assert(sl->ref_list[1][0].reference & 3);
     217             : 
                           /* Wait for the first list-1 reference before its mb_type is read
                            * below; one row further when the current macroblock is interlaced. */
     218     1991991 :     await_reference_mb_row(h, &sl->ref_list[1][0],
     219     1991991 :                            sl->mb_y + !!IS_INTERLACED(*mb_type));
     220             : 
     221             : #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16 | MB_TYPE_INTRA4x4 | \
     222             :                                 MB_TYPE_INTRA16x16 | MB_TYPE_INTRA_PCM)
     223             : 
     224             :     /* ref = min(neighbors) */
     225     5975973 :     for (list = 0; list < 2; list++) {
     226     3983982 :         int left_ref     = sl->ref_cache[list][scan8[0] - 1];
     227     3983982 :         int top_ref      = sl->ref_cache[list][scan8[0] - 8];
     228     3983982 :         int refc         = sl->ref_cache[list][scan8[0] - 8 + 4];
     229     3983982 :         const int16_t *C = sl->mv_cache[list][scan8[0]  - 8 + 4];
                               /* Third neighbour: entry at -8 + 4, replaced by the one at -8 - 1
                                * when the former is marked PART_NOT_AVAILABLE. */
     230     3983982 :         if (refc == PART_NOT_AVAILABLE) {
     231      486468 :             refc = sl->ref_cache[list][scan8[0] - 8 - 1];
     232      486468 :             C    = sl->mv_cache[list][scan8[0]  - 8 - 1];
     233             :         }
                               /* The unsigned casts make negative indices compare as very large
                                * values, so the minimum is the smallest non-negative neighbour
                                * index; stored back into an int it is negative only when all
                                * three neighbours are negative. */
     234     3983982 :         ref[list] = FFMIN3((unsigned)left_ref,
     235             :                            (unsigned)top_ref,
     236             :                            (unsigned)refc);
     237     3983982 :         if (ref[list] >= 0) {
     238             :             /* This is just pred_motion() but with the cases removed that
     239             :              * cannot happen for direct blocks. */
     240     3606047 :             const int16_t *const A = sl->mv_cache[list][scan8[0] - 1];
     241     3606047 :             const int16_t *const B = sl->mv_cache[list][scan8[0] - 8];
     242             : 
     243    10818141 :             int match_count = (left_ref == ref[list]) +
     244     3606047 :                               (top_ref  == ref[list]) +
     245     3606047 :                               (refc     == ref[list]);
     246             : 
                                   /* Several neighbours use the chosen reference: component-wise
                                    * median of all three vectors. Exactly one: copy its vector. */
     247     3606047 :             if (match_count > 1) { // most common
     248     3170348 :                 mv[list] = pack16to32(mid_pred(A[0], B[0], C[0]),
     249     3170348 :                                       mid_pred(A[1], B[1], C[1]));
     250             :             } else {
     251             :                 assert(match_count == 1);
     252      435699 :                 if (left_ref == ref[list])
     253      327469 :                     mv[list] = AV_RN32A(A);
     254      108230 :                 else if (top_ref == ref[list])
     255       49757 :                     mv[list] = AV_RN32A(B);
     256             :                 else
     257       58473 :                     mv[list] = AV_RN32A(C);
     258             :             }
     259             :             av_assert2(ref[list] < (sl->ref_count[list] << !!FRAME_MBAFF(h)));
     260             :         } else {
                                   /* No neighbour uses this list: clear the list's flag in the
                                    * (sub) macroblock type and predict nothing from it. */
     261      377935 :             int mask = ~(MB_TYPE_L0 << (2 * list));
     262      377935 :             mv[list]  = 0;
     263      377935 :             ref[list] = -1;
     264      377935 :             if (!is_b8x8)
     265      355144 :                 *mb_type &= mask;
     266      377935 :             sub_mb_type &= mask;
     267             :         }
     268             :     }
                           /* Neither list had a usable neighbour: use reference 0 in both lists
                            * (both mv[] entries were set to 0 above) and restore both list flags. */
     269     1991991 :     if (ref[0] < 0 && ref[1] < 0) {
     270        4790 :         ref[0] = ref[1] = 0;
     271        4790 :         if (!is_b8x8)
     272        3993 :             *mb_type |= MB_TYPE_L0L1;
     273        4790 :         sub_mb_type |= MB_TYPE_L0L1;
     274             :     }
     275             : 
                           /* Not 8x8-partitioned and both predicted vectors are zero: the paths
                            * below could only store zero vectors as well, so store them for the
                            * whole macroblock, mark it 16x16 direct and return without looking at
                            * the co-located data. */
     276     1991991 :     if (!(is_b8x8 | mv[0] | mv[1])) {
     277     1029715 :         fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
     278     1029715 :         fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
     279     1029715 :         fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
     280     1029715 :         fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
     281     2059430 :         *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
     282             :                                  MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
     283     1029715 :                    MB_TYPE_16x16 | MB_TYPE_DIRECT2;
     284     1029715 :         return;
     285             :     }
     286             : 
                           /* Select the co-located macroblock(s) in the first list-1 reference
                            * (mb_xy / mb_y, mb_type_col[], strides) according to whether that
                            * macroblock and the current one are interlaced, and pick the
                            * partitioning flags for *mb_type / sub_mb_type. */
     287      962276 :     if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
     288      678519 :         if (!IS_INTERLACED(*mb_type)) {                    //     AFR/FR    -> AFL/FL
     289       39366 :             mb_y  = (sl->mb_y & ~1) + sl->col_parity;
     290       78732 :             mb_xy = sl->mb_x +
     291       39366 :                     ((sl->mb_y & ~1) + sl->col_parity) * h->mb_stride;
                                   /* b8_stride == 0 doubles as the marker for this case further
                                    * down. */
     292       39366 :             b8_stride = 0;
     293             :         } else {
     294      639153 :             mb_y  += sl->col_fieldoff;
     295      639153 :             mb_xy += h->mb_stride * sl->col_fieldoff; // non-zero for FL -> FL & differ parity
     296             :         }
     297      678519 :         goto single_col;
     298             :     } else {                                             // AFL/AFR/FR/FL -> AFR/FR
     299      283757 :         if (IS_INTERLACED(*mb_type)) {                   // AFL       /FL -> AFR/FR
                                   /* Two vertically adjacent co-located macroblocks are used,
                                    * starting at the even row. */
     300       43711 :             mb_y           =  sl->mb_y & ~1;
     301       43711 :             mb_xy          = (sl->mb_y & ~1) * h->mb_stride + sl->mb_x;
     302       43711 :             mb_type_col[0] = sl->ref_list[1][0].parent->mb_type[mb_xy];
     303       43711 :             mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy + h->mb_stride];
     304       43711 :             b8_stride      = 2 + 4 * h->mb_stride;
     305       43711 :             b4_stride     *= 6;
     306       87422 :             if (IS_INTERLACED(mb_type_col[0]) !=
     307       43711 :                 IS_INTERLACED(mb_type_col[1])) {
     308           0 :                 mb_type_col[0] &= ~MB_TYPE_INTERLACED;
     309           0 :                 mb_type_col[1] &= ~MB_TYPE_INTERLACED;
     310             :             }
     311             : 
     312       43711 :             sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
     313       77608 :             if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
     314       63300 :                 (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
     315             :                 !is_b8x8) {
     316       26172 :                 *mb_type |= MB_TYPE_16x8 | MB_TYPE_DIRECT2;  /* B_16x8 */
     317             :             } else {
     318       17539 :                 *mb_type |= MB_TYPE_8x8;
     319             :             }
     320             :         } else {                                         //     AFR/FR    -> AFR/FR
                                   /* Also entered through the goto in the interlaced co-located
                                    * branch above: a single co-located macroblock at mb_xy. */
     321      240046 : single_col:
     322      918565 :             mb_type_col[0] =
     323      918565 :             mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy];
     324             : 
     325      918565 :             sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
     326      918565 :             if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
     327      770996 :                 *mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_16x16 */
     328      230977 :             } else if (!is_b8x8 &&
     329       83408 :                        (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
     330       89172 :                 *mb_type |= MB_TYPE_DIRECT2 |
     331       44586 :                             (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
     332             :             } else {
     333      102983 :                 if (!h->ps.sps->direct_8x8_inference_flag) {
     334             :                     /* FIXME: Save sub mb types from previous frames (or derive
     335             :                      * from MVs) so we know exactly what block size to use. */
     336       21546 :                     sub_mb_type += (MB_TYPE_8x8 - MB_TYPE_16x16); /* B_SUB_4x4 */
     337             :                 }
     338      102983 :                 *mb_type |= MB_TYPE_8x8;
     339             :             }
     340             :         }
     341             :     }
     342             : 
                           /* Wait again, now for the (possibly adjusted) co-located row, before
                            * its motion vectors and reference indices are read. */
     343      962276 :     await_reference_mb_row(h, &sl->ref_list[1][0], mb_y);
     344             : 
     345      962276 :     l1mv0  = (void*)&sl->ref_list[1][0].parent->motion_val[0][h->mb2b_xy[mb_xy]];
     346      962276 :     l1mv1  = (void*)&sl->ref_list[1][0].parent->motion_val[1][h->mb2b_xy[mb_xy]];
     347      962276 :     l1ref0 = &sl->ref_list[1][0].parent->ref_index[0][4 * mb_xy];
     348      962276 :     l1ref1 = &sl->ref_list[1][0].parent->ref_index[1][4 * mb_xy];
                           /* Frame macroblock with an interlaced co-located one (b8_stride was
                            * zeroed above): on odd rows skip two reference entries and two rows of
                            * vectors in the co-located data. */
     349      962276 :     if (!b8_stride) {
     350       39366 :         if (sl->mb_y & 1) {
     351       19982 :             l1ref0 += 2;
     352       19982 :             l1ref1 += 2;
     353       19982 :             l1mv0  += 2 * b4_stride;
     354       19982 :             l1mv1  += 2 * b4_stride;
     355             :         }
     356             :     }
     357             : 
                           /* Case 1: current and co-located macroblock differ in interlacing;
                            * handled per 8x8 block with the strides chosen above.
                            * NOTE(review): unlike the two cases below, the l1ref1 alternative here
                            * does not test h->x264_build -- confirm this is intentional. */
     358      962276 :     if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
     359       83077 :         int n = 0;
     360      415385 :         for (i8 = 0; i8 < 4; i8++) {
     361      332308 :             int x8  = i8 & 1;
     362      332308 :             int y8  = i8 >> 1;
     363      332308 :             int xy8 = x8     + y8 * b8_stride;
     364      332308 :             int xy4 = x8 * 3 + y8 * b4_stride;
     365             :             int a, b;
     366             : 
                                   /* In an 8x8-partitioned macroblock only direct sub-blocks are
                                    * predicted here. */
     367      332308 :             if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
     368       25993 :                 continue;
     369      306315 :             sl->sub_mb_type[i8] = sub_mb_type;
     370             : 
     371      306315 :             fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
     372      306315 :                            (uint8_t)ref[0], 1);
     373      306315 :             fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
     374      306315 :                            (uint8_t)ref[1], 1);
                                   /* Col-zero test: co-located block is inter, the reference is
                                    * not long-term, and its list-0 (or, if list 0 is unused,
                                    * list-1) reference index is 0 with both vector components
                                    * within +-1. If it holds, a list keeps its predicted vector
                                    * only when its reference index is greater than 0. */
     375      479327 :             if (!IS_INTRA(mb_type_col[y8]) && !sl->ref_list[1][0].parent->long_ref &&
     376      291720 :                 ((l1ref0[xy8] == 0 &&
     377      168881 :                   FFABS(l1mv0[xy4][0]) <= 1 &&
     378      184398 :                   FFABS(l1mv0[xy4][1]) <= 1) ||
     379      176609 :                  (l1ref0[xy8] < 0 &&
     380       15267 :                   l1ref1[xy8] == 0 &&
     381       10690 :                   FFABS(l1mv1[xy4][0]) <= 1 &&
     382         988 :                   FFABS(l1mv1[xy4][1]) <= 1))) {
     383        5283 :                 a =
     384        5283 :                 b = 0;
     385        5283 :                 if (ref[0] > 0)
     386         459 :                     a = mv[0];
     387        5283 :                 if (ref[1] > 0)
     388         149 :                     b = mv[1];
     389        5283 :                 n++;
     390             :             } else {
     391      301032 :                 a = mv[0];
     392      301032 :                 b = mv[1];
     393             :             }
     394      306315 :             fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, a, 4);
     395      306315 :             fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, b, 4);
     396             :         }
                               /* n counts the 8x8 blocks that passed the test; when it is 0 or 4
                                * the macroblock type is collapsed to 16x16 direct. */
     397       83077 :         if (!is_b8x8 && !(n & 3))
     398      132358 :             *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
     399             :                                      MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
     400       66179 :                        MB_TYPE_16x16 | MB_TYPE_DIRECT2;
     401      879199 :     } else if (IS_16X16(*mb_type)) {
                               /* Case 2: 16x16 -- one col-zero test on the first co-located
                                * entry decides the vectors of the whole macroblock. */
     402             :         int a, b;
     403             : 
     404      745924 :         fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
     405      745924 :         fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
     406     1307656 :         if (!IS_INTRA(mb_type_col[0]) && !sl->ref_list[1][0].parent->long_ref &&
     407      863099 :             ((l1ref0[0] == 0 &&
     408      531334 :               FFABS(l1mv0[0][0]) <= 1 &&
     409      641982 :               FFABS(l1mv0[0][1]) <= 1) ||
     410      565906 :              (l1ref0[0] < 0 && !l1ref1[0] &&
     411       27913 :               FFABS(l1mv1[0][0]) <= 1 &&
     412       11435 :               FFABS(l1mv1[0][1]) <= 1 &&
     413        3547 :               h->x264_build > 33U))) {
     414       27418 :             a = b = 0;
     415       27418 :             if (ref[0] > 0)
     416        4994 :                 a = mv[0];
     417       54836 :             if (ref[1] > 0)
     418        3319 :                 b = mv[1];
     419             :         } else {
     420      718506 :             a = mv[0];
     421      718506 :             b = mv[1];
     422             :         }
     423      745924 :         fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
     424      745924 :         fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
     425             :     } else {
                               /* Case 3: same interlacing, not 16x16 -- store the predicted
                                * vectors per 8x8 block first, then zero them (per 8x8 or per 4x4
                                * block) where the col-zero test holds and the list's reference
                                * index is 0. */
     426      133275 :         int n = 0;
     427      666375 :         for (i8 = 0; i8 < 4; i8++) {
     428      533100 :             const int x8 = i8 & 1;
     429      533100 :             const int y8 = i8 >> 1;
     430             : 
     431      533100 :             if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
     432      102485 :                 continue;
     433      430615 :             sl->sub_mb_type[i8] = sub_mb_type;
     434             : 
     435      430615 :             fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, mv[0], 4);
     436      430615 :             fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, mv[1], 4);
     437      430615 :             fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
     438      430615 :                            (uint8_t)ref[0], 1);
     439      430615 :             fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
     440      430615 :                            (uint8_t)ref[1], 1);
     441             : 
     442             :             assert(b8_stride == 2);
     443             :             /* col_zero_flag */
     444      828367 :             if (!IS_INTRA(mb_type_col[0]) && !sl->ref_list[1][0].parent->long_ref &&
     445      551044 :                 (l1ref0[i8] == 0 ||
     446      161274 :                  (l1ref0[i8] < 0 &&
     447       15218 :                   l1ref1[i8] == 0 &&
     448        7236 :                   h->x264_build > 33U))) {
                                       /* Use the co-located vectors of whichever list supplied
                                        * the zero reference index. */
     449      251696 :                 const int16_t (*l1mv)[2] = l1ref0[i8] == 0 ? l1mv0 : l1mv1;
     450      251696 :                 if (IS_SUB_8X8(sub_mb_type)) {
     451      221457 :                     const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
     452      221457 :                     if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
     453       17676 :                         if (ref[0] == 0)
     454       14682 :                             fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2,
     455             :                                            8, 0, 4);
     456       17676 :                         if (ref[1] == 0)
     457       15715 :                             fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2,
     458             :                                            8, 0, 4);
     459       17676 :                         n += 4;
     460             :                     }
     461             :                 } else {
     462       30239 :                     int m = 0;
     463      151195 :                     for (i4 = 0; i4 < 4; i4++) {
     464      241912 :                         const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
     465      120956 :                                                      (y8 * 2 + (i4 >> 1)) * b4_stride];
     466      120956 :                         if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
     467        2956 :                             if (ref[0] == 0)
     468        1991 :                                 AV_ZERO32(sl->mv_cache[0][scan8[i8 * 4 + i4]]);
     469        2956 :                             if (ref[1] == 0)
     470        2710 :                                 AV_ZERO32(sl->mv_cache[1][scan8[i8 * 4 + i4]]);
     471        2956 :                             m++;
     472             :                         }
     473             :                     }
                                           /* All four or none of the 4x4 blocks were zeroed: the
                                            * sub-block type is switched from the 8x8 flag back to
                                            * the 16x16 flag. */
     474       30239 :                     if (!(m & 3))
     475       29926 :                         sl->sub_mb_type[i8] += MB_TYPE_16x16 - MB_TYPE_8x8;
     476       30239 :                     n += m;
     477             :                 }
     478             :             }
     479             :         }
                               /* n counts zeroed 4x4 units (4 per 8x8 block); 0 or 16 collapses
                                * the macroblock type to 16x16 direct. */
     480      133275 :         if (!is_b8x8 && !(n & 15))
     481      141064 :             *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
     482             :                                      MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
     483       70532 :                        MB_TYPE_16x16 | MB_TYPE_DIRECT2;
     484             :     }
     485             : }
     486             : 
                       /**
                        * Temporal direct prediction for the current macroblock.
                        *
                        * Reads the macroblock type, reference indices and motion vectors of the
                        * co-located macroblock in sl->ref_list[1][0], scales those vectors with
                        * sl->dist_scale_factor (or its per-field variant) and stores the results
                        * in sl->ref_cache, sl->mv_cache and sl->sub_mb_type.
                        *
                        * @param h       decoder context (read-only here)
                        * @param sl      slice context whose caches are filled
                        * @param mb_type in/out: partition and direct flags are OR-ed into *mb_type
                        */
     487      251816 : static void pred_temp_direct_motion(const H264Context *const h, H264SliceContext *sl,
     488             :                                     int *mb_type)
     489             : {
                           /* Strides used below to step from the upper (y8 == 0) to the lower
                            * (y8 == 1) 8x8 row inside l1ref0/l1ref1 and l1mv0/l1mv1. */
     490      251816 :     int b8_stride = 2;
     491      251816 :     int b4_stride = h->b_stride;
     492      251816 :     int mb_xy = sl->mb_xy, mb_y = sl->mb_y;
     493             :     int mb_type_col[2];
     494             :     const int16_t (*l1mv0)[2], (*l1mv1)[2];
     495             :     const int8_t *l1ref0, *l1ref1;
     496      251816 :     const int is_b8x8 = IS_8X8(*mb_type);
     497             :     unsigned int sub_mb_type;
     498             :     int i8, i4;
     499             : 
     500             :     assert(sl->ref_list[1][0].reference & 3);
     501             : 
                           /* NOTE(review): presumably blocks until the list-1 reference has
                            * decoded this MB row -- confirm in await_reference_mb_row(). */
     502      251816 :     await_reference_mb_row(h, &sl->ref_list[1][0],
     503      251816 :                            sl->mb_y + !!IS_INTERLACED(*mb_type));
     504             : 
     505      251816 :     if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
     506       88777 :         if (!IS_INTERLACED(*mb_type)) {                    //     AFR/FR    -> AFL/FL
     507       15611 :             mb_y  = (sl->mb_y & ~1) + sl->col_parity;
     508       31222 :             mb_xy = sl->mb_x +
     509       15611 :                     ((sl->mb_y & ~1) + sl->col_parity) * h->mb_stride;
                                   /* 0 doubles as a flag (tested via !b8_stride further down) and
                                    * makes both 8x8 rows read the same row of l1ref0/l1ref1. */
     510       15611 :             b8_stride = 0;
     511             :         } else {
     512       73166 :             mb_y  += sl->col_fieldoff;
     513       73166 :             mb_xy += h->mb_stride * sl->col_fieldoff; // non-zero for FL -> FL & differ parity
     514             :         }
     515       88777 :         goto single_col;
     516             :     } else {                                        // AFL/AFR/FR/FL -> AFR/FR
     517      163039 :         if (IS_INTERLACED(*mb_type)) {              // AFL       /FL -> AFR/FR
                                   /* Two vertically adjacent co-located MBs are used: the one at
                                    * the even row of the pair and the one directly below it. */
     518       16549 :             mb_y           = sl->mb_y & ~1;
     519       16549 :             mb_xy          = sl->mb_x + (sl->mb_y & ~1) * h->mb_stride;
     520       16549 :             mb_type_col[0] = sl->ref_list[1][0].parent->mb_type[mb_xy];
     521       16549 :             mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy + h->mb_stride];
                                   /* With l1ref based at 4 * mb_xy, y8 == 1 now addresses entries
                                    * 2..3 of the MB at mb_xy + h->mb_stride; the MV row offset for
                                    * y8 == 1 becomes 6 * h->b_stride. */
     522       16549 :             b8_stride      = 2 + 4 * h->mb_stride;
     523       16549 :             b4_stride     *= 6;
                                   /* If only one of the two co-located MBs is flagged interlaced,
                                    * clear the flag on both. */
     524       33098 :             if (IS_INTERLACED(mb_type_col[0]) !=
     525       16549 :                 IS_INTERLACED(mb_type_col[1])) {
     526           0 :                 mb_type_col[0] &= ~MB_TYPE_INTERLACED;
     527           0 :                 mb_type_col[1] &= ~MB_TYPE_INTERLACED;
     528             :             }
     529             : 
     530       16549 :             sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
     531             :                           MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */
     532             : 
     533       26436 :             if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
     534       17096 :                 (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
     535             :                 !is_b8x8) {
     536        5873 :                 *mb_type |= MB_TYPE_16x8 | MB_TYPE_L0L1 |
     537             :                             MB_TYPE_DIRECT2;                /* B_16x8 */
     538             :             } else {
     539       10676 :                 *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
     540             :             }
     541             :         } else {                                    //     AFR/FR    -> AFR/FR
                       /* Also reached by the goto from the interlaced co-located case above:
                        * a single co-located MB supplies both mb_type_col entries. */
     542      146490 : single_col:
     543      235267 :             mb_type_col[0]     =
     544      235267 :                 mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy];
     545             : 
     546      235267 :             sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
     547             :                           MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */
     548      235267 :             if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
     549      134937 :                 *mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
     550             :                             MB_TYPE_DIRECT2;                /* B_16x16 */
     551      155087 :             } else if (!is_b8x8 &&
     552       54757 :                        (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
                                       /* Inherit the 16x8 / 8x16 partition shape of the co-located MB. */
     553       69966 :                 *mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 |
     554       34983 :                             (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
     555             :             } else {
     556       65347 :                 if (!h->ps.sps->direct_8x8_inference_flag) {
     557             :                     /* FIXME: save sub mb types from previous frames (or derive
     558             :                      * from MVs) so we know exactly what block size to use */
     559       10593 :                     sub_mb_type = MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
     560             :                                   MB_TYPE_DIRECT2;          /* B_SUB_4x4 */
     561             :                 }
     562       65347 :                 *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
     563             :             }
     564             :         }
     565             :     }
     566             : 
                           /* Second wait, now on the (possibly adjusted) co-located row mb_y. */
     567      251816 :     await_reference_mb_row(h, &sl->ref_list[1][0], mb_y);
     568             : 
                           /* Base pointers into the co-located picture's list-0 / list-1 motion
                            * vectors and reference indices for macroblock mb_xy. */
     569      251816 :     l1mv0  = (void*)&sl->ref_list[1][0].parent->motion_val[0][h->mb2b_xy[mb_xy]];
     570      251816 :     l1mv1  = (void*)&sl->ref_list[1][0].parent->motion_val[1][h->mb2b_xy[mb_xy]];
     571      251816 :     l1ref0 = &sl->ref_list[1][0].parent->ref_index[0][4 * mb_xy];
     572      251816 :     l1ref1 = &sl->ref_list[1][0].parent->ref_index[1][4 * mb_xy];
                           /* b8_stride == 0 only for a non-interlaced current MB with an interlaced
                            * co-located MB: on odd MB rows advance to ref entries 2..3 and to MV
                            * row 2 of the co-located MB. */
     573      251816 :     if (!b8_stride) {
     574       15611 :         if (sl->mb_y & 1) {
     575        7720 :             l1ref0 += 2;
     576        7720 :             l1ref1 += 2;
     577        7720 :             l1mv0  += 2 * b4_stride;
     578        7720 :             l1mv1  += 2 * b4_stride;
     579             :         }
     580             :     }
     581             : 
     582             :     {
     583      503632 :         const int *map_col_to_list0[2] = { sl->map_col_to_list0[0],
     584      251816 :                                            sl->map_col_to_list0[1] };
     585      251816 :         const int *dist_scale_factor = sl->dist_scale_factor;
     586             :         int ref_offset;
     587             : 
                               /* Interlaced MB in an MBAFF frame: switch to the per-field tables
                                * selected by the parity of sl->mb_y. */
     588      251816 :         if (FRAME_MBAFF(h) && IS_INTERLACED(*mb_type)) {
     589       20032 :             map_col_to_list0[0] = sl->map_col_to_list0_field[sl->mb_y & 1][0];
     590       20032 :             map_col_to_list0[1] = sl->map_col_to_list0_field[sl->mb_y & 1][1];
     591       20032 :             dist_scale_factor   = sl->dist_scale_factor_field[sl->mb_y & 1];
     592             :         }
                               /* 16 when the co-located picture's mbaff is 1 and bit 7 of
                                * mb_type_col[0] is set, otherwise 0.
                                * NOTE(review): bit 7 is presumably MB_TYPE_INTERLACED -- confirm. */
     593      251816 :         ref_offset = (sl->ref_list[1][0].parent->mbaff << 4) & (mb_type_col[0] >> 3);
     594             : 
                               /* Current and co-located MB differ in interlaced flag: one MV per
                                * 8x8 block, with the vertical component rescaled. */
     595      251816 :         if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
                                   /* (v * (1 << y_shift)) / 2 doubles v when the current MB is not
                                    * interlaced (y_shift == 2) and halves it otherwise. */
     596       32160 :             int y_shift = 2 * !IS_INTERLACED(*mb_type);
     597             :             assert(h->ps.sps->direct_8x8_inference_flag);
     598             : 
     599      160800 :             for (i8 = 0; i8 < 4; i8++) {
     600      128640 :                 const int x8 = i8 & 1;
     601      128640 :                 const int y8 = i8 >> 1;
     602             :                 int ref0, scale;
     603      128640 :                 const int16_t (*l1mv)[2] = l1mv0;
     604             : 
                                       /* In a B_8x8 MB only the direct sub-blocks are predicted here. */
     605      128640 :                 if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
     606       22674 :                     continue;
     607      105966 :                 sl->sub_mb_type[i8] = sub_mb_type;
     608             : 
     609      105966 :                 fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                                       /* Intra co-located MB: zero is written to the list-0 ref cache
                                        * and to both MV caches for this block. */
     610      105966 :                 if (IS_INTRA(mb_type_col[y8])) {
     611       25496 :                     fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
     612       25496 :                     fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
     613       25496 :                     fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
     614       25496 :                     continue;
     615             :                 }
     616             : 
                                       /* Use the co-located list-0 reference when it is >= 0; otherwise
                                        * fall back to the co-located list-1 reference and its MVs. */
     617       80470 :                 ref0 = l1ref0[x8 + y8 * b8_stride];
     618       80470 :                 if (ref0 >= 0)
     619       80277 :                     ref0 = map_col_to_list0[0][ref0 + ref_offset];
     620             :                 else {
     621         193 :                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8 * b8_stride] +
     622             :                                                ref_offset];
     623         193 :                     l1mv = l1mv1;
     624             :                 }
     625       80470 :                 scale = dist_scale_factor[ref0];
     626       80470 :                 fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
     627             :                                ref0, 1);
     628             : 
     629             :                 {
                                           /* Column 0 or 3 of the selected MV row; list-0 MV is
                                            * (scale * mv + 128) >> 8, list-1 MV is that minus mv_col. */
     630       80470 :                     const int16_t *mv_col = l1mv[x8 * 3 + y8 * b4_stride];
     631       80470 :                     int my_col            = (mv_col[1] * (1 << y_shift)) / 2;
     632       80470 :                     int mx                = (scale * mv_col[0] + 128) >> 8;
     633       80470 :                     int my                = (scale * my_col    + 128) >> 8;
     634       80470 :                     fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
     635             :                                    pack16to32(mx, my), 4);
     636      160940 :                     fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
     637      160940 :                                    pack16to32(mx - mv_col[0], my - my_col), 4);
     638             :                 }
     639             :             }
     640       64320 :             return;
     641             :         }
     642             : 
     643             :         /* one-to-one mv scaling */
     644             : 
     645      219656 :         if (IS_16X16(*mb_type)) {
     646             :             int ref, mv0, mv1;
     647             : 
                                   /* Whole MB handled at once from the first ref / MV entry of the
                                    * co-located MB; an intra co-located MB yields ref 0 and zero MVs. */
     648      127333 :             fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
     649      127333 :             if (IS_INTRA(mb_type_col[0])) {
     650       60991 :                 ref = mv0 = mv1 = 0;
     651             :             } else {
     652      198975 :                 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
     653      132633 :                                                 : map_col_to_list0[1][l1ref1[0] + ref_offset];
     654       66342 :                 const int scale = dist_scale_factor[ref0];
     655       66342 :                 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
     656             :                 int mv_l0[2];
     657       66342 :                 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
     658       66342 :                 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
     659       66342 :                 ref      = ref0;
     660       66342 :                 mv0      = pack16to32(mv_l0[0], mv_l0[1]);
     661       66342 :                 mv1      = pack16to32(mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1]);
     662             :             }
     663      127333 :             fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
     664      127333 :             fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
     665      127333 :             fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
     666             :         } else {
     667      461615 :             for (i8 = 0; i8 < 4; i8++) {
     668      369292 :                 const int x8 = i8 & 1;
     669      369292 :                 const int y8 = i8 >> 1;
     670             :                 int ref0, scale;
     671      369292 :                 const int16_t (*l1mv)[2] = l1mv0;
     672             : 
                                       /* In a B_8x8 MB only the direct sub-blocks are predicted here. */
     673      369292 :                 if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
     674       92460 :                     continue;
     675      276832 :                 sl->sub_mb_type[i8] = sub_mb_type;
     676      276832 :                 fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
     677      276832 :                 if (IS_INTRA(mb_type_col[0])) {
     678       22293 :                     fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
     679       22293 :                     fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
     680       22293 :                     fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
     681       22293 :                     continue;
     682             :                 }
     683             : 
     684             :                 assert(b8_stride == 2);
     685      254539 :                 ref0 = l1ref0[i8];
     686      254539 :                 if (ref0 >= 0)
     687      254519 :                     ref0 = map_col_to_list0[0][ref0 + ref_offset];
     688             :                 else {
     689          20 :                     ref0 = map_col_to_list0[1][l1ref1[i8] + ref_offset];
     690          20 :                     l1mv = l1mv1;
     691             :                 }
     692      254539 :                 scale = dist_scale_factor[ref0];
     693             : 
     694      254539 :                 fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
     695             :                                ref0, 1);
     696      254539 :                 if (IS_SUB_8X8(sub_mb_type)) {
                                           /* One MV per 8x8 block, taken from row/column 0 or 3 of
                                            * the co-located MB's 4x4 MV grid. */
     697      229231 :                     const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
     698      229231 :                     int mx                = (scale * mv_col[0] + 128) >> 8;
     699      229231 :                     int my                = (scale * mv_col[1] + 128) >> 8;
     700      229231 :                     fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
     701             :                                    pack16to32(mx, my), 4);
     702      458462 :                     fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
     703      458462 :                                    pack16to32(mx - mv_col[0], my - mv_col[1]), 4);
     704             :                 } else {
                                           /* Otherwise scale each of the four 4x4 MVs of this 8x8
                                            * block individually. */
     705      126540 :                     for (i4 = 0; i4 < 4; i4++) {
     706      202464 :                         const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
     707      101232 :                                                      (y8 * 2 + (i4 >> 1)) * b4_stride];
     708      101232 :                         int16_t *mv_l0 = sl->mv_cache[0][scan8[i8 * 4 + i4]];
     709      101232 :                         mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
     710      101232 :                         mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
     711      101232 :                         AV_WN32A(sl->mv_cache[1][scan8[i8 * 4 + i4]],
     712             :                                  pack16to32(mv_l0[0] - mv_col[0],
     713             :                                             mv_l0[1] - mv_col[1]));
     714             :                     }
     715             :                 }
     716             :             }
     717             :         }
     718             :     }
     719             : }
     720             : 
                       /**
                        * Entry point for direct-mode motion prediction of the current macroblock.
                        *
                        * Forwards to pred_spatial_direct_motion() when sl->direct_spatial_mv_pred
                        * is non-zero and to pred_temp_direct_motion() otherwise; all three
                        * arguments are passed through unchanged.
                        *
                        * @param h       decoder context
                        * @param sl      slice context
                        * @param mb_type pointer to the macroblock type, handed to the selected predictor
                        */
     721     2243807 : void ff_h264_pred_direct_motion(const H264Context *const h, H264SliceContext *sl,
     722             :                                 int *mb_type)
     723             : {
     724     2243807 :     if (sl->direct_spatial_mv_pred)
     725     1991991 :         pred_spatial_direct_motion(h, sl, mb_type);
     726             :     else
     727      251816 :         pred_temp_direct_motion(h, sl, mb_type);
     728     2243807 : }

Generated by: LCOV version 1.13