LCOV - code coverage report
Current view: top level - libavcodec - motion_est.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 861 1001 86.0 %
Date: 2017-12-11 04:34:20 Functions: 27 33 81.8 %

          Line data    Source code
       1             : /*
       2             :  * Motion estimation
       3             :  * Copyright (c) 2000,2001 Fabrice Bellard
       4             :  * Copyright (c) 2002-2004 Michael Niedermayer
       5             :  *
       6             :  * new motion estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
       7             :  *
       8             :  * This file is part of FFmpeg.
       9             :  *
      10             :  * FFmpeg is free software; you can redistribute it and/or
      11             :  * modify it under the terms of the GNU Lesser General Public
      12             :  * License as published by the Free Software Foundation; either
      13             :  * version 2.1 of the License, or (at your option) any later version.
      14             :  *
      15             :  * FFmpeg is distributed in the hope that it will be useful,
      16             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      17             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      18             :  * Lesser General Public License for more details.
      19             :  *
      20             :  * You should have received a copy of the GNU Lesser General Public
      21             :  * License along with FFmpeg; if not, write to the Free Software
      22             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      23             :  */
      24             : 
      25             : /**
      26             :  * @file
      27             :  * Motion estimation.
      28             :  */
      29             : 
      30             : #include <stdlib.h>
      31             : #include <stdio.h>
      32             : #include <limits.h>
      33             : 
      34             : #include "avcodec.h"
      35             : #include "internal.h"
      36             : #include "mathops.h"
      37             : #include "motion_est.h"
      38             : #include "mpegutils.h"
      39             : #include "mpegvideo.h"
      40             : 
      41             : #define P_LEFT P[1] /* named aliases for fixed slots of an array called P; P itself is not defined in this part of the file -- presumably a predictor-MV table in the code that uses these macros (TODO confirm) */
      42             : #define P_TOP P[2]
      43             : #define P_TOPRIGHT P[3]
      44             : #define P_MEDIAN P[4]
      45             : #define P_MV1 P[9]
      46             : 
      47             : #define ME_MAP_SHIFT 3 /* shift used as the per-row multiplier (1<<ME_MAP_SHIFT) when sad_hpel_motion_search indexes score_map */
      48             : #define ME_MAP_MV_BITS 11 /* update_map_generation() steps the generation counter by 1<<(2*ME_MAP_MV_BITS) */
      49             : 
      50             : static int sad_hpel_motion_search(MpegEncContext * s, /* forward declaration: ff_init_me() stores this function in c->sub_motion_search before the definition further down */
      51             :                                   int *mx_ptr, int *my_ptr, int dmin,
      52             :                                   int src_index, int ref_index,
      53             :                                   int size, int h);
      54             : 
      55     5671293 : static inline unsigned update_map_generation(MotionEstContext *c) /* Advance c->map_generation by one step and return the new value; on reaching 0 the map is cleared and the counter restarted. */
      56             : {
      57     5671293 :     c->map_generation+= 1<<(ME_MAP_MV_BITS*2); /* one step = 1<<(2*ME_MAP_MV_BITS), so the low 2*ME_MAP_MV_BITS bits are left unchanged */
      58     5671293 :     if(c->map_generation==0){ /* counter came back to 0 (presumably an unsigned wrap-around -- confirm the field type) */
      59        5468 :         c->map_generation= 1<<(ME_MAP_MV_BITS*2); /* restart at one step above zero */
      60        5468 :         memset(c->map, 0, sizeof(uint32_t)*ME_MAP_SIZE); /* zero ME_MAP_SIZE uint32_t entries; NOTE(review): <string.h> is not among the includes visible here, presumably supplied by a project header */
      61             :     }
      62     5671293 :     return c->map_generation;
      63             : }
      64             : 
      65             : /* shape adaptive search stuff: record type ordered by minima_cmp() below */
      66             : typedef struct Minima{
      67             :     int height; /* sort key compared by minima_cmp(); presumably the score of the candidate (TODO confirm against the users of this struct) */
      68             :     int x, y; /* presumably the candidate position (TODO confirm) */
      69             :     int checked; /* not referenced in this part of the file */
      70             : }Minima;
      71             : 
      72           0 : static int minima_cmp(const void *a, const void *b){ /* Comparator over two Minima records: negative / zero / positive when a's height is below / equal to / above b's (ascending order by height). */
      73           0 :     const Minima *da = (const Minima *) a;
      74           0 :     const Minima *db = (const Minima *) b;
      75             : 
      76           0 :     return da->height - db->height; /* NOTE(review): the subtraction overflows int if the two heights are more than INT_MAX apart; (a>b)-(a<b) would avoid that */
      77             : }
      78             : 
      79             : #define FLAG_QPEL   1 //must be 1: cmp_internal() passes flags&FLAG_QPEL straight through as the qpel argument, which is used arithmetically (e.g. 1+qpel as a shift count)
      80             : #define FLAG_CHROMA 2 /* bit set by get_flags() when its chroma argument is non-zero */
      81             : #define FLAG_DIRECT 4 /* bit set by get_flags() when its direct argument is non-zero; selects cmp_direct_inline() in the cmp_* dispatchers */
      82             : 
      83     2181726 : static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ /* Point c->src[0][i] and c->ref[0][i] (i = 0..2) at position (x,y) inside the given planes; when ref_index is non-zero, also point c->ref[ref_index][i] into ref2. */
      84     6545178 :     const int offset[3]= { /* byte offset of (x,y) within each of the three planes */
      85     2181726 :           y*c->  stride + x, /* plane 0 uses c->stride and full x */
      86     2181726 :         ((y*c->uvstride + x)>>1), /* planes 1 and 2 use c->uvstride and halve the whole sum */
      87     2181726 :         ((y*c->uvstride + x)>>1),
      88             :     };
      89             :     int i;
      90     8726904 :     for(i=0; i<3; i++){
      91     6545178 :         c->src[0][i]= src [i] + offset[i];
      92     6545178 :         c->ref[0][i]= ref [i] + offset[i];
      93             :     }
      94     2181726 :     if(ref_index){ /* ref2 is only read on this path */
      95     1633248 :         for(i=0; i<3; i++){
      96     1224936 :             c->ref[ref_index][i]= ref2[i] + offset[i];
      97             :         }
      98             :     }
      99     2181726 : }
     100             : 
     101       28458 : static int get_flags(MotionEstContext *c, int direct, int chroma){ /* Build a FLAG_* bitmask: FLAG_QPEL when the codec context has AV_CODEC_FLAG_QPEL set, FLAG_DIRECT when direct is non-zero, FLAG_CHROMA when chroma is non-zero. */
     102       28458 :     return   ((c->avctx->flags&AV_CODEC_FLAG_QPEL) ? FLAG_QPEL : 0)
     103       28458 :            + (direct ? FLAG_DIRECT : 0) /* the three flags are distinct bits, so + acts as | here */
     104       28458 :            + (chroma ? FLAG_CHROMA : 0);
     105             : }
     106             : 
     107     2666114 : static av_always_inline int cmp_direct_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby, /* Score one candidate for the FLAG_DIRECT path: build a prediction in c->temp by a "put" from ref[0] followed by an "avg" from ref[8], then return cmp_func(c->temp, src[0]) over 16 rows. */
     108             :                       const int size, const int h, int ref_index, int src_index, /* size and h are not used by the body below; the final compare always uses 16 */
     109             :                       me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel){ /* chroma_cmp_func is not used here; qpel is 0 or 1 and selects the half-pel or quarter-pel function tables */
     110     2666114 :     MotionEstContext * const c= &s->me;
     111     2666114 :     const int stride= c->stride;
     112     2666114 :     const int hx= subx + (x<<(1+qpel)); /* candidate x in sub-pel units; NOTE(review): left-shifts x even when it is negative, whereas cmp_inline() writes the same value as x*(1<<(1+qpel)) */
     113     2666114 :     const int hy= suby + (y<<(1+qpel)); /* same for y */
     114     2666114 :     uint8_t * const * const ref= c->ref[ref_index];
     115     2666114 :     uint8_t * const * const src= c->src[src_index];
     116             :     int d;
     117             :     //FIXME check chroma 4mv, (no crashes ...)
     118             :         av_assert2(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1));
     119     5332228 :         if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){ /* same condition as the assert above; candidates outside the window get the large constant score in the else branch */
     120     2666114 :             const int time_pp= s->pp_time;
     121     2666114 :             const int time_pb= s->pb_time;
     122     2666114 :             const int mask= 2*qpel+1; /* 1 without qpel, 3 with qpel: extracts the sub-pel fraction bits of a vector component */
     123     2666114 :             if(s->mv_type==MV_TYPE_8X8){
     124             :                 int i;
     125     3320665 :                 for(i=0; i<4; i++){ /* four 8x8 blocks, each with its own basis / co-located vector */
     126     2656532 :                     int fx = c->direct_basis_mv[i][0] + hx;
     127     2656532 :                     int fy = c->direct_basis_mv[i][1] + hy;
     128     2656532 :                     int bx = hx ? fx - c->co_located_mv[i][0] : c->co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(qpel+4)); /* NOTE(review): divides by time_pp; nothing visible here guards against 0 */
     129     2656532 :                     int by = hy ? fy - c->co_located_mv[i][1] : c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(qpel+4));
     130     2656532 :                     int fxy= (fx&mask) + ((fy&mask)<<(qpel+1)); /* index into the put table built from the x and y fractions */
     131     2656532 :                     int bxy= (bx&mask) + ((by&mask)<<(qpel+1)); /* index into the avg table */
     132             : 
     133     2656532 :                     uint8_t *dst= c->temp + 8*(i&1) + 8*stride*(i>>1); /* destination of block i inside c->temp: 8 columns right for odd i, 8 rows down for i >= 2 */
     134     2656532 :                     if(qpel){
     135      961440 :                         c->qpel_put[1][fxy](dst, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
     136      961440 :                         c->qpel_avg[1][bxy](dst, ref[8] + (bx>>2) + (by>>2)*stride, stride); /* NOTE(review): ref[8] indexes well past the three entries init_ref() fills per row; presumably relies on c->ref being one contiguous 2-D array -- confirm against MotionEstContext */
     137             :                     }else{
     138     1695092 :                         c->hpel_put[1][fxy](dst, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 8);
     139     1695092 :                         c->hpel_avg[1][bxy](dst, ref[8] + (bx>>1) + (by>>1)*stride, stride, 8);
     140             :                     }
     141             :                 }
     142             :             }else{ /* any other mv_type: a single basis / co-located vector (index 0) for the whole 16x16 area */
     143     2001981 :                 int fx = c->direct_basis_mv[0][0] + hx;
     144     2001981 :                 int fy = c->direct_basis_mv[0][1] + hy;
     145     2001981 :                 int bx = hx ? fx - c->co_located_mv[0][0] : (c->co_located_mv[0][0]*(time_pb - time_pp)/time_pp);
     146     2001981 :                 int by = hy ? fy - c->co_located_mv[0][1] : (c->co_located_mv[0][1]*(time_pb - time_pp)/time_pp);
     147     2001981 :                 int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
     148     2001981 :                 int bxy= (bx&mask) + ((by&mask)<<(qpel+1));
     149             : 
     150     2001981 :                 if(qpel){ /* the 16x16 area is covered by four calls of the [1] table entry, at offsets 0, +8, +8*stride and +8+8*stride */
     151      433681 :                     c->qpel_put[1][fxy](c->temp               , ref[0] + (fx>>2) + (fy>>2)*stride               , stride);
     152      433681 :                     c->qpel_put[1][fxy](c->temp + 8           , ref[0] + (fx>>2) + (fy>>2)*stride + 8           , stride);
     153      433681 :                     c->qpel_put[1][fxy](c->temp     + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride     + 8*stride, stride);
     154      433681 :                     c->qpel_put[1][fxy](c->temp + 8 + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride + 8 + 8*stride, stride);
     155      433681 :                     c->qpel_avg[1][bxy](c->temp               , ref[8] + (bx>>2) + (by>>2)*stride               , stride);
     156      433681 :                     c->qpel_avg[1][bxy](c->temp + 8           , ref[8] + (bx>>2) + (by>>2)*stride + 8           , stride);
     157      433681 :                     c->qpel_avg[1][bxy](c->temp     + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride     + 8*stride, stride);
     158      433681 :                     c->qpel_avg[1][bxy](c->temp + 8 + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride + 8 + 8*stride, stride);
     159             :                 }else{
     160             :                     av_assert2((fx>>1) + 16*s->mb_x >= -16); /* debug checks that both full-pel vectors stay within [-16, width] x [-16, height] relative to the picture origin */
     161             :                     av_assert2((fy>>1) + 16*s->mb_y >= -16);
     162             :                     av_assert2((fx>>1) + 16*s->mb_x <= s->width);
     163             :                     av_assert2((fy>>1) + 16*s->mb_y <= s->height);
     164             :                     av_assert2((bx>>1) + 16*s->mb_x >= -16);
     165             :                     av_assert2((by>>1) + 16*s->mb_y >= -16);
     166             :                     av_assert2((bx>>1) + 16*s->mb_x <= s->width);
     167             :                     av_assert2((by>>1) + 16*s->mb_y <= s->height);
     168             : 
     169     1568300 :                     c->hpel_put[0][fxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
     170     1568300 :                     c->hpel_avg[0][bxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
     171             :                 }
     172             :             }
     173     2666114 :             d = cmp_func(s, c->temp, src[0], stride, 16); /* score the assembled prediction against the source, always over 16 rows */
     174             :         }else
     175           0 :             d= 256*256*256*32; /* 2^29: large score returned for a candidate outside the allowed window */
     176     2666114 :     return d;
     177             : }
     178             : 
     179    56832647 : static av_always_inline int cmp_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby, /* Score one candidate against a single reference: interpolate into c->temp when the sub-pel part is non-zero, otherwise compare source and reference directly; optionally add the scores of the two chroma planes. */
     180             :                       const int size, const int h, int ref_index, int src_index, /* size indexes the put-function tables, h is the row count passed to them and to cmp_func */
     181             :                       me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel, int chroma){ /* qpel: 0/1 selects half-pel or quarter-pel tables; chroma: non-zero adds the chroma comparison */
     182    56832647 :     MotionEstContext * const c= &s->me;
     183    56832647 :     const int stride= c->stride;
     184    56832647 :     const int uvstride= c->uvstride;
     185    56832647 :     const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
     186    56832647 :     const int hx= subx + x*(1<<(1+qpel)); /* position in sub-pel units; only read on the qpel+chroma path below */
     187    56832647 :     const int hy= suby + y*(1<<(1+qpel));
     188    56832647 :     uint8_t * const * const ref= c->ref[ref_index];
     189    56832647 :     uint8_t * const * const src= c->src[src_index];
     190             :     int d;
     191             :     //FIXME check chroma 4mv, (no crashes ...)
     192             :         int uvdxy;              /* no, it might not be used uninitialized */
     193    56832647 :         if(dxy){ /* non-zero sub-pel part: interpolate first, then compare */
     194     7986392 :             if(qpel){
     195     6065882 :                 if (h << size == 16) {
     196     6065882 :                     c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h)
     197           0 :                 } else if (size == 0 && h == 8) { /* 8 rows with size 0: done as two calls of the [1] entry side by side */
     198           0 :                     c->qpel_put[1][dxy](c->temp    , ref[0] + x + y*stride    , stride);
     199           0 :                     c->qpel_put[1][dxy](c->temp + 8, ref[0] + x + y*stride + 8, stride);
     200             :                 } else
     201             :                     av_assert2(0); /* no other size / h combination is handled */
     202     6065882 :                 if(chroma){
     203           0 :                     int cx= hx/2;
     204           0 :                     int cy= hy/2;
     205           0 :                     cx= (cx>>1)|(cx&1); /* halve again while keeping the lowest bit set if it was set */
     206           0 :                     cy= (cy>>1)|(cy&1);
     207           0 :                     uvdxy= (cx&1) + 2*(cy&1);
     208             :                     // FIXME x/y wrong, but MPEG-4 qpel is sick anyway, we should drop as much of it as possible in favor of H.264
     209             :                 }
     210             :             }else{
     211     1920510 :                 c->hpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride, h);
     212     1920510 :                 if(chroma)
     213           0 :                     uvdxy= dxy | (x&1) | (2*(y&1));
     214             :             }
     215     7986392 :             d = cmp_func(s, c->temp, src[0], stride, h); /* interpolated prediction is the first block argument here */
     216             :         }else{
     217    48846255 :             d = cmp_func(s, src[0], ref[0] + x + y*stride, stride, h); /* full-pel: no copy, source is the first block argument and the reference the second */
     218    48846255 :             if(chroma)
     219           0 :                 uvdxy= (x&1) + 2*(y&1);
     220             :         }
     221    56832647 :         if(chroma){ /* uvdxy was assigned on every path above that has chroma set */
     222           0 :             uint8_t * const uvtemp= c->temp + 16*stride; /* chroma scratch area starts 16 rows into c->temp */
     223           0 :             c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
     224           0 :             c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
     225           0 :             d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1);
     226           0 :             d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
     227             :         }
     228    56832647 :     return d;
     229             : }
     230             : 
     231           0 : static int cmp_simple(MpegEncContext *s, const int x, const int y, /* Fixed-argument wrapper around cmp_inline(): zero sub-pel offset, size 0, h 16, no qpel, no chroma. */
     232             :                       int ref_index, int src_index,
     233             :                       me_cmp_func cmp_func, me_cmp_func chroma_cmp_func){
     234           0 :     return cmp_inline(s,x,y,0,0,0,16,ref_index,src_index, cmp_func, chroma_cmp_func, 0, 0);
     235             : }
     236             : 
     237           0 : static int cmp_fpel_internal(MpegEncContext *s, const int x, const int y, /* Full-pel compare (sub-pel offsets fixed at 0), dispatched on FLAG_DIRECT. */
     238             :                       const int size, const int h, int ref_index, int src_index,
     239             :                       me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
     240           0 :     if(flags&FLAG_DIRECT){
     241           0 :         return cmp_direct_inline(s,x,y,0,0,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL); /* the direct path still forwards the qpel bit */
     242             :     }else{
     243           0 :         return cmp_inline(s,x,y,0,0,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0, flags&FLAG_CHROMA); /* qpel is passed as 0 here regardless of flags */
     244             :     }
     245             : }
     246             : 
     247    51288977 : static int cmp_internal(MpegEncContext *s, const int x, const int y, const int subx, const int suby, /* General compare: forwards the sub-pel offsets and takes qpel / chroma from the flags; FLAG_DIRECT selects cmp_direct_inline(). */
     248             :                       const int size, const int h, int ref_index, int src_index,
     249             :                       me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
     250    51288977 :     if(flags&FLAG_DIRECT){
     251     1794670 :         return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL);
     252             :     }else{
     253    49494307 :         return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL, flags&FLAG_CHROMA);
     254             :     }
     255             : }
     256             : 
     257             : /** @brief compares a block (either a full macroblock or a partition thereof)
     258             :     against a proposed motion-compensated prediction of that block; picks cmp_simple(), cmp_fpel_internal() or cmp_internal() depending on which arguments are compile-time constants
     259             :  */
     260    51288977 : static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
     261             :                       const int size, const int h, int ref_index, int src_index,
     262             :                       me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
     263             :     if(av_builtin_constant_p(flags) && av_builtin_constant_p(h) && av_builtin_constant_p(size) /* everything constant and equal to the plain 16-row full-pel case */
     264             :        && av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
     265             :        && flags==0 && h==16 && size==0 && subx==0 && suby==0){
     266             :         return cmp_simple(s,x,y,ref_index,src_index, cmp_func, chroma_cmp_func);
     267             :     }else if(av_builtin_constant_p(subx) && av_builtin_constant_p(suby) /* sub-pel offsets known to be zero at compile time */
     268             :        && subx==0 && suby==0){
     269             :         return cmp_fpel_internal(s,x,y,size,h,ref_index,src_index, cmp_func, chroma_cmp_func,flags);
     270             :     }else{
     271    51288977 :         return cmp_internal(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags); /* general fallback */
     272             :     }
     273             : }
     274             : 
     275     2331640 : static int cmp_hpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby, /* Same dispatch as cmp_internal() but with qpel fixed at 0 on both paths. */
     276             :                       const int size, const int h, int ref_index, int src_index,
     277             :                       me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
     278     2331640 :     if(flags&FLAG_DIRECT){
     279      598036 :         return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0);
     280             :     }else{
     281     1733604 :         return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0, flags&FLAG_CHROMA);
     282             :     }
     283             : }
     284             : 
     285     5878144 : static int cmp_qpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby, /* Same dispatch as cmp_internal() but with qpel fixed at 1 on both paths. */
     286             :                       const int size, const int h, int ref_index, int src_index,
     287             :                       me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
     288     5878144 :     if(flags&FLAG_DIRECT){
     289      273408 :         return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 1);
     290             :     }else{
     291     5604736 :         return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 1, flags&FLAG_CHROMA);
     292             :     }
     293             : }
     294             : 
     295             : #include "motion_est_template.c"
     296             : 
     297           0 : static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b, /* Stub compare function that ignores its arguments and returns 0; ff_init_me() installs it in slot [2] of the compare tables. */
     298             :                     ptrdiff_t stride, int h)
     299             : {
     300           0 :     return 0;
     301             : }
     302             : 
     303           0 : static void zero_hpel(uint8_t *a, const uint8_t *b, ptrdiff_t stride, int h){ /* Stub put function that does nothing; ff_init_me() installs it in all four c->hpel_put[2][..] entries. */
     304           0 : }
     305             : 
     306        9486 : int ff_init_me(MpegEncContext *s){ /* Set up s->me from the codec context: validate the diamond sizes, select compare functions and flags, choose the sub-pel search routine and put/avg tables, and set the strides. Returns 0 on success, -1 when the diamond size check fails. */
     307        9486 :     MotionEstContext * const c= &s->me;
     308        9486 :     int cache_size= FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT);
     309        9486 :     int dia_size= FFMAX(FFABS(s->avctx->dia_size)&255, FFABS(s->avctx->pre_dia_size)&255); /* larger of the two diamond sizes, each taken as absolute value masked to 8 bits */
     310             : 
     311        9486 :     if(FFMIN(s->avctx->dia_size, s->avctx->pre_dia_size) < -FFMIN(ME_MAP_SIZE, MAX_SAB_SIZE)){ /* reject a negative diamond size whose magnitude exceeds min(ME_MAP_SIZE, MAX_SAB_SIZE) */
     312           0 :         av_log(s->avctx, AV_LOG_ERROR, "ME_MAP size is too small for SAB diamond\n");
     313           0 :         return -1; /* NOTE(review): bare -1 rather than a named error code */
     314             :     }
     315             : 
     316        9486 :     c->avctx= s->avctx;
     317             : 
     318        9486 :     if(s->codec_id == AV_CODEC_ID_H261)
     319         300 :         c->avctx->me_sub_cmp = c->avctx->me_cmp; /* H.261: overwrite the sub-pel compare setting in the codec context with the full-pel one */
     320             : 
     321        9486 :     if(cache_size < 2*dia_size && !c->stride){ /* c->stride is only assigned further down, so this message can appear only while it is still 0 */
     322           0 :         av_log(s->avctx, AV_LOG_INFO, "ME_MAP size may be a little small for the selected diamond size\n");
     323             :     }
     324             : 
     325        9486 :     ff_set_cmp(&s->mecc, s->mecc.me_pre_cmp, c->avctx->me_pre_cmp); /* configure the four compare-function tables from the corresponding codec-context settings */
     326        9486 :     ff_set_cmp(&s->mecc, s->mecc.me_cmp,     c->avctx->me_cmp);
     327        9486 :     ff_set_cmp(&s->mecc, s->mecc.me_sub_cmp, c->avctx->me_sub_cmp);
     328        9486 :     ff_set_cmp(&s->mecc, s->mecc.mb_cmp,     c->avctx->mb_cmp);
     329             : 
     330        9486 :     c->flags    = get_flags(c, 0, c->avctx->me_cmp    &FF_CMP_CHROMA); /* direct is 0 for all three; chroma follows the FF_CMP_CHROMA bit of each setting */
     331        9486 :     c->sub_flags= get_flags(c, 0, c->avctx->me_sub_cmp&FF_CMP_CHROMA);
     332        9486 :     c->mb_flags = get_flags(c, 0, c->avctx->mb_cmp    &FF_CMP_CHROMA);
     333             : 
     334             : /*FIXME s->no_rounding b_type*/
     335        9486 :     if (s->avctx->flags & AV_CODEC_FLAG_QPEL) {
     336         528 :         c->sub_motion_search= qpel_motion_search;
     337         528 :         c->qpel_avg = s->qdsp.avg_qpel_pixels_tab;
     338         528 :         if (s->no_rounding)
     339          99 :             c->qpel_put = s->qdsp.put_no_rnd_qpel_pixels_tab;
     340             :         else
     341         429 :             c->qpel_put = s->qdsp.put_qpel_pixels_tab;
     342             :     }else{
     343        8958 :         if(c->avctx->me_sub_cmp&FF_CMP_CHROMA)
     344           0 :             c->sub_motion_search= hpel_motion_search;
     345        8958 :         else if(   c->avctx->me_sub_cmp == FF_CMP_SAD /* the SAD-specialised search is used only when all three compare settings are plain SAD */
     346        8023 :                 && c->avctx->    me_cmp == FF_CMP_SAD
     347        8023 :                 && c->avctx->    mb_cmp == FF_CMP_SAD)
     348        8023 :             c->sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
     349             :         else
     350         935 :             c->sub_motion_search= hpel_motion_search;
     351             :     }
     352        9486 :     c->hpel_avg = s->hdsp.avg_pixels_tab; /* half-pel tables are set in both the qpel and non-qpel case */
     353        9486 :     if (s->no_rounding)
     354        1623 :         c->hpel_put = s->hdsp.put_no_rnd_pixels_tab;
     355             :     else
     356        7863 :         c->hpel_put = s->hdsp.put_pixels_tab;
     357             : 
     358        9486 :     if(s->linesize){
     359        9486 :         c->stride  = s->linesize;
     360        9486 :         c->uvstride= s->uvlinesize;
     361             :     }else{ /* no line size known yet: derive strides from the macroblock width */
     362           0 :         c->stride  = 16*s->mb_width + 32;
     363           0 :         c->uvstride=  8*s->mb_width + 16;
     364             :     }
     365             : 
     366             :     /* 8x8 fullpel search would need a 4x4 chroma compare, which we do
     367             :      * not have yet, and even if we had, the motion estimation code
     368             :      * does not expect it. */
     369        9486 :     if (s->codec_id != AV_CODEC_ID_SNOW) {
     370        9486 :         if ((c->avctx->me_cmp & FF_CMP_CHROMA) /* && !s->mecc.me_cmp[2] */) /* with the second test commented out, me_cmp[2] is replaced unconditionally, unlike me_sub_cmp[2] below */
     371           0 :             s->mecc.me_cmp[2] = zero_cmp;
     372        9486 :         if ((c->avctx->me_sub_cmp & FF_CMP_CHROMA) && !s->mecc.me_sub_cmp[2])
     373           0 :             s->mecc.me_sub_cmp[2] = zero_cmp;
     374       18972 :         c->hpel_put[2][0]= c->hpel_put[2][1]= /* NOTE(review): c->hpel_put was assigned from an s->hdsp table above, so these stores presumably land in that table itself -- confirm the field type */
     375       18972 :         c->hpel_put[2][2]= c->hpel_put[2][3]= zero_hpel;
     376             :     }
     377             : 
     378        9486 :     if(s->codec_id == AV_CODEC_ID_H261){
     379         300 :         c->sub_motion_search= no_sub_motion_search; /* H.261: overrides whichever sub-pel search was selected above */
     380             :     }
     381             : 
     382        9486 :     return 0;
     383             : }
     384             : 
     385             : #define CHECK_SAD_HALF_MV(suffix, x, y) /* Evaluate the half-pel candidate at offset (x,y): SAD via pix_abs plus MV penalty, kept in dminh/dx/dy through COPY3_IF_LT. Uses the caller's locals (s, size, pix, ptr, stride, h, d, pen_x, pen_y, dminh, dx, dy, mv_penalty, penalty_factor). NOTE(review): suffix is never used in the body, and x / y are not parenthesized everywhere. */ \
     386             : {\
     387             :     d  = s->mecc.pix_abs[size][(x ? 1 : 0) + (y ? 2 : 0)](NULL, pix, ptr + ((x) >> 1), stride, h); /* table index: bit 0 set for a horizontal half-pel offset, bit 1 for a vertical one */ \
     388             :     d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\
     389             :     COPY3_IF_LT(dminh, d, dx, x, dy, y)\
     390             : }
     391             : 
     392     4223910 : static int sad_hpel_motion_search(MpegEncContext * s, /* Half-pel refinement around the full-pel vector (*mx_ptr,*my_ptr) using SAD: tests a subset of the eight half-pel neighbours chosen from the scores of the four full-pel neighbours, writes the result back in half-pel units and returns the best score. */
     393             :                                   int *mx_ptr, int *my_ptr, int dmin, /* in: full-pel vector and its score; out: vector in half-pel units */
     394             :                                   int src_index, int ref_index,
     395             :                                   int size, int h) /* size selects the pix_abs table row, h is the row count passed to it */
     396             : {
     397     4223910 :     MotionEstContext * const c= &s->me;
     398     4223910 :     const int penalty_factor= c->sub_penalty_factor;
     399             :     int mx, my, dminh;
     400             :     uint8_t *pix, *ptr;
     401     4223910 :     int stride= c->stride;
     402     4223910 :     LOAD_COMMON /* macro defined outside this part of the file; presumably declares xmin/xmax/ymin/ymax, pred_x/pred_y, score_map and mv_penalty used below (TODO confirm) */
     403             : 
     404             :     av_assert2(c->sub_flags == 0);
     405             : 
     406     4223910 :     if(c->skip){ /* skip set: report a zero vector and return the incoming score unchanged */
     407       21434 :         *mx_ptr = 0;
     408       21434 :         *my_ptr = 0;
     409       21434 :         return dmin;
     410             :     }
     411             : 
     412     4202476 :     pix = c->src[src_index][0];
     413             : 
     414     4202476 :     mx = *mx_ptr;
     415     4202476 :     my = *my_ptr;
     416     4202476 :     ptr = c->ref[ref_index][0] + (my * stride) + mx; /* reference block at the full-pel position */
     417             : 
     418     4202476 :     dminh = dmin;
     419             : 
     420     4202476 :     if (mx > xmin && mx < xmax && /* refine only when the vector is strictly inside the window */
     421     7892015 :         my > ymin && my < ymax) {
     422     3907015 :         int dx=0, dy=0;
     423             :         int d, pen_x, pen_y;
     424     3907015 :         const int index= my*(1<<ME_MAP_SHIFT) + mx;
     425     3907015 :         const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]; /* scores stored for the top, left, right and bottom full-pel neighbours */
     426     3907015 :         const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
     427     3907015 :         const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)];
     428     3907015 :         const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
     429     3907015 :         mx += mx; /* convert to half-pel units */
     430     3907015 :         my += my;
     431             : 
     432             : 
     433     3907015 :         pen_x= pred_x + mx;
     434     3907015 :         pen_y= pred_y + my;
     435             : 
     436     3907015 :         ptr-= stride; /* start one row up: candidates with y = -1 are checked first; ptr is moved back down (ptr+= stride) before the y = 0 / +1 candidates */
     437     3907015 :         if(t<=b){ /* top neighbour no worse than bottom: test the upper half-pel positions */
     438     1978477 :             CHECK_SAD_HALF_MV(y2 , 0, -1)
     439     1978477 :             if(l<=r){
     440     1235568 :                 CHECK_SAD_HALF_MV(xy2, -1, -1)
     441     1235568 :                 if(t+r<=b+l){
     442      721666 :                     CHECK_SAD_HALF_MV(xy2, +1, -1)
     443      721666 :                     ptr+= stride;
     444             :                 }else{
     445      513902 :                     ptr+= stride;
     446      513902 :                     CHECK_SAD_HALF_MV(xy2, -1, +1)
     447             :                 }
     448     1235568 :                 CHECK_SAD_HALF_MV(x2 , -1,  0)
     449             :             }else{
     450      742909 :                 CHECK_SAD_HALF_MV(xy2, +1, -1)
     451      742909 :                 if(t+l<=b+r){
     452      409412 :                     CHECK_SAD_HALF_MV(xy2, -1, -1)
     453      409412 :                     ptr+= stride;
     454             :                 }else{
     455      333497 :                     ptr+= stride;
     456      333497 :                     CHECK_SAD_HALF_MV(xy2, +1, +1)
     457             :                 }
     458      742909 :                 CHECK_SAD_HALF_MV(x2 , +1,  0)
     459             :             }
     460             :         }else{ /* bottom neighbour better: test the lower half-pel positions */
     461     1928538 :             if(l<=r){
     462      727070 :                 if(t+l<=b+r){
     463      305991 :                     CHECK_SAD_HALF_MV(xy2, -1, -1)
     464      305991 :                     ptr+= stride;
     465             :                 }else{
     466      421079 :                     ptr+= stride;
     467      421079 :                     CHECK_SAD_HALF_MV(xy2, +1, +1)
     468             :                 }
     469      727070 :                 CHECK_SAD_HALF_MV(x2 , -1,  0)
     470      727070 :                 CHECK_SAD_HALF_MV(xy2, -1, +1)
     471             :             }else{
     472     1201468 :                 if(t+r<=b+l){
     473      531444 :                     CHECK_SAD_HALF_MV(xy2, +1, -1)
     474      531444 :                     ptr+= stride;
     475             :                 }else{
     476      670024 :                     ptr+= stride;
     477      670024 :                     CHECK_SAD_HALF_MV(xy2, -1, +1)
     478             :                 }
     479     1201468 :                 CHECK_SAD_HALF_MV(x2 , +1,  0)
     480     1201468 :                 CHECK_SAD_HALF_MV(xy2, +1, +1)
     481             :             }
     482     1928538 :             CHECK_SAD_HALF_MV(y2 ,  0, +1)
     483             :         }
     484     3907015 :         mx+=dx; /* apply the best half-pel offset found (0,0 if none beat dmin) */
     485     3907015 :         my+=dy;
     486             : 
     487             :     }else{ /* on or outside the window border: no refinement, just convert to half-pel units */
     488      295461 :         mx += mx;
     489      295461 :         my += my;
     490             :     }
     491             : 
     492     4202476 :     *mx_ptr = mx;
     493     4202476 :     *my_ptr = my;
     494     4202476 :     return dminh;
     495             : }
     496             : 
     497     1773414 : static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4) /* Store (mx,my) for the current macroblock in s->p_mv_table and, when mv4 is non-zero, in four entries of s->current_picture.motion_val[0]. */
     498             : {
     499     1773414 :     const int xy= s->mb_x + s->mb_y*s->mb_stride; /* macroblock index into p_mv_table */
     500             : 
     501     1773414 :     s->p_mv_table[xy][0] = mx;
     502     1773414 :     s->p_mv_table[xy][1] = my;
     503             : 
     504             :     /* has already been set to the 4 MV if 4MV is done */
     505     1773414 :     if(mv4){
     506     1521583 :         int mot_xy= s->block_index[0];
     507             : 
     508     1521583 :         s->current_picture.motion_val[0][mot_xy    ][0] = mx; /* first pair of entries: mot_xy and mot_xy + 1 */
     509     1521583 :         s->current_picture.motion_val[0][mot_xy    ][1] = my;
     510     1521583 :         s->current_picture.motion_val[0][mot_xy + 1][0] = mx;
     511     1521583 :         s->current_picture.motion_val[0][mot_xy + 1][1] = my;
     512             : 
     513     1521583 :         mot_xy += s->b8_stride; /* second pair of entries, one b8_stride further on */
     514     1521583 :         s->current_picture.motion_val[0][mot_xy    ][0] = mx;
     515     1521583 :         s->current_picture.motion_val[0][mot_xy    ][1] = my;
     516     1521583 :         s->current_picture.motion_val[0][mot_xy + 1][0] = mx;
     517     1521583 :         s->current_picture.motion_val[0][mot_xy + 1][1] = my;
     518             :     }
     519     1773414 : }
     520             : 
     521             : /**
     522             :  * get fullpel ME search limits.
     523             :  */
     524     3189529 : static inline void get_limits(MpegEncContext *s, int x, int y)
     525             : {
     526     3189529 :     MotionEstContext * const c= &s->me;
     527     3189529 :     int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL));
     528     3189529 :     int max_range = MAX_MV >> (1 + !!(c->flags&FLAG_QPEL));
     529             : /*
     530             :     if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
     531             :     else                   c->range= 16;
     532             : */
     533     3189529 :     if (s->unrestricted_mv) {
     534     1688914 :         c->xmin = - x - 16;
     535     1688914 :         c->ymin = - y - 16;
     536     1688914 :         c->xmax = - x + s->width;
     537     1688914 :         c->ymax = - y + s->height;
     538     1500615 :     } else if (s->out_format == FMT_H261){
     539             :         // Search range of H.261 is different from other codec standards
     540      106920 :         c->xmin = (x > 15) ? - 15 : 0;
     541      106920 :         c->ymin = (y > 15) ? - 15 : 0;
     542      106920 :         c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0;
     543      106920 :         c->ymax = (y < s->mb_height * 16 - 16) ? 15 : 0;
     544             :     } else {
     545     1393695 :         c->xmin = - x;
     546     1393695 :         c->ymin = - y;
     547     1393695 :         c->xmax = - x + s->mb_width *16 - 16;
     548     1393695 :         c->ymax = - y + s->mb_height*16 - 16;
     549             :     }
     550     3189529 :     if(!range || range > max_range)
     551     3189529 :         range = max_range;
     552     3189529 :     if(range){
     553     3189529 :         c->xmin = FFMAX(c->xmin,-range);
     554     3189529 :         c->xmax = FFMIN(c->xmax, range);
     555     3189529 :         c->ymin = FFMAX(c->ymin,-range);
     556     3189529 :         c->ymax = FFMIN(c->ymax, range);
     557             :     }
     558     3189529 : }
     559             : 
     560      251831 : static inline void init_mv4_ref(MotionEstContext *c){
     561      251831 :     const int stride= c->stride;
     562             : 
     563      251831 :     c->ref[1][0] = c->ref[0][0] + 8;
     564      251831 :     c->ref[2][0] = c->ref[0][0] + 8*stride;
     565      251831 :     c->ref[3][0] = c->ref[2][0] + 8;
     566      251831 :     c->src[1][0] = c->src[0][0] + 8;
     567      251831 :     c->src[2][0] = c->src[0][0] + 8*stride;
     568      251831 :     c->src[3][0] = c->src[2][0] + 8;
     569      251831 : }
     570             : 
     571      251831 : static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
     572             : {
     573      251831 :     MotionEstContext * const c= &s->me;
     574      251831 :     const int size= 1;
     575      251831 :     const int h=8;
     576             :     int block;
     577             :     int P[10][2];
     578      251831 :     int dmin_sum=0, mx4_sum=0, my4_sum=0, i;
     579      251831 :     int same=1;
     580      251831 :     const int stride= c->stride;
     581      251831 :     uint8_t *mv_penalty= c->current_mv_penalty;
     582      251831 :     int safety_clipping= s->unrestricted_mv && (s->width&15) && (s->height&15);
     583             : 
     584      251831 :     init_mv4_ref(c);
     585             : 
     586     1259155 :     for(block=0; block<4; block++){
     587             :         int mx4, my4;
     588             :         int pred_x4, pred_y4;
     589             :         int dmin4;
     590             :         static const int off[4]= {2, 1, 1, -1};
     591     1007324 :         const int mot_stride = s->b8_stride;
     592     1007324 :         const int mot_xy = s->block_index[block];
     593             : 
     594     1007324 :         if(safety_clipping){
     595        8240 :             c->xmax = - 16*s->mb_x + s->width  - 8*(block &1);
     596        8240 :             c->ymax = - 16*s->mb_y + s->height - 8*(block>>1);
     597             :         }
     598             : 
     599     1007324 :         P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
     600     1007324 :         P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
     601             : 
     602     1007324 :         if(P_LEFT[0]       > (c->xmax<<shift)) P_LEFT[0]       = (c->xmax<<shift);
     603             : 
     604             :         /* special case for first line */
     605     1007324 :         if (s->first_slice_line && block<2) {
     606       31178 :             c->pred_x= pred_x4= P_LEFT[0];
     607       31178 :             c->pred_y= pred_y4= P_LEFT[1];
     608             :         } else {
     609      976146 :             P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][0];
     610      976146 :             P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][1];
     611      976146 :             P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0];
     612      976146 :             P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1];
     613      976146 :             if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
     614      976146 :             if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
     615      976146 :             if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
     616      976146 :             if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
     617             : 
     618      976146 :             P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
     619      976146 :             P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
     620             : 
     621      976146 :             c->pred_x= pred_x4 = P_MEDIAN[0];
     622      976146 :             c->pred_y= pred_y4 = P_MEDIAN[1];
     623             :         }
     624     1007324 :         P_MV1[0]= mx;
     625     1007324 :         P_MV1[1]= my;
     626     1007324 :         if(safety_clipping)
     627       82400 :             for(i=1; i<10; i++){
     628       74160 :                 if (s->first_slice_line && block<2 && i>1 && i<9)
     629       10234 :                     continue;
     630       63926 :                 if (i>4 && i<9)
     631       27112 :                     continue;
     632       36814 :                 if(P[i][0] > (c->xmax<<shift)) P[i][0]= (c->xmax<<shift);
     633       36814 :                 if(P[i][1] > (c->ymax<<shift)) P[i][1]= (c->ymax<<shift);
     634             :             }
     635             : 
     636     1007324 :         dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);
     637             : 
     638     1007324 :         dmin4= c->sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);
     639             : 
     640     1007324 :         if (s->mecc.me_sub_cmp[0] != s->mecc.mb_cmp[0]) {
     641             :             int dxy;
     642      167784 :             const int offset= ((block&1) + (block>>1)*stride)*8;
     643      167784 :             uint8_t *dest_y = c->scratchpad + offset;
     644      167784 :             if(s->quarter_sample){
     645       56744 :                 uint8_t *ref= c->ref[block][0] + (mx4>>2) + (my4>>2)*stride;
     646       56744 :                 dxy = ((my4 & 3) << 2) | (mx4 & 3);
     647             : 
     648       56744 :                 if(s->no_rounding)
     649       39304 :                     s->qdsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y, ref, stride);
     650             :                 else
     651       17440 :                     s->qdsp.put_qpel_pixels_tab[1][dxy](dest_y, ref, stride);
     652             :             }else{
     653      111040 :                 uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride;
     654      111040 :                 dxy = ((my4 & 1) << 1) | (mx4 & 1);
     655             : 
     656      111040 :                 if(s->no_rounding)
     657       77264 :                     s->hdsp.put_no_rnd_pixels_tab[1][dxy](dest_y    , ref    , stride, h);
     658             :                 else
     659       33776 :                     s->hdsp.put_pixels_tab       [1][dxy](dest_y    , ref    , stride, h);
     660             :             }
     661      167784 :             dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*c->mb_penalty_factor;
     662             :         }else
     663      839540 :             dmin_sum+= dmin4;
     664             : 
     665     1007324 :         if(s->quarter_sample){
     666       56744 :             mx4_sum+= mx4/2;
     667       56744 :             my4_sum+= my4/2;
     668             :         }else{
     669      950580 :             mx4_sum+= mx4;
     670      950580 :             my4_sum+= my4;
     671             :         }
     672             : 
     673     1007324 :         s->current_picture.motion_val[0][s->block_index[block]][0] = mx4;
     674     1007324 :         s->current_picture.motion_val[0][s->block_index[block]][1] = my4;
     675             : 
     676     1007324 :         if(mx4 != mx || my4 != my) same=0;
     677             :     }
     678             : 
     679      251831 :     if(same)
     680       41713 :         return INT_MAX;
     681             : 
     682      210118 :     if (s->mecc.me_sub_cmp[0] != s->mecc.mb_cmp[0]) {
     683       83034 :         dmin_sum += s->mecc.mb_cmp[0](s,
     684       41517 :                                       s->new_picture.f->data[0] +
     685       41517 :                                       s->mb_x * 16 + s->mb_y * 16 * stride,
     686             :                                       c->scratchpad, stride, 16);
     687             :     }
     688             : 
     689      210118 :     if(c->avctx->mb_cmp&FF_CMP_CHROMA){
     690             :         int dxy;
     691             :         int mx, my;
     692             :         int offset;
     693             : 
     694           0 :         mx= ff_h263_round_chroma(mx4_sum);
     695           0 :         my= ff_h263_round_chroma(my4_sum);
     696           0 :         dxy = ((my & 1) << 1) | (mx & 1);
     697             : 
     698           0 :         offset= (s->mb_x*8 + (mx>>1)) + (s->mb_y*8 + (my>>1))*s->uvlinesize;
     699             : 
     700           0 :         if(s->no_rounding){
     701           0 :             s->hdsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad    , s->last_picture.f->data[1] + offset, s->uvlinesize, 8);
     702           0 :             s->hdsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad + 8, s->last_picture.f->data[2] + offset, s->uvlinesize, 8);
     703             :         }else{
     704           0 :             s->hdsp.put_pixels_tab       [1][dxy](c->scratchpad    , s->last_picture.f->data[1] + offset, s->uvlinesize, 8);
     705           0 :             s->hdsp.put_pixels_tab       [1][dxy](c->scratchpad + 8, s->last_picture.f->data[2] + offset, s->uvlinesize, 8);
     706             :         }
     707             : 
     708           0 :         dmin_sum += s->mecc.mb_cmp[1](s, s->new_picture.f->data[1] + s->mb_x * 8 + s->mb_y * 8 * s->uvlinesize, c->scratchpad,     s->uvlinesize, 8);
     709           0 :         dmin_sum += s->mecc.mb_cmp[1](s, s->new_picture.f->data[2] + s->mb_x * 8 + s->mb_y * 8 * s->uvlinesize, c->scratchpad + 8, s->uvlinesize, 8);
     710             :     }
     711             : 
     712      210118 :     c->pred_x= mx;
     713      210118 :     c->pred_y= my;
     714             : 
     715      210118 :     switch(c->avctx->mb_cmp&0xFF){
     716             :     /*case FF_CMP_SSE:
     717             :         return dmin_sum+ 32*s->qscale*s->qscale;*/
     718           0 :     case FF_CMP_RD:
     719           0 :         return dmin_sum;
     720      210118 :     default:
     721      210118 :         return dmin_sum+ 11*c->mb_penalty_factor;
     722             :     }
     723             : }
     724             : 
     725      331233 : static inline void init_interlaced_ref(MpegEncContext *s, int ref_index){
     726      331233 :     MotionEstContext * const c= &s->me;
     727             : 
     728      331233 :     c->ref[1+ref_index][0] = c->ref[0+ref_index][0] + s->linesize;
     729      331233 :     c->src[1][0] = c->src[0][0] + s->linesize;
     730      331233 :     if(c->flags & FLAG_CHROMA){
     731           0 :         c->ref[1+ref_index][1] = c->ref[0+ref_index][1] + s->uvlinesize;
     732           0 :         c->ref[1+ref_index][2] = c->ref[0+ref_index][2] + s->uvlinesize;
     733           0 :         c->src[1][1] = c->src[0][1] + s->uvlinesize;
     734           0 :         c->src[1][2] = c->src[0][2] + s->uvlinesize;
     735             :     }
     736      331233 : }
     737             : 
     738      331233 : static int interlaced_search(MpegEncContext *s, int ref_index,
     739             :                              int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my, int user_field_select)
     740             : {
     741      331233 :     MotionEstContext * const c= &s->me;
     742      331233 :     const int size=0;
     743      331233 :     const int h=8;
     744             :     int block;
     745             :     int P[10][2];
     746      331233 :     uint8_t * const mv_penalty= c->current_mv_penalty;
     747      331233 :     int same=1;
     748      331233 :     const int stride= 2*s->linesize;
     749      331233 :     int dmin_sum= 0;
     750      331233 :     const int mot_stride= s->mb_stride;
     751      331233 :     const int xy= s->mb_x + s->mb_y*mot_stride;
     752             : 
     753      331233 :     c->ymin>>=1;
     754      331233 :     c->ymax>>=1;
     755      331233 :     c->stride<<=1;
     756      331233 :     c->uvstride<<=1;
     757      331233 :     init_interlaced_ref(s, ref_index);
     758             : 
     759      993699 :     for(block=0; block<2; block++){
     760             :         int field_select;
     761      662466 :         int best_dmin= INT_MAX;
     762      662466 :         int best_field= -1;
     763             : 
     764     1987398 :         for(field_select=0; field_select<2; field_select++){
     765             :             int dmin, mx_i, my_i;
     766     1324932 :             int16_t (*mv_table)[2]= mv_tables[block][field_select];
     767             : 
     768     1324932 :             if(user_field_select){
     769             :                 av_assert1(field_select==0 || field_select==1);
     770             :                 av_assert1(field_select_tables[block][xy]==0 || field_select_tables[block][xy]==1);
     771           0 :                 if(field_select_tables[block][xy] != field_select)
     772           0 :                     continue;
     773             :             }
     774             : 
     775     1324932 :             P_LEFT[0] = mv_table[xy - 1][0];
     776     1324932 :             P_LEFT[1] = mv_table[xy - 1][1];
     777     1324932 :             if(P_LEFT[0]       > (c->xmax<<1)) P_LEFT[0]       = (c->xmax<<1);
     778             : 
     779     1324932 :             c->pred_x= P_LEFT[0];
     780     1324932 :             c->pred_y= P_LEFT[1];
     781             : 
     782     1324932 :             if(!s->first_slice_line){
     783     1206192 :                 P_TOP[0]      = mv_table[xy - mot_stride][0];
     784     1206192 :                 P_TOP[1]      = mv_table[xy - mot_stride][1];
     785     1206192 :                 P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
     786     1206192 :                 P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];
     787     1206192 :                 if(P_TOP[1]      > (c->ymax<<1)) P_TOP[1]     = (c->ymax<<1);
     788     1206192 :                 if(P_TOPRIGHT[0] < (c->xmin<<1)) P_TOPRIGHT[0]= (c->xmin<<1);
     789     1206192 :                 if(P_TOPRIGHT[0] > (c->xmax<<1)) P_TOPRIGHT[0]= (c->xmax<<1);
     790     1206192 :                 if(P_TOPRIGHT[1] > (c->ymax<<1)) P_TOPRIGHT[1]= (c->ymax<<1);
     791             : 
     792     1206192 :                 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
     793     1206192 :                 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
     794             :             }
     795     1324932 :             P_MV1[0]= mx; //FIXME not correct if block != field_select
     796     1324932 :             P_MV1[1]= my / 2;
     797             : 
     798     1324932 :             dmin = epzs_motion_search2(s, &mx_i, &my_i, P, block, field_select+ref_index, mv_table, (1<<16)>>1);
     799             : 
     800     1324932 :             dmin= c->sub_motion_search(s, &mx_i, &my_i, dmin, block, field_select+ref_index, size, h);
     801             : 
     802     1324932 :             mv_table[xy][0]= mx_i;
     803     1324932 :             mv_table[xy][1]= my_i;
     804             : 
     805     1324932 :             if (s->mecc.me_sub_cmp[0] != s->mecc.mb_cmp[0]) {
     806             :                 int dxy;
     807             : 
     808             :                 //FIXME chroma ME
     809           0 :                 uint8_t *ref= c->ref[field_select+ref_index][0] + (mx_i>>1) + (my_i>>1)*stride;
     810           0 :                 dxy = ((my_i & 1) << 1) | (mx_i & 1);
     811             : 
     812           0 :                 if(s->no_rounding){
     813           0 :                     s->hdsp.put_no_rnd_pixels_tab[size][dxy](c->scratchpad, ref    , stride, h);
     814             :                 }else{
     815           0 :                     s->hdsp.put_pixels_tab       [size][dxy](c->scratchpad, ref    , stride, h);
     816             :                 }
     817           0 :                 dmin = s->mecc.mb_cmp[size](s, c->src[block][0], c->scratchpad, stride, h);
     818           0 :                 dmin+= (mv_penalty[mx_i-c->pred_x] + mv_penalty[my_i-c->pred_y] + 1)*c->mb_penalty_factor;
     819             :             }else
     820     1324932 :                 dmin+= c->mb_penalty_factor; //field_select bits
     821             : 
     822     1324932 :             dmin += field_select != block; //slightly prefer same field
     823             : 
     824     1324932 :             if(dmin < best_dmin){
     825      994986 :                 best_dmin= dmin;
     826      994986 :                 best_field= field_select;
     827             :             }
     828             :         }
     829             :         {
     830      662466 :             int16_t (*mv_table)[2]= mv_tables[block][best_field];
     831             : 
     832      662466 :             if(mv_table[xy][0] != mx) same=0; //FIXME check if these checks work and are any good at all
     833      662466 :             if(mv_table[xy][1]&1) same=0;
     834      662466 :             if(mv_table[xy][1]*2 != my) same=0;
     835      662466 :             if(best_field != block) same=0;
     836             :         }
     837             : 
     838      662466 :         field_select_tables[block][xy]= best_field;
     839      662466 :         dmin_sum += best_dmin;
     840             :     }
     841             : 
     842      331233 :     c->ymin<<=1;
     843      331233 :     c->ymax<<=1;
     844      331233 :     c->stride>>=1;
     845      331233 :     c->uvstride>>=1;
     846             : 
     847      331233 :     if(same)
     848       53962 :         return INT_MAX;
     849             : 
     850      277271 :     switch(c->avctx->mb_cmp&0xFF){
     851             :     /*case FF_CMP_SSE:
     852             :         return dmin_sum+ 32*s->qscale*s->qscale;*/
     853           0 :     case FF_CMP_RD:
     854           0 :         return dmin_sum;
     855      277271 :     default:
     856      277271 :         return dmin_sum+ 11*c->mb_penalty_factor;
     857             :     }
     858             : }
     859             : 
     860     7769406 : static inline int get_penalty_factor(int lambda, int lambda2, int type){
     861     7769406 :     switch(type&0xFF){
     862     6870571 :     default:
     863             :     case FF_CMP_SAD:
     864     6870571 :         return lambda>>FF_LAMBDA_SHIFT;
     865           0 :     case FF_CMP_DCT:
     866           0 :         return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
     867           0 :     case FF_CMP_W53:
     868           0 :         return (4*lambda)>>(FF_LAMBDA_SHIFT);
     869           0 :     case FF_CMP_W97:
     870           0 :         return (2*lambda)>>(FF_LAMBDA_SHIFT);
     871      552905 :     case FF_CMP_SATD:
     872             :     case FF_CMP_DCT264:
     873      552905 :         return (2*lambda)>>FF_LAMBDA_SHIFT;
     874      345930 :     case FF_CMP_RD:
     875             :     case FF_CMP_PSNR:
     876             :     case FF_CMP_SSE:
     877             :     case FF_CMP_NSSE:
     878      345930 :         return lambda2>>FF_LAMBDA_SHIFT;
     879           0 :     case FF_CMP_BIT:
     880             :     case FF_CMP_MEDIAN_SAD:
     881           0 :         return 1;
     882             :     }
     883             : }
     884             : 
     885     1773414 : void ff_estimate_p_frame_motion(MpegEncContext * s,
     886             :                                 int mb_x, int mb_y)
     887             : {
     888     1773414 :     MotionEstContext * const c= &s->me;
     889             :     uint8_t *pix, *ppix;
     890     1773414 :     int sum, mx = 0, my = 0, dmin = 0;
     891             :     int varc;            ///< the variance of the block (sum of squared (p[y][x]-average))
     892             :     int vard;            ///< sum of squared differences with the estimated motion vector
     893             :     int P[10][2];
     894     1773414 :     const int shift= 1+s->quarter_sample;
     895     1773414 :     int mb_type=0;
     896     1773414 :     Picture * const pic= &s->current_picture;
     897             : 
     898     1773414 :     init_ref(c, s->new_picture.f->data, s->last_picture.f->data, NULL, 16*mb_x, 16*mb_y, 0);
     899             : 
     900     1773414 :     av_assert0(s->quarter_sample==0 || s->quarter_sample==1);
     901     1773414 :     av_assert0(s->linesize == c->stride);
     902     1773414 :     av_assert0(s->uvlinesize == c->uvstride);
     903             : 
     904     1773414 :     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
     905     1773414 :     c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
     906     1773414 :     c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
     907     1773414 :     c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_DMV;
     908             : 
     909     1773414 :     get_limits(s, 16*mb_x, 16*mb_y);
     910     1773414 :     c->skip=0;
     911             : 
     912             :     /* intra / predictive decision */
     913     1773414 :     pix = c->src[0][0];
     914     1773414 :     sum  = s->mpvencdsp.pix_sum(pix, s->linesize);
     915     5320242 :     varc = s->mpvencdsp.pix_norm1(pix, s->linesize) -
     916     3546828 :            (((unsigned) sum * sum) >> 8) + 500;
     917             : 
     918     1773414 :     pic->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
     919     1773414 :     pic->mb_var [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
     920     1773414 :     c->mb_var_sum_temp += (varc+128)>>8;
     921             : 
     922     1773414 :     if (s->motion_est != FF_ME_ZERO) {
     923     1773414 :         const int mot_stride = s->b8_stride;
     924     1773414 :         const int mot_xy = s->block_index[0];
     925             : 
     926     1773414 :         P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
     927     1773414 :         P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
     928             : 
     929     1773414 :         if (P_LEFT[0] > (c->xmax << shift))
     930       19208 :             P_LEFT[0] =  c->xmax << shift;
     931             : 
     932     1773414 :         if (!s->first_slice_line) {
     933     1670811 :             P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][0];
     934     1670811 :             P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][1];
     935     1670811 :             P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
     936     1670811 :             P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
     937     1670811 :             if (P_TOP[1] > (c->ymax << shift))
     938       21656 :                 P_TOP[1] =  c->ymax << shift;
     939     1670811 :             if (P_TOPRIGHT[0] < (c->xmin * (1 << shift)))
     940       21298 :                 P_TOPRIGHT[0] =  c->xmin * (1 << shift);
     941     1670811 :             if (P_TOPRIGHT[1] > (c->ymax * (1 << shift)))
     942       19888 :                 P_TOPRIGHT[1] =  c->ymax * (1 << shift);
     943             : 
     944     1670811 :             P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
     945     1670811 :             P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
     946             : 
     947     1670811 :             if (s->out_format == FMT_H263) {
     948     1078981 :                 c->pred_x = P_MEDIAN[0];
     949     1078981 :                 c->pred_y = P_MEDIAN[1];
     950             :             } else { /* MPEG-1 at least */
     951      591830 :                 c->pred_x = P_LEFT[0];
     952      591830 :                 c->pred_y = P_LEFT[1];
     953             :             }
     954             :         } else {
     955      102603 :             c->pred_x = P_LEFT[0];
     956      102603 :             c->pred_y = P_LEFT[1];
     957             :         }
     958     1773414 :         dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);
     959             :     }
     960             : 
     961             :     /* At this point (mx,my) are full-pell and the relative displacement */
     962     1773414 :     ppix = c->ref[0][0] + (my * s->linesize) + mx;
     963             : 
     964     1773414 :     vard = s->mecc.sse[0](NULL, pix, ppix, s->linesize, 16);
     965             : 
     966     1773414 :     pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
     967     1773414 :     c->mc_mb_var_sum_temp += (vard+128)>>8;
     968             : 
     969     1773414 :     if (c->avctx->mb_decision > FF_MB_DECISION_SIMPLE) {
     970      362361 :         int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
     971      362361 :         int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
     972      362361 :         c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);
     973             : 
     974      362361 :         if (vard*2 + 200*256 > varc)
     975       97665 :             mb_type|= CANDIDATE_MB_TYPE_INTRA;
     976      362361 :         if (varc*2 + 200*256 > vard || s->qscale > 24){
     977             : //        if (varc*2 + 200*256 + 50*(s->lambda2>>FF_LAMBDA_SHIFT) > vard){
     978      360249 :             mb_type|= CANDIDATE_MB_TYPE_INTER;
     979      360249 :             c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
     980      720498 :             if (s->mpv_flags & FF_MPV_FLAG_MV0)
     981       61151 :                 if(mx || my)
     982       60625 :                     mb_type |= CANDIDATE_MB_TYPE_SKIPPED; //FIXME check difference
     983             :         }else{
     984        2112 :             mx <<=shift;
     985        2112 :             my <<=shift;
     986             :         }
     987      362361 :         if ((s->avctx->flags & AV_CODEC_FLAG_4MV)
     988      277722 :            && !c->skip && varc>50<<8 && vard>10<<8){
     989      251831 :             if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
     990      210118 :                 mb_type|=CANDIDATE_MB_TYPE_INTER4V;
     991             : 
     992      251831 :             set_p_mv_tables(s, mx, my, 0);
     993             :         }else
     994      110530 :             set_p_mv_tables(s, mx, my, 1);
     995      362361 :         if ((s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME)
     996       15600 :            && !c->skip){ //FIXME varc/d checks
     997       15600 :             if(interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0) < INT_MAX)
     998       13629 :                 mb_type |= CANDIDATE_MB_TYPE_INTER_I;
     999             :         }
    1000             :     }else{
    1001             :         int intra_score, i;
    1002     1411053 :         mb_type= CANDIDATE_MB_TYPE_INTER;
    1003             : 
    1004     1411053 :         dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    1005     1411053 :         if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
    1006           0 :             dmin= get_mb_score(s, mx, my, 0, 0, 0, 16, 1);
    1007             : 
    1008     1411053 :         if ((s->avctx->flags & AV_CODEC_FLAG_4MV)
    1009           0 :            && !c->skip && varc>50<<8 && vard>10<<8){
    1010           0 :             int dmin4= h263_mv4_search(s, mx, my, shift);
    1011           0 :             if(dmin4 < dmin){
    1012           0 :                 mb_type= CANDIDATE_MB_TYPE_INTER4V;
    1013           0 :                 dmin=dmin4;
    1014             :             }
    1015             :         }
    1016     1411053 :         if ((s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME)
    1017       85236 :            && !c->skip){ //FIXME varc/d checks
    1018       85233 :             int dmin_i= interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0);
    1019       85233 :             if(dmin_i < dmin){
    1020       11208 :                 mb_type = CANDIDATE_MB_TYPE_INTER_I;
    1021       11208 :                 dmin= dmin_i;
    1022             :             }
    1023             :         }
    1024             : 
    1025     1411053 :         set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);
    1026             : 
    1027             :         /* get intra luma score */
    1028     1411053 :         if((c->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
    1029           0 :             intra_score= varc - 500;
    1030             :         }else{
    1031     1411053 :             unsigned mean = (sum+128)>>8;
    1032     1411053 :             mean*= 0x01010101;
    1033             : 
    1034    23987901 :             for(i=0; i<16; i++){
    1035    22576848 :                 *(uint32_t*)(&c->scratchpad[i*s->linesize+ 0]) = mean;
    1036    22576848 :                 *(uint32_t*)(&c->scratchpad[i*s->linesize+ 4]) = mean;
    1037    22576848 :                 *(uint32_t*)(&c->scratchpad[i*s->linesize+ 8]) = mean;
    1038    22576848 :                 *(uint32_t*)(&c->scratchpad[i*s->linesize+12]) = mean;
    1039             :             }
    1040             : 
    1041     1411053 :             intra_score= s->mecc.mb_cmp[0](s, c->scratchpad, pix, s->linesize, 16);
    1042             :         }
    1043     1411053 :         intra_score += c->mb_penalty_factor*16;
    1044             : 
    1045     1411053 :         if(intra_score < dmin){
    1046       70770 :             mb_type= CANDIDATE_MB_TYPE_INTRA;
    1047       70770 :             s->current_picture.mb_type[mb_y*s->mb_stride + mb_x] = CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
    1048             :         }else
    1049     1340283 :             s->current_picture.mb_type[mb_y*s->mb_stride + mb_x] = 0;
    1050             : 
    1051             :         {
    1052     1411053 :             int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
    1053     1411053 :             int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
    1054     1411053 :             c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);
    1055             :         }
    1056             :     }
    1057             : 
    1058     1773414 :     s->mb_type[mb_y*s->mb_stride + mb_x]= mb_type;
    1059     1773414 : }
    1060             : 
    /**
     * Pre-pass motion search for one macroblock of a P frame.
     *
     * Sets up the source/reference planes with init_ref() (s->new_picture as
     * source, s->last_picture as reference), derives the pre-pass penalty
     * factor from avctx->me_pre_cmp, builds the predictor set P from
     * s->p_mv_table and runs ff_epzs_motion_search(). The resulting vector is
     * stored in s->p_mv_table[xy], scaled by (1 << shift).
     *
     * The predictors are read from the table entries at xy + 1,
     * xy + mb_stride and xy + mb_stride - 1, i.e. entries that follow xy.
     * NOTE(review): this suggests the caller walks macroblocks in reverse
     * order for the pre-pass -- confirm against the call site.
     *
     * @param s    encoder context
     * @param mb_x macroblock column
     * @param mb_y macroblock row
     * @return the score returned by ff_epzs_motion_search()
     */
    1061           0 : int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
    1062             :                                     int mb_x, int mb_y)
    1063             : {
    1064           0 :     MotionEstContext * const c= &s->me;
    1065             :     int mx, my, dmin;
    1066             :     int P[10][2];
    1067           0 :     const int shift= 1+s->quarter_sample;
    1068           0 :     const int xy= mb_x + mb_y*s->mb_stride;
    1069           0 :     init_ref(c, s->new_picture.f->data, s->last_picture.f->data, NULL, 16*mb_x, 16*mb_y, 0);
    1070             : 
    1071           0 :     av_assert0(s->quarter_sample==0 || s->quarter_sample==1);
    1072             : 
    1073           0 :     c->pre_penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
    1074           0 :     c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_DMV;
    1075             : 
    1076           0 :     get_limits(s, 16*mb_x, 16*mb_y);
    1077           0 :     c->skip=0;
    1078             : 
    1079           0 :     P_LEFT[0]       = s->p_mv_table[xy + 1][0];
    1080           0 :     P_LEFT[1]       = s->p_mv_table[xy + 1][1];
    1081             : 
                          /* The lower limits are scaled by multiplication rather than "<<":
                           * nothing here guarantees they are non-negative, and left-shifting
                           * a negative int is undefined behaviour in C. */
    1082           0 :     if(P_LEFT[0]       < (c->xmin * (1 << shift))) P_LEFT[0]       = (c->xmin * (1 << shift));
    1083             : 
    1084             :     /* special case for first line */
    1085           0 :     if (s->first_slice_line) {
    1086           0 :         c->pred_x= P_LEFT[0];
    1087           0 :         c->pred_y= P_LEFT[1];
    1088           0 :         P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]=
    1089           0 :         P_TOP[1]= P_TOPRIGHT[1]= P_MEDIAN[1]= 0; //FIXME
    1090             :     } else {
    1091           0 :         P_TOP[0]      = s->p_mv_table[xy + s->mb_stride    ][0];
    1092           0 :         P_TOP[1]      = s->p_mv_table[xy + s->mb_stride    ][1];
    1093           0 :         P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0];
    1094           0 :         P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1];
    1095           0 :         if(P_TOP[1]      < (c->ymin * (1 << shift))) P_TOP[1]     = (c->ymin * (1 << shift));
    1096           0 :         if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
    1097           0 :         if(P_TOPRIGHT[1] < (c->ymin * (1 << shift))) P_TOPRIGHT[1]= (c->ymin * (1 << shift));
    1098             : 
    1099           0 :         P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
    1100           0 :         P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    1101             : 
    1102           0 :         c->pred_x = P_MEDIAN[0];
    1103           0 :         c->pred_y = P_MEDIAN[1];
    1104             :     }
    1105             : 
    1106           0 :     dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);
    1107             : 
                          /* mx/my may be negative, so scale by multiplication instead of "<<". */
    1108           0 :     s->p_mv_table[xy][0] = mx * (1 << shift);
    1109           0 :     s->p_mv_table[xy][1] = my * (1 << shift);
    1110             : 
    1111           0 :     return dmin;
    1112             : }
    1113             : 
    /**
     * Search one direction's motion vector for a B-frame macroblock.
     *
     * Unless s->motion_est is FF_ME_ZERO, predictors are gathered from
     * mv_table (entries at mot_xy - 1, mot_xy - mot_stride and
     * mot_xy - mot_stride + 1) and ff_epzs_motion_search() is run with
     * s->p_mv_table and a time-derived mv_scale. The result is then refined
     * with c->sub_motion_search() and, when me_sub_cmp differs from mb_cmp
     * and c->skip is not set, re-scored with get_mb_score().
     * With FF_ME_ZERO the refinement starts from mx = my = dmin = 0.
     *
     * @param s         encoder context
     * @param mb_x      macroblock column
     * @param mb_y      macroblock row
     * @param mv_table  table read for predictors and written with the result
     *                  at [mot_xy]; compared against s->b_forw_mv_table to
     *                  pick the mv_scale formula
     * @param ref_index reference index forwarded to the search functions
     * @param f_code    selects the row of c->mv_penalty used as penalty table
     * @return the final score (dmin)
     */
    1114      816388 : static int estimate_motion_b(MpegEncContext *s, int mb_x, int mb_y,
    1115             :                              int16_t (*mv_table)[2], int ref_index, int f_code)
    1116             : {
    1117      816388 :     MotionEstContext * const c= &s->me;
    1118      816388 :     int mx = 0, my = 0, dmin = 0;
    1119             :     int P[10][2];
    1120      816388 :     const int shift= 1+s->quarter_sample;
    1121      816388 :     const int mot_stride = s->mb_stride;
    1122      816388 :     const int mot_xy = mb_y*mot_stride + mb_x;
    1123      816388 :     uint8_t * const mv_penalty= c->mv_penalty[f_code] + MAX_DMV;
    1124             :     int mv_scale;
    1125             : 
    1126      816388 :     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    1127      816388 :     c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    1128      816388 :     c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    1129      816388 :     c->current_mv_penalty= mv_penalty;
    1130             : 
    1131      816388 :     get_limits(s, 16*mb_x, 16*mb_y);
    1132             : 
    1133      816388 :     if (s->motion_est != FF_ME_ZERO) {
    1134      816388 :         P_LEFT[0] = mv_table[mot_xy - 1][0];
    1135      816388 :         P_LEFT[1] = mv_table[mot_xy - 1][1];
    1136             : 
    1137      816388 :         if (P_LEFT[0] > (c->xmax << shift)) P_LEFT[0] = (c->xmax << shift);
    1138             : 
    1139             :         /* special case for first line */
    1140      816388 :         if (!s->first_slice_line) {
    1141      756196 :             P_TOP[0]      = mv_table[mot_xy - mot_stride    ][0];
    1142      756196 :             P_TOP[1]      = mv_table[mot_xy - mot_stride    ][1];
    1143      756196 :             P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1][0];
    1144      756196 :             P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1][1];
    1145      756196 :             if (P_TOP[1] > (c->ymax << shift)) P_TOP[1] = (c->ymax << shift);
                                  /* Scale the lower limit by multiplication: left-shifting a
                                   * negative int is undefined behaviour in C. */
    1146      756196 :             if (P_TOPRIGHT[0] < (c->xmin * (1 << shift))) P_TOPRIGHT[0] = (c->xmin * (1 << shift));
    1147      756196 :             if (P_TOPRIGHT[1] > (c->ymax << shift)) P_TOPRIGHT[1] = (c->ymax << shift);
    1148             : 
    1149      756196 :             P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
    1150      756196 :             P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    1151             :         }
    1152      816388 :         c->pred_x = P_LEFT[0];
    1153      816388 :         c->pred_y = P_LEFT[1];
    1154             : 
    1155      816388 :         if(mv_table == s->b_forw_mv_table){
    1156      408194 :             mv_scale= (s->pb_time<<16) / (s->pp_time<<shift);
    1157             :         }else{
                                  /* pb_time - pp_time can be negative, so it is multiplied by
                                   * 1<<16 instead of being left-shifted (undefined for negatives). */
    1158      408194 :             mv_scale= ((s->pb_time - s->pp_time) * (1 << 16)) / (s->pp_time<<shift);
    1159             :         }
    1160             : 
    1161      816388 :         dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16);
    1162             :     }
    1163             : 
    1164      816388 :     dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16);
    1165             : 
    1166      816388 :     if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
    1167      306340 :         dmin= get_mb_score(s, mx, my, 0, ref_index, 0, 16, 1);
    1168             : 
    1169             : //    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
    1170      816388 :     mv_table[mot_xy][0]= mx;
    1171      816388 :     mv_table[mot_xy][1]= my;
    1172             : 
    1173      816388 :     return dmin;
    1174             : }
    1175             : 
    /**
     * Score one bidirectional candidate (a forward and a backward vector).
     *
     * The forward prediction is written into c->scratchpad from c->ref[0],
     * then the backward prediction from c->ref[2] is averaged on top of it.
     * The returned score is the mb_cmp comparison of that block against
     * c->src[0], plus the mv_penalty entries for the four vector-minus-
     * predictor differences, each weighted by c->mb_penalty_factor.
     * Only plane 0 of each plane set is used; chroma is not scored.
     *
     * @param s          encoder context
     * @param motion_fx  forward vector x, in half- or quarter-sample units
     *                   depending on s->quarter_sample
     * @param motion_fy  forward vector y
     * @param motion_bx  backward vector x
     * @param motion_by  backward vector y
     * @param pred_fx    forward predictor x (used only for the penalty)
     * @param pred_fy    forward predictor y
     * @param pred_bx    backward predictor x
     * @param pred_by    backward predictor y
     * @param size       index into the pixel-op and mb_cmp function tables
     * @param h          height passed to the half-pel ops and to mb_cmp
     * @return the combined score
     */
    1176     5444630 : static inline int check_bidir_mv(MpegEncContext * s,
    1177             :                    int motion_fx, int motion_fy,
    1178             :                    int motion_bx, int motion_by,
    1179             :                    int pred_fx, int pred_fy,
    1180             :                    int pred_bx, int pred_by,
    1181             :                    int size, int h)
    1182             : {
    1183             :     //FIXME optimize?
    1184             :     //FIXME better f_code prediction (max mv & distance)
    1185             :     //FIXME pointers
    1186     5444630 :     MotionEstContext * const c= &s->me;
    1187     5444630 :     uint8_t * const mv_penalty_f= c->mv_penalty[s->f_code] + MAX_DMV; // f_code of the prev frame
    1188     5444630 :     uint8_t * const mv_penalty_b= c->mv_penalty[s->b_code] + MAX_DMV; // NOTE(review): this row is selected by b_code, not f_code
    1189     5444630 :     int stride= c->stride;
    1190     5444630 :     uint8_t *dest_y = c->scratchpad;
    1191             :     uint8_t *ptr;
    1192             :     int dxy;
    1193             :     int src_x, src_y;
    1194             :     int fbmin;
    1195     5444630 :     uint8_t **src_data= c->src[0];
    1196     5444630 :     uint8_t **ref_data= c->ref[0];
    1197     5444630 :     uint8_t **ref2_data= c->ref[2];
    1198             : 
    1199     5444630 :     if(s->quarter_sample){
                              /* Quarter-sample path: low two bits of each component select the
                               * interpolation function, the remaining bits the integer offset. */
    1200      625749 :         dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
    1201      625749 :         src_x = motion_fx >> 2;
    1202      625749 :         src_y = motion_fy >> 2;
    1203             : 
    1204      625749 :         ptr = ref_data[0] + (src_y * stride) + src_x;
                              /* NOTE(review): the put uses table index 0 while the avg below uses
                               * "size", and neither takes h -- confirm this is intended. */
    1205      625749 :         s->qdsp.put_qpel_pixels_tab[0][dxy](dest_y, ptr, stride);
    1206             : 
    1207      625749 :         dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
    1208      625749 :         src_x = motion_bx >> 2;
    1209      625749 :         src_y = motion_by >> 2;
    1210             : 
    1211      625749 :         ptr = ref2_data[0] + (src_y * stride) + src_x;
    1212      625749 :         s->qdsp.avg_qpel_pixels_tab[size][dxy](dest_y, ptr, stride);
    1213             :     }else{
                              /* Half-sample path: low bit of each component selects the
                               * interpolation function, the remaining bits the integer offset. */
    1214     4818881 :         dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
    1215     4818881 :         src_x = motion_fx >> 1;
    1216     4818881 :         src_y = motion_fy >> 1;
    1217             : 
    1218     4818881 :         ptr = ref_data[0] + (src_y * stride) + src_x;
    1219     4818881 :         s->hdsp.put_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
    1220             : 
    1221     4818881 :         dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
    1222     4818881 :         src_x = motion_bx >> 1;
    1223     4818881 :         src_y = motion_by >> 1;
    1224             : 
    1225     4818881 :         ptr = ref2_data[0] + (src_y * stride) + src_x;
    1226     4818881 :         s->hdsp.avg_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
    1227             :     }
    1228             : 
                          /* Vector cost for both directions plus the distortion of the averaged block. */
    1229    16333890 :     fbmin = (mv_penalty_f[motion_fx-pred_fx] + mv_penalty_f[motion_fy-pred_fy])*c->mb_penalty_factor
    1230    10889260 :            +(mv_penalty_b[motion_bx-pred_bx] + mv_penalty_b[motion_by-pred_by])*c->mb_penalty_factor
    1231     5444630 :            + s->mecc.mb_cmp[size](s, src_data[0], dest_y, stride, h); // FIXME new_pic
    1232             : 
                          /* Placeholder: the chroma branch is empty, so FF_CMP_CHROMA has no effect here. */
    1233     5444630 :     if(c->avctx->mb_cmp&FF_CMP_CHROMA){
    1234             :     }
    1235             :     //FIXME CHROMA !!!
    1236             : 
    1237     5444630 :     return fbmin;
    1238             : }
    1239             : 
    1240             : /* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
    /*
     * Seeds s->b_bidir_forw_mv_table[xy] / s->b_bidir_back_mv_table[xy] from
     * the unidirectional tables, scores that pair with check_bidir_mv() and,
     * when avctx->bidir_refine is non-zero, repeatedly tries small steps on
     * the four vector components, keeping any step that lowers the score,
     * until a full pass brings no improvement. Predictors for the penalty
     * come from the bidir table entries at xy - 1.
     *
     * Candidates already tried are remembered in a 256-entry table indexed by
     * the low 8 bits of a hash of the four components, so distinct candidates
     * that collide are skipped as well.
     *
     * The refined vectors are written back to the bidir tables.
     * Returns the best score found.
     * The HASH/HASH8/CHECK_BIDIR/CHECK_BIDIR2 macros stay defined after the
     * function body.
     */
    1241      408194 : static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
    1242             : {
    1243      408194 :     MotionEstContext * const c= &s->me;
    1244      408194 :     const int mot_stride = s->mb_stride;
    1245      408194 :     const int xy = mb_y *mot_stride + mb_x;
    1246             :     int fbmin;
    1247      408194 :     int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
    1248      408194 :     int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
    1249      408194 :     int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
    1250      408194 :     int pred_by= s->b_bidir_back_mv_table[xy-1][1];
    1251      408194 :     int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
    1252      408194 :     int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
    1253      408194 :     int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
    1254      408194 :     int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
    1255      408194 :     const int flags= c->sub_flags;
    1256      408194 :     const int qpel= flags&FLAG_QPEL;
    1257      408194 :     const int shift= 1+qpel;
                          /* Lower limits are scaled by multiplication: left-shifting a negative
                           * int is undefined behaviour in C. */
    1258      408194 :     const int xmin= c->xmin * (1 << shift);
    1259      408194 :     const int ymin= c->ymin * (1 << shift);
    1260      408194 :     const int xmax= c->xmax<<shift;
    1261      408194 :     const int ymax= c->ymax<<shift;
    1262             : #define HASH(fx,fy,bx,by) ((fx)+17*(fy)+63*(bx)+117*(by))
    1263             : #define HASH8(fx,fy,bx,by) ((uint8_t)HASH(fx,fy,bx,by))
    1264      408194 :     int hashidx= HASH(motion_fx,motion_fy, motion_bx, motion_by);
    1265      408194 :     uint8_t map[256] = { 0 };
    1266             : 
    1267      408194 :     map[hashidx&255] = 1;
    1268             : 
    1269      408194 :     fbmin= check_bidir_mv(s, motion_fx, motion_fy,
    1270             :                           motion_bx, motion_by,
    1271             :                           pred_fx, pred_fy,
    1272             :                           pred_bx, pred_by,
    1273             :                           0, 16);
    1274             : 
    1275      408194 :     if(s->avctx->bidir_refine){
    1276             :         int end;
                              /* Number of entries of vect[]/hash[] to try per refinement level.
                               * NOTE(review): bidir_refine is used as an index without a range
                               * check here -- presumably validated elsewhere to be 0..4. */
    1277             :         static const uint8_t limittab[5]={0,8,32,64,80};
    1278      408194 :         const int limit= limittab[s->avctx->bidir_refine];
                              /* Step table {fx, fy, bx, by}; hash[] below holds HASH8() of the same
                               * steps in the same order. */
    1279             :         static const int8_t vect[][4]={
    1280             : { 0, 0, 0, 1}, { 0, 0, 0,-1}, { 0, 0, 1, 0}, { 0, 0,-1, 0}, { 0, 1, 0, 0}, { 0,-1, 0, 0}, { 1, 0, 0, 0}, {-1, 0, 0, 0},
    1281             : 
    1282             : { 0, 0, 1, 1}, { 0, 0,-1,-1}, { 0, 1, 1, 0}, { 0,-1,-1, 0}, { 1, 1, 0, 0}, {-1,-1, 0, 0}, { 1, 0, 0, 1}, {-1, 0, 0,-1},
    1283             : { 0, 1, 0, 1}, { 0,-1, 0,-1}, { 1, 0, 1, 0}, {-1, 0,-1, 0},
    1284             : { 0, 0,-1, 1}, { 0, 0, 1,-1}, { 0,-1, 1, 0}, { 0, 1,-1, 0}, {-1, 1, 0, 0}, { 1,-1, 0, 0}, { 1, 0, 0,-1}, {-1, 0, 0, 1},
    1285             : { 0,-1, 0, 1}, { 0, 1, 0,-1}, {-1, 0, 1, 0}, { 1, 0,-1, 0},
    1286             : 
    1287             : { 0, 1, 1, 1}, { 0,-1,-1,-1}, { 1, 1, 1, 0}, {-1,-1,-1, 0}, { 1, 1, 0, 1}, {-1,-1, 0,-1}, { 1, 0, 1, 1}, {-1, 0,-1,-1},
    1288             : { 0,-1, 1, 1}, { 0, 1,-1,-1}, {-1, 1, 1, 0}, { 1,-1,-1, 0}, { 1, 1, 0,-1}, {-1,-1, 0, 1}, { 1, 0,-1, 1}, {-1, 0, 1,-1},
    1289             : { 0, 1,-1, 1}, { 0,-1, 1,-1}, { 1,-1, 1, 0}, {-1, 1,-1, 0}, {-1, 1, 0, 1}, { 1,-1, 0,-1}, { 1, 0, 1,-1}, {-1, 0,-1, 1},
    1290             : { 0, 1, 1,-1}, { 0,-1,-1, 1}, { 1, 1,-1, 0}, {-1,-1, 1, 0}, { 1,-1, 0, 1}, {-1, 1, 0,-1}, {-1, 0, 1, 1}, { 1, 0,-1,-1},
    1291             : 
    1292             : { 1, 1, 1, 1}, {-1,-1,-1,-1},
    1293             : { 1, 1, 1,-1}, {-1,-1,-1, 1}, { 1, 1,-1, 1}, {-1,-1, 1,-1}, { 1,-1, 1, 1}, {-1, 1,-1,-1}, {-1, 1, 1, 1}, { 1,-1,-1,-1},
    1294             : { 1, 1,-1,-1}, {-1,-1, 1, 1}, { 1,-1,-1, 1}, {-1, 1, 1,-1}, { 1,-1, 1,-1}, {-1, 1,-1, 1},
    1295             :         };
    1296             :         static const uint8_t hash[]={
    1297             : HASH8( 0, 0, 0, 1), HASH8( 0, 0, 0,-1), HASH8( 0, 0, 1, 0), HASH8( 0, 0,-1, 0), HASH8( 0, 1, 0, 0), HASH8( 0,-1, 0, 0), HASH8( 1, 0, 0, 0), HASH8(-1, 0, 0, 0),
    1298             : 
    1299             : HASH8( 0, 0, 1, 1), HASH8( 0, 0,-1,-1), HASH8( 0, 1, 1, 0), HASH8( 0,-1,-1, 0), HASH8( 1, 1, 0, 0), HASH8(-1,-1, 0, 0), HASH8( 1, 0, 0, 1), HASH8(-1, 0, 0,-1),
    1300             : HASH8( 0, 1, 0, 1), HASH8( 0,-1, 0,-1), HASH8( 1, 0, 1, 0), HASH8(-1, 0,-1, 0),
    1301             : HASH8( 0, 0,-1, 1), HASH8( 0, 0, 1,-1), HASH8( 0,-1, 1, 0), HASH8( 0, 1,-1, 0), HASH8(-1, 1, 0, 0), HASH8( 1,-1, 0, 0), HASH8( 1, 0, 0,-1), HASH8(-1, 0, 0, 1),
    1302             : HASH8( 0,-1, 0, 1), HASH8( 0, 1, 0,-1), HASH8(-1, 0, 1, 0), HASH8( 1, 0,-1, 0),
    1303             : 
    1304             : HASH8( 0, 1, 1, 1), HASH8( 0,-1,-1,-1), HASH8( 1, 1, 1, 0), HASH8(-1,-1,-1, 0), HASH8( 1, 1, 0, 1), HASH8(-1,-1, 0,-1), HASH8( 1, 0, 1, 1), HASH8(-1, 0,-1,-1),
    1305             : HASH8( 0,-1, 1, 1), HASH8( 0, 1,-1,-1), HASH8(-1, 1, 1, 0), HASH8( 1,-1,-1, 0), HASH8( 1, 1, 0,-1), HASH8(-1,-1, 0, 1), HASH8( 1, 0,-1, 1), HASH8(-1, 0, 1,-1),
    1306             : HASH8( 0, 1,-1, 1), HASH8( 0,-1, 1,-1), HASH8( 1,-1, 1, 0), HASH8(-1, 1,-1, 0), HASH8(-1, 1, 0, 1), HASH8( 1,-1, 0,-1), HASH8( 1, 0, 1,-1), HASH8(-1, 0,-1, 1),
    1307             : HASH8( 0, 1, 1,-1), HASH8( 0,-1,-1, 1), HASH8( 1, 1,-1, 0), HASH8(-1,-1, 1, 0), HASH8( 1,-1, 0, 1), HASH8(-1, 1, 0,-1), HASH8(-1, 0, 1, 1), HASH8( 1, 0,-1,-1),
    1308             : 
    1309             : HASH8( 1, 1, 1, 1), HASH8(-1,-1,-1,-1),
    1310             : HASH8( 1, 1, 1,-1), HASH8(-1,-1,-1, 1), HASH8( 1, 1,-1, 1), HASH8(-1,-1, 1,-1), HASH8( 1,-1, 1, 1), HASH8(-1, 1,-1,-1), HASH8(-1, 1, 1, 1), HASH8( 1,-1,-1,-1),
    1311             : HASH8( 1, 1,-1,-1), HASH8(-1,-1, 1, 1), HASH8( 1,-1,-1, 1), HASH8(-1, 1, 1,-1), HASH8( 1,-1, 1,-1), HASH8(-1, 1,-1, 1),
    1312             : };
    1313             : 
                      /* CHECK_BIDIR: try one step if it is unvisited and stays within the limits;
                       * on improvement adopt it and clear "end" so another pass is made. */
    1314             : #define CHECK_BIDIR(fx,fy,bx,by)\
    1315             :     if( !map[(hashidx+HASH(fx,fy,bx,by))&255]\
    1316             :        &&(fx<=0 || motion_fx+fx<=xmax) && (fy<=0 || motion_fy+fy<=ymax) && (bx<=0 || motion_bx+bx<=xmax) && (by<=0 || motion_by+by<=ymax)\
    1317             :        &&(fx>=0 || motion_fx+fx>=xmin) && (fy>=0 || motion_fy+fy>=ymin) && (bx>=0 || motion_bx+bx>=xmin) && (by>=0 || motion_by+by>=ymin)){\
    1318             :         int score;\
    1319             :         map[(hashidx+HASH(fx,fy,bx,by))&255] = 1;\
    1320             :         score= check_bidir_mv(s, motion_fx+fx, motion_fy+fy, motion_bx+bx, motion_by+by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);\
    1321             :         if(score < fbmin){\
    1322             :             hashidx += HASH(fx,fy,bx,by);\
    1323             :             fbmin= score;\
    1324             :             motion_fx+=fx;\
    1325             :             motion_fy+=fy;\
    1326             :             motion_bx+=bx;\
    1327             :             motion_by+=by;\
    1328             :             end=0;\
    1329             :         }\
    1330             :     }
                      /* CHECK_BIDIR2: try a step and its negation. */
    1331             : #define CHECK_BIDIR2(a,b,c,d)\
    1332             : CHECK_BIDIR(a,b,c,d)\
    1333             : CHECK_BIDIR(-(a),-(b),-(c),-(d))
    1334             : 
    1335             :         do{
    1336             :             int i;
    1337      829046 :             int borderdist=0;
    1338      829046 :             end=1;
    1339             : 
                                  /* Single-component +-1 steps (the first 8 entries of vect[]). */
    1340      829046 :             CHECK_BIDIR2(0,0,0,1)
    1341      829046 :             CHECK_BIDIR2(0,0,1,0)
    1342      829046 :             CHECK_BIDIR2(0,1,0,0)
    1343      829046 :             CHECK_BIDIR2(1,0,0,0)
    1344             : 
                                  /* Multi-component steps; only reached when limit > 8. */
    1345      829046 :             for(i=8; i<limit; i++){
    1346           0 :                 int fx= motion_fx+vect[i][0];
    1347           0 :                 int fy= motion_fy+vect[i][1];
    1348           0 :                 int bx= motion_bx+vect[i][2];
    1349           0 :                 int by= motion_by+vect[i][3];
    1350           0 :                 if(borderdist<=0){
                                          /* (a|b) < 0 iff any of the four distances to the limits is
                                           * negative; such a candidate is marked visited so it is skipped. */
    1351           0 :                     int a= (xmax - FFMAX(fx,bx))|(FFMIN(fx,bx) - xmin);
    1352           0 :                     int b= (ymax - FFMAX(fy,by))|(FFMIN(fy,by) - ymin);
    1353           0 :                     if((a|b) < 0)
    1354           0 :                         map[(hashidx+hash[i])&255] = 1;
    1355             :                 }
    1356           0 :                 if(!map[(hashidx+hash[i])&255]){
    1357             :                     int score;
    1358           0 :                     map[(hashidx+hash[i])&255] = 1;
    1359           0 :                     score= check_bidir_mv(s, fx, fy, bx, by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);
    1360           0 :                     if(score < fbmin){
    1361           0 :                         hashidx += hash[i];
    1362           0 :                         fbmin= score;
    1363           0 :                         motion_fx=fx;
    1364           0 :                         motion_fy=fy;
    1365           0 :                         motion_bx=bx;
    1366           0 :                         motion_by=by;
    1367           0 :                         end=0;
    1368           0 :                         borderdist--;
    1369           0 :                         if(borderdist<=0){
                                                  /* Recompute the smallest distance to any limit; while it
                                                   * is positive the border test above is skipped. */
    1370           0 :                             int a= FFMIN(xmax - FFMAX(fx,bx), FFMIN(fx,bx) - xmin);
    1371           0 :                             int b= FFMIN(ymax - FFMAX(fy,by), FFMIN(fy,by) - ymin);
    1372           0 :                             borderdist= FFMIN(a,b);
    1373             :                         }
    1374             :                     }
    1375             :                 }
    1376             :             }
    1377      829046 :         }while(!end);
    1378             :     }
    1379             : 
    1380      408194 :     s->b_bidir_forw_mv_table[xy][0]= motion_fx;
    1381      408194 :     s->b_bidir_forw_mv_table[xy][1]= motion_fy;
    1382      408194 :     s->b_bidir_back_mv_table[xy][0]= motion_bx;
    1383      408194 :     s->b_bidir_back_mv_table[xy][1]= motion_by;
    1384             : 
    1385      408194 :     return fbmin;
    1386             : }
    1387             : 
    /**
     * Motion search for the "direct" candidate of a B-frame macroblock.
     *
     * For each block (only the first one unless the co-located macroblock in
     * s->next_picture is 8x8) the co-located vector is copied into
     * c->co_located_mv[] and scaled by time_pb / time_pp, with a per-block
     * offset added, to form c->direct_basis_mv[]. The search window, which
     * starts at [-32 >> shift, 31 >> shift], is then narrowed using those
     * vectors and the picture dimensions.
     *
     * If the narrowed window does not contain 0 on both axes, a zero vector
     * is stored in s->b_direct_mv_table[mot_xy] and 256*256*256*64 is
     * returned; c's limits and flags are left untouched on that path.
     * Otherwise the search runs with FLAG_DIRECT set in c->flags and
     * c->sub_flags (cleared again before returning), the result is stored in
     * the table, and the limits are restored with get_limits().
     *
     * @param s    encoder context
     * @param mb_x macroblock column
     * @param mb_y macroblock row
     * @return the best score, or 256*256*256*64 when no valid window exists
     */
    1388      191520 : static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
    1389             : {
    1390      191520 :     MotionEstContext * const c= &s->me;
    1391             :     int P[10][2];
    1392      191520 :     const int mot_stride = s->mb_stride;
    1393      191520 :     const int mot_xy = mb_y*mot_stride + mb_x;
    1394      191520 :     const int shift= 1+s->quarter_sample;
    1395             :     int dmin, i;
    1396      191520 :     const int time_pp= s->pp_time;
    1397      191520 :     const int time_pb= s->pb_time;
    1398             :     int mx, my, xmin, xmax, ymin, ymax;
    1399      191520 :     int16_t (*mv_table)[2]= s->b_direct_mv_table;
    1400             : 
    1401      191520 :     c->current_mv_penalty= c->mv_penalty[1] + MAX_DMV;
    1402      191520 :     ymin= xmin=(-32)>>shift;
    1403      191520 :     ymax= xmax=   31>>shift;
    1404             : 
    1405      191520 :     if (IS_8X8(s->next_picture.mb_type[mot_xy])) {
    1406       54358 :         s->mv_type= MV_TYPE_8X8;
    1407             :     }else{
    1408      137162 :         s->mv_type= MV_TYPE_16X16;
    1409             :     }
    1410             : 
    1411      408952 :     for(i=0; i<4; i++){
    1412      354594 :         int index= s->block_index[i];
    1413             :         int min, max;
    1414             : 
    1415      354594 :         c->co_located_mv[i][0] = s->next_picture.motion_val[0][index][0];
    1416      354594 :         c->co_located_mv[i][1] = s->next_picture.motion_val[0][index][1];
    1417      354594 :         c->direct_basis_mv[i][0]= c->co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
    1418      354594 :         c->direct_basis_mv[i][1]= c->co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3));
    1419             : //        c->direct_basis_mv[1][i][0]= c->co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(shift+3);
    1420             : //        c->direct_basis_mv[1][i][1]= c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(shift+3);
    1421             : 
                              /* Horizontal extent of the basis vector and of (basis - co-located),
                               * in full samples relative to the picture origin. */
    1422      354594 :         max= FFMAX(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
    1423      354594 :         min= FFMIN(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
    1424      354594 :         max+= 16*mb_x + 1; // +-1 is for the simpler rounding
    1425      354594 :         min+= 16*mb_x - 1;
    1426      354594 :         xmax= FFMIN(xmax, s->width - max);
    1427      354594 :         xmin= FFMAX(xmin, - 16     - min);
    1428             : 
                              /* Same for the vertical component. */
    1429      354594 :         max= FFMAX(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
    1430      354594 :         min= FFMIN(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
    1431      354594 :         max+= 16*mb_y + 1; // +-1 is for the simpler rounding
    1432      354594 :         min+= 16*mb_y - 1;
    1433      354594 :         ymax= FFMIN(ymax, s->height - max);
    1434      354594 :         ymin= FFMAX(ymin, - 16      - min);
    1435             : 
    1436      354594 :         if(s->mv_type == MV_TYPE_16X16) break;
    1437             :     }
    1438             : 
    1439             :     av_assert2(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);
    1440             : 
                          /* The window must contain the zero offset on both axes. */
    1441      191520 :     if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
    1442         105 :         s->b_direct_mv_table[mot_xy][0]= 0;
    1443         105 :         s->b_direct_mv_table[mot_xy][1]= 0;
    1444             : 
    1445         105 :         return 256*256*256*64;
    1446             :     }
    1447             : 
    1448      191415 :     c->xmin= xmin;
    1449      191415 :     c->ymin= ymin;
    1450      191415 :     c->xmax= xmax;
    1451      191415 :     c->ymax= ymax;
    1452      191415 :     c->flags     |= FLAG_DIRECT;
    1453      191415 :     c->sub_flags |= FLAG_DIRECT;
    1454      191415 :     c->pred_x=0;
    1455      191415 :     c->pred_y=0;
    1456             : 
                          /* xmin/ymin are <= 0 here, so they are scaled by multiplication:
                           * left-shifting a negative int is undefined behaviour in C. */
    1457      191415 :     P_LEFT[0]        = av_clip(mv_table[mot_xy - 1][0], xmin * (1 << shift), xmax<<shift);
    1458      191415 :     P_LEFT[1]        = av_clip(mv_table[mot_xy - 1][1], ymin * (1 << shift), ymax<<shift);
    1459             : 
    1460             :     /* special case for first line */
    1461      191415 :     if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as it is clipped
    1462      178176 :         P_TOP[0]      = av_clip(mv_table[mot_xy - mot_stride             ][0], xmin * (1 << shift), xmax<<shift);
    1463      178176 :         P_TOP[1]      = av_clip(mv_table[mot_xy - mot_stride             ][1], ymin * (1 << shift), ymax<<shift);
    1464      178176 :         P_TOPRIGHT[0] = av_clip(mv_table[mot_xy - mot_stride + 1         ][0], xmin * (1 << shift), xmax<<shift);
    1465      178176 :         P_TOPRIGHT[1] = av_clip(mv_table[mot_xy - mot_stride + 1         ][1], ymin * (1 << shift), ymax<<shift);
    1466             : 
    1467      178176 :         P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
    1468      178176 :         P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    1469             :     }
    1470             : 
    1471      191415 :     dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16);
    1472      191415 :     if(c->sub_flags&FLAG_QPEL)
    1473       38282 :         dmin = qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    1474             :     else
    1475      153133 :         dmin = hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    1476             : 
    1477      191415 :     if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
    1478      114819 :         dmin= get_mb_score(s, mx, my, 0, 0, 0, 16, 1);
    1479             : 
    1480      191415 :     get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed
    1481             : 
    1482      191415 :     mv_table[mot_xy][0]= mx;
    1483      191415 :     mv_table[mot_xy][1]= my;
    1484      191415 :     c->flags     &= ~FLAG_DIRECT;
    1485      191415 :     c->sub_flags &= ~FLAG_DIRECT;
    1486             : 
    1487      191415 :     return dmin;
    1488             : }
    1489             : 
    1490      408312 : void ff_estimate_b_frame_motion(MpegEncContext * s,
    1491             :                              int mb_x, int mb_y)
    1492             : {
    1493      408312 :     MotionEstContext * const c= &s->me;
    1494      408312 :     const int penalty_factor= c->mb_penalty_factor;
    1495             :     int fmin, bmin, dmin, fbmin, bimin, fimin;
    1496      408312 :     int type=0;
    1497      408312 :     const int xy = mb_y*s->mb_stride + mb_x;
    1498      816624 :     init_ref(c, s->new_picture.f->data, s->last_picture.f->data,
    1499      408312 :              s->next_picture.f->data, 16 * mb_x, 16 * mb_y, 2);
    1500             : 
    1501      408312 :     get_limits(s, 16*mb_x, 16*mb_y);
    1502             : 
    1503      408312 :     c->skip=0;
    1504             : 
    1505      408312 :     if (s->codec_id == AV_CODEC_ID_MPEG4 && s->next_picture.mbskip_table[xy]) {
    1506         118 :         int score= direct_search(s, mb_x, mb_y); //FIXME just check 0,0
    1507             : 
    1508         118 :         score= ((unsigned)(score*score + 128*256))>>16;
    1509         118 :         c->mc_mb_var_sum_temp += score;
    1510         118 :         s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
    1511         118 :         s->mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_DIRECT0;
    1512             : 
    1513         118 :         return;
    1514             :     }
    1515             : 
    1516      408194 :     if (s->codec_id == AV_CODEC_ID_MPEG4)
    1517      191402 :         dmin= direct_search(s, mb_x, mb_y);
    1518             :     else
    1519      216792 :         dmin= INT_MAX;
    1520             : // FIXME penalty stuff for non-MPEG-4
    1521      408194 :     c->skip=0;
    1522      816388 :     fmin = estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) +
    1523      408194 :            3 * penalty_factor;
    1524             : 
    1525      408194 :     c->skip=0;
    1526      816388 :     bmin = estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) +
    1527      408194 :            2 * penalty_factor;
    1528             :     ff_dlog(s, " %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
    1529             : 
    1530      408194 :     c->skip=0;
    1531      408194 :     fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;
    1532             :     ff_dlog(s, "%d %d %d %d\n", dmin, fmin, bmin, fbmin);
    1533             : 
    1534      408194 :     if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
    1535             : //FIXME mb type penalty
    1536      115200 :         c->skip=0;
    1537      115200 :         c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_DMV;
    1538      345600 :         fimin= interlaced_search(s, 0,
    1539      115200 :                                  s->b_field_mv_table[0], s->b_field_select_table[0],
    1540      230400 :                                  s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 0);
    1541      115200 :         c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_DMV;
    1542      345600 :         bimin= interlaced_search(s, 2,
    1543      115200 :                                  s->b_field_mv_table[1], s->b_field_select_table[1],
    1544      230400 :                                  s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 0);
    1545             :     }else
    1546      292994 :         fimin= bimin= INT_MAX;
    1547             : 
    1548             :     {
    1549      408194 :         int score= fmin;
    1550      408194 :         type = CANDIDATE_MB_TYPE_FORWARD;
    1551             : 
    1552      408194 :         if (dmin <= score){
    1553       55255 :             score = dmin;
    1554       55255 :             type = CANDIDATE_MB_TYPE_DIRECT;
    1555             :         }
    1556      408194 :         if(bmin<score){
    1557      202836 :             score=bmin;
    1558      202836 :             type= CANDIDATE_MB_TYPE_BACKWARD;
    1559             :         }
    1560      408194 :         if(fbmin<score){
    1561      237884 :             score=fbmin;
    1562      237884 :             type= CANDIDATE_MB_TYPE_BIDIR;
    1563             :         }
    1564      408194 :         if(fimin<score){
    1565        3925 :             score=fimin;
    1566        3925 :             type= CANDIDATE_MB_TYPE_FORWARD_I;
    1567             :         }
    1568      408194 :         if(bimin<score){
    1569        4080 :             score=bimin;
    1570        4080 :             type= CANDIDATE_MB_TYPE_BACKWARD_I;
    1571             :         }
    1572             : 
    1573      408194 :         score= ((unsigned)(score*score + 128*256))>>16;
    1574      408194 :         c->mc_mb_var_sum_temp += score;
    1575      408194 :         s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
    1576             :     }
    1577             : 
    1578      408194 :     if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
    1579      229850 :         type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
    1580      229850 :         if(fimin < INT_MAX)
    1581       31700 :             type |= CANDIDATE_MB_TYPE_FORWARD_I;
    1582      229850 :         if(bimin < INT_MAX)
    1583       31688 :             type |= CANDIDATE_MB_TYPE_BACKWARD_I;
    1584      229850 :         if(fimin < INT_MAX && bimin < INT_MAX){
    1585       26999 :             type |= CANDIDATE_MB_TYPE_BIDIR_I;
    1586             :         }
    1587             :          //FIXME something smarter
    1588      229850 :         if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //do not try direct mode if it is invalid for this MB
    1589      382896 :         if (s->codec_id == AV_CODEC_ID_MPEG4 && type&CANDIDATE_MB_TYPE_DIRECT &&
    1590      229558 :             s->mpv_flags & FF_MPV_FLAG_MV0 && *(uint32_t*)s->b_direct_mv_table[xy])
    1591       47465 :             type |= CANDIDATE_MB_TYPE_DIRECT0;
    1592             :     }
    1593             : 
    1594      408194 :     s->mb_type[mb_y*s->mb_stride + mb_x]= type;
    1595             : }
    1596             : 
    1597             : /* find best f_code for ME which do unlimited searches */
    1598       10740 : int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
    1599             : {
    1600       10740 :     if (s->motion_est != FF_ME_ZERO) {
    1601             :         int score[8];
    1602       10740 :         int i, y, range= s->avctx->me_range ? s->avctx->me_range : (INT_MAX/2);
    1603       10740 :         uint8_t * fcode_tab= s->fcode_tab;
    1604       10740 :         int best_fcode=-1;
    1605       10740 :         int best_score=-10000000;
    1606             : 
    1607       10740 :         if(s->msmpeg4_version)
    1608         742 :             range= FFMIN(range, 16);
    1609        9998 :         else if(s->codec_id == AV_CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL)
    1610        3786 :             range= FFMIN(range, 256);
    1611             : 
    1612       10740 :         for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);
    1613             : 
    1614      172332 :         for(y=0; y<s->mb_height; y++){
    1615             :             int x;
    1616      161592 :             int xy= y*s->mb_stride;
    1617     3653934 :             for(x=0; x<s->mb_width; x++){
    1618     3492342 :                 if(s->mb_type[xy] & type){
    1619     2806468 :                     int mx= mv_table[xy][0];
    1620     2806468 :                     int my= mv_table[xy][1];
    1621     2806468 :                     int fcode= FFMAX(fcode_tab[mx + MAX_MV],
    1622             :                                      fcode_tab[my + MAX_MV]);
    1623             :                     int j;
    1624             : 
    1625     2806468 :                         if(mx >= range || mx < -range ||
    1626     2794625 :                            my >= range || my < -range)
    1627       14408 :                             continue;
    1628             : 
    1629     5774935 :                     for(j=0; j<fcode && j<8; j++){
    1630     2982875 :                         if(s->pict_type==AV_PICTURE_TYPE_B || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy])
    1631     2885503 :                             score[j]-= 170;
    1632             :                     }
    1633             :                 }
    1634     3477934 :                 xy++;
    1635             :             }
    1636             :         }
    1637             : 
    1638       85920 :         for(i=1; i<8; i++){
    1639       75180 :             if(score[i] > best_score){
    1640       14519 :                 best_score= score[i];
    1641       14519 :                 best_fcode= i;
    1642             :             }
    1643             :         }
    1644             : 
    1645       10740 :         return best_fcode;
    1646             :     }else{
    1647           0 :         return 1;
    1648             :     }
    1649             : }
    1650             : 
    1651        5228 : void ff_fix_long_p_mvs(MpegEncContext * s)
    1652             : {
    1653        5228 :     MotionEstContext * const c= &s->me;
    1654        5228 :     const int f_code= s->f_code;
    1655             :     int y, range;
    1656        5228 :     av_assert0(s->pict_type==AV_PICTURE_TYPE_P);
    1657             : 
    1658        5228 :     range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);
    1659             : 
    1660        5228 :     av_assert0(range <= 16 || !s->msmpeg4_version);
    1661        5228 :     av_assert0(range <=256 || !(s->codec_id == AV_CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL));
    1662             : 
    1663        5228 :     if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
    1664             : 
    1665        5228 :     if (s->avctx->flags & AV_CODEC_FLAG_4MV) {
    1666         927 :         const int wrap= s->b8_stride;
    1667             : 
    1668             :         /* clip / convert to intra 8x8 type MVs */
    1669       14148 :         for(y=0; y<s->mb_height; y++){
    1670       13221 :             int xy= y*2*wrap;
    1671       13221 :             int i= y*s->mb_stride;
    1672             :             int x;
    1673             : 
    1674      290916 :             for(x=0; x<s->mb_width; x++){
    1675      277695 :                 if(s->mb_type[i]&CANDIDATE_MB_TYPE_INTER4V){
    1676             :                     int block;
    1677     1050460 :                     for(block=0; block<4; block++){
    1678      840368 :                         int off= (block& 1) + (block>>1)*wrap;
    1679      840368 :                         int mx = s->current_picture.motion_val[0][ xy + off ][0];
    1680      840368 :                         int my = s->current_picture.motion_val[0][ xy + off ][1];
    1681             : 
    1682      840368 :                         if(   mx >=range || mx <-range
    1683      839337 :                            || my >=range || my <-range){
    1684        1409 :                             s->mb_type[i] &= ~CANDIDATE_MB_TYPE_INTER4V;
    1685        1409 :                             s->mb_type[i] |= CANDIDATE_MB_TYPE_INTRA;
    1686        1409 :                             s->current_picture.mb_type[i] = CANDIDATE_MB_TYPE_INTRA;
    1687             :                         }
    1688             :                     }
    1689             :                 }
    1690      277695 :                 xy+=2;
    1691      277695 :                 i++;
    1692             :             }
    1693             :         }
    1694             :     }
    1695        5228 : }
    1696             : 
    1697             : /**
    1698             :  * @param truncate 1 for truncation, 0 for using intra
    1699             :  */
    1700       15016 : void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
    1701             :                      int16_t (*mv_table)[2], int f_code, int type, int truncate)
    1702             : {
    1703       15016 :     MotionEstContext * const c= &s->me;
    1704             :     int y, h_range, v_range;
    1705             : 
    1706             :     // RAL: 8 in MPEG-1, 16 in MPEG-4
    1707       15016 :     int range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);
    1708             : 
    1709       15016 :     if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
    1710             : 
    1711       15016 :     h_range= range;
    1712       15016 :     v_range= field_select_table ? range>>1 : range;
    1713             : 
    1714             :     /* clip / convert to intra 16x16 type MVs */
    1715      234869 :     for(y=0; y<s->mb_height; y++){
    1716             :         int x;
    1717      219853 :         int xy= y*s->mb_stride;
    1718     4897354 :         for(x=0; x<s->mb_width; x++){
    1719     4677501 :             if (s->mb_type[xy] & type){    // RAL: "type" test added...
    1720     3182865 :                 if (!field_select_table || field_select_table[xy] == field_select) {
    1721     3000571 :                     if(   mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range
    1722     2996422 :                        || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){
    1723             : 
    1724        6675 :                         if(truncate){
    1725        1528 :                             if     (mv_table[xy][0] > h_range-1) mv_table[xy][0]=  h_range-1;
    1726        1075 :                             else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range;
    1727        1528 :                             if     (mv_table[xy][1] > v_range-1) mv_table[xy][1]=  v_range-1;
    1728        1185 :                             else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range;
    1729             :                         }else{
    1730        5147 :                             s->mb_type[xy] &= ~type;
    1731        5147 :                             s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
    1732       10294 :                             mv_table[xy][0]=
    1733       10294 :                             mv_table[xy][1]= 0;
    1734             :                         }
    1735             :                     }
    1736             :                 }
    1737             :             }
    1738     4677501 :             xy++;
    1739             :         }
    1740             :     }
    1741       15016 : }

Generated by: LCOV version 1.13