LCOV - code coverage report
Current view: top level - libavcodec - motion_est_template.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 344 531 64.8 %
Date: 2017-12-14 01:15:32 Functions: 12 18 66.7 %

          Line data    Source code
       1             : /*
       2             :  * Motion estimation
       3             :  * Copyright (c) 2002-2004 Michael Niedermayer
       4             :  *
       5             :  * This file is part of FFmpeg.
       6             :  *
       7             :  * FFmpeg is free software; you can redistribute it and/or
       8             :  * modify it under the terms of the GNU Lesser General Public
       9             :  * License as published by the Free Software Foundation; either
      10             :  * version 2.1 of the License, or (at your option) any later version.
      11             :  *
      12             :  * FFmpeg is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :  * Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with FFmpeg; if not, write to the Free Software
      19             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      20             :  */
      21             : 
      22             : /**
      23             :  * @file
      24             :  * Motion estimation template.
      25             :  */
      26             : 
      27             : #include "libavutil/qsort.h"
      28             : #include "mpegvideo.h"
      29             : 
      30             : //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
      31             : #define LOAD_COMMON\
      32             :     uint32_t av_unused * const score_map= c->score_map;\
      33             :     const int av_unused xmin= c->xmin;\
      34             :     const int av_unused ymin= c->ymin;\
      35             :     const int av_unused xmax= c->xmax;\
      36             :     const int av_unused ymax= c->ymax;\
      37             :     uint8_t *mv_penalty= c->current_mv_penalty;\
      38             :     const int pred_x= c->pred_x;\
      39             :     const int pred_y= c->pred_y;\
      40             : 
      41             : #define CHECK_HALF_MV(dx, dy, x, y)\
      42             : {\
      43             :     const int hx= 2*(x)+(dx);\
      44             :     const int hy= 2*(y)+(dy);\
      45             :     d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
      46             :     d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
      47             :     COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
      48             : }
      49             : 
      50      597890 : static int hpel_motion_search(MpegEncContext * s,
      51             :                                   int *mx_ptr, int *my_ptr, int dmin,
      52             :                                   int src_index, int ref_index,
      53             :                                   int size, int h)
      54             : {
      55      597890 :     MotionEstContext * const c= &s->me;
      56      597890 :     const int mx = *mx_ptr;
      57      597890 :     const int my = *my_ptr;
      58      597890 :     const int penalty_factor= c->sub_penalty_factor;
      59             :     me_cmp_func cmp_sub, chroma_cmp_sub;
      60      597890 :     int bx=2*mx, by=2*my;
      61             : 
      62      597890 :     LOAD_COMMON
      63      597890 :     int flags= c->sub_flags;
      64             : 
      65             :  //FIXME factorize
      66             : 
      67      597890 :     cmp_sub        = s->mecc.me_sub_cmp[size];
      68      597890 :     chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1];
      69             : 
      70      597890 :     if(c->skip){ //FIXME move out of hpel?
      71         707 :         *mx_ptr = 0;
      72         707 :         *my_ptr = 0;
      73         707 :         return dmin;
      74             :     }
      75             : 
      76      597183 :     if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
      77      186511 :         dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
      78      186511 :         if(mx || my || size>0)
      79      164395 :             dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
      80             :     }
      81             : 
      82      597183 :     if (mx > xmin && mx < xmax &&
      83      586234 :         my > ymin && my < ymax) {
      84      582910 :         int d= dmin;
      85      582910 :         const int index= (my<<ME_MAP_SHIFT) + mx;
      86     1165820 :         const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
      87      582910 :                      + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
      88     1165820 :         const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
      89      582910 :                      + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
      90     1165820 :         const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
      91      582910 :                      + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
      92     1165820 :         const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
      93      582910 :                      + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
      94             : 
      95             : #if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
      96             :         unsigned key;
      97             :         unsigned map_generation= c->map_generation;
      98             :         key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
      99             :         av_assert2(c->map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
     100             :         key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
     101             :         av_assert2(c->map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
     102             :         key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
     103             :         av_assert2(c->map[(index+1)&(ME_MAP_SIZE-1)] == key);
     104             :         key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
     105             :         av_assert2(c->map[(index-1)&(ME_MAP_SIZE-1)] == key);
     106             : #endif
     107      582910 :         if(t<=b){
     108      289994 :             CHECK_HALF_MV(0, 1, mx  ,my-1)
     109      289994 :             if(l<=r){
     110      173072 :                 CHECK_HALF_MV(1, 1, mx-1, my-1)
     111      173072 :                 if(t+r<=b+l){
     112       89741 :                     CHECK_HALF_MV(1, 1, mx  , my-1)
     113             :                 }else{
     114       83331 :                     CHECK_HALF_MV(1, 1, mx-1, my  )
     115             :                 }
     116      173072 :                 CHECK_HALF_MV(1, 0, mx-1, my  )
     117             :             }else{
     118      116922 :                 CHECK_HALF_MV(1, 1, mx  , my-1)
     119      116922 :                 if(t+l<=b+r){
     120       59849 :                     CHECK_HALF_MV(1, 1, mx-1, my-1)
     121             :                 }else{
     122       57073 :                     CHECK_HALF_MV(1, 1, mx  , my  )
     123             :                 }
     124      116922 :                 CHECK_HALF_MV(1, 0, mx  , my  )
     125             :             }
     126             :         }else{
     127      292916 :             if(l<=r){
     128      116527 :                 if(t+l<=b+r){
     129       53297 :                     CHECK_HALF_MV(1, 1, mx-1, my-1)
     130             :                 }else{
     131       63230 :                     CHECK_HALF_MV(1, 1, mx  , my  )
     132             :                 }
     133      116527 :                 CHECK_HALF_MV(1, 0, mx-1, my)
     134      116527 :                 CHECK_HALF_MV(1, 1, mx-1, my)
     135             :             }else{
     136      176389 :                 if(t+r<=b+l){
     137       87441 :                     CHECK_HALF_MV(1, 1, mx  , my-1)
     138             :                 }else{
     139       88948 :                     CHECK_HALF_MV(1, 1, mx-1, my)
     140             :                 }
     141      176389 :                 CHECK_HALF_MV(1, 0, mx  , my)
     142      176389 :                 CHECK_HALF_MV(1, 1, mx  , my)
     143             :             }
     144      292916 :             CHECK_HALF_MV(0, 1, mx  , my)
     145             :         }
     146             :         av_assert2(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
     147             :     }
     148             : 
     149      597183 :     *mx_ptr = bx;
     150      597183 :     *my_ptr = by;
     151             : 
     152      597183 :     return dmin;
     153             : }
     154             : 
     155      106777 : static int no_sub_motion_search(MpegEncContext * s,
     156             :           int *mx_ptr, int *my_ptr, int dmin,
     157             :                                   int src_index, int ref_index,
     158             :                                   int size, int h)
     159             : {
     160      106777 :     (*mx_ptr)<<=1;
     161      106777 :     (*my_ptr)<<=1;
     162      106777 :     return dmin;
     163             : }
     164             : 
     165      978979 : static inline int get_mb_score(MpegEncContext *s, int mx, int my,
     166             :                                int src_index, int ref_index, int size,
     167             :                                int h, int add_rate)
     168             : {
     169      978979 :     MotionEstContext * const c= &s->me;
     170      978979 :     const int penalty_factor= c->mb_penalty_factor;
     171      978979 :     const int flags= c->mb_flags;
     172      978979 :     const int qpel= flags & FLAG_QPEL;
     173      978979 :     const int mask= 1+2*qpel;
     174             :     me_cmp_func cmp_sub, chroma_cmp_sub;
     175             :     int d;
     176             : 
     177      978979 :     LOAD_COMMON
     178             : 
     179             :  //FIXME factorize
     180             : 
     181      978979 :     cmp_sub        = s->mecc.mb_cmp[size];
     182      978979 :     chroma_cmp_sub = s->mecc.mb_cmp[size + 1];
     183             : 
     184      978979 :     d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
     185             :     //FIXME check cbp before adding penalty for (0,0) vector
     186      978979 :     if(add_rate && (mx || my || size>0))
     187      380650 :         d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
     188             : 
     189      978979 :     return d;
     190             : }
     191             : 
     192      557820 : int ff_get_mb_score(MpegEncContext *s, int mx, int my, int src_index,
     193             :                     int ref_index, int size, int h, int add_rate)
     194             : {
     195      557820 :     return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate);
     196             : }
     197             : 
     198             : #define CHECK_QUARTER_MV(dx, dy, x, y)\
     199             : {\
     200             :     const int hx= 4*(x)+(dx);\
     201             :     const int hy= 4*(y)+(dy);\
     202             :     d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
     203             :     d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
     204             :     COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
     205             : }
     206             : 
     207      740604 : static int qpel_motion_search(MpegEncContext * s,
     208             :                                   int *mx_ptr, int *my_ptr, int dmin,
     209             :                                   int src_index, int ref_index,
     210             :                                   int size, int h)
     211             : {
     212      740604 :     MotionEstContext * const c= &s->me;
     213      740604 :     const int mx = *mx_ptr;
     214      740604 :     const int my = *my_ptr;
     215      740604 :     const int penalty_factor= c->sub_penalty_factor;
     216      740604 :     const unsigned map_generation = c->map_generation;
     217      740604 :     const int subpel_quality= c->avctx->me_subpel_quality;
     218      740604 :     uint32_t *map= c->map;
     219             :     me_cmp_func cmpf, chroma_cmpf;
     220             :     me_cmp_func cmp_sub, chroma_cmp_sub;
     221             : 
     222      740604 :     LOAD_COMMON
     223      740604 :     int flags= c->sub_flags;
     224             : 
     225      740604 :     cmpf        = s->mecc.me_cmp[size];
     226      740604 :     chroma_cmpf = s->mecc.me_cmp[size + 1]; // FIXME: factorize
     227             :  //FIXME factorize
     228             : 
     229      740604 :     cmp_sub        = s->mecc.me_sub_cmp[size];
     230      740604 :     chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1];
     231             : 
     232      740604 :     if(c->skip){ //FIXME somehow move up (benchmark)
     233        1425 :         *mx_ptr = 0;
     234        1425 :         *my_ptr = 0;
     235        1425 :         return dmin;
     236             :     }
     237             : 
     238      739179 :     if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
     239      187104 :         dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
     240      187104 :         if(mx || my || size>0)
     241      164228 :             dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
     242             :     }
     243             : 
     244      739179 :     if (mx > xmin && mx < xmax &&
     245     1469968 :         my > ymin && my < ymax) {
     246      734768 :         int bx=4*mx, by=4*my;
     247      734768 :         int d= dmin;
     248             :         int i, nx, ny;
     249      734768 :         const int index= (my<<ME_MAP_SHIFT) + mx;
     250      734768 :         const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
     251      734768 :         const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
     252      734768 :         const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
     253      734768 :         const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
     254      734768 :         const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
     255             :         int best[8];
     256             :         int best_pos[8][2];
     257             : 
     258      734768 :         memset(best, 64, sizeof(int)*8);
     259      734768 :         if(s->me.dia_size>=2){
     260      237984 :             const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
     261      237984 :             const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
     262      237984 :             const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
     263      237984 :             const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
     264             : 
     265     1903872 :             for(ny= -3; ny <= 3; ny++){
     266    13327104 :                 for(nx= -3; nx <= 3; nx++){
     267             :                     //FIXME this could overflow (unlikely though)
     268    11661216 :                     const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
     269    11661216 :                     const int64_t c2= nx*nx*( r +  l - 2*c) + 4*nx*( r- l) + 32*c;
     270    11661216 :                     const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
     271    11661216 :                     int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
     272             :                     int i;
     273             : 
     274    11661216 :                     if((nx&3)==0 && (ny&3)==0) continue;
     275             : 
     276    11423232 :                     score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
     277             : 
     278             : //                    if(nx&1) score-=1024*c->penalty_factor;
     279             : //                    if(ny&1) score-=1024*c->penalty_factor;
     280             : 
     281    68380859 :                     for(i=0; i<8; i++){
     282    62881991 :                         if(score < best[i]){
     283     5924364 :                             memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
     284     5924364 :                             memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
     285     5924364 :                             best[i]= score;
     286     5924364 :                             best_pos[i][0]= nx + 4*mx;
     287     5924364 :                             best_pos[i][1]= ny + 4*my;
     288     5924364 :                             break;
     289             :                         }
     290             :                     }
     291             :                 }
     292             :             }
     293             :         }else{
     294             :             int tl;
     295             :             //FIXME this could overflow (unlikely though)
     296      496784 :             const int cx = 4*(r - l);
     297      496784 :             const int cx2= r + l - 2*c;
     298      496784 :             const int cy = 4*(b - t);
     299      496784 :             const int cy2= b + t - 2*c;
     300             :             int cxy;
     301             : 
     302      496784 :             if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == ((my-1)<<ME_MAP_MV_BITS) + (mx-1) + map_generation){
     303      233720 :                 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
     304             :             }else{
     305      263064 :                 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
     306             :             }
     307             : 
     308      496784 :             cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
     309             : 
     310             :             av_assert2(16*cx2 + 4*cx + 32*c == 32*r);
     311             :             av_assert2(16*cx2 - 4*cx + 32*c == 32*l);
     312             :             av_assert2(16*cy2 + 4*cy + 32*c == 32*b);
     313             :             av_assert2(16*cy2 - 4*cy + 32*c == 32*t);
     314             :             av_assert2(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
     315             : 
     316     3974272 :             for(ny= -3; ny <= 3; ny++){
     317    27819904 :                 for(nx= -3; nx <= 3; nx++){
     318             :                     //FIXME this could overflow (unlikely though)
     319    24342416 :                     int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
     320             :                     int i;
     321             : 
     322    24342416 :                     if((nx&3)==0 && (ny&3)==0) continue;
     323             : 
     324    23845632 :                     score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
     325             : //                    if(nx&1) score-=32*c->penalty_factor;
     326             :   //                  if(ny&1) score-=32*c->penalty_factor;
     327             : 
     328   141214169 :                     for(i=0; i<8; i++){
     329   130006302 :                         if(score < best[i]){
     330    12637765 :                             memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
     331    12637765 :                             memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
     332    12637765 :                             best[i]= score;
     333    12637765 :                             best_pos[i][0]= nx + 4*mx;
     334    12637765 :                             best_pos[i][1]= ny + 4*my;
     335    12637765 :                             break;
     336             :                         }
     337             :                     }
     338             :                 }
     339             :             }
     340             :         }
     341     6612912 :         for(i=0; i<subpel_quality; i++){
     342     5878144 :             nx= best_pos[i][0];
     343     5878144 :             ny= best_pos[i][1];
     344     5878144 :             CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
     345             :         }
     346             : 
     347             :         av_assert2(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
     348             : 
     349      734768 :         *mx_ptr = bx;
     350      734768 :         *my_ptr = by;
     351             :     }else{
     352        4411 :         *mx_ptr =4*mx;
     353        4411 :         *my_ptr =4*my;
     354             :     }
     355             : 
     356      739179 :     return dmin;
     357             : }
     358             : 
     359             : 
     360             : #define CHECK_MV(x,y)\
     361             : {\
     362             :     const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
     363             :     const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
     364             :     av_assert2((x) >= xmin);\
     365             :     av_assert2((x) <= xmax);\
     366             :     av_assert2((y) >= ymin);\
     367             :     av_assert2((y) <= ymax);\
     368             :     if(map[index]!=key){\
     369             :         d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
     370             :         map[index]= key;\
     371             :         score_map[index]= d;\
     372             :         d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
     373             :         COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
     374             :     }\
     375             : }
     376             : 
     377             : #define CHECK_CLIPPED_MV(ax,ay)\
     378             : {\
     379             :     const int Lx= ax;\
     380             :     const int Ly= ay;\
     381             :     const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
     382             :     const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
     383             :     CHECK_MV(Lx2, Ly2)\
     384             : }
     385             : 
     386             : #define CHECK_MV_DIR(x,y,new_dir)\
     387             : {\
     388             :     const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
     389             :     const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
     390             :     if(map[index]!=key){\
     391             :         d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
     392             :         map[index]= key;\
     393             :         score_map[index]= d;\
     394             :         d += (mv_penalty[(int)((unsigned)(x)<<shift)-pred_x] + mv_penalty[(int)((unsigned)(y)<<shift)-pred_y])*penalty_factor;\
     395             :         if(d<dmin){\
     396             :             best[0]=x;\
     397             :             best[1]=y;\
     398             :             dmin=d;\
     399             :             next_dir= new_dir;\
     400             :         }\
     401             :     }\
     402             : }
     403             : 
     404             : #define check(x,y,S,v)\
     405             : if( (x)<(xmin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
     406             : if( (x)>(xmax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
     407             : if( (y)<(ymin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
     408             : if( (y)>(ymax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
     409             : 
     410             : #define LOAD_COMMON2\
     411             :     uint32_t *map= c->map;\
     412             :     const int qpel= flags&FLAG_QPEL;\
     413             :     const int shift= 1+qpel;\
     414             : 
     415     5405399 : static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
     416             :                                        int src_index, int ref_index, const int penalty_factor,
     417             :                                        int size, int h, int flags)
     418             : {
     419     5405399 :     MotionEstContext * const c= &s->me;
     420             :     me_cmp_func cmpf, chroma_cmpf;
     421     5405399 :     int next_dir=-1;
     422     5405399 :     LOAD_COMMON
     423     5405399 :     LOAD_COMMON2
     424     5405399 :     unsigned map_generation = c->map_generation;
     425             : 
     426     5405399 :     cmpf        = s->mecc.me_cmp[size];
     427     5405399 :     chroma_cmpf = s->mecc.me_cmp[size + 1];
     428             : 
     429             :     { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
     430     5405399 :         const unsigned key = ((unsigned)best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
     431     5405399 :         const int index= (((unsigned)best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
     432     5405399 :         if (map[index] != key) { // this will be executed only very rarely
     433       32790 :             score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
     434       32790 :             map[index]= key;
     435             :         }
     436             :     }
     437             : 
     438     1470446 :     for(;;){
     439             :         int d;
     440     6875845 :         const int dir= next_dir;
     441     6875845 :         const int x= best[0];
     442     6875845 :         const int y= best[1];
     443     6875845 :         next_dir=-1;
     444             : 
     445     6875845 :         if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
     446     6875845 :         if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
     447     6875845 :         if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
     448     6875845 :         if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)
     449             : 
     450     6875845 :         if(next_dir==-1){
     451    10810798 :             return dmin;
     452             :         }
     453             :     }
     454             : }
     455             : 
     456           0 : static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
     457             :                                        int src_index, int ref_index, const int penalty_factor,
     458             :                                        int size, int h, int flags)
     459             : {
     460           0 :     MotionEstContext * const c= &s->me;
     461             :     me_cmp_func cmpf, chroma_cmpf;
     462             :     int dia_size;
     463           0 :     LOAD_COMMON
     464           0 :     LOAD_COMMON2
     465           0 :     unsigned map_generation = c->map_generation;
     466             : 
     467           0 :     cmpf        = s->mecc.me_cmp[size];
     468           0 :     chroma_cmpf = s->mecc.me_cmp[size + 1];
     469             : 
     470           0 :     for(dia_size=1; dia_size<=4; dia_size++){
     471             :         int dir;
     472           0 :         const int x= best[0];
     473           0 :         const int y= best[1];
     474             : 
     475           0 :         if(dia_size&(dia_size-1)) continue;
     476             : 
     477           0 :         if(   x + dia_size > xmax
     478           0 :            || x - dia_size < xmin
     479           0 :            || y + dia_size > ymax
     480           0 :            || y - dia_size < ymin)
     481           0 :            continue;
     482             : 
     483           0 :         for(dir= 0; dir<dia_size; dir+=2){
     484             :             int d;
     485             : 
     486           0 :             CHECK_MV(x + dir           , y + dia_size - dir);
     487           0 :             CHECK_MV(x + dia_size - dir, y - dir           );
     488           0 :             CHECK_MV(x - dir           , y - dia_size + dir);
     489           0 :             CHECK_MV(x - dia_size + dir, y + dir           );
     490             :         }
     491             : 
     492           0 :         if(x!=best[0] || y!=best[1])
     493           0 :             dia_size=0;
     494             :     }
     495           0 :     return dmin;
     496             : }
     497             : 
     498           0 : static int hex_search(MpegEncContext * s, int *best, int dmin,
     499             :                                        int src_index, int ref_index, const int penalty_factor,
     500             :                                        int size, int h, int flags, int dia_size)
     501             : {
     502           0 :     MotionEstContext * const c= &s->me;
     503             :     me_cmp_func cmpf, chroma_cmpf;
     504           0 :     LOAD_COMMON
     505           0 :     LOAD_COMMON2
     506           0 :     unsigned map_generation = c->map_generation;
     507             :     int x,y,d;
     508           0 :     const int dec= dia_size & (dia_size-1);
     509             : 
     510           0 :     cmpf        = s->mecc.me_cmp[size];
     511           0 :     chroma_cmpf = s->mecc.me_cmp[size + 1];
     512             : 
     513           0 :     for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
     514             :         do{
     515           0 :             x= best[0];
     516           0 :             y= best[1];
     517             : 
     518           0 :             CHECK_CLIPPED_MV(x  -dia_size    , y);
     519           0 :             CHECK_CLIPPED_MV(x+  dia_size    , y);
     520           0 :             CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
     521           0 :             CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
     522           0 :             if(dia_size>1){
     523           0 :                 CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
     524           0 :                 CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
     525             :             }
     526           0 :         }while(best[0] != x || best[1] != y);
     527             :     }
     528             : 
     529           0 :     return dmin;
     530             : }
     531             : 
     532           0 : static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
     533             :                                        int src_index, int ref_index, const int penalty_factor,
     534             :                                        int size, int h, int flags)
     535             : {
     536           0 :     MotionEstContext * const c= &s->me;
     537             :     me_cmp_func cmpf, chroma_cmpf;
     538           0 :     LOAD_COMMON
     539           0 :     LOAD_COMMON2
     540           0 :     unsigned map_generation = c->map_generation;
     541             :     int x,y,i,d;
     542           0 :     int dia_size= c->dia_size&0xFF;
     543           0 :     const int dec= dia_size & (dia_size-1);
     544             :     static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
     545             :                                 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
     546             : 
     547           0 :     cmpf        = s->mecc.me_cmp[size];
     548           0 :     chroma_cmpf = s->mecc.me_cmp[size + 1];
     549             : 
     550           0 :     for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
     551             :         do{
     552           0 :             x= best[0];
     553           0 :             y= best[1];
     554           0 :             for(i=0; i<8; i++){
     555           0 :                 CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
     556             :             }
     557           0 :         }while(best[0] != x || best[1] != y);
     558             :     }
     559             : 
     560           0 :     x= best[0];
     561           0 :     y= best[1];
     562           0 :     CHECK_CLIPPED_MV(x+1, y);
     563           0 :     CHECK_CLIPPED_MV(x, y+1);
     564           0 :     CHECK_CLIPPED_MV(x-1, y);
     565           0 :     CHECK_CLIPPED_MV(x, y-1);
     566             : 
     567           0 :     return dmin;
     568             : }
     569             : 
     570           0 : static int umh_search(MpegEncContext * s, int *best, int dmin,
     571             :                                        int src_index, int ref_index, const int penalty_factor,
     572             :                                        int size, int h, int flags)
     573             : {
     574           0 :     MotionEstContext * const c= &s->me;
     575             :     me_cmp_func cmpf, chroma_cmpf;
     576           0 :     LOAD_COMMON
     577           0 :     LOAD_COMMON2
     578           0 :     unsigned map_generation = c->map_generation;
     579             :     int x,y,x2,y2, i, j, d;
     580           0 :     const int dia_size= c->dia_size&0xFE;
     581             :     static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
     582             :                                  { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
     583             :                                  {-2, 3}, { 0, 4}, { 2, 3},
     584             :                                  {-2,-3}, { 0,-4}, { 2,-3},};
     585             : 
     586           0 :     cmpf        = s->mecc.me_cmp[size];
     587           0 :     chroma_cmpf = s->mecc.me_cmp[size + 1];
     588             : 
     589           0 :     x= best[0];
     590           0 :     y= best[1];
     591           0 :     for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
     592           0 :         CHECK_MV(x2, y);
     593             :     }
     594           0 :     for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
     595           0 :         CHECK_MV(x, y2);
     596             :     }
     597             : 
     598           0 :     x= best[0];
     599           0 :     y= best[1];
     600           0 :     for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
     601           0 :         for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
     602           0 :             CHECK_MV(x2, y2);
     603             :         }
     604             :     }
     605             : 
     606             : //FIXME prevent the CLIP stuff
     607             : 
     608           0 :     for(j=1; j<=dia_size/4; j++){
     609           0 :         for(i=0; i<16; i++){
     610           0 :             CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
     611             :         }
     612             :     }
     613             : 
     614           0 :     return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
     615             : }
     616             : 
     617           0 : static int full_search(MpegEncContext * s, int *best, int dmin,
     618             :                                        int src_index, int ref_index, const int penalty_factor,
     619             :                                        int size, int h, int flags)
     620             : {
     621           0 :     MotionEstContext * const c= &s->me;
     622             :     me_cmp_func cmpf, chroma_cmpf;
     623           0 :     LOAD_COMMON
     624           0 :     LOAD_COMMON2
     625           0 :     unsigned map_generation = c->map_generation;
     626             :     int x,y, d;
     627           0 :     const int dia_size= c->dia_size&0xFF;
     628             : 
     629           0 :     cmpf        = s->mecc.me_cmp[size];
     630           0 :     chroma_cmpf = s->mecc.me_cmp[size + 1];
     631             : 
     632           0 :     for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
     633           0 :         for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
     634           0 :             CHECK_MV(x, y);
     635             :         }
     636             :     }
     637             : 
     638           0 :     x= best[0];
     639           0 :     y= best[1];
     640           0 :     d= dmin;
     641           0 :     CHECK_CLIPPED_MV(x  , y);
     642           0 :     CHECK_CLIPPED_MV(x+1, y);
     643           0 :     CHECK_CLIPPED_MV(x, y+1);
     644           0 :     CHECK_CLIPPED_MV(x-1, y);
     645           0 :     CHECK_CLIPPED_MV(x, y-1);
     646           0 :     best[0]= x;
     647           0 :     best[1]= y;
     648             : 
     649           0 :     return d;
     650             : }
     651             : 
     652             : #define SAB_CHECK_MV(ax,ay)\
     653             : {\
     654             :     const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
     655             :     const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
     656             :     if(map[index]!=key){\
     657             :         d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
     658             :         map[index]= key;\
     659             :         score_map[index]= d;\
     660             :         d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
     661             :         if(d < minima[minima_count-1].height){\
     662             :             int j=0;\
     663             :             \
     664             :             while(d >= minima[j].height) j++;\
     665             : \
     666             :             memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
     667             : \
     668             :             minima[j].checked= 0;\
     669             :             minima[j].height= d;\
     670             :             minima[j].x= ax;\
     671             :             minima[j].y= ay;\
     672             :             \
     673             :             i=-1;\
     674             :             continue;\
     675             :         }\
     676             :     }\
     677             : }
     678             : 
     679             : #define MAX_SAB_SIZE ME_MAP_SIZE
     680           0 : static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
     681             :                                        int src_index, int ref_index, const int penalty_factor,
     682             :                                        int size, int h, int flags)
     683             : {
     684           0 :     MotionEstContext * const c= &s->me;
     685             :     me_cmp_func cmpf, chroma_cmpf;
     686             :     Minima minima[MAX_SAB_SIZE];
     687           0 :     const int minima_count= FFABS(c->dia_size);
     688             :     int i, j;
     689           0 :     LOAD_COMMON
     690           0 :     LOAD_COMMON2
     691           0 :     unsigned map_generation = c->map_generation;
     692             : 
     693             :     av_assert1(minima_count <= MAX_SAB_SIZE);
     694             : 
     695           0 :     cmpf        = s->mecc.me_cmp[size];
     696           0 :     chroma_cmpf = s->mecc.me_cmp[size + 1];
     697             : 
     698             :     /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
     699             :       become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
     700             :      */
     701           0 :     for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
     702           0 :         uint32_t key= map[i];
     703             : 
     704           0 :         key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
     705             : 
     706           0 :         if ((key & (-(1 << (2 * ME_MAP_MV_BITS)))) != map_generation)
     707           0 :             continue;
     708             : 
     709           0 :         minima[j].height= score_map[i];
     710           0 :         minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
     711           0 :         minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
     712           0 :         minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
     713           0 :         minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
     714             : 
     715             :         // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
     716           0 :         if(   minima[j].x > xmax || minima[j].x < xmin
     717           0 :            || minima[j].y > ymax || minima[j].y < ymin)
     718           0 :             continue;
     719             : 
     720           0 :         minima[j].checked=0;
     721           0 :         if(minima[j].x || minima[j].y)
     722           0 :             minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
     723             : 
     724           0 :         j++;
     725             :     }
     726             : 
     727           0 :     AV_QSORT(minima, j, Minima, minima_cmp);
     728             : 
     729           0 :     for(; j<minima_count; j++){
     730           0 :         minima[j].height=256*256*256*64;
     731           0 :         minima[j].checked=0;
     732           0 :         minima[j].x= minima[j].y=0;
     733             :     }
     734             : 
     735           0 :     for(i=0; i<minima_count; i++){
     736           0 :         const int x= minima[i].x;
     737           0 :         const int y= minima[i].y;
     738             :         int d;
     739             : 
     740           0 :         if(minima[i].checked) continue;
     741             : 
     742           0 :         if(   x >= xmax || x <= xmin
     743           0 :            || y >= ymax || y <= ymin)
     744           0 :            continue;
     745             : 
     746           0 :         SAB_CHECK_MV(x-1, y)
     747           0 :         SAB_CHECK_MV(x+1, y)
     748           0 :         SAB_CHECK_MV(x  , y-1)
     749           0 :         SAB_CHECK_MV(x  , y+1)
     750             : 
     751           0 :         minima[i].checked= 1;
     752             :     }
     753             : 
     754           0 :     best[0]= minima[0].x;
     755           0 :     best[1]= minima[0].y;
     756           0 :     dmin= minima[0].height;
     757             : 
     758           0 :     if(   best[0] < xmax && best[0] > xmin
     759           0 :        && best[1] < ymax && best[1] > ymin){
     760             :         int d;
     761             :         // ensure that the reference samples for hpel refinement are in the map
     762           0 :         CHECK_MV(best[0]-1, best[1])
     763           0 :         CHECK_MV(best[0]+1, best[1])
     764           0 :         CHECK_MV(best[0], best[1]-1)
     765           0 :         CHECK_MV(best[0], best[1]+1)
     766             :     }
     767           0 :     return dmin;
     768             : }
     769             : 
     770      242328 : static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
     771             :                                        int src_index, int ref_index, const int penalty_factor,
     772             :                                        int size, int h, int flags)
     773             : {
     774      242328 :     MotionEstContext * const c= &s->me;
     775             :     me_cmp_func cmpf, chroma_cmpf;
     776             :     int dia_size;
     777      242328 :     LOAD_COMMON
     778      242328 :     LOAD_COMMON2
     779      242328 :     unsigned map_generation = c->map_generation;
     780             : 
     781      242328 :     cmpf        = s->mecc.me_cmp[size];
     782      242328 :     chroma_cmpf = s->mecc.me_cmp[size + 1];
     783             : 
     784      993289 :     for(dia_size=1; dia_size<=c->dia_size; dia_size++){
     785             :         int dir, start, end;
     786      750961 :         const int x= best[0];
     787      750961 :         const int y= best[1];
     788             : 
     789      750961 :         start= FFMAX(0, y + dia_size - ymax);
     790      750961 :         end  = FFMIN(dia_size, xmax - x + 1);
     791     1783873 :         for(dir= start; dir<end; dir++){
     792             :             int d;
     793             : 
     794             : //check(x + dir,y + dia_size - dir,0, a0)
     795     1032912 :             CHECK_MV(x + dir           , y + dia_size - dir);
     796             :         }
     797             : 
     798      750961 :         start= FFMAX(0, x + dia_size - xmax);
     799      750961 :         end  = FFMIN(dia_size, y - ymin + 1);
     800     1783879 :         for(dir= start; dir<end; dir++){
     801             :             int d;
     802             : 
     803             : //check(x + dia_size - dir, y - dir,0, a1)
     804     1032918 :             CHECK_MV(x + dia_size - dir, y - dir           );
     805             :         }
     806             : 
     807      750961 :         start= FFMAX(0, -y + dia_size + ymin );
     808      750961 :         end  = FFMIN(dia_size, x - xmin + 1);
     809     1783793 :         for(dir= start; dir<end; dir++){
     810             :             int d;
     811             : 
     812             : //check(x - dir,y - dia_size + dir,0, a2)
     813     1032832 :             CHECK_MV(x - dir           , y - dia_size + dir);
     814             :         }
     815             : 
     816      750961 :         start= FFMAX(0, -x + dia_size + xmin );
     817      750961 :         end  = FFMIN(dia_size, ymax - y + 1);
     818     1783894 :         for(dir= start; dir<end; dir++){
     819             :             int d;
     820             : 
     821             : //check(x - dia_size + dir, y + dir,0, a3)
     822     1032933 :             CHECK_MV(x - dia_size + dir, y + dir           );
     823             :         }
     824             : 
     825      750961 :         if(x!=best[0] || y!=best[1])
     826      226656 :             dia_size=0;
     827             :     }
     828      242328 :     return dmin;
     829             : }
     830             : 
     831     5647727 : static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
     832             :                                        int src_index, int ref_index, const int penalty_factor,
     833             :                                        int size, int h, int flags){
     834     5647727 :     MotionEstContext * const c= &s->me;
     835     5647727 :     if(c->dia_size==-1)
     836           0 :         return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
     837     5647727 :     else if(c->dia_size<-1)
     838           0 :         return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
     839     5647727 :     else if(c->dia_size<2)
     840     5405399 :         return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
     841      242328 :     else if(c->dia_size>1024)
     842           0 :         return          full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
     843      242328 :     else if(c->dia_size>768)
     844           0 :         return           umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
     845      242328 :     else if(c->dia_size>512)
     846           0 :         return           hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
     847      242328 :     else if(c->dia_size>256)
     848           0 :         return       l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
     849             :     else
     850      242328 :         return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
     851             : }
     852             : 
     853             : /**
     854             :    @param P a list of candidate mvs to check before starting the
     855             :    iterative search. If one of the candidates is close to the optimal mv, then
     856             :    it takes fewer iterations. And it increases the chance that we find the
     857             :    optimal mv.
     858             :  */
     859     3339037 : static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
     860             :                              int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
     861             :                              int ref_mv_scale, int flags, int size, int h)
     862             : {
     863     3339037 :     MotionEstContext * const c= &s->me;
     864     3339037 :     int best[2]={0, 0};      /**< x and y coordinates of the best motion vector.
     865             :                                i.e. the difference between the position of the
     866             :                                block currently being encoded and the position of
     867             :                                the block chosen to predict it from. */
     868             :     int d;                   ///< the score (cmp + penalty) of any given mv
     869             :     int dmin;                /**< the best value of d, i.e. the score
     870             :                                corresponding to the mv stored in best[]. */
     871             :     unsigned map_generation;
     872             :     int penalty_factor;
     873     3339037 :     const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
     874     3339037 :     const int ref_mv_xy = s->mb_x + s->mb_y * ref_mv_stride; // add to last_mv before passing FIXME
     875             :     me_cmp_func cmpf, chroma_cmpf;
     876             : 
     877     3339037 :     LOAD_COMMON
     878     3339037 :     LOAD_COMMON2
     879             : 
     880     3339037 :     if(c->pre_pass){
     881           0 :         penalty_factor= c->pre_penalty_factor;
     882           0 :         cmpf           = s->mecc.me_pre_cmp[size];
     883           0 :         chroma_cmpf    = s->mecc.me_pre_cmp[size + 1];
     884             :     }else{
     885     3339037 :         penalty_factor= c->penalty_factor;
     886     3339037 :         cmpf           = s->mecc.me_cmp[size];
     887     3339037 :         chroma_cmpf    = s->mecc.me_cmp[size + 1];
     888             :     }
     889             : 
     890     3339037 :     map_generation= update_map_generation(c);
     891             : 
     892             :     av_assert2(cmpf);
     893     3339037 :     dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
     894     3339037 :     map[0]= map_generation;
     895     3339037 :     score_map[0]= dmin;
     896             : 
     897             :     //FIXME precalc first term below?
     898     5861686 :     if ((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) ||
     899     2522649 :         s->mpv_flags & FF_MPV_FLAG_MV0)
     900      955256 :         dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
     901             : 
     902             :     /* first line */
     903     3339037 :     if (s->first_slice_line) {
     904      176034 :         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
     905      176034 :         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
     906             :                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
     907             :     }else{
     908     3163003 :         if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
     909       89924 :                     && ( P_LEFT[0]    |P_LEFT[1]
     910       44962 :                         |P_TOP[0]     |P_TOP[1]
     911       44962 :                         |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
     912       23566 :             *mx_ptr= 0;
     913       23566 :             *my_ptr= 0;
     914       23566 :             c->skip=1;
     915       23566 :             return dmin;
     916             :         }
     917     3139437 :         CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
     918     3139437 :         CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
     919     3139437 :         CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
     920     3139437 :         CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
     921     3139437 :         CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
     922     3139437 :         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
     923             :                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
     924     3139437 :         CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
     925     3139437 :         CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
     926     3139437 :         CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
     927             :     }
     928     3315471 :     if(dmin>h*h*4){
     929     2423738 :         if(c->pre_pass){
     930           0 :             CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
     931             :                             (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
     932           0 :             if(!s->first_slice_line)
     933           0 :                 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
     934             :                                 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
     935             :         }else{
     936     2423738 :             CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
     937             :                             (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
     938     2423738 :             if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
     939     2009745 :                 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
     940             :                                 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
     941             :         }
     942             :     }
     943             : 
     944     3315471 :     if(c->avctx->last_predictor_count){
     945           0 :         const int count= c->avctx->last_predictor_count;
     946           0 :         const int xstart= FFMAX(0, s->mb_x - count);
     947           0 :         const int ystart= FFMAX(0, s->mb_y - count);
     948           0 :         const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
     949           0 :         const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
     950             :         int mb_y;
     951             : 
     952           0 :         for(mb_y=ystart; mb_y<yend; mb_y++){
     953             :             int mb_x;
     954           0 :             for(mb_x=xstart; mb_x<xend; mb_x++){
     955           0 :                 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
     956           0 :                 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
     957           0 :                 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
     958             : 
     959           0 :                 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
     960           0 :                 CHECK_MV(mx,my)
     961             :             }
     962             :         }
     963             :     }
     964             : 
     965             : //check(best[0],best[1],0, b0)
     966     3315471 :     dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
     967             : 
     968             : //check(best[0],best[1],0, b1)
     969     3315471 :     *mx_ptr= best[0];
     970     3315471 :     *my_ptr= best[1];
     971             : 
     972     3315471 :     return dmin;
     973             : }
     974             : 
     975             : //this function is dedicated to the brain damaged gcc
     976     3339037 : int ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr,
     977             :                           int P[10][2], int src_index, int ref_index,
     978             :                           int16_t (*last_mv)[2], int ref_mv_scale,
     979             :                           int size, int h)
     980             : {
     981     3339037 :     MotionEstContext * const c= &s->me;
     982             : //FIXME convert other functions in the same way if faster
     983     3339037 :     if(c->flags==0 && h==16 && size==0){
     984     2501944 :         return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
     985             : //    case FLAG_QPEL:
     986             : //        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
     987             :     }else{
     988      837093 :         return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
     989             :     }
     990             : }
     991             : 
     992     1007324 : static int epzs_motion_search4(MpegEncContext * s,
     993             :                              int *mx_ptr, int *my_ptr, int P[10][2],
     994             :                              int src_index, int ref_index, int16_t (*last_mv)[2],
     995             :                              int ref_mv_scale)
     996             : {
     997     1007324 :     MotionEstContext * const c= &s->me;
     998     1007324 :     int best[2]={0, 0};
     999             :     int d, dmin;
    1000             :     unsigned map_generation;
    1001     1007324 :     const int penalty_factor= c->penalty_factor;
    1002     1007324 :     const int size=1;
    1003     1007324 :     const int h=8;
    1004     1007324 :     const int ref_mv_stride= s->mb_stride;
    1005     1007324 :     const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
    1006             :     me_cmp_func cmpf, chroma_cmpf;
    1007     1007324 :     LOAD_COMMON
    1008     1007324 :     int flags= c->flags;
    1009     1007324 :     LOAD_COMMON2
    1010             : 
    1011     1007324 :     cmpf        = s->mecc.me_cmp[size];
    1012     1007324 :     chroma_cmpf = s->mecc.me_cmp[size + 1];
    1013             : 
    1014     1007324 :     map_generation= update_map_generation(c);
    1015             : 
    1016     1007324 :     dmin = 1000000;
    1017             : 
    1018             :     /* first line */
    1019     1007324 :     if (s->first_slice_line) {
    1020       62356 :         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
    1021       62356 :         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
    1022             :                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
    1023       62356 :         CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
    1024             :     }else{
    1025      944968 :         CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
    1026             :         //FIXME try some early stop
    1027      944968 :         CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
    1028      944968 :         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
    1029      944968 :         CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
    1030      944968 :         CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
    1031      944968 :         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
    1032             :                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
    1033             :     }
    1034     1007324 :     if(dmin>64*4){
    1035      778918 :         CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
    1036             :                         (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
    1037      778918 :         if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
    1038      729749 :             CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
    1039             :                             (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
    1040             :     }
    1041             : 
    1042     1007324 :     dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    1043             : 
    1044     1007324 :     *mx_ptr= best[0];
    1045     1007324 :     *my_ptr= best[1];
    1046             : 
    1047     1007324 :     return dmin;
    1048             : }
    1049             : 
    1050             : //try to merge with above FIXME (needs PSNR test)
    1051     1324932 : static int epzs_motion_search2(MpegEncContext * s,
    1052             :                              int *mx_ptr, int *my_ptr, int P[10][2],
    1053             :                              int src_index, int ref_index, int16_t (*last_mv)[2],
    1054             :                              int ref_mv_scale)
    1055             : {
    1056     1324932 :     MotionEstContext * const c= &s->me;
    1057     1324932 :     int best[2]={0, 0};
    1058             :     int d, dmin;
    1059             :     unsigned map_generation;
    1060     1324932 :     const int penalty_factor= c->penalty_factor;
    1061     1324932 :     const int size=0; //FIXME pass as arg
    1062     1324932 :     const int h=8;
    1063     1324932 :     const int ref_mv_stride= s->mb_stride;
    1064     1324932 :     const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
    1065             :     me_cmp_func cmpf, chroma_cmpf;
    1066     1324932 :     LOAD_COMMON
    1067     1324932 :     int flags= c->flags;
    1068     1324932 :     LOAD_COMMON2
    1069             : 
    1070     1324932 :     cmpf        = s->mecc.me_cmp[size];
    1071     1324932 :     chroma_cmpf = s->mecc.me_cmp[size + 1];
    1072             : 
    1073     1324932 :     map_generation= update_map_generation(c);
    1074             : 
    1075     1324932 :     dmin = 1000000;
    1076             : 
    1077             :     /* first line */
    1078     1324932 :     if (s->first_slice_line) {
    1079      118740 :         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
    1080      118740 :         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
    1081             :                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
    1082      118740 :         CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
    1083             :     }else{
    1084     1206192 :         CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
    1085             :         //FIXME try some early stop
    1086     1206192 :         CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
    1087     1206192 :         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
    1088     1206192 :         CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
    1089     1206192 :         CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
    1090     1206192 :         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
    1091             :                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
    1092             :     }
    1093     1324932 :     if(dmin>64*4){
    1094     1257331 :         CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
    1095             :                         (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
    1096     1257331 :         if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
    1097     1143217 :             CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
    1098             :                             (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
    1099             :     }
    1100             : 
    1101     1324932 :     dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    1102             : 
    1103     1324932 :     *mx_ptr= best[0];
    1104     1324932 :     *my_ptr= best[1];
    1105             : 
    1106     1324932 :     return dmin;
    1107             : }

Generated by: LCOV version 1.13