LCOV - code coverage report
Current view: top level - libavcodec - me_cmp.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 310 580 53.4 %
Date: 2017-12-15 11:05:35 Functions: 19 45 42.2 %

          Line data    Source code
       1             : /*
       2             :  * DSP utils
       3             :  * Copyright (c) 2000, 2001 Fabrice Bellard
       4             :  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
       5             :  *
       6             :  * This file is part of FFmpeg.
       7             :  *
       8             :  * FFmpeg is free software; you can redistribute it and/or
       9             :  * modify it under the terms of the GNU Lesser General Public
      10             :  * License as published by the Free Software Foundation; either
      11             :  * version 2.1 of the License, or (at your option) any later version.
      12             :  *
      13             :  * FFmpeg is distributed in the hope that it will be useful,
      14             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16             :  * Lesser General Public License for more details.
      17             :  *
      18             :  * You should have received a copy of the GNU Lesser General Public
      19             :  * License along with FFmpeg; if not, write to the Free Software
      20             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      21             :  */
      22             : 
      23             : #include "libavutil/attributes.h"
      24             : #include "libavutil/internal.h"
      25             : #include "avcodec.h"
      26             : #include "copy_block.h"
      27             : #include "simple_idct.h"
      28             : #include "me_cmp.h"
      29             : #include "mpegvideo.h"
      30             : #include "config.h"
      31             : 
      32             : uint32_t ff_square_tab[512] = { 0, };
      33             : 
      34           0 : static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
      35             :                   ptrdiff_t stride, int h)
      36             : {
      37           0 :     int s = 0, i;
      38           0 :     uint32_t *sq = ff_square_tab + 256;
      39             : 
      40           0 :     for (i = 0; i < h; i++) {
      41           0 :         s    += sq[pix1[0] - pix2[0]];
      42           0 :         s    += sq[pix1[1] - pix2[1]];
      43           0 :         s    += sq[pix1[2] - pix2[2]];
      44           0 :         s    += sq[pix1[3] - pix2[3]];
      45           0 :         pix1 += stride;
      46           0 :         pix2 += stride;
      47             :     }
      48           0 :     return s;
      49             : }
      50             : 
      51     5002254 : static int sse8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
      52             :                   ptrdiff_t stride, int h)
      53             : {
      54     5002254 :     int s = 0, i;
      55     5002254 :     uint32_t *sq = ff_square_tab + 256;
      56             : 
      57    45020286 :     for (i = 0; i < h; i++) {
      58    40018032 :         s    += sq[pix1[0] - pix2[0]];
      59    40018032 :         s    += sq[pix1[1] - pix2[1]];
      60    40018032 :         s    += sq[pix1[2] - pix2[2]];
      61    40018032 :         s    += sq[pix1[3] - pix2[3]];
      62    40018032 :         s    += sq[pix1[4] - pix2[4]];
      63    40018032 :         s    += sq[pix1[5] - pix2[5]];
      64    40018032 :         s    += sq[pix1[6] - pix2[6]];
      65    40018032 :         s    += sq[pix1[7] - pix2[7]];
      66    40018032 :         pix1 += stride;
      67    40018032 :         pix2 += stride;
      68             :     }
      69     5002254 :     return s;
      70             : }
      71             : 
      72     7107952 : static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
      73             :                    ptrdiff_t stride, int h)
      74             : {
      75     7107952 :     int s = 0, i;
      76     7107952 :     uint32_t *sq = ff_square_tab + 256;
      77             : 
      78   120835184 :     for (i = 0; i < h; i++) {
      79   113727232 :         s += sq[pix1[0]  - pix2[0]];
      80   113727232 :         s += sq[pix1[1]  - pix2[1]];
      81   113727232 :         s += sq[pix1[2]  - pix2[2]];
      82   113727232 :         s += sq[pix1[3]  - pix2[3]];
      83   113727232 :         s += sq[pix1[4]  - pix2[4]];
      84   113727232 :         s += sq[pix1[5]  - pix2[5]];
      85   113727232 :         s += sq[pix1[6]  - pix2[6]];
      86   113727232 :         s += sq[pix1[7]  - pix2[7]];
      87   113727232 :         s += sq[pix1[8]  - pix2[8]];
      88   113727232 :         s += sq[pix1[9]  - pix2[9]];
      89   113727232 :         s += sq[pix1[10] - pix2[10]];
      90   113727232 :         s += sq[pix1[11] - pix2[11]];
      91   113727232 :         s += sq[pix1[12] - pix2[12]];
      92   113727232 :         s += sq[pix1[13] - pix2[13]];
      93   113727232 :         s += sq[pix1[14] - pix2[14]];
      94   113727232 :         s += sq[pix1[15] - pix2[15]];
      95             : 
      96   113727232 :         pix1 += stride;
      97   113727232 :         pix2 += stride;
      98             :     }
      99     7107952 :     return s;
     100             : }
     101             : 
     102           0 : static int sum_abs_dctelem_c(int16_t *block)
     103             : {
     104           0 :     int sum = 0, i;
     105             : 
     106           0 :     for (i = 0; i < 64; i++)
     107           0 :         sum += FFABS(block[i]);
     108           0 :     return sum;
     109             : }
     110             : 
     111             : #define avg2(a, b) (((a) + (b) + 1) >> 1)
     112             : #define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
     113             : 
     114    40794260 : static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     115             :                               ptrdiff_t stride, int h)
     116             : {
     117    40794260 :     int s = 0, i;
     118             : 
     119   612956252 :     for (i = 0; i < h; i++) {
     120   572161992 :         s    += abs(pix1[0]  - pix2[0]);
     121   572161992 :         s    += abs(pix1[1]  - pix2[1]);
     122   572161992 :         s    += abs(pix1[2]  - pix2[2]);
     123   572161992 :         s    += abs(pix1[3]  - pix2[3]);
     124   572161992 :         s    += abs(pix1[4]  - pix2[4]);
     125   572161992 :         s    += abs(pix1[5]  - pix2[5]);
     126   572161992 :         s    += abs(pix1[6]  - pix2[6]);
     127   572161992 :         s    += abs(pix1[7]  - pix2[7]);
     128   572161992 :         s    += abs(pix1[8]  - pix2[8]);
     129   572161992 :         s    += abs(pix1[9]  - pix2[9]);
     130   572161992 :         s    += abs(pix1[10] - pix2[10]);
     131   572161992 :         s    += abs(pix1[11] - pix2[11]);
     132   572161992 :         s    += abs(pix1[12] - pix2[12]);
     133   572161992 :         s    += abs(pix1[13] - pix2[13]);
     134   572161992 :         s    += abs(pix1[14] - pix2[14]);
     135   572161992 :         s    += abs(pix1[15] - pix2[15]);
     136   572161992 :         pix1 += stride;
     137   572161992 :         pix2 += stride;
     138             :     }
     139    40794260 :     return s;
     140             : }
     141             : 
     142           0 : static inline int pix_median_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     143             :                              ptrdiff_t stride, int h)
     144             : {
     145           0 :     int s = 0, i, j;
     146             : 
     147             : #define V(x) (pix1[x] - pix2[x])
     148             : 
     149           0 :     s    += abs(V(0));
     150           0 :     s    += abs(V(1) - V(0));
     151           0 :     s    += abs(V(2) - V(1));
     152           0 :     s    += abs(V(3) - V(2));
     153           0 :     s    += abs(V(4) - V(3));
     154           0 :     s    += abs(V(5) - V(4));
     155           0 :     s    += abs(V(6) - V(5));
     156           0 :     s    += abs(V(7) - V(6));
     157           0 :     s    += abs(V(8) - V(7));
     158           0 :     s    += abs(V(9) - V(8));
     159           0 :     s    += abs(V(10) - V(9));
     160           0 :     s    += abs(V(11) - V(10));
     161           0 :     s    += abs(V(12) - V(11));
     162           0 :     s    += abs(V(13) - V(12));
     163           0 :     s    += abs(V(14) - V(13));
     164           0 :     s    += abs(V(15) - V(14));
     165             : 
     166           0 :     pix1 += stride;
     167           0 :     pix2 += stride;
     168             : 
     169           0 :     for (i = 1; i < h; i++) {
     170           0 :         s    += abs(V(0) - V(-stride));
     171           0 :         for (j = 1; j < 16; j++)
     172           0 :             s    += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1)));
     173           0 :         pix1 += stride;
     174           0 :         pix2 += stride;
     175             : 
     176             :     }
     177             : #undef V
     178           0 :     return s;
     179             : }
     180             : 
     181     3054481 : static int pix_abs16_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     182             :                           ptrdiff_t stride, int h)
     183             : {
     184     3054481 :     int s = 0, i;
     185             : 
     186    42614633 :     for (i = 0; i < h; i++) {
     187    39560152 :         s    += abs(pix1[0]  - avg2(pix2[0],  pix2[1]));
     188    39560152 :         s    += abs(pix1[1]  - avg2(pix2[1],  pix2[2]));
     189    39560152 :         s    += abs(pix1[2]  - avg2(pix2[2],  pix2[3]));
     190    39560152 :         s    += abs(pix1[3]  - avg2(pix2[3],  pix2[4]));
     191    39560152 :         s    += abs(pix1[4]  - avg2(pix2[4],  pix2[5]));
     192    39560152 :         s    += abs(pix1[5]  - avg2(pix2[5],  pix2[6]));
     193    39560152 :         s    += abs(pix1[6]  - avg2(pix2[6],  pix2[7]));
     194    39560152 :         s    += abs(pix1[7]  - avg2(pix2[7],  pix2[8]));
     195    39560152 :         s    += abs(pix1[8]  - avg2(pix2[8],  pix2[9]));
     196    39560152 :         s    += abs(pix1[9]  - avg2(pix2[9],  pix2[10]));
     197    39560152 :         s    += abs(pix1[10] - avg2(pix2[10], pix2[11]));
     198    39560152 :         s    += abs(pix1[11] - avg2(pix2[11], pix2[12]));
     199    39560152 :         s    += abs(pix1[12] - avg2(pix2[12], pix2[13]));
     200    39560152 :         s    += abs(pix1[13] - avg2(pix2[13], pix2[14]));
     201    39560152 :         s    += abs(pix1[14] - avg2(pix2[14], pix2[15]));
     202    39560152 :         s    += abs(pix1[15] - avg2(pix2[15], pix2[16]));
     203    39560152 :         pix1 += stride;
     204    39560152 :         pix2 += stride;
     205             :     }
     206     3054481 :     return s;
     207             : }
     208             : 
     209     3054481 : static int pix_abs16_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     210             :                           ptrdiff_t stride, int h)
     211             : {
     212     3054481 :     int s = 0, i;
     213     3054481 :     uint8_t *pix3 = pix2 + stride;
     214             : 
     215    42614633 :     for (i = 0; i < h; i++) {
     216    39560152 :         s    += abs(pix1[0]  - avg2(pix2[0],  pix3[0]));
     217    39560152 :         s    += abs(pix1[1]  - avg2(pix2[1],  pix3[1]));
     218    39560152 :         s    += abs(pix1[2]  - avg2(pix2[2],  pix3[2]));
     219    39560152 :         s    += abs(pix1[3]  - avg2(pix2[3],  pix3[3]));
     220    39560152 :         s    += abs(pix1[4]  - avg2(pix2[4],  pix3[4]));
     221    39560152 :         s    += abs(pix1[5]  - avg2(pix2[5],  pix3[5]));
     222    39560152 :         s    += abs(pix1[6]  - avg2(pix2[6],  pix3[6]));
     223    39560152 :         s    += abs(pix1[7]  - avg2(pix2[7],  pix3[7]));
     224    39560152 :         s    += abs(pix1[8]  - avg2(pix2[8],  pix3[8]));
     225    39560152 :         s    += abs(pix1[9]  - avg2(pix2[9],  pix3[9]));
     226    39560152 :         s    += abs(pix1[10] - avg2(pix2[10], pix3[10]));
     227    39560152 :         s    += abs(pix1[11] - avg2(pix2[11], pix3[11]));
     228    39560152 :         s    += abs(pix1[12] - avg2(pix2[12], pix3[12]));
     229    39560152 :         s    += abs(pix1[13] - avg2(pix2[13], pix3[13]));
     230    39560152 :         s    += abs(pix1[14] - avg2(pix2[14], pix3[14]));
     231    39560152 :         s    += abs(pix1[15] - avg2(pix2[15], pix3[15]));
     232    39560152 :         pix1 += stride;
     233    39560152 :         pix2 += stride;
     234    39560152 :         pix3 += stride;
     235             :     }
     236     3054481 :     return s;
     237             : }
     238             : 
     239     6108962 : static int pix_abs16_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     240             :                            ptrdiff_t stride, int h)
     241             : {
     242     6108962 :     int s = 0, i;
     243     6108962 :     uint8_t *pix3 = pix2 + stride;
     244             : 
     245    85229266 :     for (i = 0; i < h; i++) {
     246    79120304 :         s    += abs(pix1[0]  - avg4(pix2[0],  pix2[1],  pix3[0],  pix3[1]));
     247    79120304 :         s    += abs(pix1[1]  - avg4(pix2[1],  pix2[2],  pix3[1],  pix3[2]));
     248    79120304 :         s    += abs(pix1[2]  - avg4(pix2[2],  pix2[3],  pix3[2],  pix3[3]));
     249    79120304 :         s    += abs(pix1[3]  - avg4(pix2[3],  pix2[4],  pix3[3],  pix3[4]));
     250    79120304 :         s    += abs(pix1[4]  - avg4(pix2[4],  pix2[5],  pix3[4],  pix3[5]));
     251    79120304 :         s    += abs(pix1[5]  - avg4(pix2[5],  pix2[6],  pix3[5],  pix3[6]));
     252    79120304 :         s    += abs(pix1[6]  - avg4(pix2[6],  pix2[7],  pix3[6],  pix3[7]));
     253    79120304 :         s    += abs(pix1[7]  - avg4(pix2[7],  pix2[8],  pix3[7],  pix3[8]));
     254    79120304 :         s    += abs(pix1[8]  - avg4(pix2[8],  pix2[9],  pix3[8],  pix3[9]));
     255    79120304 :         s    += abs(pix1[9]  - avg4(pix2[9],  pix2[10], pix3[9],  pix3[10]));
     256    79120304 :         s    += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
     257    79120304 :         s    += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
     258    79120304 :         s    += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
     259    79120304 :         s    += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
     260    79120304 :         s    += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
     261    79120304 :         s    += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
     262    79120304 :         pix1 += stride;
     263    79120304 :         pix2 += stride;
     264    79120304 :         pix3 += stride;
     265             :     }
     266     6108962 :     return s;
     267             : }
     268             : 
     269    31797226 : static inline int pix_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     270             :                              ptrdiff_t stride, int h)
     271             : {
     272    31797226 :     int s = 0, i;
     273             : 
     274   286175034 :     for (i = 0; i < h; i++) {
     275   254377808 :         s    += abs(pix1[0] - pix2[0]);
     276   254377808 :         s    += abs(pix1[1] - pix2[1]);
     277   254377808 :         s    += abs(pix1[2] - pix2[2]);
     278   254377808 :         s    += abs(pix1[3] - pix2[3]);
     279   254377808 :         s    += abs(pix1[4] - pix2[4]);
     280   254377808 :         s    += abs(pix1[5] - pix2[5]);
     281   254377808 :         s    += abs(pix1[6] - pix2[6]);
     282   254377808 :         s    += abs(pix1[7] - pix2[7]);
     283   254377808 :         pix1 += stride;
     284   254377808 :         pix2 += stride;
     285             :     }
     286    31797226 :     return s;
     287             : }
     288             : 
     289           0 : static inline int pix_median_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     290             :                              ptrdiff_t stride, int h)
     291             : {
     292           0 :     int s = 0, i, j;
     293             : 
     294             : #define V(x) (pix1[x] - pix2[x])
     295             : 
     296           0 :     s    += abs(V(0));
     297           0 :     s    += abs(V(1) - V(0));
     298           0 :     s    += abs(V(2) - V(1));
     299           0 :     s    += abs(V(3) - V(2));
     300           0 :     s    += abs(V(4) - V(3));
     301           0 :     s    += abs(V(5) - V(4));
     302           0 :     s    += abs(V(6) - V(5));
     303           0 :     s    += abs(V(7) - V(6));
     304             : 
     305           0 :     pix1 += stride;
     306           0 :     pix2 += stride;
     307             : 
     308           0 :     for (i = 1; i < h; i++) {
     309           0 :         s    += abs(V(0) - V(-stride));
     310           0 :         for (j = 1; j < 8; j++)
     311           0 :             s    += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1)));
     312           0 :         pix1 += stride;
     313           0 :         pix2 += stride;
     314             : 
     315             :     }
     316             : #undef V
     317           0 :     return s;
     318             : }
     319             : 
     320      837549 : static int pix_abs8_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     321             :                          ptrdiff_t stride, int h)
     322             : {
     323      837549 :     int s = 0, i;
     324             : 
     325     7537941 :     for (i = 0; i < h; i++) {
     326     6700392 :         s    += abs(pix1[0] - avg2(pix2[0], pix2[1]));
     327     6700392 :         s    += abs(pix1[1] - avg2(pix2[1], pix2[2]));
     328     6700392 :         s    += abs(pix1[2] - avg2(pix2[2], pix2[3]));
     329     6700392 :         s    += abs(pix1[3] - avg2(pix2[3], pix2[4]));
     330     6700392 :         s    += abs(pix1[4] - avg2(pix2[4], pix2[5]));
     331     6700392 :         s    += abs(pix1[5] - avg2(pix2[5], pix2[6]));
     332     6700392 :         s    += abs(pix1[6] - avg2(pix2[6], pix2[7]));
     333     6700392 :         s    += abs(pix1[7] - avg2(pix2[7], pix2[8]));
     334     6700392 :         pix1 += stride;
     335     6700392 :         pix2 += stride;
     336             :     }
     337      837549 :     return s;
     338             : }
     339             : 
     340      837549 : static int pix_abs8_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     341             :                          ptrdiff_t stride, int h)
     342             : {
     343      837549 :     int s = 0, i;
     344      837549 :     uint8_t *pix3 = pix2 + stride;
     345             : 
     346     7537941 :     for (i = 0; i < h; i++) {
     347     6700392 :         s    += abs(pix1[0] - avg2(pix2[0], pix3[0]));
     348     6700392 :         s    += abs(pix1[1] - avg2(pix2[1], pix3[1]));
     349     6700392 :         s    += abs(pix1[2] - avg2(pix2[2], pix3[2]));
     350     6700392 :         s    += abs(pix1[3] - avg2(pix2[3], pix3[3]));
     351     6700392 :         s    += abs(pix1[4] - avg2(pix2[4], pix3[4]));
     352     6700392 :         s    += abs(pix1[5] - avg2(pix2[5], pix3[5]));
     353     6700392 :         s    += abs(pix1[6] - avg2(pix2[6], pix3[6]));
     354     6700392 :         s    += abs(pix1[7] - avg2(pix2[7], pix3[7]));
     355     6700392 :         pix1 += stride;
     356     6700392 :         pix2 += stride;
     357     6700392 :         pix3 += stride;
     358             :     }
     359      837549 :     return s;
     360             : }
     361             : 
     362     1675098 : static int pix_abs8_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     363             :                           ptrdiff_t stride, int h)
     364             : {
     365     1675098 :     int s = 0, i;
     366     1675098 :     uint8_t *pix3 = pix2 + stride;
     367             : 
     368    15075882 :     for (i = 0; i < h; i++) {
     369    13400784 :         s    += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
     370    13400784 :         s    += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
     371    13400784 :         s    += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
     372    13400784 :         s    += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
     373    13400784 :         s    += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
     374    13400784 :         s    += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
     375    13400784 :         s    += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
     376    13400784 :         s    += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
     377    13400784 :         pix1 += stride;
     378    13400784 :         pix2 += stride;
     379    13400784 :         pix3 += stride;
     380             :     }
     381     1675098 :     return s;
     382             : }
     383             : 
     384      748141 : static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
     385             :                     ptrdiff_t stride, int h)
     386             : {
     387      748141 :     int score1 = 0, score2 = 0, x, y;
     388             : 
     389    12718397 :     for (y = 0; y < h; y++) {
     390   203494352 :         for (x = 0; x < 16; x++)
     391   191524096 :             score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
     392    11970256 :         if (y + 1 < h) {
     393   179553840 :             for (x = 0; x < 15; x++)
     394   336663450 :                 score2 += FFABS(s1[x]     - s1[x + stride] -
     395   168331725 :                                 s1[x + 1] + s1[x + stride + 1]) -
     396   168331725 :                           FFABS(s2[x]     - s2[x + stride] -
     397             :                                 s2[x + 1] + s2[x + stride + 1]);
     398             :         }
     399    11970256 :         s1 += stride;
     400    11970256 :         s2 += stride;
     401             :     }
     402             : 
     403      748141 :     if (c)
     404      748141 :         return score1 + FFABS(score2) * c->avctx->nsse_weight;
     405             :     else
     406           0 :         return score1 + FFABS(score2) * 8;
     407             : }
     408             : 
     409           0 : static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
     410             :                    ptrdiff_t stride, int h)
     411             : {
     412           0 :     int score1 = 0, score2 = 0, x, y;
     413             : 
     414           0 :     for (y = 0; y < h; y++) {
     415           0 :         for (x = 0; x < 8; x++)
     416           0 :             score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
     417           0 :         if (y + 1 < h) {
     418           0 :             for (x = 0; x < 7; x++)
     419           0 :                 score2 += FFABS(s1[x]     - s1[x + stride] -
     420           0 :                                 s1[x + 1] + s1[x + stride + 1]) -
     421           0 :                           FFABS(s2[x]     - s2[x + stride] -
     422             :                                 s2[x + 1] + s2[x + stride + 1]);
     423             :         }
     424           0 :         s1 += stride;
     425           0 :         s2 += stride;
     426             :     }
     427             : 
     428           0 :     if (c)
     429           0 :         return score1 + FFABS(score2) * c->avctx->nsse_weight;
     430             :     else
     431           0 :         return score1 + FFABS(score2) * 8;
     432             : }
     433             : 
     434           0 : static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b,
     435             :                     ptrdiff_t stride, int h)
     436             : {
     437           0 :     return 0;
     438             : }
     439             : 
     440       38346 : void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type)
     441             : {
     442             :     int i;
     443             : 
     444       38346 :     memset(cmp, 0, sizeof(void *) * 6);
     445             : 
     446      268422 :     for (i = 0; i < 6; i++) {
     447      230076 :         switch (type & 0xFF) {
     448      209736 :         case FF_CMP_SAD:
     449      209736 :             cmp[i] = c->sad[i];
     450      209736 :             break;
     451           0 :         case FF_CMP_MEDIAN_SAD:
     452           0 :             cmp[i] = c->median_sad[i];
     453           0 :             break;
     454        7200 :         case FF_CMP_SATD:
     455        7200 :             cmp[i] = c->hadamard8_diff[i];
     456        7200 :             break;
     457        2748 :         case FF_CMP_SSE:
     458        2748 :             cmp[i] = c->sse[i];
     459        2748 :             break;
     460           0 :         case FF_CMP_DCT:
     461           0 :             cmp[i] = c->dct_sad[i];
     462           0 :             break;
     463           0 :         case FF_CMP_DCT264:
     464           0 :             cmp[i] = c->dct264_sad[i];
     465           0 :             break;
     466        1074 :         case FF_CMP_DCTMAX:
     467        1074 :             cmp[i] = c->dct_max[i];
     468        1074 :             break;
     469           0 :         case FF_CMP_PSNR:
     470           0 :             cmp[i] = c->quant_psnr[i];
     471           0 :             break;
     472           0 :         case FF_CMP_BIT:
     473           0 :             cmp[i] = c->bit[i];
     474           0 :             break;
     475           0 :         case FF_CMP_RD:
     476           0 :             cmp[i] = c->rd[i];
     477           0 :             break;
     478        1182 :         case FF_CMP_VSAD:
     479        1182 :             cmp[i] = c->vsad[i];
     480        1182 :             break;
     481           0 :         case FF_CMP_VSSE:
     482           0 :             cmp[i] = c->vsse[i];
     483           0 :             break;
     484           0 :         case FF_CMP_ZERO:
     485           0 :             cmp[i] = zero_cmp;
     486           0 :             break;
     487        4824 :         case FF_CMP_NSSE:
     488        4824 :             cmp[i] = c->nsse[i];
     489        4824 :             break;
     490             : #if CONFIG_DWT
     491           0 :         case FF_CMP_W53:
     492           0 :             cmp[i]= c->w53[i];
     493           0 :             break;
     494        3312 :         case FF_CMP_W97:
     495        3312 :             cmp[i]= c->w97[i];
     496        3312 :             break;
     497             : #endif
     498           0 :         default:
     499           0 :             av_log(NULL, AV_LOG_ERROR,
     500             :                    "internal error in cmp function selection\n");
     501             :         }
     502             :     }
     503       38346 : }
     504             : 
     505             : #define BUTTERFLY2(o1, o2, i1, i2)              \
     506             :     o1 = (i1) + (i2);                           \
     507             :     o2 = (i1) - (i2);
     508             : 
     509             : #define BUTTERFLY1(x, y)                        \
     510             :     {                                           \
     511             :         int a, b;                               \
     512             :         a = x;                                  \
     513             :         b = y;                                  \
     514             :         x = a + b;                              \
     515             :         y = a - b;                              \
     516             :     }
     517             : 
     518             : #define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y)))
     519             : 
     520    15481796 : static int hadamard8_diff8x8_c(MpegEncContext *s, uint8_t *dst,
     521             :                                uint8_t *src, ptrdiff_t stride, int h)
     522             : {
     523    15481796 :     int i, temp[64], sum = 0;
     524             : 
     525             :     av_assert2(h == 8);
     526             : 
     527   139336164 :     for (i = 0; i < 8; i++) {
     528             :         // FIXME: try pointer walks
     529   123854368 :         BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
     530             :                    src[stride * i + 0] - dst[stride * i + 0],
     531             :                    src[stride * i + 1] - dst[stride * i + 1]);
     532   123854368 :         BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
     533             :                    src[stride * i + 2] - dst[stride * i + 2],
     534             :                    src[stride * i + 3] - dst[stride * i + 3]);
     535   123854368 :         BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
     536             :                    src[stride * i + 4] - dst[stride * i + 4],
     537             :                    src[stride * i + 5] - dst[stride * i + 5]);
     538   123854368 :         BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
     539             :                    src[stride * i + 6] - dst[stride * i + 6],
     540             :                    src[stride * i + 7] - dst[stride * i + 7]);
     541             : 
     542   123854368 :         BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
     543   123854368 :         BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
     544   123854368 :         BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
     545   123854368 :         BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);
     546             : 
     547   123854368 :         BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
     548   123854368 :         BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
     549   123854368 :         BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
     550   123854368 :         BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
     551             :     }
     552             : 
     553   139336164 :     for (i = 0; i < 8; i++) {
     554   123854368 :         BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
     555   123854368 :         BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
     556   123854368 :         BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
     557   123854368 :         BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);
     558             : 
     559   123854368 :         BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
     560   123854368 :         BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
     561   123854368 :         BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
     562   123854368 :         BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);
     563             : 
     564   371563104 :         sum += BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) +
     565   247708736 :                BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) +
     566   247708736 :                BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) +
     567   123854368 :                BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
     568             :     }
     569    15481796 :     return sum;
     570             : }
     571             : 
     572           0 : static int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src,
     573             :                                 uint8_t *dummy, ptrdiff_t stride, int h)
     574             : {
     575           0 :     int i, temp[64], sum = 0;
     576             : 
     577             :     av_assert2(h == 8);
     578             : 
     579           0 :     for (i = 0; i < 8; i++) {
     580             :         // FIXME: try pointer walks
     581           0 :         BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
     582             :                    src[stride * i + 0], src[stride * i + 1]);
     583           0 :         BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
     584             :                    src[stride * i + 2], src[stride * i + 3]);
     585           0 :         BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
     586             :                    src[stride * i + 4], src[stride * i + 5]);
     587           0 :         BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
     588             :                    src[stride * i + 6], src[stride * i + 7]);
     589             : 
     590           0 :         BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
     591           0 :         BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
     592           0 :         BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
     593           0 :         BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);
     594             : 
     595           0 :         BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
     596           0 :         BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
     597           0 :         BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
     598           0 :         BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
     599             :     }
     600             : 
     601           0 :     for (i = 0; i < 8; i++) {
     602           0 :         BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
     603           0 :         BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
     604           0 :         BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
     605           0 :         BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);
     606             : 
     607           0 :         BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
     608           0 :         BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
     609           0 :         BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
     610           0 :         BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);
     611             : 
     612           0 :         sum +=
     613           0 :             BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i])
     614           0 :             + BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i])
     615           0 :             + BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i])
     616           0 :             + BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
     617             :     }
     618             : 
     619           0 :     sum -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean
     620             : 
     621           0 :     return sum;
     622             : }
     623             : 
     624           0 : static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1,
     625             :                         uint8_t *src2, ptrdiff_t stride, int h)
     626             : {
     627           0 :     LOCAL_ALIGNED_16(int16_t, temp, [64]);
     628             : 
     629             :     av_assert2(h == 8);
     630             : 
     631           0 :     s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
     632           0 :     s->fdsp.fdct(temp);
     633           0 :     return s->mecc.sum_abs_dctelem(temp);
     634             : }
     635             : 
     636             : #if CONFIG_GPL
     637             : #define DCT8_1D                                         \
     638             :     {                                                   \
     639             :         const int s07 = SRC(0) + SRC(7);                \
     640             :         const int s16 = SRC(1) + SRC(6);                \
     641             :         const int s25 = SRC(2) + SRC(5);                \
     642             :         const int s34 = SRC(3) + SRC(4);                \
     643             :         const int a0  = s07 + s34;                      \
     644             :         const int a1  = s16 + s25;                      \
     645             :         const int a2  = s07 - s34;                      \
     646             :         const int a3  = s16 - s25;                      \
     647             :         const int d07 = SRC(0) - SRC(7);                \
     648             :         const int d16 = SRC(1) - SRC(6);                \
     649             :         const int d25 = SRC(2) - SRC(5);                \
     650             :         const int d34 = SRC(3) - SRC(4);                \
     651             :         const int a4  = d16 + d25 + (d07 + (d07 >> 1)); \
     652             :         const int a5  = d07 - d34 - (d25 + (d25 >> 1)); \
     653             :         const int a6  = d07 + d34 - (d16 + (d16 >> 1)); \
     654             :         const int a7  = d16 - d25 + (d34 + (d34 >> 1)); \
     655             :         DST(0, a0 + a1);                                \
     656             :         DST(1, a4 + (a7 >> 2));                         \
     657             :         DST(2, a2 + (a3 >> 1));                         \
     658             :         DST(3, a5 + (a6 >> 2));                         \
     659             :         DST(4, a0 - a1);                                \
     660             :         DST(5, a6 - (a5 >> 2));                         \
     661             :         DST(6, (a2 >> 1) - a3);                         \
     662             :         DST(7, (a4 >> 2) - a7);                         \
     663             :     }
     664             : 
     665           0 : static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1,
     666             :                            uint8_t *src2, ptrdiff_t stride, int h)
     667             : {
     668             :     int16_t dct[8][8];
     669           0 :     int i, sum = 0;
     670             : 
     671           0 :     s->pdsp.diff_pixels_unaligned(dct[0], src1, src2, stride);
     672             : 
     673             : #define SRC(x) dct[i][x]
     674             : #define DST(x, v) dct[i][x] = v
     675           0 :     for (i = 0; i < 8; i++)
     676           0 :         DCT8_1D
     677             : #undef SRC
     678             : #undef DST
     679             : 
     680             : #define SRC(x) dct[x][i]
     681             : #define DST(x, v) sum += FFABS(v)
     682           0 :         for (i = 0; i < 8; i++)
     683           0 :             DCT8_1D
     684             : #undef SRC
     685             : #undef DST
     686           0 :             return sum;
     687             : }
     688             : #endif
     689             : 
     690           0 : static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1,
     691             :                         uint8_t *src2, ptrdiff_t stride, int h)
     692             : {
     693           0 :     LOCAL_ALIGNED_16(int16_t, temp, [64]);
     694           0 :     int sum = 0, i;
     695             : 
     696             :     av_assert2(h == 8);
     697             : 
     698           0 :     s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
     699           0 :     s->fdsp.fdct(temp);
     700             : 
     701           0 :     for (i = 0; i < 64; i++)
     702           0 :         sum = FFMAX(sum, FFABS(temp[i]));
     703             : 
     704           0 :     return sum;
     705             : }
     706             : 
     707           0 : static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
     708             :                            uint8_t *src2, ptrdiff_t stride, int h)
     709             : {
     710           0 :     LOCAL_ALIGNED_16(int16_t, temp, [64 * 2]);
     711           0 :     int16_t *const bak = temp + 64;
     712           0 :     int sum = 0, i;
     713             : 
     714             :     av_assert2(h == 8);
     715           0 :     s->mb_intra = 0;
     716             : 
     717           0 :     s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
     718             : 
     719           0 :     memcpy(bak, temp, 64 * sizeof(int16_t));
     720             : 
     721           0 :     s->block_last_index[0 /* FIXME */] =
     722           0 :         s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
     723           0 :     s->dct_unquantize_inter(s, temp, 0, s->qscale);
     724           0 :     ff_simple_idct_8(temp); // FIXME
     725             : 
     726           0 :     for (i = 0; i < 64; i++)
     727           0 :         sum += (temp[i] - bak[i]) * (temp[i] - bak[i]);
     728             : 
     729           0 :     return sum;
     730             : }
     731             : 
     732           0 : static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
     733             :                    ptrdiff_t stride, int h)
     734             : {
     735           0 :     const uint8_t *scantable = s->intra_scantable.permutated;
     736           0 :     LOCAL_ALIGNED_16(int16_t, temp, [64]);
     737           0 :     LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
     738           0 :     LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
     739             :     int i, last, run, bits, level, distortion, start_i;
     740           0 :     const int esc_length = s->ac_esc_length;
     741             :     uint8_t *length, *last_length;
     742             : 
     743             :     av_assert2(h == 8);
     744             : 
     745           0 :     copy_block8(lsrc1, src1, 8, stride, 8);
     746           0 :     copy_block8(lsrc2, src2, 8, stride, 8);
     747             : 
     748           0 :     s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8);
     749             : 
     750           0 :     s->block_last_index[0 /* FIXME */] =
     751             :     last                               =
     752           0 :         s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
     753             : 
     754           0 :     bits = 0;
     755             : 
     756           0 :     if (s->mb_intra) {
     757           0 :         start_i     = 1;
     758           0 :         length      = s->intra_ac_vlc_length;
     759           0 :         last_length = s->intra_ac_vlc_last_length;
     760           0 :         bits       += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
     761             :     } else {
     762           0 :         start_i     = 0;
     763           0 :         length      = s->inter_ac_vlc_length;
     764           0 :         last_length = s->inter_ac_vlc_last_length;
     765             :     }
     766             : 
     767           0 :     if (last >= start_i) {
     768           0 :         run = 0;
     769           0 :         for (i = start_i; i < last; i++) {
     770           0 :             int j = scantable[i];
     771           0 :             level = temp[j];
     772             : 
     773           0 :             if (level) {
     774           0 :                 level += 64;
     775           0 :                 if ((level & (~127)) == 0)
     776           0 :                     bits += length[UNI_AC_ENC_INDEX(run, level)];
     777             :                 else
     778           0 :                     bits += esc_length;
     779           0 :                 run = 0;
     780             :             } else
     781           0 :                 run++;
     782             :         }
     783           0 :         i = scantable[last];
     784             : 
     785           0 :         level = temp[i] + 64;
     786             : 
     787             :         av_assert2(level - 64);
     788             : 
     789           0 :         if ((level & (~127)) == 0) {
     790           0 :             bits += last_length[UNI_AC_ENC_INDEX(run, level)];
     791             :         } else
     792           0 :             bits += esc_length;
     793             :     }
     794             : 
     795           0 :     if (last >= 0) {
     796           0 :         if (s->mb_intra)
     797           0 :             s->dct_unquantize_intra(s, temp, 0, s->qscale);
     798             :         else
     799           0 :             s->dct_unquantize_inter(s, temp, 0, s->qscale);
     800             :     }
     801             : 
     802           0 :     s->idsp.idct_add(lsrc2, 8, temp);
     803             : 
     804           0 :     distortion = s->mecc.sse[1](NULL, lsrc2, lsrc1, 8, 8);
     805             : 
     806           0 :     return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7);
     807             : }
     808             : 
     809           0 : static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
     810             :                     ptrdiff_t stride, int h)
     811             : {
     812           0 :     const uint8_t *scantable = s->intra_scantable.permutated;
     813           0 :     LOCAL_ALIGNED_16(int16_t, temp, [64]);
     814             :     int i, last, run, bits, level, start_i;
     815           0 :     const int esc_length = s->ac_esc_length;
     816             :     uint8_t *length, *last_length;
     817             : 
     818             :     av_assert2(h == 8);
     819             : 
     820           0 :     s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
     821             : 
     822           0 :     s->block_last_index[0 /* FIXME */] =
     823             :     last                               =
     824           0 :         s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
     825             : 
     826           0 :     bits = 0;
     827             : 
     828           0 :     if (s->mb_intra) {
     829           0 :         start_i     = 1;
     830           0 :         length      = s->intra_ac_vlc_length;
     831           0 :         last_length = s->intra_ac_vlc_last_length;
     832           0 :         bits       += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
     833             :     } else {
     834           0 :         start_i     = 0;
     835           0 :         length      = s->inter_ac_vlc_length;
     836           0 :         last_length = s->inter_ac_vlc_last_length;
     837             :     }
     838             : 
     839           0 :     if (last >= start_i) {
     840           0 :         run = 0;
     841           0 :         for (i = start_i; i < last; i++) {
     842           0 :             int j = scantable[i];
     843           0 :             level = temp[j];
     844             : 
     845           0 :             if (level) {
     846           0 :                 level += 64;
     847           0 :                 if ((level & (~127)) == 0)
     848           0 :                     bits += length[UNI_AC_ENC_INDEX(run, level)];
     849             :                 else
     850           0 :                     bits += esc_length;
     851           0 :                 run = 0;
     852             :             } else
     853           0 :                 run++;
     854             :         }
     855           0 :         i = scantable[last];
     856             : 
     857           0 :         level = temp[i] + 64;
     858             : 
     859             :         av_assert2(level - 64);
     860             : 
     861           0 :         if ((level & (~127)) == 0)
     862           0 :             bits += last_length[UNI_AC_ENC_INDEX(run, level)];
     863             :         else
     864           0 :             bits += esc_length;
     865             :     }
     866             : 
     867           0 :     return bits;
     868             : }
     869             : 
     870             : #define VSAD_INTRA(size)                                                \
     871             : static int vsad_intra ## size ## _c(MpegEncContext *c,                  \
     872             :                                     uint8_t *s, uint8_t *dummy,         \
     873             :                                     ptrdiff_t stride, int h)            \
     874             : {                                                                       \
     875             :     int score = 0, x, y;                                                \
     876             :                                                                         \
     877             :     for (y = 1; y < h; y++) {                                           \
     878             :         for (x = 0; x < size; x += 4) {                                 \
     879             :             score += FFABS(s[x]     - s[x + stride])     +              \
     880             :                      FFABS(s[x + 1] - s[x + stride + 1]) +              \
     881             :                      FFABS(s[x + 2] - s[x + 2 + stride]) +              \
     882             :                      FFABS(s[x + 3] - s[x + 3 + stride]);               \
     883             :         }                                                               \
     884             :         s += stride;                                                    \
     885             :     }                                                                   \
     886             :                                                                         \
     887             :     return score;                                                       \
     888             : }
     889           0 : VSAD_INTRA(8)
     890     1231444 : VSAD_INTRA(16)
     891             : 
     892             : #define VSAD(size)                                                             \
     893             : static int vsad ## size ## _c(MpegEncContext *c,                               \
     894             :                               uint8_t *s1, uint8_t *s2,                        \
     895             :                               ptrdiff_t stride, int h)                               \
     896             : {                                                                              \
     897             :     int score = 0, x, y;                                                       \
     898             :                                                                                \
     899             :     for (y = 1; y < h; y++) {                                                  \
     900             :         for (x = 0; x < size; x++)                                             \
     901             :             score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);   \
     902             :         s1 += stride;                                                          \
     903             :         s2 += stride;                                                          \
     904             :     }                                                                          \
     905             :                                                                                \
     906             :     return score;                                                              \
     907             : }
     908           0 : VSAD(8)
     909     1915022 : VSAD(16)
     910             : 
     911             : #define SQ(a) ((a) * (a))
     912             : #define VSSE_INTRA(size)                                                \
     913             : static int vsse_intra ## size ## _c(MpegEncContext *c,                  \
     914             :                                     uint8_t *s, uint8_t *dummy,         \
     915             :                                     ptrdiff_t stride, int h)            \
     916             : {                                                                       \
     917             :     int score = 0, x, y;                                                \
     918             :                                                                         \
     919             :     for (y = 1; y < h; y++) {                                           \
     920             :         for (x = 0; x < size; x += 4) {                                 \
     921             :             score += SQ(s[x]     - s[x + stride]) +                     \
     922             :                      SQ(s[x + 1] - s[x + stride + 1]) +                 \
     923             :                      SQ(s[x + 2] - s[x + stride + 2]) +                 \
     924             :                      SQ(s[x + 3] - s[x + stride + 3]);                  \
     925             :         }                                                               \
     926             :         s += stride;                                                    \
     927             :     }                                                                   \
     928             :                                                                         \
     929             :     return score;                                                       \
     930             : }
     931           0 : VSSE_INTRA(8)
     932           0 : VSSE_INTRA(16)
     933             : 
     934             : #define VSSE(size)                                                             \
     935             : static int vsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,     \
     936             :                               ptrdiff_t stride, int h)                         \
     937             : {                                                                              \
     938             :     int score = 0, x, y;                                                       \
     939             :                                                                                \
     940             :     for (y = 1; y < h; y++) {                                                  \
     941             :         for (x = 0; x < size; x++)                                             \
     942             :             score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);      \
     943             :         s1 += stride;                                                          \
     944             :         s2 += stride;                                                          \
     945             :     }                                                                          \
     946             :                                                                                \
     947             :     return score;                                                              \
     948             : }
     949           0 : VSSE(8)
     950           0 : VSSE(16)
     951             : 
     952             : #define WRAPPER8_16_SQ(name8, name16)                                   \
     953             : static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src,        \
     954             :                   ptrdiff_t stride, int h)                              \
     955             : {                                                                       \
     956             :     int score = 0;                                                      \
     957             :                                                                         \
     958             :     score += name8(s, dst, src, stride, 8);                             \
     959             :     score += name8(s, dst + 8, src + 8, stride, 8);                     \
     960             :     if (h == 16) {                                                      \
     961             :         dst   += 8 * stride;                                            \
     962             :         src   += 8 * stride;                                            \
     963             :         score += name8(s, dst, src, stride, 8);                         \
     964             :         score += name8(s, dst + 8, src + 8, stride, 8);                 \
     965             :     }                                                                   \
     966             :     return score;                                                       \
     967             : }
     968             : 
     969     3613091 : WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
     970           0 : WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
     971           0 : WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
     972             : #if CONFIG_GPL
     973           0 : WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
     974             : #endif
     975           0 : WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
     976           0 : WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
     977           0 : WRAPPER8_16_SQ(rd8x8_c, rd16_c)
     978           0 : WRAPPER8_16_SQ(bit8x8_c, bit16_c)
     979             : 
     980        5597 : av_cold void ff_me_cmp_init_static(void)
     981             : {
     982             :     int i;
     983             : 
     984     2871261 :     for (i = 0; i < 512; i++)
     985     2865664 :         ff_square_tab[i] = (i - 256) * (i - 256);
     986        5597 : }
     987             : 
     988       17131 : int ff_check_alignment(void)
     989             : {
     990             :     static int did_fail = 0;
     991       17131 :     LOCAL_ALIGNED_16(int, aligned, [4]);
     992             : 
     993       17131 :     if ((intptr_t)aligned & 15) {
     994           0 :         if (!did_fail) {
     995             : #if HAVE_MMX || HAVE_ALTIVEC
     996           0 :             av_log(NULL, AV_LOG_ERROR,
     997             :                 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
     998             :                 "and may be very slow or crash. This is not a bug in libavcodec,\n"
     999             :                 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
    1000             :                 "Do not report crashes to FFmpeg developers.\n");
    1001             : #endif
    1002           0 :             did_fail=1;
    1003             :         }
    1004           0 :         return -1;
    1005             :     }
    1006       17131 :     return 0;
    1007             : }
    1008             : 
    1009         938 : av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx)
    1010             : {
    1011         938 :     ff_check_alignment();
    1012             : 
    1013         938 :     c->sum_abs_dctelem = sum_abs_dctelem_c;
    1014             : 
    1015             :     /* TODO [0] 16  [1] 8 */
    1016         938 :     c->pix_abs[0][0] = pix_abs16_c;
    1017         938 :     c->pix_abs[0][1] = pix_abs16_x2_c;
    1018         938 :     c->pix_abs[0][2] = pix_abs16_y2_c;
    1019         938 :     c->pix_abs[0][3] = pix_abs16_xy2_c;
    1020         938 :     c->pix_abs[1][0] = pix_abs8_c;
    1021         938 :     c->pix_abs[1][1] = pix_abs8_x2_c;
    1022         938 :     c->pix_abs[1][2] = pix_abs8_y2_c;
    1023         938 :     c->pix_abs[1][3] = pix_abs8_xy2_c;
    1024             : 
    1025             : #define SET_CMP_FUNC(name)                      \
    1026             :     c->name[0] = name ## 16_c;                  \
    1027             :     c->name[1] = name ## 8x8_c;
    1028             : 
    1029         938 :     SET_CMP_FUNC(hadamard8_diff)
    1030         938 :     c->hadamard8_diff[4] = hadamard8_intra16_c;
    1031         938 :     c->hadamard8_diff[5] = hadamard8_intra8x8_c;
    1032         938 :     SET_CMP_FUNC(dct_sad)
    1033         938 :     SET_CMP_FUNC(dct_max)
    1034             : #if CONFIG_GPL
    1035         938 :     SET_CMP_FUNC(dct264_sad)
    1036             : #endif
    1037         938 :     c->sad[0] = pix_abs16_c;
    1038         938 :     c->sad[1] = pix_abs8_c;
    1039         938 :     c->sse[0] = sse16_c;
    1040         938 :     c->sse[1] = sse8_c;
    1041         938 :     c->sse[2] = sse4_c;
    1042         938 :     SET_CMP_FUNC(quant_psnr)
    1043         938 :     SET_CMP_FUNC(rd)
    1044         938 :     SET_CMP_FUNC(bit)
    1045         938 :     c->vsad[0] = vsad16_c;
    1046         938 :     c->vsad[1] = vsad8_c;
    1047         938 :     c->vsad[4] = vsad_intra16_c;
    1048         938 :     c->vsad[5] = vsad_intra8_c;
    1049         938 :     c->vsse[0] = vsse16_c;
    1050         938 :     c->vsse[1] = vsse8_c;
    1051         938 :     c->vsse[4] = vsse_intra16_c;
    1052         938 :     c->vsse[5] = vsse_intra8_c;
    1053         938 :     c->nsse[0] = nsse16_c;
    1054         938 :     c->nsse[1] = nsse8_c;
    1055             : #if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
    1056         938 :     ff_dsputil_init_dwt(c);
    1057             : #endif
    1058             : 
    1059             :     if (ARCH_ALPHA)
    1060             :         ff_me_cmp_init_alpha(c, avctx);
    1061             :     if (ARCH_ARM)
    1062             :         ff_me_cmp_init_arm(c, avctx);
    1063             :     if (ARCH_PPC)
    1064             :         ff_me_cmp_init_ppc(c, avctx);
    1065             :     if (ARCH_X86)
    1066         938 :         ff_me_cmp_init_x86(c, avctx);
    1067             :     if (ARCH_MIPS)
    1068             :         ff_me_cmp_init_mips(c, avctx);
    1069             : 
    1070         938 :     c->median_sad[0] = pix_median_abs16_c;
    1071         938 :     c->median_sad[1] = pix_median_abs8_c;
    1072         938 : }

Generated by: LCOV version 1.13