LCOV - code coverage report
Current view: top level - libavcodec - me_cmp.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 306 576 53.1 %
Date: 2018-05-20 11:54:08 Functions: 18 44 40.9 %

          Line data    Source code
       1             : /*
       2             :  * DSP utils
       3             :  * Copyright (c) 2000, 2001 Fabrice Bellard
       4             :  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
       5             :  *
       6             :  * This file is part of FFmpeg.
       7             :  *
       8             :  * FFmpeg is free software; you can redistribute it and/or
       9             :  * modify it under the terms of the GNU Lesser General Public
      10             :  * License as published by the Free Software Foundation; either
      11             :  * version 2.1 of the License, or (at your option) any later version.
      12             :  *
      13             :  * FFmpeg is distributed in the hope that it will be useful,
      14             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16             :  * Lesser General Public License for more details.
      17             :  *
      18             :  * You should have received a copy of the GNU Lesser General Public
      19             :  * License along with FFmpeg; if not, write to the Free Software
      20             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      21             :  */
      22             : 
      23             : #include "libavutil/attributes.h"
      24             : #include "libavutil/internal.h"
      25             : #include "avcodec.h"
      26             : #include "copy_block.h"
      27             : #include "simple_idct.h"
      28             : #include "me_cmp.h"
      29             : #include "mpegvideo.h"
      30             : #include "config.h"
      31             : 
      32             : /* (i - 256) * (i - 256) */
      33             : const uint32_t ff_square_tab[512] = {
      34             :     65536, 65025, 64516, 64009, 63504, 63001, 62500, 62001, 61504, 61009, 60516, 60025, 59536, 59049, 58564, 58081,
      35             :     57600, 57121, 56644, 56169, 55696, 55225, 54756, 54289, 53824, 53361, 52900, 52441, 51984, 51529, 51076, 50625,
      36             :     50176, 49729, 49284, 48841, 48400, 47961, 47524, 47089, 46656, 46225, 45796, 45369, 44944, 44521, 44100, 43681,
      37             :     43264, 42849, 42436, 42025, 41616, 41209, 40804, 40401, 40000, 39601, 39204, 38809, 38416, 38025, 37636, 37249,
      38             :     36864, 36481, 36100, 35721, 35344, 34969, 34596, 34225, 33856, 33489, 33124, 32761, 32400, 32041, 31684, 31329,
      39             :     30976, 30625, 30276, 29929, 29584, 29241, 28900, 28561, 28224, 27889, 27556, 27225, 26896, 26569, 26244, 25921,
      40             :     25600, 25281, 24964, 24649, 24336, 24025, 23716, 23409, 23104, 22801, 22500, 22201, 21904, 21609, 21316, 21025,
      41             :     20736, 20449, 20164, 19881, 19600, 19321, 19044, 18769, 18496, 18225, 17956, 17689, 17424, 17161, 16900, 16641,
      42             :     16384, 16129, 15876, 15625, 15376, 15129, 14884, 14641, 14400, 14161, 13924, 13689, 13456, 13225, 12996, 12769,
      43             :     12544, 12321, 12100, 11881, 11664, 11449, 11236, 11025, 10816, 10609, 10404, 10201, 10000,  9801,  9604,  9409,
      44             :      9216,  9025,  8836,  8649,  8464,  8281,  8100,  7921,  7744,  7569,  7396,  7225,  7056,  6889,  6724,  6561,
      45             :      6400,  6241,  6084,  5929,  5776,  5625,  5476,  5329,  5184,  5041,  4900,  4761,  4624,  4489,  4356,  4225,
      46             :      4096,  3969,  3844,  3721,  3600,  3481,  3364,  3249,  3136,  3025,  2916,  2809,  2704,  2601,  2500,  2401,
      47             :      2304,  2209,  2116,  2025,  1936,  1849,  1764,  1681,  1600,  1521,  1444,  1369,  1296,  1225,  1156,  1089,
      48             :      1024,   961,   900,   841,   784,   729,   676,   625,   576,   529,   484,   441,   400,   361,   324,   289,
      49             :       256,   225,   196,   169,   144,   121,   100,    81,    64,    49,    36,    25,    16,     9,     4,     1,
      50             :         0,     1,     4,     9,    16,    25,    36,    49,    64,    81,   100,   121,   144,   169,   196,   225,
      51             :       256,   289,   324,   361,   400,   441,   484,   529,   576,   625,   676,   729,   784,   841,   900,   961,
      52             :      1024,  1089,  1156,  1225,  1296,  1369,  1444,  1521,  1600,  1681,  1764,  1849,  1936,  2025,  2116,  2209,
      53             :      2304,  2401,  2500,  2601,  2704,  2809,  2916,  3025,  3136,  3249,  3364,  3481,  3600,  3721,  3844,  3969,
      54             :      4096,  4225,  4356,  4489,  4624,  4761,  4900,  5041,  5184,  5329,  5476,  5625,  5776,  5929,  6084,  6241,
      55             :      6400,  6561,  6724,  6889,  7056,  7225,  7396,  7569,  7744,  7921,  8100,  8281,  8464,  8649,  8836,  9025,
      56             :      9216,  9409,  9604,  9801, 10000, 10201, 10404, 10609, 10816, 11025, 11236, 11449, 11664, 11881, 12100, 12321,
      57             :     12544, 12769, 12996, 13225, 13456, 13689, 13924, 14161, 14400, 14641, 14884, 15129, 15376, 15625, 15876, 16129,
      58             :     16384, 16641, 16900, 17161, 17424, 17689, 17956, 18225, 18496, 18769, 19044, 19321, 19600, 19881, 20164, 20449,
      59             :     20736, 21025, 21316, 21609, 21904, 22201, 22500, 22801, 23104, 23409, 23716, 24025, 24336, 24649, 24964, 25281,
      60             :     25600, 25921, 26244, 26569, 26896, 27225, 27556, 27889, 28224, 28561, 28900, 29241, 29584, 29929, 30276, 30625,
      61             :     30976, 31329, 31684, 32041, 32400, 32761, 33124, 33489, 33856, 34225, 34596, 34969, 35344, 35721, 36100, 36481,
      62             :     36864, 37249, 37636, 38025, 38416, 38809, 39204, 39601, 40000, 40401, 40804, 41209, 41616, 42025, 42436, 42849,
      63             :     43264, 43681, 44100, 44521, 44944, 45369, 45796, 46225, 46656, 47089, 47524, 47961, 48400, 48841, 49284, 49729,
      64             :     50176, 50625, 51076, 51529, 51984, 52441, 52900, 53361, 53824, 54289, 54756, 55225, 55696, 56169, 56644, 57121,
      65             :     57600, 58081, 58564, 59049, 59536, 60025, 60516, 61009, 61504, 62001, 62500, 63001, 63504, 64009, 64516, 65025,
      66             : };
      67             : 
      68           0 : static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
      69             :                   ptrdiff_t stride, int h)
      70             : {
      71           0 :     int s = 0, i;
      72           0 :     const uint32_t *sq = ff_square_tab + 256;
      73             : 
      74           0 :     for (i = 0; i < h; i++) {
      75           0 :         s    += sq[pix1[0] - pix2[0]];
      76           0 :         s    += sq[pix1[1] - pix2[1]];
      77           0 :         s    += sq[pix1[2] - pix2[2]];
      78           0 :         s    += sq[pix1[3] - pix2[3]];
      79           0 :         pix1 += stride;
      80           0 :         pix2 += stride;
      81             :     }
      82           0 :     return s;
      83             : }
      84             : 
      85     5002254 : static int sse8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
      86             :                   ptrdiff_t stride, int h)
      87             : {
      88     5002254 :     int s = 0, i;
      89     5002254 :     const uint32_t *sq = ff_square_tab + 256;
      90             : 
      91    45020286 :     for (i = 0; i < h; i++) {
      92    40018032 :         s    += sq[pix1[0] - pix2[0]];
      93    40018032 :         s    += sq[pix1[1] - pix2[1]];
      94    40018032 :         s    += sq[pix1[2] - pix2[2]];
      95    40018032 :         s    += sq[pix1[3] - pix2[3]];
      96    40018032 :         s    += sq[pix1[4] - pix2[4]];
      97    40018032 :         s    += sq[pix1[5] - pix2[5]];
      98    40018032 :         s    += sq[pix1[6] - pix2[6]];
      99    40018032 :         s    += sq[pix1[7] - pix2[7]];
     100    40018032 :         pix1 += stride;
     101    40018032 :         pix2 += stride;
     102             :     }
     103     5002254 :     return s;
     104             : }
     105             : 
     106     7099240 : static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     107             :                    ptrdiff_t stride, int h)
     108             : {
     109     7099240 :     int s = 0, i;
     110     7099240 :     const uint32_t *sq = ff_square_tab + 256;
     111             : 
     112   120687080 :     for (i = 0; i < h; i++) {
     113   113587840 :         s += sq[pix1[0]  - pix2[0]];
     114   113587840 :         s += sq[pix1[1]  - pix2[1]];
     115   113587840 :         s += sq[pix1[2]  - pix2[2]];
     116   113587840 :         s += sq[pix1[3]  - pix2[3]];
     117   113587840 :         s += sq[pix1[4]  - pix2[4]];
     118   113587840 :         s += sq[pix1[5]  - pix2[5]];
     119   113587840 :         s += sq[pix1[6]  - pix2[6]];
     120   113587840 :         s += sq[pix1[7]  - pix2[7]];
     121   113587840 :         s += sq[pix1[8]  - pix2[8]];
     122   113587840 :         s += sq[pix1[9]  - pix2[9]];
     123   113587840 :         s += sq[pix1[10] - pix2[10]];
     124   113587840 :         s += sq[pix1[11] - pix2[11]];
     125   113587840 :         s += sq[pix1[12] - pix2[12]];
     126   113587840 :         s += sq[pix1[13] - pix2[13]];
     127   113587840 :         s += sq[pix1[14] - pix2[14]];
     128   113587840 :         s += sq[pix1[15] - pix2[15]];
     129             : 
     130   113587840 :         pix1 += stride;
     131   113587840 :         pix2 += stride;
     132             :     }
     133     7099240 :     return s;
     134             : }
     135             : 
     136           0 : static int sum_abs_dctelem_c(int16_t *block)
     137             : {
     138           0 :     int sum = 0, i;
     139             : 
     140           0 :     for (i = 0; i < 64; i++)
     141           0 :         sum += FFABS(block[i]);
     142           0 :     return sum;
     143             : }
     144             : 
     145             : #define avg2(a, b) (((a) + (b) + 1) >> 1)
     146             : #define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
     147             : 
     148    40702164 : static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     149             :                               ptrdiff_t stride, int h)
     150             : {
     151    40702164 :     int s = 0, i;
     152             : 
     153   611390620 :     for (i = 0; i < h; i++) {
     154   570688456 :         s    += abs(pix1[0]  - pix2[0]);
     155   570688456 :         s    += abs(pix1[1]  - pix2[1]);
     156   570688456 :         s    += abs(pix1[2]  - pix2[2]);
     157   570688456 :         s    += abs(pix1[3]  - pix2[3]);
     158   570688456 :         s    += abs(pix1[4]  - pix2[4]);
     159   570688456 :         s    += abs(pix1[5]  - pix2[5]);
     160   570688456 :         s    += abs(pix1[6]  - pix2[6]);
     161   570688456 :         s    += abs(pix1[7]  - pix2[7]);
     162   570688456 :         s    += abs(pix1[8]  - pix2[8]);
     163   570688456 :         s    += abs(pix1[9]  - pix2[9]);
     164   570688456 :         s    += abs(pix1[10] - pix2[10]);
     165   570688456 :         s    += abs(pix1[11] - pix2[11]);
     166   570688456 :         s    += abs(pix1[12] - pix2[12]);
     167   570688456 :         s    += abs(pix1[13] - pix2[13]);
     168   570688456 :         s    += abs(pix1[14] - pix2[14]);
     169   570688456 :         s    += abs(pix1[15] - pix2[15]);
     170   570688456 :         pix1 += stride;
     171   570688456 :         pix2 += stride;
     172             :     }
     173    40702164 :     return s;
     174             : }
     175             : 
     176           0 : static inline int pix_median_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     177             :                              ptrdiff_t stride, int h)
     178             : {
     179           0 :     int s = 0, i, j;
     180             : 
     181             : #define V(x) (pix1[x] - pix2[x])
     182             : 
     183           0 :     s    += abs(V(0));
     184           0 :     s    += abs(V(1) - V(0));
     185           0 :     s    += abs(V(2) - V(1));
     186           0 :     s    += abs(V(3) - V(2));
     187           0 :     s    += abs(V(4) - V(3));
     188           0 :     s    += abs(V(5) - V(4));
     189           0 :     s    += abs(V(6) - V(5));
     190           0 :     s    += abs(V(7) - V(6));
     191           0 :     s    += abs(V(8) - V(7));
     192           0 :     s    += abs(V(9) - V(8));
     193           0 :     s    += abs(V(10) - V(9));
     194           0 :     s    += abs(V(11) - V(10));
     195           0 :     s    += abs(V(12) - V(11));
     196           0 :     s    += abs(V(13) - V(12));
     197           0 :     s    += abs(V(14) - V(13));
     198           0 :     s    += abs(V(15) - V(14));
     199             : 
     200           0 :     pix1 += stride;
     201           0 :     pix2 += stride;
     202             : 
     203           0 :     for (i = 1; i < h; i++) {
     204           0 :         s    += abs(V(0) - V(-stride));
     205           0 :         for (j = 1; j < 16; j++)
     206           0 :             s    += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1)));
     207           0 :         pix1 += stride;
     208           0 :         pix2 += stride;
     209             : 
     210             :     }
     211             : #undef V
     212           0 :     return s;
     213             : }
     214             : 
     215     3046787 : static int pix_abs16_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     216             :                           ptrdiff_t stride, int h)
     217             : {
     218     3046787 :     int s = 0, i;
     219             : 
     220    42483835 :     for (i = 0; i < h; i++) {
     221    39437048 :         s    += abs(pix1[0]  - avg2(pix2[0],  pix2[1]));
     222    39437048 :         s    += abs(pix1[1]  - avg2(pix2[1],  pix2[2]));
     223    39437048 :         s    += abs(pix1[2]  - avg2(pix2[2],  pix2[3]));
     224    39437048 :         s    += abs(pix1[3]  - avg2(pix2[3],  pix2[4]));
     225    39437048 :         s    += abs(pix1[4]  - avg2(pix2[4],  pix2[5]));
     226    39437048 :         s    += abs(pix1[5]  - avg2(pix2[5],  pix2[6]));
     227    39437048 :         s    += abs(pix1[6]  - avg2(pix2[6],  pix2[7]));
     228    39437048 :         s    += abs(pix1[7]  - avg2(pix2[7],  pix2[8]));
     229    39437048 :         s    += abs(pix1[8]  - avg2(pix2[8],  pix2[9]));
     230    39437048 :         s    += abs(pix1[9]  - avg2(pix2[9],  pix2[10]));
     231    39437048 :         s    += abs(pix1[10] - avg2(pix2[10], pix2[11]));
     232    39437048 :         s    += abs(pix1[11] - avg2(pix2[11], pix2[12]));
     233    39437048 :         s    += abs(pix1[12] - avg2(pix2[12], pix2[13]));
     234    39437048 :         s    += abs(pix1[13] - avg2(pix2[13], pix2[14]));
     235    39437048 :         s    += abs(pix1[14] - avg2(pix2[14], pix2[15]));
     236    39437048 :         s    += abs(pix1[15] - avg2(pix2[15], pix2[16]));
     237    39437048 :         pix1 += stride;
     238    39437048 :         pix2 += stride;
     239             :     }
     240     3046787 :     return s;
     241             : }
     242             : 
     243     3046787 : static int pix_abs16_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     244             :                           ptrdiff_t stride, int h)
     245             : {
     246     3046787 :     int s = 0, i;
     247     3046787 :     uint8_t *pix3 = pix2 + stride;
     248             : 
     249    42483835 :     for (i = 0; i < h; i++) {
     250    39437048 :         s    += abs(pix1[0]  - avg2(pix2[0],  pix3[0]));
     251    39437048 :         s    += abs(pix1[1]  - avg2(pix2[1],  pix3[1]));
     252    39437048 :         s    += abs(pix1[2]  - avg2(pix2[2],  pix3[2]));
     253    39437048 :         s    += abs(pix1[3]  - avg2(pix2[3],  pix3[3]));
     254    39437048 :         s    += abs(pix1[4]  - avg2(pix2[4],  pix3[4]));
     255    39437048 :         s    += abs(pix1[5]  - avg2(pix2[5],  pix3[5]));
     256    39437048 :         s    += abs(pix1[6]  - avg2(pix2[6],  pix3[6]));
     257    39437048 :         s    += abs(pix1[7]  - avg2(pix2[7],  pix3[7]));
     258    39437048 :         s    += abs(pix1[8]  - avg2(pix2[8],  pix3[8]));
     259    39437048 :         s    += abs(pix1[9]  - avg2(pix2[9],  pix3[9]));
     260    39437048 :         s    += abs(pix1[10] - avg2(pix2[10], pix3[10]));
     261    39437048 :         s    += abs(pix1[11] - avg2(pix2[11], pix3[11]));
     262    39437048 :         s    += abs(pix1[12] - avg2(pix2[12], pix3[12]));
     263    39437048 :         s    += abs(pix1[13] - avg2(pix2[13], pix3[13]));
     264    39437048 :         s    += abs(pix1[14] - avg2(pix2[14], pix3[14]));
     265    39437048 :         s    += abs(pix1[15] - avg2(pix2[15], pix3[15]));
     266    39437048 :         pix1 += stride;
     267    39437048 :         pix2 += stride;
     268    39437048 :         pix3 += stride;
     269             :     }
     270     3046787 :     return s;
     271             : }
     272             : 
     273     6093574 : static int pix_abs16_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     274             :                            ptrdiff_t stride, int h)
     275             : {
     276     6093574 :     int s = 0, i;
     277     6093574 :     uint8_t *pix3 = pix2 + stride;
     278             : 
     279    84967670 :     for (i = 0; i < h; i++) {
     280    78874096 :         s    += abs(pix1[0]  - avg4(pix2[0],  pix2[1],  pix3[0],  pix3[1]));
     281    78874096 :         s    += abs(pix1[1]  - avg4(pix2[1],  pix2[2],  pix3[1],  pix3[2]));
     282    78874096 :         s    += abs(pix1[2]  - avg4(pix2[2],  pix2[3],  pix3[2],  pix3[3]));
     283    78874096 :         s    += abs(pix1[3]  - avg4(pix2[3],  pix2[4],  pix3[3],  pix3[4]));
     284    78874096 :         s    += abs(pix1[4]  - avg4(pix2[4],  pix2[5],  pix3[4],  pix3[5]));
     285    78874096 :         s    += abs(pix1[5]  - avg4(pix2[5],  pix2[6],  pix3[5],  pix3[6]));
     286    78874096 :         s    += abs(pix1[6]  - avg4(pix2[6],  pix2[7],  pix3[6],  pix3[7]));
     287    78874096 :         s    += abs(pix1[7]  - avg4(pix2[7],  pix2[8],  pix3[7],  pix3[8]));
     288    78874096 :         s    += abs(pix1[8]  - avg4(pix2[8],  pix2[9],  pix3[8],  pix3[9]));
     289    78874096 :         s    += abs(pix1[9]  - avg4(pix2[9],  pix2[10], pix3[9],  pix3[10]));
     290    78874096 :         s    += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
     291    78874096 :         s    += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
     292    78874096 :         s    += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
     293    78874096 :         s    += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
     294    78874096 :         s    += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
     295    78874096 :         s    += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
     296    78874096 :         pix1 += stride;
     297    78874096 :         pix2 += stride;
     298    78874096 :         pix3 += stride;
     299             :     }
     300     6093574 :     return s;
     301             : }
     302             : 
     303    31769153 : static inline int pix_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     304             :                              ptrdiff_t stride, int h)
     305             : {
     306    31769153 :     int s = 0, i;
     307             : 
     308   285922377 :     for (i = 0; i < h; i++) {
     309   254153224 :         s    += abs(pix1[0] - pix2[0]);
     310   254153224 :         s    += abs(pix1[1] - pix2[1]);
     311   254153224 :         s    += abs(pix1[2] - pix2[2]);
     312   254153224 :         s    += abs(pix1[3] - pix2[3]);
     313   254153224 :         s    += abs(pix1[4] - pix2[4]);
     314   254153224 :         s    += abs(pix1[5] - pix2[5]);
     315   254153224 :         s    += abs(pix1[6] - pix2[6]);
     316   254153224 :         s    += abs(pix1[7] - pix2[7]);
     317   254153224 :         pix1 += stride;
     318   254153224 :         pix2 += stride;
     319             :     }
     320    31769153 :     return s;
     321             : }
     322             : 
     323           0 : static inline int pix_median_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     324             :                              ptrdiff_t stride, int h)
     325             : {
     326           0 :     int s = 0, i, j;
     327             : 
     328             : #define V(x) (pix1[x] - pix2[x])
     329             : 
     330           0 :     s    += abs(V(0));
     331           0 :     s    += abs(V(1) - V(0));
     332           0 :     s    += abs(V(2) - V(1));
     333           0 :     s    += abs(V(3) - V(2));
     334           0 :     s    += abs(V(4) - V(3));
     335           0 :     s    += abs(V(5) - V(4));
     336           0 :     s    += abs(V(6) - V(5));
     337           0 :     s    += abs(V(7) - V(6));
     338             : 
     339           0 :     pix1 += stride;
     340           0 :     pix2 += stride;
     341             : 
     342           0 :     for (i = 1; i < h; i++) {
     343           0 :         s    += abs(V(0) - V(-stride));
     344           0 :         for (j = 1; j < 8; j++)
     345           0 :             s    += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1)));
     346           0 :         pix1 += stride;
     347           0 :         pix2 += stride;
     348             : 
     349             :     }
     350             : #undef V
     351           0 :     return s;
     352             : }
     353             : 
     354      837549 : static int pix_abs8_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     355             :                          ptrdiff_t stride, int h)
     356             : {
     357      837549 :     int s = 0, i;
     358             : 
     359     7537941 :     for (i = 0; i < h; i++) {
     360     6700392 :         s    += abs(pix1[0] - avg2(pix2[0], pix2[1]));
     361     6700392 :         s    += abs(pix1[1] - avg2(pix2[1], pix2[2]));
     362     6700392 :         s    += abs(pix1[2] - avg2(pix2[2], pix2[3]));
     363     6700392 :         s    += abs(pix1[3] - avg2(pix2[3], pix2[4]));
     364     6700392 :         s    += abs(pix1[4] - avg2(pix2[4], pix2[5]));
     365     6700392 :         s    += abs(pix1[5] - avg2(pix2[5], pix2[6]));
     366     6700392 :         s    += abs(pix1[6] - avg2(pix2[6], pix2[7]));
     367     6700392 :         s    += abs(pix1[7] - avg2(pix2[7], pix2[8]));
     368     6700392 :         pix1 += stride;
     369     6700392 :         pix2 += stride;
     370             :     }
     371      837549 :     return s;
     372             : }
     373             : 
     374      837549 : static int pix_abs8_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     375             :                          ptrdiff_t stride, int h)
     376             : {
     377      837549 :     int s = 0, i;
     378      837549 :     uint8_t *pix3 = pix2 + stride;
     379             : 
     380     7537941 :     for (i = 0; i < h; i++) {
     381     6700392 :         s    += abs(pix1[0] - avg2(pix2[0], pix3[0]));
     382     6700392 :         s    += abs(pix1[1] - avg2(pix2[1], pix3[1]));
     383     6700392 :         s    += abs(pix1[2] - avg2(pix2[2], pix3[2]));
     384     6700392 :         s    += abs(pix1[3] - avg2(pix2[3], pix3[3]));
     385     6700392 :         s    += abs(pix1[4] - avg2(pix2[4], pix3[4]));
     386     6700392 :         s    += abs(pix1[5] - avg2(pix2[5], pix3[5]));
     387     6700392 :         s    += abs(pix1[6] - avg2(pix2[6], pix3[6]));
     388     6700392 :         s    += abs(pix1[7] - avg2(pix2[7], pix3[7]));
     389     6700392 :         pix1 += stride;
     390     6700392 :         pix2 += stride;
     391     6700392 :         pix3 += stride;
     392             :     }
     393      837549 :     return s;
     394             : }
     395             : 
     396     1675098 : static int pix_abs8_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     397             :                           ptrdiff_t stride, int h)
     398             : {
     399     1675098 :     int s = 0, i;
     400     1675098 :     uint8_t *pix3 = pix2 + stride;
     401             : 
     402    15075882 :     for (i = 0; i < h; i++) {
     403    13400784 :         s    += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
     404    13400784 :         s    += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
     405    13400784 :         s    += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
     406    13400784 :         s    += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
     407    13400784 :         s    += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
     408    13400784 :         s    += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
     409    13400784 :         s    += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
     410    13400784 :         s    += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
     411    13400784 :         pix1 += stride;
     412    13400784 :         pix2 += stride;
     413    13400784 :         pix3 += stride;
     414             :     }
     415     1675098 :     return s;
     416             : }
     417             : 
     418      748141 : static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
     419             :                     ptrdiff_t stride, int h)
     420             : {
     421      748141 :     int score1 = 0, score2 = 0, x, y;
     422             : 
     423    12718397 :     for (y = 0; y < h; y++) {
     424   203494352 :         for (x = 0; x < 16; x++)
     425   191524096 :             score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
     426    11970256 :         if (y + 1 < h) {
     427   179553840 :             for (x = 0; x < 15; x++)
     428   336663450 :                 score2 += FFABS(s1[x]     - s1[x + stride] -
     429   168331725 :                                 s1[x + 1] + s1[x + stride + 1]) -
     430   168331725 :                           FFABS(s2[x]     - s2[x + stride] -
     431             :                                 s2[x + 1] + s2[x + stride + 1]);
     432             :         }
     433    11970256 :         s1 += stride;
     434    11970256 :         s2 += stride;
     435             :     }
     436             : 
     437      748141 :     if (c)
     438      748141 :         return score1 + FFABS(score2) * c->avctx->nsse_weight;
     439             :     else
     440           0 :         return score1 + FFABS(score2) * 8;
     441             : }
     442             : 
     443           0 : static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
     444             :                    ptrdiff_t stride, int h)
     445             : {
     446           0 :     int score1 = 0, score2 = 0, x, y;
     447             : 
     448           0 :     for (y = 0; y < h; y++) {
     449           0 :         for (x = 0; x < 8; x++)
     450           0 :             score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
     451           0 :         if (y + 1 < h) {
     452           0 :             for (x = 0; x < 7; x++)
     453           0 :                 score2 += FFABS(s1[x]     - s1[x + stride] -
     454           0 :                                 s1[x + 1] + s1[x + stride + 1]) -
     455           0 :                           FFABS(s2[x]     - s2[x + stride] -
     456             :                                 s2[x + 1] + s2[x + stride + 1]);
     457             :         }
     458           0 :         s1 += stride;
     459           0 :         s2 += stride;
     460             :     }
     461             : 
     462           0 :     if (c)
     463           0 :         return score1 + FFABS(score2) * c->avctx->nsse_weight;
     464             :     else
     465           0 :         return score1 + FFABS(score2) * 8;
     466             : }
     467             : 
     468           0 : static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b,
     469             :                     ptrdiff_t stride, int h)
     470             : {
     471           0 :     return 0;
     472             : }
     473             : 
     474       38244 : void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type)
     475             : {
     476             :     int i;
     477             : 
     478       38244 :     memset(cmp, 0, sizeof(void *) * 6);
     479             : 
     480      267708 :     for (i = 0; i < 6; i++) {
     481      229464 :         switch (type & 0xFF) {
     482      209136 :         case FF_CMP_SAD:
     483      209136 :             cmp[i] = c->sad[i];
     484      209136 :             break;
     485           0 :         case FF_CMP_MEDIAN_SAD:
     486           0 :             cmp[i] = c->median_sad[i];
     487           0 :             break;
     488        7200 :         case FF_CMP_SATD:
     489        7200 :             cmp[i] = c->hadamard8_diff[i];
     490        7200 :             break;
     491        2748 :         case FF_CMP_SSE:
     492        2748 :             cmp[i] = c->sse[i];
     493        2748 :             break;
     494           0 :         case FF_CMP_DCT:
     495           0 :             cmp[i] = c->dct_sad[i];
     496           0 :             break;
     497           0 :         case FF_CMP_DCT264:
     498           0 :             cmp[i] = c->dct264_sad[i];
     499           0 :             break;
     500        1068 :         case FF_CMP_DCTMAX:
     501        1068 :             cmp[i] = c->dct_max[i];
     502        1068 :             break;
     503           0 :         case FF_CMP_PSNR:
     504           0 :             cmp[i] = c->quant_psnr[i];
     505           0 :             break;
     506           0 :         case FF_CMP_BIT:
     507           0 :             cmp[i] = c->bit[i];
     508           0 :             break;
     509           0 :         case FF_CMP_RD:
     510           0 :             cmp[i] = c->rd[i];
     511           0 :             break;
     512        1176 :         case FF_CMP_VSAD:
     513        1176 :             cmp[i] = c->vsad[i];
     514        1176 :             break;
     515           0 :         case FF_CMP_VSSE:
     516           0 :             cmp[i] = c->vsse[i];
     517           0 :             break;
     518           0 :         case FF_CMP_ZERO:
     519           0 :             cmp[i] = zero_cmp;
     520           0 :             break;
     521        4824 :         case FF_CMP_NSSE:
     522        4824 :             cmp[i] = c->nsse[i];
     523        4824 :             break;
     524             : #if CONFIG_DWT
     525           0 :         case FF_CMP_W53:
     526           0 :             cmp[i]= c->w53[i];
     527           0 :             break;
     528        3312 :         case FF_CMP_W97:
     529        3312 :             cmp[i]= c->w97[i];
     530        3312 :             break;
     531             : #endif
     532           0 :         default:
     533           0 :             av_log(NULL, AV_LOG_ERROR,
     534             :                    "internal error in cmp function selection\n");
     535             :         }
     536             :     }
     537       38244 : }
     538             : 
     539             : #define BUTTERFLY2(o1, o2, i1, i2)              \
     540             :     o1 = (i1) + (i2);                           \
     541             :     o2 = (i1) - (i2);
     542             : 
     543             : #define BUTTERFLY1(x, y)                        \
     544             :     {                                           \
     545             :         int a, b;                               \
     546             :         a = x;                                  \
     547             :         b = y;                                  \
     548             :         x = a + b;                              \
     549             :         y = a - b;                              \
     550             :     }
     551             : 
     552             : #define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y)))
     553             : 
     554    15481796 : static int hadamard8_diff8x8_c(MpegEncContext *s, uint8_t *dst,
     555             :                                uint8_t *src, ptrdiff_t stride, int h)
     556             : {
     557    15481796 :     int i, temp[64], sum = 0;
     558             : 
     559             :     av_assert2(h == 8);
     560             : 
     561   139336164 :     for (i = 0; i < 8; i++) {
     562             :         // FIXME: try pointer walks
     563   123854368 :         BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
     564             :                    src[stride * i + 0] - dst[stride * i + 0],
     565             :                    src[stride * i + 1] - dst[stride * i + 1]);
     566   123854368 :         BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
     567             :                    src[stride * i + 2] - dst[stride * i + 2],
     568             :                    src[stride * i + 3] - dst[stride * i + 3]);
     569   123854368 :         BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
     570             :                    src[stride * i + 4] - dst[stride * i + 4],
     571             :                    src[stride * i + 5] - dst[stride * i + 5]);
     572   123854368 :         BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
     573             :                    src[stride * i + 6] - dst[stride * i + 6],
     574             :                    src[stride * i + 7] - dst[stride * i + 7]);
     575             : 
     576   123854368 :         BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
     577   123854368 :         BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
     578   123854368 :         BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
     579   123854368 :         BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);
     580             : 
     581   123854368 :         BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
     582   123854368 :         BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
     583   123854368 :         BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
     584   123854368 :         BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
     585             :     }
     586             : 
     587   139336164 :     for (i = 0; i < 8; i++) {
     588   123854368 :         BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
     589   123854368 :         BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
     590   123854368 :         BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
     591   123854368 :         BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);
     592             : 
     593   123854368 :         BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
     594   123854368 :         BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
     595   123854368 :         BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
     596   123854368 :         BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);
     597             : 
     598   371563104 :         sum += BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) +
     599   247708736 :                BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) +
     600   247708736 :                BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) +
     601   123854368 :                BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
     602             :     }
     603    15481796 :     return sum;
     604             : }
     605             : 
     606           0 : static int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src,
     607             :                                 uint8_t *dummy, ptrdiff_t stride, int h)
     608             : {
     609           0 :     int i, temp[64], sum = 0;
     610             : 
     611             :     av_assert2(h == 8);
     612             : 
     613           0 :     for (i = 0; i < 8; i++) {
     614             :         // FIXME: try pointer walks
     615           0 :         BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
     616             :                    src[stride * i + 0], src[stride * i + 1]);
     617           0 :         BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
     618             :                    src[stride * i + 2], src[stride * i + 3]);
     619           0 :         BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
     620             :                    src[stride * i + 4], src[stride * i + 5]);
     621           0 :         BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
     622             :                    src[stride * i + 6], src[stride * i + 7]);
     623             : 
     624           0 :         BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
     625           0 :         BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
     626           0 :         BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
     627           0 :         BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);
     628             : 
     629           0 :         BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
     630           0 :         BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
     631           0 :         BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
     632           0 :         BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
     633             :     }
     634             : 
     635           0 :     for (i = 0; i < 8; i++) {
     636           0 :         BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
     637           0 :         BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
     638           0 :         BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
     639           0 :         BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);
     640             : 
     641           0 :         BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
     642           0 :         BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
     643           0 :         BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
     644           0 :         BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);
     645             : 
     646           0 :         sum +=
     647           0 :             BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i])
     648           0 :             + BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i])
     649           0 :             + BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i])
     650           0 :             + BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
     651             :     }
     652             : 
     653           0 :     sum -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean
     654             : 
     655           0 :     return sum;
     656             : }
     657             : 
     658           0 : static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1,
     659             :                         uint8_t *src2, ptrdiff_t stride, int h)
     660             : {
     661           0 :     LOCAL_ALIGNED_16(int16_t, temp, [64]);
     662             : 
     663             :     av_assert2(h == 8);
     664             : 
     665           0 :     s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
     666           0 :     s->fdsp.fdct(temp);
     667           0 :     return s->mecc.sum_abs_dctelem(temp);
     668             : }
     669             : 
     670             : #if CONFIG_GPL
     671             : #define DCT8_1D                                         \
     672             :     {                                                   \
     673             :         const int s07 = SRC(0) + SRC(7);                \
     674             :         const int s16 = SRC(1) + SRC(6);                \
     675             :         const int s25 = SRC(2) + SRC(5);                \
     676             :         const int s34 = SRC(3) + SRC(4);                \
     677             :         const int a0  = s07 + s34;                      \
     678             :         const int a1  = s16 + s25;                      \
     679             :         const int a2  = s07 - s34;                      \
     680             :         const int a3  = s16 - s25;                      \
     681             :         const int d07 = SRC(0) - SRC(7);                \
     682             :         const int d16 = SRC(1) - SRC(6);                \
     683             :         const int d25 = SRC(2) - SRC(5);                \
     684             :         const int d34 = SRC(3) - SRC(4);                \
     685             :         const int a4  = d16 + d25 + (d07 + (d07 >> 1)); \
     686             :         const int a5  = d07 - d34 - (d25 + (d25 >> 1)); \
     687             :         const int a6  = d07 + d34 - (d16 + (d16 >> 1)); \
     688             :         const int a7  = d16 - d25 + (d34 + (d34 >> 1)); \
     689             :         DST(0, a0 + a1);                                \
     690             :         DST(1, a4 + (a7 >> 2));                         \
     691             :         DST(2, a2 + (a3 >> 1));                         \
     692             :         DST(3, a5 + (a6 >> 2));                         \
     693             :         DST(4, a0 - a1);                                \
     694             :         DST(5, a6 - (a5 >> 2));                         \
     695             :         DST(6, (a2 >> 1) - a3);                         \
     696             :         DST(7, (a4 >> 2) - a7);                         \
     697             :     }
     698             : 
     699           0 : static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1,
     700             :                            uint8_t *src2, ptrdiff_t stride, int h)
     701             : {
     702             :     int16_t dct[8][8];
     703           0 :     int i, sum = 0;
     704             : 
     705           0 :     s->pdsp.diff_pixels_unaligned(dct[0], src1, src2, stride);
     706             : 
     707             : #define SRC(x) dct[i][x]
     708             : #define DST(x, v) dct[i][x] = v
     709           0 :     for (i = 0; i < 8; i++)
     710           0 :         DCT8_1D
     711             : #undef SRC
     712             : #undef DST
     713             : 
     714             : #define SRC(x) dct[x][i]
     715             : #define DST(x, v) sum += FFABS(v)
     716           0 :         for (i = 0; i < 8; i++)
     717           0 :             DCT8_1D
     718             : #undef SRC
     719             : #undef DST
     720           0 :             return sum;
     721             : }
     722             : #endif
     723             : 
     724           0 : static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1,
     725             :                         uint8_t *src2, ptrdiff_t stride, int h)
     726             : {
     727           0 :     LOCAL_ALIGNED_16(int16_t, temp, [64]);
     728           0 :     int sum = 0, i;
     729             : 
     730             :     av_assert2(h == 8);
     731             : 
     732           0 :     s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
     733           0 :     s->fdsp.fdct(temp);
     734             : 
     735           0 :     for (i = 0; i < 64; i++)
     736           0 :         sum = FFMAX(sum, FFABS(temp[i]));
     737             : 
     738           0 :     return sum;
     739             : }
     740             : 
     741           0 : static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
     742             :                            uint8_t *src2, ptrdiff_t stride, int h)
     743             : {
     744           0 :     LOCAL_ALIGNED_16(int16_t, temp, [64 * 2]);
     745           0 :     int16_t *const bak = temp + 64;
     746           0 :     int sum = 0, i;
     747             : 
     748             :     av_assert2(h == 8);
     749           0 :     s->mb_intra = 0;
     750             : 
     751           0 :     s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
     752             : 
     753           0 :     memcpy(bak, temp, 64 * sizeof(int16_t));
     754             : 
     755           0 :     s->block_last_index[0 /* FIXME */] =
     756           0 :         s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
     757           0 :     s->dct_unquantize_inter(s, temp, 0, s->qscale);
     758           0 :     ff_simple_idct_int16_8bit(temp); // FIXME
     759             : 
     760           0 :     for (i = 0; i < 64; i++)
     761           0 :         sum += (temp[i] - bak[i]) * (temp[i] - bak[i]);
     762             : 
     763           0 :     return sum;
     764             : }
     765             : 
     766           0 : static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
     767             :                    ptrdiff_t stride, int h)
     768             : {
     769           0 :     const uint8_t *scantable = s->intra_scantable.permutated;
     770           0 :     LOCAL_ALIGNED_16(int16_t, temp, [64]);
     771           0 :     LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
     772           0 :     LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
     773             :     int i, last, run, bits, level, distortion, start_i;
     774           0 :     const int esc_length = s->ac_esc_length;
     775             :     uint8_t *length, *last_length;
     776             : 
     777             :     av_assert2(h == 8);
     778             : 
     779           0 :     copy_block8(lsrc1, src1, 8, stride, 8);
     780           0 :     copy_block8(lsrc2, src2, 8, stride, 8);
     781             : 
     782           0 :     s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8);
     783             : 
     784           0 :     s->block_last_index[0 /* FIXME */] =
     785             :     last                               =
     786           0 :         s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
     787             : 
     788           0 :     bits = 0;
     789             : 
     790           0 :     if (s->mb_intra) {
     791           0 :         start_i     = 1;
     792           0 :         length      = s->intra_ac_vlc_length;
     793           0 :         last_length = s->intra_ac_vlc_last_length;
     794           0 :         bits       += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
     795             :     } else {
     796           0 :         start_i     = 0;
     797           0 :         length      = s->inter_ac_vlc_length;
     798           0 :         last_length = s->inter_ac_vlc_last_length;
     799             :     }
     800             : 
     801           0 :     if (last >= start_i) {
     802           0 :         run = 0;
     803           0 :         for (i = start_i; i < last; i++) {
     804           0 :             int j = scantable[i];
     805           0 :             level = temp[j];
     806             : 
     807           0 :             if (level) {
     808           0 :                 level += 64;
     809           0 :                 if ((level & (~127)) == 0)
     810           0 :                     bits += length[UNI_AC_ENC_INDEX(run, level)];
     811             :                 else
     812           0 :                     bits += esc_length;
     813           0 :                 run = 0;
     814             :             } else
     815           0 :                 run++;
     816             :         }
     817           0 :         i = scantable[last];
     818             : 
     819           0 :         level = temp[i] + 64;
     820             : 
     821             :         av_assert2(level - 64);
     822             : 
     823           0 :         if ((level & (~127)) == 0) {
     824           0 :             bits += last_length[UNI_AC_ENC_INDEX(run, level)];
     825             :         } else
     826           0 :             bits += esc_length;
     827             :     }
     828             : 
     829           0 :     if (last >= 0) {
     830           0 :         if (s->mb_intra)
     831           0 :             s->dct_unquantize_intra(s, temp, 0, s->qscale);
     832             :         else
     833           0 :             s->dct_unquantize_inter(s, temp, 0, s->qscale);
     834             :     }
     835             : 
     836           0 :     s->idsp.idct_add(lsrc2, 8, temp);
     837             : 
     838           0 :     distortion = s->mecc.sse[1](NULL, lsrc2, lsrc1, 8, 8);
     839             : 
     840           0 :     return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7);
     841             : }
     842             : 
     843           0 : static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
     844             :                     ptrdiff_t stride, int h)
     845             : {
     846           0 :     const uint8_t *scantable = s->intra_scantable.permutated;
     847           0 :     LOCAL_ALIGNED_16(int16_t, temp, [64]);
     848             :     int i, last, run, bits, level, start_i;
     849           0 :     const int esc_length = s->ac_esc_length;
     850             :     uint8_t *length, *last_length;
     851             : 
     852             :     av_assert2(h == 8);
     853             : 
     854           0 :     s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
     855             : 
     856           0 :     s->block_last_index[0 /* FIXME */] =
     857             :     last                               =
     858           0 :         s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
     859             : 
     860           0 :     bits = 0;
     861             : 
     862           0 :     if (s->mb_intra) {
     863           0 :         start_i     = 1;
     864           0 :         length      = s->intra_ac_vlc_length;
     865           0 :         last_length = s->intra_ac_vlc_last_length;
     866           0 :         bits       += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
     867             :     } else {
     868           0 :         start_i     = 0;
     869           0 :         length      = s->inter_ac_vlc_length;
     870           0 :         last_length = s->inter_ac_vlc_last_length;
     871             :     }
     872             : 
     873           0 :     if (last >= start_i) {
     874           0 :         run = 0;
     875           0 :         for (i = start_i; i < last; i++) {
     876           0 :             int j = scantable[i];
     877           0 :             level = temp[j];
     878             : 
     879           0 :             if (level) {
     880           0 :                 level += 64;
     881           0 :                 if ((level & (~127)) == 0)
     882           0 :                     bits += length[UNI_AC_ENC_INDEX(run, level)];
     883             :                 else
     884           0 :                     bits += esc_length;
     885           0 :                 run = 0;
     886             :             } else
     887           0 :                 run++;
     888             :         }
     889           0 :         i = scantable[last];
     890             : 
     891           0 :         level = temp[i] + 64;
     892             : 
     893             :         av_assert2(level - 64);
     894             : 
     895           0 :         if ((level & (~127)) == 0)
     896           0 :             bits += last_length[UNI_AC_ENC_INDEX(run, level)];
     897             :         else
     898           0 :             bits += esc_length;
     899             :     }
     900             : 
     901           0 :     return bits;
     902             : }
     903             : 
     904             : #define VSAD_INTRA(size)                                                \
     905             : static int vsad_intra ## size ## _c(MpegEncContext *c,                  \
     906             :                                     uint8_t *s, uint8_t *dummy,         \
     907             :                                     ptrdiff_t stride, int h)            \
     908             : {                                                                       \
     909             :     int score = 0, x, y;                                                \
     910             :                                                                         \
     911             :     for (y = 1; y < h; y++) {                                           \
     912             :         for (x = 0; x < size; x += 4) {                                 \
     913             :             score += FFABS(s[x]     - s[x + stride])     +              \
     914             :                      FFABS(s[x + 1] - s[x + stride + 1]) +              \
     915             :                      FFABS(s[x + 2] - s[x + 2 + stride]) +              \
     916             :                      FFABS(s[x + 3] - s[x + 3 + stride]);               \
     917             :         }                                                               \
     918             :         s += stride;                                                    \
     919             :     }                                                                   \
     920             :                                                                         \
     921             :     return score;                                                       \
     922             : }
     923           0 : VSAD_INTRA(8)
     924     1231444 : VSAD_INTRA(16)
     925             : 
     926             : #define VSAD(size)                                                             \
     927             : static int vsad ## size ## _c(MpegEncContext *c,                               \
     928             :                               uint8_t *s1, uint8_t *s2,                        \
     929             :                               ptrdiff_t stride, int h)                               \
     930             : {                                                                              \
     931             :     int score = 0, x, y;                                                       \
     932             :                                                                                \
     933             :     for (y = 1; y < h; y++) {                                                  \
     934             :         for (x = 0; x < size; x++)                                             \
     935             :             score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);   \
     936             :         s1 += stride;                                                          \
     937             :         s2 += stride;                                                          \
     938             :     }                                                                          \
     939             :                                                                                \
     940             :     return score;                                                              \
     941             : }
     942           0 : VSAD(8)
     943     1915022 : VSAD(16)
     944             : 
     945             : #define SQ(a) ((a) * (a))
     946             : #define VSSE_INTRA(size)                                                \
     947             : static int vsse_intra ## size ## _c(MpegEncContext *c,                  \
     948             :                                     uint8_t *s, uint8_t *dummy,         \
     949             :                                     ptrdiff_t stride, int h)            \
     950             : {                                                                       \
     951             :     int score = 0, x, y;                                                \
     952             :                                                                         \
     953             :     for (y = 1; y < h; y++) {                                           \
     954             :         for (x = 0; x < size; x += 4) {                                 \
     955             :             score += SQ(s[x]     - s[x + stride]) +                     \
     956             :                      SQ(s[x + 1] - s[x + stride + 1]) +                 \
     957             :                      SQ(s[x + 2] - s[x + stride + 2]) +                 \
     958             :                      SQ(s[x + 3] - s[x + stride + 3]);                  \
     959             :         }                                                               \
     960             :         s += stride;                                                    \
     961             :     }                                                                   \
     962             :                                                                         \
     963             :     return score;                                                       \
     964             : }
     965           0 : VSSE_INTRA(8)
     966           0 : VSSE_INTRA(16)
     967             : 
     968             : #define VSSE(size)                                                             \
     969             : static int vsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,     \
     970             :                               ptrdiff_t stride, int h)                         \
     971             : {                                                                              \
     972             :     int score = 0, x, y;                                                       \
     973             :                                                                                \
     974             :     for (y = 1; y < h; y++) {                                                  \
     975             :         for (x = 0; x < size; x++)                                             \
     976             :             score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);      \
     977             :         s1 += stride;                                                          \
     978             :         s2 += stride;                                                          \
     979             :     }                                                                          \
     980             :                                                                                \
     981             :     return score;                                                              \
     982             : }
     983           0 : VSSE(8)
     984           0 : VSSE(16)
     985             : 
     986             : #define WRAPPER8_16_SQ(name8, name16)                                   \
     987             : static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src,        \
     988             :                   ptrdiff_t stride, int h)                              \
     989             : {                                                                       \
     990             :     int score = 0;                                                      \
     991             :                                                                         \
     992             :     score += name8(s, dst, src, stride, 8);                             \
     993             :     score += name8(s, dst + 8, src + 8, stride, 8);                     \
     994             :     if (h == 16) {                                                      \
     995             :         dst   += 8 * stride;                                            \
     996             :         src   += 8 * stride;                                            \
     997             :         score += name8(s, dst, src, stride, 8);                         \
     998             :         score += name8(s, dst + 8, src + 8, stride, 8);                 \
     999             :     }                                                                   \
    1000             :     return score;                                                       \
    1001             : }
    1002             : 
    1003     3613091 : WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
    1004           0 : WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
    1005           0 : WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
    1006             : #if CONFIG_GPL
    1007           0 : WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
    1008             : #endif
    1009           0 : WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
    1010           0 : WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
    1011           0 : WRAPPER8_16_SQ(rd8x8_c, rd16_c)
    1012           0 : WRAPPER8_16_SQ(bit8x8_c, bit16_c)
    1013             : 
    1014       17096 : int ff_check_alignment(void)
    1015             : {
    1016             :     static int did_fail = 0;
    1017       17096 :     LOCAL_ALIGNED_16(int, aligned, [4]);
    1018             : 
    1019       17096 :     if ((intptr_t)aligned & 15) {
    1020           0 :         if (!did_fail) {
    1021             : #if HAVE_MMX || HAVE_ALTIVEC
    1022           0 :             av_log(NULL, AV_LOG_ERROR,
    1023             :                 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
    1024             :                 "and may be very slow or crash. This is not a bug in libavcodec,\n"
    1025             :                 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
    1026             :                 "Do not report crashes to FFmpeg developers.\n");
    1027             : #endif
    1028           0 :             did_fail=1;
    1029             :         }
    1030           0 :         return -1;
    1031             :     }
    1032       17096 :     return 0;
    1033             : }
    1034             : 
    1035         950 : av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx)
    1036             : {
                           /* Fill the function-pointer tables of *c with the C reference
                            * implementations, then call the per-architecture init functions with the
                            * same context. avctx is not used here except to be forwarded to them. */
    1037         950 :     ff_check_alignment(); /* return value is ignored here */
    1038             : 
    1039         950 :     c->sum_abs_dctelem = sum_abs_dctelem_c;
    1040             : 
    1041             :     /* TODO [0] 16  [1] 8 */
                           /* pix_abs[0][*] receives the pix_abs16 variants, pix_abs[1][*] the pix_abs8
                            * variants; the second index selects plain, _x2, _y2, _xy2 in that order. */
    1042         950 :     c->pix_abs[0][0] = pix_abs16_c;
    1043         950 :     c->pix_abs[0][1] = pix_abs16_x2_c;
    1044         950 :     c->pix_abs[0][2] = pix_abs16_y2_c;
    1045         950 :     c->pix_abs[0][3] = pix_abs16_xy2_c;
    1046         950 :     c->pix_abs[1][0] = pix_abs8_c;
    1047         950 :     c->pix_abs[1][1] = pix_abs8_x2_c;
    1048         950 :     c->pix_abs[1][2] = pix_abs8_y2_c;
    1049         950 :     c->pix_abs[1][3] = pix_abs8_xy2_c;
    1050             : 
                           /* SET_CMP_FUNC(name) expands to two assignments:
                            *   c->name[0] = name16_c;  c->name[1] = name8x8_c;
                            * The expansion ends in ';', so call sites are written without one. */
    1051             : #define SET_CMP_FUNC(name)                      \
    1052             :     c->name[0] = name ## 16_c;                  \
    1053             :     c->name[1] = name ## 8x8_c;
    1054             : 
    1055         950 :     SET_CMP_FUNC(hadamard8_diff)
    1056         950 :     c->hadamard8_diff[4] = hadamard8_intra16_c; /* slots 4/5 take the intra variants; 2/3 are not set here */
    1057         950 :     c->hadamard8_diff[5] = hadamard8_intra8x8_c;
    1058         950 :     SET_CMP_FUNC(dct_sad)
    1059         950 :     SET_CMP_FUNC(dct_max)
    1060             : #if CONFIG_GPL
    1061         950 :     SET_CMP_FUNC(dct264_sad)
    1062             : #endif
    1063         950 :     c->sad[0] = pix_abs16_c; /* sad[] reuses the same functions as pix_abs[0][0] / pix_abs[1][0] */
    1064         950 :     c->sad[1] = pix_abs8_c;
    1065         950 :     c->sse[0] = sse16_c;
    1066         950 :     c->sse[1] = sse8_c;
    1067         950 :     c->sse[2] = sse4_c;
    1068         950 :     SET_CMP_FUNC(quant_psnr)
    1069         950 :     SET_CMP_FUNC(rd)
    1070         950 :     SET_CMP_FUNC(bit)
    1071         950 :     c->vsad[0] = vsad16_c;
    1072         950 :     c->vsad[1] = vsad8_c;
    1073         950 :     c->vsad[4] = vsad_intra16_c; /* as with hadamard8_diff, slots 4/5 hold the intra variants */
    1074         950 :     c->vsad[5] = vsad_intra8_c;
    1075         950 :     c->vsse[0] = vsse16_c;
    1076         950 :     c->vsse[1] = vsse8_c;
    1077         950 :     c->vsse[4] = vsse_intra16_c;
    1078         950 :     c->vsse[5] = vsse_intra8_c;
    1079         950 :     c->nsse[0] = nsse16_c;
    1080         950 :     c->nsse[1] = nsse8_c;
    1081             : #if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
    1082         950 :     ff_dsputil_init_dwt(c);
    1083             : #endif
    1084             : 
                           /* Architecture hooks run after the C defaults are in place, so whatever they
                            * store in *c replaces the assignments above. The ARCH_* names are tested with
                            * ordinary if statements rather than #if -- presumably 0/1 constants from
                            * config.h; confirm against the build configuration. */
    1085             :     if (ARCH_ALPHA)
    1086             :         ff_me_cmp_init_alpha(c, avctx);
    1087             :     if (ARCH_ARM)
    1088             :         ff_me_cmp_init_arm(c, avctx);
    1089             :     if (ARCH_PPC)
    1090             :         ff_me_cmp_init_ppc(c, avctx);
    1091             :     if (ARCH_X86)
    1092         950 :         ff_me_cmp_init_x86(c, avctx);
    1093             :     if (ARCH_MIPS)
    1094             :         ff_me_cmp_init_mips(c, avctx);
    1095             : 
                           /* Assigned after the architecture hooks: anything a hook wrote to
                            * median_sad[0..1] is overwritten with the C versions here. */
    1096         950 :     c->median_sad[0] = pix_median_abs16_c;
    1097         950 :     c->median_sad[1] = pix_median_abs8_c;
    1098         950 : }

Generated by: LCOV version 1.13