GCC Code Coverage Report | |||||||||||||||||||||
|
|||||||||||||||||||||
Line | Branch | Exec | Source |
1 |
/* |
||
2 |
* DSP utils |
||
3 |
* Copyright (c) 2000, 2001 Fabrice Bellard |
||
4 |
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
||
5 |
* |
||
6 |
* This file is part of FFmpeg. |
||
7 |
* |
||
8 |
* FFmpeg is free software; you can redistribute it and/or |
||
9 |
* modify it under the terms of the GNU Lesser General Public |
||
10 |
* License as published by the Free Software Foundation; either |
||
11 |
* version 2.1 of the License, or (at your option) any later version. |
||
12 |
* |
||
13 |
* FFmpeg is distributed in the hope that it will be useful, |
||
14 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
15 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
16 |
* Lesser General Public License for more details. |
||
17 |
* |
||
18 |
* You should have received a copy of the GNU Lesser General Public |
||
19 |
* License along with FFmpeg; if not, write to the Free Software |
||
20 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
21 |
*/ |
||
22 |
|||
23 |
#include "libavutil/attributes.h" |
||
24 |
#include "libavutil/internal.h" |
||
25 |
#include "libavutil/mem_internal.h" |
||
26 |
#include "avcodec.h" |
||
27 |
#include "copy_block.h" |
||
28 |
#include "simple_idct.h" |
||
29 |
#include "me_cmp.h" |
||
30 |
#include "mpegvideo.h" |
||
31 |
#include "config.h" |
||
32 |
|||
33 |
/* (i - 256) * (i - 256) */ |
||
34 |
const uint32_t ff_square_tab[512] = { |
||
35 |
65536, 65025, 64516, 64009, 63504, 63001, 62500, 62001, 61504, 61009, 60516, 60025, 59536, 59049, 58564, 58081, |
||
36 |
57600, 57121, 56644, 56169, 55696, 55225, 54756, 54289, 53824, 53361, 52900, 52441, 51984, 51529, 51076, 50625, |
||
37 |
50176, 49729, 49284, 48841, 48400, 47961, 47524, 47089, 46656, 46225, 45796, 45369, 44944, 44521, 44100, 43681, |
||
38 |
43264, 42849, 42436, 42025, 41616, 41209, 40804, 40401, 40000, 39601, 39204, 38809, 38416, 38025, 37636, 37249, |
||
39 |
36864, 36481, 36100, 35721, 35344, 34969, 34596, 34225, 33856, 33489, 33124, 32761, 32400, 32041, 31684, 31329, |
||
40 |
30976, 30625, 30276, 29929, 29584, 29241, 28900, 28561, 28224, 27889, 27556, 27225, 26896, 26569, 26244, 25921, |
||
41 |
25600, 25281, 24964, 24649, 24336, 24025, 23716, 23409, 23104, 22801, 22500, 22201, 21904, 21609, 21316, 21025, |
||
42 |
20736, 20449, 20164, 19881, 19600, 19321, 19044, 18769, 18496, 18225, 17956, 17689, 17424, 17161, 16900, 16641, |
||
43 |
16384, 16129, 15876, 15625, 15376, 15129, 14884, 14641, 14400, 14161, 13924, 13689, 13456, 13225, 12996, 12769, |
||
44 |
12544, 12321, 12100, 11881, 11664, 11449, 11236, 11025, 10816, 10609, 10404, 10201, 10000, 9801, 9604, 9409, |
||
45 |
9216, 9025, 8836, 8649, 8464, 8281, 8100, 7921, 7744, 7569, 7396, 7225, 7056, 6889, 6724, 6561, |
||
46 |
6400, 6241, 6084, 5929, 5776, 5625, 5476, 5329, 5184, 5041, 4900, 4761, 4624, 4489, 4356, 4225, |
||
47 |
4096, 3969, 3844, 3721, 3600, 3481, 3364, 3249, 3136, 3025, 2916, 2809, 2704, 2601, 2500, 2401, |
||
48 |
2304, 2209, 2116, 2025, 1936, 1849, 1764, 1681, 1600, 1521, 1444, 1369, 1296, 1225, 1156, 1089, |
||
49 |
1024, 961, 900, 841, 784, 729, 676, 625, 576, 529, 484, 441, 400, 361, 324, 289, |
||
50 |
256, 225, 196, 169, 144, 121, 100, 81, 64, 49, 36, 25, 16, 9, 4, 1, |
||
51 |
0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, |
||
52 |
256, 289, 324, 361, 400, 441, 484, 529, 576, 625, 676, 729, 784, 841, 900, 961, |
||
53 |
1024, 1089, 1156, 1225, 1296, 1369, 1444, 1521, 1600, 1681, 1764, 1849, 1936, 2025, 2116, 2209, |
||
54 |
2304, 2401, 2500, 2601, 2704, 2809, 2916, 3025, 3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969, |
||
55 |
4096, 4225, 4356, 4489, 4624, 4761, 4900, 5041, 5184, 5329, 5476, 5625, 5776, 5929, 6084, 6241, |
||
56 |
6400, 6561, 6724, 6889, 7056, 7225, 7396, 7569, 7744, 7921, 8100, 8281, 8464, 8649, 8836, 9025, |
||
57 |
9216, 9409, 9604, 9801, 10000, 10201, 10404, 10609, 10816, 11025, 11236, 11449, 11664, 11881, 12100, 12321, |
||
58 |
12544, 12769, 12996, 13225, 13456, 13689, 13924, 14161, 14400, 14641, 14884, 15129, 15376, 15625, 15876, 16129, |
||
59 |
16384, 16641, 16900, 17161, 17424, 17689, 17956, 18225, 18496, 18769, 19044, 19321, 19600, 19881, 20164, 20449, |
||
60 |
20736, 21025, 21316, 21609, 21904, 22201, 22500, 22801, 23104, 23409, 23716, 24025, 24336, 24649, 24964, 25281, |
||
61 |
25600, 25921, 26244, 26569, 26896, 27225, 27556, 27889, 28224, 28561, 28900, 29241, 29584, 29929, 30276, 30625, |
||
62 |
30976, 31329, 31684, 32041, 32400, 32761, 33124, 33489, 33856, 34225, 34596, 34969, 35344, 35721, 36100, 36481, |
||
63 |
36864, 37249, 37636, 38025, 38416, 38809, 39204, 39601, 40000, 40401, 40804, 41209, 41616, 42025, 42436, 42849, |
||
64 |
43264, 43681, 44100, 44521, 44944, 45369, 45796, 46225, 46656, 47089, 47524, 47961, 48400, 48841, 49284, 49729, |
||
65 |
50176, 50625, 51076, 51529, 51984, 52441, 52900, 53361, 53824, 54289, 54756, 55225, 55696, 56169, 56644, 57121, |
||
66 |
57600, 58081, 58564, 59049, 59536, 60025, 60516, 61009, 61504, 62001, 62500, 63001, 63504, 64009, 64516, 65025, |
||
67 |
}; |
||
68 |
|||
69 |
static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
||
70 |
ptrdiff_t stride, int h) |
||
71 |
{ |
||
72 |
int s = 0, i; |
||
73 |
const uint32_t *sq = ff_square_tab + 256; |
||
74 |
|||
75 |
for (i = 0; i < h; i++) { |
||
76 |
s += sq[pix1[0] - pix2[0]]; |
||
77 |
s += sq[pix1[1] - pix2[1]]; |
||
78 |
s += sq[pix1[2] - pix2[2]]; |
||
79 |
s += sq[pix1[3] - pix2[3]]; |
||
80 |
pix1 += stride; |
||
81 |
pix2 += stride; |
||
82 |
} |
||
83 |
return s; |
||
84 |
} |
||
85 |
|||
86 |
5002254 |
static int sse8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
|
87 |
ptrdiff_t stride, int h) |
||
88 |
{ |
||
89 |
5002254 |
int s = 0, i; |
|
90 |
5002254 |
const uint32_t *sq = ff_square_tab + 256; |
|
91 |
|||
92 |
✓✓ | 45020286 |
for (i = 0; i < h; i++) { |
93 |
40018032 |
s += sq[pix1[0] - pix2[0]]; |
|
94 |
40018032 |
s += sq[pix1[1] - pix2[1]]; |
|
95 |
40018032 |
s += sq[pix1[2] - pix2[2]]; |
|
96 |
40018032 |
s += sq[pix1[3] - pix2[3]]; |
|
97 |
40018032 |
s += sq[pix1[4] - pix2[4]]; |
|
98 |
40018032 |
s += sq[pix1[5] - pix2[5]]; |
|
99 |
40018032 |
s += sq[pix1[6] - pix2[6]]; |
|
100 |
40018032 |
s += sq[pix1[7] - pix2[7]]; |
|
101 |
40018032 |
pix1 += stride; |
|
102 |
40018032 |
pix2 += stride; |
|
103 |
} |
||
104 |
5002254 |
return s; |
|
105 |
} |
||
106 |
|||
107 |
7099240 |
static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
|
108 |
ptrdiff_t stride, int h) |
||
109 |
{ |
||
110 |
7099240 |
int s = 0, i; |
|
111 |
7099240 |
const uint32_t *sq = ff_square_tab + 256; |
|
112 |
|||
113 |
✓✓ | 120687080 |
for (i = 0; i < h; i++) { |
114 |
113587840 |
s += sq[pix1[0] - pix2[0]]; |
|
115 |
113587840 |
s += sq[pix1[1] - pix2[1]]; |
|
116 |
113587840 |
s += sq[pix1[2] - pix2[2]]; |
|
117 |
113587840 |
s += sq[pix1[3] - pix2[3]]; |
|
118 |
113587840 |
s += sq[pix1[4] - pix2[4]]; |
|
119 |
113587840 |
s += sq[pix1[5] - pix2[5]]; |
|
120 |
113587840 |
s += sq[pix1[6] - pix2[6]]; |
|
121 |
113587840 |
s += sq[pix1[7] - pix2[7]]; |
|
122 |
113587840 |
s += sq[pix1[8] - pix2[8]]; |
|
123 |
113587840 |
s += sq[pix1[9] - pix2[9]]; |
|
124 |
113587840 |
s += sq[pix1[10] - pix2[10]]; |
|
125 |
113587840 |
s += sq[pix1[11] - pix2[11]]; |
|
126 |
113587840 |
s += sq[pix1[12] - pix2[12]]; |
|
127 |
113587840 |
s += sq[pix1[13] - pix2[13]]; |
|
128 |
113587840 |
s += sq[pix1[14] - pix2[14]]; |
|
129 |
113587840 |
s += sq[pix1[15] - pix2[15]]; |
|
130 |
|||
131 |
113587840 |
pix1 += stride; |
|
132 |
113587840 |
pix2 += stride; |
|
133 |
} |
||
134 |
7099240 |
return s; |
|
135 |
} |
||
136 |
|||
137 |
static int sum_abs_dctelem_c(int16_t *block) |
||
138 |
{ |
||
139 |
int sum = 0, i; |
||
140 |
|||
141 |
for (i = 0; i < 64; i++) |
||
142 |
sum += FFABS(block[i]); |
||
143 |
return sum; |
||
144 |
} |
||
145 |
|||
146 |
#define avg2(a, b) (((a) + (b) + 1) >> 1) |
||
147 |
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2) |
||
148 |
|||
149 |
40701304 |
static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
|
150 |
ptrdiff_t stride, int h) |
||
151 |
{ |
||
152 |
40701304 |
int s = 0, i; |
|
153 |
|||
154 |
✓✓ | 611376000 |
for (i = 0; i < h; i++) { |
155 |
570674696 |
s += abs(pix1[0] - pix2[0]); |
|
156 |
570674696 |
s += abs(pix1[1] - pix2[1]); |
|
157 |
570674696 |
s += abs(pix1[2] - pix2[2]); |
|
158 |
570674696 |
s += abs(pix1[3] - pix2[3]); |
|
159 |
570674696 |
s += abs(pix1[4] - pix2[4]); |
|
160 |
570674696 |
s += abs(pix1[5] - pix2[5]); |
|
161 |
570674696 |
s += abs(pix1[6] - pix2[6]); |
|
162 |
570674696 |
s += abs(pix1[7] - pix2[7]); |
|
163 |
570674696 |
s += abs(pix1[8] - pix2[8]); |
|
164 |
570674696 |
s += abs(pix1[9] - pix2[9]); |
|
165 |
570674696 |
s += abs(pix1[10] - pix2[10]); |
|
166 |
570674696 |
s += abs(pix1[11] - pix2[11]); |
|
167 |
570674696 |
s += abs(pix1[12] - pix2[12]); |
|
168 |
570674696 |
s += abs(pix1[13] - pix2[13]); |
|
169 |
570674696 |
s += abs(pix1[14] - pix2[14]); |
|
170 |
570674696 |
s += abs(pix1[15] - pix2[15]); |
|
171 |
570674696 |
pix1 += stride; |
|
172 |
570674696 |
pix2 += stride; |
|
173 |
} |
||
174 |
40701304 |
return s; |
|
175 |
} |
||
176 |
|||
177 |
static inline int pix_median_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
||
178 |
ptrdiff_t stride, int h) |
||
179 |
{ |
||
180 |
int s = 0, i, j; |
||
181 |
|||
182 |
#define V(x) (pix1[x] - pix2[x]) |
||
183 |
|||
184 |
s += abs(V(0)); |
||
185 |
s += abs(V(1) - V(0)); |
||
186 |
s += abs(V(2) - V(1)); |
||
187 |
s += abs(V(3) - V(2)); |
||
188 |
s += abs(V(4) - V(3)); |
||
189 |
s += abs(V(5) - V(4)); |
||
190 |
s += abs(V(6) - V(5)); |
||
191 |
s += abs(V(7) - V(6)); |
||
192 |
s += abs(V(8) - V(7)); |
||
193 |
s += abs(V(9) - V(8)); |
||
194 |
s += abs(V(10) - V(9)); |
||
195 |
s += abs(V(11) - V(10)); |
||
196 |
s += abs(V(12) - V(11)); |
||
197 |
s += abs(V(13) - V(12)); |
||
198 |
s += abs(V(14) - V(13)); |
||
199 |
s += abs(V(15) - V(14)); |
||
200 |
|||
201 |
pix1 += stride; |
||
202 |
pix2 += stride; |
||
203 |
|||
204 |
for (i = 1; i < h; i++) { |
||
205 |
s += abs(V(0) - V(-stride)); |
||
206 |
for (j = 1; j < 16; j++) |
||
207 |
s += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1))); |
||
208 |
pix1 += stride; |
||
209 |
pix2 += stride; |
||
210 |
|||
211 |
} |
||
212 |
#undef V |
||
213 |
return s; |
||
214 |
} |
||
215 |
|||
216 |
3046787 |
static int pix_abs16_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
|
217 |
ptrdiff_t stride, int h) |
||
218 |
{ |
||
219 |
3046787 |
int s = 0, i; |
|
220 |
|||
221 |
✓✓ | 42483835 |
for (i = 0; i < h; i++) { |
222 |
39437048 |
s += abs(pix1[0] - avg2(pix2[0], pix2[1])); |
|
223 |
39437048 |
s += abs(pix1[1] - avg2(pix2[1], pix2[2])); |
|
224 |
39437048 |
s += abs(pix1[2] - avg2(pix2[2], pix2[3])); |
|
225 |
39437048 |
s += abs(pix1[3] - avg2(pix2[3], pix2[4])); |
|
226 |
39437048 |
s += abs(pix1[4] - avg2(pix2[4], pix2[5])); |
|
227 |
39437048 |
s += abs(pix1[5] - avg2(pix2[5], pix2[6])); |
|
228 |
39437048 |
s += abs(pix1[6] - avg2(pix2[6], pix2[7])); |
|
229 |
39437048 |
s += abs(pix1[7] - avg2(pix2[7], pix2[8])); |
|
230 |
39437048 |
s += abs(pix1[8] - avg2(pix2[8], pix2[9])); |
|
231 |
39437048 |
s += abs(pix1[9] - avg2(pix2[9], pix2[10])); |
|
232 |
39437048 |
s += abs(pix1[10] - avg2(pix2[10], pix2[11])); |
|
233 |
39437048 |
s += abs(pix1[11] - avg2(pix2[11], pix2[12])); |
|
234 |
39437048 |
s += abs(pix1[12] - avg2(pix2[12], pix2[13])); |
|
235 |
39437048 |
s += abs(pix1[13] - avg2(pix2[13], pix2[14])); |
|
236 |
39437048 |
s += abs(pix1[14] - avg2(pix2[14], pix2[15])); |
|
237 |
39437048 |
s += abs(pix1[15] - avg2(pix2[15], pix2[16])); |
|
238 |
39437048 |
pix1 += stride; |
|
239 |
39437048 |
pix2 += stride; |
|
240 |
} |
||
241 |
3046787 |
return s; |
|
242 |
} |
||
243 |
|||
244 |
3046787 |
static int pix_abs16_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
|
245 |
ptrdiff_t stride, int h) |
||
246 |
{ |
||
247 |
3046787 |
int s = 0, i; |
|
248 |
3046787 |
uint8_t *pix3 = pix2 + stride; |
|
249 |
|||
250 |
✓✓ | 42483835 |
for (i = 0; i < h; i++) { |
251 |
39437048 |
s += abs(pix1[0] - avg2(pix2[0], pix3[0])); |
|
252 |
39437048 |
s += abs(pix1[1] - avg2(pix2[1], pix3[1])); |
|
253 |
39437048 |
s += abs(pix1[2] - avg2(pix2[2], pix3[2])); |
|
254 |
39437048 |
s += abs(pix1[3] - avg2(pix2[3], pix3[3])); |
|
255 |
39437048 |
s += abs(pix1[4] - avg2(pix2[4], pix3[4])); |
|
256 |
39437048 |
s += abs(pix1[5] - avg2(pix2[5], pix3[5])); |
|
257 |
39437048 |
s += abs(pix1[6] - avg2(pix2[6], pix3[6])); |
|
258 |
39437048 |
s += abs(pix1[7] - avg2(pix2[7], pix3[7])); |
|
259 |
39437048 |
s += abs(pix1[8] - avg2(pix2[8], pix3[8])); |
|
260 |
39437048 |
s += abs(pix1[9] - avg2(pix2[9], pix3[9])); |
|
261 |
39437048 |
s += abs(pix1[10] - avg2(pix2[10], pix3[10])); |
|
262 |
39437048 |
s += abs(pix1[11] - avg2(pix2[11], pix3[11])); |
|
263 |
39437048 |
s += abs(pix1[12] - avg2(pix2[12], pix3[12])); |
|
264 |
39437048 |
s += abs(pix1[13] - avg2(pix2[13], pix3[13])); |
|
265 |
39437048 |
s += abs(pix1[14] - avg2(pix2[14], pix3[14])); |
|
266 |
39437048 |
s += abs(pix1[15] - avg2(pix2[15], pix3[15])); |
|
267 |
39437048 |
pix1 += stride; |
|
268 |
39437048 |
pix2 += stride; |
|
269 |
39437048 |
pix3 += stride; |
|
270 |
} |
||
271 |
3046787 |
return s; |
|
272 |
} |
||
273 |
|||
274 |
6093574 |
static int pix_abs16_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
|
275 |
ptrdiff_t stride, int h) |
||
276 |
{ |
||
277 |
6093574 |
int s = 0, i; |
|
278 |
6093574 |
uint8_t *pix3 = pix2 + stride; |
|
279 |
|||
280 |
✓✓ | 84967670 |
for (i = 0; i < h; i++) { |
281 |
78874096 |
s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); |
|
282 |
78874096 |
s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); |
|
283 |
78874096 |
s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); |
|
284 |
78874096 |
s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); |
|
285 |
78874096 |
s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); |
|
286 |
78874096 |
s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); |
|
287 |
78874096 |
s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); |
|
288 |
78874096 |
s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); |
|
289 |
78874096 |
s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9])); |
|
290 |
78874096 |
s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10])); |
|
291 |
78874096 |
s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11])); |
|
292 |
78874096 |
s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12])); |
|
293 |
78874096 |
s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13])); |
|
294 |
78874096 |
s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14])); |
|
295 |
78874096 |
s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15])); |
|
296 |
78874096 |
s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16])); |
|
297 |
78874096 |
pix1 += stride; |
|
298 |
78874096 |
pix2 += stride; |
|
299 |
78874096 |
pix3 += stride; |
|
300 |
} |
||
301 |
6093574 |
return s; |
|
302 |
} |
||
303 |
|||
304 |
31769153 |
static inline int pix_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
|
305 |
ptrdiff_t stride, int h) |
||
306 |
{ |
||
307 |
31769153 |
int s = 0, i; |
|
308 |
|||
309 |
✓✓ | 285922377 |
for (i = 0; i < h; i++) { |
310 |
254153224 |
s += abs(pix1[0] - pix2[0]); |
|
311 |
254153224 |
s += abs(pix1[1] - pix2[1]); |
|
312 |
254153224 |
s += abs(pix1[2] - pix2[2]); |
|
313 |
254153224 |
s += abs(pix1[3] - pix2[3]); |
|
314 |
254153224 |
s += abs(pix1[4] - pix2[4]); |
|
315 |
254153224 |
s += abs(pix1[5] - pix2[5]); |
|
316 |
254153224 |
s += abs(pix1[6] - pix2[6]); |
|
317 |
254153224 |
s += abs(pix1[7] - pix2[7]); |
|
318 |
254153224 |
pix1 += stride; |
|
319 |
254153224 |
pix2 += stride; |
|
320 |
} |
||
321 |
31769153 |
return s; |
|
322 |
} |
||
323 |
|||
324 |
static inline int pix_median_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
||
325 |
ptrdiff_t stride, int h) |
||
326 |
{ |
||
327 |
int s = 0, i, j; |
||
328 |
|||
329 |
#define V(x) (pix1[x] - pix2[x]) |
||
330 |
|||
331 |
s += abs(V(0)); |
||
332 |
s += abs(V(1) - V(0)); |
||
333 |
s += abs(V(2) - V(1)); |
||
334 |
s += abs(V(3) - V(2)); |
||
335 |
s += abs(V(4) - V(3)); |
||
336 |
s += abs(V(5) - V(4)); |
||
337 |
s += abs(V(6) - V(5)); |
||
338 |
s += abs(V(7) - V(6)); |
||
339 |
|||
340 |
pix1 += stride; |
||
341 |
pix2 += stride; |
||
342 |
|||
343 |
for (i = 1; i < h; i++) { |
||
344 |
s += abs(V(0) - V(-stride)); |
||
345 |
for (j = 1; j < 8; j++) |
||
346 |
s += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1))); |
||
347 |
pix1 += stride; |
||
348 |
pix2 += stride; |
||
349 |
|||
350 |
} |
||
351 |
#undef V |
||
352 |
return s; |
||
353 |
} |
||
354 |
|||
355 |
837549 |
static int pix_abs8_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
|
356 |
ptrdiff_t stride, int h) |
||
357 |
{ |
||
358 |
837549 |
int s = 0, i; |
|
359 |
|||
360 |
✓✓ | 7537941 |
for (i = 0; i < h; i++) { |
361 |
6700392 |
s += abs(pix1[0] - avg2(pix2[0], pix2[1])); |
|
362 |
6700392 |
s += abs(pix1[1] - avg2(pix2[1], pix2[2])); |
|
363 |
6700392 |
s += abs(pix1[2] - avg2(pix2[2], pix2[3])); |
|
364 |
6700392 |
s += abs(pix1[3] - avg2(pix2[3], pix2[4])); |
|
365 |
6700392 |
s += abs(pix1[4] - avg2(pix2[4], pix2[5])); |
|
366 |
6700392 |
s += abs(pix1[5] - avg2(pix2[5], pix2[6])); |
|
367 |
6700392 |
s += abs(pix1[6] - avg2(pix2[6], pix2[7])); |
|
368 |
6700392 |
s += abs(pix1[7] - avg2(pix2[7], pix2[8])); |
|
369 |
6700392 |
pix1 += stride; |
|
370 |
6700392 |
pix2 += stride; |
|
371 |
} |
||
372 |
837549 |
return s; |
|
373 |
} |
||
374 |
|||
375 |
837549 |
static int pix_abs8_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
|
376 |
ptrdiff_t stride, int h) |
||
377 |
{ |
||
378 |
837549 |
int s = 0, i; |
|
379 |
837549 |
uint8_t *pix3 = pix2 + stride; |
|
380 |
|||
381 |
✓✓ | 7537941 |
for (i = 0; i < h; i++) { |
382 |
6700392 |
s += abs(pix1[0] - avg2(pix2[0], pix3[0])); |
|
383 |
6700392 |
s += abs(pix1[1] - avg2(pix2[1], pix3[1])); |
|
384 |
6700392 |
s += abs(pix1[2] - avg2(pix2[2], pix3[2])); |
|
385 |
6700392 |
s += abs(pix1[3] - avg2(pix2[3], pix3[3])); |
|
386 |
6700392 |
s += abs(pix1[4] - avg2(pix2[4], pix3[4])); |
|
387 |
6700392 |
s += abs(pix1[5] - avg2(pix2[5], pix3[5])); |
|
388 |
6700392 |
s += abs(pix1[6] - avg2(pix2[6], pix3[6])); |
|
389 |
6700392 |
s += abs(pix1[7] - avg2(pix2[7], pix3[7])); |
|
390 |
6700392 |
pix1 += stride; |
|
391 |
6700392 |
pix2 += stride; |
|
392 |
6700392 |
pix3 += stride; |
|
393 |
} |
||
394 |
837549 |
return s; |
|
395 |
} |
||
396 |
|||
397 |
1675098 |
static int pix_abs8_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, |
|
398 |
ptrdiff_t stride, int h) |
||
399 |
{ |
||
400 |
1675098 |
int s = 0, i; |
|
401 |
1675098 |
uint8_t *pix3 = pix2 + stride; |
|
402 |
|||
403 |
✓✓ | 15075882 |
for (i = 0; i < h; i++) { |
404 |
13400784 |
s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); |
|
405 |
13400784 |
s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); |
|
406 |
13400784 |
s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); |
|
407 |
13400784 |
s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); |
|
408 |
13400784 |
s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); |
|
409 |
13400784 |
s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); |
|
410 |
13400784 |
s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); |
|
411 |
13400784 |
s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); |
|
412 |
13400784 |
pix1 += stride; |
|
413 |
13400784 |
pix2 += stride; |
|
414 |
13400784 |
pix3 += stride; |
|
415 |
} |
||
416 |
1675098 |
return s; |
|
417 |
} |
||
418 |
|||
419 |
748141 |
static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, |
|
420 |
ptrdiff_t stride, int h) |
||
421 |
{ |
||
422 |
748141 |
int score1 = 0, score2 = 0, x, y; |
|
423 |
|||
424 |
✓✓ | 12718397 |
for (y = 0; y < h; y++) { |
425 |
✓✓ | 203494352 |
for (x = 0; x < 16; x++) |
426 |
191524096 |
score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]); |
|
427 |
✓✓ | 11970256 |
if (y + 1 < h) { |
428 |
✓✓ | 179553840 |
for (x = 0; x < 15; x++) |
429 |
168331725 |
score2 += FFABS(s1[x] - s1[x + stride] - |
|
430 |
168331725 |
s1[x + 1] + s1[x + stride + 1]) - |
|
431 |
168331725 |
FFABS(s2[x] - s2[x + stride] - |
|
432 |
s2[x + 1] + s2[x + stride + 1]); |
||
433 |
} |
||
434 |
11970256 |
s1 += stride; |
|
435 |
11970256 |
s2 += stride; |
|
436 |
} |
||
437 |
|||
438 |
✓✗ | 748141 |
if (c) |
439 |
748141 |
return score1 + FFABS(score2) * c->avctx->nsse_weight; |
|
440 |
else |
||
441 |
return score1 + FFABS(score2) * 8; |
||
442 |
} |
||
443 |
|||
444 |
static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, |
||
445 |
ptrdiff_t stride, int h) |
||
446 |
{ |
||
447 |
int score1 = 0, score2 = 0, x, y; |
||
448 |
|||
449 |
for (y = 0; y < h; y++) { |
||
450 |
for (x = 0; x < 8; x++) |
||
451 |
score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]); |
||
452 |
if (y + 1 < h) { |
||
453 |
for (x = 0; x < 7; x++) |
||
454 |
score2 += FFABS(s1[x] - s1[x + stride] - |
||
455 |
s1[x + 1] + s1[x + stride + 1]) - |
||
456 |
FFABS(s2[x] - s2[x + stride] - |
||
457 |
s2[x + 1] + s2[x + stride + 1]); |
||
458 |
} |
||
459 |
s1 += stride; |
||
460 |
s2 += stride; |
||
461 |
} |
||
462 |
|||
463 |
if (c) |
||
464 |
return score1 + FFABS(score2) * c->avctx->nsse_weight; |
||
465 |
else |
||
466 |
return score1 + FFABS(score2) * 8; |
||
467 |
} |
||
468 |
|||
469 |
static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b, |
||
470 |
ptrdiff_t stride, int h) |
||
471 |
{ |
||
472 |
return 0; |
||
473 |
} |
||
474 |
|||
475 |
38252 |
void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type) |
|
476 |
{ |
||
477 |
int i; |
||
478 |
|||
479 |
38252 |
memset(cmp, 0, sizeof(void *) * 6); |
|
480 |
|||
481 |
✓✓ | 267764 |
for (i = 0; i < 6; i++) { |
482 |
✓✗✓✓ ✗✗✓✗ ✗✗✓✗ ✗✓✗✓ ✗ |
229512 |
switch (type & 0xFF) { |
483 |
209136 |
case FF_CMP_SAD: |
|
484 |
209136 |
cmp[i] = c->sad[i]; |
|
485 |
209136 |
break; |
|
486 |
case FF_CMP_MEDIAN_SAD: |
||
487 |
cmp[i] = c->median_sad[i]; |
||
488 |
break; |
||
489 |
7200 |
case FF_CMP_SATD: |
|
490 |
7200 |
cmp[i] = c->hadamard8_diff[i]; |
|
491 |
7200 |
break; |
|
492 |
2748 |
case FF_CMP_SSE: |
|
493 |
2748 |
cmp[i] = c->sse[i]; |
|
494 |
2748 |
break; |
|
495 |
case FF_CMP_DCT: |
||
496 |
cmp[i] = c->dct_sad[i]; |
||
497 |
break; |
||
498 |
case FF_CMP_DCT264: |
||
499 |
cmp[i] = c->dct264_sad[i]; |
||
500 |
break; |
||
501 |
1068 |
case FF_CMP_DCTMAX: |
|
502 |
1068 |
cmp[i] = c->dct_max[i]; |
|
503 |
1068 |
break; |
|
504 |
case FF_CMP_PSNR: |
||
505 |
cmp[i] = c->quant_psnr[i]; |
||
506 |
break; |
||
507 |
case FF_CMP_BIT: |
||
508 |
cmp[i] = c->bit[i]; |
||
509 |
break; |
||
510 |
case FF_CMP_RD: |
||
511 |
cmp[i] = c->rd[i]; |
||
512 |
break; |
||
513 |
1224 |
case FF_CMP_VSAD: |
|
514 |
1224 |
cmp[i] = c->vsad[i]; |
|
515 |
1224 |
break; |
|
516 |
case FF_CMP_VSSE: |
||
517 |
cmp[i] = c->vsse[i]; |
||
518 |
break; |
||
519 |
case FF_CMP_ZERO: |
||
520 |
cmp[i] = zero_cmp; |
||
521 |
break; |
||
522 |
4824 |
case FF_CMP_NSSE: |
|
523 |
4824 |
cmp[i] = c->nsse[i]; |
|
524 |
4824 |
break; |
|
525 |
#if CONFIG_DWT |
||
526 |
case FF_CMP_W53: |
||
527 |
cmp[i]= c->w53[i]; |
||
528 |
break; |
||
529 |
3312 |
case FF_CMP_W97: |
|
530 |
3312 |
cmp[i]= c->w97[i]; |
|
531 |
3312 |
break; |
|
532 |
#endif |
||
533 |
default: |
||
534 |
av_log(NULL, AV_LOG_ERROR, |
||
535 |
"internal error in cmp function selection\n"); |
||
536 |
} |
||
537 |
} |
||
538 |
38252 |
} |
|
539 |
|||
540 |
#define BUTTERFLY2(o1, o2, i1, i2) \ |
||
541 |
o1 = (i1) + (i2); \ |
||
542 |
o2 = (i1) - (i2); |
||
543 |
|||
544 |
#define BUTTERFLY1(x, y) \ |
||
545 |
{ \ |
||
546 |
int a, b; \ |
||
547 |
a = x; \ |
||
548 |
b = y; \ |
||
549 |
x = a + b; \ |
||
550 |
y = a - b; \ |
||
551 |
} |
||
552 |
|||
553 |
#define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y))) |
||
554 |
|||
555 |
15481796 |
static int hadamard8_diff8x8_c(MpegEncContext *s, uint8_t *dst, |
|
556 |
uint8_t *src, ptrdiff_t stride, int h) |
||
557 |
{ |
||
558 |
15481796 |
int i, temp[64], sum = 0; |
|
559 |
|||
560 |
av_assert2(h == 8); |
||
561 |
|||
562 |
✓✓ | 139336164 |
for (i = 0; i < 8; i++) { |
563 |
// FIXME: try pointer walks |
||
564 |
123854368 |
BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1], |
|
565 |
src[stride * i + 0] - dst[stride * i + 0], |
||
566 |
src[stride * i + 1] - dst[stride * i + 1]); |
||
567 |
123854368 |
BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3], |
|
568 |
src[stride * i + 2] - dst[stride * i + 2], |
||
569 |
src[stride * i + 3] - dst[stride * i + 3]); |
||
570 |
123854368 |
BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5], |
|
571 |
src[stride * i + 4] - dst[stride * i + 4], |
||
572 |
src[stride * i + 5] - dst[stride * i + 5]); |
||
573 |
123854368 |
BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7], |
|
574 |
src[stride * i + 6] - dst[stride * i + 6], |
||
575 |
src[stride * i + 7] - dst[stride * i + 7]); |
||
576 |
|||
577 |
123854368 |
BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]); |
|
578 |
123854368 |
BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]); |
|
579 |
123854368 |
BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]); |
|
580 |
123854368 |
BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]); |
|
581 |
|||
582 |
123854368 |
BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]); |
|
583 |
123854368 |
BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]); |
|
584 |
123854368 |
BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]); |
|
585 |
123854368 |
BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]); |
|
586 |
} |
||
587 |
|||
588 |
✓✓ | 139336164 |
for (i = 0; i < 8; i++) { |
589 |
123854368 |
BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]); |
|
590 |
123854368 |
BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]); |
|
591 |
123854368 |
BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]); |
|
592 |
123854368 |
BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]); |
|
593 |
|||
594 |
123854368 |
BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]); |
|
595 |
123854368 |
BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]); |
|
596 |
123854368 |
BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]); |
|
597 |
123854368 |
BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]); |
|
598 |
|||
599 |
123854368 |
sum += BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) + |
|
600 |
123854368 |
BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) + |
|
601 |
123854368 |
BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) + |
|
602 |
123854368 |
BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]); |
|
603 |
} |
||
604 |
15481796 |
return sum; |
|
605 |
} |
||
606 |
|||
607 |
static int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src, |
||
608 |
uint8_t *dummy, ptrdiff_t stride, int h) |
||
609 |
{ |
||
610 |
int i, temp[64], sum = 0; |
||
611 |
|||
612 |
av_assert2(h == 8); |
||
613 |
|||
614 |
for (i = 0; i < 8; i++) { |
||
615 |
// FIXME: try pointer walks |
||
616 |
BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1], |
||
617 |
src[stride * i + 0], src[stride * i + 1]); |
||
618 |
BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3], |
||
619 |
src[stride * i + 2], src[stride * i + 3]); |
||
620 |
BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5], |
||
621 |
src[stride * i + 4], src[stride * i + 5]); |
||
622 |
BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7], |
||
623 |
src[stride * i + 6], src[stride * i + 7]); |
||
624 |
|||
625 |
BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]); |
||
626 |
BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]); |
||
627 |
BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]); |
||
628 |
BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]); |
||
629 |
|||
630 |
BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]); |
||
631 |
BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]); |
||
632 |
BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]); |
||
633 |
BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]); |
||
634 |
} |
||
635 |
|||
636 |
for (i = 0; i < 8; i++) { |
||
637 |
BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]); |
||
638 |
BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]); |
||
639 |
BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]); |
||
640 |
BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]); |
||
641 |
|||
642 |
BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]); |
||
643 |
BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]); |
||
644 |
BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]); |
||
645 |
BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]); |
||
646 |
|||
647 |
sum += |
||
648 |
BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) |
||
649 |
+ BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) |
||
650 |
+ BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) |
||
651 |
+ BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]); |
||
652 |
} |
||
653 |
|||
654 |
sum -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean |
||
655 |
|||
656 |
return sum; |
||
657 |
} |
||
658 |
|||
659 |
static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1, |
||
660 |
uint8_t *src2, ptrdiff_t stride, int h) |
||
661 |
{ |
||
662 |
LOCAL_ALIGNED_16(int16_t, temp, [64]); |
||
663 |
|||
664 |
av_assert2(h == 8); |
||
665 |
|||
666 |
s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); |
||
667 |
s->fdsp.fdct(temp); |
||
668 |
return s->mecc.sum_abs_dctelem(temp); |
||
669 |
} |
||
670 |
|||
671 |
#if CONFIG_GPL |
||
672 |
#define DCT8_1D \ |
||
673 |
{ \ |
||
674 |
const int s07 = SRC(0) + SRC(7); \ |
||
675 |
const int s16 = SRC(1) + SRC(6); \ |
||
676 |
const int s25 = SRC(2) + SRC(5); \ |
||
677 |
const int s34 = SRC(3) + SRC(4); \ |
||
678 |
const int a0 = s07 + s34; \ |
||
679 |
const int a1 = s16 + s25; \ |
||
680 |
const int a2 = s07 - s34; \ |
||
681 |
const int a3 = s16 - s25; \ |
||
682 |
const int d07 = SRC(0) - SRC(7); \ |
||
683 |
const int d16 = SRC(1) - SRC(6); \ |
||
684 |
const int d25 = SRC(2) - SRC(5); \ |
||
685 |
const int d34 = SRC(3) - SRC(4); \ |
||
686 |
const int a4 = d16 + d25 + (d07 + (d07 >> 1)); \ |
||
687 |
const int a5 = d07 - d34 - (d25 + (d25 >> 1)); \ |
||
688 |
const int a6 = d07 + d34 - (d16 + (d16 >> 1)); \ |
||
689 |
const int a7 = d16 - d25 + (d34 + (d34 >> 1)); \ |
||
690 |
DST(0, a0 + a1); \ |
||
691 |
DST(1, a4 + (a7 >> 2)); \ |
||
692 |
DST(2, a2 + (a3 >> 1)); \ |
||
693 |
DST(3, a5 + (a6 >> 2)); \ |
||
694 |
DST(4, a0 - a1); \ |
||
695 |
DST(5, a6 - (a5 >> 2)); \ |
||
696 |
DST(6, (a2 >> 1) - a3); \ |
||
697 |
DST(7, (a4 >> 2) - a7); \ |
||
698 |
} |
||
699 |
|||
700 |
static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1, |
||
701 |
uint8_t *src2, ptrdiff_t stride, int h) |
||
702 |
{ |
||
703 |
int16_t dct[8][8]; |
||
704 |
int i, sum = 0; |
||
705 |
|||
706 |
s->pdsp.diff_pixels_unaligned(dct[0], src1, src2, stride); |
||
707 |
|||
708 |
#define SRC(x) dct[i][x] |
||
709 |
#define DST(x, v) dct[i][x] = v |
||
710 |
for (i = 0; i < 8; i++) |
||
711 |
DCT8_1D |
||
712 |
#undef SRC |
||
713 |
#undef DST |
||
714 |
|||
715 |
#define SRC(x) dct[x][i] |
||
716 |
#define DST(x, v) sum += FFABS(v) |
||
717 |
for (i = 0; i < 8; i++) |
||
718 |
DCT8_1D |
||
719 |
#undef SRC |
||
720 |
#undef DST |
||
721 |
return sum; |
||
722 |
} |
||
723 |
#endif |
||
724 |
|||
725 |
static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1, |
||
726 |
uint8_t *src2, ptrdiff_t stride, int h) |
||
727 |
{ |
||
728 |
LOCAL_ALIGNED_16(int16_t, temp, [64]); |
||
729 |
int sum = 0, i; |
||
730 |
|||
731 |
av_assert2(h == 8); |
||
732 |
|||
733 |
s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); |
||
734 |
s->fdsp.fdct(temp); |
||
735 |
|||
736 |
for (i = 0; i < 64; i++) |
||
737 |
sum = FFMAX(sum, FFABS(temp[i])); |
||
738 |
|||
739 |
return sum; |
||
740 |
} |
||
741 |
|||
742 |
static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1, |
||
743 |
uint8_t *src2, ptrdiff_t stride, int h) |
||
744 |
{ |
||
745 |
LOCAL_ALIGNED_16(int16_t, temp, [64 * 2]); |
||
746 |
int16_t *const bak = temp + 64; |
||
747 |
int sum = 0, i; |
||
748 |
|||
749 |
av_assert2(h == 8); |
||
750 |
s->mb_intra = 0; |
||
751 |
|||
752 |
s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); |
||
753 |
|||
754 |
memcpy(bak, temp, 64 * sizeof(int16_t)); |
||
755 |
|||
756 |
s->block_last_index[0 /* FIXME */] = |
||
757 |
s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); |
||
758 |
s->dct_unquantize_inter(s, temp, 0, s->qscale); |
||
759 |
ff_simple_idct_int16_8bit(temp); // FIXME |
||
760 |
|||
761 |
for (i = 0; i < 64; i++) |
||
762 |
sum += (temp[i] - bak[i]) * (temp[i] - bak[i]); |
||
763 |
|||
764 |
return sum; |
||
765 |
} |
||
766 |
|||
767 |
static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, |
||
768 |
ptrdiff_t stride, int h) |
||
769 |
{ |
||
770 |
const uint8_t *scantable = s->intra_scantable.permutated; |
||
771 |
LOCAL_ALIGNED_16(int16_t, temp, [64]); |
||
772 |
LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]); |
||
773 |
LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]); |
||
774 |
int i, last, run, bits, level, distortion, start_i; |
||
775 |
const int esc_length = s->ac_esc_length; |
||
776 |
uint8_t *length, *last_length; |
||
777 |
|||
778 |
av_assert2(h == 8); |
||
779 |
|||
780 |
copy_block8(lsrc1, src1, 8, stride, 8); |
||
781 |
copy_block8(lsrc2, src2, 8, stride, 8); |
||
782 |
|||
783 |
s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8); |
||
784 |
|||
785 |
s->block_last_index[0 /* FIXME */] = |
||
786 |
last = |
||
787 |
s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); |
||
788 |
|||
789 |
bits = 0; |
||
790 |
|||
791 |
if (s->mb_intra) { |
||
792 |
start_i = 1; |
||
793 |
length = s->intra_ac_vlc_length; |
||
794 |
last_length = s->intra_ac_vlc_last_length; |
||
795 |
bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma |
||
796 |
} else { |
||
797 |
start_i = 0; |
||
798 |
length = s->inter_ac_vlc_length; |
||
799 |
last_length = s->inter_ac_vlc_last_length; |
||
800 |
} |
||
801 |
|||
802 |
if (last >= start_i) { |
||
803 |
run = 0; |
||
804 |
for (i = start_i; i < last; i++) { |
||
805 |
int j = scantable[i]; |
||
806 |
level = temp[j]; |
||
807 |
|||
808 |
if (level) { |
||
809 |
level += 64; |
||
810 |
if ((level & (~127)) == 0) |
||
811 |
bits += length[UNI_AC_ENC_INDEX(run, level)]; |
||
812 |
else |
||
813 |
bits += esc_length; |
||
814 |
run = 0; |
||
815 |
} else |
||
816 |
run++; |
||
817 |
} |
||
818 |
i = scantable[last]; |
||
819 |
|||
820 |
level = temp[i] + 64; |
||
821 |
|||
822 |
av_assert2(level - 64); |
||
823 |
|||
824 |
if ((level & (~127)) == 0) { |
||
825 |
bits += last_length[UNI_AC_ENC_INDEX(run, level)]; |
||
826 |
} else |
||
827 |
bits += esc_length; |
||
828 |
} |
||
829 |
|||
830 |
if (last >= 0) { |
||
831 |
if (s->mb_intra) |
||
832 |
s->dct_unquantize_intra(s, temp, 0, s->qscale); |
||
833 |
else |
||
834 |
s->dct_unquantize_inter(s, temp, 0, s->qscale); |
||
835 |
} |
||
836 |
|||
837 |
s->idsp.idct_add(lsrc2, 8, temp); |
||
838 |
|||
839 |
distortion = s->mecc.sse[1](NULL, lsrc2, lsrc1, 8, 8); |
||
840 |
|||
841 |
return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7); |
||
842 |
} |
||
843 |
|||
844 |
static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, |
||
845 |
ptrdiff_t stride, int h) |
||
846 |
{ |
||
847 |
const uint8_t *scantable = s->intra_scantable.permutated; |
||
848 |
LOCAL_ALIGNED_16(int16_t, temp, [64]); |
||
849 |
int i, last, run, bits, level, start_i; |
||
850 |
const int esc_length = s->ac_esc_length; |
||
851 |
uint8_t *length, *last_length; |
||
852 |
|||
853 |
av_assert2(h == 8); |
||
854 |
|||
855 |
s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); |
||
856 |
|||
857 |
s->block_last_index[0 /* FIXME */] = |
||
858 |
last = |
||
859 |
s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); |
||
860 |
|||
861 |
bits = 0; |
||
862 |
|||
863 |
if (s->mb_intra) { |
||
864 |
start_i = 1; |
||
865 |
length = s->intra_ac_vlc_length; |
||
866 |
last_length = s->intra_ac_vlc_last_length; |
||
867 |
bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma |
||
868 |
} else { |
||
869 |
start_i = 0; |
||
870 |
length = s->inter_ac_vlc_length; |
||
871 |
last_length = s->inter_ac_vlc_last_length; |
||
872 |
} |
||
873 |
|||
874 |
if (last >= start_i) { |
||
875 |
run = 0; |
||
876 |
for (i = start_i; i < last; i++) { |
||
877 |
int j = scantable[i]; |
||
878 |
level = temp[j]; |
||
879 |
|||
880 |
if (level) { |
||
881 |
level += 64; |
||
882 |
if ((level & (~127)) == 0) |
||
883 |
bits += length[UNI_AC_ENC_INDEX(run, level)]; |
||
884 |
else |
||
885 |
bits += esc_length; |
||
886 |
run = 0; |
||
887 |
} else |
||
888 |
run++; |
||
889 |
} |
||
890 |
i = scantable[last]; |
||
891 |
|||
892 |
level = temp[i] + 64; |
||
893 |
|||
894 |
av_assert2(level - 64); |
||
895 |
|||
896 |
if ((level & (~127)) == 0) |
||
897 |
bits += last_length[UNI_AC_ENC_INDEX(run, level)]; |
||
898 |
else |
||
899 |
bits += esc_length; |
||
900 |
} |
||
901 |
|||
902 |
return bits; |
||
903 |
} |
||
904 |
|||
905 |
#define VSAD_INTRA(size) \ |
||
906 |
static int vsad_intra ## size ## _c(MpegEncContext *c, \ |
||
907 |
uint8_t *s, uint8_t *dummy, \ |
||
908 |
ptrdiff_t stride, int h) \ |
||
909 |
{ \ |
||
910 |
int score = 0, x, y; \ |
||
911 |
\ |
||
912 |
for (y = 1; y < h; y++) { \ |
||
913 |
for (x = 0; x < size; x += 4) { \ |
||
914 |
score += FFABS(s[x] - s[x + stride]) + \ |
||
915 |
FFABS(s[x + 1] - s[x + stride + 1]) + \ |
||
916 |
FFABS(s[x + 2] - s[x + 2 + stride]) + \ |
||
917 |
FFABS(s[x + 3] - s[x + 3 + stride]); \ |
||
918 |
} \ |
||
919 |
s += stride; \ |
||
920 |
} \ |
||
921 |
\ |
||
922 |
return score; \ |
||
923 |
} |
||
924 |
VSAD_INTRA(8) |
||
925 |
✓✓✓✓ |
44331984 |
VSAD_INTRA(16) |
926 |
|||
927 |
#define VSAD(size) \ |
||
928 |
static int vsad ## size ## _c(MpegEncContext *c, \ |
||
929 |
uint8_t *s1, uint8_t *s2, \ |
||
930 |
ptrdiff_t stride, int h) \ |
||
931 |
{ \ |
||
932 |
int score = 0, x, y; \ |
||
933 |
\ |
||
934 |
for (y = 1; y < h; y++) { \ |
||
935 |
for (x = 0; x < size; x++) \ |
||
936 |
score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \ |
||
937 |
s1 += stride; \ |
||
938 |
s2 += stride; \ |
||
939 |
} \ |
||
940 |
\ |
||
941 |
return score; \ |
||
942 |
} |
||
943 |
VSAD(8) |
||
944 |
✓✓✓✓ |
229802640 |
VSAD(16) |
945 |
|||
946 |
#define SQ(a) ((a) * (a)) |
||
947 |
#define VSSE_INTRA(size) \ |
||
948 |
static int vsse_intra ## size ## _c(MpegEncContext *c, \ |
||
949 |
uint8_t *s, uint8_t *dummy, \ |
||
950 |
ptrdiff_t stride, int h) \ |
||
951 |
{ \ |
||
952 |
int score = 0, x, y; \ |
||
953 |
\ |
||
954 |
for (y = 1; y < h; y++) { \ |
||
955 |
for (x = 0; x < size; x += 4) { \ |
||
956 |
score += SQ(s[x] - s[x + stride]) + \ |
||
957 |
SQ(s[x + 1] - s[x + stride + 1]) + \ |
||
958 |
SQ(s[x + 2] - s[x + stride + 2]) + \ |
||
959 |
SQ(s[x + 3] - s[x + stride + 3]); \ |
||
960 |
} \ |
||
961 |
s += stride; \ |
||
962 |
} \ |
||
963 |
\ |
||
964 |
return score; \ |
||
965 |
} |
||
966 |
VSSE_INTRA(8) |
||
967 |
VSSE_INTRA(16) |
||
968 |
|||
969 |
#define VSSE(size) \ |
||
970 |
static int vsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, \ |
||
971 |
ptrdiff_t stride, int h) \ |
||
972 |
{ \ |
||
973 |
int score = 0, x, y; \ |
||
974 |
\ |
||
975 |
for (y = 1; y < h; y++) { \ |
||
976 |
for (x = 0; x < size; x++) \ |
||
977 |
score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \ |
||
978 |
s1 += stride; \ |
||
979 |
s2 += stride; \ |
||
980 |
} \ |
||
981 |
\ |
||
982 |
return score; \ |
||
983 |
} |
||
984 |
VSSE(8) |
||
985 |
VSSE(16) |
||
986 |
|||
987 |
#define WRAPPER8_16_SQ(name8, name16) \ |
||
988 |
static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \ |
||
989 |
ptrdiff_t stride, int h) \ |
||
990 |
{ \ |
||
991 |
int score = 0; \ |
||
992 |
\ |
||
993 |
score += name8(s, dst, src, stride, 8); \ |
||
994 |
score += name8(s, dst + 8, src + 8, stride, 8); \ |
||
995 |
if (h == 16) { \ |
||
996 |
dst += 8 * stride; \ |
||
997 |
src += 8 * stride; \ |
||
998 |
score += name8(s, dst, src, stride, 8); \ |
||
999 |
score += name8(s, dst + 8, src + 8, stride, 8); \ |
||
1000 |
} \ |
||
1001 |
return score; \ |
||
1002 |
} |
||
1003 |
|||
1004 |
✓✗ | 3613091 |
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) |
1005 |
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) |
||
1006 |
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) |
||
1007 |
#if CONFIG_GPL |
||
1008 |
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c) |
||
1009 |
#endif |
||
1010 |
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c) |
||
1011 |
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) |
||
1012 |
WRAPPER8_16_SQ(rd8x8_c, rd16_c) |
||
1013 |
WRAPPER8_16_SQ(bit8x8_c, bit16_c) |
||
1014 |
|||
1015 |
17126 |
int ff_check_alignment(void) |
|
1016 |
{ |
||
1017 |
static int did_fail = 0; |
||
1018 |
17126 |
LOCAL_ALIGNED_16(int, aligned, [4]); |
|
1019 |
|||
1020 |
✗✓ | 17126 |
if ((intptr_t)aligned & 15) { |
1021 |
if (!did_fail) { |
||
1022 |
#if HAVE_MMX || HAVE_ALTIVEC |
||
1023 |
av_log(NULL, AV_LOG_ERROR, |
||
1024 |
"Compiler did not align stack variables. Libavcodec has been miscompiled\n" |
||
1025 |
"and may be very slow or crash. This is not a bug in libavcodec,\n" |
||
1026 |
"but in the compiler. You may try recompiling using gcc >= 4.2.\n" |
||
1027 |
"Do not report crashes to FFmpeg developers.\n"); |
||
1028 |
#endif |
||
1029 |
did_fail=1; |
||
1030 |
} |
||
1031 |
return -1; |
||
1032 |
} |
||
1033 |
17126 |
return 0; |
|
1034 |
} |
||
1035 |
|||
1036 |
980 |
av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx) |
|
1037 |
{ |
||
1038 |
980 |
ff_check_alignment(); |
|
1039 |
|||
1040 |
980 |
c->sum_abs_dctelem = sum_abs_dctelem_c; |
|
1041 |
|||
1042 |
/* TODO [0] 16 [1] 8 */ |
||
1043 |
980 |
c->pix_abs[0][0] = pix_abs16_c; |
|
1044 |
980 |
c->pix_abs[0][1] = pix_abs16_x2_c; |
|
1045 |
980 |
c->pix_abs[0][2] = pix_abs16_y2_c; |
|
1046 |
980 |
c->pix_abs[0][3] = pix_abs16_xy2_c; |
|
1047 |
980 |
c->pix_abs[1][0] = pix_abs8_c; |
|
1048 |
980 |
c->pix_abs[1][1] = pix_abs8_x2_c; |
|
1049 |
980 |
c->pix_abs[1][2] = pix_abs8_y2_c; |
|
1050 |
980 |
c->pix_abs[1][3] = pix_abs8_xy2_c; |
|
1051 |
|||
1052 |
#define SET_CMP_FUNC(name) \ |
||
1053 |
c->name[0] = name ## 16_c; \ |
||
1054 |
c->name[1] = name ## 8x8_c; |
||
1055 |
|||
1056 |
980 |
SET_CMP_FUNC(hadamard8_diff) |
|
1057 |
980 |
c->hadamard8_diff[4] = hadamard8_intra16_c; |
|
1058 |
980 |
c->hadamard8_diff[5] = hadamard8_intra8x8_c; |
|
1059 |
980 |
SET_CMP_FUNC(dct_sad) |
|
1060 |
980 |
SET_CMP_FUNC(dct_max) |
|
1061 |
#if CONFIG_GPL |
||
1062 |
980 |
SET_CMP_FUNC(dct264_sad) |
|
1063 |
#endif |
||
1064 |
980 |
c->sad[0] = pix_abs16_c; |
|
1065 |
980 |
c->sad[1] = pix_abs8_c; |
|
1066 |
980 |
c->sse[0] = sse16_c; |
|
1067 |
980 |
c->sse[1] = sse8_c; |
|
1068 |
980 |
c->sse[2] = sse4_c; |
|
1069 |
980 |
SET_CMP_FUNC(quant_psnr) |
|
1070 |
980 |
SET_CMP_FUNC(rd) |
|
1071 |
980 |
SET_CMP_FUNC(bit) |
|
1072 |
980 |
c->vsad[0] = vsad16_c; |
|
1073 |
980 |
c->vsad[1] = vsad8_c; |
|
1074 |
980 |
c->vsad[4] = vsad_intra16_c; |
|
1075 |
980 |
c->vsad[5] = vsad_intra8_c; |
|
1076 |
980 |
c->vsse[0] = vsse16_c; |
|
1077 |
980 |
c->vsse[1] = vsse8_c; |
|
1078 |
980 |
c->vsse[4] = vsse_intra16_c; |
|
1079 |
980 |
c->vsse[5] = vsse_intra8_c; |
|
1080 |
980 |
c->nsse[0] = nsse16_c; |
|
1081 |
980 |
c->nsse[1] = nsse8_c; |
|
1082 |
#if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER |
||
1083 |
980 |
ff_dsputil_init_dwt(c); |
|
1084 |
#endif |
||
1085 |
|||
1086 |
if (ARCH_ALPHA) |
||
1087 |
ff_me_cmp_init_alpha(c, avctx); |
||
1088 |
if (ARCH_ARM) |
||
1089 |
ff_me_cmp_init_arm(c, avctx); |
||
1090 |
if (ARCH_PPC) |
||
1091 |
ff_me_cmp_init_ppc(c, avctx); |
||
1092 |
if (ARCH_X86) |
||
1093 |
980 |
ff_me_cmp_init_x86(c, avctx); |
|
1094 |
if (ARCH_MIPS) |
||
1095 |
ff_me_cmp_init_mips(c, avctx); |
||
1096 |
|||
1097 |
980 |
c->median_sad[0] = pix_median_abs16_c; |
|
1098 |
980 |
c->median_sad[1] = pix_median_abs8_c; |
|
1099 |
980 |
} |
Generated by: GCOVR (Version 4.2) |