FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/h264pred_template.c
Date: 2021-09-24 20:55:06
Exec Total Coverage
Lines: 760 822 92.5%
Branches: 162 184 88.0%

Line Branch Exec Source
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * H.264 / AVC / MPEG-4 part10 prediction functions.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
28 #include "libavutil/intreadwrite.h"
29
30 #include "mathops.h"
31
32 #include "bit_depth_template.c"
33
/**
 * 4x4 vertical prediction.
 *
 * Reads the four pixels of the row above the block (src - stride) as a single
 * pixel4 and stores that pixel4 into each of the block's four rows.
 *
 * @param _src     address of the block's first pixel; reinterpreted as pixel*
 * @param topright not read by this function
 * @param _stride  row stride; shifted right by sizeof(pixel)-1 before it is
 *                 used as an offset on pixel pointers
 */
34 8594158 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright,
35 ptrdiff_t _stride)
36 {
37 8594158 pixel *src = (pixel*)_src;
38 8594158 int stride = _stride>>(sizeof(pixel)-1);
39 8594158 const pixel4 a= AV_RN4PA(src-stride);
40
41 8594158 AV_WN4PA(src+0*stride, a);
42 8594158 AV_WN4PA(src+1*stride, a);
43 8594158 AV_WN4PA(src+2*stride, a);
44 8594158 AV_WN4PA(src+3*stride, a);
45 }
46
/**
 * 4x4 horizontal prediction.
 *
 * For each of the four rows, takes the pixel immediately to the left of the
 * row (src[-1 + y*stride]) and stores PIXEL_SPLAT_X4 of it over the row.
 * PIXEL_SPLAT_X4 is defined outside this listing; presumably it replicates
 * one pixel value into all four lanes of a pixel4.
 *
 * @param _src     address of the block's first pixel; reinterpreted as pixel*
 * @param topright not read by this function
 * @param _stride  row stride; shifted right by sizeof(pixel)-1 before use
 */
47 13759350 static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright,
48 ptrdiff_t _stride)
49 {
50 13759350 pixel *src = (pixel*)_src;
51 13759350 int stride = _stride>>(sizeof(pixel)-1);
52 13759350 AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));
53 13759350 AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride]));
54 13759350 AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride]));
55 13759350 AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride]));
56 }
57
/**
 * 4x4 DC prediction from both the top and the left neighbours.
 *
 * dc is the rounded mean of eight neighbouring pixels: the four in the row
 * above (src[0..3 - stride]) and the four in the column to the left
 * (src[-1 + y*stride]), computed as (sum + 4) >> 3. The splatted value is
 * stored into all four rows of the block.
 *
 * @param _src     address of the block's first pixel; reinterpreted as pixel*
 * @param topright not read by this function
 * @param _stride  row stride; shifted right by sizeof(pixel)-1 before use
 */
58 5979892 static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright,
59 ptrdiff_t _stride)
60 {
61 5979892 pixel *src = (pixel*)_src;
62 5979892 int stride = _stride>>(sizeof(pixel)-1);
63 5979892 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
64 5979892 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
65 5979892 const pixel4 a = PIXEL_SPLAT_X4(dc);
66
67 5979892 AV_WN4PA(src+0*stride, a);
68 5979892 AV_WN4PA(src+1*stride, a);
69 5979892 AV_WN4PA(src+2*stride, a);
70 5979892 AV_WN4PA(src+3*stride, a);
71 }
72
/**
 * 4x4 DC prediction from the left neighbours only.
 *
 * dc is the rounded mean of the four pixels in the column to the left of the
 * block, (sum + 2) >> 2; the row above is not read. The splatted value is
 * stored into all four rows.
 *
 * @param _src     address of the block's first pixel; reinterpreted as pixel*
 * @param topright not read by this function
 * @param _stride  row stride; shifted right by sizeof(pixel)-1 before use
 */
73 583630 static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright,
74 ptrdiff_t _stride)
75 {
76 583630 pixel *src = (pixel*)_src;
77 583630 int stride = _stride>>(sizeof(pixel)-1);
78 583630 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
79 583630 const pixel4 a = PIXEL_SPLAT_X4(dc);
80
81 583630 AV_WN4PA(src+0*stride, a);
82 583630 AV_WN4PA(src+1*stride, a);
83 583630 AV_WN4PA(src+2*stride, a);
84 583630 AV_WN4PA(src+3*stride, a);
85 }
86
/**
 * 4x4 DC prediction from the top neighbours only.
 *
 * dc is the rounded mean of the four pixels in the row above the block,
 * (sum + 2) >> 2; the left column is not read. The splatted value is stored
 * into all four rows.
 *
 * @param _src     address of the block's first pixel; reinterpreted as pixel*
 * @param topright not read by this function
 * @param _stride  row stride; shifted right by sizeof(pixel)-1 before use
 */
87 116398 static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright,
88 ptrdiff_t _stride)
89 {
90 116398 pixel *src = (pixel*)_src;
91 116398 int stride = _stride>>(sizeof(pixel)-1);
92 116398 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
93 116398 const pixel4 a = PIXEL_SPLAT_X4(dc);
94
95 116398 AV_WN4PA(src+0*stride, a);
96 116398 AV_WN4PA(src+1*stride, a);
97 116398 AV_WN4PA(src+2*stride, a);
98 116398 AV_WN4PA(src+3*stride, a);
99 }
100
/**
 * 4x4 constant fill with no neighbour reads.
 *
 * Stores PIXEL_SPLAT_X4(1 << (BIT_DEPTH-1)) into all four rows; for
 * BIT_DEPTH == 8 that constant is 128, which matches the function name.
 *
 * @param _src     address of the block's first pixel; reinterpreted as pixel*
 * @param topright not read by this function
 * @param _stride  row stride; shifted right by sizeof(pixel)-1 before use
 */
101 14710 static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright,
102 ptrdiff_t _stride)
103 {
104 14710 pixel *src = (pixel*)_src;
105 14710 int stride = _stride>>(sizeof(pixel)-1);
106 14710 const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
107
108 14710 AV_WN4PA(src+0*stride, a);
109 14710 AV_WN4PA(src+1*stride, a);
110 14710 AV_WN4PA(src+2*stride, a);
111 14710 AV_WN4PA(src+3*stride, a);
112 }
113
/**
 * 4x4 constant fill with no neighbour reads.
 *
 * Same as the 128 variant but the constant is (1 << (BIT_DEPTH-1)) - 1,
 * i.e. 127 when BIT_DEPTH == 8.
 *
 * @param _src     address of the block's first pixel; reinterpreted as pixel*
 * @param topright not read by this function
 * @param _stride  row stride; shifted right by sizeof(pixel)-1 before use
 */
114 52 static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright,
115 ptrdiff_t _stride)
116 {
117 52 pixel *src = (pixel*)_src;
118 52 int stride = _stride>>(sizeof(pixel)-1);
119 52 const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);
120
121 52 AV_WN4PA(src+0*stride, a);
122 52 AV_WN4PA(src+1*stride, a);
123 52 AV_WN4PA(src+2*stride, a);
124 52 AV_WN4PA(src+3*stride, a);
125 }
126
/**
 * 4x4 constant fill with no neighbour reads.
 *
 * Same as the 128 variant but the constant is (1 << (BIT_DEPTH-1)) + 1,
 * i.e. 129 when BIT_DEPTH == 8.
 *
 * @param _src     address of the block's first pixel; reinterpreted as pixel*
 * @param topright not read by this function
 * @param _stride  row stride; shifted right by sizeof(pixel)-1 before use
 */
127 94 static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright,
128 ptrdiff_t _stride)
129 {
130 94 pixel *src = (pixel*)_src;
131 94 int stride = _stride>>(sizeof(pixel)-1);
132 94 const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);
133
134 94 AV_WN4PA(src+0*stride, a);
135 94 AV_WN4PA(src+1*stride, a);
136 94 AV_WN4PA(src+2*stride, a);
137 94 AV_WN4PA(src+3*stride, a);
138 }
139
140
/* Declares locals t4..t7 from topright[0..3]. The expansion site must have a
 * `topright` pointer of pixel type in scope. av_unused is defined elsewhere
 * (presumably it silences unused-variable warnings). */
141 #define LOAD_TOP_RIGHT_EDGE\
142 const unsigned av_unused t4 = topright[0];\
143 const unsigned av_unused t5 = topright[1];\
144 const unsigned av_unused t6 = topright[2];\
145 const unsigned av_unused t7 = topright[3];\
146
/* Declares locals l4..l7 from the left column at rows 4..7
 * (src[-1 + y*stride]). Requires `src` and `stride` in scope. No use of this
 * macro appears in the visible part of the listing. */
147 #define LOAD_DOWN_LEFT_EDGE\
148 const unsigned av_unused l4 = src[-1+4*stride];\
149 const unsigned av_unused l5 = src[-1+5*stride];\
150 const unsigned av_unused l6 = src[-1+6*stride];\
151 const unsigned av_unused l7 = src[-1+7*stride];\
152
/* Declares locals l0..l3 from the left column at rows 0..3
 * (src[-1 + y*stride]). Requires `src` and `stride` in scope. */
153 #define LOAD_LEFT_EDGE\
154 const unsigned av_unused l0 = src[-1+0*stride];\
155 const unsigned av_unused l1 = src[-1+1*stride];\
156 const unsigned av_unused l2 = src[-1+2*stride];\
157 const unsigned av_unused l3 = src[-1+3*stride];\
158
/* Declares locals t0..t3 from the row above the block (src[x - stride],
 * x = 0..3). Requires `src` and `stride` in scope. */
159 #define LOAD_TOP_EDGE\
160 const unsigned av_unused t0 = src[ 0-1*stride];\
161 const unsigned av_unused t1 = src[ 1-1*stride];\
162 const unsigned av_unused t2 = src[ 2-1*stride];\
163 const unsigned av_unused t3 = src[ 3-1*stride];\
164
/**
 * 4x4 diagonal down-right prediction.
 *
 * Inputs are the top-left corner pixel (lt), the four pixels above (t0..t3)
 * and the four pixels to the left (l0..l3). Every output is a rounded
 * (1,2,1)/4 filter of three consecutive neighbours, and all positions with
 * the same x - y receive the same value (chained assignments below).
 *
 * @param _src     address of the block's first pixel; reinterpreted as pixel*
 * @param topright not read by this function
 * @param _stride  row stride; shifted right by sizeof(pixel)-1 before use
 */
165 3777528 static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
166 ptrdiff_t _stride)
167 {
168 3777528 pixel *src = (pixel*)_src;
169 3777528 int stride = _stride>>(sizeof(pixel)-1);
170 3777528 const int lt= src[-1-1*stride];
171 3777528 LOAD_TOP_EDGE
172 3777528 LOAD_LEFT_EDGE
173
174 3777528 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
175 3777528 src[0+2*stride]=
176 3777528 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
177 3777528 src[0+1*stride]=
178 3777528 src[1+2*stride]=
179 3777528 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
180 3777528 src[0+0*stride]=
181 3777528 src[1+1*stride]=
182 3777528 src[2+2*stride]=
183 3777528 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
184 3777528 src[1+0*stride]=
185 3777528 src[2+1*stride]=
186 3777528 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
187 3777528 src[2+0*stride]=
188 3777528 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
189 3777528 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
190 }
191
/**
 * 4x4 diagonal down-left prediction.
 *
 * Inputs are the four pixels above (t0..t3) and four pixels read through
 * _topright (t4..t7); the left column is not used. Every output is a rounded
 * (1,2,1)/4 filter of three consecutive top pixels, and all positions with
 * the same x + y receive the same value. The last position (3,3) uses
 * (t6 + 3*t7 + 2) >> 2 because there is no t8.
 *
 * @param _src      address of the block's first pixel; reinterpreted as pixel*
 * @param _topright address of the four top-right pixels; reinterpreted as
 *                  const pixel*
 * @param _stride   row stride; shifted right by sizeof(pixel)-1 before use
 */
192 2254382 static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright,
193 ptrdiff_t _stride)
194 {
195 2254382 pixel *src = (pixel*)_src;
196 2254382 const pixel *topright = (const pixel*)_topright;
197 2254382 int stride = _stride>>(sizeof(pixel)-1);
198 2254382 LOAD_TOP_EDGE
199 2254382 LOAD_TOP_RIGHT_EDGE
200 // LOAD_LEFT_EDGE
201
202 2254382 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
203 2254382 src[1+0*stride]=
204 2254382 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
205 2254382 src[2+0*stride]=
206 2254382 src[1+1*stride]=
207 2254382 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
208 2254382 src[3+0*stride]=
209 2254382 src[2+1*stride]=
210 2254382 src[1+2*stride]=
211 2254382 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
212 2254382 src[3+1*stride]=
213 2254382 src[2+2*stride]=
214 2254382 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
215 2254382 src[3+2*stride]=
216 2254382 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
217 2254382 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
218 }
219
/**
 * 4x4 vertical-right prediction.
 *
 * Inputs are the top-left corner (lt), the four pixels above (t0..t3) and
 * left pixels l0..l2 (l3 is loaded by the macro but not used). Outputs are
 * either rounded two-pixel averages ((a + b + 1) >> 1) or rounded (1,2,1)/4
 * filters; several positions share a value through chained assignments.
 *
 * @param _src     address of the block's first pixel; reinterpreted as pixel*
 * @param topright not read by this function
 * @param _stride  row stride; shifted right by sizeof(pixel)-1 before use
 */
220 2608726 static void FUNCC(pred4x4_vertical_right)(uint8_t *_src,
221 const uint8_t *topright,
222 ptrdiff_t _stride)
223 {
224 2608726 pixel *src = (pixel*)_src;
225 2608726 int stride = _stride>>(sizeof(pixel)-1);
226 2608726 const int lt= src[-1-1*stride];
227 2608726 LOAD_TOP_EDGE
228 2608726 LOAD_LEFT_EDGE
229
230 2608726 src[0+0*stride]=
231 2608726 src[1+2*stride]=(lt + t0 + 1)>>1;
232 2608726 src[1+0*stride]=
233 2608726 src[2+2*stride]=(t0 + t1 + 1)>>1;
234 2608726 src[2+0*stride]=
235 2608726 src[3+2*stride]=(t1 + t2 + 1)>>1;
236 2608726 src[3+0*stride]=(t2 + t3 + 1)>>1;
237 2608726 src[0+1*stride]=
238 2608726 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
239 2608726 src[1+1*stride]=
240 2608726 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
241 2608726 src[2+1*stride]=
242 2608726 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
243 2608726 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
244 2608726 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
245 2608726 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
246 }
247
/**
 * 4x4 vertical-left prediction.
 *
 * Inputs are the four pixels above (t0..t3) and top-right pixels t4..t6 read
 * through _topright (t7 is loaded by the macro but not used). Rows 0 and 2
 * hold rounded two-pixel averages of adjacent top pixels; rows 1 and 3 hold
 * rounded (1,2,1)/4 filters of three adjacent top pixels.
 *
 * @param _src      address of the block's first pixel; reinterpreted as pixel*
 * @param _topright address of the four top-right pixels; reinterpreted as
 *                  const pixel*
 * @param _stride   row stride; shifted right by sizeof(pixel)-1 before use
 */
248 2138110 static void FUNCC(pred4x4_vertical_left)(uint8_t *_src,
249 const uint8_t *_topright,
250 ptrdiff_t _stride)
251 {
252 2138110 pixel *src = (pixel*)_src;
253 2138110 const pixel *topright = (const pixel*)_topright;
254 2138110 int stride = _stride>>(sizeof(pixel)-1);
255 2138110 LOAD_TOP_EDGE
256 2138110 LOAD_TOP_RIGHT_EDGE
257
258 2138110 src[0+0*stride]=(t0 + t1 + 1)>>1;
259 2138110 src[1+0*stride]=
260 2138110 src[0+2*stride]=(t1 + t2 + 1)>>1;
261 2138110 src[2+0*stride]=
262 2138110 src[1+2*stride]=(t2 + t3 + 1)>>1;
263 2138110 src[3+0*stride]=
264 2138110 src[2+2*stride]=(t3 + t4+ 1)>>1;
265 2138110 src[3+2*stride]=(t4 + t5+ 1)>>1;
266 2138110 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
267 2138110 src[1+1*stride]=
268 2138110 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
269 2138110 src[2+1*stride]=
270 2138110 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
271 2138110 src[3+1*stride]=
272 2138110 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
273 2138110 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
274 }
275
/**
 * 4x4 horizontal-up prediction.
 *
 * Only the four left neighbours l0..l3 are read. Outputs alternate between
 * rounded two-pixel averages and rounded (1,2,1)/4 filters of the left
 * column; where the filter would need a pixel below l3, l3 is reused
 * ((l2 + 2*l3 + l3 + 2) >> 2), and the six remaining bottom-right positions
 * are set to l3 itself.
 *
 * @param _src     address of the block's first pixel; reinterpreted as pixel*
 * @param topright not read by this function
 * @param _stride  row stride; shifted right by sizeof(pixel)-1 before use
 */
276 3985216 static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
277 ptrdiff_t _stride)
278 {
279 3985216 pixel *src = (pixel*)_src;
280 3985216 int stride = _stride>>(sizeof(pixel)-1);
281 3985216 LOAD_LEFT_EDGE
282
283 3985216 src[0+0*stride]=(l0 + l1 + 1)>>1;
284 3985216 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
285 3985216 src[2+0*stride]=
286 3985216 src[0+1*stride]=(l1 + l2 + 1)>>1;
287 3985216 src[3+0*stride]=
288 3985216 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
289 3985216 src[2+1*stride]=
290 3985216 src[0+2*stride]=(l2 + l3 + 1)>>1;
291 3985216 src[3+1*stride]=
292 3985216 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
293 3985216 src[3+2*stride]=
294 3985216 src[1+3*stride]=
295 3985216 src[0+3*stride]=
296 3985216 src[2+2*stride]=
297 3985216 src[2+3*stride]=
298 3985216 src[3+3*stride]=l3;
299 }
300
/**
 * 4x4 horizontal-down prediction.
 *
 * Inputs are the top-left corner (lt), top pixels t0..t2 (t3 is loaded by
 * the macro but not used) and the four left pixels l0..l3. Outputs are
 * rounded two-pixel averages or rounded (1,2,1)/4 filters; several positions
 * share a value through chained assignments.
 *
 * @param _src     address of the block's first pixel; reinterpreted as pixel*
 * @param topright not read by this function
 * @param _stride  row stride; shifted right by sizeof(pixel)-1 before use
 */
301 4293018 static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src,
302 const uint8_t *topright,
303 ptrdiff_t _stride)
304 {
305 4293018 pixel *src = (pixel*)_src;
306 4293018 int stride = _stride>>(sizeof(pixel)-1);
307 4293018 const int lt= src[-1-1*stride];
308 4293018 LOAD_TOP_EDGE
309 4293018 LOAD_LEFT_EDGE
310
311 4293018 src[0+0*stride]=
312 4293018 src[2+1*stride]=(lt + l0 + 1)>>1;
313 4293018 src[1+0*stride]=
314 4293018 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
315 4293018 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
316 4293018 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
317 4293018 src[0+1*stride]=
318 4293018 src[2+2*stride]=(l0 + l1 + 1)>>1;
319 4293018 src[1+1*stride]=
320 4293018 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
321 4293018 src[0+2*stride]=
322 4293018 src[2+3*stride]=(l1 + l2+ 1)>>1;
323 4293018 src[1+2*stride]=
324 4293018 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
325 4293018 src[0+3*stride]=(l2 + l3 + 1)>>1;
326 4293018 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
327 }
328
/**
 * 16x16 vertical prediction.
 *
 * Reads the 16 pixels of the row above the block as four pixel4 values
 * (a..d) and stores those four values into each of the 16 rows.
 *
 * @param _src    address of the block's first pixel; reinterpreted as pixel*
 * @param _stride row stride; shifted right by sizeof(pixel)-1 before use
 */
329 439192 static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
330 {
331 int i;
332 439192 pixel *src = (pixel*)_src;
333 439192 int stride = _stride>>(sizeof(pixel)-1);
334 439192 const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
335 439192 const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
336 439192 const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
337 439192 const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);
338
339
2/2
✓ Branch 0 taken 3513536 times.
✓ Branch 1 taken 219596 times.
7466264 for(i=0; i<16; i++){
340 7027072 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
341 7027072 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
342 7027072 AV_WN4PA(((pixel4*)(src+i*stride))+2, c);
343 7027072 AV_WN4PA(((pixel4*)(src+i*stride))+3, d);
344 }
345 }
346
/**
 * 16x16 horizontal prediction.
 *
 * For each of the 16 rows, splats the pixel to the left of the row
 * (src[-1 + i*stride]) and stores it as four pixel4 values across the row.
 *
 * @param _src   address of the block's first pixel; reinterpreted as pixel*
 * @param stride row stride; shifted right in place by sizeof(pixel)-1
 */
347 485960 static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
348 {
349 int i;
350 485960 pixel *src = (pixel*)_src;
351 485960 stride >>= sizeof(pixel)-1;
352
353
2/2
✓ Branch 0 taken 3887680 times.
✓ Branch 1 taken 242980 times.
8261320 for(i=0; i<16; i++){
354 7775360 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
355
356 7775360 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
357 7775360 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
358 7775360 AV_WN4PA(((pixel4*)(src+i*stride))+2, a);
359 7775360 AV_WN4PA(((pixel4*)(src+i*stride))+3, a);
360 }
361 }
362
/* Fills a 16x16 block with the pixel4 value v: 16 iterations, each storing v
 * at src+0, +4, +8 and +12 and then advancing src by stride. The expansion
 * site must provide `i`, `src` and `stride`; note that src is modified. */
363 #define PREDICT_16x16_DC(v)\
364 for(i=0; i<16; i++){\
365 AV_WN4PA(src+ 0, v);\
366 AV_WN4PA(src+ 4, v);\
367 AV_WN4PA(src+ 8, v);\
368 AV_WN4PA(src+12, v);\
369 src += stride;\
370 }
371
/**
 * 16x16 DC prediction from both the left and the top neighbours.
 *
 * Sums the 16 pixels of the left column and the 16 pixels of the row above,
 * takes the rounded mean (dc + 16) >> 5, and fills the block with it via
 * PREDICT_16x16_DC.
 *
 * @param _src   address of the block's first pixel; reinterpreted as pixel*
 * @param stride row stride; shifted right in place by sizeof(pixel)-1
 */
372 495484 static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride)
373 {
374 495484 int i, dc=0;
375 495484 pixel *src = (pixel*)_src;
376 pixel4 dcsplat;
377 495484 stride >>= sizeof(pixel)-1;
378
379
2/2
✓ Branch 0 taken 3963872 times.
✓ Branch 1 taken 247742 times.
8423228 for(i=0;i<16; i++){
380 7927744 dc+= src[-1+i*stride];
381 }
382
383
2/2
✓ Branch 0 taken 3963872 times.
✓ Branch 1 taken 247742 times.
8423228 for(i=0;i<16; i++){
384 7927744 dc+= src[i-stride];
385 }
386
387 495484 dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
388
2/2
✓ Branch 0 taken 3963872 times.
✓ Branch 1 taken 247742 times.
8423228 PREDICT_16x16_DC(dcsplat);
389 }
390
/**
 * 16x16 DC prediction from the left neighbours only.
 *
 * Sums the 16 pixels of the left column, takes the rounded mean
 * (dc + 8) >> 4, and fills the block with it via PREDICT_16x16_DC.
 *
 * @param _src   address of the block's first pixel; reinterpreted as pixel*
 * @param stride row stride; shifted right in place by sizeof(pixel)-1
 */
391 66876 static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
392 {
393 66876 int i, dc=0;
394 66876 pixel *src = (pixel*)_src;
395 pixel4 dcsplat;
396 66876 stride >>= sizeof(pixel)-1;
397
398
2/2
✓ Branch 0 taken 535008 times.
✓ Branch 1 taken 33438 times.
1136892 for(i=0;i<16; i++){
399 1070016 dc+= src[-1+i*stride];
400 }
401
402 66876 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
403
2/2
✓ Branch 0 taken 535008 times.
✓ Branch 1 taken 33438 times.
1136892 PREDICT_16x16_DC(dcsplat);
404 }
405
/**
 * 16x16 DC prediction from the top neighbours only.
 *
 * Sums the 16 pixels of the row above, takes the rounded mean
 * (dc + 8) >> 4, and fills the block with it via PREDICT_16x16_DC.
 *
 * @param _src   address of the block's first pixel; reinterpreted as pixel*
 * @param stride row stride; shifted right in place by sizeof(pixel)-1
 */
406 11188 static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
407 {
408 11188 int i, dc=0;
409 11188 pixel *src = (pixel*)_src;
410 pixel4 dcsplat;
411 11188 stride >>= sizeof(pixel)-1;
412
413
2/2
✓ Branch 0 taken 89504 times.
✓ Branch 1 taken 5594 times.
190196 for(i=0;i<16; i++){
414 179008 dc+= src[i-stride];
415 }
416
417 11188 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
418
2/2
✓ Branch 0 taken 89504 times.
✓ Branch 1 taken 5594 times.
190196 PREDICT_16x16_DC(dcsplat);
419 }
420
/* Generator for constant-fill 16x16 predictors: PRED16x16_X(n, v) defines
 * pred16x16_<n>_dc, which fills the block with PIXEL_SPLAT_X4(v) via
 * PREDICT_16x16_DC and reads no neighbours. It is instantiated below for
 * n = 127, 128 and 129 with v = (1 << (BIT_DEPTH-1)) - 1, + 0 and + 1. */
421 #define PRED16x16_X(n, v) \
422 static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
423 {\
424 int i;\
425 pixel *src = (pixel*)_src;\
426 stride >>= sizeof(pixel)-1;\
427 PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
428 }
429
430
2/2
✓ Branch 0 taken 192 times.
✓ Branch 1 taken 12 times.
408 PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
431
2/2
✓ Branch 0 taken 32640 times.
✓ Branch 1 taken 2040 times.
34680 PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
432
2/2
✓ Branch 0 taken 160 times.
✓ Branch 1 taken 10 times.
170 PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
433
/**
 * 16x16 plane prediction with selectable gradient scaling.
 *
 * H is a weighted sum of differences along the row above the block, taken
 * symmetrically around src0 = src + 7 - stride with weights k = 1..8.
 * V is the same kind of sum along the left column, using src1 (walking
 * down) and src2 (walking up). The gradients are then scaled by one of three
 * formulas selected by svq3 / rv40, and the block is filled row by row with
 * CLIP((a + x*H + y*V) >> 5).
 *
 * INIT_CLIP and CLIP are defined outside this listing.
 *
 * @param _src    address of the block's first pixel; reinterpreted as pixel*
 * @param _stride row stride; shifted right by sizeof(pixel)-1 before use
 * @param svq3    non-zero selects the division-based scaling and swaps H and V
 * @param rv40    non-zero (when svq3 is zero) selects (x + (x>>2)) >> 4 scaling
 */
434 300788 static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src,
435 ptrdiff_t _stride,
436 const int svq3,
437 const int rv40)
438 {
439 int i, j, k;
440 int a;
441 INIT_CLIP
442 300788 pixel *src = (pixel*)_src;
443 300788 int stride = _stride>>(sizeof(pixel)-1);
444 300788 const pixel * const src0 = src +7-stride;
445 300788 const pixel * src1 = src +8*stride-1;
446 300788 const pixel * src2 = src1-2*stride; // == src+6*stride-1;
447 300788 int H = src0[1] - src0[-1];
448 300788 int V = src1[0] - src2[ 0];
449
2/2
✓ Branch 0 taken 1052758 times.
✓ Branch 1 taken 150394 times.
2406304 for(k=2; k<=8; ++k) {
450 2105516 src1 += stride; src2 -= stride;
451 2105516 H += k*(src0[k] - src0[-k]);
452 2105516 V += k*(src1[0] - src2[ 0]);
453 }
454
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 150338 times.
300788 if(svq3){
455 112 H = ( 5*(H/4) ) / 16;
456 112 V = ( 5*(V/4) ) / 16;
457
458 /* required for 100% accuracy */
459 112 i = H; H = V; V = i;
460
2/2
✓ Branch 0 taken 1216 times.
✓ Branch 1 taken 149122 times.
300676 }else if(rv40){
461 2432 H = ( H + (H>>2) ) >> 4;
462 2432 V = ( V + (V>>2) ) >> 4;
463 }else{
464 298244 H = ( 5*H+32 ) >> 6;
465 298244 V = ( 5*V+32 ) >> 6;
466 }
467
/* After the loop src1 is src + 15*stride - 1 and src2 is src - stride - 1,
 * so src2[16] is src[15 - stride]: the base term uses the last pixel of the
 * left column and the last pixel of the row above. */
468 300788 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
469
2/2
✓ Branch 0 taken 2406304 times.
✓ Branch 1 taken 150394 times.
5113396 for(j=16; j>0; --j) {
470 4812608 int b = a;
471 4812608 a += V;
472
2/2
✓ Branch 0 taken 9625216 times.
✓ Branch 1 taken 2406304 times.
24063040 for(i=-16; i<0; i+=4) {
473 19250432 src[16+i] = CLIP((b ) >> 5);
474 19250432 src[17+i] = CLIP((b+ H) >> 5);
475 19250432 src[18+i] = CLIP((b+2*H) >> 5);
476 19250432 src[19+i] = CLIP((b+3*H) >> 5);
477 19250432 b += 4*H;
478 }
479 4812608 src += stride;
480 }
481 }
482
/**
 * 16x16 plane prediction: calls pred16x16_plane_compat with svq3 = 0 and
 * rv40 = 0, which selects the (5*x + 32) >> 6 gradient scaling.
 *
 * @param src    address of the block's first pixel
 * @param stride row stride, passed through unchanged
 */
483 298244 static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride)
484 {
485 298244 FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
486 }
487
/**
 * 8x8 vertical prediction.
 *
 * Reads the eight pixels of the row above the block as two pixel4 values and
 * stores them into each of the eight rows.
 *
 * @param _src    address of the block's first pixel; reinterpreted as pixel*
 * @param _stride row stride; shifted right by sizeof(pixel)-1 before use
 */
488 1423816 static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride)
489 {
490 int i;
491 1423816 pixel *src = (pixel*)_src;
492 1423816 int stride = _stride>>(sizeof(pixel)-1);
493 1423816 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
494 1423816 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
495
496
2/2
✓ Branch 0 taken 5695264 times.
✓ Branch 1 taken 711908 times.
12814344 for(i=0; i<8; i++){
497 11390528 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
498 11390528 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
499 }
500 }
501
/**
 * 8x16 vertical prediction (8 pixels wide, 16 rows).
 *
 * Reads the eight pixels of the row above the block as two pixel4 values and
 * stores them into each of the 16 rows.
 *
 * @param _src    address of the block's first pixel; reinterpreted as pixel*
 * @param _stride row stride; shifted right by sizeof(pixel)-1 before use
 */
502 322924 static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
503 {
504 int i;
505 322924 pixel *src = (pixel*)_src;
506 322924 int stride = _stride>>(sizeof(pixel)-1);
507 322924 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
508 322924 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
509
510
2/2
✓ Branch 0 taken 2583392 times.
✓ Branch 1 taken 161462 times.
5489708 for(i=0; i<16; i++){
511 5166784 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
512 5166784 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
513 }
514 }
515
/**
 * 8x8 horizontal prediction.
 *
 * For each of the eight rows, splats the pixel to the left of the row
 * (src[-1 + i*stride]) and stores it as two pixel4 values across the row.
 *
 * @param _src   address of the block's first pixel; reinterpreted as pixel*
 * @param stride row stride; shifted right in place by sizeof(pixel)-1
 */
516 2938128 static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride)
517 {
518 int i;
519 2938128 pixel *src = (pixel*)_src;
520 2938128 stride >>= sizeof(pixel)-1;
521
522
2/2
✓ Branch 0 taken 11752512 times.
✓ Branch 1 taken 1469064 times.
26443152 for(i=0; i<8; i++){
523 23505024 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
524 23505024 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
525 23505024 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
526 }
527 }
528
/**
 * 8x16 horizontal prediction (8 pixels wide, 16 rows).
 *
 * For each of the 16 rows, splats the pixel to the left of the row
 * (src[-1 + i*stride]) and stores it as two pixel4 values across the row.
 *
 * @param _src   address of the block's first pixel; reinterpreted as pixel*
 * @param stride row stride; shifted right in place by sizeof(pixel)-1
 */
529 595976 static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
530 {
531 int i;
532 595976 pixel *src = (pixel*)_src;
533 595976 stride >>= sizeof(pixel)-1;
534
2/2
✓ Branch 0 taken 4767808 times.
✓ Branch 1 taken 297988 times.
10131592 for(i=0; i<16; i++){
535 9535616 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
536 9535616 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
537 9535616 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
538 }
539 }
540
/* Generator for constant-fill 8x8 predictors: PRED8x8_X(n, v) defines
 * pred8x8_<n>_dc, which stores PIXEL_SPLAT_X4(v) as two pixel4 values into
 * each of eight rows and reads no neighbours. It is instantiated below for
 * n = 127, 128 and 129 with v = (1 << (BIT_DEPTH-1)) - 1, + 0 and + 1. */
541 #define PRED8x8_X(n, v)\
542 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
543 {\
544 int i;\
545 const pixel4 a = PIXEL_SPLAT_X4(v);\
546 pixel *src = (pixel*)_src;\
547 stride >>= sizeof(pixel)-1;\
548 for(i=0; i<8; i++){\
549 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
550 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
551 }\
552 }
553
554
2/2
✓ Branch 0 taken 912 times.
✓ Branch 1 taken 114 times.
2052 PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
555
2/2
✓ Branch 0 taken 1032032 times.
✓ Branch 1 taken 129004 times.
1161036 PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
556
2/2
✓ Branch 0 taken 2144 times.
✓ Branch 1 taken 268 times.
2412 PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
557
/**
 * 8x16 constant fill, built from two 8x8 fills: pred8x8_128_dc is called
 * once at _src and once at _src + 8*stride. The offset is applied to the
 * uint8_t pointer with the unshifted stride.
 *
 * @param _src   address of the block's first pixel
 * @param stride row stride as received from the caller (not shifted here)
 */
558 3076 static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride)
559 {
560 3076 FUNCC(pred8x8_128_dc)(_src, stride);
561 3076 FUNCC(pred8x8_128_dc)(_src+8*stride, stride);
562 }
563
/**
 * 8x8 DC prediction from the left neighbours only.
 *
 * dc0 is the sum of the left-column pixels beside rows 0..3 and dc2 the sum
 * beside rows 4..7; each is turned into a rounded mean with (sum + 2) >> 2.
 * Rows 0..3 are filled (full width) with the dc0 value and rows 4..7 with
 * the dc2 value.
 *
 * @param _src   address of the block's first pixel; reinterpreted as pixel*
 * @param stride row stride; shifted right in place by sizeof(pixel)-1
 */
564 1081968 static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride)
565 {
566 int i;
567 int dc0, dc2;
568 pixel4 dc0splat, dc2splat;
569 1081968 pixel *src = (pixel*)_src;
570 1081968 stride >>= sizeof(pixel)-1;
571
572 1081968 dc0=dc2=0;
573
2/2
✓ Branch 0 taken 2163936 times.
✓ Branch 1 taken 540984 times.
5409840 for(i=0;i<4; i++){
574 4327872 dc0+= src[-1+i*stride];
575 4327872 dc2+= src[-1+(i+4)*stride];
576 }
577 1081968 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
578 1081968 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
579
580
2/2
✓ Branch 0 taken 2163936 times.
✓ Branch 1 taken 540984 times.
5409840 for(i=0; i<4; i++){
581 4327872 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
582 4327872 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);
583 }
584
2/2
✓ Branch 0 taken 2163936 times.
✓ Branch 1 taken 540984 times.
5409840 for(i=4; i<8; i++){
585 4327872 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
586 4327872 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);
587 }
588 }
589
/**
 * 8x16 left-DC prediction, built from two 8x8 calls: pred8x8_left_dc is
 * called once at _src and once at _src + 8*stride. The offset is applied to
 * the uint8_t pointer with the unshifted stride.
 *
 * @param _src   address of the block's first pixel
 * @param stride row stride as received from the caller (not shifted here)
 */
590 195540 static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
591 {
592 195540 FUNCC(pred8x8_left_dc)(_src, stride);
593 195540 FUNCC(pred8x8_left_dc)(_src+8*stride, stride);
594 }
595
/**
 * 8x8 DC prediction from the top neighbours only.
 *
 * dc0 is the sum of the four pixels above columns 0..3 and dc1 the sum above
 * columns 4..7; each becomes a rounded mean with (sum + 2) >> 2. Every row
 * gets the dc0 value in its left half and the dc1 value in its right half.
 * The two store loops (rows 0..3 and rows 4..7) write the same pair of
 * values.
 *
 * @param _src   address of the block's first pixel; reinterpreted as pixel*
 * @param stride row stride; shifted right in place by sizeof(pixel)-1
 */
596 165156 static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride)
597 {
598 int i;
599 int dc0, dc1;
600 pixel4 dc0splat, dc1splat;
601 165156 pixel *src = (pixel*)_src;
602 165156 stride >>= sizeof(pixel)-1;
603
604 165156 dc0=dc1=0;
605
2/2
✓ Branch 0 taken 330312 times.
✓ Branch 1 taken 82578 times.
825780 for(i=0;i<4; i++){
606 660624 dc0+= src[i-stride];
607 660624 dc1+= src[4+i-stride];
608 }
609 165156 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
610 165156 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
611
612
2/2
✓ Branch 0 taken 330312 times.
✓ Branch 1 taken 82578 times.
825780 for(i=0; i<4; i++){
613 660624 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
614 660624 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
615 }
616
2/2
✓ Branch 0 taken 330312 times.
✓ Branch 1 taken 82578 times.
825780 for(i=4; i<8; i++){
617 660624 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
618 660624 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
619 }
620 }
621
/**
 * 8x16 DC prediction from the top neighbours only.
 *
 * dc0 is the sum of the four pixels above columns 0..3 and dc1 the sum above
 * columns 4..7; each becomes a rounded mean with (sum + 2) >> 2. All 16 rows
 * get the dc0 value in the left half and the dc1 value in the right half.
 *
 * @param _src   address of the block's first pixel; reinterpreted as pixel*
 * @param stride row stride; shifted right in place by sizeof(pixel)-1
 */
622 26616 static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
623 {
624 int i;
625 int dc0, dc1;
626 pixel4 dc0splat, dc1splat;
627 26616 pixel *src = (pixel*)_src;
628 26616 stride >>= sizeof(pixel)-1;
629
630 26616 dc0=dc1=0;
631
2/2
✓ Branch 0 taken 53232 times.
✓ Branch 1 taken 13308 times.
133080 for(i=0;i<4; i++){
632 106464 dc0+= src[i-stride];
633 106464 dc1+= src[4+i-stride];
634 }
635 26616 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
636 26616 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
637
638
2/2
✓ Branch 0 taken 212928 times.
✓ Branch 1 taken 13308 times.
452472 for(i=0; i<16; i++){
639 425856 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
640 425856 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
641 }
642 }
643
/**
 * 8x8 DC prediction computed separately for each 4x4 quadrant.
 *
 * Sums: dc0 = left pixels beside rows 0..3 plus top pixels above columns
 * 0..3; dc1 = top pixels above columns 4..7; dc2 = left pixels beside rows
 * 4..7.
 *
 * Quadrant values:
 *  - top-left:     (dc0 + 4) >> 3
 *  - top-right:    (dc1 + 2) >> 2
 *  - bottom-left:  (dc2 + 2) >> 2
 *  - bottom-right: (dc1 + dc2 + 4) >> 3
 *
 * @param _src   address of the block's first pixel; reinterpreted as pixel*
 * @param stride row stride; shifted right in place by sizeof(pixel)-1
 */
644 5522096 static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride)
645 {
646 int i;
647 int dc0, dc1, dc2;
648 pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
649 5522096 pixel *src = (pixel*)_src;
650 5522096 stride >>= sizeof(pixel)-1;
651
652 5522096 dc0=dc1=dc2=0;
653
2/2
✓ Branch 0 taken 11044192 times.
✓ Branch 1 taken 2761048 times.
27610480 for(i=0;i<4; i++){
654 22088384 dc0+= src[-1+i*stride] + src[i-stride];
655 22088384 dc1+= src[4+i-stride];
656 22088384 dc2+= src[-1+(i+4)*stride];
657 }
658 5522096 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
659 5522096 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
660 5522096 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
661 5522096 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
662
663
2/2
✓ Branch 0 taken 11044192 times.
✓ Branch 1 taken 2761048 times.
27610480 for(i=0; i<4; i++){
664 22088384 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
665 22088384 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
666 }
667
2/2
✓ Branch 0 taken 11044192 times.
✓ Branch 1 taken 2761048 times.
27610480 for(i=4; i<8; i++){
668 22088384 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
669 22088384 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
670 }
671 }
672
/**
 * 8x16 DC prediction computed separately for each of the eight 4x4
 * sub-blocks (two columns by four row groups).
 *
 * Sums: dc0 = left pixels beside rows 0..3 plus top pixels above columns
 * 0..3; dc1 = top pixels above columns 4..7; dc2, dc3, dc4 = left pixels
 * beside rows 4..7, 8..11 and 12..15.
 *
 * Values written (left half / right half):
 *  - rows 0..3:   (dc0 + 4) >> 3  /  (dc1 + 2) >> 2
 *  - rows 4..7:   (dc2 + 2) >> 2  /  (dc1 + dc2 + 4) >> 3
 *  - rows 8..11:  (dc3 + 2) >> 2  /  (dc1 + dc3 + 4) >> 3
 *  - rows 12..15: (dc4 + 2) >> 2  /  (dc1 + dc4 + 4) >> 3
 *
 * @param _src   address of the block's first pixel; reinterpreted as pixel*
 * @param stride row stride; shifted right in place by sizeof(pixel)-1
 */
673 1594572 static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride)
674 {
675 int i;
676 int dc0, dc1, dc2, dc3, dc4;
677 pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
678 1594572 pixel *src = (pixel*)_src;
679 1594572 stride >>= sizeof(pixel)-1;
680
681 1594572 dc0=dc1=dc2=dc3=dc4=0;
682
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=0;i<4; i++){
683 6378288 dc0+= src[-1+i*stride] + src[i-stride];
684 6378288 dc1+= src[4+i-stride];
685 6378288 dc2+= src[-1+(i+4)*stride];
686 6378288 dc3+= src[-1+(i+8)*stride];
687 6378288 dc4+= src[-1+(i+12)*stride];
688 }
689 1594572 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
690 1594572 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
691 1594572 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
692 1594572 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
693 1594572 dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);
694 1594572 dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);
695 1594572 dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);
696 1594572 dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);
697
698
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=0; i<4; i++){
699 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
700 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
701 }
702
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=4; i<8; i++){
703 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
704 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
705 }
706
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=8; i<12; i++){
707 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
708 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
709 }
710
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=12; i<16; i++){
711 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
712 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
713 }
714 }
715
716 // The following mad_cow functions should not be optimized!
/**
 * 8x8 composite DC predictor: runs pred8x8_top_dc over the whole block, then
 * overwrites the 4x4 block at src with pred4x4_dc (top and left neighbours).
 *
 * @param src    address of the block's first pixel
 * @param stride row stride, passed through unchanged to both calls
 */
717 68 static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
718 {
719 68 FUNCC(pred8x8_top_dc)(src, stride);
720 68 FUNCC(pred4x4_dc)(src, NULL, stride);
721 }
722
/**
 * 8x16 composite DC predictor: runs pred8x16_top_dc over the whole block,
 * then overwrites the 4x4 block at src with pred4x4_dc (top and left
 * neighbours).
 *
 * @param src    address of the block's first pixel
 * @param stride row stride, passed through unchanged to both calls
 */
723 12 static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
724 {
725 12 FUNCC(pred8x16_top_dc)(src, stride);
726 12 FUNCC(pred4x4_dc)(src, NULL, stride);
727 }
728
/**
 * 8x8 composite DC predictor: runs pred8x8_dc over the whole block, then
 * overwrites the 4x4 block at src with pred4x4_top_dc (top neighbours only).
 *
 * @param src    address of the block's first pixel
 * @param stride row stride, passed through unchanged to both calls
 */
729 36 static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
730 {
731 36 FUNCC(pred8x8_dc)(src, stride);
732 36 FUNCC(pred4x4_top_dc)(src, NULL, stride);
733 }
734
/**
 * 8x16 composite DC predictor: runs pred8x16_dc over the whole block, then
 * overwrites the 4x4 block at src with pred4x4_top_dc (top neighbours only).
 *
 * @param src    address of the block's first pixel
 * @param stride row stride, passed through unchanged to both calls
 */
735 12 static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
736 {
737 12 FUNCC(pred8x16_dc)(src, stride);
738 12 FUNCC(pred4x4_top_dc)(src, NULL, stride);
739 }
740
/**
 * 8x8 composite DC predictor: runs pred8x8_left_dc over the whole block,
 * then overwrites two 4x4 blocks with pred4x4_128_dc: the one at
 * src + 4*stride and the one 4*sizeof(pixel) bytes to its right (rows 4..7,
 * both halves).
 *
 * @param src    address of the block's first pixel
 * @param stride row stride, used unshifted as an offset on the uint8_t pointer
 */
741 20 static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
742 {
743 20 FUNCC(pred8x8_left_dc)(src, stride);
744 20 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
745 20 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
746 }
747
/**
 * 8x16 composite DC predictor: runs pred8x16_left_dc over the whole block,
 * then overwrites two 4x4 blocks with pred4x4_128_dc: the one at
 * src + 4*stride and the one 4*sizeof(pixel) bytes to its right (rows 4..7,
 * both halves).
 *
 * @param src    address of the block's first pixel
 * @param stride row stride, used unshifted as an offset on the uint8_t pointer
 */
748 12 static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
749 {
750 12 FUNCC(pred8x16_left_dc)(src, stride);
751 12 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
752 12 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
753 }
754
/**
 * 8x8 composite DC predictor: runs pred8x8_left_dc over the whole block,
 * then overwrites two 4x4 blocks with pred4x4_128_dc: the one at src and the
 * one 4*sizeof(pixel) bytes to its right (rows 0..3, both halves).
 *
 * @param src    address of the block's first pixel
 * @param stride row stride, passed through unchanged to all calls
 */
755 32 static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
756 {
757 32 FUNCC(pred8x8_left_dc)(src, stride);
758 32 FUNCC(pred4x4_128_dc)(src , NULL, stride);
759 32 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
760 }
761
/**
 * 8x16 composite DC predictor: runs pred8x16_left_dc over the whole block,
 * then overwrites two 4x4 blocks with pred4x4_128_dc: the one at src and the
 * one 4*sizeof(pixel) bytes to its right (rows 0..3, both halves).
 *
 * @param src    address of the block's first pixel
 * @param stride row stride, passed through unchanged to all calls
 */
762 12 static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
763 {
764 12 FUNCC(pred8x16_left_dc)(src, stride);
765 12 FUNCC(pred4x4_128_dc)(src , NULL, stride);
766 12 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
767 }
768
/**
 * 8x8 plane prediction.
 *
 * H is a weighted sum of differences along the row above the block, taken
 * symmetrically around src0 = src + 3 - stride with weights k = 1..4; V is
 * the same kind of sum along the left column using src1 (walking down) and
 * src2 (walking up). Both are scaled with (17*x + 16) >> 5, and each of the
 * eight rows is filled with CLIP((a + x*H) >> 5), a advancing by V per row.
 *
 * INIT_CLIP and CLIP are defined outside this listing.
 *
 * @param _src    address of the block's first pixel; reinterpreted as pixel*
 * @param _stride row stride; shifted right by sizeof(pixel)-1 before use
 */
769 923984 static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride)
770 {
771 int j, k;
772 int a;
773 INIT_CLIP
774 923984 pixel *src = (pixel*)_src;
775 923984 int stride = _stride>>(sizeof(pixel)-1);
776 923984 const pixel * const src0 = src +3-stride;
777 923984 const pixel * src1 = src +4*stride-1;
778 923984 const pixel * src2 = src1-2*stride; // == src+2*stride-1;
779 923984 int H = src0[1] - src0[-1];
780 923984 int V = src1[0] - src2[ 0];
781
2/2
✓ Branch 0 taken 1385976 times.
✓ Branch 1 taken 461992 times.
3695936 for(k=2; k<=4; ++k) {
782 2771952 src1 += stride; src2 -= stride;
783 2771952 H += k*(src0[k] - src0[-k]);
784 2771952 V += k*(src1[0] - src2[ 0]);
785 }
786 923984 H = ( 17*H+16 ) >> 5;
787 923984 V = ( 17*V+16 ) >> 5;
788
/* After the loop src1 is src + 7*stride - 1 and src2 is src - stride - 1,
 * so src2[8] is src[7 - stride]. */
789 923984 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
790
2/2
✓ Branch 0 taken 3695936 times.
✓ Branch 1 taken 461992 times.
8315856 for(j=8; j>0; --j) {
791 7391872 int b = a;
792 7391872 a += V;
793 7391872 src[0] = CLIP((b ) >> 5);
794 7391872 src[1] = CLIP((b+ H) >> 5);
795 7391872 src[2] = CLIP((b+2*H) >> 5);
796 7391872 src[3] = CLIP((b+3*H) >> 5);
797 7391872 src[4] = CLIP((b+4*H) >> 5);
798 7391872 src[5] = CLIP((b+5*H) >> 5);
799 7391872 src[6] = CLIP((b+6*H) >> 5);
800 7391872 src[7] = CLIP((b+7*H) >> 5);
801 7391872 src += stride;
802 }
803 }
804
/**
 * 8x16 plane prediction (8 pixels wide, 16 rows).
 *
 * H is accumulated from the row above with weights k = 1..4 around
 * src0 = src + 3 - stride; V is accumulated from the left column with
 * weights k = 1..8 (the second loop continues k from 5 to 8 for V only).
 * H is scaled with (17*H + 16) >> 5 and V with (5*V + 32) >> 6. Each of the
 * 16 rows is filled with CLIP((a + x*H) >> 5), a advancing by V per row.
 *
 * INIT_CLIP and CLIP are defined outside this listing.
 *
 * @param _src    address of the block's first pixel; reinterpreted as pixel*
 * @param _stride row stride; shifted right by sizeof(pixel)-1 before use
 */
805 328576 static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride)
806 {
807 int j, k;
808 int a;
809 INIT_CLIP
810 328576 pixel *src = (pixel*)_src;
811 328576 int stride = _stride>>(sizeof(pixel)-1);
812 328576 const pixel * const src0 = src +3-stride;
813 328576 const pixel * src1 = src +8*stride-1;
814 328576 const pixel * src2 = src1-2*stride; // == src+6*stride-1;
815 328576 int H = src0[1] - src0[-1];
816 328576 int V = src1[0] - src2[ 0];
817
818
2/2
✓ Branch 0 taken 492864 times.
✓ Branch 1 taken 164288 times.
1314304 for (k = 2; k <= 4; ++k) {
819 985728 src1 += stride; src2 -= stride;
820 985728 H += k*(src0[k] - src0[-k]);
821 985728 V += k*(src1[0] - src2[ 0]);
822 }
823
2/2
✓ Branch 0 taken 657152 times.
✓ Branch 1 taken 164288 times.
1642880 for (; k <= 8; ++k) {
824 1314304 src1 += stride; src2 -= stride;
825 1314304 V += k*(src1[0] - src2[0]);
826 }
827
828 328576 H = (17*H+16) >> 5;
829 328576 V = (5*V+32) >> 6;
830
/* After both loops src1 is src + 15*stride - 1 and src2 is src - stride - 1,
 * so src2[8] is src[7 - stride]. */
831 328576 a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;
832
2/2
✓ Branch 0 taken 2628608 times.
✓ Branch 1 taken 164288 times.
5585792 for(j=16; j>0; --j) {
833 5257216 int b = a;
834 5257216 a += V;
835 5257216 src[0] = CLIP((b ) >> 5);
836 5257216 src[1] = CLIP((b+ H) >> 5);
837 5257216 src[2] = CLIP((b+2*H) >> 5);
838 5257216 src[3] = CLIP((b+3*H) >> 5);
839 5257216 src[4] = CLIP((b+4*H) >> 5);
840 5257216 src[5] = CLIP((b+5*H) >> 5);
841 5257216 src[6] = CLIP((b+6*H) >> 5);
842 5257216 src[7] = CLIP((b+7*H) >> 5);
843 5257216 src += stride;
844 }
845 }
846
/* SRC(x,y): pixel at column x, row y relative to the block origin; needs
 * `src` and `stride` in scope. */
847 #define SRC(x,y) src[(x)+(y)*stride]
/* PL(y): declares l<y> as the rounded (1,2,1)/4 filter of the left-column
 * pixels at rows y-1, y and y+1. */
848 #define PL(y) \
849 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* PREDICT_8x8_LOAD_LEFT: declares filtered left neighbours l0..l7. For l0
 * the upper tap is SRC(-1,-1) when has_topleft is set, otherwise SRC(-1,0)
 * is reused; l7 has no pixel below it and uses (SRC(-1,6) + 3*SRC(-1,7) + 2)
 * >> 2. Needs `has_topleft` in scope. The expansion has no trailing
 * semicolon, so the user supplies it. */
850 #define PREDICT_8x8_LOAD_LEFT \
851 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
852 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
853 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
854 const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
855
/* PT(x): declares t<x> as the rounded (1,2,1)/4 filter of the pixels in the
 * row above at columns x-1, x and x+1. */
856 #define PT(x) \
857 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* PREDICT_8x8_LOAD_TOP: declares filtered top neighbours t0..t7. For t0 the
 * left tap is SRC(-1,-1) when has_topleft is set, otherwise SRC(0,-1) is
 * reused; for t7 the right tap is SRC(8,-1) when has_topright is set,
 * otherwise SRC(7,-1) is reused. Needs `has_topleft` and `has_topright` in
 * scope. The expansion has no trailing semicolon. */
858 #define PREDICT_8x8_LOAD_TOP \
859 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
860 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
861 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
862 const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
863 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
864
865 #define PTR(x) \
866 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
867 #define PREDICT_8x8_LOAD_TOPRIGHT \
868 int t8, t9, t10, t11, t12, t13, t14, t15; \
869 if(has_topright) { \
870 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
871 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
872 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
873
874 #define PREDICT_8x8_LOAD_TOPLEFT \
875 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
876
877 #define PREDICT_8x8_DC(v) \
878 int y; \
879 for( y = 0; y < 8; y++ ) { \
880 AV_WN4PA(((pixel4*)src)+0, v); \
881 AV_WN4PA(((pixel4*)src)+1, v); \
882 src += stride; \
883 }
884
885 2240 static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft,
886 int has_topright, ptrdiff_t _stride)
887 {
888 2240 pixel *src = (pixel*)_src;
889 2240 int stride = _stride>>(sizeof(pixel)-1);
890
891
2/2
✓ Branch 0 taken 8960 times.
✓ Branch 1 taken 1120 times.
20160 PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
892 }
893 132790 static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft,
894 int has_topright, ptrdiff_t _stride)
895 {
896 265580 pixel *src = (pixel*)_src;
897 265580 int stride = _stride>>(sizeof(pixel)-1);
898
899
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 132778 times.
265580 PREDICT_8x8_LOAD_LEFT;
900 265580 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
901
2/2
✓ Branch 0 taken 1062320 times.
✓ Branch 1 taken 132790 times.
2390220 PREDICT_8x8_DC(dc);
902 }
903 16948 static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft,
904 int has_topright, ptrdiff_t _stride)
905 {
906 33896 pixel *src = (pixel*)_src;
907 33896 int stride = _stride>>(sizeof(pixel)-1);
908
909
4/4
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16932 times.
✓ Branch 2 taken 16932 times.
✓ Branch 3 taken 16 times.
33896 PREDICT_8x8_LOAD_TOP;
910 33896 const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
911
2/2
✓ Branch 0 taken 135584 times.
✓ Branch 1 taken 16948 times.
305064 PREDICT_8x8_DC(dc);
912 }
913 1848018 static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft,
914 int has_topright, ptrdiff_t _stride)
915 {
916 3696036 pixel *src = (pixel*)_src;
917 3696036 int stride = _stride>>(sizeof(pixel)-1);
918
919
2/2
✓ Branch 0 taken 1847917 times.
✓ Branch 1 taken 101 times.
3696036 PREDICT_8x8_LOAD_LEFT;
920
4/4
✓ Branch 0 taken 1847917 times.
✓ Branch 1 taken 101 times.
✓ Branch 2 taken 1287318 times.
✓ Branch 3 taken 560700 times.
3696036 PREDICT_8x8_LOAD_TOP;
921 3696036 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
922 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
923
2/2
✓ Branch 0 taken 14784144 times.
✓ Branch 1 taken 1848018 times.
33264324 PREDICT_8x8_DC(dc);
924 }
925 1649885 static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft,
926 int has_topright, ptrdiff_t _stride)
927 {
928 3299770 pixel *src = (pixel*)_src;
929 3299770 int stride = _stride>>(sizeof(pixel)-1);
930 pixel4 a;
931
932
2/2
✓ Branch 0 taken 1608131 times.
✓ Branch 1 taken 41754 times.
3299770 PREDICT_8x8_LOAD_LEFT;
933 #define ROW(y) a = PIXEL_SPLAT_X4(l##y); \
934 AV_WN4PA(src+y*stride, a); \
935 AV_WN4PA(src+y*stride+4, a);
936 3299770 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
937 #undef ROW
938 }
939 517940 static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft,
940 int has_topright, ptrdiff_t _stride)
941 {
942 int y;
943 1035880 pixel *src = (pixel*)_src;
944 1035880 int stride = _stride>>(sizeof(pixel)-1);
945 pixel4 a, b;
946
947
4/4
✓ Branch 0 taken 501926 times.
✓ Branch 1 taken 16014 times.
✓ Branch 2 taken 388023 times.
✓ Branch 3 taken 129917 times.
1035880 PREDICT_8x8_LOAD_TOP;
948 1035880 src[0] = t0;
949 1035880 src[1] = t1;
950 1035880 src[2] = t2;
951 1035880 src[3] = t3;
952 1035880 src[4] = t4;
953 1035880 src[5] = t5;
954 1035880 src[6] = t6;
955 1035880 src[7] = t7;
956 1035880 a = AV_RN4PA(((pixel4*)src)+0);
957 1035880 b = AV_RN4PA(((pixel4*)src)+1);
958
2/2
✓ Branch 0 taken 3625580 times.
✓ Branch 1 taken 517940 times.
8287040 for( y = 1; y < 8; y++ ) {
959 7251160 AV_WN4PA(((pixel4*)(src+y*stride))+0, a);
960 7251160 AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
961 }
962 }
963 194050 static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft,
964 int has_topright, ptrdiff_t _stride)
965 {
966 388100 pixel *src = (pixel*)_src;
967 388100 int stride = _stride>>(sizeof(pixel)-1);
968
4/4
✓ Branch 0 taken 192082 times.
✓ Branch 1 taken 1968 times.
✓ Branch 2 taken 147447 times.
✓ Branch 3 taken 46603 times.
388100 PREDICT_8x8_LOAD_TOP;
969
2/2
✓ Branch 0 taken 147447 times.
✓ Branch 1 taken 46603 times.
388100 PREDICT_8x8_LOAD_TOPRIGHT;
970 388100 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
971 388100 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
972 388100 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
973 388100 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
974 388100 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
975 388100 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
976 388100 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
977 388100 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
978 388100 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
979 388100 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
980 388100 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
981 388100 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
982 388100 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
983 388100 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
984 388100 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
985 }
986 293665 static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,
987 int has_topright, ptrdiff_t _stride)
988 {
989 587330 pixel *src = (pixel*)_src;
990 587330 int stride = _stride>>(sizeof(pixel)-1);
991
3/4
✓ Branch 0 taken 293665 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 226852 times.
✓ Branch 3 taken 66813 times.
587330 PREDICT_8x8_LOAD_TOP;
992
1/2
✓ Branch 0 taken 293665 times.
✗ Branch 1 not taken.
587330 PREDICT_8x8_LOAD_LEFT;
993 587330 PREDICT_8x8_LOAD_TOPLEFT;
994 587330 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
995 587330 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
996 587330 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
997 587330 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
998 587330 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
999 587330 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
1000 587330 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
1001 587330 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
1002 587330 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
1003 587330 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
1004 587330 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
1005 587330 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
1006 587330 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
1007 587330 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
1008 587330 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
1009 }
1010 197599 static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,
1011 int has_topright, ptrdiff_t _stride)
1012 {
1013 395198 pixel *src = (pixel*)_src;
1014 395198 int stride = _stride>>(sizeof(pixel)-1);
1015
3/4
✓ Branch 0 taken 197599 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 151346 times.
✓ Branch 3 taken 46253 times.
395198 PREDICT_8x8_LOAD_TOP;
1016
1/2
✓ Branch 0 taken 197599 times.
✗ Branch 1 not taken.
395198 PREDICT_8x8_LOAD_LEFT;
1017 395198 PREDICT_8x8_LOAD_TOPLEFT;
1018 395198 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
1019 395198 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
1020 395198 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
1021 395198 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
1022 395198 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
1023 395198 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
1024 395198 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
1025 395198 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
1026 395198 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
1027 395198 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
1028 395198 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
1029 395198 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
1030 395198 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
1031 395198 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
1032 395198 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
1033 395198 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
1034 395198 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
1035 395198 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
1036 395198 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
1037 395198 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
1038 395198 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
1039 395198 SRC(7,0)= (t6 + t7 + 1) >> 1;
1040 }
1041 447851 static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,
1042 int has_topright, ptrdiff_t _stride)
1043 {
1044 895702 pixel *src = (pixel*)_src;
1045 895702 int stride = _stride>>(sizeof(pixel)-1);
1046
4/4
✓ Branch 0 taken 447837 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 344102 times.
✓ Branch 3 taken 103749 times.
895702 PREDICT_8x8_LOAD_TOP;
1047
2/2
✓ Branch 0 taken 447837 times.
✓ Branch 1 taken 14 times.
895702 PREDICT_8x8_LOAD_LEFT;
1048 895702 PREDICT_8x8_LOAD_TOPLEFT;
1049 895702 SRC(0,7)= (l6 + l7 + 1) >> 1;
1050 895702 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
1051 895702 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
1052 895702 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
1053 895702 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
1054 895702 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
1055 895702 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
1056 895702 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
1057 895702 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
1058 895702 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
1059 895702 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
1060 895702 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
1061 895702 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
1062 895702 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
1063 895702 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
1064 895702 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
1065 895702 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
1066 895702 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
1067 895702 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
1068 895702 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
1069 895702 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
1070 895702 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
1071 }
1072 198165 static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,
1073 int has_topright, ptrdiff_t _stride)
1074 {
1075 396330 pixel *src = (pixel*)_src;
1076 396330 int stride = _stride>>(sizeof(pixel)-1);
1077
4/4
✓ Branch 0 taken 196410 times.
✓ Branch 1 taken 1755 times.
✓ Branch 2 taken 154360 times.
✓ Branch 3 taken 43805 times.
396330 PREDICT_8x8_LOAD_TOP;
1078
2/2
✓ Branch 0 taken 154360 times.
✓ Branch 1 taken 43805 times.
396330 PREDICT_8x8_LOAD_TOPRIGHT;
1079 396330 SRC(0,0)= (t0 + t1 + 1) >> 1;
1080 396330 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
1081 396330 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
1082 396330 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
1083 396330 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
1084 396330 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
1085 396330 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
1086 396330 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
1087 396330 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
1088 396330 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
1089 396330 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
1090 396330 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
1091 396330 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
1092 396330 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
1093 396330 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
1094 396330 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
1095 396330 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
1096 396330 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
1097 396330 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
1098 396330 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
1099 396330 SRC(7,6)= (t10 + t11 + 1) >> 1;
1100 396330 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
1101 }
1102 548555 static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
1103 int has_topright, ptrdiff_t _stride)
1104 {
1105 1097110 pixel *src = (pixel*)_src;
1106 1097110 int stride = _stride>>(sizeof(pixel)-1);
1107
2/2
✓ Branch 0 taken 524777 times.
✓ Branch 1 taken 23778 times.
1097110 PREDICT_8x8_LOAD_LEFT;
1108 1097110 SRC(0,0)= (l0 + l1 + 1) >> 1;
1109 1097110 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
1110 1097110 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
1111 1097110 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
1112 1097110 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
1113 1097110 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
1114 1097110 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
1115 1097110 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
1116 1097110 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
1117 1097110 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
1118 1097110 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
1119 1097110 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
1120 1097110 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
1121 1097110 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
1122 1097110 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
1123 1097110 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
1124 1097110 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
1125 1097110 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
1126 }
1127
1128 static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1129 int has_topright, ptrdiff_t _stride)
1130 {
1131 int i;
1132 pixel *src = (pixel*)_src;
1133 const dctcoef *block = (const dctcoef*)_block;
1134 pixel pix[8];
1135 int stride = _stride>>(sizeof(pixel)-1);
1136 PREDICT_8x8_LOAD_TOP;
1137
1138 pix[0] = t0;
1139 pix[1] = t1;
1140 pix[2] = t2;
1141 pix[3] = t3;
1142 pix[4] = t4;
1143 pix[5] = t5;
1144 pix[6] = t6;
1145 pix[7] = t7;
1146
1147 for(i=0; i<8; i++){
1148 pixel v = pix[i];
1149 src[0*stride]= v += block[0];
1150 src[1*stride]= v += block[8];
1151 src[2*stride]= v += block[16];
1152 src[3*stride]= v += block[24];
1153 src[4*stride]= v += block[32];
1154 src[5*stride]= v += block[40];
1155 src[6*stride]= v += block[48];
1156 src[7*stride]= v + block[56];
1157 src++;
1158 block++;
1159 }
1160
1161 memset(_block, 0, sizeof(dctcoef) * 64);
1162 }
1163
1164 static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1165 int has_topright, ptrdiff_t _stride)
1166 {
1167 int i;
1168 pixel *src = (pixel*)_src;
1169 const dctcoef *block = (const dctcoef*)_block;
1170 pixel pix[8];
1171 int stride = _stride>>(sizeof(pixel)-1);
1172 PREDICT_8x8_LOAD_LEFT;
1173
1174 pix[0] = l0;
1175 pix[1] = l1;
1176 pix[2] = l2;
1177 pix[3] = l3;
1178 pix[4] = l4;
1179 pix[5] = l5;
1180 pix[6] = l6;
1181 pix[7] = l7;
1182
1183 for(i=0; i<8; i++){
1184 pixel v = pix[i];
1185 src[0]= v += block[0];
1186 src[1]= v += block[1];
1187 src[2]= v += block[2];
1188 src[3]= v += block[3];
1189 src[4]= v += block[4];
1190 src[5]= v += block[5];
1191 src[6]= v += block[6];
1192 src[7]= v + block[7];
1193 src+= stride;
1194 block+= 8;
1195 }
1196
1197 memset(_block, 0, sizeof(dctcoef) * 64);
1198 }
1199
1200 #undef PREDICT_8x8_LOAD_LEFT
1201 #undef PREDICT_8x8_LOAD_TOP
1202 #undef PREDICT_8x8_LOAD_TOPLEFT
1203 #undef PREDICT_8x8_LOAD_TOPRIGHT
1204 #undef PREDICT_8x8_DC
1205 #undef PTR
1206 #undef PT
1207 #undef PL
1208 #undef SRC
1209
1210 160640 static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, int16_t *_block,
1211 ptrdiff_t stride)
1212 {
1213 int i;
1214 160640 pixel *pix = (pixel*)_pix;
1215 160640 const dctcoef *block = (const dctcoef*)_block;
1216 160640 stride >>= sizeof(pixel)-1;
1217 160640 pix -= stride;
1218
2/2
✓ Branch 0 taken 321280 times.
✓ Branch 1 taken 80320 times.
803200 for(i=0; i<4; i++){
1219 642560 pixel v = pix[0];
1220 642560 pix[1*stride]= v += block[0];
1221 642560 pix[2*stride]= v += block[4];
1222 642560 pix[3*stride]= v += block[8];
1223 642560 pix[4*stride]= v + block[12];
1224 642560 pix++;
1225 642560 block++;
1226 }
1227
1228 160640 memset(_block, 0, sizeof(dctcoef) * 16);
1229 }
1230
1231 208454 static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, int16_t *_block,
1232 ptrdiff_t stride)
1233 {
1234 int i;
1235 208454 pixel *pix = (pixel*)_pix;
1236 208454 const dctcoef *block = (const dctcoef*)_block;
1237 208454 stride >>= sizeof(pixel)-1;
1238
2/2
✓ Branch 0 taken 416908 times.
✓ Branch 1 taken 104227 times.
1042270 for(i=0; i<4; i++){
1239 833816 pixel v = pix[-1];
1240 833816 pix[0]= v += block[0];
1241 833816 pix[1]= v += block[1];
1242 833816 pix[2]= v += block[2];
1243 833816 pix[3]= v + block[3];
1244 833816 pix+= stride;
1245 833816 block+= 4;
1246 }
1247
1248 208454 memset(_block, 0, sizeof(dctcoef) * 16);
1249 }
1250
1251 2148 static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, int16_t *_block,
1252 ptrdiff_t stride)
1253 {
1254 int i;
1255 2148 pixel *pix = (pixel*)_pix;
1256 2148 const dctcoef *block = (const dctcoef*)_block;
1257 2148 stride >>= sizeof(pixel)-1;
1258 2148 pix -= stride;
1259
2/2
✓ Branch 0 taken 8592 times.
✓ Branch 1 taken 1074 times.
19332 for(i=0; i<8; i++){
1260 17184 pixel v = pix[0];
1261 17184 pix[1*stride]= v += block[0];
1262 17184 pix[2*stride]= v += block[8];
1263 17184 pix[3*stride]= v += block[16];
1264 17184 pix[4*stride]= v += block[24];
1265 17184 pix[5*stride]= v += block[32];
1266 17184 pix[6*stride]= v += block[40];
1267 17184 pix[7*stride]= v += block[48];
1268 17184 pix[8*stride]= v + block[56];
1269 17184 pix++;
1270 17184 block++;
1271 }
1272
1273 2148 memset(_block, 0, sizeof(dctcoef) * 64);
1274 }
1275
1276 2828 static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, int16_t *_block,
1277 ptrdiff_t stride)
1278 {
1279 int i;
1280 2828 pixel *pix = (pixel*)_pix;
1281 2828 const dctcoef *block = (const dctcoef*)_block;
1282 2828 stride >>= sizeof(pixel)-1;
1283
2/2
✓ Branch 0 taken 11312 times.
✓ Branch 1 taken 1414 times.
25452 for(i=0; i<8; i++){
1284 22624 pixel v = pix[-1];
1285 22624 pix[0]= v += block[0];
1286 22624 pix[1]= v += block[1];
1287 22624 pix[2]= v += block[2];
1288 22624 pix[3]= v += block[3];
1289 22624 pix[4]= v += block[4];
1290 22624 pix[5]= v += block[5];
1291 22624 pix[6]= v += block[6];
1292 22624 pix[7]= v + block[7];
1293 22624 pix+= stride;
1294 22624 block+= 8;
1295 }
1296
1297 2828 memset(_block, 0, sizeof(dctcoef) * 64);
1298 }
1299
1300 706 static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,
1301 int16_t *block,
1302 ptrdiff_t stride)
1303 {
1304 int i;
1305
2/2
✓ Branch 0 taken 5648 times.
✓ Branch 1 taken 353 times.
12002 for(i=0; i<16; i++)
1306 11296 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1307 }
1308
1309 544 static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix,
1310 const int *block_offset,
1311 int16_t *block,
1312 ptrdiff_t stride)
1313 {
1314 int i;
1315
2/2
✓ Branch 0 taken 4352 times.
✓ Branch 1 taken 272 times.
9248 for(i=0; i<16; i++)
1316 8704 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1317 }
1318
1319 4516 static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,
1320 int16_t *block, ptrdiff_t stride)
1321 {
1322 int i;
1323
2/2
✓ Branch 0 taken 9032 times.
✓ Branch 1 taken 2258 times.
22580 for(i=0; i<4; i++)
1324 18064 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1325 }
1326
1327 static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
1328 int16_t *block, ptrdiff_t stride)
1329 {
1330 int i;
1331 for(i=0; i<4; i++)
1332 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1333 for(i=4; i<8; i++)
1334 FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1335 }
1336
1337 5776 static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
1338 int16_t *block,
1339 ptrdiff_t stride)
1340 {
1341 int i;
1342
2/2
✓ Branch 0 taken 11552 times.
✓ Branch 1 taken 2888 times.
28880 for(i=0; i<4; i++)
1343 23104 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1344 }
1345
1346 static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix,
1347 const int *block_offset,
1348 int16_t *block, ptrdiff_t stride)
1349 {
1350 int i;
1351 for(i=0; i<4; i++)
1352 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1353 for(i=4; i<8; i++)
1354 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1355 }
1356