FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/h264pred_template.c
Date: 2025-01-20 09:27:23
Exec Total Coverage
Lines: 804 870 92.4%
Functions: 174 334 52.1%
Branches: 162 184 88.0%

Line Branch Exec Source
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * H.264 / AVC / MPEG-4 part10 prediction functions.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
28 #include "libavutil/intreadwrite.h"
29
30 #include "mathops.h"
31
32 #include "bit_depth_template.c"
33
34 9062086 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright,
35 ptrdiff_t _stride)
36 {
37 9062086 pixel *src = (pixel*)_src;
38 9062086 int stride = _stride>>(sizeof(pixel)-1);
39 9062086 const pixel4 a= AV_RN4PA(src-stride);
40
41 9062086 AV_WN4PA(src+0*stride, a);
42 9062086 AV_WN4PA(src+1*stride, a);
43 9062086 AV_WN4PA(src+2*stride, a);
44 9062086 AV_WN4PA(src+3*stride, a);
45 9062086 }
46
47 14838770 static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright,
48 ptrdiff_t _stride)
49 {
50 14838770 pixel *src = (pixel*)_src;
51 14838770 int stride = _stride>>(sizeof(pixel)-1);
52 14838770 AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));
53 14838770 AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride]));
54 14838770 AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride]));
55 14838770 AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride]));
56 14838770 }
57
58 6148750 static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright,
59 ptrdiff_t _stride)
60 {
61 6148750 pixel *src = (pixel*)_src;
62 6148750 int stride = _stride>>(sizeof(pixel)-1);
63 6148750 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
64 6148750 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
65 6148750 const pixel4 a = PIXEL_SPLAT_X4(dc);
66
67 6148750 AV_WN4PA(src+0*stride, a);
68 6148750 AV_WN4PA(src+1*stride, a);
69 6148750 AV_WN4PA(src+2*stride, a);
70 6148750 AV_WN4PA(src+3*stride, a);
71 6148750 }
72
73 621844 static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright,
74 ptrdiff_t _stride)
75 {
76 621844 pixel *src = (pixel*)_src;
77 621844 int stride = _stride>>(sizeof(pixel)-1);
78 621844 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
79 621844 const pixel4 a = PIXEL_SPLAT_X4(dc);
80
81 621844 AV_WN4PA(src+0*stride, a);
82 621844 AV_WN4PA(src+1*stride, a);
83 621844 AV_WN4PA(src+2*stride, a);
84 621844 AV_WN4PA(src+3*stride, a);
85 621844 }
86
87 120720 static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright,
88 ptrdiff_t _stride)
89 {
90 120720 pixel *src = (pixel*)_src;
91 120720 int stride = _stride>>(sizeof(pixel)-1);
92 120720 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
93 120720 const pixel4 a = PIXEL_SPLAT_X4(dc);
94
95 120720 AV_WN4PA(src+0*stride, a);
96 120720 AV_WN4PA(src+1*stride, a);
97 120720 AV_WN4PA(src+2*stride, a);
98 120720 AV_WN4PA(src+3*stride, a);
99 120720 }
100
101 15394 static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright,
102 ptrdiff_t _stride)
103 {
104 15394 pixel *src = (pixel*)_src;
105 15394 int stride = _stride>>(sizeof(pixel)-1);
106 15394 const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
107
108 15394 AV_WN4PA(src+0*stride, a);
109 15394 AV_WN4PA(src+1*stride, a);
110 15394 AV_WN4PA(src+2*stride, a);
111 15394 AV_WN4PA(src+3*stride, a);
112 15394 }
113
114
115 #define LOAD_TOP_RIGHT_EDGE\
116 const unsigned av_unused t4 = topright[0];\
117 const unsigned av_unused t5 = topright[1];\
118 const unsigned av_unused t6 = topright[2];\
119 const unsigned av_unused t7 = topright[3];\
120
121 #define LOAD_DOWN_LEFT_EDGE\
122 const unsigned av_unused l4 = src[-1+4*stride];\
123 const unsigned av_unused l5 = src[-1+5*stride];\
124 const unsigned av_unused l6 = src[-1+6*stride];\
125 const unsigned av_unused l7 = src[-1+7*stride];\
126
127 #define LOAD_LEFT_EDGE\
128 const unsigned av_unused l0 = src[-1+0*stride];\
129 const unsigned av_unused l1 = src[-1+1*stride];\
130 const unsigned av_unused l2 = src[-1+2*stride];\
131 const unsigned av_unused l3 = src[-1+3*stride];\
132
133 #define LOAD_TOP_EDGE\
134 const unsigned av_unused t0 = src[ 0-1*stride];\
135 const unsigned av_unused t1 = src[ 1-1*stride];\
136 const unsigned av_unused t2 = src[ 2-1*stride];\
137 const unsigned av_unused t3 = src[ 3-1*stride];\
138
139 3846332 static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
140 ptrdiff_t _stride)
141 {
142 3846332 pixel *src = (pixel*)_src;
143 3846332 int stride = _stride>>(sizeof(pixel)-1);
144 3846332 const int lt= src[-1-1*stride];
145 3846332 LOAD_TOP_EDGE
146 3846332 LOAD_LEFT_EDGE
147
148 3846332 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
149 3846332 src[0+2*stride]=
150 3846332 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
151 3846332 src[0+1*stride]=
152 3846332 src[1+2*stride]=
153 3846332 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
154 3846332 src[0+0*stride]=
155 3846332 src[1+1*stride]=
156 3846332 src[2+2*stride]=
157 3846332 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
158 3846332 src[1+0*stride]=
159 3846332 src[2+1*stride]=
160 3846332 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
161 3846332 src[2+0*stride]=
162 3846332 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
163 3846332 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
164 3846332 }
165
166 2303104 static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright,
167 ptrdiff_t _stride)
168 {
169 2303104 pixel *src = (pixel*)_src;
170 2303104 const pixel *topright = (const pixel*)_topright;
171 2303104 int stride = _stride>>(sizeof(pixel)-1);
172 2303104 LOAD_TOP_EDGE
173 2303104 LOAD_TOP_RIGHT_EDGE
174 // LOAD_LEFT_EDGE
175
176 2303104 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
177 2303104 src[1+0*stride]=
178 2303104 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
179 2303104 src[2+0*stride]=
180 2303104 src[1+1*stride]=
181 2303104 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
182 2303104 src[3+0*stride]=
183 2303104 src[2+1*stride]=
184 2303104 src[1+2*stride]=
185 2303104 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
186 2303104 src[3+1*stride]=
187 2303104 src[2+2*stride]=
188 2303104 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
189 2303104 src[3+2*stride]=
190 2303104 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
191 2303104 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
192 2303104 }
193
194 2669236 static void FUNCC(pred4x4_vertical_right)(uint8_t *_src,
195 const uint8_t *topright,
196 ptrdiff_t _stride)
197 {
198 2669236 pixel *src = (pixel*)_src;
199 2669236 int stride = _stride>>(sizeof(pixel)-1);
200 2669236 const int lt= src[-1-1*stride];
201 2669236 LOAD_TOP_EDGE
202 2669236 LOAD_LEFT_EDGE
203
204 2669236 src[0+0*stride]=
205 2669236 src[1+2*stride]=(lt + t0 + 1)>>1;
206 2669236 src[1+0*stride]=
207 2669236 src[2+2*stride]=(t0 + t1 + 1)>>1;
208 2669236 src[2+0*stride]=
209 2669236 src[3+2*stride]=(t1 + t2 + 1)>>1;
210 2669236 src[3+0*stride]=(t2 + t3 + 1)>>1;
211 2669236 src[0+1*stride]=
212 2669236 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
213 2669236 src[1+1*stride]=
214 2669236 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
215 2669236 src[2+1*stride]=
216 2669236 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
217 2669236 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
218 2669236 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
219 2669236 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
220 2669236 }
221
222 2183352 static void FUNCC(pred4x4_vertical_left)(uint8_t *_src,
223 const uint8_t *_topright,
224 ptrdiff_t _stride)
225 {
226 2183352 pixel *src = (pixel*)_src;
227 2183352 const pixel *topright = (const pixel*)_topright;
228 2183352 int stride = _stride>>(sizeof(pixel)-1);
229 2183352 LOAD_TOP_EDGE
230 2183352 LOAD_TOP_RIGHT_EDGE
231
232 2183352 src[0+0*stride]=(t0 + t1 + 1)>>1;
233 2183352 src[1+0*stride]=
234 2183352 src[0+2*stride]=(t1 + t2 + 1)>>1;
235 2183352 src[2+0*stride]=
236 2183352 src[1+2*stride]=(t2 + t3 + 1)>>1;
237 2183352 src[3+0*stride]=
238 2183352 src[2+2*stride]=(t3 + t4+ 1)>>1;
239 2183352 src[3+2*stride]=(t4 + t5+ 1)>>1;
240 2183352 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
241 2183352 src[1+1*stride]=
242 2183352 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
243 2183352 src[2+1*stride]=
244 2183352 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
245 2183352 src[3+1*stride]=
246 2183352 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
247 2183352 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
248 2183352 }
249
250 4073604 static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
251 ptrdiff_t _stride)
252 {
253 4073604 pixel *src = (pixel*)_src;
254 4073604 int stride = _stride>>(sizeof(pixel)-1);
255 4073604 LOAD_LEFT_EDGE
256
257 4073604 src[0+0*stride]=(l0 + l1 + 1)>>1;
258 4073604 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
259 4073604 src[2+0*stride]=
260 4073604 src[0+1*stride]=(l1 + l2 + 1)>>1;
261 4073604 src[3+0*stride]=
262 4073604 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
263 4073604 src[2+1*stride]=
264 4073604 src[0+2*stride]=(l2 + l3 + 1)>>1;
265 4073604 src[3+1*stride]=
266 4073604 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
267 4073604 src[3+2*stride]=
268 4073604 src[1+3*stride]=
269 4073604 src[0+3*stride]=
270 4073604 src[2+2*stride]=
271 4073604 src[2+3*stride]=
272 4073604 src[3+3*stride]=l3;
273 4073604 }
274
275 4374932 static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src,
276 const uint8_t *topright,
277 ptrdiff_t _stride)
278 {
279 4374932 pixel *src = (pixel*)_src;
280 4374932 int stride = _stride>>(sizeof(pixel)-1);
281 4374932 const int lt= src[-1-1*stride];
282 4374932 LOAD_TOP_EDGE
283 4374932 LOAD_LEFT_EDGE
284
285 4374932 src[0+0*stride]=
286 4374932 src[2+1*stride]=(lt + l0 + 1)>>1;
287 4374932 src[1+0*stride]=
288 4374932 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
289 4374932 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
290 4374932 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
291 4374932 src[0+1*stride]=
292 4374932 src[2+2*stride]=(l0 + l1 + 1)>>1;
293 4374932 src[1+1*stride]=
294 4374932 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
295 4374932 src[0+2*stride]=
296 4374932 src[2+3*stride]=(l1 + l2+ 1)>>1;
297 4374932 src[1+2*stride]=
298 4374932 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
299 4374932 src[0+3*stride]=(l2 + l3 + 1)>>1;
300 4374932 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
301 4374932 }
302
303 624218 static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
304 {
305 int i;
306 624218 pixel *src = (pixel*)_src;
307 624218 int stride = _stride>>(sizeof(pixel)-1);
308 624218 const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
309 624218 const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
310 624218 const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
311 624218 const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);
312
313
2/2
✓ Branch 0 taken 4993744 times.
✓ Branch 1 taken 312109 times.
10611706 for(i=0; i<16; i++){
314 9987488 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
315 9987488 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
316 9987488 AV_WN4PA(((pixel4*)(src+i*stride))+2, c);
317 9987488 AV_WN4PA(((pixel4*)(src+i*stride))+3, d);
318 }
319 624218 }
320
321 530774 static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
322 {
323 int i;
324 530774 pixel *src = (pixel*)_src;
325 530774 stride >>= sizeof(pixel)-1;
326
327
2/2
✓ Branch 0 taken 4246192 times.
✓ Branch 1 taken 265387 times.
9023158 for(i=0; i<16; i++){
328 8492384 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
329
330 8492384 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
331 8492384 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
332 8492384 AV_WN4PA(((pixel4*)(src+i*stride))+2, a);
333 8492384 AV_WN4PA(((pixel4*)(src+i*stride))+3, a);
334 }
335 530774 }
336
337 #define PREDICT_16x16_DC(v)\
338 for(i=0; i<16; i++){\
339 AV_WN4PA(src+ 0, v);\
340 AV_WN4PA(src+ 4, v);\
341 AV_WN4PA(src+ 8, v);\
342 AV_WN4PA(src+12, v);\
343 src += stride;\
344 }
345
346 505354 static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride)
347 {
348 505354 int i, dc=0;
349 505354 pixel *src = (pixel*)_src;
350 pixel4 dcsplat;
351 505354 stride >>= sizeof(pixel)-1;
352
353
2/2
✓ Branch 0 taken 4042832 times.
✓ Branch 1 taken 252677 times.
8591018 for(i=0;i<16; i++){
354 8085664 dc+= src[-1+i*stride];
355 }
356
357
2/2
✓ Branch 0 taken 4042832 times.
✓ Branch 1 taken 252677 times.
8591018 for(i=0;i<16; i++){
358 8085664 dc+= src[i-stride];
359 }
360
361 505354 dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
362
2/2
✓ Branch 0 taken 4042832 times.
✓ Branch 1 taken 252677 times.
8591018 PREDICT_16x16_DC(dcsplat);
363 505354 }
364
365 71496 static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
366 {
367 71496 int i, dc=0;
368 71496 pixel *src = (pixel*)_src;
369 pixel4 dcsplat;
370 71496 stride >>= sizeof(pixel)-1;
371
372
2/2
✓ Branch 0 taken 571968 times.
✓ Branch 1 taken 35748 times.
1215432 for(i=0;i<16; i++){
373 1143936 dc+= src[-1+i*stride];
374 }
375
376 71496 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
377
2/2
✓ Branch 0 taken 571968 times.
✓ Branch 1 taken 35748 times.
1215432 PREDICT_16x16_DC(dcsplat);
378 71496 }
379
380 11758 static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
381 {
382 11758 int i, dc=0;
383 11758 pixel *src = (pixel*)_src;
384 pixel4 dcsplat;
385 11758 stride >>= sizeof(pixel)-1;
386
387
2/2
✓ Branch 0 taken 94064 times.
✓ Branch 1 taken 5879 times.
199886 for(i=0;i<16; i++){
388 188128 dc+= src[i-stride];
389 }
390
391 11758 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
392
2/2
✓ Branch 0 taken 94064 times.
✓ Branch 1 taken 5879 times.
199886 PREDICT_16x16_DC(dcsplat);
393 11758 }
394
395 #define PRED16x16_X(n, v) \
396 static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
397 {\
398 int i;\
399 pixel *src = (pixel*)_src;\
400 stride >>= sizeof(pixel)-1;\
401 PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
402 }
403
404
2/2
✓ Branch 0 taken 42512 times.
✓ Branch 1 taken 2657 times.
90338 PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
405 #if BIT_DEPTH == 8
406
2/2
✓ Branch 0 taken 192 times.
✓ Branch 1 taken 12 times.
204 PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
407
2/2
✓ Branch 0 taken 160 times.
✓ Branch 1 taken 10 times.
170 PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
408 #endif
409
410 309142 static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src,
411 ptrdiff_t _stride,
412 const int svq3,
413 const int rv40)
414 {
415 int i, j, k;
416 int a;
417 309142 pixel *src = (pixel*)_src;
418 309142 int stride = _stride>>(sizeof(pixel)-1);
419 309142 const pixel * const src0 = src +7-stride;
420 309142 const pixel * src1 = src +8*stride-1;
421 309142 const pixel * src2 = src1-2*stride; // == src+6*stride-1;
422 309142 int H = src0[1] - src0[-1];
423 309142 int V = src1[0] - src2[ 0];
424
2/2
✓ Branch 0 taken 1081997 times.
✓ Branch 1 taken 154571 times.
2473136 for(k=2; k<=8; ++k) {
425 2163994 src1 += stride; src2 -= stride;
426 2163994 H += k*(src0[k] - src0[-k]);
427 2163994 V += k*(src1[0] - src2[ 0]);
428 }
429
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 154515 times.
309142 if(svq3){
430 112 H = ( 5*(H/4) ) / 16;
431 112 V = ( 5*(V/4) ) / 16;
432
433 /* required for 100% accuracy */
434 112 i = H; H = V; V = i;
435
2/2
✓ Branch 0 taken 1217 times.
✓ Branch 1 taken 153298 times.
309030 }else if(rv40){
436 2434 H = ( H + (H>>2) ) >> 4;
437 2434 V = ( V + (V>>2) ) >> 4;
438 }else{
439 306596 H = ( 5*H+32 ) >> 6;
440 306596 V = ( 5*V+32 ) >> 6;
441 }
442
443 309142 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
444
2/2
✓ Branch 0 taken 2473136 times.
✓ Branch 1 taken 154571 times.
5255414 for(j=16; j>0; --j) {
445 4946272 int b = a;
446 4946272 a += V;
447
2/2
✓ Branch 0 taken 9892544 times.
✓ Branch 1 taken 2473136 times.
24731360 for(i=-16; i<0; i+=4) {
448 19785088 src[16+i] = CLIP((b ) >> 5);
449 19785088 src[17+i] = CLIP((b+ H) >> 5);
450 19785088 src[18+i] = CLIP((b+2*H) >> 5);
451 19785088 src[19+i] = CLIP((b+3*H) >> 5);
452 19785088 b += 4*H;
453 }
454 4946272 src += stride;
455 }
456 309142 }
457
458 306596 static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride)
459 {
460 306596 FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
461 306596 }
462
463 1461776 static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride)
464 {
465 int i;
466 1461776 pixel *src = (pixel*)_src;
467 1461776 int stride = _stride>>(sizeof(pixel)-1);
468 1461776 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
469 1461776 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
470
471
2/2
✓ Branch 0 taken 5847104 times.
✓ Branch 1 taken 730888 times.
13155984 for(i=0; i<8; i++){
472 11694208 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
473 11694208 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
474 }
475 1461776 }
476
477 322924 static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
478 {
479 int i;
480 322924 pixel *src = (pixel*)_src;
481 322924 int stride = _stride>>(sizeof(pixel)-1);
482 322924 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
483 322924 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
484
485
2/2
✓ Branch 0 taken 2583392 times.
✓ Branch 1 taken 161462 times.
5489708 for(i=0; i<16; i++){
486 5166784 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
487 5166784 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
488 }
489 322924 }
490
491 3023632 static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride)
492 {
493 int i;
494 3023632 pixel *src = (pixel*)_src;
495 3023632 stride >>= sizeof(pixel)-1;
496
497
2/2
✓ Branch 0 taken 12094528 times.
✓ Branch 1 taken 1511816 times.
27212688 for(i=0; i<8; i++){
498 24189056 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
499 24189056 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
500 24189056 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
501 }
502 3023632 }
503
504 595976 static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
505 {
506 int i;
507 595976 pixel *src = (pixel*)_src;
508 595976 stride >>= sizeof(pixel)-1;
509
2/2
✓ Branch 0 taken 4767808 times.
✓ Branch 1 taken 297988 times.
10131592 for(i=0; i<16; i++){
510 9535616 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
511 9535616 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
512 9535616 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
513 }
514 595976 }
515
516 #define PRED8x8_X(n, v)\
517 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
518 {\
519 int i;\
520 const pixel4 a = PIXEL_SPLAT_X4(v);\
521 pixel *src = (pixel*)_src;\
522 stride >>= sizeof(pixel)-1;\
523 for(i=0; i<8; i++){\
524 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
525 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
526 }\
527 }
528
529
2/2
✓ Branch 0 taken 1047296 times.
✓ Branch 1 taken 130912 times.
2356416 PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
530 #if BIT_DEPTH == 8
531
2/2
✓ Branch 0 taken 928 times.
✓ Branch 1 taken 116 times.
1044 PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
532
2/2
✓ Branch 0 taken 2160 times.
✓ Branch 1 taken 270 times.
2430 PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
533 #endif
534
535 3076 static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride)
536 {
537 3076 FUNCC(pred8x8_128_dc)(_src, stride);
538 3076 FUNCC(pred8x8_128_dc)(_src+8*stride, stride);
539 3076 }
540
541 1183716 static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride)
542 {
543 int i;
544 int dc0, dc2;
545 pixel4 dc0splat, dc2splat;
546 1183716 pixel *src = (pixel*)_src;
547 1183716 stride >>= sizeof(pixel)-1;
548
549 1183716 dc0=dc2=0;
550
2/2
✓ Branch 0 taken 2367432 times.
✓ Branch 1 taken 591858 times.
5918580 for(i=0;i<4; i++){
551 4734864 dc0+= src[-1+i*stride];
552 4734864 dc2+= src[-1+(i+4)*stride];
553 }
554 1183716 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
555 1183716 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
556
557
2/2
✓ Branch 0 taken 2367432 times.
✓ Branch 1 taken 591858 times.
5918580 for(i=0; i<4; i++){
558 4734864 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
559 4734864 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);
560 }
561
2/2
✓ Branch 0 taken 2367432 times.
✓ Branch 1 taken 591858 times.
5918580 for(i=4; i<8; i++){
562 4734864 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
563 4734864 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);
564 }
565 1183716 }
566
567 195540 static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
568 {
569 195540 FUNCC(pred8x8_left_dc)(_src, stride);
570 195540 FUNCC(pred8x8_left_dc)(_src+8*stride, stride);
571 195540 }
572
573 174280 static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride)
574 {
575 int i;
576 int dc0, dc1;
577 pixel4 dc0splat, dc1splat;
578 174280 pixel *src = (pixel*)_src;
579 174280 stride >>= sizeof(pixel)-1;
580
581 174280 dc0=dc1=0;
582
2/2
✓ Branch 0 taken 348560 times.
✓ Branch 1 taken 87140 times.
871400 for(i=0;i<4; i++){
583 697120 dc0+= src[i-stride];
584 697120 dc1+= src[4+i-stride];
585 }
586 174280 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
587 174280 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
588
589
2/2
✓ Branch 0 taken 348560 times.
✓ Branch 1 taken 87140 times.
871400 for(i=0; i<4; i++){
590 697120 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
591 697120 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
592 }
593
2/2
✓ Branch 0 taken 348560 times.
✓ Branch 1 taken 87140 times.
871400 for(i=4; i<8; i++){
594 697120 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
595 697120 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
596 }
597 174280 }
598
599 26616 static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
600 {
601 int i;
602 int dc0, dc1;
603 pixel4 dc0splat, dc1splat;
604 26616 pixel *src = (pixel*)_src;
605 26616 stride >>= sizeof(pixel)-1;
606
607 26616 dc0=dc1=0;
608
2/2
✓ Branch 0 taken 53232 times.
✓ Branch 1 taken 13308 times.
133080 for(i=0;i<4; i++){
609 106464 dc0+= src[i-stride];
610 106464 dc1+= src[4+i-stride];
611 }
612 26616 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
613 26616 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
614
615
2/2
✓ Branch 0 taken 212928 times.
✓ Branch 1 taken 13308 times.
452472 for(i=0; i<16; i++){
616 425856 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
617 425856 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
618 }
619 26616 }
620
621 5979028 static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride)
622 {
623 int i;
624 int dc0, dc1, dc2;
625 pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
626 5979028 pixel *src = (pixel*)_src;
627 5979028 stride >>= sizeof(pixel)-1;
628
629 5979028 dc0=dc1=dc2=0;
630
2/2
✓ Branch 0 taken 11958056 times.
✓ Branch 1 taken 2989514 times.
29895140 for(i=0;i<4; i++){
631 23916112 dc0+= src[-1+i*stride] + src[i-stride];
632 23916112 dc1+= src[4+i-stride];
633 23916112 dc2+= src[-1+(i+4)*stride];
634 }
635 5979028 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
636 5979028 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
637 5979028 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
638 5979028 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
639
640
2/2
✓ Branch 0 taken 11958056 times.
✓ Branch 1 taken 2989514 times.
29895140 for(i=0; i<4; i++){
641 23916112 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
642 23916112 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
643 }
644
2/2
✓ Branch 0 taken 11958056 times.
✓ Branch 1 taken 2989514 times.
29895140 for(i=4; i<8; i++){
645 23916112 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
646 23916112 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
647 }
648 5979028 }
649
650 1594572 static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride)
651 {
652 int i;
653 int dc0, dc1, dc2, dc3, dc4;
654 pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
655 1594572 pixel *src = (pixel*)_src;
656 1594572 stride >>= sizeof(pixel)-1;
657
658 1594572 dc0=dc1=dc2=dc3=dc4=0;
659
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=0;i<4; i++){
660 6378288 dc0+= src[-1+i*stride] + src[i-stride];
661 6378288 dc1+= src[4+i-stride];
662 6378288 dc2+= src[-1+(i+4)*stride];
663 6378288 dc3+= src[-1+(i+8)*stride];
664 6378288 dc4+= src[-1+(i+12)*stride];
665 }
666 1594572 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
667 1594572 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
668 1594572 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
669 1594572 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
670 1594572 dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);
671 1594572 dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);
672 1594572 dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);
673 1594572 dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);
674
675
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=0; i<4; i++){
676 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
677 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
678 }
679
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=4; i<8; i++){
680 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
681 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
682 }
683
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=8; i<12; i++){
684 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
685 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
686 }
687
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=12; i<16; i++){
688 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
689 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
690 }
691 1594572 }
692
693 //the following 4 function should not be optimized!
694 68 static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
695 {
696 68 FUNCC(pred8x8_top_dc)(src, stride);
697 68 FUNCC(pred4x4_dc)(src, NULL, stride);
698 68 }
699
700 12 static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
701 {
702 12 FUNCC(pred8x16_top_dc)(src, stride);
703 12 FUNCC(pred4x4_dc)(src, NULL, stride);
704 12 }
705
706 36 static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
707 {
708 36 FUNCC(pred8x8_dc)(src, stride);
709 36 FUNCC(pred4x4_top_dc)(src, NULL, stride);
710 36 }
711
712 12 static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
713 {
714 12 FUNCC(pred8x16_dc)(src, stride);
715 12 FUNCC(pred4x4_top_dc)(src, NULL, stride);
716 12 }
717
718 20 static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
719 {
720 20 FUNCC(pred8x8_left_dc)(src, stride);
721 20 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
722 20 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
723 20 }
724
725 12 static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
726 {
727 12 FUNCC(pred8x16_left_dc)(src, stride);
728 12 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
729 12 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
730 12 }
731
732 32 static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
733 {
734 32 FUNCC(pred8x8_left_dc)(src, stride);
735 32 FUNCC(pred4x4_128_dc)(src , NULL, stride);
736 32 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
737 32 }
738
739 12 static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
740 {
741 12 FUNCC(pred8x16_left_dc)(src, stride);
742 12 FUNCC(pred4x4_128_dc)(src , NULL, stride);
743 12 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
744 12 }
745
746 945428 static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride)
747 {
748 int j, k;
749 int a;
750 945428 pixel *src = (pixel*)_src;
751 945428 int stride = _stride>>(sizeof(pixel)-1);
752 945428 const pixel * const src0 = src +3-stride;
753 945428 const pixel * src1 = src +4*stride-1;
754 945428 const pixel * src2 = src1-2*stride; // == src+2*stride-1;
755 945428 int H = src0[1] - src0[-1];
756 945428 int V = src1[0] - src2[ 0];
757
2/2
✓ Branch 0 taken 1418142 times.
✓ Branch 1 taken 472714 times.
3781712 for(k=2; k<=4; ++k) {
758 2836284 src1 += stride; src2 -= stride;
759 2836284 H += k*(src0[k] - src0[-k]);
760 2836284 V += k*(src1[0] - src2[ 0]);
761 }
762 945428 H = ( 17*H+16 ) >> 5;
763 945428 V = ( 17*V+16 ) >> 5;
764
765 945428 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
766
2/2
✓ Branch 0 taken 3781712 times.
✓ Branch 1 taken 472714 times.
8508852 for(j=8; j>0; --j) {
767 7563424 int b = a;
768 7563424 a += V;
769 7563424 src[0] = CLIP((b ) >> 5);
770 7563424 src[1] = CLIP((b+ H) >> 5);
771 7563424 src[2] = CLIP((b+2*H) >> 5);
772 7563424 src[3] = CLIP((b+3*H) >> 5);
773 7563424 src[4] = CLIP((b+4*H) >> 5);
774 7563424 src[5] = CLIP((b+5*H) >> 5);
775 7563424 src[6] = CLIP((b+6*H) >> 5);
776 7563424 src[7] = CLIP((b+7*H) >> 5);
777 7563424 src += stride;
778 }
779 945428 }
780
781 328576 static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride)
782 {
783 int j, k;
784 int a;
785 328576 pixel *src = (pixel*)_src;
786 328576 int stride = _stride>>(sizeof(pixel)-1);
787 328576 const pixel * const src0 = src +3-stride;
788 328576 const pixel * src1 = src +8*stride-1;
789 328576 const pixel * src2 = src1-2*stride; // == src+6*stride-1;
790 328576 int H = src0[1] - src0[-1];
791 328576 int V = src1[0] - src2[ 0];
792
793
2/2
✓ Branch 0 taken 492864 times.
✓ Branch 1 taken 164288 times.
1314304 for (k = 2; k <= 4; ++k) {
794 985728 src1 += stride; src2 -= stride;
795 985728 H += k*(src0[k] - src0[-k]);
796 985728 V += k*(src1[0] - src2[ 0]);
797 }
798
2/2
✓ Branch 0 taken 657152 times.
✓ Branch 1 taken 164288 times.
1642880 for (; k <= 8; ++k) {
799 1314304 src1 += stride; src2 -= stride;
800 1314304 V += k*(src1[0] - src2[0]);
801 }
802
803 328576 H = (17*H+16) >> 5;
804 328576 V = (5*V+32) >> 6;
805
806 328576 a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;
807
2/2
✓ Branch 0 taken 2628608 times.
✓ Branch 1 taken 164288 times.
5585792 for(j=16; j>0; --j) {
808 5257216 int b = a;
809 5257216 a += V;
810 5257216 src[0] = CLIP((b ) >> 5);
811 5257216 src[1] = CLIP((b+ H) >> 5);
812 5257216 src[2] = CLIP((b+2*H) >> 5);
813 5257216 src[3] = CLIP((b+3*H) >> 5);
814 5257216 src[4] = CLIP((b+4*H) >> 5);
815 5257216 src[5] = CLIP((b+5*H) >> 5);
816 5257216 src[6] = CLIP((b+6*H) >> 5);
817 5257216 src[7] = CLIP((b+7*H) >> 5);
818 5257216 src += stride;
819 }
820 328576 }
821
/* Sample at column x, row y relative to the current block origin. Expects
 * `src` and `stride` to be in scope at the point of use; negative x / y
 * address the neighbouring column / row. */
822 #define SRC(x,y) src[(x)+(y)*stride]
/* Declares l<y>: the rounded (1,2,1)/4 filter of the left-column samples at
 * rows y-1, y and y+1. */
823 #define PL(y) \
824 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* Declares the filtered left column l0..l7. l0 uses the top-left sample
 * SRC(-1,-1) only when `has_topleft` is non-zero, otherwise SRC(-1,0) is
 * substituted. l7 has no row below it, so row 7 is weighted 3x instead.
 * Requires `has_topleft` in scope. */
825 #define PREDICT_8x8_LOAD_LEFT \
826 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
827 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
828 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
829 const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
830
/* Declares t<x>: the rounded (1,2,1)/4 filter of the row above the block at
 * columns x-1, x and x+1. */
831 #define PT(x) \
832 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Declares the filtered top row t0..t7. t0 falls back to SRC(0,-1) when
 * `has_topleft` is zero; t7 falls back to SRC(7,-1) when `has_topright` is
 * zero. Requires both flags in scope. */
833 #define PREDICT_8x8_LOAD_TOP \
834 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
835 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
836 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
837 const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
838 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
839
/* Same filter as PT(x), but assigns to an already declared t<x>. */
840 #define PTR(x) \
841 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Declares t8..t15 for the top-right extension. With `has_topright` they are
 * filtered from columns 7..15 of the row above (t15 weights column 15 3x);
 * without it, all eight are set to the unfiltered sample SRC(7,-1). */
842 #define PREDICT_8x8_LOAD_TOPRIGHT \
843 int t8, t9, t10, t11, t12, t13, t14, t15; \
844 if(has_topright) { \
845 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
846 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
847 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
848
/* Declares lt: the filtered top-left corner, built from SRC(-1,0), SRC(-1,-1)
 * and SRC(0,-1). It reads SRC(-1,-1) unconditionally (no has_topleft check). */
849 #define PREDICT_8x8_LOAD_TOPLEFT \
850 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
851
/* Stores the pixel4 value v into pixel4 slots 0 and 1 of each of 8 rows
 * (presumably 4 pixels per slot, i.e. the whole 8-pixel row - confirm against
 * the pixel4 definition). Note that it declares `int y` and advances `src`
 * by 8 rows as a side effect. */
852 #define PREDICT_8x8_DC(v) \
853 int y; \
854 for( y = 0; y < 8; y++ ) { \
855 AV_WN4PA(((pixel4*)src)+0, v); \
856 AV_WN4PA(((pixel4*)src)+1, v); \
857 src += stride; \
858 }
859
/**
 * Fill the 8x8 block with the constant 1<<(BIT_DEPTH-1), without reading any
 * neighbouring samples.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param has_topleft  unused here
 * @param has_topright unused here
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
860 2248 static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft,
861 int has_topright, ptrdiff_t _stride)
862 {
863 2248 pixel *src = (pixel*)_src;
864 2248 int stride = _stride>>(sizeof(pixel)-1);
865
866
2/2
✓ Branch 0 taken 8992 times.
✓ Branch 1 taken 1124 times.
20232 PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
867 2248 }
/**
 * Fill the 8x8 block with the rounded mean of the eight filtered left-column
 * samples l0..l7.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param has_topleft  selects whether SRC(-1,-1) contributes to l0
 * @param has_topright unused here
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
868 270392 static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft,
869 int has_topright, ptrdiff_t _stride)
870 {
871 270392 pixel *src = (pixel*)_src;
872 270392 int stride = _stride>>(sizeof(pixel)-1);
873
874
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 135184 times.
270392 PREDICT_8x8_LOAD_LEFT;
/* +4 rounds the sum of 8 values before the divide by 8. */
875 270392 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
876
2/2
✓ Branch 0 taken 1081568 times.
✓ Branch 1 taken 135196 times.
2433528 PREDICT_8x8_DC(dc);
877 270392 }
/**
 * Fill the 8x8 block with the rounded mean of the eight filtered top-row
 * samples t0..t7.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param has_topleft  selects whether SRC(-1,-1) contributes to t0
 * @param has_topright selects whether SRC(8,-1) contributes to t7
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
878 34678 static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft,
879 int has_topright, ptrdiff_t _stride)
880 {
881 34678 pixel *src = (pixel*)_src;
882 34678 int stride = _stride>>(sizeof(pixel)-1);
883
884
4/4
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 17323 times.
✓ Branch 2 taken 17323 times.
✓ Branch 3 taken 16 times.
34678 PREDICT_8x8_LOAD_TOP;
/* +4 rounds the sum of 8 values before the divide by 8. */
885 34678 const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
886
2/2
✓ Branch 0 taken 138712 times.
✓ Branch 1 taken 17339 times.
312102 PREDICT_8x8_DC(dc);
887 34678 }
/**
 * Fill the 8x8 block with the rounded mean of the sixteen filtered neighbour
 * samples: left column l0..l7 and top row t0..t7.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param has_topleft  selects whether SRC(-1,-1) contributes to l0 and t0
 * @param has_topright selects whether SRC(8,-1) contributes to t7
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
888 3708424 static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft,
889 int has_topright, ptrdiff_t _stride)
890 {
891 3708424 pixel *src = (pixel*)_src;
892 3708424 int stride = _stride>>(sizeof(pixel)-1);
893
894
2/2
✓ Branch 0 taken 1854111 times.
✓ Branch 1 taken 101 times.
3708424 PREDICT_8x8_LOAD_LEFT;
895
4/4
✓ Branch 0 taken 1854111 times.
✓ Branch 1 taken 101 times.
✓ Branch 2 taken 1292211 times.
✓ Branch 3 taken 562001 times.
3708424 PREDICT_8x8_LOAD_TOP;
/* +8 rounds the sum of 16 values before the divide by 16. */
896 3708424 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
897 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
898
2/2
✓ Branch 0 taken 14833696 times.
✓ Branch 1 taken 1854212 times.
33375816 PREDICT_8x8_DC(dc);
899 3708424 }
/**
 * Horizontal 8x8 prediction: every row y is filled with its filtered left
 * neighbour l<y>.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param has_topleft  selects whether SRC(-1,-1) contributes to l0
 * @param has_topright unused here
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
900 3323410 static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft,
901 int has_topright, ptrdiff_t _stride)
902 {
903 3323410 pixel *src = (pixel*)_src;
904 3323410 int stride = _stride>>(sizeof(pixel)-1);
905 pixel4 a;
906
907
2/2
✓ Branch 0 taken 1619873 times.
✓ Branch 1 taken 41832 times.
3323410 PREDICT_8x8_LOAD_LEFT;
/* ROW(y): splat l<y> into a pixel4 and store it at pixel offsets 0 and 4 of
 * row y. */
908 #define ROW(y) a = PIXEL_SPLAT_X4(l##y); \
909 AV_WN4PA(src+y*stride, a); \
910 AV_WN4PA(src+y*stride+4, a);
911 3323410 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
912 #undef ROW
913 3323410 }
/**
 * Vertical 8x8 prediction: every row is a copy of the filtered top row
 * t0..t7.
 *
 * The filtered row is first written pixel by pixel into row 0, then read back
 * as two pixel4 values and stored into rows 1..7.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param has_topleft  selects whether SRC(-1,-1) contributes to t0
 * @param has_topright selects whether SRC(8,-1) contributes to t7
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
914 1050144 static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft,
915 int has_topright, ptrdiff_t _stride)
916 {
917 int y;
918 1050144 pixel *src = (pixel*)_src;
919 1050144 int stride = _stride>>(sizeof(pixel)-1);
920 pixel4 a, b;
921
922
4/4
✓ Branch 0 taken 508853 times.
✓ Branch 1 taken 16219 times.
✓ Branch 2 taken 393478 times.
✓ Branch 3 taken 131594 times.
1050144 PREDICT_8x8_LOAD_TOP;
/* Row 0 doubles as the staging buffer for the filtered top row. */
923 1050144 src[0] = t0;
924 1050144 src[1] = t1;
925 1050144 src[2] = t2;
926 1050144 src[3] = t3;
927 1050144 src[4] = t4;
928 1050144 src[5] = t5;
929 1050144 src[6] = t6;
930 1050144 src[7] = t7;
931 1050144 a = AV_RN4PA(((pixel4*)src)+0);
932 1050144 b = AV_RN4PA(((pixel4*)src)+1);
933
2/2
✓ Branch 0 taken 3675504 times.
✓ Branch 1 taken 525072 times.
8401152 for( y = 1; y < 8; y++ ) {
934 7351008 AV_WN4PA(((pixel4*)(src+y*stride))+0, a);
935 7351008 AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
936 }
937 1050144 }
/**
 * Diagonal down-left 8x8 prediction from the filtered top row t0..t7 and its
 * top-right extension t8..t15.
 *
 * All positions with the same x+y share one value, a rounded (1,2,1)/4 filter
 * of three consecutive t samples; the last position (7,7) weights t15 3x.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param has_topleft  selects whether SRC(-1,-1) contributes to t0
 * @param has_topright selects whether t8..t15 are filtered from the row above
 *                     or replicated from SRC(7,-1)
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
938 392172 static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft,
939 int has_topright, ptrdiff_t _stride)
940 {
941 392172 pixel *src = (pixel*)_src;
942 392172 int stride = _stride>>(sizeof(pixel)-1);
943
4/4
✓ Branch 0 taken 193992 times.
✓ Branch 1 taken 2094 times.
✓ Branch 2 taken 149036 times.
✓ Branch 3 taken 47050 times.
392172 PREDICT_8x8_LOAD_TOP;
944
2/2
✓ Branch 0 taken 149036 times.
✓ Branch 1 taken 47050 times.
392172 PREDICT_8x8_LOAD_TOPRIGHT;
945 392172 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
946 392172 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
947 392172 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
948 392172 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
949 392172 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
950 392172 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
951 392172 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
952 392172 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
953 392172 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
954 392172 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
955 392172 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
956 392172 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
957 392172 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
958 392172 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
959 392172 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
960 392172 }
/**
 * Diagonal down-right 8x8 prediction from the filtered left column l0..l7,
 * the filtered corner lt and the filtered top row t0..t7.
 *
 * All positions with the same x-y share one value, a rounded (1,2,1)/4 filter
 * of three consecutive samples along the chain l7..l0, lt, t0..t7.
 * PREDICT_8x8_LOAD_TOPLEFT reads SRC(-1,-1) regardless of has_topleft.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param has_topleft  selects whether SRC(-1,-1) contributes to l0 and t0
 * @param has_topright selects whether SRC(8,-1) contributes to t7
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
961 591700 static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,
962 int has_topright, ptrdiff_t _stride)
963 {
964 591700 pixel *src = (pixel*)_src;
965 591700 int stride = _stride>>(sizeof(pixel)-1);
966
3/4
✓ Branch 0 taken 295850 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 228520 times.
✓ Branch 3 taken 67330 times.
591700 PREDICT_8x8_LOAD_TOP;
967
1/2
✓ Branch 0 taken 295850 times.
✗ Branch 1 not taken.
591700 PREDICT_8x8_LOAD_LEFT;
968 591700 PREDICT_8x8_LOAD_TOPLEFT;
969 591700 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
970 591700 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
971 591700 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
972 591700 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
973 591700 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
974 591700 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
975 591700 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
976 591700 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
977 591700 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
978 591700 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
979 591700 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
980 591700 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
981 591700 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
982 591700 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
983 591700 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
984 591700 }
/**
 * Vertical-right 8x8 prediction from the filtered left column, corner lt and
 * top row.
 *
 * A value is shared by positions that are one column right and two rows down
 * from each other. Values alternate between 2-tap rounded averages
 * ((a + b + 1) >> 1) and 3-tap (1,2,1)/4 filters; positions fed only by the
 * left column use the 3-tap form. PREDICT_8x8_LOAD_TOPLEFT reads SRC(-1,-1)
 * regardless of has_topleft.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param has_topleft  selects whether SRC(-1,-1) contributes to l0 and t0
 * @param has_topright selects whether SRC(8,-1) contributes to t7
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
985 400744 static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,
986 int has_topright, ptrdiff_t _stride)
987 {
988 400744 pixel *src = (pixel*)_src;
989 400744 int stride = _stride>>(sizeof(pixel)-1);
990
3/4
✓ Branch 0 taken 200372 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 153467 times.
✓ Branch 3 taken 46905 times.
400744 PREDICT_8x8_LOAD_TOP;
991
1/2
✓ Branch 0 taken 200372 times.
✗ Branch 1 not taken.
400744 PREDICT_8x8_LOAD_LEFT;
992 400744 PREDICT_8x8_LOAD_TOPLEFT;
993 400744 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
994 400744 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
995 400744 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
996 400744 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
997 400744 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
998 400744 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
999 400744 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
1000 400744 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
1001 400744 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
1002 400744 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
1003 400744 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
1004 400744 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
1005 400744 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
1006 400744 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
1007 400744 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
1008 400744 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
1009 400744 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
1010 400744 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
1011 400744 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
1012 400744 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
1013 400744 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
1014 400744 SRC(7,0)= (t6 + t7 + 1) >> 1;
1015 400744 }
/**
 * Horizontal-down 8x8 prediction from the filtered left column, corner lt and
 * top row.
 *
 * A value is shared by positions that are two columns right and one row down
 * from each other. Values derived from the left column alternate between
 * 2-tap rounded averages and 3-tap (1,2,1)/4 filters; values derived from the
 * top row use the 3-tap form only. PREDICT_8x8_LOAD_TOPLEFT reads SRC(-1,-1)
 * regardless of has_topleft.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param has_topleft  selects whether SRC(-1,-1) contributes to l0 and t0
 * @param has_topright only affects t7, which this function does not use
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
1016 904452 static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,
1017 int has_topright, ptrdiff_t _stride)
1018 {
1019 904452 pixel *src = (pixel*)_src;
1020 904452 int stride = _stride>>(sizeof(pixel)-1);
1021
4/4
✓ Branch 0 taken 452212 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 347099 times.
✓ Branch 3 taken 105127 times.
904452 PREDICT_8x8_LOAD_TOP;
1022
2/2
✓ Branch 0 taken 452212 times.
✓ Branch 1 taken 14 times.
904452 PREDICT_8x8_LOAD_LEFT;
1023 904452 PREDICT_8x8_LOAD_TOPLEFT;
1024 904452 SRC(0,7)= (l6 + l7 + 1) >> 1;
1025 904452 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
1026 904452 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
1027 904452 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
1028 904452 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
1029 904452 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
1030 904452 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
1031 904452 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
1032 904452 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
1033 904452 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
1034 904452 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
1035 904452 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
1036 904452 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
1037 904452 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
1038 904452 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
1039 904452 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
1040 904452 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
1041 904452 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
1042 904452 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
1043 904452 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
1044 904452 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
1045 904452 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
1046 904452 }
/**
 * Vertical-left 8x8 prediction from the filtered top row t0..t7 and its
 * top-right extension (t8..t12 are the ones actually used).
 *
 * A value is shared by positions that are one column left and two rows down
 * from each other. Even rows use 2-tap rounded averages of adjacent t
 * samples, odd rows use 3-tap (1,2,1)/4 filters.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param has_topleft  selects whether SRC(-1,-1) contributes to t0
 * @param has_topright selects whether the top-right samples are filtered from
 *                     the row above or replicated from SRC(7,-1)
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
1047 402430 static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,
1048 int has_topright, ptrdiff_t _stride)
1049 {
1050 402430 pixel *src = (pixel*)_src;
1051 402430 int stride = _stride>>(sizeof(pixel)-1);
1052
4/4
✓ Branch 0 taken 199328 times.
✓ Branch 1 taken 1887 times.
✓ Branch 2 taken 156788 times.
✓ Branch 3 taken 44427 times.
402430 PREDICT_8x8_LOAD_TOP;
1053
2/2
✓ Branch 0 taken 156788 times.
✓ Branch 1 taken 44427 times.
402430 PREDICT_8x8_LOAD_TOPRIGHT;
1054 402430 SRC(0,0)= (t0 + t1 + 1) >> 1;
1055 402430 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
1056 402430 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
1057 402430 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
1058 402430 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
1059 402430 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
1060 402430 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
1061 402430 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
1062 402430 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
1063 402430 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
1064 402430 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
1065 402430 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
1066 402430 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
1067 402430 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
1068 402430 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
1069 402430 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
1070 402430 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
1071 402430 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
1072 402430 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
1073 402430 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
1074 402430 SRC(7,6)= (t10 + t11 + 1) >> 1;
1075 402430 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
1076 402430 }
/**
 * Horizontal-up 8x8 prediction from the filtered left column l0..l7 only.
 *
 * A value is shared by positions that are two columns left and one row down
 * from each other, alternating between 2-tap rounded averages and 3-tap
 * (1,2,1)/4 filters. The last interpolated value weights l7 3x, and every
 * position beyond it is filled with plain l7.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param has_topleft  selects whether SRC(-1,-1) contributes to l0
 * @param has_topright unused here
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
1077 1107170 static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
1078 int has_topright, ptrdiff_t _stride)
1079 {
1080 1107170 pixel *src = (pixel*)_src;
1081 1107170 int stride = _stride>>(sizeof(pixel)-1);
1082
2/2
✓ Branch 0 taken 529779 times.
✓ Branch 1 taken 23806 times.
1107170 PREDICT_8x8_LOAD_LEFT;
1083 1107170 SRC(0,0)= (l0 + l1 + 1) >> 1;
1084 1107170 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
1085 1107170 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
1086 1107170 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
1087 1107170 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
1088 1107170 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
1089 1107170 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
1090 1107170 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
1091 1107170 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
1092 1107170 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
1093 1107170 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
1094 1107170 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
1095 1107170 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
1096 1107170 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
/* Remaining positions have no further left samples to draw on: replicate l7. */
1097 1107170 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
1098 1107170 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
1099 1107170 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
1100 1107170 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
1101 1107170 }
1102
/**
 * Vertical 8x8 prediction with residual accumulation, seeded from the
 * filtered top row t0..t7.
 *
 * For each of the 8 columns the running value starts at the filtered top
 * sample and has block[column + 8*row] added row by row; each running value
 * is stored to the picture. The running value has type `pixel`, so it is
 * truncated to pixel width after every addition. All 64 coefficients are
 * zeroed before returning.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param _block       64 residual coefficients, read as dctcoef and cleared
 *                     on exit
 * @param has_topleft  selects whether SRC(-1,-1) contributes to t0
 * @param has_topright selects whether SRC(8,-1) contributes to t7
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
1103 static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1104 int has_topright, ptrdiff_t _stride)
1105 {
1106 int i;
1107 pixel *src = (pixel*)_src;
1108 const dctcoef *block = (const dctcoef*)_block;
1109 pixel pix[8];
1110 int stride = _stride>>(sizeof(pixel)-1);
1111 PREDICT_8x8_LOAD_TOP;
1112
1113 pix[0] = t0;
1114 pix[1] = t1;
1115 pix[2] = t2;
1116 pix[3] = t3;
1117 pix[4] = t4;
1118 pix[5] = t5;
1119 pix[6] = t6;
1120 pix[7] = t7;
1121
/* One iteration per column; src and block both step one element right. */
1122 for(i=0; i<8; i++){
1123 pixel v = pix[i];
1124 src[0*stride]= v += block[0];
1125 src[1*stride]= v += block[8];
1126 src[2*stride]= v += block[16];
1127 src[3*stride]= v += block[24];
1128 src[4*stride]= v += block[32];
1129 src[5*stride]= v += block[40];
1130 src[6*stride]= v += block[48];
1131 src[7*stride]= v + block[56];
1132 src++;
1133 block++;
1134 }
1135
1136 memset(_block, 0, sizeof(dctcoef) * 64);
1137 }
1138
/**
 * Horizontal 8x8 prediction with residual accumulation, seeded from the
 * filtered left column l0..l7.
 *
 * For each of the 8 rows the running value starts at the filtered left
 * sample and has block[8*row + column] added column by column; each running
 * value is stored to the picture. The running value has type `pixel`, so it
 * is truncated to pixel width after every addition. All 64 coefficients are
 * zeroed before returning.
 *
 * @param _src         destination block, reinterpreted as pixel*
 * @param _block       64 residual coefficients, read as dctcoef and cleared
 *                     on exit
 * @param has_topleft  selects whether SRC(-1,-1) contributes to l0
 * @param has_topright unused here
 * @param _stride      line stride; shifted right by sizeof(pixel)-1 to get
 *                     pixel units (presumably given in bytes - confirm)
 */
1139 static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1140 int has_topright, ptrdiff_t _stride)
1141 {
1142 int i;
1143 pixel *src = (pixel*)_src;
1144 const dctcoef *block = (const dctcoef*)_block;
1145 pixel pix[8];
1146 int stride = _stride>>(sizeof(pixel)-1);
1147 PREDICT_8x8_LOAD_LEFT;
1148
1149 pix[0] = l0;
1150 pix[1] = l1;
1151 pix[2] = l2;
1152 pix[3] = l3;
1153 pix[4] = l4;
1154 pix[5] = l5;
1155 pix[6] = l6;
1156 pix[7] = l7;
1157
/* One iteration per row; src steps one line down, block one row of 8. */
1158 for(i=0; i<8; i++){
1159 pixel v = pix[i];
1160 src[0]= v += block[0];
1161 src[1]= v += block[1];
1162 src[2]= v += block[2];
1163 src[3]= v += block[3];
1164 src[4]= v += block[4];
1165 src[5]= v += block[5];
1166 src[6]= v += block[6];
1167 src[7]= v + block[7];
1168 src+= stride;
1169 block+= 8;
1170 }
1171
1172 memset(_block, 0, sizeof(dctcoef) * 64);
1173 }
1174
/* The 8x8l helper macros are not needed past this point; drop them so they
 * do not leak into the code that follows. */
1175 #undef PREDICT_8x8_LOAD_LEFT
1176 #undef PREDICT_8x8_LOAD_TOP
1177 #undef PREDICT_8x8_LOAD_TOPLEFT
1178 #undef PREDICT_8x8_LOAD_TOPRIGHT
1179 #undef PREDICT_8x8_DC
1180 #undef PTR
1181 #undef PT
1182 #undef PL
1183 #undef SRC
1184
/**
 * Vertical 4x4 prediction with residual accumulation, seeded from the
 * unfiltered row above the block.
 *
 * For each of the 4 columns the running value starts at the sample directly
 * above the block and has block[column + 4*row] added row by row; each
 * running value is stored to the picture. The running value has type `pixel`,
 * so it is truncated to pixel width after every addition. All 16 coefficients
 * are zeroed before returning.
 *
 * @param _pix   destination block, reinterpreted as pixel*
 * @param _block 16 residual coefficients, read as dctcoef and cleared on exit
 * @param stride line stride; shifted right by sizeof(pixel)-1 to get pixel
 *               units (presumably given in bytes - confirm)
 */
1185 182984 static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, int16_t *_block,
1186 ptrdiff_t stride)
1187 {
1188 int i;
1189 182984 pixel *pix = (pixel*)_pix;
1190 182984 const dctcoef *block = (const dctcoef*)_block;
1191 182984 stride >>= sizeof(pixel)-1;
/* Rebase onto the row above, so pix[0] is the seed and pix[k*stride] for
 * k = 1..4 are the block rows. */
1192 182984 pix -= stride;
1193
2/2
✓ Branch 0 taken 365968 times.
✓ Branch 1 taken 91492 times.
914920 for(i=0; i<4; i++){
1194 731936 pixel v = pix[0];
1195 731936 pix[1*stride]= v += block[0];
1196 731936 pix[2*stride]= v += block[4];
1197 731936 pix[3*stride]= v += block[8];
1198 731936 pix[4*stride]= v + block[12];
1199 731936 pix++;
1200 731936 block++;
1201 }
1202
1203 182984 memset(_block, 0, sizeof(dctcoef) * 16);
1204 182984 }
1205
/**
 * Horizontal 4x4 prediction with residual accumulation, seeded from the
 * unfiltered column left of the block.
 *
 * For each of the 4 rows the running value starts at pix[-1] and has
 * block[4*row + column] added column by column; each running value is stored
 * to the picture. The running value has type `pixel`, so it is truncated to
 * pixel width after every addition. All 16 coefficients are zeroed before
 * returning.
 *
 * @param _pix   destination block, reinterpreted as pixel*
 * @param _block 16 residual coefficients, read as dctcoef and cleared on exit
 * @param stride line stride; shifted right by sizeof(pixel)-1 to get pixel
 *               units (presumably given in bytes - confirm)
 */
1206 239402 static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, int16_t *_block,
1207 ptrdiff_t stride)
1208 {
1209 int i;
1210 239402 pixel *pix = (pixel*)_pix;
1211 239402 const dctcoef *block = (const dctcoef*)_block;
1212 239402 stride >>= sizeof(pixel)-1;
1213
2/2
✓ Branch 0 taken 478804 times.
✓ Branch 1 taken 119701 times.
1197010 for(i=0; i<4; i++){
1214 957608 pixel v = pix[-1];
1215 957608 pix[0]= v += block[0];
1216 957608 pix[1]= v += block[1];
1217 957608 pix[2]= v += block[2];
1218 957608 pix[3]= v + block[3];
1219 957608 pix+= stride;
1220 957608 block+= 4;
1221 }
1222
1223 239402 memset(_block, 0, sizeof(dctcoef) * 16);
1224 239402 }
1225
/**
 * Vertical 8x8 prediction with residual accumulation, seeded from the
 * unfiltered row above the block.
 *
 * For each of the 8 columns the running value starts at the sample directly
 * above the block and has block[column + 8*row] added row by row; each
 * running value is stored to the picture. The running value has type `pixel`,
 * so it is truncated to pixel width after every addition. All 64 coefficients
 * are zeroed before returning.
 *
 * @param _pix   destination block, reinterpreted as pixel*
 * @param _block 64 residual coefficients, read as dctcoef and cleared on exit
 * @param stride line stride; shifted right by sizeof(pixel)-1 to get pixel
 *               units (presumably given in bytes - confirm)
 */
1226 2148 static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, int16_t *_block,
1227 ptrdiff_t stride)
1228 {
1229 int i;
1230 2148 pixel *pix = (pixel*)_pix;
1231 2148 const dctcoef *block = (const dctcoef*)_block;
1232 2148 stride >>= sizeof(pixel)-1;
/* Rebase onto the row above, so pix[0] is the seed and pix[k*stride] for
 * k = 1..8 are the block rows. */
1233 2148 pix -= stride;
1234
2/2
✓ Branch 0 taken 8592 times.
✓ Branch 1 taken 1074 times.
19332 for(i=0; i<8; i++){
1235 17184 pixel v = pix[0];
1236 17184 pix[1*stride]= v += block[0];
1237 17184 pix[2*stride]= v += block[8];
1238 17184 pix[3*stride]= v += block[16];
1239 17184 pix[4*stride]= v += block[24];
1240 17184 pix[5*stride]= v += block[32];
1241 17184 pix[6*stride]= v += block[40];
1242 17184 pix[7*stride]= v += block[48];
1243 17184 pix[8*stride]= v + block[56];
1244 17184 pix++;
1245 17184 block++;
1246 }
1247
1248 2148 memset(_block, 0, sizeof(dctcoef) * 64);
1249 2148 }
1250
/**
 * Horizontal 8x8 prediction with residual accumulation, seeded from the
 * unfiltered column left of the block.
 *
 * For each of the 8 rows the running value starts at pix[-1] and has
 * block[8*row + column] added column by column; each running value is stored
 * to the picture. The running value has type `pixel`, so it is truncated to
 * pixel width after every addition. All 64 coefficients are zeroed before
 * returning.
 *
 * @param _pix   destination block, reinterpreted as pixel*
 * @param _block 64 residual coefficients, read as dctcoef and cleared on exit
 * @param stride line stride; shifted right by sizeof(pixel)-1 to get pixel
 *               units (presumably given in bytes - confirm)
 */
1251 2828 static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, int16_t *_block,
1252 ptrdiff_t stride)
1253 {
1254 int i;
1255 2828 pixel *pix = (pixel*)_pix;
1256 2828 const dctcoef *block = (const dctcoef*)_block;
1257 2828 stride >>= sizeof(pixel)-1;
1258
2/2
✓ Branch 0 taken 11312 times.
✓ Branch 1 taken 1414 times.
25452 for(i=0; i<8; i++){
1259 22624 pixel v = pix[-1];
1260 22624 pix[0]= v += block[0];
1261 22624 pix[1]= v += block[1];
1262 22624 pix[2]= v += block[2];
1263 22624 pix[3]= v += block[3];
1264 22624 pix[4]= v += block[4];
1265 22624 pix[5]= v += block[5];
1266 22624 pix[6]= v += block[6];
1267 22624 pix[7]= v + block[7];
1268 22624 pix+= stride;
1269 22624 block+= 8;
1270 }
1271
1272 2828 memset(_block, 0, sizeof(dctcoef) * 64);
1273 2828 }
1274
/**
 * Apply pred4x4_vertical_add to 16 sub-blocks.
 *
 * Sub-block i is located at pix + block_offset[i] and takes its coefficients
 * from block + i*16*sizeof(pixel). `block` is an int16_t pointer, so the step
 * is 16*sizeof(pixel) int16_t elements per sub-block (presumably this equals
 * 16 dctcoef for every supported bit depth - confirm against the dctcoef
 * definition).
 *
 * @param pix          base pointer of the destination area
 * @param block_offset offsets of the sub-blocks relative to pix; entries
 *                     0..15 are read
 * @param block        residual coefficients; cleared by the callee
 * @param stride       line stride, passed through unchanged
 */
1275 724 static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,
1276 int16_t *block,
1277 ptrdiff_t stride)
1278 {
1279 int i;
1280
2/2
✓ Branch 0 taken 5792 times.
✓ Branch 1 taken 362 times.
12308 for(i=0; i<16; i++)
1281 11584 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1282 724 }
1283
/**
 * Apply pred4x4_horizontal_add to 16 sub-blocks.
 *
 * Sub-block i is located at pix + block_offset[i] and takes its coefficients
 * from block + i*16*sizeof(pixel). `block` is an int16_t pointer, so the step
 * is 16*sizeof(pixel) int16_t elements per sub-block (presumably this equals
 * 16 dctcoef for every supported bit depth - confirm against the dctcoef
 * definition).
 *
 * @param pix          base pointer of the destination area
 * @param block_offset offsets of the sub-blocks relative to pix; entries
 *                     0..15 are read
 * @param block        residual coefficients; cleared by the callee
 * @param stride       line stride, passed through unchanged
 */
1284 556 static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix,
1285 const int *block_offset,
1286 int16_t *block,
1287 ptrdiff_t stride)
1288 {
1289 int i;
1290
2/2
✓ Branch 0 taken 4448 times.
✓ Branch 1 taken 278 times.
9452 for(i=0; i<16; i++)
1291 8896 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1292 556 }
1293
/**
 * Apply pred4x4_vertical_add to 4 sub-blocks.
 *
 * Sub-block i is located at pix + block_offset[i] and takes its coefficients
 * from block + i*16*sizeof(pixel) (an offset counted in int16_t elements).
 *
 * @param pix          base pointer of the destination area
 * @param block_offset offsets of the sub-blocks relative to pix; entries
 *                     0..3 are read
 * @param block        residual coefficients; cleared by the callee
 * @param stride       line stride, passed through unchanged
 */
1294 4516 static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,
1295 int16_t *block, ptrdiff_t stride)
1296 {
1297 int i;
1298
2/2
✓ Branch 0 taken 9032 times.
✓ Branch 1 taken 2258 times.
22580 for(i=0; i<4; i++)
1299 18064 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1300 4516 }
1301
/**
 * Apply pred4x4_vertical_add to 8 sub-blocks.
 *
 * The first four sub-blocks use block_offset[0..3]; the last four use
 * block_offset[8..11] (index i+4 for i = 4..7), while their coefficients
 * still come from consecutive slots block + i*16*sizeof(pixel).
 *
 * @param pix          base pointer of the destination area
 * @param block_offset offsets of the sub-blocks relative to pix; entries
 *                     0..3 and 8..11 are read
 * @param block        residual coefficients; cleared by the callee
 * @param stride       line stride, passed through unchanged
 */
1302 static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
1303 int16_t *block, ptrdiff_t stride)
1304 {
1305 int i;
1306 for(i=0; i<4; i++)
1307 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1308 for(i=4; i<8; i++)
1309 FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1310 }
1311
/**
 * Apply pred4x4_horizontal_add to 4 sub-blocks.
 *
 * Sub-block i is located at pix + block_offset[i] and takes its coefficients
 * from block + i*16*sizeof(pixel) (an offset counted in int16_t elements).
 *
 * @param pix          base pointer of the destination area
 * @param block_offset offsets of the sub-blocks relative to pix; entries
 *                     0..3 are read
 * @param block        residual coefficients; cleared by the callee
 * @param stride       line stride, passed through unchanged
 */
1312 5776 static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
1313 int16_t *block,
1314 ptrdiff_t stride)
1315 {
1316 int i;
1317
2/2
✓ Branch 0 taken 11552 times.
✓ Branch 1 taken 2888 times.
28880 for(i=0; i<4; i++)
1318 23104 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1319 5776 }
1320
/**
 * Apply pred4x4_horizontal_add to 8 sub-blocks.
 *
 * The first four sub-blocks use block_offset[0..3]; the last four use
 * block_offset[8..11] (index i+4 for i = 4..7), while their coefficients
 * still come from consecutive slots block + i*16*sizeof(pixel).
 *
 * @param pix          base pointer of the destination area
 * @param block_offset offsets of the sub-blocks relative to pix; entries
 *                     0..3 and 8..11 are read
 * @param block        residual coefficients; cleared by the callee
 * @param stride       line stride, passed through unchanged
 */
1321 static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix,
1322 const int *block_offset,
1323 int16_t *block, ptrdiff_t stride)
1324 {
1325 int i;
1326 for(i=0; i<4; i++)
1327 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1328 for(i=4; i<8; i++)
1329 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1330 }
1331