FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/h264pred_template.c
Date: 2024-04-25 15:36:26
Exec Total Coverage
Lines: 804 870 92.4%
Functions: 174 334 52.1%
Branches: 162 184 88.0%

Line Branch Exec Source
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * H.264 / AVC / MPEG-4 part10 prediction functions.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
28 #include "libavutil/intreadwrite.h"
29
30 #include "mathops.h"
31
32 #include "bit_depth_template.c"
33
34 9058064 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright,
35 ptrdiff_t _stride)
36 {
37 9058064 pixel *src = (pixel*)_src;
38 9058064 int stride = _stride>>(sizeof(pixel)-1);
39 9058064 const pixel4 a= AV_RN4PA(src-stride);
40
41 9058064 AV_WN4PA(src+0*stride, a);
42 9058064 AV_WN4PA(src+1*stride, a);
43 9058064 AV_WN4PA(src+2*stride, a);
44 9058064 AV_WN4PA(src+3*stride, a);
45 9058064 }
46
47 14834470 static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright,
48 ptrdiff_t _stride)
49 {
50 14834470 pixel *src = (pixel*)_src;
51 14834470 int stride = _stride>>(sizeof(pixel)-1);
52 14834470 AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));
53 14834470 AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride]));
54 14834470 AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride]));
55 14834470 AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride]));
56 14834470 }
57
58 6144492 static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright,
59 ptrdiff_t _stride)
60 {
61 6144492 pixel *src = (pixel*)_src;
62 6144492 int stride = _stride>>(sizeof(pixel)-1);
63 6144492 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
64 6144492 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
65 6144492 const pixel4 a = PIXEL_SPLAT_X4(dc);
66
67 6144492 AV_WN4PA(src+0*stride, a);
68 6144492 AV_WN4PA(src+1*stride, a);
69 6144492 AV_WN4PA(src+2*stride, a);
70 6144492 AV_WN4PA(src+3*stride, a);
71 6144492 }
72
73 621520 static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright,
74 ptrdiff_t _stride)
75 {
76 621520 pixel *src = (pixel*)_src;
77 621520 int stride = _stride>>(sizeof(pixel)-1);
78 621520 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
79 621520 const pixel4 a = PIXEL_SPLAT_X4(dc);
80
81 621520 AV_WN4PA(src+0*stride, a);
82 621520 AV_WN4PA(src+1*stride, a);
83 621520 AV_WN4PA(src+2*stride, a);
84 621520 AV_WN4PA(src+3*stride, a);
85 621520 }
86
87 120564 static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright,
88 ptrdiff_t _stride)
89 {
90 120564 pixel *src = (pixel*)_src;
91 120564 int stride = _stride>>(sizeof(pixel)-1);
92 120564 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
93 120564 const pixel4 a = PIXEL_SPLAT_X4(dc);
94
95 120564 AV_WN4PA(src+0*stride, a);
96 120564 AV_WN4PA(src+1*stride, a);
97 120564 AV_WN4PA(src+2*stride, a);
98 120564 AV_WN4PA(src+3*stride, a);
99 120564 }
100
101 15388 static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright,
102 ptrdiff_t _stride)
103 {
104 15388 pixel *src = (pixel*)_src;
105 15388 int stride = _stride>>(sizeof(pixel)-1);
106 15388 const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
107
108 15388 AV_WN4PA(src+0*stride, a);
109 15388 AV_WN4PA(src+1*stride, a);
110 15388 AV_WN4PA(src+2*stride, a);
111 15388 AV_WN4PA(src+3*stride, a);
112 15388 }
113
114
115 #define LOAD_TOP_RIGHT_EDGE\
116 const unsigned av_unused t4 = topright[0];\
117 const unsigned av_unused t5 = topright[1];\
118 const unsigned av_unused t6 = topright[2];\
119 const unsigned av_unused t7 = topright[3];\
120
121 #define LOAD_DOWN_LEFT_EDGE\
122 const unsigned av_unused l4 = src[-1+4*stride];\
123 const unsigned av_unused l5 = src[-1+5*stride];\
124 const unsigned av_unused l6 = src[-1+6*stride];\
125 const unsigned av_unused l7 = src[-1+7*stride];\
126
127 #define LOAD_LEFT_EDGE\
128 const unsigned av_unused l0 = src[-1+0*stride];\
129 const unsigned av_unused l1 = src[-1+1*stride];\
130 const unsigned av_unused l2 = src[-1+2*stride];\
131 const unsigned av_unused l3 = src[-1+3*stride];\
132
133 #define LOAD_TOP_EDGE\
134 const unsigned av_unused t0 = src[ 0-1*stride];\
135 const unsigned av_unused t1 = src[ 1-1*stride];\
136 const unsigned av_unused t2 = src[ 2-1*stride];\
137 const unsigned av_unused t3 = src[ 3-1*stride];\
138
139 3844162 static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
140 ptrdiff_t _stride)
141 {
142 3844162 pixel *src = (pixel*)_src;
143 3844162 int stride = _stride>>(sizeof(pixel)-1);
144 3844162 const int lt= src[-1-1*stride];
145 3844162 LOAD_TOP_EDGE
146 3844162 LOAD_LEFT_EDGE
147
148 3844162 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
149 3844162 src[0+2*stride]=
150 3844162 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
151 3844162 src[0+1*stride]=
152 3844162 src[1+2*stride]=
153 3844162 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
154 3844162 src[0+0*stride]=
155 3844162 src[1+1*stride]=
156 3844162 src[2+2*stride]=
157 3844162 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
158 3844162 src[1+0*stride]=
159 3844162 src[2+1*stride]=
160 3844162 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
161 3844162 src[2+0*stride]=
162 3844162 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
163 3844162 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
164 3844162 }
165
166 2300792 static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright,
167 ptrdiff_t _stride)
168 {
169 2300792 pixel *src = (pixel*)_src;
170 2300792 const pixel *topright = (const pixel*)_topright;
171 2300792 int stride = _stride>>(sizeof(pixel)-1);
172 2300792 LOAD_TOP_EDGE
173 2300792 LOAD_TOP_RIGHT_EDGE
174 // LOAD_LEFT_EDGE
175
176 2300792 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
177 2300792 src[1+0*stride]=
178 2300792 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
179 2300792 src[2+0*stride]=
180 2300792 src[1+1*stride]=
181 2300792 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
182 2300792 src[3+0*stride]=
183 2300792 src[2+1*stride]=
184 2300792 src[1+2*stride]=
185 2300792 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
186 2300792 src[3+1*stride]=
187 2300792 src[2+2*stride]=
188 2300792 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
189 2300792 src[3+2*stride]=
190 2300792 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
191 2300792 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
192 2300792 }
193
194 2667578 static void FUNCC(pred4x4_vertical_right)(uint8_t *_src,
195 const uint8_t *topright,
196 ptrdiff_t _stride)
197 {
198 2667578 pixel *src = (pixel*)_src;
199 2667578 int stride = _stride>>(sizeof(pixel)-1);
200 2667578 const int lt= src[-1-1*stride];
201 2667578 LOAD_TOP_EDGE
202 2667578 LOAD_LEFT_EDGE
203
204 2667578 src[0+0*stride]=
205 2667578 src[1+2*stride]=(lt + t0 + 1)>>1;
206 2667578 src[1+0*stride]=
207 2667578 src[2+2*stride]=(t0 + t1 + 1)>>1;
208 2667578 src[2+0*stride]=
209 2667578 src[3+2*stride]=(t1 + t2 + 1)>>1;
210 2667578 src[3+0*stride]=(t2 + t3 + 1)>>1;
211 2667578 src[0+1*stride]=
212 2667578 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
213 2667578 src[1+1*stride]=
214 2667578 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
215 2667578 src[2+1*stride]=
216 2667578 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
217 2667578 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
218 2667578 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
219 2667578 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
220 2667578 }
221
222 2182166 static void FUNCC(pred4x4_vertical_left)(uint8_t *_src,
223 const uint8_t *_topright,
224 ptrdiff_t _stride)
225 {
226 2182166 pixel *src = (pixel*)_src;
227 2182166 const pixel *topright = (const pixel*)_topright;
228 2182166 int stride = _stride>>(sizeof(pixel)-1);
229 2182166 LOAD_TOP_EDGE
230 2182166 LOAD_TOP_RIGHT_EDGE
231
232 2182166 src[0+0*stride]=(t0 + t1 + 1)>>1;
233 2182166 src[1+0*stride]=
234 2182166 src[0+2*stride]=(t1 + t2 + 1)>>1;
235 2182166 src[2+0*stride]=
236 2182166 src[1+2*stride]=(t2 + t3 + 1)>>1;
237 2182166 src[3+0*stride]=
238 2182166 src[2+2*stride]=(t3 + t4+ 1)>>1;
239 2182166 src[3+2*stride]=(t4 + t5+ 1)>>1;
240 2182166 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
241 2182166 src[1+1*stride]=
242 2182166 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
243 2182166 src[2+1*stride]=
244 2182166 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
245 2182166 src[3+1*stride]=
246 2182166 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
247 2182166 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
248 2182166 }
249
250 4072114 static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
251 ptrdiff_t _stride)
252 {
253 4072114 pixel *src = (pixel*)_src;
254 4072114 int stride = _stride>>(sizeof(pixel)-1);
255 4072114 LOAD_LEFT_EDGE
256
257 4072114 src[0+0*stride]=(l0 + l1 + 1)>>1;
258 4072114 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
259 4072114 src[2+0*stride]=
260 4072114 src[0+1*stride]=(l1 + l2 + 1)>>1;
261 4072114 src[3+0*stride]=
262 4072114 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
263 4072114 src[2+1*stride]=
264 4072114 src[0+2*stride]=(l2 + l3 + 1)>>1;
265 4072114 src[3+1*stride]=
266 4072114 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
267 4072114 src[3+2*stride]=
268 4072114 src[1+3*stride]=
269 4072114 src[0+3*stride]=
270 4072114 src[2+2*stride]=
271 4072114 src[2+3*stride]=
272 4072114 src[3+3*stride]=l3;
273 4072114 }
274
275 4373146 static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src,
276 const uint8_t *topright,
277 ptrdiff_t _stride)
278 {
279 4373146 pixel *src = (pixel*)_src;
280 4373146 int stride = _stride>>(sizeof(pixel)-1);
281 4373146 const int lt= src[-1-1*stride];
282 4373146 LOAD_TOP_EDGE
283 4373146 LOAD_LEFT_EDGE
284
285 4373146 src[0+0*stride]=
286 4373146 src[2+1*stride]=(lt + l0 + 1)>>1;
287 4373146 src[1+0*stride]=
288 4373146 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
289 4373146 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
290 4373146 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
291 4373146 src[0+1*stride]=
292 4373146 src[2+2*stride]=(l0 + l1 + 1)>>1;
293 4373146 src[1+1*stride]=
294 4373146 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
295 4373146 src[0+2*stride]=
296 4373146 src[2+3*stride]=(l1 + l2+ 1)>>1;
297 4373146 src[1+2*stride]=
298 4373146 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
299 4373146 src[0+3*stride]=(l2 + l3 + 1)>>1;
300 4373146 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
301 4373146 }
302
303 559392 static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
304 {
305 int i;
306 559392 pixel *src = (pixel*)_src;
307 559392 int stride = _stride>>(sizeof(pixel)-1);
308 559392 const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
309 559392 const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
310 559392 const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
311 559392 const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);
312
313
2/2
✓ Branch 0 taken 4475136 times.
✓ Branch 1 taken 279696 times.
9509664 for(i=0; i<16; i++){
314 8950272 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
315 8950272 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
316 8950272 AV_WN4PA(((pixel4*)(src+i*stride))+2, c);
317 8950272 AV_WN4PA(((pixel4*)(src+i*stride))+3, d);
318 }
319 559392 }
320
321 530214 static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
322 {
323 int i;
324 530214 pixel *src = (pixel*)_src;
325 530214 stride >>= sizeof(pixel)-1;
326
327
2/2
✓ Branch 0 taken 4241712 times.
✓ Branch 1 taken 265107 times.
9013638 for(i=0; i<16; i++){
328 8483424 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
329
330 8483424 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
331 8483424 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
332 8483424 AV_WN4PA(((pixel4*)(src+i*stride))+2, a);
333 8483424 AV_WN4PA(((pixel4*)(src+i*stride))+3, a);
334 }
335 530214 }
336
337 #define PREDICT_16x16_DC(v)\
338 for(i=0; i<16; i++){\
339 AV_WN4PA(src+ 0, v);\
340 AV_WN4PA(src+ 4, v);\
341 AV_WN4PA(src+ 8, v);\
342 AV_WN4PA(src+12, v);\
343 src += stride;\
344 }
345
346 504908 static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride)
347 {
348 504908 int i, dc=0;
349 504908 pixel *src = (pixel*)_src;
350 pixel4 dcsplat;
351 504908 stride >>= sizeof(pixel)-1;
352
353
2/2
✓ Branch 0 taken 4039264 times.
✓ Branch 1 taken 252454 times.
8583436 for(i=0;i<16; i++){
354 8078528 dc+= src[-1+i*stride];
355 }
356
357
2/2
✓ Branch 0 taken 4039264 times.
✓ Branch 1 taken 252454 times.
8583436 for(i=0;i<16; i++){
358 8078528 dc+= src[i-stride];
359 }
360
361 504908 dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
362
2/2
✓ Branch 0 taken 4039264 times.
✓ Branch 1 taken 252454 times.
8583436 PREDICT_16x16_DC(dcsplat);
363 504908 }
364
365 69954 static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
366 {
367 69954 int i, dc=0;
368 69954 pixel *src = (pixel*)_src;
369 pixel4 dcsplat;
370 69954 stride >>= sizeof(pixel)-1;
371
372
2/2
✓ Branch 0 taken 559632 times.
✓ Branch 1 taken 34977 times.
1189218 for(i=0;i<16; i++){
373 1119264 dc+= src[-1+i*stride];
374 }
375
376 69954 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
377
2/2
✓ Branch 0 taken 559632 times.
✓ Branch 1 taken 34977 times.
1189218 PREDICT_16x16_DC(dcsplat);
378 69954 }
379
380 11714 static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
381 {
382 11714 int i, dc=0;
383 11714 pixel *src = (pixel*)_src;
384 pixel4 dcsplat;
385 11714 stride >>= sizeof(pixel)-1;
386
387
2/2
✓ Branch 0 taken 93712 times.
✓ Branch 1 taken 5857 times.
199138 for(i=0;i<16; i++){
388 187424 dc+= src[i-stride];
389 }
390
391 11714 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
392
2/2
✓ Branch 0 taken 93712 times.
✓ Branch 1 taken 5857 times.
199138 PREDICT_16x16_DC(dcsplat);
393 11714 }
394
395 #define PRED16x16_X(n, v) \
396 static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
397 {\
398 int i;\
399 pixel *src = (pixel*)_src;\
400 stride >>= sizeof(pixel)-1;\
401 PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
402 }
403
404
2/2
✓ Branch 0 taken 42352 times.
✓ Branch 1 taken 2647 times.
89998 PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
405 #if BIT_DEPTH == 8
406
2/2
✓ Branch 0 taken 192 times.
✓ Branch 1 taken 12 times.
204 PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
407
2/2
✓ Branch 0 taken 160 times.
✓ Branch 1 taken 10 times.
170 PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
408 #endif
409
410 308870 static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src,
411 ptrdiff_t _stride,
412 const int svq3,
413 const int rv40)
414 {
415 int i, j, k;
416 int a;
417 INIT_CLIP
418 308870 pixel *src = (pixel*)_src;
419 308870 int stride = _stride>>(sizeof(pixel)-1);
420 308870 const pixel * const src0 = src +7-stride;
421 308870 const pixel * src1 = src +8*stride-1;
422 308870 const pixel * src2 = src1-2*stride; // == src+6*stride-1;
423 308870 int H = src0[1] - src0[-1];
424 308870 int V = src1[0] - src2[ 0];
425
2/2
✓ Branch 0 taken 1081045 times.
✓ Branch 1 taken 154435 times.
2470960 for(k=2; k<=8; ++k) {
426 2162090 src1 += stride; src2 -= stride;
427 2162090 H += k*(src0[k] - src0[-k]);
428 2162090 V += k*(src1[0] - src2[ 0]);
429 }
430
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 154379 times.
308870 if(svq3){
431 112 H = ( 5*(H/4) ) / 16;
432 112 V = ( 5*(V/4) ) / 16;
433
434 /* required for 100% accuracy */
435 112 i = H; H = V; V = i;
436
2/2
✓ Branch 0 taken 1217 times.
✓ Branch 1 taken 153162 times.
308758 }else if(rv40){
437 2434 H = ( H + (H>>2) ) >> 4;
438 2434 V = ( V + (V>>2) ) >> 4;
439 }else{
440 306324 H = ( 5*H+32 ) >> 6;
441 306324 V = ( 5*V+32 ) >> 6;
442 }
443
444 308870 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
445
2/2
✓ Branch 0 taken 2470960 times.
✓ Branch 1 taken 154435 times.
5250790 for(j=16; j>0; --j) {
446 4941920 int b = a;
447 4941920 a += V;
448
2/2
✓ Branch 0 taken 9883840 times.
✓ Branch 1 taken 2470960 times.
24709600 for(i=-16; i<0; i+=4) {
449 19767680 src[16+i] = CLIP((b ) >> 5);
450 19767680 src[17+i] = CLIP((b+ H) >> 5);
451 19767680 src[18+i] = CLIP((b+2*H) >> 5);
452 19767680 src[19+i] = CLIP((b+3*H) >> 5);
453 19767680 b += 4*H;
454 }
455 4941920 src += stride;
456 }
457 308870 }
458
459 306324 static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride)
460 {
461 306324 FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
462 306324 }
463
464 1460876 static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride)
465 {
466 int i;
467 1460876 pixel *src = (pixel*)_src;
468 1460876 int stride = _stride>>(sizeof(pixel)-1);
469 1460876 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
470 1460876 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
471
472
2/2
✓ Branch 0 taken 5843504 times.
✓ Branch 1 taken 730438 times.
13147884 for(i=0; i<8; i++){
473 11687008 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
474 11687008 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
475 }
476 1460876 }
477
478 322924 static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
479 {
480 int i;
481 322924 pixel *src = (pixel*)_src;
482 322924 int stride = _stride>>(sizeof(pixel)-1);
483 322924 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
484 322924 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
485
486
2/2
✓ Branch 0 taken 2583392 times.
✓ Branch 1 taken 161462 times.
5489708 for(i=0; i<16; i++){
487 5166784 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
488 5166784 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
489 }
490 322924 }
491
492 3021684 static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride)
493 {
494 int i;
495 3021684 pixel *src = (pixel*)_src;
496 3021684 stride >>= sizeof(pixel)-1;
497
498
2/2
✓ Branch 0 taken 12086736 times.
✓ Branch 1 taken 1510842 times.
27195156 for(i=0; i<8; i++){
499 24173472 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
500 24173472 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
501 24173472 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
502 }
503 3021684 }
504
505 595976 static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
506 {
507 int i;
508 595976 pixel *src = (pixel*)_src;
509 595976 stride >>= sizeof(pixel)-1;
510
2/2
✓ Branch 0 taken 4767808 times.
✓ Branch 1 taken 297988 times.
10131592 for(i=0; i<16; i++){
511 9535616 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
512 9535616 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
513 9535616 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
514 }
515 595976 }
516
517 #define PRED8x8_X(n, v)\
518 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
519 {\
520 int i;\
521 const pixel4 a = PIXEL_SPLAT_X4(v);\
522 pixel *src = (pixel*)_src;\
523 stride >>= sizeof(pixel)-1;\
524 for(i=0; i<8; i++){\
525 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
526 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
527 }\
528 }
529
530
2/2
✓ Branch 0 taken 1047024 times.
✓ Branch 1 taken 130878 times.
2355804 PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
531 #if BIT_DEPTH == 8
532
2/2
✓ Branch 0 taken 928 times.
✓ Branch 1 taken 116 times.
1044 PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
533
2/2
✓ Branch 0 taken 2160 times.
✓ Branch 1 taken 270 times.
2430 PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
534 #endif
535
536 3076 static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride)
537 {
538 3076 FUNCC(pred8x8_128_dc)(_src, stride);
539 3076 FUNCC(pred8x8_128_dc)(_src+8*stride, stride);
540 3076 }
541
542 1180092 static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride)
543 {
544 int i;
545 int dc0, dc2;
546 pixel4 dc0splat, dc2splat;
547 1180092 pixel *src = (pixel*)_src;
548 1180092 stride >>= sizeof(pixel)-1;
549
550 1180092 dc0=dc2=0;
551
2/2
✓ Branch 0 taken 2360184 times.
✓ Branch 1 taken 590046 times.
5900460 for(i=0;i<4; i++){
552 4720368 dc0+= src[-1+i*stride];
553 4720368 dc2+= src[-1+(i+4)*stride];
554 }
555 1180092 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
556 1180092 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
557
558
2/2
✓ Branch 0 taken 2360184 times.
✓ Branch 1 taken 590046 times.
5900460 for(i=0; i<4; i++){
559 4720368 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
560 4720368 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);
561 }
562
2/2
✓ Branch 0 taken 2360184 times.
✓ Branch 1 taken 590046 times.
5900460 for(i=4; i<8; i++){
563 4720368 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
564 4720368 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);
565 }
566 1180092 }
567
568 195540 static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
569 {
570 195540 FUNCC(pred8x8_left_dc)(_src, stride);
571 195540 FUNCC(pred8x8_left_dc)(_src+8*stride, stride);
572 195540 }
573
574 172964 static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride)
575 {
576 int i;
577 int dc0, dc1;
578 pixel4 dc0splat, dc1splat;
579 172964 pixel *src = (pixel*)_src;
580 172964 stride >>= sizeof(pixel)-1;
581
582 172964 dc0=dc1=0;
583
2/2
✓ Branch 0 taken 345928 times.
✓ Branch 1 taken 86482 times.
864820 for(i=0;i<4; i++){
584 691856 dc0+= src[i-stride];
585 691856 dc1+= src[4+i-stride];
586 }
587 172964 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
588 172964 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
589
590
2/2
✓ Branch 0 taken 345928 times.
✓ Branch 1 taken 86482 times.
864820 for(i=0; i<4; i++){
591 691856 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
592 691856 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
593 }
594
2/2
✓ Branch 0 taken 345928 times.
✓ Branch 1 taken 86482 times.
864820 for(i=4; i<8; i++){
595 691856 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
596 691856 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
597 }
598 172964 }
599
600 26616 static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
601 {
602 int i;
603 int dc0, dc1;
604 pixel4 dc0splat, dc1splat;
605 26616 pixel *src = (pixel*)_src;
606 26616 stride >>= sizeof(pixel)-1;
607
608 26616 dc0=dc1=0;
609
2/2
✓ Branch 0 taken 53232 times.
✓ Branch 1 taken 13308 times.
133080 for(i=0;i<4; i++){
610 106464 dc0+= src[i-stride];
611 106464 dc1+= src[4+i-stride];
612 }
613 26616 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
614 26616 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
615
616
2/2
✓ Branch 0 taken 212928 times.
✓ Branch 1 taken 13308 times.
452472 for(i=0; i<16; i++){
617 425856 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
618 425856 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
619 }
620 26616 }
621
622 5847992 static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride)
623 {
624 int i;
625 int dc0, dc1, dc2;
626 pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
627 5847992 pixel *src = (pixel*)_src;
628 5847992 stride >>= sizeof(pixel)-1;
629
630 5847992 dc0=dc1=dc2=0;
631
2/2
✓ Branch 0 taken 11695984 times.
✓ Branch 1 taken 2923996 times.
29239960 for(i=0;i<4; i++){
632 23391968 dc0+= src[-1+i*stride] + src[i-stride];
633 23391968 dc1+= src[4+i-stride];
634 23391968 dc2+= src[-1+(i+4)*stride];
635 }
636 5847992 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
637 5847992 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
638 5847992 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
639 5847992 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
640
641
2/2
✓ Branch 0 taken 11695984 times.
✓ Branch 1 taken 2923996 times.
29239960 for(i=0; i<4; i++){
642 23391968 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
643 23391968 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
644 }
645
2/2
✓ Branch 0 taken 11695984 times.
✓ Branch 1 taken 2923996 times.
29239960 for(i=4; i<8; i++){
646 23391968 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
647 23391968 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
648 }
649 5847992 }
650
651 1594572 static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride)
652 {
653 int i;
654 int dc0, dc1, dc2, dc3, dc4;
655 pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
656 1594572 pixel *src = (pixel*)_src;
657 1594572 stride >>= sizeof(pixel)-1;
658
659 1594572 dc0=dc1=dc2=dc3=dc4=0;
660
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=0;i<4; i++){
661 6378288 dc0+= src[-1+i*stride] + src[i-stride];
662 6378288 dc1+= src[4+i-stride];
663 6378288 dc2+= src[-1+(i+4)*stride];
664 6378288 dc3+= src[-1+(i+8)*stride];
665 6378288 dc4+= src[-1+(i+12)*stride];
666 }
667 1594572 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
668 1594572 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
669 1594572 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
670 1594572 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
671 1594572 dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);
672 1594572 dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);
673 1594572 dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);
674 1594572 dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);
675
676
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=0; i<4; i++){
677 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
678 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
679 }
680
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=4; i<8; i++){
681 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
682 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
683 }
684
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=8; i<12; i++){
685 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
686 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
687 }
688
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=12; i<16; i++){
689 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
690 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
691 }
692 1594572 }
693
694 //the following 4 function should not be optimized!
695 68 static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
696 {
697 68 FUNCC(pred8x8_top_dc)(src, stride);
698 68 FUNCC(pred4x4_dc)(src, NULL, stride);
699 68 }
700
701 12 static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
702 {
703 12 FUNCC(pred8x16_top_dc)(src, stride);
704 12 FUNCC(pred4x4_dc)(src, NULL, stride);
705 12 }
706
707 36 static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
708 {
709 36 FUNCC(pred8x8_dc)(src, stride);
710 36 FUNCC(pred4x4_top_dc)(src, NULL, stride);
711 36 }
712
713 12 static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
714 {
715 12 FUNCC(pred8x16_dc)(src, stride);
716 12 FUNCC(pred4x4_top_dc)(src, NULL, stride);
717 12 }
718
719 20 static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
720 {
721 20 FUNCC(pred8x8_left_dc)(src, stride);
722 20 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
723 20 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
724 20 }
725
726 12 static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
727 {
728 12 FUNCC(pred8x16_left_dc)(src, stride);
729 12 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
730 12 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
731 12 }
732
733 32 static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
734 {
735 32 FUNCC(pred8x8_left_dc)(src, stride);
736 32 FUNCC(pred4x4_128_dc)(src , NULL, stride);
737 32 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
738 32 }
739
740 12 static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
741 {
742 12 FUNCC(pred8x16_left_dc)(src, stride);
743 12 FUNCC(pred4x4_128_dc)(src , NULL, stride);
744 12 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
745 12 }
746
747 944724 static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride)
748 {
749 int j, k;
750 int a;
751 INIT_CLIP
752 944724 pixel *src = (pixel*)_src;
753 944724 int stride = _stride>>(sizeof(pixel)-1);
754 944724 const pixel * const src0 = src +3-stride;
755 944724 const pixel * src1 = src +4*stride-1;
756 944724 const pixel * src2 = src1-2*stride; // == src+2*stride-1;
757 944724 int H = src0[1] - src0[-1];
758 944724 int V = src1[0] - src2[ 0];
759
2/2
✓ Branch 0 taken 1417086 times.
✓ Branch 1 taken 472362 times.
3778896 for(k=2; k<=4; ++k) {
760 2834172 src1 += stride; src2 -= stride;
761 2834172 H += k*(src0[k] - src0[-k]);
762 2834172 V += k*(src1[0] - src2[ 0]);
763 }
764 944724 H = ( 17*H+16 ) >> 5;
765 944724 V = ( 17*V+16 ) >> 5;
766
767 944724 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
768
2/2
✓ Branch 0 taken 3778896 times.
✓ Branch 1 taken 472362 times.
8502516 for(j=8; j>0; --j) {
769 7557792 int b = a;
770 7557792 a += V;
771 7557792 src[0] = CLIP((b ) >> 5);
772 7557792 src[1] = CLIP((b+ H) >> 5);
773 7557792 src[2] = CLIP((b+2*H) >> 5);
774 7557792 src[3] = CLIP((b+3*H) >> 5);
775 7557792 src[4] = CLIP((b+4*H) >> 5);
776 7557792 src[5] = CLIP((b+5*H) >> 5);
777 7557792 src[6] = CLIP((b+6*H) >> 5);
778 7557792 src[7] = CLIP((b+7*H) >> 5);
779 7557792 src += stride;
780 }
781 944724 }
782
783 328576 static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride)
784 {
785 int j, k;
786 int a;
787 INIT_CLIP
788 328576 pixel *src = (pixel*)_src;
789 328576 int stride = _stride>>(sizeof(pixel)-1);
790 328576 const pixel * const src0 = src +3-stride;
791 328576 const pixel * src1 = src +8*stride-1;
792 328576 const pixel * src2 = src1-2*stride; // == src+6*stride-1;
793 328576 int H = src0[1] - src0[-1];
794 328576 int V = src1[0] - src2[ 0];
795
796
2/2
✓ Branch 0 taken 492864 times.
✓ Branch 1 taken 164288 times.
1314304 for (k = 2; k <= 4; ++k) {
797 985728 src1 += stride; src2 -= stride;
798 985728 H += k*(src0[k] - src0[-k]);
799 985728 V += k*(src1[0] - src2[ 0]);
800 }
801
2/2
✓ Branch 0 taken 657152 times.
✓ Branch 1 taken 164288 times.
1642880 for (; k <= 8; ++k) {
802 1314304 src1 += stride; src2 -= stride;
803 1314304 V += k*(src1[0] - src2[0]);
804 }
805
806 328576 H = (17*H+16) >> 5;
807 328576 V = (5*V+32) >> 6;
808
809 328576 a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;
810
2/2
✓ Branch 0 taken 2628608 times.
✓ Branch 1 taken 164288 times.
5585792 for(j=16; j>0; --j) {
811 5257216 int b = a;
812 5257216 a += V;
813 5257216 src[0] = CLIP((b ) >> 5);
814 5257216 src[1] = CLIP((b+ H) >> 5);
815 5257216 src[2] = CLIP((b+2*H) >> 5);
816 5257216 src[3] = CLIP((b+3*H) >> 5);
817 5257216 src[4] = CLIP((b+4*H) >> 5);
818 5257216 src[5] = CLIP((b+5*H) >> 5);
819 5257216 src[6] = CLIP((b+6*H) >> 5);
820 5257216 src[7] = CLIP((b+7*H) >> 5);
821 5257216 src += stride;
822 }
823 328576 }
824
/* Helper macros for the pred8x8l_* functions below. They all expect the
 * locals `src` (pixel pointer to the block's top-left sample) and `stride`
 * (row pitch in pixels) to be in scope; the LOAD_* macros additionally read
 * `has_topleft` / `has_topright`. */
/* Sample at column x, row y relative to src; negative indices address the
 * neighbouring row above / column to the left. */
825 #define SRC(x,y) src[(x)+(y)*stride]
/* Declares l<y>: the left-neighbour sample of row y (column -1) smoothed with
 * a rounded (1,2,1)/4 filter over rows y-1, y, y+1. */
826 #define PL(y) \
827 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* Declares the filtered left column l0..l7.
 * l0 uses the corner SRC(-1,-1) as its upper tap only when has_topleft is
 * non-zero, otherwise SRC(-1,0) is used twice. l7 has no row below it, so
 * SRC(-1,7) is weighted 3 instead. l7 is tagged av_unused because not every
 * user references it. The expansion has no trailing semicolon; the caller
 * supplies it. */
828 #define PREDICT_8x8_LOAD_LEFT \
829 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
830 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
831 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
832 const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
833
/* Declares t<x>: the top-neighbour sample of column x (row -1) smoothed with
 * a rounded (1,2,1)/4 filter over columns x-1, x, x+1. */
834 #define PT(x) \
835 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Declares the filtered top row t0..t7.
 * t0 uses the corner SRC(-1,-1) as its left tap only when has_topleft is
 * non-zero, otherwise SRC(0,-1) is used twice. t7 uses SRC(8,-1) as its right
 * tap only when has_topright is non-zero, otherwise SRC(7,-1) is used twice.
 * The expansion has no trailing semicolon; the caller supplies it. */
836 #define PREDICT_8x8_LOAD_TOP \
837 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
838 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
839 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
840 const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
841 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
842
/* Same filter as PT(x) but assigns to an already declared t<x>. */
843 #define PTR(x) \
844 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Declares t8..t15 for the eight samples to the right of the top row.
 * With has_topright set they are the filtered samples of columns 8..15 in
 * row -1 (t15 weights SRC(15,-1) by 3 since there is no column 16 tap).
 * Otherwise all eight are set to the unfiltered sample SRC(7,-1). */
845 #define PREDICT_8x8_LOAD_TOPRIGHT \
846 int t8, t9, t10, t11, t12, t13, t14, t15; \
847 if(has_topright) { \
848 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
849 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
850 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
851
/* Declares lt: the corner sample SRC(-1,-1) filtered with its neighbours
 * SRC(-1,0) and SRC(0,-1). It reads SRC(-1,-1) unconditionally, i.e. it does
 * not consult has_topleft. */
852 #define PREDICT_8x8_LOAD_TOPLEFT \
853 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
854
/* Fills 8 rows with the pixel4 word v, two words per row, advancing src by
 * stride after every row. Declares a local `y` and leaves src pointing
 * 8 rows below where it started. */
855 #define PREDICT_8x8_DC(v) \
856 int y; \
857 for( y = 0; y < 8; y++ ) { \
858 AV_WN4PA(((pixel4*)src)+0, v); \
859 AV_WN4PA(((pixel4*)src)+1, v); \
860 src += stride; \
861 }
862
/**
 * Fill the 8x8 block at _src with the constant 1<<(BIT_DEPTH-1).
 *
 * No neighbouring samples are read; has_topleft and has_topright are unused.
 *
 * @param _src     pointer to the top-left sample of the block
 * @param _stride  row pitch; shifted right by sizeof(pixel)-1 to get the
 *                 pitch in pixel units (presumably a byte stride)
 */
863 2240 static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft,
864 int has_topright, ptrdiff_t _stride)
865 {
866 2240 pixel *src = (pixel*)_src;
867 2240 int stride = _stride>>(sizeof(pixel)-1);
868
869
2/2
✓ Branch 0 taken 8960 times.
✓ Branch 1 taken 1120 times.
20160 PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
870 2240 }
/**
 * Fill the 8x8 block at _src with the rounded mean of the eight filtered
 * left-neighbour samples l0..l7.
 *
 * @param _src          pointer to the top-left sample of the block
 * @param has_topleft   non-zero if SRC(-1,-1) may be read when filtering l0
 * @param has_topright  unused here
 * @param _stride       row pitch; shifted right by sizeof(pixel)-1 to get the
 *                      pitch in pixel units
 */
871 270176 static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft,
872 int has_topright, ptrdiff_t _stride)
873 {
874 270176 pixel *src = (pixel*)_src;
875 270176 int stride = _stride>>(sizeof(pixel)-1);
876
877
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 135076 times.
270176 PREDICT_8x8_LOAD_LEFT;
/* (sum + 4) >> 3 is the mean of the 8 values rounded to nearest. */
878 270176 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
879
2/2
✓ Branch 0 taken 1080704 times.
✓ Branch 1 taken 135088 times.
2431584 PREDICT_8x8_DC(dc);
880 270176 }
/**
 * Fill the 8x8 block at _src with the rounded mean of the eight filtered
 * top-neighbour samples t0..t7.
 *
 * @param _src          pointer to the top-left sample of the block
 * @param has_topleft   non-zero if SRC(-1,-1) may be read when filtering t0
 * @param has_topright  non-zero if SRC(8,-1) may be read when filtering t7
 * @param _stride       row pitch; shifted right by sizeof(pixel)-1 to get the
 *                      pitch in pixel units
 */
881 34678 static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft,
882 int has_topright, ptrdiff_t _stride)
883 {
884 34678 pixel *src = (pixel*)_src;
885 34678 int stride = _stride>>(sizeof(pixel)-1);
886
887
4/4
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 17323 times.
✓ Branch 2 taken 17323 times.
✓ Branch 3 taken 16 times.
34678 PREDICT_8x8_LOAD_TOP;
/* (sum + 4) >> 3 is the mean of the 8 values rounded to nearest. */
888 34678 const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
889
2/2
✓ Branch 0 taken 138712 times.
✓ Branch 1 taken 17339 times.
312102 PREDICT_8x8_DC(dc);
890 34678 }
/**
 * Fill the 8x8 block at _src with the rounded mean of the sixteen filtered
 * neighbour samples: l0..l7 from the left column and t0..t7 from the top row.
 *
 * @param _src          pointer to the top-left sample of the block
 * @param has_topleft   non-zero if SRC(-1,-1) may be read when filtering
 *                      l0 and t0
 * @param has_topright  non-zero if SRC(8,-1) may be read when filtering t7
 * @param _stride       row pitch; shifted right by sizeof(pixel)-1 to get the
 *                      pitch in pixel units
 */
891 3707728 static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft,
892 int has_topright, ptrdiff_t _stride)
893 {
894 3707728 pixel *src = (pixel*)_src;
895 3707728 int stride = _stride>>(sizeof(pixel)-1);
896
897
2/2
✓ Branch 0 taken 1853763 times.
✓ Branch 1 taken 101 times.
3707728 PREDICT_8x8_LOAD_LEFT;
898
4/4
✓ Branch 0 taken 1853763 times.
✓ Branch 1 taken 101 times.
✓ Branch 2 taken 1291945 times.
✓ Branch 3 taken 561919 times.
3707728 PREDICT_8x8_LOAD_TOP;
/* (sum + 8) >> 4 is the mean of the 16 values rounded to nearest. */
899 3707728 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
900 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
901
2/2
✓ Branch 0 taken 14830912 times.
✓ Branch 1 taken 1853864 times.
33369552 PREDICT_8x8_DC(dc);
902 3707728 }
/**
 * Horizontal prediction of an 8x8 block: every sample of row y is set to the
 * filtered left-neighbour value l<y>.
 *
 * @param _src          pointer to the top-left sample of the block
 * @param has_topleft   non-zero if SRC(-1,-1) may be read when filtering l0
 * @param has_topright  unused here
 * @param _stride       row pitch; shifted right by sizeof(pixel)-1 to get the
 *                      pitch in pixel units
 */
903 3322906 static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft,
904 int has_topright, ptrdiff_t _stride)
905 {
906 3322906 pixel *src = (pixel*)_src;
907 3322906 int stride = _stride>>(sizeof(pixel)-1);
908 pixel4 a;
909
910
2/2
✓ Branch 0 taken 1619629 times.
✓ Branch 1 taken 41824 times.
3322906 PREDICT_8x8_LOAD_LEFT;
/* ROW(y): replicate l<y> into a pixel4 word and store it to both halves
 * (samples 0..3 and 4..7) of row y. */
911 #define ROW(y) a = PIXEL_SPLAT_X4(l##y); \
912 AV_WN4PA(src+y*stride, a); \
913 AV_WN4PA(src+y*stride+4, a);
914 3322906 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
915 #undef ROW
916 3322906 }
/**
 * Vertical prediction of an 8x8 block: every sample of column x is set to the
 * filtered top-neighbour value t<x>.
 *
 * @param _src          pointer to the top-left sample of the block
 * @param has_topleft   non-zero if SRC(-1,-1) may be read when filtering t0
 * @param has_topright  non-zero if SRC(8,-1) may be read when filtering t7
 * @param _stride       row pitch; shifted right by sizeof(pixel)-1 to get the
 *                      pitch in pixel units
 */
917 1049920 static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft,
918 int has_topright, ptrdiff_t _stride)
919 {
920 int y;
921 1049920 pixel *src = (pixel*)_src;
922 1049920 int stride = _stride>>(sizeof(pixel)-1);
923 pixel4 a, b;
924
925
4/4
✓ Branch 0 taken 508757 times.
✓ Branch 1 taken 16203 times.
✓ Branch 2 taken 393400 times.
✓ Branch 3 taken 131560 times.
1049920 PREDICT_8x8_LOAD_TOP;
/* Row 0 is written sample by sample from the filtered top row ... */
926 1049920 src[0] = t0;
927 1049920 src[1] = t1;
928 1049920 src[2] = t2;
929 1049920 src[3] = t3;
930 1049920 src[4] = t4;
931 1049920 src[5] = t5;
932 1049920 src[6] = t6;
933 1049920 src[7] = t7;
/* ... then read back as two pixel4 words so rows 1..7 can be filled with
 * two word stores each. */
934 1049920 a = AV_RN4PA(((pixel4*)src)+0);
935 1049920 b = AV_RN4PA(((pixel4*)src)+1);
936
2/2
✓ Branch 0 taken 3674720 times.
✓ Branch 1 taken 524960 times.
8399360 for( y = 1; y < 8; y++ ) {
937 7349440 AV_WN4PA(((pixel4*)(src+y*stride))+0, a);
938 7349440 AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
939 }
940 1049920 }
/**
 * Diagonal down-left prediction of an 8x8 block from the filtered top row
 * t0..t7 and top-right row t8..t15.
 *
 * All samples with the same x+y share one value: with k = x+y, the value is
 * (t[k] + 2*t[k+1] + t[k+2] + 2) >> 2. The last sample (7,7) has no t16 and
 * uses (t14 + 3*t15 + 2) >> 2 instead.
 *
 * @param _src          pointer to the top-left sample of the block
 * @param has_topleft   non-zero if SRC(-1,-1) may be read when filtering t0
 * @param has_topright  non-zero if columns 8..15 of row -1 may be read;
 *                      otherwise t8..t15 are all SRC(7,-1)
 * @param _stride       row pitch; shifted right by sizeof(pixel)-1 to get the
 *                      pitch in pixel units
 */
941 392168 static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft,
942 int has_topright, ptrdiff_t _stride)
943 {
944 392168 pixel *src = (pixel*)_src;
945 392168 int stride = _stride>>(sizeof(pixel)-1);
946
4/4
✓ Branch 0 taken 193990 times.
✓ Branch 1 taken 2094 times.
✓ Branch 2 taken 149034 times.
✓ Branch 3 taken 47050 times.
392168 PREDICT_8x8_LOAD_TOP;
947
2/2
✓ Branch 0 taken 149034 times.
✓ Branch 1 taken 47050 times.
392168 PREDICT_8x8_LOAD_TOPRIGHT;
948 392168 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
949 392168 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
950 392168 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
951 392168 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
952 392168 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
953 392168 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
954 392168 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
955 392168 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
956 392168 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
957 392168 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
958 392168 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
959 392168 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
960 392168 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
961 392168 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
962 392168 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
963 392168 }
/**
 * Diagonal down-right prediction of an 8x8 block from the filtered left
 * column l0..l7, the filtered corner lt and the filtered top row t0..t7.
 *
 * All samples with the same x-y share one value, a rounded (1,2,1)/4 filter
 * over three consecutive entries of the edge sequence
 * l7, l6, ..., l0, lt, t0, ..., t7. The main diagonal (x == y) is
 * (l0 + 2*lt + t0 + 2) >> 2.
 *
 * @param _src          pointer to the top-left sample of the block
 * @param has_topleft   consulted when filtering l0 and t0; note that lt reads
 *                      SRC(-1,-1) regardless of this flag
 * @param has_topright  non-zero if SRC(8,-1) may be read when filtering t7
 * @param _stride       row pitch; shifted right by sizeof(pixel)-1 to get the
 *                      pitch in pixel units
 */
964 591594 static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,
965 int has_topright, ptrdiff_t _stride)
966 {
967 591594 pixel *src = (pixel*)_src;
968 591594 int stride = _stride>>(sizeof(pixel)-1);
969
3/4
✓ Branch 0 taken 295797 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 228475 times.
✓ Branch 3 taken 67322 times.
591594 PREDICT_8x8_LOAD_TOP;
970
1/2
✓ Branch 0 taken 295797 times.
✗ Branch 1 not taken.
591594 PREDICT_8x8_LOAD_LEFT;
971 591594 PREDICT_8x8_LOAD_TOPLEFT;
972 591594 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
973 591594 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
974 591594 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
975 591594 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
976 591594 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
977 591594 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
978 591594 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
979 591594 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
980 591594 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
981 591594 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
982 591594 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
983 591594 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
984 591594 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
985 591594 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
986 591594 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
987 591594 }
/**
 * Vertical-right prediction of an 8x8 block from the filtered left column,
 * the filtered corner lt and the filtered top row t0..t7.
 *
 * Row 0 holds rounded two-tap averages of adjacent entries of lt, t0..t7 and
 * row 1 holds rounded (1,2,1)/4 filters of the same entries. Each following
 * pair of rows repeats the pair above shifted one column to the right; the
 * samples shifted in on the left are (1,2,1)/4 filters over the left column
 * and corner. l7 is declared by the load macro but not referenced here.
 *
 * @param _src          pointer to the top-left sample of the block
 * @param has_topleft   consulted when filtering l0 and t0; note that lt reads
 *                      SRC(-1,-1) regardless of this flag
 * @param has_topright  non-zero if SRC(8,-1) may be read when filtering t7
 * @param _stride       row pitch; shifted right by sizeof(pixel)-1 to get the
 *                      pitch in pixel units
 */
988 400722 static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,
989 int has_topright, ptrdiff_t _stride)
990 {
991 400722 pixel *src = (pixel*)_src;
992 400722 int stride = _stride>>(sizeof(pixel)-1);
993
3/4
✓ Branch 0 taken 200361 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 153457 times.
✓ Branch 3 taken 46904 times.
400722 PREDICT_8x8_LOAD_TOP;
994
1/2
✓ Branch 0 taken 200361 times.
✗ Branch 1 not taken.
400722 PREDICT_8x8_LOAD_LEFT;
995 400722 PREDICT_8x8_LOAD_TOPLEFT;
996 400722 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
997 400722 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
998 400722 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
999 400722 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
1000 400722 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
1001 400722 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
1002 400722 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
1003 400722 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
1004 400722 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
1005 400722 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
1006 400722 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
1007 400722 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
1008 400722 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
1009 400722 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
1010 400722 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
1011 400722 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
1012 400722 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
1013 400722 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
1014 400722 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
1015 400722 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
1016 400722 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
1017 400722 SRC(7,0)= (t6 + t7 + 1) >> 1;
1018 400722 }
/**
 * Horizontal-down prediction of an 8x8 block from the filtered left column
 * l0..l7, the filtered corner lt and the filtered top row.
 *
 * Column 0 holds rounded two-tap averages of adjacent entries of lt, l0..l7
 * and column 1 holds rounded (1,2,1)/4 filters of the same entries. Each
 * following pair of columns repeats the pair to its left shifted one row
 * down; the samples shifted in at the top are (1,2,1)/4 filters over the
 * corner and top row. t7 is declared by the load macro but not referenced
 * here.
 *
 * @param _src          pointer to the top-left sample of the block
 * @param has_topleft   consulted when filtering l0 and t0; note that lt reads
 *                      SRC(-1,-1) regardless of this flag
 * @param has_topright  only affects t7, which this function does not use
 * @param _stride       row pitch; shifted right by sizeof(pixel)-1 to get the
 *                      pitch in pixel units
 */
1019 904026 static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,
1020 int has_topright, ptrdiff_t _stride)
1021 {
1022 904026 pixel *src = (pixel*)_src;
1023 904026 int stride = _stride>>(sizeof(pixel)-1);
1024
4/4
✓ Branch 0 taken 451999 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 346960 times.
✓ Branch 3 taken 105053 times.
904026 PREDICT_8x8_LOAD_TOP;
1025
2/2
✓ Branch 0 taken 451999 times.
✓ Branch 1 taken 14 times.
904026 PREDICT_8x8_LOAD_LEFT;
1026 904026 PREDICT_8x8_LOAD_TOPLEFT;
1027 904026 SRC(0,7)= (l6 + l7 + 1) >> 1;
1028 904026 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
1029 904026 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
1030 904026 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
1031 904026 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
1032 904026 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
1033 904026 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
1034 904026 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
1035 904026 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
1036 904026 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
1037 904026 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
1038 904026 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
1039 904026 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
1040 904026 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
1041 904026 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
1042 904026 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
1043 904026 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
1044 904026 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
1045 904026 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
1046 904026 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
1047 904026 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
1048 904026 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
1049 904026 }
/**
 * Vertical-left prediction of an 8x8 block from the filtered top row t0..t7
 * and top-right row t8..t12.
 *
 * Row 0 holds rounded two-tap averages (t[x] + t[x+1] + 1) >> 1 and row 1
 * holds (t[x] + 2*t[x+1] + t[x+2] + 2) >> 2. Each following pair of rows
 * repeats the pair above with the t index advanced by one. t13..t15 are
 * computed by the load macro but not referenced here.
 *
 * @param _src          pointer to the top-left sample of the block
 * @param has_topleft   non-zero if SRC(-1,-1) may be read when filtering t0
 * @param has_topright  non-zero if columns 8..15 of row -1 may be read;
 *                      otherwise t8..t15 are all SRC(7,-1)
 * @param _stride       row pitch; shifted right by sizeof(pixel)-1 to get the
 *                      pitch in pixel units
 */
1050 402428 static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,
1051 int has_topright, ptrdiff_t _stride)
1052 {
1053 402428 pixel *src = (pixel*)_src;
1054 402428 int stride = _stride>>(sizeof(pixel)-1);
1055
4/4
✓ Branch 0 taken 199327 times.
✓ Branch 1 taken 1887 times.
✓ Branch 2 taken 156787 times.
✓ Branch 3 taken 44427 times.
402428 PREDICT_8x8_LOAD_TOP;
1056
2/2
✓ Branch 0 taken 156787 times.
✓ Branch 1 taken 44427 times.
402428 PREDICT_8x8_LOAD_TOPRIGHT;
1057 402428 SRC(0,0)= (t0 + t1 + 1) >> 1;
1058 402428 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
1059 402428 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
1060 402428 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
1061 402428 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
1062 402428 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
1063 402428 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
1064 402428 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
1065 402428 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
1066 402428 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
1067 402428 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
1068 402428 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
1069 402428 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
1070 402428 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
1071 402428 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
1072 402428 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
1073 402428 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
1074 402428 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
1075 402428 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
1076 402428 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
1077 402428 SRC(7,6)= (t10 + t11 + 1) >> 1;
1078 402428 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
1079 402428 }
/**
 * Horizontal-up prediction of an 8x8 block from the filtered left column
 * l0..l7 only.
 *
 * Column 0 holds rounded two-tap averages (l[y] + l[y+1] + 1) >> 1 and
 * column 1 holds (l[y] + 2*l[y+1] + l[y+2] + 2) >> 2; each following pair of
 * columns repeats the pair to its left with the l index advanced by one.
 * Where the three-tap filter would need l8, (l6 + 3*l7 + 2) >> 2 is used, and
 * every position beyond that is set to l7.
 *
 * @param _src          pointer to the top-left sample of the block
 * @param has_topleft   non-zero if SRC(-1,-1) may be read when filtering l0
 * @param has_topright  unused here
 * @param _stride       row pitch; shifted right by sizeof(pixel)-1 to get the
 *                      pitch in pixel units
 */
1080 1106882 static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
1081 int has_topright, ptrdiff_t _stride)
1082 {
1083 1106882 pixel *src = (pixel*)_src;
1084 1106882 int stride = _stride>>(sizeof(pixel)-1);
1085
2/2
✓ Branch 0 taken 529657 times.
✓ Branch 1 taken 23784 times.
1106882 PREDICT_8x8_LOAD_LEFT;
1086 1106882 SRC(0,0)= (l0 + l1 + 1) >> 1;
1087 1106882 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
1088 1106882 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
1089 1106882 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
1090 1106882 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
1091 1106882 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
1092 1106882 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
1093 1106882 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
1094 1106882 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
1095 1106882 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
1096 1106882 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
1097 1106882 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
1098 1106882 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
1099 1106882 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
/* The remaining 20 samples are all set to l7. */
1100 1106882 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
1101 1106882 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
1102 1106882 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
1103 1106882 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
1104 1106882 }
1105
/**
 * Vertical prediction with residual accumulation for an 8x8 block, starting
 * from the filtered top row.
 *
 * For each column i, a running value starts at the filtered top sample t<i>
 * and the column's coefficients (block[0], block[8], ..., block[56], i.e. a
 * row pitch of 8) are added one after another; the running value after each
 * addition is stored to rows 0..7. The running value has type `pixel`, so it
 * is truncated to pixel width at every step. Afterwards all 64 coefficients
 * in _block are zeroed.
 *
 * @param _src          pointer to the top-left sample of the block
 * @param _block        64 coefficients, reinterpreted as dctcoef; cleared on
 *                      return
 * @param has_topleft   non-zero if SRC(-1,-1) may be read when filtering t0
 * @param has_topright  non-zero if SRC(8,-1) may be read when filtering t7
 * @param _stride       row pitch; shifted right by sizeof(pixel)-1 to get the
 *                      pitch in pixel units
 */
1106 static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1107 int has_topright, ptrdiff_t _stride)
1108 {
1109 int i;
1110 pixel *src = (pixel*)_src;
1111 const dctcoef *block = (const dctcoef*)_block;
1112 pixel pix[8];
1113 int stride = _stride>>(sizeof(pixel)-1);
1114 PREDICT_8x8_LOAD_TOP;
1115
1116 pix[0] = t0;
1117 pix[1] = t1;
1118 pix[2] = t2;
1119 pix[3] = t3;
1120 pix[4] = t4;
1121 pix[5] = t5;
1122 pix[6] = t6;
1123 pix[7] = t7;
1124
/* One iteration per column; src and block both advance by one element. */
1125 for(i=0; i<8; i++){
1126 pixel v = pix[i];
1127 src[0*stride]= v += block[0];
1128 src[1*stride]= v += block[8];
1129 src[2*stride]= v += block[16];
1130 src[3*stride]= v += block[24];
1131 src[4*stride]= v += block[32];
1132 src[5*stride]= v += block[40];
1133 src[6*stride]= v += block[48];
1134 src[7*stride]= v + block[56];
1135 src++;
1136 block++;
1137 }
1138
1139 memset(_block, 0, sizeof(dctcoef) * 64);
1140 }
1141
/**
 * Horizontal prediction with residual accumulation for an 8x8 block, starting
 * from the filtered left column.
 *
 * For each row i, a running value starts at the filtered left sample l<i>
 * and the row's eight coefficients block[0..7] are added one after another;
 * the running value after each addition is stored to columns 0..7. The
 * running value has type `pixel`, so it is truncated to pixel width at every
 * step. Afterwards all 64 coefficients in _block are zeroed.
 *
 * @param _src          pointer to the top-left sample of the block
 * @param _block        64 coefficients, reinterpreted as dctcoef; cleared on
 *                      return
 * @param has_topleft   non-zero if SRC(-1,-1) may be read when filtering l0
 * @param has_topright  unused here
 * @param _stride       row pitch; shifted right by sizeof(pixel)-1 to get the
 *                      pitch in pixel units
 */
1142 static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1143 int has_topright, ptrdiff_t _stride)
1144 {
1145 int i;
1146 pixel *src = (pixel*)_src;
1147 const dctcoef *block = (const dctcoef*)_block;
1148 pixel pix[8];
1149 int stride = _stride>>(sizeof(pixel)-1);
1150 PREDICT_8x8_LOAD_LEFT;
1151
1152 pix[0] = l0;
1153 pix[1] = l1;
1154 pix[2] = l2;
1155 pix[3] = l3;
1156 pix[4] = l4;
1157 pix[5] = l5;
1158 pix[6] = l6;
1159 pix[7] = l7;
1160
/* One iteration per row; src advances by one row, block by 8 coefficients. */
1161 for(i=0; i<8; i++){
1162 pixel v = pix[i];
1163 src[0]= v += block[0];
1164 src[1]= v += block[1];
1165 src[2]= v += block[2];
1166 src[3]= v += block[3];
1167 src[4]= v += block[4];
1168 src[5]= v += block[5];
1169 src[6]= v += block[6];
1170 src[7]= v + block[7];
1171 src+= stride;
1172 block+= 8;
1173 }
1174
1175 memset(_block, 0, sizeof(dctcoef) * 64);
1176 }
1177
/* The pred8x8l_* helper macros are not used past this point; undefine them. */
1178 #undef PREDICT_8x8_LOAD_LEFT
1179 #undef PREDICT_8x8_LOAD_TOP
1180 #undef PREDICT_8x8_LOAD_TOPLEFT
1181 #undef PREDICT_8x8_LOAD_TOPRIGHT
1182 #undef PREDICT_8x8_DC
1183 #undef PTR
1184 #undef PT
1185 #undef PL
1186 #undef SRC
1187
/**
 * Vertical prediction with residual accumulation for a 4x4 block.
 *
 * For each of the 4 columns, a running value starts at the sample directly
 * above the block and the column's coefficients (block[0], block[4],
 * block[8], block[12]) are added one after another; the running value after
 * each addition is stored to the block's rows 0..3. The running value has
 * type `pixel`, so it is truncated to pixel width at every step. Afterwards
 * all 16 coefficients in _block are zeroed.
 *
 * @param _pix    pointer to the top-left sample of the block
 * @param _block  16 coefficients, reinterpreted as dctcoef; cleared on return
 * @param stride  row pitch; shifted right by sizeof(pixel)-1 to get the pitch
 *                in pixel units
 */
1188 182984 static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, int16_t *_block,
1189 ptrdiff_t stride)
1190 {
1191 int i;
1192 182984 pixel *pix = (pixel*)_pix;
1193 182984 const dctcoef *block = (const dctcoef*)_block;
1194 182984 stride >>= sizeof(pixel)-1;
/* Step back to the row above the block, so pix[k*stride] for k = 1..4
 * addresses block rows 0..3. */
1195 182984 pix -= stride;
1196
2/2
✓ Branch 0 taken 365968 times.
✓ Branch 1 taken 91492 times.
914920 for(i=0; i<4; i++){
1197 731936 pixel v = pix[0];
1198 731936 pix[1*stride]= v += block[0];
1199 731936 pix[2*stride]= v += block[4];
1200 731936 pix[3*stride]= v += block[8];
1201 731936 pix[4*stride]= v + block[12];
1202 731936 pix++;
1203 731936 block++;
1204 }
1205
1206 182984 memset(_block, 0, sizeof(dctcoef) * 16);
1207 182984 }
1208
/**
 * Horizontal prediction with residual accumulation for a 4x4 block.
 *
 * For each of the 4 rows, a running value starts at the sample directly left
 * of the block (pix[-1]) and the row's coefficients block[0..3] are added one
 * after another; the running value after each addition is stored to columns
 * 0..3. The running value has type `pixel`, so it is truncated to pixel width
 * at every step. Afterwards all 16 coefficients in _block are zeroed.
 *
 * @param _pix    pointer to the top-left sample of the block
 * @param _block  16 coefficients, reinterpreted as dctcoef; cleared on return
 * @param stride  row pitch; shifted right by sizeof(pixel)-1 to get the pitch
 *                in pixel units
 */
1209 239402 static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, int16_t *_block,
1210 ptrdiff_t stride)
1211 {
1212 int i;
1213 239402 pixel *pix = (pixel*)_pix;
1214 239402 const dctcoef *block = (const dctcoef*)_block;
1215 239402 stride >>= sizeof(pixel)-1;
1216
2/2
✓ Branch 0 taken 478804 times.
✓ Branch 1 taken 119701 times.
1197010 for(i=0; i<4; i++){
1217 957608 pixel v = pix[-1];
1218 957608 pix[0]= v += block[0];
1219 957608 pix[1]= v += block[1];
1220 957608 pix[2]= v += block[2];
1221 957608 pix[3]= v + block[3];
1222 957608 pix+= stride;
1223 957608 block+= 4;
1224 }
1225
1226 239402 memset(_block, 0, sizeof(dctcoef) * 16);
1227 239402 }
1228
/**
 * Vertical prediction with residual accumulation for an 8x8 block, starting
 * from the unfiltered sample above each column.
 *
 * For each of the 8 columns, a running value starts at the sample directly
 * above the block and the column's coefficients (block[0], block[8], ...,
 * block[56]) are added one after another; the running value after each
 * addition is stored to the block's rows 0..7. The running value has type
 * `pixel`, so it is truncated to pixel width at every step. Afterwards all
 * 64 coefficients in _block are zeroed.
 *
 * @param _pix    pointer to the top-left sample of the block
 * @param _block  64 coefficients, reinterpreted as dctcoef; cleared on return
 * @param stride  row pitch; shifted right by sizeof(pixel)-1 to get the pitch
 *                in pixel units
 */
1229 2148 static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, int16_t *_block,
1230 ptrdiff_t stride)
1231 {
1232 int i;
1233 2148 pixel *pix = (pixel*)_pix;
1234 2148 const dctcoef *block = (const dctcoef*)_block;
1235 2148 stride >>= sizeof(pixel)-1;
/* Step back to the row above the block, so pix[k*stride] for k = 1..8
 * addresses block rows 0..7. */
1236 2148 pix -= stride;
1237
2/2
✓ Branch 0 taken 8592 times.
✓ Branch 1 taken 1074 times.
19332 for(i=0; i<8; i++){
1238 17184 pixel v = pix[0];
1239 17184 pix[1*stride]= v += block[0];
1240 17184 pix[2*stride]= v += block[8];
1241 17184 pix[3*stride]= v += block[16];
1242 17184 pix[4*stride]= v += block[24];
1243 17184 pix[5*stride]= v += block[32];
1244 17184 pix[6*stride]= v += block[40];
1245 17184 pix[7*stride]= v += block[48];
1246 17184 pix[8*stride]= v + block[56];
1247 17184 pix++;
1248 17184 block++;
1249 }
1250
1251 2148 memset(_block, 0, sizeof(dctcoef) * 64);
1252 2148 }
1253
/**
 * Horizontal prediction with residual accumulation for an 8x8 block, starting
 * from the unfiltered sample left of each row.
 *
 * For each of the 8 rows, a running value starts at pix[-1] and the row's
 * coefficients block[0..7] are added one after another; the running value
 * after each addition is stored to columns 0..7. The running value has type
 * `pixel`, so it is truncated to pixel width at every step. Afterwards all
 * 64 coefficients in _block are zeroed.
 *
 * @param _pix    pointer to the top-left sample of the block
 * @param _block  64 coefficients, reinterpreted as dctcoef; cleared on return
 * @param stride  row pitch; shifted right by sizeof(pixel)-1 to get the pitch
 *                in pixel units
 */
1254 2828 static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, int16_t *_block,
1255 ptrdiff_t stride)
1256 {
1257 int i;
1258 2828 pixel *pix = (pixel*)_pix;
1259 2828 const dctcoef *block = (const dctcoef*)_block;
1260 2828 stride >>= sizeof(pixel)-1;
1261
2/2
✓ Branch 0 taken 11312 times.
✓ Branch 1 taken 1414 times.
25452 for(i=0; i<8; i++){
1262 22624 pixel v = pix[-1];
1263 22624 pix[0]= v += block[0];
1264 22624 pix[1]= v += block[1];
1265 22624 pix[2]= v += block[2];
1266 22624 pix[3]= v += block[3];
1267 22624 pix[4]= v += block[4];
1268 22624 pix[5]= v += block[5];
1269 22624 pix[6]= v += block[6];
1270 22624 pix[7]= v + block[7];
1271 22624 pix+= stride;
1272 22624 block+= 8;
1273 }
1274
1275 2828 memset(_block, 0, sizeof(dctcoef) * 64);
1276 2828 }
1277
/**
 * Apply pred4x4_vertical_add to 16 sub-blocks.
 *
 * Sub-block i is located at pix + block_offset[i] and takes its coefficients
 * from block + i*16*sizeof(pixel) (an offset counted in int16_t elements).
 * NOTE(review): the sizeof(pixel) factor presumably compensates for dctcoef
 * being wider than int16_t when pixel is 2 bytes - confirm against the
 * bit-depth template.
 *
 * @param pix           base pointer the block offsets are relative to
 * @param block_offset  at least 16 offsets, added to pix as uint8_t offsets
 * @param block         coefficient storage; each sub-block's 16 coefficients
 *                      are cleared by the callee
 * @param stride        row pitch, passed through unchanged
 */
1278 724 static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,
1279 int16_t *block,
1280 ptrdiff_t stride)
1281 {
1282 int i;
1283
2/2
✓ Branch 0 taken 5792 times.
✓ Branch 1 taken 362 times.
12308 for(i=0; i<16; i++)
1284 11584 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1285 724 }
1286
/**
 * Apply pred4x4_horizontal_add to 16 sub-blocks.
 *
 * Sub-block i is located at pix + block_offset[i] and takes its coefficients
 * from block + i*16*sizeof(pixel) (an offset counted in int16_t elements).
 * NOTE(review): the sizeof(pixel) factor presumably compensates for dctcoef
 * being wider than int16_t when pixel is 2 bytes - confirm against the
 * bit-depth template.
 *
 * @param pix           base pointer the block offsets are relative to
 * @param block_offset  at least 16 offsets, added to pix as uint8_t offsets
 * @param block         coefficient storage; each sub-block's 16 coefficients
 *                      are cleared by the callee
 * @param stride        row pitch, passed through unchanged
 */
1287 556 static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix,
1288 const int *block_offset,
1289 int16_t *block,
1290 ptrdiff_t stride)
1291 {
1292 int i;
1293
2/2
✓ Branch 0 taken 4448 times.
✓ Branch 1 taken 278 times.
9452 for(i=0; i<16; i++)
1294 8896 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1295 556 }
1296
/**
 * Apply pred4x4_vertical_add to 4 sub-blocks.
 *
 * Sub-block i is located at pix + block_offset[i] and takes its coefficients
 * from block + i*16*sizeof(pixel) (an offset counted in int16_t elements).
 *
 * @param pix           base pointer the block offsets are relative to
 * @param block_offset  at least 4 offsets, added to pix as uint8_t offsets
 * @param block         coefficient storage; each sub-block's 16 coefficients
 *                      are cleared by the callee
 * @param stride        row pitch, passed through unchanged
 */
1297 4516 static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,
1298 int16_t *block, ptrdiff_t stride)
1299 {
1300 int i;
1301
2/2
✓ Branch 0 taken 9032 times.
✓ Branch 1 taken 2258 times.
22580 for(i=0; i<4; i++)
1302 18064 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1303 4516 }
1304
/**
 * Apply pred4x4_vertical_add to 8 sub-blocks.
 *
 * The first four sub-blocks use block_offset[0..3]; the last four use
 * block_offset[8..11] (index i+4 for i = 4..7). The coefficient offset keeps
 * following i, i.e. block + i*16*sizeof(pixel) for i = 0..7.
 * NOTE(review): the skip over block_offset[4..7] presumably reflects how the
 * caller lays out the offset table - confirm against the caller.
 *
 * @param pix           base pointer the block offsets are relative to
 * @param block_offset  at least 12 offsets, added to pix as uint8_t offsets
 * @param block         coefficient storage; each sub-block's 16 coefficients
 *                      are cleared by the callee
 * @param stride        row pitch, passed through unchanged
 */
1305 static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
1306 int16_t *block, ptrdiff_t stride)
1307 {
1308 int i;
1309 for(i=0; i<4; i++)
1310 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1311 for(i=4; i<8; i++)
1312 FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1313 }
1314
/**
 * Apply pred4x4_horizontal_add to 4 sub-blocks.
 *
 * Sub-block i is located at pix + block_offset[i] and takes its coefficients
 * from block + i*16*sizeof(pixel) (an offset counted in int16_t elements).
 *
 * @param pix           base pointer the block offsets are relative to
 * @param block_offset  at least 4 offsets, added to pix as uint8_t offsets
 * @param block         coefficient storage; each sub-block's 16 coefficients
 *                      are cleared by the callee
 * @param stride        row pitch, passed through unchanged
 */
1315 5776 static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
1316 int16_t *block,
1317 ptrdiff_t stride)
1318 {
1319 int i;
1320
2/2
✓ Branch 0 taken 11552 times.
✓ Branch 1 taken 2888 times.
28880 for(i=0; i<4; i++)
1321 23104 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1322 5776 }
1323
/**
 * Apply pred4x4_horizontal_add to 8 sub-blocks.
 *
 * The first four sub-blocks use block_offset[0..3]; the last four use
 * block_offset[8..11] (index i+4 for i = 4..7). The coefficient offset keeps
 * following i, i.e. block + i*16*sizeof(pixel) for i = 0..7.
 * NOTE(review): the skip over block_offset[4..7] presumably reflects how the
 * caller lays out the offset table - confirm against the caller.
 *
 * @param pix           base pointer the block offsets are relative to
 * @param block_offset  at least 12 offsets, added to pix as uint8_t offsets
 * @param block         coefficient storage; each sub-block's 16 coefficients
 *                      are cleared by the callee
 * @param stride        row pitch, passed through unchanged
 */
1324 static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix,
1325 const int *block_offset,
1326 int16_t *block, ptrdiff_t stride)
1327 {
1328 int i;
1329 for(i=0; i<4; i++)
1330 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1331 for(i=4; i<8; i++)
1332 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1333 }
1334