FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/h264pred_template.c
Date: 2022-01-28 07:56:06
Exec Total Coverage
Lines: 744 806 92.3%
Branches: 162 184 88.0%

Line Branch Exec Source
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * H.264 / AVC / MPEG-4 part10 prediction functions.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
28 #include "libavutil/intreadwrite.h"
29
30 #include "mathops.h"
31
32 #include "bit_depth_template.c"
33
34 8610298 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright,
35 ptrdiff_t _stride)
36 {
37 8610298 pixel *src = (pixel*)_src;
38 8610298 int stride = _stride>>(sizeof(pixel)-1);
39 8610298 const pixel4 a= AV_RN4PA(src-stride);
40
41 8610298 AV_WN4PA(src+0*stride, a);
42 8610298 AV_WN4PA(src+1*stride, a);
43 8610298 AV_WN4PA(src+2*stride, a);
44 8610298 AV_WN4PA(src+3*stride, a);
45 }
46
47 13770430 static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright,
48 ptrdiff_t _stride)
49 {
50 13770430 pixel *src = (pixel*)_src;
51 13770430 int stride = _stride>>(sizeof(pixel)-1);
52 13770430 AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));
53 13770430 AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride]));
54 13770430 AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride]));
55 13770430 AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride]));
56 }
57
58 6001802 static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright,
59 ptrdiff_t _stride)
60 {
61 6001802 pixel *src = (pixel*)_src;
62 6001802 int stride = _stride>>(sizeof(pixel)-1);
63 6001802 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
64 6001802 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
65 6001802 const pixel4 a = PIXEL_SPLAT_X4(dc);
66
67 6001802 AV_WN4PA(src+0*stride, a);
68 6001802 AV_WN4PA(src+1*stride, a);
69 6001802 AV_WN4PA(src+2*stride, a);
70 6001802 AV_WN4PA(src+3*stride, a);
71 }
72
73 584956 static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright,
74 ptrdiff_t _stride)
75 {
76 584956 pixel *src = (pixel*)_src;
77 584956 int stride = _stride>>(sizeof(pixel)-1);
78 584956 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
79 584956 const pixel4 a = PIXEL_SPLAT_X4(dc);
80
81 584956 AV_WN4PA(src+0*stride, a);
82 584956 AV_WN4PA(src+1*stride, a);
83 584956 AV_WN4PA(src+2*stride, a);
84 584956 AV_WN4PA(src+3*stride, a);
85 }
86
87 116944 static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright,
88 ptrdiff_t _stride)
89 {
90 116944 pixel *src = (pixel*)_src;
91 116944 int stride = _stride>>(sizeof(pixel)-1);
92 116944 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
93 116944 const pixel4 a = PIXEL_SPLAT_X4(dc);
94
95 116944 AV_WN4PA(src+0*stride, a);
96 116944 AV_WN4PA(src+1*stride, a);
97 116944 AV_WN4PA(src+2*stride, a);
98 116944 AV_WN4PA(src+3*stride, a);
99 }
100
101 14738 static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright,
102 ptrdiff_t _stride)
103 {
104 14738 pixel *src = (pixel*)_src;
105 14738 int stride = _stride>>(sizeof(pixel)-1);
106 14738 const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
107
108 14738 AV_WN4PA(src+0*stride, a);
109 14738 AV_WN4PA(src+1*stride, a);
110 14738 AV_WN4PA(src+2*stride, a);
111 14738 AV_WN4PA(src+3*stride, a);
112 }
113
114
115 #define LOAD_TOP_RIGHT_EDGE\
116 const unsigned av_unused t4 = topright[0];\
117 const unsigned av_unused t5 = topright[1];\
118 const unsigned av_unused t6 = topright[2];\
119 const unsigned av_unused t7 = topright[3];\
120
121 #define LOAD_DOWN_LEFT_EDGE\
122 const unsigned av_unused l4 = src[-1+4*stride];\
123 const unsigned av_unused l5 = src[-1+5*stride];\
124 const unsigned av_unused l6 = src[-1+6*stride];\
125 const unsigned av_unused l7 = src[-1+7*stride];\
126
127 #define LOAD_LEFT_EDGE\
128 const unsigned av_unused l0 = src[-1+0*stride];\
129 const unsigned av_unused l1 = src[-1+1*stride];\
130 const unsigned av_unused l2 = src[-1+2*stride];\
131 const unsigned av_unused l3 = src[-1+3*stride];\
132
133 #define LOAD_TOP_EDGE\
134 const unsigned av_unused t0 = src[ 0-1*stride];\
135 const unsigned av_unused t1 = src[ 1-1*stride];\
136 const unsigned av_unused t2 = src[ 2-1*stride];\
137 const unsigned av_unused t3 = src[ 3-1*stride];\
138
139 3782726 static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
140 ptrdiff_t _stride)
141 {
142 3782726 pixel *src = (pixel*)_src;
143 3782726 int stride = _stride>>(sizeof(pixel)-1);
144 3782726 const int lt= src[-1-1*stride];
145 3782726 LOAD_TOP_EDGE
146 3782726 LOAD_LEFT_EDGE
147
148 3782726 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
149 3782726 src[0+2*stride]=
150 3782726 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
151 3782726 src[0+1*stride]=
152 3782726 src[1+2*stride]=
153 3782726 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
154 3782726 src[0+0*stride]=
155 3782726 src[1+1*stride]=
156 3782726 src[2+2*stride]=
157 3782726 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
158 3782726 src[1+0*stride]=
159 3782726 src[2+1*stride]=
160 3782726 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
161 3782726 src[2+0*stride]=
162 3782726 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
163 3782726 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
164 }
165
166 2261034 static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright,
167 ptrdiff_t _stride)
168 {
169 2261034 pixel *src = (pixel*)_src;
170 2261034 const pixel *topright = (const pixel*)_topright;
171 2261034 int stride = _stride>>(sizeof(pixel)-1);
172 2261034 LOAD_TOP_EDGE
173 2261034 LOAD_TOP_RIGHT_EDGE
174 // LOAD_LEFT_EDGE
175
176 2261034 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
177 2261034 src[1+0*stride]=
178 2261034 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
179 2261034 src[2+0*stride]=
180 2261034 src[1+1*stride]=
181 2261034 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
182 2261034 src[3+0*stride]=
183 2261034 src[2+1*stride]=
184 2261034 src[1+2*stride]=
185 2261034 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
186 2261034 src[3+1*stride]=
187 2261034 src[2+2*stride]=
188 2261034 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
189 2261034 src[3+2*stride]=
190 2261034 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
191 2261034 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
192 }
193
194 2614948 static void FUNCC(pred4x4_vertical_right)(uint8_t *_src,
195 const uint8_t *topright,
196 ptrdiff_t _stride)
197 {
198 2614948 pixel *src = (pixel*)_src;
199 2614948 int stride = _stride>>(sizeof(pixel)-1);
200 2614948 const int lt= src[-1-1*stride];
201 2614948 LOAD_TOP_EDGE
202 2614948 LOAD_LEFT_EDGE
203
204 2614948 src[0+0*stride]=
205 2614948 src[1+2*stride]=(lt + t0 + 1)>>1;
206 2614948 src[1+0*stride]=
207 2614948 src[2+2*stride]=(t0 + t1 + 1)>>1;
208 2614948 src[2+0*stride]=
209 2614948 src[3+2*stride]=(t1 + t2 + 1)>>1;
210 2614948 src[3+0*stride]=(t2 + t3 + 1)>>1;
211 2614948 src[0+1*stride]=
212 2614948 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
213 2614948 src[1+1*stride]=
214 2614948 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
215 2614948 src[2+1*stride]=
216 2614948 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
217 2614948 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
218 2614948 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
219 2614948 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
220 }
221
222 2143710 static void FUNCC(pred4x4_vertical_left)(uint8_t *_src,
223 const uint8_t *_topright,
224 ptrdiff_t _stride)
225 {
226 2143710 pixel *src = (pixel*)_src;
227 2143710 const pixel *topright = (const pixel*)_topright;
228 2143710 int stride = _stride>>(sizeof(pixel)-1);
229 2143710 LOAD_TOP_EDGE
230 2143710 LOAD_TOP_RIGHT_EDGE
231
232 2143710 src[0+0*stride]=(t0 + t1 + 1)>>1;
233 2143710 src[1+0*stride]=
234 2143710 src[0+2*stride]=(t1 + t2 + 1)>>1;
235 2143710 src[2+0*stride]=
236 2143710 src[1+2*stride]=(t2 + t3 + 1)>>1;
237 2143710 src[3+0*stride]=
238 2143710 src[2+2*stride]=(t3 + t4+ 1)>>1;
239 2143710 src[3+2*stride]=(t4 + t5+ 1)>>1;
240 2143710 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
241 2143710 src[1+1*stride]=
242 2143710 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
243 2143710 src[2+1*stride]=
244 2143710 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
245 2143710 src[3+1*stride]=
246 2143710 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
247 2143710 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
248 }
249
250 3994260 static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
251 ptrdiff_t _stride)
252 {
253 3994260 pixel *src = (pixel*)_src;
254 3994260 int stride = _stride>>(sizeof(pixel)-1);
255 3994260 LOAD_LEFT_EDGE
256
257 3994260 src[0+0*stride]=(l0 + l1 + 1)>>1;
258 3994260 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
259 3994260 src[2+0*stride]=
260 3994260 src[0+1*stride]=(l1 + l2 + 1)>>1;
261 3994260 src[3+0*stride]=
262 3994260 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
263 3994260 src[2+1*stride]=
264 3994260 src[0+2*stride]=(l2 + l3 + 1)>>1;
265 3994260 src[3+1*stride]=
266 3994260 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
267 3994260 src[3+2*stride]=
268 3994260 src[1+3*stride]=
269 3994260 src[0+3*stride]=
270 3994260 src[2+2*stride]=
271 3994260 src[2+3*stride]=
272 3994260 src[3+3*stride]=l3;
273 }
274
275 4298476 static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src,
276 const uint8_t *topright,
277 ptrdiff_t _stride)
278 {
279 4298476 pixel *src = (pixel*)_src;
280 4298476 int stride = _stride>>(sizeof(pixel)-1);
281 4298476 const int lt= src[-1-1*stride];
282 4298476 LOAD_TOP_EDGE
283 4298476 LOAD_LEFT_EDGE
284
285 4298476 src[0+0*stride]=
286 4298476 src[2+1*stride]=(lt + l0 + 1)>>1;
287 4298476 src[1+0*stride]=
288 4298476 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
289 4298476 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
290 4298476 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
291 4298476 src[0+1*stride]=
292 4298476 src[2+2*stride]=(l0 + l1 + 1)>>1;
293 4298476 src[1+1*stride]=
294 4298476 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
295 4298476 src[0+2*stride]=
296 4298476 src[2+3*stride]=(l1 + l2+ 1)>>1;
297 4298476 src[1+2*stride]=
298 4298476 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
299 4298476 src[0+3*stride]=(l2 + l3 + 1)>>1;
300 4298476 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
301 }
302
303 440262 static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
304 {
305 int i;
306 440262 pixel *src = (pixel*)_src;
307 440262 int stride = _stride>>(sizeof(pixel)-1);
308 440262 const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
309 440262 const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
310 440262 const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
311 440262 const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);
312
313
2/2
✓ Branch 0 taken 3522096 times.
✓ Branch 1 taken 220131 times.
7484454 for(i=0; i<16; i++){
314 7044192 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
315 7044192 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
316 7044192 AV_WN4PA(((pixel4*)(src+i*stride))+2, c);
317 7044192 AV_WN4PA(((pixel4*)(src+i*stride))+3, d);
318 }
319 }
320
321 487150 static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
322 {
323 int i;
324 487150 pixel *src = (pixel*)_src;
325 487150 stride >>= sizeof(pixel)-1;
326
327
2/2
✓ Branch 0 taken 3897200 times.
✓ Branch 1 taken 243575 times.
8281550 for(i=0; i<16; i++){
328 7794400 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
329
330 7794400 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
331 7794400 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
332 7794400 AV_WN4PA(((pixel4*)(src+i*stride))+2, a);
333 7794400 AV_WN4PA(((pixel4*)(src+i*stride))+3, a);
334 }
335 }
336
337 #define PREDICT_16x16_DC(v)\
338 for(i=0; i<16; i++){\
339 AV_WN4PA(src+ 0, v);\
340 AV_WN4PA(src+ 4, v);\
341 AV_WN4PA(src+ 8, v);\
342 AV_WN4PA(src+12, v);\
343 src += stride;\
344 }
345
346 497262 static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride)
347 {
348 497262 int i, dc=0;
349 497262 pixel *src = (pixel*)_src;
350 pixel4 dcsplat;
351 497262 stride >>= sizeof(pixel)-1;
352
353
2/2
✓ Branch 0 taken 3978096 times.
✓ Branch 1 taken 248631 times.
8453454 for(i=0;i<16; i++){
354 7956192 dc+= src[-1+i*stride];
355 }
356
357
2/2
✓ Branch 0 taken 3978096 times.
✓ Branch 1 taken 248631 times.
8453454 for(i=0;i<16; i++){
358 7956192 dc+= src[i-stride];
359 }
360
361 497262 dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
362
2/2
✓ Branch 0 taken 3978096 times.
✓ Branch 1 taken 248631 times.
8453454 PREDICT_16x16_DC(dcsplat);
363 }
364
365 67194 static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
366 {
367 67194 int i, dc=0;
368 67194 pixel *src = (pixel*)_src;
369 pixel4 dcsplat;
370 67194 stride >>= sizeof(pixel)-1;
371
372
2/2
✓ Branch 0 taken 537552 times.
✓ Branch 1 taken 33597 times.
1142298 for(i=0;i<16; i++){
373 1075104 dc+= src[-1+i*stride];
374 }
375
376 67194 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
377
2/2
✓ Branch 0 taken 537552 times.
✓ Branch 1 taken 33597 times.
1142298 PREDICT_16x16_DC(dcsplat);
378 }
379
380 11448 static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
381 {
382 11448 int i, dc=0;
383 11448 pixel *src = (pixel*)_src;
384 pixel4 dcsplat;
385 11448 stride >>= sizeof(pixel)-1;
386
387
2/2
✓ Branch 0 taken 91584 times.
✓ Branch 1 taken 5724 times.
194616 for(i=0;i<16; i++){
388 183168 dc+= src[i-stride];
389 }
390
391 11448 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
392
2/2
✓ Branch 0 taken 91584 times.
✓ Branch 1 taken 5724 times.
194616 PREDICT_16x16_DC(dcsplat);
393 }
394
395 #define PRED16x16_X(n, v) \
396 static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
397 {\
398 int i;\
399 pixel *src = (pixel*)_src;\
400 stride >>= sizeof(pixel)-1;\
401 PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
402 }
403
404
2/2
✓ Branch 0 taken 32640 times.
✓ Branch 1 taken 2040 times.
69360 PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
405 #if BIT_DEPTH == 8
406
2/2
✓ Branch 0 taken 192 times.
✓ Branch 1 taken 12 times.
204 PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
407
2/2
✓ Branch 0 taken 160 times.
✓ Branch 1 taken 10 times.
170 PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
408 #endif
409
410 303094 static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src,
411 ptrdiff_t _stride,
412 const int svq3,
413 const int rv40)
414 {
415 int i, j, k;
416 int a;
417 INIT_CLIP
418 303094 pixel *src = (pixel*)_src;
419 303094 int stride = _stride>>(sizeof(pixel)-1);
420 303094 const pixel * const src0 = src +7-stride;
421 303094 const pixel * src1 = src +8*stride-1;
422 303094 const pixel * src2 = src1-2*stride; // == src+6*stride-1;
423 303094 int H = src0[1] - src0[-1];
424 303094 int V = src1[0] - src2[ 0];
425
2/2
✓ Branch 0 taken 1060829 times.
✓ Branch 1 taken 151547 times.
2424752 for(k=2; k<=8; ++k) {
426 2121658 src1 += stride; src2 -= stride;
427 2121658 H += k*(src0[k] - src0[-k]);
428 2121658 V += k*(src1[0] - src2[ 0]);
429 }
430
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 151491 times.
303094 if(svq3){
431 112 H = ( 5*(H/4) ) / 16;
432 112 V = ( 5*(V/4) ) / 16;
433
434 /* required for 100% accuracy */
435 112 i = H; H = V; V = i;
436
2/2
✓ Branch 0 taken 1216 times.
✓ Branch 1 taken 150275 times.
302982 }else if(rv40){
437 2432 H = ( H + (H>>2) ) >> 4;
438 2432 V = ( V + (V>>2) ) >> 4;
439 }else{
440 300550 H = ( 5*H+32 ) >> 6;
441 300550 V = ( 5*V+32 ) >> 6;
442 }
443
444 303094 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
445
2/2
✓ Branch 0 taken 2424752 times.
✓ Branch 1 taken 151547 times.
5152598 for(j=16; j>0; --j) {
446 4849504 int b = a;
447 4849504 a += V;
448
2/2
✓ Branch 0 taken 9699008 times.
✓ Branch 1 taken 2424752 times.
24247520 for(i=-16; i<0; i+=4) {
449 19398016 src[16+i] = CLIP((b ) >> 5);
450 19398016 src[17+i] = CLIP((b+ H) >> 5);
451 19398016 src[18+i] = CLIP((b+2*H) >> 5);
452 19398016 src[19+i] = CLIP((b+3*H) >> 5);
453 19398016 b += 4*H;
454 }
455 4849504 src += stride;
456 }
457 }
458
459 300550 static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride)
460 {
461 300550 FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
462 }
463
464 1427356 static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride)
465 {
466 int i;
467 1427356 pixel *src = (pixel*)_src;
468 1427356 int stride = _stride>>(sizeof(pixel)-1);
469 1427356 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
470 1427356 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
471
472
2/2
✓ Branch 0 taken 5709424 times.
✓ Branch 1 taken 713678 times.
12846204 for(i=0; i<8; i++){
473 11418848 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
474 11418848 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
475 }
476 }
477
478 322924 static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
479 {
480 int i;
481 322924 pixel *src = (pixel*)_src;
482 322924 int stride = _stride>>(sizeof(pixel)-1);
483 322924 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
484 322924 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
485
486
2/2
✓ Branch 0 taken 2583392 times.
✓ Branch 1 taken 161462 times.
5489708 for(i=0; i<16; i++){
487 5166784 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
488 5166784 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
489 }
490 }
491
492 2942756 static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride)
493 {
494 int i;
495 2942756 pixel *src = (pixel*)_src;
496 2942756 stride >>= sizeof(pixel)-1;
497
498
2/2
✓ Branch 0 taken 11771024 times.
✓ Branch 1 taken 1471378 times.
26484804 for(i=0; i<8; i++){
499 23542048 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
500 23542048 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
501 23542048 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
502 }
503 }
504
505 595976 static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
506 {
507 int i;
508 595976 pixel *src = (pixel*)_src;
509 595976 stride >>= sizeof(pixel)-1;
510
2/2
✓ Branch 0 taken 4767808 times.
✓ Branch 1 taken 297988 times.
10131592 for(i=0; i<16; i++){
511 9535616 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
512 9535616 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
513 9535616 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
514 }
515 }
516
517 #define PRED8x8_X(n, v)\
518 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
519 {\
520 int i;\
521 const pixel4 a = PIXEL_SPLAT_X4(v);\
522 pixel *src = (pixel*)_src;\
523 stride >>= sizeof(pixel)-1;\
524 for(i=0; i<8; i++){\
525 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
526 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
527 }\
528 }
529
530
2/2
✓ Branch 0 taken 1032112 times.
✓ Branch 1 taken 129014 times.
2322252 PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
531 #if BIT_DEPTH == 8
532
2/2
✓ Branch 0 taken 912 times.
✓ Branch 1 taken 114 times.
1026 PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
533
2/2
✓ Branch 0 taken 2144 times.
✓ Branch 1 taken 268 times.
2412 PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
534 #endif
535
536 3076 static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride)
537 {
538 3076 FUNCC(pred8x8_128_dc)(_src, stride);
539 3076 FUNCC(pred8x8_128_dc)(_src+8*stride, stride);
540 }
541
542 1082788 static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride)
543 {
544 int i;
545 int dc0, dc2;
546 pixel4 dc0splat, dc2splat;
547 1082788 pixel *src = (pixel*)_src;
548 1082788 stride >>= sizeof(pixel)-1;
549
550 1082788 dc0=dc2=0;
551
2/2
✓ Branch 0 taken 2165576 times.
✓ Branch 1 taken 541394 times.
5413940 for(i=0;i<4; i++){
552 4331152 dc0+= src[-1+i*stride];
553 4331152 dc2+= src[-1+(i+4)*stride];
554 }
555 1082788 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
556 1082788 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
557
558
2/2
✓ Branch 0 taken 2165576 times.
✓ Branch 1 taken 541394 times.
5413940 for(i=0; i<4; i++){
559 4331152 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
560 4331152 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);
561 }
562
2/2
✓ Branch 0 taken 2165576 times.
✓ Branch 1 taken 541394 times.
5413940 for(i=4; i<8; i++){
563 4331152 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
564 4331152 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);
565 }
566 }
567
568 195540 static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
569 {
570 195540 FUNCC(pred8x8_left_dc)(_src, stride);
571 195540 FUNCC(pred8x8_left_dc)(_src+8*stride, stride);
572 }
573
574 165896 static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride)
575 {
576 int i;
577 int dc0, dc1;
578 pixel4 dc0splat, dc1splat;
579 165896 pixel *src = (pixel*)_src;
580 165896 stride >>= sizeof(pixel)-1;
581
582 165896 dc0=dc1=0;
583
2/2
✓ Branch 0 taken 331792 times.
✓ Branch 1 taken 82948 times.
829480 for(i=0;i<4; i++){
584 663584 dc0+= src[i-stride];
585 663584 dc1+= src[4+i-stride];
586 }
587 165896 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
588 165896 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
589
590
2/2
✓ Branch 0 taken 331792 times.
✓ Branch 1 taken 82948 times.
829480 for(i=0; i<4; i++){
591 663584 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
592 663584 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
593 }
594
2/2
✓ Branch 0 taken 331792 times.
✓ Branch 1 taken 82948 times.
829480 for(i=4; i<8; i++){
595 663584 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
596 663584 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
597 }
598 }
599
600 26616 static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
601 {
602 int i;
603 int dc0, dc1;
604 pixel4 dc0splat, dc1splat;
605 26616 pixel *src = (pixel*)_src;
606 26616 stride >>= sizeof(pixel)-1;
607
608 26616 dc0=dc1=0;
609
2/2
✓ Branch 0 taken 53232 times.
✓ Branch 1 taken 13308 times.
133080 for(i=0;i<4; i++){
610 106464 dc0+= src[i-stride];
611 106464 dc1+= src[4+i-stride];
612 }
613 26616 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
614 26616 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
615
616
2/2
✓ Branch 0 taken 212928 times.
✓ Branch 1 taken 13308 times.
452472 for(i=0; i<16; i++){
617 425856 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
618 425856 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
619 }
620 }
621
622 5531148 static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride)
623 {
624 int i;
625 int dc0, dc1, dc2;
626 pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
627 5531148 pixel *src = (pixel*)_src;
628 5531148 stride >>= sizeof(pixel)-1;
629
630 5531148 dc0=dc1=dc2=0;
631
2/2
✓ Branch 0 taken 11062296 times.
✓ Branch 1 taken 2765574 times.
27655740 for(i=0;i<4; i++){
632 22124592 dc0+= src[-1+i*stride] + src[i-stride];
633 22124592 dc1+= src[4+i-stride];
634 22124592 dc2+= src[-1+(i+4)*stride];
635 }
636 5531148 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
637 5531148 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
638 5531148 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
639 5531148 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
640
641
2/2
✓ Branch 0 taken 11062296 times.
✓ Branch 1 taken 2765574 times.
27655740 for(i=0; i<4; i++){
642 22124592 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
643 22124592 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
644 }
645
2/2
✓ Branch 0 taken 11062296 times.
✓ Branch 1 taken 2765574 times.
27655740 for(i=4; i<8; i++){
646 22124592 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
647 22124592 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
648 }
649 }
650
651 1594572 static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride)
652 {
653 int i;
654 int dc0, dc1, dc2, dc3, dc4;
655 pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
656 1594572 pixel *src = (pixel*)_src;
657 1594572 stride >>= sizeof(pixel)-1;
658
659 1594572 dc0=dc1=dc2=dc3=dc4=0;
660
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=0;i<4; i++){
661 6378288 dc0+= src[-1+i*stride] + src[i-stride];
662 6378288 dc1+= src[4+i-stride];
663 6378288 dc2+= src[-1+(i+4)*stride];
664 6378288 dc3+= src[-1+(i+8)*stride];
665 6378288 dc4+= src[-1+(i+12)*stride];
666 }
667 1594572 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
668 1594572 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
669 1594572 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
670 1594572 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
671 1594572 dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);
672 1594572 dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);
673 1594572 dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);
674 1594572 dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);
675
676
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=0; i<4; i++){
677 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
678 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
679 }
680
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=4; i<8; i++){
681 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
682 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
683 }
684
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=8; i<12; i++){
685 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
686 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
687 }
688
2/2
✓ Branch 0 taken 3189144 times.
✓ Branch 1 taken 797286 times.
7972860 for(i=12; i<16; i++){
689 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
690 6378288 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
691 }
692 }
693
694 //the following 4 function should not be optimized!
695 68 static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
696 {
697 68 FUNCC(pred8x8_top_dc)(src, stride);
698 68 FUNCC(pred4x4_dc)(src, NULL, stride);
699 }
700
701 12 static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
702 {
703 12 FUNCC(pred8x16_top_dc)(src, stride);
704 12 FUNCC(pred4x4_dc)(src, NULL, stride);
705 }
706
707 36 static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
708 {
709 36 FUNCC(pred8x8_dc)(src, stride);
710 36 FUNCC(pred4x4_top_dc)(src, NULL, stride);
711 }
712
713 12 static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
714 {
715 12 FUNCC(pred8x16_dc)(src, stride);
716 12 FUNCC(pred4x4_top_dc)(src, NULL, stride);
717 }
718
719 20 static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
720 {
721 20 FUNCC(pred8x8_left_dc)(src, stride);
722 20 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
723 20 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
724 }
725
726 12 static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
727 {
728 12 FUNCC(pred8x16_left_dc)(src, stride);
729 12 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
730 12 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
731 }
732
733 32 static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
734 {
735 32 FUNCC(pred8x8_left_dc)(src, stride);
736 32 FUNCC(pred4x4_128_dc)(src , NULL, stride);
737 32 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
738 }
739
740 12 static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
741 {
742 12 FUNCC(pred8x16_left_dc)(src, stride);
743 12 FUNCC(pred4x4_128_dc)(src , NULL, stride);
744 12 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
745 }
746
747 927552 static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride)
748 {
749 int j, k;
750 int a;
751 INIT_CLIP
752 927552 pixel *src = (pixel*)_src;
753 927552 int stride = _stride>>(sizeof(pixel)-1);
754 927552 const pixel * const src0 = src +3-stride;
755 927552 const pixel * src1 = src +4*stride-1;
756 927552 const pixel * src2 = src1-2*stride; // == src+2*stride-1;
757 927552 int H = src0[1] - src0[-1];
758 927552 int V = src1[0] - src2[ 0];
759
2/2
✓ Branch 0 taken 1391328 times.
✓ Branch 1 taken 463776 times.
3710208 for(k=2; k<=4; ++k) {
760 2782656 src1 += stride; src2 -= stride;
761 2782656 H += k*(src0[k] - src0[-k]);
762 2782656 V += k*(src1[0] - src2[ 0]);
763 }
764 927552 H = ( 17*H+16 ) >> 5;
765 927552 V = ( 17*V+16 ) >> 5;
766
767 927552 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
768
2/2
✓ Branch 0 taken 3710208 times.
✓ Branch 1 taken 463776 times.
8347968 for(j=8; j>0; --j) {
769 7420416 int b = a;
770 7420416 a += V;
771 7420416 src[0] = CLIP((b ) >> 5);
772 7420416 src[1] = CLIP((b+ H) >> 5);
773 7420416 src[2] = CLIP((b+2*H) >> 5);
774 7420416 src[3] = CLIP((b+3*H) >> 5);
775 7420416 src[4] = CLIP((b+4*H) >> 5);
776 7420416 src[5] = CLIP((b+5*H) >> 5);
777 7420416 src[6] = CLIP((b+6*H) >> 5);
778 7420416 src[7] = CLIP((b+7*H) >> 5);
779 7420416 src += stride;
780 }
781 }
782
783 328576 static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride)
784 {
785 int j, k;
786 int a;
787 INIT_CLIP
788 328576 pixel *src = (pixel*)_src;
789 328576 int stride = _stride>>(sizeof(pixel)-1);
790 328576 const pixel * const src0 = src +3-stride;
791 328576 const pixel * src1 = src +8*stride-1;
792 328576 const pixel * src2 = src1-2*stride; // == src+6*stride-1;
793 328576 int H = src0[1] - src0[-1];
794 328576 int V = src1[0] - src2[ 0];
795
796
2/2
✓ Branch 0 taken 492864 times.
✓ Branch 1 taken 164288 times.
1314304 for (k = 2; k <= 4; ++k) {
797 985728 src1 += stride; src2 -= stride;
798 985728 H += k*(src0[k] - src0[-k]);
799 985728 V += k*(src1[0] - src2[ 0]);
800 }
801
2/2
✓ Branch 0 taken 657152 times.
✓ Branch 1 taken 164288 times.
1642880 for (; k <= 8; ++k) {
802 1314304 src1 += stride; src2 -= stride;
803 1314304 V += k*(src1[0] - src2[0]);
804 }
805
806 328576 H = (17*H+16) >> 5;
807 328576 V = (5*V+32) >> 6;
808
809 328576 a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;
810
2/2
✓ Branch 0 taken 2628608 times.
✓ Branch 1 taken 164288 times.
5585792 for(j=16; j>0; --j) {
811 5257216 int b = a;
812 5257216 a += V;
813 5257216 src[0] = CLIP((b ) >> 5);
814 5257216 src[1] = CLIP((b+ H) >> 5);
815 5257216 src[2] = CLIP((b+2*H) >> 5);
816 5257216 src[3] = CLIP((b+3*H) >> 5);
817 5257216 src[4] = CLIP((b+4*H) >> 5);
818 5257216 src[5] = CLIP((b+5*H) >> 5);
819 5257216 src[6] = CLIP((b+6*H) >> 5);
820 5257216 src[7] = CLIP((b+7*H) >> 5);
821 5257216 src += stride;
822 }
823 }
824
/*
 * Helper macros for the 8x8 luma predictors below.  They all expect the
 * expanding function to have `src`, `stride`, `has_topleft` and
 * `has_topright` in scope.
 */

/* Sample at column x, row y, relative to the block's top-left sample. */
#define SRC(x,y) src[(x) + (y)*stride]

/* Declares l<y>: (1,2,1)/4 smoothing of the left neighbour column at row y. */
#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;

/*
 * Declares l0..l7, the smoothed left neighbours.
 * l0 reuses SRC(-1,0) in place of the corner when has_topleft is zero;
 * l7 weights SRC(-1,7) three times instead of reading a row below the block.
 */
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                    + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

/* Declares t<x>: (1,2,1)/4 smoothing of the row above the block at column x. */
#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;

/*
 * Declares t0..t7, the smoothed top neighbours.
 * t0 reuses SRC(0,-1) in place of the corner when has_topleft is zero;
 * t7 reuses SRC(7,-1) in place of SRC(8,-1) when has_topright is zero.
 */
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                    + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 av_unused = (SRC(6,-1) + 2*SRC(7,-1) \
                    + (has_topright ? SRC(8,-1) : SRC(7,-1)) + 2) >> 2

/* Assigns (does not declare) t<x> with the same (1,2,1)/4 smoothing as PT. */
#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;

/*
 * Declares t8..t15.  With has_topright set they are the smoothed samples
 * of columns 8..15 of the row above; otherwise every one of them is the
 * unsmoothed SRC(7,-1).
 */
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if (has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else { \
        t8 = t9 = t10 = t11 = t12 = t13 = t14 = t15 = SRC(7,-1); \
    }

/* Declares lt, the smoothed corner sample (left, corner, top). */
#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

/*
 * Writes the packed value v over all 8 rows of the block, two pixel4
 * stores per row.  Declares `y` and advances `src` by 8*stride.
 */
#define PREDICT_8x8_DC(v) \
    int y; \
    for (y = 0; y < 8; y++) { \
        AV_WN4PA((pixel4 *)src + 0, v); \
        AV_WN4PA((pixel4 *)src + 1, v); \
        src += stride; \
    }
862
863 2240 static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft,
864 int has_topright, ptrdiff_t _stride)
865 {
866 2240 pixel *src = (pixel*)_src;
867 2240 int stride = _stride>>(sizeof(pixel)-1);
868
869
2/2
✓ Branch 0 taken 8960 times.
✓ Branch 1 taken 1120 times.
20160 PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
870 }
871 132790 static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft,
872 int has_topright, ptrdiff_t _stride)
873 {
874 265580 pixel *src = (pixel*)_src;
875 265580 int stride = _stride>>(sizeof(pixel)-1);
876
877
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 132778 times.
265580 PREDICT_8x8_LOAD_LEFT;
878 265580 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
879
2/2
✓ Branch 0 taken 1062320 times.
✓ Branch 1 taken 132790 times.
2390220 PREDICT_8x8_DC(dc);
880 }
881 16948 static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft,
882 int has_topright, ptrdiff_t _stride)
883 {
884 33896 pixel *src = (pixel*)_src;
885 33896 int stride = _stride>>(sizeof(pixel)-1);
886
887
4/4
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16932 times.
✓ Branch 2 taken 16932 times.
✓ Branch 3 taken 16 times.
33896 PREDICT_8x8_LOAD_TOP;
888 33896 const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
889
2/2
✓ Branch 0 taken 135584 times.
✓ Branch 1 taken 16948 times.
305064 PREDICT_8x8_DC(dc);
890 }
891 1848018 static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft,
892 int has_topright, ptrdiff_t _stride)
893 {
894 3696036 pixel *src = (pixel*)_src;
895 3696036 int stride = _stride>>(sizeof(pixel)-1);
896
897
2/2
✓ Branch 0 taken 1847917 times.
✓ Branch 1 taken 101 times.
3696036 PREDICT_8x8_LOAD_LEFT;
898
4/4
✓ Branch 0 taken 1847917 times.
✓ Branch 1 taken 101 times.
✓ Branch 2 taken 1287318 times.
✓ Branch 3 taken 560700 times.
3696036 PREDICT_8x8_LOAD_TOP;
899 3696036 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
900 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
901
2/2
✓ Branch 0 taken 14784144 times.
✓ Branch 1 taken 1848018 times.
33264324 PREDICT_8x8_DC(dc);
902 }
903 1649885 static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft,
904 int has_topright, ptrdiff_t _stride)
905 {
906 3299770 pixel *src = (pixel*)_src;
907 3299770 int stride = _stride>>(sizeof(pixel)-1);
908 pixel4 a;
909
910
2/2
✓ Branch 0 taken 1608131 times.
✓ Branch 1 taken 41754 times.
3299770 PREDICT_8x8_LOAD_LEFT;
911 #define ROW(y) a = PIXEL_SPLAT_X4(l##y); \
912 AV_WN4PA(src+y*stride, a); \
913 AV_WN4PA(src+y*stride+4, a);
914 3299770 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
915 #undef ROW
916 }
917 517940 static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft,
918 int has_topright, ptrdiff_t _stride)
919 {
920 int y;
921 1035880 pixel *src = (pixel*)_src;
922 1035880 int stride = _stride>>(sizeof(pixel)-1);
923 pixel4 a, b;
924
925
4/4
✓ Branch 0 taken 501926 times.
✓ Branch 1 taken 16014 times.
✓ Branch 2 taken 388023 times.
✓ Branch 3 taken 129917 times.
1035880 PREDICT_8x8_LOAD_TOP;
926 1035880 src[0] = t0;
927 1035880 src[1] = t1;
928 1035880 src[2] = t2;
929 1035880 src[3] = t3;
930 1035880 src[4] = t4;
931 1035880 src[5] = t5;
932 1035880 src[6] = t6;
933 1035880 src[7] = t7;
934 1035880 a = AV_RN4PA(((pixel4*)src)+0);
935 1035880 b = AV_RN4PA(((pixel4*)src)+1);
936
2/2
✓ Branch 0 taken 3625580 times.
✓ Branch 1 taken 517940 times.
8287040 for( y = 1; y < 8; y++ ) {
937 7251160 AV_WN4PA(((pixel4*)(src+y*stride))+0, a);
938 7251160 AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
939 }
940 }
941 194050 static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft,
942 int has_topright, ptrdiff_t _stride)
943 {
944 388100 pixel *src = (pixel*)_src;
945 388100 int stride = _stride>>(sizeof(pixel)-1);
946
4/4
✓ Branch 0 taken 192082 times.
✓ Branch 1 taken 1968 times.
✓ Branch 2 taken 147447 times.
✓ Branch 3 taken 46603 times.
388100 PREDICT_8x8_LOAD_TOP;
947
2/2
✓ Branch 0 taken 147447 times.
✓ Branch 1 taken 46603 times.
388100 PREDICT_8x8_LOAD_TOPRIGHT;
948 388100 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
949 388100 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
950 388100 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
951 388100 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
952 388100 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
953 388100 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
954 388100 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
955 388100 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
956 388100 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
957 388100 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
958 388100 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
959 388100 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
960 388100 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
961 388100 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
962 388100 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
963 }
964 293665 static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,
965 int has_topright, ptrdiff_t _stride)
966 {
967 587330 pixel *src = (pixel*)_src;
968 587330 int stride = _stride>>(sizeof(pixel)-1);
969
3/4
✓ Branch 0 taken 293665 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 226852 times.
✓ Branch 3 taken 66813 times.
587330 PREDICT_8x8_LOAD_TOP;
970
1/2
✓ Branch 0 taken 293665 times.
✗ Branch 1 not taken.
587330 PREDICT_8x8_LOAD_LEFT;
971 587330 PREDICT_8x8_LOAD_TOPLEFT;
972 587330 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
973 587330 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
974 587330 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
975 587330 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
976 587330 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
977 587330 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
978 587330 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
979 587330 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
980 587330 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
981 587330 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
982 587330 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
983 587330 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
984 587330 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
985 587330 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
986 587330 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
987 }
988 197599 static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,
989 int has_topright, ptrdiff_t _stride)
990 {
991 395198 pixel *src = (pixel*)_src;
992 395198 int stride = _stride>>(sizeof(pixel)-1);
993
3/4
✓ Branch 0 taken 197599 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 151346 times.
✓ Branch 3 taken 46253 times.
395198 PREDICT_8x8_LOAD_TOP;
994
1/2
✓ Branch 0 taken 197599 times.
✗ Branch 1 not taken.
395198 PREDICT_8x8_LOAD_LEFT;
995 395198 PREDICT_8x8_LOAD_TOPLEFT;
996 395198 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
997 395198 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
998 395198 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
999 395198 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
1000 395198 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
1001 395198 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
1002 395198 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
1003 395198 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
1004 395198 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
1005 395198 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
1006 395198 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
1007 395198 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
1008 395198 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
1009 395198 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
1010 395198 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
1011 395198 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
1012 395198 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
1013 395198 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
1014 395198 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
1015 395198 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
1016 395198 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
1017 395198 SRC(7,0)= (t6 + t7 + 1) >> 1;
1018 }
1019 447851 static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,
1020 int has_topright, ptrdiff_t _stride)
1021 {
1022 895702 pixel *src = (pixel*)_src;
1023 895702 int stride = _stride>>(sizeof(pixel)-1);
1024
4/4
✓ Branch 0 taken 447837 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 344102 times.
✓ Branch 3 taken 103749 times.
895702 PREDICT_8x8_LOAD_TOP;
1025
2/2
✓ Branch 0 taken 447837 times.
✓ Branch 1 taken 14 times.
895702 PREDICT_8x8_LOAD_LEFT;
1026 895702 PREDICT_8x8_LOAD_TOPLEFT;
1027 895702 SRC(0,7)= (l6 + l7 + 1) >> 1;
1028 895702 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
1029 895702 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
1030 895702 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
1031 895702 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
1032 895702 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
1033 895702 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
1034 895702 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
1035 895702 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
1036 895702 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
1037 895702 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
1038 895702 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
1039 895702 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
1040 895702 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
1041 895702 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
1042 895702 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
1043 895702 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
1044 895702 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
1045 895702 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
1046 895702 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
1047 895702 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
1048 895702 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
1049 }
1050 198165 static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,
1051 int has_topright, ptrdiff_t _stride)
1052 {
1053 396330 pixel *src = (pixel*)_src;
1054 396330 int stride = _stride>>(sizeof(pixel)-1);
1055
4/4
✓ Branch 0 taken 196410 times.
✓ Branch 1 taken 1755 times.
✓ Branch 2 taken 154360 times.
✓ Branch 3 taken 43805 times.
396330 PREDICT_8x8_LOAD_TOP;
1056
2/2
✓ Branch 0 taken 154360 times.
✓ Branch 1 taken 43805 times.
396330 PREDICT_8x8_LOAD_TOPRIGHT;
1057 396330 SRC(0,0)= (t0 + t1 + 1) >> 1;
1058 396330 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
1059 396330 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
1060 396330 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
1061 396330 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
1062 396330 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
1063 396330 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
1064 396330 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
1065 396330 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
1066 396330 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
1067 396330 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
1068 396330 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
1069 396330 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
1070 396330 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
1071 396330 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
1072 396330 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
1073 396330 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
1074 396330 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
1075 396330 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
1076 396330 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
1077 396330 SRC(7,6)= (t10 + t11 + 1) >> 1;
1078 396330 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
1079 }
1080 548555 static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
1081 int has_topright, ptrdiff_t _stride)
1082 {
1083 1097110 pixel *src = (pixel*)_src;
1084 1097110 int stride = _stride>>(sizeof(pixel)-1);
1085
2/2
✓ Branch 0 taken 524777 times.
✓ Branch 1 taken 23778 times.
1097110 PREDICT_8x8_LOAD_LEFT;
1086 1097110 SRC(0,0)= (l0 + l1 + 1) >> 1;
1087 1097110 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
1088 1097110 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
1089 1097110 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
1090 1097110 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
1091 1097110 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
1092 1097110 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
1093 1097110 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
1094 1097110 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
1095 1097110 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
1096 1097110 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
1097 1097110 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
1098 1097110 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
1099 1097110 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
1100 1097110 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
1101 1097110 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
1102 1097110 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
1103 1097110 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
1104 }
1105
1106 static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1107 int has_topright, ptrdiff_t _stride)
1108 {
1109 int i;
1110 pixel *src = (pixel*)_src;
1111 const dctcoef *block = (const dctcoef*)_block;
1112 pixel pix[8];
1113 int stride = _stride>>(sizeof(pixel)-1);
1114 PREDICT_8x8_LOAD_TOP;
1115
1116 pix[0] = t0;
1117 pix[1] = t1;
1118 pix[2] = t2;
1119 pix[3] = t3;
1120 pix[4] = t4;
1121 pix[5] = t5;
1122 pix[6] = t6;
1123 pix[7] = t7;
1124
1125 for(i=0; i<8; i++){
1126 pixel v = pix[i];
1127 src[0*stride]= v += block[0];
1128 src[1*stride]= v += block[8];
1129 src[2*stride]= v += block[16];
1130 src[3*stride]= v += block[24];
1131 src[4*stride]= v += block[32];
1132 src[5*stride]= v += block[40];
1133 src[6*stride]= v += block[48];
1134 src[7*stride]= v + block[56];
1135 src++;
1136 block++;
1137 }
1138
1139 memset(_block, 0, sizeof(dctcoef) * 64);
1140 }
1141
1142 static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1143 int has_topright, ptrdiff_t _stride)
1144 {
1145 int i;
1146 pixel *src = (pixel*)_src;
1147 const dctcoef *block = (const dctcoef*)_block;
1148 pixel pix[8];
1149 int stride = _stride>>(sizeof(pixel)-1);
1150 PREDICT_8x8_LOAD_LEFT;
1151
1152 pix[0] = l0;
1153 pix[1] = l1;
1154 pix[2] = l2;
1155 pix[3] = l3;
1156 pix[4] = l4;
1157 pix[5] = l5;
1158 pix[6] = l6;
1159 pix[7] = l7;
1160
1161 for(i=0; i<8; i++){
1162 pixel v = pix[i];
1163 src[0]= v += block[0];
1164 src[1]= v += block[1];
1165 src[2]= v += block[2];
1166 src[3]= v += block[3];
1167 src[4]= v += block[4];
1168 src[5]= v += block[5];
1169 src[6]= v += block[6];
1170 src[7]= v + block[7];
1171 src+= stride;
1172 block+= 8;
1173 }
1174
1175 memset(_block, 0, sizeof(dctcoef) * 64);
1176 }
1177
/* The 8x8 helper macros are not needed past this point. */
#undef SRC
#undef PL
#undef PT
#undef PTR
#undef PREDICT_8x8_LOAD_LEFT
#undef PREDICT_8x8_LOAD_TOP
#undef PREDICT_8x8_LOAD_TOPLEFT
#undef PREDICT_8x8_LOAD_TOPRIGHT
#undef PREDICT_8x8_DC
1187
1188 182984 static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, int16_t *_block,
1189 ptrdiff_t stride)
1190 {
1191 int i;
1192 182984 pixel *pix = (pixel*)_pix;
1193 182984 const dctcoef *block = (const dctcoef*)_block;
1194 182984 stride >>= sizeof(pixel)-1;
1195 182984 pix -= stride;
1196
2/2
✓ Branch 0 taken 365968 times.
✓ Branch 1 taken 91492 times.
914920 for(i=0; i<4; i++){
1197 731936 pixel v = pix[0];
1198 731936 pix[1*stride]= v += block[0];
1199 731936 pix[2*stride]= v += block[4];
1200 731936 pix[3*stride]= v += block[8];
1201 731936 pix[4*stride]= v + block[12];
1202 731936 pix++;
1203 731936 block++;
1204 }
1205
1206 182984 memset(_block, 0, sizeof(dctcoef) * 16);
1207 }
1208
1209 239402 static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, int16_t *_block,
1210 ptrdiff_t stride)
1211 {
1212 int i;
1213 239402 pixel *pix = (pixel*)_pix;
1214 239402 const dctcoef *block = (const dctcoef*)_block;
1215 239402 stride >>= sizeof(pixel)-1;
1216
2/2
✓ Branch 0 taken 478804 times.
✓ Branch 1 taken 119701 times.
1197010 for(i=0; i<4; i++){
1217 957608 pixel v = pix[-1];
1218 957608 pix[0]= v += block[0];
1219 957608 pix[1]= v += block[1];
1220 957608 pix[2]= v += block[2];
1221 957608 pix[3]= v + block[3];
1222 957608 pix+= stride;
1223 957608 block+= 4;
1224 }
1225
1226 239402 memset(_block, 0, sizeof(dctcoef) * 16);
1227 }
1228
1229 2148 static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, int16_t *_block,
1230 ptrdiff_t stride)
1231 {
1232 int i;
1233 2148 pixel *pix = (pixel*)_pix;
1234 2148 const dctcoef *block = (const dctcoef*)_block;
1235 2148 stride >>= sizeof(pixel)-1;
1236 2148 pix -= stride;
1237
2/2
✓ Branch 0 taken 8592 times.
✓ Branch 1 taken 1074 times.
19332 for(i=0; i<8; i++){
1238 17184 pixel v = pix[0];
1239 17184 pix[1*stride]= v += block[0];
1240 17184 pix[2*stride]= v += block[8];
1241 17184 pix[3*stride]= v += block[16];
1242 17184 pix[4*stride]= v += block[24];
1243 17184 pix[5*stride]= v += block[32];
1244 17184 pix[6*stride]= v += block[40];
1245 17184 pix[7*stride]= v += block[48];
1246 17184 pix[8*stride]= v + block[56];
1247 17184 pix++;
1248 17184 block++;
1249 }
1250
1251 2148 memset(_block, 0, sizeof(dctcoef) * 64);
1252 }
1253
1254 2828 static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, int16_t *_block,
1255 ptrdiff_t stride)
1256 {
1257 int i;
1258 2828 pixel *pix = (pixel*)_pix;
1259 2828 const dctcoef *block = (const dctcoef*)_block;
1260 2828 stride >>= sizeof(pixel)-1;
1261
2/2
✓ Branch 0 taken 11312 times.
✓ Branch 1 taken 1414 times.
25452 for(i=0; i<8; i++){
1262 22624 pixel v = pix[-1];
1263 22624 pix[0]= v += block[0];
1264 22624 pix[1]= v += block[1];
1265 22624 pix[2]= v += block[2];
1266 22624 pix[3]= v += block[3];
1267 22624 pix[4]= v += block[4];
1268 22624 pix[5]= v += block[5];
1269 22624 pix[6]= v += block[6];
1270 22624 pix[7]= v + block[7];
1271 22624 pix+= stride;
1272 22624 block+= 8;
1273 }
1274
1275 2828 memset(_block, 0, sizeof(dctcoef) * 64);
1276 }
1277
1278 724 static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,
1279 int16_t *block,
1280 ptrdiff_t stride)
1281 {
1282 int i;
1283
2/2
✓ Branch 0 taken 5792 times.
✓ Branch 1 taken 362 times.
12308 for(i=0; i<16; i++)
1284 11584 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1285 }
1286
1287 556 static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix,
1288 const int *block_offset,
1289 int16_t *block,
1290 ptrdiff_t stride)
1291 {
1292 int i;
1293
2/2
✓ Branch 0 taken 4448 times.
✓ Branch 1 taken 278 times.
9452 for(i=0; i<16; i++)
1294 8896 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1295 }
1296
1297 4516 static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,
1298 int16_t *block, ptrdiff_t stride)
1299 {
1300 int i;
1301
2/2
✓ Branch 0 taken 9032 times.
✓ Branch 1 taken 2258 times.
22580 for(i=0; i<4; i++)
1302 18064 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1303 }
1304
1305 static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
1306 int16_t *block, ptrdiff_t stride)
1307 {
1308 int i;
1309 for(i=0; i<4; i++)
1310 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1311 for(i=4; i<8; i++)
1312 FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1313 }
1314
1315 5776 static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
1316 int16_t *block,
1317 ptrdiff_t stride)
1318 {
1319 int i;
1320
2/2
✓ Branch 0 taken 11552 times.
✓ Branch 1 taken 2888 times.
28880 for(i=0; i<4; i++)
1321 23104 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1322 }
1323
1324 static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix,
1325 const int *block_offset,
1326 int16_t *block, ptrdiff_t stride)
1327 {
1328 int i;
1329 for(i=0; i<4; i++)
1330 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1331 for(i=4; i<8; i++)
1332 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1333 }
1334