FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/hevc/dsp_template.c
Date: 2024-11-20 23:03:26
Exec Total Coverage
Lines: 401 401 100.0%
Functions: 213 232 91.8%
Branches: 246 256 96.1%

Line Branch Exec Source
1 /*
2 * HEVC video decoder
3 *
4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "get_bits.h"
24 #include "hevcdec.h"
25
26 #include "bit_depth_template.c"
27 #include "dsp.h"
28 #include "h26x/h2656_sao_template.c"
29 #include "h26x/h2656_inter_template.c"
30
31 74598 static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
32 GetBitContext *gb, int pcm_bit_depth)
33 {
34 int x, y;
35 74598 pixel *dst = (pixel *)_dst;
36
37 74598 stride /= sizeof(pixel);
38
39
2/2
✓ Branch 0 taken 327424 times.
✓ Branch 1 taken 37299 times.
729446 for (y = 0; y < height; y++) {
40
2/2
✓ Branch 0 taken 4382080 times.
✓ Branch 1 taken 327424 times.
9419008 for (x = 0; x < width; x++)
41 8764160 dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
42 654848 dst += stride;
43 }
44 74598 }
45
46 29453092 static av_always_inline void FUNC(add_residual)(uint8_t *_dst, const int16_t *res,
47 ptrdiff_t stride, int size)
48 {
49 int x, y;
50 29453092 pixel *dst = (pixel *)_dst;
51
52 29453092 stride /= sizeof(pixel);
53
54
2/2
✓ Branch 0 taken 116245116 times.
✓ Branch 1 taken 14726546 times.
261943324 for (y = 0; y < size; y++) {
55
2/2
✓ Branch 0 taken 1581897296 times.
✓ Branch 1 taken 116245116 times.
3396284824 for (x = 0; x < size; x++) {
56 3163794592 dst[x] = av_clip_pixel(dst[x] + *res);
57 3163794592 res++;
58 }
59 232490232 dst += stride;
60 }
61 29453092 }
62
63 16727906 static void FUNC(add_residual4x4)(uint8_t *_dst, const int16_t *res,
64 ptrdiff_t stride)
65 {
66 16727906 FUNC(add_residual)(_dst, res, stride, 4);
67 16727906 }
68
69 7623282 static void FUNC(add_residual8x8)(uint8_t *_dst, const int16_t *res,
70 ptrdiff_t stride)
71 {
72 7623282 FUNC(add_residual)(_dst, res, stride, 8);
73 7623282 }
74
75 3666786 static void FUNC(add_residual16x16)(uint8_t *_dst, const int16_t *res,
76 ptrdiff_t stride)
77 {
78 3666786 FUNC(add_residual)(_dst, res, stride, 16);
79 3666786 }
80
81 1435118 static void FUNC(add_residual32x32)(uint8_t *_dst, const int16_t *res,
82 ptrdiff_t stride)
83 {
84 1435118 FUNC(add_residual)(_dst, res, stride, 32);
85 1435118 }
86
87 54126 static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
88 {
89 54126 int16_t *coeffs = (int16_t *) _coeffs;
90 int x, y;
91 54126 int size = 1 << log2_size;
92
93
2/2
✓ Branch 0 taken 9989 times.
✓ Branch 1 taken 17074 times.
54126 if (mode) {
94 19978 coeffs += size;
95
2/2
✓ Branch 0 taken 56315 times.
✓ Branch 1 taken 9989 times.
132608 for (y = 0; y < size - 1; y++) {
96
2/2
✓ Branch 0 taken 523168 times.
✓ Branch 1 taken 56315 times.
1158966 for (x = 0; x < size; x++)
97 1046336 coeffs[x] += coeffs[x - size];
98 112630 coeffs += size;
99 }
100 } else {
101
2/2
✓ Branch 0 taken 124352 times.
✓ Branch 1 taken 17074 times.
282852 for (y = 0; y < size; y++) {
102
2/2
✓ Branch 0 taken 1049312 times.
✓ Branch 1 taken 124352 times.
2347328 for (x = 1; x < size; x++)
103 2098624 coeffs[x] += coeffs[x - 1];
104 248704 coeffs += size;
105 }
106 }
107 54126 }
108
109 877016 static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
110 {
111 877016 int shift = 15 - BIT_DEPTH - log2_size;
112 int x, y;
113 877016 int size = 1 << log2_size;
114
115
2/2
✓ Branch 0 taken 429222 times.
✓ Branch 1 taken 9286 times.
877016 if (shift > 0) {
116 858444 int offset = 1 << (shift - 1);
117
2/2
✓ Branch 0 taken 1790600 times.
✓ Branch 1 taken 429222 times.
4439644 for (y = 0; y < size; y++) {
118
2/2
✓ Branch 0 taken 8185344 times.
✓ Branch 1 taken 1790600 times.
19951888 for (x = 0; x < size; x++) {
119 16370688 *coeffs = (*coeffs + offset) >> shift;
120 16370688 coeffs++;
121 }
122 }
123 } else {
124
2/2
✓ Branch 0 taken 74992 times.
✓ Branch 1 taken 9286 times.
168556 for (y = 0; y < size; y++) {
125
2/2
✓ Branch 0 taken 613504 times.
✓ Branch 1 taken 74992 times.
1376992 for (x = 0; x < size; x++) {
126 1227008 *coeffs = *(uint16_t*)coeffs << -shift;
127 1227008 coeffs++;
128 }
129 }
130 }
131 877016 }
132
133 #define SET(dst, x) (dst) = (x)
134 #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
135
136 #define TR_4x4_LUMA(dst, src, step, assign) \
137 do { \
138 int c0 = src[0 * step] + src[2 * step]; \
139 int c1 = src[2 * step] + src[3 * step]; \
140 int c2 = src[0 * step] - src[3 * step]; \
141 int c3 = 74 * src[1 * step]; \
142 \
143 assign(dst[2 * step], 74 * (src[0 * step] - \
144 src[2 * step] + \
145 src[3 * step])); \
146 assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \
147 assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \
148 assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \
149 } while (0)
150
151 7369832 static void FUNC(transform_4x4_luma)(int16_t *coeffs)
152 {
153 int i;
154 7369832 int shift = 7;
155 7369832 int add = 1 << (shift - 1);
156 7369832 int16_t *src = coeffs;
157
158
2/2
✓ Branch 0 taken 14739664 times.
✓ Branch 1 taken 3684916 times.
36849160 for (i = 0; i < 4; i++) {
159 29479328 TR_4x4_LUMA(src, src, 4, SCALE);
160 29479328 src++;
161 }
162
163 7369832 shift = 20 - BIT_DEPTH;
164 7369832 add = 1 << (shift - 1);
165
2/2
✓ Branch 0 taken 14739664 times.
✓ Branch 1 taken 3684916 times.
36849160 for (i = 0; i < 4; i++) {
166 29479328 TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
167 29479328 coeffs += 4;
168 }
169 7369832 }
170
171 #undef TR_4x4_LUMA
172
173 #define TR_4(dst, src, dstep, sstep, assign, end) \
174 do { \
175 const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
176 const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
177 const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
178 const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
179 \
180 assign(dst[0 * dstep], e0 + o0); \
181 assign(dst[1 * dstep], e1 + o1); \
182 assign(dst[2 * dstep], e1 - o1); \
183 assign(dst[3 * dstep], e0 - o0); \
184 } while (0)
185
186 #define TR_8(dst, src, dstep, sstep, assign, end) \
187 do { \
188 int i, j; \
189 int e_8[4]; \
190 int o_8[4] = { 0 }; \
191 for (i = 0; i < 4; i++) \
192 for (j = 1; j < end; j += 2) \
193 o_8[i] += transform[4 * j][i] * src[j * sstep]; \
194 TR_4(e_8, src, 1, 2 * sstep, SET, 4); \
195 \
196 for (i = 0; i < 4; i++) { \
197 assign(dst[i * dstep], e_8[i] + o_8[i]); \
198 assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \
199 } \
200 } while (0)
201
202 #define TR_16(dst, src, dstep, sstep, assign, end) \
203 do { \
204 int i, j; \
205 int e_16[8]; \
206 int o_16[8] = { 0 }; \
207 for (i = 0; i < 8; i++) \
208 for (j = 1; j < end; j += 2) \
209 o_16[i] += transform[2 * j][i] * src[j * sstep]; \
210 TR_8(e_16, src, 1, 2 * sstep, SET, 8); \
211 \
212 for (i = 0; i < 8; i++) { \
213 assign(dst[i * dstep], e_16[i] + o_16[i]); \
214 assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \
215 } \
216 } while (0)
217
218 #define TR_32(dst, src, dstep, sstep, assign, end) \
219 do { \
220 int i, j; \
221 int e_32[16]; \
222 int o_32[16] = { 0 }; \
223 for (i = 0; i < 16; i++) \
224 for (j = 1; j < end; j += 2) \
225 o_32[i] += transform[j][i] * src[j * sstep]; \
226 TR_16(e_32, src, 1, 2 * sstep, SET, end / 2); \
227 \
228 for (i = 0; i < 16; i++) { \
229 assign(dst[i * dstep], e_32[i] + o_32[i]); \
230 assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \
231 } \
232 } while (0)
233
234 #define IDCT_VAR4(H) \
235 int limit2 = FFMIN(col_limit + 4, H)
236 #define IDCT_VAR8(H) \
237 int limit = FFMIN(col_limit, H); \
238 int limit2 = FFMIN(col_limit + 4, H)
239 #define IDCT_VAR16(H) IDCT_VAR8(H)
240 #define IDCT_VAR32(H) IDCT_VAR8(H)
241
242 #define IDCT(H) \
243 static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs, \
244 int col_limit) \
245 { \
246 int i; \
247 int shift = 7; \
248 int add = 1 << (shift - 1); \
249 int16_t *src = coeffs; \
250 IDCT_VAR ## H(H); \
251 \
252 for (i = 0; i < H; i++) { \
253 TR_ ## H(src, src, H, H, SCALE, limit2); \
254 if (limit2 < H && i%4 == 0 && !!i) \
255 limit2 -= 4; \
256 src++; \
257 } \
258 \
259 shift = 20 - BIT_DEPTH; \
260 add = 1 << (shift - 1); \
261 for (i = 0; i < H; i++) { \
262 TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \
263 coeffs += H; \
264 } \
265 }
266
267 #define IDCT_DC(H) \
268 static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs) \
269 { \
270 int i, j; \
271 int shift = 14 - BIT_DEPTH; \
272 int add = 1 << (shift - 1); \
273 int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \
274 \
275 for (j = 0; j < H; j++) { \
276 for (i = 0; i < H; i++) { \
277 coeffs[i + j * H] = coeff; \
278 } \
279 } \
280 }
281
282
5/10
✗ Branch 0 not taken.
✓ Branch 1 taken 10688476 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 10688476 times.
✓ Branch 7 taken 2672119 times.
✓ Branch 8 taken 10688476 times.
✓ Branch 9 taken 2672119 times.
48098142 IDCT( 4)
283
17/22
✓ Branch 0 taken 376372352 times.
✓ Branch 1 taken 94093088 times.
✓ Branch 2 taken 94093088 times.
✓ Branch 3 taken 23523272 times.
✓ Branch 4 taken 94093088 times.
✓ Branch 5 taken 23523272 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 23523272 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 23523272 times.
✓ Branch 13 taken 2940409 times.
✓ Branch 14 taken 264298560 times.
✓ Branch 15 taken 94093088 times.
✓ Branch 16 taken 94093088 times.
✓ Branch 17 taken 23523272 times.
✓ Branch 18 taken 94093088 times.
✓ Branch 19 taken 23523272 times.
✓ Branch 20 taken 23523272 times.
✓ Branch 21 taken 2940409 times.
2134060434 IDCT( 8)
284
34/34
✓ Branch 0 taken 707399376 times.
✓ Branch 1 taken 190189568 times.
✓ Branch 2 taken 190189568 times.
✓ Branch 3 taken 23773696 times.
✓ Branch 4 taken 380379136 times.
✓ Branch 5 taken 95094784 times.
✓ Branch 6 taken 95094784 times.
✓ Branch 7 taken 23773696 times.
✓ Branch 8 taken 95094784 times.
✓ Branch 9 taken 23773696 times.
✓ Branch 10 taken 190189568 times.
✓ Branch 11 taken 23773696 times.
✓ Branch 12 taken 17904240 times.
✓ Branch 13 taken 5869456 times.
✓ Branch 14 taken 4476060 times.
✓ Branch 15 taken 13428180 times.
✓ Branch 16 taken 3357045 times.
✓ Branch 17 taken 1119015 times.
✓ Branch 18 taken 23773696 times.
✓ Branch 19 taken 1485856 times.
✓ Branch 20 taken 745224320 times.
✓ Branch 21 taken 190189568 times.
✓ Branch 22 taken 190189568 times.
✓ Branch 23 taken 23773696 times.
✓ Branch 24 taken 380379136 times.
✓ Branch 25 taken 95094784 times.
✓ Branch 26 taken 95094784 times.
✓ Branch 27 taken 23773696 times.
✓ Branch 28 taken 95094784 times.
✓ Branch 29 taken 23773696 times.
✓ Branch 30 taken 190189568 times.
✓ Branch 31 taken 23773696 times.
✓ Branch 32 taken 23773696 times.
✓ Branch 33 taken 1485856 times.
6807105248 IDCT(16)
285
46/46
✓ Branch 0 taken 1189891040 times.
✓ Branch 1 taken 302496256 times.
✓ Branch 2 taken 302496256 times.
✓ Branch 3 taken 18906016 times.
✓ Branch 4 taken 289395288 times.
✓ Branch 5 taken 151248128 times.
✓ Branch 6 taken 151248128 times.
✓ Branch 7 taken 18906016 times.
✓ Branch 8 taken 302496256 times.
✓ Branch 9 taken 75624064 times.
✓ Branch 10 taken 75624064 times.
✓ Branch 11 taken 18906016 times.
✓ Branch 12 taken 75624064 times.
✓ Branch 13 taken 18906016 times.
✓ Branch 14 taken 151248128 times.
✓ Branch 15 taken 18906016 times.
✓ Branch 16 taken 302496256 times.
✓ Branch 17 taken 18906016 times.
✓ Branch 18 taken 16701600 times.
✓ Branch 19 taken 2204416 times.
✓ Branch 20 taken 4175400 times.
✓ Branch 21 taken 12526200 times.
✓ Branch 22 taken 3653475 times.
✓ Branch 23 taken 521925 times.
✓ Branch 24 taken 18906016 times.
✓ Branch 25 taken 590813 times.
✓ Branch 26 taken 1790057984 times.
✓ Branch 27 taken 302496256 times.
✓ Branch 28 taken 302496256 times.
✓ Branch 29 taken 18906016 times.
✓ Branch 30 taken 436636160 times.
✓ Branch 31 taken 151248128 times.
✓ Branch 32 taken 151248128 times.
✓ Branch 33 taken 18906016 times.
✓ Branch 34 taken 302496256 times.
✓ Branch 35 taken 75624064 times.
✓ Branch 36 taken 75624064 times.
✓ Branch 37 taken 18906016 times.
✓ Branch 38 taken 75624064 times.
✓ Branch 39 taken 18906016 times.
✓ Branch 40 taken 151248128 times.
✓ Branch 41 taken 18906016 times.
✓ Branch 42 taken 302496256 times.
✓ Branch 43 taken 18906016 times.
✓ Branch 44 taken 18906016 times.
✓ Branch 45 taken 590813 times.
12933699242 IDCT(32)
286
287
4/4
✓ Branch 0 taken 17976448 times.
✓ Branch 1 taken 4494112 times.
✓ Branch 2 taken 4494112 times.
✓ Branch 3 taken 1123528 times.
47188176 IDCT_DC( 4)
288
4/4
✓ Branch 0 taken 50940480 times.
✓ Branch 1 taken 6367560 times.
✓ Branch 2 taken 6367560 times.
✓ Branch 3 taken 795945 times.
116207970 IDCT_DC( 8)
289
4/4
✓ Branch 0 taken 84889600 times.
✓ Branch 1 taken 5305600 times.
✓ Branch 2 taken 5305600 times.
✓ Branch 3 taken 331600 times.
181053600 IDCT_DC(16)
290
4/4
✓ Branch 0 taken 127001600 times.
✓ Branch 1 taken 3968800 times.
✓ Branch 2 taken 3968800 times.
✓ Branch 3 taken 124025 times.
262188850 IDCT_DC(32)
291
292 #undef TR_4
293 #undef TR_8
294 #undef TR_16
295 #undef TR_32
296
297 #undef SET
298 #undef SCALE
299
300 ////////////////////////////////////////////////////////////////////////////////
301 //
302 ////////////////////////////////////////////////////////////////////////////////
303 #define ff_hevc_pel_filters ff_hevc_qpel_filters
304 #define DECL_HV_FILTER(f) \
305 const int8_t *hf = ff_hevc_ ## f ## _filters[mx]; \
306 const int8_t *vf = ff_hevc_ ## f ## _filters[my];
307
308 #define FW_PUT(p, f, t) \
309 static void FUNC(put_hevc_## f)(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, \
310 intptr_t mx, intptr_t my, int width) \
311 { \
312 DECL_HV_FILTER(p) \
313 FUNC(put_ ## t)(dst, src, srcstride, height, hf, vf, width); \
314 }
315
316 #define FW_PUT_UNI(p, f, t) \
317 static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
318 ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width) \
319 { \
320 DECL_HV_FILTER(p) \
321 FUNC(put_ ## t)(dst, dststride, src, srcstride, height, hf, vf, width); \
322 }
323
324 #define FW_PUT_UNI_W(p, f, t) \
325 static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
326 ptrdiff_t srcstride,int height, int denom, int wx, int ox, \
327 intptr_t mx, intptr_t my, int width) \
328 { \
329 DECL_HV_FILTER(p) \
330 FUNC(put_ ## t)(dst, dststride, src, srcstride, height, denom, wx, ox, hf, vf, width); \
331 }
332
333 #define FW_PUT_FUNCS(f, t, dir) \
334 FW_PUT(f, f ## _ ## dir, t ## _ ## dir) \
335 FW_PUT_UNI(f, f ## _uni_ ## dir, uni_ ## t ## _ ## dir) \
336 FW_PUT_UNI_W(f, f ## _uni_w_ ## dir, uni_## t ## _w_ ## dir)
337
338 4955258 FW_PUT(pel, pel_pixels, pixels)
339 5077448 FW_PUT_UNI(pel, pel_uni_pixels, uni_pixels)
340 145958 FW_PUT_UNI_W(pel, pel_uni_w_pixels, uni_w_pixels)
341
342 3114704 FW_PUT_FUNCS(qpel, luma, h )
343 2600776 FW_PUT_FUNCS(qpel, luma, v )
344 8186578 FW_PUT_FUNCS(qpel, luma, hv )
345 5047532 FW_PUT_FUNCS(epel, chroma, h )
346 3811016 FW_PUT_FUNCS(epel, chroma, v )
347 21217332 FW_PUT_FUNCS(epel, chroma, hv )
348
349 5414456 static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
350 const int16_t *src2,
351 int height, intptr_t mx, intptr_t my, int width)
352 {
353 int x, y;
354 5414456 const pixel *src = (const pixel *)_src;
355 5414456 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
356 5414456 pixel *dst = (pixel *)_dst;
357 5414456 ptrdiff_t dststride = _dststride / sizeof(pixel);
358
359 5414456 int shift = 14 + 1 - BIT_DEPTH;
360 #if BIT_DEPTH < 14
361 5414456 int offset = 1 << (shift - 1);
362 #else
363 int offset = 0;
364 #endif
365
366
2/2
✓ Branch 0 taken 40991802 times.
✓ Branch 1 taken 2707228 times.
87398060 for (y = 0; y < height; y++) {
367
2/2
✓ Branch 0 taken 1061369396 times.
✓ Branch 1 taken 40991802 times.
2204722396 for (x = 0; x < width; x++)
368 2122738792 dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
369 81983604 src += srcstride;
370 81983604 dst += dststride;
371 81983604 src2 += MAX_PB_SIZE;
372 }
373 5414456 }
374
375 64154 static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
376 const int16_t *src2,
377 int height, int denom, int wx0, int wx1,
378 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
379 {
380 int x, y;
381 64154 const pixel *src = (const pixel *)_src;
382 64154 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
383 64154 pixel *dst = (pixel *)_dst;
384 64154 ptrdiff_t dststride = _dststride / sizeof(pixel);
385
386 64154 int shift = 14 + 1 - BIT_DEPTH;
387 64154 int log2Wd = denom + shift - 1;
388
389 64154 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
390 64154 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
391
2/2
✓ Branch 0 taken 671000 times.
✓ Branch 1 taken 32077 times.
1406154 for (y = 0; y < height; y++) {
392
2/2
✓ Branch 0 taken 21890728 times.
✓ Branch 1 taken 671000 times.
45123456 for (x = 0; x < width; x++) {
393 43781456 dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1));
394 }
395 1342000 src += srcstride;
396 1342000 dst += dststride;
397 1342000 src2 += MAX_PB_SIZE;
398 }
399 64154 }
400
401 ////////////////////////////////////////////////////////////////////////////////
402 //
403 ////////////////////////////////////////////////////////////////////////////////
404 #define QPEL_FILTER(src, stride) \
405 (filter[0] * src[x - 3 * stride] + \
406 filter[1] * src[x - 2 * stride] + \
407 filter[2] * src[x - stride] + \
408 filter[3] * src[x ] + \
409 filter[4] * src[x + stride] + \
410 filter[5] * src[x + 2 * stride] + \
411 filter[6] * src[x + 3 * stride] + \
412 filter[7] * src[x + 4 * stride])
413
414 1354728 static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
415 const int16_t *src2,
416 int height, intptr_t mx, intptr_t my, int width)
417 {
418 int x, y;
419 1354728 const pixel *src = (const pixel*)_src;
420 1354728 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
421 1354728 pixel *dst = (pixel *)_dst;
422 1354728 ptrdiff_t dststride = _dststride / sizeof(pixel);
423
424 1354728 const int8_t *filter = ff_hevc_qpel_filters[mx];
425
426 1354728 int shift = 14 + 1 - BIT_DEPTH;
427 #if BIT_DEPTH < 14
428 1354728 int offset = 1 << (shift - 1);
429 #else
430 int offset = 0;
431 #endif
432
433
2/2
✓ Branch 0 taken 15073720 times.
✓ Branch 1 taken 677364 times.
31502168 for (y = 0; y < height; y++) {
434
2/2
✓ Branch 0 taken 511366472 times.
✓ Branch 1 taken 15073720 times.
1052880384 for (x = 0; x < width; x++)
435 1022732944 dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
436 30147440 src += srcstride;
437 30147440 dst += dststride;
438 30147440 src2 += MAX_PB_SIZE;
439 }
440 1354728 }
441
442 1066828 static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride,
443 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
444 int height, intptr_t mx, intptr_t my, int width)
445 {
446 int x, y;
447 1066828 const pixel *src = (const pixel*)_src;
448 1066828 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
449 1066828 pixel *dst = (pixel *)_dst;
450 1066828 ptrdiff_t dststride = _dststride / sizeof(pixel);
451
452 1066828 const int8_t *filter = ff_hevc_qpel_filters[my];
453
454 1066828 int shift = 14 + 1 - BIT_DEPTH;
455 #if BIT_DEPTH < 14
456 1066828 int offset = 1 << (shift - 1);
457 #else
458 int offset = 0;
459 #endif
460
461
2/2
✓ Branch 0 taken 11505952 times.
✓ Branch 1 taken 533414 times.
24078732 for (y = 0; y < height; y++) {
462
2/2
✓ Branch 0 taken 376371400 times.
✓ Branch 1 taken 11505952 times.
775754704 for (x = 0; x < width; x++)
463 752742800 dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
464 23011904 src += srcstride;
465 23011904 dst += dststride;
466 23011904 src2 += MAX_PB_SIZE;
467 }
468 1066828 }
469
470 3346270 static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
471 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
472 int height, intptr_t mx, intptr_t my, int width)
473 {
474 int x, y;
475 const int8_t *filter;
476 3346270 const pixel *src = (const pixel*)_src;
477 3346270 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
478 3346270 pixel *dst = (pixel *)_dst;
479 3346270 ptrdiff_t dststride = _dststride / sizeof(pixel);
480 int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
481 3346270 int16_t *tmp = tmp_array;
482 3346270 int shift = 14 + 1 - BIT_DEPTH;
483 #if BIT_DEPTH < 14
484 3346270 int offset = 1 << (shift - 1);
485 #else
486 int offset = 0;
487 #endif
488
489 3346270 src -= QPEL_EXTRA_BEFORE * srcstride;
490 3346270 filter = ff_hevc_qpel_filters[mx];
491
2/2
✓ Branch 0 taken 47805493 times.
✓ Branch 1 taken 1673135 times.
98957256 for (y = 0; y < height + QPEL_EXTRA; y++) {
492
2/2
✓ Branch 0 taken 1420553512 times.
✓ Branch 1 taken 47805493 times.
2936718010 for (x = 0; x < width; x++)
493 2841107024 tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
494 95610986 src += srcstride;
495 95610986 tmp += MAX_PB_SIZE;
496 }
497
498 3346270 tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
499 3346270 filter = ff_hevc_qpel_filters[my];
500
501
2/2
✓ Branch 0 taken 36093548 times.
✓ Branch 1 taken 1673135 times.
75533366 for (y = 0; y < height; y++) {
502
2/2
✓ Branch 0 taken 1170958152 times.
✓ Branch 1 taken 36093548 times.
2414103400 for (x = 0; x < width; x++)
503 2341916304 dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
504 72187096 tmp += MAX_PB_SIZE;
505 72187096 dst += dststride;
506 72187096 src2 += MAX_PB_SIZE;
507 }
508 3346270 }
509
510 22810 static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
511 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
512 int height, int denom, int wx0, int wx1,
513 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
514 {
515 int x, y;
516 22810 const pixel *src = (const pixel*)_src;
517 22810 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
518 22810 pixel *dst = (pixel *)_dst;
519 22810 ptrdiff_t dststride = _dststride / sizeof(pixel);
520
521 22810 const int8_t *filter = ff_hevc_qpel_filters[mx];
522
523 22810 int shift = 14 + 1 - BIT_DEPTH;
524 22810 int log2Wd = denom + shift - 1;
525
526 22810 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
527 22810 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
528
2/2
✓ Branch 0 taken 278600 times.
✓ Branch 1 taken 11405 times.
580010 for (y = 0; y < height; y++) {
529
2/2
✓ Branch 0 taken 9869200 times.
✓ Branch 1 taken 278600 times.
20295600 for (x = 0; x < width; x++)
530 19738400 dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
531 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
532 557200 src += srcstride;
533 557200 dst += dststride;
534 557200 src2 += MAX_PB_SIZE;
535 }
536 22810 }
537
538 20448 static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
539 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
540 int height, int denom, int wx0, int wx1,
541 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
542 {
543 int x, y;
544 20448 const pixel *src = (const pixel*)_src;
545 20448 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
546 20448 pixel *dst = (pixel *)_dst;
547 20448 ptrdiff_t dststride = _dststride / sizeof(pixel);
548
549 20448 const int8_t *filter = ff_hevc_qpel_filters[my];
550
551 20448 int shift = 14 + 1 - BIT_DEPTH;
552 20448 int log2Wd = denom + shift - 1;
553
554 20448 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
555 20448 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
556
2/2
✓ Branch 0 taken 236324 times.
✓ Branch 1 taken 10224 times.
493096 for (y = 0; y < height; y++) {
557
2/2
✓ Branch 0 taken 7942032 times.
✓ Branch 1 taken 236324 times.
16356712 for (x = 0; x < width; x++)
558 15884064 dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
559 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
560 472648 src += srcstride;
561 472648 dst += dststride;
562 472648 src2 += MAX_PB_SIZE;
563 }
564 20448 }
565
566 71042 static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
567 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
568 int height, int denom, int wx0, int wx1,
569 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
570 {
571 int x, y;
572 const int8_t *filter;
573 71042 const pixel *src = (const pixel*)_src;
574 71042 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
575 71042 pixel *dst = (pixel *)_dst;
576 71042 ptrdiff_t dststride = _dststride / sizeof(pixel);
577 int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
578 71042 int16_t *tmp = tmp_array;
579 71042 int shift = 14 + 1 - BIT_DEPTH;
580 71042 int log2Wd = denom + shift - 1;
581
582 71042 src -= QPEL_EXTRA_BEFORE * srcstride;
583 71042 filter = ff_hevc_qpel_filters[mx];
584
2/2
✓ Branch 0 taken 1131207 times.
✓ Branch 1 taken 35521 times.
2333456 for (y = 0; y < height + QPEL_EXTRA; y++) {
585
2/2
✓ Branch 0 taken 36956072 times.
✓ Branch 1 taken 1131207 times.
76174558 for (x = 0; x < width; x++)
586 73912144 tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
587 2262414 src += srcstride;
588 2262414 tmp += MAX_PB_SIZE;
589 }
590
591 71042 tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
592 71042 filter = ff_hevc_qpel_filters[my];
593
594 71042 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
595 71042 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
596
2/2
✓ Branch 0 taken 882560 times.
✓ Branch 1 taken 35521 times.
1836162 for (y = 0; y < height; y++) {
597
2/2
✓ Branch 0 taken 30984848 times.
✓ Branch 1 taken 882560 times.
63734816 for (x = 0; x < width; x++)
598 61969696 dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
599 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
600 1765120 tmp += MAX_PB_SIZE;
601 1765120 dst += dststride;
602 1765120 src2 += MAX_PB_SIZE;
603 }
604 71042 }
605
606 ////////////////////////////////////////////////////////////////////////////////
607 //
608 ////////////////////////////////////////////////////////////////////////////////
609 #define EPEL_FILTER(src, stride) \
610 (filter[0] * src[x - stride] + \
611 filter[1] * src[x] + \
612 filter[2] * src[x + stride] + \
613 filter[3] * src[x + 2 * stride])
614
615 2154950 static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride,
616 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
617 int height, intptr_t mx, intptr_t my, int width)
618 {
619 int x, y;
620 2154950 const pixel *src = (const pixel *)_src;
621 2154950 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
622 2154950 pixel *dst = (pixel *)_dst;
623 2154950 ptrdiff_t dststride = _dststride / sizeof(pixel);
624 2154950 const int8_t *filter = ff_hevc_epel_filters[mx];
625 2154950 int shift = 14 + 1 - BIT_DEPTH;
626 #if BIT_DEPTH < 14
627 2154950 int offset = 1 << (shift - 1);
628 #else
629 int offset = 0;
630 #endif
631
632
2/2
✓ Branch 0 taken 12739858 times.
✓ Branch 1 taken 1077475 times.
27634666 for (y = 0; y < height; y++) {
633
2/2
✓ Branch 0 taken 216603220 times.
✓ Branch 1 taken 12739858 times.
458686156 for (x = 0; x < width; x++) {
634 433206440 dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
635 }
636 25479716 dst += dststride;
637 25479716 src += srcstride;
638 25479716 src2 += MAX_PB_SIZE;
639 }
640 2154950 }
641
642 1497610 static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride,
643 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
644 int height, intptr_t mx, intptr_t my, int width)
645 {
646 int x, y;
647 1497610 const pixel *src = (const pixel *)_src;
648 1497610 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
649 1497610 const int8_t *filter = ff_hevc_epel_filters[my];
650 1497610 pixel *dst = (pixel *)_dst;
651 1497610 ptrdiff_t dststride = _dststride / sizeof(pixel);
652 1497610 int shift = 14 + 1 - BIT_DEPTH;
653 #if BIT_DEPTH < 14
654 1497610 int offset = 1 << (shift - 1);
655 #else
656 int offset = 0;
657 #endif
658
659
2/2
✓ Branch 0 taken 9046810 times.
✓ Branch 1 taken 748805 times.
19591230 for (y = 0; y < height; y++) {
660
2/2
✓ Branch 0 taken 148837300 times.
✓ Branch 1 taken 9046810 times.
315768220 for (x = 0; x < width; x++)
661 297674600 dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
662 18093620 dst += dststride;
663 18093620 src += srcstride;
664 18093620 src2 += MAX_PB_SIZE;
665 }
666 1497610 }
667
668 8511098 static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
669 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
670 int height, intptr_t mx, intptr_t my, int width)
671 {
672 int x, y;
673 8511098 const pixel *src = (const pixel *)_src;
674 8511098 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
675 8511098 pixel *dst = (pixel *)_dst;
676 8511098 ptrdiff_t dststride = _dststride / sizeof(pixel);
677 8511098 const int8_t *filter = ff_hevc_epel_filters[mx];
678 int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
679 8511098 int16_t *tmp = tmp_array;
680 8511098 int shift = 14 + 1 - BIT_DEPTH;
681 #if BIT_DEPTH < 14
682 8511098 int offset = 1 << (shift - 1);
683 #else
684 int offset = 0;
685 #endif
686
687 8511098 src -= EPEL_EXTRA_BEFORE * srcstride;
688
689
2/2
✓ Branch 0 taken 61618213 times.
✓ Branch 1 taken 4255549 times.
131747524 for (y = 0; y < height + EPEL_EXTRA; y++) {
690
2/2
✓ Branch 0 taken 917947938 times.
✓ Branch 1 taken 61618213 times.
1959132302 for (x = 0; x < width; x++)
691 1835895876 tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
692 123236426 src += srcstride;
693 123236426 tmp += MAX_PB_SIZE;
694 }
695
696 8511098 tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
697 8511098 filter = ff_hevc_epel_filters[my];
698
699
2/2
✓ Branch 0 taken 48851566 times.
✓ Branch 1 taken 4255549 times.
106214230 for (y = 0; y < height; y++) {
700
2/2
✓ Branch 0 taken 781027188 times.
✓ Branch 1 taken 48851566 times.
1659757508 for (x = 0; x < width; x++)
701 1562054376 dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
702 97703132 tmp += MAX_PB_SIZE;
703 97703132 dst += dststride;
704 97703132 src2 += MAX_PB_SIZE;
705 }
706 8511098 }
707
708 33080 static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
709 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
710 int height, int denom, int wx0, int wx1,
711 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
712 {
713 int x, y;
714 33080 const pixel *src = (const pixel *)_src;
715 33080 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
716 33080 pixel *dst = (pixel *)_dst;
717 33080 ptrdiff_t dststride = _dststride / sizeof(pixel);
718 33080 const int8_t *filter = ff_hevc_epel_filters[mx];
719 33080 int shift = 14 + 1 - BIT_DEPTH;
720 33080 int log2Wd = denom + shift - 1;
721
722 33080 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
723 33080 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
724
2/2
✓ Branch 0 taken 233792 times.
✓ Branch 1 taken 16540 times.
500664 for (y = 0; y < height; y++) {
725
2/2
✓ Branch 0 taken 5306152 times.
✓ Branch 1 taken 233792 times.
11079888 for (x = 0; x < width; x++)
726 10612304 dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
727 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
728 467584 src += srcstride;
729 467584 dst += dststride;
730 467584 src2 += MAX_PB_SIZE;
731 }
732 33080 }
733
734 24552 static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
735 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
736 int height, int denom, int wx0, int wx1,
737 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
738 {
739 int x, y;
740 24552 const pixel *src = (const pixel *)_src;
741 24552 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
742 24552 const int8_t *filter = ff_hevc_epel_filters[my];
743 24552 pixel *dst = (pixel *)_dst;
744 24552 ptrdiff_t dststride = _dststride / sizeof(pixel);
745 24552 int shift = 14 + 1 - BIT_DEPTH;
746 24552 int log2Wd = denom + shift - 1;
747
748 24552 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
749 24552 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
750
2/2
✓ Branch 0 taken 167956 times.
✓ Branch 1 taken 12276 times.
360464 for (y = 0; y < height; y++) {
751
2/2
✓ Branch 0 taken 3922696 times.
✓ Branch 1 taken 167956 times.
8181304 for (x = 0; x < width; x++)
752 7845392 dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
753 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
754 335912 src += srcstride;
755 335912 dst += dststride;
756 335912 src2 += MAX_PB_SIZE;
757 }
758 24552 }
759
760 166916 static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
761 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
762 int height, int denom, int wx0, int wx1,
763 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
764 {
765 int x, y;
766 166916 const pixel *src = (const pixel *)_src;
767 166916 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
768 166916 pixel *dst = (pixel *)_dst;
769 166916 ptrdiff_t dststride = _dststride / sizeof(pixel);
770 166916 const int8_t *filter = ff_hevc_epel_filters[mx];
771 int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
772 166916 int16_t *tmp = tmp_array;
773 166916 int shift = 14 + 1 - BIT_DEPTH;
774 166916 int log2Wd = denom + shift - 1;
775
776 166916 src -= EPEL_EXTRA_BEFORE * srcstride;
777
778
2/2
✓ Branch 0 taken 1288910 times.
✓ Branch 1 taken 83458 times.
2744736 for (y = 0; y < height + EPEL_EXTRA; y++) {
779
2/2
✓ Branch 0 taken 21782220 times.
✓ Branch 1 taken 1288910 times.
46142260 for (x = 0; x < width; x++)
780 43564440 tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
781 2577820 src += srcstride;
782 2577820 tmp += MAX_PB_SIZE;
783 }
784
785 166916 tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
786 166916 filter = ff_hevc_epel_filters[my];
787
788 166916 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
789 166916 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
790
2/2
✓ Branch 0 taken 1038536 times.
✓ Branch 1 taken 83458 times.
2243988 for (y = 0; y < height; y++) {
791
2/2
✓ Branch 0 taken 18780552 times.
✓ Branch 1 taken 1038536 times.
39638176 for (x = 0; x < width; x++)
792 37561104 dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
793 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
794 2077072 tmp += MAX_PB_SIZE;
795 2077072 dst += dststride;
796 2077072 src2 += MAX_PB_SIZE;
797 }
798 166916 }
799
800 // line zero
801 #define P3 pix[-4 * xstride]
802 #define P2 pix[-3 * xstride]
803 #define P1 pix[-2 * xstride]
804 #define P0 pix[-1 * xstride]
805 #define Q0 pix[0 * xstride]
806 #define Q1 pix[1 * xstride]
807 #define Q2 pix[2 * xstride]
808 #define Q3 pix[3 * xstride]
809
810 // line three. used only for deblocking decision
811 #define TP3 pix[-4 * xstride + 3 * ystride]
812 #define TP2 pix[-3 * xstride + 3 * ystride]
813 #define TP1 pix[-2 * xstride + 3 * ystride]
814 #define TP0 pix[-1 * xstride + 3 * ystride]
815 #define TQ0 pix[0 * xstride + 3 * ystride]
816 #define TQ1 pix[1 * xstride + 3 * ystride]
817 #define TQ2 pix[2 * xstride + 3 * ystride]
818 #define TQ3 pix[3 * xstride + 3 * ystride]
819
820 #include "h26x/h2656_deblock_template.c"
821
822 75367524 static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
823 ptrdiff_t _xstride, ptrdiff_t _ystride,
824 int beta, const int *_tc,
825 const uint8_t *_no_p, const uint8_t *_no_q)
826 {
827 75367524 ptrdiff_t xstride = _xstride / sizeof(pixel);
828 75367524 ptrdiff_t ystride = _ystride / sizeof(pixel);
829
830 75367524 beta <<= BIT_DEPTH - 8;
831
832
2/2
✓ Branch 0 taken 75367524 times.
✓ Branch 1 taken 37683762 times.
226102572 for (int j = 0; j < 2; j++) {
833 150735048 pixel* pix = (pixel*)_pix + j * 4 * ystride;
834 150735048 const int dp0 = abs(P2 - 2 * P1 + P0);
835 150735048 const int dq0 = abs(Q2 - 2 * Q1 + Q0);
836 150735048 const int dp3 = abs(TP2 - 2 * TP1 + TP0);
837 150735048 const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
838 150735048 const int d0 = dp0 + dq0;
839 150735048 const int d3 = dp3 + dq3;
840 150735048 const int tc = _tc[j] << (BIT_DEPTH - 8);
841 150735048 const int no_p = _no_p[j];
842 150735048 const int no_q = _no_q[j];
843
844
2/2
✓ Branch 0 taken 56237031 times.
✓ Branch 1 taken 19130493 times.
150735048 if (d0 + d3 < beta) {
845 112474062 const int beta_3 = beta >> 3;
846 112474062 const int beta_2 = beta >> 2;
847 112474062 const int tc25 = ((tc * 5 + 1) >> 1);
848
849
4/4
✓ Branch 0 taken 16482518 times.
✓ Branch 1 taken 39754513 times.
✓ Branch 2 taken 16017580 times.
✓ Branch 3 taken 464938 times.
112474062 if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
850
4/4
✓ Branch 0 taken 12173560 times.
✓ Branch 1 taken 3844020 times.
✓ Branch 2 taken 12077029 times.
✓ Branch 3 taken 96531 times.
32035160 abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
851
4/4
✓ Branch 0 taken 11664326 times.
✓ Branch 1 taken 412703 times.
✓ Branch 2 taken 11410404 times.
✓ Branch 3 taken 253922 times.
46974866 (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
852 22820808 const int tc2 = tc << 1;
853 22820808 FUNC(loop_filter_luma_strong)(pix, xstride, ystride, tc2, tc2, tc2, no_p, no_q);
854 } else {
855 89653254 int nd_p = 1;
856 89653254 int nd_q = 1;
857
2/2
✓ Branch 0 taken 31747996 times.
✓ Branch 1 taken 13078631 times.
89653254 if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
858 63495992 nd_p = 2;
859
2/2
✓ Branch 0 taken 30914480 times.
✓ Branch 1 taken 13912147 times.
89653254 if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
860 61828960 nd_q = 2;
861 89653254 FUNC(loop_filter_luma_weak)(pix, xstride, ystride, tc, beta, no_p, no_q, nd_p, nd_q);
862 }
863 }
864 }
865 75367524 }
866
867 23893564 static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
868 ptrdiff_t _ystride, const int *_tc,
869 const uint8_t *_no_p, const uint8_t *_no_q)
870 {
871 int no_p, no_q;
872 23893564 ptrdiff_t xstride = _xstride / sizeof(pixel);
873 23893564 ptrdiff_t ystride = _ystride / sizeof(pixel);
874 23893564 const int size = 4;
875
876
2/2
✓ Branch 0 taken 23893564 times.
✓ Branch 1 taken 11946782 times.
71680692 for (int j = 0; j < 2; j++) {
877 47787128 pixel *pix = (pixel *)_pix + j * size * ystride;
878 47787128 const int tc = _tc[j] << (BIT_DEPTH - 8);
879
2/2
✓ Branch 0 taken 21484855 times.
✓ Branch 1 taken 2408709 times.
47787128 if (tc > 0) {
880 42969710 no_p = _no_p[j];
881 42969710 no_q = _no_q[j];
882
883 42969710 FUNC(loop_filter_chroma_weak)(pix, xstride, ystride, size, tc, no_p, no_q);
884 }
885 }
886 23893564 }
887
888 11654090 static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
889 const int32_t *tc, const uint8_t *no_p,
890 const uint8_t *no_q)
891 {
892 11654090 FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
893 11654090 }
894
895 12239474 static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
896 const int32_t *tc, const uint8_t *no_p,
897 const uint8_t *no_q)
898 {
899 12239474 FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
900 12239474 }
901
902 38025412 static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
903 int beta, const int32_t *tc, const uint8_t *no_p,
904 const uint8_t *no_q)
905 {
906 38025412 FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
907 beta, tc, no_p, no_q);
908 38025412 }
909
910 37342112 static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
911 int beta, const int32_t *tc, const uint8_t *no_p,
912 const uint8_t *no_q)
913 {
914 37342112 FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
915 beta, tc, no_p, no_q);
916 37342112 }
917
918 #undef P3
919 #undef P2
920 #undef P1
921 #undef P0
922 #undef Q0
923 #undef Q1
924 #undef Q2
925 #undef Q3
926
927 #undef TP3
928 #undef TP2
929 #undef TP1
930 #undef TP0
931 #undef TQ0
932 #undef TQ1
933 #undef TQ2
934 #undef TQ3
935