FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/hevcdsp_template.c
Date: 2024-04-18 20:30:25
Exec Total Coverage
Lines: 401 401 100.0%
Functions: 213 232 91.8%
Branches: 246 256 96.1%

Line Branch Exec Source
1 /*
2 * HEVC video decoder
3 *
4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "get_bits.h"
24 #include "hevcdec.h"
25
26 #include "bit_depth_template.c"
27 #include "hevcdsp.h"
28 #include "h26x/h2656_sao_template.c"
29 #include "h26x/h2656_inter_template.c"
30
31 74598 static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
32 GetBitContext *gb, int pcm_bit_depth)
33 {
34 int x, y;
35 74598 pixel *dst = (pixel *)_dst;
36
37 74598 stride /= sizeof(pixel);
38
39
2/2
✓ Branch 0 taken 327424 times.
✓ Branch 1 taken 37299 times.
729446 for (y = 0; y < height; y++) {
40
2/2
✓ Branch 0 taken 4382080 times.
✓ Branch 1 taken 327424 times.
9419008 for (x = 0; x < width; x++)
41 8764160 dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
42 654848 dst += stride;
43 }
44 74598 }
45
46 27984680 static av_always_inline void FUNC(add_residual)(uint8_t *_dst, const int16_t *res,
47 ptrdiff_t stride, int size)
48 {
49 int x, y;
50 27984680 pixel *dst = (pixel *)_dst;
51
52 27984680 stride /= sizeof(pixel);
53
54
2/2
✓ Branch 0 taken 106577264 times.
✓ Branch 1 taken 13992340 times.
241139208 for (y = 0; y < size; y++) {
55
2/2
✓ Branch 0 taken 1378092640 times.
✓ Branch 1 taken 106577264 times.
2969339808 for (x = 0; x < size; x++) {
56 2756185280 dst[x] = av_clip_pixel(dst[x] + *res);
57 2756185280 res++;
58 }
59 213154528 dst += stride;
60 }
61 27984680 }
62
63 16229908 static void FUNC(add_residual4x4)(uint8_t *_dst, const int16_t *res,
64 ptrdiff_t stride)
65 {
66 16229908 FUNC(add_residual)(_dst, res, stride, 4);
67 16229908 }
68
69 7289974 static void FUNC(add_residual8x8)(uint8_t *_dst, const int16_t *res,
70 ptrdiff_t stride)
71 {
72 7289974 FUNC(add_residual)(_dst, res, stride, 8);
73 7289974 }
74
75 3309902 static void FUNC(add_residual16x16)(uint8_t *_dst, const int16_t *res,
76 ptrdiff_t stride)
77 {
78 3309902 FUNC(add_residual)(_dst, res, stride, 16);
79 3309902 }
80
81 1154896 static void FUNC(add_residual32x32)(uint8_t *_dst, const int16_t *res,
82 ptrdiff_t stride)
83 {
84 1154896 FUNC(add_residual)(_dst, res, stride, 32);
85 1154896 }
86
87 54126 static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
88 {
89 54126 int16_t *coeffs = (int16_t *) _coeffs;
90 int x, y;
91 54126 int size = 1 << log2_size;
92
93
2/2
✓ Branch 0 taken 9989 times.
✓ Branch 1 taken 17074 times.
54126 if (mode) {
94 19978 coeffs += size;
95
2/2
✓ Branch 0 taken 56315 times.
✓ Branch 1 taken 9989 times.
132608 for (y = 0; y < size - 1; y++) {
96
2/2
✓ Branch 0 taken 523168 times.
✓ Branch 1 taken 56315 times.
1158966 for (x = 0; x < size; x++)
97 1046336 coeffs[x] += coeffs[x - size];
98 112630 coeffs += size;
99 }
100 } else {
101
2/2
✓ Branch 0 taken 124352 times.
✓ Branch 1 taken 17074 times.
282852 for (y = 0; y < size; y++) {
102
2/2
✓ Branch 0 taken 1049312 times.
✓ Branch 1 taken 124352 times.
2347328 for (x = 1; x < size; x++)
103 2098624 coeffs[x] += coeffs[x - 1];
104 248704 coeffs += size;
105 }
106 }
107 54126 }
108
109 864282 static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
110 {
111 864282 int shift = 15 - BIT_DEPTH - log2_size;
112 int x, y;
113 864282 int size = 1 << log2_size;
114
115
2/2
✓ Branch 0 taken 422855 times.
✓ Branch 1 taken 9286 times.
864282 if (shift > 0) {
116 845710 int offset = 1 << (shift - 1);
117
2/2
✓ Branch 0 taken 1765132 times.
✓ Branch 1 taken 422855 times.
4375974 for (y = 0; y < size; y++) {
118
2/2
✓ Branch 0 taken 8083472 times.
✓ Branch 1 taken 1765132 times.
19697208 for (x = 0; x < size; x++) {
119 16166944 *coeffs = (*coeffs + offset) >> shift;
120 16166944 coeffs++;
121 }
122 }
123 } else {
124
2/2
✓ Branch 0 taken 74992 times.
✓ Branch 1 taken 9286 times.
168556 for (y = 0; y < size; y++) {
125
2/2
✓ Branch 0 taken 613504 times.
✓ Branch 1 taken 74992 times.
1376992 for (x = 0; x < size; x++) {
126 1227008 *coeffs = *(uint16_t*)coeffs << -shift;
127 1227008 coeffs++;
128 }
129 }
130 }
131 864282 }
132
133 #define SET(dst, x) (dst) = (x)
134 #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
135
136 #define TR_4x4_LUMA(dst, src, step, assign) \
137 do { \
138 int c0 = src[0 * step] + src[2 * step]; \
139 int c1 = src[2 * step] + src[3 * step]; \
140 int c2 = src[0 * step] - src[3 * step]; \
141 int c3 = 74 * src[1 * step]; \
142 \
143 assign(dst[2 * step], 74 * (src[0 * step] - \
144 src[2 * step] + \
145 src[3 * step])); \
146 assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \
147 assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \
148 assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \
149 } while (0)
150
151 7148174 static void FUNC(transform_4x4_luma)(int16_t *coeffs)
152 {
153 int i;
154 7148174 int shift = 7;
155 7148174 int add = 1 << (shift - 1);
156 7148174 int16_t *src = coeffs;
157
158
2/2
✓ Branch 0 taken 14296348 times.
✓ Branch 1 taken 3574087 times.
35740870 for (i = 0; i < 4; i++) {
159 28592696 TR_4x4_LUMA(src, src, 4, SCALE);
160 28592696 src++;
161 }
162
163 7148174 shift = 20 - BIT_DEPTH;
164 7148174 add = 1 << (shift - 1);
165
2/2
✓ Branch 0 taken 14296348 times.
✓ Branch 1 taken 3574087 times.
35740870 for (i = 0; i < 4; i++) {
166 28592696 TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
167 28592696 coeffs += 4;
168 }
169 7148174 }
170
171 #undef TR_4x4_LUMA
172
173 #define TR_4(dst, src, dstep, sstep, assign, end) \
174 do { \
175 const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
176 const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
177 const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
178 const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
179 \
180 assign(dst[0 * dstep], e0 + o0); \
181 assign(dst[1 * dstep], e1 + o1); \
182 assign(dst[2 * dstep], e1 - o1); \
183 assign(dst[3 * dstep], e0 - o0); \
184 } while (0)
185
186 #define TR_8(dst, src, dstep, sstep, assign, end) \
187 do { \
188 int i, j; \
189 int e_8[4]; \
190 int o_8[4] = { 0 }; \
191 for (i = 0; i < 4; i++) \
192 for (j = 1; j < end; j += 2) \
193 o_8[i] += transform[4 * j][i] * src[j * sstep]; \
194 TR_4(e_8, src, 1, 2 * sstep, SET, 4); \
195 \
196 for (i = 0; i < 4; i++) { \
197 assign(dst[i * dstep], e_8[i] + o_8[i]); \
198 assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \
199 } \
200 } while (0)
201
202 #define TR_16(dst, src, dstep, sstep, assign, end) \
203 do { \
204 int i, j; \
205 int e_16[8]; \
206 int o_16[8] = { 0 }; \
207 for (i = 0; i < 8; i++) \
208 for (j = 1; j < end; j += 2) \
209 o_16[i] += transform[2 * j][i] * src[j * sstep]; \
210 TR_8(e_16, src, 1, 2 * sstep, SET, 8); \
211 \
212 for (i = 0; i < 8; i++) { \
213 assign(dst[i * dstep], e_16[i] + o_16[i]); \
214 assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \
215 } \
216 } while (0)
217
218 #define TR_32(dst, src, dstep, sstep, assign, end) \
219 do { \
220 int i, j; \
221 int e_32[16]; \
222 int o_32[16] = { 0 }; \
223 for (i = 0; i < 16; i++) \
224 for (j = 1; j < end; j += 2) \
225 o_32[i] += transform[j][i] * src[j * sstep]; \
226 TR_16(e_32, src, 1, 2 * sstep, SET, end / 2); \
227 \
228 for (i = 0; i < 16; i++) { \
229 assign(dst[i * dstep], e_32[i] + o_32[i]); \
230 assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \
231 } \
232 } while (0)
233
234 #define IDCT_VAR4(H) \
235 int limit2 = FFMIN(col_limit + 4, H)
236 #define IDCT_VAR8(H) \
237 int limit = FFMIN(col_limit, H); \
238 int limit2 = FFMIN(col_limit + 4, H)
239 #define IDCT_VAR16(H) IDCT_VAR8(H)
240 #define IDCT_VAR32(H) IDCT_VAR8(H)
241
242 #define IDCT(H) \
243 static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs, \
244 int col_limit) \
245 { \
246 int i; \
247 int shift = 7; \
248 int add = 1 << (shift - 1); \
249 int16_t *src = coeffs; \
250 IDCT_VAR ## H(H); \
251 \
252 for (i = 0; i < H; i++) { \
253 TR_ ## H(src, src, H, H, SCALE, limit2); \
254 if (limit2 < H && i%4 == 0 && !!i) \
255 limit2 -= 4; \
256 src++; \
257 } \
258 \
259 shift = 20 - BIT_DEPTH; \
260 add = 1 << (shift - 1); \
261 for (i = 0; i < H; i++) { \
262 TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \
263 coeffs += H; \
264 } \
265 }
266
267 #define IDCT_DC(H) \
268 static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs) \
269 { \
270 int i, j; \
271 int shift = 14 - BIT_DEPTH; \
272 int add = 1 << (shift - 1); \
273 int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \
274 \
275 for (j = 0; j < H; j++) { \
276 for (i = 0; i < H; i++) { \
277 coeffs[i + j * H] = coeff; \
278 } \
279 } \
280 }
281
282
5/10
✗ Branch 0 not taken.
✓ Branch 1 taken 10522288 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 10522288 times.
✓ Branch 7 taken 2630572 times.
✓ Branch 8 taken 10522288 times.
✓ Branch 9 taken 2630572 times.
47350296 IDCT( 4)
283
17/22
✓ Branch 0 taken 363722752 times.
✓ Branch 1 taken 90930688 times.
✓ Branch 2 taken 90930688 times.
✓ Branch 3 taken 22732672 times.
✓ Branch 4 taken 90930688 times.
✓ Branch 5 taken 22732672 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 22732672 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 22732672 times.
✓ Branch 13 taken 2841584 times.
✓ Branch 14 taken 257345664 times.
✓ Branch 15 taken 90930688 times.
✓ Branch 16 taken 90930688 times.
✓ Branch 17 taken 22732672 times.
✓ Branch 18 taken 90930688 times.
✓ Branch 19 taken 22732672 times.
✓ Branch 20 taken 22732672 times.
✓ Branch 21 taken 2841584 times.
2066196192 IDCT( 8)
284
34/34
✓ Branch 0 taken 675988560 times.
✓ Branch 1 taken 177314432 times.
✓ Branch 2 taken 177314432 times.
✓ Branch 3 taken 22164304 times.
✓ Branch 4 taken 354628864 times.
✓ Branch 5 taken 88657216 times.
✓ Branch 6 taken 88657216 times.
✓ Branch 7 taken 22164304 times.
✓ Branch 8 taken 88657216 times.
✓ Branch 9 taken 22164304 times.
✓ Branch 10 taken 177314432 times.
✓ Branch 11 taken 22164304 times.
✓ Branch 12 taken 16383344 times.
✓ Branch 13 taken 5780960 times.
✓ Branch 14 taken 4095836 times.
✓ Branch 15 taken 12287508 times.
✓ Branch 16 taken 3071877 times.
✓ Branch 17 taken 1023959 times.
✓ Branch 18 taken 22164304 times.
✓ Branch 19 taken 1385269 times.
✓ Branch 20 taken 710262400 times.
✓ Branch 21 taken 177314432 times.
✓ Branch 22 taken 177314432 times.
✓ Branch 23 taken 22164304 times.
✓ Branch 24 taken 354628864 times.
✓ Branch 25 taken 88657216 times.
✓ Branch 26 taken 88657216 times.
✓ Branch 27 taken 22164304 times.
✓ Branch 28 taken 88657216 times.
✓ Branch 29 taken 22164304 times.
✓ Branch 30 taken 177314432 times.
✓ Branch 31 taken 22164304 times.
✓ Branch 32 taken 22164304 times.
✓ Branch 33 taken 1385269 times.
6410218314 IDCT(16)
285
46/46
✓ Branch 0 taken 1117379424 times.
✓ Branch 1 taken 254509056 times.
✓ Branch 2 taken 254509056 times.
✓ Branch 3 taken 15906816 times.
✓ Branch 4 taken 271677880 times.
✓ Branch 5 taken 127254528 times.
✓ Branch 6 taken 127254528 times.
✓ Branch 7 taken 15906816 times.
✓ Branch 8 taken 254509056 times.
✓ Branch 9 taken 63627264 times.
✓ Branch 10 taken 63627264 times.
✓ Branch 11 taken 15906816 times.
✓ Branch 12 taken 63627264 times.
✓ Branch 13 taken 15906816 times.
✓ Branch 14 taken 127254528 times.
✓ Branch 15 taken 15906816 times.
✓ Branch 16 taken 254509056 times.
✓ Branch 17 taken 15906816 times.
✓ Branch 18 taken 13727744 times.
✓ Branch 19 taken 2179072 times.
✓ Branch 20 taken 3431936 times.
✓ Branch 21 taken 10295808 times.
✓ Branch 22 taken 3002944 times.
✓ Branch 23 taken 428992 times.
✓ Branch 24 taken 15906816 times.
✓ Branch 25 taken 497088 times.
✓ Branch 26 taken 1640090112 times.
✓ Branch 27 taken 254509056 times.
✓ Branch 28 taken 254509056 times.
✓ Branch 29 taken 15906816 times.
✓ Branch 30 taken 399700224 times.
✓ Branch 31 taken 127254528 times.
✓ Branch 32 taken 127254528 times.
✓ Branch 33 taken 15906816 times.
✓ Branch 34 taken 254509056 times.
✓ Branch 35 taken 63627264 times.
✓ Branch 36 taken 63627264 times.
✓ Branch 37 taken 15906816 times.
✓ Branch 38 taken 63627264 times.
✓ Branch 39 taken 15906816 times.
✓ Branch 40 taken 127254528 times.
✓ Branch 41 taken 15906816 times.
✓ Branch 42 taken 254509056 times.
✓ Branch 43 taken 15906816 times.
✓ Branch 44 taken 15906816 times.
✓ Branch 45 taken 497088 times.
11503479728 IDCT(32)
286
287
4/4
✓ Branch 0 taken 17376944 times.
✓ Branch 1 taken 4344236 times.
✓ Branch 2 taken 4344236 times.
✓ Branch 3 taken 1086059 times.
45614478 IDCT_DC( 4)
288
4/4
✓ Branch 0 taken 46941312 times.
✓ Branch 1 taken 5867664 times.
✓ Branch 2 taken 5867664 times.
✓ Branch 3 taken 733458 times.
107084868 IDCT_DC( 8)
289
4/4
✓ Branch 0 taken 65204736 times.
✓ Branch 1 taken 4075296 times.
✓ Branch 2 taken 4075296 times.
✓ Branch 3 taken 254706 times.
139069476 IDCT_DC(16)
290
4/4
✓ Branch 0 taken 79559680 times.
✓ Branch 1 taken 2486240 times.
✓ Branch 2 taken 2486240 times.
✓ Branch 3 taken 77695 times.
164247230 IDCT_DC(32)
291
292 #undef TR_4
293 #undef TR_8
294 #undef TR_16
295 #undef TR_32
296
297 #undef SET
298 #undef SCALE
299
300 ////////////////////////////////////////////////////////////////////////////////
301 //
302 ////////////////////////////////////////////////////////////////////////////////
303 #define ff_hevc_pel_filters ff_hevc_qpel_filters
304 #define DECL_HV_FILTER(f) \
305 const uint8_t *hf = ff_hevc_ ## f ## _filters[mx]; \
306 const uint8_t *vf = ff_hevc_ ## f ## _filters[my];
307
308 #define FW_PUT(p, f, t) \
309 static void FUNC(put_hevc_## f)(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, \
310 intptr_t mx, intptr_t my, int width) \
311 { \
312 DECL_HV_FILTER(p) \
313 FUNC(put_ ## t)(dst, src, srcstride, height, hf, vf, width); \
314 }
315
316 #define FW_PUT_UNI(p, f, t) \
317 static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
318 ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width) \
319 { \
320 DECL_HV_FILTER(p) \
321 FUNC(put_ ## t)(dst, dststride, src, srcstride, height, hf, vf, width); \
322 }
323
324 #define FW_PUT_UNI_W(p, f, t) \
325 static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
326 ptrdiff_t srcstride,int height, int denom, int wx, int ox, \
327 intptr_t mx, intptr_t my, int width) \
328 { \
329 DECL_HV_FILTER(p) \
330 FUNC(put_ ## t)(dst, dststride, src, srcstride, height, denom, wx, ox, hf, vf, width); \
331 }
332
333 #define FW_PUT_FUNCS(f, t, dir) \
334 FW_PUT(f, f ## _ ## dir, t ## _ ## dir) \
335 FW_PUT_UNI(f, f ## _uni_ ## dir, uni_ ## t ## _ ## dir) \
336 FW_PUT_UNI_W(f, f ## _uni_w_ ## dir, uni_## t ## _w_ ## dir)
337
338 4751490 FW_PUT(pel, pel_pixels, pixels)
339 4799914 FW_PUT_UNI(pel, pel_uni_pixels, uni_pixels)
340 145958 FW_PUT_UNI_W(pel, pel_uni_w_pixels, uni_w_pixels)
341
342 2974420 FW_PUT_FUNCS(qpel, luma, h )
343 2475264 FW_PUT_FUNCS(qpel, luma, v )
344 7773908 FW_PUT_FUNCS(qpel, luma, hv )
345 4767280 FW_PUT_FUNCS(epel, chroma, h )
346 3626976 FW_PUT_FUNCS(epel, chroma, v )
347 20203300 FW_PUT_FUNCS(epel, chroma, hv )
348
349 5198258 static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
350 const int16_t *src2,
351 int height, intptr_t mx, intptr_t my, int width)
352 {
353 int x, y;
354 5198258 const pixel *src = (const pixel *)_src;
355 5198258 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
356 5198258 pixel *dst = (pixel *)_dst;
357 5198258 ptrdiff_t dststride = _dststride / sizeof(pixel);
358
359 5198258 int shift = 14 + 1 - BIT_DEPTH;
360 #if BIT_DEPTH < 14
361 5198258 int offset = 1 << (shift - 1);
362 #else
363 int offset = 0;
364 #endif
365
366
2/2
✓ Branch 0 taken 38401358 times.
✓ Branch 1 taken 2599129 times.
82000974 for (y = 0; y < height; y++) {
367
2/2
✓ Branch 0 taken 967620084 times.
✓ Branch 1 taken 38401358 times.
2012042884 for (x = 0; x < width; x++)
368 1935240168 dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
369 76802716 src += srcstride;
370 76802716 dst += dststride;
371 76802716 src2 += MAX_PB_SIZE;
372 }
373 5198258 }
374
375 64154 static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
376 const int16_t *src2,
377 int height, int denom, int wx0, int wx1,
378 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
379 {
380 int x, y;
381 64154 const pixel *src = (const pixel *)_src;
382 64154 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
383 64154 pixel *dst = (pixel *)_dst;
384 64154 ptrdiff_t dststride = _dststride / sizeof(pixel);
385
386 64154 int shift = 14 + 1 - BIT_DEPTH;
387 64154 int log2Wd = denom + shift - 1;
388
389 64154 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
390 64154 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
391
2/2
✓ Branch 0 taken 671000 times.
✓ Branch 1 taken 32077 times.
1406154 for (y = 0; y < height; y++) {
392
2/2
✓ Branch 0 taken 21890728 times.
✓ Branch 1 taken 671000 times.
45123456 for (x = 0; x < width; x++) {
393 43781456 dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1));
394 }
395 1342000 src += srcstride;
396 1342000 dst += dststride;
397 1342000 src2 += MAX_PB_SIZE;
398 }
399 64154 }
400
401 ////////////////////////////////////////////////////////////////////////////////
402 //
403 ////////////////////////////////////////////////////////////////////////////////
404 #define QPEL_FILTER(src, stride) \
405 (filter[0] * src[x - 3 * stride] + \
406 filter[1] * src[x - 2 * stride] + \
407 filter[2] * src[x - stride] + \
408 filter[3] * src[x ] + \
409 filter[4] * src[x + stride] + \
410 filter[5] * src[x + 2 * stride] + \
411 filter[6] * src[x + 3 * stride] + \
412 filter[7] * src[x + 4 * stride])
413
414 1287348 static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
415 const int16_t *src2,
416 int height, intptr_t mx, intptr_t my, int width)
417 {
418 int x, y;
419 1287348 const pixel *src = (const pixel*)_src;
420 1287348 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
421 1287348 pixel *dst = (pixel *)_dst;
422 1287348 ptrdiff_t dststride = _dststride / sizeof(pixel);
423
424 1287348 const int8_t *filter = ff_hevc_qpel_filters[mx];
425
426 1287348 int shift = 14 + 1 - BIT_DEPTH;
427 #if BIT_DEPTH < 14
428 1287348 int offset = 1 << (shift - 1);
429 #else
430 int offset = 0;
431 #endif
432
433
2/2
✓ Branch 0 taken 13936548 times.
✓ Branch 1 taken 643674 times.
29160444 for (y = 0; y < height; y++) {
434
2/2
✓ Branch 0 taken 457729096 times.
✓ Branch 1 taken 13936548 times.
943331288 for (x = 0; x < width; x++)
435 915458192 dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
436 27873096 src += srcstride;
437 27873096 dst += dststride;
438 27873096 src2 += MAX_PB_SIZE;
439 }
440 1287348 }
441
442 1024872 static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride,
443 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
444 int height, intptr_t mx, intptr_t my, int width)
445 {
446 int x, y;
447 1024872 const pixel *src = (const pixel*)_src;
448 1024872 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
449 1024872 pixel *dst = (pixel *)_dst;
450 1024872 ptrdiff_t dststride = _dststride / sizeof(pixel);
451
452 1024872 const int8_t *filter = ff_hevc_qpel_filters[my];
453
454 1024872 int shift = 14 + 1 - BIT_DEPTH;
455 #if BIT_DEPTH < 14
456 1024872 int offset = 1 << (shift - 1);
457 #else
458 int offset = 0;
459 #endif
460
461
2/2
✓ Branch 0 taken 10864452 times.
✓ Branch 1 taken 512436 times.
22753776 for (y = 0; y < height; y++) {
462
2/2
✓ Branch 0 taken 348786952 times.
✓ Branch 1 taken 10864452 times.
719302808 for (x = 0; x < width; x++)
463 697573904 dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
464 21728904 src += srcstride;
465 21728904 dst += dststride;
466 21728904 src2 += MAX_PB_SIZE;
467 }
468 1024872 }
469
470 3195682 static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
471 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
472 int height, intptr_t mx, intptr_t my, int width)
473 {
474 int x, y;
475 const int8_t *filter;
476 3195682 const pixel *src = (const pixel*)_src;
477 3195682 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
478 3195682 pixel *dst = (pixel *)_dst;
479 3195682 ptrdiff_t dststride = _dststride / sizeof(pixel);
480 int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
481 3195682 int16_t *tmp = tmp_array;
482 3195682 int shift = 14 + 1 - BIT_DEPTH;
483 #if BIT_DEPTH < 14
484 3195682 int offset = 1 << (shift - 1);
485 #else
486 int offset = 0;
487 #endif
488
489 3195682 src -= QPEL_EXTRA_BEFORE * srcstride;
490 3195682 filter = ff_hevc_qpel_filters[mx];
491
2/2
✓ Branch 0 taken 44950087 times.
✓ Branch 1 taken 1597841 times.
93095856 for (y = 0; y < height + QPEL_EXTRA; y++) {
492
2/2
✓ Branch 0 taken 1304439384 times.
✓ Branch 1 taken 44950087 times.
2698778942 for (x = 0; x < width; x++)
493 2608878768 tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
494 89900174 src += srcstride;
495 89900174 tmp += MAX_PB_SIZE;
496 }
497
498 3195682 tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
499 3195682 filter = ff_hevc_qpel_filters[my];
500
501
2/2
✓ Branch 0 taken 33765200 times.
✓ Branch 1 taken 1597841 times.
70726082 for (y = 0; y < height; y++) {
502
2/2
✓ Branch 0 taken 1070725512 times.
✓ Branch 1 taken 33765200 times.
2208981424 for (x = 0; x < width; x++)
503 2141451024 dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
504 67530400 tmp += MAX_PB_SIZE;
505 67530400 dst += dststride;
506 67530400 src2 += MAX_PB_SIZE;
507 }
508 3195682 }
509
510 22810 static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
511 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
512 int height, int denom, int wx0, int wx1,
513 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
514 {
515 int x, y;
516 22810 const pixel *src = (const pixel*)_src;
517 22810 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
518 22810 pixel *dst = (pixel *)_dst;
519 22810 ptrdiff_t dststride = _dststride / sizeof(pixel);
520
521 22810 const int8_t *filter = ff_hevc_qpel_filters[mx];
522
523 22810 int shift = 14 + 1 - BIT_DEPTH;
524 22810 int log2Wd = denom + shift - 1;
525
526 22810 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
527 22810 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
528
2/2
✓ Branch 0 taken 278600 times.
✓ Branch 1 taken 11405 times.
580010 for (y = 0; y < height; y++) {
529
2/2
✓ Branch 0 taken 9869200 times.
✓ Branch 1 taken 278600 times.
20295600 for (x = 0; x < width; x++)
530 19738400 dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
531 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
532 557200 src += srcstride;
533 557200 dst += dststride;
534 557200 src2 += MAX_PB_SIZE;
535 }
536 22810 }
537
538 20448 static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
539 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
540 int height, int denom, int wx0, int wx1,
541 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
542 {
543 int x, y;
544 20448 const pixel *src = (const pixel*)_src;
545 20448 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
546 20448 pixel *dst = (pixel *)_dst;
547 20448 ptrdiff_t dststride = _dststride / sizeof(pixel);
548
549 20448 const int8_t *filter = ff_hevc_qpel_filters[my];
550
551 20448 int shift = 14 + 1 - BIT_DEPTH;
552 20448 int log2Wd = denom + shift - 1;
553
554 20448 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
555 20448 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
556
2/2
✓ Branch 0 taken 236324 times.
✓ Branch 1 taken 10224 times.
493096 for (y = 0; y < height; y++) {
557
2/2
✓ Branch 0 taken 7942032 times.
✓ Branch 1 taken 236324 times.
16356712 for (x = 0; x < width; x++)
558 15884064 dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
559 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
560 472648 src += srcstride;
561 472648 dst += dststride;
562 472648 src2 += MAX_PB_SIZE;
563 }
564 20448 }
565
566 71042 static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
567 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
568 int height, int denom, int wx0, int wx1,
569 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
570 {
571 int x, y;
572 const int8_t *filter;
573 71042 const pixel *src = (const pixel*)_src;
574 71042 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
575 71042 pixel *dst = (pixel *)_dst;
576 71042 ptrdiff_t dststride = _dststride / sizeof(pixel);
577 int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
578 71042 int16_t *tmp = tmp_array;
579 71042 int shift = 14 + 1 - BIT_DEPTH;
580 71042 int log2Wd = denom + shift - 1;
581
582 71042 src -= QPEL_EXTRA_BEFORE * srcstride;
583 71042 filter = ff_hevc_qpel_filters[mx];
584
2/2
✓ Branch 0 taken 1131207 times.
✓ Branch 1 taken 35521 times.
2333456 for (y = 0; y < height + QPEL_EXTRA; y++) {
585
2/2
✓ Branch 0 taken 36956072 times.
✓ Branch 1 taken 1131207 times.
76174558 for (x = 0; x < width; x++)
586 73912144 tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
587 2262414 src += srcstride;
588 2262414 tmp += MAX_PB_SIZE;
589 }
590
591 71042 tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
592 71042 filter = ff_hevc_qpel_filters[my];
593
594 71042 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
595 71042 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
596
2/2
✓ Branch 0 taken 882560 times.
✓ Branch 1 taken 35521 times.
1836162 for (y = 0; y < height; y++) {
597
2/2
✓ Branch 0 taken 30984848 times.
✓ Branch 1 taken 882560 times.
63734816 for (x = 0; x < width; x++)
598 61969696 dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
599 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
600 1765120 tmp += MAX_PB_SIZE;
601 1765120 dst += dststride;
602 1765120 src2 += MAX_PB_SIZE;
603 }
604 71042 }
605
606 ////////////////////////////////////////////////////////////////////////////////
607 //
608 ////////////////////////////////////////////////////////////////////////////////
609 #define EPEL_FILTER(src, stride) \
610 (filter[0] * src[x - stride] + \
611 filter[1] * src[x] + \
612 filter[2] * src[x + stride] + \
613 filter[3] * src[x + 2 * stride])
614
615 2020670 static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride,
616 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
617 int height, intptr_t mx, intptr_t my, int width)
618 {
619 int x, y;
620 2020670 const pixel *src = (const pixel *)_src;
621 2020670 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
622 2020670 pixel *dst = (pixel *)_dst;
623 2020670 ptrdiff_t dststride = _dststride / sizeof(pixel);
624 2020670 const int8_t *filter = ff_hevc_epel_filters[mx];
625 2020670 int shift = 14 + 1 - BIT_DEPTH;
626 #if BIT_DEPTH < 14
627 2020670 int offset = 1 << (shift - 1);
628 #else
629 int offset = 0;
630 #endif
631
632
2/2
✓ Branch 0 taken 11523918 times.
✓ Branch 1 taken 1010335 times.
25068506 for (y = 0; y < height; y++) {
633
2/2
✓ Branch 0 taken 186679348 times.
✓ Branch 1 taken 11523918 times.
396406532 for (x = 0; x < width; x++) {
634 373358696 dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
635 }
636 23047836 dst += dststride;
637 23047836 src += srcstride;
638 23047836 src2 += MAX_PB_SIZE;
639 }
640 2020670 }
641
642 1448070 static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride,
643 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
644 int height, intptr_t mx, intptr_t my, int width)
645 {
646 int x, y;
647 1448070 const pixel *src = (const pixel *)_src;
648 1448070 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
649 1448070 const int8_t *filter = ff_hevc_epel_filters[my];
650 1448070 pixel *dst = (pixel *)_dst;
651 1448070 ptrdiff_t dststride = _dststride / sizeof(pixel);
652 1448070 int shift = 14 + 1 - BIT_DEPTH;
653 #if BIT_DEPTH < 14
654 1448070 int offset = 1 << (shift - 1);
655 #else
656 int offset = 0;
657 #endif
658
659
2/2
✓ Branch 0 taken 8683602 times.
✓ Branch 1 taken 724035 times.
18815274 for (y = 0; y < height; y++) {
660
2/2
✓ Branch 0 taken 141364564 times.
✓ Branch 1 taken 8683602 times.
300096332 for (x = 0; x < width; x++)
661 282729128 dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
662 17367204 dst += dststride;
663 17367204 src += srcstride;
664 17367204 src2 += MAX_PB_SIZE;
665 }
666 1448070 }
667
668 8135842 static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
669 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
670 int height, intptr_t mx, intptr_t my, int width)
671 {
672 int x, y;
673 8135842 const pixel *src = (const pixel *)_src;
674 8135842 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
675 8135842 pixel *dst = (pixel *)_dst;
676 8135842 ptrdiff_t dststride = _dststride / sizeof(pixel);
677 8135842 const int8_t *filter = ff_hevc_epel_filters[mx];
678 int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
679 8135842 int16_t *tmp = tmp_array;
680 8135842 int shift = 14 + 1 - BIT_DEPTH;
681 #if BIT_DEPTH < 14
682 8135842 int offset = 1 << (shift - 1);
683 #else
684 int offset = 0;
685 #endif
686
687 8135842 src -= EPEL_EXTRA_BEFORE * srcstride;
688
689
2/2
✓ Branch 0 taken 58184829 times.
✓ Branch 1 taken 4067921 times.
124505500 for (y = 0; y < height + EPEL_EXTRA; y++) {
690
2/2
✓ Branch 0 taken 848106018 times.
✓ Branch 1 taken 58184829 times.
1812581694 for (x = 0; x < width; x++)
691 1696212036 tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
692 116369658 src += srcstride;
693 116369658 tmp += MAX_PB_SIZE;
694 }
695
696 8135842 tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
697 8135842 filter = ff_hevc_epel_filters[my];
698
699
2/2
✓ Branch 0 taken 45981066 times.
✓ Branch 1 taken 4067921 times.
100097974 for (y = 0; y < height; y++) {
700
2/2
✓ Branch 0 taken 719577684 times.
✓ Branch 1 taken 45981066 times.
1531117500 for (x = 0; x < width; x++)
701 1439155368 dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
702 91962132 tmp += MAX_PB_SIZE;
703 91962132 dst += dststride;
704 91962132 src2 += MAX_PB_SIZE;
705 }
706 8135842 }
707
/*
 * Weighted bi-prediction using the EPEL filter along a row (EPEL_FILTER(src, 1)).
 *
 * For every output pixel the filtered source sample is shifted right by
 * (BIT_DEPTH - 8), weighted by wx1 and combined with src2[x] weighted by wx0,
 * plus an offset/rounding term built from ox0 and ox1. The sum is shifted
 * right by (log2Wd + 1) and clipped with av_clip_pixel().
 *
 * _dst/_dststride and _src/_srcstride are cast to pixel pointers and divided
 * by sizeof(pixel) respectively; src2 is advanced by MAX_PB_SIZE per row.
 * The filter taps are selected with mx; my is not referenced directly in this
 * body. NOTE(review): EPEL_FILTER is defined outside this chunk and presumably
 * reads the local `filter` and `x` -- confirm.
 */
708 33080 static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
709 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
710 int height, int denom, int wx0, int wx1,
711 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
712 {
713 int x, y;
714 33080 const pixel *src = (const pixel *)_src;
715 33080 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
716 33080 pixel *dst = (pixel *)_dst;
717 33080 ptrdiff_t dststride = _dststride / sizeof(pixel);
718 33080 const int8_t *filter = ff_hevc_epel_filters[mx];
719 33080 int shift = 14 + 1 - BIT_DEPTH;
720 33080 int log2Wd = denom + shift - 1;
721
/* Scale both offsets by 1 << (BIT_DEPTH - 8). Written as a multiplication,
 * presumably so that negative offsets are not left-shifted. */
722 33080 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
723 33080 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
/* One output row per iteration; wx1 weights the filtered src, wx0 weights src2. */
724
2/2
✓ Branch 0 taken 233792 times.
✓ Branch 1 taken 16540 times.
500664 for (y = 0; y < height; y++) {
725
2/2
✓ Branch 0 taken 5306152 times.
✓ Branch 1 taken 233792 times.
11079888 for (x = 0; x < width; x++)
726 10612304 dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
727 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
728 467584 src += srcstride;
729 467584 dst += dststride;
730 467584 src2 += MAX_PB_SIZE;
731 }
732 33080 }
733
/*
 * Weighted bi-prediction using the EPEL filter down a column
 * (EPEL_FILTER(src, srcstride)).
 *
 * Same arithmetic as the row-wise variant: the filtered sample is shifted
 * right by (BIT_DEPTH - 8), weighted by wx1, added to src2[x] * wx0 and to the
 * offset/rounding term, shifted right by (log2Wd + 1) and clipped with
 * av_clip_pixel().
 *
 * The filter taps are selected with my; mx is not referenced directly in this
 * body. NOTE(review): EPEL_FILTER is defined outside this chunk and presumably
 * reads the local `filter` and `x` -- confirm.
 */
734 24552 static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
735 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
736 int height, int denom, int wx0, int wx1,
737 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
738 {
739 int x, y;
740 24552 const pixel *src = (const pixel *)_src;
741 24552 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
742 24552 const int8_t *filter = ff_hevc_epel_filters[my];
743 24552 pixel *dst = (pixel *)_dst;
744 24552 ptrdiff_t dststride = _dststride / sizeof(pixel);
745 24552 int shift = 14 + 1 - BIT_DEPTH;
746 24552 int log2Wd = denom + shift - 1;
747
/* Scale both offsets by 1 << (BIT_DEPTH - 8). Written as a multiplication,
 * presumably so that negative offsets are not left-shifted. */
748 24552 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
749 24552 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
/* One output row per iteration; src2 rows are MAX_PB_SIZE apart. */
750
2/2
✓ Branch 0 taken 167956 times.
✓ Branch 1 taken 12276 times.
360464 for (y = 0; y < height; y++) {
751
2/2
✓ Branch 0 taken 3922696 times.
✓ Branch 1 taken 167956 times.
8181304 for (x = 0; x < width; x++)
752 7845392 dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
753 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
754 335912 src += srcstride;
755 335912 dst += dststride;
756 335912 src2 += MAX_PB_SIZE;
757 }
758 24552 }
759
/*
 * Weighted bi-prediction using the EPEL filter in two passes.
 *
 * Pass 1 filters along rows (taps selected by mx) into the local tmp_array,
 * producing height + EPEL_EXTRA rows with a row pitch of MAX_PB_SIZE, starting
 * EPEL_EXTRA_BEFORE rows above the incoming src pointer.
 * Pass 2 filters tmp down columns (taps selected by my), shifts the result
 * right by 6, weights it by wx1, adds src2[x] * wx0 and the offset/rounding
 * term, shifts right by (log2Wd + 1) and clips with av_clip_pixel().
 *
 * NOTE(review): EPEL_FILTER is defined outside this chunk and presumably reads
 * the local `filter` and `x` -- confirm.
 */
760 166916 static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
761 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
762 int height, int denom, int wx0, int wx1,
763 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
764 {
765 int x, y;
766 166916 const pixel *src = (const pixel *)_src;
767 166916 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
768 166916 pixel *dst = (pixel *)_dst;
769 166916 ptrdiff_t dststride = _dststride / sizeof(pixel);
770 166916 const int8_t *filter = ff_hevc_epel_filters[mx];
771 int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
772 166916 int16_t *tmp = tmp_array;
773 166916 int shift = 14 + 1 - BIT_DEPTH;
774 166916 int log2Wd = denom + shift - 1;
775
/* Back up so the first pass also covers the rows above the block that the
 * second (column) pass reads. */
776 166916 src -= EPEL_EXTRA_BEFORE * srcstride;
777
/* Pass 1: row filter into tmp, one tmp row (MAX_PB_SIZE entries apart) per source row. */
778
2/2
✓ Branch 0 taken 1288910 times.
✓ Branch 1 taken 83458 times.
2744736 for (y = 0; y < height + EPEL_EXTRA; y++) {
779
2/2
✓ Branch 0 taken 21782220 times.
✓ Branch 1 taken 1288910 times.
46142260 for (x = 0; x < width; x++)
780 43564440 tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
781 2577820 src += srcstride;
782 2577820 tmp += MAX_PB_SIZE;
783 }
784
/* Rewind tmp to the row matching the first output row and switch to the
 * taps selected by my for the column pass. */
785 166916 tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
786 166916 filter = ff_hevc_epel_filters[my];
787
/* Scale both offsets by 1 << (BIT_DEPTH - 8). Written as a multiplication,
 * presumably so that negative offsets are not left-shifted. */
788 166916 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
789 166916 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
/* Pass 2: column filter over tmp, then weighted combination with src2. */
790
2/2
✓ Branch 0 taken 1038536 times.
✓ Branch 1 taken 83458 times.
2243988 for (y = 0; y < height; y++) {
791
2/2
✓ Branch 0 taken 18780552 times.
✓ Branch 1 taken 1038536 times.
39638176 for (x = 0; x < width; x++)
792 37561104 dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
793 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
794 2077072 tmp += MAX_PB_SIZE;
795 2077072 dst += dststride;
796 2077072 src2 += MAX_PB_SIZE;
797 }
798 166916 }
799
/*
 * Sample-access shorthands for the loop filters below. They expand to indexed
 * reads of `pix`, so `pix`, `xstride` and `ystride` must be in scope wherever
 * they are used. P3..P0 sit at offsets -4..-1 and Q0..Q3 at offsets 0..3 along
 * xstride (presumably the two sides of the edge being filtered). The T*
 * variants address the same positions three lines further along ystride.
 */
800 // line zero
801 #define P3 pix[-4 * xstride]
802 #define P2 pix[-3 * xstride]
803 #define P1 pix[-2 * xstride]
804 #define P0 pix[-1 * xstride]
805 #define Q0 pix[0 * xstride]
806 #define Q1 pix[1 * xstride]
807 #define Q2 pix[2 * xstride]
808 #define Q3 pix[3 * xstride]
809
810 // line three. used only for deblocking decision
811 #define TP3 pix[-4 * xstride + 3 * ystride]
812 #define TP2 pix[-3 * xstride + 3 * ystride]
813 #define TP1 pix[-2 * xstride + 3 * ystride]
814 #define TP0 pix[-1 * xstride + 3 * ystride]
815 #define TQ0 pix[0 * xstride + 3 * ystride]
816 #define TQ1 pix[1 * xstride + 3 * ystride]
817 #define TQ2 pix[2 * xstride + 3 * ystride]
818 #define TQ3 pix[3 * xstride + 3 * ystride]
819
820 #include "h26x/h2656_deblock_template.c"
821
/*
 * Luma deblocking decision and dispatch for two consecutive 4-line segments.
 *
 * _xstride steps across the edge (P/Q samples), _ystride steps along it; both
 * are divided by sizeof(pixel) before use. beta and each _tc[j] are scaled by
 * 1 << (BIT_DEPTH - 8). For segment j, _tc[j], _no_p[j] and _no_q[j] apply.
 *
 * Per segment: if the activity measure d0 + d3 is below beta, either the
 * strong or the weak filter is invoked; otherwise the segment is left alone.
 * NOTE(review): loop_filter_luma_strong/weak are not defined in this chunk;
 * presumably they come from the included h26x/h2656_deblock_template.c.
 */
822 69369456 static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
823 ptrdiff_t _xstride, ptrdiff_t _ystride,
824 int beta, const int *_tc,
825 const uint8_t *_no_p, const uint8_t *_no_q)
826 {
827 69369456 ptrdiff_t xstride = _xstride / sizeof(pixel);
828 69369456 ptrdiff_t ystride = _ystride / sizeof(pixel);
829
830 69369456 beta <<= BIT_DEPTH - 8;
831
/* Two segments of four lines each along the edge. */
832
2/2
✓ Branch 0 taken 69369456 times.
✓ Branch 1 taken 34684728 times.
208108368 for (int j = 0; j < 2; j++) {
833 138738912 pixel* pix = (pixel*)_pix + j * 4 * ystride;
/* Absolute second differences on each side, sampled on line zero (dp0, dq0)
 * and line three (dp3, dq3) of the segment. */
834 138738912 const int dp0 = abs(P2 - 2 * P1 + P0);
835 138738912 const int dq0 = abs(Q2 - 2 * Q1 + Q0);
836 138738912 const int dp3 = abs(TP2 - 2 * TP1 + TP0);
837 138738912 const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
838 138738912 const int d0 = dp0 + dq0;
839 138738912 const int d3 = dp3 + dq3;
840 138738912 const int tc = _tc[j] << (BIT_DEPTH - 8);
841 138738912 const int no_p = _no_p[j];
842 138738912 const int no_q = _no_q[j];
843
/* Filter this segment only when the combined activity is below beta. */
844
2/2
✓ Branch 0 taken 51382763 times.
✓ Branch 1 taken 17986693 times.
138738912 if (d0 + d3 < beta) {
845 102765526 const int beta_3 = beta >> 3;
846 102765526 const int beta_2 = beta >> 2;
847 102765526 const int tc25 = ((tc * 5 + 1) >> 1);
848
/* Strong filter requires all three tests to hold on both line zero and
 * line three: flatness (< beta >> 3), edge step (< tc25), activity (< beta >> 2). */
849
4/4
✓ Branch 0 taken 14508596 times.
✓ Branch 1 taken 36874167 times.
✓ Branch 2 taken 14059633 times.
✓ Branch 3 taken 448963 times.
102765526 if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
850
4/4
✓ Branch 0 taken 10523165 times.
✓ Branch 1 taken 3536468 times.
✓ Branch 2 taken 10431265 times.
✓ Branch 3 taken 91900 times.
28119266 abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
851
4/4
✓ Branch 0 taken 10073806 times.
✓ Branch 1 taken 357459 times.
✓ Branch 2 taken 9859243 times.
✓ Branch 3 taken 214563 times.
40581016 (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
852 19718486 const int tc2 = tc << 1;
853 19718486 FUNC(loop_filter_luma_strong)(pix, xstride, ystride, tc2, tc2, tc2, no_p, no_q);
854 } else {
/* Weak filter: nd_p / nd_q default to 1 and become 2 when the
 * corresponding side's activity is below (beta + beta/2) >> 3. */
855 83047040 int nd_p = 1;
856 83047040 int nd_q = 1;
857
2/2
✓ Branch 0 taken 29186469 times.
✓ Branch 1 taken 12337051 times.
83047040 if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
858 58372938 nd_p = 2;
859
2/2
✓ Branch 0 taken 28413322 times.
✓ Branch 1 taken 13110198 times.
83047040 if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
860 56826644 nd_q = 2;
861 83047040 FUNC(loop_filter_luma_weak)(pix, xstride, ystride, tc, beta, no_p, no_q, nd_p, nd_q);
862 }
863 }
864 }
865 69369456 }
866
/*
 * Chroma deblocking dispatch for two consecutive segments of `size` (4) lines.
 *
 * Strides are divided by sizeof(pixel) before use. For segment j the scaled
 * value _tc[j] << (BIT_DEPTH - 8) is used; segments whose scaled tc is not
 * positive are skipped. Otherwise loop_filter_chroma_weak is called with
 * _no_p[j] and _no_q[j].
 * NOTE(review): loop_filter_chroma_weak is not defined in this chunk;
 * presumably it comes from the included h26x/h2656_deblock_template.c.
 */
867 21773032 static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
868 ptrdiff_t _ystride, const int *_tc,
869 const uint8_t *_no_p, const uint8_t *_no_q)
870 {
871 int no_p, no_q;
872 21773032 ptrdiff_t xstride = _xstride / sizeof(pixel);
873 21773032 ptrdiff_t ystride = _ystride / sizeof(pixel);
874 21773032 const int size = 4;
875
/* Segment j starts j * size lines along ystride from _pix. */
876
2/2
✓ Branch 0 taken 21773032 times.
✓ Branch 1 taken 10886516 times.
65319096 for (int j = 0; j < 2; j++) {
877 43546064 pixel *pix = (pixel *)_pix + j * size * ystride;
878 43546064 const int tc = _tc[j] << (BIT_DEPTH - 8);
879
2/2
✓ Branch 0 taken 19498373 times.
✓ Branch 1 taken 2274659 times.
43546064 if (tc > 0) {
880 38996746 no_p = _no_p[j];
881 38996746 no_q = _no_q[j];
882
883 38996746 FUNC(loop_filter_chroma_weak)(pix, xstride, ystride, size, tc, no_p, no_q);
884 }
885 }
886 21773032 }
887
/*
 * Chroma filter entry point that passes `stride` as the across-edge step
 * (xstride) and sizeof(pixel) as the along-edge step (ystride), so P/Q samples
 * lie in neighbouring rows.
 */
888 10601606 static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
889 const int32_t *tc, const uint8_t *no_p,
890 const uint8_t *no_q)
891 {
892 10601606 FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
893 10601606 }
894
/*
 * Chroma filter entry point that passes sizeof(pixel) as the across-edge step
 * (xstride) and `stride` as the along-edge step (ystride), so P/Q samples lie
 * in neighbouring columns of the same row.
 */
895 11171426 static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
896 const int32_t *tc, const uint8_t *no_p,
897 const uint8_t *no_q)
898 {
899 11171426 FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
900 11171426 }
901
/*
 * Luma filter entry point that passes `stride` as the across-edge step
 * (xstride) and sizeof(pixel) as the along-edge step (ystride), so P/Q samples
 * lie in neighbouring rows.
 */
902 35019436 static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
903 int beta, const int32_t *tc, const uint8_t *no_p,
904 const uint8_t *no_q)
905 {
906 35019436 FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
907 beta, tc, no_p, no_q);
908 35019436 }
909
/*
 * Luma filter entry point that passes sizeof(pixel) as the across-edge step
 * (xstride) and `stride` as the along-edge step (ystride), so P/Q samples lie
 * in neighbouring columns of the same row.
 */
910 34350020 static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
911 int beta, const int32_t *tc, const uint8_t *no_p,
912 const uint8_t *no_q)
913 {
914 34350020 FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
915 beta, tc, no_p, no_q);
916 34350020 }
917
/* Remove the P*/Q* and TP*/TQ* sample-access macros so the short names are
 * no longer defined past this point. */
918 #undef P3
919 #undef P2
920 #undef P1
921 #undef P0
922 #undef Q0
923 #undef Q1
924 #undef Q2
925 #undef Q3
926
927 #undef TP3
928 #undef TP2
929 #undef TP1
930 #undef TP0
931 #undef TQ0
932 #undef TQ1
933 #undef TQ2
934 #undef TQ3
935