FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/hevc/dsp_template.c
Date: 2026-04-24 19:58:39
Exec Total Coverage
Lines: 401 401 100.0%
Functions: 213 232 91.8%
Branches: 248 258 96.1%

Line Branch Exec Source
1 /*
2 * HEVC video decoder
3 *
4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "get_bits.h"
24 #include "hevcdec.h"
25
26 #include "bit_depth_template.c"
27 #include "dsp.h"
28 #include "h26x/h2656_sao_template.c"
29 #include "h26x/h2656_inter_template.c"
30
31 74598 static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
32 GetBitContext *gb, int pcm_bit_depth)
33 {
34 int x, y;
35 74598 pixel *dst = (pixel *)_dst;
36
37 74598 stride /= sizeof(pixel);
38
39
2/2
✓ Branch 0 taken 327424 times.
✓ Branch 1 taken 37299 times.
729446 for (y = 0; y < height; y++) {
40
2/2
✓ Branch 0 taken 4382080 times.
✓ Branch 1 taken 327424 times.
9419008 for (x = 0; x < width; x++)
41 8764160 dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
42 654848 dst += stride;
43 }
44 74598 }
45
46 32505218 static av_always_inline void FUNC(add_residual)(uint8_t *restrict dst8, const int16_t *restrict res,
47 ptrdiff_t stride, int size)
48 {
49 int x, y;
50
51
2/2
✓ Branch 0 taken 132390696 times.
✓ Branch 1 taken 16252609 times.
297286610 for (y = 0; y < size; y++) {
52 264781392 pixel *restrict dst = (pixel *)dst8;
53
2/2
✓ Branch 0 taken 1841127232 times.
✓ Branch 1 taken 132390696 times.
3947035856 for (x = 0; x < size; x++) {
54 3682254464 dst[x] = av_clip_pixel(dst[x] + *res);
55 3682254464 res++;
56 }
57 264781392 dst8 += stride;
58 }
59 32505218 }
60
61 17708288 static void FUNC(add_residual4x4)(uint8_t *_dst, const int16_t *res,
62 ptrdiff_t stride)
63 {
64 17708288 FUNC(add_residual)(_dst, res, stride, 4);
65 17708288 }
66
67 8674138 static void FUNC(add_residual8x8)(uint8_t *_dst, const int16_t *res,
68 ptrdiff_t stride)
69 {
70 8674138 FUNC(add_residual)(_dst, res, stride, 8);
71 8674138 }
72
73 4460888 static void FUNC(add_residual16x16)(uint8_t *_dst, const int16_t *res,
74 ptrdiff_t stride)
75 {
76 4460888 FUNC(add_residual)(_dst, res, stride, 16);
77 4460888 }
78
79 1661904 static void FUNC(add_residual32x32)(uint8_t *_dst, const int16_t *res,
80 ptrdiff_t stride)
81 {
82 1661904 FUNC(add_residual)(_dst, res, stride, 32);
83 1661904 }
84
85 54126 static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
86 {
87 54126 int16_t *coeffs = (int16_t *) _coeffs;
88 int x, y;
89 54126 int size = 1 << log2_size;
90
91
2/2
✓ Branch 0 taken 9989 times.
✓ Branch 1 taken 17074 times.
54126 if (mode) {
92 19978 coeffs += size;
93
2/2
✓ Branch 0 taken 56315 times.
✓ Branch 1 taken 9989 times.
132608 for (y = 0; y < size - 1; y++) {
94
2/2
✓ Branch 0 taken 523168 times.
✓ Branch 1 taken 56315 times.
1158966 for (x = 0; x < size; x++)
95 1046336 coeffs[x] += coeffs[x - size];
96 112630 coeffs += size;
97 }
98 } else {
99
2/2
✓ Branch 0 taken 124352 times.
✓ Branch 1 taken 17074 times.
282852 for (y = 0; y < size; y++) {
100
2/2
✓ Branch 0 taken 1049312 times.
✓ Branch 1 taken 124352 times.
2347328 for (x = 1; x < size; x++)
101 2098624 coeffs[x] += coeffs[x - 1];
102 248704 coeffs += size;
103 }
104 }
105 54126 }
106
107 /**
108 * HEVC transform dequantization (ITU-T H.265 8.6.3)
109 *
110 * @param coeffs transform coefficient buffer (in-place)
111 * @param log2_size log2 of transform block size, range: 2..5 (4x4 to 32x32)
112 * This value comes from recursive split_transform_flag parsing
113 * in the bitstream, bounded by log2_min_tb_size (min 2) and
114 * log2_max_trafo_size (max 5) from SPS.
115 *
116 * Formula: shift = 15 - BIT_DEPTH - log2_size
117 *
118 * bit_depth | 4x4 (2) | 8x8 (3) | 16x16 (4) | 32x32 (5)
119 * ----------+---------+---------+-----------+----------
120 * 8-bit | 5 | 4 | 3 | 2 (shift right)
121 * 10-bit | 3 | 2 | 1 | 0 (shift right / no-op)
122 * 12-bit | 1 | 0 | -1 | -2 (shift right / no-op / shift left)
123 *
124 * When shift == 0, output equals input (identity transform), so we skip
125 * the loop entirely for better performance.
126 */
127 888660 static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
128 {
129 888660 int shift = 15 - BIT_DEPTH - log2_size;
130 int x, y;
131 888660 int size = 1 << log2_size;
132
133
2/2
✓ Branch 0 taken 145123 times.
✓ Branch 1 taken 9294 times.
308834 if (BIT_DEPTH <= 9 || shift > 0) {
134 870072 int offset = 1 << (shift - 1);
135
2/2
✓ Branch 0 taken 1814020 times.
✓ Branch 1 taken 435036 times.
4498112 for (y = 0; y < size; y++) {
136
2/2
✓ Branch 0 taken 8282832 times.
✓ Branch 1 taken 1814020 times.
20193704 for (x = 0; x < size; x++) {
137 16565664 *coeffs = (*coeffs + offset) >> shift;
138 16565664 coeffs++;
139 }
140 }
141
2/2
✓ Branch 0 taken 80 times.
✓ Branch 1 taken 9212 times.
18584 } else if (BIT_DEPTH > 10 && shift < 0) {
142
2/2
✓ Branch 0 taken 1408 times.
✓ Branch 1 taken 80 times.
2976 for (y = 0; y < size; y++) {
143
2/2
✓ Branch 0 taken 26624 times.
✓ Branch 1 taken 1408 times.
56064 for (x = 0; x < size; x++) {
144 53248 *coeffs = *(uint16_t*)coeffs << -shift;
145 53248 coeffs++;
146 }
147 }
148 }
149 /* shift == 0: no operation needed (identity transform) */
150 888660 }
151
152 #define SET(dst, x) (dst) = (x)
153 #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
154
155 #define TR_4x4_LUMA(dst, src, step, assign) \
156 do { \
157 int c0 = src[0 * step] + src[2 * step]; \
158 int c1 = src[2 * step] + src[3 * step]; \
159 int c2 = src[0 * step] - src[3 * step]; \
160 int c3 = 74 * src[1 * step]; \
161 \
162 assign(dst[2 * step], 74 * (src[0 * step] - \
163 src[2 * step] + \
164 src[3 * step])); \
165 assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \
166 assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \
167 assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \
168 } while (0)
169
170 8023490 static void FUNC(transform_4x4_luma)(int16_t *coeffs)
171 {
172 int i;
173 8023490 int shift = 7;
174 8023490 int add = 1 << (shift - 1);
175 8023490 int16_t *src = coeffs;
176
177
2/2
✓ Branch 0 taken 16046980 times.
✓ Branch 1 taken 4011745 times.
40117450 for (i = 0; i < 4; i++) {
178 32093960 TR_4x4_LUMA(src, src, 4, SCALE);
179 32093960 src++;
180 }
181
182 8023490 shift = 20 - BIT_DEPTH;
183 8023490 add = 1 << (shift - 1);
184
2/2
✓ Branch 0 taken 16046980 times.
✓ Branch 1 taken 4011745 times.
40117450 for (i = 0; i < 4; i++) {
185 32093960 TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
186 32093960 coeffs += 4;
187 }
188 8023490 }
189
190 #undef TR_4x4_LUMA
191
192 #define TR_4(dst, src, dstep, sstep, assign, end) \
193 do { \
194 const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
195 const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
196 const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
197 const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
198 \
199 assign(dst[0 * dstep], e0 + o0); \
200 assign(dst[1 * dstep], e1 + o1); \
201 assign(dst[2 * dstep], e1 - o1); \
202 assign(dst[3 * dstep], e0 - o0); \
203 } while (0)
204
205 #define TR_8(dst, src, dstep, sstep, assign, end) \
206 do { \
207 int i, j; \
208 int e_8[4]; \
209 int o_8[4] = { 0 }; \
210 for (i = 0; i < 4; i++) \
211 for (j = 1; j < end; j += 2) \
212 o_8[i] += transform[4 * j][i] * src[j * sstep]; \
213 TR_4(e_8, src, 1, 2 * sstep, SET, 4); \
214 \
215 for (i = 0; i < 4; i++) { \
216 assign(dst[i * dstep], e_8[i] + o_8[i]); \
217 assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \
218 } \
219 } while (0)
220
221 #define TR_16(dst, src, dstep, sstep, assign, end) \
222 do { \
223 int i, j; \
224 int e_16[8]; \
225 int o_16[8] = { 0 }; \
226 for (i = 0; i < 8; i++) \
227 for (j = 1; j < end; j += 2) \
228 o_16[i] += transform[2 * j][i] * src[j * sstep]; \
229 TR_8(e_16, src, 1, 2 * sstep, SET, 8); \
230 \
231 for (i = 0; i < 8; i++) { \
232 assign(dst[i * dstep], e_16[i] + o_16[i]); \
233 assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \
234 } \
235 } while (0)
236
237 #define TR_32(dst, src, dstep, sstep, assign, end) \
238 do { \
239 int i, j; \
240 int e_32[16]; \
241 int o_32[16] = { 0 }; \
242 for (i = 0; i < 16; i++) \
243 for (j = 1; j < end; j += 2) \
244 o_32[i] += transform[j][i] * src[j * sstep]; \
245 TR_16(e_32, src, 1, 2 * sstep, SET, end / 2); \
246 \
247 for (i = 0; i < 16; i++) { \
248 assign(dst[i * dstep], e_32[i] + o_32[i]); \
249 assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \
250 } \
251 } while (0)
252
253 #define IDCT_VAR4(H) \
254 int limit2 = FFMIN(col_limit + 4, H)
255 #define IDCT_VAR8(H) \
256 int limit = FFMIN(col_limit, H); \
257 int limit2 = FFMIN(col_limit + 4, H)
258 #define IDCT_VAR16(H) IDCT_VAR8(H)
259 #define IDCT_VAR32(H) IDCT_VAR8(H)
260
261 #define IDCT(H) \
262 static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs, \
263 int col_limit) \
264 { \
265 int i; \
266 int shift = 7; \
267 int add = 1 << (shift - 1); \
268 int16_t *src = coeffs; \
269 IDCT_VAR ## H(H); \
270 \
271 for (i = 0; i < H; i++) { \
272 TR_ ## H(src, src, H, H, SCALE, limit2); \
273 if (limit2 < H && i%4 == 0 && !!i) \
274 limit2 -= 4; \
275 src++; \
276 } \
277 \
278 shift = 20 - BIT_DEPTH; \
279 add = 1 << (shift - 1); \
280 for (i = 0; i < H; i++) { \
281 TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \
282 coeffs += H; \
283 } \
284 }
285
286 #define IDCT_DC(H) \
287 static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs) \
288 { \
289 int i, j; \
290 int shift = 14 - BIT_DEPTH; \
291 int add = 1 << (shift - 1); \
292 int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \
293 \
294 for (j = 0; j < H; j++) { \
295 for (i = 0; i < H; i++) { \
296 coeffs[i + j * H] = coeff; \
297 } \
298 } \
299 }
300
301
5/10
✗ Branch 0 not taken.
✓ Branch 1 taken 11060940 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 11060940 times.
✓ Branch 7 taken 2765235 times.
✓ Branch 8 taken 11060940 times.
✓ Branch 9 taken 2765235 times.
49774230 IDCT( 4)
302
17/22
✓ Branch 0 taken 423574784 times.
✓ Branch 1 taken 105893696 times.
✓ Branch 2 taken 105893696 times.
✓ Branch 3 taken 26473424 times.
✓ Branch 4 taken 105893696 times.
✓ Branch 5 taken 26473424 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 26473424 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 26473424 times.
✓ Branch 13 taken 3309178 times.
✓ Branch 14 taken 297800192 times.
✓ Branch 15 taken 105893696 times.
✓ Branch 16 taken 105893696 times.
✓ Branch 17 taken 26473424 times.
✓ Branch 18 taken 105893696 times.
✓ Branch 19 taken 26473424 times.
✓ Branch 20 taken 26473424 times.
✓ Branch 21 taken 3309178 times.
2402411572 IDCT( 8)
303
34/34
✓ Branch 0 taken 856155104 times.
✓ Branch 1 taken 226427520 times.
✓ Branch 2 taken 226427520 times.
✓ Branch 3 taken 28303440 times.
✓ Branch 4 taken 452855040 times.
✓ Branch 5 taken 113213760 times.
✓ Branch 6 taken 113213760 times.
✓ Branch 7 taken 28303440 times.
✓ Branch 8 taken 113213760 times.
✓ Branch 9 taken 28303440 times.
✓ Branch 10 taken 226427520 times.
✓ Branch 11 taken 28303440 times.
✓ Branch 12 taken 21047264 times.
✓ Branch 13 taken 7256176 times.
✓ Branch 14 taken 5261816 times.
✓ Branch 15 taken 15785448 times.
✓ Branch 16 taken 3946362 times.
✓ Branch 17 taken 1315454 times.
✓ Branch 18 taken 28303440 times.
✓ Branch 19 taken 1768965 times.
✓ Branch 20 taken 899885568 times.
✓ Branch 21 taken 226427520 times.
✓ Branch 22 taken 226427520 times.
✓ Branch 23 taken 28303440 times.
✓ Branch 24 taken 452855040 times.
✓ Branch 25 taken 113213760 times.
✓ Branch 26 taken 113213760 times.
✓ Branch 27 taken 28303440 times.
✓ Branch 28 taken 113213760 times.
✓ Branch 29 taken 28303440 times.
✓ Branch 30 taken 226427520 times.
✓ Branch 31 taken 28303440 times.
✓ Branch 32 taken 28303440 times.
✓ Branch 33 taken 1768965 times.
8157383434 IDCT(16)
304
46/46
✓ Branch 0 taken 1339889984 times.
✓ Branch 1 taken 350994432 times.
✓ Branch 2 taken 350994432 times.
✓ Branch 3 taken 21937152 times.
✓ Branch 4 taken 325889408 times.
✓ Branch 5 taken 175497216 times.
✓ Branch 6 taken 175497216 times.
✓ Branch 7 taken 21937152 times.
✓ Branch 8 taken 350994432 times.
✓ Branch 9 taken 87748608 times.
✓ Branch 10 taken 87748608 times.
✓ Branch 11 taken 21937152 times.
✓ Branch 12 taken 87748608 times.
✓ Branch 13 taken 21937152 times.
✓ Branch 14 taken 175497216 times.
✓ Branch 15 taken 21937152 times.
✓ Branch 16 taken 350994432 times.
✓ Branch 17 taken 21937152 times.
✓ Branch 18 taken 19500576 times.
✓ Branch 19 taken 2436576 times.
✓ Branch 20 taken 4875144 times.
✓ Branch 21 taken 14625432 times.
✓ Branch 22 taken 4265751 times.
✓ Branch 23 taken 609393 times.
✓ Branch 24 taken 21937152 times.
✓ Branch 25 taken 685536 times.
✓ Branch 26 taken 2028439552 times.
✓ Branch 27 taken 350994432 times.
✓ Branch 28 taken 350994432 times.
✓ Branch 29 taken 21937152 times.
✓ Branch 30 taken 494815232 times.
✓ Branch 31 taken 175497216 times.
✓ Branch 32 taken 175497216 times.
✓ Branch 33 taken 21937152 times.
✓ Branch 34 taken 350994432 times.
✓ Branch 35 taken 87748608 times.
✓ Branch 36 taken 87748608 times.
✓ Branch 37 taken 21937152 times.
✓ Branch 38 taken 87748608 times.
✓ Branch 39 taken 21937152 times.
✓ Branch 40 taken 175497216 times.
✓ Branch 41 taken 21937152 times.
✓ Branch 42 taken 350994432 times.
✓ Branch 43 taken 21937152 times.
✓ Branch 44 taken 21937152 times.
✓ Branch 45 taken 685536 times.
14785087808 IDCT(32)
305
306
4/4
✓ Branch 0 taken 19465600 times.
✓ Branch 1 taken 4866400 times.
✓ Branch 2 taken 4866400 times.
✓ Branch 3 taken 1216600 times.
51097200 IDCT_DC( 4)
307
4/4
✓ Branch 0 taken 60966656 times.
✓ Branch 1 taken 7620832 times.
✓ Branch 2 taken 7620832 times.
✓ Branch 3 taken 952604 times.
139080184 IDCT_DC( 8)
308
4/4
✓ Branch 0 taken 114058752 times.
✓ Branch 1 taken 7128672 times.
✓ Branch 2 taken 7128672 times.
✓ Branch 3 taken 445542 times.
243265932 IDCT_DC(16)
309
4/4
✓ Branch 0 taken 146119680 times.
✓ Branch 1 taken 4566240 times.
✓ Branch 2 taken 4566240 times.
✓ Branch 3 taken 142695 times.
301657230 IDCT_DC(32)
310
311 #undef TR_4
312 #undef TR_8
313 #undef TR_16
314 #undef TR_32
315
316 #undef SET
317 #undef SCALE
318
319 ////////////////////////////////////////////////////////////////////////////////
320 //
321 ////////////////////////////////////////////////////////////////////////////////
322 #define ff_hevc_pel_filters ff_hevc_qpel_filters
323 #define DECL_HV_FILTER(f) \
324 const int8_t *hf = ff_hevc_ ## f ## _filters[mx]; \
325 const int8_t *vf = ff_hevc_ ## f ## _filters[my];
326
327 #define FW_PUT(p, f, t) \
328 static void FUNC(put_hevc_## f)(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, \
329 intptr_t mx, intptr_t my, int width) \
330 { \
331 DECL_HV_FILTER(p) \
332 FUNC(put_ ## t)(dst, src, srcstride, height, hf, vf, width); \
333 }
334
335 #define FW_PUT_UNI(p, f, t) \
336 static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
337 ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width) \
338 { \
339 DECL_HV_FILTER(p) \
340 FUNC(put_ ## t)(dst, dststride, src, srcstride, height, hf, vf, width); \
341 }
342
343 #define FW_PUT_UNI_W(p, f, t) \
344 static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
345 ptrdiff_t srcstride,int height, int denom, int wx, int ox, \
346 intptr_t mx, intptr_t my, int width) \
347 { \
348 DECL_HV_FILTER(p) \
349 FUNC(put_ ## t)(dst, dststride, src, srcstride, height, denom, wx, ox, hf, vf, width); \
350 }
351
352 #define FW_PUT_FUNCS(f, t, dir) \
353 FW_PUT(f, f ## _ ## dir, t ## _ ## dir) \
354 FW_PUT_UNI(f, f ## _uni_ ## dir, uni_ ## t ## _ ## dir) \
355 FW_PUT_UNI_W(f, f ## _uni_w_ ## dir, uni_## t ## _w_ ## dir)
356
357 4997298 FW_PUT(pel, pel_pixels, pixels)
358 6980298 FW_PUT_UNI(pel, pel_uni_pixels, uni_pixels)
359 146434 FW_PUT_UNI_W(pel, pel_uni_w_pixels, uni_w_pixels)
360
361 3444674 FW_PUT_FUNCS(qpel, luma, h )
362 2977700 FW_PUT_FUNCS(qpel, luma, v )
363 9269802 FW_PUT_FUNCS(qpel, luma, hv )
364 5499376 FW_PUT_FUNCS(epel, chroma, h )
365 4387812 FW_PUT_FUNCS(epel, chroma, v )
366 24023776 FW_PUT_FUNCS(epel, chroma, hv )
367
368 5462242 static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
369 const int16_t *src2,
370 int height, intptr_t mx, intptr_t my, int width)
371 {
372 int x, y;
373 5462242 const pixel *src = (const pixel *)_src;
374 5462242 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
375 5462242 pixel *dst = (pixel *)_dst;
376 5462242 ptrdiff_t dststride = _dststride / sizeof(pixel);
377
378 5462242 int shift = 14 + 1 - BIT_DEPTH;
379 #if BIT_DEPTH < 14
380 5462242 int offset = 1 << (shift - 1);
381 #else
382 int offset = 0;
383 #endif
384
385
2/2
✓ Branch 0 taken 41305794 times.
✓ Branch 1 taken 2731121 times.
88073830 for (y = 0; y < height; y++) {
386
2/2
✓ Branch 0 taken 1067725940 times.
✓ Branch 1 taken 41305794 times.
2218063468 for (x = 0; x < width; x++)
387 2135451880 dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
388 82611588 src += srcstride;
389 82611588 dst += dststride;
390 82611588 src2 += MAX_PB_SIZE;
391 }
392 5462242 }
393
394 64154 static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
395 const int16_t *src2,
396 int height, int denom, int wx0, int wx1,
397 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
398 {
399 int x, y;
400 64154 const pixel *src = (const pixel *)_src;
401 64154 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
402 64154 pixel *dst = (pixel *)_dst;
403 64154 ptrdiff_t dststride = _dststride / sizeof(pixel);
404
405 64154 int shift = 14 + 1 - BIT_DEPTH;
406 64154 int log2Wd = denom + shift - 1;
407
408 64154 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
409 64154 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
410
2/2
✓ Branch 0 taken 671000 times.
✓ Branch 1 taken 32077 times.
1406154 for (y = 0; y < height; y++) {
411
2/2
✓ Branch 0 taken 21890728 times.
✓ Branch 1 taken 671000 times.
45123456 for (x = 0; x < width; x++) {
412 43781456 dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1));
413 }
414 1342000 src += srcstride;
415 1342000 dst += dststride;
416 1342000 src2 += MAX_PB_SIZE;
417 }
418 64154 }
419
420 ////////////////////////////////////////////////////////////////////////////////
421 //
422 ////////////////////////////////////////////////////////////////////////////////
423 #define QPEL_FILTER(src, stride) \
424 (filter[0] * src[x - 3 * stride] + \
425 filter[1] * src[x - 2 * stride] + \
426 filter[2] * src[x - stride] + \
427 filter[3] * src[x ] + \
428 filter[4] * src[x + stride] + \
429 filter[5] * src[x + 2 * stride] + \
430 filter[6] * src[x + 3 * stride] + \
431 filter[7] * src[x + 4 * stride])
432
433 1362336 static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
434 const int16_t *src2,
435 int height, intptr_t mx, intptr_t my, int width)
436 {
437 int x, y;
438 1362336 const pixel *src = (const pixel*)_src;
439 1362336 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
440 1362336 pixel *dst = (pixel *)_dst;
441 1362336 ptrdiff_t dststride = _dststride / sizeof(pixel);
442
443 1362336 const int8_t *filter = ff_hevc_qpel_filters[mx];
444
445 1362336 int shift = 14 + 1 - BIT_DEPTH;
446 #if BIT_DEPTH < 14
447 1362336 int offset = 1 << (shift - 1);
448 #else
449 int offset = 0;
450 #endif
451
452
2/2
✓ Branch 0 taken 15144496 times.
✓ Branch 1 taken 681168 times.
31651328 for (y = 0; y < height; y++) {
453
2/2
✓ Branch 0 taken 513210376 times.
✓ Branch 1 taken 15144496 times.
1056709744 for (x = 0; x < width; x++)
454 1026420752 dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
455 30288992 src += srcstride;
456 30288992 dst += dststride;
457 30288992 src2 += MAX_PB_SIZE;
458 }
459 1362336 }
460
461 1073322 static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride,
462 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
463 int height, intptr_t mx, intptr_t my, int width)
464 {
465 int x, y;
466 1073322 const pixel *src = (const pixel*)_src;
467 1073322 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
468 1073322 pixel *dst = (pixel *)_dst;
469 1073322 ptrdiff_t dststride = _dststride / sizeof(pixel);
470
471 1073322 const int8_t *filter = ff_hevc_qpel_filters[my];
472
473 1073322 int shift = 14 + 1 - BIT_DEPTH;
474 #if BIT_DEPTH < 14
475 1073322 int offset = 1 << (shift - 1);
476 #else
477 int offset = 0;
478 #endif
479
480
2/2
✓ Branch 0 taken 11564808 times.
✓ Branch 1 taken 536661 times.
24202938 for (y = 0; y < height; y++) {
481
2/2
✓ Branch 0 taken 377783176 times.
✓ Branch 1 taken 11564808 times.
778695968 for (x = 0; x < width; x++)
482 755566352 dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
483 23129616 src += srcstride;
484 23129616 dst += dststride;
485 23129616 src2 += MAX_PB_SIZE;
486 }
487 1073322 }
488
489 3364606 static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
490 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
491 int height, intptr_t mx, intptr_t my, int width)
492 {
493 int x, y;
494 const int8_t *filter;
495 3364606 const pixel *src = (const pixel*)_src;
496 3364606 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
497 3364606 pixel *dst = (pixel *)_dst;
498 3364606 ptrdiff_t dststride = _dststride / sizeof(pixel);
499 int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
500 3364606 int16_t *tmp = tmp_array;
501 3364606 int shift = 14 + 1 - BIT_DEPTH;
502 #if BIT_DEPTH < 14
503 3364606 int offset = 1 << (shift - 1);
504 #else
505 int offset = 0;
506 #endif
507
508 3364606 src -= QPEL_EXTRA_BEFORE * srcstride;
509 3364606 filter = ff_hevc_qpel_filters[mx];
510
2/2
✓ Branch 0 taken 48024893 times.
✓ Branch 1 taken 1682303 times.
99414392 for (y = 0; y < height + QPEL_EXTRA; y++) {
511
2/2
✓ Branch 0 taken 1424943792 times.
✓ Branch 1 taken 48024893 times.
2945937370 for (x = 0; x < width; x++)
512 2849887584 tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
513 96049786 src += srcstride;
514 96049786 tmp += MAX_PB_SIZE;
515 }
516
517 3364606 tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
518 3364606 filter = ff_hevc_qpel_filters[my];
519
520
2/2
✓ Branch 0 taken 36248772 times.
✓ Branch 1 taken 1682303 times.
75862150 for (y = 0; y < height; y++) {
521
2/2
✓ Branch 0 taken 1174254024 times.
✓ Branch 1 taken 36248772 times.
2421005592 for (x = 0; x < width; x++)
522 2348508048 dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
523 72497544 tmp += MAX_PB_SIZE;
524 72497544 dst += dststride;
525 72497544 src2 += MAX_PB_SIZE;
526 }
527 3364606 }
528
529 22810 static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
530 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
531 int height, int denom, int wx0, int wx1,
532 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
533 {
534 int x, y;
535 22810 const pixel *src = (const pixel*)_src;
536 22810 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
537 22810 pixel *dst = (pixel *)_dst;
538 22810 ptrdiff_t dststride = _dststride / sizeof(pixel);
539
540 22810 const int8_t *filter = ff_hevc_qpel_filters[mx];
541
542 22810 int shift = 14 + 1 - BIT_DEPTH;
543 22810 int log2Wd = denom + shift - 1;
544
545 22810 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
546 22810 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
547
2/2
✓ Branch 0 taken 278600 times.
✓ Branch 1 taken 11405 times.
580010 for (y = 0; y < height; y++) {
548
2/2
✓ Branch 0 taken 9869200 times.
✓ Branch 1 taken 278600 times.
20295600 for (x = 0; x < width; x++)
549 19738400 dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
550 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
551 557200 src += srcstride;
552 557200 dst += dststride;
553 557200 src2 += MAX_PB_SIZE;
554 }
555 22810 }
556
557 20448 static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
558 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
559 int height, int denom, int wx0, int wx1,
560 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
561 {
562 int x, y;
563 20448 const pixel *src = (const pixel*)_src;
564 20448 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
565 20448 pixel *dst = (pixel *)_dst;
566 20448 ptrdiff_t dststride = _dststride / sizeof(pixel);
567
568 20448 const int8_t *filter = ff_hevc_qpel_filters[my];
569
570 20448 int shift = 14 + 1 - BIT_DEPTH;
571 20448 int log2Wd = denom + shift - 1;
572
573 20448 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
574 20448 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
575
2/2
✓ Branch 0 taken 236324 times.
✓ Branch 1 taken 10224 times.
493096 for (y = 0; y < height; y++) {
576
2/2
✓ Branch 0 taken 7942032 times.
✓ Branch 1 taken 236324 times.
16356712 for (x = 0; x < width; x++)
577 15884064 dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
578 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
579 472648 src += srcstride;
580 472648 dst += dststride;
581 472648 src2 += MAX_PB_SIZE;
582 }
583 20448 }
584
585 71042 static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
586 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
587 int height, int denom, int wx0, int wx1,
588 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
589 {
590 int x, y;
591 const int8_t *filter;
592 71042 const pixel *src = (const pixel*)_src;
593 71042 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
594 71042 pixel *dst = (pixel *)_dst;
595 71042 ptrdiff_t dststride = _dststride / sizeof(pixel);
596 int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
597 71042 int16_t *tmp = tmp_array;
598 71042 int shift = 14 + 1 - BIT_DEPTH;
599 71042 int log2Wd = denom + shift - 1;
600
601 71042 src -= QPEL_EXTRA_BEFORE * srcstride;
602 71042 filter = ff_hevc_qpel_filters[mx];
603
2/2
✓ Branch 0 taken 1131207 times.
✓ Branch 1 taken 35521 times.
2333456 for (y = 0; y < height + QPEL_EXTRA; y++) {
604
2/2
✓ Branch 0 taken 36956072 times.
✓ Branch 1 taken 1131207 times.
76174558 for (x = 0; x < width; x++)
605 73912144 tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
606 2262414 src += srcstride;
607 2262414 tmp += MAX_PB_SIZE;
608 }
609
610 71042 tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
611 71042 filter = ff_hevc_qpel_filters[my];
612
613 71042 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
614 71042 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
615
2/2
✓ Branch 0 taken 882560 times.
✓ Branch 1 taken 35521 times.
1836162 for (y = 0; y < height; y++) {
616
2/2
✓ Branch 0 taken 30984848 times.
✓ Branch 1 taken 882560 times.
63734816 for (x = 0; x < width; x++)
617 61969696 dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
618 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
619 1765120 tmp += MAX_PB_SIZE;
620 1765120 dst += dststride;
621 1765120 src2 += MAX_PB_SIZE;
622 }
623 71042 }
624
625 ////////////////////////////////////////////////////////////////////////////////
626 //
627 ////////////////////////////////////////////////////////////////////////////////
628 #define EPEL_FILTER(src, stride) \
629 (filter[0] * src[x - stride] + \
630 filter[1] * src[x] + \
631 filter[2] * src[x + stride] + \
632 filter[3] * src[x + 2 * stride])
633
634 2165962 static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride,
635 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
636 int height, intptr_t mx, intptr_t my, int width)
637 {
638 int x, y;
639 2165962 const pixel *src = (const pixel *)_src;
640 2165962 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
641 2165962 pixel *dst = (pixel *)_dst;
642 2165962 ptrdiff_t dststride = _dststride / sizeof(pixel);
643 2165962 const int8_t *filter = ff_hevc_epel_filters[mx];
644 2165962 int shift = 14 + 1 - BIT_DEPTH;
645 #if BIT_DEPTH < 14
646 2165962 int offset = 1 << (shift - 1);
647 #else
648 int offset = 0;
649 #endif
650
651
2/2
✓ Branch 0 taken 12792482 times.
✓ Branch 1 taken 1082981 times.
27750926 for (y = 0; y < height; y++) {
652
2/2
✓ Branch 0 taken 217326132 times.
✓ Branch 1 taken 12792482 times.
460237228 for (x = 0; x < width; x++) {
653 434652264 dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
654 }
655 25584964 dst += dststride;
656 25584964 src += srcstride;
657 25584964 src2 += MAX_PB_SIZE;
658 }
659 2165962 }
660
661 1506398 static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride,
662 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
663 int height, intptr_t mx, intptr_t my, int width)
664 {
665 int x, y;
666 1506398 const pixel *src = (const pixel *)_src;
667 1506398 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
668 1506398 const int8_t *filter = ff_hevc_epel_filters[my];
669 1506398 pixel *dst = (pixel *)_dst;
670 1506398 ptrdiff_t dststride = _dststride / sizeof(pixel);
671 1506398 int shift = 14 + 1 - BIT_DEPTH;
672 #if BIT_DEPTH < 14
673 1506398 int offset = 1 << (shift - 1);
674 #else
675 int offset = 0;
676 #endif
677
678
2/2
✓ Branch 0 taken 9087706 times.
✓ Branch 1 taken 753199 times.
19681810 for (y = 0; y < height; y++) {
679
2/2
✓ Branch 0 taken 149357300 times.
✓ Branch 1 taken 9087706 times.
316890012 for (x = 0; x < width; x++)
680 298714600 dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
681 18175412 dst += dststride;
682 18175412 src += srcstride;
683 18175412 src2 += MAX_PB_SIZE;
684 }
685 1506398 }
686
687 8559622 static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
688 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
689 int height, intptr_t mx, intptr_t my, int width)
690 {
691 int x, y;
692 8559622 const pixel *src = (const pixel *)_src;
693 8559622 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
694 8559622 pixel *dst = (pixel *)_dst;
695 8559622 ptrdiff_t dststride = _dststride / sizeof(pixel);
696 8559622 const int8_t *filter = ff_hevc_epel_filters[mx];
697 int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
698 8559622 int16_t *tmp = tmp_array;
699 8559622 int shift = 14 + 1 - BIT_DEPTH;
700 #if BIT_DEPTH < 14
701 8559622 int offset = 1 << (shift - 1);
702 #else
703 int offset = 0;
704 #endif
705
706 8559622 src -= EPEL_EXTRA_BEFORE * srcstride;
707
708
2/2
✓ Branch 0 taken 61896855 times.
✓ Branch 1 taken 4279811 times.
132353332 for (y = 0; y < height + EPEL_EXTRA; y++) {
709
2/2
✓ Branch 0 taken 920742850 times.
✓ Branch 1 taken 61896855 times.
1965279410 for (x = 0; x < width; x++)
710 1841485700 tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
711 123793710 src += srcstride;
712 123793710 tmp += MAX_PB_SIZE;
713 }
714
715 8559622 tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
716 8559622 filter = ff_hevc_epel_filters[my];
717
718
2/2
✓ Branch 0 taken 49057422 times.
✓ Branch 1 taken 4279811 times.
106674466 for (y = 0; y < height; y++) {
719
2/2
✓ Branch 0 taken 783203092 times.
✓ Branch 1 taken 49057422 times.
1664521028 for (x = 0; x < width; x++)
720 1566406184 dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
721 98114844 tmp += MAX_PB_SIZE;
722 98114844 dst += dststride;
723 98114844 src2 += MAX_PB_SIZE;
724 }
725 8559622 }
726
727 33080 static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
728 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
729 int height, int denom, int wx0, int wx1,
730 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
731 {
732 int x, y;
733 33080 const pixel *src = (const pixel *)_src;
734 33080 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
735 33080 pixel *dst = (pixel *)_dst;
736 33080 ptrdiff_t dststride = _dststride / sizeof(pixel);
737 33080 const int8_t *filter = ff_hevc_epel_filters[mx];
738 33080 int shift = 14 + 1 - BIT_DEPTH;
739 33080 int log2Wd = denom + shift - 1;
740
741 33080 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
742 33080 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
743
2/2
✓ Branch 0 taken 233792 times.
✓ Branch 1 taken 16540 times.
500664 for (y = 0; y < height; y++) {
744
2/2
✓ Branch 0 taken 5306152 times.
✓ Branch 1 taken 233792 times.
11079888 for (x = 0; x < width; x++)
745 10612304 dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
746 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
747 467584 src += srcstride;
748 467584 dst += dststride;
749 467584 src2 += MAX_PB_SIZE;
750 }
751 33080 }
752
753 24552 static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
754 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
755 int height, int denom, int wx0, int wx1,
756 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
757 {
758 int x, y;
759 24552 const pixel *src = (const pixel *)_src;
760 24552 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
761 24552 const int8_t *filter = ff_hevc_epel_filters[my];
762 24552 pixel *dst = (pixel *)_dst;
763 24552 ptrdiff_t dststride = _dststride / sizeof(pixel);
764 24552 int shift = 14 + 1 - BIT_DEPTH;
765 24552 int log2Wd = denom + shift - 1;
766
767 24552 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
768 24552 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
769
2/2
✓ Branch 0 taken 167956 times.
✓ Branch 1 taken 12276 times.
360464 for (y = 0; y < height; y++) {
770
2/2
✓ Branch 0 taken 3922696 times.
✓ Branch 1 taken 167956 times.
8181304 for (x = 0; x < width; x++)
771 7845392 dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
772 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
773 335912 src += srcstride;
774 335912 dst += dststride;
775 335912 src2 += MAX_PB_SIZE;
776 }
777 24552 }
778
779 166916 static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
780 const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
781 int height, int denom, int wx0, int wx1,
782 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
783 {
784 int x, y;
785 166916 const pixel *src = (const pixel *)_src;
786 166916 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
787 166916 pixel *dst = (pixel *)_dst;
788 166916 ptrdiff_t dststride = _dststride / sizeof(pixel);
789 166916 const int8_t *filter = ff_hevc_epel_filters[mx];
790 int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
791 166916 int16_t *tmp = tmp_array;
792 166916 int shift = 14 + 1 - BIT_DEPTH;
793 166916 int log2Wd = denom + shift - 1;
794
795 166916 src -= EPEL_EXTRA_BEFORE * srcstride;
796
797
2/2
✓ Branch 0 taken 1288910 times.
✓ Branch 1 taken 83458 times.
2744736 for (y = 0; y < height + EPEL_EXTRA; y++) {
798
2/2
✓ Branch 0 taken 21782220 times.
✓ Branch 1 taken 1288910 times.
46142260 for (x = 0; x < width; x++)
799 43564440 tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
800 2577820 src += srcstride;
801 2577820 tmp += MAX_PB_SIZE;
802 }
803
804 166916 tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
805 166916 filter = ff_hevc_epel_filters[my];
806
807 166916 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
808 166916 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
809
2/2
✓ Branch 0 taken 1038536 times.
✓ Branch 1 taken 83458 times.
2243988 for (y = 0; y < height; y++) {
810
2/2
✓ Branch 0 taken 18780552 times.
✓ Branch 1 taken 1038536 times.
39638176 for (x = 0; x < width; x++)
811 37561104 dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
812 ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
813 2077072 tmp += MAX_PB_SIZE;
814 2077072 dst += dststride;
815 2077072 src2 += MAX_PB_SIZE;
816 }
817 166916 }
818
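819 // Line zero: four samples on each side of pix, stepping by xstride (P3..P0 at negative offsets, Q0..Q3 from offset 0).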
820 #define P3 pix[-4 * xstride]
821 #define P2 pix[-3 * xstride]
822 #define P1 pix[-2 * xstride]
823 #define P0 pix[-1 * xstride]
824 #define Q0 pix[0 * xstride]
825 #define Q1 pix[1 * xstride]
826 #define Q2 pix[2 * xstride]
827 #define Q3 pix[3 * xstride]
828
829 // Line three: the same eight samples, 3 * ystride away from line zero. Used only for the deblocking decision.
830 #define TP3 pix[-4 * xstride + 3 * ystride]
831 #define TP2 pix[-3 * xstride + 3 * ystride]
832 #define TP1 pix[-2 * xstride + 3 * ystride]
833 #define TP0 pix[-1 * xstride + 3 * ystride]
834 #define TQ0 pix[0 * xstride + 3 * ystride]
835 #define TQ1 pix[1 * xstride + 3 * ystride]
836 #define TQ2 pix[2 * xstride + 3 * ystride]
837 #define TQ3 pix[3 * xstride + 3 * ystride]
838
839 #include "h26x/h2656_deblock_template.c"
840
841 85719872 static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
842 ptrdiff_t _xstride, ptrdiff_t _ystride,
843 int beta, const int *_tc,
844 const uint8_t *_no_p, const uint8_t *_no_q)
845 {
846 85719872 ptrdiff_t xstride = _xstride / sizeof(pixel);
847 85719872 ptrdiff_t ystride = _ystride / sizeof(pixel);
848
849 85719872 beta <<= BIT_DEPTH - 8;
850
851
2/2
✓ Branch 0 taken 85719872 times.
✓ Branch 1 taken 42859936 times.
257159616 for (int j = 0; j < 2; j++) {
852 171439744 pixel* pix = (pixel*)_pix + j * 4 * ystride;
853 171439744 const int dp0 = abs(P2 - 2 * P1 + P0);
854 171439744 const int dq0 = abs(Q2 - 2 * Q1 + Q0);
855 171439744 const int dp3 = abs(TP2 - 2 * TP1 + TP0);
856 171439744 const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
857 171439744 const int d0 = dp0 + dq0;
858 171439744 const int d3 = dp3 + dq3;
859 171439744 const int tc = _tc[j] << (BIT_DEPTH - 8);
860 171439744 const int no_p = _no_p[j];
861 171439744 const int no_q = _no_q[j];
862
863
2/2
✓ Branch 0 taken 64560496 times.
✓ Branch 1 taken 21159376 times.
171439744 if (d0 + d3 < beta) {
864 129120992 const int beta_3 = beta >> 3;
865 129120992 const int beta_2 = beta >> 2;
866 129120992 const int tc25 = ((tc * 5 + 1) >> 1);
867
868
4/4
✓ Branch 0 taken 19435217 times.
✓ Branch 1 taken 45125279 times.
✓ Branch 2 taken 18939239 times.
✓ Branch 3 taken 495978 times.
129120992 if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
869
4/4
✓ Branch 0 taken 14278793 times.
✓ Branch 1 taken 4660446 times.
✓ Branch 2 taken 14174787 times.
✓ Branch 3 taken 104006 times.
37878478 abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
870
4/4
✓ Branch 0 taken 13677442 times.
✓ Branch 1 taken 497345 times.
✓ Branch 2 taken 13365700 times.
✓ Branch 3 taken 311742 times.
55080974 (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
871 26731400 const int tc2 = tc << 1;
872 26731400 FUNC(loop_filter_luma_strong)(pix, xstride, ystride, tc2, tc2, tc2, no_p, no_q);
873 } else {
874 102389592 int nd_p = 1;
875 102389592 int nd_q = 1;
876
2/2
✓ Branch 0 taken 35739751 times.
✓ Branch 1 taken 15455045 times.
102389592 if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
877 71479502 nd_p = 2;
878
2/2
✓ Branch 0 taken 34866989 times.
✓ Branch 1 taken 16327807 times.
102389592 if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
879 69733978 nd_q = 2;
880 102389592 FUNC(loop_filter_luma_weak)(pix, xstride, ystride, tc, beta, no_p, no_q, nd_p, nd_q);
881 }
882 }
883 }
884 85719872 }
885
886 26156308 static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
887 ptrdiff_t _ystride, const int *_tc,
888 const uint8_t *_no_p, const uint8_t *_no_q)
889 {
890 int no_p, no_q;
891 26156308 ptrdiff_t xstride = _xstride / sizeof(pixel);
892 26156308 ptrdiff_t ystride = _ystride / sizeof(pixel);
893 26156308 const int size = 4;
894
895
2/2
✓ Branch 0 taken 26156308 times.
✓ Branch 1 taken 13078154 times.
78468924 for (int j = 0; j < 2; j++) {
896 52312616 pixel *pix = (pixel *)_pix + j * size * ystride;
897 52312616 const int tc = _tc[j] << (BIT_DEPTH - 8);
898
2/2
✓ Branch 0 taken 23535267 times.
✓ Branch 1 taken 2621041 times.
52312616 if (tc > 0) {
899 47070534 no_p = _no_p[j];
900 47070534 no_q = _no_q[j];
901
902 47070534 FUNC(loop_filter_chroma_weak)(pix, xstride, ystride, size, tc, no_p, no_q);
903 }
904 }
905 26156308 }
906
907 12782610 static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
908 const int32_t *tc, const uint8_t *no_p,
909 const uint8_t *no_q)
910 {
911 12782610 FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
912 12782610 }
913
914 13373698 static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
915 const int32_t *tc, const uint8_t *no_p,
916 const uint8_t *no_q)
917 {
918 13373698 FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
919 13373698 }
920
921 43213298 static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
922 int beta, const int32_t *tc, const uint8_t *no_p,
923 const uint8_t *no_q)
924 {
925 43213298 FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
926 beta, tc, no_p, no_q);
927 43213298 }
928
929 42506574 static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
930 int beta, const int32_t *tc, const uint8_t *no_p,
931 const uint8_t *no_q)
932 {
933 42506574 FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
934 beta, tc, no_p, no_q);
935 42506574 }
936
937 #undef P3
938 #undef P2
939 #undef P1
940 #undef P0
941 #undef Q0
942 #undef Q1
943 #undef Q2
944 #undef Q3
945
946 #undef TP3
947 #undef TP2
948 #undef TP1
949 #undef TP0
950 #undef TQ0
951 #undef TQ1
952 #undef TQ2
953 #undef TQ3
954