FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/hevc/dsp_init.c
Date: 2025-01-20 09:27:23
Exec Total Coverage
Lines: 636 642 99.1%
Functions: 1015 1015 100.0%
Branches: 414 418 99.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2013 Seppo Tomperi
3 * Copyright (c) 2013-2014 Pierre-Edouard Lepere
4 * Copyright (c) 2023-2024 Wu Jianhua
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "config.h"
24
25 #include "libavutil/cpu.h"
26 #include "libavutil/mem_internal.h"
27 #include "libavutil/x86/asm.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavcodec/hevc/dsp.h"
30 #include "libavcodec/x86/hevcdsp.h"
31 #include "libavcodec/x86/h26x/h2656dsp.h"
32
/*
 * LFC_FUNC(DIR, DEPTH, OPT): declare the prototype of the chroma loop-filter
 * routine ff_hevc_<DIR>_loop_filter_chroma_<DEPTH>_<OPT>(pix, stride, tc,
 * no_p, no_q). Only a declaration is emitted; the body is not defined in this
 * file. The trailing ';' is part of the expansion, so invocations need none.
 */
33 #define LFC_FUNC(DIR, DEPTH, OPT) \
34 void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
35
/*
 * LFL_FUNC(DIR, DEPTH, OPT): declare the prototype of the luma loop-filter
 * routine ff_hevc_<DIR>_loop_filter_luma_<DEPTH>_<OPT>. Compared with the
 * chroma prototype it takes one extra argument, `beta`, before `tc`.
 * The trailing ';' is part of the expansion.
 */
36 #define LFL_FUNC(DIR, DEPTH, OPT) \
37 void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
38
/*
 * LFC_FUNCS(type, depth, opt): declare both direction variants (h and v) of
 * the chroma loop filter for one bit depth and one instruction-set suffix.
 * The `type` argument is accepted but not used in the expansion.
 */
39 #define LFC_FUNCS(type, depth, opt) \
40 LFC_FUNC(h, depth, opt) \
41 LFC_FUNC(v, depth, opt)
42
/*
 * LFL_FUNCS(type, depth, opt): declare both direction variants (h and v) of
 * the luma loop filter for one bit depth and one instruction-set suffix.
 * The `type` argument is accepted but not used in the expansion.
 */
43 #define LFL_FUNCS(type, depth, opt) \
44 LFL_FUNC(h, depth, opt) \
45 LFL_FUNC(v, depth, opt)
46
/*
 * Loop-filter prototypes: chroma for sse2 and avx, luma for sse2, ssse3 and
 * avx, each at 8, 10 and 12 bit and in both h and v directions.
 * NOTE(review): the definitions are not visible here; presumably they live in
 * the x86 assembly sources - confirm.
 */
47 LFC_FUNCS(uint8_t, 8, sse2)
48 LFC_FUNCS(uint8_t, 10, sse2)
49 LFC_FUNCS(uint8_t, 12, sse2)
50 LFC_FUNCS(uint8_t, 8, avx)
51 LFC_FUNCS(uint8_t, 10, avx)
52 LFC_FUNCS(uint8_t, 12, avx)
53 LFL_FUNCS(uint8_t, 8, sse2)
54 LFL_FUNCS(uint8_t, 10, sse2)
55 LFL_FUNCS(uint8_t, 12, sse2)
56 LFL_FUNCS(uint8_t, 8, ssse3)
57 LFL_FUNCS(uint8_t, 10, ssse3)
58 LFL_FUNCS(uint8_t, 12, ssse3)
59 LFL_FUNCS(uint8_t, 8, avx)
60 LFL_FUNCS(uint8_t, 10, avx)
61 LFL_FUNCS(uint8_t, 12, avx)
62
/*
 * IDCT_DC_FUNCS(W, opt): declare ff_hevc_idct_<W>_dc_{8,10,12}_<opt>, each
 * taking a single int16_t *coeffs argument. The last declaration has no
 * trailing ';', so every invocation must supply one.
 */
63 #define IDCT_DC_FUNCS(W, opt) \
64 void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
65 void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
66 void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)
67
/*
 * DC IDCT prototypes: 4x4 for mmxext, 8x8/16x16/32x32 for sse2, and
 * 16x16/32x32 for avx2 (three bit depths each, see IDCT_DC_FUNCS).
 */
68 IDCT_DC_FUNCS(4x4, mmxext);
69 IDCT_DC_FUNCS(8x8, sse2);
70 IDCT_DC_FUNCS(16x16, sse2);
71 IDCT_DC_FUNCS(32x32, sse2);
72 IDCT_DC_FUNCS(16x16, avx2);
73 IDCT_DC_FUNCS(32x32, avx2);
74
/*
 * IDCT_FUNCS(opt): declare the IDCT prototypes for sizes 4x4, 8x8, 16x16 and
 * 32x32 at 8 and 10 bit for one instruction-set suffix. Each takes
 * (int16_t *coeffs, int col_limit). No 12-bit variants are declared here.
 * The trailing ';' is part of the expansion, so invocations need none.
 */
75 #define IDCT_FUNCS(opt) \
76 void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \
77 void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \
78 void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \
79 void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \
80 void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \
81 void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
82 void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \
83 void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);
84
/* Full IDCT prototypes for the sse2 and avx suffixes (see IDCT_FUNCS). */
85 IDCT_FUNCS(sse2)
86 IDCT_FUNCS(avx)
87
88
/*
 * Alias so that DECL_HV_FILTER(pel) resolves to the qpel filter table; there
 * is no separate "pel" table referenced in this file.
 * NOTE(review): presumably the pel (plain copy) kernels ignore hf/vf, so any
 * valid table works - confirm against the h2656 kernels.
 */
89 #define ff_hevc_pel_filters ff_hevc_qpel_filters
/*
 * DECL_HV_FILTER(f): declare two locals, `hf` and `vf`, pointing at the rows
 * of ff_hevc_<f>_filters selected by `mx` and `my` respectively. `mx` and
 * `my` must be in scope at the expansion site.
 */
90 #define DECL_HV_FILTER(f) \
91 const uint8_t *hf = ff_hevc_ ## f ## _filters[mx]; \
92 const uint8_t *vf = ff_hevc_ ## f ## _filters[my];
93
/*
 * FW_PUT(p, a, b, depth, opt): define the forwarding wrapper
 * ff_hevc_put_hevc_<a>_<depth>_<opt>. It looks up the horizontal/vertical
 * filter rows for (mx, my) in the <p> filter table and calls
 * ff_h2656_put_<b>_<depth>_<opt>, passing the filter pointers instead of the
 * raw mx/my values.
 * The second argument is fixed at 2 * MAX_PB_SIZE; it sits in the slot where
 * FW_PUT_UNI passes dststride (presumably the byte stride of the int16_t
 * intermediate buffer - confirm against h2656dsp.h).
 */
94 #define FW_PUT(p, a, b, depth, opt) \
95 void ff_hevc_put_hevc_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
96 int height, intptr_t mx, intptr_t my,int width) \
97 { \
98 DECL_HV_FILTER(p) \
99 ff_h2656_put_ ## b ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
100 }
101
/*
 * FW_PUT_UNI(p, a, b, depth, opt): define the forwarding wrapper
 * ff_hevc_put_hevc_uni_<a>_<depth>_<opt>. Same scheme as FW_PUT, but the
 * destination is a uint8_t buffer with a caller-supplied dststride, and the
 * call is forwarded to ff_h2656_put_uni_<b>_<depth>_<opt>.
 */
102 #define FW_PUT_UNI(p, a, b, depth, opt) \
103 void ff_hevc_put_hevc_uni_ ## a ## _ ## depth ## _##opt(uint8_t *dst, ptrdiff_t dststride, \
104 const uint8_t *src, ptrdiff_t srcstride, \
105 int height, intptr_t mx, intptr_t my, int width) \
106 { \
107 DECL_HV_FILTER(p) \
108 ff_h2656_put_uni_ ## b ## _ ## depth ## _##opt(dst, dststride, src, srcstride, height, hf, vf, width); \
109 }
110
111 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
112
/* FW_PUT_FUNCS: emit both the plain (FW_PUT) and the uni (FW_PUT_UNI) wrapper. */
113 #define FW_PUT_FUNCS(p, a, b, depth, opt) \
114 FW_PUT(p, a, b, depth, opt) \
115 FW_PUT_UNI(p, a, b, depth, opt)
116
/*
 * FW_PEL(w, depth, opt): wrappers for the pel_pixels<w> entry points,
 * forwarding to the h2656 pixels<w> kernels; filters come from the "pel"
 * table name (aliased to the qpel table above).
 */
117 #define FW_PEL(w, depth, opt) FW_PUT_FUNCS(pel, pel_pixels##w, pixels##w, depth, opt)
118
/*
 * FW_DIR(npel, n, w, depth, opt): wrappers for the horizontal-only (_h<w>)
 * and vertical-only (_v<w>) entry points of the <npel> family, forwarding to
 * the <n>tap_h<w> / <n>tap_v<w> h2656 kernels.
 */
119 #define FW_DIR(npel, n, w, depth, opt) \
120 FW_PUT_FUNCS(npel, npel ## _h##w, n ## tap_h##w, depth, opt) \
121 FW_PUT_FUNCS(npel, npel ## _v##w, n ## tap_v##w, depth, opt)
122
/*
 * FW_DIR_HV(npel, n, w, depth, opt): wrappers for the combined (_hv<w>) entry
 * point of the <npel> family, forwarding to the <n>tap_hv<w> h2656 kernel.
 */
123 #define FW_DIR_HV(npel, n, w, depth, opt) \
124 FW_PUT_FUNCS(npel, npel ## _hv##w, n ## tap_hv##w, depth, opt)
125
126 17410 FW_PEL(4, 8, sse4)
127 76 FW_PEL(6, 8, sse4)
128 26322 FW_PEL(8, 8, sse4)
129 76 FW_PEL(12, 8, sse4)
130 150684 FW_PEL(16, 8, sse4)
131 2400 FW_PEL(4, 10, sse4)
132 76 FW_PEL(6, 10, sse4)
133 33624 FW_PEL(8, 10, sse4)
134 304 FW_PEL(4, 12, sse4)
135 76 FW_PEL(6, 12, sse4)
136 1824 FW_PEL(8, 12, sse4)
137
/*
 * epel shorthands: the epel family is bound to the 4-tap h2656 kernels.
 * FW_EPEL emits the h and v wrappers, FW_EPEL_HV the hv wrapper, and
 * FW_EPEL_FUNCS all three.
 */
138 #define FW_EPEL(w, depth, opt) FW_DIR(epel, 4, w, depth, opt)
139 #define FW_EPEL_HV(w, depth, opt) FW_DIR_HV(epel, 4, w, depth, opt)
140 #define FW_EPEL_FUNCS(w, depth, opt) \
141 FW_EPEL(w, depth, opt) \
142 FW_EPEL_HV(w, depth, opt)
143
144 152 FW_EPEL(12, 8, sse4)
145
146 10508 FW_EPEL_FUNCS(4, 8, sse4)
147 228 FW_EPEL_FUNCS(6, 8, sse4)
148 12880 FW_EPEL_FUNCS(8, 8, sse4)
149 21140 FW_EPEL_FUNCS(16, 8, sse4)
150 6884 FW_EPEL_FUNCS(4, 10, sse4)
151 228 FW_EPEL_FUNCS(6, 10, sse4)
152 31064 FW_EPEL_FUNCS(8, 10, sse4)
153 912 FW_EPEL_FUNCS(4, 12, sse4)
154 228 FW_EPEL_FUNCS(6, 12, sse4)
155 5472 FW_EPEL_FUNCS(8, 12, sse4)
156
/*
 * qpel shorthands: the qpel family is bound to the 8-tap h2656 kernels.
 * FW_QPEL emits the h and v wrappers, FW_QPEL_HV the hv wrapper, and
 * FW_QPEL_FUNCS all three.
 */
157 #define FW_QPEL(w, depth, opt) FW_DIR(qpel, 8, w, depth, opt)
158 #define FW_QPEL_HV(w, depth, opt) FW_DIR_HV(qpel, 8, w, depth, opt)
159 #define FW_QPEL_FUNCS(w, depth, opt) \
160 FW_QPEL(w, depth, opt) \
161 FW_QPEL_HV(w, depth, opt)
162
163 152 FW_QPEL(12, 8, sse4)
164 1638 FW_QPEL(16, 8, sse4)
165
166 306 FW_QPEL_FUNCS(4, 8, sse4)
167 4518 FW_QPEL_FUNCS(8, 8, sse4)
168 912 FW_QPEL_FUNCS(4, 10, sse4)
169 14268 FW_QPEL_FUNCS(8, 10, sse4)
170 912 FW_QPEL_FUNCS(4, 12, sse4)
171 5472 FW_QPEL_FUNCS(8, 12, sse4)
172
173 #if HAVE_AVX2_EXTERNAL
174
175 27552 FW_PEL(32, 8, avx2)
176 3028 FW_PUT(pel, pel_pixels16, pixels16, 10, avx2)
177
178 1396 FW_EPEL(32, 8, avx2)
179 5112 FW_EPEL(16, 10, avx2)
180
181 752 FW_EPEL_HV(32, 8, avx2)
182 6732 FW_EPEL_HV(16, 10, avx2)
183
184 32 FW_QPEL(32, 8, avx2)
185 8960 FW_QPEL(16, 10, avx2)
186
187 11354 FW_QPEL_HV(16, 10, avx2)
188
189 #endif
190 #endif
191
/*
 * mc_rep_func(name, bitd, step, W, opt): define
 * ff_hevc_put_hevc_<name><W>_<bitd>_<opt> by repeating the <step>-wide kernel
 * of the same name/bit depth/suffix across a W-wide block.
 * For each column offset i = 0, step, 2*step, ... < W:
 *   - src is advanced by i * ((bitd + 7) / 8) bytes (bytes per sample),
 *   - dst is advanced by i int16_t elements.
 * All other arguments are passed through unchanged.
 */
192 #define mc_rep_func(name, bitd, step, W, opt) \
193 void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, \
194 const uint8_t *_src, ptrdiff_t _srcstride, int height, \
195 intptr_t mx, intptr_t my, int width) \
196 { \
197 int i; \
198 int16_t *dst; \
199 for (i = 0; i < W; i += step) { \
200 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
201 dst = _dst + i; \
202 ff_hevc_put_hevc_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
203 } \
204 }
/*
 * mc_rep_uni_func(name, bitd, step, W, opt): define
 * ff_hevc_put_hevc_uni_<name><W>_<bitd>_<opt> by repeating the <step>-wide
 * uni kernel across a W-wide block. Both dst and src are byte pointers here,
 * so each is advanced by i * ((bitd + 7) / 8) bytes per step.
 */
205 #define mc_rep_uni_func(name, bitd, step, W, opt) \
206 void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \
207 const uint8_t *_src, ptrdiff_t _srcstride, int height, \
208 intptr_t mx, intptr_t my, int width) \
209 { \
210 int i; \
211 uint8_t *dst; \
212 for (i = 0; i < W; i += step) { \
213 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
214 dst = _dst + (i * ((bitd + 7) / 8)); \
215 ff_hevc_put_hevc_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \
216 height, mx, my, width); \
217 } \
218 }
/*
 * mc_rep_bi_func(name, bitd, step, W, opt): define
 * ff_hevc_put_hevc_bi_<name><W>_<bitd>_<opt> by repeating the <step>-wide bi
 * kernel across a W-wide block. Per step, dst and src advance by
 * i * ((bitd + 7) / 8) bytes, while the second source (_src2, int16_t)
 * advances by i elements.
 */
219 #define mc_rep_bi_func(name, bitd, step, W, opt) \
220 void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \
221 ptrdiff_t _srcstride, const int16_t *_src2, \
222 int height, intptr_t mx, intptr_t my, int width) \
223 { \
224 int i; \
225 uint8_t *dst; \
226 for (i = 0; i < W ; i += step) { \
227 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
228 const int16_t *src2 = _src2 + i; \
229 dst = _dst + (i * ((bitd + 7) / 8)); \
230 ff_hevc_put_hevc_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \
231 height, mx, my, width); \
232 } \
233 }
234
/* mc_rep_funcs: emit the plain, uni and bi repeat wrappers for one configuration. */
235 #define mc_rep_funcs(name, bitd, step, W, opt) \
236 mc_rep_func(name, bitd, step, W, opt) \
237 mc_rep_uni_func(name, bitd, step, W, opt) \
238 mc_rep_bi_func(name, bitd, step, W, opt)
239
/*
 * mc_rep_func2(name, bitd, step1, step2, W, opt): define
 * ff_hevc_put_hevc_<name><W>_<bitd>_<opt> as exactly two kernel calls of
 * different widths: the <step1>-wide kernel at column 0, then the
 * <step2>-wide kernel at column step1 (dst + step1 int16_t elements,
 * src + step1 * bytes-per-sample). W is used only to form the function name.
 */
240 #define mc_rep_func2(name, bitd, step1, step2, W, opt) \
241 void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *dst, \
242 const uint8_t *src, ptrdiff_t _srcstride, int height, \
243 intptr_t mx, intptr_t my, int width) \
244 { \
245 ff_hevc_put_hevc_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
246 ff_hevc_put_hevc_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)), \
247 _srcstride, height, mx, my, width); \
248 }
/*
 * mc_rep_uni_func2: uni counterpart of mc_rep_func2. Calls the <step1>-wide
 * uni kernel at column 0 and the <step2>-wide uni kernel at column step1;
 * dst and src are both advanced by step1 * ((bitd + 7) / 8) bytes for the
 * second call. W is used only to form the function name.
 */
249 #define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
250 void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \
251 const uint8_t *src, ptrdiff_t _srcstride, int height, \
252 intptr_t mx, intptr_t my, int width) \
253 { \
254 ff_hevc_put_hevc_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width);\
255 ff_hevc_put_hevc_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
256 src + (step1 * ((bitd + 7) / 8)), _srcstride, \
257 height, mx, my, width); \
258 }
/*
 * mc_rep_bi_func2: bi counterpart of mc_rep_func2. Calls the <step1>-wide bi
 * kernel at column 0 and the <step2>-wide bi kernel at column step1; for the
 * second call dst and src advance by step1 * ((bitd + 7) / 8) bytes and src2
 * (int16_t) by step1 elements. W is used only to form the function name.
 */
259 #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \
260 void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
261 ptrdiff_t _srcstride, const int16_t *src2, \
262 int height, intptr_t mx, intptr_t my, int width) \
263 { \
264 ff_hevc_put_hevc_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width);\
265 ff_hevc_put_hevc_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
266 src + (step1 * ((bitd + 7) / 8)), _srcstride, \
267 src2 + step1, height, mx, my, width); \
268 }
269
/* mc_rep_funcs2: emit the plain, uni and bi two-call wrappers for one configuration. */
270 #define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \
271 mc_rep_func2(name, bitd, step1, step2, W, opt) \
272 mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
273 mc_rep_bi_func2(name, bitd, step1, step2, W, opt)
274
275 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
276
/*
 * mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4): define the
 * 10-bit function ff_hevc_put_hevc_<name><width1>_10_<opt1> by mixing two
 * instruction sets: the <width2>-wide <opt1> kernel handles the left part and
 * the <width3>-wide <opt2> kernel handles the rest.
 * For the second call dst (int16_t) is advanced by width2 elements, while src
 * (uint8_t) is advanced by width4 - i.e. width4 is a byte offset that the
 * caller must supply (the visible invocations pass width2 = 16, width4 = 32).
 */
277 #define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
278 void ff_hevc_put_hevc_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
279 int height, intptr_t mx, intptr_t my, int width) \
280 \
281 { \
282 ff_hevc_put_hevc_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \
283 ff_hevc_put_hevc_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width); \
284 }
285
/*
 * mc_bi_rep_mix_10: bi counterpart of mc_rep_mix_10. For the second
 * (<opt2>) call, the byte pointers dst and src are advanced by width4, and
 * src2 (int16_t) by width2 elements.
 */
286 #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
287 void ff_hevc_put_hevc_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
288 ptrdiff_t _srcstride, const int16_t *src2, \
289 int height, intptr_t mx, intptr_t my, int width) \
290 { \
291 ff_hevc_put_hevc_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \
292 height, mx, my, width); \
293 ff_hevc_put_hevc_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2,\
294 height, mx, my, width); \
295 }
296
/*
 * mc_uni_rep_mix_10: uni counterpart of mc_rep_mix_10. For the second
 * (<opt2>) call, both byte pointers dst and src are advanced by width4.
 */
297 #define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
298 void ff_hevc_put_hevc_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \
299 const uint8_t *src, ptrdiff_t _srcstride, int height, \
300 intptr_t mx, intptr_t my, int width) \
301 { \
302 ff_hevc_put_hevc_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \
303 height, mx, my, width); \
304 ff_hevc_put_hevc_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, \
305 height, mx, my, width); \
306 }
307
/* mc_rep_mixs_10: emit the plain, bi and uni 10-bit mixed-ISA wrappers together. */
308 #define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) \
309 mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
310 mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
311 mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)
312
/*
 * mc_rep_mix_8(name, width1, width2, width3, opt1, opt2): define the 8-bit
 * function ff_hevc_put_hevc_<name><width1>_8_<opt1> by mixing two
 * instruction sets: the <width2>-wide <opt1> kernel first, then the
 * <width3>-wide <opt2> kernel at column width2.
 * Unlike the 10-bit variant there is no separate byte-offset parameter: src
 * is advanced by width2 bytes, which equals width2 samples only for one-byte
 * samples.
 */
313 #define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
314 void ff_hevc_put_hevc_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
315 int height, intptr_t mx, intptr_t my, int width) \
316 \
317 { \
318 ff_hevc_put_hevc_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \
319 ff_hevc_put_hevc_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width); \
320 }
321
/*
 * mc_bi_rep_mix_8: bi counterpart of mc_rep_mix_8. For the second (<opt2>)
 * call dst, src and src2 are all advanced by width2 (bytes for dst/src,
 * int16_t elements for src2).
 */
322 #define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
323 void ff_hevc_put_hevc_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
324 ptrdiff_t _srcstride, const int16_t *src2, \
325 int height, intptr_t mx, intptr_t my, int width) \
326 { \
327 ff_hevc_put_hevc_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
328 src2, height, mx, my, width); \
329 ff_hevc_put_hevc_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \
330 src2+width2, height, mx, my, width); \
331 }
332
/*
 * mc_uni_rep_mix_8: uni counterpart of mc_rep_mix_8. For the second (<opt2>)
 * call both byte pointers dst and src are advanced by width2.
 */
333 #define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
334 void ff_hevc_put_hevc_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \
335 const uint8_t *src, ptrdiff_t _srcstride, int height, \
336 intptr_t mx, intptr_t my, int width) \
337 { \
338 ff_hevc_put_hevc_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
339 height, mx, my, width); \
340 ff_hevc_put_hevc_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \
341 height, mx, my, width); \
342 }
343
/* mc_rep_mixs_8: emit the plain, bi and uni 8-bit mixed-ISA wrappers together. */
344 #define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) \
345 mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
346 mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
347 mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)
348
349 #if HAVE_AVX2_EXTERNAL
350
351 8 mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
352 6 mc_rep_mixs_8(epel_hv, 48, 32, 16, avx2, sse4)
353 6 mc_rep_mixs_8(epel_h , 48, 32, 16, avx2, sse4)
354 6 mc_rep_mixs_8(epel_v , 48, 32, 16, avx2, sse4)
355
356 5 mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
357 1 mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32)
358 6 mc_rep_mixs_10(epel_hv, 24, 16, 8, avx2, sse4, 32)
359 6 mc_rep_mixs_10(epel_h , 24, 16, 8, avx2, sse4, 32)
360 6 mc_rep_mixs_10(epel_v , 24, 16, 8, avx2, sse4, 32)
361
362
363 6 mc_rep_mixs_10(qpel_h , 24, 16, 8, avx2, sse4, 32)
364 6 mc_rep_mixs_10(qpel_v , 24, 16, 8, avx2, sse4, 32)
365 6 mc_rep_mixs_10(qpel_hv, 24, 16, 8, avx2, sse4, 32)
366
367
368
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 78 times.
234 mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2)//used for 10bit
369
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
4 mc_rep_uni_func(pel_pixels, 8, 32, 96, avx2) //used for 10bit
370
371
2/2
✓ Branch 1 taken 6722 times.
✓ Branch 2 taken 3361 times.
20166 mc_rep_funcs(pel_pixels, 8, 32, 64, avx2)
372
373
2/2
✓ Branch 1 taken 2026 times.
✓ Branch 2 taken 1013 times.
3039 mc_rep_func(pel_pixels, 10, 16, 32, avx2)
374
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
4 mc_rep_func(pel_pixels, 10, 16, 48, avx2)
375
2/2
✓ Branch 1 taken 304 times.
✓ Branch 2 taken 152 times.
456 mc_rep_func(pel_pixels, 10, 32, 64, avx2)
376
377
2/2
✓ Branch 1 taken 176 times.
✓ Branch 2 taken 88 times.
264 mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2)
378
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
4 mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2)
379
2/2
✓ Branch 1 taken 42 times.
✓ Branch 2 taken 21 times.
63 mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2)
380
381
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_h, 8, 32, 64, avx2)
382
383
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_v, 8, 32, 64, avx2)
384
385
2/2
✓ Branch 1 taken 1290 times.
✓ Branch 2 taken 645 times.
3870 mc_rep_funcs(epel_h, 10, 16, 32, avx2)
386
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(epel_h, 10, 16, 48, avx2)
387
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_h, 10, 32, 64, avx2)
388
389
2/2
✓ Branch 1 taken 154 times.
✓ Branch 2 taken 77 times.
462 mc_rep_funcs(epel_v, 10, 16, 32, avx2)
390
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(epel_v, 10, 16, 48, avx2)
391
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_v, 10, 32, 64, avx2)
392
393
394
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_hv, 8, 32, 64, avx2)
395
396
2/2
✓ Branch 1 taken 1598 times.
✓ Branch 2 taken 799 times.
4794 mc_rep_funcs(epel_hv, 10, 16, 32, avx2)
397
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(epel_hv, 10, 16, 48, avx2)
398
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_hv, 10, 32, 64, avx2)
399
400
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(qpel_h, 8, 32, 64, avx2)
401 6 mc_rep_mixs_8(qpel_h , 48, 32, 16, avx2, sse4)
402
403
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(qpel_v, 8, 32, 64, avx2)
404 6 mc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4)
405
406
2/2
✓ Branch 1 taken 1982 times.
✓ Branch 2 taken 991 times.
5946 mc_rep_funcs(qpel_h, 10, 16, 32, avx2)
407
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(qpel_h, 10, 16, 48, avx2)
408
2/2
✓ Branch 1 taken 512 times.
✓ Branch 2 taken 256 times.
1536 mc_rep_funcs(qpel_h, 10, 32, 64, avx2)
409
410
2/2
✓ Branch 1 taken 910 times.
✓ Branch 2 taken 455 times.
2730 mc_rep_funcs(qpel_v, 10, 16, 32, avx2)
411
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(qpel_v, 10, 16, 48, avx2)
412
2/2
✓ Branch 1 taken 202 times.
✓ Branch 2 taken 101 times.
606 mc_rep_funcs(qpel_v, 10, 32, 64, avx2)
413
414
2/2
✓ Branch 1 taken 3116 times.
✓ Branch 2 taken 1558 times.
9348 mc_rep_funcs(qpel_hv, 10, 16, 32, avx2)
415
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(qpel_hv, 10, 16, 48, avx2)
416
2/2
✓ Branch 1 taken 668 times.
✓ Branch 2 taken 334 times.
2004 mc_rep_funcs(qpel_hv, 10, 32, 64, avx2)
417
418 #endif //AVX2
419
420
2/2
✓ Branch 1 taken 30924 times.
✓ Branch 2 taken 7731 times.
77310 mc_rep_funcs(pel_pixels, 8, 16, 64, sse4)
421
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(pel_pixels, 8, 16, 48, sse4)
422
2/2
✓ Branch 1 taken 33874 times.
✓ Branch 2 taken 16937 times.
101622 mc_rep_funcs(pel_pixels, 8, 16, 32, sse4)
423
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(pel_pixels, 8, 8, 24, sse4)
424
2/2
✓ Branch 1 taken 2960 times.
✓ Branch 2 taken 370 times.
6660 mc_rep_funcs(pel_pixels,10, 8, 64, sse4)
425
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(pel_pixels,10, 8, 48, sse4)
426
2/2
✓ Branch 1 taken 6208 times.
✓ Branch 2 taken 1552 times.
15520 mc_rep_funcs(pel_pixels,10, 8, 32, sse4)
427
2/2
✓ Branch 1 taken 144 times.
✓ Branch 2 taken 48 times.
384 mc_rep_funcs(pel_pixels,10, 8, 24, sse4)
428
2/2
✓ Branch 1 taken 4020 times.
✓ Branch 2 taken 2010 times.
12060 mc_rep_funcs(pel_pixels,10, 8, 16, sse4)
429
2/2
✓ Branch 1 taken 177 times.
✓ Branch 2 taken 59 times.
472 mc_rep_funcs(pel_pixels,10, 4, 12, sse4)
430
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(pel_pixels,12, 8, 64, sse4)
431
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(pel_pixels,12, 8, 48, sse4)
432
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(pel_pixels,12, 8, 32, sse4)
433
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(pel_pixels,12, 8, 24, sse4)
434
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(pel_pixels,12, 8, 16, sse4)
435
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(pel_pixels,12, 4, 12, sse4)
436
437
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_h, 8, 16, 64, sse4)
438
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_h, 8, 16, 48, sse4)
439
2/2
✓ Branch 1 taken 1076 times.
✓ Branch 2 taken 538 times.
3228 mc_rep_funcs(epel_h, 8, 16, 32, sse4)
440
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_h, 8, 8, 24, sse4)
441
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(epel_h,10, 8, 64, sse4)
442
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(epel_h,10, 8, 48, sse4)
443
2/2
✓ Branch 1 taken 1016 times.
✓ Branch 2 taken 254 times.
2540 mc_rep_funcs(epel_h,10, 8, 32, sse4)
444
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_h,10, 8, 24, sse4)
445
2/2
✓ Branch 1 taken 1140 times.
✓ Branch 2 taken 570 times.
3420 mc_rep_funcs(epel_h,10, 8, 16, sse4)
446
2/2
✓ Branch 1 taken 129 times.
✓ Branch 2 taken 43 times.
344 mc_rep_funcs(epel_h,10, 4, 12, sse4)
447
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(epel_h,12, 8, 64, sse4)
448
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(epel_h,12, 8, 48, sse4)
449
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(epel_h,12, 8, 32, sse4)
450
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_h,12, 8, 24, sse4)
451
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(epel_h,12, 8, 16, sse4)
452
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_h,12, 4, 12, sse4)
453
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_v, 8, 16, 64, sse4)
454
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_v, 8, 16, 48, sse4)
455
2/2
✓ Branch 1 taken 2056 times.
✓ Branch 2 taken 1028 times.
6168 mc_rep_funcs(epel_v, 8, 16, 32, sse4)
456
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_v, 8, 8, 24, sse4)
457
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(epel_v,10, 8, 64, sse4)
458
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(epel_v,10, 8, 48, sse4)
459
2/2
✓ Branch 1 taken 296 times.
✓ Branch 2 taken 74 times.
740 mc_rep_funcs(epel_v,10, 8, 32, sse4)
460
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_v,10, 8, 24, sse4)
461
2/2
✓ Branch 1 taken 204 times.
✓ Branch 2 taken 102 times.
612 mc_rep_funcs(epel_v,10, 8, 16, sse4)
462
2/2
✓ Branch 1 taken 141 times.
✓ Branch 2 taken 47 times.
376 mc_rep_funcs(epel_v,10, 4, 12, sse4)
463
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(epel_v,12, 8, 64, sse4)
464
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(epel_v,12, 8, 48, sse4)
465
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(epel_v,12, 8, 32, sse4)
466
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_v,12, 8, 24, sse4)
467
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(epel_v,12, 8, 16, sse4)
468
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_v,12, 4, 12, sse4)
469
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_hv, 8, 16, 64, sse4)
470
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_hv, 8, 16, 48, sse4)
471
2/2
✓ Branch 1 taken 1860 times.
✓ Branch 2 taken 930 times.
5580 mc_rep_funcs(epel_hv, 8, 16, 32, sse4)
472
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv, 8, 8, 24, sse4)
473 78 mc_rep_funcs2(epel_hv,8, 8, 4, 12, sse4)
474
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(epel_hv,10, 8, 64, sse4)
475
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(epel_hv,10, 8, 48, sse4)
476
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 78 times.
780 mc_rep_funcs(epel_hv,10, 8, 32, sse4)
477
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_hv,10, 8, 24, sse4)
478
2/2
✓ Branch 1 taken 236 times.
✓ Branch 2 taken 118 times.
708 mc_rep_funcs(epel_hv,10, 8, 16, sse4)
479
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv,10, 4, 12, sse4)
480
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(epel_hv,12, 8, 64, sse4)
481
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(epel_hv,12, 8, 48, sse4)
482
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(epel_hv,12, 8, 32, sse4)
483
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv,12, 8, 24, sse4)
484
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(epel_hv,12, 8, 16, sse4)
485
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv,12, 4, 12, sse4)
486
487
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(qpel_h, 8, 16, 64, sse4)
488
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_h, 8, 16, 48, sse4)
489
2/2
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
252 mc_rep_funcs(qpel_h, 8, 16, 32, sse4)
490
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h, 8, 8, 24, sse4)
491
2/2
✓ Branch 1 taken 1184 times.
✓ Branch 2 taken 148 times.
2664 mc_rep_funcs(qpel_h,10, 8, 64, sse4)
492
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(qpel_h,10, 8, 48, sse4)
493
2/2
✓ Branch 1 taken 1248 times.
✓ Branch 2 taken 312 times.
3120 mc_rep_funcs(qpel_h,10, 8, 32, sse4)
494
2/2
✓ Branch 1 taken 132 times.
✓ Branch 2 taken 44 times.
352 mc_rep_funcs(qpel_h,10, 8, 24, sse4)
495
2/2
✓ Branch 1 taken 424 times.
✓ Branch 2 taken 212 times.
1272 mc_rep_funcs(qpel_h,10, 8, 16, sse4)
496
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h,10, 4, 12, sse4)
497
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(qpel_h,12, 8, 64, sse4)
498
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_h,12, 8, 48, sse4)
499
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(qpel_h,12, 8, 32, sse4)
500
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h,12, 8, 24, sse4)
501
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(qpel_h,12, 8, 16, sse4)
502
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h,12, 4, 12, sse4)
503
2/2
✓ Branch 1 taken 176 times.
✓ Branch 2 taken 44 times.
440 mc_rep_funcs(qpel_v, 8, 16, 64, sse4)
504
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_v, 8, 16, 48, sse4)
505
2/2
✓ Branch 1 taken 92 times.
✓ Branch 2 taken 46 times.
276 mc_rep_funcs(qpel_v, 8, 16, 32, sse4)
506
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v, 8, 8, 24, sse4)
507
2/2
✓ Branch 1 taken 464 times.
✓ Branch 2 taken 58 times.
1044 mc_rep_funcs(qpel_v,10, 8, 64, sse4)
508
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(qpel_v,10, 8, 48, sse4)
509
2/2
✓ Branch 1 taken 304 times.
✓ Branch 2 taken 76 times.
760 mc_rep_funcs(qpel_v,10, 8, 32, sse4)
510
2/2
✓ Branch 1 taken 138 times.
✓ Branch 2 taken 46 times.
368 mc_rep_funcs(qpel_v,10, 8, 24, sse4)
511
2/2
✓ Branch 1 taken 96 times.
✓ Branch 2 taken 48 times.
288 mc_rep_funcs(qpel_v,10, 8, 16, sse4)
512
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v,10, 4, 12, sse4)
513
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(qpel_v,12, 8, 64, sse4)
514
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_v,12, 8, 48, sse4)
515
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(qpel_v,12, 8, 32, sse4)
516
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v,12, 8, 24, sse4)
517
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(qpel_v,12, 8, 16, sse4)
518
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v,12, 4, 12, sse4)
519
2/2
✓ Branch 1 taken 928 times.
✓ Branch 2 taken 116 times.
2088 mc_rep_funcs(qpel_hv, 8, 8, 64, sse4)
520
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_hv, 8, 8, 48, sse4)
521
2/2
✓ Branch 1 taken 292 times.
✓ Branch 2 taken 73 times.
730 mc_rep_funcs(qpel_hv, 8, 8, 32, sse4)
522
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv, 8, 8, 24, sse4)
523
2/2
✓ Branch 1 taken 202 times.
✓ Branch 2 taken 101 times.
606 mc_rep_funcs(qpel_hv, 8, 8, 16, sse4)
524 78 mc_rep_funcs2(qpel_hv,8, 8, 4, 12, sse4)
525
2/2
✓ Branch 1 taken 480 times.
✓ Branch 2 taken 60 times.
1080 mc_rep_funcs(qpel_hv,10, 8, 64, sse4)
526
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(qpel_hv,10, 8, 48, sse4)
527
2/2
✓ Branch 1 taken 272 times.
✓ Branch 2 taken 68 times.
680 mc_rep_funcs(qpel_hv,10, 8, 32, sse4)
528
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_hv,10, 8, 24, sse4)
529
2/2
✓ Branch 1 taken 100 times.
✓ Branch 2 taken 50 times.
300 mc_rep_funcs(qpel_hv,10, 8, 16, sse4)
530
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv,10, 4, 12, sse4)
531
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(qpel_hv,12, 8, 64, sse4)
532
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_hv,12, 8, 48, sse4)
533
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(qpel_hv,12, 8, 32, sse4)
534
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv,12, 8, 24, sse4)
535
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(qpel_hv,12, 8, 16, sse4)
536
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv,12, 4, 12, sse4)
537
/*
 * mc_rep_uni_w(bitd, step, W, opt): define
 * ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt> by repeating the <step>-wide
 * uni_w kernel across a W-wide block.
 * The source here is an int16_t buffer, so it advances by i elements per
 * step; the uint8_t destination advances by i * ((bitd + 7) / 8) bytes.
 * height, denom, _wx and _ox are passed through unchanged.
 */
538 #define mc_rep_uni_w(bitd, step, W, opt) \
539 void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
540 int height, int denom, int _wx, int _ox) \
541 { \
542 int i; \
543 uint8_t *dst; \
544 for (i = 0; i < W; i += step) { \
545 const int16_t *src = _src + i; \
546 dst= _dst + (i * ((bitd + 7) / 8)); \
547 ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, \
548 height, denom, _wx, _ox); \
549 } \
550 }
551
/*
 * SSE4 instantiations of mc_rep_uni_w for bit depths 8, 10 and 12.
 * Width 12 is built from two 6-wide calls; widths 16, 24, 32, 48 and 64
 * are built from 2, 3, 4, 6 and 8 calls of the 8-wide routine.
 */
552
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(8, 6, 12, sse4)
553
2/2
✓ Branch 1 taken 21144 times.
✓ Branch 2 taken 10572 times.
31716 mc_rep_uni_w(8, 8, 16, sse4)
554
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_uni_w(8, 8, 24, sse4)
555
2/2
✓ Branch 1 taken 54668 times.
✓ Branch 2 taken 13667 times.
68335 mc_rep_uni_w(8, 8, 32, sse4)
556
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_uni_w(8, 8, 48, sse4)
557
2/2
✓ Branch 1 taken 43016 times.
✓ Branch 2 taken 5377 times.
48393 mc_rep_uni_w(8, 8, 64, sse4)
558
559
2/2
✓ Branch 1 taken 284 times.
✓ Branch 2 taken 142 times.
426 mc_rep_uni_w(10, 6, 12, sse4)
560
2/2
✓ Branch 1 taken 3220 times.
✓ Branch 2 taken 1610 times.
4830 mc_rep_uni_w(10, 8, 16, sse4)
561
2/2
✓ Branch 1 taken 402 times.
✓ Branch 2 taken 134 times.
536 mc_rep_uni_w(10, 8, 24, sse4)
562
2/2
✓ Branch 1 taken 4728 times.
✓ Branch 2 taken 1182 times.
5910 mc_rep_uni_w(10, 8, 32, sse4)
563
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_uni_w(10, 8, 48, sse4)
564
2/2
✓ Branch 1 taken 2832 times.
✓ Branch 2 taken 354 times.
3186 mc_rep_uni_w(10, 8, 64, sse4)
565
566
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(12, 6, 12, sse4)
567
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(12, 8, 16, sse4)
568
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_uni_w(12, 8, 24, sse4)
569
2/2
✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
630 mc_rep_uni_w(12, 8, 32, sse4)
570
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_uni_w(12, 8, 48, sse4)
571
2/2
✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
1134 mc_rep_uni_w(12, 8, 64, sse4)
572
/*
 * mc_rep_bi_w(bitd, step, W, opt): defines
 * ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() by repeating the narrower
 * ff_hevc_put_hevc_bi_w<step>_<bitd>_<opt>() across the block width.
 *
 * For each i = 0, step, 2*step, ... while i < W the <step>-wide routine is
 * called with:
 *   - src  = _src  + i  (int16_t elements);
 *   - src2 = _src2 + i  (int16_t elements);
 *   - dst  = _dst + i * ((bitd + 7) / 8) bytes, i.e. 1 byte per column
 *     for bitd == 8 and 2 bytes per column for bitd == 10 or 12.
 * dststride, height, denom, _wx0, _wx1, _ox0 and _ox1 are forwarded
 * unchanged.
 *
 * Nothing here checks that W is a multiple of step; if it were not, the
 * last strip would start before column W but extend past it.
 */
573 #define mc_rep_bi_w(bitd, step, W, opt) \
574 void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
575 const int16_t *_src2, int height, \
576 int denom, int _wx0, int _wx1, int _ox0, int _ox1) \
577 { \
578 int i; \
579 uint8_t *dst; \
580 for (i = 0; i < W; i += step) { \
581 const int16_t *src = _src + i; \
582 const int16_t *src2 = _src2 + i; \
583 dst = _dst + (i * ((bitd + 7) / 8)); \
584 ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \
585 height, denom, _wx0, _wx1, _ox0, _ox1); \
586 } \
587 }
588
/*
 * SSE4 instantiations of mc_rep_bi_w for bit depths 8, 10 and 12.
 * Width 12 is built from two 6-wide calls; widths 16, 24, 32, 48 and 64
 * are built from 2, 3, 4, 6 and 8 calls of the 8-wide routine.
 */
589
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(8, 6, 12, sse4)
590
2/2
✓ Branch 1 taken 4896 times.
✓ Branch 2 taken 2448 times.
7344 mc_rep_bi_w(8, 8, 16, sse4)
591
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_bi_w(8, 8, 24, sse4)
592
2/2
✓ Branch 1 taken 23552 times.
✓ Branch 2 taken 5888 times.
29440 mc_rep_bi_w(8, 8, 32, sse4)
593
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_bi_w(8, 8, 48, sse4)
594
2/2
✓ Branch 1 taken 21144 times.
✓ Branch 2 taken 2643 times.
23787 mc_rep_bi_w(8, 8, 64, sse4)
595
596
2/2
✓ Branch 1 taken 268 times.
✓ Branch 2 taken 134 times.
402 mc_rep_bi_w(10, 6, 12, sse4)
597
2/2
✓ Branch 1 taken 2916 times.
✓ Branch 2 taken 1458 times.
4374 mc_rep_bi_w(10, 8, 16, sse4)
598
2/2
✓ Branch 1 taken 390 times.
✓ Branch 2 taken 130 times.
520 mc_rep_bi_w(10, 8, 24, sse4)
599
2/2
✓ Branch 1 taken 4760 times.
✓ Branch 2 taken 1190 times.
5950 mc_rep_bi_w(10, 8, 32, sse4)
600
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_bi_w(10, 8, 48, sse4)
601
2/2
✓ Branch 1 taken 2928 times.
✓ Branch 2 taken 366 times.
3294 mc_rep_bi_w(10, 8, 64, sse4)
602
603
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(12, 6, 12, sse4)
604
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(12, 8, 16, sse4)
605
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_bi_w(12, 8, 24, sse4)
606
2/2
✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
630 mc_rep_bi_w(12, 8, 32, sse4)
607
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_bi_w(12, 8, 48, sse4)
608
2/2
✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
1134 mc_rep_bi_w(12, 8, 64, sse4)
609
/*
 * mc_uni_w_func(name, bitd, W, opt): defines
 * ff_hevc_put_hevc_uni_w_<name><W>_<bitd>_<opt>() as a two-stage wrapper:
 *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() is called with _src,
 *      _srcstride, height, mx, my and width, writing into the local
 *      int16_t scratch array `temp`;
 *   2. ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt>() is called with `temp`,
 *      height, denom, _wx and _ox, writing into _dst with _dststride.
 *
 * `temp` is declared through LOCAL_ALIGNED_16 with 71 * MAX_PB_SIZE
 * elements. NOTE(review): the 71 is presumably the maximum block height
 * plus the extra rows needed by the longest interpolation filter --
 * confirm against MAX_PB_SIZE and the filter tap counts.
 */
610 #define mc_uni_w_func(name, bitd, W, opt) \
611 void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
612 const uint8_t *_src, ptrdiff_t _srcstride, \
613 int height, int denom, \
614 int _wx, int _ox, \
615 intptr_t mx, intptr_t my, int width) \
616 { \
617 LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
618 ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
619 ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox);\
620 }
621
/*
 * mc_uni_w_funcs(name, bitd, opt): expands mc_uni_w_func once for each of
 * the widths 4, 8, 12, 16, 24, 32, 48 and 64. Width 6 is not part of this
 * list and has to be instantiated separately where it is needed.
 */
622 #define mc_uni_w_funcs(name, bitd, opt) \
623 mc_uni_w_func(name, bitd, 4, opt) \
624 mc_uni_w_func(name, bitd, 8, opt) \
625 mc_uni_w_func(name, bitd, 12, opt) \
626 mc_uni_w_func(name, bitd, 16, opt) \
627 mc_uni_w_func(name, bitd, 24, opt) \
628 mc_uni_w_func(name, bitd, 32, opt) \
629 mc_uni_w_func(name, bitd, 48, opt) \
630 mc_uni_w_func(name, bitd, 64, opt)
631
/*
 * SSE4 uni_w wrappers for bit depths 8, 10 and 12.
 * pel_pixels and the epel_{h,v,hv} variants get the eight widths from
 * mc_uni_w_funcs plus an explicit width-6 instance; the qpel_{h,v,hv}
 * variants are instantiated without a width-6 version.
 */
632 81664 mc_uni_w_funcs(pel_pixels, 8, sse4)
633 18 mc_uni_w_func(pel_pixels, 8, 6, sse4)
634 7784 mc_uni_w_funcs(epel_h, 8, sse4)
635 18 mc_uni_w_func(epel_h, 8, 6, sse4)
636 7424 mc_uni_w_funcs(epel_v, 8, sse4)
637 18 mc_uni_w_func(epel_v, 8, 6, sse4)
638 7664 mc_uni_w_funcs(epel_hv, 8, sse4)
639 18 mc_uni_w_func(epel_hv, 8, 6, sse4)
640 298 mc_uni_w_funcs(qpel_h, 8, sse4)
641 308 mc_uni_w_funcs(qpel_v, 8, sse4)
642 882 mc_uni_w_funcs(qpel_hv, 8, sse4)
643
644 4980 mc_uni_w_funcs(pel_pixels, 10, sse4)
645 18 mc_uni_w_func(pel_pixels, 10, 6, sse4)
646 1352 mc_uni_w_funcs(epel_h, 10, sse4)
647 18 mc_uni_w_func(epel_h, 10, 6, sse4)
648 360 mc_uni_w_funcs(epel_v, 10, sse4)
649 18 mc_uni_w_func(epel_v, 10, 6, sse4)
650 536 mc_uni_w_funcs(epel_hv, 10, sse4)
651 18 mc_uni_w_func(epel_hv, 10, 6, sse4)
652 844 mc_uni_w_funcs(qpel_h, 10, sse4)
653 332 mc_uni_w_funcs(qpel_v, 10, sse4)
654 380 mc_uni_w_funcs(qpel_hv, 10, sse4)
655
656 288 mc_uni_w_funcs(pel_pixels, 12, sse4)
657 18 mc_uni_w_func(pel_pixels, 12, 6, sse4)
658 288 mc_uni_w_funcs(epel_h, 12, sse4)
659 18 mc_uni_w_func(epel_h, 12, 6, sse4)
660 288 mc_uni_w_funcs(epel_v, 12, sse4)
661 18 mc_uni_w_func(epel_v, 12, 6, sse4)
662 288 mc_uni_w_funcs(epel_hv, 12, sse4)
663 18 mc_uni_w_func(epel_hv, 12, 6, sse4)
664 288 mc_uni_w_funcs(qpel_h, 12, sse4)
665 288 mc_uni_w_funcs(qpel_v, 12, sse4)
666 288 mc_uni_w_funcs(qpel_hv, 12, sse4)
667
/*
 * mc_bi_w_func(name, bitd, W, opt): defines
 * ff_hevc_put_hevc_bi_w_<name><W>_<bitd>_<opt>() as a two-stage wrapper:
 *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() is called with _src,
 *      _srcstride, height, mx, my and width, writing into the local
 *      int16_t scratch array `temp`;
 *   2. ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() is called with `temp` and
 *      the caller-supplied _src2 as its two int16_t inputs, plus height,
 *      denom, _wx0, _wx1, _ox0 and _ox1, writing into _dst with
 *      _dststride.
 *
 * `temp` is declared through LOCAL_ALIGNED_16 with 71 * MAX_PB_SIZE
 * elements. NOTE(review): the 71 is presumably the maximum block height
 * plus the extra rows needed by the longest interpolation filter --
 * confirm against MAX_PB_SIZE and the filter tap counts.
 */
668 #define mc_bi_w_func(name, bitd, W, opt) \
669 void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
670 const uint8_t *_src, ptrdiff_t _srcstride, \
671 const int16_t *_src2, \
672 int height, int denom, \
673 int _wx0, int _wx1, int _ox0, int _ox1, \
674 intptr_t mx, intptr_t my, int width) \
675 { \
676 LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
677 ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
678 ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \
679 height, denom, _wx0, _wx1, _ox0, _ox1); \
680 }
681
/*
 * mc_bi_w_funcs(name, bitd, opt): expands mc_bi_w_func once for each of
 * the widths 4, 8, 12, 16, 24, 32, 48 and 64. Width 6 is not part of this
 * list and has to be instantiated separately where it is needed.
 */
682 #define mc_bi_w_funcs(name, bitd, opt) \
683 mc_bi_w_func(name, bitd, 4, opt) \
684 mc_bi_w_func(name, bitd, 8, opt) \
685 mc_bi_w_func(name, bitd, 12, opt) \
686 mc_bi_w_func(name, bitd, 16, opt) \
687 mc_bi_w_func(name, bitd, 24, opt) \
688 mc_bi_w_func(name, bitd, 32, opt) \
689 mc_bi_w_func(name, bitd, 48, opt) \
690 mc_bi_w_func(name, bitd, 64, opt)
691
/*
 * SSE4 bi_w wrappers for bit depths 8, 10 and 12.
 * pel_pixels and the epel_{h,v,hv} variants get the eight widths from
 * mc_bi_w_funcs plus an explicit width-6 instance; the qpel_{h,v,hv}
 * variants are instantiated without a width-6 version.
 */
692 23926 mc_bi_w_funcs(pel_pixels, 8, sse4)
693 18 mc_bi_w_func(pel_pixels, 8, 6, sse4)
694 1240 mc_bi_w_funcs(epel_h, 8, sse4)
695 18 mc_bi_w_func(epel_h, 8, 6, sse4)
696 4860 mc_bi_w_funcs(epel_v, 8, sse4)
697 18 mc_bi_w_func(epel_v, 8, 6, sse4)
698 2416 mc_bi_w_funcs(epel_hv, 8, sse4)
699 18 mc_bi_w_func(epel_hv, 8, 6, sse4)
700 288 mc_bi_w_funcs(qpel_h, 8, sse4)
701 288 mc_bi_w_funcs(qpel_v, 8, sse4)
702 288 mc_bi_w_funcs(qpel_hv, 8, sse4)
703
704 4336 mc_bi_w_funcs(pel_pixels, 10, sse4)
705 18 mc_bi_w_func(pel_pixels, 10, 6, sse4)
706 1392 mc_bi_w_funcs(epel_h, 10, sse4)
707 18 mc_bi_w_func(epel_h, 10, 6, sse4)
708 448 mc_bi_w_funcs(epel_v, 10, sse4)
709 18 mc_bi_w_func(epel_v, 10, 6, sse4)
710 312 mc_bi_w_funcs(epel_hv, 10, sse4)
711 18 mc_bi_w_func(epel_hv, 10, 6, sse4)
712 836 mc_bi_w_funcs(qpel_h, 10, sse4)
713 368 mc_bi_w_funcs(qpel_v, 10, sse4)
714 300 mc_bi_w_funcs(qpel_hv, 10, sse4)
715
716 288 mc_bi_w_funcs(pel_pixels, 12, sse4)
717 18 mc_bi_w_func(pel_pixels, 12, 6, sse4)
718 288 mc_bi_w_funcs(epel_h, 12, sse4)
719 18 mc_bi_w_func(epel_h, 12, 6, sse4)
720 288 mc_bi_w_funcs(epel_v, 12, sse4)
721 18 mc_bi_w_func(epel_v, 12, 6, sse4)
722 288 mc_bi_w_funcs(epel_hv, 12, sse4)
723 18 mc_bi_w_func(epel_hv, 12, 6, sse4)
724 288 mc_bi_w_funcs(qpel_h, 12, sse4)
725 288 mc_bi_w_funcs(qpel_v, 12, sse4)
726 288 mc_bi_w_funcs(qpel_hv, 12, sse4)
727 #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
728
/*
 * SAO_BAND_FILTER_FUNCS(bitd, opt): declares (does not define) the five
 * prototypes ff_hevc_sao_band_filter_<N>_<bitd>_<opt>() for
 * N = 8, 16, 32, 48 and 64. All five share one signature: destination and
 * source pointers, a stride for each of them, the int16_t offset table
 * sao_offset_val, sao_left_class, and the width/height of the area.
 */
729 #define SAO_BAND_FILTER_FUNCS(bitd, opt) \
730 void ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
731 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
732 void ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
733 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
734 void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
735 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
736 void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
737 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
738 void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
739 const int16_t *sao_offset_val, int sao_left_class, int width, int height);
740
/* Band-filter prototypes for bit depths 8/10/12 in sse2, avx and avx2 flavours. */
741 SAO_BAND_FILTER_FUNCS(8, sse2)
742 SAO_BAND_FILTER_FUNCS(10, sse2)
743 SAO_BAND_FILTER_FUNCS(12, sse2)
744 SAO_BAND_FILTER_FUNCS(8, avx)
745 SAO_BAND_FILTER_FUNCS(10, avx)
746 SAO_BAND_FILTER_FUNCS(12, avx)
747 SAO_BAND_FILTER_FUNCS(8, avx2)
748 SAO_BAND_FILTER_FUNCS(10, avx2)
749 SAO_BAND_FILTER_FUNCS(12, avx2)
750
/*
 * SAO_BAND_INIT(bitd, opt): statement macro that fills
 * c->sao_band_filter[0..4] with the <bitd>/<opt> band-filter functions for
 * widths 8, 16, 32, 48 and 64, in that order. It relies on a variable
 * named `c` being in scope at the expansion site. The do { } while (0)
 * wrapper lets it be used as a single statement followed by ';'.
 */
751 #define SAO_BAND_INIT(bitd, opt) do { \
752 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_##bitd##_##opt; \
753 c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_##bitd##_##opt; \
754 c->sao_band_filter[2] = ff_hevc_sao_band_filter_32_##bitd##_##opt; \
755 c->sao_band_filter[3] = ff_hevc_sao_band_filter_48_##bitd##_##opt; \
756 c->sao_band_filter[4] = ff_hevc_sao_band_filter_64_##bitd##_##opt; \
757 } while (0)
758
/*
 * SAO_EDGE_FILTER_FUNCS(bitd, opt): declares (does not define) the five
 * prototypes ff_hevc_sao_edge_filter_<N>_<bitd>_<opt>() for
 * N = 8, 16, 32, 48 and 64. All five share one signature: destination and
 * source pointers, a stride for the destination only (stride_dst), the
 * int16_t offset table sao_offset_val, the selector `eo`, and the
 * width/height of the area.
 *
 * NOTE(review): the last prototype line ends with a backslash, so the
 * macro definition continues onto the following empty line. This is
 * harmless as long as that line stays empty.
 */
759 #define SAO_EDGE_FILTER_FUNCS(bitd, opt) \
760 void ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
761 const int16_t *sao_offset_val, int eo, int width, int height); \
762 void ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
763 const int16_t *sao_offset_val, int eo, int width, int height); \
764 void ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
765 const int16_t *sao_offset_val, int eo, int width, int height); \
766 void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
767 const int16_t *sao_offset_val, int eo, int width, int height); \
768 void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
769 const int16_t *sao_offset_val, int eo, int width, int height); \
770
/*
 * Edge-filter prototypes: the 8-bit baseline is the ssse3 flavour, the
 * 10- and 12-bit baselines are sse2; every bit depth also has avx2.
 */
771 SAO_EDGE_FILTER_FUNCS(8, ssse3)
772 SAO_EDGE_FILTER_FUNCS(8, avx2)
773 SAO_EDGE_FILTER_FUNCS(10, sse2)
774 SAO_EDGE_FILTER_FUNCS(10, avx2)
775 SAO_EDGE_FILTER_FUNCS(12, sse2)
776 SAO_EDGE_FILTER_FUNCS(12, avx2)
777
/*
 * SAO_EDGE_INIT(bitd, opt): statement macro that fills
 * c->sao_edge_filter[0..4] with the <bitd>/<opt> edge-filter functions for
 * widths 8, 16, 32, 48 and 64, in that order. It relies on a variable
 * named `c` being in scope at the expansion site. The do { } while (0)
 * wrapper lets it be used as a single statement followed by ';'.
 */
778 #define SAO_EDGE_INIT(bitd, opt) do { \
779 c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt; \
780 c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \
781 c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \
782 c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \
783 c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \
784 } while (0)
785
/*
 * EPEL_LINKS(pointer, my, mx, fname, bitd, opt): invokes PEL_LINK once
 * per block width, pairing table index 1..9 with the widths
 * 4, 6, 8, 12, 16, 24, 32, 48 and 64 (the width is pasted onto fname).
 * Index 0 is not touched. The last invocation has no trailing ';' so the
 * caller supplies it.
 * PEL_LINK is defined outside this part of the file; presumably it
 * stores the matching function(s) at pointer[idx][my][mx] -- confirm
 * against its definition.
 */
786 #define EPEL_LINKS(pointer, my, mx, fname, bitd, opt ) \
787 PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
788 PEL_LINK(pointer, 2, my , mx , fname##6 , bitd, opt ); \
789 PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
790 PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
791 PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
792 PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
793 PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
794 PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
795 PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
/*
 * QPEL_LINKS(pointer, my, mx, fname, bitd, opt): same as EPEL_LINKS but
 * without the width-6 entry: PEL_LINK is invoked for table indices
 * 1, 3, 4, 5, 6, 7, 8 and 9 with the widths 4, 8, 12, 16, 24, 32, 48 and
 * 64. Indices 0 and 2 are not touched. The last invocation has no
 * trailing ';' so the caller supplies it.
 * PEL_LINK is defined outside this part of the file; presumably it
 * stores the matching function(s) at pointer[idx][my][mx] -- confirm
 * against its definition.
 */
796 #define QPEL_LINKS(pointer, my, mx, fname, bitd, opt) \
797 PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
798 PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
799 PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
800 PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
801 PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
802 PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
803 PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
804 PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
805
806 1480 void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
807 {
808 1480 int cpu_flags = av_get_cpu_flags();
809
810
2/2
✓ Branch 0 taken 617 times.
✓ Branch 1 taken 863 times.
1480 if (bit_depth == 8) {
811
2/2
✓ Branch 0 taken 226 times.
✓ Branch 1 taken 391 times.
617 if (EXTERNAL_MMXEXT(cpu_flags)) {
812 226 c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
813
814 226 c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext;
815 }
816
2/2
✓ Branch 0 taken 186 times.
✓ Branch 1 taken 431 times.
617 if (EXTERNAL_SSE2(cpu_flags)) {
817 186 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
818 186 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
819 if (ARCH_X86_64) {
820 186 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
821 186 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
822
823 186 c->idct[2] = ff_hevc_idct_16x16_8_sse2;
824 186 c->idct[3] = ff_hevc_idct_32x32_8_sse2;
825 }
826 186 SAO_BAND_INIT(8, sse2);
827
828 186 c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
829 186 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
830 186 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
831
832 186 c->idct[0] = ff_hevc_idct_4x4_8_sse2;
833 186 c->idct[1] = ff_hevc_idct_8x8_8_sse2;
834
835 186 c->add_residual[1] = ff_hevc_add_residual_8_8_sse2;
836 186 c->add_residual[2] = ff_hevc_add_residual_16_8_sse2;
837 186 c->add_residual[3] = ff_hevc_add_residual_32_8_sse2;
838 }
839
2/2
✓ Branch 0 taken 146 times.
✓ Branch 1 taken 471 times.
617 if (EXTERNAL_SSSE3(cpu_flags)) {
840 if(ARCH_X86_64) {
841 146 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
842 146 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
843 }
844 146 SAO_EDGE_INIT(8, ssse3);
845 }
846
2/2
✓ Branch 0 taken 126 times.
✓ Branch 1 taken 491 times.
617 if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
847
848 126 EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
849 126 EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4);
850 126 EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4);
851 126 EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 8, sse4);
852
853 126 QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
854 126 QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 8, sse4);
855 126 QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
856 126 QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
857 }
858
2/2
✓ Branch 0 taken 66 times.
✓ Branch 1 taken 551 times.
617 if (EXTERNAL_AVX(cpu_flags)) {
859 66 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
860 66 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
861 if (ARCH_X86_64) {
862 66 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
863 66 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
864
865 66 c->idct[2] = ff_hevc_idct_16x16_8_avx;
866 66 c->idct[3] = ff_hevc_idct_32x32_8_avx;
867 }
868 66 SAO_BAND_INIT(8, avx);
869
870 66 c->idct[0] = ff_hevc_idct_4x4_8_avx;
871 66 c->idct[1] = ff_hevc_idct_8x8_8_avx;
872
873 66 c->add_residual[1] = ff_hevc_add_residual_8_8_avx;
874 66 c->add_residual[2] = ff_hevc_add_residual_16_8_avx;
875 66 c->add_residual[3] = ff_hevc_add_residual_32_8_avx;
876 }
877
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 591 times.
617 if (EXTERNAL_AVX2(cpu_flags)) {
878 26 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
879 26 c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
880 }
881
3/4
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 591 times.
✓ Branch 2 taken 26 times.
✗ Branch 3 not taken.
617 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
882 26 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
883 26 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
884 if (ARCH_X86_64) {
885 26 c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2;
886 26 c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2;
887 26 c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2;
888
889 26 c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2;
890 26 c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2;
891 26 c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2;
892
893 26 c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
894 26 c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
895 26 c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
896
897 26 c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
898 26 c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
899 26 c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
900
901 26 c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2;
902 26 c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2;
903 26 c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2;
904
905 26 c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2;
906 26 c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2;
907 26 c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2;
908
909 26 c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2;
910 26 c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2;
911 26 c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2;
912
913 26 c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2;
914 26 c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2;
915 26 c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2;
916
917 26 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2;
918 26 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2;
919 26 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2;
920
921 26 c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2;
922 26 c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2;
923 26 c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2;
924
925 26 c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2;
926 26 c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2;
927 26 c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2;
928
929 26 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2;
930 26 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2;
931 26 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2;
932
933 26 c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2;
934 26 c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2;
935 26 c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2;
936
937 26 c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2;
938 26 c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2;
939 26 c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2;
940
941 26 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2;
942 26 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2;
943 26 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2;
944
945 26 c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2;
946 26 c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2;
947 26 c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2;
948
949 26 c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2;
950 26 c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2;
951 26 c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2;
952
953 26 c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2;
954 26 c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2;
955 26 c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2;
956
957 26 c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2;
958 26 c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2;
959 26 c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2;
960
961 26 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2;
962 26 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2;
963 26 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2;
964
965 26 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2;
966 26 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2;
967 26 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2;
968 }
969 26 SAO_BAND_INIT(8, avx2);
970
971 26 c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2;
972 26 c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
973 26 c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
974
975 26 c->add_residual[3] = ff_hevc_add_residual_32_8_avx2;
976 }
977
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 617 times.
617 if (EXTERNAL_AVX512ICL(cpu_flags) && ARCH_X86_64) {
978 c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_avx512icl;
979 c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_avx512icl;
980 c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_8_avx512icl;
981 c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx512icl;
982 c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx512icl;
983 c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_avx512icl;
984 }
985
2/2
✓ Branch 0 taken 307 times.
✓ Branch 1 taken 556 times.
863 } else if (bit_depth == 10) {
986
2/2
✓ Branch 0 taken 230 times.
✓ Branch 1 taken 77 times.
307 if (EXTERNAL_MMXEXT(cpu_flags)) {
987 230 c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext;
988 230 c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
989 }
990
2/2
✓ Branch 0 taken 190 times.
✓ Branch 1 taken 117 times.
307 if (EXTERNAL_SSE2(cpu_flags)) {
991 190 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
992 190 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
993 if (ARCH_X86_64) {
994 190 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
995 190 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
996
997 190 c->idct[2] = ff_hevc_idct_16x16_10_sse2;
998 190 c->idct[3] = ff_hevc_idct_32x32_10_sse2;
999 }
1000 190 SAO_BAND_INIT(10, sse2);
1001 190 SAO_EDGE_INIT(10, sse2);
1002
1003 190 c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
1004 190 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
1005 190 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
1006
1007 190 c->idct[0] = ff_hevc_idct_4x4_10_sse2;
1008 190 c->idct[1] = ff_hevc_idct_8x8_10_sse2;
1009
1010 190 c->add_residual[1] = ff_hevc_add_residual_8_10_sse2;
1011 190 c->add_residual[2] = ff_hevc_add_residual_16_10_sse2;
1012 190 c->add_residual[3] = ff_hevc_add_residual_32_10_sse2;
1013 }
1014
2/2
✓ Branch 0 taken 150 times.
✓ Branch 1 taken 157 times.
307 if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
1015 150 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
1016 150 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
1017 }
1018
2/2
✓ Branch 0 taken 130 times.
✓ Branch 1 taken 177 times.
307 if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
1019 130 EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
1020 130 EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4);
1021 130 EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4);
1022 130 EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 10, sse4);
1023
1024 130 QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
1025 130 QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 10, sse4);
1026 130 QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
1027 130 QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
1028 }
1029
2/2
✓ Branch 0 taken 70 times.
✓ Branch 1 taken 237 times.
307 if (EXTERNAL_AVX(cpu_flags)) {
1030 70 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
1031 70 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
1032 if (ARCH_X86_64) {
1033 70 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
1034 70 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
1035
1036 70 c->idct[2] = ff_hevc_idct_16x16_10_avx;
1037 70 c->idct[3] = ff_hevc_idct_32x32_10_avx;
1038 }
1039
1040 70 c->idct[0] = ff_hevc_idct_4x4_10_avx;
1041 70 c->idct[1] = ff_hevc_idct_8x8_10_avx;
1042
1043 70 SAO_BAND_INIT(10, avx);
1044 }
1045
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 277 times.
307 if (EXTERNAL_AVX2(cpu_flags)) {
1046 30 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
1047 }
1048
3/4
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 277 times.
✓ Branch 2 taken 30 times.
✗ Branch 3 not taken.
307 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
1049 30 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
1050 30 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
1051 if (ARCH_X86_64) {
1052 30 c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2;
1053 30 c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2;
1054 30 c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2;
1055 30 c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2;
1056 30 c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2;
1057
1058 30 c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2;
1059 30 c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2;
1060 30 c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2;
1061 30 c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2;
1062 30 c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2;
1063
1064 30 c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
1065 30 c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
1066 30 c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
1067 30 c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2;
1068 30 c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2;
1069
1070 30 c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
1071 30 c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
1072 30 c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
1073 30 c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2;
1074 30 c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2;
1075
1076 30 c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2;
1077 30 c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2;
1078 30 c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2;
1079 30 c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2;
1080 30 c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2;
1081 30 c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2;
1082 30 c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2;
1083 30 c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2;
1084 30 c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2;
1085 30 c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2;
1086
1087 30 c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2;
1088 30 c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2;
1089 30 c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2;
1090 30 c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2;
1091 30 c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2;
1092
1093 30 c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2;
1094 30 c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2;
1095 30 c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2;
1096 30 c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2;
1097 30 c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2;
1098
1099 30 c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2;
1100 30 c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2;
1101 30 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2;
1102 30 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2;
1103 30 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2;
1104
1105 30 c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2;
1106 30 c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2;
1107 30 c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2;
1108 30 c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2;
1109 30 c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2;
1110
1111 30 c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2;
1112 30 c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2;
1113 30 c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2;
1114 30 c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2;
1115 30 c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2;
1116
1117 30 c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2;
1118 30 c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2;
1119 30 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2;
1120 30 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2;
1121 30 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2;
1122
1123 30 c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2;
1124 30 c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2;
1125 30 c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2;
1126 30 c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2;
1127 30 c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2;
1128
1129 30 c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2;
1130 30 c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2;
1131 30 c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2;
1132 30 c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2;
1133 30 c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2;
1134
1135 30 c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2;
1136 30 c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2;
1137 30 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2;
1138 30 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2;
1139 30 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2;
1140
1141 30 c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2;
1142 30 c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2;
1143 30 c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2;
1144 30 c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2;
1145 30 c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2;
1146
1147 30 c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2;
1148 30 c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2;
1149 30 c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2;
1150 30 c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2;
1151 30 c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2;
1152
1153 30 c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2;
1154 30 c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2;
1155 30 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2;
1156 30 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2;
1157 30 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2;
1158
1159 30 c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2;
1160 30 c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2;
1161 30 c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2;
1162 30 c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2;
1163 30 c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2;
1164
1165 30 c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2;
1166 30 c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2;
1167 30 c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2;
1168 30 c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2;
1169 30 c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2;
1170
1171 30 c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2;
1172 30 c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2;
1173 30 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2;
1174 30 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2;
1175 30 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2;
1176
1177 30 c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2;
1178 30 c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2;
1179 30 c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2;
1180 30 c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2;
1181 30 c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2;
1182
1183 30 c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2;
1184 30 c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2;
1185 30 c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2;
1186 30 c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2;
1187 30 c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2;
1188
1189 30 c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2;
1190 30 c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2;
1191 30 c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2;
1192 30 c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2;
1193 30 c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2;
1194 }
1195 30 SAO_BAND_INIT(10, avx2);
1196 30 SAO_EDGE_INIT(10, avx2);
1197
1198 30 c->add_residual[2] = ff_hevc_add_residual_16_10_avx2;
1199 30 c->add_residual[3] = ff_hevc_add_residual_32_10_avx2;
1200 }
1201
2/2
✓ Branch 0 taken 270 times.
✓ Branch 1 taken 286 times.
556 } else if (bit_depth == 12) {
1202
2/2
✓ Branch 0 taken 220 times.
✓ Branch 1 taken 50 times.
270 if (EXTERNAL_MMXEXT(cpu_flags)) {
1203 220 c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext;
1204 }
1205
2/2
✓ Branch 0 taken 180 times.
✓ Branch 1 taken 90 times.
270 if (EXTERNAL_SSE2(cpu_flags)) {
1206 180 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
1207 180 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
1208 if (ARCH_X86_64) {
1209 180 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
1210 180 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
1211 }
1212 180 SAO_BAND_INIT(12, sse2);
1213 180 SAO_EDGE_INIT(12, sse2);
1214
1215 180 c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2;
1216 180 c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2;
1217 180 c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2;
1218 }
1219
2/2
✓ Branch 0 taken 140 times.
✓ Branch 1 taken 130 times.
270 if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
1220 140 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
1221 140 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
1222 }
1223
2/2
✓ Branch 0 taken 120 times.
✓ Branch 1 taken 150 times.
270 if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
1224 120 EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
1225 120 EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4);
1226 120 EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4);
1227 120 EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 12, sse4);
1228
1229 120 QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
1230 120 QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4);
1231 120 QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4);
1232 120 QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4);
1233 }
1234
2/2
✓ Branch 0 taken 60 times.
✓ Branch 1 taken 210 times.
270 if (EXTERNAL_AVX(cpu_flags)) {
1235 60 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
1236 60 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
1237 if (ARCH_X86_64) {
1238 60 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
1239 60 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
1240 }
1241 60 SAO_BAND_INIT(12, avx);
1242 }
1243
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 250 times.
270 if (EXTERNAL_AVX2(cpu_flags)) {
1244 20 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
1245 }
1246
3/4
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 250 times.
✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
270 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
1247 20 c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2;
1248 20 c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2;
1249
1250 20 SAO_BAND_INIT(12, avx2);
1251 20 SAO_EDGE_INIT(12, avx2);
1252 }
1253 }
1254 1480 }
1255