FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/hevc/dsp_init.c
Date: 2025-04-25 22:50:00
Coverage summary (Exec / Total / Coverage):
Lines: 636 / 642 (99.1%)
Functions: 1015 / 1015 (100.0%)
Branches: 414 / 418 (99.0%)

Line Branch Exec Source
1 /*
2 * Copyright (c) 2013 Seppo Tomperi
3 * Copyright (c) 2013-2014 Pierre-Edouard Lepere
4 * Copyright (c) 2023-2024 Wu Jianhua
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "config.h"
24
25 #include "libavutil/cpu.h"
26 #include "libavutil/mem_internal.h"
27 #include "libavutil/x86/asm.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavcodec/hevc/dsp.h"
30 #include "libavcodec/x86/hevc/dsp.h"
31 #include "libavcodec/x86/h26x/h2656dsp.h"
32
/* Declares the prototype of one chroma loop-filter routine named
 * ff_hevc_<DIR>_loop_filter_chroma_<DEPTH>_<OPT>.
 * The expansion already ends in ';', so uses of this macro take no
 * trailing semicolon. */
33 #define LFC_FUNC(DIR, DEPTH, OPT) \
34 void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
35
/* Declares the prototype of one luma loop-filter routine named
 * ff_hevc_<DIR>_loop_filter_luma_<DEPTH>_<OPT>. Compared with the chroma
 * prototype it takes an additional 'beta' argument before 'tc'.
 * The expansion already ends in ';'. */
36 #define LFL_FUNC(DIR, DEPTH, OPT) \
37 void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
38
/* Declares both the 'h' and the 'v' chroma loop-filter prototypes for one
 * depth/opt pair. NOTE: the 'type' parameter is accepted but never used in
 * the expansion. */
39 #define LFC_FUNCS(type, depth, opt) \
40 LFC_FUNC(h, depth, opt) \
41 LFC_FUNC(v, depth, opt)
42
/* Declares both the 'h' and the 'v' luma loop-filter prototypes for one
 * depth/opt pair. NOTE: the 'type' parameter is accepted but never used in
 * the expansion. */
43 #define LFL_FUNCS(type, depth, opt) \
44 LFL_FUNC(h, depth, opt) \
45 LFL_FUNC(v, depth, opt)
46
/* Loop-filter prototypes for depths 8, 10 and 12:
 * chroma for sse2 and avx, luma for sse2, ssse3 and avx.
 * The first argument (uint8_t) is ignored by LFC_FUNCS/LFL_FUNCS. */
47 LFC_FUNCS(uint8_t, 8, sse2)
48 LFC_FUNCS(uint8_t, 10, sse2)
49 LFC_FUNCS(uint8_t, 12, sse2)
50 LFC_FUNCS(uint8_t, 8, avx)
51 LFC_FUNCS(uint8_t, 10, avx)
52 LFC_FUNCS(uint8_t, 12, avx)
53 LFL_FUNCS(uint8_t, 8, sse2)
54 LFL_FUNCS(uint8_t, 10, sse2)
55 LFL_FUNCS(uint8_t, 12, sse2)
56 LFL_FUNCS(uint8_t, 8, ssse3)
57 LFL_FUNCS(uint8_t, 10, ssse3)
58 LFL_FUNCS(uint8_t, 12, ssse3)
59 LFL_FUNCS(uint8_t, 8, avx)
60 LFL_FUNCS(uint8_t, 10, avx)
61 LFL_FUNCS(uint8_t, 12, avx)
62
/* Declares the three prototypes ff_hevc_idct_<W>_dc_{8,10,12}_<opt>, each
 * taking only the coefficient buffer. The last declaration has no trailing
 * ';', so every use of this macro must be followed by a semicolon. */
63 #define IDCT_DC_FUNCS(W, opt) \
64 void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
65 void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
66 void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)
67
/* ff_hevc_idct_<W>_dc_* prototypes: 4x4 for mmxext, 8x8/16x16/32x32 for
 * sse2, and 16x16/32x32 again for avx2. */
68 IDCT_DC_FUNCS(4x4, mmxext);
69 IDCT_DC_FUNCS(8x8, sse2);
70 IDCT_DC_FUNCS(16x16, sse2);
71 IDCT_DC_FUNCS(32x32, sse2);
72 IDCT_DC_FUNCS(16x16, avx2);
73 IDCT_DC_FUNCS(32x32, avx2);
74
/* Declares the ff_hevc_idct_<size>_<depth>_<opt> prototypes for sizes 4x4,
 * 8x8, 16x16 and 32x32 at depths 8 and 10 (no 12-bit variants are declared
 * here). Each takes the coefficient buffer and a 'col_limit' argument.
 * Every declaration, including the last, ends in ';', so uses of this macro
 * take no trailing semicolon. */
75 #define IDCT_FUNCS(opt) \
76 void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \
77 void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \
78 void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \
79 void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \
80 void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \
81 void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
82 void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \
83 void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);
84
/* Full-IDCT prototypes for the sse2 and avx variants. */
85 IDCT_FUNCS(sse2)
86 IDCT_FUNCS(avx)
87
88
/* Alias so that DECL_HV_FILTER(pel) resolves to an existing table: the
 * pasted name ff_hevc_pel_filters is rewritten to ff_hevc_qpel_filters. */
89 #define ff_hevc_pel_filters ff_hevc_qpel_filters
/* Declares two locals, 'hf' and 'vf', taken from ff_hevc_<f>_filters[mx] and
 * ff_hevc_<f>_filters[my]. 'mx' and 'my' must already be in scope at the
 * point of expansion. */
90 #define DECL_HV_FILTER(f) \
91 const uint8_t *hf = ff_hevc_ ## f ## _filters[mx]; \
92 const uint8_t *vf = ff_hevc_ ## f ## _filters[my];
93
/* Defines the static forwarding wrapper hevc_put_<a>_<depth>_<opt>.
 *   p     - filter family passed to DECL_HV_FILTER (selects ff_hevc_<p>_filters)
 *   a     - name fragment of the generated wrapper
 *   b     - name fragment of the ff_h2656_put_* routine that is called
 * The wrapper looks up hf/vf from mx/my and forwards to
 * ff_h2656_put_<b>_<depth>_<opt>, passing 2 * MAX_PB_SIZE as the second
 * argument (presumably the destination stride in bytes -- confirm against
 * the ff_h2656_put_* prototype). */
94 #define FW_PUT(p, a, b, depth, opt) \
95 static void hevc_put_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
96 int height, intptr_t mx, intptr_t my,int width) \
97 { \
98 DECL_HV_FILTER(p) \
99 ff_h2656_put_ ## b ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
100 }
101
/* Defines the static forwarding wrapper hevc_put_uni_<a>_<depth>_<opt>.
 *   p     - filter family passed to DECL_HV_FILTER (selects ff_hevc_<p>_filters)
 *   a     - name fragment of the generated wrapper
 *   b     - name fragment of the ff_h2656_put_uni_* routine that is called
 * Unlike FW_PUT, the destination is uint8_t* and the caller-supplied
 * 'dststride' is forwarded unchanged. */
102 #define FW_PUT_UNI(p, a, b, depth, opt) \
103 static void hevc_put_uni_ ## a ## _ ## depth ## _##opt(uint8_t *dst, ptrdiff_t dststride, \
104 const uint8_t *src, ptrdiff_t srcstride, \
105 int height, intptr_t mx, intptr_t my, int width) \
106 { \
107 DECL_HV_FILTER(p) \
108 ff_h2656_put_uni_ ## b ## _ ## depth ## _##opt(dst, dststride, src, srcstride, height, hf, vf, width); \
109 }
110
111 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
112
/* Emits both forwarding wrappers (FW_PUT and FW_PUT_UNI) for one
 * name/depth/opt combination. */
113 #define FW_PUT_FUNCS(p, a, b, depth, opt) \
114 FW_PUT(p, a, b, depth, opt) \
115 FW_PUT_UNI(p, a, b, depth, opt)
116
/* Wrappers hevc_put[_uni]_pel_pixels<w>_<depth>_<opt> forwarding to
 * ff_h2656_put[_uni]_pixels<w>_<depth>_<opt>; uses the 'pel' filter family. */
117 #define FW_PEL(w, depth, opt) FW_PUT_FUNCS(pel, pel_pixels##w, pixels##w, depth, opt)
118
/* Emits the '_h<w>' and '_v<w>' wrappers for filter family 'npel'
 * (hevc_put[_uni]_<npel>_h<w>/_v<w>_...), forwarding to the
 * ff_h2656_put[_uni]_<n>tap_h<w>/_v<w>_... routines. */
119 #define FW_DIR(npel, n, w, depth, opt) \
120 FW_PUT_FUNCS(npel, npel ## _h##w, n ## tap_h##w, depth, opt) \
121 FW_PUT_FUNCS(npel, npel ## _v##w, n ## tap_v##w, depth, opt)
122
/* Emits the '_hv<w>' wrappers for filter family 'npel', forwarding to the
 * ff_h2656_put[_uni]_<n>tap_hv<w>_... routines. */
123 #define FW_DIR_HV(npel, n, w, depth, opt) \
124 FW_PUT_FUNCS(npel, npel ## _hv##w, n ## tap_hv##w, depth, opt)
125
126 17410 FW_PEL(4, 8, sse4)
127 76 FW_PEL(6, 8, sse4)
128 26322 FW_PEL(8, 8, sse4)
129 76 FW_PEL(12, 8, sse4)
130 150684 FW_PEL(16, 8, sse4)
131 2400 FW_PEL(4, 10, sse4)
132 76 FW_PEL(6, 10, sse4)
133 33624 FW_PEL(8, 10, sse4)
134 304 FW_PEL(4, 12, sse4)
135 76 FW_PEL(6, 12, sse4)
136 1824 FW_PEL(8, 12, sse4)
137
/* epel wrappers: filter family 'epel', forwarding to the 4tap_* routines.
 * FW_EPEL emits the h and v wrappers, FW_EPEL_HV the hv wrapper, and
 * FW_EPEL_FUNCS all three. */
138 #define FW_EPEL(w, depth, opt) FW_DIR(epel, 4, w, depth, opt)
139 #define FW_EPEL_HV(w, depth, opt) FW_DIR_HV(epel, 4, w, depth, opt)
140 #define FW_EPEL_FUNCS(w, depth, opt) \
141 FW_EPEL(w, depth, opt) \
142 FW_EPEL_HV(w, depth, opt)
143
144 152 FW_EPEL(12, 8, sse4)
145
146 10508 FW_EPEL_FUNCS(4, 8, sse4)
147 228 FW_EPEL_FUNCS(6, 8, sse4)
148 12880 FW_EPEL_FUNCS(8, 8, sse4)
149 21140 FW_EPEL_FUNCS(16, 8, sse4)
150 6884 FW_EPEL_FUNCS(4, 10, sse4)
151 228 FW_EPEL_FUNCS(6, 10, sse4)
152 31064 FW_EPEL_FUNCS(8, 10, sse4)
153 912 FW_EPEL_FUNCS(4, 12, sse4)
154 228 FW_EPEL_FUNCS(6, 12, sse4)
155 5472 FW_EPEL_FUNCS(8, 12, sse4)
156
/* qpel wrappers: filter family 'qpel', forwarding to the 8tap_* routines.
 * FW_QPEL emits the h and v wrappers, FW_QPEL_HV the hv wrapper, and
 * FW_QPEL_FUNCS all three. */
157 #define FW_QPEL(w, depth, opt) FW_DIR(qpel, 8, w, depth, opt)
158 #define FW_QPEL_HV(w, depth, opt) FW_DIR_HV(qpel, 8, w, depth, opt)
159 #define FW_QPEL_FUNCS(w, depth, opt) \
160 FW_QPEL(w, depth, opt) \
161 FW_QPEL_HV(w, depth, opt)
162
163 152 FW_QPEL(12, 8, sse4)
164 1638 FW_QPEL(16, 8, sse4)
165
166 306 FW_QPEL_FUNCS(4, 8, sse4)
167 4518 FW_QPEL_FUNCS(8, 8, sse4)
168 912 FW_QPEL_FUNCS(4, 10, sse4)
169 14268 FW_QPEL_FUNCS(8, 10, sse4)
170 912 FW_QPEL_FUNCS(4, 12, sse4)
171 5472 FW_QPEL_FUNCS(8, 12, sse4)
172
173 #if HAVE_AVX2_EXTERNAL
174
175 27552 FW_PEL(32, 8, avx2)
176 3028 FW_PUT(pel, pel_pixels16, pixels16, 10, avx2)
177
178 1396 FW_EPEL(32, 8, avx2)
179 5112 FW_EPEL(16, 10, avx2)
180
181 752 FW_EPEL_HV(32, 8, avx2)
182 6732 FW_EPEL_HV(16, 10, avx2)
183
184 32 FW_QPEL(32, 8, avx2)
185 8960 FW_QPEL(16, 10, avx2)
186
187 11354 FW_QPEL_HV(16, 10, avx2)
188
189 #endif
190 #endif
191
/* Defines static hevc_put_<name><W>_<bitd>_<opt> by repeating the
 * <step>-wide variant hevc_put_<name><step>_<bitd>_<opt> across columns
 * 0, step, 2*step, ... while the column index is below W.
 * Per iteration the source advances by i * ((bitd + 7) / 8) bytes (factor 1
 * for bitd 8, 2 for bitd 10 and 12) and the int16_t destination by i
 * elements. height, mx, my and width are forwarded unchanged. */
192 #define mc_rep_func(name, bitd, step, W, opt) \
193 static void hevc_put_##name##W##_##bitd##_##opt(int16_t *_dst, \
194 const uint8_t *_src, ptrdiff_t _srcstride, int height, \
195 intptr_t mx, intptr_t my, int width) \
196 { \
197 int i; \
198 int16_t *dst; \
199 for (i = 0; i < W; i += step) { \
200 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
201 dst = _dst + i; \
202 hevc_put_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
203 } \
204 }
/* Defines static hevc_put_uni_<name><W>_<bitd>_<opt> by repeating the
 * <step>-wide variant hevc_put_uni_<name><step>_<bitd>_<opt> across columns
 * 0, step, 2*step, ... while the column index is below W.
 * Both the uint8_t source and the uint8_t destination advance by
 * i * ((bitd + 7) / 8) bytes per iteration (factor 1 for bitd 8, 2 for
 * bitd 10 and 12). */
205 #define mc_rep_uni_func(name, bitd, step, W, opt) \
206 static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \
207 const uint8_t *_src, ptrdiff_t _srcstride, int height, \
208 intptr_t mx, intptr_t my, int width) \
209 { \
210 int i; \
211 uint8_t *dst; \
212 for (i = 0; i < W; i += step) { \
213 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
214 dst = _dst + (i * ((bitd + 7) / 8)); \
215 hevc_put_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \
216 height, mx, my, width); \
217 } \
218 }
/* Defines static ff_hevc_put_bi_<name><W>_<bitd>_<opt> by repeating the
 * <step>-wide variant ff_hevc_put_bi_<name><step>_<bitd>_<opt> across
 * columns 0, step, 2*step, ... while the column index is below W.
 * Per iteration the uint8_t source and destination advance by
 * i * ((bitd + 7) / 8) bytes, and the int16_t second source (_src2) by i
 * elements. */
219 #define mc_rep_bi_func(name, bitd, step, W, opt) \
220 static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \
221 ptrdiff_t _srcstride, const int16_t *_src2, \
222 int height, intptr_t mx, intptr_t my, int width) \
223 { \
224 int i; \
225 uint8_t *dst; \
226 for (i = 0; i < W ; i += step) { \
227 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
228 const int16_t *src2 = _src2 + i; \
229 dst = _dst + (i * ((bitd + 7) / 8)); \
230 ff_hevc_put_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \
231 height, mx, my, width); \
232 } \
233 }
234
/* Emits all three column-repeating wrappers (put, put_uni and put_bi) for
 * one name/bitd/step/W/opt combination. */
235 #define mc_rep_funcs(name, bitd, step, W, opt) \
236 mc_rep_func(name, bitd, step, W, opt) \
237 mc_rep_uni_func(name, bitd, step, W, opt) \
238 mc_rep_bi_func(name, bitd, step, W, opt)
239
/* Defines static hevc_put_<name><W>_<bitd>_<opt> as exactly two calls:
 * the <step1>-wide variant at the block origin, then the <step2>-wide
 * variant starting step1 columns to the right (dst + step1 int16_t
 * elements, src + step1 * ((bitd + 7) / 8) bytes).
 * W appears only in the generated name; the expansion does not check that
 * step1 + step2 equals W. */
240 #define mc_rep_func2(name, bitd, step1, step2, W, opt) \
241 static void hevc_put_##name##W##_##bitd##_##opt(int16_t *dst, \
242 const uint8_t *src, ptrdiff_t _srcstride, int height, \
243 intptr_t mx, intptr_t my, int width) \
244 { \
245 hevc_put_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
246 hevc_put_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)), \
247 _srcstride, height, mx, my, width); \
248 }
/* Defines static hevc_put_uni_<name><W>_<bitd>_<opt> as exactly two calls:
 * the <step1>-wide variant at the block origin, then the <step2>-wide
 * variant with both dst and src advanced by step1 * ((bitd + 7) / 8) bytes.
 * W appears only in the generated name. */
249 #define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
250 static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \
251 const uint8_t *src, ptrdiff_t _srcstride, int height, \
252 intptr_t mx, intptr_t my, int width) \
253 { \
254 hevc_put_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width); \
255 hevc_put_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
256 src + (step1 * ((bitd + 7) / 8)), _srcstride, \
257 height, mx, my, width); \
258 }
/* Defines static ff_hevc_put_bi_<name><W>_<bitd>_<opt> as exactly two
 * calls: the <step1>-wide variant at the block origin, then the
 * <step2>-wide variant with dst and src advanced by
 * step1 * ((bitd + 7) / 8) bytes and the int16_t src2 advanced by step1
 * elements. W appears only in the generated name. */
259 #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \
260 static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
261 ptrdiff_t _srcstride, const int16_t *src2, \
262 int height, intptr_t mx, intptr_t my, int width) \
263 { \
264 ff_hevc_put_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width);\
265 ff_hevc_put_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
266 src + (step1 * ((bitd + 7) / 8)), _srcstride, \
267 src2 + step1, height, mx, my, width); \
268 }
269
/* Emits all three two-piece wrappers (put, put_uni and put_bi) for one
 * name/bitd/step1/step2/W/opt combination. */
270 #define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \
271 mc_rep_func2(name, bitd, step1, step2, W, opt) \
272 mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
273 mc_rep_bi_func2(name, bitd, step1, step2, W, opt)
274
275 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
276
/* Defines static hevc_put_<name><width1>_10_<opt1> out of two pieces that
 * may use different instruction-set variants:
 *   - the <width2>-wide <opt1> function at the block origin, then
 *   - the <width3>-wide <opt2> function with dst advanced by width2 int16_t
 *     elements and src advanced by width4 bytes.
 * width4 is a separate parameter because src is a byte pointer; the uses in
 * this file pass width4 = 32 with width2 = 16. */
277 #define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
278 static void hevc_put_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
279 int height, intptr_t mx, intptr_t my, int width) \
280 \
281 { \
282 hevc_put_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \
283 hevc_put_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width); \
284 }
285
/* Defines static ff_hevc_put_bi_<name><width1>_10_<opt1> out of two pieces:
 *   - the <width2>-wide <opt1> function at the block origin, then
 *   - the <width3>-wide <opt2> function with the uint8_t dst and src both
 *     advanced by width4 bytes and the int16_t src2 advanced by width2
 *     elements. */
286 #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
287 static void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
288 ptrdiff_t _srcstride, const int16_t *src2, \
289 int height, intptr_t mx, intptr_t my, int width) \
290 { \
291 ff_hevc_put_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \
292 height, mx, my, width); \
293 ff_hevc_put_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2, \
294 height, mx, my, width); \
295 }
296
/* Defines static hevc_put_uni_<name><width1>_10_<opt1> out of two pieces:
 *   - the <width2>-wide <opt1> function at the block origin, then
 *   - the <width3>-wide <opt2> function with the uint8_t dst and src both
 *     advanced by width4 bytes. */
297 #define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
298 static void hevc_put_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \
299 const uint8_t *src, ptrdiff_t _srcstride, int height, \
300 intptr_t mx, intptr_t my, int width) \
301 { \
302 hevc_put_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \
303 height, mx, my, width); \
304 hevc_put_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, \
305 height, mx, my, width); \
306 }
307
/* Emits all three mixed-variant 10-bit wrappers (put, put_bi and put_uni)
 * for one name/width/opt combination. */
308 #define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) \
309 mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
310 mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
311 mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)
312
/* Defines static hevc_put_<name><width1>_8_<opt1> out of two pieces that
 * may use different instruction-set variants:
 *   - the <width2>-wide <opt1> function at the block origin, then
 *   - the <width3>-wide <opt2> function with dst advanced by width2 int16_t
 *     elements and src advanced by width2 bytes.
 * Unlike the 10-bit mix macro there is no separate byte-offset parameter:
 * the same width2 is used for both offsets. */
313 #define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
314 static void hevc_put_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
315 int height, intptr_t mx, intptr_t my, int width) \
316 \
317 { \
318 hevc_put_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \
319 hevc_put_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width); \
320 }
321
/* Defines static ff_hevc_put_bi_<name><width1>_8_<opt1> out of two pieces:
 *   - the <width2>-wide <opt1> function at the block origin, then
 *   - the <width3>-wide <opt2> function with dst and src advanced by width2
 *     bytes and the int16_t src2 advanced by width2 elements. */
322 #define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
323 static void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
324 ptrdiff_t _srcstride, const int16_t *src2, \
325 int height, intptr_t mx, intptr_t my, int width) \
326 { \
327 ff_hevc_put_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
328 src2, height, mx, my, width); \
329 ff_hevc_put_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \
330 src2+width2, height, mx, my, width); \
331 }
332
/* Defines static hevc_put_uni_<name><width1>_8_<opt1> out of two pieces:
 *   - the <width2>-wide <opt1> function at the block origin, then
 *   - the <width3>-wide <opt2> function with dst and src both advanced by
 *     width2 bytes. */
333 #define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
334 static void hevc_put_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \
335 const uint8_t *src, ptrdiff_t _srcstride, int height, \
336 intptr_t mx, intptr_t my, int width) \
337 { \
338 hevc_put_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
339 height, mx, my, width); \
340 hevc_put_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \
341 height, mx, my, width); \
342 }
343
/* Emits all three mixed-variant 8-bit wrappers (put, put_bi and put_uni)
 * for one name/width/opt combination. */
344 #define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) \
345 mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
346 mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
347 mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)
348
349 #if HAVE_AVX2_EXTERNAL
350
351 8 mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
352 6 mc_rep_mixs_8(epel_hv, 48, 32, 16, avx2, sse4)
353 6 mc_rep_mixs_8(epel_h , 48, 32, 16, avx2, sse4)
354 6 mc_rep_mixs_8(epel_v , 48, 32, 16, avx2, sse4)
355
356 5 mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
357 1 mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32)
358 6 mc_rep_mixs_10(epel_hv, 24, 16, 8, avx2, sse4, 32)
359 6 mc_rep_mixs_10(epel_h , 24, 16, 8, avx2, sse4, 32)
360 6 mc_rep_mixs_10(epel_v , 24, 16, 8, avx2, sse4, 32)
361
362
363 6 mc_rep_mixs_10(qpel_h , 24, 16, 8, avx2, sse4, 32)
364 6 mc_rep_mixs_10(qpel_v , 24, 16, 8, avx2, sse4, 32)
365 6 mc_rep_mixs_10(qpel_hv, 24, 16, 8, avx2, sse4, 32)
366
367
368
2/2
✓ Branch 1 taken 6722 times.
✓ Branch 2 taken 3361 times.
20166 mc_rep_funcs(pel_pixels, 8, 32, 64, avx2)
369
370
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 78 times.
234 mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2)//used for 10bit
371
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
4 mc_rep_uni_func(pel_pixels, 8, 32, 96, avx2) //used for 10bit
372
373
2/2
✓ Branch 1 taken 2026 times.
✓ Branch 2 taken 1013 times.
3039 mc_rep_func(pel_pixels, 10, 16, 32, avx2)
374
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
4 mc_rep_func(pel_pixels, 10, 16, 48, avx2)
375
2/2
✓ Branch 1 taken 304 times.
✓ Branch 2 taken 152 times.
456 mc_rep_func(pel_pixels, 10, 32, 64, avx2)
376
377
2/2
✓ Branch 1 taken 176 times.
✓ Branch 2 taken 88 times.
264 mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2)
378
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
4 mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2)
379
2/2
✓ Branch 1 taken 42 times.
✓ Branch 2 taken 21 times.
63 mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2)
380
381
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_h, 8, 32, 64, avx2)
382
383
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_v, 8, 32, 64, avx2)
384
385
2/2
✓ Branch 1 taken 1290 times.
✓ Branch 2 taken 645 times.
3870 mc_rep_funcs(epel_h, 10, 16, 32, avx2)
386
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(epel_h, 10, 16, 48, avx2)
387
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_h, 10, 32, 64, avx2)
388
389
2/2
✓ Branch 1 taken 154 times.
✓ Branch 2 taken 77 times.
462 mc_rep_funcs(epel_v, 10, 16, 32, avx2)
390
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(epel_v, 10, 16, 48, avx2)
391
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_v, 10, 32, 64, avx2)
392
393
394
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_hv, 8, 32, 64, avx2)
395
396
2/2
✓ Branch 1 taken 1598 times.
✓ Branch 2 taken 799 times.
4794 mc_rep_funcs(epel_hv, 10, 16, 32, avx2)
397
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(epel_hv, 10, 16, 48, avx2)
398
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_hv, 10, 32, 64, avx2)
399
400
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(qpel_h, 8, 32, 64, avx2)
401 6 mc_rep_mixs_8(qpel_h , 48, 32, 16, avx2, sse4)
402
403
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(qpel_v, 8, 32, 64, avx2)
404 6 mc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4)
405
406
2/2
✓ Branch 1 taken 1982 times.
✓ Branch 2 taken 991 times.
5946 mc_rep_funcs(qpel_h, 10, 16, 32, avx2)
407
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(qpel_h, 10, 16, 48, avx2)
408
2/2
✓ Branch 1 taken 512 times.
✓ Branch 2 taken 256 times.
1536 mc_rep_funcs(qpel_h, 10, 32, 64, avx2)
409
410
2/2
✓ Branch 1 taken 910 times.
✓ Branch 2 taken 455 times.
2730 mc_rep_funcs(qpel_v, 10, 16, 32, avx2)
411
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(qpel_v, 10, 16, 48, avx2)
412
2/2
✓ Branch 1 taken 202 times.
✓ Branch 2 taken 101 times.
606 mc_rep_funcs(qpel_v, 10, 32, 64, avx2)
413
414
2/2
✓ Branch 1 taken 3116 times.
✓ Branch 2 taken 1558 times.
9348 mc_rep_funcs(qpel_hv, 10, 16, 32, avx2)
415
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(qpel_hv, 10, 16, 48, avx2)
416
2/2
✓ Branch 1 taken 668 times.
✓ Branch 2 taken 334 times.
2004 mc_rep_funcs(qpel_hv, 10, 32, 64, avx2)
417
418 #endif //AVX2
419
420
2/2
✓ Branch 1 taken 30924 times.
✓ Branch 2 taken 7731 times.
77310 mc_rep_funcs(pel_pixels, 8, 16, 64, sse4)
421
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(pel_pixels, 8, 16, 48, sse4)
422
2/2
✓ Branch 1 taken 33874 times.
✓ Branch 2 taken 16937 times.
101622 mc_rep_funcs(pel_pixels, 8, 16, 32, sse4)
423
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(pel_pixels, 8, 8, 24, sse4)
424
2/2
✓ Branch 1 taken 2960 times.
✓ Branch 2 taken 370 times.
6660 mc_rep_funcs(pel_pixels,10, 8, 64, sse4)
425
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(pel_pixels,10, 8, 48, sse4)
426
2/2
✓ Branch 1 taken 6208 times.
✓ Branch 2 taken 1552 times.
15520 mc_rep_funcs(pel_pixels,10, 8, 32, sse4)
427
2/2
✓ Branch 1 taken 144 times.
✓ Branch 2 taken 48 times.
384 mc_rep_funcs(pel_pixels,10, 8, 24, sse4)
428
2/2
✓ Branch 1 taken 4020 times.
✓ Branch 2 taken 2010 times.
12060 mc_rep_funcs(pel_pixels,10, 8, 16, sse4)
429
2/2
✓ Branch 1 taken 177 times.
✓ Branch 2 taken 59 times.
472 mc_rep_funcs(pel_pixels,10, 4, 12, sse4)
430
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(pel_pixels,12, 8, 64, sse4)
431
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(pel_pixels,12, 8, 48, sse4)
432
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(pel_pixels,12, 8, 32, sse4)
433
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(pel_pixels,12, 8, 24, sse4)
434
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(pel_pixels,12, 8, 16, sse4)
435
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(pel_pixels,12, 4, 12, sse4)
436
437
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_h, 8, 16, 64, sse4)
438
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_h, 8, 16, 48, sse4)
439
2/2
✓ Branch 1 taken 1076 times.
✓ Branch 2 taken 538 times.
3228 mc_rep_funcs(epel_h, 8, 16, 32, sse4)
440
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_h, 8, 8, 24, sse4)
441
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(epel_h,10, 8, 64, sse4)
442
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(epel_h,10, 8, 48, sse4)
443
2/2
✓ Branch 1 taken 1016 times.
✓ Branch 2 taken 254 times.
2540 mc_rep_funcs(epel_h,10, 8, 32, sse4)
444
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_h,10, 8, 24, sse4)
445
2/2
✓ Branch 1 taken 1140 times.
✓ Branch 2 taken 570 times.
3420 mc_rep_funcs(epel_h,10, 8, 16, sse4)
446
2/2
✓ Branch 1 taken 129 times.
✓ Branch 2 taken 43 times.
344 mc_rep_funcs(epel_h,10, 4, 12, sse4)
447
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(epel_h,12, 8, 64, sse4)
448
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(epel_h,12, 8, 48, sse4)
449
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(epel_h,12, 8, 32, sse4)
450
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_h,12, 8, 24, sse4)
451
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(epel_h,12, 8, 16, sse4)
452
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_h,12, 4, 12, sse4)
453
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_v, 8, 16, 64, sse4)
454
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_v, 8, 16, 48, sse4)
455
2/2
✓ Branch 1 taken 2056 times.
✓ Branch 2 taken 1028 times.
6168 mc_rep_funcs(epel_v, 8, 16, 32, sse4)
456
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_v, 8, 8, 24, sse4)
457
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(epel_v,10, 8, 64, sse4)
458
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(epel_v,10, 8, 48, sse4)
459
2/2
✓ Branch 1 taken 296 times.
✓ Branch 2 taken 74 times.
740 mc_rep_funcs(epel_v,10, 8, 32, sse4)
460
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_v,10, 8, 24, sse4)
461
2/2
✓ Branch 1 taken 204 times.
✓ Branch 2 taken 102 times.
612 mc_rep_funcs(epel_v,10, 8, 16, sse4)
462
2/2
✓ Branch 1 taken 141 times.
✓ Branch 2 taken 47 times.
376 mc_rep_funcs(epel_v,10, 4, 12, sse4)
463
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(epel_v,12, 8, 64, sse4)
464
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(epel_v,12, 8, 48, sse4)
465
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(epel_v,12, 8, 32, sse4)
466
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_v,12, 8, 24, sse4)
467
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(epel_v,12, 8, 16, sse4)
468
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_v,12, 4, 12, sse4)
469
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_hv, 8, 16, 64, sse4)
470
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_hv, 8, 16, 48, sse4)
471
2/2
✓ Branch 1 taken 1860 times.
✓ Branch 2 taken 930 times.
5580 mc_rep_funcs(epel_hv, 8, 16, 32, sse4)
472
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv, 8, 8, 24, sse4)
473 78 mc_rep_funcs2(epel_hv,8, 8, 4, 12, sse4)
474
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(epel_hv,10, 8, 64, sse4)
475
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(epel_hv,10, 8, 48, sse4)
476
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 78 times.
780 mc_rep_funcs(epel_hv,10, 8, 32, sse4)
477
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_hv,10, 8, 24, sse4)
478
2/2
✓ Branch 1 taken 236 times.
✓ Branch 2 taken 118 times.
708 mc_rep_funcs(epel_hv,10, 8, 16, sse4)
479
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv,10, 4, 12, sse4)
480
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(epel_hv,12, 8, 64, sse4)
481
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(epel_hv,12, 8, 48, sse4)
482
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(epel_hv,12, 8, 32, sse4)
483
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv,12, 8, 24, sse4)
484
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(epel_hv,12, 8, 16, sse4)
485
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv,12, 4, 12, sse4)
486
487
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(qpel_h, 8, 16, 64, sse4)
488
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_h, 8, 16, 48, sse4)
489
2/2
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
252 mc_rep_funcs(qpel_h, 8, 16, 32, sse4)
490
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h, 8, 8, 24, sse4)
491
2/2
✓ Branch 1 taken 1184 times.
✓ Branch 2 taken 148 times.
2664 mc_rep_funcs(qpel_h,10, 8, 64, sse4)
492
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(qpel_h,10, 8, 48, sse4)
493
2/2
✓ Branch 1 taken 1248 times.
✓ Branch 2 taken 312 times.
3120 mc_rep_funcs(qpel_h,10, 8, 32, sse4)
494
2/2
✓ Branch 1 taken 132 times.
✓ Branch 2 taken 44 times.
352 mc_rep_funcs(qpel_h,10, 8, 24, sse4)
495
2/2
✓ Branch 1 taken 424 times.
✓ Branch 2 taken 212 times.
1272 mc_rep_funcs(qpel_h,10, 8, 16, sse4)
496
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h,10, 4, 12, sse4)
497
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(qpel_h,12, 8, 64, sse4)
498
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_h,12, 8, 48, sse4)
499
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(qpel_h,12, 8, 32, sse4)
500
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h,12, 8, 24, sse4)
501
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(qpel_h,12, 8, 16, sse4)
502
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h,12, 4, 12, sse4)
503
2/2
✓ Branch 1 taken 176 times.
✓ Branch 2 taken 44 times.
440 mc_rep_funcs(qpel_v, 8, 16, 64, sse4)
504
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_v, 8, 16, 48, sse4)
505
2/2
✓ Branch 1 taken 92 times.
✓ Branch 2 taken 46 times.
276 mc_rep_funcs(qpel_v, 8, 16, 32, sse4)
506
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v, 8, 8, 24, sse4)
507
2/2
✓ Branch 1 taken 464 times.
✓ Branch 2 taken 58 times.
1044 mc_rep_funcs(qpel_v,10, 8, 64, sse4)
508
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(qpel_v,10, 8, 48, sse4)
509
2/2
✓ Branch 1 taken 304 times.
✓ Branch 2 taken 76 times.
760 mc_rep_funcs(qpel_v,10, 8, 32, sse4)
510
2/2
✓ Branch 1 taken 138 times.
✓ Branch 2 taken 46 times.
368 mc_rep_funcs(qpel_v,10, 8, 24, sse4)
511
2/2
✓ Branch 1 taken 96 times.
✓ Branch 2 taken 48 times.
288 mc_rep_funcs(qpel_v,10, 8, 16, sse4)
512
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v,10, 4, 12, sse4)
513
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(qpel_v,12, 8, 64, sse4)
514
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_v,12, 8, 48, sse4)
515
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(qpel_v,12, 8, 32, sse4)
516
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v,12, 8, 24, sse4)
517
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(qpel_v,12, 8, 16, sse4)
518
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v,12, 4, 12, sse4)
519
2/2
✓ Branch 1 taken 928 times.
✓ Branch 2 taken 116 times.
2088 mc_rep_funcs(qpel_hv, 8, 8, 64, sse4)
520
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_hv, 8, 8, 48, sse4)
521
2/2
✓ Branch 1 taken 292 times.
✓ Branch 2 taken 73 times.
730 mc_rep_funcs(qpel_hv, 8, 8, 32, sse4)
522
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv, 8, 8, 24, sse4)
523
2/2
✓ Branch 1 taken 202 times.
✓ Branch 2 taken 101 times.
606 mc_rep_funcs(qpel_hv, 8, 8, 16, sse4)
524 78 mc_rep_funcs2(qpel_hv,8, 8, 4, 12, sse4)
525
2/2
✓ Branch 1 taken 480 times.
✓ Branch 2 taken 60 times.
1080 mc_rep_funcs(qpel_hv,10, 8, 64, sse4)
526
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(qpel_hv,10, 8, 48, sse4)
527
2/2
✓ Branch 1 taken 272 times.
✓ Branch 2 taken 68 times.
680 mc_rep_funcs(qpel_hv,10, 8, 32, sse4)
528
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_hv,10, 8, 24, sse4)
529
2/2
✓ Branch 1 taken 100 times.
✓ Branch 2 taken 50 times.
300 mc_rep_funcs(qpel_hv,10, 8, 16, sse4)
530
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv,10, 4, 12, sse4)
531
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(qpel_hv,12, 8, 64, sse4)
532
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_hv,12, 8, 48, sse4)
533
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(qpel_hv,12, 8, 32, sse4)
534
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv,12, 8, 24, sse4)
535
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(qpel_hv,12, 8, 16, sse4)
536
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv,12, 4, 12, sse4)
537
/* Defines the non-static function ff_hevc_put_uni_w<W>_<bitd>_<opt> by
 * repeating the <step>-wide variant ff_hevc_put_uni_w<step>_<bitd>_<opt>
 * across columns 0, step, 2*step, ... while the column index is below W.
 * Per iteration the int16_t source advances by i elements and the uint8_t
 * destination by i * ((bitd + 7) / 8) bytes (factor 1 for bitd 8, 2 for
 * bitd 10 and 12). height, denom, _wx and _ox are forwarded unchanged. */
538 #define mc_rep_uni_w(bitd, step, W, opt) \
539 void ff_hevc_put_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
540 int height, int denom, int _wx, int _ox) \
541 { \
542 int i; \
543 uint8_t *dst; \
544 for (i = 0; i < W; i += step) { \
545 const int16_t *src = _src + i; \
546 dst= _dst + (i * ((bitd + 7) / 8)); \
547 ff_hevc_put_uni_w##step##_##bitd##_##opt(dst, dststride, src, \
548 height, denom, _wx, _ox); \
549 } \
550 }
551
552
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(8, 6, 12, sse4)
553
2/2
✓ Branch 1 taken 21144 times.
✓ Branch 2 taken 10572 times.
31716 mc_rep_uni_w(8, 8, 16, sse4)
554
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_uni_w(8, 8, 24, sse4)
555
2/2
✓ Branch 1 taken 54668 times.
✓ Branch 2 taken 13667 times.
68335 mc_rep_uni_w(8, 8, 32, sse4)
556
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_uni_w(8, 8, 48, sse4)
557
2/2
✓ Branch 1 taken 43016 times.
✓ Branch 2 taken 5377 times.
48393 mc_rep_uni_w(8, 8, 64, sse4)
558
559
2/2
✓ Branch 1 taken 284 times.
✓ Branch 2 taken 142 times.
426 mc_rep_uni_w(10, 6, 12, sse4)
560
2/2
✓ Branch 1 taken 3220 times.
✓ Branch 2 taken 1610 times.
4830 mc_rep_uni_w(10, 8, 16, sse4)
561
2/2
✓ Branch 1 taken 402 times.
✓ Branch 2 taken 134 times.
536 mc_rep_uni_w(10, 8, 24, sse4)
562
2/2
✓ Branch 1 taken 4728 times.
✓ Branch 2 taken 1182 times.
5910 mc_rep_uni_w(10, 8, 32, sse4)
563
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_uni_w(10, 8, 48, sse4)
564
2/2
✓ Branch 1 taken 2832 times.
✓ Branch 2 taken 354 times.
3186 mc_rep_uni_w(10, 8, 64, sse4)
565
566
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(12, 6, 12, sse4)
567
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(12, 8, 16, sse4)
568
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_uni_w(12, 8, 24, sse4)
569
2/2
✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
630 mc_rep_uni_w(12, 8, 32, sse4)
570
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_uni_w(12, 8, 48, sse4)
571
2/2
✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
1134 mc_rep_uni_w(12, 8, 64, sse4)
572
/*
 * mc_rep_bi_w(bitd, step, W, opt)
 *
 * Defines ff_hevc_put_bi_w<W>_<bitd>_<opt>(), the two-source counterpart of
 * mc_rep_uni_w: a W-wide block is handled by calling
 * ff_hevc_put_bi_w<step>_<bitd>_<opt>() once per step-wide column.
 *
 * For column offset i both int16_t inputs (_src and _src2) advance by i
 * elements, and the byte destination advances by i * ((bitd + 7) / 8)
 * bytes (1 byte per column at 8 bits, 2 bytes at 10/12 bits).
 * dststride, height, denom, _wx0, _wx1, _ox0 and _ox1 are forwarded
 * unchanged to every call.
 *
 * NOTE(review): as with mc_rep_uni_w, W is presumably required to be a
 * multiple of step -- all instantiations in this file satisfy that.
 */
573 #define mc_rep_bi_w(bitd, step, W, opt) \
574 void ff_hevc_put_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
575 const int16_t *_src2, int height, \
576 int denom, int _wx0, int _wx1, int _ox0, int _ox1) \
577 { \
578 int i; \
579 uint8_t *dst; \
580 for (i = 0; i < W; i += step) { \
581 const int16_t *src = _src + i; \
582 const int16_t *src2 = _src2 + i; \
583 dst = _dst + (i * ((bitd + 7) / 8)); \
584 ff_hevc_put_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \
585 height, denom, _wx0, _wx1, _ox0, _ox1); \
586 } \
587 }
588
589
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(8, 6, 12, sse4)
590
2/2
✓ Branch 1 taken 4896 times.
✓ Branch 2 taken 2448 times.
7344 mc_rep_bi_w(8, 8, 16, sse4)
591
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_bi_w(8, 8, 24, sse4)
592
2/2
✓ Branch 1 taken 23552 times.
✓ Branch 2 taken 5888 times.
29440 mc_rep_bi_w(8, 8, 32, sse4)
593
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_bi_w(8, 8, 48, sse4)
594
2/2
✓ Branch 1 taken 21144 times.
✓ Branch 2 taken 2643 times.
23787 mc_rep_bi_w(8, 8, 64, sse4)
595
596
2/2
✓ Branch 1 taken 268 times.
✓ Branch 2 taken 134 times.
402 mc_rep_bi_w(10, 6, 12, sse4)
597
2/2
✓ Branch 1 taken 2916 times.
✓ Branch 2 taken 1458 times.
4374 mc_rep_bi_w(10, 8, 16, sse4)
598
2/2
✓ Branch 1 taken 390 times.
✓ Branch 2 taken 130 times.
520 mc_rep_bi_w(10, 8, 24, sse4)
599
2/2
✓ Branch 1 taken 4760 times.
✓ Branch 2 taken 1190 times.
5950 mc_rep_bi_w(10, 8, 32, sse4)
600
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_bi_w(10, 8, 48, sse4)
601
2/2
✓ Branch 1 taken 2928 times.
✓ Branch 2 taken 366 times.
3294 mc_rep_bi_w(10, 8, 64, sse4)
602
603
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(12, 6, 12, sse4)
604
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(12, 8, 16, sse4)
605
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_bi_w(12, 8, 24, sse4)
606
2/2
✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
630 mc_rep_bi_w(12, 8, 32, sse4)
607
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_bi_w(12, 8, 48, sse4)
608
2/2
✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
1134 mc_rep_bi_w(12, 8, 64, sse4)
609
/*
 * mc_uni_w_func(name, bitd, W, opt)
 *
 * Defines the file-local function hevc_put_uni_w_<name><W>_<bitd>_<opt>(),
 * which runs in two stages:
 *   1. hevc_put_<name><W>_<bitd>_<opt>() writes its int16_t output for the
 *      block into the local scratch buffer `temp`;
 *   2. ff_hevc_put_uni_w<W>_<bitd>_<opt>() reads `temp` and writes the
 *      result to _dst using denom, _wx and _ox.
 *
 * `temp` is declared with LOCAL_ALIGNED_16 and holds 71 * MAX_PB_SIZE
 * int16_t elements.
 * NOTE(review): the reason for 71 rows is not stated here -- presumably the
 * maximum block height plus interpolation margin; confirm against the
 * definition of MAX_PB_SIZE.
 *
 * mx, my and width are only passed to the first stage; the second stage
 * gets its width from the W baked into its name.
 */
610 #define mc_uni_w_func(name, bitd, W, opt) \
611 static void hevc_put_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
612 const uint8_t *_src, ptrdiff_t _srcstride, \
613 int height, int denom, \
614 int _wx, int _ox, \
615 intptr_t mx, intptr_t my, int width) \
616 { \
617 LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
618 hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
619 ff_hevc_put_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox); \
620 }
621
/*
 * mc_uni_w_funcs(name, bitd, opt)
 *
 * Instantiates mc_uni_w_func for the widths 4, 8, 12, 16, 24, 32, 48 and 64.
 * Width 6 is deliberately not part of this list; where it is needed it is
 * instantiated with a separate mc_uni_w_func(..., 6, ...) line.
 */
622 #define mc_uni_w_funcs(name, bitd, opt) \
623 mc_uni_w_func(name, bitd, 4, opt) \
624 mc_uni_w_func(name, bitd, 8, opt) \
625 mc_uni_w_func(name, bitd, 12, opt) \
626 mc_uni_w_func(name, bitd, 16, opt) \
627 mc_uni_w_func(name, bitd, 24, opt) \
628 mc_uni_w_func(name, bitd, 32, opt) \
629 mc_uni_w_func(name, bitd, 48, opt) \
630 mc_uni_w_func(name, bitd, 64, opt)
631
/*
 * sse4 uni-weighted wrappers for bit depths 8, 10 and 12.
 * Each mc_uni_w_funcs() line emits the widths 4..64 for one filter kind.
 * The extra width-6 wrapper is only instantiated for pel_pixels and the
 * epel_{h,v,hv} kinds; the qpel_{h,v,hv} kinds get no width-6 variant.
 */
632 81664 mc_uni_w_funcs(pel_pixels, 8, sse4)
633 18 mc_uni_w_func(pel_pixels, 8, 6, sse4)
634 7784 mc_uni_w_funcs(epel_h, 8, sse4)
635 18 mc_uni_w_func(epel_h, 8, 6, sse4)
636 7424 mc_uni_w_funcs(epel_v, 8, sse4)
637 18 mc_uni_w_func(epel_v, 8, 6, sse4)
638 7664 mc_uni_w_funcs(epel_hv, 8, sse4)
639 18 mc_uni_w_func(epel_hv, 8, 6, sse4)
640 298 mc_uni_w_funcs(qpel_h, 8, sse4)
641 308 mc_uni_w_funcs(qpel_v, 8, sse4)
642 882 mc_uni_w_funcs(qpel_hv, 8, sse4)
643
644 4980 mc_uni_w_funcs(pel_pixels, 10, sse4)
645 18 mc_uni_w_func(pel_pixels, 10, 6, sse4)
646 1352 mc_uni_w_funcs(epel_h, 10, sse4)
647 18 mc_uni_w_func(epel_h, 10, 6, sse4)
648 360 mc_uni_w_funcs(epel_v, 10, sse4)
649 18 mc_uni_w_func(epel_v, 10, 6, sse4)
650 536 mc_uni_w_funcs(epel_hv, 10, sse4)
651 18 mc_uni_w_func(epel_hv, 10, 6, sse4)
652 844 mc_uni_w_funcs(qpel_h, 10, sse4)
653 332 mc_uni_w_funcs(qpel_v, 10, sse4)
654 380 mc_uni_w_funcs(qpel_hv, 10, sse4)
655
656 288 mc_uni_w_funcs(pel_pixels, 12, sse4)
657 18 mc_uni_w_func(pel_pixels, 12, 6, sse4)
658 288 mc_uni_w_funcs(epel_h, 12, sse4)
659 18 mc_uni_w_func(epel_h, 12, 6, sse4)
660 288 mc_uni_w_funcs(epel_v, 12, sse4)
661 18 mc_uni_w_func(epel_v, 12, 6, sse4)
662 288 mc_uni_w_funcs(epel_hv, 12, sse4)
663 18 mc_uni_w_func(epel_hv, 12, 6, sse4)
664 288 mc_uni_w_funcs(qpel_h, 12, sse4)
665 288 mc_uni_w_funcs(qpel_v, 12, sse4)
666 288 mc_uni_w_funcs(qpel_hv, 12, sse4)
667
/*
 * mc_bi_w_func(name, bitd, W, opt)
 *
 * Defines the file-local function hevc_put_bi_w_<name><W>_<bitd>_<opt>(),
 * which runs in two stages:
 *   1. hevc_put_<name><W>_<bitd>_<opt>() writes its int16_t output for _src
 *      into the local scratch buffer `temp`;
 *   2. ff_hevc_put_bi_w<W>_<bitd>_<opt>() combines `temp` with the
 *      caller-supplied int16_t buffer _src2 and writes the result to _dst
 *      using denom, _wx0/_wx1 and _ox0/_ox1.
 *
 * `temp` is declared with LOCAL_ALIGNED_16 and holds 71 * MAX_PB_SIZE
 * int16_t elements.
 * NOTE(review): the reason for 71 rows is not stated here -- presumably the
 * maximum block height plus interpolation margin; confirm.
 *
 * mx, my and width are only passed to the first stage.
 */
668 #define mc_bi_w_func(name, bitd, W, opt) \
669 static void hevc_put_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
670 const uint8_t *_src, ptrdiff_t _srcstride, \
671 const int16_t *_src2, \
672 int height, int denom, \
673 int _wx0, int _wx1, int _ox0, int _ox1, \
674 intptr_t mx, intptr_t my, int width) \
675 { \
676 LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
677 hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
678 ff_hevc_put_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \
679 height, denom, _wx0, _wx1, _ox0, _ox1); \
680 }
681
/*
 * mc_bi_w_funcs(name, bitd, opt)
 *
 * Instantiates mc_bi_w_func for the widths 4, 8, 12, 16, 24, 32, 48 and 64.
 * Width 6 is not part of this list; where it is needed it is instantiated
 * with a separate mc_bi_w_func(..., 6, ...) line.
 */
682 #define mc_bi_w_funcs(name, bitd, opt) \
683 mc_bi_w_func(name, bitd, 4, opt) \
684 mc_bi_w_func(name, bitd, 8, opt) \
685 mc_bi_w_func(name, bitd, 12, opt) \
686 mc_bi_w_func(name, bitd, 16, opt) \
687 mc_bi_w_func(name, bitd, 24, opt) \
688 mc_bi_w_func(name, bitd, 32, opt) \
689 mc_bi_w_func(name, bitd, 48, opt) \
690 mc_bi_w_func(name, bitd, 64, opt)
691
/*
 * sse4 bi-weighted wrappers for bit depths 8, 10 and 12.
 * Each mc_bi_w_funcs() line emits the widths 4..64 for one filter kind.
 * The extra width-6 wrapper is only instantiated for pel_pixels and the
 * epel_{h,v,hv} kinds; the qpel_{h,v,hv} kinds get no width-6 variant.
 */
692 23926 mc_bi_w_funcs(pel_pixels, 8, sse4)
693 18 mc_bi_w_func(pel_pixels, 8, 6, sse4)
694 1240 mc_bi_w_funcs(epel_h, 8, sse4)
695 18 mc_bi_w_func(epel_h, 8, 6, sse4)
696 4860 mc_bi_w_funcs(epel_v, 8, sse4)
697 18 mc_bi_w_func(epel_v, 8, 6, sse4)
698 2416 mc_bi_w_funcs(epel_hv, 8, sse4)
699 18 mc_bi_w_func(epel_hv, 8, 6, sse4)
700 288 mc_bi_w_funcs(qpel_h, 8, sse4)
701 288 mc_bi_w_funcs(qpel_v, 8, sse4)
702 288 mc_bi_w_funcs(qpel_hv, 8, sse4)
703
704 4336 mc_bi_w_funcs(pel_pixels, 10, sse4)
705 18 mc_bi_w_func(pel_pixels, 10, 6, sse4)
706 1392 mc_bi_w_funcs(epel_h, 10, sse4)
707 18 mc_bi_w_func(epel_h, 10, 6, sse4)
708 448 mc_bi_w_funcs(epel_v, 10, sse4)
709 18 mc_bi_w_func(epel_v, 10, 6, sse4)
710 312 mc_bi_w_funcs(epel_hv, 10, sse4)
711 18 mc_bi_w_func(epel_hv, 10, 6, sse4)
712 836 mc_bi_w_funcs(qpel_h, 10, sse4)
713 368 mc_bi_w_funcs(qpel_v, 10, sse4)
714 300 mc_bi_w_funcs(qpel_hv, 10, sse4)
715
716 288 mc_bi_w_funcs(pel_pixels, 12, sse4)
717 18 mc_bi_w_func(pel_pixels, 12, 6, sse4)
718 288 mc_bi_w_funcs(epel_h, 12, sse4)
719 18 mc_bi_w_func(epel_h, 12, 6, sse4)
720 288 mc_bi_w_funcs(epel_v, 12, sse4)
721 18 mc_bi_w_func(epel_v, 12, 6, sse4)
722 288 mc_bi_w_funcs(epel_hv, 12, sse4)
723 18 mc_bi_w_func(epel_hv, 12, 6, sse4)
724 288 mc_bi_w_funcs(qpel_h, 12, sse4)
725 288 mc_bi_w_funcs(qpel_v, 12, sse4)
726 288 mc_bi_w_funcs(qpel_hv, 12, sse4)
727 #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
728
/*
 * SAO_BAND_FILTER_FUNCS(bitd, opt)
 *
 * Declares (prototypes only, no bodies) the five functions
 * ff_hevc_sao_band_filter_{8,16,32,48,64}_<bitd>_<opt>().
 * All five share one signature: destination and source pointers, one
 * stride for each, the int16_t offset table, sao_left_class, width and
 * height.
 * NOTE(review): the number in each name is presumably the block width the
 * routine is specialised for, and the definitions presumably live in the
 * x86 assembly sources -- neither is visible from here.
 */
729 #define SAO_BAND_FILTER_FUNCS(bitd, opt) \
730 void ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
731 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
732 void ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
733 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
734 void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
735 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
736 void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
737 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
738 void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
739 const int16_t *sao_offset_val, int sao_left_class, int width, int height);
740
/* SAO band filter prototypes: sse2, avx and avx2, each at 8, 10 and 12 bits. */
741 SAO_BAND_FILTER_FUNCS(8, sse2)
742 SAO_BAND_FILTER_FUNCS(10, sse2)
743 SAO_BAND_FILTER_FUNCS(12, sse2)
744 SAO_BAND_FILTER_FUNCS(8, avx)
745 SAO_BAND_FILTER_FUNCS(10, avx)
746 SAO_BAND_FILTER_FUNCS(12, avx)
747 SAO_BAND_FILTER_FUNCS(8, avx2)
748 SAO_BAND_FILTER_FUNCS(10, avx2)
749 SAO_BAND_FILTER_FUNCS(12, avx2)
750
/*
 * SAO_BAND_INIT(bitd, opt)
 *
 * Fills c->sao_band_filter[0..4] with the <bitd>/<opt> band filters, in the
 * order 8, 16, 32, 48, 64 (index 0 gets the _8_ variant, index 4 the _64_
 * variant). Relies on a variable named `c` being in scope at the expansion
 * site. Wrapped in do { } while (0) so that it acts as a single statement.
 */
751 #define SAO_BAND_INIT(bitd, opt) do { \
752 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_##bitd##_##opt; \
753 c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_##bitd##_##opt; \
754 c->sao_band_filter[2] = ff_hevc_sao_band_filter_32_##bitd##_##opt; \
755 c->sao_band_filter[3] = ff_hevc_sao_band_filter_48_##bitd##_##opt; \
756 c->sao_band_filter[4] = ff_hevc_sao_band_filter_64_##bitd##_##opt; \
757 } while (0)
758
/*
 * SAO_EDGE_FILTER_FUNCS(bitd, opt)
 *
 * Declares (prototypes only, no bodies) the five functions
 * ff_hevc_sao_edge_filter_{8,16,32,48,64}_<bitd>_<opt>().
 * All five share one signature: destination and source pointers, the
 * destination stride only (there is no source-stride parameter, unlike the
 * band filter prototypes), the int16_t offset table, eo, width and height.
 *
 * NOTE(review): the final declaration ends with a line-continuation
 * backslash. It is harmless only because the source line that follows the
 * macro is blank; keep that blank line, or drop the backslash, when editing.
 */
759 #define SAO_EDGE_FILTER_FUNCS(bitd, opt) \
760 void ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
761 const int16_t *sao_offset_val, int eo, int width, int height); \
762 void ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
763 const int16_t *sao_offset_val, int eo, int width, int height); \
764 void ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
765 const int16_t *sao_offset_val, int eo, int width, int height); \
766 void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
767 const int16_t *sao_offset_val, int eo, int width, int height); \
768 void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
769 const int16_t *sao_offset_val, int eo, int width, int height); \
770
/*
 * SAO edge filter prototypes. The baseline variant differs by bit depth:
 * ssse3 for 8 bits, sse2 for 10 and 12 bits; avx2 is declared for all three.
 */
771 SAO_EDGE_FILTER_FUNCS(8, ssse3)
772 SAO_EDGE_FILTER_FUNCS(8, avx2)
773 SAO_EDGE_FILTER_FUNCS(10, sse2)
774 SAO_EDGE_FILTER_FUNCS(10, avx2)
775 SAO_EDGE_FILTER_FUNCS(12, sse2)
776 SAO_EDGE_FILTER_FUNCS(12, avx2)
777
/*
 * SAO_EDGE_INIT(bitd, opt)
 *
 * Fills c->sao_edge_filter[0..4] with the <bitd>/<opt> edge filters, in the
 * order 8, 16, 32, 48, 64 (index 0 gets the _8_ variant, index 4 the _64_
 * variant). Relies on a variable named `c` being in scope at the expansion
 * site. Wrapped in do { } while (0) so that it acts as a single statement.
 */
778 #define SAO_EDGE_INIT(bitd, opt) do { \
779 c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt; \
780 c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \
781 c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \
782 c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \
783 c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \
784 } while (0)
785
/*
 * PEL_LINK(dst, idx1, idx2, idx3, name, D, opt)
 *
 * Sets slot [idx1][idx2][idx3] in five parallel tables derived from `dst`:
 *   dst        <- hevc_put_<name>_<D>_<opt>
 *   dst_bi     <- ff_hevc_put_bi_<name>_<D>_<opt>   (note the ff_ prefix)
 *   dst_uni    <- hevc_put_uni_<name>_<D>_<opt>
 *   dst_uni_w  <- hevc_put_uni_w_<name>_<D>_<opt>
 *   dst_bi_w   <- hevc_put_bi_w_<name>_<D>_<opt>
 * `name` is expected to already include the width suffix (e.g. epel_h16).
 *
 * The expansion is five separate assignment statements with no
 * do { } while (0) wrapper and no trailing semicolon on the last one: the
 * caller supplies the final `;`, and the macro must only be used where a
 * plain statement sequence is valid (not as the body of an unbraced if).
 */
786 #define PEL_LINK(dst, idx1, idx2, idx3, name, D, opt) \
787 dst [idx1][idx2][idx3] = hevc_put_ ## name ## _ ## D ## _##opt; \
788 dst ## _bi [idx1][idx2][idx3] = ff_hevc_put_bi_ ## name ## _ ## D ## _##opt; \
789 dst ## _uni [idx1][idx2][idx3] = hevc_put_uni_ ## name ## _ ## D ## _##opt; \
790 dst ## _uni_w[idx1][idx2][idx3] = hevc_put_uni_w_ ## name ## _ ## D ## _##opt; \
791 dst ## _bi_w [idx1][idx2][idx3] = hevc_put_bi_w_ ## name ## _ ## D ## _##opt
792
/*
 * EPEL_LINKS(pointer, my, mx, fname, bitd, opt)
 *
 * Applies PEL_LINK for first-index values 1..9, pairing them with the
 * widths 4, 6, 8, 12, 16, 24, 32, 48 and 64 respectively; `my` and `mx`
 * become the second and third table indices. First index 0 is not written
 * by this macro.
 * Like PEL_LINK, this expands to a bare sequence of statements and leaves
 * the final semicolon to the caller.
 */
793 #define EPEL_LINKS(pointer, my, mx, fname, bitd, opt ) \
794 PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
795 PEL_LINK(pointer, 2, my , mx , fname##6 , bitd, opt ); \
796 PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
797 PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
798 PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
799 PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
800 PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
801 PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
802 PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
/*
 * QPEL_LINKS(pointer, my, mx, fname, bitd, opt)
 *
 * Same index-to-width mapping as EPEL_LINKS (1->4, 3->8, 4->12, 5->16,
 * 6->24, 7->32, 8->48, 9->64) except that first index 2 (width 6) is
 * skipped, so that slot keeps whatever value it had before. First index 0
 * is not written either.
 * Expands to a bare sequence of statements; the caller supplies the final
 * semicolon.
 */
803 #define QPEL_LINKS(pointer, my, mx, fname, bitd, opt) \
804 PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
805 PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
806 PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
807 PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
808 PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
809 PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
810 PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
811 PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
812
813 1484 void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
814 {
815 1484 int cpu_flags = av_get_cpu_flags();
816
817
2/2
✓ Branch 0 taken 619 times.
✓ Branch 1 taken 865 times.
1484 if (bit_depth == 8) {
818
2/2
✓ Branch 0 taken 226 times.
✓ Branch 1 taken 393 times.
619 if (EXTERNAL_MMXEXT(cpu_flags)) {
819 226 c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
820
821 226 c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext;
822 }
823
2/2
✓ Branch 0 taken 186 times.
✓ Branch 1 taken 433 times.
619 if (EXTERNAL_SSE2(cpu_flags)) {
824 186 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
825 186 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
826 if (ARCH_X86_64) {
827 186 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
828 186 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
829
830 186 c->idct[2] = ff_hevc_idct_16x16_8_sse2;
831 186 c->idct[3] = ff_hevc_idct_32x32_8_sse2;
832 }
833 186 SAO_BAND_INIT(8, sse2);
834
835 186 c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
836 186 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
837 186 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
838
839 186 c->idct[0] = ff_hevc_idct_4x4_8_sse2;
840 186 c->idct[1] = ff_hevc_idct_8x8_8_sse2;
841
842 186 c->add_residual[1] = ff_hevc_add_residual_8_8_sse2;
843 186 c->add_residual[2] = ff_hevc_add_residual_16_8_sse2;
844 186 c->add_residual[3] = ff_hevc_add_residual_32_8_sse2;
845 }
846
2/2
✓ Branch 0 taken 146 times.
✓ Branch 1 taken 473 times.
619 if (EXTERNAL_SSSE3(cpu_flags)) {
847 if(ARCH_X86_64) {
848 146 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
849 146 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
850 }
851 146 SAO_EDGE_INIT(8, ssse3);
852 }
853 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64
854
2/2
✓ Branch 0 taken 126 times.
✓ Branch 1 taken 493 times.
619 if (EXTERNAL_SSE4(cpu_flags)) {
855
856 126 EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
857 126 EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4);
858 126 EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4);
859 126 EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 8, sse4);
860
861 126 QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
862 126 QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 8, sse4);
863 126 QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
864 126 QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
865 }
866 #endif
867
2/2
✓ Branch 0 taken 66 times.
✓ Branch 1 taken 553 times.
619 if (EXTERNAL_AVX(cpu_flags)) {
868 66 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
869 66 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
870 if (ARCH_X86_64) {
871 66 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
872 66 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
873
874 66 c->idct[2] = ff_hevc_idct_16x16_8_avx;
875 66 c->idct[3] = ff_hevc_idct_32x32_8_avx;
876 }
877 66 SAO_BAND_INIT(8, avx);
878
879 66 c->idct[0] = ff_hevc_idct_4x4_8_avx;
880 66 c->idct[1] = ff_hevc_idct_8x8_8_avx;
881
882 66 c->add_residual[1] = ff_hevc_add_residual_8_8_avx;
883 66 c->add_residual[2] = ff_hevc_add_residual_16_8_avx;
884 66 c->add_residual[3] = ff_hevc_add_residual_32_8_avx;
885 }
886
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 593 times.
619 if (EXTERNAL_AVX2(cpu_flags)) {
887 26 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
888 26 c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
889 }
890 #if HAVE_AVX2_EXTERNAL
891
3/4
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 593 times.
✓ Branch 2 taken 26 times.
✗ Branch 3 not taken.
619 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
892 26 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
893 26 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
894
895 #if ARCH_X86_64
896 26 c->put_hevc_epel[7][0][0] = hevc_put_pel_pixels32_8_avx2;
897 26 c->put_hevc_epel[8][0][0] = hevc_put_pel_pixels48_8_avx2;
898 26 c->put_hevc_epel[9][0][0] = hevc_put_pel_pixels64_8_avx2;
899
900 26 c->put_hevc_qpel[7][0][0] = hevc_put_pel_pixels32_8_avx2;
901 26 c->put_hevc_qpel[8][0][0] = hevc_put_pel_pixels48_8_avx2;
902 26 c->put_hevc_qpel[9][0][0] = hevc_put_pel_pixels64_8_avx2;
903
904 26 c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
905 26 c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
906 26 c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
907
908 26 c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
909 26 c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
910 26 c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
911
912 26 c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2;
913 26 c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2;
914 26 c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2;
915
916 26 c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2;
917 26 c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2;
918 26 c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2;
919
920 26 c->put_hevc_epel[7][0][1] = hevc_put_epel_h32_8_avx2;
921 26 c->put_hevc_epel[8][0][1] = hevc_put_epel_h48_8_avx2;
922 26 c->put_hevc_epel[9][0][1] = hevc_put_epel_h64_8_avx2;
923
924 26 c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_8_avx2;
925 26 c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_8_avx2;
926 26 c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_8_avx2;
927
928 26 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_8_avx2;
929 26 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_8_avx2;
930 26 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_8_avx2;
931
932 26 c->put_hevc_epel[7][1][0] = hevc_put_epel_v32_8_avx2;
933 26 c->put_hevc_epel[8][1][0] = hevc_put_epel_v48_8_avx2;
934 26 c->put_hevc_epel[9][1][0] = hevc_put_epel_v64_8_avx2;
935
936 26 c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_8_avx2;
937 26 c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_8_avx2;
938 26 c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_8_avx2;
939
940 26 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_8_avx2;
941 26 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_8_avx2;
942 26 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_8_avx2;
943
944 26 c->put_hevc_epel[7][1][1] = hevc_put_epel_hv32_8_avx2;
945 26 c->put_hevc_epel[8][1][1] = hevc_put_epel_hv48_8_avx2;
946 26 c->put_hevc_epel[9][1][1] = hevc_put_epel_hv64_8_avx2;
947
948 26 c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_8_avx2;
949 26 c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_8_avx2;
950 26 c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_8_avx2;
951
952 26 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_8_avx2;
953 26 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_8_avx2;
954 26 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_8_avx2;
955
956 26 c->put_hevc_qpel[7][0][1] = hevc_put_qpel_h32_8_avx2;
957 26 c->put_hevc_qpel[8][0][1] = hevc_put_qpel_h48_8_avx2;
958 26 c->put_hevc_qpel[9][0][1] = hevc_put_qpel_h64_8_avx2;
959
960 26 c->put_hevc_qpel[7][1][0] = hevc_put_qpel_v32_8_avx2;
961 26 c->put_hevc_qpel[8][1][0] = hevc_put_qpel_v48_8_avx2;
962 26 c->put_hevc_qpel[9][1][0] = hevc_put_qpel_v64_8_avx2;
963
964 26 c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_8_avx2;
965 26 c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_8_avx2;
966 26 c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_8_avx2;
967
968 26 c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_8_avx2;
969 26 c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_8_avx2;
970 26 c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_8_avx2;
971
972 26 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_8_avx2;
973 26 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_8_avx2;
974 26 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_8_avx2;
975
976 26 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_8_avx2;
977 26 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_8_avx2;
978 26 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_8_avx2;
979 #endif /* ARCH_X86_64 */
980
981 26 SAO_BAND_INIT(8, avx2);
982
983 26 c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2;
984 26 c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
985 26 c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
986
987 26 c->add_residual[3] = ff_hevc_add_residual_32_8_avx2;
988 }
989 #endif /* HAVE_AVX2_EXTERNAL */
990
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 619 times.
619 if (EXTERNAL_AVX512ICL(cpu_flags) && ARCH_X86_64) {
991 c->put_hevc_qpel[1][0][1] = ff_hevc_put_qpel_h4_8_avx512icl;
992 c->put_hevc_qpel[3][0][1] = ff_hevc_put_qpel_h8_8_avx512icl;
993 c->put_hevc_qpel[5][0][1] = ff_hevc_put_qpel_h16_8_avx512icl;
994 c->put_hevc_qpel[7][0][1] = ff_hevc_put_qpel_h32_8_avx512icl;
995 c->put_hevc_qpel[9][0][1] = ff_hevc_put_qpel_h64_8_avx512icl;
996 c->put_hevc_qpel[3][1][1] = ff_hevc_put_qpel_hv8_8_avx512icl;
997 }
998
2/2
✓ Branch 0 taken 309 times.
✓ Branch 1 taken 556 times.
865 } else if (bit_depth == 10) {
999
2/2
✓ Branch 0 taken 231 times.
✓ Branch 1 taken 78 times.
309 if (EXTERNAL_MMXEXT(cpu_flags)) {
1000 231 c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext;
1001 231 c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
1002 }
1003
2/2
✓ Branch 0 taken 191 times.
✓ Branch 1 taken 118 times.
309 if (EXTERNAL_SSE2(cpu_flags)) {
1004 191 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
1005 191 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
1006 if (ARCH_X86_64) {
1007 191 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
1008 191 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
1009
1010 191 c->idct[2] = ff_hevc_idct_16x16_10_sse2;
1011 191 c->idct[3] = ff_hevc_idct_32x32_10_sse2;
1012 }
1013 191 SAO_BAND_INIT(10, sse2);
1014 191 SAO_EDGE_INIT(10, sse2);
1015
1016 191 c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
1017 191 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
1018 191 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
1019
1020 191 c->idct[0] = ff_hevc_idct_4x4_10_sse2;
1021 191 c->idct[1] = ff_hevc_idct_8x8_10_sse2;
1022
1023 191 c->add_residual[1] = ff_hevc_add_residual_8_10_sse2;
1024 191 c->add_residual[2] = ff_hevc_add_residual_16_10_sse2;
1025 191 c->add_residual[3] = ff_hevc_add_residual_32_10_sse2;
1026 }
1027
2/2
✓ Branch 0 taken 151 times.
✓ Branch 1 taken 158 times.
309 if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
1028 151 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
1029 151 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
1030 }
1031 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64
1032
2/2
✓ Branch 0 taken 131 times.
✓ Branch 1 taken 178 times.
309 if (EXTERNAL_SSE4(cpu_flags)) {
1033 131 EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
1034 131 EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4);
1035 131 EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4);
1036 131 EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 10, sse4);
1037
1038 131 QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
1039 131 QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 10, sse4);
1040 131 QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
1041 131 QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
1042 }
1043 #endif
1044
2/2
✓ Branch 0 taken 71 times.
✓ Branch 1 taken 238 times.
309 if (EXTERNAL_AVX(cpu_flags)) {
1045 71 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
1046 71 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
1047 if (ARCH_X86_64) {
1048 71 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
1049 71 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
1050
1051 71 c->idct[2] = ff_hevc_idct_16x16_10_avx;
1052 71 c->idct[3] = ff_hevc_idct_32x32_10_avx;
1053 }
1054
1055 71 c->idct[0] = ff_hevc_idct_4x4_10_avx;
1056 71 c->idct[1] = ff_hevc_idct_8x8_10_avx;
1057
1058 71 SAO_BAND_INIT(10, avx);
1059 }
1060
2/2
✓ Branch 0 taken 31 times.
✓ Branch 1 taken 278 times.
309 if (EXTERNAL_AVX2(cpu_flags)) {
1061 31 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
1062 }
1063 #if HAVE_AVX2_EXTERNAL
1064
3/4
✓ Branch 0 taken 31 times.
✓ Branch 1 taken 278 times.
✓ Branch 2 taken 31 times.
✗ Branch 3 not taken.
309 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
1065 31 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
1066 31 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
1067
1068 #if ARCH_X86_64
1069 31 c->put_hevc_epel[5][0][0] = hevc_put_pel_pixels16_10_avx2;
1070 31 c->put_hevc_epel[6][0][0] = hevc_put_pel_pixels24_10_avx2;
1071 31 c->put_hevc_epel[7][0][0] = hevc_put_pel_pixels32_10_avx2;
1072 31 c->put_hevc_epel[8][0][0] = hevc_put_pel_pixels48_10_avx2;
1073 31 c->put_hevc_epel[9][0][0] = hevc_put_pel_pixels64_10_avx2;
1074
1075 31 c->put_hevc_qpel[5][0][0] = hevc_put_pel_pixels16_10_avx2;
1076 31 c->put_hevc_qpel[6][0][0] = hevc_put_pel_pixels24_10_avx2;
1077 31 c->put_hevc_qpel[7][0][0] = hevc_put_pel_pixels32_10_avx2;
1078 31 c->put_hevc_qpel[8][0][0] = hevc_put_pel_pixels48_10_avx2;
1079 31 c->put_hevc_qpel[9][0][0] = hevc_put_pel_pixels64_10_avx2;
1080
1081 31 c->put_hevc_epel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
1082 31 c->put_hevc_epel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
1083 31 c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
1084 31 c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2;
1085 31 c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2;
1086
1087 31 c->put_hevc_qpel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
1088 31 c->put_hevc_qpel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
1089 31 c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
1090 31 c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2;
1091 31 c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2;
1092
1093 31 c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2;
1094 31 c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2;
1095 31 c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2;
1096 31 c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2;
1097 31 c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2;
1098 31 c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2;
1099 31 c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2;
1100 31 c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2;
1101 31 c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2;
1102 31 c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2;
1103
1104 31 c->put_hevc_epel[5][0][1] = hevc_put_epel_h16_10_avx2;
1105 31 c->put_hevc_epel[6][0][1] = hevc_put_epel_h24_10_avx2;
1106 31 c->put_hevc_epel[7][0][1] = hevc_put_epel_h32_10_avx2;
1107 31 c->put_hevc_epel[8][0][1] = hevc_put_epel_h48_10_avx2;
1108 31 c->put_hevc_epel[9][0][1] = hevc_put_epel_h64_10_avx2;
1109
1110 31 c->put_hevc_epel_uni[5][0][1] = hevc_put_uni_epel_h16_10_avx2;
1111 31 c->put_hevc_epel_uni[6][0][1] = hevc_put_uni_epel_h24_10_avx2;
1112 31 c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_10_avx2;
1113 31 c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_10_avx2;
1114 31 c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_10_avx2;
1115
1116 31 c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_bi_epel_h16_10_avx2;
1117 31 c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_bi_epel_h24_10_avx2;
1118 31 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_10_avx2;
1119 31 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_10_avx2;
1120 31 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_10_avx2;
1121
1122 31 c->put_hevc_epel[5][1][0] = hevc_put_epel_v16_10_avx2;
1123 31 c->put_hevc_epel[6][1][0] = hevc_put_epel_v24_10_avx2;
1124 31 c->put_hevc_epel[7][1][0] = hevc_put_epel_v32_10_avx2;
1125 31 c->put_hevc_epel[8][1][0] = hevc_put_epel_v48_10_avx2;
1126 31 c->put_hevc_epel[9][1][0] = hevc_put_epel_v64_10_avx2;
1127
1128 31 c->put_hevc_epel_uni[5][1][0] = hevc_put_uni_epel_v16_10_avx2;
1129 31 c->put_hevc_epel_uni[6][1][0] = hevc_put_uni_epel_v24_10_avx2;
1130 31 c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_10_avx2;
1131 31 c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_10_avx2;
1132 31 c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_10_avx2;
1133
1134 31 c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_bi_epel_v16_10_avx2;
1135 31 c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_bi_epel_v24_10_avx2;
1136 31 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_10_avx2;
1137 31 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_10_avx2;
1138 31 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_10_avx2;
1139
1140 31 c->put_hevc_epel[5][1][1] = hevc_put_epel_hv16_10_avx2;
1141 31 c->put_hevc_epel[6][1][1] = hevc_put_epel_hv24_10_avx2;
1142 31 c->put_hevc_epel[7][1][1] = hevc_put_epel_hv32_10_avx2;
1143 31 c->put_hevc_epel[8][1][1] = hevc_put_epel_hv48_10_avx2;
1144 31 c->put_hevc_epel[9][1][1] = hevc_put_epel_hv64_10_avx2;
1145
1146 31 c->put_hevc_epel_uni[5][1][1] = hevc_put_uni_epel_hv16_10_avx2;
1147 31 c->put_hevc_epel_uni[6][1][1] = hevc_put_uni_epel_hv24_10_avx2;
1148 31 c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_10_avx2;
1149 31 c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_10_avx2;
1150 31 c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_10_avx2;
1151
1152 31 c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_bi_epel_hv16_10_avx2;
1153 31 c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_bi_epel_hv24_10_avx2;
1154 31 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_10_avx2;
1155 31 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_10_avx2;
1156 31 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_10_avx2;
1157
1158 31 c->put_hevc_qpel[5][0][1] = hevc_put_qpel_h16_10_avx2;
1159 31 c->put_hevc_qpel[6][0][1] = hevc_put_qpel_h24_10_avx2;
1160 31 c->put_hevc_qpel[7][0][1] = hevc_put_qpel_h32_10_avx2;
1161 31 c->put_hevc_qpel[8][0][1] = hevc_put_qpel_h48_10_avx2;
1162 31 c->put_hevc_qpel[9][0][1] = hevc_put_qpel_h64_10_avx2;
1163
1164 31 c->put_hevc_qpel_uni[5][0][1] = hevc_put_uni_qpel_h16_10_avx2;
1165 31 c->put_hevc_qpel_uni[6][0][1] = hevc_put_uni_qpel_h24_10_avx2;
1166 31 c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_10_avx2;
1167 31 c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_10_avx2;
1168 31 c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_10_avx2;
1169
1170 31 c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_bi_qpel_h16_10_avx2;
1171 31 c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_bi_qpel_h24_10_avx2;
1172 31 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_10_avx2;
1173 31 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_10_avx2;
1174 31 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_10_avx2;
1175
1176 31 c->put_hevc_qpel[5][1][0] = hevc_put_qpel_v16_10_avx2;
1177 31 c->put_hevc_qpel[6][1][0] = hevc_put_qpel_v24_10_avx2;
1178 31 c->put_hevc_qpel[7][1][0] = hevc_put_qpel_v32_10_avx2;
1179 31 c->put_hevc_qpel[8][1][0] = hevc_put_qpel_v48_10_avx2;
1180 31 c->put_hevc_qpel[9][1][0] = hevc_put_qpel_v64_10_avx2;
1181
1182 31 c->put_hevc_qpel_uni[5][1][0] = hevc_put_uni_qpel_v16_10_avx2;
1183 31 c->put_hevc_qpel_uni[6][1][0] = hevc_put_uni_qpel_v24_10_avx2;
1184 31 c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_10_avx2;
1185 31 c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_10_avx2;
1186 31 c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_10_avx2;
1187
1188 31 c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_bi_qpel_v16_10_avx2;
1189 31 c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_bi_qpel_v24_10_avx2;
1190 31 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_10_avx2;
1191 31 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_10_avx2;
1192 31 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_10_avx2;
1193
1194 31 c->put_hevc_qpel[5][1][1] = hevc_put_qpel_hv16_10_avx2;
1195 31 c->put_hevc_qpel[6][1][1] = hevc_put_qpel_hv24_10_avx2;
1196 31 c->put_hevc_qpel[7][1][1] = hevc_put_qpel_hv32_10_avx2;
1197 31 c->put_hevc_qpel[8][1][1] = hevc_put_qpel_hv48_10_avx2;
1198 31 c->put_hevc_qpel[9][1][1] = hevc_put_qpel_hv64_10_avx2;
1199
1200 31 c->put_hevc_qpel_uni[5][1][1] = hevc_put_uni_qpel_hv16_10_avx2;
1201 31 c->put_hevc_qpel_uni[6][1][1] = hevc_put_uni_qpel_hv24_10_avx2;
1202 31 c->put_hevc_qpel_uni[7][1][1] = hevc_put_uni_qpel_hv32_10_avx2;
1203 31 c->put_hevc_qpel_uni[8][1][1] = hevc_put_uni_qpel_hv48_10_avx2;
1204 31 c->put_hevc_qpel_uni[9][1][1] = hevc_put_uni_qpel_hv64_10_avx2;
1205
1206 31 c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_bi_qpel_hv16_10_avx2;
1207 31 c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_bi_qpel_hv24_10_avx2;
1208 31 c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_bi_qpel_hv32_10_avx2;
1209 31 c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_bi_qpel_hv48_10_avx2;
1210 31 c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_bi_qpel_hv64_10_avx2;
1211 #endif /* ARCH_X86_64 */
1212
1213 31 SAO_BAND_INIT(10, avx2);
1214 31 SAO_EDGE_INIT(10, avx2);
1215
1216 31 c->add_residual[2] = ff_hevc_add_residual_16_10_avx2;
1217 31 c->add_residual[3] = ff_hevc_add_residual_32_10_avx2;
1218 }
1219 #endif /* HAVE_AVX2_EXTERNAL */
1220
2/2
✓ Branch 0 taken 270 times.
✓ Branch 1 taken 286 times.
556 } else if (bit_depth == 12) {
1221
2/2
✓ Branch 0 taken 220 times.
✓ Branch 1 taken 50 times.
270 if (EXTERNAL_MMXEXT(cpu_flags)) {
1222 220 c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext;
1223 }
1224
2/2
✓ Branch 0 taken 180 times.
✓ Branch 1 taken 90 times.
270 if (EXTERNAL_SSE2(cpu_flags)) {
1225 180 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
1226 180 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
1227 if (ARCH_X86_64) {
1228 180 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
1229 180 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
1230 }
1231 180 SAO_BAND_INIT(12, sse2);
1232 180 SAO_EDGE_INIT(12, sse2);
1233
1234 180 c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2;
1235 180 c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2;
1236 180 c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2;
1237 }
1238
2/2
✓ Branch 0 taken 140 times.
✓ Branch 1 taken 130 times.
270 if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
1239 140 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
1240 140 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
1241 }
1242 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64
1243
2/2
✓ Branch 0 taken 120 times.
✓ Branch 1 taken 150 times.
270 if (EXTERNAL_SSE4(cpu_flags)) {
1244 120 EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
1245 120 EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4);
1246 120 EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4);
1247 120 EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 12, sse4);
1248
1249 120 QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
1250 120 QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4);
1251 120 QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4);
1252 120 QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4);
1253 }
1254 #endif
1255
2/2
✓ Branch 0 taken 60 times.
✓ Branch 1 taken 210 times.
270 if (EXTERNAL_AVX(cpu_flags)) {
1256 60 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
1257 60 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
1258 if (ARCH_X86_64) {
1259 60 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
1260 60 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
1261 }
1262 60 SAO_BAND_INIT(12, avx);
1263 }
1264
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 250 times.
270 if (EXTERNAL_AVX2(cpu_flags)) {
1265 20 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
1266 }
1267
3/4
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 250 times.
✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
270 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
1268 20 c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2;
1269 20 c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2;
1270
1271 20 SAO_BAND_INIT(12, avx2);
1272 20 SAO_EDGE_INIT(12, avx2);
1273 }
1274 }
1275 1484 }
1276