FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/hevc/dsp_init.c
Date: 2026-04-23 21:43:00
Exec Total Coverage
Lines: 633 639 99.1%
Functions: 1015 1015 100.0%
Branches: 412 416 99.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2013 Seppo Tomperi
3 * Copyright (c) 2013-2014 Pierre-Edouard Lepere
4 * Copyright (c) 2023-2024 Wu Jianhua
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "config.h"
24
25 #include "libavutil/cpu.h"
26 #include "libavutil/mem_internal.h"
27 #include "libavutil/x86/asm.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavcodec/hevc/dsp.h"
30 #include "libavcodec/x86/hevc/dsp.h"
31 #include "libavcodec/x86/h26x/h2656dsp.h"
32
/* Prototype only: takes a coefficient buffer and a log2 size. The definition
 * is not part of this file. */
33 void ff_hevc_dequant_8_ssse3(int16_t *coeffs, int16_t log2_size);
34
/* Declares the prototype ff_hevc_<DIR>_loop_filter_chroma_<DEPTH>_<OPT>.
 * The expansion already ends in ';', so invocations are written without one. */
35 #define LFC_FUNC(DIR, DEPTH, OPT) \
36 void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
37
/* Declares the prototype ff_hevc_<DIR>_loop_filter_luma_<DEPTH>_<OPT>.
 * Same shape as the chroma prototype plus an extra 'int beta' argument; the
 * expansion already ends in ';'. */
38 #define LFL_FUNC(DIR, DEPTH, OPT) \
39 void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
40
/* Declares both the 'h' and 'v' chroma loop-filter prototypes for one
 * depth/opt pair. The 'type' parameter is accepted but not used in the
 * expansion. */
41 #define LFC_FUNCS(type, depth, opt) \
42 LFC_FUNC(h, depth, opt) \
43 LFC_FUNC(v, depth, opt)
44
/* Declares both the 'h' and 'v' luma loop-filter prototypes for one
 * depth/opt pair. The 'type' parameter is accepted but not used in the
 * expansion. */
45 #define LFL_FUNCS(type, depth, opt) \
46 LFL_FUNC(h, depth, opt) \
47 LFL_FUNC(v, depth, opt)
48
/* Loop-filter prototypes for depths 8, 10 and 12:
 *   chroma: sse2 and avx variants;
 *   luma:   sse2, ssse3 and avx variants.
 * Each line declares an 'h' and a 'v' function. The first argument (uint8_t)
 * is ignored by the macros. */
49 LFC_FUNCS(uint8_t, 8, sse2)
50 LFC_FUNCS(uint8_t, 10, sse2)
51 LFC_FUNCS(uint8_t, 12, sse2)
52 LFC_FUNCS(uint8_t, 8, avx)
53 LFC_FUNCS(uint8_t, 10, avx)
54 LFC_FUNCS(uint8_t, 12, avx)
55 LFL_FUNCS(uint8_t, 8, sse2)
56 LFL_FUNCS(uint8_t, 10, sse2)
57 LFL_FUNCS(uint8_t, 12, sse2)
58 LFL_FUNCS(uint8_t, 8, ssse3)
59 LFL_FUNCS(uint8_t, 10, ssse3)
60 LFL_FUNCS(uint8_t, 12, ssse3)
61 LFL_FUNCS(uint8_t, 8, avx)
62 LFL_FUNCS(uint8_t, 10, avx)
63 LFL_FUNCS(uint8_t, 12, avx)
64
/* Declares ff_hevc_idct_<W>_dc_{8,10,12}_<opt>(int16_t *coeffs).
 * The last prototype has no trailing ';', so each invocation must supply it. */
65 #define IDCT_DC_FUNCS(W, opt) \
66 void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
67 void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
68 void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)
69
/* DC-only IDCT prototypes: all four block sizes for sse2, and only 16x16 and
 * 32x32 for avx2. The ';' here terminates the last prototype of each
 * expansion. */
70 IDCT_DC_FUNCS(4x4, sse2);
71 IDCT_DC_FUNCS(8x8, sse2);
72 IDCT_DC_FUNCS(16x16, sse2);
73 IDCT_DC_FUNCS(32x32, sse2);
74 IDCT_DC_FUNCS(16x16, avx2);
75 IDCT_DC_FUNCS(32x32, avx2);
76
/* Declares ff_hevc_idct_<N>x<N>_{8,10}_<opt>(int16_t *coeffs, int col_limit)
 * for N = 4, 8, 16, 32. No 12-bit variants are declared here. Every prototype,
 * including the last, ends in ';', so invocations are written without one. */
77 #define IDCT_FUNCS(opt) \
78 void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \
79 void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \
80 void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \
81 void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \
82 void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \
83 void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
84 void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \
85 void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);
86
/* Full IDCT prototypes for the sse2 and avx variants. */
87 IDCT_FUNCS(sse2)
88 IDCT_FUNCS(avx)
89
90
/* Aliasing ff_hevc_pel_filters to the qpel table lets DECL_HV_FILTER(pel)
 * expand to an existing table name. */
91 #define ff_hevc_pel_filters ff_hevc_qpel_filters
/* Declares locals 'hf' and 'vf' pointing at rows mx and my of
 * ff_hevc_<f>_filters. 'mx' and 'my' must already be in scope at the point of
 * expansion. */
92 #define DECL_HV_FILTER(f) \
93 const uint8_t *hf = ff_hevc_ ## f ## _filters[mx]; \
94 const uint8_t *vf = ff_hevc_ ## f ## _filters[my];
95
/* Defines the static wrapper hevc_put_<a>_<depth>_<opt>(): it looks up the
 * horizontal and vertical filter rows via DECL_HV_FILTER(p) and forwards to
 * ff_h2656_put_<b>_<depth>_<opt>().
 * The wrapper takes no destination stride; it always passes 2 * MAX_PB_SIZE
 * as the second argument of the forwarded call. */
96 #define FW_PUT(p, a, b, depth, opt) \
97 static void hevc_put_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
98 int height, intptr_t mx, intptr_t my,int width) \
99 { \
100 DECL_HV_FILTER(p) \
101 ff_h2656_put_ ## b ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
102 }
103
/* Defines the static wrapper hevc_put_uni_<a>_<depth>_<opt>(): it looks up the
 * filter rows via DECL_HV_FILTER(p) and forwards to
 * ff_h2656_put_uni_<b>_<depth>_<opt>(). Here the destination is a uint8_t
 * pointer and the caller's dststride is passed through unchanged. */
104 #define FW_PUT_UNI(p, a, b, depth, opt) \
105 static void hevc_put_uni_ ## a ## _ ## depth ## _##opt(uint8_t *dst, ptrdiff_t dststride, \
106 const uint8_t *src, ptrdiff_t srcstride, \
107 int height, intptr_t mx, intptr_t my, int width) \
108 { \
109 DECL_HV_FILTER(p) \
110 ff_h2656_put_uni_ ## b ## _ ## depth ## _##opt(dst, dststride, src, srcstride, height, hf, vf, width); \
111 }
112
113 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
114
/* Emits both the put and the put_uni wrapper for one (a, b, depth, opt). */
115 #define FW_PUT_FUNCS(p, a, b, depth, opt) \
116 FW_PUT(p, a, b, depth, opt) \
117 FW_PUT_UNI(p, a, b, depth, opt)
118
/* Emits the put/put_uni wrappers named pel_pixels<w> that forward to the
 * pixels<w> implementation, using the 'pel' filter table name. */
119 #define FW_PEL(w, depth, opt) FW_PUT_FUNCS(pel, pel_pixels##w, pixels##w, depth, opt)
120
/* Emits the put/put_uni wrappers <npel>_h<w> and <npel>_v<w>, which forward to
 * the <n>tap_h<w> and <n>tap_v<w> implementations respectively. */
121 #define FW_DIR(npel, n, w, depth, opt) \
122 FW_PUT_FUNCS(npel, npel ## _h##w, n ## tap_h##w, depth, opt) \
123 FW_PUT_FUNCS(npel, npel ## _v##w, n ## tap_v##w, depth, opt)
124
/* Emits the put/put_uni wrappers <npel>_hv<w>, which forward to the
 * <n>tap_hv<w> implementation. */
125 #define FW_DIR_HV(npel, n, w, depth, opt) \
126 FW_PUT_FUNCS(npel, npel ## _hv##w, n ## tap_hv##w, depth, opt)
127
128 17410 FW_PEL(4, 8, sse4)
129 76 FW_PEL(6, 8, sse4)
130 26322 FW_PEL(8, 8, sse4)
131 76 FW_PEL(12, 8, sse4)
132 150684 FW_PEL(16, 8, sse4)
133 2400 FW_PEL(4, 10, sse4)
134 76 FW_PEL(6, 10, sse4)
135 33624 FW_PEL(8, 10, sse4)
136 304 FW_PEL(4, 12, sse4)
137 76 FW_PEL(6, 12, sse4)
138 1824 FW_PEL(8, 12, sse4)
139
/* epel wrappers map onto the 4-tap implementations:
 *   FW_EPEL       - h and v wrappers;
 *   FW_EPEL_HV    - hv wrapper;
 *   FW_EPEL_FUNCS - all three. */
140 #define FW_EPEL(w, depth, opt) FW_DIR(epel, 4, w, depth, opt)
141 #define FW_EPEL_HV(w, depth, opt) FW_DIR_HV(epel, 4, w, depth, opt)
142 #define FW_EPEL_FUNCS(w, depth, opt) \
143 FW_EPEL(w, depth, opt) \
144 FW_EPEL_HV(w, depth, opt)
145
146 152 FW_EPEL(12, 8, sse4)
147
148 10508 FW_EPEL_FUNCS(4, 8, sse4)
149 228 FW_EPEL_FUNCS(6, 8, sse4)
150 12880 FW_EPEL_FUNCS(8, 8, sse4)
151 21140 FW_EPEL_FUNCS(16, 8, sse4)
152 6884 FW_EPEL_FUNCS(4, 10, sse4)
153 228 FW_EPEL_FUNCS(6, 10, sse4)
154 31064 FW_EPEL_FUNCS(8, 10, sse4)
155 912 FW_EPEL_FUNCS(4, 12, sse4)
156 228 FW_EPEL_FUNCS(6, 12, sse4)
157 5472 FW_EPEL_FUNCS(8, 12, sse4)
158
/* qpel wrappers map onto the 8-tap implementations:
 *   FW_QPEL       - h and v wrappers;
 *   FW_QPEL_HV    - hv wrapper;
 *   FW_QPEL_FUNCS - all three. */
159 #define FW_QPEL(w, depth, opt) FW_DIR(qpel, 8, w, depth, opt)
160 #define FW_QPEL_HV(w, depth, opt) FW_DIR_HV(qpel, 8, w, depth, opt)
161 #define FW_QPEL_FUNCS(w, depth, opt) \
162 FW_QPEL(w, depth, opt) \
163 FW_QPEL_HV(w, depth, opt)
164
165 152 FW_QPEL(12, 8, sse4)
166 1638 FW_QPEL(16, 8, sse4)
167
168 306 FW_QPEL_FUNCS(4, 8, sse4)
169 4518 FW_QPEL_FUNCS(8, 8, sse4)
170 912 FW_QPEL_FUNCS(4, 10, sse4)
171 14268 FW_QPEL_FUNCS(8, 10, sse4)
172 912 FW_QPEL_FUNCS(4, 12, sse4)
173 5472 FW_QPEL_FUNCS(8, 12, sse4)
174
175 #if HAVE_AVX2_EXTERNAL
176
177 27552 FW_PEL(32, 8, avx2)
178 3028 FW_PUT(pel, pel_pixels16, pixels16, 10, avx2)
179
180 1396 FW_EPEL(32, 8, avx2)
181 5112 FW_EPEL(16, 10, avx2)
182
183 752 FW_EPEL_HV(32, 8, avx2)
184 6732 FW_EPEL_HV(16, 10, avx2)
185
186 32 FW_QPEL(32, 8, avx2)
187 8960 FW_QPEL(16, 10, avx2)
188
189 11354 FW_QPEL_HV(16, 10, avx2)
190
191 #endif
192 #endif
193
/* Defines static hevc_put_<name><W>_<bitd>_<opt>() by calling the <step>-wide
 * function of the same name/bitd/opt once per column offset
 * i = 0, step, 2*step, ... while i < W.
 * The int16_t destination advances by i elements. The uint8_t source advances
 * by i * ((bitd + 7) / 8), i.e. i samples of bitd bits rounded up to whole
 * bytes. height, mx, my and width are forwarded unchanged to every call. */
194 #define mc_rep_func(name, bitd, step, W, opt) \
195 static void hevc_put_##name##W##_##bitd##_##opt(int16_t *_dst, \
196 const uint8_t *_src, ptrdiff_t _srcstride, int height, \
197 intptr_t mx, intptr_t my, int width) \
198 { \
199 int i; \
200 int16_t *dst; \
201 for (i = 0; i < W; i += step) { \
202 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
203 dst = _dst + i; \
204 hevc_put_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
205 } \
206 }
/* Defines static hevc_put_uni_<name><W>_<bitd>_<opt>() by calling the
 * <step>-wide put_uni function once per column offset i = 0, step, ... while
 * i < W.
 * Source and destination are both uint8_t pointers, and both advance by
 * i * ((bitd + 7) / 8) bytes. dststride and the remaining arguments are
 * forwarded unchanged. */
207 #define mc_rep_uni_func(name, bitd, step, W, opt) \
208 static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \
209 const uint8_t *_src, ptrdiff_t _srcstride, int height, \
210 intptr_t mx, intptr_t my, int width) \
211 { \
212 int i; \
213 uint8_t *dst; \
214 for (i = 0; i < W; i += step) { \
215 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
216 dst = _dst + (i * ((bitd + 7) / 8)); \
217 hevc_put_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \
218 height, mx, my, width); \
219 } \
220 }
/* Defines static ff_hevc_put_bi_<name><W>_<bitd>_<opt>() by calling the
 * <step>-wide ff_hevc_put_bi_ function once per column offset i = 0, step, ...
 * while i < W.
 * The uint8_t source and destination advance by i * ((bitd + 7) / 8) bytes.
 * The int16_t second source (_src2) advances by i elements. */
221 #define mc_rep_bi_func(name, bitd, step, W, opt) \
222 static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \
223 ptrdiff_t _srcstride, const int16_t *_src2, \
224 int height, intptr_t mx, intptr_t my, int width) \
225 { \
226 int i; \
227 uint8_t *dst; \
228 for (i = 0; i < W ; i += step) { \
229 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
230 const int16_t *src2 = _src2 + i; \
231 dst = _dst + (i * ((bitd + 7) / 8)); \
232 ff_hevc_put_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \
233 height, mx, my, width); \
234 } \
235 }
236
/* Emits the put, put_uni and put_bi repeat wrappers for one
 * (name, bitd, step, W, opt). */
237 #define mc_rep_funcs(name, bitd, step, W, opt) \
238 mc_rep_func(name, bitd, step, W, opt) \
239 mc_rep_uni_func(name, bitd, step, W, opt) \
240 mc_rep_bi_func(name, bitd, step, W, opt)
241
/* Defines static hevc_put_<name><W>_<bitd>_<opt>() as exactly two calls:
 * the <step1>-wide function at column 0, then the <step2>-wide function at
 * column step1 (dst + step1 int16_t elements, src + step1 * ((bitd + 7) / 8)
 * bytes). W only appears in the generated name; the macro does not check that
 * step1 + step2 equals W. */
242 #define mc_rep_func2(name, bitd, step1, step2, W, opt) \
243 static void hevc_put_##name##W##_##bitd##_##opt(int16_t *dst, \
244 const uint8_t *src, ptrdiff_t _srcstride, int height, \
245 intptr_t mx, intptr_t my, int width) \
246 { \
247 hevc_put_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
248 hevc_put_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)), \
249 _srcstride, height, mx, my, width); \
250 }
/* Defines static hevc_put_uni_<name><W>_<bitd>_<opt>() as exactly two calls:
 * the <step1>-wide put_uni function at column 0, then the <step2>-wide one
 * with dst and src both advanced by step1 * ((bitd + 7) / 8) bytes.
 * W only appears in the generated name. */
251 #define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
252 static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \
253 const uint8_t *src, ptrdiff_t _srcstride, int height, \
254 intptr_t mx, intptr_t my, int width) \
255 { \
256 hevc_put_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width); \
257 hevc_put_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
258 src + (step1 * ((bitd + 7) / 8)), _srcstride, \
259 height, mx, my, width); \
260 }
/* Defines static ff_hevc_put_bi_<name><W>_<bitd>_<opt>() as exactly two calls:
 * the <step1>-wide put_bi function at column 0, then the <step2>-wide one with
 * dst and src advanced by step1 * ((bitd + 7) / 8) bytes and the int16_t src2
 * advanced by step1 elements. W only appears in the generated name. */
261 #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \
262 static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
263 ptrdiff_t _srcstride, const int16_t *src2, \
264 int height, intptr_t mx, intptr_t my, int width) \
265 { \
266 ff_hevc_put_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width);\
267 ff_hevc_put_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
268 src + (step1 * ((bitd + 7) / 8)), _srcstride, \
269 src2 + step1, height, mx, my, width); \
270 }
271
/* Emits the put, put_uni and put_bi two-call wrappers for one
 * (name, bitd, step1, step2, W, opt). */
272 #define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \
273 mc_rep_func2(name, bitd, step1, step2, W, opt) \
274 mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
275 mc_rep_bi_func2(name, bitd, step1, step2, W, opt)
276
277 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
278
/* Defines static hevc_put_<name><width1>_10_<opt1>() from two narrower
 * functions of different instruction sets: the <width2>-wide <opt1> function
 * at column 0, then the <width3>-wide <opt2> function.
 * For the second call the int16_t dst advances by width2 elements, while the
 * uint8_t src advances by width4, which the caller supplies as a separate
 * offset (NOTE(review): presumably width2 expressed in bytes for 10-bit
 * samples - confirm against the instantiations). */
279 #define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
280 static void hevc_put_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
281 int height, intptr_t mx, intptr_t my, int width) \
282 \
283 { \
284 hevc_put_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \
285 hevc_put_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width); \
286 }
287
/* Defines static ff_hevc_put_bi_<name><width1>_10_<opt1>() from the
 * <width2>-wide <opt1> function at column 0 followed by the <width3>-wide
 * <opt2> function.
 * For the second call the uint8_t dst and src both advance by width4, and the
 * int16_t src2 advances by width2 elements. */
288 #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
289 static void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
290 ptrdiff_t _srcstride, const int16_t *src2, \
291 int height, intptr_t mx, intptr_t my, int width) \
292 { \
293 ff_hevc_put_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \
294 height, mx, my, width); \
295 ff_hevc_put_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2, \
296 height, mx, my, width); \
297 }
298
/* Defines static hevc_put_uni_<name><width1>_10_<opt1>() from the
 * <width2>-wide <opt1> function at column 0 followed by the <width3>-wide
 * <opt2> function, whose uint8_t dst and src are both advanced by width4. */
299 #define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
300 static void hevc_put_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \
301 const uint8_t *src, ptrdiff_t _srcstride, int height, \
302 intptr_t mx, intptr_t my, int width) \
303 { \
304 hevc_put_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \
305 height, mx, my, width); \
306 hevc_put_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, \
307 height, mx, my, width); \
308 }
309
/* Emits the put, put_bi and put_uni mixed-instruction-set wrappers for the
 * 10-bit case. */
310 #define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) \
311 mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
312 mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
313 mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)
314
/* Defines static hevc_put_<name><width1>_8_<opt1>() from the <width2>-wide
 * <opt1> function at column 0 followed by the <width3>-wide <opt2> function.
 * Unlike the 10-bit macro there is no separate byte offset: dst (int16_t) and
 * src (uint8_t) both advance by width2 for the second call. */
315 #define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
316 static void hevc_put_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
317 int height, intptr_t mx, intptr_t my, int width) \
318 \
319 { \
320 hevc_put_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \
321 hevc_put_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width); \
322 }
323
/* Defines static ff_hevc_put_bi_<name><width1>_8_<opt1>() from the
 * <width2>-wide <opt1> function at column 0 followed by the <width3>-wide
 * <opt2> function, with dst, src and src2 all advanced by width2. */
324 #define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
325 static void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
326 ptrdiff_t _srcstride, const int16_t *src2, \
327 int height, intptr_t mx, intptr_t my, int width) \
328 { \
329 ff_hevc_put_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
330 src2, height, mx, my, width); \
331 ff_hevc_put_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \
332 src2+width2, height, mx, my, width); \
333 }
334
/* Defines static hevc_put_uni_<name><width1>_8_<opt1>() from the
 * <width2>-wide <opt1> function at column 0 followed by the <width3>-wide
 * <opt2> function, with dst and src both advanced by width2. */
335 #define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
336 static void hevc_put_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \
337 const uint8_t *src, ptrdiff_t _srcstride, int height, \
338 intptr_t mx, intptr_t my, int width) \
339 { \
340 hevc_put_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
341 height, mx, my, width); \
342 hevc_put_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \
343 height, mx, my, width); \
344 }
345
/* Emits the put, put_bi and put_uni mixed-instruction-set wrappers for the
 * 8-bit case. */
346 #define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) \
347 mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
348 mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
349 mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)
350
351 #if HAVE_AVX2_EXTERNAL
352
353 8 mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
354 6 mc_rep_mixs_8(epel_hv, 48, 32, 16, avx2, sse4)
355 6 mc_rep_mixs_8(epel_h , 48, 32, 16, avx2, sse4)
356 6 mc_rep_mixs_8(epel_v , 48, 32, 16, avx2, sse4)
357
358 5 mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
359 1 mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32)
360 6 mc_rep_mixs_10(epel_hv, 24, 16, 8, avx2, sse4, 32)
361 6 mc_rep_mixs_10(epel_h , 24, 16, 8, avx2, sse4, 32)
362 6 mc_rep_mixs_10(epel_v , 24, 16, 8, avx2, sse4, 32)
363
364
365 6 mc_rep_mixs_10(qpel_h , 24, 16, 8, avx2, sse4, 32)
366 6 mc_rep_mixs_10(qpel_v , 24, 16, 8, avx2, sse4, 32)
367 6 mc_rep_mixs_10(qpel_hv, 24, 16, 8, avx2, sse4, 32)
368
369
370
2/2
✓ Branch 1 taken 6722 times.
✓ Branch 2 taken 3361 times.
20166 mc_rep_funcs(pel_pixels, 8, 32, 64, avx2)
371
372
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 78 times.
234 mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2)//used for 10bit
373
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
4 mc_rep_uni_func(pel_pixels, 8, 32, 96, avx2) //used for 10bit
374
375
2/2
✓ Branch 1 taken 2026 times.
✓ Branch 2 taken 1013 times.
3039 mc_rep_func(pel_pixels, 10, 16, 32, avx2)
376
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
4 mc_rep_func(pel_pixels, 10, 16, 48, avx2)
377
2/2
✓ Branch 1 taken 304 times.
✓ Branch 2 taken 152 times.
456 mc_rep_func(pel_pixels, 10, 32, 64, avx2)
378
379
2/2
✓ Branch 1 taken 176 times.
✓ Branch 2 taken 88 times.
264 mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2)
380
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
4 mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2)
381
2/2
✓ Branch 1 taken 42 times.
✓ Branch 2 taken 21 times.
63 mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2)
382
383
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_h, 8, 32, 64, avx2)
384
385
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_v, 8, 32, 64, avx2)
386
387
2/2
✓ Branch 1 taken 1290 times.
✓ Branch 2 taken 645 times.
3870 mc_rep_funcs(epel_h, 10, 16, 32, avx2)
388
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(epel_h, 10, 16, 48, avx2)
389
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_h, 10, 32, 64, avx2)
390
391
2/2
✓ Branch 1 taken 154 times.
✓ Branch 2 taken 77 times.
462 mc_rep_funcs(epel_v, 10, 16, 32, avx2)
392
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(epel_v, 10, 16, 48, avx2)
393
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_v, 10, 32, 64, avx2)
394
395
396
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_hv, 8, 32, 64, avx2)
397
398
2/2
✓ Branch 1 taken 1598 times.
✓ Branch 2 taken 799 times.
4794 mc_rep_funcs(epel_hv, 10, 16, 32, avx2)
399
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(epel_hv, 10, 16, 48, avx2)
400
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_hv, 10, 32, 64, avx2)
401
402
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(qpel_h, 8, 32, 64, avx2)
403 6 mc_rep_mixs_8(qpel_h , 48, 32, 16, avx2, sse4)
404
405
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(qpel_v, 8, 32, 64, avx2)
406 6 mc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4)
407
408
2/2
✓ Branch 1 taken 1982 times.
✓ Branch 2 taken 991 times.
5946 mc_rep_funcs(qpel_h, 10, 16, 32, avx2)
409
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(qpel_h, 10, 16, 48, avx2)
410
2/2
✓ Branch 1 taken 512 times.
✓ Branch 2 taken 256 times.
1536 mc_rep_funcs(qpel_h, 10, 32, 64, avx2)
411
412
2/2
✓ Branch 1 taken 910 times.
✓ Branch 2 taken 455 times.
2730 mc_rep_funcs(qpel_v, 10, 16, 32, avx2)
413
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(qpel_v, 10, 16, 48, avx2)
414
2/2
✓ Branch 1 taken 202 times.
✓ Branch 2 taken 101 times.
606 mc_rep_funcs(qpel_v, 10, 32, 64, avx2)
415
416
2/2
✓ Branch 1 taken 3116 times.
✓ Branch 2 taken 1558 times.
9348 mc_rep_funcs(qpel_hv, 10, 16, 32, avx2)
417
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(qpel_hv, 10, 16, 48, avx2)
418
2/2
✓ Branch 1 taken 668 times.
✓ Branch 2 taken 334 times.
2004 mc_rep_funcs(qpel_hv, 10, 32, 64, avx2)
419
420 #endif //AVX2
421
422
2/2
✓ Branch 1 taken 30924 times.
✓ Branch 2 taken 7731 times.
77310 mc_rep_funcs(pel_pixels, 8, 16, 64, sse4)
423
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(pel_pixels, 8, 16, 48, sse4)
424
2/2
✓ Branch 1 taken 33874 times.
✓ Branch 2 taken 16937 times.
101622 mc_rep_funcs(pel_pixels, 8, 16, 32, sse4)
425
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(pel_pixels, 8, 8, 24, sse4)
426
2/2
✓ Branch 1 taken 2960 times.
✓ Branch 2 taken 370 times.
6660 mc_rep_funcs(pel_pixels,10, 8, 64, sse4)
427
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(pel_pixels,10, 8, 48, sse4)
428
2/2
✓ Branch 1 taken 6208 times.
✓ Branch 2 taken 1552 times.
15520 mc_rep_funcs(pel_pixels,10, 8, 32, sse4)
429
2/2
✓ Branch 1 taken 144 times.
✓ Branch 2 taken 48 times.
384 mc_rep_funcs(pel_pixels,10, 8, 24, sse4)
430
2/2
✓ Branch 1 taken 4020 times.
✓ Branch 2 taken 2010 times.
12060 mc_rep_funcs(pel_pixels,10, 8, 16, sse4)
431
2/2
✓ Branch 1 taken 177 times.
✓ Branch 2 taken 59 times.
472 mc_rep_funcs(pel_pixels,10, 4, 12, sse4)
432
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(pel_pixels,12, 8, 64, sse4)
433
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(pel_pixels,12, 8, 48, sse4)
434
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(pel_pixels,12, 8, 32, sse4)
435
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(pel_pixels,12, 8, 24, sse4)
436
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(pel_pixels,12, 8, 16, sse4)
437
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(pel_pixels,12, 4, 12, sse4)
438
439
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_h, 8, 16, 64, sse4)
440
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_h, 8, 16, 48, sse4)
441
2/2
✓ Branch 1 taken 1076 times.
✓ Branch 2 taken 538 times.
3228 mc_rep_funcs(epel_h, 8, 16, 32, sse4)
442
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_h, 8, 8, 24, sse4)
443
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(epel_h,10, 8, 64, sse4)
444
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(epel_h,10, 8, 48, sse4)
445
2/2
✓ Branch 1 taken 1016 times.
✓ Branch 2 taken 254 times.
2540 mc_rep_funcs(epel_h,10, 8, 32, sse4)
446
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_h,10, 8, 24, sse4)
447
2/2
✓ Branch 1 taken 1140 times.
✓ Branch 2 taken 570 times.
3420 mc_rep_funcs(epel_h,10, 8, 16, sse4)
448
2/2
✓ Branch 1 taken 129 times.
✓ Branch 2 taken 43 times.
344 mc_rep_funcs(epel_h,10, 4, 12, sse4)
449
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(epel_h,12, 8, 64, sse4)
450
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(epel_h,12, 8, 48, sse4)
451
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(epel_h,12, 8, 32, sse4)
452
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_h,12, 8, 24, sse4)
453
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(epel_h,12, 8, 16, sse4)
454
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_h,12, 4, 12, sse4)
455
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_v, 8, 16, 64, sse4)
456
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_v, 8, 16, 48, sse4)
457
2/2
✓ Branch 1 taken 2056 times.
✓ Branch 2 taken 1028 times.
6168 mc_rep_funcs(epel_v, 8, 16, 32, sse4)
458
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_v, 8, 8, 24, sse4)
459
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(epel_v,10, 8, 64, sse4)
460
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(epel_v,10, 8, 48, sse4)
461
2/2
✓ Branch 1 taken 296 times.
✓ Branch 2 taken 74 times.
740 mc_rep_funcs(epel_v,10, 8, 32, sse4)
462
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_v,10, 8, 24, sse4)
463
2/2
✓ Branch 1 taken 204 times.
✓ Branch 2 taken 102 times.
612 mc_rep_funcs(epel_v,10, 8, 16, sse4)
464
2/2
✓ Branch 1 taken 141 times.
✓ Branch 2 taken 47 times.
376 mc_rep_funcs(epel_v,10, 4, 12, sse4)
465
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(epel_v,12, 8, 64, sse4)
466
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(epel_v,12, 8, 48, sse4)
467
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(epel_v,12, 8, 32, sse4)
468
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_v,12, 8, 24, sse4)
469
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(epel_v,12, 8, 16, sse4)
470
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_v,12, 4, 12, sse4)
471
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_hv, 8, 16, 64, sse4)
472
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_hv, 8, 16, 48, sse4)
473
2/2
✓ Branch 1 taken 1860 times.
✓ Branch 2 taken 930 times.
5580 mc_rep_funcs(epel_hv, 8, 16, 32, sse4)
474
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv, 8, 8, 24, sse4)
475 78 mc_rep_funcs2(epel_hv,8, 8, 4, 12, sse4)
476
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(epel_hv,10, 8, 64, sse4)
477
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(epel_hv,10, 8, 48, sse4)
478
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 78 times.
780 mc_rep_funcs(epel_hv,10, 8, 32, sse4)
479
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_hv,10, 8, 24, sse4)
480
2/2
✓ Branch 1 taken 236 times.
✓ Branch 2 taken 118 times.
708 mc_rep_funcs(epel_hv,10, 8, 16, sse4)
481
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv,10, 4, 12, sse4)
482
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(epel_hv,12, 8, 64, sse4)
483
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(epel_hv,12, 8, 48, sse4)
484
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(epel_hv,12, 8, 32, sse4)
485
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv,12, 8, 24, sse4)
486
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(epel_hv,12, 8, 16, sse4)
487
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv,12, 4, 12, sse4)
488
489
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(qpel_h, 8, 16, 64, sse4)
490
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_h, 8, 16, 48, sse4)
491
2/2
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
252 mc_rep_funcs(qpel_h, 8, 16, 32, sse4)
492
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h, 8, 8, 24, sse4)
493
2/2
✓ Branch 1 taken 1184 times.
✓ Branch 2 taken 148 times.
2664 mc_rep_funcs(qpel_h,10, 8, 64, sse4)
494
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(qpel_h,10, 8, 48, sse4)
495
2/2
✓ Branch 1 taken 1248 times.
✓ Branch 2 taken 312 times.
3120 mc_rep_funcs(qpel_h,10, 8, 32, sse4)
496
2/2
✓ Branch 1 taken 132 times.
✓ Branch 2 taken 44 times.
352 mc_rep_funcs(qpel_h,10, 8, 24, sse4)
497
2/2
✓ Branch 1 taken 424 times.
✓ Branch 2 taken 212 times.
1272 mc_rep_funcs(qpel_h,10, 8, 16, sse4)
498
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h,10, 4, 12, sse4)
499
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(qpel_h,12, 8, 64, sse4)
500
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_h,12, 8, 48, sse4)
501
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(qpel_h,12, 8, 32, sse4)
502
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h,12, 8, 24, sse4)
503
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(qpel_h,12, 8, 16, sse4)
504
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h,12, 4, 12, sse4)
505
2/2
✓ Branch 1 taken 176 times.
✓ Branch 2 taken 44 times.
440 mc_rep_funcs(qpel_v, 8, 16, 64, sse4)
506
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_v, 8, 16, 48, sse4)
507
2/2
✓ Branch 1 taken 92 times.
✓ Branch 2 taken 46 times.
276 mc_rep_funcs(qpel_v, 8, 16, 32, sse4)
508
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v, 8, 8, 24, sse4)
509
2/2
✓ Branch 1 taken 464 times.
✓ Branch 2 taken 58 times.
1044 mc_rep_funcs(qpel_v,10, 8, 64, sse4)
510
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(qpel_v,10, 8, 48, sse4)
511
2/2
✓ Branch 1 taken 304 times.
✓ Branch 2 taken 76 times.
760 mc_rep_funcs(qpel_v,10, 8, 32, sse4)
512
2/2
✓ Branch 1 taken 138 times.
✓ Branch 2 taken 46 times.
368 mc_rep_funcs(qpel_v,10, 8, 24, sse4)
513
2/2
✓ Branch 1 taken 96 times.
✓ Branch 2 taken 48 times.
288 mc_rep_funcs(qpel_v,10, 8, 16, sse4)
514
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v,10, 4, 12, sse4)
515
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(qpel_v,12, 8, 64, sse4)
516
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_v,12, 8, 48, sse4)
517
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(qpel_v,12, 8, 32, sse4)
518
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v,12, 8, 24, sse4)
519
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(qpel_v,12, 8, 16, sse4)
520
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v,12, 4, 12, sse4)
521
2/2
✓ Branch 1 taken 928 times.
✓ Branch 2 taken 116 times.
2088 mc_rep_funcs(qpel_hv, 8, 8, 64, sse4)
522
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_hv, 8, 8, 48, sse4)
523
2/2
✓ Branch 1 taken 292 times.
✓ Branch 2 taken 73 times.
730 mc_rep_funcs(qpel_hv, 8, 8, 32, sse4)
524
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv, 8, 8, 24, sse4)
525
2/2
✓ Branch 1 taken 202 times.
✓ Branch 2 taken 101 times.
606 mc_rep_funcs(qpel_hv, 8, 8, 16, sse4)
526 78 mc_rep_funcs2(qpel_hv,8, 8, 4, 12, sse4)
527
2/2
✓ Branch 1 taken 480 times.
✓ Branch 2 taken 60 times.
1080 mc_rep_funcs(qpel_hv,10, 8, 64, sse4)
528
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(qpel_hv,10, 8, 48, sse4)
529
2/2
✓ Branch 1 taken 272 times.
✓ Branch 2 taken 68 times.
680 mc_rep_funcs(qpel_hv,10, 8, 32, sse4)
530
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_hv,10, 8, 24, sse4)
531
2/2
✓ Branch 1 taken 100 times.
✓ Branch 2 taken 50 times.
300 mc_rep_funcs(qpel_hv,10, 8, 16, sse4)
532
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv,10, 4, 12, sse4)
533
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(qpel_hv,12, 8, 64, sse4)
534
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_hv,12, 8, 48, sse4)
535
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(qpel_hv,12, 8, 32, sse4)
536
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv,12, 8, 24, sse4)
537
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(qpel_hv,12, 8, 16, sse4)
538
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv,12, 4, 12, sse4)
539
/* Defines ff_hevc_put_uni_w<W>_<bitd>_<opt>() (external linkage - no 'static')
 * by calling ff_hevc_put_uni_w<step>_<bitd>_<opt>() once per column offset
 * i = 0, step, ... while i < W.
 * The int16_t source advances by i elements; the uint8_t destination advances
 * by i * ((bitd + 7) / 8) bytes. height, denom, _wx and _ox are forwarded
 * unchanged. */
540 #define mc_rep_uni_w(bitd, step, W, opt) \
541 void ff_hevc_put_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
542 int height, int denom, int _wx, int _ox) \
543 { \
544 int i; \
545 uint8_t *dst; \
546 for (i = 0; i < W; i += step) { \
547 const int16_t *src = _src + i; \
548 dst= _dst + (i * ((bitd + 7) / 8)); \
549 ff_hevc_put_uni_w##step##_##bitd##_##opt(dst, dststride, src, \
550 height, denom, _wx, _ox); \
551 } \
552 }
553
/*
 * SSE4 instantiations of mc_rep_uni_w for 8-, 10- and 12-bit samples.
 * Width 12 is assembled from two 6-wide calls; widths 16, 24, 32, 48 and
 * 64 are assembled from 8-wide calls.
 */
554
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(8, 6, 12, sse4)
555
2/2
✓ Branch 1 taken 21144 times.
✓ Branch 2 taken 10572 times.
31716 mc_rep_uni_w(8, 8, 16, sse4)
556
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_uni_w(8, 8, 24, sse4)
557
2/2
✓ Branch 1 taken 54668 times.
✓ Branch 2 taken 13667 times.
68335 mc_rep_uni_w(8, 8, 32, sse4)
558
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_uni_w(8, 8, 48, sse4)
559
2/2
✓ Branch 1 taken 43016 times.
✓ Branch 2 taken 5377 times.
48393 mc_rep_uni_w(8, 8, 64, sse4)
560
561
2/2
✓ Branch 1 taken 284 times.
✓ Branch 2 taken 142 times.
426 mc_rep_uni_w(10, 6, 12, sse4)
562
2/2
✓ Branch 1 taken 3220 times.
✓ Branch 2 taken 1610 times.
4830 mc_rep_uni_w(10, 8, 16, sse4)
563
2/2
✓ Branch 1 taken 402 times.
✓ Branch 2 taken 134 times.
536 mc_rep_uni_w(10, 8, 24, sse4)
564
2/2
✓ Branch 1 taken 4728 times.
✓ Branch 2 taken 1182 times.
5910 mc_rep_uni_w(10, 8, 32, sse4)
565
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_uni_w(10, 8, 48, sse4)
566
2/2
✓ Branch 1 taken 2832 times.
✓ Branch 2 taken 354 times.
3186 mc_rep_uni_w(10, 8, 64, sse4)
567
568
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(12, 6, 12, sse4)
569
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(12, 8, 16, sse4)
570
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_uni_w(12, 8, 24, sse4)
571
2/2
✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
630 mc_rep_uni_w(12, 8, 32, sse4)
572
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_uni_w(12, 8, 48, sse4)
573
2/2
✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
1134 mc_rep_uni_w(12, 8, 64, sse4)
574
/*
 * mc_rep_bi_w(bitd, step, W, opt)
 *
 * Defines ff_hevc_put_bi_w<W>_<bitd>_<opt>() by calling the narrower
 * ff_hevc_put_bi_w<step>_<bitd>_<opt>() once per `step`-wide column
 * strip, for i = 0, step, 2*step, ... while i < W.
 *
 * Both int16_t inputs (_src and _src2) advance by i elements per strip;
 * the destination advances by i * ((bitd + 7) / 8) bytes, i.e. the byte
 * size of one sample at the given bit depth. dststride, height, denom,
 * _wx0, _wx1, _ox0 and _ox1 are forwarded unchanged.
 *
 * The generated function has external linkage (no `static`), and the
 * <step>-wide callee must already be declared where the macro is expanded.
 */
575 #define mc_rep_bi_w(bitd, step, W, opt) \
576 void ff_hevc_put_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
577 const int16_t *_src2, int height, \
578 int denom, int _wx0, int _wx1, int _ox0, int _ox1) \
579 { \
580 int i; \
581 uint8_t *dst; \
582 for (i = 0; i < W; i += step) { \
583 const int16_t *src = _src + i; \
584 const int16_t *src2 = _src2 + i; \
585 dst = _dst + (i * ((bitd + 7) / 8)); \
586 ff_hevc_put_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \
587 height, denom, _wx0, _wx1, _ox0, _ox1); \
588 } \
589 }
590
/*
 * SSE4 instantiations of mc_rep_bi_w for 8-, 10- and 12-bit samples.
 * Width 12 is assembled from two 6-wide calls; widths 16, 24, 32, 48 and
 * 64 are assembled from 8-wide calls.
 */
591
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(8, 6, 12, sse4)
592
2/2
✓ Branch 1 taken 4896 times.
✓ Branch 2 taken 2448 times.
7344 mc_rep_bi_w(8, 8, 16, sse4)
593
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_bi_w(8, 8, 24, sse4)
594
2/2
✓ Branch 1 taken 23552 times.
✓ Branch 2 taken 5888 times.
29440 mc_rep_bi_w(8, 8, 32, sse4)
595
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_bi_w(8, 8, 48, sse4)
596
2/2
✓ Branch 1 taken 21144 times.
✓ Branch 2 taken 2643 times.
23787 mc_rep_bi_w(8, 8, 64, sse4)
597
598
2/2
✓ Branch 1 taken 268 times.
✓ Branch 2 taken 134 times.
402 mc_rep_bi_w(10, 6, 12, sse4)
599
2/2
✓ Branch 1 taken 2916 times.
✓ Branch 2 taken 1458 times.
4374 mc_rep_bi_w(10, 8, 16, sse4)
600
2/2
✓ Branch 1 taken 390 times.
✓ Branch 2 taken 130 times.
520 mc_rep_bi_w(10, 8, 24, sse4)
601
2/2
✓ Branch 1 taken 4760 times.
✓ Branch 2 taken 1190 times.
5950 mc_rep_bi_w(10, 8, 32, sse4)
602
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_bi_w(10, 8, 48, sse4)
603
2/2
✓ Branch 1 taken 2928 times.
✓ Branch 2 taken 366 times.
3294 mc_rep_bi_w(10, 8, 64, sse4)
604
605
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(12, 6, 12, sse4)
606
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(12, 8, 16, sse4)
607
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_bi_w(12, 8, 24, sse4)
608
2/2
✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
630 mc_rep_bi_w(12, 8, 32, sse4)
609
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_bi_w(12, 8, 48, sse4)
610
2/2
✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
1134 mc_rep_bi_w(12, 8, 64, sse4)
611
/*
 * mc_uni_w_func(name, bitd, W, opt)
 *
 * Defines the file-local function hevc_put_uni_w_<name><W>_<bitd>_<opt>()
 * as a two-stage pipeline:
 *   1. hevc_put_<name><W>_<bitd>_<opt>() writes its result for the source
 *      block (_src, _srcstride, height, mx, my, width) into the int16_t
 *      scratch buffer `temp`;
 *   2. ff_hevc_put_uni_w<W>_<bitd>_<opt>() reads `temp` and writes _dst
 *      using (height, denom, _wx, _ox).
 * `mx`, `my` and `width` are only used by the first stage.
 *
 * `temp` holds 71 * MAX_PB_SIZE int16_t elements and is declared through
 * LOCAL_ALIGNED_16.
 * NOTE(review): 71 presumably covers the largest block height plus filter
 * margin rows -- confirm against the put functions.
 */
612 #define mc_uni_w_func(name, bitd, W, opt) \
613 static void hevc_put_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
614 const uint8_t *_src, ptrdiff_t _srcstride, \
615 int height, int denom, \
616 int _wx, int _ox, \
617 intptr_t mx, intptr_t my, int width) \
618 { \
619 LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
620 hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
621 ff_hevc_put_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox); \
622 }
623
/*
 * mc_uni_w_funcs(name, bitd, opt)
 *
 * Expands mc_uni_w_func for the widths 4, 8, 12, 16, 24, 32, 48 and 64.
 * Width 6 is not part of this list; callers that need it instantiate
 * mc_uni_w_func(name, bitd, 6, opt) separately.
 */
624 #define mc_uni_w_funcs(name, bitd, opt) \
625 mc_uni_w_func(name, bitd, 4, opt) \
626 mc_uni_w_func(name, bitd, 8, opt) \
627 mc_uni_w_func(name, bitd, 12, opt) \
628 mc_uni_w_func(name, bitd, 16, opt) \
629 mc_uni_w_func(name, bitd, 24, opt) \
630 mc_uni_w_func(name, bitd, 32, opt) \
631 mc_uni_w_func(name, bitd, 48, opt) \
632 mc_uni_w_func(name, bitd, 64, opt)
633
/*
 * SSE4 hevc_put_uni_w_* wrappers for 8-, 10- and 12-bit samples.
 * pel_pixels and the three epel variants additionally get a width-6
 * instance; the qpel variants do not, which matches QPEL_LINKS having no
 * width-6 entry.
 */
634 81664 mc_uni_w_funcs(pel_pixels, 8, sse4)
635 18 mc_uni_w_func(pel_pixels, 8, 6, sse4)
636 7784 mc_uni_w_funcs(epel_h, 8, sse4)
637 18 mc_uni_w_func(epel_h, 8, 6, sse4)
638 7424 mc_uni_w_funcs(epel_v, 8, sse4)
639 18 mc_uni_w_func(epel_v, 8, 6, sse4)
640 7664 mc_uni_w_funcs(epel_hv, 8, sse4)
641 18 mc_uni_w_func(epel_hv, 8, 6, sse4)
642 298 mc_uni_w_funcs(qpel_h, 8, sse4)
643 308 mc_uni_w_funcs(qpel_v, 8, sse4)
644 882 mc_uni_w_funcs(qpel_hv, 8, sse4)
645
646 4980 mc_uni_w_funcs(pel_pixels, 10, sse4)
647 18 mc_uni_w_func(pel_pixels, 10, 6, sse4)
648 1352 mc_uni_w_funcs(epel_h, 10, sse4)
649 18 mc_uni_w_func(epel_h, 10, 6, sse4)
650 360 mc_uni_w_funcs(epel_v, 10, sse4)
651 18 mc_uni_w_func(epel_v, 10, 6, sse4)
652 536 mc_uni_w_funcs(epel_hv, 10, sse4)
653 18 mc_uni_w_func(epel_hv, 10, 6, sse4)
654 844 mc_uni_w_funcs(qpel_h, 10, sse4)
655 332 mc_uni_w_funcs(qpel_v, 10, sse4)
656 380 mc_uni_w_funcs(qpel_hv, 10, sse4)
657
658 288 mc_uni_w_funcs(pel_pixels, 12, sse4)
659 18 mc_uni_w_func(pel_pixels, 12, 6, sse4)
660 288 mc_uni_w_funcs(epel_h, 12, sse4)
661 18 mc_uni_w_func(epel_h, 12, 6, sse4)
662 288 mc_uni_w_funcs(epel_v, 12, sse4)
663 18 mc_uni_w_func(epel_v, 12, 6, sse4)
664 288 mc_uni_w_funcs(epel_hv, 12, sse4)
665 18 mc_uni_w_func(epel_hv, 12, 6, sse4)
666 288 mc_uni_w_funcs(qpel_h, 12, sse4)
667 288 mc_uni_w_funcs(qpel_v, 12, sse4)
668 288 mc_uni_w_funcs(qpel_hv, 12, sse4)
669
/*
 * mc_bi_w_func(name, bitd, W, opt)
 *
 * Defines the file-local function hevc_put_bi_w_<name><W>_<bitd>_<opt>()
 * as a two-stage pipeline:
 *   1. hevc_put_<name><W>_<bitd>_<opt>() writes its result for the source
 *      block (_src, _srcstride, height, mx, my, width) into the int16_t
 *      scratch buffer `temp`;
 *   2. ff_hevc_put_bi_w<W>_<bitd>_<opt>() reads `temp` together with the
 *      caller-supplied _src2 and writes _dst using (height, denom, _wx0,
 *      _wx1, _ox0, _ox1).
 * `mx`, `my` and `width` are only used by the first stage.
 *
 * `temp` holds 71 * MAX_PB_SIZE int16_t elements and is declared through
 * LOCAL_ALIGNED_16.
 * NOTE(review): 71 presumably covers the largest block height plus filter
 * margin rows -- confirm against the put functions.
 */
670 #define mc_bi_w_func(name, bitd, W, opt) \
671 static void hevc_put_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
672 const uint8_t *_src, ptrdiff_t _srcstride, \
673 const int16_t *_src2, \
674 int height, int denom, \
675 int _wx0, int _wx1, int _ox0, int _ox1, \
676 intptr_t mx, intptr_t my, int width) \
677 { \
678 LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
679 hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
680 ff_hevc_put_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \
681 height, denom, _wx0, _wx1, _ox0, _ox1); \
682 }
683
/*
 * mc_bi_w_funcs(name, bitd, opt)
 *
 * Expands mc_bi_w_func for the widths 4, 8, 12, 16, 24, 32, 48 and 64.
 * Width 6 is not part of this list; callers that need it instantiate
 * mc_bi_w_func(name, bitd, 6, opt) separately.
 */
684 #define mc_bi_w_funcs(name, bitd, opt) \
685 mc_bi_w_func(name, bitd, 4, opt) \
686 mc_bi_w_func(name, bitd, 8, opt) \
687 mc_bi_w_func(name, bitd, 12, opt) \
688 mc_bi_w_func(name, bitd, 16, opt) \
689 mc_bi_w_func(name, bitd, 24, opt) \
690 mc_bi_w_func(name, bitd, 32, opt) \
691 mc_bi_w_func(name, bitd, 48, opt) \
692 mc_bi_w_func(name, bitd, 64, opt)
693
/*
 * SSE4 hevc_put_bi_w_* wrappers for 8-, 10- and 12-bit samples.
 * pel_pixels and the three epel variants additionally get a width-6
 * instance; the qpel variants do not, which matches QPEL_LINKS having no
 * width-6 entry.
 */
694 23926 mc_bi_w_funcs(pel_pixels, 8, sse4)
695 18 mc_bi_w_func(pel_pixels, 8, 6, sse4)
696 1240 mc_bi_w_funcs(epel_h, 8, sse4)
697 18 mc_bi_w_func(epel_h, 8, 6, sse4)
698 4860 mc_bi_w_funcs(epel_v, 8, sse4)
699 18 mc_bi_w_func(epel_v, 8, 6, sse4)
700 2416 mc_bi_w_funcs(epel_hv, 8, sse4)
701 18 mc_bi_w_func(epel_hv, 8, 6, sse4)
702 288 mc_bi_w_funcs(qpel_h, 8, sse4)
703 288 mc_bi_w_funcs(qpel_v, 8, sse4)
704 288 mc_bi_w_funcs(qpel_hv, 8, sse4)
705
706 4336 mc_bi_w_funcs(pel_pixels, 10, sse4)
707 18 mc_bi_w_func(pel_pixels, 10, 6, sse4)
708 1392 mc_bi_w_funcs(epel_h, 10, sse4)
709 18 mc_bi_w_func(epel_h, 10, 6, sse4)
710 448 mc_bi_w_funcs(epel_v, 10, sse4)
711 18 mc_bi_w_func(epel_v, 10, 6, sse4)
712 312 mc_bi_w_funcs(epel_hv, 10, sse4)
713 18 mc_bi_w_func(epel_hv, 10, 6, sse4)
714 836 mc_bi_w_funcs(qpel_h, 10, sse4)
715 368 mc_bi_w_funcs(qpel_v, 10, sse4)
716 300 mc_bi_w_funcs(qpel_hv, 10, sse4)
717
718 288 mc_bi_w_funcs(pel_pixels, 12, sse4)
719 18 mc_bi_w_func(pel_pixels, 12, 6, sse4)
720 288 mc_bi_w_funcs(epel_h, 12, sse4)
721 18 mc_bi_w_func(epel_h, 12, 6, sse4)
722 288 mc_bi_w_funcs(epel_v, 12, sse4)
723 18 mc_bi_w_func(epel_v, 12, 6, sse4)
724 288 mc_bi_w_funcs(epel_hv, 12, sse4)
725 18 mc_bi_w_func(epel_hv, 12, 6, sse4)
726 288 mc_bi_w_funcs(qpel_h, 12, sse4)
727 288 mc_bi_w_funcs(qpel_v, 12, sse4)
728 288 mc_bi_w_funcs(qpel_hv, 12, sse4)
729 #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
730
/*
 * SAO_BAND_FILTER_FUNCS(bitd, opt)
 *
 * Declares (does not define) the five prototypes
 * ff_hevc_sao_band_filter_{8,16,32,48,64}_<bitd>_<opt>(). All five share
 * one signature: destination and source pointers with separate strides,
 * the sao_offset_val table, sao_left_class, and the block width/height.
 * The definitions live outside this file (presumably in assembly --
 * confirm).
 */
731 #define SAO_BAND_FILTER_FUNCS(bitd, opt) \
732 void ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
733 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
734 void ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
735 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
736 void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
737 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
738 void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
739 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
740 void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
741 const int16_t *sao_offset_val, int sao_left_class, int width, int height);
742
/* Band-filter prototypes for 8-, 10- and 12-bit in SSE2, AVX and AVX2 flavours. */
743 SAO_BAND_FILTER_FUNCS(8, sse2)
744 SAO_BAND_FILTER_FUNCS(10, sse2)
745 SAO_BAND_FILTER_FUNCS(12, sse2)
746 SAO_BAND_FILTER_FUNCS(8, avx)
747 SAO_BAND_FILTER_FUNCS(10, avx)
748 SAO_BAND_FILTER_FUNCS(12, avx)
749 SAO_BAND_FILTER_FUNCS(8, avx2)
750 SAO_BAND_FILTER_FUNCS(10, avx2)
751 SAO_BAND_FILTER_FUNCS(12, avx2)
752
/*
 * SAO_BAND_INIT(bitd, opt)
 *
 * Fills c->sao_band_filter[0..4] with the width 8, 16, 32, 48 and 64
 * band-filter functions for the given bit depth and instruction set.
 * Requires a pointer named `c` in scope at the expansion site. Wrapped in
 * do { } while (0) so it behaves as a single statement.
 */
753 #define SAO_BAND_INIT(bitd, opt) do { \
754 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_##bitd##_##opt; \
755 c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_##bitd##_##opt; \
756 c->sao_band_filter[2] = ff_hevc_sao_band_filter_32_##bitd##_##opt; \
757 c->sao_band_filter[3] = ff_hevc_sao_band_filter_48_##bitd##_##opt; \
758 c->sao_band_filter[4] = ff_hevc_sao_band_filter_64_##bitd##_##opt; \
759 } while (0)
760
/*
 * SAO_EDGE_FILTER_FUNCS(bitd, opt)
 *
 * Declares (does not define) the five prototypes
 * ff_hevc_sao_edge_filter_{8,16,32,48,64}_<bitd>_<opt>(). All five share
 * one signature: destination and source pointers, a destination stride
 * only (no source stride parameter), the sao_offset_val table, `eo`, and
 * the block width/height.
 *
 * NOTE(review): the last prototype line ends with a line continuation, so
 * the blank line that follows is still part of the macro. Keep that line
 * blank, or drop the trailing backslash, before adding anything below.
 */
761 #define SAO_EDGE_FILTER_FUNCS(bitd, opt) \
762 void ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
763 const int16_t *sao_offset_val, int eo, int width, int height); \
764 void ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
765 const int16_t *sao_offset_val, int eo, int width, int height); \
766 void ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
767 const int16_t *sao_offset_val, int eo, int width, int height); \
768 void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
769 const int16_t *sao_offset_val, int eo, int width, int height); \
770 void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
771 const int16_t *sao_offset_val, int eo, int width, int height); \
772
/*
 * Edge-filter prototypes: 8-bit uses SSSE3 as its baseline flavour, while
 * 10- and 12-bit use SSE2; every bit depth also has an AVX2 flavour.
 */
773 SAO_EDGE_FILTER_FUNCS(8, ssse3)
774 SAO_EDGE_FILTER_FUNCS(8, avx2)
775 SAO_EDGE_FILTER_FUNCS(10, sse2)
776 SAO_EDGE_FILTER_FUNCS(10, avx2)
777 SAO_EDGE_FILTER_FUNCS(12, sse2)
778 SAO_EDGE_FILTER_FUNCS(12, avx2)
779
/*
 * SAO_EDGE_INIT(bitd, opt)
 *
 * Fills c->sao_edge_filter[0..4] with the width 8, 16, 32, 48 and 64
 * edge-filter functions for the given bit depth and instruction set.
 * Requires a pointer named `c` in scope at the expansion site. Wrapped in
 * do { } while (0) so it behaves as a single statement.
 */
780 #define SAO_EDGE_INIT(bitd, opt) do { \
781 c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt; \
782 c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \
783 c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \
784 c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \
785 c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \
786 } while (0)
787
/*
 * PEL_LINK(dst, idx1, idx2, idx3, name, D, opt)
 *
 * Stores five function pointers at index [idx1][idx2][idx3] of the
 * tables dst, dst_bi, dst_uni, dst_uni_w and dst_bi_w (the suffix is
 * token-pasted onto `dst`). `name` already carries the block width, e.g.
 * "epel_h16"; D is the bit depth.
 *
 * Only the _bi entry uses an ff_-prefixed symbol; the other four use
 * hevc_put_* names without that prefix.
 *
 * The expansion is several statements and is not wrapped in
 * do { } while (0), and the last assignment has no trailing semicolon.
 * Use it only as a full statement inside a braced block: under an unbraced
 * `if`, only the first assignment would be conditional.
 */
788 #define PEL_LINK(dst, idx1, idx2, idx3, name, D, opt) \
789 dst [idx1][idx2][idx3] = hevc_put_ ## name ## _ ## D ## _##opt; \
790 dst ## _bi [idx1][idx2][idx3] = ff_hevc_put_bi_ ## name ## _ ## D ## _##opt; \
791 dst ## _uni [idx1][idx2][idx3] = hevc_put_uni_ ## name ## _ ## D ## _##opt; \
792 dst ## _uni_w[idx1][idx2][idx3] = hevc_put_uni_w_ ## name ## _ ## D ## _##opt; \
793 dst ## _bi_w [idx1][idx2][idx3] = hevc_put_bi_w_ ## name ## _ ## D ## _##opt
794
/*
 * EPEL_LINKS(pointer, my, mx, fname, bitd, opt)
 *
 * Applies PEL_LINK for first-index values 1..9, which correspond to the
 * widths 4, 6, 8, 12, 16, 24, 32, 48 and 64 (the width is pasted onto
 * `fname`). `my` and `mx` are passed through as the second and third
 * table indices. Index 0 is not touched here.
 *
 * Like PEL_LINK, this expands to multiple statements and must be used as
 * a full statement inside a braced block.
 */
795 #define EPEL_LINKS(pointer, my, mx, fname, bitd, opt ) \
796 PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
797 PEL_LINK(pointer, 2, my , mx , fname##6 , bitd, opt ); \
798 PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
799 PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
800 PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
801 PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
802 PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
803 PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
804 PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
/*
 * QPEL_LINKS(pointer, my, mx, fname, bitd, opt)
 *
 * Same as EPEL_LINKS but without first-index 2 (width 6): PEL_LINK is
 * applied for indices 1, 3, 4, 5, 6, 7, 8 and 9, i.e. widths 4, 8, 12,
 * 16, 24, 32, 48 and 64. `my` and `mx` are passed through as the second
 * and third table indices.
 *
 * Like PEL_LINK, this expands to multiple statements and must be used as
 * a full statement inside a braced block.
 */
805 #define QPEL_LINKS(pointer, my, mx, fname, bitd, opt) \
806 PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
807 PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
808 PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
809 PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
810 PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
811 PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
812 PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
813 PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
814
815 1633 void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
816 {
817 1633 int cpu_flags = av_get_cpu_flags();
818
819
2/2
✓ Branch 0 taken 677 times.
✓ Branch 1 taken 956 times.
1633 if (bit_depth == 8) {
820
2/2
✓ Branch 0 taken 258 times.
✓ Branch 1 taken 419 times.
677 if (EXTERNAL_MMXEXT(cpu_flags)) {
821 258 c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext;
822 }
823
2/2
✓ Branch 0 taken 216 times.
✓ Branch 1 taken 461 times.
677 if (EXTERNAL_SSE2(cpu_flags)) {
824 216 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
825 216 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
826 #if ARCH_X86_64
827 216 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
828 216 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
829
830 216 c->idct[2] = ff_hevc_idct_16x16_8_sse2;
831 216 c->idct[3] = ff_hevc_idct_32x32_8_sse2;
832 #endif
833 216 SAO_BAND_INIT(8, sse2);
834
835 216 c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_sse2;
836 216 c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
837 216 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
838 216 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
839
840 216 c->idct[0] = ff_hevc_idct_4x4_8_sse2;
841 216 c->idct[1] = ff_hevc_idct_8x8_8_sse2;
842
843 216 c->add_residual[1] = ff_hevc_add_residual_8_8_sse2;
844 216 c->add_residual[2] = ff_hevc_add_residual_16_8_sse2;
845 216 c->add_residual[3] = ff_hevc_add_residual_32_8_sse2;
846 }
847
2/2
✓ Branch 0 taken 174 times.
✓ Branch 1 taken 503 times.
677 if (EXTERNAL_SSSE3(cpu_flags)) {
848 #if ARCH_X86_64
849 174 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
850 174 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
851 #endif
852 174 c->dequant = ff_hevc_dequant_8_ssse3;
853 174 SAO_EDGE_INIT(8, ssse3);
854 }
855 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64
856
2/2
✓ Branch 0 taken 153 times.
✓ Branch 1 taken 524 times.
677 if (EXTERNAL_SSE4(cpu_flags)) {
857
858 153 EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
859 153 EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4);
860 153 EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4);
861 153 EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 8, sse4);
862
863 153 QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
864 153 QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 8, sse4);
865 153 QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
866 153 QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
867 }
868 #endif
869
2/2
✓ Branch 0 taken 69 times.
✓ Branch 1 taken 608 times.
677 if (EXTERNAL_AVX(cpu_flags)) {
870 69 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
871 69 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
872 #if ARCH_X86_64
873 69 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
874 69 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
875
876 69 c->idct[2] = ff_hevc_idct_16x16_8_avx;
877 69 c->idct[3] = ff_hevc_idct_32x32_8_avx;
878 #endif
879 69 SAO_BAND_INIT(8, avx);
880
881 69 c->idct[0] = ff_hevc_idct_4x4_8_avx;
882 69 c->idct[1] = ff_hevc_idct_8x8_8_avx;
883 }
884
2/2
✓ Branch 0 taken 27 times.
✓ Branch 1 taken 650 times.
677 if (EXTERNAL_AVX2(cpu_flags)) {
885 27 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
886 27 c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
887 }
888 #if HAVE_AVX2_EXTERNAL
889
3/4
✓ Branch 0 taken 27 times.
✓ Branch 1 taken 650 times.
✓ Branch 2 taken 27 times.
✗ Branch 3 not taken.
677 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
890 27 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
891 27 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
892
893 #if ARCH_X86_64
894 27 c->put_hevc_epel[7][0][0] = hevc_put_pel_pixels32_8_avx2;
895 27 c->put_hevc_epel[8][0][0] = hevc_put_pel_pixels48_8_avx2;
896 27 c->put_hevc_epel[9][0][0] = hevc_put_pel_pixels64_8_avx2;
897
898 27 c->put_hevc_qpel[7][0][0] = hevc_put_pel_pixels32_8_avx2;
899 27 c->put_hevc_qpel[8][0][0] = hevc_put_pel_pixels48_8_avx2;
900 27 c->put_hevc_qpel[9][0][0] = hevc_put_pel_pixels64_8_avx2;
901
902 27 c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
903 27 c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
904 27 c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
905
906 27 c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
907 27 c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
908 27 c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
909
910 27 c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2;
911 27 c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2;
912 27 c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2;
913
914 27 c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2;
915 27 c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2;
916 27 c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2;
917
918 27 c->put_hevc_epel[7][0][1] = hevc_put_epel_h32_8_avx2;
919 27 c->put_hevc_epel[8][0][1] = hevc_put_epel_h48_8_avx2;
920 27 c->put_hevc_epel[9][0][1] = hevc_put_epel_h64_8_avx2;
921
922 27 c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_8_avx2;
923 27 c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_8_avx2;
924 27 c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_8_avx2;
925
926 27 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_8_avx2;
927 27 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_8_avx2;
928 27 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_8_avx2;
929
930 27 c->put_hevc_epel[7][1][0] = hevc_put_epel_v32_8_avx2;
931 27 c->put_hevc_epel[8][1][0] = hevc_put_epel_v48_8_avx2;
932 27 c->put_hevc_epel[9][1][0] = hevc_put_epel_v64_8_avx2;
933
934 27 c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_8_avx2;
935 27 c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_8_avx2;
936 27 c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_8_avx2;
937
938 27 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_8_avx2;
939 27 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_8_avx2;
940 27 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_8_avx2;
941
942 27 c->put_hevc_epel[7][1][1] = hevc_put_epel_hv32_8_avx2;
943 27 c->put_hevc_epel[8][1][1] = hevc_put_epel_hv48_8_avx2;
944 27 c->put_hevc_epel[9][1][1] = hevc_put_epel_hv64_8_avx2;
945
946 27 c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_8_avx2;
947 27 c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_8_avx2;
948 27 c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_8_avx2;
949
950 27 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_8_avx2;
951 27 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_8_avx2;
952 27 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_8_avx2;
953
954 27 c->put_hevc_qpel[7][0][1] = hevc_put_qpel_h32_8_avx2;
955 27 c->put_hevc_qpel[8][0][1] = hevc_put_qpel_h48_8_avx2;
956 27 c->put_hevc_qpel[9][0][1] = hevc_put_qpel_h64_8_avx2;
957
958 27 c->put_hevc_qpel[7][1][0] = hevc_put_qpel_v32_8_avx2;
959 27 c->put_hevc_qpel[8][1][0] = hevc_put_qpel_v48_8_avx2;
960 27 c->put_hevc_qpel[9][1][0] = hevc_put_qpel_v64_8_avx2;
961
962 27 c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_8_avx2;
963 27 c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_8_avx2;
964 27 c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_8_avx2;
965
966 27 c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_8_avx2;
967 27 c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_8_avx2;
968 27 c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_8_avx2;
969
970 27 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_8_avx2;
971 27 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_8_avx2;
972 27 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_8_avx2;
973
974 27 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_8_avx2;
975 27 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_8_avx2;
976 27 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_8_avx2;
977 #endif /* ARCH_X86_64 */
978
979 27 SAO_BAND_INIT(8, avx2);
980
981 27 c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2;
982 27 c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
983 27 c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
984
985 27 c->add_residual[3] = ff_hevc_add_residual_32_8_avx2;
986 }
987 #endif /* HAVE_AVX2_EXTERNAL */
988 #if ARCH_X86_64
989
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 677 times.
677 if (EXTERNAL_AVX512ICL(cpu_flags)) {
990 c->put_hevc_qpel[1][0][1] = ff_hevc_put_qpel_h4_8_avx512icl;
991 c->put_hevc_qpel[3][0][1] = ff_hevc_put_qpel_h8_8_avx512icl;
992 c->put_hevc_qpel[5][0][1] = ff_hevc_put_qpel_h16_8_avx512icl;
993 c->put_hevc_qpel[7][0][1] = ff_hevc_put_qpel_h32_8_avx512icl;
994 c->put_hevc_qpel[9][0][1] = ff_hevc_put_qpel_h64_8_avx512icl;
995 c->put_hevc_qpel[3][1][1] = ff_hevc_put_qpel_hv8_8_avx512icl;
996 }
997 #endif
998
2/2
✓ Branch 0 taken 344 times.
✓ Branch 1 taken 612 times.
956 } else if (bit_depth == 10) {
999
2/2
✓ Branch 0 taken 264 times.
✓ Branch 1 taken 80 times.
344 if (EXTERNAL_MMXEXT(cpu_flags)) {
1000 264 c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext;
1001 }
1002
2/2
✓ Branch 0 taken 222 times.
✓ Branch 1 taken 122 times.
344 if (EXTERNAL_SSE2(cpu_flags)) {
1003 222 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
1004 222 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
1005 #if ARCH_X86_64
1006 222 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
1007 222 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
1008
1009 222 c->idct[2] = ff_hevc_idct_16x16_10_sse2;
1010 222 c->idct[3] = ff_hevc_idct_32x32_10_sse2;
1011 #endif
1012 222 SAO_BAND_INIT(10, sse2);
1013 222 SAO_EDGE_INIT(10, sse2);
1014
1015 222 c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_sse2;
1016 222 c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
1017 222 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
1018 222 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
1019
1020 222 c->idct[0] = ff_hevc_idct_4x4_10_sse2;
1021 222 c->idct[1] = ff_hevc_idct_8x8_10_sse2;
1022
1023 222 c->add_residual[1] = ff_hevc_add_residual_8_10_sse2;
1024 222 c->add_residual[2] = ff_hevc_add_residual_16_10_sse2;
1025 222 c->add_residual[3] = ff_hevc_add_residual_32_10_sse2;
1026 }
1027 #if ARCH_X86_64
1028
2/2
✓ Branch 0 taken 180 times.
✓ Branch 1 taken 164 times.
344 if (EXTERNAL_SSSE3(cpu_flags)) {
1029 180 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
1030 180 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
1031 }
1032 #endif
1033 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64
1034
2/2
✓ Branch 0 taken 159 times.
✓ Branch 1 taken 185 times.
344 if (EXTERNAL_SSE4(cpu_flags)) {
1035 159 EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
1036 159 EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4);
1037 159 EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4);
1038 159 EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 10, sse4);
1039
1040 159 QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
1041 159 QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 10, sse4);
1042 159 QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
1043 159 QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
1044 }
1045 #endif
1046
2/2
✓ Branch 0 taken 75 times.
✓ Branch 1 taken 269 times.
344 if (EXTERNAL_AVX(cpu_flags)) {
1047 75 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
1048 75 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
1049 #if ARCH_X86_64
1050 75 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
1051 75 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
1052
1053 75 c->idct[2] = ff_hevc_idct_16x16_10_avx;
1054 75 c->idct[3] = ff_hevc_idct_32x32_10_avx;
1055 #endif
1056
1057 75 c->idct[0] = ff_hevc_idct_4x4_10_avx;
1058 75 c->idct[1] = ff_hevc_idct_8x8_10_avx;
1059
1060 75 SAO_BAND_INIT(10, avx);
1061 }
1062
2/2
✓ Branch 0 taken 33 times.
✓ Branch 1 taken 311 times.
344 if (EXTERNAL_AVX2(cpu_flags)) {
1063 33 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
1064 }
1065 #if HAVE_AVX2_EXTERNAL
1066
3/4
✓ Branch 0 taken 33 times.
✓ Branch 1 taken 311 times.
✓ Branch 2 taken 33 times.
✗ Branch 3 not taken.
344 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
1067 33 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
1068 33 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
1069
1070 #if ARCH_X86_64
1071 33 c->put_hevc_epel[5][0][0] = hevc_put_pel_pixels16_10_avx2;
1072 33 c->put_hevc_epel[6][0][0] = hevc_put_pel_pixels24_10_avx2;
1073 33 c->put_hevc_epel[7][0][0] = hevc_put_pel_pixels32_10_avx2;
1074 33 c->put_hevc_epel[8][0][0] = hevc_put_pel_pixels48_10_avx2;
1075 33 c->put_hevc_epel[9][0][0] = hevc_put_pel_pixels64_10_avx2;
1076
1077 33 c->put_hevc_qpel[5][0][0] = hevc_put_pel_pixels16_10_avx2;
1078 33 c->put_hevc_qpel[6][0][0] = hevc_put_pel_pixels24_10_avx2;
1079 33 c->put_hevc_qpel[7][0][0] = hevc_put_pel_pixels32_10_avx2;
1080 33 c->put_hevc_qpel[8][0][0] = hevc_put_pel_pixels48_10_avx2;
1081 33 c->put_hevc_qpel[9][0][0] = hevc_put_pel_pixels64_10_avx2;
1082
1083 33 c->put_hevc_epel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
1084 33 c->put_hevc_epel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
1085 33 c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
1086 33 c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2;
1087 33 c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2;
1088
1089 33 c->put_hevc_qpel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
1090 33 c->put_hevc_qpel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
1091 33 c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
1092 33 c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2;
1093 33 c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2;
1094
1095 33 c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2;
1096 33 c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2;
1097 33 c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2;
1098 33 c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2;
1099 33 c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2;
1100 33 c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2;
1101 33 c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2;
1102 33 c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2;
1103 33 c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2;
1104 33 c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2;
1105
1106 33 c->put_hevc_epel[5][0][1] = hevc_put_epel_h16_10_avx2;
1107 33 c->put_hevc_epel[6][0][1] = hevc_put_epel_h24_10_avx2;
1108 33 c->put_hevc_epel[7][0][1] = hevc_put_epel_h32_10_avx2;
1109 33 c->put_hevc_epel[8][0][1] = hevc_put_epel_h48_10_avx2;
1110 33 c->put_hevc_epel[9][0][1] = hevc_put_epel_h64_10_avx2;
1111
1112 33 c->put_hevc_epel_uni[5][0][1] = hevc_put_uni_epel_h16_10_avx2;
1113 33 c->put_hevc_epel_uni[6][0][1] = hevc_put_uni_epel_h24_10_avx2;
1114 33 c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_10_avx2;
1115 33 c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_10_avx2;
1116 33 c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_10_avx2;
1117
1118 33 c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_bi_epel_h16_10_avx2;
1119 33 c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_bi_epel_h24_10_avx2;
1120 33 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_10_avx2;
1121 33 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_10_avx2;
1122 33 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_10_avx2;
1123
1124 33 c->put_hevc_epel[5][1][0] = hevc_put_epel_v16_10_avx2;
1125 33 c->put_hevc_epel[6][1][0] = hevc_put_epel_v24_10_avx2;
1126 33 c->put_hevc_epel[7][1][0] = hevc_put_epel_v32_10_avx2;
1127 33 c->put_hevc_epel[8][1][0] = hevc_put_epel_v48_10_avx2;
1128 33 c->put_hevc_epel[9][1][0] = hevc_put_epel_v64_10_avx2;
1129
1130 33 c->put_hevc_epel_uni[5][1][0] = hevc_put_uni_epel_v16_10_avx2;
1131 33 c->put_hevc_epel_uni[6][1][0] = hevc_put_uni_epel_v24_10_avx2;
1132 33 c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_10_avx2;
1133 33 c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_10_avx2;
1134 33 c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_10_avx2;
1135
1136 33 c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_bi_epel_v16_10_avx2;
1137 33 c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_bi_epel_v24_10_avx2;
1138 33 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_10_avx2;
1139 33 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_10_avx2;
1140 33 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_10_avx2;
1141
1142 33 c->put_hevc_epel[5][1][1] = hevc_put_epel_hv16_10_avx2;
1143 33 c->put_hevc_epel[6][1][1] = hevc_put_epel_hv24_10_avx2;
1144 33 c->put_hevc_epel[7][1][1] = hevc_put_epel_hv32_10_avx2;
1145 33 c->put_hevc_epel[8][1][1] = hevc_put_epel_hv48_10_avx2;
1146 33 c->put_hevc_epel[9][1][1] = hevc_put_epel_hv64_10_avx2;
1147
1148 33 c->put_hevc_epel_uni[5][1][1] = hevc_put_uni_epel_hv16_10_avx2;
1149 33 c->put_hevc_epel_uni[6][1][1] = hevc_put_uni_epel_hv24_10_avx2;
1150 33 c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_10_avx2;
1151 33 c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_10_avx2;
1152 33 c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_10_avx2;
1153
1154 33 c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_bi_epel_hv16_10_avx2;
1155 33 c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_bi_epel_hv24_10_avx2;
1156 33 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_10_avx2;
1157 33 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_10_avx2;
1158 33 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_10_avx2;
1159
1160 33 c->put_hevc_qpel[5][0][1] = hevc_put_qpel_h16_10_avx2;
1161 33 c->put_hevc_qpel[6][0][1] = hevc_put_qpel_h24_10_avx2;
1162 33 c->put_hevc_qpel[7][0][1] = hevc_put_qpel_h32_10_avx2;
1163 33 c->put_hevc_qpel[8][0][1] = hevc_put_qpel_h48_10_avx2;
1164 33 c->put_hevc_qpel[9][0][1] = hevc_put_qpel_h64_10_avx2;
1165
1166 33 c->put_hevc_qpel_uni[5][0][1] = hevc_put_uni_qpel_h16_10_avx2;
1167 33 c->put_hevc_qpel_uni[6][0][1] = hevc_put_uni_qpel_h24_10_avx2;
1168 33 c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_10_avx2;
1169 33 c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_10_avx2;
1170 33 c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_10_avx2;
1171
1172 33 c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_bi_qpel_h16_10_avx2;
1173 33 c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_bi_qpel_h24_10_avx2;
1174 33 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_10_avx2;
1175 33 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_10_avx2;
1176 33 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_10_avx2;
1177
1178 33 c->put_hevc_qpel[5][1][0] = hevc_put_qpel_v16_10_avx2;
1179 33 c->put_hevc_qpel[6][1][0] = hevc_put_qpel_v24_10_avx2;
1180 33 c->put_hevc_qpel[7][1][0] = hevc_put_qpel_v32_10_avx2;
1181 33 c->put_hevc_qpel[8][1][0] = hevc_put_qpel_v48_10_avx2;
1182 33 c->put_hevc_qpel[9][1][0] = hevc_put_qpel_v64_10_avx2;
1183
1184 33 c->put_hevc_qpel_uni[5][1][0] = hevc_put_uni_qpel_v16_10_avx2;
1185 33 c->put_hevc_qpel_uni[6][1][0] = hevc_put_uni_qpel_v24_10_avx2;
1186 33 c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_10_avx2;
1187 33 c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_10_avx2;
1188 33 c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_10_avx2;
1189
1190 33 c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_bi_qpel_v16_10_avx2;
1191 33 c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_bi_qpel_v24_10_avx2;
1192 33 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_10_avx2;
1193 33 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_10_avx2;
1194 33 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_10_avx2;
1195
1196 33 c->put_hevc_qpel[5][1][1] = hevc_put_qpel_hv16_10_avx2;
1197 33 c->put_hevc_qpel[6][1][1] = hevc_put_qpel_hv24_10_avx2;
1198 33 c->put_hevc_qpel[7][1][1] = hevc_put_qpel_hv32_10_avx2;
1199 33 c->put_hevc_qpel[8][1][1] = hevc_put_qpel_hv48_10_avx2;
1200 33 c->put_hevc_qpel[9][1][1] = hevc_put_qpel_hv64_10_avx2;
1201
1202 33 c->put_hevc_qpel_uni[5][1][1] = hevc_put_uni_qpel_hv16_10_avx2;
1203 33 c->put_hevc_qpel_uni[6][1][1] = hevc_put_uni_qpel_hv24_10_avx2;
1204 33 c->put_hevc_qpel_uni[7][1][1] = hevc_put_uni_qpel_hv32_10_avx2;
1205 33 c->put_hevc_qpel_uni[8][1][1] = hevc_put_uni_qpel_hv48_10_avx2;
1206 33 c->put_hevc_qpel_uni[9][1][1] = hevc_put_uni_qpel_hv64_10_avx2;
1207
1208 33 c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_bi_qpel_hv16_10_avx2;
1209 33 c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_bi_qpel_hv24_10_avx2;
1210 33 c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_bi_qpel_hv32_10_avx2;
1211 33 c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_bi_qpel_hv48_10_avx2;
1212 33 c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_bi_qpel_hv64_10_avx2;
1213 #endif /* ARCH_X86_64 */
1214
1215 33 SAO_BAND_INIT(10, avx2);
1216 33 SAO_EDGE_INIT(10, avx2);
1217
1218 33 c->add_residual[2] = ff_hevc_add_residual_16_10_avx2;
1219 33 c->add_residual[3] = ff_hevc_add_residual_32_10_avx2;
1220 }
1221 #endif /* HAVE_AVX2_EXTERNAL */
1222
2/2
✓ Branch 0 taken 304 times.
✓ Branch 1 taken 308 times.
612 } else if (bit_depth == 12) {
1223
2/2
✓ Branch 0 taken 210 times.
✓ Branch 1 taken 94 times.
304 if (EXTERNAL_SSE2(cpu_flags)) {
1224 210 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
1225 210 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
1226 #if ARCH_X86_64
1227 210 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
1228 210 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
1229 #endif
1230 210 SAO_BAND_INIT(12, sse2);
1231 210 SAO_EDGE_INIT(12, sse2);
1232
1233 210 c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_sse2;
1234 210 c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2;
1235 210 c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2;
1236 210 c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2;
1237 }
1238 #if ARCH_X86_64
1239
2/2
✓ Branch 0 taken 168 times.
✓ Branch 1 taken 136 times.
304 if (EXTERNAL_SSSE3(cpu_flags)) {
1240 168 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
1241 168 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
1242 }
1243 #endif
1244 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64
1245
2/2
✓ Branch 0 taken 147 times.
✓ Branch 1 taken 157 times.
304 if (EXTERNAL_SSE4(cpu_flags)) {
1246 147 EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
1247 147 EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4);
1248 147 EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4);
1249 147 EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 12, sse4);
1250
1251 147 QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
1252 147 QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4);
1253 147 QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4);
1254 147 QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4);
1255 }
1256 #endif
1257
2/2
✓ Branch 0 taken 63 times.
✓ Branch 1 taken 241 times.
304 if (EXTERNAL_AVX(cpu_flags)) {
1258 63 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
1259 63 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
1260 #if ARCH_X86_64
1261 63 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
1262 63 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
1263 #endif
1264 63 SAO_BAND_INIT(12, avx);
1265 }
1266
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 283 times.
304 if (EXTERNAL_AVX2(cpu_flags)) {
1267 21 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
1268 }
1269
3/4
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 283 times.
✓ Branch 2 taken 21 times.
✗ Branch 3 not taken.
304 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
1270 21 c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2;
1271 21 c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2;
1272
1273 21 SAO_BAND_INIT(12, avx2);
1274 21 SAO_EDGE_INIT(12, avx2);
1275 }
1276 }
1277 1633 }
1278