FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/hevcdsp_init.c
Date: 2022-12-05 03:11:11
Exec Total Coverage
Lines: 597 603 99.0%
Functions: 860 860 100.0%
Branches: 414 418 99.0%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2013 Seppo Tomperi
3 * Copyright (c) 2013 - 2014 Pierre-Edouard Lepere
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "config.h"
23
24 #include "libavutil/cpu.h"
25 #include "libavutil/mem_internal.h"
26 #include "libavutil/x86/asm.h"
27 #include "libavutil/x86/cpu.h"
28 #include "libavcodec/hevcdsp.h"
29 #include "libavcodec/x86/hevcdsp.h"
30
31 #define LFC_FUNC(DIR, DEPTH, OPT) \
32 void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
33
34 #define LFL_FUNC(DIR, DEPTH, OPT) \
35 void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
36
37 #define LFC_FUNCS(type, depth, opt) \
38 LFC_FUNC(h, depth, opt) \
39 LFC_FUNC(v, depth, opt)
40
41 #define LFL_FUNCS(type, depth, opt) \
42 LFL_FUNC(h, depth, opt) \
43 LFL_FUNC(v, depth, opt)
44
45 LFC_FUNCS(uint8_t, 8, sse2)
46 LFC_FUNCS(uint8_t, 10, sse2)
47 LFC_FUNCS(uint8_t, 12, sse2)
48 LFC_FUNCS(uint8_t, 8, avx)
49 LFC_FUNCS(uint8_t, 10, avx)
50 LFC_FUNCS(uint8_t, 12, avx)
51 LFL_FUNCS(uint8_t, 8, sse2)
52 LFL_FUNCS(uint8_t, 10, sse2)
53 LFL_FUNCS(uint8_t, 12, sse2)
54 LFL_FUNCS(uint8_t, 8, ssse3)
55 LFL_FUNCS(uint8_t, 10, ssse3)
56 LFL_FUNCS(uint8_t, 12, ssse3)
57 LFL_FUNCS(uint8_t, 8, avx)
58 LFL_FUNCS(uint8_t, 10, avx)
59 LFL_FUNCS(uint8_t, 12, avx)
60
61 #define IDCT_DC_FUNCS(W, opt) \
62 void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
63 void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
64 void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)
65
66 IDCT_DC_FUNCS(4x4, mmxext);
67 IDCT_DC_FUNCS(8x8, sse2);
68 IDCT_DC_FUNCS(16x16, sse2);
69 IDCT_DC_FUNCS(32x32, sse2);
70 IDCT_DC_FUNCS(16x16, avx2);
71 IDCT_DC_FUNCS(32x32, avx2);
72
73 #define IDCT_FUNCS(opt) \
74 void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \
75 void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \
76 void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \
77 void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \
78 void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \
79 void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
80 void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \
81 void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);
82
83 IDCT_FUNCS(sse2)
84 IDCT_FUNCS(avx)
85
86 #define mc_rep_func(name, bitd, step, W, opt) \
87 void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, \
88 const uint8_t *_src, ptrdiff_t _srcstride, int height, \
89 intptr_t mx, intptr_t my, int width) \
90 { \
91 int i; \
92 int16_t *dst; \
93 for (i = 0; i < W; i += step) { \
94 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
95 dst = _dst + i; \
96 ff_hevc_put_hevc_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
97 } \
98 }
99 #define mc_rep_uni_func(name, bitd, step, W, opt) \
100 void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \
101 const uint8_t *_src, ptrdiff_t _srcstride, int height, \
102 intptr_t mx, intptr_t my, int width) \
103 { \
104 int i; \
105 uint8_t *dst; \
106 for (i = 0; i < W; i += step) { \
107 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
108 dst = _dst + (i * ((bitd + 7) / 8)); \
109 ff_hevc_put_hevc_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \
110 height, mx, my, width); \
111 } \
112 }
113 #define mc_rep_bi_func(name, bitd, step, W, opt) \
114 void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \
115 ptrdiff_t _srcstride, const int16_t *_src2, \
116 int height, intptr_t mx, intptr_t my, int width) \
117 { \
118 int i; \
119 uint8_t *dst; \
120 for (i = 0; i < W ; i += step) { \
121 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
122 const int16_t *src2 = _src2 + i; \
123 dst = _dst + (i * ((bitd + 7) / 8)); \
124 ff_hevc_put_hevc_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \
125 height, mx, my, width); \
126 } \
127 }
128
129 #define mc_rep_funcs(name, bitd, step, W, opt) \
130 mc_rep_func(name, bitd, step, W, opt) \
131 mc_rep_uni_func(name, bitd, step, W, opt) \
132 mc_rep_bi_func(name, bitd, step, W, opt)
133
134 #define mc_rep_func2(name, bitd, step1, step2, W, opt) \
135 void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *dst, \
136 const uint8_t *src, ptrdiff_t _srcstride, int height, \
137 intptr_t mx, intptr_t my, int width) \
138 { \
139 ff_hevc_put_hevc_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
140 ff_hevc_put_hevc_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)), \
141 _srcstride, height, mx, my, width); \
142 }
143 #define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
144 void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \
145 const uint8_t *src, ptrdiff_t _srcstride, int height, \
146 intptr_t mx, intptr_t my, int width) \
147 { \
148 ff_hevc_put_hevc_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width);\
149 ff_hevc_put_hevc_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
150 src + (step1 * ((bitd + 7) / 8)), _srcstride, \
151 height, mx, my, width); \
152 }
153 #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \
154 void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
155 ptrdiff_t _srcstride, const int16_t *src2, \
156 int height, intptr_t mx, intptr_t my, int width) \
157 { \
158 ff_hevc_put_hevc_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width);\
159 ff_hevc_put_hevc_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
160 src + (step1 * ((bitd + 7) / 8)), _srcstride, \
161 src2 + step1, height, mx, my, width); \
162 }
163
164 #define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \
165 mc_rep_func2(name, bitd, step1, step2, W, opt) \
166 mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
167 mc_rep_bi_func2(name, bitd, step1, step2, W, opt)
168
169 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
170
171 #define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
172 void ff_hevc_put_hevc_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
173 int height, intptr_t mx, intptr_t my, int width) \
174 \
175 { \
176 ff_hevc_put_hevc_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \
177 ff_hevc_put_hevc_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width); \
178 }
179
180 #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
181 void ff_hevc_put_hevc_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
182 ptrdiff_t _srcstride, const int16_t *src2, \
183 int height, intptr_t mx, intptr_t my, int width) \
184 { \
185 ff_hevc_put_hevc_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \
186 height, mx, my, width); \
187 ff_hevc_put_hevc_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2,\
188 height, mx, my, width); \
189 }
190
191 #define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
192 void ff_hevc_put_hevc_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \
193 const uint8_t *src, ptrdiff_t _srcstride, int height, \
194 intptr_t mx, intptr_t my, int width) \
195 { \
196 ff_hevc_put_hevc_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \
197 height, mx, my, width); \
198 ff_hevc_put_hevc_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, \
199 height, mx, my, width); \
200 }
201
202 #define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) \
203 mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
204 mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
205 mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)
206
207 #define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
208 void ff_hevc_put_hevc_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
209 int height, intptr_t mx, intptr_t my, int width) \
210 \
211 { \
212 ff_hevc_put_hevc_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \
213 ff_hevc_put_hevc_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width); \
214 }
215
216 #define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
217 void ff_hevc_put_hevc_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
218 ptrdiff_t _srcstride, const int16_t *src2, \
219 int height, intptr_t mx, intptr_t my, int width) \
220 { \
221 ff_hevc_put_hevc_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
222 src2, height, mx, my, width); \
223 ff_hevc_put_hevc_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \
224 src2+width2, height, mx, my, width); \
225 }
226
227 #define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
228 void ff_hevc_put_hevc_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \
229 const uint8_t *src, ptrdiff_t _srcstride, int height, \
230 intptr_t mx, intptr_t my, int width) \
231 { \
232 ff_hevc_put_hevc_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
233 height, mx, my, width); \
234 ff_hevc_put_hevc_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \
235 height, mx, my, width); \
236 }
237
238 #define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) \
239 mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
240 mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
241 mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)
242
243 #if HAVE_AVX2_EXTERNAL
244
245 8 mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
246 6 mc_rep_mixs_8(epel_hv, 48, 32, 16, avx2, sse4)
247 6 mc_rep_mixs_8(epel_h , 48, 32, 16, avx2, sse4)
248 6 mc_rep_mixs_8(epel_v , 48, 32, 16, avx2, sse4)
249
250 1 mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
251 1 mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32)
252 6 mc_rep_mixs_10(epel_hv, 24, 16, 8, avx2, sse4, 32)
253 6 mc_rep_mixs_10(epel_h , 24, 16, 8, avx2, sse4, 32)
254 6 mc_rep_mixs_10(epel_v , 24, 16, 8, avx2, sse4, 32)
255
256
257 6 mc_rep_mixs_10(qpel_h , 24, 16, 8, avx2, sse4, 32)
258 6 mc_rep_mixs_10(qpel_v , 24, 16, 8, avx2, sse4, 32)
259 6 mc_rep_mixs_10(qpel_hv, 24, 16, 8, avx2, sse4, 32)
260
261
262
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 78 times.
234 mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2)//used for 10bit
263
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
4 mc_rep_uni_func(pel_pixels, 8, 32, 96, avx2) //used for 10bit
264
265
2/2
✓ Branch 1 taken 5736 times.
✓ Branch 2 taken 2868 times.
17208 mc_rep_funcs(pel_pixels, 8, 32, 64, avx2)
266
267
2/2
✓ Branch 1 taken 62 times.
✓ Branch 2 taken 31 times.
93 mc_rep_func(pel_pixels, 10, 16, 32, avx2)
268
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
4 mc_rep_func(pel_pixels, 10, 16, 48, avx2)
269
2/2
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 8 times.
24 mc_rep_func(pel_pixels, 10, 32, 64, avx2)
270
271
2/2
✓ Branch 1 taken 176 times.
✓ Branch 2 taken 88 times.
264 mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2)
272
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
4 mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2)
273
2/2
✓ Branch 1 taken 42 times.
✓ Branch 2 taken 21 times.
63 mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2)
274
275
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_h, 8, 32, 64, avx2)
276
277
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_v, 8, 32, 64, avx2)
278
279
2/2
✓ Branch 1 taken 994 times.
✓ Branch 2 taken 497 times.
2982 mc_rep_funcs(epel_h, 10, 16, 32, avx2)
280
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(epel_h, 10, 16, 48, avx2)
281
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_h, 10, 32, 64, avx2)
282
283
2/2
✓ Branch 1 taken 138 times.
✓ Branch 2 taken 69 times.
414 mc_rep_funcs(epel_v, 10, 16, 32, avx2)
284
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(epel_v, 10, 16, 48, avx2)
285
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_v, 10, 32, 64, avx2)
286
287
288
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_hv, 8, 32, 64, avx2)
289
290
2/2
✓ Branch 1 taken 1526 times.
✓ Branch 2 taken 763 times.
4578 mc_rep_funcs(epel_hv, 10, 16, 32, avx2)
291
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(epel_hv, 10, 16, 48, avx2)
292
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(epel_hv, 10, 32, 64, avx2)
293
294
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(qpel_h, 8, 32, 64, avx2)
295 6 mc_rep_mixs_8(qpel_h , 48, 32, 16, avx2, sse4)
296
297
2/2
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
18 mc_rep_funcs(qpel_v, 8, 32, 64, avx2)
298 6 mc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4)
299
300
2/2
✓ Branch 1 taken 1378 times.
✓ Branch 2 taken 689 times.
4134 mc_rep_funcs(qpel_h, 10, 16, 32, avx2)
301
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(qpel_h, 10, 16, 48, avx2)
302
2/2
✓ Branch 1 taken 364 times.
✓ Branch 2 taken 182 times.
1092 mc_rep_funcs(qpel_h, 10, 32, 64, avx2)
303
304
2/2
✓ Branch 1 taken 874 times.
✓ Branch 2 taken 437 times.
2622 mc_rep_funcs(qpel_v, 10, 16, 32, avx2)
305
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(qpel_v, 10, 16, 48, avx2)
306
2/2
✓ Branch 1 taken 194 times.
✓ Branch 2 taken 97 times.
582 mc_rep_funcs(qpel_v, 10, 32, 64, avx2)
307
308
2/2
✓ Branch 1 taken 3016 times.
✓ Branch 2 taken 1508 times.
9048 mc_rep_funcs(qpel_hv, 10, 16, 32, avx2)
309
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
24 mc_rep_funcs(qpel_hv, 10, 16, 48, avx2)
310
2/2
✓ Branch 1 taken 632 times.
✓ Branch 2 taken 316 times.
1896 mc_rep_funcs(qpel_hv, 10, 32, 64, avx2)
311
312 #endif //AVX2
313
314
2/2
✓ Branch 1 taken 25312 times.
✓ Branch 2 taken 6328 times.
63280 mc_rep_funcs(pel_pixels, 8, 16, 64, sse4)
315
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(pel_pixels, 8, 16, 48, sse4)
316
2/2
✓ Branch 1 taken 28226 times.
✓ Branch 2 taken 14113 times.
84678 mc_rep_funcs(pel_pixels, 8, 16, 32, sse4)
317
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(pel_pixels, 8, 8, 24, sse4)
318
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(pel_pixels,10, 8, 64, sse4)
319
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(pel_pixels,10, 8, 48, sse4)
320
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(pel_pixels,10, 8, 32, sse4)
321
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(pel_pixels,10, 8, 24, sse4)
322
2/2
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
252 mc_rep_funcs(pel_pixels,10, 8, 16, sse4)
323
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(pel_pixels,10, 4, 12, sse4)
324
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(pel_pixels,12, 8, 64, sse4)
325
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(pel_pixels,12, 8, 48, sse4)
326
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(pel_pixels,12, 8, 32, sse4)
327
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(pel_pixels,12, 8, 24, sse4)
328
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(pel_pixels,12, 8, 16, sse4)
329
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(pel_pixels,12, 4, 12, sse4)
330
331
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_h, 8, 16, 64, sse4)
332
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_h, 8, 16, 48, sse4)
333
2/2
✓ Branch 1 taken 1032 times.
✓ Branch 2 taken 516 times.
3096 mc_rep_funcs(epel_h, 8, 16, 32, sse4)
334
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_h, 8, 8, 24, sse4)
335
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(epel_h,10, 8, 64, sse4)
336
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(epel_h,10, 8, 48, sse4)
337
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_h,10, 8, 32, sse4)
338
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_h,10, 8, 24, sse4)
339
2/2
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
252 mc_rep_funcs(epel_h,10, 8, 16, sse4)
340
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_h,10, 4, 12, sse4)
341
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(epel_h,12, 8, 64, sse4)
342
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(epel_h,12, 8, 48, sse4)
343
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(epel_h,12, 8, 32, sse4)
344
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_h,12, 8, 24, sse4)
345
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(epel_h,12, 8, 16, sse4)
346
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_h,12, 4, 12, sse4)
347
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_v, 8, 16, 64, sse4)
348
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_v, 8, 16, 48, sse4)
349
2/2
✓ Branch 1 taken 1892 times.
✓ Branch 2 taken 946 times.
5676 mc_rep_funcs(epel_v, 8, 16, 32, sse4)
350
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_v, 8, 8, 24, sse4)
351
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(epel_v,10, 8, 64, sse4)
352
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(epel_v,10, 8, 48, sse4)
353
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_v,10, 8, 32, sse4)
354
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_v,10, 8, 24, sse4)
355
2/2
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
252 mc_rep_funcs(epel_v,10, 8, 16, sse4)
356
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_v,10, 4, 12, sse4)
357
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(epel_v,12, 8, 64, sse4)
358
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(epel_v,12, 8, 48, sse4)
359
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(epel_v,12, 8, 32, sse4)
360
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_v,12, 8, 24, sse4)
361
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(epel_v,12, 8, 16, sse4)
362
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_v,12, 4, 12, sse4)
363
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_hv, 8, 16, 64, sse4)
364
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_hv, 8, 16, 48, sse4)
365
2/2
✓ Branch 1 taken 1512 times.
✓ Branch 2 taken 756 times.
4536 mc_rep_funcs(epel_hv, 8, 16, 32, sse4)
366
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv, 8, 8, 24, sse4)
367 78 mc_rep_funcs2(epel_hv,8, 8, 4, 12, sse4)
368
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(epel_hv,10, 8, 64, sse4)
369
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(epel_hv,10, 8, 48, sse4)
370
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(epel_hv,10, 8, 32, sse4)
371
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(epel_hv,10, 8, 24, sse4)
372
2/2
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
252 mc_rep_funcs(epel_hv,10, 8, 16, sse4)
373
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv,10, 4, 12, sse4)
374
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(epel_hv,12, 8, 64, sse4)
375
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(epel_hv,12, 8, 48, sse4)
376
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(epel_hv,12, 8, 32, sse4)
377
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv,12, 8, 24, sse4)
378
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(epel_hv,12, 8, 16, sse4)
379
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(epel_hv,12, 4, 12, sse4)
380
381
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(qpel_h, 8, 16, 64, sse4)
382
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_h, 8, 16, 48, sse4)
383
2/2
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
252 mc_rep_funcs(qpel_h, 8, 16, 32, sse4)
384
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h, 8, 8, 24, sse4)
385
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(qpel_h,10, 8, 64, sse4)
386
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(qpel_h,10, 8, 48, sse4)
387
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(qpel_h,10, 8, 32, sse4)
388
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_h,10, 8, 24, sse4)
389
2/2
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
252 mc_rep_funcs(qpel_h,10, 8, 16, sse4)
390
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h,10, 4, 12, sse4)
391
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(qpel_h,12, 8, 64, sse4)
392
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_h,12, 8, 48, sse4)
393
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(qpel_h,12, 8, 32, sse4)
394
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h,12, 8, 24, sse4)
395
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(qpel_h,12, 8, 16, sse4)
396
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_h,12, 4, 12, sse4)
397
2/2
✓ Branch 1 taken 176 times.
✓ Branch 2 taken 44 times.
440 mc_rep_funcs(qpel_v, 8, 16, 64, sse4)
398
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_v, 8, 16, 48, sse4)
399
2/2
✓ Branch 1 taken 92 times.
✓ Branch 2 taken 46 times.
276 mc_rep_funcs(qpel_v, 8, 16, 32, sse4)
400
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v, 8, 8, 24, sse4)
401
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(qpel_v,10, 8, 64, sse4)
402
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(qpel_v,10, 8, 48, sse4)
403
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(qpel_v,10, 8, 32, sse4)
404
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_v,10, 8, 24, sse4)
405
2/2
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
252 mc_rep_funcs(qpel_v,10, 8, 16, sse4)
406
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v,10, 4, 12, sse4)
407
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(qpel_v,12, 8, 64, sse4)
408
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_v,12, 8, 48, sse4)
409
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(qpel_v,12, 8, 32, sse4)
410
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v,12, 8, 24, sse4)
411
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(qpel_v,12, 8, 16, sse4)
412
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_v,12, 4, 12, sse4)
413
2/2
✓ Branch 1 taken 928 times.
✓ Branch 2 taken 116 times.
2088 mc_rep_funcs(qpel_hv, 8, 8, 64, sse4)
414
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_hv, 8, 8, 48, sse4)
415
2/2
✓ Branch 1 taken 292 times.
✓ Branch 2 taken 73 times.
730 mc_rep_funcs(qpel_hv, 8, 8, 32, sse4)
416
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv, 8, 8, 24, sse4)
417
2/2
✓ Branch 1 taken 202 times.
✓ Branch 2 taken 101 times.
606 mc_rep_funcs(qpel_hv, 8, 8, 16, sse4)
418 78 mc_rep_funcs2(qpel_hv,8, 8, 4, 12, sse4)
419
2/2
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
756 mc_rep_funcs(qpel_hv,10, 8, 64, sse4)
420
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
588 mc_rep_funcs(qpel_hv,10, 8, 48, sse4)
421
2/2
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
420 mc_rep_funcs(qpel_hv,10, 8, 32, sse4)
422
2/2
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
336 mc_rep_funcs(qpel_hv,10, 8, 24, sse4)
423
2/2
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
252 mc_rep_funcs(qpel_hv,10, 8, 16, sse4)
424
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv,10, 4, 12, sse4)
425
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
702 mc_rep_funcs(qpel_hv,12, 8, 64, sse4)
426
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
546 mc_rep_funcs(qpel_hv,12, 8, 48, sse4)
427
2/2
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
390 mc_rep_funcs(qpel_hv,12, 8, 32, sse4)
428
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv,12, 8, 24, sse4)
429
2/2
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
234 mc_rep_funcs(qpel_hv,12, 8, 16, sse4)
430
2/2
✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
312 mc_rep_funcs(qpel_hv,12, 4, 12, sse4)
431
432 #define mc_rep_uni_w(bitd, step, W, opt) \
433 void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
434 int height, int denom, int _wx, int _ox) \
435 { \
436 int i; \
437 uint8_t *dst; \
438 for (i = 0; i < W; i += step) { \
439 const int16_t *src = _src + i; \
440 dst= _dst + (i * ((bitd + 7) / 8)); \
441 ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, \
442 height, denom, _wx, _ox); \
443 } \
444 }
445
446
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(8, 6, 12, sse4)
447
2/2
✓ Branch 1 taken 20172 times.
✓ Branch 2 taken 10086 times.
30258 mc_rep_uni_w(8, 8, 16, sse4)
448
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_uni_w(8, 8, 24, sse4)
449
2/2
✓ Branch 1 taken 46904 times.
✓ Branch 2 taken 11726 times.
58630 mc_rep_uni_w(8, 8, 32, sse4)
450
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_uni_w(8, 8, 48, sse4)
451
2/2
✓ Branch 1 taken 35736 times.
✓ Branch 2 taken 4467 times.
40203 mc_rep_uni_w(8, 8, 64, sse4)
452
453
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(10, 6, 12, sse4)
454
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(10, 8, 16, sse4)
455
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_uni_w(10, 8, 24, sse4)
456
2/2
✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
630 mc_rep_uni_w(10, 8, 32, sse4)
457
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_uni_w(10, 8, 48, sse4)
458
2/2
✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
1134 mc_rep_uni_w(10, 8, 64, sse4)
459
460
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(12, 6, 12, sse4)
461
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_uni_w(12, 8, 16, sse4)
462
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_uni_w(12, 8, 24, sse4)
463
2/2
✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
630 mc_rep_uni_w(12, 8, 32, sse4)
464
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_uni_w(12, 8, 48, sse4)
465
2/2
✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
1134 mc_rep_uni_w(12, 8, 64, sse4)
466
467 #define mc_rep_bi_w(bitd, step, W, opt) \
468 void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
469 const int16_t *_src2, int height, \
470 int denom, int _wx0, int _wx1, int _ox0, int _ox1) \
471 { \
472 int i; \
473 uint8_t *dst; \
474 for (i = 0; i < W; i += step) { \
475 const int16_t *src = _src + i; \
476 const int16_t *src2 = _src2 + i; \
477 dst = _dst + (i * ((bitd + 7) / 8)); \
478 ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \
479 height, denom, _wx0, _wx1, _ox0, _ox1); \
480 } \
481 }
482
483
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(8, 6, 12, sse4)
484
2/2
✓ Branch 1 taken 3774 times.
✓ Branch 2 taken 1887 times.
5661 mc_rep_bi_w(8, 8, 16, sse4)
485
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_bi_w(8, 8, 24, sse4)
486
2/2
✓ Branch 1 taken 18908 times.
✓ Branch 2 taken 4727 times.
23635 mc_rep_bi_w(8, 8, 32, sse4)
487
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_bi_w(8, 8, 48, sse4)
488
2/2
✓ Branch 1 taken 17200 times.
✓ Branch 2 taken 2150 times.
19350 mc_rep_bi_w(8, 8, 64, sse4)
489
490
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(10, 6, 12, sse4)
491
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(10, 8, 16, sse4)
492
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_bi_w(10, 8, 24, sse4)
493
2/2
✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
630 mc_rep_bi_w(10, 8, 32, sse4)
494
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_bi_w(10, 8, 48, sse4)
495
2/2
✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
1134 mc_rep_bi_w(10, 8, 64, sse4)
496
497
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(12, 6, 12, sse4)
498
2/2
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
378 mc_rep_bi_w(12, 8, 16, sse4)
499
2/2
✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
504 mc_rep_bi_w(12, 8, 24, sse4)
500
2/2
✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
630 mc_rep_bi_w(12, 8, 32, sse4)
501
2/2
✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
882 mc_rep_bi_w(12, 8, 48, sse4)
502
2/2
✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
1134 mc_rep_bi_w(12, 8, 64, sse4)
503
/*
 * Defines ff_hevc_put_hevc_uni_w_<name><W>_<bitd>_<opt>(), a two-call
 * wrapper:
 *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() is called with a local
 *      int16_t buffer `temp` (declared with LOCAL_ALIGNED_16, holding
 *      71 * MAX_PB_SIZE elements) plus _src, _srcstride, height, mx, my
 *      and width;
 *   2. ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt>() is then called with _dst,
 *      _dststride, that same `temp`, height, denom, _wx and _ox.
 * Only the first call receives mx/my/width; only the second receives the
 * denom/_wx/_ox arguments.
 * NOTE(review): `temp` is presumably the output of step 1 and the input of
 * step 2 -- confirm against the callee prototypes.
 */
504 #define mc_uni_w_func(name, bitd, W, opt) \
505 void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
506 const uint8_t *_src, ptrdiff_t _srcstride, \
507 int height, int denom, \
508 int _wx, int _ox, \
509 intptr_t mx, intptr_t my, int width) \
510 { \
511 LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
512 ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
513 ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox);\
514 }
515
/*
 * Expands mc_uni_w_func() once per W in {4, 8, 12, 16, 24, 32, 48, 64} for
 * the given name/bitd/opt. W = 6 is deliberately absent from this list; the
 * file instantiates it separately, and only for some `name` values.
 */
516 #define mc_uni_w_funcs(name, bitd, opt) \
517 mc_uni_w_func(name, bitd, 4, opt) \
518 mc_uni_w_func(name, bitd, 8, opt) \
519 mc_uni_w_func(name, bitd, 12, opt) \
520 mc_uni_w_func(name, bitd, 16, opt) \
521 mc_uni_w_func(name, bitd, 24, opt) \
522 mc_uni_w_func(name, bitd, 32, opt) \
523 mc_uni_w_func(name, bitd, 48, opt) \
524 mc_uni_w_func(name, bitd, 64, opt)
525
/*
 * Generate the sse4 uni_w wrappers for bit depths 8, 10 and 12.
 * Each depth gets the full width set for pel_pixels, epel_h/v/hv and
 * qpel_h/v/hv; the extra width-6 wrapper is generated only for pel_pixels
 * and the three epel variants (no qpel width-6 wrapper is emitted).
 */
526 71520 mc_uni_w_funcs(pel_pixels, 8, sse4)
527 18 mc_uni_w_func(pel_pixels, 8, 6, sse4)
528 7720 mc_uni_w_funcs(epel_h, 8, sse4)
529 18 mc_uni_w_func(epel_h, 8, 6, sse4)
530 7408 mc_uni_w_funcs(epel_v, 8, sse4)
531 18 mc_uni_w_func(epel_v, 8, 6, sse4)
532 7568 mc_uni_w_funcs(epel_hv, 8, sse4)
533 18 mc_uni_w_func(epel_hv, 8, 6, sse4)
534 298 mc_uni_w_funcs(qpel_h, 8, sse4)
535 308 mc_uni_w_funcs(qpel_v, 8, sse4)
536 882 mc_uni_w_funcs(qpel_hv, 8, sse4)
537
538 288 mc_uni_w_funcs(pel_pixels, 10, sse4)
539 18 mc_uni_w_func(pel_pixels, 10, 6, sse4)
540 288 mc_uni_w_funcs(epel_h, 10, sse4)
541 18 mc_uni_w_func(epel_h, 10, 6, sse4)
542 288 mc_uni_w_funcs(epel_v, 10, sse4)
543 18 mc_uni_w_func(epel_v, 10, 6, sse4)
544 288 mc_uni_w_funcs(epel_hv, 10, sse4)
545 18 mc_uni_w_func(epel_hv, 10, 6, sse4)
546 288 mc_uni_w_funcs(qpel_h, 10, sse4)
547 288 mc_uni_w_funcs(qpel_v, 10, sse4)
548 288 mc_uni_w_funcs(qpel_hv, 10, sse4)
549
550 288 mc_uni_w_funcs(pel_pixels, 12, sse4)
551 18 mc_uni_w_func(pel_pixels, 12, 6, sse4)
552 288 mc_uni_w_funcs(epel_h, 12, sse4)
553 18 mc_uni_w_func(epel_h, 12, 6, sse4)
554 288 mc_uni_w_funcs(epel_v, 12, sse4)
555 18 mc_uni_w_func(epel_v, 12, 6, sse4)
556 288 mc_uni_w_funcs(epel_hv, 12, sse4)
557 18 mc_uni_w_func(epel_hv, 12, 6, sse4)
558 288 mc_uni_w_funcs(qpel_h, 12, sse4)
559 288 mc_uni_w_funcs(qpel_v, 12, sse4)
560 288 mc_uni_w_funcs(qpel_hv, 12, sse4)
561
/*
 * Defines ff_hevc_put_hevc_bi_w_<name><W>_<bitd>_<opt>(), a two-call
 * wrapper:
 *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() is called with a local
 *      int16_t buffer `temp` (declared with LOCAL_ALIGNED_16, holding
 *      71 * MAX_PB_SIZE elements) plus _src, _srcstride, height, mx, my
 *      and width;
 *   2. ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() is then called with _dst,
 *      _dststride, `temp`, the caller-supplied _src2, height, denom and
 *      the _wx0/_wx1/_ox0/_ox1 arguments.
 * NOTE(review): `temp` is presumably the output of step 1 and, together
 * with _src2, the input of step 2 -- confirm against the callee prototypes.
 */
562 #define mc_bi_w_func(name, bitd, W, opt) \
563 void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
564 const uint8_t *_src, ptrdiff_t _srcstride, \
565 const int16_t *_src2, \
566 int height, int denom, \
567 int _wx0, int _wx1, int _ox0, int _ox1, \
568 intptr_t mx, intptr_t my, int width) \
569 { \
570 LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
571 ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
572 ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \
573 height, denom, _wx0, _wx1, _ox0, _ox1); \
574 }
575
/*
 * Expands mc_bi_w_func() once per W in {4, 8, 12, 16, 24, 32, 48, 64} for
 * the given name/bitd/opt. W = 6 is not part of this list; the file
 * instantiates it separately, and only for some `name` values.
 */
576 #define mc_bi_w_funcs(name, bitd, opt) \
577 mc_bi_w_func(name, bitd, 4, opt) \
578 mc_bi_w_func(name, bitd, 8, opt) \
579 mc_bi_w_func(name, bitd, 12, opt) \
580 mc_bi_w_func(name, bitd, 16, opt) \
581 mc_bi_w_func(name, bitd, 24, opt) \
582 mc_bi_w_func(name, bitd, 32, opt) \
583 mc_bi_w_func(name, bitd, 48, opt) \
584 mc_bi_w_func(name, bitd, 64, opt)
585
/*
 * Generate the sse4 bi_w wrappers for bit depths 8, 10 and 12.
 * Each depth gets the full width set for pel_pixels, epel_h/v/hv and
 * qpel_h/v/hv; the extra width-6 wrapper is generated only for pel_pixels
 * and the three epel variants (no qpel width-6 wrapper is emitted).
 */
586 19038 mc_bi_w_funcs(pel_pixels, 8, sse4)
587 18 mc_bi_w_func(pel_pixels, 8, 6, sse4)
588 864 mc_bi_w_funcs(epel_h, 8, sse4)
589 18 mc_bi_w_func(epel_h, 8, 6, sse4)
590 4020 mc_bi_w_funcs(epel_v, 8, sse4)
591 18 mc_bi_w_func(epel_v, 8, 6, sse4)
592 1428 mc_bi_w_funcs(epel_hv, 8, sse4)
593 18 mc_bi_w_func(epel_hv, 8, 6, sse4)
594 288 mc_bi_w_funcs(qpel_h, 8, sse4)
595 288 mc_bi_w_funcs(qpel_v, 8, sse4)
596 288 mc_bi_w_funcs(qpel_hv, 8, sse4)
597
598 288 mc_bi_w_funcs(pel_pixels, 10, sse4)
599 18 mc_bi_w_func(pel_pixels, 10, 6, sse4)
600 288 mc_bi_w_funcs(epel_h, 10, sse4)
601 18 mc_bi_w_func(epel_h, 10, 6, sse4)
602 288 mc_bi_w_funcs(epel_v, 10, sse4)
603 18 mc_bi_w_func(epel_v, 10, 6, sse4)
604 288 mc_bi_w_funcs(epel_hv, 10, sse4)
605 18 mc_bi_w_func(epel_hv, 10, 6, sse4)
606 288 mc_bi_w_funcs(qpel_h, 10, sse4)
607 288 mc_bi_w_funcs(qpel_v, 10, sse4)
608 288 mc_bi_w_funcs(qpel_hv, 10, sse4)
609
610 288 mc_bi_w_funcs(pel_pixels, 12, sse4)
611 18 mc_bi_w_func(pel_pixels, 12, 6, sse4)
612 288 mc_bi_w_funcs(epel_h, 12, sse4)
613 18 mc_bi_w_func(epel_h, 12, 6, sse4)
614 288 mc_bi_w_funcs(epel_v, 12, sse4)
615 18 mc_bi_w_func(epel_v, 12, 6, sse4)
616 288 mc_bi_w_funcs(epel_hv, 12, sse4)
617 18 mc_bi_w_func(epel_hv, 12, 6, sse4)
618 288 mc_bi_w_funcs(qpel_h, 12, sse4)
619 288 mc_bi_w_funcs(qpel_v, 12, sse4)
620 288 mc_bi_w_funcs(qpel_hv, 12, sse4)
621 #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
622
/*
 * Declares (prototypes only, no bodies) the five functions
 * ff_hevc_sao_band_filter_{8,16,32,48,64}_<bitd>_<opt>(). All five share
 * one signature: destination and source pointers, their two strides, the
 * sao_offset_val table, sao_left_class, width and height.
 * NOTE(review): the definitions are not in this excerpt; presumably they
 * are provided by the x86 assembly -- confirm.
 */
623 #define SAO_BAND_FILTER_FUNCS(bitd, opt) \
624 void ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
625 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
626 void ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
627 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
628 void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
629 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
630 void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
631 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
632 void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
633 const int16_t *sao_offset_val, int sao_left_class, int width, int height);
634
/*
 * SAO band filter prototypes for every combination of bit depth
 * {8, 10, 12} and instruction set {sse2, avx, avx2}.
 */
635 SAO_BAND_FILTER_FUNCS(8, sse2)
636 SAO_BAND_FILTER_FUNCS(10, sse2)
637 SAO_BAND_FILTER_FUNCS(12, sse2)
638 SAO_BAND_FILTER_FUNCS(8, avx)
639 SAO_BAND_FILTER_FUNCS(10, avx)
640 SAO_BAND_FILTER_FUNCS(12, avx)
641 SAO_BAND_FILTER_FUNCS(8, avx2)
642 SAO_BAND_FILTER_FUNCS(10, avx2)
643 SAO_BAND_FILTER_FUNCS(12, avx2)
644
/*
 * Statement macro: stores the five ff_hevc_sao_band_filter_*_<bitd>_<opt>
 * functions into c->sao_band_filter[0..4], in the order 8, 16, 32, 48, 64.
 * Relies on a variable named `c` being in scope at the expansion site.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
645 #define SAO_BAND_INIT(bitd, opt) do { \
646 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_##bitd##_##opt; \
647 c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_##bitd##_##opt; \
648 c->sao_band_filter[2] = ff_hevc_sao_band_filter_32_##bitd##_##opt; \
649 c->sao_band_filter[3] = ff_hevc_sao_band_filter_48_##bitd##_##opt; \
650 c->sao_band_filter[4] = ff_hevc_sao_band_filter_64_##bitd##_##opt; \
651 } while (0)
652
/*
 * Declares (prototypes only, no bodies) the five functions
 * ff_hevc_sao_edge_filter_{8,16,32,48,64}_<bitd>_<opt>(). All five share
 * one signature: destination and source pointers, a single stride_dst,
 * the sao_offset_val table, eo, width and height. Unlike the band filter
 * prototypes, no source stride parameter is declared.
 * The last declaration ends with a line-continuation backslash, so the
 * empty line that follows is spliced into the macro; this is harmless but
 * means a blank line must remain directly after the definition.
 */
653 #define SAO_EDGE_FILTER_FUNCS(bitd, opt) \
654 void ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
655 const int16_t *sao_offset_val, int eo, int width, int height); \
656 void ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
657 const int16_t *sao_offset_val, int eo, int width, int height); \
658 void ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
659 const int16_t *sao_offset_val, int eo, int width, int height); \
660 void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
661 const int16_t *sao_offset_val, int eo, int width, int height); \
662 void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
663 const int16_t *sao_offset_val, int eo, int width, int height); \
664
/*
 * SAO edge filter prototypes. The baseline instruction set differs by bit
 * depth: 8-bit is declared for ssse3, while 10- and 12-bit are declared
 * for sse2. All three depths also get an avx2 set.
 */
665 SAO_EDGE_FILTER_FUNCS(8, ssse3)
666 SAO_EDGE_FILTER_FUNCS(8, avx2)
667 SAO_EDGE_FILTER_FUNCS(10, sse2)
668 SAO_EDGE_FILTER_FUNCS(10, avx2)
669 SAO_EDGE_FILTER_FUNCS(12, sse2)
670 SAO_EDGE_FILTER_FUNCS(12, avx2)
671
/*
 * Statement macro: stores the five ff_hevc_sao_edge_filter_*_<bitd>_<opt>
 * functions into c->sao_edge_filter[0..4], in the order 8, 16, 32, 48, 64.
 * Relies on a variable named `c` being in scope at the expansion site.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
672 #define SAO_EDGE_INIT(bitd, opt) do { \
673 c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt; \
674 c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \
675 c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \
676 c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \
677 c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \
678 } while (0)
679
/*
 * Expands to nine PEL_LINK() invocations, pairing index 1..9 with the
 * fname suffixes 4, 6, 8, 12, 16, 24, 32, 48, 64 and forwarding pointer,
 * my, mx, bitd and opt unchanged. Index 0 is never linked here.
 * The final PEL_LINK has no trailing semicolon, so the expansion site
 * supplies it. PEL_LINK itself is defined outside this excerpt.
 */
680 #define EPEL_LINKS(pointer, my, mx, fname, bitd, opt ) \
681 PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
682 PEL_LINK(pointer, 2, my , mx , fname##6 , bitd, opt ); \
683 PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
684 PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
685 PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
686 PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
687 PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
688 PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
689 PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
/*
 * Same shape as EPEL_LINKS but with eight PEL_LINK() invocations: index 2
 * (the fname##6 entry) is skipped, so indices 1 and 3..9 are paired with
 * the suffixes 4, 8, 12, 16, 24, 32, 48, 64. Indices 0 and 2 are left
 * untouched by this macro.
 * The final PEL_LINK has no trailing semicolon, so the expansion site
 * supplies it. PEL_LINK itself is defined outside this excerpt.
 */
690 #define QPEL_LINKS(pointer, my, mx, fname, bitd, opt) \
691 PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
692 PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
693 PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
694 PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
695 PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
696 PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
697 PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
698 PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
699
700 1250 void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
701 {
702 1250 int cpu_flags = av_get_cpu_flags();
703
704
2/2
✓ Branch 0 taken 525 times.
✓ Branch 1 taken 725 times.
1250 if (bit_depth == 8) {
705
2/2
✓ Branch 0 taken 171 times.
✓ Branch 1 taken 354 times.
525 if (EXTERNAL_MMXEXT(cpu_flags)) {
706 171 c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
707
708 171 c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext;
709 }
710
2/2
✓ Branch 0 taken 141 times.
✓ Branch 1 taken 384 times.
525 if (EXTERNAL_SSE2(cpu_flags)) {
711 141 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
712 141 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
713 if (ARCH_X86_64) {
714 141 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
715 141 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
716
717 141 c->idct[2] = ff_hevc_idct_16x16_8_sse2;
718 141 c->idct[3] = ff_hevc_idct_32x32_8_sse2;
719 }
720 141 SAO_BAND_INIT(8, sse2);
721
722 141 c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
723 141 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
724 141 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
725
726 141 c->idct[0] = ff_hevc_idct_4x4_8_sse2;
727 141 c->idct[1] = ff_hevc_idct_8x8_8_sse2;
728
729 141 c->add_residual[1] = ff_hevc_add_residual_8_8_sse2;
730 141 c->add_residual[2] = ff_hevc_add_residual_16_8_sse2;
731 141 c->add_residual[3] = ff_hevc_add_residual_32_8_sse2;
732 }
733
2/2
✓ Branch 0 taken 111 times.
✓ Branch 1 taken 414 times.
525 if (EXTERNAL_SSSE3(cpu_flags)) {
734 if(ARCH_X86_64) {
735 111 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
736 111 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
737 }
738 111 SAO_EDGE_INIT(8, ssse3);
739 }
740
2/2
✓ Branch 0 taken 96 times.
✓ Branch 1 taken 429 times.
525 if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
741
742 96 EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
743 96 EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4);
744 96 EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4);
745 96 EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 8, sse4);
746
747 96 QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
748 96 QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 8, sse4);
749 96 QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
750 96 QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
751 }
752
2/2
✓ Branch 0 taken 51 times.
✓ Branch 1 taken 474 times.
525 if (EXTERNAL_AVX(cpu_flags)) {
753 51 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
754 51 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
755 if (ARCH_X86_64) {
756 51 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
757 51 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
758
759 51 c->idct[2] = ff_hevc_idct_16x16_8_avx;
760 51 c->idct[3] = ff_hevc_idct_32x32_8_avx;
761 }
762 51 SAO_BAND_INIT(8, avx);
763
764 51 c->idct[0] = ff_hevc_idct_4x4_8_avx;
765 51 c->idct[1] = ff_hevc_idct_8x8_8_avx;
766
767 51 c->add_residual[1] = ff_hevc_add_residual_8_8_avx;
768 51 c->add_residual[2] = ff_hevc_add_residual_16_8_avx;
769 51 c->add_residual[3] = ff_hevc_add_residual_32_8_avx;
770 }
771
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 504 times.
525 if (EXTERNAL_AVX2(cpu_flags)) {
772 21 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
773 21 c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
774 }
775
3/4
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 504 times.
✓ Branch 2 taken 21 times.
✗ Branch 3 not taken.
525 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
776 21 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
777 21 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
778 if (ARCH_X86_64) {
779 21 c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2;
780 21 c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2;
781 21 c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2;
782
783 21 c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2;
784 21 c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2;
785 21 c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2;
786
787 21 c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
788 21 c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
789 21 c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
790
791 21 c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
792 21 c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
793 21 c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
794
795 21 c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2;
796 21 c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2;
797 21 c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2;
798
799 21 c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2;
800 21 c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2;
801 21 c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2;
802
803 21 c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2;
804 21 c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2;
805 21 c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2;
806
807 21 c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2;
808 21 c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2;
809 21 c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2;
810
811 21 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2;
812 21 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2;
813 21 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2;
814
815 21 c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2;
816 21 c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2;
817 21 c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2;
818
819 21 c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2;
820 21 c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2;
821 21 c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2;
822
823 21 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2;
824 21 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2;
825 21 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2;
826
827 21 c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2;
828 21 c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2;
829 21 c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2;
830
831 21 c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2;
832 21 c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2;
833 21 c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2;
834
835 21 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2;
836 21 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2;
837 21 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2;
838
839 21 c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2;
840 21 c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2;
841 21 c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2;
842
843 21 c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2;
844 21 c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2;
845 21 c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2;
846
847 21 c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2;
848 21 c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2;
849 21 c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2;
850
851 21 c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2;
852 21 c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2;
853 21 c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2;
854
855 21 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2;
856 21 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2;
857 21 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2;
858
859 21 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2;
860 21 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2;
861 21 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2;
862 }
863 21 SAO_BAND_INIT(8, avx2);
864
865 21 c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2;
866 21 c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
867 21 c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
868
869 21 c->add_residual[3] = ff_hevc_add_residual_32_8_avx2;
870 }
871
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 525 times.
525 if (EXTERNAL_AVX512ICL(cpu_flags) && ARCH_X86_64) {
872 c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_avx512icl;
873 c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_avx512icl;
874 c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_8_avx512icl;
875 c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx512icl;
876 c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx512icl;
877 c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_avx512icl;
878 }
879
2/2
✓ Branch 0 taken 234 times.
✓ Branch 1 taken 491 times.
725 } else if (bit_depth == 10) {
880
2/2
✓ Branch 0 taken 173 times.
✓ Branch 1 taken 61 times.
234 if (EXTERNAL_MMXEXT(cpu_flags)) {
881 173 c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext;
882 173 c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
883 }
884
2/2
✓ Branch 0 taken 143 times.
✓ Branch 1 taken 91 times.
234 if (EXTERNAL_SSE2(cpu_flags)) {
885 143 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
886 143 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
887 if (ARCH_X86_64) {
888 143 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
889 143 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
890
891 143 c->idct[2] = ff_hevc_idct_16x16_10_sse2;
892 143 c->idct[3] = ff_hevc_idct_32x32_10_sse2;
893 }
894 143 SAO_BAND_INIT(10, sse2);
895 143 SAO_EDGE_INIT(10, sse2);
896
897 143 c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
898 143 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
899 143 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
900
901 143 c->idct[0] = ff_hevc_idct_4x4_10_sse2;
902 143 c->idct[1] = ff_hevc_idct_8x8_10_sse2;
903
904 143 c->add_residual[1] = ff_hevc_add_residual_8_10_sse2;
905 143 c->add_residual[2] = ff_hevc_add_residual_16_10_sse2;
906 143 c->add_residual[3] = ff_hevc_add_residual_32_10_sse2;
907 }
908
2/2
✓ Branch 0 taken 113 times.
✓ Branch 1 taken 121 times.
234 if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
909 113 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
910 113 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
911 }
912
2/2
✓ Branch 0 taken 98 times.
✓ Branch 1 taken 136 times.
234 if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
913 98 EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
914 98 EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4);
915 98 EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4);
916 98 EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 10, sse4);
917
918 98 QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
919 98 QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 10, sse4);
920 98 QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
921 98 QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
922 }
923
2/2
✓ Branch 0 taken 53 times.
✓ Branch 1 taken 181 times.
234 if (EXTERNAL_AVX(cpu_flags)) {
924 53 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
925 53 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
926 if (ARCH_X86_64) {
927 53 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
928 53 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
929
930 53 c->idct[2] = ff_hevc_idct_16x16_10_avx;
931 53 c->idct[3] = ff_hevc_idct_32x32_10_avx;
932 }
933
934 53 c->idct[0] = ff_hevc_idct_4x4_10_avx;
935 53 c->idct[1] = ff_hevc_idct_8x8_10_avx;
936
937 53 SAO_BAND_INIT(10, avx);
938 }
939
2/2
✓ Branch 0 taken 23 times.
✓ Branch 1 taken 211 times.
234 if (EXTERNAL_AVX2(cpu_flags)) {
940 23 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
941 }
942
3/4
✓ Branch 0 taken 23 times.
✓ Branch 1 taken 211 times.
✓ Branch 2 taken 23 times.
✗ Branch 3 not taken.
234 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
943 23 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
944 23 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
945 if (ARCH_X86_64) {
946 23 c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2;
947 23 c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2;
948 23 c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2;
949 23 c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2;
950 23 c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2;
951
952 23 c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2;
953 23 c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2;
954 23 c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2;
955 23 c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2;
956 23 c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2;
957
958 23 c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
959 23 c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
960 23 c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
961 23 c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2;
962 23 c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2;
963
964 23 c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
965 23 c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
966 23 c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
967 23 c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2;
968 23 c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2;
969
970 23 c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2;
971 23 c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2;
972 23 c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2;
973 23 c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2;
974 23 c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2;
975 23 c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2;
976 23 c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2;
977 23 c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2;
978 23 c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2;
979 23 c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2;
980
981 23 c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2;
982 23 c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2;
983 23 c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2;
984 23 c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2;
985 23 c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2;
986
987 23 c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2;
988 23 c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2;
989 23 c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2;
990 23 c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2;
991 23 c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2;
992
993 23 c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2;
994 23 c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2;
995 23 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2;
996 23 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2;
997 23 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2;
998
999 23 c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2;
1000 23 c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2;
1001 23 c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2;
1002 23 c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2;
1003 23 c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2;
1004
1005 23 c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2;
1006 23 c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2;
1007 23 c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2;
1008 23 c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2;
1009 23 c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2;
1010
1011 23 c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2;
1012 23 c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2;
1013 23 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2;
1014 23 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2;
1015 23 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2;
1016
1017 23 c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2;
1018 23 c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2;
1019 23 c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2;
1020 23 c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2;
1021 23 c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2;
1022
1023 23 c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2;
1024 23 c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2;
1025 23 c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2;
1026 23 c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2;
1027 23 c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2;
1028
1029 23 c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2;
1030 23 c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2;
1031 23 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2;
1032 23 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2;
1033 23 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2;
1034
1035 23 c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2;
1036 23 c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2;
1037 23 c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2;
1038 23 c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2;
1039 23 c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2;
1040
1041 23 c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2;
1042 23 c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2;
1043 23 c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2;
1044 23 c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2;
1045 23 c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2;
1046
1047 23 c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2;
1048 23 c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2;
1049 23 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2;
1050 23 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2;
1051 23 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2;
1052
1053 23 c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2;
1054 23 c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2;
1055 23 c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2;
1056 23 c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2;
1057 23 c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2;
1058
1059 23 c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2;
1060 23 c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2;
1061 23 c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2;
1062 23 c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2;
1063 23 c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2;
1064
1065 23 c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2;
1066 23 c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2;
1067 23 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2;
1068 23 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2;
1069 23 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2;
1070
1071 23 c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2;
1072 23 c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2;
1073 23 c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2;
1074 23 c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2;
1075 23 c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2;
1076
1077 23 c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2;
1078 23 c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2;
1079 23 c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2;
1080 23 c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2;
1081 23 c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2;
1082
1083 23 c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2;
1084 23 c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2;
1085 23 c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2;
1086 23 c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2;
1087 23 c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2;
1088 }
1089 23 SAO_BAND_INIT(10, avx2);
1090 23 SAO_EDGE_INIT(10, avx2);
1091
1092 23 c->add_residual[2] = ff_hevc_add_residual_16_10_avx2;
1093 23 c->add_residual[3] = ff_hevc_add_residual_32_10_avx2;
1094 }
1095
2/2
✓ Branch 0 taken 205 times.
✓ Branch 1 taken 286 times.
491 } else if (bit_depth == 12) {
1096
2/2
✓ Branch 0 taken 165 times.
✓ Branch 1 taken 40 times.
205 if (EXTERNAL_MMXEXT(cpu_flags)) {
1097 165 c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext;
1098 }
1099
2/2
✓ Branch 0 taken 135 times.
✓ Branch 1 taken 70 times.
205 if (EXTERNAL_SSE2(cpu_flags)) {
1100 135 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
1101 135 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
1102 if (ARCH_X86_64) {
1103 135 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
1104 135 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
1105 }
1106 135 SAO_BAND_INIT(12, sse2);
1107 135 SAO_EDGE_INIT(12, sse2);
1108
1109 135 c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2;
1110 135 c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2;
1111 135 c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2;
1112 }
1113
2/2
✓ Branch 0 taken 105 times.
✓ Branch 1 taken 100 times.
205 if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
1114 105 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
1115 105 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
1116 }
1117
2/2
✓ Branch 0 taken 90 times.
✓ Branch 1 taken 115 times.
205 if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
1118 90 EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
1119 90 EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4);
1120 90 EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4);
1121 90 EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 12, sse4);
1122
1123 90 QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
1124 90 QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4);
1125 90 QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4);
1126 90 QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4);
1127 }
1128
2/2
✓ Branch 0 taken 45 times.
✓ Branch 1 taken 160 times.
205 if (EXTERNAL_AVX(cpu_flags)) {
1129 45 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
1130 45 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
1131 if (ARCH_X86_64) {
1132 45 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
1133 45 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
1134 }
1135 45 SAO_BAND_INIT(12, avx);
1136 }
1137
2/2
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 190 times.
205 if (EXTERNAL_AVX2(cpu_flags)) {
1138 15 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
1139 }
1140
3/4
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 190 times.
✓ Branch 2 taken 15 times.
✗ Branch 3 not taken.
205 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
1141 15 c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2;
1142 15 c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2;
1143
1144 15 SAO_BAND_INIT(12, avx2);
1145 15 SAO_EDGE_INIT(12, avx2);
1146 }
1147 }
1148 1250 }
1149