Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (c) 2013 Seppo Tomperi | ||
3 | * Copyright (c) 2013-2014 Pierre-Edouard Lepere | ||
4 | * Copyright (c) 2023-2024 Wu Jianhua | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with FFmpeg; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | #include "config.h" | ||
24 | |||
25 | #include "libavutil/cpu.h" | ||
26 | #include "libavutil/mem_internal.h" | ||
27 | #include "libavutil/x86/asm.h" | ||
28 | #include "libavutil/x86/cpu.h" | ||
29 | #include "libavcodec/hevc/dsp.h" | ||
30 | #include "libavcodec/x86/hevc/dsp.h" | ||
31 | #include "libavcodec/x86/h26x/h2656dsp.h" | ||
32 | |||
33 | /* Declares (does not define) the prototype of ff_hevc_DIR_loop_filter_chroma_DEPTH_OPT; DIR, DEPTH and OPT are token-pasted into the symbol name. */ #define LFC_FUNC(DIR, DEPTH, OPT) \ | ||
34 | void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q); | ||
35 | |||
36 | /* Declares (does not define) the prototype of ff_hevc_DIR_loop_filter_luma_DEPTH_OPT; unlike the chroma prototype it takes an extra int beta argument. */ #define LFL_FUNC(DIR, DEPTH, OPT) \ | ||
37 | void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, const int *tc, const uint8_t *no_p, const uint8_t *no_q); | ||
38 | |||
39 | /* Declares both the h and the v chroma loop-filter prototypes for one depth/opt pair; the type argument is accepted but never used in the expansion. */ #define LFC_FUNCS(type, depth, opt) \ | ||
40 | LFC_FUNC(h, depth, opt) \ | ||
41 | LFC_FUNC(v, depth, opt) | ||
42 | |||
43 | /* Declares both the h and the v luma loop-filter prototypes for one depth/opt pair; the type argument is accepted but never used in the expansion. */ #define LFL_FUNCS(type, depth, opt) \ | ||
44 | LFL_FUNC(h, depth, opt) \ | ||
45 | LFL_FUNC(v, depth, opt) | ||
46 | |||
47 | /* Chroma loop-filter prototypes: sse2 and avx, each at 8, 10 and 12 bits. */ LFC_FUNCS(uint8_t, 8, sse2) | ||
48 | LFC_FUNCS(uint8_t, 10, sse2) | ||
49 | LFC_FUNCS(uint8_t, 12, sse2) | ||
50 | LFC_FUNCS(uint8_t, 8, avx) | ||
51 | LFC_FUNCS(uint8_t, 10, avx) | ||
52 | LFC_FUNCS(uint8_t, 12, avx) | ||
53 | /* Luma loop-filter prototypes: sse2, ssse3 and avx, each at 8, 10 and 12 bits. */ LFL_FUNCS(uint8_t, 8, sse2) | ||
54 | LFL_FUNCS(uint8_t, 10, sse2) | ||
55 | LFL_FUNCS(uint8_t, 12, sse2) | ||
56 | LFL_FUNCS(uint8_t, 8, ssse3) | ||
57 | LFL_FUNCS(uint8_t, 10, ssse3) | ||
58 | LFL_FUNCS(uint8_t, 12, ssse3) | ||
59 | LFL_FUNCS(uint8_t, 8, avx) | ||
60 | LFL_FUNCS(uint8_t, 10, avx) | ||
61 | LFL_FUNCS(uint8_t, 12, avx) | ||
62 | |||
63 | /* Declares the 8-, 10- and 12-bit ff_hevc_idct_W_dc_*_opt prototypes for block size W. The last declaration has no trailing semicolon, so each use of the macro must supply one. */ #define IDCT_DC_FUNCS(W, opt) \ | ||
64 | void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \ | ||
65 | void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \ | ||
66 | void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs) | ||
67 | |||
68 | /* DC-only IDCT prototypes: 4x4 for mmxext, 8x8 to 32x32 for sse2, 16x16 and 32x32 for avx2. */ IDCT_DC_FUNCS(4x4, mmxext); | ||
69 | IDCT_DC_FUNCS(8x8, sse2); | ||
70 | IDCT_DC_FUNCS(16x16, sse2); | ||
71 | IDCT_DC_FUNCS(32x32, sse2); | ||
72 | IDCT_DC_FUNCS(16x16, avx2); | ||
73 | IDCT_DC_FUNCS(32x32, avx2); | ||
74 | |||
75 | /* Declares the ff_hevc_idct_NxN_{8,10}_opt prototypes for N = 4, 8, 16 and 32. Every declaration ends with its own semicolon, so the macro is used without one. */ #define IDCT_FUNCS(opt) \ | ||
76 | void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \ | ||
77 | void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \ | ||
78 | void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \ | ||
79 | void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \ | ||
80 | void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \ | ||
81 | void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \ | ||
82 | void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \ | ||
83 | void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit); | ||
84 | |||
85 | /* Full IDCT prototypes for the sse2 and avx variants. */ IDCT_FUNCS(sse2) | ||
86 | IDCT_FUNCS(avx) | ||
87 | |||
88 | |||
89 | /* Alias so that DECL_HV_FILTER(pel) resolves to the qpel table; no separate pel table is referenced in this file. NOTE(review): presumably the plain pel kernels ignore the filter pointers - confirm. */ #define ff_hevc_pel_filters ff_hevc_qpel_filters | ||
90 | /* Declares locals hf and vf pointing at entries mx and my of ff_hevc_f_filters; expects mx and my to be in scope at the point of use. */ #define DECL_HV_FILTER(f) \ | ||
91 | const uint8_t *hf = ff_hevc_ ## f ## _filters[mx]; \ | ||
92 | const uint8_t *vf = ff_hevc_ ## f ## _filters[my]; | ||
93 | |||
94 | /* Defines static hevc_put_a_depth_opt, a thin wrapper that looks up the p filter entries for mx/my and forwards to ff_h2656_put_b_depth_opt. */ #define FW_PUT(p, a, b, depth, opt) \ | ||
95 | static void hevc_put_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \ | ||
96 | int height, intptr_t mx, intptr_t my,int width) \ | ||
97 | { \ | ||
98 | DECL_HV_FILTER(p) \ | ||
99 | ff_h2656_put_ ## b ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); /* dst stride is the constant 2 * MAX_PB_SIZE; NOTE(review): presumably the byte stride of an int16_t row of MAX_PB_SIZE samples - confirm */ \ | ||
100 | } | ||
101 | |||
102 | /* Defines static hevc_put_uni_a_depth_opt, a thin wrapper that looks up the p filter entries for mx/my and forwards to ff_h2656_put_uni_b_depth_opt with the caller's dststride. */ #define FW_PUT_UNI(p, a, b, depth, opt) \ | ||
103 | static void hevc_put_uni_ ## a ## _ ## depth ## _##opt(uint8_t *dst, ptrdiff_t dststride, \ | ||
104 | const uint8_t *src, ptrdiff_t srcstride, \ | ||
105 | int height, intptr_t mx, intptr_t my, int width) \ | ||
106 | { \ | ||
107 | DECL_HV_FILTER(p) \ | ||
108 | ff_h2656_put_uni_ ## b ## _ ## depth ## _##opt(dst, dststride, src, srcstride, height, hf, vf, width); \ | ||
109 | } | ||
110 | |||
111 | #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL | ||
112 | |||
113 | /* Emits both the put and the put_uni wrapper for one kernel. */ #define FW_PUT_FUNCS(p, a, b, depth, opt) \ | ||
114 | FW_PUT(p, a, b, depth, opt) \ | ||
115 | FW_PUT_UNI(p, a, b, depth, opt) | ||
116 | |||
117 | /* Emits the pel_pixels wrappers of width w, which forward to the ff_h2656 pixels kernels of the same width. */ #define FW_PEL(w, depth, opt) FW_PUT_FUNCS(pel, pel_pixels##w, pixels##w, depth, opt) | ||
118 | |||
119 | /* Emits the h and v wrappers for filter family npel (epel or qpel in this file), forwarding to the n-tap ff_h2656 kernels of width w. */ #define FW_DIR(npel, n, w, depth, opt) \ | ||
120 | FW_PUT_FUNCS(npel, npel ## _h##w, n ## tap_h##w, depth, opt) \ | ||
121 | FW_PUT_FUNCS(npel, npel ## _v##w, n ## tap_v##w, depth, opt) | ||
122 | |||
123 | /* Emits the hv wrappers for filter family npel, forwarding to the n-tap hv ff_h2656 kernel of width w. */ #define FW_DIR_HV(npel, n, w, depth, opt) \ | ||
124 | FW_PUT_FUNCS(npel, npel ## _hv##w, n ## tap_hv##w, depth, opt) | ||
125 | |||
126 | 17410 | FW_PEL(4, 8, sse4) | |
127 | 76 | FW_PEL(6, 8, sse4) | |
128 | 26322 | FW_PEL(8, 8, sse4) | |
129 | 76 | FW_PEL(12, 8, sse4) | |
130 | 150684 | FW_PEL(16, 8, sse4) | |
131 | 2400 | FW_PEL(4, 10, sse4) | |
132 | 76 | FW_PEL(6, 10, sse4) | |
133 | 33624 | FW_PEL(8, 10, sse4) | |
134 | 304 | FW_PEL(4, 12, sse4) | |
135 | 76 | FW_PEL(6, 12, sse4) | |
136 | 1824 | FW_PEL(8, 12, sse4) | |
137 | |||
138 | /* epel wrappers map onto the 4tap kernels: FW_EPEL emits the h and v wrappers. */ #define FW_EPEL(w, depth, opt) FW_DIR(epel, 4, w, depth, opt) | ||
139 | /* FW_EPEL_HV emits the hv wrappers. */ #define FW_EPEL_HV(w, depth, opt) FW_DIR_HV(epel, 4, w, depth, opt) | ||
140 | /* FW_EPEL_FUNCS emits h, v and hv together. */ #define FW_EPEL_FUNCS(w, depth, opt) \ | ||
141 | FW_EPEL(w, depth, opt) \ | ||
142 | FW_EPEL_HV(w, depth, opt) | ||
143 | |||
144 | 152 | FW_EPEL(12, 8, sse4) | |
145 | |||
146 | 10508 | FW_EPEL_FUNCS(4, 8, sse4) | |
147 | 228 | FW_EPEL_FUNCS(6, 8, sse4) | |
148 | 12880 | FW_EPEL_FUNCS(8, 8, sse4) | |
149 | 21140 | FW_EPEL_FUNCS(16, 8, sse4) | |
150 | 6884 | FW_EPEL_FUNCS(4, 10, sse4) | |
151 | 228 | FW_EPEL_FUNCS(6, 10, sse4) | |
152 | 31064 | FW_EPEL_FUNCS(8, 10, sse4) | |
153 | 912 | FW_EPEL_FUNCS(4, 12, sse4) | |
154 | 228 | FW_EPEL_FUNCS(6, 12, sse4) | |
155 | 5472 | FW_EPEL_FUNCS(8, 12, sse4) | |
156 | |||
157 | /* qpel wrappers map onto the 8tap kernels: FW_QPEL emits the h and v wrappers. */ #define FW_QPEL(w, depth, opt) FW_DIR(qpel, 8, w, depth, opt) | ||
158 | /* FW_QPEL_HV emits the hv wrappers. */ #define FW_QPEL_HV(w, depth, opt) FW_DIR_HV(qpel, 8, w, depth, opt) | ||
159 | /* FW_QPEL_FUNCS emits h, v and hv together. */ #define FW_QPEL_FUNCS(w, depth, opt) \ | ||
160 | FW_QPEL(w, depth, opt) \ | ||
161 | FW_QPEL_HV(w, depth, opt) | ||
162 | |||
163 | 152 | FW_QPEL(12, 8, sse4) | |
164 | 1638 | FW_QPEL(16, 8, sse4) | |
165 | |||
166 | 306 | FW_QPEL_FUNCS(4, 8, sse4) | |
167 | 4518 | FW_QPEL_FUNCS(8, 8, sse4) | |
168 | 912 | FW_QPEL_FUNCS(4, 10, sse4) | |
169 | 14268 | FW_QPEL_FUNCS(8, 10, sse4) | |
170 | 912 | FW_QPEL_FUNCS(4, 12, sse4) | |
171 | 5472 | FW_QPEL_FUNCS(8, 12, sse4) | |
172 | |||
173 | #if HAVE_AVX2_EXTERNAL | ||
174 | |||
175 | 27552 | FW_PEL(32, 8, avx2) | |
176 | 3028 | FW_PUT(pel, pel_pixels16, pixels16, 10, avx2) | |
177 | |||
178 | 1396 | FW_EPEL(32, 8, avx2) | |
179 | 5112 | FW_EPEL(16, 10, avx2) | |
180 | |||
181 | 752 | FW_EPEL_HV(32, 8, avx2) | |
182 | 6732 | FW_EPEL_HV(16, 10, avx2) | |
183 | |||
184 | 32 | FW_QPEL(32, 8, avx2) | |
185 | 8960 | FW_QPEL(16, 10, avx2) | |
186 | |||
187 | 11354 | FW_QPEL_HV(16, 10, avx2) | |
188 | |||
189 | #endif | ||
190 | #endif | ||
191 | |||
192 | /* Defines static hevc_put_nameW_bitd_opt by calling the step-wide function of the same name/bitd/opt once per step-wide column strip, for column offsets 0, step, ... below W. height, mx, my and width are forwarded unchanged to every call. */ #define mc_rep_func(name, bitd, step, W, opt) \ | ||
193 | static void hevc_put_##name##W##_##bitd##_##opt(int16_t *_dst, \ | ||
194 | const uint8_t *_src, ptrdiff_t _srcstride, int height, \ | ||
195 | intptr_t mx, intptr_t my, int width) \ | ||
196 | { \ | ||
197 | int i; \ | ||
198 | int16_t *dst; \ | ||
199 | for (i = 0; i < W; i += step) { \ | ||
200 | const uint8_t *src = _src + (i * ((bitd + 7) / 8)); /* (bitd + 7) / 8 rounds the bit depth up to whole bytes, so this is a byte offset into src */ \ | ||
201 | dst = _dst + i; /* dst is int16_t, so this advances by i elements */ \ | ||
202 | hevc_put_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \ | ||
203 | } \ | ||
204 | } | ||
205 | /* Defines static hevc_put_uni_nameW_bitd_opt by calling the step-wide put_uni function once per step-wide column strip, for column offsets 0, step, ... below W. Both dst and src are byte pointers and advance by the same byte offset. */ #define mc_rep_uni_func(name, bitd, step, W, opt) \ | ||
206 | static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \ | ||
207 | const uint8_t *_src, ptrdiff_t _srcstride, int height, \ | ||
208 | intptr_t mx, intptr_t my, int width) \ | ||
209 | { \ | ||
210 | int i; \ | ||
211 | uint8_t *dst; \ | ||
212 | for (i = 0; i < W; i += step) { \ | ||
213 | const uint8_t *src = _src + (i * ((bitd + 7) / 8)); /* (bitd + 7) / 8 rounds the bit depth up to whole bytes */ \ | ||
214 | dst = _dst + (i * ((bitd + 7) / 8)); \ | ||
215 | hevc_put_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \ | ||
216 | height, mx, my, width); \ | ||
217 | } \ | ||
218 | } | ||
219 | /* Defines static ff_hevc_put_bi_nameW_bitd_opt by calling the step-wide ff_hevc_put_bi function once per step-wide column strip, for column offsets 0, step, ... below W. The step-wide function is not declared in the visible part of this file. */ #define mc_rep_bi_func(name, bitd, step, W, opt) \ | ||
220 | static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \ | ||
221 | ptrdiff_t _srcstride, const int16_t *_src2, \ | ||
222 | int height, intptr_t mx, intptr_t my, int width) \ | ||
223 | { \ | ||
224 | int i; \ | ||
225 | uint8_t *dst; \ | ||
226 | for (i = 0; i < W ; i += step) { \ | ||
227 | const uint8_t *src = _src + (i * ((bitd + 7) / 8)); /* byte offset: (bitd + 7) / 8 rounds the bit depth up to whole bytes */ \ | ||
228 | const int16_t *src2 = _src2 + i; /* src2 is int16_t, so this advances by i elements */ \ | ||
229 | dst = _dst + (i * ((bitd + 7) / 8)); \ | ||
230 | ff_hevc_put_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \ | ||
231 | height, mx, my, width); \ | ||
232 | } \ | ||
233 | } | ||
234 | |||
235 | /* Emits the put, put_uni and put_bi repeat wrappers for one name/bitd/step/W/opt combination. */ #define mc_rep_funcs(name, bitd, step, W, opt) \ | ||
236 | mc_rep_func(name, bitd, step, W, opt) \ | ||
237 | mc_rep_uni_func(name, bitd, step, W, opt) \ | ||
238 | mc_rep_bi_func(name, bitd, step, W, opt) | ||
239 | |||
240 | /* Defines static hevc_put_nameW_bitd_opt as two calls: the step1-wide function at column 0, then the step2-wide function at column step1. W is used only to form the function name. */ #define mc_rep_func2(name, bitd, step1, step2, W, opt) \ | ||
241 | static void hevc_put_##name##W##_##bitd##_##opt(int16_t *dst, \ | ||
242 | const uint8_t *src, ptrdiff_t _srcstride, int height, \ | ||
243 | intptr_t mx, intptr_t my, int width) \ | ||
244 | { \ | ||
245 | hevc_put_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \ | ||
246 | hevc_put_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)), /* dst advances in int16_t elements, src in bytes */ \ | ||
247 | _srcstride, height, mx, my, width); \ | ||
248 | } | ||
249 | /* Defines static hevc_put_uni_nameW_bitd_opt as two calls: the step1-wide put_uni function at column 0, then the step2-wide one at column step1. dst and src advance by the same byte offset. W is used only to form the function name. */ #define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \ | ||
250 | static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \ | ||
251 | const uint8_t *src, ptrdiff_t _srcstride, int height, \ | ||
252 | intptr_t mx, intptr_t my, int width) \ | ||
253 | { \ | ||
254 | hevc_put_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width); \ | ||
255 | hevc_put_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \ | ||
256 | src + (step1 * ((bitd + 7) / 8)), _srcstride, \ | ||
257 | height, mx, my, width); \ | ||
258 | } | ||
259 | /* Defines static ff_hevc_put_bi_nameW_bitd_opt as two calls: the step1-wide put_bi function at column 0, then the step2-wide one at column step1. dst and src advance in bytes, src2 in int16_t elements. W is used only to form the function name. */ #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \ | ||
260 | static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
261 | ptrdiff_t _srcstride, const int16_t *src2, \ | ||
262 | int height, intptr_t mx, intptr_t my, int width) \ | ||
263 | { \ | ||
264 | ff_hevc_put_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width);\ | ||
265 | ff_hevc_put_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \ | ||
266 | src + (step1 * ((bitd + 7) / 8)), _srcstride, \ | ||
267 | src2 + step1, height, mx, my, width); \ | ||
268 | } | ||
269 | |||
270 | /* Emits the put, put_uni and put_bi two-part wrappers for one name/bitd/step1/step2/W/opt combination. */ #define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \ | ||
271 | mc_rep_func2(name, bitd, step1, step2, W, opt) \ | ||
272 | mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \ | ||
273 | mc_rep_bi_func2(name, bitd, step1, step2, W, opt) | ||
274 | |||
275 | #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL | ||
276 | |||
277 | /* Defines static hevc_put_namewidth1_10_opt1 from two differently optimised parts: the width2-wide opt1 function at column 0, then the width3-wide opt2 function. width4 is the src byte offset of the second part; the uses in this file pass width4 = 2 * width2. */ #define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ | ||
278 | static void hevc_put_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \ | ||
279 | int height, intptr_t mx, intptr_t my, int width) \ | ||
280 | \ | ||
281 | { \ | ||
282 | hevc_put_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \ | ||
283 | hevc_put_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width); /* dst advances by width2 int16_t elements, src by width4 bytes */ \ | ||
284 | } | ||
285 | |||
286 | /* Defines static ff_hevc_put_bi_namewidth1_10_opt1 from two parts: the width2-wide opt1 put_bi function at column 0, then the width3-wide opt2 one. dst and src advance by width4 bytes, src2 by width2 int16_t elements. */ #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ | ||
287 | static void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
288 | ptrdiff_t _srcstride, const int16_t *src2, \ | ||
289 | int height, intptr_t mx, intptr_t my, int width) \ | ||
290 | { \ | ||
291 | ff_hevc_put_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \ | ||
292 | height, mx, my, width); \ | ||
293 | ff_hevc_put_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2, \ | ||
294 | height, mx, my, width); \ | ||
295 | } | ||
296 | |||
297 | /* Defines static hevc_put_uni_namewidth1_10_opt1 from two parts: the width2-wide opt1 put_uni function at column 0, then the width3-wide opt2 one with dst and src both advanced by width4 bytes. */ #define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ | ||
298 | static void hevc_put_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \ | ||
299 | const uint8_t *src, ptrdiff_t _srcstride, int height, \ | ||
300 | intptr_t mx, intptr_t my, int width) \ | ||
301 | { \ | ||
302 | hevc_put_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \ | ||
303 | height, mx, my, width); \ | ||
304 | hevc_put_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, \ | ||
305 | height, mx, my, width); \ | ||
306 | } | ||
307 | |||
308 | /* Emits the put, put_bi and put_uni mixed-optimisation 10-bit wrappers for one parameter set. */ #define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) \ | ||
309 | mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ | ||
310 | mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ | ||
311 | mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) | ||
312 | |||
313 | /* Defines static hevc_put_namewidth1_8_opt1 from two differently optimised parts: the width2-wide opt1 function at column 0, then the width3-wide opt2 function at column width2. No separate byte-offset parameter is taken; width2 is used for both pointers. */ #define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ | ||
314 | static void hevc_put_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \ | ||
315 | int height, intptr_t mx, intptr_t my, int width) \ | ||
316 | \ | ||
317 | { \ | ||
318 | hevc_put_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \ | ||
319 | hevc_put_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width); /* dst advances by width2 int16_t elements, src by width2 bytes */ \ | ||
320 | } | ||
321 | |||
322 | /* Defines static ff_hevc_put_bi_namewidth1_8_opt1 from two parts: the width2-wide opt1 put_bi function at column 0, then the width3-wide opt2 one with dst, src and src2 all advanced by width2. */ #define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ | ||
323 | static void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
324 | ptrdiff_t _srcstride, const int16_t *src2, \ | ||
325 | int height, intptr_t mx, intptr_t my, int width) \ | ||
326 | { \ | ||
327 | ff_hevc_put_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ | ||
328 | src2, height, mx, my, width); \ | ||
329 | ff_hevc_put_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \ | ||
330 | src2+width2, height, mx, my, width); \ | ||
331 | } | ||
332 | |||
333 | /* Defines static hevc_put_uni_namewidth1_8_opt1 from two parts: the width2-wide opt1 put_uni function at column 0, then the width3-wide opt2 one with dst and src both advanced by width2 bytes. */ #define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ | ||
334 | static void hevc_put_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \ | ||
335 | const uint8_t *src, ptrdiff_t _srcstride, int height, \ | ||
336 | intptr_t mx, intptr_t my, int width) \ | ||
337 | { \ | ||
338 | hevc_put_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ | ||
339 | height, mx, my, width); \ | ||
340 | hevc_put_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \ | ||
341 | height, mx, my, width); \ | ||
342 | } | ||
343 | |||
344 | /* Emits the put, put_bi and put_uni mixed-optimisation 8-bit wrappers for one parameter set. */ #define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) \ | ||
345 | mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ | ||
346 | mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ | ||
347 | mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) | ||
348 | |||
349 | #if HAVE_AVX2_EXTERNAL | ||
350 | |||
351 | 8 | mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4) | |
352 | 6 | mc_rep_mixs_8(epel_hv, 48, 32, 16, avx2, sse4) | |
353 | 6 | mc_rep_mixs_8(epel_h , 48, 32, 16, avx2, sse4) | |
354 | 6 | mc_rep_mixs_8(epel_v , 48, 32, 16, avx2, sse4) | |
355 | |||
356 | 5 | mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32) | |
357 | 1 | mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32) | |
358 | 6 | mc_rep_mixs_10(epel_hv, 24, 16, 8, avx2, sse4, 32) | |
359 | 6 | mc_rep_mixs_10(epel_h , 24, 16, 8, avx2, sse4, 32) | |
360 | 6 | mc_rep_mixs_10(epel_v , 24, 16, 8, avx2, sse4, 32) | |
361 | |||
362 | |||
363 | 6 | mc_rep_mixs_10(qpel_h , 24, 16, 8, avx2, sse4, 32) | |
364 | 6 | mc_rep_mixs_10(qpel_v , 24, 16, 8, avx2, sse4, 32) | |
365 | 6 | mc_rep_mixs_10(qpel_hv, 24, 16, 8, avx2, sse4, 32) | |
366 | |||
367 | |||
368 |
2/2✓ Branch 1 taken 6722 times.
✓ Branch 2 taken 3361 times.
|
20166 | mc_rep_funcs(pel_pixels, 8, 32, 64, avx2) |
369 | |||
370 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 78 times.
|
234 | mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2)//used for 10bit |
371 |
2/2✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
|
4 | mc_rep_uni_func(pel_pixels, 8, 32, 96, avx2) //used for 10bit |
372 | |||
373 |
2/2✓ Branch 1 taken 2026 times.
✓ Branch 2 taken 1013 times.
|
3039 | mc_rep_func(pel_pixels, 10, 16, 32, avx2) |
374 |
2/2✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
|
4 | mc_rep_func(pel_pixels, 10, 16, 48, avx2) |
375 |
2/2✓ Branch 1 taken 304 times.
✓ Branch 2 taken 152 times.
|
456 | mc_rep_func(pel_pixels, 10, 32, 64, avx2) |
376 | |||
377 |
2/2✓ Branch 1 taken 176 times.
✓ Branch 2 taken 88 times.
|
264 | mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2) |
378 |
2/2✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
|
4 | mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2) |
379 |
2/2✓ Branch 1 taken 42 times.
✓ Branch 2 taken 21 times.
|
63 | mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2) |
380 | |||
381 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(epel_h, 8, 32, 64, avx2) |
382 | |||
383 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(epel_v, 8, 32, 64, avx2) |
384 | |||
385 |
2/2✓ Branch 1 taken 1290 times.
✓ Branch 2 taken 645 times.
|
3870 | mc_rep_funcs(epel_h, 10, 16, 32, avx2) |
386 |
2/2✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
|
24 | mc_rep_funcs(epel_h, 10, 16, 48, avx2) |
387 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(epel_h, 10, 32, 64, avx2) |
388 | |||
389 |
2/2✓ Branch 1 taken 154 times.
✓ Branch 2 taken 77 times.
|
462 | mc_rep_funcs(epel_v, 10, 16, 32, avx2) |
390 |
2/2✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
|
24 | mc_rep_funcs(epel_v, 10, 16, 48, avx2) |
391 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(epel_v, 10, 32, 64, avx2) |
392 | |||
393 | |||
394 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(epel_hv, 8, 32, 64, avx2) |
395 | |||
396 |
2/2✓ Branch 1 taken 1598 times.
✓ Branch 2 taken 799 times.
|
4794 | mc_rep_funcs(epel_hv, 10, 16, 32, avx2) |
397 |
2/2✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
|
24 | mc_rep_funcs(epel_hv, 10, 16, 48, avx2) |
398 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(epel_hv, 10, 32, 64, avx2) |
399 | |||
400 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(qpel_h, 8, 32, 64, avx2) |
401 | 6 | mc_rep_mixs_8(qpel_h , 48, 32, 16, avx2, sse4) | |
402 | |||
403 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(qpel_v, 8, 32, 64, avx2) |
404 | 6 | mc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4) | |
405 | |||
406 |
2/2✓ Branch 1 taken 1982 times.
✓ Branch 2 taken 991 times.
|
5946 | mc_rep_funcs(qpel_h, 10, 16, 32, avx2) |
407 |
2/2✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
|
24 | mc_rep_funcs(qpel_h, 10, 16, 48, avx2) |
408 |
2/2✓ Branch 1 taken 512 times.
✓ Branch 2 taken 256 times.
|
1536 | mc_rep_funcs(qpel_h, 10, 32, 64, avx2) |
409 | |||
410 |
2/2✓ Branch 1 taken 910 times.
✓ Branch 2 taken 455 times.
|
2730 | mc_rep_funcs(qpel_v, 10, 16, 32, avx2) |
411 |
2/2✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
|
24 | mc_rep_funcs(qpel_v, 10, 16, 48, avx2) |
412 |
2/2✓ Branch 1 taken 202 times.
✓ Branch 2 taken 101 times.
|
606 | mc_rep_funcs(qpel_v, 10, 32, 64, avx2) |
413 | |||
414 |
2/2✓ Branch 1 taken 3116 times.
✓ Branch 2 taken 1558 times.
|
9348 | mc_rep_funcs(qpel_hv, 10, 16, 32, avx2) |
415 |
2/2✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
|
24 | mc_rep_funcs(qpel_hv, 10, 16, 48, avx2) |
416 |
2/2✓ Branch 1 taken 668 times.
✓ Branch 2 taken 334 times.
|
2004 | mc_rep_funcs(qpel_hv, 10, 32, 64, avx2) |
417 | |||
418 | #endif //AVX2 | ||
419 | |||
420 |
2/2✓ Branch 1 taken 30924 times.
✓ Branch 2 taken 7731 times.
|
77310 | mc_rep_funcs(pel_pixels, 8, 16, 64, sse4) |
421 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(pel_pixels, 8, 16, 48, sse4) |
422 |
2/2✓ Branch 1 taken 33874 times.
✓ Branch 2 taken 16937 times.
|
101622 | mc_rep_funcs(pel_pixels, 8, 16, 32, sse4) |
423 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(pel_pixels, 8, 8, 24, sse4) |
424 |
2/2✓ Branch 1 taken 2960 times.
✓ Branch 2 taken 370 times.
|
6660 | mc_rep_funcs(pel_pixels,10, 8, 64, sse4) |
425 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(pel_pixels,10, 8, 48, sse4) |
426 |
2/2✓ Branch 1 taken 6208 times.
✓ Branch 2 taken 1552 times.
|
15520 | mc_rep_funcs(pel_pixels,10, 8, 32, sse4) |
427 |
2/2✓ Branch 1 taken 144 times.
✓ Branch 2 taken 48 times.
|
384 | mc_rep_funcs(pel_pixels,10, 8, 24, sse4) |
428 |
2/2✓ Branch 1 taken 4020 times.
✓ Branch 2 taken 2010 times.
|
12060 | mc_rep_funcs(pel_pixels,10, 8, 16, sse4) |
429 |
2/2✓ Branch 1 taken 177 times.
✓ Branch 2 taken 59 times.
|
472 | mc_rep_funcs(pel_pixels,10, 4, 12, sse4) |
430 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(pel_pixels,12, 8, 64, sse4) |
431 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(pel_pixels,12, 8, 48, sse4) |
432 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(pel_pixels,12, 8, 32, sse4) |
433 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(pel_pixels,12, 8, 24, sse4) |
434 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(pel_pixels,12, 8, 16, sse4) |
435 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(pel_pixels,12, 4, 12, sse4) |
436 | |||
437 |
2/2✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
|
420 | mc_rep_funcs(epel_h, 8, 16, 64, sse4) |
438 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_h, 8, 16, 48, sse4) |
439 |
2/2✓ Branch 1 taken 1076 times.
✓ Branch 2 taken 538 times.
|
3228 | mc_rep_funcs(epel_h, 8, 16, 32, sse4) |
440 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_h, 8, 8, 24, sse4) |
441 |
2/2✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
|
756 | mc_rep_funcs(epel_h,10, 8, 64, sse4) |
442 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(epel_h,10, 8, 48, sse4) |
443 |
2/2✓ Branch 1 taken 1016 times.
✓ Branch 2 taken 254 times.
|
2540 | mc_rep_funcs(epel_h,10, 8, 32, sse4) |
444 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_h,10, 8, 24, sse4) |
445 |
2/2✓ Branch 1 taken 1140 times.
✓ Branch 2 taken 570 times.
|
3420 | mc_rep_funcs(epel_h,10, 8, 16, sse4) |
446 |
2/2✓ Branch 1 taken 129 times.
✓ Branch 2 taken 43 times.
|
344 | mc_rep_funcs(epel_h,10, 4, 12, sse4) |
447 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(epel_h,12, 8, 64, sse4) |
448 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(epel_h,12, 8, 48, sse4) |
449 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(epel_h,12, 8, 32, sse4) |
450 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_h,12, 8, 24, sse4) |
451 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(epel_h,12, 8, 16, sse4) |
452 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_h,12, 4, 12, sse4) |
453 |
2/2✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
|
420 | mc_rep_funcs(epel_v, 8, 16, 64, sse4) |
454 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_v, 8, 16, 48, sse4) |
455 |
2/2✓ Branch 1 taken 2056 times.
✓ Branch 2 taken 1028 times.
|
6168 | mc_rep_funcs(epel_v, 8, 16, 32, sse4) |
456 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_v, 8, 8, 24, sse4) |
457 |
2/2✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
|
756 | mc_rep_funcs(epel_v,10, 8, 64, sse4) |
458 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(epel_v,10, 8, 48, sse4) |
459 |
2/2✓ Branch 1 taken 296 times.
✓ Branch 2 taken 74 times.
|
740 | mc_rep_funcs(epel_v,10, 8, 32, sse4) |
460 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_v,10, 8, 24, sse4) |
461 |
2/2✓ Branch 1 taken 204 times.
✓ Branch 2 taken 102 times.
|
612 | mc_rep_funcs(epel_v,10, 8, 16, sse4) |
462 |
2/2✓ Branch 1 taken 141 times.
✓ Branch 2 taken 47 times.
|
376 | mc_rep_funcs(epel_v,10, 4, 12, sse4) |
463 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(epel_v,12, 8, 64, sse4) |
464 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(epel_v,12, 8, 48, sse4) |
465 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(epel_v,12, 8, 32, sse4) |
466 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_v,12, 8, 24, sse4) |
467 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(epel_v,12, 8, 16, sse4) |
468 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_v,12, 4, 12, sse4) |
469 |
2/2✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
|
420 | mc_rep_funcs(epel_hv, 8, 16, 64, sse4) |
470 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_hv, 8, 16, 48, sse4) |
471 |
2/2✓ Branch 1 taken 1860 times.
✓ Branch 2 taken 930 times.
|
5580 | mc_rep_funcs(epel_hv, 8, 16, 32, sse4) |
472 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_hv, 8, 8, 24, sse4) |
473 | 78 | mc_rep_funcs2(epel_hv,8, 8, 4, 12, sse4) | |
474 |
2/2✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
|
756 | mc_rep_funcs(epel_hv,10, 8, 64, sse4) |
475 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(epel_hv,10, 8, 48, sse4) |
476 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 78 times.
|
780 | mc_rep_funcs(epel_hv,10, 8, 32, sse4) |
477 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_hv,10, 8, 24, sse4) |
478 |
2/2✓ Branch 1 taken 236 times.
✓ Branch 2 taken 118 times.
|
708 | mc_rep_funcs(epel_hv,10, 8, 16, sse4) |
479 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_hv,10, 4, 12, sse4) |
480 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(epel_hv,12, 8, 64, sse4) |
481 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(epel_hv,12, 8, 48, sse4) |
482 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(epel_hv,12, 8, 32, sse4) |
483 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_hv,12, 8, 24, sse4) |
484 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(epel_hv,12, 8, 16, sse4) |
485 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_hv,12, 4, 12, sse4) |
486 | |||
487 |
2/2✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
|
420 | mc_rep_funcs(qpel_h, 8, 16, 64, sse4) |
488 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(qpel_h, 8, 16, 48, sse4) |
489 |
2/2✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
|
252 | mc_rep_funcs(qpel_h, 8, 16, 32, sse4) |
490 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_h, 8, 8, 24, sse4) |
491 |
2/2✓ Branch 1 taken 1184 times.
✓ Branch 2 taken 148 times.
|
2664 | mc_rep_funcs(qpel_h,10, 8, 64, sse4) |
492 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(qpel_h,10, 8, 48, sse4) |
493 |
2/2✓ Branch 1 taken 1248 times.
✓ Branch 2 taken 312 times.
|
3120 | mc_rep_funcs(qpel_h,10, 8, 32, sse4) |
494 |
2/2✓ Branch 1 taken 132 times.
✓ Branch 2 taken 44 times.
|
352 | mc_rep_funcs(qpel_h,10, 8, 24, sse4) |
495 |
2/2✓ Branch 1 taken 424 times.
✓ Branch 2 taken 212 times.
|
1272 | mc_rep_funcs(qpel_h,10, 8, 16, sse4) |
496 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_h,10, 4, 12, sse4) |
497 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(qpel_h,12, 8, 64, sse4) |
498 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(qpel_h,12, 8, 48, sse4) |
499 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(qpel_h,12, 8, 32, sse4) |
500 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_h,12, 8, 24, sse4) |
501 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(qpel_h,12, 8, 16, sse4) |
502 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_h,12, 4, 12, sse4) |
503 |
2/2✓ Branch 1 taken 176 times.
✓ Branch 2 taken 44 times.
|
440 | mc_rep_funcs(qpel_v, 8, 16, 64, sse4) |
504 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(qpel_v, 8, 16, 48, sse4) |
505 |
2/2✓ Branch 1 taken 92 times.
✓ Branch 2 taken 46 times.
|
276 | mc_rep_funcs(qpel_v, 8, 16, 32, sse4) |
506 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_v, 8, 8, 24, sse4) |
507 |
2/2✓ Branch 1 taken 464 times.
✓ Branch 2 taken 58 times.
|
1044 | mc_rep_funcs(qpel_v,10, 8, 64, sse4) |
508 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(qpel_v,10, 8, 48, sse4) |
509 |
2/2✓ Branch 1 taken 304 times.
✓ Branch 2 taken 76 times.
|
760 | mc_rep_funcs(qpel_v,10, 8, 32, sse4) |
510 |
2/2✓ Branch 1 taken 138 times.
✓ Branch 2 taken 46 times.
|
368 | mc_rep_funcs(qpel_v,10, 8, 24, sse4) |
511 |
2/2✓ Branch 1 taken 96 times.
✓ Branch 2 taken 48 times.
|
288 | mc_rep_funcs(qpel_v,10, 8, 16, sse4) |
512 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_v,10, 4, 12, sse4) |
513 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(qpel_v,12, 8, 64, sse4) |
514 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(qpel_v,12, 8, 48, sse4) |
515 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(qpel_v,12, 8, 32, sse4) |
516 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_v,12, 8, 24, sse4) |
517 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(qpel_v,12, 8, 16, sse4) |
518 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_v,12, 4, 12, sse4) |
519 |
2/2✓ Branch 1 taken 928 times.
✓ Branch 2 taken 116 times.
|
2088 | mc_rep_funcs(qpel_hv, 8, 8, 64, sse4) |
520 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(qpel_hv, 8, 8, 48, sse4) |
521 |
2/2✓ Branch 1 taken 292 times.
✓ Branch 2 taken 73 times.
|
730 | mc_rep_funcs(qpel_hv, 8, 8, 32, sse4) |
522 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_hv, 8, 8, 24, sse4) |
523 |
2/2✓ Branch 1 taken 202 times.
✓ Branch 2 taken 101 times.
|
606 | mc_rep_funcs(qpel_hv, 8, 8, 16, sse4) |
524 | 78 | mc_rep_funcs2(qpel_hv,8, 8, 4, 12, sse4) | |
525 |
2/2✓ Branch 1 taken 480 times.
✓ Branch 2 taken 60 times.
|
1080 | mc_rep_funcs(qpel_hv,10, 8, 64, sse4) |
526 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(qpel_hv,10, 8, 48, sse4) |
527 |
2/2✓ Branch 1 taken 272 times.
✓ Branch 2 taken 68 times.
|
680 | mc_rep_funcs(qpel_hv,10, 8, 32, sse4) |
528 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(qpel_hv,10, 8, 24, sse4) |
529 |
2/2✓ Branch 1 taken 100 times.
✓ Branch 2 taken 50 times.
|
300 | mc_rep_funcs(qpel_hv,10, 8, 16, sse4) |
530 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_hv,10, 4, 12, sse4) |
531 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(qpel_hv,12, 8, 64, sse4) |
532 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(qpel_hv,12, 8, 48, sse4) |
533 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(qpel_hv,12, 8, 32, sse4) |
534 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_hv,12, 8, 24, sse4) |
535 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(qpel_hv,12, 8, 16, sse4) |
536 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_hv,12, 4, 12, sse4) |
537 | |||
/*
 * mc_rep_uni_w(bitd, step, W, opt): define ff_hevc_put_uni_w<W>_<bitd>_<opt>()
 * by repeating the narrower ff_hevc_put_uni_w<step>_<bitd>_<opt>() kernel
 * across a W-sample-wide block.
 *
 * The generated function walks i = 0, step, 2*step, ... while i < W and calls
 * the step-wide kernel once per iteration with:
 *   - src advanced by i int16_t elements,
 *   - dst advanced by i * ((bitd + 7) / 8) bytes, i.e. 1 byte per sample for
 *     bitd == 8 and 2 bytes per sample for bitd == 10 or 12.
 * dststride, height, denom, _wx and _ox are forwarded unchanged.
 */
538 | #define mc_rep_uni_w(bitd, step, W, opt) \ | ||
539 | void ff_hevc_put_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \ | ||
540 | int height, int denom, int _wx, int _ox) \ | ||
541 | { \ | ||
542 | int i; \ | ||
543 | uint8_t *dst; \ | ||
544 | for (i = 0; i < W; i += step) { \ | ||
545 | const int16_t *src = _src + i; \ | ||
546 | dst= _dst + (i * ((bitd + 7) / 8)); \ | ||
547 | ff_hevc_put_uni_w##step##_##bitd##_##opt(dst, dststride, src, \ | ||
548 | height, denom, _wx, _ox); \ | ||
549 | } \ | ||
550 | } | ||
551 | |||
/*
 * SSE4 instantiations of mc_rep_uni_w for bit depths 8, 10 and 12.
 * Width 12 is built from two calls of the 6-wide kernel; widths 16, 24, 32,
 * 48 and 64 are built from repeated calls of the 8-wide kernel.
 */
552 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_uni_w(8, 6, 12, sse4) |
553 |
2/2✓ Branch 1 taken 21144 times.
✓ Branch 2 taken 10572 times.
|
31716 | mc_rep_uni_w(8, 8, 16, sse4) |
554 |
2/2✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
|
504 | mc_rep_uni_w(8, 8, 24, sse4) |
555 |
2/2✓ Branch 1 taken 54668 times.
✓ Branch 2 taken 13667 times.
|
68335 | mc_rep_uni_w(8, 8, 32, sse4) |
556 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_uni_w(8, 8, 48, sse4) |
557 |
2/2✓ Branch 1 taken 43016 times.
✓ Branch 2 taken 5377 times.
|
48393 | mc_rep_uni_w(8, 8, 64, sse4) |
558 | |||
559 |
2/2✓ Branch 1 taken 284 times.
✓ Branch 2 taken 142 times.
|
426 | mc_rep_uni_w(10, 6, 12, sse4) |
560 |
2/2✓ Branch 1 taken 3220 times.
✓ Branch 2 taken 1610 times.
|
4830 | mc_rep_uni_w(10, 8, 16, sse4) |
561 |
2/2✓ Branch 1 taken 402 times.
✓ Branch 2 taken 134 times.
|
536 | mc_rep_uni_w(10, 8, 24, sse4) |
562 |
2/2✓ Branch 1 taken 4728 times.
✓ Branch 2 taken 1182 times.
|
5910 | mc_rep_uni_w(10, 8, 32, sse4) |
563 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_uni_w(10, 8, 48, sse4) |
564 |
2/2✓ Branch 1 taken 2832 times.
✓ Branch 2 taken 354 times.
|
3186 | mc_rep_uni_w(10, 8, 64, sse4) |
565 | |||
566 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_uni_w(12, 6, 12, sse4) |
567 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_uni_w(12, 8, 16, sse4) |
568 |
2/2✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
|
504 | mc_rep_uni_w(12, 8, 24, sse4) |
569 |
2/2✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
|
630 | mc_rep_uni_w(12, 8, 32, sse4) |
570 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_uni_w(12, 8, 48, sse4) |
571 |
2/2✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
|
1134 | mc_rep_uni_w(12, 8, 64, sse4) |
572 | |||
/*
 * mc_rep_bi_w(bitd, step, W, opt): define ff_hevc_put_bi_w<W>_<bitd>_<opt>()
 * by repeating the narrower ff_hevc_put_bi_w<step>_<bitd>_<opt>() kernel
 * across a W-sample-wide block.
 *
 * For each i = 0, step, 2*step, ... while i < W, the step-wide kernel is
 * called with both int16_t inputs (_src and _src2) advanced by i elements and
 * dst advanced by i * ((bitd + 7) / 8) bytes (1 byte per sample for 8-bit,
 * 2 bytes per sample for 10- and 12-bit). dststride, height, denom and the
 * weight/offset pairs (_wx0/_wx1, _ox0/_ox1) are forwarded unchanged.
 */
573 | #define mc_rep_bi_w(bitd, step, W, opt) \ | ||
574 | void ff_hevc_put_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \ | ||
575 | const int16_t *_src2, int height, \ | ||
576 | int denom, int _wx0, int _wx1, int _ox0, int _ox1) \ | ||
577 | { \ | ||
578 | int i; \ | ||
579 | uint8_t *dst; \ | ||
580 | for (i = 0; i < W; i += step) { \ | ||
581 | const int16_t *src = _src + i; \ | ||
582 | const int16_t *src2 = _src2 + i; \ | ||
583 | dst = _dst + (i * ((bitd + 7) / 8)); \ | ||
584 | ff_hevc_put_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \ | ||
585 | height, denom, _wx0, _wx1, _ox0, _ox1); \ | ||
586 | } \ | ||
587 | } | ||
588 | |||
/*
 * SSE4 instantiations of mc_rep_bi_w for bit depths 8, 10 and 12.
 * Width 12 is built from two calls of the 6-wide kernel; widths 16, 24, 32,
 * 48 and 64 are built from repeated calls of the 8-wide kernel.
 */
589 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_bi_w(8, 6, 12, sse4) |
590 |
2/2✓ Branch 1 taken 4896 times.
✓ Branch 2 taken 2448 times.
|
7344 | mc_rep_bi_w(8, 8, 16, sse4) |
591 |
2/2✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
|
504 | mc_rep_bi_w(8, 8, 24, sse4) |
592 |
2/2✓ Branch 1 taken 23552 times.
✓ Branch 2 taken 5888 times.
|
29440 | mc_rep_bi_w(8, 8, 32, sse4) |
593 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_bi_w(8, 8, 48, sse4) |
594 |
2/2✓ Branch 1 taken 21144 times.
✓ Branch 2 taken 2643 times.
|
23787 | mc_rep_bi_w(8, 8, 64, sse4) |
595 | |||
596 |
2/2✓ Branch 1 taken 268 times.
✓ Branch 2 taken 134 times.
|
402 | mc_rep_bi_w(10, 6, 12, sse4) |
597 |
2/2✓ Branch 1 taken 2916 times.
✓ Branch 2 taken 1458 times.
|
4374 | mc_rep_bi_w(10, 8, 16, sse4) |
598 |
2/2✓ Branch 1 taken 390 times.
✓ Branch 2 taken 130 times.
|
520 | mc_rep_bi_w(10, 8, 24, sse4) |
599 |
2/2✓ Branch 1 taken 4760 times.
✓ Branch 2 taken 1190 times.
|
5950 | mc_rep_bi_w(10, 8, 32, sse4) |
600 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_bi_w(10, 8, 48, sse4) |
601 |
2/2✓ Branch 1 taken 2928 times.
✓ Branch 2 taken 366 times.
|
3294 | mc_rep_bi_w(10, 8, 64, sse4) |
602 | |||
603 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_bi_w(12, 6, 12, sse4) |
604 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_bi_w(12, 8, 16, sse4) |
605 |
2/2✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
|
504 | mc_rep_bi_w(12, 8, 24, sse4) |
606 |
2/2✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
|
630 | mc_rep_bi_w(12, 8, 32, sse4) |
607 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_bi_w(12, 8, 48, sse4) |
608 |
2/2✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
|
1134 | mc_rep_bi_w(12, 8, 64, sse4) |
609 | |||
/*
 * mc_uni_w_func(name, bitd, W, opt): define the static two-pass wrapper
 * hevc_put_uni_w_<name><W>_<bitd>_<opt>().
 *
 * Pass 1: hevc_put_<name><W>_<bitd>_<opt>() is called with a local int16_t
 *         scratch array `temp` as its first argument, together with the
 *         source pointer/stride, height, mx, my and width.
 * Pass 2: ff_hevc_put_uni_w<W>_<bitd>_<opt>() is called with `temp` in its
 *         int16_t source position and applies denom/_wx/_ox while writing to
 *         _dst with stride _dststride.
 *
 * `temp` is declared with LOCAL_ALIGNED_16 and holds 71 * MAX_PB_SIZE
 * elements.
 * NOTE(review): 71 is presumably 64 rows plus 7 extra rows for the 8-tap
 * filter -- confirm against the put functions.
 */
610 | #define mc_uni_w_func(name, bitd, W, opt) \ | ||
611 | static void hevc_put_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \ | ||
612 | const uint8_t *_src, ptrdiff_t _srcstride, \ | ||
613 | int height, int denom, \ | ||
614 | int _wx, int _ox, \ | ||
615 | intptr_t mx, intptr_t my, int width) \ | ||
616 | { \ | ||
617 | LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ | ||
618 | hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ | ||
619 | ff_hevc_put_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox); \ | ||
620 | } | ||
621 | |||
/*
 * mc_uni_w_funcs(name, bitd, opt): instantiate mc_uni_w_func for the block
 * widths 4, 8, 12, 16, 24, 32, 48 and 64. Width 6 is not part of this list;
 * callers that need it invoke mc_uni_w_func(name, bitd, 6, opt) separately.
 */
622 | #define mc_uni_w_funcs(name, bitd, opt) \ | ||
623 | mc_uni_w_func(name, bitd, 4, opt) \ | ||
624 | mc_uni_w_func(name, bitd, 8, opt) \ | ||
625 | mc_uni_w_func(name, bitd, 12, opt) \ | ||
626 | mc_uni_w_func(name, bitd, 16, opt) \ | ||
627 | mc_uni_w_func(name, bitd, 24, opt) \ | ||
628 | mc_uni_w_func(name, bitd, 32, opt) \ | ||
629 | mc_uni_w_func(name, bitd, 48, opt) \ | ||
630 | mc_uni_w_func(name, bitd, 64, opt) | ||
631 | |||
/*
 * SSE4 uni-weighted wrappers for bit depths 8, 10 and 12.
 * pel_pixels and the epel_{h,v,hv} variants get the standard width set plus
 * an extra width-6 instance; the qpel_{h,v,hv} variants get the standard
 * width set only.
 */
632 | 81664 | mc_uni_w_funcs(pel_pixels, 8, sse4) | |
633 | 18 | mc_uni_w_func(pel_pixels, 8, 6, sse4) | |
634 | 7784 | mc_uni_w_funcs(epel_h, 8, sse4) | |
635 | 18 | mc_uni_w_func(epel_h, 8, 6, sse4) | |
636 | 7424 | mc_uni_w_funcs(epel_v, 8, sse4) | |
637 | 18 | mc_uni_w_func(epel_v, 8, 6, sse4) | |
638 | 7664 | mc_uni_w_funcs(epel_hv, 8, sse4) | |
639 | 18 | mc_uni_w_func(epel_hv, 8, 6, sse4) | |
640 | 298 | mc_uni_w_funcs(qpel_h, 8, sse4) | |
641 | 308 | mc_uni_w_funcs(qpel_v, 8, sse4) | |
642 | 882 | mc_uni_w_funcs(qpel_hv, 8, sse4) | |
643 | |||
644 | 4980 | mc_uni_w_funcs(pel_pixels, 10, sse4) | |
645 | 18 | mc_uni_w_func(pel_pixels, 10, 6, sse4) | |
646 | 1352 | mc_uni_w_funcs(epel_h, 10, sse4) | |
647 | 18 | mc_uni_w_func(epel_h, 10, 6, sse4) | |
648 | 360 | mc_uni_w_funcs(epel_v, 10, sse4) | |
649 | 18 | mc_uni_w_func(epel_v, 10, 6, sse4) | |
650 | 536 | mc_uni_w_funcs(epel_hv, 10, sse4) | |
651 | 18 | mc_uni_w_func(epel_hv, 10, 6, sse4) | |
652 | 844 | mc_uni_w_funcs(qpel_h, 10, sse4) | |
653 | 332 | mc_uni_w_funcs(qpel_v, 10, sse4) | |
654 | 380 | mc_uni_w_funcs(qpel_hv, 10, sse4) | |
655 | |||
656 | 288 | mc_uni_w_funcs(pel_pixels, 12, sse4) | |
657 | 18 | mc_uni_w_func(pel_pixels, 12, 6, sse4) | |
658 | 288 | mc_uni_w_funcs(epel_h, 12, sse4) | |
659 | 18 | mc_uni_w_func(epel_h, 12, 6, sse4) | |
660 | 288 | mc_uni_w_funcs(epel_v, 12, sse4) | |
661 | 18 | mc_uni_w_func(epel_v, 12, 6, sse4) | |
662 | 288 | mc_uni_w_funcs(epel_hv, 12, sse4) | |
663 | 18 | mc_uni_w_func(epel_hv, 12, 6, sse4) | |
664 | 288 | mc_uni_w_funcs(qpel_h, 12, sse4) | |
665 | 288 | mc_uni_w_funcs(qpel_v, 12, sse4) | |
666 | 288 | mc_uni_w_funcs(qpel_hv, 12, sse4) | |
667 | |||
/*
 * mc_bi_w_func(name, bitd, W, opt): define the static two-pass wrapper
 * hevc_put_bi_w_<name><W>_<bitd>_<opt>().
 *
 * Pass 1: hevc_put_<name><W>_<bitd>_<opt>() is called with a local int16_t
 *         scratch array `temp` as its first argument, together with the
 *         source pointer/stride, height, mx, my and width.
 * Pass 2: ff_hevc_put_bi_w<W>_<bitd>_<opt>() is called with `temp` and the
 *         caller-supplied _src2 as its two int16_t inputs and applies denom,
 *         _wx0/_wx1 and _ox0/_ox1 while writing to _dst with stride
 *         _dststride.
 *
 * `temp` is declared with LOCAL_ALIGNED_16 and holds 71 * MAX_PB_SIZE
 * elements.
 * NOTE(review): 71 is presumably 64 rows plus 7 extra rows for the 8-tap
 * filter -- confirm against the put functions.
 */
668 | #define mc_bi_w_func(name, bitd, W, opt) \ | ||
669 | static void hevc_put_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \ | ||
670 | const uint8_t *_src, ptrdiff_t _srcstride, \ | ||
671 | const int16_t *_src2, \ | ||
672 | int height, int denom, \ | ||
673 | int _wx0, int _wx1, int _ox0, int _ox1, \ | ||
674 | intptr_t mx, intptr_t my, int width) \ | ||
675 | { \ | ||
676 | LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ | ||
677 | hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ | ||
678 | ff_hevc_put_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \ | ||
679 | height, denom, _wx0, _wx1, _ox0, _ox1); \ | ||
680 | } | ||
681 | |||
/*
 * mc_bi_w_funcs(name, bitd, opt): instantiate mc_bi_w_func for the block
 * widths 4, 8, 12, 16, 24, 32, 48 and 64. Width 6 is not part of this list;
 * callers that need it invoke mc_bi_w_func(name, bitd, 6, opt) separately.
 */
682 | #define mc_bi_w_funcs(name, bitd, opt) \ | ||
683 | mc_bi_w_func(name, bitd, 4, opt) \ | ||
684 | mc_bi_w_func(name, bitd, 8, opt) \ | ||
685 | mc_bi_w_func(name, bitd, 12, opt) \ | ||
686 | mc_bi_w_func(name, bitd, 16, opt) \ | ||
687 | mc_bi_w_func(name, bitd, 24, opt) \ | ||
688 | mc_bi_w_func(name, bitd, 32, opt) \ | ||
689 | mc_bi_w_func(name, bitd, 48, opt) \ | ||
690 | mc_bi_w_func(name, bitd, 64, opt) | ||
691 | |||
/*
 * SSE4 bi-weighted wrappers for bit depths 8, 10 and 12.
 * pel_pixels and the epel_{h,v,hv} variants get the standard width set plus
 * an extra width-6 instance; the qpel_{h,v,hv} variants get the standard
 * width set only.
 */
692 | 23926 | mc_bi_w_funcs(pel_pixels, 8, sse4) | |
693 | 18 | mc_bi_w_func(pel_pixels, 8, 6, sse4) | |
694 | 1240 | mc_bi_w_funcs(epel_h, 8, sse4) | |
695 | 18 | mc_bi_w_func(epel_h, 8, 6, sse4) | |
696 | 4860 | mc_bi_w_funcs(epel_v, 8, sse4) | |
697 | 18 | mc_bi_w_func(epel_v, 8, 6, sse4) | |
698 | 2416 | mc_bi_w_funcs(epel_hv, 8, sse4) | |
699 | 18 | mc_bi_w_func(epel_hv, 8, 6, sse4) | |
700 | 288 | mc_bi_w_funcs(qpel_h, 8, sse4) | |
701 | 288 | mc_bi_w_funcs(qpel_v, 8, sse4) | |
702 | 288 | mc_bi_w_funcs(qpel_hv, 8, sse4) | |
703 | |||
704 | 4336 | mc_bi_w_funcs(pel_pixels, 10, sse4) | |
705 | 18 | mc_bi_w_func(pel_pixels, 10, 6, sse4) | |
706 | 1392 | mc_bi_w_funcs(epel_h, 10, sse4) | |
707 | 18 | mc_bi_w_func(epel_h, 10, 6, sse4) | |
708 | 448 | mc_bi_w_funcs(epel_v, 10, sse4) | |
709 | 18 | mc_bi_w_func(epel_v, 10, 6, sse4) | |
710 | 312 | mc_bi_w_funcs(epel_hv, 10, sse4) | |
711 | 18 | mc_bi_w_func(epel_hv, 10, 6, sse4) | |
712 | 836 | mc_bi_w_funcs(qpel_h, 10, sse4) | |
713 | 368 | mc_bi_w_funcs(qpel_v, 10, sse4) | |
714 | 300 | mc_bi_w_funcs(qpel_hv, 10, sse4) | |
715 | |||
716 | 288 | mc_bi_w_funcs(pel_pixels, 12, sse4) | |
717 | 18 | mc_bi_w_func(pel_pixels, 12, 6, sse4) | |
718 | 288 | mc_bi_w_funcs(epel_h, 12, sse4) | |
719 | 18 | mc_bi_w_func(epel_h, 12, 6, sse4) | |
720 | 288 | mc_bi_w_funcs(epel_v, 12, sse4) | |
721 | 18 | mc_bi_w_func(epel_v, 12, 6, sse4) | |
722 | 288 | mc_bi_w_funcs(epel_hv, 12, sse4) | |
723 | 18 | mc_bi_w_func(epel_hv, 12, 6, sse4) | |
724 | 288 | mc_bi_w_funcs(qpel_h, 12, sse4) | |
725 | 288 | mc_bi_w_funcs(qpel_v, 12, sse4) | |
726 | 288 | mc_bi_w_funcs(qpel_hv, 12, sse4) | |
727 | #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL | ||
728 | |||
/*
 * SAO_BAND_FILTER_FUNCS(bitd, opt): declare (prototype only, no definition)
 * the five SAO band-filter entry points
 * ff_hevc_sao_band_filter_{8,16,32,48,64}_<bitd>_<opt>().
 * All five share one signature: destination and source pointers with their
 * respective strides, the offset table, the left band class, and the block
 * width and height.
 * NOTE(review): the definitions are presumably provided by external assembly;
 * they are not visible in this file.
 */
729 | #define SAO_BAND_FILTER_FUNCS(bitd, opt) \ | ||
730 | void ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ | ||
731 | const int16_t *sao_offset_val, int sao_left_class, int width, int height); \ | ||
732 | void ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ | ||
733 | const int16_t *sao_offset_val, int sao_left_class, int width, int height); \ | ||
734 | void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ | ||
735 | const int16_t *sao_offset_val, int sao_left_class, int width, int height); \ | ||
736 | void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ | ||
737 | const int16_t *sao_offset_val, int sao_left_class, int width, int height); \ | ||
738 | void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ | ||
739 | const int16_t *sao_offset_val, int sao_left_class, int width, int height); | ||
740 | |||
/* Band-filter prototypes for every (bit depth, ISA) pair: 8/10/12-bit x sse2, avx, avx2. */
741 | SAO_BAND_FILTER_FUNCS(8, sse2) | ||
742 | SAO_BAND_FILTER_FUNCS(10, sse2) | ||
743 | SAO_BAND_FILTER_FUNCS(12, sse2) | ||
744 | SAO_BAND_FILTER_FUNCS(8, avx) | ||
745 | SAO_BAND_FILTER_FUNCS(10, avx) | ||
746 | SAO_BAND_FILTER_FUNCS(12, avx) | ||
747 | SAO_BAND_FILTER_FUNCS(8, avx2) | ||
748 | SAO_BAND_FILTER_FUNCS(10, avx2) | ||
749 | SAO_BAND_FILTER_FUNCS(12, avx2) | ||
750 | |||
/*
 * SAO_BAND_INIT(bitd, opt): point c->sao_band_filter[0..4] at the
 * <bitd>/<opt> band-filter functions for widths 8, 16, 32, 48 and 64
 * respectively. Expects a variable named `c` in the enclosing scope.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
751 | #define SAO_BAND_INIT(bitd, opt) do { \ | ||
752 | c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_##bitd##_##opt; \ | ||
753 | c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_##bitd##_##opt; \ | ||
754 | c->sao_band_filter[2] = ff_hevc_sao_band_filter_32_##bitd##_##opt; \ | ||
755 | c->sao_band_filter[3] = ff_hevc_sao_band_filter_48_##bitd##_##opt; \ | ||
756 | c->sao_band_filter[4] = ff_hevc_sao_band_filter_64_##bitd##_##opt; \ | ||
757 | } while (0) | ||
758 | |||
/*
 * SAO_EDGE_FILTER_FUNCS(bitd, opt): declare (prototype only, no definition)
 * the five SAO edge-filter entry points
 * ff_hevc_sao_edge_filter_{8,16,32,48,64}_<bitd>_<opt>().
 * All five share one signature: destination pointer, source pointer, the
 * destination stride, the offset table, the edge-offset selector `eo`, and
 * the block width and height. Unlike the band filter there is no source
 * stride parameter.
 * NOTE(review): the last prototype line ends with a line-continuation
 * backslash, so the macro body extends onto the following blank line. This is
 * harmless only while that line stays blank.
 */
759 | #define SAO_EDGE_FILTER_FUNCS(bitd, opt) \ | ||
760 | void ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \ | ||
761 | const int16_t *sao_offset_val, int eo, int width, int height); \ | ||
762 | void ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \ | ||
763 | const int16_t *sao_offset_val, int eo, int width, int height); \ | ||
764 | void ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \ | ||
765 | const int16_t *sao_offset_val, int eo, int width, int height); \ | ||
766 | void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \ | ||
767 | const int16_t *sao_offset_val, int eo, int width, int height); \ | ||
768 | void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \ | ||
769 | const int16_t *sao_offset_val, int eo, int width, int height); \ | ||
770 | |||
/*
 * Edge-filter prototypes: 8-bit uses ssse3 and avx2; 10- and 12-bit use sse2
 * and avx2.
 */
771 | SAO_EDGE_FILTER_FUNCS(8, ssse3) | ||
772 | SAO_EDGE_FILTER_FUNCS(8, avx2) | ||
773 | SAO_EDGE_FILTER_FUNCS(10, sse2) | ||
774 | SAO_EDGE_FILTER_FUNCS(10, avx2) | ||
775 | SAO_EDGE_FILTER_FUNCS(12, sse2) | ||
776 | SAO_EDGE_FILTER_FUNCS(12, avx2) | ||
777 | |||
/*
 * SAO_EDGE_INIT(bitd, opt): point c->sao_edge_filter[0..4] at the
 * <bitd>/<opt> edge-filter functions for widths 8, 16, 32, 48 and 64
 * respectively. Expects a variable named `c` in the enclosing scope.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
778 | #define SAO_EDGE_INIT(bitd, opt) do { \ | ||
779 | c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt; \ | ||
780 | c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \ | ||
781 | c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \ | ||
782 | c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \ | ||
783 | c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \ | ||
784 | } while (0) | ||
785 | |||
/*
 * PEL_LINK(dst, idx1, idx2, idx3, name, D, opt): fill slot [idx1][idx2][idx3]
 * of five parallel function-pointer tables derived from `dst`:
 *   dst        <- hevc_put_<name>_<D>_<opt>
 *   dst_bi     <- ff_hevc_put_bi_<name>_<D>_<opt>   (note the ff_ prefix)
 *   dst_uni    <- hevc_put_uni_<name>_<D>_<opt>
 *   dst_uni_w  <- hevc_put_uni_w_<name>_<D>_<opt>
 *   dst_bi_w   <- hevc_put_bi_w_<name>_<D>_<opt>
 * The last assignment has no trailing semicolon; the caller supplies it.
 * NOTE(review): this expands to five separate statements and is not wrapped
 * in do { } while (0), unlike SAO_BAND_INIT / SAO_EDGE_INIT. It is only safe
 * inside a braced block; under an unbraced if/else only the first assignment
 * would be conditional.
 */
786 | #define PEL_LINK(dst, idx1, idx2, idx3, name, D, opt) \ | ||
787 | dst [idx1][idx2][idx3] = hevc_put_ ## name ## _ ## D ## _##opt; \ | ||
788 | dst ## _bi [idx1][idx2][idx3] = ff_hevc_put_bi_ ## name ## _ ## D ## _##opt; \ | ||
789 | dst ## _uni [idx1][idx2][idx3] = hevc_put_uni_ ## name ## _ ## D ## _##opt; \ | ||
790 | dst ## _uni_w[idx1][idx2][idx3] = hevc_put_uni_w_ ## name ## _ ## D ## _##opt; \ | ||
791 | dst ## _bi_w [idx1][idx2][idx3] = hevc_put_bi_w_ ## name ## _ ## D ## _##opt | ||
792 | |||
/*
 * EPEL_LINKS(pointer, my, mx, fname, bitd, opt): apply PEL_LINK for every
 * EPEL block width. The first table index selects the width:
 *   1->4, 2->6, 3->8, 4->12, 5->16, 6->24, 7->32, 8->48, 9->64.
 * `my` and `mx` are passed through as the second and third table indices.
 * Expands to multiple statements (see PEL_LINK), so it must be used inside a
 * braced block, followed by a semicolon.
 */
793 | #define EPEL_LINKS(pointer, my, mx, fname, bitd, opt ) \ | ||
794 | PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \ | ||
795 | PEL_LINK(pointer, 2, my , mx , fname##6 , bitd, opt ); \ | ||
796 | PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \ | ||
797 | PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \ | ||
798 | PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \ | ||
799 | PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \ | ||
800 | PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \ | ||
801 | PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \ | ||
802 | PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt ) | ||
/*
 * QPEL_LINKS(pointer, my, mx, fname, bitd, opt): apply PEL_LINK for every
 * QPEL block width. Same index-to-width mapping as EPEL_LINKS
 * (1->4, 3->8, 4->12, 5->16, 6->24, 7->32, 8->48, 9->64) except that index 2
 * (width 6) is not populated here.
 * Expands to multiple statements (see PEL_LINK), so it must be used inside a
 * braced block, followed by a semicolon.
 */
803 | #define QPEL_LINKS(pointer, my, mx, fname, bitd, opt) \ | ||
804 | PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \ | ||
805 | PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \ | ||
806 | PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \ | ||
807 | PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \ | ||
808 | PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \ | ||
809 | PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \ | ||
810 | PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \ | ||
811 | PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt ) | ||
812 | |||
813 | 1484 | void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) | |
814 | { | ||
815 | 1484 | int cpu_flags = av_get_cpu_flags(); | |
816 | |||
817 |
2/2✓ Branch 0 taken 619 times.
✓ Branch 1 taken 865 times.
|
1484 | if (bit_depth == 8) { |
818 |
2/2✓ Branch 0 taken 226 times.
✓ Branch 1 taken 393 times.
|
619 | if (EXTERNAL_MMXEXT(cpu_flags)) { |
819 | 226 | c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext; | |
820 | |||
821 | 226 | c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext; | |
822 | } | ||
823 |
2/2✓ Branch 0 taken 186 times.
✓ Branch 1 taken 433 times.
|
619 | if (EXTERNAL_SSE2(cpu_flags)) { |
824 | 186 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2; | |
825 | 186 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2; | |
826 | if (ARCH_X86_64) { | ||
827 | 186 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2; | |
828 | 186 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2; | |
829 | |||
830 | 186 | c->idct[2] = ff_hevc_idct_16x16_8_sse2; | |
831 | 186 | c->idct[3] = ff_hevc_idct_32x32_8_sse2; | |
832 | } | ||
833 | 186 | SAO_BAND_INIT(8, sse2); | |
834 | |||
835 | 186 | c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2; | |
836 | 186 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2; | |
837 | 186 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2; | |
838 | |||
839 | 186 | c->idct[0] = ff_hevc_idct_4x4_8_sse2; | |
840 | 186 | c->idct[1] = ff_hevc_idct_8x8_8_sse2; | |
841 | |||
842 | 186 | c->add_residual[1] = ff_hevc_add_residual_8_8_sse2; | |
843 | 186 | c->add_residual[2] = ff_hevc_add_residual_16_8_sse2; | |
844 | 186 | c->add_residual[3] = ff_hevc_add_residual_32_8_sse2; | |
845 | } | ||
846 |
2/2✓ Branch 0 taken 146 times.
✓ Branch 1 taken 473 times.
|
619 | if (EXTERNAL_SSSE3(cpu_flags)) { |
847 | if(ARCH_X86_64) { | ||
848 | 146 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3; | |
849 | 146 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3; | |
850 | } | ||
851 | 146 | SAO_EDGE_INIT(8, ssse3); | |
852 | } | ||
853 | #if HAVE_SSE4_EXTERNAL && ARCH_X86_64 | ||
854 |
2/2✓ Branch 0 taken 126 times.
✓ Branch 1 taken 493 times.
|
619 | if (EXTERNAL_SSE4(cpu_flags)) { |
855 | |||
856 | 126 | EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4); | |
857 | 126 | EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4); | |
858 | 126 | EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4); | |
859 | 126 | EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 8, sse4); | |
860 | |||
861 | 126 | QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4); | |
862 | 126 | QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 8, sse4); | |
863 | 126 | QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4); | |
864 | 126 | QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4); | |
865 | } | ||
866 | #endif | ||
867 |
2/2✓ Branch 0 taken 66 times.
✓ Branch 1 taken 553 times.
|
619 | if (EXTERNAL_AVX(cpu_flags)) { |
868 | 66 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx; | |
869 | 66 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx; | |
870 | if (ARCH_X86_64) { | ||
871 | 66 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx; | |
872 | 66 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx; | |
873 | |||
874 | 66 | c->idct[2] = ff_hevc_idct_16x16_8_avx; | |
875 | 66 | c->idct[3] = ff_hevc_idct_32x32_8_avx; | |
876 | } | ||
877 | 66 | SAO_BAND_INIT(8, avx); | |
878 | |||
879 | 66 | c->idct[0] = ff_hevc_idct_4x4_8_avx; | |
880 | 66 | c->idct[1] = ff_hevc_idct_8x8_8_avx; | |
881 | |||
882 | 66 | c->add_residual[1] = ff_hevc_add_residual_8_8_avx; | |
883 | 66 | c->add_residual[2] = ff_hevc_add_residual_16_8_avx; | |
884 | 66 | c->add_residual[3] = ff_hevc_add_residual_32_8_avx; | |
885 | } | ||
886 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 593 times.
|
619 | if (EXTERNAL_AVX2(cpu_flags)) { |
887 | 26 | c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2; | |
888 | 26 | c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2; | |
889 | } | ||
890 | #if HAVE_AVX2_EXTERNAL | ||
891 |
3/4✓ Branch 0 taken 26 times.
✓ Branch 1 taken 593 times.
✓ Branch 2 taken 26 times.
✗ Branch 3 not taken.
|
619 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
892 | 26 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2; | |
893 | 26 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2; | |
894 | |||
895 | #if ARCH_X86_64 | ||
896 | 26 | c->put_hevc_epel[7][0][0] = hevc_put_pel_pixels32_8_avx2; | |
897 | 26 | c->put_hevc_epel[8][0][0] = hevc_put_pel_pixels48_8_avx2; | |
898 | 26 | c->put_hevc_epel[9][0][0] = hevc_put_pel_pixels64_8_avx2; | |
899 | |||
900 | 26 | c->put_hevc_qpel[7][0][0] = hevc_put_pel_pixels32_8_avx2; | |
901 | 26 | c->put_hevc_qpel[8][0][0] = hevc_put_pel_pixels48_8_avx2; | |
902 | 26 | c->put_hevc_qpel[9][0][0] = hevc_put_pel_pixels64_8_avx2; | |
903 | |||
904 | 26 | c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2; | |
905 | 26 | c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2; | |
906 | 26 | c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2; | |
907 | |||
908 | 26 | c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2; | |
909 | 26 | c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2; | |
910 | 26 | c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2; | |
911 | |||
912 | 26 | c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2; | |
913 | 26 | c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2; | |
914 | 26 | c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2; | |
915 | |||
916 | 26 | c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2; | |
917 | 26 | c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2; | |
918 | 26 | c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2; | |
919 | |||
920 | 26 | c->put_hevc_epel[7][0][1] = hevc_put_epel_h32_8_avx2; | |
921 | 26 | c->put_hevc_epel[8][0][1] = hevc_put_epel_h48_8_avx2; | |
922 | 26 | c->put_hevc_epel[9][0][1] = hevc_put_epel_h64_8_avx2; | |
923 | |||
924 | 26 | c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_8_avx2; | |
925 | 26 | c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_8_avx2; | |
926 | 26 | c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_8_avx2; | |
927 | |||
928 | 26 | c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_8_avx2; | |
929 | 26 | c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_8_avx2; | |
930 | 26 | c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_8_avx2; | |
931 | |||
932 | 26 | c->put_hevc_epel[7][1][0] = hevc_put_epel_v32_8_avx2; | |
933 | 26 | c->put_hevc_epel[8][1][0] = hevc_put_epel_v48_8_avx2; | |
934 | 26 | c->put_hevc_epel[9][1][0] = hevc_put_epel_v64_8_avx2; | |
935 | |||
936 | 26 | c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_8_avx2; | |
937 | 26 | c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_8_avx2; | |
938 | 26 | c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_8_avx2; | |
939 | |||
940 | 26 | c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_8_avx2; | |
941 | 26 | c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_8_avx2; | |
942 | 26 | c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_8_avx2; | |
943 | |||
944 | 26 | c->put_hevc_epel[7][1][1] = hevc_put_epel_hv32_8_avx2; | |
945 | 26 | c->put_hevc_epel[8][1][1] = hevc_put_epel_hv48_8_avx2; | |
946 | 26 | c->put_hevc_epel[9][1][1] = hevc_put_epel_hv64_8_avx2; | |
947 | |||
948 | 26 | c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_8_avx2; | |
949 | 26 | c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_8_avx2; | |
950 | 26 | c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_8_avx2; | |
951 | |||
952 | 26 | c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_8_avx2; | |
953 | 26 | c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_8_avx2; | |
954 | 26 | c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_8_avx2; | |
955 | |||
956 | 26 | c->put_hevc_qpel[7][0][1] = hevc_put_qpel_h32_8_avx2; | |
957 | 26 | c->put_hevc_qpel[8][0][1] = hevc_put_qpel_h48_8_avx2; | |
958 | 26 | c->put_hevc_qpel[9][0][1] = hevc_put_qpel_h64_8_avx2; | |
959 | |||
960 | 26 | c->put_hevc_qpel[7][1][0] = hevc_put_qpel_v32_8_avx2; | |
961 | 26 | c->put_hevc_qpel[8][1][0] = hevc_put_qpel_v48_8_avx2; | |
962 | 26 | c->put_hevc_qpel[9][1][0] = hevc_put_qpel_v64_8_avx2; | |
963 | |||
964 | 26 | c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_8_avx2; | |
965 | 26 | c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_8_avx2; | |
966 | 26 | c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_8_avx2; | |
967 | |||
968 | 26 | c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_8_avx2; | |
969 | 26 | c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_8_avx2; | |
970 | 26 | c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_8_avx2; | |
971 | |||
972 | 26 | c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_8_avx2; | |
973 | 26 | c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_8_avx2; | |
974 | 26 | c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_8_avx2; | |
975 | |||
976 | 26 | c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_8_avx2; | |
977 | 26 | c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_8_avx2; | |
978 | 26 | c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_8_avx2; | |
979 | #endif /* ARCH_X86_64 */ | ||
980 | |||
981 | 26 | SAO_BAND_INIT(8, avx2); | |
982 | |||
983 | 26 | c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2; | |
984 | 26 | c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2; | |
985 | 26 | c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2; | |
986 | |||
987 | 26 | c->add_residual[3] = ff_hevc_add_residual_32_8_avx2; | |
988 | } | ||
989 | #endif /* HAVE_AVX2_EXTERNAL */ | ||
990 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 619 times.
|
619 | if (EXTERNAL_AVX512ICL(cpu_flags) && ARCH_X86_64) { |
991 | ✗ | c->put_hevc_qpel[1][0][1] = ff_hevc_put_qpel_h4_8_avx512icl; | |
992 | ✗ | c->put_hevc_qpel[3][0][1] = ff_hevc_put_qpel_h8_8_avx512icl; | |
993 | ✗ | c->put_hevc_qpel[5][0][1] = ff_hevc_put_qpel_h16_8_avx512icl; | |
994 | ✗ | c->put_hevc_qpel[7][0][1] = ff_hevc_put_qpel_h32_8_avx512icl; | |
995 | ✗ | c->put_hevc_qpel[9][0][1] = ff_hevc_put_qpel_h64_8_avx512icl; | |
996 | ✗ | c->put_hevc_qpel[3][1][1] = ff_hevc_put_qpel_hv8_8_avx512icl; | |
997 | } | ||
998 |
2/2✓ Branch 0 taken 309 times.
✓ Branch 1 taken 556 times.
|
865 | } else if (bit_depth == 10) { |
999 |
2/2✓ Branch 0 taken 231 times.
✓ Branch 1 taken 78 times.
|
309 | if (EXTERNAL_MMXEXT(cpu_flags)) { |
1000 | 231 | c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext; | |
1001 | 231 | c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext; | |
1002 | } | ||
1003 |
2/2✓ Branch 0 taken 191 times.
✓ Branch 1 taken 118 times.
|
309 | if (EXTERNAL_SSE2(cpu_flags)) { |
1004 | 191 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2; | |
1005 | 191 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2; | |
1006 | if (ARCH_X86_64) { | ||
1007 | 191 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2; | |
1008 | 191 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2; | |
1009 | |||
1010 | 191 | c->idct[2] = ff_hevc_idct_16x16_10_sse2; | |
1011 | 191 | c->idct[3] = ff_hevc_idct_32x32_10_sse2; | |
1012 | } | ||
1013 | 191 | SAO_BAND_INIT(10, sse2); | |
1014 | 191 | SAO_EDGE_INIT(10, sse2); | |
1015 | |||
1016 | 191 | c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2; | |
1017 | 191 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2; | |
1018 | 191 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2; | |
1019 | |||
1020 | 191 | c->idct[0] = ff_hevc_idct_4x4_10_sse2; | |
1021 | 191 | c->idct[1] = ff_hevc_idct_8x8_10_sse2; | |
1022 | |||
1023 | 191 | c->add_residual[1] = ff_hevc_add_residual_8_10_sse2; | |
1024 | 191 | c->add_residual[2] = ff_hevc_add_residual_16_10_sse2; | |
1025 | 191 | c->add_residual[3] = ff_hevc_add_residual_32_10_sse2; | |
1026 | } | ||
1027 |
2/2✓ Branch 0 taken 151 times.
✓ Branch 1 taken 158 times.
|
309 | if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { |
1028 | 151 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3; | |
1029 | 151 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3; | |
1030 | } | ||
1031 | #if HAVE_SSE4_EXTERNAL && ARCH_X86_64 | ||
1032 |
2/2✓ Branch 0 taken 131 times.
✓ Branch 1 taken 178 times.
|
309 | if (EXTERNAL_SSE4(cpu_flags)) { |
1033 | 131 | EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4); | |
1034 | 131 | EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4); | |
1035 | 131 | EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4); | |
1036 | 131 | EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 10, sse4); | |
1037 | |||
1038 | 131 | QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4); | |
1039 | 131 | QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 10, sse4); | |
1040 | 131 | QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4); | |
1041 | 131 | QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4); | |
1042 | } | ||
1043 | #endif | ||
1044 |
2/2✓ Branch 0 taken 71 times.
✓ Branch 1 taken 238 times.
|
309 | if (EXTERNAL_AVX(cpu_flags)) { |
1045 | 71 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx; | |
1046 | 71 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx; | |
1047 | if (ARCH_X86_64) { | ||
1048 | 71 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx; | |
1049 | 71 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx; | |
1050 | |||
1051 | 71 | c->idct[2] = ff_hevc_idct_16x16_10_avx; | |
1052 | 71 | c->idct[3] = ff_hevc_idct_32x32_10_avx; | |
1053 | } | ||
1054 | |||
1055 | 71 | c->idct[0] = ff_hevc_idct_4x4_10_avx; | |
1056 | 71 | c->idct[1] = ff_hevc_idct_8x8_10_avx; | |
1057 | |||
1058 | 71 | SAO_BAND_INIT(10, avx); | |
1059 | } | ||
1060 |
2/2✓ Branch 0 taken 31 times.
✓ Branch 1 taken 278 times.
|
309 | if (EXTERNAL_AVX2(cpu_flags)) { |
1061 | 31 | c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2; | |
1062 | } | ||
1063 | #if HAVE_AVX2_EXTERNAL | ||
1064 |
3/4✓ Branch 0 taken 31 times.
✓ Branch 1 taken 278 times.
✓ Branch 2 taken 31 times.
✗ Branch 3 not taken.
|
309 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
1065 | 31 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2; | |
1066 | 31 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2; | |
1067 | |||
1068 | #if ARCH_X86_64 | ||
1069 | 31 | c->put_hevc_epel[5][0][0] = hevc_put_pel_pixels16_10_avx2; | |
1070 | 31 | c->put_hevc_epel[6][0][0] = hevc_put_pel_pixels24_10_avx2; | |
1071 | 31 | c->put_hevc_epel[7][0][0] = hevc_put_pel_pixels32_10_avx2; | |
1072 | 31 | c->put_hevc_epel[8][0][0] = hevc_put_pel_pixels48_10_avx2; | |
1073 | 31 | c->put_hevc_epel[9][0][0] = hevc_put_pel_pixels64_10_avx2; | |
1074 | |||
1075 | 31 | c->put_hevc_qpel[5][0][0] = hevc_put_pel_pixels16_10_avx2; | |
1076 | 31 | c->put_hevc_qpel[6][0][0] = hevc_put_pel_pixels24_10_avx2; | |
1077 | 31 | c->put_hevc_qpel[7][0][0] = hevc_put_pel_pixels32_10_avx2; | |
1078 | 31 | c->put_hevc_qpel[8][0][0] = hevc_put_pel_pixels48_10_avx2; | |
1079 | 31 | c->put_hevc_qpel[9][0][0] = hevc_put_pel_pixels64_10_avx2; | |
1080 | |||
1081 | 31 | c->put_hevc_epel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2; | |
1082 | 31 | c->put_hevc_epel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2; | |
1083 | 31 | c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2; | |
1084 | 31 | c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2; | |
1085 | 31 | c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2; | |
1086 | |||
1087 | 31 | c->put_hevc_qpel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2; | |
1088 | 31 | c->put_hevc_qpel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2; | |
1089 | 31 | c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2; | |
1090 | 31 | c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2; | |
1091 | 31 | c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2; | |
1092 | |||
1093 | 31 | c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2; | |
1094 | 31 | c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2; | |
1095 | 31 | c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2; | |
1096 | 31 | c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2; | |
1097 | 31 | c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2; | |
1098 | 31 | c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2; | |
1099 | 31 | c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2; | |
1100 | 31 | c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2; | |
1101 | 31 | c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2; | |
1102 | 31 | c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2; | |
1103 | |||
1104 | 31 | c->put_hevc_epel[5][0][1] = hevc_put_epel_h16_10_avx2; | |
1105 | 31 | c->put_hevc_epel[6][0][1] = hevc_put_epel_h24_10_avx2; | |
1106 | 31 | c->put_hevc_epel[7][0][1] = hevc_put_epel_h32_10_avx2; | |
1107 | 31 | c->put_hevc_epel[8][0][1] = hevc_put_epel_h48_10_avx2; | |
1108 | 31 | c->put_hevc_epel[9][0][1] = hevc_put_epel_h64_10_avx2; | |
1109 | |||
1110 | 31 | c->put_hevc_epel_uni[5][0][1] = hevc_put_uni_epel_h16_10_avx2; | |
1111 | 31 | c->put_hevc_epel_uni[6][0][1] = hevc_put_uni_epel_h24_10_avx2; | |
1112 | 31 | c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_10_avx2; | |
1113 | 31 | c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_10_avx2; | |
1114 | 31 | c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_10_avx2; | |
1115 | |||
1116 | 31 | c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_bi_epel_h16_10_avx2; | |
1117 | 31 | c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_bi_epel_h24_10_avx2; | |
1118 | 31 | c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_10_avx2; | |
1119 | 31 | c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_10_avx2; | |
1120 | 31 | c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_10_avx2; | |
1121 | |||
1122 | 31 | c->put_hevc_epel[5][1][0] = hevc_put_epel_v16_10_avx2; | |
1123 | 31 | c->put_hevc_epel[6][1][0] = hevc_put_epel_v24_10_avx2; | |
1124 | 31 | c->put_hevc_epel[7][1][0] = hevc_put_epel_v32_10_avx2; | |
1125 | 31 | c->put_hevc_epel[8][1][0] = hevc_put_epel_v48_10_avx2; | |
1126 | 31 | c->put_hevc_epel[9][1][0] = hevc_put_epel_v64_10_avx2; | |
1127 | |||
1128 | 31 | c->put_hevc_epel_uni[5][1][0] = hevc_put_uni_epel_v16_10_avx2; | |
1129 | 31 | c->put_hevc_epel_uni[6][1][0] = hevc_put_uni_epel_v24_10_avx2; | |
1130 | 31 | c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_10_avx2; | |
1131 | 31 | c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_10_avx2; | |
1132 | 31 | c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_10_avx2; | |
1133 | |||
1134 | 31 | c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_bi_epel_v16_10_avx2; | |
1135 | 31 | c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_bi_epel_v24_10_avx2; | |
1136 | 31 | c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_10_avx2; | |
1137 | 31 | c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_10_avx2; | |
1138 | 31 | c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_10_avx2; | |
1139 | |||
1140 | 31 | c->put_hevc_epel[5][1][1] = hevc_put_epel_hv16_10_avx2; | |
1141 | 31 | c->put_hevc_epel[6][1][1] = hevc_put_epel_hv24_10_avx2; | |
1142 | 31 | c->put_hevc_epel[7][1][1] = hevc_put_epel_hv32_10_avx2; | |
1143 | 31 | c->put_hevc_epel[8][1][1] = hevc_put_epel_hv48_10_avx2; | |
1144 | 31 | c->put_hevc_epel[9][1][1] = hevc_put_epel_hv64_10_avx2; | |
1145 | |||
1146 | 31 | c->put_hevc_epel_uni[5][1][1] = hevc_put_uni_epel_hv16_10_avx2; | |
1147 | 31 | c->put_hevc_epel_uni[6][1][1] = hevc_put_uni_epel_hv24_10_avx2; | |
1148 | 31 | c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_10_avx2; | |
1149 | 31 | c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_10_avx2; | |
1150 | 31 | c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_10_avx2; | |
1151 | |||
1152 | 31 | c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_bi_epel_hv16_10_avx2; | |
1153 | 31 | c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_bi_epel_hv24_10_avx2; | |
1154 | 31 | c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_10_avx2; | |
1155 | 31 | c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_10_avx2; | |
1156 | 31 | c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_10_avx2; | |
1157 | |||
1158 | 31 | c->put_hevc_qpel[5][0][1] = hevc_put_qpel_h16_10_avx2; | |
1159 | 31 | c->put_hevc_qpel[6][0][1] = hevc_put_qpel_h24_10_avx2; | |
1160 | 31 | c->put_hevc_qpel[7][0][1] = hevc_put_qpel_h32_10_avx2; | |
1161 | 31 | c->put_hevc_qpel[8][0][1] = hevc_put_qpel_h48_10_avx2; | |
1162 | 31 | c->put_hevc_qpel[9][0][1] = hevc_put_qpel_h64_10_avx2; | |
1163 | |||
1164 | 31 | c->put_hevc_qpel_uni[5][0][1] = hevc_put_uni_qpel_h16_10_avx2; | |
1165 | 31 | c->put_hevc_qpel_uni[6][0][1] = hevc_put_uni_qpel_h24_10_avx2; | |
1166 | 31 | c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_10_avx2; | |
1167 | 31 | c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_10_avx2; | |
1168 | 31 | c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_10_avx2; | |
1169 | |||
1170 | 31 | c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_bi_qpel_h16_10_avx2; | |
1171 | 31 | c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_bi_qpel_h24_10_avx2; | |
1172 | 31 | c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_10_avx2; | |
1173 | 31 | c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_10_avx2; | |
1174 | 31 | c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_10_avx2; | |
1175 | |||
1176 | 31 | c->put_hevc_qpel[5][1][0] = hevc_put_qpel_v16_10_avx2; | |
1177 | 31 | c->put_hevc_qpel[6][1][0] = hevc_put_qpel_v24_10_avx2; | |
1178 | 31 | c->put_hevc_qpel[7][1][0] = hevc_put_qpel_v32_10_avx2; | |
1179 | 31 | c->put_hevc_qpel[8][1][0] = hevc_put_qpel_v48_10_avx2; | |
1180 | 31 | c->put_hevc_qpel[9][1][0] = hevc_put_qpel_v64_10_avx2; | |
1181 | |||
1182 | 31 | c->put_hevc_qpel_uni[5][1][0] = hevc_put_uni_qpel_v16_10_avx2; | |
1183 | 31 | c->put_hevc_qpel_uni[6][1][0] = hevc_put_uni_qpel_v24_10_avx2; | |
1184 | 31 | c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_10_avx2; | |
1185 | 31 | c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_10_avx2; | |
1186 | 31 | c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_10_avx2; | |
1187 | |||
1188 | 31 | c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_bi_qpel_v16_10_avx2; | |
1189 | 31 | c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_bi_qpel_v24_10_avx2; | |
1190 | 31 | c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_10_avx2; | |
1191 | 31 | c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_10_avx2; | |
1192 | 31 | c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_10_avx2; | |
1193 | |||
1194 | 31 | c->put_hevc_qpel[5][1][1] = hevc_put_qpel_hv16_10_avx2; | |
1195 | 31 | c->put_hevc_qpel[6][1][1] = hevc_put_qpel_hv24_10_avx2; | |
1196 | 31 | c->put_hevc_qpel[7][1][1] = hevc_put_qpel_hv32_10_avx2; | |
1197 | 31 | c->put_hevc_qpel[8][1][1] = hevc_put_qpel_hv48_10_avx2; | |
1198 | 31 | c->put_hevc_qpel[9][1][1] = hevc_put_qpel_hv64_10_avx2; | |
1199 | |||
1200 | 31 | c->put_hevc_qpel_uni[5][1][1] = hevc_put_uni_qpel_hv16_10_avx2; | |
1201 | 31 | c->put_hevc_qpel_uni[6][1][1] = hevc_put_uni_qpel_hv24_10_avx2; | |
1202 | 31 | c->put_hevc_qpel_uni[7][1][1] = hevc_put_uni_qpel_hv32_10_avx2; | |
1203 | 31 | c->put_hevc_qpel_uni[8][1][1] = hevc_put_uni_qpel_hv48_10_avx2; | |
1204 | 31 | c->put_hevc_qpel_uni[9][1][1] = hevc_put_uni_qpel_hv64_10_avx2; | |
1205 | |||
1206 | 31 | c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_bi_qpel_hv16_10_avx2; | |
1207 | 31 | c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_bi_qpel_hv24_10_avx2; | |
1208 | 31 | c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_bi_qpel_hv32_10_avx2; | |
1209 | 31 | c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_bi_qpel_hv48_10_avx2; | |
1210 | 31 | c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_bi_qpel_hv64_10_avx2; | |
1211 | #endif /* ARCH_X86_64 */ | ||
1212 | |||
1213 | 31 | SAO_BAND_INIT(10, avx2); | |
1214 | 31 | SAO_EDGE_INIT(10, avx2); | |
1215 | |||
1216 | 31 | c->add_residual[2] = ff_hevc_add_residual_16_10_avx2; | |
1217 | 31 | c->add_residual[3] = ff_hevc_add_residual_32_10_avx2; | |
1218 | } | ||
1219 | #endif /* HAVE_AVX2_EXTERNAL */ | ||
1220 |
2/2✓ Branch 0 taken 270 times.
✓ Branch 1 taken 286 times.
|
556 | } else if (bit_depth == 12) { |
1221 |
2/2✓ Branch 0 taken 220 times.
✓ Branch 1 taken 50 times.
|
270 | if (EXTERNAL_MMXEXT(cpu_flags)) { |
1222 | 220 | c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext; | |
1223 | } | ||
1224 |
2/2✓ Branch 0 taken 180 times.
✓ Branch 1 taken 90 times.
|
270 | if (EXTERNAL_SSE2(cpu_flags)) { |
1225 | 180 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2; | |
1226 | 180 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2; | |
1227 | if (ARCH_X86_64) { | ||
1228 | 180 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2; | |
1229 | 180 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2; | |
1230 | } | ||
1231 | 180 | SAO_BAND_INIT(12, sse2); | |
1232 | 180 | SAO_EDGE_INIT(12, sse2); | |
1233 | |||
1234 | 180 | c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2; | |
1235 | 180 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2; | |
1236 | 180 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2; | |
1237 | } | ||
1238 |
2/2✓ Branch 0 taken 140 times.
✓ Branch 1 taken 130 times.
|
270 | if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { |
1239 | 140 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3; | |
1240 | 140 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3; | |
1241 | } | ||
1242 | #if HAVE_SSE4_EXTERNAL && ARCH_X86_64 | ||
1243 |
2/2✓ Branch 0 taken 120 times.
✓ Branch 1 taken 150 times.
|
270 | if (EXTERNAL_SSE4(cpu_flags)) { |
1244 | 120 | EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4); | |
1245 | 120 | EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4); | |
1246 | 120 | EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4); | |
1247 | 120 | EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 12, sse4); | |
1248 | |||
1249 | 120 | QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4); | |
1250 | 120 | QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4); | |
1251 | 120 | QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4); | |
1252 | 120 | QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4); | |
1253 | } | ||
1254 | #endif | ||
1255 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 210 times.
|
270 | if (EXTERNAL_AVX(cpu_flags)) { |
1256 | 60 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx; | |
1257 | 60 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx; | |
1258 | if (ARCH_X86_64) { | ||
1259 | 60 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx; | |
1260 | 60 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx; | |
1261 | } | ||
1262 | 60 | SAO_BAND_INIT(12, avx); | |
1263 | } | ||
1264 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 250 times.
|
270 | if (EXTERNAL_AVX2(cpu_flags)) { |
1265 | 20 | c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2; | |
1266 | } | ||
1267 |
3/4✓ Branch 0 taken 20 times.
✓ Branch 1 taken 250 times.
✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
|
270 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
1268 | 20 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2; | |
1269 | 20 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2; | |
1270 | |||
1271 | 20 | SAO_BAND_INIT(12, avx2); | |
1272 | 20 | SAO_EDGE_INIT(12, avx2); | |
1273 | } | ||
1274 | } | ||
1275 | 1484 | } | |
1276 |