Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (c) 2013 Seppo Tomperi | ||
3 | * Copyright (c) 2013-2014 Pierre-Edouard Lepere | ||
4 | * Copyright (c) 2023-2024 Wu Jianhua | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with FFmpeg; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | #include "config.h" | ||
24 | |||
25 | #include "libavutil/cpu.h" | ||
26 | #include "libavutil/mem_internal.h" | ||
27 | #include "libavutil/x86/asm.h" | ||
28 | #include "libavutil/x86/cpu.h" | ||
29 | #include "libavcodec/hevc/dsp.h" | ||
30 | #include "libavcodec/x86/hevcdsp.h" | ||
31 | #include "libavcodec/x86/h26x/h2656dsp.h" | ||
32 | |||
/*
 * Prototypes of the HEVC loop-filter routines. Nothing is defined here:
 * the macros below only expand to declarations, so the implementations
 * have to be provided elsewhere in the build.
 */

/* One chroma filter; dir is h or v, bits the bit depth, isa the name suffix. */
#define LFC_FUNC(dir, bits, isa)                                          \
void ff_hevc_ ## dir ## _loop_filter_chroma_ ## bits ## _ ## isa(         \
    uint8_t *pix, ptrdiff_t stride, const int *tc,                        \
    const uint8_t *no_p, const uint8_t *no_q);

/* One luma filter; same naming scheme, with an extra beta argument. */
#define LFL_FUNC(dir, bits, isa)                                          \
void ff_hevc_ ## dir ## _loop_filter_luma_ ## bits ## _ ## isa(           \
    uint8_t *pix, ptrdiff_t stride, int beta, const int *tc,              \
    const uint8_t *no_p, const uint8_t *no_q);

/* Declare the h and v variant together. The type argument is not used. */
#define LFC_FUNCS(type, depth, opt)                                       \
    LFC_FUNC(h, depth, opt)                                               \
    LFC_FUNC(v, depth, opt)

#define LFL_FUNCS(type, depth, opt)                                       \
    LFL_FUNC(h, depth, opt)                                               \
    LFL_FUNC(v, depth, opt)

/* Chroma: sse2 and avx, 8/10/12 bit. */
LFC_FUNCS(uint8_t,  8, sse2)
LFC_FUNCS(uint8_t, 10, sse2)
LFC_FUNCS(uint8_t, 12, sse2)
LFC_FUNCS(uint8_t,  8, avx)
LFC_FUNCS(uint8_t, 10, avx)
LFC_FUNCS(uint8_t, 12, avx)

/* Luma: sse2, ssse3 and avx, 8/10/12 bit. */
LFL_FUNCS(uint8_t,  8, sse2)
LFL_FUNCS(uint8_t, 10, sse2)
LFL_FUNCS(uint8_t, 12, sse2)
LFL_FUNCS(uint8_t,  8, ssse3)
LFL_FUNCS(uint8_t, 10, ssse3)
LFL_FUNCS(uint8_t, 12, ssse3)
LFL_FUNCS(uint8_t,  8, avx)
LFL_FUNCS(uint8_t, 10, avx)
LFL_FUNCS(uint8_t, 12, avx)
62 | |||
/*
 * Declares ff_hevc_idct_<W>_dc_<depth>_<opt>() for depth 8, 10 and 12.
 * The expansion deliberately ends without a semicolon; each use below
 * supplies the final one.
 */
#define IDCT_DC_FUNCS(W, opt)                                     \
void ff_hevc_idct_ ## W ## _dc_8_  ## opt(int16_t *coeffs);       \
void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs);       \
void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)

IDCT_DC_FUNCS(4x4,   mmxext);
IDCT_DC_FUNCS(8x8,   sse2);
IDCT_DC_FUNCS(16x16, sse2);
IDCT_DC_FUNCS(32x32, sse2);
IDCT_DC_FUNCS(16x16, avx2);
IDCT_DC_FUNCS(32x32, avx2);
74 | |||
/*
 * Declares ff_hevc_idct_<size>_<depth>_<opt>() for the four square sizes
 * 4x4 .. 32x32 at depth 8 and 10. Unlike IDCT_DC_FUNCS the expansion
 * already ends in a semicolon, so the uses below carry none.
 */
#define IDCT_FUNCS(opt)                                                 \
void ff_hevc_idct_4x4_8_    ## opt(int16_t *coeffs, int col_limit);     \
void ff_hevc_idct_4x4_10_   ## opt(int16_t *coeffs, int col_limit);     \
void ff_hevc_idct_8x8_8_    ## opt(int16_t *coeffs, int col_limit);     \
void ff_hevc_idct_8x8_10_   ## opt(int16_t *coeffs, int col_limit);     \
void ff_hevc_idct_16x16_8_  ## opt(int16_t *coeffs, int col_limit);     \
void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit);     \
void ff_hevc_idct_32x32_8_  ## opt(int16_t *coeffs, int col_limit);     \
void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);

IDCT_FUNCS(sse2)
IDCT_FUNCS(avx)
87 | |||
88 | |||
/* "pel" has no table of its own: the name is an alias for the qpel table. */
#define ff_hevc_pel_filters ff_hevc_qpel_filters

/*
 * Declares two locals, hf and vf, pointing at rows mx and my of the
 * ff_hevc_<f>_filters table. mx and my must be in scope where this is used.
 */
#define DECL_HV_FILTER(f)                                    \
    const uint8_t *hf = ff_hevc_ ## f ## _filters[mx];       \
    const uint8_t *vf = ff_hevc_ ## f ## _filters[my];
93 | |||
/*
 * Defines ff_hevc_put_hevc_<hevc_name>_<bits>_<isa>() as a thin wrapper
 * around ff_h2656_put_<h26x_name>_<bits>_<isa>(). The wrapper looks up the
 * filter rows for mx / my in the ff_hevc_<filt>_filters table and passes
 * 2 * MAX_PB_SIZE in the argument slot where the uni wrapper passes its
 * dststride.
 */
#define FW_PUT(filt, hevc_name, h26x_name, bits, isa)                          \
void ff_hevc_put_hevc_ ## hevc_name ## _ ## bits ## _ ## isa(                  \
    int16_t *dst, const uint8_t *src, ptrdiff_t srcstride,                     \
    int height, intptr_t mx, intptr_t my, int width)                           \
{                                                                              \
    DECL_HV_FILTER(filt)                                                       \
    ff_h2656_put_ ## h26x_name ## _ ## bits ## _ ## isa(dst, 2 * MAX_PB_SIZE,  \
                                                        src, srcstride,        \
                                                        height, hf, vf,        \
                                                        width);                \
}
101 | |||
/*
 * Defines ff_hevc_put_hevc_uni_<hevc_name>_<bits>_<isa>() as a thin wrapper
 * around ff_h2656_put_uni_<h26x_name>_<bits>_<isa>(). mx / my are turned
 * into rows of the ff_hevc_<filt>_filters table; every other argument is
 * forwarded as received.
 */
#define FW_PUT_UNI(filt, hevc_name, h26x_name, bits, isa)                      \
void ff_hevc_put_hevc_uni_ ## hevc_name ## _ ## bits ## _ ## isa(              \
    uint8_t *dst, ptrdiff_t dststride,                                         \
    const uint8_t *src, ptrdiff_t srcstride,                                   \
    int height, intptr_t mx, intptr_t my, int width)                           \
{                                                                              \
    DECL_HV_FILTER(filt)                                                       \
    ff_h2656_put_uni_ ## h26x_name ## _ ## bits ## _ ## isa(dst, dststride,    \
                                                            src, srcstride,    \
                                                            height, hf, vf,    \
                                                            width);            \
}
110 | |||
#if ARCH_X86_64 && HAVE_SSE4_EXTERNAL

/* Emit the plain and the uni forwarding wrapper for one kernel. */
#define FW_PUT_FUNCS(filt, hevc_name, h26x_name, bits, isa)     \
    FW_PUT(filt, hevc_name, h26x_name, bits, isa)               \
    FW_PUT_UNI(filt, hevc_name, h26x_name, bits, isa)

/* Unfiltered copy of width w: HEVC pel_pixels<w> maps to h2656 pixels<w>. */
#define FW_PEL(w, bits, isa) \
    FW_PUT_FUNCS(pel, pel_pixels ## w, pixels ## w, bits, isa)

/*
 * Horizontal and vertical wrappers for filter family `kind` (epel / qpel):
 * HEVC <kind>_h<w> / <kind>_v<w> map to h2656 <taps>tap_h<w> / <taps>tap_v<w>.
 */
#define FW_DIR(kind, taps, w, bits, isa)                                        \
    FW_PUT_FUNCS(kind, kind ## _h ## w, taps ## tap_h ## w, bits, isa)          \
    FW_PUT_FUNCS(kind, kind ## _v ## w, taps ## tap_v ## w, bits, isa)

/* Combined (hv) wrapper: HEVC <kind>_hv<w> maps to h2656 <taps>tap_hv<w>. */
#define FW_DIR_HV(kind, taps, w, bits, isa)                                     \
    FW_PUT_FUNCS(kind, kind ## _hv ## w, taps ## tap_hv ## w, bits, isa)

/* pel, SSE4 */
FW_PEL(4,   8, sse4)
FW_PEL(6,   8, sse4)
FW_PEL(8,   8, sse4)
FW_PEL(12,  8, sse4)
FW_PEL(16,  8, sse4)
FW_PEL(4,  10, sse4)
FW_PEL(6,  10, sse4)
FW_PEL(8,  10, sse4)
FW_PEL(4,  12, sse4)
FW_PEL(6,  12, sse4)
FW_PEL(8,  12, sse4)

/* epel uses the 4-tap h2656 kernels. */
#define FW_EPEL(w, bits, isa)    FW_DIR(epel, 4, w, bits, isa)
#define FW_EPEL_HV(w, bits, isa) FW_DIR_HV(epel, 4, w, bits, isa)
#define FW_EPEL_FUNCS(w, bits, isa)     \
    FW_EPEL(w, bits, isa)               \
    FW_EPEL_HV(w, bits, isa)

/* Width 12 at 8 bit gets h and v only; no hv wrapper is generated here. */
FW_EPEL(12, 8, sse4)

FW_EPEL_FUNCS(4,   8, sse4)
FW_EPEL_FUNCS(6,   8, sse4)
FW_EPEL_FUNCS(8,   8, sse4)
FW_EPEL_FUNCS(16,  8, sse4)
FW_EPEL_FUNCS(4,  10, sse4)
FW_EPEL_FUNCS(6,  10, sse4)
FW_EPEL_FUNCS(8,  10, sse4)
FW_EPEL_FUNCS(4,  12, sse4)
FW_EPEL_FUNCS(6,  12, sse4)
FW_EPEL_FUNCS(8,  12, sse4)

/* qpel uses the 8-tap h2656 kernels. */
#define FW_QPEL(w, bits, isa)    FW_DIR(qpel, 8, w, bits, isa)
#define FW_QPEL_HV(w, bits, isa) FW_DIR_HV(qpel, 8, w, bits, isa)
#define FW_QPEL_FUNCS(w, bits, isa)     \
    FW_QPEL(w, bits, isa)               \
    FW_QPEL_HV(w, bits, isa)

/* Widths 12 and 16 at 8 bit get h and v only. */
FW_QPEL(12, 8, sse4)
FW_QPEL(16, 8, sse4)

FW_QPEL_FUNCS(4,  8, sse4)
FW_QPEL_FUNCS(8,  8, sse4)
FW_QPEL_FUNCS(4, 10, sse4)
FW_QPEL_FUNCS(8, 10, sse4)
FW_QPEL_FUNCS(4, 12, sse4)
FW_QPEL_FUNCS(8, 12, sse4)

#if HAVE_AVX2_EXTERNAL

FW_PEL(32, 8, avx2)
/* Only the plain (non-uni) wrapper is generated for 10-bit pel_pixels16. */
FW_PUT(pel, pel_pixels16, pixels16, 10, avx2)

FW_EPEL(32,  8, avx2)
FW_EPEL(16, 10, avx2)

FW_EPEL_HV(32,  8, avx2)
FW_EPEL_HV(16, 10, avx2)

FW_QPEL(32,  8, avx2)
FW_QPEL(16, 10, avx2)

FW_QPEL_HV(16, 10, avx2)

#endif /* HAVE_AVX2_EXTERNAL */
#endif /* ARCH_X86_64 && HAVE_SSE4_EXTERNAL */
191 | |||
/*
 * Defines ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() by calling the
 * <step>-wide variant once for every <step> columns until W columns are
 * covered. Per column the source advances by (bitd + 7) / 8 bytes and the
 * int16_t destination by one element. _srcstride, height, mx, my and width
 * are forwarded unchanged to every call.
 */
#define mc_rep_func(name, bitd, step, W, opt)                                  \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst,                \
                                                 const uint8_t *_src,          \
                                                 ptrdiff_t _srcstride,         \
                                                 int height, intptr_t mx,      \
                                                 intptr_t my, int width)       \
{                                                                              \
    const int px_bytes = (bitd + 7) / 8;                                       \
    for (int col = 0; col < W; col += step)                                    \
        ff_hevc_put_hevc_##name##step##_##bitd##_##opt(_dst + col,             \
                                                       _src + col * px_bytes,  \
                                                       _srcstride, height,     \
                                                       mx, my, width);         \
}
/*
 * Defines ff_hevc_put_hevc_uni_<name><W>_<bitd>_<opt>() by calling the
 * <step>-wide uni variant once for every <step> columns until W columns
 * are covered. Source and destination are both byte pointers and both
 * advance by (bitd + 7) / 8 bytes per column; the remaining arguments are
 * forwarded unchanged.
 */
#define mc_rep_uni_func(name, bitd, step, W, opt)                              \
void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *_dst,            \
                                                     ptrdiff_t dststride,      \
                                                     const uint8_t *_src,      \
                                                     ptrdiff_t _srcstride,     \
                                                     int height, intptr_t mx,  \
                                                     intptr_t my, int width)   \
{                                                                              \
    const int px_bytes = (bitd + 7) / 8;                                       \
    for (int col = 0; col < W; col += step) {                                  \
        const int byte_off = col * px_bytes;                                   \
        ff_hevc_put_hevc_uni_##name##step##_##bitd##_##opt(_dst + byte_off,    \
                                                           dststride,          \
                                                           _src + byte_off,    \
                                                           _srcstride, height, \
                                                           mx, my, width);     \
    }                                                                          \
}
/*
 * Defines ff_hevc_put_hevc_bi_<name><W>_<bitd>_<opt>() by calling the
 * <step>-wide bi variant once for every <step> columns until W columns are
 * covered. The byte pointers (_dst, _src) advance by (bitd + 7) / 8 bytes
 * per column, the int16_t second source (_src2) by one element per column;
 * the remaining arguments are forwarded unchanged.
 */
#define mc_rep_bi_func(name, bitd, step, W, opt)                               \
void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst,             \
                                                    ptrdiff_t dststride,       \
                                                    const uint8_t *_src,       \
                                                    ptrdiff_t _srcstride,      \
                                                    const int16_t *_src2,      \
                                                    int height, intptr_t mx,   \
                                                    intptr_t my, int width)    \
{                                                                              \
    const int px_bytes = (bitd + 7) / 8;                                       \
    for (int col = 0; col < W; col += step) {                                  \
        const int byte_off = col * px_bytes;                                   \
        ff_hevc_put_hevc_bi_##name##step##_##bitd##_##opt(_dst + byte_off,     \
                                                          dststride,           \
                                                          _src + byte_off,     \
                                                          _srcstride,          \
                                                          _src2 + col,         \
                                                          height, mx, my,      \
                                                          width);              \
    }                                                                          \
}
234 | |||
/* Generate the plain, uni and bi repeat wrappers for one (name, width). */
#define mc_rep_funcs(name, bitd, step, W, opt)          \
    mc_rep_func(name, bitd, step, W, opt)               \
    mc_rep_uni_func(name, bitd, step, W, opt)           \
    mc_rep_bi_func(name, bitd, step, W, opt)
239 | |||
/*
 * Defines ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() from two narrower
 * variants of different width: the <step1>-wide one handles the leading
 * columns, the <step2>-wide one starts step1 columns further in
 * (step1 int16_t elements in dst, step1 * (bitd + 7) / 8 bytes in src).
 */
#define mc_rep_func2(name, bitd, step1, step2, W, opt)                         \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *dst,                 \
                                                 const uint8_t *src,           \
                                                 ptrdiff_t _srcstride,         \
                                                 int height, intptr_t mx,      \
                                                 intptr_t my, int width)       \
{                                                                              \
    int16_t       *const dst_rest = dst + step1;                               \
    const uint8_t *const src_rest = src + (step1 * ((bitd + 7) / 8));          \
    ff_hevc_put_hevc_##name##step1##_##bitd##_##opt(dst, src, _srcstride,      \
                                                    height, mx, my, width);    \
    ff_hevc_put_hevc_##name##step2##_##bitd##_##opt(dst_rest, src_rest,        \
                                                    _srcstride, height,        \
                                                    mx, my, width);            \
}
/*
 * Defines ff_hevc_put_hevc_uni_<name><W>_<bitd>_<opt>() from two narrower
 * uni variants: the <step1>-wide one handles the leading columns, the
 * <step2>-wide one starts step1 * (bitd + 7) / 8 bytes further in, in both
 * dst and src.
 */
#define mc_rep_uni_func2(name, bitd, step1, step2, W, opt)                     \
void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *dst,             \
                                                     ptrdiff_t dststride,      \
                                                     const uint8_t *src,       \
                                                     ptrdiff_t _srcstride,     \
                                                     int height, intptr_t mx,  \
                                                     intptr_t my, int width)   \
{                                                                              \
    uint8_t       *const dst_rest = dst + (step1 * ((bitd + 7) / 8));          \
    const uint8_t *const src_rest = src + (step1 * ((bitd + 7) / 8));          \
    ff_hevc_put_hevc_uni_##name##step1##_##bitd##_##opt(dst, dststride,        \
                                                        src, _srcstride,       \
                                                        height, mx, my,        \
                                                        width);                \
    ff_hevc_put_hevc_uni_##name##step2##_##bitd##_##opt(dst_rest, dststride,   \
                                                        src_rest, _srcstride,  \
                                                        height, mx, my,        \
                                                        width);                \
}
/*
 * Defines ff_hevc_put_hevc_bi_<name><W>_<bitd>_<opt>() from two narrower
 * bi variants: the <step1>-wide one handles the leading columns, the
 * <step2>-wide one starts step1 columns further in. That offset is
 * step1 * (bitd + 7) / 8 bytes for dst and src, and step1 int16_t
 * elements for src2.
 */
#define mc_rep_bi_func2(name, bitd, step1, step2, W, opt)                      \
void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst,              \
                                                    ptrdiff_t dststride,       \
                                                    const uint8_t *src,        \
                                                    ptrdiff_t _srcstride,      \
                                                    const int16_t *src2,       \
                                                    int height, intptr_t mx,   \
                                                    intptr_t my, int width)    \
{                                                                              \
    uint8_t       *const dst_rest  = dst + (step1 * ((bitd + 7) / 8));         \
    const uint8_t *const src_rest  = src + (step1 * ((bitd + 7) / 8));         \
    const int16_t *const src2_rest = src2 + step1;                             \
    ff_hevc_put_hevc_bi_##name##step1##_##bitd##_##opt(dst, dststride,         \
                                                       src, _srcstride, src2,  \
                                                       height, mx, my,         \
                                                       width);                 \
    ff_hevc_put_hevc_bi_##name##step2##_##bitd##_##opt(dst_rest, dststride,    \
                                                       src_rest, _srcstride,   \
                                                       src2_rest, height,      \
                                                       mx, my, width);         \
}
269 | |||
/* Generate the plain, uni and bi two-part wrappers for one (name, width). */
#define mc_rep_funcs2(name, bitd, step1, step2, W, opt)         \
    mc_rep_func2(name, bitd, step1, step2, W, opt)              \
    mc_rep_uni_func2(name, bitd, step1, step2, W, opt)          \
    mc_rep_bi_func2(name, bitd, step1, step2, W, opt)
274 | |||
275 | #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL | ||
276 | |||
/*
 * Defines the 10-bit ff_hevc_put_hevc_<name><width1>_10_<opt1>() from two
 * kernels with different suffixes: the <width2>-wide <opt1> kernel first,
 * then the <width3>-wide <opt2> kernel. The second call starts width2
 * int16_t elements into dst and width4 bytes into src; width4 is supplied
 * by the user of the macro.
 */
#define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)        \
void ff_hevc_put_hevc_##name##width1##_10_##opt1(int16_t *dst,                 \
                                                 const uint8_t *src,           \
                                                 ptrdiff_t _srcstride,         \
                                                 int height, intptr_t mx,      \
                                                 intptr_t my, int width)       \
{                                                                              \
    int16_t       *const dst_rest = dst + width2;                              \
    const uint8_t *const src_rest = src + width4;                              \
    ff_hevc_put_hevc_##name##width2##_10_##opt1(dst, src, _srcstride,          \
                                                height, mx, my, width);        \
    ff_hevc_put_hevc_##name##width3##_10_##opt2(dst_rest, src_rest,            \
                                                _srcstride, height,            \
                                                mx, my, width);                \
}
285 | |||
/*
 * Defines the 10-bit ff_hevc_put_hevc_bi_<name><width1>_10_<opt1>() from
 * the <width2>-wide <opt1> bi kernel followed by the <width3>-wide <opt2>
 * bi kernel. The second call starts width4 bytes into dst and src and
 * width2 int16_t elements into src2.
 */
#define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)     \
void ff_hevc_put_hevc_bi_##name##width1##_10_##opt1(uint8_t *dst,              \
                                                    ptrdiff_t dststride,       \
                                                    const uint8_t *src,        \
                                                    ptrdiff_t _srcstride,      \
                                                    const int16_t *src2,       \
                                                    int height, intptr_t mx,   \
                                                    intptr_t my, int width)    \
{                                                                              \
    uint8_t       *const dst_rest  = dst + width4;                             \
    const uint8_t *const src_rest  = src + width4;                             \
    const int16_t *const src2_rest = src2 + width2;                            \
    ff_hevc_put_hevc_bi_##name##width2##_10_##opt1(dst, dststride,             \
                                                   src, _srcstride, src2,      \
                                                   height, mx, my, width);     \
    ff_hevc_put_hevc_bi_##name##width3##_10_##opt2(dst_rest, dststride,        \
                                                   src_rest, _srcstride,       \
                                                   src2_rest, height,          \
                                                   mx, my, width);             \
}
296 | |||
/*
 * Defines the 10-bit ff_hevc_put_hevc_uni_<name><width1>_10_<opt1>() from
 * the <width2>-wide <opt1> uni kernel followed by the <width3>-wide <opt2>
 * uni kernel. The second call starts width4 bytes into both dst and src.
 */
#define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)    \
void ff_hevc_put_hevc_uni_##name##width1##_10_##opt1(uint8_t *dst,             \
                                                     ptrdiff_t dststride,      \
                                                     const uint8_t *src,       \
                                                     ptrdiff_t _srcstride,     \
                                                     int height, intptr_t mx,  \
                                                     intptr_t my, int width)   \
{                                                                              \
    uint8_t       *const dst_rest = dst + width4;                              \
    const uint8_t *const src_rest = src + width4;                              \
    ff_hevc_put_hevc_uni_##name##width2##_10_##opt1(dst, dststride,            \
                                                    src, _srcstride,           \
                                                    height, mx, my, width);    \
    ff_hevc_put_hevc_uni_##name##width3##_10_##opt2(dst_rest, dststride,       \
                                                    src_rest, _srcstride,      \
                                                    height, mx, my, width);    \
}
307 | |||
/* Generate the plain, bi and uni 10-bit mixed-suffix wrappers together. */
#define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4)       \
    mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)            \
    mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)         \
    mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)
312 | |||
/*
 * Defines the 8-bit ff_hevc_put_hevc_<name><width1>_8_<opt1>() from two
 * kernels with different suffixes: the <width2>-wide <opt1> kernel first,
 * then the <width3>-wide <opt2> kernel. The second call starts width2
 * int16_t elements into dst and width2 bytes into src.
 */
#define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2)                 \
void ff_hevc_put_hevc_##name##width1##_8_##opt1(int16_t *dst,                  \
                                                const uint8_t *src,            \
                                                ptrdiff_t _srcstride,          \
                                                int height, intptr_t mx,       \
                                                intptr_t my, int width)        \
{                                                                              \
    int16_t       *const dst_rest = dst + width2;                              \
    const uint8_t *const src_rest = src + width2;                              \
    ff_hevc_put_hevc_##name##width2##_8_##opt1(dst, src, _srcstride,           \
                                               height, mx, my, width);         \
    ff_hevc_put_hevc_##name##width3##_8_##opt2(dst_rest, src_rest,             \
                                               _srcstride, height,             \
                                               mx, my, width);                 \
}
321 | |||
/*
 * Defines the 8-bit ff_hevc_put_hevc_bi_<name><width1>_8_<opt1>() from the
 * <width2>-wide <opt1> bi kernel followed by the <width3>-wide <opt2> bi
 * kernel. The second call starts width2 bytes into dst and src and width2
 * int16_t elements into src2.
 */
#define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2)              \
void ff_hevc_put_hevc_bi_##name##width1##_8_##opt1(uint8_t *dst,               \
                                                   ptrdiff_t dststride,        \
                                                   const uint8_t *src,         \
                                                   ptrdiff_t _srcstride,       \
                                                   const int16_t *src2,        \
                                                   int height, intptr_t mx,    \
                                                   intptr_t my, int width)     \
{                                                                              \
    uint8_t       *const dst_rest  = dst + width2;                             \
    const uint8_t *const src_rest  = src + width2;                             \
    const int16_t *const src2_rest = src2 + width2;                            \
    ff_hevc_put_hevc_bi_##name##width2##_8_##opt1(dst, dststride,              \
                                                  src, _srcstride, src2,       \
                                                  height, mx, my, width);      \
    ff_hevc_put_hevc_bi_##name##width3##_8_##opt2(dst_rest, dststride,         \
                                                  src_rest, _srcstride,        \
                                                  src2_rest, height,           \
                                                  mx, my, width);              \
}
332 | |||
/*
 * Defines the 8-bit ff_hevc_put_hevc_uni_<name><width1>_8_<opt1>() from the
 * <width2>-wide <opt1> uni kernel followed by the <width3>-wide <opt2> uni
 * kernel. The second call starts width2 bytes into both dst and src.
 */
#define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)             \
void ff_hevc_put_hevc_uni_##name##width1##_8_##opt1(uint8_t *dst,              \
                                                    ptrdiff_t dststride,       \
                                                    const uint8_t *src,        \
                                                    ptrdiff_t _srcstride,      \
                                                    int height, intptr_t mx,   \
                                                    intptr_t my, int width)    \
{                                                                              \
    uint8_t       *const dst_rest = dst + width2;                              \
    const uint8_t *const src_rest = src + width2;                              \
    ff_hevc_put_hevc_uni_##name##width2##_8_##opt1(dst, dststride,             \
                                                   src, _srcstride,            \
                                                   height, mx, my, width);     \
    ff_hevc_put_hevc_uni_##name##width3##_8_##opt2(dst_rest, dststride,        \
                                                   src_rest, _srcstride,       \
                                                   height, mx, my, width);     \
}
343 | |||
/* Generate the plain, bi and uni 8-bit mixed-suffix wrappers together. */
#define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2)                \
    mc_rep_mix_8(name, width1, width2, width3, opt1, opt2)                     \
    mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2)                  \
    mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)
348 | |||
#if HAVE_AVX2_EXTERNAL

/* 8 bit, width 48: a 32-wide avx2 call followed by a 16-wide sse4 call. */
mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
mc_rep_mixs_8(epel_hv,    48, 32, 16, avx2, sse4)
mc_rep_mixs_8(epel_h,     48, 32, 16, avx2, sse4)
mc_rep_mixs_8(epel_v,     48, 32, 16, avx2, sse4)

/*
 * 10 bit, width 24: a 16-wide avx2 call followed by an 8-wide sse4 call.
 * The trailing 32 is the byte offset of the second part.
 * pel_pixels gets the plain and bi forms only.
 */
mc_rep_mix_10(pel_pixels,    24, 16, 8, avx2, sse4, 32)
mc_bi_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(epel_hv,      24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(epel_h,       24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(epel_v,       24, 16, 8, avx2, sse4, 32)

mc_rep_mixs_10(qpel_h,       24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(qpel_v,       24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(qpel_hv,      24, 16, 8, avx2, sse4, 32)

/* pel_pixels, repeated avx2 kernels */
mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2) // used for 10bit
mc_rep_uni_func(pel_pixels, 8, 32,  96, avx2) // used for 10bit

mc_rep_funcs(pel_pixels, 8, 32, 64, avx2)

mc_rep_func(pel_pixels, 10, 16, 32, avx2)
mc_rep_func(pel_pixels, 10, 16, 48, avx2)
mc_rep_func(pel_pixels, 10, 32, 64, avx2)

mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2)
mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2)
mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2)

/* epel, repeated avx2 kernels */
mc_rep_funcs(epel_h, 8, 32, 64, avx2)

mc_rep_funcs(epel_v, 8, 32, 64, avx2)

mc_rep_funcs(epel_h, 10, 16, 32, avx2)
mc_rep_funcs(epel_h, 10, 16, 48, avx2)
mc_rep_funcs(epel_h, 10, 32, 64, avx2)

mc_rep_funcs(epel_v, 10, 16, 32, avx2)
mc_rep_funcs(epel_v, 10, 16, 48, avx2)
mc_rep_funcs(epel_v, 10, 32, 64, avx2)

mc_rep_funcs(epel_hv, 8, 32, 64, avx2)

mc_rep_funcs(epel_hv, 10, 16, 32, avx2)
mc_rep_funcs(epel_hv, 10, 16, 48, avx2)
mc_rep_funcs(epel_hv, 10, 32, 64, avx2)

/* qpel, repeated avx2 kernels (8-bit width 48 mixes avx2 and sse4) */
mc_rep_funcs(qpel_h, 8, 32, 64, avx2)
mc_rep_mixs_8(qpel_h, 48, 32, 16, avx2, sse4)

mc_rep_funcs(qpel_v, 8, 32, 64, avx2)
mc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4)

mc_rep_funcs(qpel_h, 10, 16, 32, avx2)
mc_rep_funcs(qpel_h, 10, 16, 48, avx2)
mc_rep_funcs(qpel_h, 10, 32, 64, avx2)

mc_rep_funcs(qpel_v, 10, 16, 32, avx2)
mc_rep_funcs(qpel_v, 10, 16, 48, avx2)
mc_rep_funcs(qpel_v, 10, 32, 64, avx2)

mc_rep_funcs(qpel_hv, 10, 16, 32, avx2)
mc_rep_funcs(qpel_hv, 10, 16, 48, avx2)
mc_rep_funcs(qpel_hv, 10, 32, 64, avx2)

#endif //AVX2
419 | |||
420 |
2/2✓ Branch 1 taken 30924 times.
✓ Branch 2 taken 7731 times.
|
77310 | mc_rep_funcs(pel_pixels, 8, 16, 64, sse4) |
421 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(pel_pixels, 8, 16, 48, sse4) |
422 |
2/2✓ Branch 1 taken 33874 times.
✓ Branch 2 taken 16937 times.
|
101622 | mc_rep_funcs(pel_pixels, 8, 16, 32, sse4) |
423 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(pel_pixels, 8, 8, 24, sse4) |
424 |
2/2✓ Branch 1 taken 2960 times.
✓ Branch 2 taken 370 times.
|
6660 | mc_rep_funcs(pel_pixels,10, 8, 64, sse4) |
425 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(pel_pixels,10, 8, 48, sse4) |
426 |
2/2✓ Branch 1 taken 6208 times.
✓ Branch 2 taken 1552 times.
|
15520 | mc_rep_funcs(pel_pixels,10, 8, 32, sse4) |
427 |
2/2✓ Branch 1 taken 144 times.
✓ Branch 2 taken 48 times.
|
384 | mc_rep_funcs(pel_pixels,10, 8, 24, sse4) |
428 |
2/2✓ Branch 1 taken 4020 times.
✓ Branch 2 taken 2010 times.
|
12060 | mc_rep_funcs(pel_pixels,10, 8, 16, sse4) |
429 |
2/2✓ Branch 1 taken 177 times.
✓ Branch 2 taken 59 times.
|
472 | mc_rep_funcs(pel_pixels,10, 4, 12, sse4) |
430 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(pel_pixels,12, 8, 64, sse4) |
431 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(pel_pixels,12, 8, 48, sse4) |
432 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(pel_pixels,12, 8, 32, sse4) |
433 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(pel_pixels,12, 8, 24, sse4) |
434 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(pel_pixels,12, 8, 16, sse4) |
435 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(pel_pixels,12, 4, 12, sse4) |
436 | |||
437 |
2/2✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
|
420 | mc_rep_funcs(epel_h, 8, 16, 64, sse4) |
438 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_h, 8, 16, 48, sse4) |
439 |
2/2✓ Branch 1 taken 1076 times.
✓ Branch 2 taken 538 times.
|
3228 | mc_rep_funcs(epel_h, 8, 16, 32, sse4) |
440 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_h, 8, 8, 24, sse4) |
441 |
2/2✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
|
756 | mc_rep_funcs(epel_h,10, 8, 64, sse4) |
442 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(epel_h,10, 8, 48, sse4) |
443 |
2/2✓ Branch 1 taken 1016 times.
✓ Branch 2 taken 254 times.
|
2540 | mc_rep_funcs(epel_h,10, 8, 32, sse4) |
444 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_h,10, 8, 24, sse4) |
445 |
2/2✓ Branch 1 taken 1140 times.
✓ Branch 2 taken 570 times.
|
3420 | mc_rep_funcs(epel_h,10, 8, 16, sse4) |
446 |
2/2✓ Branch 1 taken 129 times.
✓ Branch 2 taken 43 times.
|
344 | mc_rep_funcs(epel_h,10, 4, 12, sse4) |
447 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(epel_h,12, 8, 64, sse4) |
448 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(epel_h,12, 8, 48, sse4) |
449 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(epel_h,12, 8, 32, sse4) |
450 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_h,12, 8, 24, sse4) |
451 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(epel_h,12, 8, 16, sse4) |
452 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_h,12, 4, 12, sse4) |
453 |
2/2✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
|
420 | mc_rep_funcs(epel_v, 8, 16, 64, sse4) |
454 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_v, 8, 16, 48, sse4) |
455 |
2/2✓ Branch 1 taken 2056 times.
✓ Branch 2 taken 1028 times.
|
6168 | mc_rep_funcs(epel_v, 8, 16, 32, sse4) |
456 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_v, 8, 8, 24, sse4) |
457 |
2/2✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
|
756 | mc_rep_funcs(epel_v,10, 8, 64, sse4) |
458 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(epel_v,10, 8, 48, sse4) |
459 |
2/2✓ Branch 1 taken 296 times.
✓ Branch 2 taken 74 times.
|
740 | mc_rep_funcs(epel_v,10, 8, 32, sse4) |
460 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_v,10, 8, 24, sse4) |
461 |
2/2✓ Branch 1 taken 204 times.
✓ Branch 2 taken 102 times.
|
612 | mc_rep_funcs(epel_v,10, 8, 16, sse4) |
462 |
2/2✓ Branch 1 taken 141 times.
✓ Branch 2 taken 47 times.
|
376 | mc_rep_funcs(epel_v,10, 4, 12, sse4) |
463 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(epel_v,12, 8, 64, sse4) |
464 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(epel_v,12, 8, 48, sse4) |
465 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(epel_v,12, 8, 32, sse4) |
466 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_v,12, 8, 24, sse4) |
467 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(epel_v,12, 8, 16, sse4) |
468 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_v,12, 4, 12, sse4) |
469 |
2/2✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
|
420 | mc_rep_funcs(epel_hv, 8, 16, 64, sse4) |
470 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_hv, 8, 16, 48, sse4) |
471 |
2/2✓ Branch 1 taken 1860 times.
✓ Branch 2 taken 930 times.
|
5580 | mc_rep_funcs(epel_hv, 8, 16, 32, sse4) |
472 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_hv, 8, 8, 24, sse4) |
473 | 78 | mc_rep_funcs2(epel_hv,8, 8, 4, 12, sse4) | |
474 |
2/2✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
|
756 | mc_rep_funcs(epel_hv,10, 8, 64, sse4) |
475 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(epel_hv,10, 8, 48, sse4) |
476 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 78 times.
|
780 | mc_rep_funcs(epel_hv,10, 8, 32, sse4) |
477 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_hv,10, 8, 24, sse4) |
478 |
2/2✓ Branch 1 taken 236 times.
✓ Branch 2 taken 118 times.
|
708 | mc_rep_funcs(epel_hv,10, 8, 16, sse4) |
479 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_hv,10, 4, 12, sse4) |
480 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(epel_hv,12, 8, 64, sse4) |
481 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(epel_hv,12, 8, 48, sse4) |
482 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(epel_hv,12, 8, 32, sse4) |
483 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_hv,12, 8, 24, sse4) |
484 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(epel_hv,12, 8, 16, sse4) |
485 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_hv,12, 4, 12, sse4) |
486 | |||
487 |
2/2✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
|
420 | mc_rep_funcs(qpel_h, 8, 16, 64, sse4) |
488 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(qpel_h, 8, 16, 48, sse4) |
489 |
2/2✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
|
252 | mc_rep_funcs(qpel_h, 8, 16, 32, sse4) |
490 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_h, 8, 8, 24, sse4) |
491 |
2/2✓ Branch 1 taken 1184 times.
✓ Branch 2 taken 148 times.
|
2664 | mc_rep_funcs(qpel_h,10, 8, 64, sse4) |
492 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(qpel_h,10, 8, 48, sse4) |
493 |
2/2✓ Branch 1 taken 1248 times.
✓ Branch 2 taken 312 times.
|
3120 | mc_rep_funcs(qpel_h,10, 8, 32, sse4) |
494 |
2/2✓ Branch 1 taken 132 times.
✓ Branch 2 taken 44 times.
|
352 | mc_rep_funcs(qpel_h,10, 8, 24, sse4) |
495 |
2/2✓ Branch 1 taken 424 times.
✓ Branch 2 taken 212 times.
|
1272 | mc_rep_funcs(qpel_h,10, 8, 16, sse4) |
496 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_h,10, 4, 12, sse4) |
497 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(qpel_h,12, 8, 64, sse4) |
498 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(qpel_h,12, 8, 48, sse4) |
499 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(qpel_h,12, 8, 32, sse4) |
500 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_h,12, 8, 24, sse4) |
501 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(qpel_h,12, 8, 16, sse4) |
502 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_h,12, 4, 12, sse4) |
503 |
2/2✓ Branch 1 taken 176 times.
✓ Branch 2 taken 44 times.
|
440 | mc_rep_funcs(qpel_v, 8, 16, 64, sse4) |
504 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(qpel_v, 8, 16, 48, sse4) |
505 |
2/2✓ Branch 1 taken 92 times.
✓ Branch 2 taken 46 times.
|
276 | mc_rep_funcs(qpel_v, 8, 16, 32, sse4) |
506 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_v, 8, 8, 24, sse4) |
507 |
2/2✓ Branch 1 taken 464 times.
✓ Branch 2 taken 58 times.
|
1044 | mc_rep_funcs(qpel_v,10, 8, 64, sse4) |
508 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(qpel_v,10, 8, 48, sse4) |
509 |
2/2✓ Branch 1 taken 304 times.
✓ Branch 2 taken 76 times.
|
760 | mc_rep_funcs(qpel_v,10, 8, 32, sse4) |
510 |
2/2✓ Branch 1 taken 138 times.
✓ Branch 2 taken 46 times.
|
368 | mc_rep_funcs(qpel_v,10, 8, 24, sse4) |
511 |
2/2✓ Branch 1 taken 96 times.
✓ Branch 2 taken 48 times.
|
288 | mc_rep_funcs(qpel_v,10, 8, 16, sse4) |
512 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_v,10, 4, 12, sse4) |
513 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(qpel_v,12, 8, 64, sse4) |
514 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(qpel_v,12, 8, 48, sse4) |
515 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(qpel_v,12, 8, 32, sse4) |
516 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_v,12, 8, 24, sse4) |
517 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(qpel_v,12, 8, 16, sse4) |
518 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_v,12, 4, 12, sse4) |
519 |
2/2✓ Branch 1 taken 928 times.
✓ Branch 2 taken 116 times.
|
2088 | mc_rep_funcs(qpel_hv, 8, 8, 64, sse4) |
520 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(qpel_hv, 8, 8, 48, sse4) |
521 |
2/2✓ Branch 1 taken 292 times.
✓ Branch 2 taken 73 times.
|
730 | mc_rep_funcs(qpel_hv, 8, 8, 32, sse4) |
522 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_hv, 8, 8, 24, sse4) |
523 |
2/2✓ Branch 1 taken 202 times.
✓ Branch 2 taken 101 times.
|
606 | mc_rep_funcs(qpel_hv, 8, 8, 16, sse4) |
524 | 78 | mc_rep_funcs2(qpel_hv,8, 8, 4, 12, sse4) | |
525 |
2/2✓ Branch 1 taken 480 times.
✓ Branch 2 taken 60 times.
|
1080 | mc_rep_funcs(qpel_hv,10, 8, 64, sse4) |
526 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(qpel_hv,10, 8, 48, sse4) |
527 |
2/2✓ Branch 1 taken 272 times.
✓ Branch 2 taken 68 times.
|
680 | mc_rep_funcs(qpel_hv,10, 8, 32, sse4) |
528 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(qpel_hv,10, 8, 24, sse4) |
529 |
2/2✓ Branch 1 taken 100 times.
✓ Branch 2 taken 50 times.
|
300 | mc_rep_funcs(qpel_hv,10, 8, 16, sse4) |
530 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_hv,10, 4, 12, sse4) |
531 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(qpel_hv,12, 8, 64, sse4) |
532 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(qpel_hv,12, 8, 48, sse4) |
533 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(qpel_hv,12, 8, 32, sse4) |
534 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_hv,12, 8, 24, sse4) |
535 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(qpel_hv,12, 8, 16, sse4) |
536 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_hv,12, 4, 12, sse4) |
537 | |||
/*
 * mc_rep_uni_w(bitd, step, W, opt)
 *
 * Defines ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt>() by tiling a W-column block
 * with repeated calls to the narrower ff_hevc_put_hevc_uni_w<step>_<bitd>_<opt>()
 * kernel, one call per `step` columns.
 *
 * For the tile starting at column x, the int16_t source is advanced by x
 * elements and the destination by x * ((bitd + 7) / 8) bytes; dststride,
 * height, denom, _wx and _ox are forwarded unchanged to every call.
 */
#define mc_rep_uni_w(bitd, step, W, opt)                                                   \
void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride,        \
                                                const int16_t *_src, int height,           \
                                                int denom, int _wx, int _ox)               \
{                                                                                          \
    int x = 0;                                                                             \
    while (x < W) {                                                                        \
        ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(_dst + (x * ((bitd + 7) / 8)),       \
                                                      dststride, _src + x,                 \
                                                      height, denom, _wx, _ox);            \
        x += step;                                                                         \
    }                                                                                      \
}
551 | |||
/*
 * Instantiations of mc_rep_uni_w(bitd, step, W, opt) for the sse4 suffix.
 * For each bit depth (8, 10, 12) the 12-wide wrapper is built from the 6-wide
 * kernel and the 16/24/32/48/64-wide wrappers are built from the 8-wide kernel.
 */
552 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_uni_w(8, 6, 12, sse4) |
553 |
2/2✓ Branch 1 taken 21144 times.
✓ Branch 2 taken 10572 times.
|
31716 | mc_rep_uni_w(8, 8, 16, sse4) |
554 |
2/2✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
|
504 | mc_rep_uni_w(8, 8, 24, sse4) |
555 |
2/2✓ Branch 1 taken 54668 times.
✓ Branch 2 taken 13667 times.
|
68335 | mc_rep_uni_w(8, 8, 32, sse4) |
556 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_uni_w(8, 8, 48, sse4) |
557 |
2/2✓ Branch 1 taken 43016 times.
✓ Branch 2 taken 5377 times.
|
48393 | mc_rep_uni_w(8, 8, 64, sse4) |
558 | |||
559 |
2/2✓ Branch 1 taken 284 times.
✓ Branch 2 taken 142 times.
|
426 | mc_rep_uni_w(10, 6, 12, sse4) |
560 |
2/2✓ Branch 1 taken 3220 times.
✓ Branch 2 taken 1610 times.
|
4830 | mc_rep_uni_w(10, 8, 16, sse4) |
561 |
2/2✓ Branch 1 taken 402 times.
✓ Branch 2 taken 134 times.
|
536 | mc_rep_uni_w(10, 8, 24, sse4) |
562 |
2/2✓ Branch 1 taken 4728 times.
✓ Branch 2 taken 1182 times.
|
5910 | mc_rep_uni_w(10, 8, 32, sse4) |
563 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_uni_w(10, 8, 48, sse4) |
564 |
2/2✓ Branch 1 taken 2832 times.
✓ Branch 2 taken 354 times.
|
3186 | mc_rep_uni_w(10, 8, 64, sse4) |
565 | |||
566 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_uni_w(12, 6, 12, sse4) |
567 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_uni_w(12, 8, 16, sse4) |
568 |
2/2✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
|
504 | mc_rep_uni_w(12, 8, 24, sse4) |
569 |
2/2✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
|
630 | mc_rep_uni_w(12, 8, 32, sse4) |
570 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_uni_w(12, 8, 48, sse4) |
571 |
2/2✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
|
1134 | mc_rep_uni_w(12, 8, 64, sse4) |
572 | |||
/*
 * mc_rep_bi_w(bitd, step, W, opt)
 *
 * Defines ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() by tiling a W-column block
 * with repeated calls to the narrower ff_hevc_put_hevc_bi_w<step>_<bitd>_<opt>()
 * kernel, one call per `step` columns.
 *
 * For the tile starting at column x, both int16_t sources (_src and _src2)
 * are advanced by x elements and the destination by x * ((bitd + 7) / 8)
 * bytes; dststride, height, denom, _wx0, _wx1, _ox0 and _ox1 are forwarded
 * unchanged to every call.
 */
#define mc_rep_bi_w(bitd, step, W, opt)                                                    \
void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride,         \
                                               const int16_t *_src, const int16_t *_src2,  \
                                               int height, int denom,                      \
                                               int _wx0, int _wx1, int _ox0, int _ox1)     \
{                                                                                          \
    int x = 0;                                                                             \
    while (x < W) {                                                                        \
        ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(_dst + (x * ((bitd + 7) / 8)),        \
                                                     dststride, _src + x, _src2 + x,       \
                                                     height, denom,                        \
                                                     _wx0, _wx1, _ox0, _ox1);              \
        x += step;                                                                         \
    }                                                                                      \
}
588 | |||
/*
 * Instantiations of mc_rep_bi_w(bitd, step, W, opt) for the sse4 suffix.
 * For each bit depth (8, 10, 12) the 12-wide wrapper is built from the 6-wide
 * kernel and the 16/24/32/48/64-wide wrappers are built from the 8-wide kernel.
 */
589 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_bi_w(8, 6, 12, sse4) |
590 |
2/2✓ Branch 1 taken 4896 times.
✓ Branch 2 taken 2448 times.
|
7344 | mc_rep_bi_w(8, 8, 16, sse4) |
591 |
2/2✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
|
504 | mc_rep_bi_w(8, 8, 24, sse4) |
592 |
2/2✓ Branch 1 taken 23552 times.
✓ Branch 2 taken 5888 times.
|
29440 | mc_rep_bi_w(8, 8, 32, sse4) |
593 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_bi_w(8, 8, 48, sse4) |
594 |
2/2✓ Branch 1 taken 21144 times.
✓ Branch 2 taken 2643 times.
|
23787 | mc_rep_bi_w(8, 8, 64, sse4) |
595 | |||
596 |
2/2✓ Branch 1 taken 268 times.
✓ Branch 2 taken 134 times.
|
402 | mc_rep_bi_w(10, 6, 12, sse4) |
597 |
2/2✓ Branch 1 taken 2916 times.
✓ Branch 2 taken 1458 times.
|
4374 | mc_rep_bi_w(10, 8, 16, sse4) |
598 |
2/2✓ Branch 1 taken 390 times.
✓ Branch 2 taken 130 times.
|
520 | mc_rep_bi_w(10, 8, 24, sse4) |
599 |
2/2✓ Branch 1 taken 4760 times.
✓ Branch 2 taken 1190 times.
|
5950 | mc_rep_bi_w(10, 8, 32, sse4) |
600 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_bi_w(10, 8, 48, sse4) |
601 |
2/2✓ Branch 1 taken 2928 times.
✓ Branch 2 taken 366 times.
|
3294 | mc_rep_bi_w(10, 8, 64, sse4) |
602 | |||
603 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_bi_w(12, 6, 12, sse4) |
604 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_bi_w(12, 8, 16, sse4) |
605 |
2/2✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
|
504 | mc_rep_bi_w(12, 8, 24, sse4) |
606 |
2/2✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
|
630 | mc_rep_bi_w(12, 8, 32, sse4) |
607 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_bi_w(12, 8, 48, sse4) |
608 |
2/2✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
|
1134 | mc_rep_bi_w(12, 8, 64, sse4) |
609 | |||
/*
 * mc_uni_w_func(name, bitd, W, opt)
 *
 * Defines ff_hevc_put_hevc_uni_w_<name><W>_<bitd>_<opt>() as two chained calls:
 *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() fills a LOCAL_ALIGNED_16
 *      int16_t scratch buffer of 71 * MAX_PB_SIZE elements from _src, using
 *      _srcstride, height, mx, my and width;
 *   2. ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt>() reads that scratch buffer and
 *      writes to _dst with _dststride, height, denom, _wx and _ox.
 */
#define mc_uni_w_func(name, bitd, W, opt)                                                        \
void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride,      \
                                                       const uint8_t *_src,                      \
                                                       ptrdiff_t _srcstride,                     \
                                                       int height, int denom,                    \
                                                       int _wx, int _ox,                         \
                                                       intptr_t mx, intptr_t my, int width)      \
{                                                                                                \
    LOCAL_ALIGNED_16(int16_t, scratch, [71 * MAX_PB_SIZE]);                                      \
    ff_hevc_put_hevc_##name##W##_##bitd##_##opt(scratch, _src, _srcstride,                       \
                                                height, mx, my, width);                          \
    ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, scratch,                        \
                                               height, denom, _wx, _ox);                         \
}
621 | |||
/*
 * mc_uni_w_funcs(name, bitd, opt)
 *
 * Expands mc_uni_w_func() once for each of the widths
 * 4, 8, 12, 16, 24, 32, 48 and 64. Width 6 is not part of this set.
 */
#define mc_uni_w_funcs(name, bitd, opt)  \
    mc_uni_w_func(name, bitd,  4, opt)   \
    mc_uni_w_func(name, bitd,  8, opt)   \
    mc_uni_w_func(name, bitd, 12, opt)   \
    mc_uni_w_func(name, bitd, 16, opt)   \
    mc_uni_w_func(name, bitd, 24, opt)   \
    mc_uni_w_func(name, bitd, 32, opt)   \
    mc_uni_w_func(name, bitd, 48, opt)   \
    mc_uni_w_func(name, bitd, 64, opt)
631 | |||
/*
 * sse4 instantiations of the uni_w wrappers at 8, 10 and 12 bits.
 * pel_pixels and epel_{h,v,hv} get the standard width set from mc_uni_w_funcs
 * plus an extra 6-wide variant via mc_uni_w_func; qpel_{h,v,hv} get the
 * standard width set only.
 */
632 | 81664 | mc_uni_w_funcs(pel_pixels, 8, sse4) | |
633 | 18 | mc_uni_w_func(pel_pixels, 8, 6, sse4) | |
634 | 7784 | mc_uni_w_funcs(epel_h, 8, sse4) | |
635 | 18 | mc_uni_w_func(epel_h, 8, 6, sse4) | |
636 | 7424 | mc_uni_w_funcs(epel_v, 8, sse4) | |
637 | 18 | mc_uni_w_func(epel_v, 8, 6, sse4) | |
638 | 7664 | mc_uni_w_funcs(epel_hv, 8, sse4) | |
639 | 18 | mc_uni_w_func(epel_hv, 8, 6, sse4) | |
640 | 298 | mc_uni_w_funcs(qpel_h, 8, sse4) | |
641 | 308 | mc_uni_w_funcs(qpel_v, 8, sse4) | |
642 | 882 | mc_uni_w_funcs(qpel_hv, 8, sse4) | |
643 | |||
644 | 4980 | mc_uni_w_funcs(pel_pixels, 10, sse4) | |
645 | 18 | mc_uni_w_func(pel_pixels, 10, 6, sse4) | |
646 | 1352 | mc_uni_w_funcs(epel_h, 10, sse4) | |
647 | 18 | mc_uni_w_func(epel_h, 10, 6, sse4) | |
648 | 360 | mc_uni_w_funcs(epel_v, 10, sse4) | |
649 | 18 | mc_uni_w_func(epel_v, 10, 6, sse4) | |
650 | 536 | mc_uni_w_funcs(epel_hv, 10, sse4) | |
651 | 18 | mc_uni_w_func(epel_hv, 10, 6, sse4) | |
652 | 844 | mc_uni_w_funcs(qpel_h, 10, sse4) | |
653 | 332 | mc_uni_w_funcs(qpel_v, 10, sse4) | |
654 | 380 | mc_uni_w_funcs(qpel_hv, 10, sse4) | |
655 | |||
656 | 288 | mc_uni_w_funcs(pel_pixels, 12, sse4) | |
657 | 18 | mc_uni_w_func(pel_pixels, 12, 6, sse4) | |
658 | 288 | mc_uni_w_funcs(epel_h, 12, sse4) | |
659 | 18 | mc_uni_w_func(epel_h, 12, 6, sse4) | |
660 | 288 | mc_uni_w_funcs(epel_v, 12, sse4) | |
661 | 18 | mc_uni_w_func(epel_v, 12, 6, sse4) | |
662 | 288 | mc_uni_w_funcs(epel_hv, 12, sse4) | |
663 | 18 | mc_uni_w_func(epel_hv, 12, 6, sse4) | |
664 | 288 | mc_uni_w_funcs(qpel_h, 12, sse4) | |
665 | 288 | mc_uni_w_funcs(qpel_v, 12, sse4) | |
666 | 288 | mc_uni_w_funcs(qpel_hv, 12, sse4) | |
667 | |||
/*
 * mc_bi_w_func(name, bitd, W, opt)
 *
 * Defines ff_hevc_put_hevc_bi_w_<name><W>_<bitd>_<opt>() as two chained calls:
 *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() fills a LOCAL_ALIGNED_16
 *      int16_t scratch buffer of 71 * MAX_PB_SIZE elements from _src, using
 *      _srcstride, height, mx, my and width;
 *   2. ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() reads that scratch buffer
 *      together with the caller-supplied _src2 and writes to _dst with
 *      _dststride, height, denom, _wx0, _wx1, _ox0 and _ox1.
 */
#define mc_bi_w_func(name, bitd, W, opt)                                                         \
void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride,       \
                                                      const uint8_t *_src,                       \
                                                      ptrdiff_t _srcstride,                      \
                                                      const int16_t *_src2,                      \
                                                      int height, int denom,                     \
                                                      int _wx0, int _wx1,                        \
                                                      int _ox0, int _ox1,                        \
                                                      intptr_t mx, intptr_t my, int width)       \
{                                                                                                \
    LOCAL_ALIGNED_16(int16_t, scratch, [71 * MAX_PB_SIZE]);                                      \
    ff_hevc_put_hevc_##name##W##_##bitd##_##opt(scratch, _src, _srcstride,                       \
                                                height, mx, my, width);                          \
    ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, scratch, _src2,                  \
                                              height, denom, _wx0, _wx1, _ox0, _ox1);            \
}
681 | |||
/*
 * mc_bi_w_funcs(name, bitd, opt)
 *
 * Expands mc_bi_w_func() once for each of the widths
 * 4, 8, 12, 16, 24, 32, 48 and 64. Width 6 is not part of this set.
 */
#define mc_bi_w_funcs(name, bitd, opt)  \
    mc_bi_w_func(name, bitd,  4, opt)   \
    mc_bi_w_func(name, bitd,  8, opt)   \
    mc_bi_w_func(name, bitd, 12, opt)   \
    mc_bi_w_func(name, bitd, 16, opt)   \
    mc_bi_w_func(name, bitd, 24, opt)   \
    mc_bi_w_func(name, bitd, 32, opt)   \
    mc_bi_w_func(name, bitd, 48, opt)   \
    mc_bi_w_func(name, bitd, 64, opt)
691 | |||
/*
 * sse4 instantiations of the bi_w wrappers at 8, 10 and 12 bits.
 * pel_pixels and epel_{h,v,hv} get the standard width set from mc_bi_w_funcs
 * plus an extra 6-wide variant via mc_bi_w_func; qpel_{h,v,hv} get the
 * standard width set only.
 */
692 | 23926 | mc_bi_w_funcs(pel_pixels, 8, sse4) | |
693 | 18 | mc_bi_w_func(pel_pixels, 8, 6, sse4) | |
694 | 1240 | mc_bi_w_funcs(epel_h, 8, sse4) | |
695 | 18 | mc_bi_w_func(epel_h, 8, 6, sse4) | |
696 | 4860 | mc_bi_w_funcs(epel_v, 8, sse4) | |
697 | 18 | mc_bi_w_func(epel_v, 8, 6, sse4) | |
698 | 2416 | mc_bi_w_funcs(epel_hv, 8, sse4) | |
699 | 18 | mc_bi_w_func(epel_hv, 8, 6, sse4) | |
700 | 288 | mc_bi_w_funcs(qpel_h, 8, sse4) | |
701 | 288 | mc_bi_w_funcs(qpel_v, 8, sse4) | |
702 | 288 | mc_bi_w_funcs(qpel_hv, 8, sse4) | |
703 | |||
704 | 4336 | mc_bi_w_funcs(pel_pixels, 10, sse4) | |
705 | 18 | mc_bi_w_func(pel_pixels, 10, 6, sse4) | |
706 | 1392 | mc_bi_w_funcs(epel_h, 10, sse4) | |
707 | 18 | mc_bi_w_func(epel_h, 10, 6, sse4) | |
708 | 448 | mc_bi_w_funcs(epel_v, 10, sse4) | |
709 | 18 | mc_bi_w_func(epel_v, 10, 6, sse4) | |
710 | 312 | mc_bi_w_funcs(epel_hv, 10, sse4) | |
711 | 18 | mc_bi_w_func(epel_hv, 10, 6, sse4) | |
712 | 836 | mc_bi_w_funcs(qpel_h, 10, sse4) | |
713 | 368 | mc_bi_w_funcs(qpel_v, 10, sse4) | |
714 | 300 | mc_bi_w_funcs(qpel_hv, 10, sse4) | |
715 | |||
716 | 288 | mc_bi_w_funcs(pel_pixels, 12, sse4) | |
717 | 18 | mc_bi_w_func(pel_pixels, 12, 6, sse4) | |
718 | 288 | mc_bi_w_funcs(epel_h, 12, sse4) | |
719 | 18 | mc_bi_w_func(epel_h, 12, 6, sse4) | |
720 | 288 | mc_bi_w_funcs(epel_v, 12, sse4) | |
721 | 18 | mc_bi_w_func(epel_v, 12, 6, sse4) | |
722 | 288 | mc_bi_w_funcs(epel_hv, 12, sse4) | |
723 | 18 | mc_bi_w_func(epel_hv, 12, 6, sse4) | |
724 | 288 | mc_bi_w_funcs(qpel_h, 12, sse4) | |
725 | 288 | mc_bi_w_funcs(qpel_v, 12, sse4) | |
726 | 288 | mc_bi_w_funcs(qpel_hv, 12, sse4) | |
727 | #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL | ||
728 | |||
/*
 * SAO_BAND_FILTER_FUNCS(bitd, opt)
 *
 * Declares the five external ff_hevc_sao_band_filter_<W>_<bitd>_<opt>()
 * routines for W = 8, 16, 32, 48 and 64. All five share one signature:
 * destination and source pointers, their two strides, the int16_t offset
 * table, the left band class, and the block width and height.
 */
#define SAO_BAND_FILTER_FUNCS(bitd, opt)                                                        \
void ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src,               \
                                              ptrdiff_t _stride_dst, ptrdiff_t _stride_src,     \
                                              const int16_t *sao_offset_val,                    \
                                              int sao_left_class, int width, int height);       \
void ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src,              \
                                               ptrdiff_t _stride_dst, ptrdiff_t _stride_src,    \
                                               const int16_t *sao_offset_val,                   \
                                               int sao_left_class, int width, int height);      \
void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src,              \
                                               ptrdiff_t _stride_dst, ptrdiff_t _stride_src,    \
                                               const int16_t *sao_offset_val,                   \
                                               int sao_left_class, int width, int height);      \
void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src,              \
                                               ptrdiff_t _stride_dst, ptrdiff_t _stride_src,    \
                                               const int16_t *sao_offset_val,                   \
                                               int sao_left_class, int width, int height);      \
void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src,              \
                                               ptrdiff_t _stride_dst, ptrdiff_t _stride_src,    \
                                               const int16_t *sao_offset_val,                   \
                                               int sao_left_class, int width, int height);
740 | |||
/*
 * Prototypes for the SAO band filters: every combination of bit depth
 * 8/10/12 with the sse2, avx and avx2 suffixes.
 */
741 | SAO_BAND_FILTER_FUNCS(8, sse2) | ||
742 | SAO_BAND_FILTER_FUNCS(10, sse2) | ||
743 | SAO_BAND_FILTER_FUNCS(12, sse2) | ||
744 | SAO_BAND_FILTER_FUNCS(8, avx) | ||
745 | SAO_BAND_FILTER_FUNCS(10, avx) | ||
746 | SAO_BAND_FILTER_FUNCS(12, avx) | ||
747 | SAO_BAND_FILTER_FUNCS(8, avx2) | ||
748 | SAO_BAND_FILTER_FUNCS(10, avx2) | ||
749 | SAO_BAND_FILTER_FUNCS(12, avx2) | ||
750 | |||
/*
 * SAO_BAND_INIT(bitd, opt)
 *
 * Points c->sao_band_filter[0..4] at the <bitd>-bit <opt> band filters for
 * widths 8, 16, 32, 48 and 64 respectively. Relies on a variable named `c`
 * being in scope where the macro is expanded; wrapped in do { } while (0) so
 * it behaves as a single statement.
 */
#define SAO_BAND_INIT(bitd, opt)                                              \
    do {                                                                      \
        c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_##bitd##_##opt;     \
        c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_##bitd##_##opt;    \
        c->sao_band_filter[2] = ff_hevc_sao_band_filter_32_##bitd##_##opt;    \
        c->sao_band_filter[3] = ff_hevc_sao_band_filter_48_##bitd##_##opt;    \
        c->sao_band_filter[4] = ff_hevc_sao_band_filter_64_##bitd##_##opt;    \
    } while (0)
758 | |||
/*
 * SAO_EDGE_FILTER_FUNCS(bitd, opt)
 *
 * Declares the five external ff_hevc_sao_edge_filter_<W>_<bitd>_<opt>()
 * routines for W = 8, 16, 32, 48 and 64. All five share one signature:
 * destination and source pointers, the destination stride, the int16_t
 * offset table, the `eo` selector, and the block width and height.
 *
 * The last prototype line intentionally has no line-continuation backslash:
 * the macro body ends with that prototype, so it can never absorb whatever
 * line happens to follow the definition.
 */
#define SAO_EDGE_FILTER_FUNCS(bitd, opt)                                                        \
void ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src,               \
                                              ptrdiff_t stride_dst,                             \
                                              const int16_t *sao_offset_val,                    \
                                              int eo, int width, int height);                   \
void ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src,              \
                                               ptrdiff_t stride_dst,                            \
                                               const int16_t *sao_offset_val,                   \
                                               int eo, int width, int height);                  \
void ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src,              \
                                               ptrdiff_t stride_dst,                            \
                                               const int16_t *sao_offset_val,                   \
                                               int eo, int width, int height);                  \
void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src,              \
                                               ptrdiff_t stride_dst,                            \
                                               const int16_t *sao_offset_val,                   \
                                               int eo, int width, int height);                  \
void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src,              \
                                               ptrdiff_t stride_dst,                            \
                                               const int16_t *sao_offset_val,                   \
                                               int eo, int width, int height);
770 | |||
/*
 * Prototypes for the SAO edge filters: 8-bit uses the ssse3 and avx2
 * suffixes, while 10- and 12-bit use sse2 and avx2.
 */
771 | SAO_EDGE_FILTER_FUNCS(8, ssse3) | ||
772 | SAO_EDGE_FILTER_FUNCS(8, avx2) | ||
773 | SAO_EDGE_FILTER_FUNCS(10, sse2) | ||
774 | SAO_EDGE_FILTER_FUNCS(10, avx2) | ||
775 | SAO_EDGE_FILTER_FUNCS(12, sse2) | ||
776 | SAO_EDGE_FILTER_FUNCS(12, avx2) | ||
777 | |||
/*
 * SAO_EDGE_INIT(bitd, opt)
 *
 * Points c->sao_edge_filter[0..4] at the <bitd>-bit <opt> edge filters for
 * widths 8, 16, 32, 48 and 64 respectively. Relies on a variable named `c`
 * being in scope where the macro is expanded; wrapped in do { } while (0) so
 * it behaves as a single statement.
 */
#define SAO_EDGE_INIT(bitd, opt)                                              \
    do {                                                                      \
        c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt;     \
        c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_16_##bitd##_##opt;    \
        c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_##bitd##_##opt;    \
        c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_##bitd##_##opt;    \
        c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_##bitd##_##opt;    \
    } while (0)
785 | |||
/*
 * EPEL_LINKS(pointer, my, mx, fname, bitd, opt)
 *
 * Issues one PEL_LINK() per block width, pairing table index 1..9 with
 * fname4, fname6, fname8, fname12, fname16, fname24, fname32, fname48 and
 * fname64 in that order. my, mx, bitd and opt are passed through to each
 * PEL_LINK(). The last PEL_LINK() has no trailing semicolon, so the caller
 * supplies it.
 */
#define EPEL_LINKS(pointer, my, mx, fname, bitd, opt)        \
    PEL_LINK(pointer, 1, my, mx, fname##4,  bitd, opt);      \
    PEL_LINK(pointer, 2, my, mx, fname##6,  bitd, opt);      \
    PEL_LINK(pointer, 3, my, mx, fname##8,  bitd, opt);      \
    PEL_LINK(pointer, 4, my, mx, fname##12, bitd, opt);      \
    PEL_LINK(pointer, 5, my, mx, fname##16, bitd, opt);      \
    PEL_LINK(pointer, 6, my, mx, fname##24, bitd, opt);      \
    PEL_LINK(pointer, 7, my, mx, fname##32, bitd, opt);      \
    PEL_LINK(pointer, 8, my, mx, fname##48, bitd, opt);      \
    PEL_LINK(pointer, 9, my, mx, fname##64, bitd, opt)
/*
 * QPEL_LINKS(pointer, my, mx, fname, bitd, opt)
 *
 * Issues one PEL_LINK() per block width, pairing table indices
 * 1, 3, 4, 5, 6, 7, 8, 9 with fname4, fname8, fname12, fname16, fname24,
 * fname32, fname48 and fname64 in that order; index 2 is not assigned here.
 * my, mx, bitd and opt are passed through to each PEL_LINK(). The last
 * PEL_LINK() has no trailing semicolon, so the caller supplies it.
 */
#define QPEL_LINKS(pointer, my, mx, fname, bitd, opt)        \
    PEL_LINK(pointer, 1, my, mx, fname##4,  bitd, opt);      \
    PEL_LINK(pointer, 3, my, mx, fname##8,  bitd, opt);      \
    PEL_LINK(pointer, 4, my, mx, fname##12, bitd, opt);      \
    PEL_LINK(pointer, 5, my, mx, fname##16, bitd, opt);      \
    PEL_LINK(pointer, 6, my, mx, fname##24, bitd, opt);      \
    PEL_LINK(pointer, 7, my, mx, fname##32, bitd, opt);      \
    PEL_LINK(pointer, 8, my, mx, fname##48, bitd, opt);      \
    PEL_LINK(pointer, 9, my, mx, fname##64, bitd, opt)
805 | |||
806 | 1480 | void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) | |
807 | { | ||
808 | 1480 | int cpu_flags = av_get_cpu_flags(); | |
809 | |||
810 |
2/2✓ Branch 0 taken 617 times.
✓ Branch 1 taken 863 times.
|
1480 | if (bit_depth == 8) { |
811 |
2/2✓ Branch 0 taken 226 times.
✓ Branch 1 taken 391 times.
|
617 | if (EXTERNAL_MMXEXT(cpu_flags)) { |
812 | 226 | c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext; | |
813 | |||
814 | 226 | c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext; | |
815 | } | ||
816 |
2/2✓ Branch 0 taken 186 times.
✓ Branch 1 taken 431 times.
|
617 | if (EXTERNAL_SSE2(cpu_flags)) { |
817 | 186 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2; | |
818 | 186 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2; | |
819 | if (ARCH_X86_64) { | ||
820 | 186 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2; | |
821 | 186 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2; | |
822 | |||
823 | 186 | c->idct[2] = ff_hevc_idct_16x16_8_sse2; | |
824 | 186 | c->idct[3] = ff_hevc_idct_32x32_8_sse2; | |
825 | } | ||
826 | 186 | SAO_BAND_INIT(8, sse2); | |
827 | |||
828 | 186 | c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2; | |
829 | 186 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2; | |
830 | 186 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2; | |
831 | |||
832 | 186 | c->idct[0] = ff_hevc_idct_4x4_8_sse2; | |
833 | 186 | c->idct[1] = ff_hevc_idct_8x8_8_sse2; | |
834 | |||
835 | 186 | c->add_residual[1] = ff_hevc_add_residual_8_8_sse2; | |
836 | 186 | c->add_residual[2] = ff_hevc_add_residual_16_8_sse2; | |
837 | 186 | c->add_residual[3] = ff_hevc_add_residual_32_8_sse2; | |
838 | } | ||
839 |
2/2✓ Branch 0 taken 146 times.
✓ Branch 1 taken 471 times.
|
617 | if (EXTERNAL_SSSE3(cpu_flags)) { |
840 | if(ARCH_X86_64) { | ||
841 | 146 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3; | |
842 | 146 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3; | |
843 | } | ||
844 | 146 | SAO_EDGE_INIT(8, ssse3); | |
845 | } | ||
846 |
2/2✓ Branch 0 taken 126 times.
✓ Branch 1 taken 491 times.
|
617 | if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { |
847 | |||
848 | 126 | EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4); | |
849 | 126 | EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4); | |
850 | 126 | EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4); | |
851 | 126 | EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 8, sse4); | |
852 | |||
853 | 126 | QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4); | |
854 | 126 | QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 8, sse4); | |
855 | 126 | QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4); | |
856 | 126 | QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4); | |
857 | } | ||
858 |
2/2✓ Branch 0 taken 66 times.
✓ Branch 1 taken 551 times.
|
617 | if (EXTERNAL_AVX(cpu_flags)) { |
859 | 66 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx; | |
860 | 66 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx; | |
861 | if (ARCH_X86_64) { | ||
862 | 66 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx; | |
863 | 66 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx; | |
864 | |||
865 | 66 | c->idct[2] = ff_hevc_idct_16x16_8_avx; | |
866 | 66 | c->idct[3] = ff_hevc_idct_32x32_8_avx; | |
867 | } | ||
868 | 66 | SAO_BAND_INIT(8, avx); | |
869 | |||
870 | 66 | c->idct[0] = ff_hevc_idct_4x4_8_avx; | |
871 | 66 | c->idct[1] = ff_hevc_idct_8x8_8_avx; | |
872 | |||
873 | 66 | c->add_residual[1] = ff_hevc_add_residual_8_8_avx; | |
874 | 66 | c->add_residual[2] = ff_hevc_add_residual_16_8_avx; | |
875 | 66 | c->add_residual[3] = ff_hevc_add_residual_32_8_avx; | |
876 | } | ||
877 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 591 times.
|
617 | if (EXTERNAL_AVX2(cpu_flags)) { |
878 | 26 | c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2; | |
879 | 26 | c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2; | |
880 | } | ||
881 |
3/4✓ Branch 0 taken 26 times.
✓ Branch 1 taken 591 times.
✓ Branch 2 taken 26 times.
✗ Branch 3 not taken.
|
617 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
882 | 26 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2; | |
883 | 26 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2; | |
884 | if (ARCH_X86_64) { | ||
885 | 26 | c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; | |
886 | 26 | c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; | |
887 | 26 | c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; | |
888 | |||
889 | 26 | c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; | |
890 | 26 | c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; | |
891 | 26 | c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; | |
892 | |||
893 | 26 | c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; | |
894 | 26 | c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; | |
895 | 26 | c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; | |
896 | |||
897 | 26 | c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; | |
898 | 26 | c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; | |
899 | 26 | c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; | |
900 | |||
901 | 26 | c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; | |
902 | 26 | c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; | |
903 | 26 | c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; | |
904 | |||
905 | 26 | c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; | |
906 | 26 | c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; | |
907 | 26 | c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; | |
908 | |||
909 | 26 | c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2; | |
910 | 26 | c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2; | |
911 | 26 | c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2; | |
912 | |||
913 | 26 | c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2; | |
914 | 26 | c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2; | |
915 | 26 | c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2; | |
916 | |||
917 | 26 | c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2; | |
918 | 26 | c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2; | |
919 | 26 | c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2; | |
920 | |||
921 | 26 | c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2; | |
922 | 26 | c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2; | |
923 | 26 | c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2; | |
924 | |||
925 | 26 | c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2; | |
926 | 26 | c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2; | |
927 | 26 | c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2; | |
928 | |||
929 | 26 | c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2; | |
930 | 26 | c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2; | |
931 | 26 | c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2; | |
932 | |||
933 | 26 | c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2; | |
934 | 26 | c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2; | |
935 | 26 | c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2; | |
936 | |||
937 | 26 | c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2; | |
938 | 26 | c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2; | |
939 | 26 | c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2; | |
940 | |||
941 | 26 | c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2; | |
942 | 26 | c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2; | |
943 | 26 | c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2; | |
944 | |||
945 | 26 | c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2; | |
946 | 26 | c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2; | |
947 | 26 | c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2; | |
948 | |||
949 | 26 | c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2; | |
950 | 26 | c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2; | |
951 | 26 | c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2; | |
952 | |||
953 | 26 | c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2; | |
954 | 26 | c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2; | |
955 | 26 | c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2; | |
956 | |||
957 | 26 | c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2; | |
958 | 26 | c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2; | |
959 | 26 | c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2; | |
960 | |||
961 | 26 | c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2; | |
962 | 26 | c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2; | |
963 | 26 | c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2; | |
964 | |||
965 | 26 | c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2; | |
966 | 26 | c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2; | |
967 | 26 | c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2; | |
968 | } | ||
969 | 26 | SAO_BAND_INIT(8, avx2); | |
970 | |||
971 | 26 | c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2; | |
972 | 26 | c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2; | |
973 | 26 | c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2; | |
974 | |||
975 | 26 | c->add_residual[3] = ff_hevc_add_residual_32_8_avx2; | |
976 | } | ||
977 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 617 times.
|
617 | if (EXTERNAL_AVX512ICL(cpu_flags) && ARCH_X86_64) { |
978 | ✗ | c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_avx512icl; | |
979 | ✗ | c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_avx512icl; | |
980 | ✗ | c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_8_avx512icl; | |
981 | ✗ | c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx512icl; | |
982 | ✗ | c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx512icl; | |
983 | ✗ | c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_avx512icl; | |
984 | } | ||
985 |
2/2✓ Branch 0 taken 307 times.
✓ Branch 1 taken 556 times.
|
863 | } else if (bit_depth == 10) { |
986 |
2/2✓ Branch 0 taken 230 times.
✓ Branch 1 taken 77 times.
|
307 | if (EXTERNAL_MMXEXT(cpu_flags)) { |
987 | 230 | c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext; | |
988 | 230 | c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext; | |
989 | } | ||
990 |
2/2✓ Branch 0 taken 190 times.
✓ Branch 1 taken 117 times.
|
307 | if (EXTERNAL_SSE2(cpu_flags)) { |
991 | 190 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2; | |
992 | 190 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2; | |
993 | if (ARCH_X86_64) { | ||
994 | 190 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2; | |
995 | 190 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2; | |
996 | |||
997 | 190 | c->idct[2] = ff_hevc_idct_16x16_10_sse2; | |
998 | 190 | c->idct[3] = ff_hevc_idct_32x32_10_sse2; | |
999 | } | ||
1000 | 190 | SAO_BAND_INIT(10, sse2); | |
1001 | 190 | SAO_EDGE_INIT(10, sse2); | |
1002 | |||
1003 | 190 | c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2; | |
1004 | 190 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2; | |
1005 | 190 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2; | |
1006 | |||
1007 | 190 | c->idct[0] = ff_hevc_idct_4x4_10_sse2; | |
1008 | 190 | c->idct[1] = ff_hevc_idct_8x8_10_sse2; | |
1009 | |||
1010 | 190 | c->add_residual[1] = ff_hevc_add_residual_8_10_sse2; | |
1011 | 190 | c->add_residual[2] = ff_hevc_add_residual_16_10_sse2; | |
1012 | 190 | c->add_residual[3] = ff_hevc_add_residual_32_10_sse2; | |
1013 | } | ||
1014 |
2/2✓ Branch 0 taken 150 times.
✓ Branch 1 taken 157 times.
|
307 | if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { |
1015 | 150 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3; | |
1016 | 150 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3; | |
1017 | } | ||
1018 |
2/2✓ Branch 0 taken 130 times.
✓ Branch 1 taken 177 times.
|
307 | if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { |
1019 | 130 | EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4); | |
1020 | 130 | EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4); | |
1021 | 130 | EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4); | |
1022 | 130 | EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 10, sse4); | |
1023 | |||
1024 | 130 | QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4); | |
1025 | 130 | QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 10, sse4); | |
1026 | 130 | QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4); | |
1027 | 130 | QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4); | |
1028 | } | ||
1029 |
2/2✓ Branch 0 taken 70 times.
✓ Branch 1 taken 237 times.
|
307 | if (EXTERNAL_AVX(cpu_flags)) { |
1030 | 70 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx; | |
1031 | 70 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx; | |
1032 | if (ARCH_X86_64) { | ||
1033 | 70 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx; | |
1034 | 70 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx; | |
1035 | |||
1036 | 70 | c->idct[2] = ff_hevc_idct_16x16_10_avx; | |
1037 | 70 | c->idct[3] = ff_hevc_idct_32x32_10_avx; | |
1038 | } | ||
1039 | |||
1040 | 70 | c->idct[0] = ff_hevc_idct_4x4_10_avx; | |
1041 | 70 | c->idct[1] = ff_hevc_idct_8x8_10_avx; | |
1042 | |||
1043 | 70 | SAO_BAND_INIT(10, avx); | |
1044 | } | ||
1045 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 277 times.
|
307 | if (EXTERNAL_AVX2(cpu_flags)) { |
1046 | 30 | c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2; | |
1047 | } | ||
1048 |
3/4✓ Branch 0 taken 30 times.
✓ Branch 1 taken 277 times.
✓ Branch 2 taken 30 times.
✗ Branch 3 not taken.
|
307 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
1049 | 30 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2; | |
1050 | 30 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2; | |
1051 | if (ARCH_X86_64) { | ||
1052 | 30 | c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; | |
1053 | 30 | c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; | |
1054 | 30 | c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; | |
1055 | 30 | c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; | |
1056 | 30 | c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; | |
1057 | |||
1058 | 30 | c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; | |
1059 | 30 | c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; | |
1060 | 30 | c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; | |
1061 | 30 | c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; | |
1062 | 30 | c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; | |
1063 | |||
1064 | 30 | c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; | |
1065 | 30 | c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; | |
1066 | 30 | c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; | |
1067 | 30 | c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; | |
1068 | 30 | c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; | |
1069 | |||
1070 | 30 | c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; | |
1071 | 30 | c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; | |
1072 | 30 | c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; | |
1073 | 30 | c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; | |
1074 | 30 | c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; | |
1075 | |||
1076 | 30 | c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; | |
1077 | 30 | c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; | |
1078 | 30 | c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; | |
1079 | 30 | c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; | |
1080 | 30 | c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; | |
1081 | 30 | c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; | |
1082 | 30 | c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; | |
1083 | 30 | c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; | |
1084 | 30 | c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; | |
1085 | 30 | c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; | |
1086 | |||
1087 | 30 | c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2; | |
1088 | 30 | c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2; | |
1089 | 30 | c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2; | |
1090 | 30 | c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2; | |
1091 | 30 | c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2; | |
1092 | |||
1093 | 30 | c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2; | |
1094 | 30 | c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2; | |
1095 | 30 | c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2; | |
1096 | 30 | c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2; | |
1097 | 30 | c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2; | |
1098 | |||
1099 | 30 | c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2; | |
1100 | 30 | c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2; | |
1101 | 30 | c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2; | |
1102 | 30 | c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2; | |
1103 | 30 | c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2; | |
1104 | |||
1105 | 30 | c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2; | |
1106 | 30 | c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2; | |
1107 | 30 | c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2; | |
1108 | 30 | c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2; | |
1109 | 30 | c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2; | |
1110 | |||
1111 | 30 | c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2; | |
1112 | 30 | c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2; | |
1113 | 30 | c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2; | |
1114 | 30 | c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2; | |
1115 | 30 | c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2; | |
1116 | |||
1117 | 30 | c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2; | |
1118 | 30 | c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2; | |
1119 | 30 | c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2; | |
1120 | 30 | c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2; | |
1121 | 30 | c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2; | |
1122 | |||
1123 | 30 | c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2; | |
1124 | 30 | c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2; | |
1125 | 30 | c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2; | |
1126 | 30 | c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2; | |
1127 | 30 | c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2; | |
1128 | |||
1129 | 30 | c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2; | |
1130 | 30 | c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2; | |
1131 | 30 | c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2; | |
1132 | 30 | c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2; | |
1133 | 30 | c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2; | |
1134 | |||
1135 | 30 | c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2; | |
1136 | 30 | c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2; | |
1137 | 30 | c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2; | |
1138 | 30 | c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2; | |
1139 | 30 | c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2; | |
1140 | |||
1141 | 30 | c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2; | |
1142 | 30 | c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2; | |
1143 | 30 | c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2; | |
1144 | 30 | c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2; | |
1145 | 30 | c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2; | |
1146 | |||
1147 | 30 | c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2; | |
1148 | 30 | c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2; | |
1149 | 30 | c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2; | |
1150 | 30 | c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2; | |
1151 | 30 | c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2; | |
1152 | |||
1153 | 30 | c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2; | |
1154 | 30 | c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2; | |
1155 | 30 | c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2; | |
1156 | 30 | c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2; | |
1157 | 30 | c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2; | |
1158 | |||
1159 | 30 | c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2; | |
1160 | 30 | c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2; | |
1161 | 30 | c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2; | |
1162 | 30 | c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2; | |
1163 | 30 | c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2; | |
1164 | |||
1165 | 30 | c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2; | |
1166 | 30 | c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2; | |
1167 | 30 | c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2; | |
1168 | 30 | c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2; | |
1169 | 30 | c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2; | |
1170 | |||
1171 | 30 | c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2; | |
1172 | 30 | c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2; | |
1173 | 30 | c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2; | |
1174 | 30 | c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2; | |
1175 | 30 | c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2; | |
1176 | |||
1177 | 30 | c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2; | |
1178 | 30 | c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2; | |
1179 | 30 | c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2; | |
1180 | 30 | c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2; | |
1181 | 30 | c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2; | |
1182 | |||
1183 | 30 | c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2; | |
1184 | 30 | c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2; | |
1185 | 30 | c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2; | |
1186 | 30 | c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2; | |
1187 | 30 | c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2; | |
1188 | |||
1189 | 30 | c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2; | |
1190 | 30 | c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2; | |
1191 | 30 | c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2; | |
1192 | 30 | c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2; | |
1193 | 30 | c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2; | |
1194 | } | ||
1195 | 30 | SAO_BAND_INIT(10, avx2); | |
1196 | 30 | SAO_EDGE_INIT(10, avx2); | |
1197 | |||
1198 | 30 | c->add_residual[2] = ff_hevc_add_residual_16_10_avx2; | |
1199 | 30 | c->add_residual[3] = ff_hevc_add_residual_32_10_avx2; | |
1200 | } | ||
1201 |
2/2✓ Branch 0 taken 270 times.
✓ Branch 1 taken 286 times.
|
556 | } else if (bit_depth == 12) { |
1202 |
2/2✓ Branch 0 taken 220 times.
✓ Branch 1 taken 50 times.
|
270 | if (EXTERNAL_MMXEXT(cpu_flags)) { |
1203 | 220 | c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext; | |
1204 | } | ||
1205 |
2/2✓ Branch 0 taken 180 times.
✓ Branch 1 taken 90 times.
|
270 | if (EXTERNAL_SSE2(cpu_flags)) { |
1206 | 180 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2; | |
1207 | 180 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2; | |
1208 | if (ARCH_X86_64) { | ||
1209 | 180 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2; | |
1210 | 180 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2; | |
1211 | } | ||
1212 | 180 | SAO_BAND_INIT(12, sse2); | |
1213 | 180 | SAO_EDGE_INIT(12, sse2); | |
1214 | |||
1215 | 180 | c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2; | |
1216 | 180 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2; | |
1217 | 180 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2; | |
1218 | } | ||
1219 |
2/2✓ Branch 0 taken 140 times.
✓ Branch 1 taken 130 times.
|
270 | if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { |
1220 | 140 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3; | |
1221 | 140 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3; | |
1222 | } | ||
1223 |
2/2✓ Branch 0 taken 120 times.
✓ Branch 1 taken 150 times.
|
270 | if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { |
1224 | 120 | EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4); | |
1225 | 120 | EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4); | |
1226 | 120 | EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4); | |
1227 | 120 | EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 12, sse4); | |
1228 | |||
1229 | 120 | QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4); | |
1230 | 120 | QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4); | |
1231 | 120 | QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4); | |
1232 | 120 | QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4); | |
1233 | } | ||
1234 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 210 times.
|
270 | if (EXTERNAL_AVX(cpu_flags)) { |
1235 | 60 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx; | |
1236 | 60 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx; | |
1237 | if (ARCH_X86_64) { | ||
1238 | 60 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx; | |
1239 | 60 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx; | |
1240 | } | ||
1241 | 60 | SAO_BAND_INIT(12, avx); | |
1242 | } | ||
1243 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 250 times.
|
270 | if (EXTERNAL_AVX2(cpu_flags)) { |
1244 | 20 | c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2; | |
1245 | } | ||
1246 |
3/4✓ Branch 0 taken 20 times.
✓ Branch 1 taken 250 times.
✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
|
270 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
1247 | 20 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2; | |
1248 | 20 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2; | |
1249 | |||
1250 | 20 | SAO_BAND_INIT(12, avx2); | |
1251 | 20 | SAO_EDGE_INIT(12, avx2); | |
1252 | } | ||
1253 | } | ||
1254 | 1480 | } | |
1255 |