| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (c) 2013 Seppo Tomperi | ||
| 3 | * Copyright (c) 2013-2014 Pierre-Edouard Lepere | ||
| 4 | * Copyright (c) 2023-2024 Wu Jianhua | ||
| 5 | * | ||
| 6 | * This file is part of FFmpeg. | ||
| 7 | * | ||
| 8 | * FFmpeg is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU Lesser General Public | ||
| 10 | * License as published by the Free Software Foundation; either | ||
| 11 | * version 2.1 of the License, or (at your option) any later version. | ||
| 12 | * | ||
| 13 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 16 | * Lesser General Public License for more details. | ||
| 17 | * | ||
| 18 | * You should have received a copy of the GNU Lesser General Public | ||
| 19 | * License along with FFmpeg; if not, write to the Free Software | ||
| 20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include "config.h" | ||
| 24 | |||
| 25 | #include "libavutil/cpu.h" | ||
| 26 | #include "libavutil/mem_internal.h" | ||
| 27 | #include "libavutil/x86/asm.h" | ||
| 28 | #include "libavutil/x86/cpu.h" | ||
| 29 | #include "libavcodec/hevc/dsp.h" | ||
| 30 | #include "libavcodec/x86/hevc/dsp.h" | ||
| 31 | #include "libavcodec/x86/h26x/h2656dsp.h" | ||
| 32 | |||
| 33 | #define LFC_FUNC(DIR, DEPTH, OPT) /* Declares (does not define) ff_hevc_<DIR>_loop_filter_chroma_<DEPTH>_<OPT>(pix, stride, tc, no_p, no_q). NOTE(review): the definition is not visible in this file; presumably external assembly - confirm. */ \ | ||
| 34 | void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q); | ||
| 35 | |||
| 36 | #define LFL_FUNC(DIR, DEPTH, OPT) /* Declares (does not define) ff_hevc_<DIR>_loop_filter_luma_<DEPTH>_<OPT>(pix, stride, beta, tc, no_p, no_q). NOTE(review): the definition is not visible in this file; presumably external assembly - confirm. */ \ | ||
| 37 | void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, const int *tc, const uint8_t *no_p, const uint8_t *no_q); | ||
| 38 | |||
| 39 | #define LFC_FUNCS(type, depth, opt) /* Expands LFC_FUNC for both the h and v directions; the type argument is accepted but never used in the expansion. */ \ | ||
| 40 | LFC_FUNC(h, depth, opt) \ | ||
| 41 | LFC_FUNC(v, depth, opt) | ||
| 42 | |||
| 43 | #define LFL_FUNCS(type, depth, opt) /* Expands LFL_FUNC for both the h and v directions; the type argument is accepted but never used in the expansion. */ \ | ||
| 44 | LFL_FUNC(h, depth, opt) \ | ||
| 45 | LFL_FUNC(v, depth, opt) | ||
| 46 | |||
| 47 | LFC_FUNCS(uint8_t, 8, sse2) /* chroma loop-filter prototypes: 8, 10 and 12 bit for sse2 and avx */ | ||
| 48 | LFC_FUNCS(uint8_t, 10, sse2) | ||
| 49 | LFC_FUNCS(uint8_t, 12, sse2) | ||
| 50 | LFC_FUNCS(uint8_t, 8, avx) | ||
| 51 | LFC_FUNCS(uint8_t, 10, avx) | ||
| 52 | LFC_FUNCS(uint8_t, 12, avx) | ||
| 53 | LFL_FUNCS(uint8_t, 8, sse2) /* luma loop-filter prototypes: 8, 10 and 12 bit for sse2, ssse3 and avx */ | ||
| 54 | LFL_FUNCS(uint8_t, 10, sse2) | ||
| 55 | LFL_FUNCS(uint8_t, 12, sse2) | ||
| 56 | LFL_FUNCS(uint8_t, 8, ssse3) | ||
| 57 | LFL_FUNCS(uint8_t, 10, ssse3) | ||
| 58 | LFL_FUNCS(uint8_t, 12, ssse3) | ||
| 59 | LFL_FUNCS(uint8_t, 8, avx) | ||
| 60 | LFL_FUNCS(uint8_t, 10, avx) | ||
| 61 | LFL_FUNCS(uint8_t, 12, avx) | ||
| 62 | |||
| 63 | #define IDCT_DC_FUNCS(W, opt) /* Declares ff_hevc_idct_<W>_dc_{8,10,12}_<opt>(int16_t *coeffs). The last declaration has no trailing semicolon, so each invocation must supply it. */ \ | ||
| 64 | void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \ | ||
| 65 | void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \ | ||
| 66 | void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs) | ||
| 67 | |||
| 68 | IDCT_DC_FUNCS(4x4, sse2); /* DC-only IDCT prototypes: all four sizes for sse2, only 16x16 and 32x32 for avx2 */ | ||
| 69 | IDCT_DC_FUNCS(8x8, sse2); | ||
| 70 | IDCT_DC_FUNCS(16x16, sse2); | ||
| 71 | IDCT_DC_FUNCS(32x32, sse2); | ||
| 72 | IDCT_DC_FUNCS(16x16, avx2); | ||
| 73 | IDCT_DC_FUNCS(32x32, avx2); | ||
| 74 | |||
| 75 | #define IDCT_FUNCS(opt) /* Declares ff_hevc_idct_{4x4,8x8,16x16,32x32}_{8,10}_<opt>(coeffs, col_limit). Every declaration carries its own semicolon, so invocations are written without one. */ \ | ||
| 76 | void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \ | ||
| 77 | void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \ | ||
| 78 | void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \ | ||
| 79 | void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \ | ||
| 80 | void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \ | ||
| 81 | void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \ | ||
| 82 | void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \ | ||
| 83 | void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit); | ||
| 84 | |||
| 85 | IDCT_FUNCS(sse2) /* full IDCT prototypes (8 and 10 bit) for sse2 and avx */ | ||
| 86 | IDCT_FUNCS(avx) | ||
| 87 | |||
| 88 | |||
| 89 | #define ff_hevc_pel_filters ff_hevc_qpel_filters /* alias so that DECL_HV_FILTER(pel) pastes to an existing table name */ | ||
| 90 | #define DECL_HV_FILTER(f) /* Declares locals hf and vf as rows mx and my of ff_hevc_<f>_filters; mx and my must be in scope at the expansion site. */ \ | ||
| 91 | const uint8_t *hf = ff_hevc_ ## f ## _filters[mx]; \ | ||
| 92 | const uint8_t *vf = ff_hevc_ ## f ## _filters[my]; | ||
| 93 | |||
| 94 | #define FW_PUT(p, a, b, depth, opt) /* Defines static hevc_put_<a>_<depth>_<opt>(), a forwarding wrapper around ff_h2656_put_<b>_<depth>_<opt>(); p selects the filter table used by DECL_HV_FILTER. */ \ | ||
| 95 | static void hevc_put_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \ | ||
| 96 | int height, intptr_t mx, intptr_t my,int width) \ | ||
| 97 | { \ | ||
| 98 | DECL_HV_FILTER(p) /* introduces hf and vf from mx and my */ \ | ||
| 99 | ff_h2656_put_ ## b ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); /* the destination stride is fixed at 2 * MAX_PB_SIZE */ \ | ||
| 100 | } | ||
| 101 | |||
| 102 | #define FW_PUT_UNI(p, a, b, depth, opt) /* Defines static hevc_put_uni_<a>_<depth>_<opt>(), a forwarding wrapper around ff_h2656_put_uni_<b>_<depth>_<opt>(); p selects the filter table used by DECL_HV_FILTER. */ \ | ||
| 103 | static void hevc_put_uni_ ## a ## _ ## depth ## _##opt(uint8_t *dst, ptrdiff_t dststride, \ | ||
| 104 | const uint8_t *src, ptrdiff_t srcstride, \ | ||
| 105 | int height, intptr_t mx, intptr_t my, int width) \ | ||
| 106 | { \ | ||
| 107 | DECL_HV_FILTER(p) /* introduces hf and vf from mx and my */ \ | ||
| 108 | ff_h2656_put_uni_ ## b ## _ ## depth ## _##opt(dst, dststride, src, srcstride, height, hf, vf, width); /* the caller's dststride is passed through */ \ | ||
| 109 | } | ||
| 110 | |||
| 111 | #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL | ||
| 112 | |||
| 113 | #define FW_PUT_FUNCS(p, a, b, depth, opt) /* Generates both the put and the put_uni forwarding wrapper for one (a, b, depth, opt) combination. */ \ | ||
| 114 | FW_PUT(p, a, b, depth, opt) \ | ||
| 115 | FW_PUT_UNI(p, a, b, depth, opt) | ||
| 116 | |||
| 117 | #define FW_PEL(w, depth, opt) FW_PUT_FUNCS(pel, pel_pixels##w, pixels##w, depth, opt) /* wrappers hevc_put[_uni]_pel_pixels<w>_* forwarding to ff_h2656_put[_uni]_pixels<w>_* */ | ||
| 118 | |||
| 119 | #define FW_DIR(npel, n, w, depth, opt) /* Generates the horizontal-only (_h<w>) and vertical-only (_v<w>) wrappers; n is pasted into the callee name as <n>tap. */ \ | ||
| 120 | FW_PUT_FUNCS(npel, npel ## _h##w, n ## tap_h##w, depth, opt) \ | ||
| 121 | FW_PUT_FUNCS(npel, npel ## _v##w, n ## tap_v##w, depth, opt) | ||
| 122 | |||
| 123 | #define FW_DIR_HV(npel, n, w, depth, opt) /* Generates the combined horizontal+vertical (_hv<w>) wrappers; n is pasted into the callee name as <n>tap. */ \ | ||
| 124 | FW_PUT_FUNCS(npel, npel ## _hv##w, n ## tap_hv##w, depth, opt) | ||
| 125 | |||
| 126 | 17410 | FW_PEL(4, 8, sse4) /* sse4 pel wrappers: widths 4, 6, 8, 12, 16 at 8 bit; widths 4, 6, 8 at 10 and 12 bit */ | |
| 127 | 76 | FW_PEL(6, 8, sse4) | |
| 128 | 26322 | FW_PEL(8, 8, sse4) | |
| 129 | 76 | FW_PEL(12, 8, sse4) | |
| 130 | 150684 | FW_PEL(16, 8, sse4) | |
| 131 | 2400 | FW_PEL(4, 10, sse4) | |
| 132 | 76 | FW_PEL(6, 10, sse4) | |
| 133 | 33624 | FW_PEL(8, 10, sse4) | |
| 134 | 304 | FW_PEL(4, 12, sse4) | |
| 135 | 76 | FW_PEL(6, 12, sse4) | |
| 136 | 1824 | FW_PEL(8, 12, sse4) | |
| 137 | |||
| 138 | #define FW_EPEL(w, depth, opt) FW_DIR(epel, 4, w, depth, opt) /* epel h and v wrappers; forwards to the 4tap callees */ | ||
| 139 | #define FW_EPEL_HV(w, depth, opt) FW_DIR_HV(epel, 4, w, depth, opt) /* epel hv wrappers; forwards to the 4tap callees */ | ||
| 140 | #define FW_EPEL_FUNCS(w, depth, opt) /* h, v and hv epel wrappers for one width/depth/opt */ \ | ||
| 141 | FW_EPEL(w, depth, opt) \ | ||
| 142 | FW_EPEL_HV(w, depth, opt) | ||
| 143 | |||
| 144 | 152 | FW_EPEL(12, 8, sse4) /* width 12 at 8 bit: h and v only, this line generates no hv wrapper */ | |
| 145 | |||
| 146 | 10508 | FW_EPEL_FUNCS(4, 8, sse4) /* h, v and hv for the remaining width/depth pairs */ | |
| 147 | 228 | FW_EPEL_FUNCS(6, 8, sse4) | |
| 148 | 12880 | FW_EPEL_FUNCS(8, 8, sse4) | |
| 149 | 21140 | FW_EPEL_FUNCS(16, 8, sse4) | |
| 150 | 6884 | FW_EPEL_FUNCS(4, 10, sse4) | |
| 151 | 228 | FW_EPEL_FUNCS(6, 10, sse4) | |
| 152 | 31064 | FW_EPEL_FUNCS(8, 10, sse4) | |
| 153 | 912 | FW_EPEL_FUNCS(4, 12, sse4) | |
| 154 | 228 | FW_EPEL_FUNCS(6, 12, sse4) | |
| 155 | 5472 | FW_EPEL_FUNCS(8, 12, sse4) | |
| 156 | |||
| 157 | #define FW_QPEL(w, depth, opt) FW_DIR(qpel, 8, w, depth, opt) /* qpel h and v wrappers; forwards to the 8tap callees */ | ||
| 158 | #define FW_QPEL_HV(w, depth, opt) FW_DIR_HV(qpel, 8, w, depth, opt) /* qpel hv wrappers; forwards to the 8tap callees */ | ||
| 159 | #define FW_QPEL_FUNCS(w, depth, opt) /* h, v and hv qpel wrappers for one width/depth/opt */ \ | ||
| 160 | FW_QPEL(w, depth, opt) \ | ||
| 161 | FW_QPEL_HV(w, depth, opt) | ||
| 162 | |||
| 163 | 152 | FW_QPEL(12, 8, sse4) /* widths 12 and 16 at 8 bit: h and v only, these lines generate no hv wrapper */ | |
| 164 | 1638 | FW_QPEL(16, 8, sse4) | |
| 165 | |||
| 166 | 306 | FW_QPEL_FUNCS(4, 8, sse4) /* h, v and hv for widths 4 and 8 at 8, 10 and 12 bit */ | |
| 167 | 4518 | FW_QPEL_FUNCS(8, 8, sse4) | |
| 168 | 912 | FW_QPEL_FUNCS(4, 10, sse4) | |
| 169 | 14268 | FW_QPEL_FUNCS(8, 10, sse4) | |
| 170 | 912 | FW_QPEL_FUNCS(4, 12, sse4) | |
| 171 | 5472 | FW_QPEL_FUNCS(8, 12, sse4) | |
| 172 | |||
| 173 | #if HAVE_AVX2_EXTERNAL | ||
| 174 | |||
| 175 | 27552 | FW_PEL(32, 8, avx2) /* avx2 wrappers: width 32 at 8 bit, width 16 at 10 bit */ | |
| 176 | 3028 | FW_PUT(pel, pel_pixels16, pixels16, 10, avx2) /* put wrapper only; no put_uni wrapper is generated for this combination */ | |
| 177 | |||
| 178 | 1396 | FW_EPEL(32, 8, avx2) | |
| 179 | 5112 | FW_EPEL(16, 10, avx2) | |
| 180 | |||
| 181 | 752 | FW_EPEL_HV(32, 8, avx2) | |
| 182 | 6732 | FW_EPEL_HV(16, 10, avx2) | |
| 183 | |||
| 184 | 32 | FW_QPEL(32, 8, avx2) | |
| 185 | 8960 | FW_QPEL(16, 10, avx2) | |
| 186 | |||
| 187 | 11354 | FW_QPEL_HV(16, 10, avx2) /* qpel hv is generated for 10 bit only in this group */ | |
| 188 | |||
| 189 | #endif | ||
| 190 | #endif | ||
| 191 | |||
| 192 | #define mc_rep_func(name, bitd, step, W, opt) /* Defines hevc_put_<name><W>_<bitd>_<opt>(), which covers W columns by calling the <step>-wide variant once per step-column slice. */ \ | ||
| 193 | static void hevc_put_##name##W##_##bitd##_##opt(int16_t *_dst, \ | ||
| 194 | const uint8_t *_src, ptrdiff_t _srcstride, int height, \ | ||
| 195 | intptr_t mx, intptr_t my, int width) \ | ||
| 196 | { \ | ||
| 197 | int i; \ | ||
| 198 | int16_t *dst; \ | ||
| 199 | for (i = 0; i < W; i += step) { \ | ||
| 200 | const uint8_t *src = _src + (i * ((bitd + 7) / 8)); /* byte offset of column i: (bitd + 7) / 8 rounds bitd up to whole bytes */ \ | ||
| 201 | dst = _dst + i; /* int16_t pointer, so the offset is in elements */ \ | ||
| 202 | hevc_put_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); /* width is forwarded unchanged */ \ | ||
| 203 | } \ | ||
| 204 | } | ||
| 205 | #define mc_rep_uni_func(name, bitd, step, W, opt) /* Defines hevc_put_uni_<name><W>_<bitd>_<opt>(), which covers W columns by calling the <step>-wide uni variant once per step-column slice. */ \ | ||
| 206 | static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \ | ||
| 207 | const uint8_t *_src, ptrdiff_t _srcstride, int height, \ | ||
| 208 | intptr_t mx, intptr_t my, int width) \ | ||
| 209 | { \ | ||
| 210 | int i; \ | ||
| 211 | uint8_t *dst; \ | ||
| 212 | for (i = 0; i < W; i += step) { \ | ||
| 213 | const uint8_t *src = _src + (i * ((bitd + 7) / 8)); /* byte offset of column i: (bitd + 7) / 8 rounds bitd up to whole bytes */ \ | ||
| 214 | dst = _dst + (i * ((bitd + 7) / 8)); /* dst is a byte pointer too, so it gets the same byte offset as src */ \ | ||
| 215 | hevc_put_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \ | ||
| 216 | height, mx, my, width); \ | ||
| 217 | } \ | ||
| 218 | } | ||
| 219 | #define mc_rep_bi_func(name, bitd, step, W, opt) /* Defines static ff_hevc_put_bi_<name><W>_<bitd>_<opt>(), which covers W columns by calling the <step>-wide bi variant once per slice. NOTE(review): the step-wide callee is not defined in the visible part of this file; presumably declared in an included header - confirm. */ \ | ||
| 220 | static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \ | ||
| 221 | ptrdiff_t _srcstride, const int16_t *_src2, \ | ||
| 222 | int height, intptr_t mx, intptr_t my, int width) \ | ||
| 223 | { \ | ||
| 224 | int i; \ | ||
| 225 | uint8_t *dst; \ | ||
| 226 | for (i = 0; i < W ; i += step) { \ | ||
| 227 | const uint8_t *src = _src + (i * ((bitd + 7) / 8)); /* byte offset of column i: (bitd + 7) / 8 rounds bitd up to whole bytes */ \ | ||
| 228 | const int16_t *src2 = _src2 + i; /* int16_t pointer, so the offset is in elements */ \ | ||
| 229 | dst = _dst + (i * ((bitd + 7) / 8)); /* byte pointer, same byte offset as src */ \ | ||
| 230 | ff_hevc_put_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \ | ||
| 231 | height, mx, my, width); \ | ||
| 232 | } \ | ||
| 233 | } | ||
| 234 | |||
| 235 | #define mc_rep_funcs(name, bitd, step, W, opt) /* Generates the put, put_uni and put_bi repeat wrappers for one (name, bitd, step, W, opt). */ \ | ||
| 236 | mc_rep_func(name, bitd, step, W, opt) \ | ||
| 237 | mc_rep_uni_func(name, bitd, step, W, opt) \ | ||
| 238 | mc_rep_bi_func(name, bitd, step, W, opt) | ||
| 239 | |||
| 240 | #define mc_rep_func2(name, bitd, step1, step2, W, opt) /* Defines hevc_put_<name><W>_<bitd>_<opt>() as a <step1>-wide call followed by a <step2>-wide call. W only forms the function name; the macro does not check that W == step1 + step2. */ \ | ||
| 241 | static void hevc_put_##name##W##_##bitd##_##opt(int16_t *dst, \ | ||
| 242 | const uint8_t *src, ptrdiff_t _srcstride, int height, \ | ||
| 243 | intptr_t mx, intptr_t my, int width) \ | ||
| 244 | { \ | ||
| 245 | hevc_put_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); /* first step1 columns */ \ | ||
| 246 | hevc_put_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)), /* dst advances in int16_t elements, src in bytes */ \ | ||
| 247 | _srcstride, height, mx, my, width); \ | ||
| 248 | } | ||
| 249 | #define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) /* Defines hevc_put_uni_<name><W>_<bitd>_<opt>() as a <step1>-wide call followed by a <step2>-wide call. W only forms the function name; the macro does not check that W == step1 + step2. */ \ | ||
| 250 | static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \ | ||
| 251 | const uint8_t *src, ptrdiff_t _srcstride, int height, \ | ||
| 252 | intptr_t mx, intptr_t my, int width) \ | ||
| 253 | { \ | ||
| 254 | hevc_put_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width); /* first step1 columns */ \ | ||
| 255 | hevc_put_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, /* dst and src are byte pointers and advance by the same byte offset */ \ | ||
| 256 | src + (step1 * ((bitd + 7) / 8)), _srcstride, \ | ||
| 257 | height, mx, my, width); \ | ||
| 258 | } | ||
| 259 | #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) /* Defines static ff_hevc_put_bi_<name><W>_<bitd>_<opt>() as a <step1>-wide call followed by a <step2>-wide call. W only forms the function name; the macro does not check that W == step1 + step2. */ \ | ||
| 260 | static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
| 261 | ptrdiff_t _srcstride, const int16_t *src2, \ | ||
| 262 | int height, intptr_t mx, intptr_t my, int width) \ | ||
| 263 | { \ | ||
| 264 | ff_hevc_put_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width);\ | ||
| 265 | ff_hevc_put_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, /* dst and src advance in bytes */ \ | ||
| 266 | src + (step1 * ((bitd + 7) / 8)), _srcstride, \ | ||
| 267 | src2 + step1, height, mx, my, width); /* src2 is int16_t, so it advances in elements */ \ | ||
| 268 | } | ||
| 269 | |||
| 270 | #define mc_rep_funcs2(name, bitd, step1, step2, W, opt) /* Generates the put, put_uni and put_bi two-piece wrappers for one (name, bitd, step1, step2, W, opt). */ \ | ||
| 271 | mc_rep_func2(name, bitd, step1, step2, W, opt) \ | ||
| 272 | mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \ | ||
| 273 | mc_rep_bi_func2(name, bitd, step1, step2, W, opt) | ||
| 274 | |||
| 275 | #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL | ||
| 276 | |||
| 277 | #define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) /* Defines hevc_put_<name><width1>_10_<opt1>() as a <width2>-wide <opt1> call followed by a <width3>-wide <opt2> call. width4 is the byte offset applied to src for the second call; the macro does not derive it from width2. */ \ | ||
| 278 | static void hevc_put_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \ | ||
| 279 | int height, intptr_t mx, intptr_t my, int width) \ | ||
| 280 | \ | ||
| 281 | { \ | ||
| 282 | hevc_put_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \ | ||
| 283 | hevc_put_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width); /* dst advances by width2 int16_t elements, src by width4 bytes */ \ | ||
| 284 | } | ||
| 285 | |||
| 286 | #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) /* Defines static ff_hevc_put_bi_<name><width1>_10_<opt1>() as a <width2>-wide <opt1> call followed by a <width3>-wide <opt2> call. width4 is the byte offset applied to dst and src for the second call. */ \ | ||
| 287 | static void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
| 288 | ptrdiff_t _srcstride, const int16_t *src2, \ | ||
| 289 | int height, intptr_t mx, intptr_t my, int width) \ | ||
| 290 | { \ | ||
| 291 | ff_hevc_put_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \ | ||
| 292 | height, mx, my, width); \ | ||
| 293 | ff_hevc_put_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2, /* byte pointers advance by width4, the int16_t src2 by width2 elements */ \ | ||
| 294 | height, mx, my, width); \ | ||
| 295 | } | ||
| 296 | |||
| 297 | #define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) /* Defines hevc_put_uni_<name><width1>_10_<opt1>() as a <width2>-wide <opt1> call followed by a <width3>-wide <opt2> call. width4 is the byte offset applied to dst and src for the second call. */ \ | ||
| 298 | static void hevc_put_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \ | ||
| 299 | const uint8_t *src, ptrdiff_t _srcstride, int height, \ | ||
| 300 | intptr_t mx, intptr_t my, int width) \ | ||
| 301 | { \ | ||
| 302 | hevc_put_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \ | ||
| 303 | height, mx, my, width); \ | ||
| 304 | hevc_put_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, /* both byte pointers advance by width4 */ \ | ||
| 305 | height, mx, my, width); \ | ||
| 306 | } | ||
| 307 | |||
| 308 | #define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) /* Generates the put, put_bi and put_uni 10-bit mixed-opt wrappers for one parameter set. */ \ | ||
| 309 | mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ | ||
| 310 | mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ | ||
| 311 | mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) | ||
| 312 | |||
| 313 | #define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) /* Defines hevc_put_<name><width1>_8_<opt1>() as a <width2>-wide <opt1> call followed by a <width3>-wide <opt2> call. */ \ | ||
| 314 | static void hevc_put_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \ | ||
| 315 | int height, intptr_t mx, intptr_t my, int width) \ | ||
| 316 | \ | ||
| 317 | { \ | ||
| 318 | hevc_put_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \ | ||
| 319 | hevc_put_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width); /* width2 int16_t elements for dst, width2 bytes for src */ \ | ||
| 320 | } | ||
| 321 | |||
| 322 | #define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) /* Defines static ff_hevc_put_bi_<name><width1>_8_<opt1>() as a <width2>-wide <opt1> call followed by a <width3>-wide <opt2> call. */ \ | ||
| 323 | static void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
| 324 | ptrdiff_t _srcstride, const int16_t *src2, \ | ||
| 325 | int height, intptr_t mx, intptr_t my, int width) \ | ||
| 326 | { \ | ||
| 327 | ff_hevc_put_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ | ||
| 328 | src2, height, mx, my, width); \ | ||
| 329 | ff_hevc_put_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, /* byte pointers advance by width2 bytes */ \ | ||
| 330 | src2+width2, height, mx, my, width); /* the int16_t src2 advances by width2 elements */ \ | ||
| 331 | } | ||
| 332 | |||
| 333 | #define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) /* Defines hevc_put_uni_<name><width1>_8_<opt1>() as a <width2>-wide <opt1> call followed by a <width3>-wide <opt2> call. */ \ | ||
| 334 | static void hevc_put_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \ | ||
| 335 | const uint8_t *src, ptrdiff_t _srcstride, int height, \ | ||
| 336 | intptr_t mx, intptr_t my, int width) \ | ||
| 337 | { \ | ||
| 338 | hevc_put_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ | ||
| 339 | height, mx, my, width); \ | ||
| 340 | hevc_put_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, /* both byte pointers advance by width2 bytes */ \ | ||
| 341 | height, mx, my, width); \ | ||
| 342 | } | ||
| 343 | |||
| 344 | #define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) /* Generates the put, put_bi and put_uni 8-bit mixed-opt wrappers for one parameter set. */ \ | ||
| 345 | mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ | ||
| 346 | mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ | ||
| 347 | mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) | ||
| 348 | |||
| 349 | #if HAVE_AVX2_EXTERNAL | ||
| 350 | |||
| 351 | 8 | mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4) /* 8 bit: width 48 built from a 32-wide avx2 call plus a 16-wide sse4 call */ | |
| 352 | 6 | mc_rep_mixs_8(epel_hv, 48, 32, 16, avx2, sse4) | |
| 353 | 6 | mc_rep_mixs_8(epel_h , 48, 32, 16, avx2, sse4) | |
| 354 | 6 | mc_rep_mixs_8(epel_v , 48, 32, 16, avx2, sse4) | |
| 355 | |||
| 356 | 5 | mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32) /* 10 bit: width 24 built from 16-wide avx2 plus 8-wide sse4; the second call starts 32 bytes into src */ | |
| 357 | 1 | mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32) /* only put and put_bi are instantiated for pel_pixels here; there is no mc_uni_rep_mix_10 line */ | |
| 358 | 6 | mc_rep_mixs_10(epel_hv, 24, 16, 8, avx2, sse4, 32) | |
| 359 | 6 | mc_rep_mixs_10(epel_h , 24, 16, 8, avx2, sse4, 32) | |
| 360 | 6 | mc_rep_mixs_10(epel_v , 24, 16, 8, avx2, sse4, 32) | |
| 361 | |||
| 362 | |||
| 363 | 6 | mc_rep_mixs_10(qpel_h , 24, 16, 8, avx2, sse4, 32) | |
| 364 | 6 | mc_rep_mixs_10(qpel_v , 24, 16, 8, avx2, sse4, 32) | |
| 365 | 6 | mc_rep_mixs_10(qpel_hv, 24, 16, 8, avx2, sse4, 32) | |
| 366 | |||
| 367 | |||
| 368 |
2/2✓ Branch 1 taken 6722 times.
✓ Branch 2 taken 3361 times.
|
20166 | mc_rep_funcs(pel_pixels, 8, 32, 64, avx2) |
| 369 | |||
| 370 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 78 times.
|
234 | mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2)//used for 10bit |
| 371 |
2/2✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
|
4 | mc_rep_uni_func(pel_pixels, 8, 32, 96, avx2) //used for 10bit |
| 372 | |||
| 373 |
2/2✓ Branch 1 taken 2026 times.
✓ Branch 2 taken 1013 times.
|
3039 | mc_rep_func(pel_pixels, 10, 16, 32, avx2) |
| 374 |
2/2✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
|
4 | mc_rep_func(pel_pixels, 10, 16, 48, avx2) |
| 375 |
2/2✓ Branch 1 taken 304 times.
✓ Branch 2 taken 152 times.
|
456 | mc_rep_func(pel_pixels, 10, 32, 64, avx2) |
| 376 | |||
| 377 |
2/2✓ Branch 1 taken 176 times.
✓ Branch 2 taken 88 times.
|
264 | mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2) |
| 378 |
2/2✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
|
4 | mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2) |
| 379 |
2/2✓ Branch 1 taken 42 times.
✓ Branch 2 taken 21 times.
|
63 | mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2) |
| 380 | |||
| 381 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(epel_h, 8, 32, 64, avx2) |
| 382 | |||
| 383 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(epel_v, 8, 32, 64, avx2) |
| 384 | |||
| 385 |
2/2✓ Branch 1 taken 1290 times.
✓ Branch 2 taken 645 times.
|
3870 | mc_rep_funcs(epel_h, 10, 16, 32, avx2) |
| 386 |
2/2✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
|
24 | mc_rep_funcs(epel_h, 10, 16, 48, avx2) |
| 387 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(epel_h, 10, 32, 64, avx2) |
| 388 | |||
| 389 |
2/2✓ Branch 1 taken 154 times.
✓ Branch 2 taken 77 times.
|
462 | mc_rep_funcs(epel_v, 10, 16, 32, avx2) |
| 390 |
2/2✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
|
24 | mc_rep_funcs(epel_v, 10, 16, 48, avx2) |
| 391 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(epel_v, 10, 32, 64, avx2) |
| 392 | |||
| 393 | |||
| 394 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(epel_hv, 8, 32, 64, avx2) |
| 395 | |||
| 396 |
2/2✓ Branch 1 taken 1598 times.
✓ Branch 2 taken 799 times.
|
4794 | mc_rep_funcs(epel_hv, 10, 16, 32, avx2) |
| 397 |
2/2✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
|
24 | mc_rep_funcs(epel_hv, 10, 16, 48, avx2) |
| 398 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(epel_hv, 10, 32, 64, avx2) |
| 399 | |||
| 400 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(qpel_h, 8, 32, 64, avx2) |
| 401 | 6 | mc_rep_mixs_8(qpel_h , 48, 32, 16, avx2, sse4) | |
| 402 | |||
| 403 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
|
18 | mc_rep_funcs(qpel_v, 8, 32, 64, avx2) |
| 404 | 6 | mc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4) | |
| 405 | |||
| 406 |
2/2✓ Branch 1 taken 1982 times.
✓ Branch 2 taken 991 times.
|
5946 | mc_rep_funcs(qpel_h, 10, 16, 32, avx2) |
| 407 |
2/2✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
|
24 | mc_rep_funcs(qpel_h, 10, 16, 48, avx2) |
| 408 |
2/2✓ Branch 1 taken 512 times.
✓ Branch 2 taken 256 times.
|
1536 | mc_rep_funcs(qpel_h, 10, 32, 64, avx2) |
| 409 | |||
| 410 |
2/2✓ Branch 1 taken 910 times.
✓ Branch 2 taken 455 times.
|
2730 | mc_rep_funcs(qpel_v, 10, 16, 32, avx2) |
| 411 |
2/2✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
|
24 | mc_rep_funcs(qpel_v, 10, 16, 48, avx2) |
| 412 |
2/2✓ Branch 1 taken 202 times.
✓ Branch 2 taken 101 times.
|
606 | mc_rep_funcs(qpel_v, 10, 32, 64, avx2) |
| 413 | |||
| 414 |
2/2✓ Branch 1 taken 3116 times.
✓ Branch 2 taken 1558 times.
|
9348 | mc_rep_funcs(qpel_hv, 10, 16, 32, avx2) |
| 415 |
2/2✓ Branch 1 taken 9 times.
✓ Branch 2 taken 3 times.
|
24 | mc_rep_funcs(qpel_hv, 10, 16, 48, avx2) |
| 416 |
2/2✓ Branch 1 taken 668 times.
✓ Branch 2 taken 334 times.
|
2004 | mc_rep_funcs(qpel_hv, 10, 32, 64, avx2) |
| 417 | |||
| 418 | #endif //AVX2 | ||
| 419 | |||
| 420 |
2/2✓ Branch 1 taken 30924 times.
✓ Branch 2 taken 7731 times.
|
77310 | mc_rep_funcs(pel_pixels, 8, 16, 64, sse4) |
| 421 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(pel_pixels, 8, 16, 48, sse4) |
| 422 |
2/2✓ Branch 1 taken 33874 times.
✓ Branch 2 taken 16937 times.
|
101622 | mc_rep_funcs(pel_pixels, 8, 16, 32, sse4) |
| 423 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(pel_pixels, 8, 8, 24, sse4) |
| 424 |
2/2✓ Branch 1 taken 2960 times.
✓ Branch 2 taken 370 times.
|
6660 | mc_rep_funcs(pel_pixels,10, 8, 64, sse4) |
| 425 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(pel_pixels,10, 8, 48, sse4) |
| 426 |
2/2✓ Branch 1 taken 6208 times.
✓ Branch 2 taken 1552 times.
|
15520 | mc_rep_funcs(pel_pixels,10, 8, 32, sse4) |
| 427 |
2/2✓ Branch 1 taken 144 times.
✓ Branch 2 taken 48 times.
|
384 | mc_rep_funcs(pel_pixels,10, 8, 24, sse4) |
| 428 |
2/2✓ Branch 1 taken 4020 times.
✓ Branch 2 taken 2010 times.
|
12060 | mc_rep_funcs(pel_pixels,10, 8, 16, sse4) |
| 429 |
2/2✓ Branch 1 taken 177 times.
✓ Branch 2 taken 59 times.
|
472 | mc_rep_funcs(pel_pixels,10, 4, 12, sse4) |
| 430 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(pel_pixels,12, 8, 64, sse4) |
| 431 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(pel_pixels,12, 8, 48, sse4) |
| 432 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(pel_pixels,12, 8, 32, sse4) |
| 433 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(pel_pixels,12, 8, 24, sse4) |
| 434 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(pel_pixels,12, 8, 16, sse4) |
| 435 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(pel_pixels,12, 4, 12, sse4) |
| 436 | |||
| 437 |
2/2✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
|
420 | mc_rep_funcs(epel_h, 8, 16, 64, sse4) |
| 438 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_h, 8, 16, 48, sse4) |
| 439 |
2/2✓ Branch 1 taken 1076 times.
✓ Branch 2 taken 538 times.
|
3228 | mc_rep_funcs(epel_h, 8, 16, 32, sse4) |
| 440 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_h, 8, 8, 24, sse4) |
| 441 |
2/2✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
|
756 | mc_rep_funcs(epel_h,10, 8, 64, sse4) |
| 442 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(epel_h,10, 8, 48, sse4) |
| 443 |
2/2✓ Branch 1 taken 1016 times.
✓ Branch 2 taken 254 times.
|
2540 | mc_rep_funcs(epel_h,10, 8, 32, sse4) |
| 444 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_h,10, 8, 24, sse4) |
| 445 |
2/2✓ Branch 1 taken 1140 times.
✓ Branch 2 taken 570 times.
|
3420 | mc_rep_funcs(epel_h,10, 8, 16, sse4) |
| 446 |
2/2✓ Branch 1 taken 129 times.
✓ Branch 2 taken 43 times.
|
344 | mc_rep_funcs(epel_h,10, 4, 12, sse4) |
| 447 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(epel_h,12, 8, 64, sse4) |
| 448 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(epel_h,12, 8, 48, sse4) |
| 449 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(epel_h,12, 8, 32, sse4) |
| 450 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_h,12, 8, 24, sse4) |
| 451 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(epel_h,12, 8, 16, sse4) |
| 452 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_h,12, 4, 12, sse4) |
| 453 |
2/2✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
|
420 | mc_rep_funcs(epel_v, 8, 16, 64, sse4) |
| 454 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_v, 8, 16, 48, sse4) |
| 455 |
2/2✓ Branch 1 taken 2056 times.
✓ Branch 2 taken 1028 times.
|
6168 | mc_rep_funcs(epel_v, 8, 16, 32, sse4) |
| 456 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_v, 8, 8, 24, sse4) |
| 457 |
2/2✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
|
756 | mc_rep_funcs(epel_v,10, 8, 64, sse4) |
| 458 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(epel_v,10, 8, 48, sse4) |
| 459 |
2/2✓ Branch 1 taken 296 times.
✓ Branch 2 taken 74 times.
|
740 | mc_rep_funcs(epel_v,10, 8, 32, sse4) |
| 460 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_v,10, 8, 24, sse4) |
| 461 |
2/2✓ Branch 1 taken 204 times.
✓ Branch 2 taken 102 times.
|
612 | mc_rep_funcs(epel_v,10, 8, 16, sse4) |
| 462 |
2/2✓ Branch 1 taken 141 times.
✓ Branch 2 taken 47 times.
|
376 | mc_rep_funcs(epel_v,10, 4, 12, sse4) |
| 463 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(epel_v,12, 8, 64, sse4) |
| 464 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(epel_v,12, 8, 48, sse4) |
| 465 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(epel_v,12, 8, 32, sse4) |
| 466 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_v,12, 8, 24, sse4) |
| 467 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(epel_v,12, 8, 16, sse4) |
| 468 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_v,12, 4, 12, sse4) |
| 469 |
2/2✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
|
420 | mc_rep_funcs(epel_hv, 8, 16, 64, sse4) |
| 470 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_hv, 8, 16, 48, sse4) |
| 471 |
2/2✓ Branch 1 taken 1860 times.
✓ Branch 2 taken 930 times.
|
5580 | mc_rep_funcs(epel_hv, 8, 16, 32, sse4) |
| 472 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_hv, 8, 8, 24, sse4) |
| 473 | 78 | mc_rep_funcs2(epel_hv,8, 8, 4, 12, sse4) | |
| 474 |
2/2✓ Branch 1 taken 336 times.
✓ Branch 2 taken 42 times.
|
756 | mc_rep_funcs(epel_hv,10, 8, 64, sse4) |
| 475 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(epel_hv,10, 8, 48, sse4) |
| 476 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 78 times.
|
780 | mc_rep_funcs(epel_hv,10, 8, 32, sse4) |
| 477 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(epel_hv,10, 8, 24, sse4) |
| 478 |
2/2✓ Branch 1 taken 236 times.
✓ Branch 2 taken 118 times.
|
708 | mc_rep_funcs(epel_hv,10, 8, 16, sse4) |
| 479 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_hv,10, 4, 12, sse4) |
| 480 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(epel_hv,12, 8, 64, sse4) |
| 481 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(epel_hv,12, 8, 48, sse4) |
| 482 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(epel_hv,12, 8, 32, sse4) |
| 483 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_hv,12, 8, 24, sse4) |
| 484 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(epel_hv,12, 8, 16, sse4) |
| 485 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(epel_hv,12, 4, 12, sse4) |
| 486 | |||
| 487 |
2/2✓ Branch 1 taken 168 times.
✓ Branch 2 taken 42 times.
|
420 | mc_rep_funcs(qpel_h, 8, 16, 64, sse4) |
| 488 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(qpel_h, 8, 16, 48, sse4) |
| 489 |
2/2✓ Branch 1 taken 84 times.
✓ Branch 2 taken 42 times.
|
252 | mc_rep_funcs(qpel_h, 8, 16, 32, sse4) |
| 490 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_h, 8, 8, 24, sse4) |
| 491 |
2/2✓ Branch 1 taken 1184 times.
✓ Branch 2 taken 148 times.
|
2664 | mc_rep_funcs(qpel_h,10, 8, 64, sse4) |
| 492 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(qpel_h,10, 8, 48, sse4) |
| 493 |
2/2✓ Branch 1 taken 1248 times.
✓ Branch 2 taken 312 times.
|
3120 | mc_rep_funcs(qpel_h,10, 8, 32, sse4) |
| 494 |
2/2✓ Branch 1 taken 132 times.
✓ Branch 2 taken 44 times.
|
352 | mc_rep_funcs(qpel_h,10, 8, 24, sse4) |
| 495 |
2/2✓ Branch 1 taken 424 times.
✓ Branch 2 taken 212 times.
|
1272 | mc_rep_funcs(qpel_h,10, 8, 16, sse4) |
| 496 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_h,10, 4, 12, sse4) |
| 497 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(qpel_h,12, 8, 64, sse4) |
| 498 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(qpel_h,12, 8, 48, sse4) |
| 499 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(qpel_h,12, 8, 32, sse4) |
| 500 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_h,12, 8, 24, sse4) |
| 501 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(qpel_h,12, 8, 16, sse4) |
| 502 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_h,12, 4, 12, sse4) |
| 503 |
2/2✓ Branch 1 taken 176 times.
✓ Branch 2 taken 44 times.
|
440 | mc_rep_funcs(qpel_v, 8, 16, 64, sse4) |
| 504 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(qpel_v, 8, 16, 48, sse4) |
| 505 |
2/2✓ Branch 1 taken 92 times.
✓ Branch 2 taken 46 times.
|
276 | mc_rep_funcs(qpel_v, 8, 16, 32, sse4) |
| 506 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_v, 8, 8, 24, sse4) |
| 507 |
2/2✓ Branch 1 taken 464 times.
✓ Branch 2 taken 58 times.
|
1044 | mc_rep_funcs(qpel_v,10, 8, 64, sse4) |
| 508 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(qpel_v,10, 8, 48, sse4) |
| 509 |
2/2✓ Branch 1 taken 304 times.
✓ Branch 2 taken 76 times.
|
760 | mc_rep_funcs(qpel_v,10, 8, 32, sse4) |
| 510 |
2/2✓ Branch 1 taken 138 times.
✓ Branch 2 taken 46 times.
|
368 | mc_rep_funcs(qpel_v,10, 8, 24, sse4) |
| 511 |
2/2✓ Branch 1 taken 96 times.
✓ Branch 2 taken 48 times.
|
288 | mc_rep_funcs(qpel_v,10, 8, 16, sse4) |
| 512 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_v,10, 4, 12, sse4) |
| 513 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(qpel_v,12, 8, 64, sse4) |
| 514 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(qpel_v,12, 8, 48, sse4) |
| 515 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(qpel_v,12, 8, 32, sse4) |
| 516 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_v,12, 8, 24, sse4) |
| 517 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(qpel_v,12, 8, 16, sse4) |
| 518 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_v,12, 4, 12, sse4) |
| 519 |
2/2✓ Branch 1 taken 928 times.
✓ Branch 2 taken 116 times.
|
2088 | mc_rep_funcs(qpel_hv, 8, 8, 64, sse4) |
| 520 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(qpel_hv, 8, 8, 48, sse4) |
| 521 |
2/2✓ Branch 1 taken 292 times.
✓ Branch 2 taken 73 times.
|
730 | mc_rep_funcs(qpel_hv, 8, 8, 32, sse4) |
| 522 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_hv, 8, 8, 24, sse4) |
| 523 |
2/2✓ Branch 1 taken 202 times.
✓ Branch 2 taken 101 times.
|
606 | mc_rep_funcs(qpel_hv, 8, 8, 16, sse4) |
| 524 | 78 | mc_rep_funcs2(qpel_hv,8, 8, 4, 12, sse4) | |
| 525 |
2/2✓ Branch 1 taken 480 times.
✓ Branch 2 taken 60 times.
|
1080 | mc_rep_funcs(qpel_hv,10, 8, 64, sse4) |
| 526 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 42 times.
|
588 | mc_rep_funcs(qpel_hv,10, 8, 48, sse4) |
| 527 |
2/2✓ Branch 1 taken 272 times.
✓ Branch 2 taken 68 times.
|
680 | mc_rep_funcs(qpel_hv,10, 8, 32, sse4) |
| 528 |
2/2✓ Branch 1 taken 126 times.
✓ Branch 2 taken 42 times.
|
336 | mc_rep_funcs(qpel_hv,10, 8, 24, sse4) |
| 529 |
2/2✓ Branch 1 taken 100 times.
✓ Branch 2 taken 50 times.
|
300 | mc_rep_funcs(qpel_hv,10, 8, 16, sse4) |
| 530 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_hv,10, 4, 12, sse4) |
| 531 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 39 times.
|
702 | mc_rep_funcs(qpel_hv,12, 8, 64, sse4) |
| 532 |
2/2✓ Branch 1 taken 234 times.
✓ Branch 2 taken 39 times.
|
546 | mc_rep_funcs(qpel_hv,12, 8, 48, sse4) |
| 533 |
2/2✓ Branch 1 taken 156 times.
✓ Branch 2 taken 39 times.
|
390 | mc_rep_funcs(qpel_hv,12, 8, 32, sse4) |
| 534 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_hv,12, 8, 24, sse4) |
| 535 |
2/2✓ Branch 1 taken 78 times.
✓ Branch 2 taken 39 times.
|
234 | mc_rep_funcs(qpel_hv,12, 8, 16, sse4) |
| 536 |
2/2✓ Branch 1 taken 117 times.
✓ Branch 2 taken 39 times.
|
312 | mc_rep_funcs(qpel_hv,12, 4, 12, sse4) |
| 537 | |||
/*
 * mc_rep_uni_w(bitd, step, W, opt)
 *
 * Defines ff_hevc_put_uni_w<W>_<bitd>_<opt>() by calling the narrower
 * ff_hevc_put_uni_w<step>_<bitd>_<opt>() once per column strip.
 *
 * For each strip offset i = 0, step, 2*step, ... (while i < W):
 *   - the int16_t source pointer advances by i elements;
 *   - the destination pointer advances by i * ((bitd + 7) / 8) bytes,
 *     i.e. one byte per sample for bitd == 8 and two bytes per sample
 *     for bitd == 10 or 12.
 * height, denom, _wx and _ox are forwarded unchanged to every call.
 *
 * The loop only tests i < W, so W is expected to be a multiple of step;
 * every instantiation in this file satisfies that.
 */
| 538 | #define mc_rep_uni_w(bitd, step, W, opt) \ | ||
| 539 | void ff_hevc_put_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \ | ||
| 540 | int height, int denom, int _wx, int _ox) \ | ||
| 541 | { \ | ||
| 542 | int i; \ | ||
| 543 | uint8_t *dst; \ | ||
| 544 | for (i = 0; i < W; i += step) { \ | ||
| 545 | const int16_t *src = _src + i; \ | ||
| 546 | dst= _dst + (i * ((bitd + 7) / 8)); \ | ||
| 547 | ff_hevc_put_uni_w##step##_##bitd##_##opt(dst, dststride, src, \ | ||
| 548 | height, denom, _wx, _ox); \ | ||
| 549 | } \ | ||
| 550 | } | ||
| 551 | |||
/*
 * 8-bit SSE4 uni-weighted wrappers generated by mc_rep_uni_w:
 * width 12 is built from two calls to the 6-wide primitive, widths
 * 16/24/32/48/64 from repeated calls to the 8-wide primitive.
 */
| 552 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_uni_w(8, 6, 12, sse4) |
| 553 |
2/2✓ Branch 1 taken 21144 times.
✓ Branch 2 taken 10572 times.
|
31716 | mc_rep_uni_w(8, 8, 16, sse4) |
| 554 |
2/2✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
|
504 | mc_rep_uni_w(8, 8, 24, sse4) |
| 555 |
2/2✓ Branch 1 taken 54668 times.
✓ Branch 2 taken 13667 times.
|
68335 | mc_rep_uni_w(8, 8, 32, sse4) |
| 556 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_uni_w(8, 8, 48, sse4) |
| 557 |
2/2✓ Branch 1 taken 43016 times.
✓ Branch 2 taken 5377 times.
|
48393 | mc_rep_uni_w(8, 8, 64, sse4) |
| 558 | |||
/*
 * 10-bit SSE4 uni-weighted wrappers generated by mc_rep_uni_w:
 * width 12 from the 6-wide primitive, widths 16/24/32/48/64 from the
 * 8-wide primitive. The destination advances two bytes per sample here.
 */
| 559 |
2/2✓ Branch 1 taken 284 times.
✓ Branch 2 taken 142 times.
|
426 | mc_rep_uni_w(10, 6, 12, sse4) |
| 560 |
2/2✓ Branch 1 taken 3220 times.
✓ Branch 2 taken 1610 times.
|
4830 | mc_rep_uni_w(10, 8, 16, sse4) |
| 561 |
2/2✓ Branch 1 taken 402 times.
✓ Branch 2 taken 134 times.
|
536 | mc_rep_uni_w(10, 8, 24, sse4) |
| 562 |
2/2✓ Branch 1 taken 4728 times.
✓ Branch 2 taken 1182 times.
|
5910 | mc_rep_uni_w(10, 8, 32, sse4) |
| 563 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_uni_w(10, 8, 48, sse4) |
| 564 |
2/2✓ Branch 1 taken 2832 times.
✓ Branch 2 taken 354 times.
|
3186 | mc_rep_uni_w(10, 8, 64, sse4) |
| 565 | |||
/*
 * 12-bit SSE4 uni-weighted wrappers generated by mc_rep_uni_w:
 * width 12 from the 6-wide primitive, widths 16/24/32/48/64 from the
 * 8-wide primitive. The destination advances two bytes per sample here.
 */
| 566 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_uni_w(12, 6, 12, sse4) |
| 567 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_uni_w(12, 8, 16, sse4) |
| 568 |
2/2✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
|
504 | mc_rep_uni_w(12, 8, 24, sse4) |
| 569 |
2/2✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
|
630 | mc_rep_uni_w(12, 8, 32, sse4) |
| 570 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_uni_w(12, 8, 48, sse4) |
| 571 |
2/2✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
|
1134 | mc_rep_uni_w(12, 8, 64, sse4) |
| 572 | |||
/*
 * mc_rep_bi_w(bitd, step, W, opt)
 *
 * Defines ff_hevc_put_bi_w<W>_<bitd>_<opt>() by calling the narrower
 * ff_hevc_put_bi_w<step>_<bitd>_<opt>() once per column strip.
 *
 * For each strip offset i = 0, step, 2*step, ... (while i < W):
 *   - both int16_t source pointers (_src and _src2) advance by i elements;
 *   - the destination pointer advances by i * ((bitd + 7) / 8) bytes,
 *     i.e. one byte per sample for bitd == 8 and two bytes per sample
 *     for bitd == 10 or 12.
 * height, denom, _wx0, _wx1, _ox0 and _ox1 are forwarded unchanged.
 *
 * The loop only tests i < W, so W is expected to be a multiple of step;
 * every instantiation in this file satisfies that.
 */
| 573 | #define mc_rep_bi_w(bitd, step, W, opt) \ | ||
| 574 | void ff_hevc_put_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \ | ||
| 575 | const int16_t *_src2, int height, \ | ||
| 576 | int denom, int _wx0, int _wx1, int _ox0, int _ox1) \ | ||
| 577 | { \ | ||
| 578 | int i; \ | ||
| 579 | uint8_t *dst; \ | ||
| 580 | for (i = 0; i < W; i += step) { \ | ||
| 581 | const int16_t *src = _src + i; \ | ||
| 582 | const int16_t *src2 = _src2 + i; \ | ||
| 583 | dst = _dst + (i * ((bitd + 7) / 8)); \ | ||
| 584 | ff_hevc_put_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \ | ||
| 585 | height, denom, _wx0, _wx1, _ox0, _ox1); \ | ||
| 586 | } \ | ||
| 587 | } | ||
| 588 | |||
/*
 * 8-bit SSE4 bi-weighted wrappers generated by mc_rep_bi_w:
 * width 12 is built from two calls to the 6-wide primitive, widths
 * 16/24/32/48/64 from repeated calls to the 8-wide primitive.
 */
| 589 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_bi_w(8, 6, 12, sse4) |
| 590 |
2/2✓ Branch 1 taken 4896 times.
✓ Branch 2 taken 2448 times.
|
7344 | mc_rep_bi_w(8, 8, 16, sse4) |
| 591 |
2/2✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
|
504 | mc_rep_bi_w(8, 8, 24, sse4) |
| 592 |
2/2✓ Branch 1 taken 23552 times.
✓ Branch 2 taken 5888 times.
|
29440 | mc_rep_bi_w(8, 8, 32, sse4) |
| 593 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_bi_w(8, 8, 48, sse4) |
| 594 |
2/2✓ Branch 1 taken 21144 times.
✓ Branch 2 taken 2643 times.
|
23787 | mc_rep_bi_w(8, 8, 64, sse4) |
| 595 | |||
/*
 * 10-bit SSE4 bi-weighted wrappers generated by mc_rep_bi_w:
 * width 12 from the 6-wide primitive, widths 16/24/32/48/64 from the
 * 8-wide primitive. The destination advances two bytes per sample here.
 */
| 596 |
2/2✓ Branch 1 taken 268 times.
✓ Branch 2 taken 134 times.
|
402 | mc_rep_bi_w(10, 6, 12, sse4) |
| 597 |
2/2✓ Branch 1 taken 2916 times.
✓ Branch 2 taken 1458 times.
|
4374 | mc_rep_bi_w(10, 8, 16, sse4) |
| 598 |
2/2✓ Branch 1 taken 390 times.
✓ Branch 2 taken 130 times.
|
520 | mc_rep_bi_w(10, 8, 24, sse4) |
| 599 |
2/2✓ Branch 1 taken 4760 times.
✓ Branch 2 taken 1190 times.
|
5950 | mc_rep_bi_w(10, 8, 32, sse4) |
| 600 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_bi_w(10, 8, 48, sse4) |
| 601 |
2/2✓ Branch 1 taken 2928 times.
✓ Branch 2 taken 366 times.
|
3294 | mc_rep_bi_w(10, 8, 64, sse4) |
| 602 | |||
/*
 * 12-bit SSE4 bi-weighted wrappers generated by mc_rep_bi_w:
 * width 12 from the 6-wide primitive, widths 16/24/32/48/64 from the
 * 8-wide primitive. The destination advances two bytes per sample here.
 */
| 603 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_bi_w(12, 6, 12, sse4) |
| 604 |
2/2✓ Branch 1 taken 252 times.
✓ Branch 2 taken 126 times.
|
378 | mc_rep_bi_w(12, 8, 16, sse4) |
| 605 |
2/2✓ Branch 1 taken 378 times.
✓ Branch 2 taken 126 times.
|
504 | mc_rep_bi_w(12, 8, 24, sse4) |
| 606 |
2/2✓ Branch 1 taken 504 times.
✓ Branch 2 taken 126 times.
|
630 | mc_rep_bi_w(12, 8, 32, sse4) |
| 607 |
2/2✓ Branch 1 taken 756 times.
✓ Branch 2 taken 126 times.
|
882 | mc_rep_bi_w(12, 8, 48, sse4) |
| 608 |
2/2✓ Branch 1 taken 1008 times.
✓ Branch 2 taken 126 times.
|
1134 | mc_rep_bi_w(12, 8, 64, sse4) |
| 609 | |||
/*
 * mc_uni_w_func(name, bitd, W, opt)
 *
 * Defines the file-local function hevc_put_uni_w_<name><W>_<bitd>_<opt>(),
 * a two-step weighted uni-prediction wrapper:
 *   1. hevc_put_<name><W>_<bitd>_<opt>() writes its int16_t output for
 *      the source block into the on-stack buffer `temp`;
 *   2. ff_hevc_put_uni_w<W>_<bitd>_<opt>() applies denom/_wx/_ox to
 *      `temp` and stores the result to _dst.
 *
 * `temp` holds 71 * MAX_PB_SIZE int16_t elements and is declared through
 * LOCAL_ALIGNED_16 (presumably 64 rows plus 7 rows of filter margin --
 * TODO confirm against the interpolation code).
 * mx, my and width are consumed only by step 1; the weighting step takes
 * its width from the W baked into the function name.
 */
| 610 | #define mc_uni_w_func(name, bitd, W, opt) \ | ||
| 611 | static void hevc_put_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \ | ||
| 612 | const uint8_t *_src, ptrdiff_t _srcstride, \ | ||
| 613 | int height, int denom, \ | ||
| 614 | int _wx, int _ox, \ | ||
| 615 | intptr_t mx, intptr_t my, int width) \ | ||
| 616 | { \ | ||
| 617 | LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ | ||
| 618 | hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ | ||
| 619 | ff_hevc_put_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox); \ | ||
| 620 | } | ||
| 621 | |||
/*
 * mc_uni_w_funcs(name, bitd, opt)
 *
 * Instantiates mc_uni_w_func for the widths 4, 8, 12, 16, 24, 32, 48
 * and 64. Width 6 is not part of this list; callers that need it invoke
 * mc_uni_w_func(name, bitd, 6, opt) separately.
 */
| 622 | #define mc_uni_w_funcs(name, bitd, opt) \ | ||
| 623 | mc_uni_w_func(name, bitd, 4, opt) \ | ||
| 624 | mc_uni_w_func(name, bitd, 8, opt) \ | ||
| 625 | mc_uni_w_func(name, bitd, 12, opt) \ | ||
| 626 | mc_uni_w_func(name, bitd, 16, opt) \ | ||
| 627 | mc_uni_w_func(name, bitd, 24, opt) \ | ||
| 628 | mc_uni_w_func(name, bitd, 32, opt) \ | ||
| 629 | mc_uni_w_func(name, bitd, 48, opt) \ | ||
| 630 | mc_uni_w_func(name, bitd, 64, opt) | ||
| 631 | |||
/*
 * 8-bit SSE4 uni-weighted wrappers for every kernel name. The extra
 * width-6 variant is instantiated only for pel_pixels and the epel_*
 * kernels; the qpel_* kernels get the standard width list only.
 */
| 632 | 81664 | mc_uni_w_funcs(pel_pixels, 8, sse4) | |
| 633 | 18 | mc_uni_w_func(pel_pixels, 8, 6, sse4) | |
| 634 | 7784 | mc_uni_w_funcs(epel_h, 8, sse4) | |
| 635 | 18 | mc_uni_w_func(epel_h, 8, 6, sse4) | |
| 636 | 7424 | mc_uni_w_funcs(epel_v, 8, sse4) | |
| 637 | 18 | mc_uni_w_func(epel_v, 8, 6, sse4) | |
| 638 | 7664 | mc_uni_w_funcs(epel_hv, 8, sse4) | |
| 639 | 18 | mc_uni_w_func(epel_hv, 8, 6, sse4) | |
| 640 | 298 | mc_uni_w_funcs(qpel_h, 8, sse4) | |
| 641 | 308 | mc_uni_w_funcs(qpel_v, 8, sse4) | |
| 642 | 882 | mc_uni_w_funcs(qpel_hv, 8, sse4) | |
| 643 | |||
/*
 * 10-bit SSE4 uni-weighted wrappers for every kernel name. The extra
 * width-6 variant is instantiated only for pel_pixels and the epel_*
 * kernels; the qpel_* kernels get the standard width list only.
 */
| 644 | 4980 | mc_uni_w_funcs(pel_pixels, 10, sse4) | |
| 645 | 18 | mc_uni_w_func(pel_pixels, 10, 6, sse4) | |
| 646 | 1352 | mc_uni_w_funcs(epel_h, 10, sse4) | |
| 647 | 18 | mc_uni_w_func(epel_h, 10, 6, sse4) | |
| 648 | 360 | mc_uni_w_funcs(epel_v, 10, sse4) | |
| 649 | 18 | mc_uni_w_func(epel_v, 10, 6, sse4) | |
| 650 | 536 | mc_uni_w_funcs(epel_hv, 10, sse4) | |
| 651 | 18 | mc_uni_w_func(epel_hv, 10, 6, sse4) | |
| 652 | 844 | mc_uni_w_funcs(qpel_h, 10, sse4) | |
| 653 | 332 | mc_uni_w_funcs(qpel_v, 10, sse4) | |
| 654 | 380 | mc_uni_w_funcs(qpel_hv, 10, sse4) | |
| 655 | |||
/*
 * 12-bit SSE4 uni-weighted wrappers for every kernel name. The extra
 * width-6 variant is instantiated only for pel_pixels and the epel_*
 * kernels; the qpel_* kernels get the standard width list only.
 */
| 656 | 288 | mc_uni_w_funcs(pel_pixels, 12, sse4) | |
| 657 | 18 | mc_uni_w_func(pel_pixels, 12, 6, sse4) | |
| 658 | 288 | mc_uni_w_funcs(epel_h, 12, sse4) | |
| 659 | 18 | mc_uni_w_func(epel_h, 12, 6, sse4) | |
| 660 | 288 | mc_uni_w_funcs(epel_v, 12, sse4) | |
| 661 | 18 | mc_uni_w_func(epel_v, 12, 6, sse4) | |
| 662 | 288 | mc_uni_w_funcs(epel_hv, 12, sse4) | |
| 663 | 18 | mc_uni_w_func(epel_hv, 12, 6, sse4) | |
| 664 | 288 | mc_uni_w_funcs(qpel_h, 12, sse4) | |
| 665 | 288 | mc_uni_w_funcs(qpel_v, 12, sse4) | |
| 666 | 288 | mc_uni_w_funcs(qpel_hv, 12, sse4) | |
| 667 | |||
/*
 * mc_bi_w_func(name, bitd, W, opt)
 *
 * Defines the file-local function hevc_put_bi_w_<name><W>_<bitd>_<opt>(),
 * a two-step weighted bi-prediction wrapper:
 *   1. hevc_put_<name><W>_<bitd>_<opt>() writes its int16_t output for
 *      the source block into the on-stack buffer `temp`;
 *   2. ff_hevc_put_bi_w<W>_<bitd>_<opt>() combines `temp` with the
 *      caller-supplied int16_t block _src2 using denom, _wx0/_wx1 and
 *      _ox0/_ox1, and stores the result to _dst.
 *
 * `temp` holds 71 * MAX_PB_SIZE int16_t elements and is declared through
 * LOCAL_ALIGNED_16 (presumably 64 rows plus 7 rows of filter margin --
 * TODO confirm against the interpolation code).
 * mx, my and width are consumed only by step 1.
 */
| 668 | #define mc_bi_w_func(name, bitd, W, opt) \ | ||
| 669 | static void hevc_put_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \ | ||
| 670 | const uint8_t *_src, ptrdiff_t _srcstride, \ | ||
| 671 | const int16_t *_src2, \ | ||
| 672 | int height, int denom, \ | ||
| 673 | int _wx0, int _wx1, int _ox0, int _ox1, \ | ||
| 674 | intptr_t mx, intptr_t my, int width) \ | ||
| 675 | { \ | ||
| 676 | LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ | ||
| 677 | hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ | ||
| 678 | ff_hevc_put_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \ | ||
| 679 | height, denom, _wx0, _wx1, _ox0, _ox1); \ | ||
| 680 | } | ||
| 681 | |||
/*
 * mc_bi_w_funcs(name, bitd, opt)
 *
 * Instantiates mc_bi_w_func for the widths 4, 8, 12, 16, 24, 32, 48
 * and 64. Width 6 is not part of this list; callers that need it invoke
 * mc_bi_w_func(name, bitd, 6, opt) separately.
 */
| 682 | #define mc_bi_w_funcs(name, bitd, opt) \ | ||
| 683 | mc_bi_w_func(name, bitd, 4, opt) \ | ||
| 684 | mc_bi_w_func(name, bitd, 8, opt) \ | ||
| 685 | mc_bi_w_func(name, bitd, 12, opt) \ | ||
| 686 | mc_bi_w_func(name, bitd, 16, opt) \ | ||
| 687 | mc_bi_w_func(name, bitd, 24, opt) \ | ||
| 688 | mc_bi_w_func(name, bitd, 32, opt) \ | ||
| 689 | mc_bi_w_func(name, bitd, 48, opt) \ | ||
| 690 | mc_bi_w_func(name, bitd, 64, opt) | ||
| 691 | |||
/*
 * 8-bit SSE4 bi-weighted wrappers for every kernel name. The extra
 * width-6 variant is instantiated only for pel_pixels and the epel_*
 * kernels; the qpel_* kernels get the standard width list only.
 */
| 692 | 23926 | mc_bi_w_funcs(pel_pixels, 8, sse4) | |
| 693 | 18 | mc_bi_w_func(pel_pixels, 8, 6, sse4) | |
| 694 | 1240 | mc_bi_w_funcs(epel_h, 8, sse4) | |
| 695 | 18 | mc_bi_w_func(epel_h, 8, 6, sse4) | |
| 696 | 4860 | mc_bi_w_funcs(epel_v, 8, sse4) | |
| 697 | 18 | mc_bi_w_func(epel_v, 8, 6, sse4) | |
| 698 | 2416 | mc_bi_w_funcs(epel_hv, 8, sse4) | |
| 699 | 18 | mc_bi_w_func(epel_hv, 8, 6, sse4) | |
| 700 | 288 | mc_bi_w_funcs(qpel_h, 8, sse4) | |
| 701 | 288 | mc_bi_w_funcs(qpel_v, 8, sse4) | |
| 702 | 288 | mc_bi_w_funcs(qpel_hv, 8, sse4) | |
| 703 | |||
/*
 * 10-bit SSE4 bi-weighted wrappers for every kernel name. The extra
 * width-6 variant is instantiated only for pel_pixels and the epel_*
 * kernels; the qpel_* kernels get the standard width list only.
 */
| 704 | 4336 | mc_bi_w_funcs(pel_pixels, 10, sse4) | |
| 705 | 18 | mc_bi_w_func(pel_pixels, 10, 6, sse4) | |
| 706 | 1392 | mc_bi_w_funcs(epel_h, 10, sse4) | |
| 707 | 18 | mc_bi_w_func(epel_h, 10, 6, sse4) | |
| 708 | 448 | mc_bi_w_funcs(epel_v, 10, sse4) | |
| 709 | 18 | mc_bi_w_func(epel_v, 10, 6, sse4) | |
| 710 | 312 | mc_bi_w_funcs(epel_hv, 10, sse4) | |
| 711 | 18 | mc_bi_w_func(epel_hv, 10, 6, sse4) | |
| 712 | 836 | mc_bi_w_funcs(qpel_h, 10, sse4) | |
| 713 | 368 | mc_bi_w_funcs(qpel_v, 10, sse4) | |
| 714 | 300 | mc_bi_w_funcs(qpel_hv, 10, sse4) | |
| 715 | |||
/*
 * 12-bit SSE4 bi-weighted wrappers for every kernel name. The extra
 * width-6 variant is instantiated only for pel_pixels and the epel_*
 * kernels; the qpel_* kernels get the standard width list only.
 */
| 716 | 288 | mc_bi_w_funcs(pel_pixels, 12, sse4) | |
| 717 | 18 | mc_bi_w_func(pel_pixels, 12, 6, sse4) | |
| 718 | 288 | mc_bi_w_funcs(epel_h, 12, sse4) | |
| 719 | 18 | mc_bi_w_func(epel_h, 12, 6, sse4) | |
| 720 | 288 | mc_bi_w_funcs(epel_v, 12, sse4) | |
| 721 | 18 | mc_bi_w_func(epel_v, 12, 6, sse4) | |
| 722 | 288 | mc_bi_w_funcs(epel_hv, 12, sse4) | |
| 723 | 18 | mc_bi_w_func(epel_hv, 12, 6, sse4) | |
| 724 | 288 | mc_bi_w_funcs(qpel_h, 12, sse4) | |
| 725 | 288 | mc_bi_w_funcs(qpel_v, 12, sse4) | |
| 726 | 288 | mc_bi_w_funcs(qpel_hv, 12, sse4) | |
| 727 | #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL | ||
| 728 | |||
/*
 * SAO_BAND_FILTER_FUNCS(bitd, opt)
 *
 * Declares (does not define) the five external functions
 * ff_hevc_sao_band_filter_{8,16,32,48,64}_<bitd>_<opt>(). All five share
 * one signature: destination and source pointers with separate strides,
 * the offset table, the left band class, and the block width and height.
 * The number in the function name is presumably the block width the
 * routine is specialised for -- confirm against the assembly.
 */
| 729 | #define SAO_BAND_FILTER_FUNCS(bitd, opt) \ | ||
| 730 | void ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ | ||
| 731 | const int16_t *sao_offset_val, int sao_left_class, int width, int height); \ | ||
| 732 | void ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ | ||
| 733 | const int16_t *sao_offset_val, int sao_left_class, int width, int height); \ | ||
| 734 | void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ | ||
| 735 | const int16_t *sao_offset_val, int sao_left_class, int width, int height); \ | ||
| 736 | void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ | ||
| 737 | const int16_t *sao_offset_val, int sao_left_class, int width, int height); \ | ||
| 738 | void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ | ||
| 739 | const int16_t *sao_offset_val, int sao_left_class, int width, int height); | ||
| 740 | |||
/*
 * SAO band filter prototypes for bit depths 8, 10 and 12, each in an
 * sse2, avx and avx2 flavour.
 */
| 741 | SAO_BAND_FILTER_FUNCS(8, sse2) | ||
| 742 | SAO_BAND_FILTER_FUNCS(10, sse2) | ||
| 743 | SAO_BAND_FILTER_FUNCS(12, sse2) | ||
| 744 | SAO_BAND_FILTER_FUNCS(8, avx) | ||
| 745 | SAO_BAND_FILTER_FUNCS(10, avx) | ||
| 746 | SAO_BAND_FILTER_FUNCS(12, avx) | ||
| 747 | SAO_BAND_FILTER_FUNCS(8, avx2) | ||
| 748 | SAO_BAND_FILTER_FUNCS(10, avx2) | ||
| 749 | SAO_BAND_FILTER_FUNCS(12, avx2) | ||
| 750 | |||
/*
 * SAO_BAND_INIT(bitd, opt)
 *
 * Fills c->sao_band_filter[0..4] with the 8/16/32/48/64 band filter
 * functions for the given bit depth and instruction-set suffix.
 * Requires a pointer named `c` to be in scope at the expansion site.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
| 751 | #define SAO_BAND_INIT(bitd, opt) do { \ | ||
| 752 | c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_##bitd##_##opt; \ | ||
| 753 | c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_##bitd##_##opt; \ | ||
| 754 | c->sao_band_filter[2] = ff_hevc_sao_band_filter_32_##bitd##_##opt; \ | ||
| 755 | c->sao_band_filter[3] = ff_hevc_sao_band_filter_48_##bitd##_##opt; \ | ||
| 756 | c->sao_band_filter[4] = ff_hevc_sao_band_filter_64_##bitd##_##opt; \ | ||
| 757 | } while (0) | ||
| 758 | |||
/*
 * SAO_EDGE_FILTER_FUNCS(bitd, opt)
 *
 * Declares (does not define) the five external functions
 * ff_hevc_sao_edge_filter_{8,16,32,48,64}_<bitd>_<opt>(). All five share
 * one signature: destination and source pointers, a destination stride
 * only (no source stride parameter, unlike the band filter prototypes),
 * the offset table, the edge-offset selector `eo`, and the block width
 * and height.
 *
 * NOTE: the last prototype line ends with a line continuation, so the
 * line that follows the macro (currently blank) is still part of the
 * macro body. Do not put code directly after it.
 */
| 759 | #define SAO_EDGE_FILTER_FUNCS(bitd, opt) \ | ||
| 760 | void ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \ | ||
| 761 | const int16_t *sao_offset_val, int eo, int width, int height); \ | ||
| 762 | void ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \ | ||
| 763 | const int16_t *sao_offset_val, int eo, int width, int height); \ | ||
| 764 | void ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \ | ||
| 765 | const int16_t *sao_offset_val, int eo, int width, int height); \ | ||
| 766 | void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \ | ||
| 767 | const int16_t *sao_offset_val, int eo, int width, int height); \ | ||
| 768 | void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \ | ||
| 769 | const int16_t *sao_offset_val, int eo, int width, int height); \ | ||
| 770 | |||
/*
 * SAO edge filter prototypes. The baseline flavour is ssse3 for 8-bit
 * and sse2 for 10- and 12-bit; every bit depth also has an avx2 flavour.
 */
| 771 | SAO_EDGE_FILTER_FUNCS(8, ssse3) | ||
| 772 | SAO_EDGE_FILTER_FUNCS(8, avx2) | ||
| 773 | SAO_EDGE_FILTER_FUNCS(10, sse2) | ||
| 774 | SAO_EDGE_FILTER_FUNCS(10, avx2) | ||
| 775 | SAO_EDGE_FILTER_FUNCS(12, sse2) | ||
| 776 | SAO_EDGE_FILTER_FUNCS(12, avx2) | ||
| 777 | |||
/*
 * SAO_EDGE_INIT(bitd, opt)
 *
 * Fills c->sao_edge_filter[0..4] with the 8/16/32/48/64 edge filter
 * functions for the given bit depth and instruction-set suffix.
 * Requires a pointer named `c` to be in scope at the expansion site.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
| 778 | #define SAO_EDGE_INIT(bitd, opt) do { \ | ||
| 779 | c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt; \ | ||
| 780 | c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \ | ||
| 781 | c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \ | ||
| 782 | c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \ | ||
| 783 | c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \ | ||
| 784 | } while (0) | ||
| 785 | |||
/*
 * PEL_LINK(dst, idx1, idx2, idx3, name, D, opt)
 *
 * Stores five function pointers at index [idx1][idx2][idx3] of five
 * sibling tables derived from `dst` by token pasting:
 *   dst        <- hevc_put_<name>_<D>_<opt>
 *   dst_bi     <- ff_hevc_put_bi_<name>_<D>_<opt>
 *   dst_uni    <- hevc_put_uni_<name>_<D>_<opt>
 *   dst_uni_w  <- hevc_put_uni_w_<name>_<D>_<opt>
 *   dst_bi_w   <- hevc_put_bi_w_<name>_<D>_<opt>
 *
 * The expansion is five separate statements (the caller supplies the
 * final semicolon) and is not wrapped in do { } while (0), so it must
 * only be used inside a braced block.
 */
| 786 | #define PEL_LINK(dst, idx1, idx2, idx3, name, D, opt) \ | ||
| 787 | dst [idx1][idx2][idx3] = hevc_put_ ## name ## _ ## D ## _##opt; \ | ||
| 788 | dst ## _bi [idx1][idx2][idx3] = ff_hevc_put_bi_ ## name ## _ ## D ## _##opt; \ | ||
| 789 | dst ## _uni [idx1][idx2][idx3] = hevc_put_uni_ ## name ## _ ## D ## _##opt; \ | ||
| 790 | dst ## _uni_w[idx1][idx2][idx3] = hevc_put_uni_w_ ## name ## _ ## D ## _##opt; \ | ||
| 791 | dst ## _bi_w [idx1][idx2][idx3] = hevc_put_bi_w_ ## name ## _ ## D ## _##opt | ||
| 792 | |||
/*
 * EPEL_LINKS(pointer, my, mx, fname, bitd, opt)
 *
 * Applies PEL_LINK for first-dimension indices 1..9, which correspond to
 * the widths 4, 6, 8, 12, 16, 24, 32, 48 and 64 appended to `fname`.
 * `my` and `mx` become the second and third table indices. Index 0 is
 * not written. Like PEL_LINK, this expands to multiple statements and
 * must be used inside a braced block.
 */
| 793 | #define EPEL_LINKS(pointer, my, mx, fname, bitd, opt ) \ | ||
| 794 | PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \ | ||
| 795 | PEL_LINK(pointer, 2, my , mx , fname##6 , bitd, opt ); \ | ||
| 796 | PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \ | ||
| 797 | PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \ | ||
| 798 | PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \ | ||
| 799 | PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \ | ||
| 800 | PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \ | ||
| 801 | PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \ | ||
| 802 | PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt ) | ||
/*
 * QPEL_LINKS(pointer, my, mx, fname, bitd, opt)
 *
 * Same as EPEL_LINKS but without the width-6 entry: applies PEL_LINK for
 * first-dimension indices 1 and 3..9 (widths 4, 8, 12, 16, 24, 32, 48
 * and 64). Indices 0 and 2 are not written. Expands to multiple
 * statements and must be used inside a braced block.
 */
| 803 | #define QPEL_LINKS(pointer, my, mx, fname, bitd, opt) \ | ||
| 804 | PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \ | ||
| 805 | PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \ | ||
| 806 | PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \ | ||
| 807 | PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \ | ||
| 808 | PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \ | ||
| 809 | PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \ | ||
| 810 | PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \ | ||
| 811 | PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt ) | ||
| 812 | |||
| 813 | 1509 | void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) | |
| 814 | { | ||
| 815 | 1509 | int cpu_flags = av_get_cpu_flags(); | |
| 816 | |||
| 817 |
2/2✓ Branch 0 taken 643 times.
✓ Branch 1 taken 866 times.
|
1509 | if (bit_depth == 8) { |
| 818 |
2/2✓ Branch 0 taken 226 times.
✓ Branch 1 taken 417 times.
|
643 | if (EXTERNAL_MMXEXT(cpu_flags)) { |
| 819 | 226 | c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext; | |
| 820 | } | ||
| 821 |
2/2✓ Branch 0 taken 186 times.
✓ Branch 1 taken 457 times.
|
643 | if (EXTERNAL_SSE2(cpu_flags)) { |
| 822 | 186 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2; | |
| 823 | 186 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2; | |
| 824 | if (ARCH_X86_64) { | ||
| 825 | 186 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2; | |
| 826 | 186 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2; | |
| 827 | |||
| 828 | 186 | c->idct[2] = ff_hevc_idct_16x16_8_sse2; | |
| 829 | 186 | c->idct[3] = ff_hevc_idct_32x32_8_sse2; | |
| 830 | } | ||
| 831 | 186 | SAO_BAND_INIT(8, sse2); | |
| 832 | |||
| 833 | 186 | c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_sse2; | |
| 834 | 186 | c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2; | |
| 835 | 186 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2; | |
| 836 | 186 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2; | |
| 837 | |||
| 838 | 186 | c->idct[0] = ff_hevc_idct_4x4_8_sse2; | |
| 839 | 186 | c->idct[1] = ff_hevc_idct_8x8_8_sse2; | |
| 840 | |||
| 841 | 186 | c->add_residual[1] = ff_hevc_add_residual_8_8_sse2; | |
| 842 | 186 | c->add_residual[2] = ff_hevc_add_residual_16_8_sse2; | |
| 843 | 186 | c->add_residual[3] = ff_hevc_add_residual_32_8_sse2; | |
| 844 | } | ||
| 845 |
2/2✓ Branch 0 taken 146 times.
✓ Branch 1 taken 497 times.
|
643 | if (EXTERNAL_SSSE3(cpu_flags)) { |
| 846 | if(ARCH_X86_64) { | ||
| 847 | 146 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3; | |
| 848 | 146 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3; | |
| 849 | } | ||
| 850 | 146 | SAO_EDGE_INIT(8, ssse3); | |
| 851 | } | ||
| 852 | #if HAVE_SSE4_EXTERNAL && ARCH_X86_64 | ||
| 853 |
2/2✓ Branch 0 taken 126 times.
✓ Branch 1 taken 517 times.
|
643 | if (EXTERNAL_SSE4(cpu_flags)) { |
| 854 | |||
| 855 | 126 | EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4); | |
| 856 | 126 | EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4); | |
| 857 | 126 | EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4); | |
| 858 | 126 | EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 8, sse4); | |
| 859 | |||
| 860 | 126 | QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4); | |
| 861 | 126 | QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 8, sse4); | |
| 862 | 126 | QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4); | |
| 863 | 126 | QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4); | |
| 864 | } | ||
| 865 | #endif | ||
| 866 |
2/2✓ Branch 0 taken 66 times.
✓ Branch 1 taken 577 times.
|
643 | if (EXTERNAL_AVX(cpu_flags)) { |
| 867 | 66 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx; | |
| 868 | 66 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx; | |
| 869 | if (ARCH_X86_64) { | ||
| 870 | 66 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx; | |
| 871 | 66 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx; | |
| 872 | |||
| 873 | 66 | c->idct[2] = ff_hevc_idct_16x16_8_avx; | |
| 874 | 66 | c->idct[3] = ff_hevc_idct_32x32_8_avx; | |
| 875 | } | ||
| 876 | 66 | SAO_BAND_INIT(8, avx); | |
| 877 | |||
| 878 | 66 | c->idct[0] = ff_hevc_idct_4x4_8_avx; | |
| 879 | 66 | c->idct[1] = ff_hevc_idct_8x8_8_avx; | |
| 880 | } | ||
| 881 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 617 times.
|
643 | if (EXTERNAL_AVX2(cpu_flags)) { |
| 882 | 26 | c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2; | |
| 883 | 26 | c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2; | |
| 884 | } | ||
| 885 | #if HAVE_AVX2_EXTERNAL | ||
| 886 |
3/4✓ Branch 0 taken 26 times.
✓ Branch 1 taken 617 times.
✓ Branch 2 taken 26 times.
✗ Branch 3 not taken.
|
643 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
| 887 | 26 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2; | |
| 888 | 26 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2; | |
| 889 | |||
| 890 | #if ARCH_X86_64 | ||
| 891 | 26 | c->put_hevc_epel[7][0][0] = hevc_put_pel_pixels32_8_avx2; | |
| 892 | 26 | c->put_hevc_epel[8][0][0] = hevc_put_pel_pixels48_8_avx2; | |
| 893 | 26 | c->put_hevc_epel[9][0][0] = hevc_put_pel_pixels64_8_avx2; | |
| 894 | |||
| 895 | 26 | c->put_hevc_qpel[7][0][0] = hevc_put_pel_pixels32_8_avx2; | |
| 896 | 26 | c->put_hevc_qpel[8][0][0] = hevc_put_pel_pixels48_8_avx2; | |
| 897 | 26 | c->put_hevc_qpel[9][0][0] = hevc_put_pel_pixels64_8_avx2; | |
| 898 | |||
| 899 | 26 | c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2; | |
| 900 | 26 | c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2; | |
| 901 | 26 | c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2; | |
| 902 | |||
| 903 | 26 | c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2; | |
| 904 | 26 | c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2; | |
| 905 | 26 | c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2; | |
| 906 | |||
| 907 | 26 | c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2; | |
| 908 | 26 | c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2; | |
| 909 | 26 | c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2; | |
| 910 | |||
| 911 | 26 | c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2; | |
| 912 | 26 | c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2; | |
| 913 | 26 | c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2; | |
| 914 | |||
| 915 | 26 | c->put_hevc_epel[7][0][1] = hevc_put_epel_h32_8_avx2; | |
| 916 | 26 | c->put_hevc_epel[8][0][1] = hevc_put_epel_h48_8_avx2; | |
| 917 | 26 | c->put_hevc_epel[9][0][1] = hevc_put_epel_h64_8_avx2; | |
| 918 | |||
| 919 | 26 | c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_8_avx2; | |
| 920 | 26 | c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_8_avx2; | |
| 921 | 26 | c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_8_avx2; | |
| 922 | |||
| 923 | 26 | c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_8_avx2; | |
| 924 | 26 | c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_8_avx2; | |
| 925 | 26 | c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_8_avx2; | |
| 926 | |||
| 927 | 26 | c->put_hevc_epel[7][1][0] = hevc_put_epel_v32_8_avx2; | |
| 928 | 26 | c->put_hevc_epel[8][1][0] = hevc_put_epel_v48_8_avx2; | |
| 929 | 26 | c->put_hevc_epel[9][1][0] = hevc_put_epel_v64_8_avx2; | |
| 930 | |||
| 931 | 26 | c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_8_avx2; | |
| 932 | 26 | c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_8_avx2; | |
| 933 | 26 | c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_8_avx2; | |
| 934 | |||
| 935 | 26 | c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_8_avx2; | |
| 936 | 26 | c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_8_avx2; | |
| 937 | 26 | c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_8_avx2; | |
| 938 | |||
| 939 | 26 | c->put_hevc_epel[7][1][1] = hevc_put_epel_hv32_8_avx2; | |
| 940 | 26 | c->put_hevc_epel[8][1][1] = hevc_put_epel_hv48_8_avx2; | |
| 941 | 26 | c->put_hevc_epel[9][1][1] = hevc_put_epel_hv64_8_avx2; | |
| 942 | |||
| 943 | 26 | c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_8_avx2; | |
| 944 | 26 | c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_8_avx2; | |
| 945 | 26 | c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_8_avx2; | |
| 946 | |||
| 947 | 26 | c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_8_avx2; | |
| 948 | 26 | c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_8_avx2; | |
| 949 | 26 | c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_8_avx2; | |
| 950 | |||
| 951 | 26 | c->put_hevc_qpel[7][0][1] = hevc_put_qpel_h32_8_avx2; | |
| 952 | 26 | c->put_hevc_qpel[8][0][1] = hevc_put_qpel_h48_8_avx2; | |
| 953 | 26 | c->put_hevc_qpel[9][0][1] = hevc_put_qpel_h64_8_avx2; | |
| 954 | |||
| 955 | 26 | c->put_hevc_qpel[7][1][0] = hevc_put_qpel_v32_8_avx2; | |
| 956 | 26 | c->put_hevc_qpel[8][1][0] = hevc_put_qpel_v48_8_avx2; | |
| 957 | 26 | c->put_hevc_qpel[9][1][0] = hevc_put_qpel_v64_8_avx2; | |
| 958 | |||
| 959 | 26 | c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_8_avx2; | |
| 960 | 26 | c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_8_avx2; | |
| 961 | 26 | c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_8_avx2; | |
| 962 | |||
| 963 | 26 | c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_8_avx2; | |
| 964 | 26 | c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_8_avx2; | |
| 965 | 26 | c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_8_avx2; | |
| 966 | |||
| 967 | 26 | c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_8_avx2; | |
| 968 | 26 | c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_8_avx2; | |
| 969 | 26 | c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_8_avx2; | |
| 970 | |||
| 971 | 26 | c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_8_avx2; | |
| 972 | 26 | c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_8_avx2; | |
| 973 | 26 | c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_8_avx2; | |
| 974 | #endif /* ARCH_X86_64 */ | ||
| 975 | |||
| 976 | 26 | SAO_BAND_INIT(8, avx2); | |
| 977 | |||
| 978 | 26 | c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2; | |
| 979 | 26 | c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2; | |
| 980 | 26 | c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2; | |
| 981 | |||
| 982 | 26 | c->add_residual[3] = ff_hevc_add_residual_32_8_avx2; | |
| 983 | } | ||
| 984 | #endif /* HAVE_AVX2_EXTERNAL */ | ||
| 985 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 643 times.
|
643 | if (EXTERNAL_AVX512ICL(cpu_flags) && ARCH_X86_64) { |
| 986 | ✗ | c->put_hevc_qpel[1][0][1] = ff_hevc_put_qpel_h4_8_avx512icl; | |
| 987 | ✗ | c->put_hevc_qpel[3][0][1] = ff_hevc_put_qpel_h8_8_avx512icl; | |
| 988 | ✗ | c->put_hevc_qpel[5][0][1] = ff_hevc_put_qpel_h16_8_avx512icl; | |
| 989 | ✗ | c->put_hevc_qpel[7][0][1] = ff_hevc_put_qpel_h32_8_avx512icl; | |
| 990 | ✗ | c->put_hevc_qpel[9][0][1] = ff_hevc_put_qpel_h64_8_avx512icl; | |
| 991 | ✗ | c->put_hevc_qpel[3][1][1] = ff_hevc_put_qpel_hv8_8_avx512icl; | |
| 992 | } | ||
| 993 |
2/2✓ Branch 0 taken 310 times.
✓ Branch 1 taken 556 times.
|
866 | } else if (bit_depth == 10) { |
| 994 |
2/2✓ Branch 0 taken 232 times.
✓ Branch 1 taken 78 times.
|
310 | if (EXTERNAL_MMXEXT(cpu_flags)) { |
| 995 | 232 | c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext; | |
| 996 | } | ||
| 997 |
2/2✓ Branch 0 taken 192 times.
✓ Branch 1 taken 118 times.
|
310 | if (EXTERNAL_SSE2(cpu_flags)) { |
| 998 | 192 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2; | |
| 999 | 192 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2; | |
| 1000 | if (ARCH_X86_64) { | ||
| 1001 | 192 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2; | |
| 1002 | 192 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2; | |
| 1003 | |||
| 1004 | 192 | c->idct[2] = ff_hevc_idct_16x16_10_sse2; | |
| 1005 | 192 | c->idct[3] = ff_hevc_idct_32x32_10_sse2; | |
| 1006 | } | ||
| 1007 | 192 | SAO_BAND_INIT(10, sse2); | |
| 1008 | 192 | SAO_EDGE_INIT(10, sse2); | |
| 1009 | |||
| 1010 | 192 | c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_sse2; | |
| 1011 | 192 | c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2; | |
| 1012 | 192 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2; | |
| 1013 | 192 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2; | |
| 1014 | |||
| 1015 | 192 | c->idct[0] = ff_hevc_idct_4x4_10_sse2; | |
| 1016 | 192 | c->idct[1] = ff_hevc_idct_8x8_10_sse2; | |
| 1017 | |||
| 1018 | 192 | c->add_residual[1] = ff_hevc_add_residual_8_10_sse2; | |
| 1019 | 192 | c->add_residual[2] = ff_hevc_add_residual_16_10_sse2; | |
| 1020 | 192 | c->add_residual[3] = ff_hevc_add_residual_32_10_sse2; | |
| 1021 | } | ||
| 1022 |
2/2✓ Branch 0 taken 152 times.
✓ Branch 1 taken 158 times.
|
310 | if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { |
| 1023 | 152 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3; | |
| 1024 | 152 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3; | |
| 1025 | } | ||
| 1026 | #if HAVE_SSE4_EXTERNAL && ARCH_X86_64 | ||
| 1027 |
2/2✓ Branch 0 taken 132 times.
✓ Branch 1 taken 178 times.
|
310 | if (EXTERNAL_SSE4(cpu_flags)) { |
| 1028 | 132 | EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4); | |
| 1029 | 132 | EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4); | |
| 1030 | 132 | EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4); | |
| 1031 | 132 | EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 10, sse4); | |
| 1032 | |||
| 1033 | 132 | QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4); | |
| 1034 | 132 | QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 10, sse4); | |
| 1035 | 132 | QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4); | |
| 1036 | 132 | QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4); | |
| 1037 | } | ||
| 1038 | #endif | ||
| 1039 |
2/2✓ Branch 0 taken 72 times.
✓ Branch 1 taken 238 times.
|
310 | if (EXTERNAL_AVX(cpu_flags)) { |
| 1040 | 72 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx; | |
| 1041 | 72 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx; | |
| 1042 | if (ARCH_X86_64) { | ||
| 1043 | 72 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx; | |
| 1044 | 72 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx; | |
| 1045 | |||
| 1046 | 72 | c->idct[2] = ff_hevc_idct_16x16_10_avx; | |
| 1047 | 72 | c->idct[3] = ff_hevc_idct_32x32_10_avx; | |
| 1048 | } | ||
| 1049 | |||
| 1050 | 72 | c->idct[0] = ff_hevc_idct_4x4_10_avx; | |
| 1051 | 72 | c->idct[1] = ff_hevc_idct_8x8_10_avx; | |
| 1052 | |||
| 1053 | 72 | SAO_BAND_INIT(10, avx); | |
| 1054 | } | ||
| 1055 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 278 times.
|
310 | if (EXTERNAL_AVX2(cpu_flags)) { |
| 1056 | 32 | c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2; | |
| 1057 | } | ||
| 1058 | #if HAVE_AVX2_EXTERNAL | ||
| 1059 |
3/4✓ Branch 0 taken 32 times.
✓ Branch 1 taken 278 times.
✓ Branch 2 taken 32 times.
✗ Branch 3 not taken.
|
310 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
| 1060 | 32 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2; | |
| 1061 | 32 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2; | |
| 1062 | |||
| 1063 | #if ARCH_X86_64 | ||
| 1064 | 32 | c->put_hevc_epel[5][0][0] = hevc_put_pel_pixels16_10_avx2; | |
| 1065 | 32 | c->put_hevc_epel[6][0][0] = hevc_put_pel_pixels24_10_avx2; | |
| 1066 | 32 | c->put_hevc_epel[7][0][0] = hevc_put_pel_pixels32_10_avx2; | |
| 1067 | 32 | c->put_hevc_epel[8][0][0] = hevc_put_pel_pixels48_10_avx2; | |
| 1068 | 32 | c->put_hevc_epel[9][0][0] = hevc_put_pel_pixels64_10_avx2; | |
| 1069 | |||
| 1070 | 32 | c->put_hevc_qpel[5][0][0] = hevc_put_pel_pixels16_10_avx2; | |
| 1071 | 32 | c->put_hevc_qpel[6][0][0] = hevc_put_pel_pixels24_10_avx2; | |
| 1072 | 32 | c->put_hevc_qpel[7][0][0] = hevc_put_pel_pixels32_10_avx2; | |
| 1073 | 32 | c->put_hevc_qpel[8][0][0] = hevc_put_pel_pixels48_10_avx2; | |
| 1074 | 32 | c->put_hevc_qpel[9][0][0] = hevc_put_pel_pixels64_10_avx2; | |
| 1075 | |||
| 1076 | 32 | c->put_hevc_epel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2; | |
| 1077 | 32 | c->put_hevc_epel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2; | |
| 1078 | 32 | c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2; | |
| 1079 | 32 | c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2; | |
| 1080 | 32 | c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2; | |
| 1081 | |||
| 1082 | 32 | c->put_hevc_qpel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2; | |
| 1083 | 32 | c->put_hevc_qpel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2; | |
| 1084 | 32 | c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2; | |
| 1085 | 32 | c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2; | |
| 1086 | 32 | c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2; | |
| 1087 | |||
| 1088 | 32 | c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2; | |
| 1089 | 32 | c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2; | |
| 1090 | 32 | c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2; | |
| 1091 | 32 | c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2; | |
| 1092 | 32 | c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2; | |
| 1093 | 32 | c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2; | |
| 1094 | 32 | c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2; | |
| 1095 | 32 | c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2; | |
| 1096 | 32 | c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2; | |
| 1097 | 32 | c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2; | |
| 1098 | |||
| 1099 | 32 | c->put_hevc_epel[5][0][1] = hevc_put_epel_h16_10_avx2; | |
| 1100 | 32 | c->put_hevc_epel[6][0][1] = hevc_put_epel_h24_10_avx2; | |
| 1101 | 32 | c->put_hevc_epel[7][0][1] = hevc_put_epel_h32_10_avx2; | |
| 1102 | 32 | c->put_hevc_epel[8][0][1] = hevc_put_epel_h48_10_avx2; | |
| 1103 | 32 | c->put_hevc_epel[9][0][1] = hevc_put_epel_h64_10_avx2; | |
| 1104 | |||
| 1105 | 32 | c->put_hevc_epel_uni[5][0][1] = hevc_put_uni_epel_h16_10_avx2; | |
| 1106 | 32 | c->put_hevc_epel_uni[6][0][1] = hevc_put_uni_epel_h24_10_avx2; | |
| 1107 | 32 | c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_10_avx2; | |
| 1108 | 32 | c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_10_avx2; | |
| 1109 | 32 | c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_10_avx2; | |
| 1110 | |||
| 1111 | 32 | c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_bi_epel_h16_10_avx2; | |
| 1112 | 32 | c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_bi_epel_h24_10_avx2; | |
| 1113 | 32 | c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_10_avx2; | |
| 1114 | 32 | c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_10_avx2; | |
| 1115 | 32 | c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_10_avx2; | |
| 1116 | |||
| 1117 | 32 | c->put_hevc_epel[5][1][0] = hevc_put_epel_v16_10_avx2; | |
| 1118 | 32 | c->put_hevc_epel[6][1][0] = hevc_put_epel_v24_10_avx2; | |
| 1119 | 32 | c->put_hevc_epel[7][1][0] = hevc_put_epel_v32_10_avx2; | |
| 1120 | 32 | c->put_hevc_epel[8][1][0] = hevc_put_epel_v48_10_avx2; | |
| 1121 | 32 | c->put_hevc_epel[9][1][0] = hevc_put_epel_v64_10_avx2; | |
| 1122 | |||
| 1123 | 32 | c->put_hevc_epel_uni[5][1][0] = hevc_put_uni_epel_v16_10_avx2; | |
| 1124 | 32 | c->put_hevc_epel_uni[6][1][0] = hevc_put_uni_epel_v24_10_avx2; | |
| 1125 | 32 | c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_10_avx2; | |
| 1126 | 32 | c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_10_avx2; | |
| 1127 | 32 | c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_10_avx2; | |
| 1128 | |||
| 1129 | 32 | c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_bi_epel_v16_10_avx2; | |
| 1130 | 32 | c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_bi_epel_v24_10_avx2; | |
| 1131 | 32 | c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_10_avx2; | |
| 1132 | 32 | c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_10_avx2; | |
| 1133 | 32 | c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_10_avx2; | |
| 1134 | |||
| 1135 | 32 | c->put_hevc_epel[5][1][1] = hevc_put_epel_hv16_10_avx2; | |
| 1136 | 32 | c->put_hevc_epel[6][1][1] = hevc_put_epel_hv24_10_avx2; | |
| 1137 | 32 | c->put_hevc_epel[7][1][1] = hevc_put_epel_hv32_10_avx2; | |
| 1138 | 32 | c->put_hevc_epel[8][1][1] = hevc_put_epel_hv48_10_avx2; | |
| 1139 | 32 | c->put_hevc_epel[9][1][1] = hevc_put_epel_hv64_10_avx2; | |
| 1140 | |||
| 1141 | 32 | c->put_hevc_epel_uni[5][1][1] = hevc_put_uni_epel_hv16_10_avx2; | |
| 1142 | 32 | c->put_hevc_epel_uni[6][1][1] = hevc_put_uni_epel_hv24_10_avx2; | |
| 1143 | 32 | c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_10_avx2; | |
| 1144 | 32 | c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_10_avx2; | |
| 1145 | 32 | c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_10_avx2; | |
| 1146 | |||
| 1147 | 32 | c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_bi_epel_hv16_10_avx2; | |
| 1148 | 32 | c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_bi_epel_hv24_10_avx2; | |
| 1149 | 32 | c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_10_avx2; | |
| 1150 | 32 | c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_10_avx2; | |
| 1151 | 32 | c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_10_avx2; | |
| 1152 | |||
| 1153 | 32 | c->put_hevc_qpel[5][0][1] = hevc_put_qpel_h16_10_avx2; | |
| 1154 | 32 | c->put_hevc_qpel[6][0][1] = hevc_put_qpel_h24_10_avx2; | |
| 1155 | 32 | c->put_hevc_qpel[7][0][1] = hevc_put_qpel_h32_10_avx2; | |
| 1156 | 32 | c->put_hevc_qpel[8][0][1] = hevc_put_qpel_h48_10_avx2; | |
| 1157 | 32 | c->put_hevc_qpel[9][0][1] = hevc_put_qpel_h64_10_avx2; | |
| 1158 | |||
| 1159 | 32 | c->put_hevc_qpel_uni[5][0][1] = hevc_put_uni_qpel_h16_10_avx2; | |
| 1160 | 32 | c->put_hevc_qpel_uni[6][0][1] = hevc_put_uni_qpel_h24_10_avx2; | |
| 1161 | 32 | c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_10_avx2; | |
| 1162 | 32 | c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_10_avx2; | |
| 1163 | 32 | c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_10_avx2; | |
| 1164 | |||
| 1165 | 32 | c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_bi_qpel_h16_10_avx2; | |
| 1166 | 32 | c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_bi_qpel_h24_10_avx2; | |
| 1167 | 32 | c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_10_avx2; | |
| 1168 | 32 | c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_10_avx2; | |
| 1169 | 32 | c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_10_avx2; | |
| 1170 | |||
| 1171 | 32 | c->put_hevc_qpel[5][1][0] = hevc_put_qpel_v16_10_avx2; | |
| 1172 | 32 | c->put_hevc_qpel[6][1][0] = hevc_put_qpel_v24_10_avx2; | |
| 1173 | 32 | c->put_hevc_qpel[7][1][0] = hevc_put_qpel_v32_10_avx2; | |
| 1174 | 32 | c->put_hevc_qpel[8][1][0] = hevc_put_qpel_v48_10_avx2; | |
| 1175 | 32 | c->put_hevc_qpel[9][1][0] = hevc_put_qpel_v64_10_avx2; | |
| 1176 | |||
| 1177 | 32 | c->put_hevc_qpel_uni[5][1][0] = hevc_put_uni_qpel_v16_10_avx2; | |
| 1178 | 32 | c->put_hevc_qpel_uni[6][1][0] = hevc_put_uni_qpel_v24_10_avx2; | |
| 1179 | 32 | c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_10_avx2; | |
| 1180 | 32 | c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_10_avx2; | |
| 1181 | 32 | c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_10_avx2; | |
| 1182 | |||
| 1183 | 32 | c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_bi_qpel_v16_10_avx2; | |
| 1184 | 32 | c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_bi_qpel_v24_10_avx2; | |
| 1185 | 32 | c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_10_avx2; | |
| 1186 | 32 | c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_10_avx2; | |
| 1187 | 32 | c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_10_avx2; | |
| 1188 | |||
| 1189 | 32 | c->put_hevc_qpel[5][1][1] = hevc_put_qpel_hv16_10_avx2; | |
| 1190 | 32 | c->put_hevc_qpel[6][1][1] = hevc_put_qpel_hv24_10_avx2; | |
| 1191 | 32 | c->put_hevc_qpel[7][1][1] = hevc_put_qpel_hv32_10_avx2; | |
| 1192 | 32 | c->put_hevc_qpel[8][1][1] = hevc_put_qpel_hv48_10_avx2; | |
| 1193 | 32 | c->put_hevc_qpel[9][1][1] = hevc_put_qpel_hv64_10_avx2; | |
| 1194 | |||
| 1195 | 32 | c->put_hevc_qpel_uni[5][1][1] = hevc_put_uni_qpel_hv16_10_avx2; | |
| 1196 | 32 | c->put_hevc_qpel_uni[6][1][1] = hevc_put_uni_qpel_hv24_10_avx2; | |
| 1197 | 32 | c->put_hevc_qpel_uni[7][1][1] = hevc_put_uni_qpel_hv32_10_avx2; | |
| 1198 | 32 | c->put_hevc_qpel_uni[8][1][1] = hevc_put_uni_qpel_hv48_10_avx2; | |
| 1199 | 32 | c->put_hevc_qpel_uni[9][1][1] = hevc_put_uni_qpel_hv64_10_avx2; | |
| 1200 | |||
| 1201 | 32 | c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_bi_qpel_hv16_10_avx2; | |
| 1202 | 32 | c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_bi_qpel_hv24_10_avx2; | |
| 1203 | 32 | c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_bi_qpel_hv32_10_avx2; | |
| 1204 | 32 | c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_bi_qpel_hv48_10_avx2; | |
| 1205 | 32 | c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_bi_qpel_hv64_10_avx2; | |
| 1206 | #endif /* ARCH_X86_64 */ | ||
| 1207 | |||
| 1208 | 32 | SAO_BAND_INIT(10, avx2); | |
| 1209 | 32 | SAO_EDGE_INIT(10, avx2); | |
| 1210 | |||
| 1211 | 32 | c->add_residual[2] = ff_hevc_add_residual_16_10_avx2; | |
| 1212 | 32 | c->add_residual[3] = ff_hevc_add_residual_32_10_avx2; | |
| 1213 | } | ||
| 1214 | #endif /* HAVE_AVX2_EXTERNAL */ | ||
| 1215 |
2/2✓ Branch 0 taken 270 times.
✓ Branch 1 taken 286 times.
|
556 | } else if (bit_depth == 12) { |
| 1216 |
2/2✓ Branch 0 taken 180 times.
✓ Branch 1 taken 90 times.
|
270 | if (EXTERNAL_SSE2(cpu_flags)) { |
| 1217 | 180 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2; | |
| 1218 | 180 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2; | |
| 1219 | if (ARCH_X86_64) { | ||
| 1220 | 180 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2; | |
| 1221 | 180 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2; | |
| 1222 | } | ||
| 1223 | 180 | SAO_BAND_INIT(12, sse2); | |
| 1224 | 180 | SAO_EDGE_INIT(12, sse2); | |
| 1225 | |||
| 1226 | 180 | c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_sse2; | |
| 1227 | 180 | c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2; | |
| 1228 | 180 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2; | |
| 1229 | 180 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2; | |
| 1230 | } | ||
| 1231 |
2/2✓ Branch 0 taken 140 times.
✓ Branch 1 taken 130 times.
|
270 | if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { |
| 1232 | 140 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3; | |
| 1233 | 140 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3; | |
| 1234 | } | ||
| 1235 | #if HAVE_SSE4_EXTERNAL && ARCH_X86_64 | ||
| 1236 |
2/2✓ Branch 0 taken 120 times.
✓ Branch 1 taken 150 times.
|
270 | if (EXTERNAL_SSE4(cpu_flags)) { |
| 1237 | 120 | EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4); | |
| 1238 | 120 | EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4); | |
| 1239 | 120 | EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4); | |
| 1240 | 120 | EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 12, sse4); | |
| 1241 | |||
| 1242 | 120 | QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4); | |
| 1243 | 120 | QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4); | |
| 1244 | 120 | QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4); | |
| 1245 | 120 | QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4); | |
| 1246 | } | ||
| 1247 | #endif | ||
| 1248 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 210 times.
|
270 | if (EXTERNAL_AVX(cpu_flags)) { |
| 1249 | 60 | c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx; | |
| 1250 | 60 | c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx; | |
| 1251 | if (ARCH_X86_64) { | ||
| 1252 | 60 | c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx; | |
| 1253 | 60 | c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx; | |
| 1254 | } | ||
| 1255 | 60 | SAO_BAND_INIT(12, avx); | |
| 1256 | } | ||
| 1257 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 250 times.
|
270 | if (EXTERNAL_AVX2(cpu_flags)) { |
| 1258 | 20 | c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2; | |
| 1259 | } | ||
| 1260 |
3/4✓ Branch 0 taken 20 times.
✓ Branch 1 taken 250 times.
✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
|
270 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
| 1261 | 20 | c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2; | |
| 1262 | 20 | c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2; | |
| 1263 | |||
| 1264 | 20 | SAO_BAND_INIT(12, avx2); | |
| 1265 | 20 | SAO_EDGE_INIT(12, avx2); | |
| 1266 | } | ||
| 1267 | } | ||
| 1268 | 1509 | } | |
| 1269 |