| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * VVC DSP init for x86 | ||
| 3 | * | ||
| 4 | * Copyright (C) 2022-2024 Nuo Mi | ||
| 5 | * Copyright (c) 2023-2024 Wu Jianhua | ||
| 6 | * | ||
| 7 | * This file is part of FFmpeg. | ||
| 8 | * | ||
| 9 | * FFmpeg is free software; you can redistribute it and/or | ||
| 10 | * modify it under the terms of the GNU Lesser General Public | ||
| 11 | * License as published by the Free Software Foundation; either | ||
| 12 | * version 2.1 of the License, or (at your option) any later version. | ||
| 13 | * | ||
| 14 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 17 | * Lesser General Public License for more details. | ||
| 18 | * | ||
| 19 | * You should have received a copy of the GNU Lesser General Public | ||
| 20 | * License along with FFmpeg; if not, write to the Free Software | ||
| 21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 22 | */ | ||
| 23 | |||
| 24 | #include "config.h" | ||
| 25 | |||
| 26 | #include "libavutil/attributes.h" | ||
| 27 | #include "libavutil/cpu.h" | ||
| 28 | #include "libavutil/x86/cpu.h" | ||
| 29 | #include "libavcodec/vvc/dec.h" | ||
| 30 | #include "libavcodec/vvc/ctu.h" | ||
| 31 | #include "libavcodec/vvc/dsp.h" | ||
| 32 | #include "libavcodec/x86/h26x/h2656dsp.h" | ||
| 33 | |||
| 34 | #if ARCH_X86_64 | ||
| 35 | /* Name builders: bf() pastes fn, bd and opt with underscores; BF() yields fn_<bpc>bpc_<opt>. */ |||
| 36 | #define bf(fn, bd, opt) fn##_##bd##_##opt | ||
| 37 | #define BF(fn, bpc, opt) fn##_##bpc##bpc_##opt | ||
| 38 | /* Declares the four DMVR prototypes (plain, _h, _v, _hv) for one bit depth and instruction set; all four share one signature. */ |||
| 39 | #define DMVR_PROTOTYPES(bd, opt) \ | ||
| 40 | void ff_vvc_dmvr_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \ | ||
| 41 | int height, intptr_t mx, intptr_t my, int width); \ | ||
| 42 | void ff_vvc_dmvr_h_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \ | ||
| 43 | int height, intptr_t mx, intptr_t my, int width); \ | ||
| 44 | void ff_vvc_dmvr_v_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \ | ||
| 45 | int height, intptr_t mx, intptr_t my, int width); \ | ||
| 46 | void ff_vvc_dmvr_hv_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \ | ||
| 47 | int height, intptr_t mx, intptr_t my, int width); \ | ||
| 48 | |||
| 49 | DMVR_PROTOTYPES( 8, avx2) | ||
| 50 | DMVR_PROTOTYPES(10, avx2) | ||
| 51 | DMVR_PROTOTYPES(12, avx2) | ||
| 52 | /* Declares ff_vvc_apply_bdof_<BD>_<OPT> at block scope and stores it in c->inter.apply_bdof; requires a variable named c at the expansion site. */ |||
| 53 | #define OF_INIT(BD, OPT) do { \ | ||
| 54 | void ff_vvc_apply_bdof_## BD ## _ ## OPT(uint8_t *dst, ptrdiff_t dst_stride, \ | ||
| 55 | const int16_t *src0, const int16_t *src1, \ | ||
| 56 | int w, int h); \ | ||
| 57 | c->inter.apply_bdof = ff_vvc_apply_bdof_## BD ##_## OPT; \ | ||
| 58 | } while (0) | ||
| 59 | /* Declares the ALF classify_grad and classify prototypes for one bpc/opt pair; the symbol names are built with BF(), e.g. ff_vvc_alf_classify_8bpc_avx2. */ |||
| 60 | #define ALF_BPC_PROTOTYPES(bpc, opt) \ | ||
| 61 | void BF(ff_vvc_alf_classify_grad, bpc, opt)(int *gradient_sum, \ | ||
| 62 | const uint8_t *src, ptrdiff_t src_stride, intptr_t width, intptr_t height, intptr_t vb_pos); \ | ||
| 63 | void BF(ff_vvc_alf_classify, bpc, opt)(int *class_idx, int *transpose_idx, const int *gradient_sum, \ | ||
| 64 | intptr_t width, intptr_t height, intptr_t vb_pos, intptr_t bit_depth); \ | ||
| 65 | |||
| 66 | ALF_BPC_PROTOTYPES(8, avx2) | ||
| 67 | ALF_BPC_PROTOTYPES(16, avx2) | ||
| 68 | /* FW_PUT defines static vvc_put_<name>_<depth>_<opt>(), which calls ff_h2656_put_<name>_<depth>_<opt>() with 2 * MAX_PB_SIZE inserted as the second argument (presumably the destination stride in bytes - confirm against h2656dsp.h). */ |||
| 69 | #if ARCH_X86_64 /* nested: already inside the ARCH_X86_64 guard opened earlier in this file */ | ||
| 70 | #define FW_PUT(name, depth, opt) \ | ||
| 71 | static void vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \ | ||
| 72 | int height, const int8_t *hf, const int8_t *vf, int width) \ | ||
| 73 | { \ | ||
| 74 | ff_h2656_put_## name ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \ | ||
| 75 | } | ||
| 76 | /* SSE4 wrappers: FW_PUT is instantiated for pixels and 4-tap h/v/hv at widths 2..128, and for 8-tap h/v/hv at widths 4..128, each at 8, 10 and 12 bits. */ |||
| 77 | #if HAVE_SSE4_EXTERNAL | ||
| 78 | #define FW_PUT_TAP(fname, bitd, opt ) \ | ||
| 79 | FW_PUT(fname##4, bitd, opt ) \ | ||
| 80 | FW_PUT(fname##8, bitd, opt ) \ | ||
| 81 | FW_PUT(fname##16, bitd, opt ) \ | ||
| 82 | FW_PUT(fname##32, bitd, opt ) \ | ||
| 83 | FW_PUT(fname##64, bitd, opt ) \ | ||
| 84 | FW_PUT(fname##128, bitd, opt ) \ | ||
| 85 | |||
| 86 | #define FW_PUT_4TAP(fname, bitd, opt) \ | ||
| 87 | FW_PUT(fname ## 2, bitd, opt) \ | ||
| 88 | FW_PUT_TAP(fname, bitd, opt) | ||
| 89 | /* "pixels" goes through FW_PUT_4TAP below, so it also gets a width-2 wrapper. */ |||
| 90 | #define FW_PUT_4TAP_SSE4(bitd) \ | ||
| 91 | FW_PUT_4TAP(pixels, bitd, sse4) \ | ||
| 92 | FW_PUT_4TAP(4tap_h, bitd, sse4) \ | ||
| 93 | FW_PUT_4TAP(4tap_v, bitd, sse4) \ | ||
| 94 | FW_PUT_4TAP(4tap_hv, bitd, sse4) | ||
| 95 | |||
| 96 | #define FW_PUT_8TAP_SSE4(bitd) \ | ||
| 97 | FW_PUT_TAP(8tap_h, bitd, sse4) \ | ||
| 98 | FW_PUT_TAP(8tap_v, bitd, sse4) \ | ||
| 99 | FW_PUT_TAP(8tap_hv, bitd, sse4) | ||
| 100 | |||
| 101 | #define FW_PUT_SSE4(bitd) \ | ||
| 102 | FW_PUT_4TAP_SSE4(bitd) \ | ||
| 103 | FW_PUT_8TAP_SSE4(bitd) | ||
| 104 | |||
| 105 | 7564 | FW_PUT_SSE4( 8) | |
| 106 | 1096 | FW_PUT_SSE4(10) | |
| 107 | 1096 | FW_PUT_SSE4(12) | |
| 108 | #endif | ||
| 109 | /* AVX2 wrappers instantiated for all three bit depths: pixels plus 4-tap and 8-tap h and v, at widths 32, 64 and 128 only (no hv here). */ |||
| 110 | #if HAVE_AVX2_EXTERNAL | ||
| 111 | #define FW_PUT_TAP_AVX2(n, bitd) \ | ||
| 112 | FW_PUT(n ## tap_h32, bitd, avx2) \ | ||
| 113 | FW_PUT(n ## tap_h64, bitd, avx2) \ | ||
| 114 | FW_PUT(n ## tap_h128, bitd, avx2) \ | ||
| 115 | FW_PUT(n ## tap_v32, bitd, avx2) \ | ||
| 116 | FW_PUT(n ## tap_v64, bitd, avx2) \ | ||
| 117 | FW_PUT(n ## tap_v128, bitd, avx2) | ||
| 118 | |||
| 119 | #define FW_PUT_AVX2(bitd) \ | ||
| 120 | FW_PUT(pixels32, bitd, avx2) \ | ||
| 121 | FW_PUT(pixels64, bitd, avx2) \ | ||
| 122 | FW_PUT(pixels128, bitd, avx2) \ | ||
| 123 | FW_PUT_TAP_AVX2(4, bitd) \ | ||
| 124 | FW_PUT_TAP_AVX2(8, bitd) \ | ||
| 125 | |||
| 126 | 1804 | FW_PUT_AVX2( 8) | |
| 127 | 234 | FW_PUT_AVX2(10) | |
| 128 | 234 | FW_PUT_AVX2(12) | |
| 129 | /* Extra AVX2 wrappers instantiated only for 10 and 12 bits: width-16 pixels/h/v, plus hv at widths 16, 32, 64 and 128, for both 4 and 8 taps. */ |||
| 130 | #define FW_PUT_TAP_16BPC_AVX2(n, bitd) \ | ||
| 131 | FW_PUT(n ## tap_h16, bitd, avx2) \ | ||
| 132 | FW_PUT(n ## tap_v16, bitd, avx2) \ | ||
| 133 | FW_PUT(n ## tap_hv16, bitd, avx2) \ | ||
| 134 | FW_PUT(n ## tap_hv32, bitd, avx2) \ | ||
| 135 | FW_PUT(n ## tap_hv64, bitd, avx2) \ | ||
| 136 | FW_PUT(n ## tap_hv128, bitd, avx2) | ||
| 137 | |||
| 138 | #define FW_PUT_16BPC_AVX2(bitd) \ | ||
| 139 | FW_PUT(pixels16, bitd, avx2) \ | ||
| 140 | FW_PUT_TAP_16BPC_AVX2(4, bitd) \ | ||
| 141 | FW_PUT_TAP_16BPC_AVX2(8, bitd) | ||
| 142 | |||
| 143 | 182 | FW_PUT_16BPC_AVX2(10) | |
| 144 | 182 | FW_PUT_16BPC_AVX2(12) | |
| 145 | /* Defines static vvc_alf_classify_<bd>_<opt>(): fills gradient_tmp via the classify_grad routine, then calls the classify routine on it. bpc (8 or 16) picks the routine variant; bd is forwarded as the final bit_depth argument. */ |||
| 146 | #define ALF_FUNCS(bpc, bd, opt) \ | ||
| 147 | static void bf(vvc_alf_classify, bd, opt)(int *class_idx, int *transpose_idx, \ | ||
| 148 | const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, int *gradient_tmp) \ | ||
| 149 | { \ | ||
| 150 | BF(ff_vvc_alf_classify_grad, bpc, opt)(gradient_tmp, src, src_stride, width, height, vb_pos); \ | ||
| 151 | BF(ff_vvc_alf_classify, bpc, opt)(class_idx, transpose_idx, gradient_tmp, width, height, vb_pos, bd); \ | ||
| 152 | } \ | ||
| 153 | |||
| 154 | 256 | ALF_FUNCS(8, 8, avx2) | |
| 155 | 256 | ALF_FUNCS(16, 10, avx2) | |
| 156 | 256 | ALF_FUNCS(16, 12, avx2) | |
| 157 | |||
| 158 | #endif | ||
| 159 | /* Prototypes for the SAO band and edge filters: one pair per width (8, then 16..128 in steps of 16) and per bit depth. Note the edge filter takes a single stride argument while the band filter takes two. */ |||
| 160 | #define SAO_FILTER_FUNC(wd, bitd, opt) \ | ||
| 161 | void ff_vvc_sao_band_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ | ||
| 162 | const int16_t *sao_offset_val, int sao_left_class, int width, int height); \ | ||
| 163 | void ff_vvc_sao_edge_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \ | ||
| 164 | const int16_t *sao_offset_val, int eo, int width, int height); \ | ||
| 165 | |||
| 166 | #define SAO_FILTER_FUNCS(bitd, opt) \ | ||
| 167 | SAO_FILTER_FUNC(8, bitd, opt) \ | ||
| 168 | SAO_FILTER_FUNC(16, bitd, opt) \ | ||
| 169 | SAO_FILTER_FUNC(32, bitd, opt) \ | ||
| 170 | SAO_FILTER_FUNC(48, bitd, opt) \ | ||
| 171 | SAO_FILTER_FUNC(64, bitd, opt) \ | ||
| 172 | SAO_FILTER_FUNC(80, bitd, opt) \ | ||
| 173 | SAO_FILTER_FUNC(96, bitd, opt) \ | ||
| 174 | SAO_FILTER_FUNC(112, bitd, opt) \ | ||
| 175 | SAO_FILTER_FUNC(128, bitd, opt) \ | ||
| 176 | |||
| 177 | SAO_FILTER_FUNCS(8, avx2) | ||
| 178 | SAO_FILTER_FUNCS(10, avx2) | ||
| 179 | SAO_FILTER_FUNCS(12, avx2) | ||
| 180 | /* Fills c->sao.<type>_filter[0..8] with the nine width variants in ascending width order (index 0 = width 8, index 8 = width 128). */ |||
| 181 | #define SAO_FILTER_INIT(type, bitd, opt) do { \ | ||
| 182 | c->sao.type##_filter[0] = ff_vvc_sao_##type##_filter_8_##bitd##_##opt; \ | ||
| 183 | c->sao.type##_filter[1] = ff_vvc_sao_##type##_filter_16_##bitd##_##opt; \ | ||
| 184 | c->sao.type##_filter[2] = ff_vvc_sao_##type##_filter_32_##bitd##_##opt; \ | ||
| 185 | c->sao.type##_filter[3] = ff_vvc_sao_##type##_filter_48_##bitd##_##opt; \ | ||
| 186 | c->sao.type##_filter[4] = ff_vvc_sao_##type##_filter_64_##bitd##_##opt; \ | ||
| 187 | c->sao.type##_filter[5] = ff_vvc_sao_##type##_filter_80_##bitd##_##opt; \ | ||
| 188 | c->sao.type##_filter[6] = ff_vvc_sao_##type##_filter_96_##bitd##_##opt; \ | ||
| 189 | c->sao.type##_filter[7] = ff_vvc_sao_##type##_filter_112_##bitd##_##opt; \ | ||
| 190 | c->sao.type##_filter[8] = ff_vvc_sao_##type##_filter_128_##bitd##_##opt; \ | ||
| 191 | } while (0) | ||
| 192 | /* Installs both SAO tables (band and edge) for one bit depth. */ |||
| 193 | #define SAO_INIT(bitd, opt) do { \ | ||
| 194 | SAO_FILTER_INIT(band, bitd, opt); \ | ||
| 195 | SAO_FILTER_INIT(edge, bitd, opt); \ | ||
| 196 | } while (0) | ||
| 197 | /* Declares ff_vvc_avg_<bd>_<opt> and ff_vvc_w_avg_<bd>_<opt> at block scope and assigns them to c->inter.avg and c->inter.w_avg. */ |||
| 198 | #define AVG_INIT(bd, opt) do { \ | ||
| 199 | void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \ | ||
| 200 | const int16_t *src0, const int16_t *src1, int width, int height);\ | ||
| 201 | void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \ | ||
| 202 | const int16_t *src0, const int16_t *src1, int width, int height, \ | ||
| 203 | int denom, int w0, int w1, int o); \ | ||
| 204 | c->inter.avg = bf(ff_vvc_avg, bd, opt); \ | ||
| 205 | c->inter.w_avg = bf(ff_vvc_w_avg, bd, opt); \ | ||
| 206 | } while (0) | ||
| 207 | /* Installs the AVX2 DMVR functions into c->inter.dmvr: [0][0] plain, [0][1] h, [1][0] v, [1][1] hv. The instruction set is hard-coded to avx2. */ |||
| 208 | #define DMVR_INIT(bd) do { \ | ||
| 209 | c->inter.dmvr[0][0] = ff_vvc_dmvr_##bd##_avx2; \ | ||
| 210 | c->inter.dmvr[0][1] = ff_vvc_dmvr_h_##bd##_avx2; \ | ||
| 211 | c->inter.dmvr[1][0] = ff_vvc_dmvr_v_##bd##_avx2; \ | ||
| 212 | c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_##bd##_avx2; \ | ||
| 213 | } while (0) | ||
| 214 | /* Sets dst[C][W][idx1][idx2] to the local vvc_put_* wrapper and dst_uni[C][W][idx1][idx2] to ff_h2656_put_uni_* directly (dst = c->inter.put pastes to c->inter.put_uni). Expands to two bare statements with no do/while wrapper, so use it only inside a braced block. */ |||
| 215 | #define PEL_LINK(dst, C, W, idx1, idx2, name, D, opt) \ | ||
| 216 | dst[C][W][idx1][idx2] = vvc_put_## name ## _ ## D ## _##opt; \ | ||
| 217 | dst ## _uni[C][W][idx1][idx2] = ff_h2656_put_uni_ ## name ## _ ## D ## _##opt; \ | ||
| 218 | |||
| 219 | #define MC_TAP_LINKS(pointer, C, my, mx, fname, bitd, opt ) /* W index 1..6 corresponds to widths 4, 8, 16, 32, 64, 128 */ \ | ||
| 220 | PEL_LINK(pointer, C, 1, my , mx , fname##4 , bitd, opt ); \ | ||
| 221 | PEL_LINK(pointer, C, 2, my , mx , fname##8 , bitd, opt ); \ | ||
| 222 | PEL_LINK(pointer, C, 3, my , mx , fname##16, bitd, opt ); \ | ||
| 223 | PEL_LINK(pointer, C, 4, my , mx , fname##32, bitd, opt ); \ | ||
| 224 | PEL_LINK(pointer, C, 5, my , mx , fname##64, bitd, opt ); \ | ||
| 225 | PEL_LINK(pointer, C, 6, my , mx , fname##128, bitd, opt ); | ||
| 226 | /* Luma uses the 8-tap names; no W = 0 (width-2) luma entry is assigned. */ |||
| 227 | #define MC_8TAP_LINKS(pointer, my, mx, fname, bitd, opt) \ | ||
| 228 | MC_TAP_LINKS(pointer, LUMA, my, mx, fname, bitd, opt) | ||
| 229 | /* Index pairs (my, mx): (0,0) pixels, (0,1) h, (1,0) v, (1,1) hv. */ |||
| 230 | #define MC_8TAP_LINKS_SSE4(bd) \ | ||
| 231 | MC_8TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \ | ||
| 232 | MC_8TAP_LINKS(c->inter.put, 0, 1, 8tap_h, bd, sse4); \ | ||
| 233 | MC_8TAP_LINKS(c->inter.put, 1, 0, 8tap_v, bd, sse4); \ | ||
| 234 | MC_8TAP_LINKS(c->inter.put, 1, 1, 8tap_hv, bd, sse4) | ||
| 235 | /* Chroma uses the 4-tap names and additionally fills W = 0 with the width-2 variant before the common W = 1..6 entries. */ |||
| 236 | #define MC_4TAP_LINKS(pointer, my, mx, fname, bitd, opt) \ | ||
| 237 | PEL_LINK(pointer, CHROMA, 0, my , mx , fname##2 , bitd, opt ); \ | ||
| 238 | MC_TAP_LINKS(pointer, CHROMA, my, mx, fname, bitd, opt) \ | ||
| 239 | |||
| 240 | #define MC_4TAP_LINKS_SSE4(bd) \ | ||
| 241 | MC_4TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \ | ||
| 242 | MC_4TAP_LINKS(c->inter.put, 0, 1, 4tap_h, bd, sse4); \ | ||
| 243 | MC_4TAP_LINKS(c->inter.put, 1, 0, 4tap_v, bd, sse4); \ | ||
| 244 | MC_4TAP_LINKS(c->inter.put, 1, 1, 4tap_hv, bd, sse4) | ||
| 245 | /* All SSE4 links for one bit depth: chroma (4-tap) first, then luma (8-tap). Expands to a plain statement sequence. */ |||
| 246 | #define MC_LINK_SSE4(bd) \ | ||
| 247 | MC_4TAP_LINKS_SSE4(bd) \ | ||
| 248 | MC_8TAP_LINKS_SSE4(bd) | ||
| 249 | /* Sets the W = 4..6 (width 32/64/128) entries for pixels, h and v to the AVX2 versions; the hv slot [1][1] is not assigned by this macro. */ |||
| 250 | #define MC_TAP_LINKS_AVX2(C,tap,bd) do { \ | ||
| 251 | PEL_LINK(c->inter.put, C, 4, 0, 0, pixels32, bd, avx2) \ | ||
| 252 | PEL_LINK(c->inter.put, C, 5, 0, 0, pixels64, bd, avx2) \ | ||
| 253 | PEL_LINK(c->inter.put, C, 6, 0, 0, pixels128, bd, avx2) \ | ||
| 254 | PEL_LINK(c->inter.put, C, 4, 0, 1, tap##tap_h32, bd, avx2) \ | ||
| 255 | PEL_LINK(c->inter.put, C, 5, 0, 1, tap##tap_h64, bd, avx2) \ | ||
| 256 | PEL_LINK(c->inter.put, C, 6, 0, 1, tap##tap_h128, bd, avx2) \ | ||
| 257 | PEL_LINK(c->inter.put, C, 4, 1, 0, tap##tap_v32, bd, avx2) \ | ||
| 258 | PEL_LINK(c->inter.put, C, 5, 1, 0, tap##tap_v64, bd, avx2) \ | ||
| 259 | PEL_LINK(c->inter.put, C, 6, 1, 0, tap##tap_v128, bd, avx2) \ | ||
| 260 | } while (0) | ||
| 261 | /* LUMA is linked with the 8-tap names, CHROMA with the 4-tap names. */ |||
| 262 | #define MC_LINKS_AVX2(bd) \ | ||
| 263 | MC_TAP_LINKS_AVX2(LUMA, 8, bd); \ | ||
| 264 | MC_TAP_LINKS_AVX2(CHROMA, 4, bd); | ||
| 265 | /* Sets the W = 3 (width 16) entries for pixels, h, v and hv, plus the hv entries for W = 4..6. The init function invokes this only for bd 10 and 12. */ |||
| 266 | #define MC_TAP_LINKS_16BPC_AVX2(C, tap, bd) do { \ | ||
| 267 | PEL_LINK(c->inter.put, C, 3, 0, 0, pixels16, bd, avx2) \ | ||
| 268 | PEL_LINK(c->inter.put, C, 3, 0, 1, tap##tap_h16, bd, avx2) \ | ||
| 269 | PEL_LINK(c->inter.put, C, 3, 1, 0, tap##tap_v16, bd, avx2) \ | ||
| 270 | PEL_LINK(c->inter.put, C, 3, 1, 1, tap##tap_hv16, bd, avx2) \ | ||
| 271 | PEL_LINK(c->inter.put, C, 4, 1, 1, tap##tap_hv32, bd, avx2) \ | ||
| 272 | PEL_LINK(c->inter.put, C, 5, 1, 1, tap##tap_hv64, bd, avx2) \ | ||
| 273 | PEL_LINK(c->inter.put, C, 6, 1, 1, tap##tap_hv128, bd, avx2) \ | ||
| 274 | } while (0) | ||
| 275 | |||
| 276 | #define MC_LINKS_16BPC_AVX2(bd) \ | ||
| 277 | MC_TAP_LINKS_16BPC_AVX2(LUMA, 8, bd); \ | ||
| 278 | MC_TAP_LINKS_16BPC_AVX2(CHROMA, 4, bd); | ||
| 279 | /* Single AVX2 SAD routine with no bit-depth suffix; SAD_INIT() installs it for every bit depth handled by the init function. */ |||
| 280 | int ff_vvc_sad_avx2(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h); | ||
| 281 | #define SAD_INIT() c->inter.sad = ff_vvc_sad_avx2 | ||
| 282 | /* Declares the ALF luma/chroma filter prototypes at block scope, installs them into c->alf.filter[LUMA/CHROMA], and sets c->alf.classify to the static vvc_alf_classify_<bd>_<opt> wrapper generated by ALF_FUNCS. */ |||
| 283 | #define ALF_INIT(bd, opt) do { \ | ||
| 284 | void bf(ff_vvc_alf_filter_luma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \ | ||
| 285 | const uint8_t *src, ptrdiff_t src_stride, int width, int height, \ | ||
| 286 | const int16_t *filter, const int16_t *clip, int vb_pos); \ | ||
| 287 | void bf(ff_vvc_alf_filter_chroma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \ | ||
| 288 | const uint8_t *src, ptrdiff_t src_stride, int width, int height, \ | ||
| 289 | const int16_t *filter, const int16_t *clip, int vb_pos); \ | ||
| 290 | c->alf.filter[LUMA] = bf(ff_vvc_alf_filter_luma, bd, opt); \ | ||
| 291 | c->alf.filter[CHROMA] = bf(ff_vvc_alf_filter_chroma, bd, opt); \ | ||
| 292 | c->alf.classify = bf(vvc_alf_classify, bd, opt); \ | ||
| 293 | } while (0) | ||
| 294 | |||
| 295 | #endif /* closes the nested ARCH_X86_64 guard */ | ||
| 296 | |||
| 297 | |||
| 298 | #endif // ARCH_X86_64 | ||
| 299 | /* Installs x86-64 SIMD function pointers into c for bd = 8, 10 or 12, gated by the runtime CPU flags; any other bd leaves c unchanged, and without ARCH_X86_64 the body is empty. */ |||
| 300 | 1578 | av_cold void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd) | |
| 301 | { | ||
| 302 | #if ARCH_X86_64 | ||
| 303 | 1578 | const int cpu_flags = av_get_cpu_flags(); | |
| 304 | /* In each case the SSE4 links run first; the AVX2 block that follows then reassigns the table entries it covers. */ |||
| 305 |
3/4✓ Branch 0 taken 223 times.
✓ Branch 1 taken 1201 times.
✓ Branch 2 taken 154 times.
✗ Branch 3 not taken.
|
1578 | switch (bd) { |
| 306 | 223 | case 8: | |
| 307 | #if HAVE_SSE4_EXTERNAL | ||
| 308 |
2/2✓ Branch 0 taken 109 times.
✓ Branch 1 taken 114 times.
|
223 | if (EXTERNAL_SSE4(cpu_flags)) { |
| 309 | 109 | MC_LINK_SSE4(8); | |
| 310 | } | ||
| 311 | #endif | ||
| 312 | #if HAVE_AVX2_EXTERNAL | ||
| 313 |
3/4✓ Branch 0 taken 43 times.
✓ Branch 1 taken 180 times.
✓ Branch 2 taken 43 times.
✗ Branch 3 not taken.
|
223 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
| 314 | // inter: avg/w_avg, DMVR, MC put tables, BDOF, SAD | ||
| 315 | 43 | AVG_INIT(8, avx2); | |
| 316 | 43 | DMVR_INIT(8); | |
| 317 | 43 | MC_LINKS_AVX2(8); /* no MC_LINKS_16BPC_AVX2 for 8 bits: those wrappers are only instantiated for 10 and 12 */ | |
| 318 | 43 | OF_INIT(8, avx2); | |
| 319 | 43 | SAD_INIT(); | |
| 320 | |||
| 321 | // filter: ALF and SAO | ||
| 322 | 43 | ALF_INIT(8, avx2); | |
| 323 | 43 | SAO_INIT(8, avx2); | |
| 324 | } | ||
| 325 | #endif | ||
| 326 | 223 | break; | |
| 327 | 1201 | case 10: | |
| 328 | #if HAVE_SSE4_EXTERNAL | ||
| 329 |
2/2✓ Branch 0 taken 84 times.
✓ Branch 1 taken 1117 times.
|
1201 | if (EXTERNAL_SSE4(cpu_flags)) { |
| 330 | 84 | MC_LINK_SSE4(10); | |
| 331 | } | ||
| 332 | #endif | ||
| 333 | #if HAVE_AVX2_EXTERNAL | ||
| 334 |
3/4✓ Branch 0 taken 12 times.
✓ Branch 1 taken 1189 times.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
|
1201 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
| 335 | // inter: avg/w_avg, DMVR, MC put tables (incl. 16BPC extras), BDOF, SAD | ||
| 336 | 12 | AVG_INIT(10, avx2); | |
| 337 | 12 | DMVR_INIT(10); | |
| 338 | 12 | MC_LINKS_AVX2(10); | |
| 339 | 12 | MC_LINKS_16BPC_AVX2(10); | |
| 340 | 12 | OF_INIT(10, avx2); | |
| 341 | 12 | SAD_INIT(); | |
| 342 | |||
| 343 | // filter: ALF and SAO | ||
| 344 | 12 | ALF_INIT(10, avx2); | |
| 345 | 12 | SAO_INIT(10, avx2); | |
| 346 | } | ||
| 347 | #endif | ||
| 348 | 1201 | break; | |
| 349 | 154 | case 12: | |
| 350 | #if HAVE_SSE4_EXTERNAL | ||
| 351 |
2/2✓ Branch 0 taken 77 times.
✓ Branch 1 taken 77 times.
|
154 | if (EXTERNAL_SSE4(cpu_flags)) { |
| 352 | 77 | MC_LINK_SSE4(12); | |
| 353 | } | ||
| 354 | #endif | ||
| 355 | #if HAVE_AVX2_EXTERNAL | ||
| 356 |
3/4✓ Branch 0 taken 11 times.
✓ Branch 1 taken 143 times.
✓ Branch 2 taken 11 times.
✗ Branch 3 not taken.
|
154 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
| 357 | // inter: avg/w_avg, DMVR, MC put tables (incl. 16BPC extras), BDOF, SAD | ||
| 358 | 11 | AVG_INIT(12, avx2); | |
| 359 | 11 | DMVR_INIT(12); | |
| 360 | 11 | MC_LINKS_AVX2(12); | |
| 361 | 11 | MC_LINKS_16BPC_AVX2(12); | |
| 362 | 11 | OF_INIT(12, avx2); | |
| 363 | 11 | SAD_INIT(); | |
| 364 | |||
| 365 | // filter: ALF and SAO | ||
| 366 | 11 | ALF_INIT(12, avx2); | |
| 367 | 11 | SAO_INIT(12, avx2); | |
| 368 | } | ||
| 369 | #endif | ||
| 370 | 154 | break; | |
| 371 | ✗ | default: | |
| 372 | ✗ | break; | |
| 373 | } | ||
| 374 | #endif | ||
| 375 | 1578 | } | |
| 376 |