FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/vvc/dsp_init.c
Date: 2026-04-24 19:58:39
Exec Total Coverage
Lines: 53 55 96.4%
Functions: 213 213 100.0%
Branches: 18 22 81.8%

Line Branch Exec Source
1 /*
2 * VVC DSP init for x86
3 *
4 * Copyright (C) 2022-2024 Nuo Mi
5 * Copyright (c) 2023-2024 Wu Jianhua
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "config.h"
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/cpu.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavcodec/vvc/dec.h"
30 #include "libavcodec/vvc/ctu.h"
31 #include "libavcodec/vvc/dsp.h"
32 #include "libavcodec/x86/h26x/h2656dsp.h"
33
34 #if ARCH_X86_64
35
/* Token-pasting helpers used to build symbol names:
 *   bf(fn, bd, opt)  expands to fn_<bd>_<opt>
 *   BF(fn, bpc, opt) expands to fn_<bpc>bpc_<opt> */
36 #define bf(fn, bd, opt) fn##_##bd##_##opt
37 #define BF(fn, bpc, opt) fn##_##bpc##bpc_##opt
38
/* Declares the four ff_vvc_dmvr routines for bit depth `bd` and suffix `opt`:
 * the plain variant plus the _h, _v and _hv variants. All four share the same
 * signature. Only prototypes are emitted here; the definitions are not in this
 * file (presumably assembly -- confirm against the build). */
39 #define DMVR_PROTOTYPES(bd, opt) \
40 void ff_vvc_dmvr_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
41 int height, intptr_t mx, intptr_t my, int width); \
42 void ff_vvc_dmvr_h_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
43 int height, intptr_t mx, intptr_t my, int width); \
44 void ff_vvc_dmvr_v_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
45 int height, intptr_t mx, intptr_t my, int width); \
46 void ff_vvc_dmvr_hv_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
47 int height, intptr_t mx, intptr_t my, int width); \
48
/* Prototypes are instantiated for 8-, 10- and 12-bit, avx2 suffix only. */
49 DMVR_PROTOTYPES( 8, avx2)
50 DMVR_PROTOTYPES(10, avx2)
51 DMVR_PROTOTYPES(12, avx2)
52
/* Declares ff_vvc_apply_bdof_<BD>_<OPT> at block scope and stores it in
 * c->inter.apply_bdof. Requires a variable named `c` to be in scope at the
 * expansion site. Wrapped in do { } while (0) so it behaves as one statement. */
53 #define OF_INIT(BD, OPT) do { \
54 void ff_vvc_apply_bdof_## BD ## _ ## OPT(uint8_t *dst, ptrdiff_t dst_stride, \
55 const int16_t *src0, const int16_t *src1, \
56 int w, int h); \
57 c->inter.apply_bdof = ff_vvc_apply_bdof_## BD ##_## OPT; \
58 } while (0)
59
/* Declares the two external ALF classification routines for a given `bpc`:
 *   ff_vvc_alf_classify_grad_<bpc>bpc_<opt>  (writes gradient_sum from src)
 *   ff_vvc_alf_classify_<bpc>bpc_<opt>       (reads gradient_sum, writes
 *                                             class_idx / transpose_idx)
 * Note that the size arguments here are intptr_t, unlike the int parameters of
 * the static wrappers generated by ALF_FUNCS. */
60 #define ALF_BPC_PROTOTYPES(bpc, opt) \
61 void BF(ff_vvc_alf_classify_grad, bpc, opt)(int *gradient_sum, \
62 const uint8_t *src, ptrdiff_t src_stride, intptr_t width, intptr_t height, intptr_t vb_pos); \
63 void BF(ff_vvc_alf_classify, bpc, opt)(int *class_idx, int *transpose_idx, const int *gradient_sum, \
64 intptr_t width, intptr_t height, intptr_t vb_pos, intptr_t bit_depth); \
65
/* Two variants are declared: 8bpc and 16bpc, both with the avx2 suffix. */
66 ALF_BPC_PROTOTYPES(8, avx2)
67 ALF_BPC_PROTOTYPES(16, avx2)
68
69 #if ARCH_X86_64
/* Defines a static forwarding wrapper vvc_put_<name>_<depth>_<opt>.
 * The wrapper calls ff_h2656_put_<name>_<depth>_<opt> with the same arguments,
 * inserting the constant 2 * MAX_PB_SIZE as the second argument, right after
 * dst (presumably the destination stride in bytes -- confirm against
 * h2656dsp.h).
 * NOTE(review): the `#if ARCH_X86_64` immediately above this macro repeats the
 * guard that is already open from earlier in the file. */
70 #define FW_PUT(name, depth, opt) \
71 static void vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
72 int height, const int8_t *hf, const int8_t *vf, int width) \
73 { \
74 ff_h2656_put_## name ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
75 }
76
77 #if HAVE_SSE4_EXTERNAL
/* Instantiates FW_PUT for the name suffixes 4, 8, 16, 32, 64 and 128. */
78 #define FW_PUT_TAP(fname, bitd, opt ) \
79 FW_PUT(fname##4, bitd, opt ) \
80 FW_PUT(fname##8, bitd, opt ) \
81 FW_PUT(fname##16, bitd, opt ) \
82 FW_PUT(fname##32, bitd, opt ) \
83 FW_PUT(fname##64, bitd, opt ) \
84 FW_PUT(fname##128, bitd, opt ) \
85
/* Same as FW_PUT_TAP, plus an extra wrapper for the suffix 2. */
86 #define FW_PUT_4TAP(fname, bitd, opt) \
87 FW_PUT(fname ## 2, bitd, opt) \
88 FW_PUT_TAP(fname, bitd, opt)
89
/* sse4 wrappers for pixels, 4tap_h, 4tap_v and 4tap_hv (suffixes 2..128). */
90 #define FW_PUT_4TAP_SSE4(bitd) \
91 FW_PUT_4TAP(pixels, bitd, sse4) \
92 FW_PUT_4TAP(4tap_h, bitd, sse4) \
93 FW_PUT_4TAP(4tap_v, bitd, sse4) \
94 FW_PUT_4TAP(4tap_hv, bitd, sse4)
95
/* sse4 wrappers for 8tap_h, 8tap_v and 8tap_hv (suffixes 4..128). */
96 #define FW_PUT_8TAP_SSE4(bitd) \
97 FW_PUT_TAP(8tap_h, bitd, sse4) \
98 FW_PUT_TAP(8tap_v, bitd, sse4) \
99 FW_PUT_TAP(8tap_hv, bitd, sse4)
100
/* All sse4 wrappers (4-tap and 8-tap families) for one bit depth. */
101 #define FW_PUT_SSE4(bitd) \
102 FW_PUT_4TAP_SSE4(bitd) \
103 FW_PUT_8TAP_SSE4(bitd)
104
/* Generate the sse4 wrappers for 8-, 10- and 12-bit. */
105 7564 FW_PUT_SSE4( 8)
106 1096 FW_PUT_SSE4(10)
107 1096 FW_PUT_SSE4(12)
108 #endif
109
110 #if HAVE_AVX2_EXTERNAL
/* avx2 wrappers for <n>tap_h and <n>tap_v with suffixes 32, 64 and 128.
 * No hv variants are generated by this macro. */
111 #define FW_PUT_TAP_AVX2(n, bitd) \
112 FW_PUT(n ## tap_h32, bitd, avx2) \
113 FW_PUT(n ## tap_h64, bitd, avx2) \
114 FW_PUT(n ## tap_h128, bitd, avx2) \
115 FW_PUT(n ## tap_v32, bitd, avx2) \
116 FW_PUT(n ## tap_v64, bitd, avx2) \
117 FW_PUT(n ## tap_v128, bitd, avx2)
118
/* avx2 wrappers for pixels32/64/128 plus the 4-tap and 8-tap h/v sets. */
119 #define FW_PUT_AVX2(bitd) \
120 FW_PUT(pixels32, bitd, avx2) \
121 FW_PUT(pixels64, bitd, avx2) \
122 FW_PUT(pixels128, bitd, avx2) \
123 FW_PUT_TAP_AVX2(4, bitd) \
124 FW_PUT_TAP_AVX2(8, bitd) \
125
/* Generate the avx2 wrappers for 8-, 10- and 12-bit. */
126 1804 FW_PUT_AVX2( 8)
127 234 FW_PUT_AVX2(10)
128 234 FW_PUT_AVX2(12)
129
/* Additional avx2 wrappers: <n>tap_h16, <n>tap_v16 and <n>tap_hv with the
 * suffixes 16, 32, 64 and 128. */
130 #define FW_PUT_TAP_16BPC_AVX2(n, bitd) \
131 FW_PUT(n ## tap_h16, bitd, avx2) \
132 FW_PUT(n ## tap_v16, bitd, avx2) \
133 FW_PUT(n ## tap_hv16, bitd, avx2) \
134 FW_PUT(n ## tap_hv32, bitd, avx2) \
135 FW_PUT(n ## tap_hv64, bitd, avx2) \
136 FW_PUT(n ## tap_hv128, bitd, avx2)
137
/* pixels16 plus the 4-tap and 8-tap sets defined by the macro above. */
138 #define FW_PUT_16BPC_AVX2(bitd) \
139 FW_PUT(pixels16, bitd, avx2) \
140 FW_PUT_TAP_16BPC_AVX2(4, bitd) \
141 FW_PUT_TAP_16BPC_AVX2(8, bitd)
142
/* This extra set is generated for 10- and 12-bit only; there is no 8-bit
 * instantiation in this file. */
143 182 FW_PUT_16BPC_AVX2(10)
144 182 FW_PUT_16BPC_AVX2(12)
145
/* Defines the static wrapper vvc_alf_classify_<bd>_<opt>. It runs the two
 * external routines declared by ALF_BPC_PROTOTYPES back to back:
 *   1. ff_vvc_alf_classify_grad_<bpc>bpc_<opt> fills gradient_tmp from src;
 *   2. ff_vvc_alf_classify_<bpc>bpc_<opt> reads gradient_tmp and writes
 *      class_idx / transpose_idx, receiving `bd` as its bit_depth argument.
 * `bpc` selects which external pair is called; `bd` is the bit depth passed on. */
146 #define ALF_FUNCS(bpc, bd, opt) \
147 static void bf(vvc_alf_classify, bd, opt)(int *class_idx, int *transpose_idx, \
148 const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, int *gradient_tmp) \
149 { \
150 BF(ff_vvc_alf_classify_grad, bpc, opt)(gradient_tmp, src, src_stride, width, height, vb_pos); \
151 BF(ff_vvc_alf_classify, bpc, opt)(class_idx, transpose_idx, gradient_tmp, width, height, vb_pos, bd); \
152 } \
153
/* 8-bit uses the 8bpc routines; 10- and 12-bit both use the 16bpc routines. */
154 256 ALF_FUNCS(8, 8, avx2)
155 256 ALF_FUNCS(16, 10, avx2)
156 256 ALF_FUNCS(16, 12, avx2)
157
158 #endif
159
/* Declares one external SAO band filter and one external SAO edge filter for
 * the name component `wd`, bit depth `bitd` and suffix `opt`. The band filter
 * takes separate dst and src strides; the edge filter takes only a dst stride. */
160 #define SAO_FILTER_FUNC(wd, bitd, opt) \
161 void ff_vvc_sao_band_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
162 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
163 void ff_vvc_sao_edge_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
164 const int16_t *sao_offset_val, int eo, int width, int height); \
165
/* Declares both filters for the nine `wd` values 8, 16, 32, 48, 64, 80, 96,
 * 112 and 128. */
166 #define SAO_FILTER_FUNCS(bitd, opt) \
167 SAO_FILTER_FUNC(8, bitd, opt) \
168 SAO_FILTER_FUNC(16, bitd, opt) \
169 SAO_FILTER_FUNC(32, bitd, opt) \
170 SAO_FILTER_FUNC(48, bitd, opt) \
171 SAO_FILTER_FUNC(64, bitd, opt) \
172 SAO_FILTER_FUNC(80, bitd, opt) \
173 SAO_FILTER_FUNC(96, bitd, opt) \
174 SAO_FILTER_FUNC(112, bitd, opt) \
175 SAO_FILTER_FUNC(128, bitd, opt) \
176
/* Prototypes for 8-, 10- and 12-bit, avx2 suffix. */
177 SAO_FILTER_FUNCS(8, avx2)
178 SAO_FILTER_FUNCS(10, avx2)
179 SAO_FILTER_FUNCS(12, avx2)
180
/* Fills c->sao.<type>_filter[0..8] with the nine functions declared by
 * SAO_FILTER_FUNCS, in the order 8, 16, 32, 48, 64, 80, 96, 112, 128.
 * `type` is pasted into both the member name and the function name (the file
 * uses `band` and `edge`). Requires `c` in scope at the expansion site. */
181 #define SAO_FILTER_INIT(type, bitd, opt) do { \
182 c->sao.type##_filter[0] = ff_vvc_sao_##type##_filter_8_##bitd##_##opt; \
183 c->sao.type##_filter[1] = ff_vvc_sao_##type##_filter_16_##bitd##_##opt; \
184 c->sao.type##_filter[2] = ff_vvc_sao_##type##_filter_32_##bitd##_##opt; \
185 c->sao.type##_filter[3] = ff_vvc_sao_##type##_filter_48_##bitd##_##opt; \
186 c->sao.type##_filter[4] = ff_vvc_sao_##type##_filter_64_##bitd##_##opt; \
187 c->sao.type##_filter[5] = ff_vvc_sao_##type##_filter_80_##bitd##_##opt; \
188 c->sao.type##_filter[6] = ff_vvc_sao_##type##_filter_96_##bitd##_##opt; \
189 c->sao.type##_filter[7] = ff_vvc_sao_##type##_filter_112_##bitd##_##opt; \
190 c->sao.type##_filter[8] = ff_vvc_sao_##type##_filter_128_##bitd##_##opt; \
191 } while (0)
192
/* Installs both the band and the edge filter tables for one bit depth. */
193 #define SAO_INIT(bitd, opt) do { \
194 SAO_FILTER_INIT(band, bitd, opt); \
195 SAO_FILTER_INIT(edge, bitd, opt); \
196 } while (0)
197
/* Declares ff_vvc_avg_<bd>_<opt> and ff_vvc_w_avg_<bd>_<opt> at block scope
 * and stores them in c->inter.avg and c->inter.w_avg. The w_avg variant takes
 * four extra int arguments (denom, w0, w1, o). Requires `c` in scope. */
198 #define AVG_INIT(bd, opt) do { \
199 void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
200 const int16_t *src0, const int16_t *src1, int width, int height);\
201 void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
202 const int16_t *src0, const int16_t *src1, int width, int height, \
203 int denom, int w0, int w1, int o); \
204 c->inter.avg = bf(ff_vvc_avg, bd, opt); \
205 c->inter.w_avg = bf(ff_vvc_w_avg, bd, opt); \
206 } while (0)
207
/* Fills the 2x2 table c->inter.dmvr with the avx2 routines declared by
 * DMVR_PROTOTYPES: [0][0] plain, [0][1] _h, [1][0] _v, [1][1] _hv.
 * The avx2 suffix is hard-coded, unlike the other *_INIT macros which take
 * `opt`. Requires `c` in scope. */
208 #define DMVR_INIT(bd) do { \
209 c->inter.dmvr[0][0] = ff_vvc_dmvr_##bd##_avx2; \
210 c->inter.dmvr[0][1] = ff_vvc_dmvr_h_##bd##_avx2; \
211 c->inter.dmvr[1][0] = ff_vvc_dmvr_v_##bd##_avx2; \
212 c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_##bd##_avx2; \
213 } while (0)
214
/* Installs one pair of function pointers:
 *   dst[C][W][idx1][idx2]     = vvc_put_<name>_<D>_<opt>         (FW_PUT wrapper)
 *   dst_uni[C][W][idx1][idx2] = ff_h2656_put_uni_<name>_<D>_<opt>
 * `dst ## _uni` pastes onto the last token of `dst`, so passing c->inter.put
 * yields c->inter.put_uni. Expands to two statements, each ending in ';'. */
215 #define PEL_LINK(dst, C, W, idx1, idx2, name, D, opt) \
216 dst[C][W][idx1][idx2] = vvc_put_## name ## _ ## D ## _##opt; \
217 dst ## _uni[C][W][idx1][idx2] = ff_h2656_put_uni_ ## name ## _ ## D ## _##opt; \
218
/* Links W indices 1..6 to the name suffixes 4, 8, 16, 32, 64, 128. */
219 #define MC_TAP_LINKS(pointer, C, my, mx, fname, bitd, opt ) \
220 PEL_LINK(pointer, C, 1, my , mx , fname##4 , bitd, opt ); \
221 PEL_LINK(pointer, C, 2, my , mx , fname##8 , bitd, opt ); \
222 PEL_LINK(pointer, C, 3, my , mx , fname##16, bitd, opt ); \
223 PEL_LINK(pointer, C, 4, my , mx , fname##32, bitd, opt ); \
224 PEL_LINK(pointer, C, 5, my , mx , fname##64, bitd, opt ); \
225 PEL_LINK(pointer, C, 6, my , mx , fname##128, bitd, opt );
226
/* 8-tap links always target the LUMA plane index. */
227 #define MC_8TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
228 MC_TAP_LINKS(pointer, LUMA, my, mx, fname, bitd, opt)
229
/* (my, mx) selects the variant: (0,0) pixels, (0,1) h, (1,0) v, (1,1) hv. */
230 #define MC_8TAP_LINKS_SSE4(bd) \
231 MC_8TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \
232 MC_8TAP_LINKS(c->inter.put, 0, 1, 8tap_h, bd, sse4); \
233 MC_8TAP_LINKS(c->inter.put, 1, 0, 8tap_v, bd, sse4); \
234 MC_8TAP_LINKS(c->inter.put, 1, 1, 8tap_hv, bd, sse4)
235
/* 4-tap links target the CHROMA plane index and also fill W index 0 with the
 * suffix-2 function. */
236 #define MC_4TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
237 PEL_LINK(pointer, CHROMA, 0, my , mx , fname##2 , bitd, opt ); \
238 MC_TAP_LINKS(pointer, CHROMA, my, mx, fname, bitd, opt) \
239
/* Same (my, mx) layout as the 8-tap SSE4 macro above. */
240 #define MC_4TAP_LINKS_SSE4(bd) \
241 MC_4TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \
242 MC_4TAP_LINKS(c->inter.put, 0, 1, 4tap_h, bd, sse4); \
243 MC_4TAP_LINKS(c->inter.put, 1, 0, 4tap_v, bd, sse4); \
244 MC_4TAP_LINKS(c->inter.put, 1, 1, 4tap_hv, bd, sse4)
245
/* Installs every sse4 put/put_uni pointer for one bit depth.
 * NOTE(review): this expands to a bare sequence of statements (no
 * do { } while (0)); the call sites in this file use it inside a braced
 * if-body, which is what keeps it safe. */
246 #define MC_LINK_SSE4(bd) \
247 MC_4TAP_LINKS_SSE4(bd) \
248 MC_8TAP_LINKS_SSE4(bd)
249
/* Installs avx2 put/put_uni pointers at W indices 4, 5, 6 (suffixes 32, 64,
 * 128) for the pixels (0,0), h (0,1) and v (1,0) slots of plane C.
 * `tap##tap_h32` pastes the macro argument `tap` onto the literal token
 * `tap_h32`, giving e.g. 8tap_h32. The hv slot (1,1) is not touched here. */
250 #define MC_TAP_LINKS_AVX2(C,tap,bd) do { \
251 PEL_LINK(c->inter.put, C, 4, 0, 0, pixels32, bd, avx2) \
252 PEL_LINK(c->inter.put, C, 5, 0, 0, pixels64, bd, avx2) \
253 PEL_LINK(c->inter.put, C, 6, 0, 0, pixels128, bd, avx2) \
254 PEL_LINK(c->inter.put, C, 4, 0, 1, tap##tap_h32, bd, avx2) \
255 PEL_LINK(c->inter.put, C, 5, 0, 1, tap##tap_h64, bd, avx2) \
256 PEL_LINK(c->inter.put, C, 6, 0, 1, tap##tap_h128, bd, avx2) \
257 PEL_LINK(c->inter.put, C, 4, 1, 0, tap##tap_v32, bd, avx2) \
258 PEL_LINK(c->inter.put, C, 5, 1, 0, tap##tap_v64, bd, avx2) \
259 PEL_LINK(c->inter.put, C, 6, 1, 0, tap##tap_v128, bd, avx2) \
260 } while (0)
261
/* LUMA uses the 8-tap names, CHROMA the 4-tap names.
 * NOTE(review): the expansion already ends in ';', so `MC_LINKS_AVX2(x);`
 * at a call site leaves an extra empty statement. */
262 #define MC_LINKS_AVX2(bd) \
263 MC_TAP_LINKS_AVX2(LUMA, 8, bd); \
264 MC_TAP_LINKS_AVX2(CHROMA, 4, bd);
265
/* Installs the avx2 pointers that MC_TAP_LINKS_AVX2 leaves out: W index 3
 * (suffix 16) for pixels/h/v/hv, and the hv slot (1,1) for W indices 4, 5, 6
 * (suffixes 32, 64, 128). */
266 #define MC_TAP_LINKS_16BPC_AVX2(C, tap, bd) do { \
267 PEL_LINK(c->inter.put, C, 3, 0, 0, pixels16, bd, avx2) \
268 PEL_LINK(c->inter.put, C, 3, 0, 1, tap##tap_h16, bd, avx2) \
269 PEL_LINK(c->inter.put, C, 3, 1, 0, tap##tap_v16, bd, avx2) \
270 PEL_LINK(c->inter.put, C, 3, 1, 1, tap##tap_hv16, bd, avx2) \
271 PEL_LINK(c->inter.put, C, 4, 1, 1, tap##tap_hv32, bd, avx2) \
272 PEL_LINK(c->inter.put, C, 5, 1, 1, tap##tap_hv64, bd, avx2) \
273 PEL_LINK(c->inter.put, C, 6, 1, 1, tap##tap_hv128, bd, avx2) \
274 } while (0)
275
/* LUMA / 8-tap and CHROMA / 4-tap, mirroring MC_LINKS_AVX2 (including the
 * trailing ';'). */
276 #define MC_LINKS_16BPC_AVX2(bd) \
277 MC_TAP_LINKS_16BPC_AVX2(LUMA, 8, bd); \
278 MC_TAP_LINKS_16BPC_AVX2(CHROMA, 4, bd);
279
/* External avx2 SAD routine. It has no bit-depth component in its name, and
 * the same function is installed for every bit depth handled in this file. */
280 int ff_vvc_sad_avx2(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
/* Stores the routine in c->inter.sad. Requires `c` in scope; the expansion
 * has no trailing ';', so the call site supplies it. */
281 #define SAD_INIT() c->inter.sad = ff_vvc_sad_avx2
282
/* Declares ff_vvc_alf_filter_luma_<bd>_<opt> and
 * ff_vvc_alf_filter_chroma_<bd>_<opt> at block scope, then installs:
 *   c->alf.filter[LUMA]   = the luma filter
 *   c->alf.filter[CHROMA] = the chroma filter
 *   c->alf.classify       = vvc_alf_classify_<bd>_<opt>, the static wrapper
 *                           generated by ALF_FUNCS
 * Requires `c` in scope at the expansion site. */
283 #define ALF_INIT(bd, opt) do { \
284 void bf(ff_vvc_alf_filter_luma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
285 const uint8_t *src, ptrdiff_t src_stride, int width, int height, \
286 const int16_t *filter, const int16_t *clip, int vb_pos); \
287 void bf(ff_vvc_alf_filter_chroma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
288 const uint8_t *src, ptrdiff_t src_stride, int width, int height, \
289 const int16_t *filter, const int16_t *clip, int vb_pos); \
290 c->alf.filter[LUMA] = bf(ff_vvc_alf_filter_luma, bd, opt); \
291 c->alf.filter[CHROMA] = bf(ff_vvc_alf_filter_chroma, bd, opt); \
292 c->alf.classify = bf(vvc_alf_classify, bd, opt); \
293 } while (0)
294
295 #endif
296
297
298 #endif // ARCH_X86_64
299
/**
 * Install x86 SIMD implementations into the VVC DSP context.
 *
 * The whole body is compiled only when ARCH_X86_64 is set; otherwise the
 * function does nothing. CPU capabilities are read once via
 * av_get_cpu_flags(), then the function dispatches on the bit depth:
 *
 *  - 8, 10, 12: if SSE4 is available, link the sse4 put / put_uni functions.
 *    If fast AVX2 is available, additionally link the avx2 avg / w_avg, dmvr,
 *    put / put_uni, bdof and sad functions plus the ALF and SAO filters.
 *  - 10 and 12 also link the extra MC_LINKS_16BPC_AVX2 set; 8-bit does not.
 *  - any other value leaves `c` untouched.
 *
 * The AVX2 block runs after the SSE4 block, so where both assign the same
 * c->inter.put / put_uni slot the AVX2 pointer is the one that remains.
 *
 * @param c  DSP context whose function pointers are overwritten
 * @param bd bit depth selecting which set of functions is linked
 */
300 1578 av_cold void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
301 {
302 #if ARCH_X86_64
303 1578 const int cpu_flags = av_get_cpu_flags();
304
305
3/4
✓ Branch 0 taken 223 times.
✓ Branch 1 taken 1201 times.
✓ Branch 2 taken 154 times.
✗ Branch 3 not taken.
1578 switch (bd) {
306 223 case 8:
307 #if HAVE_SSE4_EXTERNAL
308
2/2
✓ Branch 0 taken 109 times.
✓ Branch 1 taken 114 times.
223 if (EXTERNAL_SSE4(cpu_flags)) {
309 109 MC_LINK_SSE4(8);
310 }
311 #endif
312 #if HAVE_AVX2_EXTERNAL
313
3/4
✓ Branch 0 taken 43 times.
✓ Branch 1 taken 180 times.
✓ Branch 2 taken 43 times.
✗ Branch 3 not taken.
223 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
314 // inter
315 43 AVG_INIT(8, avx2);
316 43 DMVR_INIT(8);
317 43 MC_LINKS_AVX2(8);
318 43 OF_INIT(8, avx2);
319 43 SAD_INIT();
320
321 // filter
322 43 ALF_INIT(8, avx2);
323 43 SAO_INIT(8, avx2);
324 }
325 #endif
326 223 break;
327 1201 case 10:
328 #if HAVE_SSE4_EXTERNAL
329
2/2
✓ Branch 0 taken 84 times.
✓ Branch 1 taken 1117 times.
1201 if (EXTERNAL_SSE4(cpu_flags)) {
330 84 MC_LINK_SSE4(10);
331 }
332 #endif
333 #if HAVE_AVX2_EXTERNAL
334
3/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 1189 times.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
1201 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
335 // inter
336 12 AVG_INIT(10, avx2);
337 12 DMVR_INIT(10);
338 12 MC_LINKS_AVX2(10);
339 12 MC_LINKS_16BPC_AVX2(10);
340 12 OF_INIT(10, avx2);
341 12 SAD_INIT();
342
343 // filter
344 12 ALF_INIT(10, avx2);
345 12 SAO_INIT(10, avx2);
346 }
347 #endif
348 1201 break;
349 154 case 12:
350 #if HAVE_SSE4_EXTERNAL
351
2/2
✓ Branch 0 taken 77 times.
✓ Branch 1 taken 77 times.
154 if (EXTERNAL_SSE4(cpu_flags)) {
352 77 MC_LINK_SSE4(12);
353 }
354 #endif
355 #if HAVE_AVX2_EXTERNAL
356
3/4
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 143 times.
✓ Branch 2 taken 11 times.
✗ Branch 3 not taken.
154 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
357 // inter
358 11 AVG_INIT(12, avx2);
359 11 DMVR_INIT(12);
360 11 MC_LINKS_AVX2(12);
361 11 MC_LINKS_16BPC_AVX2(12);
362 11 OF_INIT(12, avx2);
363 11 SAD_INIT();
364
365 // filter
366 11 ALF_INIT(12, avx2);
367 11 SAO_INIT(12, avx2);
368 }
369 #endif
370 154 break;
/* Any other bit depth: no x86 function pointers are installed. */
371 default:
372 break;
373 }
374 #endif
375 1578 }
376