Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * This file is part of FFmpeg. | ||
3 | * | ||
4 | * FFmpeg is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU Lesser General Public | ||
6 | * License as published by the Free Software Foundation; either | ||
7 | * version 2.1 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * FFmpeg is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * Lesser General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU Lesser General Public | ||
15 | * License along with FFmpeg; if not, write to the Free Software | ||
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | |||
19 | #include "config.h" | ||
20 | |||
21 | #if HAVE_SCHED_GETAFFINITY | ||
22 | #ifndef _GNU_SOURCE | ||
23 | # define _GNU_SOURCE | ||
24 | #endif | ||
25 | #include <sched.h> | ||
26 | #endif | ||
27 | |||
28 | #include <stddef.h> | ||
29 | #include <stdint.h> | ||
30 | #include <stdatomic.h> | ||
31 | |||
32 | #include "attributes.h" | ||
33 | #include "cpu.h" | ||
34 | #include "cpu_internal.h" | ||
35 | #include "opt.h" | ||
36 | #include "common.h" | ||
37 | |||
38 | #if HAVE_GETPROCESSAFFINITYMASK || HAVE_WINRT | ||
39 | #include <windows.h> | ||
40 | #endif | ||
41 | #if HAVE_SYSCTL | ||
42 | #if HAVE_SYS_PARAM_H | ||
43 | #include <sys/param.h> | ||
44 | #endif | ||
45 | #include <sys/types.h> | ||
46 | #include <sys/sysctl.h> | ||
47 | #endif | ||
48 | #if HAVE_UNISTD_H | ||
49 | #include <unistd.h> | ||
50 | #endif | ||
51 | |||
52 | #if HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO | ||
53 | #include <sys/auxv.h> | ||
54 | #endif | ||
55 | |||
/* Cached CPU feature flags. -1 is the "not determined yet" sentinel that
 * av_get_cpu_flags() checks before running detection; av_force_cpu_flags()
 * overwrites the value unconditionally. */
static atomic_int cpu_flags = -1;
/* Core count set through av_cpu_force_count(). av_cpu_count() only lets it
 * replace the detected value when it is greater than zero. */
static atomic_int cpu_count = -1;
58 | |||
/**
 * Ask the architecture-specific backend for the CPU feature flags.
 *
 * The backend is chosen at build time through the ARCH_* macros (first match
 * in the chain below wins).
 *
 * @return the backend's flag word, or 0 when no listed architecture is enabled
 */
static int get_cpu_flags(void)
{
    int detected = 0;

#if ARCH_MIPS
    detected = ff_get_cpu_flags_mips();
#elif ARCH_AARCH64
    detected = ff_get_cpu_flags_aarch64();
#elif ARCH_ARM
    detected = ff_get_cpu_flags_arm();
#elif ARCH_PPC
    detected = ff_get_cpu_flags_ppc();
#elif ARCH_RISCV
    detected = ff_get_cpu_flags_riscv();
#elif ARCH_X86
    detected = ff_get_cpu_flags_x86();
#elif ARCH_LOONGARCH
    detected = ff_get_cpu_flags_loongarch();
#endif

    return detected;
}
78 | |||
79 | 10511 | void av_force_cpu_flags(int arg){ | |
80 | 10511 | if (ARCH_X86 && | |
81 |
2/2✓ Branch 0 taken 2485 times.
✓ Branch 1 taken 8026 times.
|
10511 | (arg & ( AV_CPU_FLAG_3DNOW | |
82 | AV_CPU_FLAG_3DNOWEXT | | ||
83 | AV_CPU_FLAG_MMXEXT | | ||
84 | AV_CPU_FLAG_SSE | | ||
85 | AV_CPU_FLAG_SSE2 | | ||
86 | AV_CPU_FLAG_SSE2SLOW | | ||
87 | AV_CPU_FLAG_SSE3 | | ||
88 | AV_CPU_FLAG_SSE3SLOW | | ||
89 | AV_CPU_FLAG_SSSE3 | | ||
90 | AV_CPU_FLAG_SSE4 | | ||
91 | AV_CPU_FLAG_SSE42 | | ||
92 | AV_CPU_FLAG_AVX | | ||
93 | AV_CPU_FLAG_AVXSLOW | | ||
94 | AV_CPU_FLAG_XOP | | ||
95 | AV_CPU_FLAG_FMA3 | | ||
96 | AV_CPU_FLAG_FMA4 | | ||
97 | AV_CPU_FLAG_AVX2 | | ||
98 | AV_CPU_FLAG_AVX512 )) | ||
99 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2485 times.
|
2485 | && !(arg & AV_CPU_FLAG_MMX)) { |
100 | ✗ | av_log(NULL, AV_LOG_WARNING, "MMX implied by specified flags\n"); | |
101 | ✗ | arg |= AV_CPU_FLAG_MMX; | |
102 | } | ||
103 | |||
104 | 10511 | atomic_store_explicit(&cpu_flags, arg, memory_order_relaxed); | |
105 | 10511 | } | |
106 | |||
107 | 923007 | int av_get_cpu_flags(void) | |
108 | { | ||
109 | 923007 | int flags = atomic_load_explicit(&cpu_flags, memory_order_relaxed); | |
110 |
2/2✓ Branch 0 taken 9461 times.
✓ Branch 1 taken 913546 times.
|
923007 | if (flags == -1) { |
111 | 9461 | flags = get_cpu_flags(); | |
112 | 9461 | atomic_store_explicit(&cpu_flags, flags, memory_order_relaxed); | |
113 | } | ||
114 | 923007 | return flags; | |
115 | } | ||
116 | |||
117 | 7915 | int av_parse_cpu_caps(unsigned *flags, const char *s) | |
118 | { | ||
119 | static const AVOption cpuflags_opts[] = { | ||
120 | { "flags" , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, INT64_MAX, .unit = "flags" }, | ||
121 | #if ARCH_PPC | ||
122 | { "altivec" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ALTIVEC }, .unit = "flags" }, | ||
123 | { "vsx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VSX }, .unit = "flags" }, | ||
124 | { "power8" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_POWER8 }, .unit = "flags" }, | ||
125 | #elif ARCH_X86 | ||
126 | { "mmx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMX }, .unit = "flags" }, | ||
127 | { "mmx2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMX2 }, .unit = "flags" }, | ||
128 | { "mmxext" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMX2 }, .unit = "flags" }, | ||
129 | { "sse" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE }, .unit = "flags" }, | ||
130 | { "sse2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE2 }, .unit = "flags" }, | ||
131 | { "sse2slow", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE2SLOW }, .unit = "flags" }, | ||
132 | { "sse3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE3 }, .unit = "flags" }, | ||
133 | { "sse3slow", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE3SLOW }, .unit = "flags" }, | ||
134 | { "ssse3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSSE3 }, .unit = "flags" }, | ||
135 | { "atom" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ATOM }, .unit = "flags" }, | ||
136 | { "sse4.1" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE4 }, .unit = "flags" }, | ||
137 | { "sse4.2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE42 }, .unit = "flags" }, | ||
138 | { "avx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX }, .unit = "flags" }, | ||
139 | { "avxslow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVXSLOW }, .unit = "flags" }, | ||
140 | { "xop" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_XOP }, .unit = "flags" }, | ||
141 | { "fma3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_FMA3 }, .unit = "flags" }, | ||
142 | { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_FMA4 }, .unit = "flags" }, | ||
143 | { "avx2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX2 }, .unit = "flags" }, | ||
144 | { "bmi1" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_BMI1 }, .unit = "flags" }, | ||
145 | { "bmi2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_BMI2 }, .unit = "flags" }, | ||
146 | { "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOW }, .unit = "flags" }, | ||
147 | { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOWEXT }, .unit = "flags" }, | ||
148 | { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV }, .unit = "flags" }, | ||
149 | { "aesni", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI }, .unit = "flags" }, | ||
150 | { "avx512" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512 }, .unit = "flags" }, | ||
151 | { "avx512icl", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512ICL }, .unit = "flags" }, | ||
152 | { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" }, | ||
153 | |||
154 | #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX | ||
155 | #define CPU_FLAG_P3 CPU_FLAG_P2 | AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_SSE | ||
156 | #define CPU_FLAG_P4 CPU_FLAG_P3| AV_CPU_FLAG_SSE2 | ||
157 | { "pentium2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_P2 }, .unit = "flags" }, | ||
158 | { "pentium3", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_P3 }, .unit = "flags" }, | ||
159 | { "pentium4", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_P4 }, .unit = "flags" }, | ||
160 | |||
161 | #define CPU_FLAG_K62 AV_CPU_FLAG_MMX | AV_CPU_FLAG_3DNOW | ||
162 | #define CPU_FLAG_ATHLON CPU_FLAG_K62 | AV_CPU_FLAG_CMOV | AV_CPU_FLAG_3DNOWEXT | AV_CPU_FLAG_MMX2 | ||
163 | #define CPU_FLAG_ATHLONXP CPU_FLAG_ATHLON | AV_CPU_FLAG_SSE | ||
164 | #define CPU_FLAG_K8 CPU_FLAG_ATHLONXP | AV_CPU_FLAG_SSE2 | ||
165 | { "k6", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMX }, .unit = "flags" }, | ||
166 | { "k62", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_K62 }, .unit = "flags" }, | ||
167 | { "athlon", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_ATHLON }, .unit = "flags" }, | ||
168 | { "athlonxp", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_ATHLONXP }, .unit = "flags" }, | ||
169 | { "k8", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_K8 }, .unit = "flags" }, | ||
170 | #elif ARCH_ARM | ||
171 | { "armv5te", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV5TE }, .unit = "flags" }, | ||
172 | { "armv6", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6 }, .unit = "flags" }, | ||
173 | { "armv6t2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6T2 }, .unit = "flags" }, | ||
174 | { "vfp", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP }, .unit = "flags" }, | ||
175 | { "vfp_vm", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP_VM }, .unit = "flags" }, | ||
176 | { "vfpv3", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFPV3 }, .unit = "flags" }, | ||
177 | { "neon", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_NEON }, .unit = "flags" }, | ||
178 | { "setend", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SETEND }, .unit = "flags" }, | ||
179 | #elif ARCH_AARCH64 | ||
180 | { "armv8", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV8 }, .unit = "flags" }, | ||
181 | { "neon", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_NEON }, .unit = "flags" }, | ||
182 | { "vfp", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP }, .unit = "flags" }, | ||
183 | { "dotprod", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_DOTPROD }, .unit = "flags" }, | ||
184 | { "i8mm", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_I8MM }, .unit = "flags" }, | ||
185 | { "sve", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SVE }, .unit = "flags" }, | ||
186 | { "sve2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SVE2 }, .unit = "flags" }, | ||
187 | #elif ARCH_MIPS | ||
188 | { "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI }, .unit = "flags" }, | ||
189 | { "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA }, .unit = "flags" }, | ||
190 | #elif ARCH_LOONGARCH | ||
191 | { "lsx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX }, .unit = "flags" }, | ||
192 | { "lasx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX }, .unit = "flags" }, | ||
193 | #elif ARCH_RISCV | ||
194 | { "rvi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVI }, .unit = "flags" }, | ||
195 | { "rvb", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVB }, .unit = "flags" }, | ||
196 | { "zve32x", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVV_I32 }, .unit = "flags" }, | ||
197 | { "zve32f", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVV_F32 }, .unit = "flags" }, | ||
198 | { "zve64x", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVV_I64 }, .unit = "flags" }, | ||
199 | { "zve64d", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVV_F64 }, .unit = "flags" }, | ||
200 | { "zbb", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVB_BASIC }, .unit = "flags" }, | ||
201 | { "zvbb", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_ZVBB }, .unit = "flags" }, | ||
202 | { "misaligned", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_MISALIGNED }, .unit = "flags" }, | ||
203 | #endif | ||
204 | { NULL }, | ||
205 | }; | ||
206 | static const AVClass class = { | ||
207 | .class_name = "cpuflags", | ||
208 | .item_name = av_default_item_name, | ||
209 | .option = cpuflags_opts, | ||
210 | .version = LIBAVUTIL_VERSION_INT, | ||
211 | }; | ||
212 | 7915 | const AVClass *pclass = &class; | |
213 | |||
214 | 7915 | return av_opt_eval_flags(&pclass, &cpuflags_opts[0], s, flags); | |
215 | } | ||
216 | |||
217 | 5137 | int av_cpu_count(void) | |
218 | { | ||
219 | static atomic_int printed = 0; | ||
220 | |||
221 | 5137 | int nb_cpus = 1; | |
222 | 5137 | int count = 0; | |
223 | #if HAVE_WINRT | ||
224 | SYSTEM_INFO sysinfo; | ||
225 | #endif | ||
226 | #if HAVE_SCHED_GETAFFINITY && defined(CPU_COUNT) | ||
227 | cpu_set_t cpuset; | ||
228 | |||
229 | 5137 | CPU_ZERO(&cpuset); | |
230 | |||
231 |
1/2✓ Branch 1 taken 5137 times.
✗ Branch 2 not taken.
|
5137 | if (!sched_getaffinity(0, sizeof(cpuset), &cpuset)) |
232 | 5137 | nb_cpus = CPU_COUNT(&cpuset); | |
233 | #elif HAVE_GETPROCESSAFFINITYMASK | ||
234 | DWORD_PTR proc_aff, sys_aff; | ||
235 | if (GetProcessAffinityMask(GetCurrentProcess(), &proc_aff, &sys_aff)) | ||
236 | nb_cpus = av_popcount64(proc_aff); | ||
237 | #elif HAVE_SYSCTL && defined(HW_NCPUONLINE) | ||
238 | int mib[2] = { CTL_HW, HW_NCPUONLINE }; | ||
239 | size_t len = sizeof(nb_cpus); | ||
240 | |||
241 | if (sysctl(mib, 2, &nb_cpus, &len, NULL, 0) == -1) | ||
242 | nb_cpus = 0; | ||
243 | #elif HAVE_SYSCTL && defined(HW_NCPU) | ||
244 | int mib[2] = { CTL_HW, HW_NCPU }; | ||
245 | size_t len = sizeof(nb_cpus); | ||
246 | |||
247 | if (sysctl(mib, 2, &nb_cpus, &len, NULL, 0) == -1) | ||
248 | nb_cpus = 0; | ||
249 | #elif HAVE_SYSCONF && defined(_SC_NPROC_ONLN) | ||
250 | nb_cpus = sysconf(_SC_NPROC_ONLN); | ||
251 | #elif HAVE_SYSCONF && defined(_SC_NPROCESSORS_ONLN) | ||
252 | nb_cpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
253 | #elif HAVE_WINRT | ||
254 | GetNativeSystemInfo(&sysinfo); | ||
255 | nb_cpus = sysinfo.dwNumberOfProcessors; | ||
256 | #endif | ||
257 | |||
258 |
2/2✓ Branch 0 taken 3463 times.
✓ Branch 1 taken 1674 times.
|
5137 | if (!atomic_exchange_explicit(&printed, 1, memory_order_relaxed)) |
259 | 3463 | av_log(NULL, AV_LOG_DEBUG, "detected %d logical cores\n", nb_cpus); | |
260 | |||
261 | 5137 | count = atomic_load_explicit(&cpu_count, memory_order_relaxed); | |
262 | |||
263 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5137 times.
|
5137 | if (count > 0) { |
264 | ✗ | nb_cpus = count; | |
265 | ✗ | av_log(NULL, AV_LOG_DEBUG, "overriding to %d logical cores\n", nb_cpus); | |
266 | } | ||
267 | |||
268 | 5137 | return nb_cpus; | |
269 | } | ||
270 | |||
/**
 * Store a core count for av_cpu_count() to return instead of the detected one.
 *
 * The value is stored unchecked; av_cpu_count() ignores it unless it is
 * greater than zero.
 *
 * @param count core count to force
 */
void av_cpu_force_count(int count)
{
    atomic_store_explicit(&cpu_count, count, memory_order_relaxed);
}
275 | |||
/**
 * Return the maximum alignment reported by the architecture backend.
 *
 * The backend is chosen at build time through the ARCH_* macros. Builds for
 * an architecture without a backend in the chain below get the default of 8.
 *
 * @return the backend's alignment value, or 8 when no backend is selected
 */
size_t av_cpu_max_align(void)
{
    size_t max_align = 8;

#if ARCH_MIPS
    max_align = ff_get_cpu_max_align_mips();
#elif ARCH_AARCH64
    max_align = ff_get_cpu_max_align_aarch64();
#elif ARCH_ARM
    max_align = ff_get_cpu_max_align_arm();
#elif ARCH_PPC
    max_align = ff_get_cpu_max_align_ppc();
#elif ARCH_X86
    max_align = ff_get_cpu_max_align_x86();
#elif ARCH_LOONGARCH
    max_align = ff_get_cpu_max_align_loongarch();
#endif

    return max_align;
}
294 | |||
/**
 * Look up an entry of the ELF auxiliary vector.
 *
 * Uses getauxval() when available, otherwise elf_aux_info(). When neither is
 * compiled in, errno is set to ENOSYS and 0 is returned.
 *
 * @param type auxiliary vector key, passed to the platform function unchanged
 * @return the value obtained, or 0 if nothing was stored
 */
unsigned long ff_getauxval(unsigned long type)
{
    unsigned long value = 0;

#if HAVE_GETAUXVAL
    value = getauxval(type);
#elif HAVE_ELF_AUX_INFO
    /* elf_aux_info() hands back its error code as the return value; copy it
     * into errno so both variants report failure the same way. */
    const int err = elf_aux_info(type, &value, sizeof(value));

    if (err != 0)
        errno = err;
#else
    errno = ENOSYS;
#endif

    return value;
}
311 |