LCOV - code coverage report
Current view: top level - src/libavutil/x86 - cpu.c
Test: coverage.info          Lines:     59 of 76 hit  ( 77.6 %)
Date: 2017-09-22 00:04:37    Functions:  1 of  1 hit  (100.0 %)

          Line data    Source code
       1             : /*
       2             :  * CPU detection code, extracted from mmx.h
       3             :  * (c)1997-99 by H. Dietz and R. Fisher
       4             :  * Converted to C and improved by Fabrice Bellard.
       5             :  *
       6             :  * This file is part of FFmpeg.
       7             :  *
       8             :  * FFmpeg is free software; you can redistribute it and/or
       9             :  * modify it under the terms of the GNU Lesser General Public
      10             :  * License as published by the Free Software Foundation; either
      11             :  * version 2.1 of the License, or (at your option) any later version.
      12             :  *
      13             :  * FFmpeg is distributed in the hope that it will be useful,
      14             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16             :  * Lesser General Public License for more details.
      17             :  *
      18             :  * You should have received a copy of the GNU Lesser General Public
      19             :  * License along with FFmpeg; if not, write to the Free Software
      20             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      21             :  */
      22             : 
      23             : #include <stdlib.h>
      24             : #include <string.h>
      25             : 
      26             : #include "libavutil/x86/asm.h"
      27             : #include "libavutil/x86/cpu.h"
      28             : #include "libavutil/cpu.h"
      29             : #include "libavutil/cpu_internal.h"
      30             : 
      31             : #if HAVE_X86ASM
      32             : 
      33             : #define cpuid(index, eax, ebx, ecx, edx)        \
      34             :     ff_cpu_cpuid(index, &eax, &ebx, &ecx, &edx)
      35             : 
      36             : #define xgetbv(index, eax, edx)                 \
      37             :     ff_cpu_xgetbv(index, &eax, &edx)
      38             : 
      39             : #elif HAVE_INLINE_ASM
      40             : 
       41             : /* Saving ebx is necessary for PIC: on 32-bit builds ebx holds the GOT
                      :  * pointer, and gcc cannot always preserve it around cpuid on its own. */
      42             : #define cpuid(index, eax, ebx, ecx, edx)                        \
      43             :     __asm__ volatile (                                          \
      44             :         "mov    %%"FF_REG_b", %%"FF_REG_S" \n\t"                \
      45             :         "cpuid                       \n\t"                      \
      46             :         "xchg   %%"FF_REG_b", %%"FF_REG_S                       \
      47             :         : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)        \
      48             :         : "0" (index), "2"(0))
      49             : 
      50             : #define xgetbv(index, eax, edx)                                 \
      51             :     __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index))
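                      : /* In the xgetbv macro above, the byte sequence 0x0f 0x01 0xd0 is the raw
                      :  * encoding of the xgetbv instruction, presumably written out as bytes so
                      :  * that assemblers lacking the mnemonic can still build this file. */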
      52             : 
      53             : #define get_eflags(x)                           \
      54             :     __asm__ volatile ("pushfl     \n"           \
      55             :                       "pop    %0  \n"           \
      56             :                       : "=r"(x))
      57             : 
      58             : #define set_eflags(x)                           \
      59             :     __asm__ volatile ("push    %0 \n"           \
      60             :                       "popfl      \n"           \
      61             :                       :: "r"(x))
      62             : 
      63             : #endif /* HAVE_INLINE_ASM */
      64             : 
      65             : #if ARCH_X86_64
      66             : 
      67             : #define cpuid_test() 1
      68             : 
      69             : #elif HAVE_X86ASM
      70             : 
      71             : #define cpuid_test ff_cpu_cpuid_test
      72             : 
      73             : #elif HAVE_INLINE_ASM
      74             : 
      75             : static int cpuid_test(void)
      76             : {
      77             :     x86_reg a, c;
      78             : 
       79             :     /* Check if CPUID is supported by attempting to toggle the ID bit
       80             :      * (bit 21, 0x200000) in the EFLAGS register. */
      81             :     get_eflags(a);
      82             :     set_eflags(a ^ 0x200000);
      83             :     get_eflags(c);
      84             : 
      85             :     return a != c;
      86             : }
      87             : #endif
      88             : 
       89             : /* Test which multimedia (SIMD) instruction sets are supported. */
      90        6101 : int ff_get_cpu_flags_x86(void)
      91             : {
      92        6101 :     int rval = 0;
      93             : 
      94             : #ifdef cpuid
      95             : 
      96             :     int eax, ebx, ecx, edx;
      97        6101 :     int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0;
      98        6101 :     int family = 0, model = 0;
      99             :     union { int i[3]; char c[12]; } vendor;
     100             : 
     101             :     if (!cpuid_test())
     102             :         return 0; /* CPUID not supported */
     103             : 
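                      :     /* Leaf 0: EAX reports the highest standard leaf. The vendor string is
                      :      * returned in EBX:EDX:ECX order, so the i[0]/i[2]/i[1] argument order
                      :      * below stores it contiguously in vendor.c, e.g. "GenuineIntel". */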
     104        6101 :     cpuid(0, max_std_level, vendor.i[0], vendor.i[2], vendor.i[1]);
     105             : 
     106        6101 :     if (max_std_level >= 1) {
     107        6101 :         cpuid(1, eax, ebx, ecx, std_caps);
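                      :         /* Leaf 1 EAX: bits 8-11 hold the base family, bits 20-27 the
                      :          * extended family; bits 4-7 hold the base model, bits 16-19 the
                      :          * extended model, shifted into the high nibble of 'model'. */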
     108        6101 :         family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
     109        6101 :         model  = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
     110        6101 :         if (std_caps & (1 << 15))
     111        6101 :             rval |= AV_CPU_FLAG_CMOV;
     112        6101 :         if (std_caps & (1 << 23))
     113        6101 :             rval |= AV_CPU_FLAG_MMX;
     114        6101 :         if (std_caps & (1 << 25))
     115        6101 :             rval |= AV_CPU_FLAG_MMXEXT;
     116             : #if HAVE_SSE
     117        6101 :         if (std_caps & (1 << 25))
     118        6101 :             rval |= AV_CPU_FLAG_SSE;
     119        6101 :         if (std_caps & (1 << 26))
     120        6101 :             rval |= AV_CPU_FLAG_SSE2;
     121        6101 :         if (ecx & 1)
     122        6101 :             rval |= AV_CPU_FLAG_SSE3;
     123        6101 :         if (ecx & 0x00000200 )
     124        6101 :             rval |= AV_CPU_FLAG_SSSE3;
     125        6101 :         if (ecx & 0x00080000 )
     126        6101 :             rval |= AV_CPU_FLAG_SSE4;
     127        6101 :         if (ecx & 0x00100000 )
     128        6101 :             rval |= AV_CPU_FLAG_SSE42;
     129        6101 :         if (ecx & 0x01000000 )
     130        6101 :             rval |= AV_CPU_FLAG_AESNI;
     131             : #if HAVE_AVX
      132             :         /* Check the OSXSAVE and AVX bits */
     133        6101 :         if ((ecx & 0x18000000) == 0x18000000) {
     134             :             /* Check for OS support */
     135        6101 :             xgetbv(0, eax, edx);
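                      :             /* XCR0 bit 1 (SSE/XMM state) and bit 2 (AVX/YMM state) must both
                      :              * be set, i.e. the OS saves the full YMM registers on context
                      :              * switches. */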
     136        6101 :             if ((eax & 0x6) == 0x6) {
     137        6101 :                 rval |= AV_CPU_FLAG_AVX;
     138        6101 :                 if (ecx & 0x00001000)
     139        6101 :                     rval |= AV_CPU_FLAG_FMA3;
     140             :             }
     141             :         }
     142             : #endif /* HAVE_AVX */
     143             : #endif /* HAVE_SSE */
     144             :     }
     145        6101 :     if (max_std_level >= 7) {
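                      :         /* Leaf 7 features are defined for subleaf ECX = 0; the cpuid()
                      :          * helpers above are assumed to zero ECX before issuing the query. */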
     146        6101 :         cpuid(7, eax, ebx, ecx, edx);
     147             : #if HAVE_AVX2
     148        6101 :         if ((rval & AV_CPU_FLAG_AVX) && (ebx & 0x00000020))
     149        6101 :             rval |= AV_CPU_FLAG_AVX2;
     150             : #endif /* HAVE_AVX2 */
      151             :         /* BMI1 (EBX bit 3) and BMI2 (EBX bit 8) don't need OS support */
     152        6101 :         if (ebx & 0x00000008) {
     153        6101 :             rval |= AV_CPU_FLAG_BMI1;
     154        6101 :             if (ebx & 0x00000100)
     155        6101 :                 rval |= AV_CPU_FLAG_BMI2;
     156             :         }
     157             :     }
     158             : 
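                      :     /* Extended leaf 0x80000000: EAX reports the highest supported extended
                      :      * leaf, which gates the 0x80000001 feature queries below. */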
     159        6101 :     cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
     160             : 
     161        6101 :     if (max_ext_level >= 0x80000001) {
     162        6101 :         cpuid(0x80000001, eax, ebx, ecx, ext_caps);
     163        6101 :         if (ext_caps & (1U << 31))
     164           0 :             rval |= AV_CPU_FLAG_3DNOW;
     165        6101 :         if (ext_caps & (1 << 30))
     166           0 :             rval |= AV_CPU_FLAG_3DNOWEXT;
     167        6101 :         if (ext_caps & (1 << 23))
     168           0 :             rval |= AV_CPU_FLAG_MMX;
     169        6101 :         if (ext_caps & (1 << 22))
     170           0 :             rval |= AV_CPU_FLAG_MMXEXT;
     171             : 
     172        6101 :         if (!strncmp(vendor.c, "AuthenticAMD", 12)) {
     173             :         /* Allow for selectively disabling SSE2 functions on AMD processors
     174             :            with SSE2 support but not SSE4a. This includes Athlon64, some
     175             :            Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
     176             :            than SSE2 often enough to utilize this special-case flag.
     177             :            AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
     178             :            so that SSE2 is used unless explicitly disabled by checking
     179             :            AV_CPU_FLAG_SSE2SLOW. */
     180           0 :             if (rval & AV_CPU_FLAG_SSE2 && !(ecx & 0x00000040))
     181           0 :                 rval |= AV_CPU_FLAG_SSE2SLOW;
     182             : 
     183             :         /* Similar to the above but for AVX functions on AMD processors.
     184             :            This is necessary only for functions using YMM registers on Bulldozer
     185             :            and Jaguar based CPUs as they lack 256-bit execution units. SSE/AVX
     186             :            functions using XMM registers are always faster on them.
     187             :            AV_CPU_FLAG_AVX and AV_CPU_FLAG_AVXSLOW are both set so that AVX is
     188             :            used unless explicitly disabled by checking AV_CPU_FLAG_AVXSLOW. */
     189           0 :             if ((family == 0x15 || family == 0x16) && (rval & AV_CPU_FLAG_AVX))
     190           0 :                 rval |= AV_CPU_FLAG_AVXSLOW;
     191             :         }
     192             : 
     193             :         /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
     194             :          * used unless the OS has AVX support. */
     195        6101 :         if (rval & AV_CPU_FLAG_AVX) {
     196        6101 :             if (ecx & 0x00000800)
     197           0 :                 rval |= AV_CPU_FLAG_XOP;
     198        6101 :             if (ecx & 0x00010000)
     199           0 :                 rval |= AV_CPU_FLAG_FMA4;
     200             :         }
     201             :     }
     202             : 
     203        6101 :     if (!strncmp(vendor.c, "GenuineIntel", 12)) {
     204        6101 :         if (family == 6 && (model == 9 || model == 13 || model == 14)) {
     205             :             /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
     206             :              * 6/14 (core1 "yonah") theoretically support sse2, but it's
     207             :              * usually slower than mmx, so let's just pretend they don't.
     208             :              * AV_CPU_FLAG_SSE2 is disabled and AV_CPU_FLAG_SSE2SLOW is
     209             :              * enabled so that SSE2 is not used unless explicitly enabled
     210             :              * by checking AV_CPU_FLAG_SSE2SLOW. The same situation
     211             :              * applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW. */
     212           0 :             if (rval & AV_CPU_FLAG_SSE2)
     213           0 :                 rval ^= AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE2;
     214           0 :             if (rval & AV_CPU_FLAG_SSE3)
     215           0 :                 rval ^= AV_CPU_FLAG_SSE3SLOW | AV_CPU_FLAG_SSE3;
     216             :         }
      217             :         /* The Atom processor has SSSE3 support, which is useful in many cases,
      218             :          * but an SSSE3 routine is sometimes slower than its SSE2 equivalent on
      219             :          * the Atom even though it is generally faster on other processors that
      220             :          * support SSSE3. This flag allows certain SSSE3 functions to be
      221             :          * selectively disabled on the Atom. */
     222        6101 :         if (family == 6 && model == 28)
     223           0 :             rval |= AV_CPU_FLAG_ATOM;
     224             : 
      225             :         /* Conroe has a slow shuffle unit. Check the model number to avoid
      226             :          * including crippled low-end Penryns and Nehalems that lack SSE4. */
     227        6101 :         if ((rval & AV_CPU_FLAG_SSSE3) && !(rval & AV_CPU_FLAG_SSE4) &&
     228           0 :             family == 6 && model < 23)
     229           0 :             rval |= AV_CPU_FLAG_SSSE3SLOW;
     230             :     }
     231             : 
     232             : #endif /* cpuid */
     233             : 
     234        6101 :     return rval;
     235             : }
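
A usage note for context (not part of the covered file): callers normally reach
ff_get_cpu_flags_x86() indirectly through the public libavutil API. Below is a
minimal sketch of that path, assuming only the documented av_get_cpu_flags()
entry point and the AV_CPU_FLAG_* constants from libavutil/cpu.h:

    #include <stdio.h>
    #include <libavutil/cpu.h>

    int main(void)
    {
        /* On x86 builds this ultimately calls ff_get_cpu_flags_x86(). */
        int flags = av_get_cpu_flags();

        printf("MMX:  %s\n", (flags & AV_CPU_FLAG_MMX)  ? "yes" : "no");
        printf("SSE2: %s\n", (flags & AV_CPU_FLAG_SSE2) ? "yes" : "no");
        printf("AVX:  %s\n", (flags & AV_CPU_FLAG_AVX)  ? "yes" : "no");
        printf("AVX2: %s\n", (flags & AV_CPU_FLAG_AVX2) ? "yes" : "no");
        return 0;
    }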

Generated by: LCOV version 1.13