LCOV - code coverage report
Current view: top level - libavutil/x86 - cpu.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 61 89 68.5 %
Date: 2018-05-20 11:54:08 Functions: 1 2 50.0 %

          Line data    Source code
       1             : /*
       2             :  * CPU detection code, extracted from mmx.h
       3             :  * (c)1997-99 by H. Dietz and R. Fisher
       4             :  * Converted to C and improved by Fabrice Bellard.
       5             :  *
       6             :  * This file is part of FFmpeg.
       7             :  *
       8             :  * FFmpeg is free software; you can redistribute it and/or
       9             :  * modify it under the terms of the GNU Lesser General Public
      10             :  * License as published by the Free Software Foundation; either
      11             :  * version 2.1 of the License, or (at your option) any later version.
      12             :  *
      13             :  * FFmpeg is distributed in the hope that it will be useful,
      14             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16             :  * Lesser General Public License for more details.
      17             :  *
      18             :  * You should have received a copy of the GNU Lesser General Public
      19             :  * License along with FFmpeg; if not, write to the Free Software
      20             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      21             :  */
      22             : 
      23             : #include <stdlib.h>
      24             : #include <string.h>
      25             : 
      26             : #include "libavutil/x86/asm.h"
      27             : #include "libavutil/x86/cpu.h"
      28             : #include "libavutil/cpu.h"
      29             : #include "libavutil/cpu_internal.h"
      30             : 
      31             : #if HAVE_X86ASM
      32             : /* HAVE_X86ASM build: cpuid()/xgetbv() forward to ff_cpu_cpuid()/ff_cpu_xgetbv(), passing the output variables by address. */
      33             : #define cpuid(index, eax, ebx, ecx, edx)        \
      34             :     ff_cpu_cpuid(index, &eax, &ebx, &ecx, &edx)
      35             : 
      36             : #define xgetbv(index, eax, edx)                 \
      37             :     ff_cpu_xgetbv(index, &eax, &edx)
      38             : 
      39             : #elif HAVE_INLINE_ASM
      40             : /* Inline-asm fallback, reached only when HAVE_X86ASM is false. index is loaded into the a register and the c register is preset to 0. */
      41             : /* ebx saving is necessary for PIC. gcc seems unable to see it alone, so the b register is copied to the S register before CPUID and the xchg afterwards restores it while leaving the EBX result in the S-constrained output. */
      42             : #define cpuid(index, eax, ebx, ecx, edx)                        \
      43             :     __asm__ volatile (                                          \
      44             :         "mov    %%"FF_REG_b", %%"FF_REG_S" \n\t"                \
      45             :         "cpuid                       \n\t"                      \
      46             :         "xchg   %%"FF_REG_b", %%"FF_REG_S                       \
      47             :         : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)        \
      48             :         : "0" (index), "2"(0))
      49             : /* xgetbv is emitted as raw opcode bytes (0f 01 d0): index goes in through the c register, the results come back in the a and d registers. */
      50             : #define xgetbv(index, eax, edx)                                 \
      51             :     __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index))
      52             : /* Read the flags register into x: push it, then pop it into a general-purpose register. */
      53             : #define get_eflags(x)                           \
      54             :     __asm__ volatile ("pushfl     \n"           \
      55             :                       "pop    %0  \n"           \
      56             :                       : "=r"(x))
      57             : /* Load the flags register from x: push the value, then pop it with popfl. */
      58             : #define set_eflags(x)                           \
      59             :     __asm__ volatile ("push    %0 \n"           \
      60             :                       "popfl      \n"           \
      61             :                       :: "r"(x))
      62             : 
      63             : #endif /* HAVE_INLINE_ASM */
      64             : 
      65             : #if ARCH_X86_64
      66             : /* On x86-64 builds cpuid_test() is hard-wired to 1, so no runtime probe is performed. */
      67             : #define cpuid_test() 1
      68             : 
      69             : #elif HAVE_X86ASM
      70             : /* HAVE_X86ASM build: cpuid_test is an alias for ff_cpu_cpuid_test. */
      71             : #define cpuid_test ff_cpu_cpuid_test
      72             : 
      73             : #elif HAVE_INLINE_ASM
      74             : /* Inline-asm probe: returns nonzero when the flags value read after the toggle attempt differs from the value read before it. */
      75             : static int cpuid_test(void)
      76             : {
      77             :     x86_reg a, c;
      78             : 
      79             :     /* Check if CPUID is supported by attempting to toggle the ID bit in
      80             :      * the EFLAGS register. */
      81             :     get_eflags(a);              /* flags before the attempt */
      82             :     set_eflags(a ^ 0x200000);   /* write back with bit 21 (0x200000) inverted */
      83             :     get_eflags(c);              /* flags after the attempt */
      84             :     /* The value first read into a is not written back here. */
      85             :     return a != c;
      86             : }
      87             : #endif
      88             : 
      89             : /* Probe the CPU via CPUID (plus XGETBV for OS-enabled AVX state) and return the result as an OR of AV_CPU_FLAG_* bits. Returns 0 when no cpuid() macro is defined or when cpuid_test() reports failure. */
      90        6557 : int ff_get_cpu_flags_x86(void)
      91             : {
      92        6557 :     int rval = 0; /* accumulated AV_CPU_FLAG_* bits */
      93             : 
      94             : #ifdef cpuid
      95             : 
      96             :     int eax, ebx, ecx, edx;
      97        6557 :     int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0;
      98        6557 :     int family = 0, model = 0;
      99             :     union { int i[3]; char c[12]; } vendor; /* leaf 0 vendor id, viewed as 3 ints or as 12 chars */
     100        6557 :     int xcr0_lo = 0, xcr0_hi = 0; /* stay 0 unless the xgetbv() call below runs */
     101             : 
     102             :     if (!cpuid_test())
     103             :         return 0; /* CPUID not supported */
     104             :     /* Leaf 0: EAX gives the highest standard leaf; EBX, EDX, ECX are stored in vendor.i[0], i[1], i[2] so that vendor.c holds the 12-char vendor string. */
     105        6557 :     cpuid(0, max_std_level, vendor.i[0], vendor.i[2], vendor.i[1]);
     106             :     /* Leaf 1: family/model are decoded from EAX, feature bits are tested in EDX (std_caps) and ECX. */
     107        6557 :     if (max_std_level >= 1) {
     108        6557 :         cpuid(1, eax, ebx, ecx, std_caps);
     109        6557 :         family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); /* base family + extended family */
     110        6557 :         model  = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0); /* base model + (extended model << 4) */
     111        6557 :         if (std_caps & (1 << 15))
     112        6557 :             rval |= AV_CPU_FLAG_CMOV;
     113        6557 :         if (std_caps & (1 << 23))
     114        6557 :             rval |= AV_CPU_FLAG_MMX;
     115        6557 :         if (std_caps & (1 << 25)) /* same bit as the SSE test below; MMXEXT is set even when HAVE_SSE is off */
     116        6557 :             rval |= AV_CPU_FLAG_MMXEXT;
     117             : #if HAVE_SSE
     118        6557 :         if (std_caps & (1 << 25))
     119        6557 :             rval |= AV_CPU_FLAG_SSE;
     120        6557 :         if (std_caps & (1 << 26))
     121        6557 :             rval |= AV_CPU_FLAG_SSE2;
     122        6557 :         if (ecx & 1)
     123        6557 :             rval |= AV_CPU_FLAG_SSE3;
     124        6557 :         if (ecx & 0x00000200 )
     125        6557 :             rval |= AV_CPU_FLAG_SSSE3;
     126        6557 :         if (ecx & 0x00080000 )
     127        6557 :             rval |= AV_CPU_FLAG_SSE4;
     128        6557 :         if (ecx & 0x00100000 )
     129        6557 :             rval |= AV_CPU_FLAG_SSE42;
     130        6557 :         if (ecx & 0x01000000 )
     131        6557 :             rval |= AV_CPU_FLAG_AESNI;
     132             : #if HAVE_AVX
     133             :         /* Check OSXSAVE and AVX bits (ECX bits 27 and 28) */
     134        6557 :         if ((ecx & 0x18000000) == 0x18000000) {
     135             :             /* Check for OS support */
     136        6557 :             xgetbv(0, xcr0_lo, xcr0_hi);
     137        6557 :             if ((xcr0_lo & 0x6) == 0x6) { /* XCR0 bits 1 and 2: XMM and YMM state enabled */
     138        6557 :                 rval |= AV_CPU_FLAG_AVX;
     139        6557 :                 if (ecx & 0x00001000) /* FMA3 is reported only together with AVX */
     140        6557 :                     rval |= AV_CPU_FLAG_FMA3;
     141             :             }
     142             :         }
     143             : #endif /* HAVE_AVX */
     144             : #endif /* HAVE_SSE */
     145             :     }
     146        6557 :     if (max_std_level >= 7) {
     147        6557 :         cpuid(7, eax, ebx, ecx, edx); /* leaf 7: only EBX is examined below */
     148             : #if HAVE_AVX2
     149        6557 :         if ((rval & AV_CPU_FLAG_AVX) && (ebx & 0x00000020))
     150        6557 :             rval |= AV_CPU_FLAG_AVX2;
     151             : #if HAVE_AVX512 /* F, CD, BW, DQ, VL */
     152        6557 :         if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */
     153           0 :             if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000)
     154           0 :                 rval |= AV_CPU_FLAG_AVX512;
     155             : 
     156             :         }
     157             : #endif /* HAVE_AVX512 */
     158             : #endif /* HAVE_AVX2 */
     159             :         /* BMI1/2 don't need OS support */
     160        6557 :         if (ebx & 0x00000008) {
     161        6557 :             rval |= AV_CPU_FLAG_BMI1;
     162        6557 :             if (ebx & 0x00000100) /* BMI2 is only reported when BMI1 is present */
     163        6557 :                 rval |= AV_CPU_FLAG_BMI2;
     164             :         }
     165             :     }
     166             :     /* Extended leaf 0x80000000: EAX gives the highest extended leaf; the other outputs are ignored. */
     167        6557 :     cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
     168             :     /* Extended leaf 0x80000001: EDX is stored in ext_caps, ECX is tested in the AMD and XOP/FMA4 checks. */
     169        6557 :     if (max_ext_level >= 0x80000001) {
     170        6557 :         cpuid(0x80000001, eax, ebx, ecx, ext_caps);
     171        6557 :         if (ext_caps & (1U << 31))
     172           0 :             rval |= AV_CPU_FLAG_3DNOW;
     173        6557 :         if (ext_caps & (1 << 30))
     174           0 :             rval |= AV_CPU_FLAG_3DNOWEXT;
     175        6557 :         if (ext_caps & (1 << 23))
     176           0 :             rval |= AV_CPU_FLAG_MMX;
     177        6557 :         if (ext_caps & (1 << 22))
     178           0 :             rval |= AV_CPU_FLAG_MMXEXT;
     179             : 
     180        6557 :         if (!strncmp(vendor.c, "AuthenticAMD", 12)) {
     181             :         /* Allow for selectively disabling SSE2 functions on AMD processors
     182             :            with SSE2 support but not SSE4a. This includes Athlon64, some
     183             :            Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
     184             :            than SSE2 often enough to utilize this special-case flag.
     185             :            AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
     186             :            so that SSE2 is used unless explicitly disabled by checking
     187             :            AV_CPU_FLAG_SSE2SLOW. */
     188           0 :             if (rval & AV_CPU_FLAG_SSE2 && !(ecx & 0x00000040))
     189           0 :                 rval |= AV_CPU_FLAG_SSE2SLOW;
     190             : 
     191             :         /* Similar to the above but for AVX functions on AMD processors.
     192             :            This is necessary only for functions using YMM registers on Bulldozer
     193             :            and Jaguar based CPUs as they lack 256-bit execution units. SSE/AVX
     194             :            functions using XMM registers are always faster on them.
     195             :            AV_CPU_FLAG_AVX and AV_CPU_FLAG_AVXSLOW are both set so that AVX is
     196             :            used unless explicitly disabled by checking AV_CPU_FLAG_AVXSLOW. */
     197           0 :             if ((family == 0x15 || family == 0x16) && (rval & AV_CPU_FLAG_AVX))
     198           0 :                 rval |= AV_CPU_FLAG_AVXSLOW;
     199             :         }
     200             : 
     201             :         /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
     202             :          * used unless the OS has AVX support. */
     203        6557 :         if (rval & AV_CPU_FLAG_AVX) {
     204        6557 :             if (ecx & 0x00000800)
     205           0 :                 rval |= AV_CPU_FLAG_XOP;
     206        6557 :             if (ecx & 0x00010000)
     207           0 :                 rval |= AV_CPU_FLAG_FMA4;
     208             :         }
     209             :     }
     210             :     /* Intel-only adjustments, keyed on the family/model decoded from leaf 1 and on the flags gathered so far. */
     211        6557 :     if (!strncmp(vendor.c, "GenuineIntel", 12)) {
     212        6557 :         if (family == 6 && (model == 9 || model == 13 || model == 14)) {
     213             :             /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
     214             :              * 6/14 (core1 "yonah") theoretically support sse2, but it's
     215             :              * usually slower than mmx, so let's just pretend they don't.
     216             :              * AV_CPU_FLAG_SSE2 is disabled and AV_CPU_FLAG_SSE2SLOW is
     217             :              * enabled so that SSE2 is not used unless explicitly enabled
     218             :              * by checking AV_CPU_FLAG_SSE2SLOW. The same situation
     219             :              * applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW. */
     220           0 :             if (rval & AV_CPU_FLAG_SSE2)
     221           0 :                 rval ^= AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE2; /* XOR clears the set SSE2 bit and flips SSE2SLOW */
     222           0 :             if (rval & AV_CPU_FLAG_SSE3)
     223           0 :                 rval ^= AV_CPU_FLAG_SSE3SLOW | AV_CPU_FLAG_SSE3; /* XOR clears the set SSE3 bit and flips SSE3SLOW */
     224             :         }
     225             :         /* The Atom processor has SSSE3 support, which is useful in many cases,
     226             :          * but sometimes the SSSE3 version is slower than the SSE2 equivalent
     227             :          * on the Atom, but is generally faster on other processors supporting
     228             :          * SSSE3. This flag allows for selectively disabling certain SSSE3
     229             :          * functions on the Atom. */
     230        6557 :         if (family == 6 && model == 28)
     231           0 :             rval |= AV_CPU_FLAG_ATOM;
     232             : 
     233             :         /* Conroe has a slow shuffle unit. Check the model number to ensure not
     234             :          * to include crippled low-end Penryns and Nehalems that lack SSE4. */
     235        6557 :         if ((rval & AV_CPU_FLAG_SSSE3) && !(rval & AV_CPU_FLAG_SSE4) &&
     236           0 :             family == 6 && model < 23)
     237           0 :             rval |= AV_CPU_FLAG_SSSE3SLOW;
     238             :     }
     239             : 
     240             : #endif /* cpuid */
     241             : 
     242        6557 :     return rval;
     243             : }
     244             : /* Map the flags from av_get_cpu_flags() to a byte count: 64 with AVX512, 32 with any AVX-family flag (AVX2, AVX, XOP, FMA4, FMA3, AVXSLOW), 16 with any SSE-family flag, AESNI or ATOM, otherwise 8. Judging by the name this is the largest alignment worth using; confirm against callers. */
     245           0 : size_t ff_get_cpu_max_align_x86(void)
     246             : {
     247           0 :     int flags = av_get_cpu_flags();
     248             :     /* Tests run from the widest class down, so the first match decides the result. */
     249           0 :     if (flags & AV_CPU_FLAG_AVX512)
     250           0 :         return 64;
     251           0 :     if (flags & (AV_CPU_FLAG_AVX2      |
     252             :                  AV_CPU_FLAG_AVX       |
     253             :                  AV_CPU_FLAG_XOP       |
     254             :                  AV_CPU_FLAG_FMA4      |
     255             :                  AV_CPU_FLAG_FMA3      |
     256             :                  AV_CPU_FLAG_AVXSLOW))
     257           0 :         return 32;
     258           0 :     if (flags & (AV_CPU_FLAG_AESNI     |
     259             :                  AV_CPU_FLAG_SSE42     |
     260             :                  AV_CPU_FLAG_SSE4      |
     261             :                  AV_CPU_FLAG_SSSE3     |
     262             :                  AV_CPU_FLAG_SSE3      |
     263             :                  AV_CPU_FLAG_SSE2      |
     264             :                  AV_CPU_FLAG_SSE       |
     265             :                  AV_CPU_FLAG_ATOM      |
     266             :                  AV_CPU_FLAG_SSSE3SLOW |
     267             :                  AV_CPU_FLAG_SSE3SLOW  |
     268             :                  AV_CPU_FLAG_SSE2SLOW))
     269           0 :         return 16;
     270             :     /* No SSE- or AVX-class flag is set. */
     271           0 :     return 8;
     272             : }

Generated by: LCOV version 1.13