LCOV - code coverage report
Current view: top level - src - hwf-x86.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 58 79 73.4 %
Date: 2017-03-02 16:44:37 Functions: 5 5 100.0 %

          Line data    Source code
       1             : /* hwf-x86.c - Detect hardware features - x86 part
       2             :  * Copyright (C) 2007, 2011, 2012  Free Software Foundation, Inc.
       3             :  * Copyright (C) 2012  Jussi Kivilinna
       4             :  *
       5             :  * This file is part of Libgcrypt.
       6             :  *
       7             :  * Libgcrypt is free software; you can redistribute it and/or modify
       8             :  * it under the terms of the GNU Lesser General Public License as
       9             :  * published by the Free Software Foundation; either version 2.1 of
      10             :  * the License, or (at your option) any later version.
      11             :  *
      12             :  * Libgcrypt is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15             :  * GNU Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with this program; if not, see <http://www.gnu.org/licenses/>.
      19             :  */
      20             : 
      21             : #include <config.h>
      22             : #include <stdio.h>
      23             : #include <stdlib.h>
      24             : #include <string.h>
      25             : #include <stdarg.h>
      26             : #include <unistd.h>
      27             : 
      28             : #include "g10lib.h"
      29             : #include "hwf-common.h"
      30             : 
      31             : #if !defined (__i386__) && !defined (__x86_64__)
      32             : # error Module build for wrong CPU.
      33             : #endif
      34             : 
      35             : /* We use the next macro to decide whether we can test for certain
      36             :    features.  */
      37             : #undef HAS_X86_CPUID
      38             : 
      39             : #if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
      40             : # define HAS_X86_CPUID 1
      41             : 
      42             : static int
      43             : is_cpuid_available(void)
      44             : {
      45             :   int has_cpuid = 0;
      46             : 
      47             :   /* Detect the CPUID feature by testing some undefined behaviour (16
      48             :      vs 32 bit pushf/popf). */
      49             :   asm volatile
      50             :     ("pushf\n\t"                 /* Copy flags to EAX.  */
      51             :      "popl %%eax\n\t"
      52             :      "movl %%eax, %%ecx\n\t"     /* Save flags into ECX.  */
      53             :      "xorl $0x200000, %%eax\n\t" /* Toggle ID bit and copy it to the flags.  */
      54             :      "pushl %%eax\n\t"
      55             :      "popf\n\t"
      56             :      "pushf\n\t"                 /* Copy changed flags again to EAX.  */
      57             :      "popl %%eax\n\t"
      58             :      "pushl %%ecx\n\t"           /* Restore flags from ECX.  */
      59             :      "popf\n\t"
      60             :      "xorl %%eax, %%ecx\n\t"     /* Compare flags against saved flags.  */
      61             :      "jz .Lno_cpuid%=\n\t"       /* Toggling did not work, thus no CPUID.  */
      62             :      "movl $1, %0\n"             /* Worked. true -> HAS_CPUID.  */
      63             :      ".Lno_cpuid%=:\n\t"
      64             :      : "+r" (has_cpuid)
      65             :      :
      66             :      : "%eax", "%ecx", "cc"
      67             :      );
      68             : 
      69             :   return has_cpuid;
      70             : }
      71             : 
      72             : static void
      73             : get_cpuid(unsigned int in, unsigned int *eax, unsigned int *ebx,
      74             :           unsigned int *ecx, unsigned int *edx)
      75             : {
      76             :   unsigned int regs[4];
      77             : 
      78             :   asm volatile
      79             :     ("pushl %%ebx\n\t"           /* Save GOT register.  */
      80             :      "movl %1, %%ebx\n\t"
      81             :      "cpuid\n\t"
      82             :      "movl %%ebx, %1\n\t"
      83             :      "popl %%ebx\n\t"            /* Restore GOT register. */
      84             :      : "=a" (regs[0]), "=D" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
      85             :      : "0" (in), "1" (0), "2" (0), "3" (0)
      86             :      : "cc"
      87             :      );
      88             : 
      89             :   if (eax)
      90             :     *eax = regs[0];
      91             :   if (ebx)
      92             :     *ebx = regs[1];
      93             :   if (ecx)
      94             :     *ecx = regs[2];
      95             :   if (edx)
      96             :     *edx = regs[3];
      97             : }
      98             : 
      99             : #if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
     100             : static unsigned int
     101             : get_xgetbv(void)
     102             : {
     103             :   unsigned int t_eax, t_edx;
     104             : 
     105             :   asm volatile
     106             :     ("xgetbv\n\t"
     107             :      : "=a" (t_eax), "=d" (t_edx)
     108             :      : "c" (0)
     109             :     );
     110             : 
     111             :   return t_eax;
     112             : }
     113             : #endif /* ENABLE_AVX_SUPPORT || ENABLE_AVX2_SUPPORT */
     114             : 
     115             : #endif /* i386 && GNUC */
     116             : 
     117             : 
     118             : #if defined (__x86_64__) && defined (__GNUC__)
     119             : # define HAS_X86_CPUID 1
     120             : 
     121             : static int
     122          34 : is_cpuid_available(void)
     123             : {
     124          34 :   return 1;
     125             : }
     126             : 
     127             : static void
     128         102 : get_cpuid(unsigned int in, unsigned int *eax, unsigned int *ebx,
     129             :           unsigned int *ecx, unsigned int *edx)
     130             : {
     131             :   unsigned int regs[4];
     132             : 
     133         102 :   asm volatile
     134             :     ("cpuid\n\t"
     135             :      : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
     136             :      : "0" (in), "1" (0), "2" (0), "3" (0)
     137             :      : "cc"
     138             :      );
     139             : 
     140         102 :   if (eax)
     141          68 :     *eax = regs[0];
     142         102 :   if (ebx)
     143          68 :     *ebx = regs[1];
     144         102 :   if (ecx)
     145          68 :     *ecx = regs[2];
     146         102 :   if (edx)
     147          34 :     *edx = regs[3];
     148         102 : }
     149             : 
     150             : #if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
     151             : static unsigned int
     152          34 : get_xgetbv(void)
     153             : {
     154             :   unsigned int t_eax, t_edx;
     155             : 
     156          34 :   asm volatile
     157             :     ("xgetbv\n\t"
     158             :      : "=a" (t_eax), "=d" (t_edx)
     159             :      : "c" (0)
     160             :     );
     161             : 
     162          34 :   return t_eax;
     163             : }
     164             : #endif /* ENABLE_AVX_SUPPORT || ENABLE_AVX2_SUPPORT */
     165             : 
     166             : #endif /* x86-64 && GNUC */
     167             : 
     168             : 
     169             : #ifdef HAS_X86_CPUID
     170             : static unsigned int
     171          34 : detect_x86_gnuc (void)
     172             : {
     173             :   union
     174             :   {
     175             :     char c[12+1];
     176             :     unsigned int ui[3];
     177             :   } vendor_id;
     178             :   unsigned int features;
     179          34 :   unsigned int os_supports_avx_avx2_registers = 0;
     180             :   unsigned int max_cpuid_level;
     181             :   unsigned int fms, family, model;
     182          34 :   unsigned int result = 0;
     183          34 :   unsigned int avoid_vpgather = 0;
     184             : 
     185             :   (void)os_supports_avx_avx2_registers;
     186             : 
     187          34 :   if (!is_cpuid_available())
     188           0 :     return 0;
     189             : 
     190          34 :   get_cpuid(0, &max_cpuid_level, &vendor_id.ui[0], &vendor_id.ui[2],
     191             :             &vendor_id.ui[1]);
     192          34 :   vendor_id.c[12] = 0;
     193             : 
     194             :   if (0)
     195             :     ; /* Just to make "else if" and ifdef macros look pretty.  */
     196             : #ifdef ENABLE_PADLOCK_SUPPORT
     197          34 :   else if (!strcmp (vendor_id.c, "CentaurHauls"))
     198             :     {
     199             :       /* This is a VIA CPU.  Check what PadLock features we have.  */
     200             : 
     201             :       /* Check for extended centaur (EAX).  */
     202           0 :       get_cpuid(0xC0000000, &features, NULL, NULL, NULL);
     203             : 
     204             :       /* Has extended centaur features? */
     205           0 :       if (features > 0xC0000000)
     206             :         {
     207             :            /* Ask for the extended feature flags (EDX). */
     208           0 :            get_cpuid(0xC0000001, NULL, NULL, NULL, &features);
     209             : 
     210             :            /* Test bits 2 and 3 to see whether the RNG exists and is enabled. */
     211           0 :            if ((features & 0x0C) == 0x0C)
     212           0 :              result |= HWF_PADLOCK_RNG;
     213             : 
     214             :            /* Test bits 6 and 7 to see whether the ACE exists and is enabled. */
     215           0 :            if ((features & 0xC0) == 0xC0)
     216           0 :              result |= HWF_PADLOCK_AES;
     217             : 
     218             :            /* Test bits 10 and 11 to see whether the PHE exists and is
     219             :               enabled.  */
     220           0 :            if ((features & 0xC00) == 0xC00)
     221           0 :              result |= HWF_PADLOCK_SHA;
     222             : 
     223             :            /* Test bits 12 and 13 to see whether the MONTMUL exists and is
     224             :               enabled.  */
     225           0 :            if ((features & 0x3000) == 0x3000)
     226           0 :              result |= HWF_PADLOCK_MMUL;
     227             :         }
     228             :     }
     229             : #endif /*ENABLE_PADLOCK_SUPPORT*/
     230          34 :   else if (!strcmp (vendor_id.c, "GenuineIntel"))
     231             :     {
     232             :       /* This is an Intel CPU.  */
     233          34 :       result |= HWF_INTEL_CPU;
     234             :     }
     235           0 :   else if (!strcmp (vendor_id.c, "AuthenticAMD"))
     236             :     {
     237             :       /* This is an AMD CPU.  */
     238             :     }
     239             : 
     240             :   /* Detect Intel features, that might also be supported by other
     241             :      vendors.  */
     242             : 
     243             :   /* Get CPU family/model/stepping (EAX) and Intel feature flags (ECX).  */
     244          34 :   get_cpuid(1, &fms, NULL, &features, NULL);
     245             : 
     246          34 :   family = ((fms & 0xf00) >> 8) + ((fms & 0xff00000) >> 20);
     247          34 :   model = ((fms & 0xf0) >> 4) + ((fms & 0xf0000) >> 12);
     248             : 
     249          34 :   if ((result & HWF_INTEL_CPU) && family == 6)
     250             :     {
     251             :       /* These Intel Core processor models have SHLD/SHRD instructions that
     252             :        * can do integer rotation faster than the actual ROL/ROR instructions. */
     253          34 :       switch (model)
     254             :         {
     255             :         case 0x2A:
     256             :         case 0x2D:
     257             :         case 0x3A:
     258             :         case 0x3C:
     259             :         case 0x3F:
     260             :         case 0x45:
     261             :         case 0x46:
     262             :         case 0x3D:
     263             :         case 0x4F:
     264             :         case 0x56:
     265             :         case 0x47:
     266             :         case 0x4E:
     267             :         case 0x5E:
     268             :         case 0x8E:
     269             :         case 0x9E:
     270             :         case 0x55:
     271             :         case 0x66:
     272          34 :           result |= HWF_INTEL_FAST_SHLD;
     273          34 :           break;
     274             :         }
     275             : 
     276             :       /* On these Intel Core processors that have AVX2, VPGATHER is slow and
     277             :        * should be avoided for table-lookup use. */
     278          34 :       switch (model)
     279             :         {
     280             :         case 0x3C:
     281             :         case 0x3F:
     282             :         case 0x45:
     283             :         case 0x46:
     284             :           /* Haswell */
     285           0 :           avoid_vpgather |= 1;
     286           0 :           break;
     287             :         }
     288          34 :     }
     289             :   else
     290             :     {
     291             :       /* Avoid VPGATHER for non-Intel CPUs as testing is needed to
     292             :        * make sure it is fast enough. */
     293             : 
     294           0 :       avoid_vpgather |= 1;
     295             :     }
     296             : 
     297             : #ifdef ENABLE_PCLMUL_SUPPORT
     298             :   /* Test bit 1 for PCLMUL.  */
     299          34 :   if (features & 0x00000002)
     300          34 :      result |= HWF_INTEL_PCLMUL;
     301             : #endif
     302             :   /* Test bit 9 for SSSE3.  */
     303          34 :   if (features & 0x00000200)
     304          34 :      result |= HWF_INTEL_SSSE3;
     305             :   /* Test bit 19 for SSE4.1.  */
     306          34 :   if (features & 0x00080000)
     307          34 :      result |= HWF_INTEL_SSE4_1;
     308             : #ifdef ENABLE_AESNI_SUPPORT
     309             :   /* Test bit 25 for AES-NI.  */
     310          34 :   if (features & 0x02000000)
     311          34 :      result |= HWF_INTEL_AESNI;
     312             : #endif /*ENABLE_AESNI_SUPPORT*/
     313             : #if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
     314             :   /* Test bit 27 for OSXSAVE (required for AVX/AVX2).  */
     315          34 :   if (features & 0x08000000)
     316             :     {
     317             :       /* Check that OS has enabled both XMM and YMM state support.  */
     318          34 :       if ((get_xgetbv() & 0x6) == 0x6)
     319          34 :         os_supports_avx_avx2_registers = 1;
     320             :     }
     321             : #endif
     322             : #ifdef ENABLE_AVX_SUPPORT
     323             :   /* Test bit 28 for AVX.  */
     324          34 :   if (features & 0x10000000)
     325          34 :     if (os_supports_avx_avx2_registers)
     326          34 :       result |= HWF_INTEL_AVX;
     327             : #endif /*ENABLE_AVX_SUPPORT*/
     328             : #ifdef ENABLE_DRNG_SUPPORT
     329             :   /* Test bit 30 for RDRAND.  */
     330          34 :   if (features & 0x40000000)
     331           0 :      result |= HWF_INTEL_RDRAND;
     332             : #endif /*ENABLE_DRNG_SUPPORT*/
     333             : 
     334             :   /* Check additional Intel feature flags.  Early Intel P5 processors report
     335             :    * too high a max_cpuid_level, so don't check level 7 if the processor does
     336             :    * not support SSE3 (as cpuid:7 contains only features for newer processors).
     337             :    * Source: http://www.sandpile.org/x86/cpuid.htm  */
     338          34 :   if (max_cpuid_level >= 7 && (features & 0x00000001))
     339             :     {
     340             :       /* CPUID:7 contains further Intel feature flags. */
     341          34 :       get_cpuid(7, NULL, &features, NULL, NULL);
     342             : 
     343             :       /* Test bit 8 for BMI2.  */
     344          34 :       if (features & 0x00000100)
     345           0 :           result |= HWF_INTEL_BMI2;
     346             : 
     347             : #ifdef ENABLE_AVX2_SUPPORT
     348             :       /* Test bit 5 for AVX2.  */
     349          34 :       if (features & 0x00000020)
     350           0 :         if (os_supports_avx_avx2_registers)
     351           0 :           result |= HWF_INTEL_AVX2;
     352             : 
     353          34 :       if ((result & HWF_INTEL_AVX2) && !avoid_vpgather)
     354           0 :         result |= HWF_INTEL_FAST_VPGATHER;
     355             : #endif /*ENABLE_AVX2_SUPPORT*/
     356             :     }
     357             : 
     358          34 :   return result;
     359             : }
     360             : #endif /* HAS_X86_CPUID */
     361             : 
     362             : 
     363             : unsigned int
     364          34 : _gcry_hwf_detect_x86 (void)
     365             : {
     366             : #if defined (HAS_X86_CPUID)
     367          34 :   return detect_x86_gnuc ();
     368             : #else
     369             :   return 0;
     370             : #endif
     371             : }

Generated by: LCOV version 1.13