Line data Source code
1 : /* hwf-x86.c - Detect hardware features - x86 part
2 : * Copyright (C) 2007, 2011, 2012 Free Software Foundation, Inc.
3 : * Copyright (C) 2012 Jussi Kivilinna
4 : *
5 : * This file is part of Libgcrypt.
6 : *
7 : * Libgcrypt is free software; you can redistribute it and/or modify
8 : * it under the terms of the GNU Lesser General Public License as
9 : * published by the Free Software Foundation; either version 2.1 of
10 : * the License, or (at your option) any later version.
11 : *
12 : * Libgcrypt is distributed in the hope that it will be useful,
13 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : * GNU Lesser General Public License for more details.
16 : *
17 : * You should have received a copy of the GNU Lesser General Public
18 : * License along with this program; if not, see <http://www.gnu.org/licenses/>.
19 : */
20 :
21 : #include <config.h>
22 : #include <stdio.h>
23 : #include <stdlib.h>
24 : #include <string.h>
25 : #include <stdarg.h>
26 : #include <unistd.h>
27 :
28 : #include "g10lib.h"
29 : #include "hwf-common.h"
30 :
31 : #if !defined (__i386__) && !defined (__x86_64__)
32 : # error Module build for wrong CPU.
33 : #endif
34 :
35 : /* We use the next macro to decide whether we can test for certain
36 : features. */
37 : #undef HAS_X86_CPUID
38 :
39 : #if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
40 : # define HAS_X86_CPUID 1
41 :
42 : static int
43 : is_cpuid_available(void)
44 : {
45 : int has_cpuid = 0;
46 :
47 : /* Detect the CPUID feature by testing some undefined behaviour (16
48 : vs 32 bit pushf/popf). */
49 : asm volatile
50 : ("pushf\n\t" /* Copy flags to EAX. */
51 : "popl %%eax\n\t"
52 : "movl %%eax, %%ecx\n\t" /* Save flags into ECX. */
53 : "xorl $0x200000, %%eax\n\t" /* Toggle ID bit and copy it to the flags. */
54 : "pushl %%eax\n\t"
55 : "popf\n\t"
56 : "pushf\n\t" /* Copy changed flags again to EAX. */
57 : "popl %%eax\n\t"
58 : "pushl %%ecx\n\t" /* Restore flags from ECX. */
59 : "popf\n\t"
60 : "xorl %%eax, %%ecx\n\t" /* Compare flags against saved flags. */
61 : "jz .Lno_cpuid%=\n\t" /* Toggling did not work, thus no CPUID. */
62 : "movl $1, %0\n" /* Worked. true -> HAS_CPUID. */
63 : ".Lno_cpuid%=:\n\t"
64 : : "+r" (has_cpuid)
65 : :
66 : : "%eax", "%ecx", "cc"
67 : );
68 :
69 : return has_cpuid;
70 : }
71 :
72 : static void
73 : get_cpuid(unsigned int in, unsigned int *eax, unsigned int *ebx,
74 : unsigned int *ecx, unsigned int *edx)
75 : {
76 : unsigned int regs[4];
77 :
78 : asm volatile
79 : ("pushl %%ebx\n\t" /* Save GOT register. */
80 : "movl %1, %%ebx\n\t"
81 : "cpuid\n\t"
82 : "movl %%ebx, %1\n\t"
83 : "popl %%ebx\n\t" /* Restore GOT register. */
84 : : "=a" (regs[0]), "=D" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
85 : : "0" (in), "1" (0), "2" (0), "3" (0)
86 : : "cc"
87 : );
88 :
89 : if (eax)
90 : *eax = regs[0];
91 : if (ebx)
92 : *ebx = regs[1];
93 : if (ecx)
94 : *ecx = regs[2];
95 : if (edx)
96 : *edx = regs[3];
97 : }
98 :
99 : #if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
/* Execute XGETBV with ECX = 0 and return the value delivered in EAX.
   The value delivered in EDX is read as well (the instruction writes
   both registers) but is intentionally not returned.  The caller must
   make sure the instruction may be executed; detect_x86_gnuc only
   calls this after seeing the OSXSAVE bit in CPUID.  */
static unsigned int
get_xgetbv(void)
{
  unsigned int t_eax, t_edx;

  /* __asm__/__volatile__ instead of the plain asm keyword keeps the
     code buildable in strict ISO modes such as -std=c99.  */
  __asm__ __volatile__
    ("xgetbv\n\t"
     : "=a" (t_eax), "=d" (t_edx)
     : "c" (0)
     );

  (void)t_edx;  /* Only present to tell the compiler EDX is written.  */

  return t_eax;
}
113 : #endif /* ENABLE_AVX_SUPPORT || ENABLE_AVX2_SUPPORT */
114 :
115 : #endif /* i386 && GNUC */
116 :
117 :
118 : #if defined (__x86_64__) && defined (__GNUC__)
119 : # define HAS_X86_CPUID 1
120 :
/* On x86-64 builds no probing is done: CPUID is unconditionally
   reported as present.  */
static int
is_cpuid_available(void)
{
  enum { CPUID_PRESENT = 1 };

  return CPUID_PRESENT;
}
126 :
/* Execute CPUID for leaf IN (loaded into EAX) with EBX, ECX and EDX
   preset to zero and store the resulting EAX, EBX, ECX and EDX values
   through the respective pointers.  Any of the output pointers may be
   NULL, in which case that register is simply not reported.  */
static void
get_cpuid(unsigned int in, unsigned int *eax, unsigned int *ebx,
          unsigned int *ecx, unsigned int *edx)
{
  unsigned int regs[4];

  /* __asm__/__volatile__ are used rather than the plain asm keyword so
     the code also builds in strict ISO modes such as -std=c99, where
     GCC does not recognise "asm".  */
  __asm__ __volatile__
    ("cpuid\n\t"
     : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
     : "0" (in), "1" (0), "2" (0), "3" (0)
     : "cc"
     );

  if (eax)
    *eax = regs[0];
  if (ebx)
    *ebx = regs[1];
  if (ecx)
    *ecx = regs[2];
  if (edx)
    *edx = regs[3];
}
149 :
150 : #if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
/* Execute XGETBV with ECX = 0 and return the value delivered in EAX.
   The value delivered in EDX is read as well (the instruction writes
   both registers) but is intentionally not returned.  The caller must
   make sure the instruction may be executed; detect_x86_gnuc only
   calls this after seeing the OSXSAVE bit in CPUID.  */
static unsigned int
get_xgetbv(void)
{
  unsigned int t_eax, t_edx;

  /* __asm__/__volatile__ instead of the plain asm keyword keeps the
     code buildable in strict ISO modes such as -std=c99.  */
  __asm__ __volatile__
    ("xgetbv\n\t"
     : "=a" (t_eax), "=d" (t_edx)
     : "c" (0)
     );

  (void)t_edx;  /* Only present to tell the compiler EDX is written.  */

  return t_eax;
}
164 : #endif /* ENABLE_AVX_SUPPORT || ENABLE_AVX2_SUPPORT */
165 :
166 : #endif /* x86-64 && GNUC */
167 :
168 :
169 : #ifdef HAS_X86_CPUID
170 : static unsigned int
171 34 : detect_x86_gnuc (void)
172 : {
173 : union
174 : {
175 : char c[12+1];
176 : unsigned int ui[3];
177 : } vendor_id;
178 : unsigned int features;
179 34 : unsigned int os_supports_avx_avx2_registers = 0;
180 : unsigned int max_cpuid_level;
181 : unsigned int fms, family, model;
182 34 : unsigned int result = 0;
183 34 : unsigned int avoid_vpgather = 0;
184 :
185 : (void)os_supports_avx_avx2_registers;
186 :
187 34 : if (!is_cpuid_available())
188 0 : return 0;
189 :
190 34 : get_cpuid(0, &max_cpuid_level, &vendor_id.ui[0], &vendor_id.ui[2],
191 : &vendor_id.ui[1]);
192 34 : vendor_id.c[12] = 0;
193 :
194 : if (0)
195 : ; /* Just to make "else if" and ifdef macros look pretty. */
196 : #ifdef ENABLE_PADLOCK_SUPPORT
197 34 : else if (!strcmp (vendor_id.c, "CentaurHauls"))
198 : {
199 : /* This is a VIA CPU. Check what PadLock features we have. */
200 :
201 : /* Check for extended centaur (EAX). */
202 0 : get_cpuid(0xC0000000, &features, NULL, NULL, NULL);
203 :
204 : /* Has extended centaur features? */
205 0 : if (features > 0xC0000000)
206 : {
207 : /* Ask for the extended feature flags (EDX). */
208 0 : get_cpuid(0xC0000001, NULL, NULL, NULL, &features);
209 :
210 : /* Test bits 2 and 3 to see whether the RNG exists and is enabled. */
211 0 : if ((features & 0x0C) == 0x0C)
212 0 : result |= HWF_PADLOCK_RNG;
213 :
214 : /* Test bits 6 and 7 to see whether the ACE exists and is enabled. */
215 0 : if ((features & 0xC0) == 0xC0)
216 0 : result |= HWF_PADLOCK_AES;
217 :
218 : /* Test bits 10 and 11 to see whether the PHE exists and is
219 : enabled. */
220 0 : if ((features & 0xC00) == 0xC00)
221 0 : result |= HWF_PADLOCK_SHA;
222 :
223 : /* Test bits 12 and 13 to see whether the MONTMUL exists and is
224 : enabled. */
225 0 : if ((features & 0x3000) == 0x3000)
226 0 : result |= HWF_PADLOCK_MMUL;
227 : }
228 : }
229 : #endif /*ENABLE_PADLOCK_SUPPORT*/
230 34 : else if (!strcmp (vendor_id.c, "GenuineIntel"))
231 : {
232 : /* This is an Intel CPU. */
233 34 : result |= HWF_INTEL_CPU;
234 : }
235 0 : else if (!strcmp (vendor_id.c, "AuthenticAMD"))
236 : {
237 : /* This is an AMD CPU. */
238 : }
239 :
240 : /* Detect Intel features, that might also be supported by other
241 : vendors. */
242 :
243 : /* Get CPU family/model/stepping (EAX) and Intel feature flags (ECX). */
244 34 : get_cpuid(1, &fms, NULL, &features, NULL);
245 :
246 34 : family = ((fms & 0xf00) >> 8) + ((fms & 0xff00000) >> 20);
247 34 : model = ((fms & 0xf0) >> 4) + ((fms & 0xf0000) >> 12);
248 :
249 34 : if ((result & HWF_INTEL_CPU) && family == 6)
250 : {
251 : /* These Intel Core processor models have SHLD/SHRD instruction that
252 : * can do integer rotation faster actual ROL/ROR instructions. */
253 34 : switch (model)
254 : {
255 : case 0x2A:
256 : case 0x2D:
257 : case 0x3A:
258 : case 0x3C:
259 : case 0x3F:
260 : case 0x45:
261 : case 0x46:
262 : case 0x3D:
263 : case 0x4F:
264 : case 0x56:
265 : case 0x47:
266 : case 0x4E:
267 : case 0x5E:
268 : case 0x8E:
269 : case 0x9E:
270 : case 0x55:
271 : case 0x66:
272 34 : result |= HWF_INTEL_FAST_SHLD;
273 34 : break;
274 : }
275 :
276 : /* These Intel Core processors that have AVX2 have slow VPGATHER and
277 : * should be avoided for table-lookup use. */
278 34 : switch (model)
279 : {
280 : case 0x3C:
281 : case 0x3F:
282 : case 0x45:
283 : case 0x46:
284 : /* Haswell */
285 0 : avoid_vpgather |= 1;
286 0 : break;
287 : }
288 34 : }
289 : else
290 : {
291 : /* Avoid VPGATHER for non-Intel CPUs as testing is needed to
292 : * make sure it is fast enough. */
293 :
294 0 : avoid_vpgather |= 1;
295 : }
296 :
297 : #ifdef ENABLE_PCLMUL_SUPPORT
298 : /* Test bit 1 for PCLMUL. */
299 34 : if (features & 0x00000002)
300 34 : result |= HWF_INTEL_PCLMUL;
301 : #endif
302 : /* Test bit 9 for SSSE3. */
303 34 : if (features & 0x00000200)
304 34 : result |= HWF_INTEL_SSSE3;
305 : /* Test bit 19 for SSE4.1. */
306 34 : if (features & 0x00080000)
307 34 : result |= HWF_INTEL_SSE4_1;
308 : #ifdef ENABLE_AESNI_SUPPORT
309 : /* Test bit 25 for AES-NI. */
310 34 : if (features & 0x02000000)
311 34 : result |= HWF_INTEL_AESNI;
312 : #endif /*ENABLE_AESNI_SUPPORT*/
313 : #if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
314 : /* Test bit 27 for OSXSAVE (required for AVX/AVX2). */
315 34 : if (features & 0x08000000)
316 : {
317 : /* Check that OS has enabled both XMM and YMM state support. */
318 34 : if ((get_xgetbv() & 0x6) == 0x6)
319 34 : os_supports_avx_avx2_registers = 1;
320 : }
321 : #endif
322 : #ifdef ENABLE_AVX_SUPPORT
323 : /* Test bit 28 for AVX. */
324 34 : if (features & 0x10000000)
325 34 : if (os_supports_avx_avx2_registers)
326 34 : result |= HWF_INTEL_AVX;
327 : #endif /*ENABLE_AVX_SUPPORT*/
328 : #ifdef ENABLE_DRNG_SUPPORT
329 : /* Test bit 30 for RDRAND. */
330 34 : if (features & 0x40000000)
331 0 : result |= HWF_INTEL_RDRAND;
332 : #endif /*ENABLE_DRNG_SUPPORT*/
333 :
334 : /* Check additional Intel feature flags. Early Intel P5 processors report
335 : * too high max_cpuid_level, so don't check level 7 if processor does not
336 : * support SSE3 (as cpuid:7 contains only features for newer processors).
337 : * Source: http://www.sandpile.org/x86/cpuid.htm */
338 34 : if (max_cpuid_level >= 7 && (features & 0x00000001))
339 : {
340 : /* Get CPUID:7 contains further Intel feature flags. */
341 34 : get_cpuid(7, NULL, &features, NULL, NULL);
342 :
343 : /* Test bit 8 for BMI2. */
344 34 : if (features & 0x00000100)
345 0 : result |= HWF_INTEL_BMI2;
346 :
347 : #ifdef ENABLE_AVX2_SUPPORT
348 : /* Test bit 5 for AVX2. */
349 34 : if (features & 0x00000020)
350 0 : if (os_supports_avx_avx2_registers)
351 0 : result |= HWF_INTEL_AVX2;
352 :
353 34 : if ((result & HWF_INTEL_AVX2) && !avoid_vpgather)
354 0 : result |= HWF_INTEL_FAST_VPGATHER;
355 : #endif /*ENABLE_AVX_SUPPORT*/
356 : }
357 :
358 34 : return result;
359 : }
360 : #endif /* HAS_X86_CPUID */
361 :
362 :
/* Return the HWF_* flags detected for the running x86 CPU.  When no
   CPUID helper was compiled in (HAS_X86_CPUID undefined), no detection
   is attempted and 0 is returned.  */
unsigned int
_gcry_hwf_detect_x86 (void)
{
  unsigned int detected = 0;

#ifdef HAS_X86_CPUID
  detected = detect_x86_gnuc ();
#endif

  return detected;
}
|