summaryrefslogtreecommitdiff
path: root/i386
diff options
context:
space:
mode:
authorSamuel Thibault <samuel.thibault@ens-lyon.org>2009-11-28 17:09:38 +0100
committerSamuel Thibault <samuel.thibault@ens-lyon.org>2009-11-28 17:09:38 +0100
commit9e608d32e9e85246decd868f326efde66108c7f3 (patch)
tree5fdbd40c0b0833c86447c382a8daf82e1f4ac4a5 /i386
parent77c3d3e9505853ea2d5e1b68f7c7e6c2481cccaf (diff)
Add XMM FPU registers save/restore support.
* i386/include/mach/i386/fp_reg.h (struct i386_fp_regs): Invert array indices. (struct i386_xfp_save): New structure. (FP_387X): New macro. * i386/i386/thread.h (struct i386_fpsave_state): Add xfp_save_state member, keep existing fp_save_state and fp_regs members in an unnamed union member. Move fp_valid member to the end of the structure. * i386/i386/fpu.h (fxsave, fxrstor): New macros. (fpu_save_context): Use fxsave() when FPU is FXSR-capable. * i386/i386/fpu.c: Include <i386/locore.h> (mxcsr_feature_mask): New variable. (fp_save, fp_load): Add declaration. (init_fpu): Add FXSR-capable FPU detection. (fpu_module_init): Request 16-byte alignment to zinit() for i386_fpsave_state structures. (twd_i387_to_fxsr, twd_fxsr_to_i387): New functions. (fpu_set_state): Convert FPU state when FPU is FXSR-capable. (fpu_get_state): Convert FPU state when FPU is FXSR-capable. (fpexterrflt): Pass to i386_exception either xfp_save_state or fp_save_state according to FPU type. (fpastintr): Likewise. (fp_load): Likewise. Use fxrstor() when FPU is FXSR-capable. (fp_save): Use fxsave() when FPU is FXSR-capable. (fp_state_alloc): Add FXSR-aware initialization.
Diffstat (limited to 'i386')
-rw-r--r--i386/i386/fpu.c208
-rw-r--r--i386/i386/fpu.h11
-rw-r--r--i386/i386/thread.h9
-rw-r--r--i386/include/mach/i386/fp_reg.h23
-rw-r--r--i386/include/mach/i386/thread_status.h2
5 files changed, 217 insertions, 36 deletions
diff --git a/i386/i386/fpu.c b/i386/i386/fpu.c
index 8679816..109d0d7 100644
--- a/i386/i386/fpu.c
+++ b/i386/i386/fpu.c
@@ -23,6 +23,15 @@
* any improvements or extensions that they make and grant Carnegie Mellon
* the rights to redistribute these changes.
*/
+
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
/*
* Support for 80387 floating point or FP emulator.
*/
@@ -43,6 +52,7 @@
#include <i386/thread.h>
#include <i386/fpu.h>
#include <i386/pio.h>
+#include <i386/locore.h>
#include "cpu_number.h"
#if 0
@@ -63,6 +73,10 @@ extern void i386_exception();
int fp_kind = FP_387; /* 80387 present */
zone_t ifps_zone; /* zone for FPU save area */
+static unsigned long mxcsr_feature_mask = 0xffffffff; /* Always AND user-provided mxcsr with this security mask */
+
+void fp_save(thread_t thread);
+void fp_load(thread_t thread);
#if NCPUS == 1
volatile thread_t fp_thread = THREAD_NULL;
@@ -134,7 +148,20 @@ init_fpu()
/*
* We have a 387.
*/
- fp_kind = FP_387;
+ if (CPU_HAS_FEATURE(CPU_FEATURE_FXSR)) {
+ static /* because we _need_ alignment */
+ struct i386_xfp_save save;
+ unsigned long mask;
+ fp_kind = FP_387X;
+ printf("Enabling FXSR\n");
+ set_cr4(get_cr4() | CR4_OSFXSR);
+ fxsave(&save);
+ mask = save.fp_mxcsr_mask;
+ if (!mask)
+ mask = 0x0000ffbf;
+ mxcsr_feature_mask &= mask;
+ } else
+ fp_kind = FP_387;
}
/*
* Trap wait instructions. Turn off FPU for now.
@@ -155,7 +182,7 @@ init_fpu()
void
fpu_module_init()
{
- ifps_zone = zinit(sizeof(struct i386_fpsave_state), 0,
+ ifps_zone = zinit(sizeof(struct i386_fpsave_state), 16,
THREAD_MAX * sizeof(struct i386_fpsave_state),
THREAD_CHUNK * sizeof(struct i386_fpsave_state),
0, "i386 fpsave state");
@@ -186,6 +213,73 @@ ASSERT_IPL(SPL0);
zfree(ifps_zone, (vm_offset_t) fps);
}
+/* The two following functions were stolen from Linux's i387.c */
+static inline unsigned short
+twd_i387_to_fxsr (unsigned short twd)
+{
+ unsigned int tmp; /* to avoid 16 bit prefixes in the code */
+
+ /* Transform each pair of bits into 01 (valid) or 00 (empty) */
+ tmp = ~twd;
+ tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+ /* and move the valid bits to the lower byte. */
+ tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+ tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+ tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+ return tmp;
+}
+
+static inline unsigned long
+twd_fxsr_to_i387 (struct i386_xfp_save *fxsave)
+{
+ struct {
+ unsigned short significand[4];
+ unsigned short exponent;
+ unsigned short padding[3];
+ } *st = NULL;
+ unsigned long tos = (fxsave->fp_status >> 11) & 7;
+ unsigned long twd = (unsigned long) fxsave->fp_tag;
+ unsigned long tag;
+ unsigned long ret = 0xffff0000u;
+ int i;
+
+#define FPREG_ADDR(f, n) ((void *)&(f)->fp_reg_word + (n) * 16);
+
+ for (i = 0 ; i < 8 ; i++) {
+ if (twd & 0x1) {
+ st = FPREG_ADDR (fxsave, (i - tos) & 7);
+
+ switch (st->exponent & 0x7fff) {
+ case 0x7fff:
+ tag = 2; /* Special */
+ break;
+ case 0x0000:
+ if (!st->significand[0] &&
+ !st->significand[1] &&
+ !st->significand[2] &&
+ !st->significand[3] ) {
+ tag = 1; /* Zero */
+ } else {
+ tag = 2; /* Special */
+ }
+ break;
+ default:
+ if (st->significand[3] & 0x8000) {
+ tag = 0; /* Valid */
+ } else {
+ tag = 2; /* Special */
+ }
+ break;
+ }
+ } else {
+ tag = 3; /* Empty */
+ }
+ ret |= (tag << (2 * i));
+ twd = twd >> 1;
+ }
+ return ret;
+}
+
/*
* Set the floating-point state for a thread.
* If the thread is not the current thread, it is
@@ -264,16 +358,30 @@ ASSERT_IPL(SPL0);
*/
memset(&ifps->fp_save_state, 0, sizeof(struct i386_fp_save));
- ifps->fp_save_state.fp_control = user_fp_state->fp_control;
- ifps->fp_save_state.fp_status = user_fp_state->fp_status;
- ifps->fp_save_state.fp_tag = user_fp_state->fp_tag;
- ifps->fp_save_state.fp_eip = user_fp_state->fp_eip;
- ifps->fp_save_state.fp_cs = user_fp_state->fp_cs;
- ifps->fp_save_state.fp_opcode = user_fp_state->fp_opcode;
- ifps->fp_save_state.fp_dp = user_fp_state->fp_dp;
- ifps->fp_save_state.fp_ds = user_fp_state->fp_ds;
- ifps->fp_regs = *user_fp_regs;
- ifps->fp_valid = TRUE;
+ if (fp_kind == FP_387X) {
+ int i;
+
+ ifps->xfp_save_state.fp_control = user_fp_state->fp_control;
+ ifps->xfp_save_state.fp_status = user_fp_state->fp_status;
+ ifps->xfp_save_state.fp_tag = twd_i387_to_fxsr(user_fp_state->fp_tag);
+ ifps->xfp_save_state.fp_eip = user_fp_state->fp_eip;
+ ifps->xfp_save_state.fp_cs = user_fp_state->fp_cs;
+ ifps->xfp_save_state.fp_opcode = user_fp_state->fp_opcode;
+ ifps->xfp_save_state.fp_dp = user_fp_state->fp_dp;
+ ifps->xfp_save_state.fp_ds = user_fp_state->fp_ds;
+ for (i=0; i<8; i++)
+ memcpy(&ifps->xfp_save_state.fp_reg_word[i], &user_fp_regs[i], sizeof(user_fp_regs[i]));
+ } else {
+ ifps->fp_save_state.fp_control = user_fp_state->fp_control;
+ ifps->fp_save_state.fp_status = user_fp_state->fp_status;
+ ifps->fp_save_state.fp_tag = user_fp_state->fp_tag;
+ ifps->fp_save_state.fp_eip = user_fp_state->fp_eip;
+ ifps->fp_save_state.fp_cs = user_fp_state->fp_cs;
+ ifps->fp_save_state.fp_opcode = user_fp_state->fp_opcode;
+ ifps->fp_save_state.fp_dp = user_fp_state->fp_dp;
+ ifps->fp_save_state.fp_ds = user_fp_state->fp_ds;
+ ifps->fp_regs = *user_fp_regs;
+ }
simple_unlock(&pcb->lock);
if (new_ifps != 0)
@@ -343,15 +451,30 @@ ASSERT_IPL(SPL0);
*/
memset(user_fp_state, 0, sizeof(struct i386_fp_save));
- user_fp_state->fp_control = ifps->fp_save_state.fp_control;
- user_fp_state->fp_status = ifps->fp_save_state.fp_status;
- user_fp_state->fp_tag = ifps->fp_save_state.fp_tag;
- user_fp_state->fp_eip = ifps->fp_save_state.fp_eip;
- user_fp_state->fp_cs = ifps->fp_save_state.fp_cs;
- user_fp_state->fp_opcode = ifps->fp_save_state.fp_opcode;
- user_fp_state->fp_dp = ifps->fp_save_state.fp_dp;
- user_fp_state->fp_ds = ifps->fp_save_state.fp_ds;
- *user_fp_regs = ifps->fp_regs;
+ if (fp_kind == FP_387X) {
+ int i;
+
+ user_fp_state->fp_control = ifps->xfp_save_state.fp_control;
+ user_fp_state->fp_status = ifps->xfp_save_state.fp_status;
+ user_fp_state->fp_tag = twd_fxsr_to_i387(&ifps->xfp_save_state);
+ user_fp_state->fp_eip = ifps->xfp_save_state.fp_eip;
+ user_fp_state->fp_cs = ifps->xfp_save_state.fp_cs;
+ user_fp_state->fp_opcode = ifps->xfp_save_state.fp_opcode;
+ user_fp_state->fp_dp = ifps->xfp_save_state.fp_dp;
+ user_fp_state->fp_ds = ifps->xfp_save_state.fp_ds;
+ for (i=0; i<8; i++)
+ memcpy(&user_fp_regs[i], &ifps->xfp_save_state.fp_reg_word[i], sizeof(user_fp_regs[i]));
+ } else {
+ user_fp_state->fp_control = ifps->fp_save_state.fp_control;
+ user_fp_state->fp_status = ifps->fp_save_state.fp_status;
+ user_fp_state->fp_tag = ifps->fp_save_state.fp_tag;
+ user_fp_state->fp_eip = ifps->fp_save_state.fp_eip;
+ user_fp_state->fp_cs = ifps->fp_save_state.fp_cs;
+ user_fp_state->fp_opcode = ifps->fp_save_state.fp_opcode;
+ user_fp_state->fp_dp = ifps->fp_save_state.fp_dp;
+ user_fp_state->fp_ds = ifps->fp_save_state.fp_ds;
+ *user_fp_regs = ifps->fp_regs;
+ }
}
simple_unlock(&pcb->lock);
@@ -546,7 +669,9 @@ fpexterrflt()
*/
i386_exception(EXC_ARITHMETIC,
EXC_I386_EXTERR,
- thread->pcb->ims.ifps->fp_save_state.fp_status);
+ fp_kind == FP_387X ?
+ thread->pcb->ims.ifps->xfp_save_state.fp_status :
+ thread->pcb->ims.ifps->fp_save_state.fp_status);
/*NOTREACHED*/
}
@@ -601,7 +726,9 @@ ASSERT_IPL(SPL0);
*/
i386_exception(EXC_ARITHMETIC,
EXC_I386_EXTERR,
- thread->pcb->ims.ifps->fp_save_state.fp_status);
+ fp_kind == FP_387X ?
+ thread->pcb->ims.ifps->xfp_save_state.fp_status :
+ thread->pcb->ims.ifps->fp_save_state.fp_status);
/*NOTREACHED*/
}
@@ -623,7 +750,10 @@ fp_save(thread)
if (ifps != 0 && !ifps->fp_valid) {
/* registers are in FPU */
ifps->fp_valid = TRUE;
- fnsave(&ifps->fp_save_state);
+ if (fp_kind == FP_387X)
+ fxsave(&ifps->xfp_save_state);
+ else
+ fnsave(&ifps->fp_save_state);
}
}
@@ -664,14 +794,19 @@ ASSERT_IPL(SPL0);
*/
i386_exception(EXC_ARITHMETIC,
EXC_I386_EXTERR,
- thread->pcb->ims.ifps->fp_save_state.fp_status);
+ fp_kind == FP_387X ?
+ thread->pcb->ims.ifps->xfp_save_state.fp_status :
+ thread->pcb->ims.ifps->fp_save_state.fp_status);
/*NOTREACHED*/
#endif
} else if (! ifps->fp_valid) {
printf("fp_load: invalid FPU state!\n");
fninit ();
} else {
- frstor(ifps->fp_save_state);
+ if (fp_kind == FP_387X)
+ fxrstor(ifps->xfp_save_state);
+ else
+ frstor(ifps->fp_save_state);
}
ifps->fp_valid = FALSE; /* in FPU */
}
@@ -693,11 +828,22 @@ fp_state_alloc()
pcb->ims.ifps = ifps;
ifps->fp_valid = TRUE;
- ifps->fp_save_state.fp_control = (0x037f
- & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC))
- | (FPC_PC_53|FPC_IC_AFF);
- ifps->fp_save_state.fp_status = 0;
- ifps->fp_save_state.fp_tag = 0xffff; /* all empty */
+
+ if (fp_kind == FP_387X) {
+ ifps->xfp_save_state.fp_control = (0x037f
+ & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC))
+ | (FPC_PC_53|FPC_IC_AFF);
+ ifps->xfp_save_state.fp_status = 0;
+ ifps->xfp_save_state.fp_tag = 0xffff; /* all empty */
+ if (CPU_HAS_FEATURE(CPU_FEATURE_SSE))
+ ifps->xfp_save_state.fp_mxcsr = 0x1f80;
+ } else {
+ ifps->fp_save_state.fp_control = (0x037f
+ & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC))
+ | (FPC_PC_53|FPC_IC_AFF);
+ ifps->fp_save_state.fp_status = 0;
+ ifps->fp_save_state.fp_tag = 0xffff; /* all empty */
+ }
}
#if AT386
diff --git a/i386/i386/fpu.h b/i386/i386/fpu.h
index 7efb7e2..1a1b61f 100644
--- a/i386/i386/fpu.h
+++ b/i386/i386/fpu.h
@@ -67,6 +67,12 @@
#define frstor(state) \
asm volatile("frstor %0" : : "m" (state))
+#define fxsave(state) \
+ asm volatile("fxsave %0" : "=m" (*state))
+
+#define fxrstor(state) \
+ asm volatile("fxrstor %0" : : "m" (state))
+
#define fwait() \
asm("fwait");
@@ -86,7 +92,10 @@
if (ifps != 0 && !ifps->fp_valid) { \
/* registers are in FPU - save to memory */ \
ifps->fp_valid = TRUE; \
- fnsave(&ifps->fp_save_state); \
+ if (fp_kind == FP_387X) \
+ fxsave(&ifps->xfp_save_state); \
+ else \
+ fnsave(&ifps->fp_save_state); \
set_ts(); \
} \
}
diff --git a/i386/i386/thread.h b/i386/i386/thread.h
index 76aa1ef..f2ae8bf 100644
--- a/i386/i386/thread.h
+++ b/i386/i386/thread.h
@@ -111,9 +111,14 @@ struct i386_kernel_state {
*/
struct i386_fpsave_state {
+ union {
+ struct {
+ struct i386_fp_save fp_save_state;
+ struct i386_fp_regs fp_regs;
+ };
+ struct i386_xfp_save xfp_save_state;
+ };
boolean_t fp_valid;
- struct i386_fp_save fp_save_state;
- struct i386_fp_regs fp_regs;
};
/*
diff --git a/i386/include/mach/i386/fp_reg.h b/i386/include/mach/i386/fp_reg.h
index 6fe7af5..5673055 100644
--- a/i386/include/mach/i386/fp_reg.h
+++ b/i386/include/mach/i386/fp_reg.h
@@ -46,10 +46,30 @@ struct i386_fp_save {
};
struct i386_fp_regs {
- unsigned short fp_reg_word[5][8];
+ unsigned short fp_reg_word[8][5];
/* space for 8 80-bit FP registers */
};
+struct i386_xfp_save {
+ unsigned short fp_control; /* control */
+ unsigned short fp_status; /* status */
+ unsigned short fp_tag; /* register tags */
+ unsigned short fp_opcode; /* opcode of failed instruction */
+ unsigned int fp_eip; /* eip at failed instruction */
+ unsigned short fp_cs; /* cs at failed instruction */
+ unsigned short fp_unused_1;
+ unsigned int fp_dp; /* data address */
+ unsigned short fp_ds; /* data segment */
+ unsigned short fp_unused_2;
+ unsigned int fp_mxcsr; /* MXCSR */
+ unsigned int fp_mxcsr_mask; /* MXCSR_MASK */
+ unsigned char fp_reg_word[8][16];
+ /* space for 8 128-bit FP registers */
+ unsigned char fp_xreg_word[8][16];
+ /* space for 8 128-bit XMM registers */
+ unsigned int padding[56];
+} __attribute__((aligned(16)));
+
/*
* Control register
*/
@@ -104,5 +124,6 @@ struct i386_fp_regs {
#define FP_SOFT 1 /* software FP emulator */
#define FP_287 2 /* 80287 */
#define FP_387 3 /* 80387 or 80486 */
+#define FP_387X 4 /* FXSAVE/RSTOR-capable */
#endif /* _MACH_I386_FP_REG_H_ */
diff --git a/i386/include/mach/i386/thread_status.h b/i386/include/mach/i386/thread_status.h
index cc3dc66..5f20355 100644
--- a/i386/include/mach/i386/thread_status.h
+++ b/i386/include/mach/i386/thread_status.h
@@ -111,7 +111,7 @@ struct i386_thread_state {
(sizeof (struct i386_fp_save) + sizeof (struct i386_fp_regs))
struct i386_float_state {
- int fpkind; /* FP_NO..FP_387 (readonly) */
+ int fpkind; /* FP_NO..FP_387X (readonly) */
int initialized;
unsigned char hw_state[FP_STATE_BYTES]; /* actual "hardware" state */
int exc_status; /* exception status (readonly) */