From 6765f83300116cdd1b3493f9c2e76051446e6bfc Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Mon, 13 Jul 2009 01:36:16 +0000 Subject: * debian/patches/20_mmx_support.patch: rename to 20_xmm_support.patch and fix comment: the added support is XMM registers, not MMX. --- debian/changelog | 2 + debian/patches/20_mmx_support.patch | 414 ------------------------------------ debian/patches/20_xmm_support.patch | 414 ++++++++++++++++++++++++++++++++++++ debian/patches/series | 2 +- 4 files changed, 417 insertions(+), 415 deletions(-) delete mode 100644 debian/patches/20_mmx_support.patch create mode 100644 debian/patches/20_xmm_support.patch diff --git a/debian/changelog b/debian/changelog index 97ab67b..3eecc0b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -8,6 +8,8 @@ gnumach (2:1.3.99.dfsg.cvs20090220-2) UNRELEASED; urgency=low [ Samuel Thibault ] * debian/patches/14_alloc_params.patch: reserve 128MiB for virtual memory space. Fixes boot with more than ~900MiB. + * debian/patches/20_mmx_support.patch: rename to 20_xmm_support.patch and + fix comment: the added support is XMM registers, not MMX. -- Guillem Jover Mon, 08 Jun 2009 22:23:13 +0200 diff --git a/debian/patches/20_mmx_support.patch b/debian/patches/20_mmx_support.patch deleted file mode 100644 index 692a57f..0000000 --- a/debian/patches/20_mmx_support.patch +++ /dev/null @@ -1,414 +0,0 @@ -2007-03-03 Samuel Thibault - - Add MMX FPU registers save/restore support. - * i386/include/mach/i386/fp_reg.h (struct i386_fp_regs): Invert array - indices. - (struct i386_xfp_save): New structure. - (FP_387X): New macro. - * i386/i386/thread.h (struct i386_fpsave_state): Add xfp_save_state - member, keep existing fp_save_state and fp_regs members in an unnamed - union member. Move fp_valid member to the end of the structure. - * i386/i386/fpu.h (fxsave, fxrstor): New macros. - (fpu_save_context): Use fxsave() when FPU is FXSR-capable. - * i386/i386/fpu.c: Include - (fp_save, fp_load): Add declaration. - (init_fpu): Add FXSR-capable FPU detection. - (fpu_module_init): Request 16-byte alignment to zinit() for - i386_fpsave_state structures. - (fpu_set_state): Convert FPU state when FPU is FXSR-capable. Free the - just-allocated ifps, not the one currently in use. - (fpu_get_state): Convert FPU state when FPU is FXSR-capable. - (fp_save): Use fxsave() when FPU is FXSR-capable. - (fp_load): Use fxrstor() when FPU is FXSR-capable. - (fp_state_alloc): Add FXSR-aware initialization. - - -Index: b/i386/i386/fpu.c -=================================================================== ---- a/i386/i386/fpu.c -+++ b/i386/i386/fpu.c -@@ -43,6 +43,7 @@ - #include - #include - #include -+#include - #include "cpu_number.h" - - #if 0 -@@ -63,6 +64,10 @@ extern void i386_exception(); - - int fp_kind = FP_387; /* 80387 present */ - zone_t ifps_zone; /* zone for FPU save area */ -+static unsigned long mxcsr_feature_mask = 0xffffffff; /* Always AND user-provided mxcsr with this security mask */ -+ -+void fp_save(thread_t thread); -+void fp_load(thread_t thread); - - #if NCPUS == 1 - volatile thread_t fp_thread = THREAD_NULL; -@@ -130,7 +135,20 @@ init_fpu() - /* - * We have a 387. - */ -- fp_kind = FP_387; -+ if (CPU_HAS_FEATURE(CPU_FEATURE_FXSR)) { -+ static /* because we _need_ alignment */ -+ struct i386_xfp_save save; -+ unsigned long mask; -+ fp_kind = FP_387X; -+ printf("Enabling FXSR\n"); -+ set_cr4(get_cr4() | CR4_OSFXSR); -+ fxsave(&save); -+ mask = save.fp_mxcsr_mask; -+ if (!mask) -+ mask = 0x0000ffbf; -+ mxcsr_feature_mask &= mask; -+ } else -+ fp_kind = FP_387; - } - /* - * Trap wait instructions. Turn off FPU for now. -@@ -152,7 +170,7 @@ init_fpu() - void - fpu_module_init() - { -- ifps_zone = zinit(sizeof(struct i386_fpsave_state), 0, -+ ifps_zone = zinit(sizeof(struct i386_fpsave_state), 16, - THREAD_MAX * sizeof(struct i386_fpsave_state), - THREAD_CHUNK * sizeof(struct i386_fpsave_state), - 0, "i386 fpsave state"); -@@ -183,6 +201,74 @@ ASSERT_IPL(SPL0); - zfree(ifps_zone, (vm_offset_t) fps); - } - -+/* The two following functions were stolen from Linux, and hence are covered -+ * by the GPL */ -+static inline unsigned short -+twd_i387_to_fxsr (unsigned short twd) -+{ -+ unsigned int tmp; /* to avoid 16 bit prefixes in the code */ -+ -+ /* Transform each pair of bits into 01 (valid) or 00 (empty) */ -+ tmp = ~twd; -+ tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ -+ /* and move the valid bits to the lower byte. */ -+ tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ -+ tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ -+ tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ -+ return tmp; -+} -+ -+static inline unsigned long -+twd_fxsr_to_i387 (struct i386_xfp_save *fxsave) -+{ -+ struct { -+ unsigned short significand[4]; -+ unsigned short exponent; -+ unsigned short padding[3]; -+ } *st = NULL; -+ unsigned long tos = (fxsave->fp_status >> 11) & 7; -+ unsigned long twd = (unsigned long) fxsave->fp_tag; -+ unsigned long tag; -+ unsigned long ret = 0xffff0000u; -+ int i; -+ -+#define FPREG_ADDR(f, n) ((void *)&(f)->fp_reg_word + (n) * 16); -+ -+ for (i = 0 ; i < 8 ; i++) { -+ if (twd & 0x1) { -+ st = FPREG_ADDR (fxsave, (i - tos) & 7); -+ -+ switch (st->exponent & 0x7fff) { -+ case 0x7fff: -+ tag = 2; /* Special */ -+ break; -+ case 0x0000: -+ if (!st->significand[0] && -+ !st->significand[1] && -+ !st->significand[2] && -+ !st->significand[3] ) { -+ tag = 1; /* Zero */ -+ } else { -+ tag = 2; /* Special */ -+ } -+ break; -+ default: -+ if (st->significand[3] & 0x8000) { -+ tag = 0; /* Valid */ -+ } else { -+ tag = 2; /* Special */ -+ } -+ break; -+ } -+ } else { -+ tag = 3; /* Empty */ -+ } -+ ret |= (tag << (2 * i)); -+ twd = twd >> 1; -+ } -+ return ret; -+} -+ - /* - * Set the floating-point state for a thread. - * If the thread is not the current thread, it is -@@ -261,16 +347,30 @@ ASSERT_IPL(SPL0); - */ - memset(&ifps->fp_save_state, 0, sizeof(struct i386_fp_save)); - -- ifps->fp_save_state.fp_control = user_fp_state->fp_control; -- ifps->fp_save_state.fp_status = user_fp_state->fp_status; -- ifps->fp_save_state.fp_tag = user_fp_state->fp_tag; -- ifps->fp_save_state.fp_eip = user_fp_state->fp_eip; -- ifps->fp_save_state.fp_cs = user_fp_state->fp_cs; -- ifps->fp_save_state.fp_opcode = user_fp_state->fp_opcode; -- ifps->fp_save_state.fp_dp = user_fp_state->fp_dp; -- ifps->fp_save_state.fp_ds = user_fp_state->fp_ds; -- ifps->fp_regs = *user_fp_regs; -- ifps->fp_valid = TRUE; -+ if (fp_kind == FP_387X) { -+ int i; -+ -+ ifps->xfp_save_state.fp_control = user_fp_state->fp_control; -+ ifps->xfp_save_state.fp_status = user_fp_state->fp_status; -+ ifps->xfp_save_state.fp_tag = twd_i387_to_fxsr(user_fp_state->fp_tag); -+ ifps->xfp_save_state.fp_eip = user_fp_state->fp_eip; -+ ifps->xfp_save_state.fp_cs = user_fp_state->fp_cs; -+ ifps->xfp_save_state.fp_opcode = user_fp_state->fp_opcode; -+ ifps->xfp_save_state.fp_dp = user_fp_state->fp_dp; -+ ifps->xfp_save_state.fp_ds = user_fp_state->fp_ds; -+ for (i=0; i<8; i++) -+ memcpy(&ifps->xfp_save_state.fp_reg_word[i], &user_fp_regs[i], sizeof(user_fp_regs[i])); -+ } else { -+ ifps->fp_save_state.fp_control = user_fp_state->fp_control; -+ ifps->fp_save_state.fp_status = user_fp_state->fp_status; -+ ifps->fp_save_state.fp_tag = user_fp_state->fp_tag; -+ ifps->fp_save_state.fp_eip = user_fp_state->fp_eip; -+ ifps->fp_save_state.fp_cs = user_fp_state->fp_cs; -+ ifps->fp_save_state.fp_opcode = user_fp_state->fp_opcode; -+ ifps->fp_save_state.fp_dp = user_fp_state->fp_dp; -+ ifps->fp_save_state.fp_ds = user_fp_state->fp_ds; -+ ifps->fp_regs = *user_fp_regs; -+ } - - simple_unlock(&pcb->lock); - if (new_ifps != 0) -@@ -340,15 +440,30 @@ ASSERT_IPL(SPL0); - */ - memset(user_fp_state, 0, sizeof(struct i386_fp_save)); - -- user_fp_state->fp_control = ifps->fp_save_state.fp_control; -- user_fp_state->fp_status = ifps->fp_save_state.fp_status; -- user_fp_state->fp_tag = ifps->fp_save_state.fp_tag; -- user_fp_state->fp_eip = ifps->fp_save_state.fp_eip; -- user_fp_state->fp_cs = ifps->fp_save_state.fp_cs; -- user_fp_state->fp_opcode = ifps->fp_save_state.fp_opcode; -- user_fp_state->fp_dp = ifps->fp_save_state.fp_dp; -- user_fp_state->fp_ds = ifps->fp_save_state.fp_ds; -- *user_fp_regs = ifps->fp_regs; -+ if (fp_kind == FP_387X) { -+ int i; -+ -+ user_fp_state->fp_control = ifps->xfp_save_state.fp_control; -+ user_fp_state->fp_status = ifps->xfp_save_state.fp_status; -+ user_fp_state->fp_tag = twd_fxsr_to_i387(&ifps->xfp_save_state); -+ user_fp_state->fp_eip = ifps->xfp_save_state.fp_eip; -+ user_fp_state->fp_cs = ifps->xfp_save_state.fp_cs; -+ user_fp_state->fp_opcode = ifps->xfp_save_state.fp_opcode; -+ user_fp_state->fp_dp = ifps->xfp_save_state.fp_dp; -+ user_fp_state->fp_ds = ifps->xfp_save_state.fp_ds; -+ for (i=0; i<8; i++) -+ memcpy(&user_fp_regs[i], &ifps->xfp_save_state.fp_reg_word[i], sizeof(user_fp_regs[i])); -+ } else { -+ user_fp_state->fp_control = ifps->fp_save_state.fp_control; -+ user_fp_state->fp_status = ifps->fp_save_state.fp_status; -+ user_fp_state->fp_tag = ifps->fp_save_state.fp_tag; -+ user_fp_state->fp_eip = ifps->fp_save_state.fp_eip; -+ user_fp_state->fp_cs = ifps->fp_save_state.fp_cs; -+ user_fp_state->fp_opcode = ifps->fp_save_state.fp_opcode; -+ user_fp_state->fp_dp = ifps->fp_save_state.fp_dp; -+ user_fp_state->fp_ds = ifps->fp_save_state.fp_ds; -+ *user_fp_regs = ifps->fp_regs; -+ } - } - simple_unlock(&pcb->lock); - -@@ -532,7 +647,9 @@ ASSERT_IPL(SPL0); - */ - i386_exception(EXC_ARITHMETIC, - EXC_I386_EXTERR, -- thread->pcb->ims.ifps->fp_save_state.fp_status); -+ fp_kind == FP_387X ? -+ thread->pcb->ims.ifps->xfp_save_state.fp_status : -+ thread->pcb->ims.ifps->fp_save_state.fp_status); - /*NOTREACHED*/ - } - -@@ -554,7 +671,10 @@ fp_save(thread) - if (ifps != 0 && !ifps->fp_valid) { - /* registers are in FPU */ - ifps->fp_valid = TRUE; -- fnsave(&ifps->fp_save_state); -+ if (fp_kind == FP_387X) -+ fxsave(&ifps->xfp_save_state); -+ else -+ fnsave(&ifps->fp_save_state); - } - } - -@@ -595,14 +715,19 @@ ASSERT_IPL(SPL0); - */ - i386_exception(EXC_ARITHMETIC, - EXC_I386_EXTERR, -- thread->pcb->ims.ifps->fp_save_state.fp_status); -+ fp_kind == FP_387X ? -+ thread->pcb->ims.ifps->xfp_save_state.fp_status : -+ thread->pcb->ims.ifps->fp_save_state.fp_status); - /*NOTREACHED*/ - #endif - } else if (! ifps->fp_valid) { - printf("fp_load: invalid FPU state!\n"); - fninit (); - } else { -- frstor(ifps->fp_save_state); -+ if (fp_kind == FP_387X) -+ fxrstor(ifps->xfp_save_state); -+ else -+ frstor(ifps->fp_save_state); - } - ifps->fp_valid = FALSE; /* in FPU */ - } -@@ -624,11 +749,22 @@ fp_state_alloc() - pcb->ims.ifps = ifps; - - ifps->fp_valid = TRUE; -- ifps->fp_save_state.fp_control = (0x037f -- & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC)) -- | (FPC_PC_53|FPC_IC_AFF); -- ifps->fp_save_state.fp_status = 0; -- ifps->fp_save_state.fp_tag = 0xffff; /* all empty */ -+ -+ if (fp_kind == FP_387X) { -+ ifps->xfp_save_state.fp_control = (0x037f -+ & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC)) -+ | (FPC_PC_53|FPC_IC_AFF); -+ ifps->xfp_save_state.fp_status = 0; -+ ifps->xfp_save_state.fp_tag = 0xffff; /* all empty */ -+ if (CPU_HAS_FEATURE(CPU_FEATURE_SSE)) -+ ifps->xfp_save_state.fp_mxcsr = 0x1f80; -+ } else { -+ ifps->fp_save_state.fp_control = (0x037f -+ & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC)) -+ | (FPC_PC_53|FPC_IC_AFF); -+ ifps->fp_save_state.fp_status = 0; -+ ifps->fp_save_state.fp_tag = 0xffff; /* all empty */ -+ } - } - - #if AT386 -Index: b/i386/i386/fpu.h -=================================================================== ---- a/i386/i386/fpu.h -+++ b/i386/i386/fpu.h -@@ -67,6 +67,12 @@ - #define frstor(state) \ - asm volatile("frstor %0" : : "m" (state)) - -+#define fxsave(state) \ -+ asm volatile("fxsave %0" : "=m" (*state)) -+ -+#define fxrstor(state) \ -+ asm volatile("fxrstor %0" : : "m" (state)) -+ - #define fwait() \ - asm("fwait"); - -@@ -86,7 +92,10 @@ - if (ifps != 0 && !ifps->fp_valid) { \ - /* registers are in FPU - save to memory */ \ - ifps->fp_valid = TRUE; \ -- fnsave(&ifps->fp_save_state); \ -+ if (fp_kind == FP_387X) \ -+ fxsave(&ifps->xfp_save_state); \ -+ else \ -+ fnsave(&ifps->fp_save_state); \ - set_ts(); \ - } \ - } -Index: b/i386/i386/thread.h -=================================================================== ---- a/i386/i386/thread.h -+++ b/i386/i386/thread.h -@@ -111,9 +111,14 @@ struct i386_kernel_state { - */ - - struct i386_fpsave_state { -+ union { -+ struct { -+ struct i386_fp_save fp_save_state; -+ struct i386_fp_regs fp_regs; -+ }; -+ struct i386_xfp_save xfp_save_state; -+ }; - boolean_t fp_valid; -- struct i386_fp_save fp_save_state; -- struct i386_fp_regs fp_regs; - }; - - /* -Index: b/i386/include/mach/i386/fp_reg.h -=================================================================== ---- a/i386/include/mach/i386/fp_reg.h -+++ b/i386/include/mach/i386/fp_reg.h -@@ -46,10 +46,30 @@ struct i386_fp_save { - }; - - struct i386_fp_regs { -- unsigned short fp_reg_word[5][8]; -+ unsigned short fp_reg_word[8][5]; - /* space for 8 80-bit FP registers */ - }; - -+struct i386_xfp_save { -+ unsigned short fp_control; /* control */ -+ unsigned short fp_status; /* status */ -+ unsigned short fp_tag; /* register tags */ -+ unsigned short fp_opcode; /* opcode of failed instruction */ -+ unsigned int fp_eip; /* eip at failed instruction */ -+ unsigned short fp_cs; /* cs at failed instruction */ -+ unsigned short fp_unused_1; -+ unsigned int fp_dp; /* data address */ -+ unsigned short fp_ds; /* data segment */ -+ unsigned short fp_unused_2; -+ unsigned int fp_mxcsr; /* MXCSR */ -+ unsigned int fp_mxcsr_mask; /* MXCSR_MASK */ -+ unsigned char fp_reg_word[8][16]; -+ /* space for 8 128-bit FP registers */ -+ unsigned char fp_xreg_word[8][16]; -+ /* space for 8 128-bit XMM registers */ -+ unsigned int padding[56]; -+} __attribute__((aligned(16))); -+ - /* - * Control register - */ -@@ -104,5 +124,6 @@ struct i386_fp_regs { - #define FP_SOFT 1 /* software FP emulator */ - #define FP_287 2 /* 80287 */ - #define FP_387 3 /* 80387 or 80486 */ -+#define FP_387X 4 /* FXSAVE/RSTOR-capable */ - - #endif /* _MACH_I386_FP_REG_H_ */ -Index: b/i386/include/mach/i386/thread_status.h -=================================================================== ---- a/i386/include/mach/i386/thread_status.h -+++ b/i386/include/mach/i386/thread_status.h -@@ -111,7 +111,7 @@ struct i386_thread_state { - (sizeof (struct i386_fp_save) + sizeof (struct i386_fp_regs)) - - struct i386_float_state { -- int fpkind; /* FP_NO..FP_387 (readonly) */ -+ int fpkind; /* FP_NO..FP_387X (readonly) */ - int initialized; - unsigned char hw_state[FP_STATE_BYTES]; /* actual "hardware" state */ - int exc_status; /* exception status (readonly) */ diff --git a/debian/patches/20_xmm_support.patch b/debian/patches/20_xmm_support.patch new file mode 100644 index 0000000..48b7ff8 --- /dev/null +++ b/debian/patches/20_xmm_support.patch @@ -0,0 +1,414 @@ +2007-03-03 Samuel Thibault + + Add XMM FPU registers save/restore support. + * i386/include/mach/i386/fp_reg.h (struct i386_fp_regs): Invert array + indices. + (struct i386_xfp_save): New structure. + (FP_387X): New macro. + * i386/i386/thread.h (struct i386_fpsave_state): Add xfp_save_state + member, keep existing fp_save_state and fp_regs members in an unnamed + union member. Move fp_valid member to the end of the structure. + * i386/i386/fpu.h (fxsave, fxrstor): New macros. + (fpu_save_context): Use fxsave() when FPU is FXSR-capable. + * i386/i386/fpu.c: Include + (fp_save, fp_load): Add declaration. + (init_fpu): Add FXSR-capable FPU detection. + (fpu_module_init): Request 16-byte alignment to zinit() for + i386_fpsave_state structures. + (fpu_set_state): Convert FPU state when FPU is FXSR-capable. Free the + just-allocated ifps, not the one currently in use. + (fpu_get_state): Convert FPU state when FPU is FXSR-capable. + (fp_save): Use fxsave() when FPU is FXSR-capable. + (fp_load): Use fxrstor() when FPU is FXSR-capable. + (fp_state_alloc): Add FXSR-aware initialization. + + +Index: b/i386/i386/fpu.c +=================================================================== +--- a/i386/i386/fpu.c ++++ b/i386/i386/fpu.c +@@ -43,6 +43,7 @@ + #include + #include + #include ++#include + #include "cpu_number.h" + + #if 0 +@@ -63,6 +64,10 @@ extern void i386_exception(); + + int fp_kind = FP_387; /* 80387 present */ + zone_t ifps_zone; /* zone for FPU save area */ ++static unsigned long mxcsr_feature_mask = 0xffffffff; /* Always AND user-provided mxcsr with this security mask */ ++ ++void fp_save(thread_t thread); ++void fp_load(thread_t thread); + + #if NCPUS == 1 + volatile thread_t fp_thread = THREAD_NULL; +@@ -130,7 +135,20 @@ init_fpu() + /* + * We have a 387. + */ +- fp_kind = FP_387; ++ if (CPU_HAS_FEATURE(CPU_FEATURE_FXSR)) { ++ static /* because we _need_ alignment */ ++ struct i386_xfp_save save; ++ unsigned long mask; ++ fp_kind = FP_387X; ++ printf("Enabling FXSR\n"); ++ set_cr4(get_cr4() | CR4_OSFXSR); ++ fxsave(&save); ++ mask = save.fp_mxcsr_mask; ++ if (!mask) ++ mask = 0x0000ffbf; ++ mxcsr_feature_mask &= mask; ++ } else ++ fp_kind = FP_387; + } + /* + * Trap wait instructions. Turn off FPU for now. +@@ -152,7 +170,7 @@ init_fpu() + void + fpu_module_init() + { +- ifps_zone = zinit(sizeof(struct i386_fpsave_state), 0, ++ ifps_zone = zinit(sizeof(struct i386_fpsave_state), 16, + THREAD_MAX * sizeof(struct i386_fpsave_state), + THREAD_CHUNK * sizeof(struct i386_fpsave_state), + 0, "i386 fpsave state"); +@@ -183,6 +201,74 @@ ASSERT_IPL(SPL0); + zfree(ifps_zone, (vm_offset_t) fps); + } + ++/* The two following functions were stolen from Linux, and hence are covered ++ * by the GPL */ ++static inline unsigned short ++twd_i387_to_fxsr (unsigned short twd) ++{ ++ unsigned int tmp; /* to avoid 16 bit prefixes in the code */ ++ ++ /* Transform each pair of bits into 01 (valid) or 00 (empty) */ ++ tmp = ~twd; ++ tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ ++ /* and move the valid bits to the lower byte. */ ++ tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ ++ tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ ++ tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ ++ return tmp; ++} ++ ++static inline unsigned long ++twd_fxsr_to_i387 (struct i386_xfp_save *fxsave) ++{ ++ struct { ++ unsigned short significand[4]; ++ unsigned short exponent; ++ unsigned short padding[3]; ++ } *st = NULL; ++ unsigned long tos = (fxsave->fp_status >> 11) & 7; ++ unsigned long twd = (unsigned long) fxsave->fp_tag; ++ unsigned long tag; ++ unsigned long ret = 0xffff0000u; ++ int i; ++ ++#define FPREG_ADDR(f, n) ((void *)&(f)->fp_reg_word + (n) * 16); ++ ++ for (i = 0 ; i < 8 ; i++) { ++ if (twd & 0x1) { ++ st = FPREG_ADDR (fxsave, (i - tos) & 7); ++ ++ switch (st->exponent & 0x7fff) { ++ case 0x7fff: ++ tag = 2; /* Special */ ++ break; ++ case 0x0000: ++ if (!st->significand[0] && ++ !st->significand[1] && ++ !st->significand[2] && ++ !st->significand[3] ) { ++ tag = 1; /* Zero */ ++ } else { ++ tag = 2; /* Special */ ++ } ++ break; ++ default: ++ if (st->significand[3] & 0x8000) { ++ tag = 0; /* Valid */ ++ } else { ++ tag = 2; /* Special */ ++ } ++ break; ++ } ++ } else { ++ tag = 3; /* Empty */ ++ } ++ ret |= (tag << (2 * i)); ++ twd = twd >> 1; ++ } ++ return ret; ++} ++ + /* + * Set the floating-point state for a thread. + * If the thread is not the current thread, it is +@@ -261,16 +347,30 @@ ASSERT_IPL(SPL0); + */ + memset(&ifps->fp_save_state, 0, sizeof(struct i386_fp_save)); + +- ifps->fp_save_state.fp_control = user_fp_state->fp_control; +- ifps->fp_save_state.fp_status = user_fp_state->fp_status; +- ifps->fp_save_state.fp_tag = user_fp_state->fp_tag; +- ifps->fp_save_state.fp_eip = user_fp_state->fp_eip; +- ifps->fp_save_state.fp_cs = user_fp_state->fp_cs; +- ifps->fp_save_state.fp_opcode = user_fp_state->fp_opcode; +- ifps->fp_save_state.fp_dp = user_fp_state->fp_dp; +- ifps->fp_save_state.fp_ds = user_fp_state->fp_ds; +- ifps->fp_regs = *user_fp_regs; +- ifps->fp_valid = TRUE; ++ if (fp_kind == FP_387X) { ++ int i; ++ ++ ifps->xfp_save_state.fp_control = user_fp_state->fp_control; ++ ifps->xfp_save_state.fp_status = user_fp_state->fp_status; ++ ifps->xfp_save_state.fp_tag = twd_i387_to_fxsr(user_fp_state->fp_tag); ++ ifps->xfp_save_state.fp_eip = user_fp_state->fp_eip; ++ ifps->xfp_save_state.fp_cs = user_fp_state->fp_cs; ++ ifps->xfp_save_state.fp_opcode = user_fp_state->fp_opcode; ++ ifps->xfp_save_state.fp_dp = user_fp_state->fp_dp; ++ ifps->xfp_save_state.fp_ds = user_fp_state->fp_ds; ++ for (i=0; i<8; i++) ++ memcpy(&ifps->xfp_save_state.fp_reg_word[i], &user_fp_regs[i], sizeof(user_fp_regs[i])); ++ } else { ++ ifps->fp_save_state.fp_control = user_fp_state->fp_control; ++ ifps->fp_save_state.fp_status = user_fp_state->fp_status; ++ ifps->fp_save_state.fp_tag = user_fp_state->fp_tag; ++ ifps->fp_save_state.fp_eip = user_fp_state->fp_eip; ++ ifps->fp_save_state.fp_cs = user_fp_state->fp_cs; ++ ifps->fp_save_state.fp_opcode = user_fp_state->fp_opcode; ++ ifps->fp_save_state.fp_dp = user_fp_state->fp_dp; ++ ifps->fp_save_state.fp_ds = user_fp_state->fp_ds; ++ ifps->fp_regs = *user_fp_regs; ++ } + + simple_unlock(&pcb->lock); + if (new_ifps != 0) +@@ -340,15 +440,30 @@ ASSERT_IPL(SPL0); + */ + memset(user_fp_state, 0, sizeof(struct i386_fp_save)); + +- user_fp_state->fp_control = ifps->fp_save_state.fp_control; +- user_fp_state->fp_status = ifps->fp_save_state.fp_status; +- user_fp_state->fp_tag = ifps->fp_save_state.fp_tag; +- user_fp_state->fp_eip = ifps->fp_save_state.fp_eip; +- user_fp_state->fp_cs = ifps->fp_save_state.fp_cs; +- user_fp_state->fp_opcode = ifps->fp_save_state.fp_opcode; +- user_fp_state->fp_dp = ifps->fp_save_state.fp_dp; +- user_fp_state->fp_ds = ifps->fp_save_state.fp_ds; +- *user_fp_regs = ifps->fp_regs; ++ if (fp_kind == FP_387X) { ++ int i; ++ ++ user_fp_state->fp_control = ifps->xfp_save_state.fp_control; ++ user_fp_state->fp_status = ifps->xfp_save_state.fp_status; ++ user_fp_state->fp_tag = twd_fxsr_to_i387(&ifps->xfp_save_state); ++ user_fp_state->fp_eip = ifps->xfp_save_state.fp_eip; ++ user_fp_state->fp_cs = ifps->xfp_save_state.fp_cs; ++ user_fp_state->fp_opcode = ifps->xfp_save_state.fp_opcode; ++ user_fp_state->fp_dp = ifps->xfp_save_state.fp_dp; ++ user_fp_state->fp_ds = ifps->xfp_save_state.fp_ds; ++ for (i=0; i<8; i++) ++ memcpy(&user_fp_regs[i], &ifps->xfp_save_state.fp_reg_word[i], sizeof(user_fp_regs[i])); ++ } else { ++ user_fp_state->fp_control = ifps->fp_save_state.fp_control; ++ user_fp_state->fp_status = ifps->fp_save_state.fp_status; ++ user_fp_state->fp_tag = ifps->fp_save_state.fp_tag; ++ user_fp_state->fp_eip = ifps->fp_save_state.fp_eip; ++ user_fp_state->fp_cs = ifps->fp_save_state.fp_cs; ++ user_fp_state->fp_opcode = ifps->fp_save_state.fp_opcode; ++ user_fp_state->fp_dp = ifps->fp_save_state.fp_dp; ++ user_fp_state->fp_ds = ifps->fp_save_state.fp_ds; ++ *user_fp_regs = ifps->fp_regs; ++ } + } + simple_unlock(&pcb->lock); + +@@ -532,7 +647,9 @@ ASSERT_IPL(SPL0); + */ + i386_exception(EXC_ARITHMETIC, + EXC_I386_EXTERR, +- thread->pcb->ims.ifps->fp_save_state.fp_status); ++ fp_kind == FP_387X ? ++ thread->pcb->ims.ifps->xfp_save_state.fp_status : ++ thread->pcb->ims.ifps->fp_save_state.fp_status); + /*NOTREACHED*/ + } + +@@ -554,7 +671,10 @@ fp_save(thread) + if (ifps != 0 && !ifps->fp_valid) { + /* registers are in FPU */ + ifps->fp_valid = TRUE; +- fnsave(&ifps->fp_save_state); ++ if (fp_kind == FP_387X) ++ fxsave(&ifps->xfp_save_state); ++ else ++ fnsave(&ifps->fp_save_state); + } + } + +@@ -595,14 +715,19 @@ ASSERT_IPL(SPL0); + */ + i386_exception(EXC_ARITHMETIC, + EXC_I386_EXTERR, +- thread->pcb->ims.ifps->fp_save_state.fp_status); ++ fp_kind == FP_387X ? ++ thread->pcb->ims.ifps->xfp_save_state.fp_status : ++ thread->pcb->ims.ifps->fp_save_state.fp_status); + /*NOTREACHED*/ + #endif + } else if (! ifps->fp_valid) { + printf("fp_load: invalid FPU state!\n"); + fninit (); + } else { +- frstor(ifps->fp_save_state); ++ if (fp_kind == FP_387X) ++ fxrstor(ifps->xfp_save_state); ++ else ++ frstor(ifps->fp_save_state); + } + ifps->fp_valid = FALSE; /* in FPU */ + } +@@ -624,11 +749,22 @@ fp_state_alloc() + pcb->ims.ifps = ifps; + + ifps->fp_valid = TRUE; +- ifps->fp_save_state.fp_control = (0x037f +- & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC)) +- | (FPC_PC_53|FPC_IC_AFF); +- ifps->fp_save_state.fp_status = 0; +- ifps->fp_save_state.fp_tag = 0xffff; /* all empty */ ++ ++ if (fp_kind == FP_387X) { ++ ifps->xfp_save_state.fp_control = (0x037f ++ & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC)) ++ | (FPC_PC_53|FPC_IC_AFF); ++ ifps->xfp_save_state.fp_status = 0; ++ ifps->xfp_save_state.fp_tag = 0xffff; /* all empty */ ++ if (CPU_HAS_FEATURE(CPU_FEATURE_SSE)) ++ ifps->xfp_save_state.fp_mxcsr = 0x1f80; ++ } else { ++ ifps->fp_save_state.fp_control = (0x037f ++ & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC)) ++ | (FPC_PC_53|FPC_IC_AFF); ++ ifps->fp_save_state.fp_status = 0; ++ ifps->fp_save_state.fp_tag = 0xffff; /* all empty */ ++ } + } + + #if AT386 +Index: b/i386/i386/fpu.h +=================================================================== +--- a/i386/i386/fpu.h ++++ b/i386/i386/fpu.h +@@ -67,6 +67,12 @@ + #define frstor(state) \ + asm volatile("frstor %0" : : "m" (state)) + ++#define fxsave(state) \ ++ asm volatile("fxsave %0" : "=m" (*state)) ++ ++#define fxrstor(state) \ ++ asm volatile("fxrstor %0" : : "m" (state)) ++ + #define fwait() \ + asm("fwait"); + +@@ -86,7 +92,10 @@ + if (ifps != 0 && !ifps->fp_valid) { \ + /* registers are in FPU - save to memory */ \ + ifps->fp_valid = TRUE; \ +- fnsave(&ifps->fp_save_state); \ ++ if (fp_kind == FP_387X) \ ++ fxsave(&ifps->xfp_save_state); \ ++ else \ ++ fnsave(&ifps->fp_save_state); \ + set_ts(); \ + } \ + } +Index: b/i386/i386/thread.h +=================================================================== +--- a/i386/i386/thread.h ++++ b/i386/i386/thread.h +@@ -111,9 +111,14 @@ struct i386_kernel_state { + */ + + struct i386_fpsave_state { ++ union { ++ struct { ++ struct i386_fp_save fp_save_state; ++ struct i386_fp_regs fp_regs; ++ }; ++ struct i386_xfp_save xfp_save_state; ++ }; + boolean_t fp_valid; +- struct i386_fp_save fp_save_state; +- struct i386_fp_regs fp_regs; + }; + + /* +Index: b/i386/include/mach/i386/fp_reg.h +=================================================================== +--- a/i386/include/mach/i386/fp_reg.h ++++ b/i386/include/mach/i386/fp_reg.h +@@ -46,10 +46,30 @@ struct i386_fp_save { + }; + + struct i386_fp_regs { +- unsigned short fp_reg_word[5][8]; ++ unsigned short fp_reg_word[8][5]; + /* space for 8 80-bit FP registers */ + }; + ++struct i386_xfp_save { ++ unsigned short fp_control; /* control */ ++ unsigned short fp_status; /* status */ ++ unsigned short fp_tag; /* register tags */ ++ unsigned short fp_opcode; /* opcode of failed instruction */ ++ unsigned int fp_eip; /* eip at failed instruction */ ++ unsigned short fp_cs; /* cs at failed instruction */ ++ unsigned short fp_unused_1; ++ unsigned int fp_dp; /* data address */ ++ unsigned short fp_ds; /* data segment */ ++ unsigned short fp_unused_2; ++ unsigned int fp_mxcsr; /* MXCSR */ ++ unsigned int fp_mxcsr_mask; /* MXCSR_MASK */ ++ unsigned char fp_reg_word[8][16]; ++ /* space for 8 128-bit FP registers */ ++ unsigned char fp_xreg_word[8][16]; ++ /* space for 8 128-bit XMM registers */ ++ unsigned int padding[56]; ++} __attribute__((aligned(16))); ++ + /* + * Control register + */ +@@ -104,5 +124,6 @@ struct i386_fp_regs { + #define FP_SOFT 1 /* software FP emulator */ + #define FP_287 2 /* 80287 */ + #define FP_387 3 /* 80387 or 80486 */ ++#define FP_387X 4 /* FXSAVE/RSTOR-capable */ + + #endif /* _MACH_I386_FP_REG_H_ */ +Index: b/i386/include/mach/i386/thread_status.h +=================================================================== +--- a/i386/include/mach/i386/thread_status.h ++++ b/i386/include/mach/i386/thread_status.h +@@ -111,7 +111,7 @@ struct i386_thread_state { + (sizeof (struct i386_fp_save) + sizeof (struct i386_fp_regs)) + + struct i386_float_state { +- int fpkind; /* FP_NO..FP_387 (readonly) */ ++ int fpkind; /* FP_NO..FP_387X (readonly) */ + int initialized; + unsigned char hw_state[FP_STATE_BYTES]; /* actual "hardware" state */ + int exc_status; /* exception status (readonly) */ diff --git a/debian/patches/series b/debian/patches/series index 7f434cb..5e0fce4 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -6,6 +6,6 @@ 14_alloc_params.patch 15_mem_obj_proxy.patch 16_ide_multsect.patch -20_mmx_support.patch +20_xmm_support.patch 24_pci_irq_fix.patch 40_iopl_mmap.patch -- cgit v1.2.3