diff options
-rw-r--r-- | debian/patches/series | 1 | ||||
-rw-r--r-- | debian/patches/sysenter0001-yyy-sysenter-prototype.patch | 566 |
2 files changed, 567 insertions, 0 deletions
diff --git a/debian/patches/series b/debian/patches/series index cc6414e..11603a5 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -19,3 +19,4 @@ error-handling0003-kern-gracefully-handle-resource-shortage.patch 0006-xxx-drop-cleanup-unused-code.patch 0007-ipc-inline-key-ipc-entry-lookup-functions.patch 0008-update-radix-tree-code.patch +sysenter0001-yyy-sysenter-prototype.patch diff --git a/debian/patches/sysenter0001-yyy-sysenter-prototype.patch b/debian/patches/sysenter0001-yyy-sysenter-prototype.patch new file mode 100644 index 0000000..e9e8667 --- /dev/null +++ b/debian/patches/sysenter0001-yyy-sysenter-prototype.patch @@ -0,0 +1,566 @@ +From 8b6d8befa4326106aa730a9a8d4497a964cfe99e Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Mon, 27 Apr 2015 11:05:52 +0200 +Subject: [PATCH gnumach] yyy sysenter prototype + +--- + i386/Makefrag.am | 2 + + i386/i386/gdt.c | 17 ++++ + i386/i386/gdt.h | 7 +- + i386/i386/i386asm.sym | 1 + + i386/i386/locore.S | 224 ++++++++++++++++++++++++++++++++++++++++++++++++ + i386/i386/pcb.c | 24 +++--- + i386/i386/syscall.c | 103 ++++++++++++++++++++++ + i386/i386/syscall.h | 7 ++ + i386/i386/tss.h | 1 + + i386/i386at/conf.c | 8 ++ + i386/i386at/model_dep.c | 2 + + 11 files changed, 383 insertions(+), 13 deletions(-) + create mode 100644 i386/i386/syscall.c + create mode 100644 i386/i386/syscall.h + +diff --git a/i386/Makefrag.am b/i386/Makefrag.am +index 4dd6a9f..f59ac29 100644 +--- a/i386/Makefrag.am ++++ b/i386/Makefrag.am +@@ -147,6 +147,8 @@ libkernel_a_SOURCES += \ + i386/i386/setjmp.h \ + i386/i386/spl.S \ + i386/i386/spl.h \ ++ i386/i386/syscall.c \ ++ i386/i386/syscall.h \ + i386/i386/task.h \ + i386/i386/thread.h \ + i386/i386/time_stamp.h \ +diff --git a/i386/i386/gdt.c b/i386/i386/gdt.c +index c895eb3..0f9d0e3 100644 +--- a/i386/i386/gdt.c ++++ b/i386/i386/gdt.c +@@ -57,6 +57,23 @@ gdt_init(void) + LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, + 
LINEAR_MAX_KERNEL_ADDRESS - (LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1, + ACC_PL_K|ACC_DATA_W, SZ_32); ++ fill_gdt_descriptor(KERNEL_ENTER_CS, ++ LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, ++ LINEAR_MAX_KERNEL_ADDRESS - (LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1, ++ ACC_PL_K|ACC_CODE_R, SZ_32); ++ fill_gdt_descriptor(KERNEL_ENTER_DS, ++ LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, ++ LINEAR_MAX_KERNEL_ADDRESS - (LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1, ++ ACC_PL_K|ACC_DATA_W, SZ_32); ++ fill_gdt_descriptor(USER_EXIT_CS, ++ VM_MIN_ADDRESS, ++ VM_MAX_ADDRESS-VM_MIN_ADDRESS-4096, ++ /* XXX LINEAR_... */ ++ ACC_PL_U|ACC_CODE_R, SZ_32); ++ fill_gdt_descriptor(USER_EXIT_DS, ++ VM_MIN_ADDRESS, ++ VM_MAX_ADDRESS-VM_MIN_ADDRESS-4096, ++ ACC_PL_U|ACC_DATA_W, SZ_32); + #ifndef MACH_PV_DESCRIPTORS + fill_gdt_descriptor(LINEAR_DS, + 0, +diff --git a/i386/i386/gdt.h b/i386/i386/gdt.h +index d865640..37ca6f5 100644 +--- a/i386/i386/gdt.h ++++ b/i386/i386/gdt.h +@@ -55,7 +55,12 @@ + #define USER_GDT 0x48 /* user-defined GDT entries */ + #define USER_GDT_SLOTS 2 + +-#define GDTSZ (USER_GDT/8 + USER_GDT_SLOTS) ++#define KERNEL_ENTER_CS (0x58 | SEL_PL_K) /* kernel code */ ++#define KERNEL_ENTER_DS (0x60 | SEL_PL_K) /* kernel data */ ++#define USER_EXIT_CS (0x68 | SEL_PL_U) /* user code */ ++#define USER_EXIT_DS (0x70 | SEL_PL_U) /* user data */ ++ ++#define GDTSZ (USER_EXIT_DS/8 + 1) + + extern struct real_descriptor gdt[GDTSZ]; + +diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym +index dd1a2ed..e495d1a 100644 +--- a/i386/i386/i386asm.sym ++++ b/i386/i386/i386asm.sym +@@ -70,6 +70,7 @@ size i386_kernel_state iks + + size i386_exception_link iel + ++size i386_saved_state iss + offset i386_saved_state r cs + offset i386_saved_state r uesp + offset i386_saved_state r eax +diff --git a/i386/i386/locore.S b/i386/i386/locore.S +index cfda86f..d8241a7 100644 +--- a/i386/i386/locore.S ++++ b/i386/i386/locore.S +@@ -521,6 +521,9 
@@ _return_to_user: + */ + + _return_from_kernel: ++ cmpl $0x7fffffff, R_TRAPNO(%esp) /* YYY */ ++ je return_from_sysenter ++ + _kret_popl_gs: + popl %gs /* restore segment registers */ + _kret_popl_fs: +@@ -978,6 +981,18 @@ ttd_from_iret_i: /* on interrupt stack */ + + #endif /* MACH_TTD */ + ++/* User stub for calling the kernel using the trap gate. */ ++ .globl user_trapgate_stub_start ++user_trapgate_stub_start: ++ popl %ecx /* Pop return address into %ecx. */ ++ popl %eax /* Pop syscall number into %eax. */ ++ pushl %ecx /* Push back return address. */ ++ lcall $7, $0 ++ subl $4, %esp /* magic */ ++ ret ++ .globl user_trapgate_stub_end ++user_trapgate_stub_end: ++ + /* + * System call enters through a call gate. Flags are not saved - + * we must shuffle stack to look like trap save area. +@@ -1170,6 +1185,215 @@ syscall_addr: + /* set error code - read user space */ + jmp _take_trap /* treat as a trap */ + ++/* ++ * SYSENTER-based system calls. ++ * ++ * Calling convention: ++ * %eax - syscall number ++ * %ebx - syscall argument 1 ++ * %ecx - syscall argument 2 ++ * %edx - syscall argument 3 ++ * %esi - syscall argument 4 ++ * %edi - userspace return address ++ * %ebp - userspace stack pointer ++ */ ++ ++/* User stub for calling the kernel using the sysenter instruction. */ ++ .globl user_sysenter_stub_start ++user_sysenter_stub_start: ++ push %ebp ++ mov %esp, %ebp ++ pushf ++ push %ebx /* Store callee-saved registers. */ ++ push %esi ++ push %edi ++ mov 8(%ebp), %eax /* Move syscall number into %eax. */ ++ mov 12(%ebp), %ebx /* Move first argument into %ebx. */ ++ mov 16(%ebp), %ecx /* Move second argument into %ecx. */ ++ mov 20(%ebp), %edx /* Move third argument into %edx. */ ++ mov 24(%ebp), %esi /* Move fourth argument into %esi. 
*/ ++ call get_ip /* compute location of sysexit */ ++get_ip: pop %edi /* load current ip */ ++ add $8, %edi /* userspace return address */ ++ movl %esp, %ebp /* userspace stack pointer */ ++ sysenter ++ pop %edi ++ pop %esi ++ pop %ebx ++ popf ++ pop %ebp ++ ret ++ .globl user_sysenter_stub_end ++user_sysenter_stub_end: ++ ++/* ++ * SYSENTER entry point. ++ * ++ * Control enters at `sysenter_entry' with %esp pointing to the ++ * per-cpu sysenter stack. We store all arguments here. We keep %ebp ++ * pointing to the top of this structure to copy the arguments. ++ ++ * ++ * sysenter stack layout: ++ * ++ * sysenter stack base -> EAX ++ * ECX ++ * EDX ++ * EBX ++ * ESP ++ * EBP ++ * ESI ++ * EDI ++ * DS ++ * ES ++ * FS ++ * %ebp -> GS ++ */ ++/* Offsets from %ebp */ ++#define SE_EAX (4 * 11) ++#define SE_ECX (4 * 10) ++#define SE_EDX (4 * 9) ++#define SE_EBX (4 * 8) ++#define SE_ESP (4 * 7) /* %esp slot written by pusha */ ++#define SE_EBP (4 * 6) ++#define SE_ESI (4 * 5) ++#define SE_EDI (4 * 4) ++#define SE_DS (4 * 3) ++#define SE_ES (4 * 2) ++#define SE_FS (4 * 1) ++#define SE_GS (4 * 0) ++#define SE_STACK_POINTER SE_EBP ++#define SE_RETURN_ADDRESS SE_EDI ++ ++#define SE_USER_SKIP 20 /* skip past the scratchpad */ ++ ++ENTRY(sysenter_entry) ++ pusha /* save all registers */ ++ cld /* clear direction flag */ ++ pushl %ds /* save the segment registers */ ++ pushl %es ++ pushl %fs ++ pushl %gs ++ mov %esp, %ebp /* to access the sysenter stack */ ++ ++ mov %ss,%cx /* switch to kernel data segment */ ++ mov %cx,%ds ++ mov %cx,%es ++ mov %cx,%fs ++ mov %cx,%gs ++ ++ CPU_NUMBER(%edx) ++ movl CX(EXT(kernel_stack),%edx),%ebx ++ /* get current kernel stack */ ++ movl %ebx, %ecx ++ or $(KERNEL_STACK_SIZE-1),%ecx ++ movl -3-IKS_SIZE(%ecx), %esp /* switch to PCB stack */ ++ addl $(ISS_SIZE - 16 /* vm86 */ - 6 * 4 /* unused */), %esp ++ /* point to trap number */ ++ ++ /* Populate trap save area.
*/ ++ pushl $0x7fffffff /* trap number */ ++ pushl %eax /* %eax: unused */ ++ pushl SE_STACK_POINTER(%ebp) /* %ecx: for sysexit */ ++ pushl SE_RETURN_ADDRESS(%ebp) /* %edx: for sysexit */ ++ subl $(5 * 4), %esp /* unused */ ++ pushl SE_DS(%ebp) /* copy the segment registers */ ++ pushl SE_ES(%ebp) ++ pushl SE_FS(%ebp) ++ pushl SE_GS(%ebp) ++ ++ xchgl %ebx, %esp /* switch to kernel stack */ ++ /* %ebx points to user registers */ ++ negl %eax /* get system call number */ ++ jl sysenter_mach_call_range ++ /* out of range if it was positive */ ++ cmpl EXT(mach_trap_count),%eax ++ /* check system call table bounds */ ++ jge sysenter_mach_call_range ++ /* error if index >= table size */ ++ ++ shll $4,%eax /* manual indexing */ ++ movl EXT(mach_trap_table)(%eax),%ecx ++ /* get number of arguments */ ++ ++ cmp $4, %ecx ++ ja se_args_5plus ++ je se_args_4 ++ cmp $2, %ecx ++ ja se_args_3 ++ je se_args_2 ++ cmp $1, %ecx ++ je se_args_1 ++ jmp se_args_0 ++ ++se_args_5plus: ++ ++ sub $4, %ecx /* skip the first four arguments */ ++ movl SE_STACK_POINTER(%ebp), %esi ++ /* get user stack pointer */ ++ lea (4 /* skip user return address */\ ++ +4 /* point past last argument */\ ++ +16 /* skip register arguments */\ ++ +SE_USER_SKIP)(%esi,%ecx,4),%esi ++ /* and skip past the userspace ++ local storage */ ++ ++ movl $USER_DS,%edx /* use user data segment for accesses */ ++ mov %dx,%fs ++ movl %esp,%edx /* save kernel ESP for error recovery */ ++ ++0: subl $4,%esi ++ RECOVER(sysenter_mach_call_addr_push) ++ pushl %fs:(%esi) /* push argument on stack */ ++ loop 0b /* loop for all arguments */ ++ ++se_args_4: ++ push SE_ESI(%ebp) /* push fourth argument */ ++se_args_3: ++ push SE_EDX(%ebp) /* push third argument */ ++se_args_2: ++ push SE_ECX(%ebp) /* push second argument */ ++se_args_1: ++ push SE_EBX(%ebp) /* push first argument */ ++se_args_0: ++ sti /* xxx: sti/cli where ? */ ++ call *EXT(mach_trap_table)+4(%eax) ++ /* call procedure */ ++ cli /* xxx: sti/cli where ?
*/ ++ movl %ebx, %esp /* switch to pcb stack */ ++ movl %eax, R_EAX(%esp) /* save return value */ ++ jmp _return_from_trap /* check for AST, then... */ ++return_from_sysenter: /* return here */ ++ popl %gs /* restore segment registers */ ++ popl %fs ++ popl %es ++ popl %ds ++ popa ++ sti /* xxx: sti/cli where ? */ ++ sysexit ++ ++/* ++ * Address out of range. Change to page fault. ++ * %esi holds failing address. ++ */ ++sysenter_mach_call_addr_push: ++ movl %edx,%esp /* clean parameters from stack */ ++ movl %esi,R_CR2(%ebx) /* set fault address */ ++ movl $(T_PAGE_FAULT),R_TRAPNO(%ebx) ++ /* set page-fault trap */ ++ movl $(T_PF_USER),R_ERR(%ebx) ++ /* set error code - read user space */ ++ jmp _take_trap /* treat as a trap */ ++ ++/* ++ * System call out of range. Treat as invalid-instruction trap. ++ * (? general protection?) ++ */ ++sysenter_mach_call_range: ++ movl $(T_INVALID_OPCODE),R_TRAPNO(%ebx) ++ /* set invalid-operation trap */ ++ movl $0,R_ERR(%ebx) /* clear error code */ ++ jmp _take_trap /* treat as a trap */ + + .data + DATA(cpu_features) +diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c +index dabe481..888012c 100644 +--- a/i386/i386/pcb.c ++++ b/i386/i386/pcb.c +@@ -391,12 +391,12 @@ void pcb_init(thread_t thread) + * Guarantee that the bootstrapped thread will be in user + * mode. + */ +- pcb->iss.cs = USER_CS; +- pcb->iss.ss = USER_DS; +- pcb->iss.ds = USER_DS; +- pcb->iss.es = USER_DS; +- pcb->iss.fs = USER_DS; +- pcb->iss.gs = USER_DS; ++ pcb->iss.cs = USER_EXIT_CS; ++ pcb->iss.ss = USER_EXIT_DS; ++ pcb->iss.ds = USER_EXIT_DS; ++ pcb->iss.es = USER_EXIT_DS; ++ pcb->iss.fs = USER_EXIT_DS; ++ pcb->iss.gs = USER_EXIT_DS; + pcb->iss.efl = EFL_USER_SET; + + thread->pcb = pcb; +@@ -524,12 +524,12 @@ kern_return_t thread_setstatus( + * 386 mode. Set segment registers for flat + * 32-bit address space. 
+ */ +- saved_state->cs = USER_CS; +- saved_state->ss = USER_DS; +- saved_state->ds = USER_DS; +- saved_state->es = USER_DS; +- saved_state->fs = USER_DS; +- saved_state->gs = USER_DS; ++ saved_state->cs = USER_EXIT_CS; ++ saved_state->ss = USER_EXIT_DS; ++ saved_state->ds = USER_EXIT_DS; ++ saved_state->es = USER_EXIT_DS; ++ saved_state->fs = USER_EXIT_DS; ++ saved_state->gs = USER_EXIT_DS; + } + else { + /* +diff --git a/i386/i386/syscall.c b/i386/i386/syscall.c +new file mode 100644 +index 0000000..e9b17d0 +--- /dev/null ++++ b/i386/i386/syscall.c +@@ -0,0 +1,103 @@ ++#include <mach/vm_param.h> ++#include <mach/vm_prot.h> ++#include <vm/pmap.h> ++#include <vm/vm_kern.h> ++#include <string.h> ++#include <kern/debug.h> ++ ++#include <machine/tss.h> ++#include <i386/i386/ktss.h> ++#include <i386/i386/gdt.h> ++#include <i386/i386/locore.h> ++ ++#include "syscall.h" ++ ++#include <kern/printf.h> // xxx ++ ++static vm_offset_t msyscall = 0; ++ ++void user_trapgate_stub_start(); ++void user_trapgate_stub_end(); ++ ++void user_sysenter_stub_start(); ++void user_sysenter_stub_end(); ++ ++void ++syscall_init(void) ++{ ++ kern_return_t kr; ++ vm_offset_t user_stub_start; ++ vm_offset_t user_stub_end; ++ ++ kr = kmem_alloc_wired(kernel_map, &msyscall, PAGE_SIZE); ++ if (kr != KERN_SUCCESS) ++ panic("syscall_init"); ++ ++ memset((void *) msyscall, 0, PAGE_SIZE); ++ ++ if (CPU_HAS_FEATURE (CPU_FEATURE_SEP)) { ++ printf ("syscall: using SYSENTER/SYSEXIT\n"); ++ user_stub_start = (vm_offset_t) user_sysenter_stub_start; ++ user_stub_end = (vm_offset_t) user_sysenter_stub_end; ++ } else { ++ printf ("syscall: using trap gate\n"); ++ user_stub_start = (vm_offset_t) user_trapgate_stub_start; ++ user_stub_end = (vm_offset_t) user_trapgate_stub_end; ++ } ++ ++ memcpy((void *) msyscall, (void *) user_stub_start, ++ (size_t) (user_stub_end - user_stub_start)); ++ ++ syscall_init_cpu(); ++} ++ ++static void ++wrmsr(unsigned int msr, unsigned long long val) ++{ ++ __asm__ 
__volatile__("wrmsr" ++ : /* no Outputs */ ++ : "c" (msr), "A" (val)); ++} ++ ++#define MSR_IA32_SYSENTER_CS 0x00000174 ++#define MSR_IA32_SYSENTER_ESP 0x00000175 ++#define MSR_IA32_SYSENTER_EIP 0x00000176 ++ ++extern void sysenter_entry(void); ++ ++void ++syscall_init_cpu(void) ++{ ++ if (! CPU_HAS_FEATURE (CPU_FEATURE_SEP)) ++ return; ++ ++ //struct task_tss *tss = curr_ktss (cpu_number ()); ++ struct task_tss *tss = &ktss; ++ ++ wrmsr(MSR_IA32_SYSENTER_CS, KERNEL_ENTER_CS); ++ wrmsr(MSR_IA32_SYSENTER_ESP, ++ (unsigned long) tss->sysenter_stack + sizeof tss->sysenter_stack); ++ wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry); ++} ++ ++int ++syscall_open(dev_t dev, int flag, io_req_t ior) ++{ ++ return 0; ++} ++ ++void ++syscall_close(dev_t dev, int flag) ++{ ++ return; ++} ++ ++int ++syscall_mmap(dev_t dev, vm_offset_t off, vm_prot_t prot) ++{ ++ if (prot & VM_PROT_WRITE) ++ return (-1); ++ ++ return (i386_btop(pmap_extract(pmap_kernel(), ++ (vm_offset_t) msyscall))); ++} +diff --git a/i386/i386/syscall.h b/i386/i386/syscall.h +new file mode 100644 +index 0000000..de9670c +--- /dev/null ++++ b/i386/i386/syscall.h +@@ -0,0 +1,7 @@ ++// XXX ++ ++void syscall_init(void); ++void syscall_init_cpu(void); ++int syscall_open(dev_t dev, int flag, io_req_t ior); ++void syscall_close(dev_t dev, int flag); ++int syscall_mmap(dev_t dev, vm_offset_t off, vm_prot_t prot); +diff --git a/i386/i386/tss.h b/i386/i386/tss.h +index ff25f21..8c939c7 100644 +--- a/i386/i386/tss.h ++++ b/i386/i386/tss.h +@@ -76,6 +76,7 @@ struct task_tss + struct i386_tss tss; + unsigned char iopb[IOPB_BYTES]; + unsigned char barrier; ++ unsigned long sysenter_stack[64]; /* xxx */ + }; + + +diff --git a/i386/i386at/conf.c b/i386/i386at/conf.c +index ab4f680..d7f9e6f 100644 +--- a/i386/i386at/conf.c ++++ b/i386/i386at/conf.c +@@ -68,6 +68,9 @@ + #define hypcnname "hyp" + #endif /* MACH_HYP */ + ++#include <i386/syscall.h> ++#define syscall_name "syscall" ++ + /* + * List of devices - console 
must be at slot 0 + */ +@@ -143,6 +146,11 @@ struct dev_ops dev_name_list[] = + nodev }, + #endif /* MACH_HYP */ + ++ { syscall_name, syscall_open, syscall_close, nulldev_read, ++ nulldev_write, nulldev_getstat, nulldev_setstat, ++ syscall_mmap, ++ nodev, nulldev, nulldev_portdeath, 0, ++ nodev }, + }; + int dev_name_count = sizeof(dev_name_list)/sizeof(dev_name_list[0]); + +diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c +index bc34c9b..210e54d 100644 +--- a/i386/i386at/model_dep.c ++++ b/i386/i386at/model_dep.c +@@ -63,6 +63,7 @@ + #include <i386/proc_reg.h> + #include <i386/locore.h> + #include <i386/model_dep.h> ++#include <i386/syscall.h> + #include <i386at/autoconf.h> + #include <i386at/idt.h> + #include <i386at/int_init.h> +@@ -197,6 +198,7 @@ void machine_init(void) + */ + pmap_unmap_page_zero(); + #endif ++ syscall_init(); + } + + /* Conserve power on processor CPU. */ +-- +2.1.4 + |