From 8b6d8befa4326106aa730a9a8d4497a964cfe99e Mon Sep 17 00:00:00 2001 From: Justus Winter <4winter@informatik.uni-hamburg.de> Date: Mon, 27 Apr 2015 11:05:52 +0200 Subject: [PATCH gnumach] yyy sysenter prototype --- i386/Makefrag.am | 2 + i386/i386/gdt.c | 17 ++++ i386/i386/gdt.h | 7 +- i386/i386/i386asm.sym | 1 + i386/i386/locore.S | 224 ++++++++++++++++++++++++++++++++++++++++++++++++ i386/i386/pcb.c | 24 +++--- i386/i386/syscall.c | 103 ++++++++++++++++++++++ i386/i386/syscall.h | 7 ++ i386/i386/tss.h | 1 + i386/i386at/conf.c | 8 ++ i386/i386at/model_dep.c | 2 + 11 files changed, 383 insertions(+), 13 deletions(-) create mode 100644 i386/i386/syscall.c create mode 100644 i386/i386/syscall.h diff --git a/i386/Makefrag.am b/i386/Makefrag.am index 4dd6a9f..f59ac29 100644 --- a/i386/Makefrag.am +++ b/i386/Makefrag.am @@ -147,6 +147,8 @@ libkernel_a_SOURCES += \ i386/i386/setjmp.h \ i386/i386/spl.S \ i386/i386/spl.h \ + i386/i386/syscall.c \ + i386/i386/syscall.h \ i386/i386/task.h \ i386/i386/thread.h \ i386/i386/time_stamp.h \ diff --git a/i386/i386/gdt.c b/i386/i386/gdt.c index c895eb3..0f9d0e3 100644 --- a/i386/i386/gdt.c +++ b/i386/i386/gdt.c @@ -57,6 +57,23 @@ gdt_init(void) LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, LINEAR_MAX_KERNEL_ADDRESS - (LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1, ACC_PL_K|ACC_DATA_W, SZ_32); + fill_gdt_descriptor(KERNEL_ENTER_CS, + LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, + LINEAR_MAX_KERNEL_ADDRESS - (LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1, + ACC_PL_K|ACC_CODE_R, SZ_32); /* kernel code segment whose selector syscall_init_cpu() writes to MSR_IA32_SYSENTER_CS */ + fill_gdt_descriptor(KERNEL_ENTER_DS, + LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, + LINEAR_MAX_KERNEL_ADDRESS - (LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1, + ACC_PL_K|ACC_DATA_W, SZ_32); /* kernel data segment in the GDT slot directly after KERNEL_ENTER_CS (0x58 and 0x60 in gdt.h) */ + fill_gdt_descriptor(USER_EXIT_CS, + VM_MIN_ADDRESS, + VM_MAX_ADDRESS-VM_MIN_ADDRESS-4096, + /* XXX LINEAR_...
*/ + ACC_PL_U|ACC_CODE_R, SZ_32); /* user-privilege code segment covering VM_MIN_ADDRESS up to VM_MAX_ADDRESS less one 4096-byte page */ + fill_gdt_descriptor(USER_EXIT_DS, + VM_MIN_ADDRESS, + VM_MAX_ADDRESS-VM_MIN_ADDRESS-4096, + ACC_PL_U|ACC_DATA_W, SZ_32); /* matching user-privilege data segment with the same base and limit */ #ifndef MACH_PV_DESCRIPTORS fill_gdt_descriptor(LINEAR_DS, 0, diff --git a/i386/i386/gdt.h b/i386/i386/gdt.h index d865640..37ca6f5 100644 --- a/i386/i386/gdt.h +++ b/i386/i386/gdt.h @@ -55,7 +55,12 @@ #define USER_GDT 0x48 /* user-defined GDT entries */ #define USER_GDT_SLOTS 2 -#define GDTSZ (USER_GDT/8 + USER_GDT_SLOTS) +#define KERNEL_ENTER_CS (0x58 | SEL_PL_K) /* kernel code */ +#define KERNEL_ENTER_DS (0x60 | SEL_PL_K) /* kernel data */ +#define USER_EXIT_CS (0x68 | SEL_PL_U) /* user code */ +#define USER_EXIT_DS (0x70 | SEL_PL_U) /* user data */ /* these four occupy consecutive GDT slots: SYSENTER/SYSEXIT derive the other three selectors from IA32_SYSENTER_CS (+8, +16, +24; see the Intel SDM) */ + +#define GDTSZ (USER_EXIT_DS/8 + 1) /* index of the last descriptor plus one; the integer division drops the low selector bits (assumes SEL_PL_U < 8) */ extern struct real_descriptor gdt[GDTSZ]; diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym index dd1a2ed..e495d1a 100644 --- a/i386/i386/i386asm.sym +++ b/i386/i386/i386asm.sym @@ -70,6 +70,7 @@ size i386_kernel_state iks size i386_exception_link iel +size i386_saved_state iss offset i386_saved_state r cs offset i386_saved_state r uesp offset i386_saved_state r eax diff --git a/i386/i386/locore.S b/i386/i386/locore.S index cfda86f..d8241a7 100644 --- a/i386/i386/locore.S +++ b/i386/i386/locore.S @@ -521,6 +521,9 @@ _return_to_user: */ _return_from_kernel: + cmpl $0x7fffffff, R_TRAPNO(%esp) /* YYY */ + je return_from_sysenter /* sysenter_entry stores 0x7fffffff as the trap number; a state marked that way is returned through return_from_sysenter */ + _kret_popl_gs: popl %gs /* restore segment registers */ _kret_popl_fs: @@ -978,6 +981,18 @@ ttd_from_iret_i: /* on interrupt stack */ #endif /* MACH_TTD */ +/* User stub for calling the kernel using the trap gate. */ + .globl user_trapgate_stub_start +user_trapgate_stub_start: + popl %ecx /* Pop return address into %ecx. */ + popl %eax /* Pop syscall number into %eax. */ + pushl %ecx /* Push back return address. */ + lcall $7, $0 /* far call through selector 7 -- presumably the kernel's system call gate; confirm */ + subl $4, %esp /* magic */ /* re-reserves the stack word freed when the syscall number was popped above */ + ret + .globl user_trapgate_stub_end +user_trapgate_stub_end: /* syscall_init() copies the bytes between the _start and _end labels into the stub page */ + /* * System call enters through a call gate.
Flags are not saved - * we must shuffle stack to look like trap save area. @@ -1170,6 +1185,215 @@ syscall_addr: /* set error code - read user space */ jmp _take_trap /* treat as a trap */ +/* + * SYSENTER-based system calls. + * + * Calling convention: + * %eax - syscall number + * %ebx - syscall argument 1 + * %ecx - syscall argument 2 + * %edx - syscall argument 3 + * %esi - syscall argument 4 + * %edi - userspace return address + * %ebp - userspace stack pointer + */ + +/* User stub for calling the kernel using the sysenter instruction. */ + .globl user_sysenter_stub_start +user_sysenter_stub_start: + push %ebp + mov %esp, %ebp /* frame pointer: 8(%ebp) is the first word above the return address */ + pushf + push %ebx /* Store callee-saved registers. */ + push %esi + push %edi /* five words are now on the stack below the return address; SE_USER_SKIP (20) in the kernel accounts for them */ + mov 8(%ebp), %eax /* Move syscall number into %eax. */ + mov 12(%ebp), %ebx /* Move first argument into %ebx. */ + mov 16(%ebp), %ecx /* Move second argument into %ecx. */ + mov 20(%ebp), %edx /* Move third argument into %edx. */ + mov 24(%ebp), %esi /* Move fourth argument into %esi. */ + call get_ip /* compute location of sysexit */ +get_ip: pop %edi /* load current ip */ + add $8, %edi /* userspace return address */ /* NOTE(review): 8 has to equal the encoded size of the instructions from get_ip up to and including sysenter -- verify against the assembled stub */ + movl %esp, %ebp /* userspace stack pointer */ + sysenter /* enter the kernel at the address syscall_init_cpu() wrote to MSR_IA32_SYSENTER_EIP (sysenter_entry) */ + pop %edi /* the kernel returns here; undo the pushes made before sysenter, in reverse order */ + pop %esi + pop %ebx + popf + pop %ebp + ret + .globl user_sysenter_stub_end +user_sysenter_stub_end: /* syscall_init() copies the bytes between the _start and _end labels into the stub page */ + +/* + * SYSENTER entry point. + * + * Control enters at `sysenter_entry' with %esp pointing to the + * per-cpu sysenter stack. We store all arguments here. We keep %ebp + * pointing to the top of this structure to copy the arguments.
+ + * + * sysenter stack layout: + * + * sysenter stack base -> EAX + * ECX + * EDX + * EBX + * ESP + * EBP + * ESI + * EDI + * DS + * ES + * FS + * %ebp -> GS + */ +/* Offsets from %ebp */ +#define SE_EAX (4 * 11) /* words 11..4 are the eight registers stored by pusha, in its push order */ +#define SE_ECX (4 * 10) +#define SE_EDX (4 * 9) +#define SE_EBX (4 * 8) +#define SE_ESP (4 * 7) /* %esp as stored by pusha; this macro was misspelled SE_ESX, nothing in this file references it */ +#define SE_EBP (4 * 6) +#define SE_ESI (4 * 5) +#define SE_EDI (4 * 4) +#define SE_DS (4 * 3) /* words 3..0 are the segment registers pushed after pusha */ +#define SE_ES (4 * 2) +#define SE_FS (4 * 1) +#define SE_GS (4 * 0) +#define SE_STACK_POINTER SE_EBP /* the user stub passes its stack pointer in %ebp */ +#define SE_RETURN_ADDRESS SE_EDI /* and its return address in %edi */ + +#define SE_USER_SKIP 20 /* skip past the scratchpad */ /* 5 words: the user stub pushes %ebp, flags, %ebx, %esi and %edi before sysenter */ + +ENTRY(sysenter_entry) + pusha /* save all registers */ + cld /* clear direction flag */ + pushl %ds /* save the segment registers */ + pushl %es + pushl %fs + pushl %gs + mov %esp, %ebp /* to access the sysenter stack */ + + mov %ss,%cx /* switch to kernel data segment */ + mov %cx,%ds + mov %cx,%es + mov %cx,%fs + mov %cx,%gs + + CPU_NUMBER(%edx) + movl CX(EXT(kernel_stack),%edx),%ebx + /* get current kernel stack */ + movl %ebx, %ecx + or $(KERNEL_STACK_SIZE-1),%ecx /* address of the last byte of that stack (assumes stacks are KERNEL_STACK_SIZE-aligned) */ + movl -3-IKS_SIZE(%ecx), %esp /* switch to PCB stack */ + addl $(ISS_SIZE - 16 /* vm86 */ - 6 * 4 /* unused */), %esp + /* point to trap number */ + + /* Populate trap save area.
*/ + pushl $0x7fffffff /* trap number */ /* marker that _return_from_kernel compares against to select return_from_sysenter */ + pushl %eax /* %eax: unused */ + pushl SE_STACK_POINTER(%ebp) /* %ecx: for sysexit */ + pushl SE_RETURN_ADDRESS(%ebp) /* %edx: for sysexit */ + subl $(5 * 4), %esp /* unused */ + pushl SE_DS(%ebp) /* copy the segment registers */ + pushl SE_ES(%ebp) + pushl SE_FS(%ebp) + pushl SE_GS(%ebp) + + xchgl %ebx, %esp /* switch to kernel stack */ + /* %ebx points to user registers */ + negl %eax /* get system call number */ + jl sysenter_mach_call_range + /* out of range if it was positive */ + cmpl EXT(mach_trap_count),%eax + /* check system call table bounds */ + jg sysenter_mach_call_range + /* error if out of range */ + + shll $4,%eax /* manual indexing */ /* scales the index by 16, apparently the size of one mach_trap_table entry -- confirm */ + movl EXT(mach_trap_table)(%eax),%ecx + /* get number of arguments */ + + cmp $4, %ecx /* dispatch on the argument count; the se_args_N labels fall through, so entering at N pushes arguments N down to 1 */ + ja se_args_5plus + je se_args_4 + cmp $2, %ecx + ja se_args_3 + je se_args_2 + cmp $1, %ecx + je se_args_1 + jmp se_args_0 + +se_args_5plus: + + sub $4, %ecx /* skip the four first arguments */ + movl SE_STACK_POINTER(%ebp), %esi + /* get user stack pointer */ + lea (4 /* skip user return address */\ + +4 /* point past last argument */\ + +16 /* skip register arguments */\ + +SE_USER_SKIP)(%esi,%ecx,4),%esi + /* and skip past the userspace + local storage */ + + movl $USER_DS,%edx /* use user data segment for accesses */ + mov %dx,%fs + movl %esp,%edx /* save kernel ESP for error recovery */ + +0: subl $4,%esi /* step down the user stack, so the last argument is copied first */ + RECOVER(sysenter_mach_call_addr_push) + pushl %fs:(%esi) /* push argument on stack */ + loop 0b /* loop for all arguments */ + +se_args_4: + push SE_ESI(%ebp) /* push fourth argument */ +se_args_3: + push SE_EDX(%ebp) /* push third argument */ +se_args_2: + push SE_ECX(%ebp) /* push second argument */ +se_args_1: + push SE_EBX(%ebp) /* push first argument */ +se_args_0: + sti /* xxx: sti/cli where ? */ + call *EXT(mach_trap_table)+4(%eax) + /* call procedure */ + cli /* xxx: sti/cli where ?
*/ + movl %ebx, %esp /* switch to pcb stack */ + movl %eax, R_EAX(%esp) /* save return value */ + jmp _return_from_trap /* check for AST, then... */ +return_from_sysenter: /* return here */ /* reached from _return_from_kernel when the saved trap number is 0x7fffffff */ + popl %gs /* restore segment registers */ + popl %fs + popl %es + popl %ds + popa /* reload the general registers from the save area; %ecx and %edx receive the values stored "for sysexit" */ + sti /* xxx: sti/cli where ? */ + sysexit /* return to user mode */ + +/* + * Address out of range. Change to page fault. + * %esi holds failing address. + */ +sysenter_mach_call_addr_push: + movl %edx,%esp /* clean parameters from stack */ + movl %esi,R_CR2(%ebx) /* set fault address */ + movl $(T_PAGE_FAULT),R_TRAPNO(%ebx) + /* set page-fault trap */ + movl $(T_PF_USER),R_ERR(%ebx) + /* set error code - read user space */ + jmp _take_trap /* treat as a trap */ + +/* + * System call out of range. Treat as invalid-instruction trap. + * (? general protection?) + */ +sysenter_mach_call_range: + movl $(T_INVALID_OPCODE),R_TRAPNO(%ebx) + /* set invalid-operation trap */ + movl $0,R_ERR(%ebx) /* clear error code */ + jmp _take_trap /* treat as a trap */ /* NOTE(review): both error paths replace the 0x7fffffff marker, and sysenter_entry fills the save area only from the trap number downwards -- confirm the trap path has valid eip/cs/efl/uesp/ss to return with */ .data DATA(cpu_features) diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c index dabe481..888012c 100644 --- a/i386/i386/pcb.c +++ b/i386/i386/pcb.c @@ -391,12 +391,12 @@ void pcb_init(thread_t thread) * Guarantee that the bootstrapped thread will be in user * mode. */ - pcb->iss.cs = USER_CS; - pcb->iss.ss = USER_DS; - pcb->iss.ds = USER_DS; - pcb->iss.es = USER_DS; - pcb->iss.fs = USER_DS; - pcb->iss.gs = USER_DS; + pcb->iss.cs = USER_EXIT_CS; + pcb->iss.ss = USER_EXIT_DS; + pcb->iss.ds = USER_EXIT_DS; + pcb->iss.es = USER_EXIT_DS; + pcb->iss.fs = USER_EXIT_DS; + pcb->iss.gs = USER_EXIT_DS; /* new threads now start on the USER_EXIT_* descriptors instead of USER_CS/USER_DS */ pcb->iss.efl = EFL_USER_SET; thread->pcb = pcb; @@ -524,12 +524,12 @@ kern_return_t thread_setstatus( * 386 mode. Set segment registers for flat * 32-bit address space.
*/ - saved_state->cs = USER_CS; - saved_state->ss = USER_DS; - saved_state->ds = USER_DS; - saved_state->es = USER_DS; - saved_state->fs = USER_DS; - saved_state->gs = USER_DS; + saved_state->cs = USER_EXIT_CS; + saved_state->ss = USER_EXIT_DS; + saved_state->ds = USER_EXIT_DS; + saved_state->es = USER_EXIT_DS; + saved_state->fs = USER_EXIT_DS; + saved_state->gs = USER_EXIT_DS; /* thread_setstatus now installs the USER_EXIT_* descriptors instead of USER_CS/USER_DS */ } else { /* diff --git a/i386/i386/syscall.c b/i386/i386/syscall.c new file mode 100644 index 0000000..e9b17d0 --- /dev/null +++ b/i386/i386/syscall.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "syscall.h" /* NOTE(review): the header names of the other #include lines in this file are missing from this copy of the patch; restore them before applying */ + +#include // xxx + +static vm_offset_t msyscall = 0; /* kernel address of the page holding the user-mode syscall stub; 0 until syscall_init() runs */ + +void user_trapgate_stub_start(void); /* start/end labels of the two user stubs defined in locore.S; only their addresses are used */ +void user_trapgate_stub_end(void); + +void user_sysenter_stub_start(void); +void user_sysenter_stub_end(void); + +void +syscall_init(void) +{ /* Allocate one wired, zeroed kernel page, copy the SYSENTER or the trap-gate user stub into it depending on CPU_FEATURE_SEP, then call syscall_init_cpu(). Panics if the page cannot be allocated. */ + kern_return_t kr; + vm_offset_t user_stub_start; + vm_offset_t user_stub_end; + + kr = kmem_alloc_wired(kernel_map, &msyscall, PAGE_SIZE); + if (kr != KERN_SUCCESS) + panic("syscall_init"); + + memset((void *) msyscall, 0, PAGE_SIZE); + + if (CPU_HAS_FEATURE (CPU_FEATURE_SEP)) { + printf ("syscall: using SYSENTER/SYSEXIT\n"); + user_stub_start = (vm_offset_t) user_sysenter_stub_start; + user_stub_end = (vm_offset_t) user_sysenter_stub_end; + } else { + printf ("syscall: using trap gate\n"); + user_stub_start = (vm_offset_t) user_trapgate_stub_start; + user_stub_end = (vm_offset_t) user_trapgate_stub_end; + } + + memcpy((void *) msyscall, (void *) user_stub_start, + (size_t) (user_stub_end - user_stub_start)); /* NOTE(review): the stub length is not checked against PAGE_SIZE */ + + syscall_init_cpu(); +} + +static void +wrmsr(unsigned int msr, unsigned long long val) +{ /* Write VAL to model-specific register MSR: the "c" constraint places MSR in %ecx and, on i386, "A" places the 64-bit VAL in the %edx:%eax pair. */ + __asm__ __volatile__("wrmsr" + : /* no Outputs */ + : "c" (msr), "A" (val)); +} + +#define MSR_IA32_SYSENTER_CS 0x00000174 /* the three MSRs programmed by syscall_init_cpu() */ +#define MSR_IA32_SYSENTER_ESP 0x00000175 +#define MSR_IA32_SYSENTER_EIP 0x00000176 + +extern void sysenter_entry(void); /* kernel SYSENTER entry point, defined in locore.S */ + +void +syscall_init_cpu(void) +{ /* Program the SYSENTER code-segment, stack-pointer and entry-point MSRs; does nothing when the CPU lacks CPU_FEATURE_SEP. */ + if
(! CPU_HAS_FEATURE (CPU_FEATURE_SEP)) + return; /* no SYSENTER support: the MSRs are left untouched */ + + //struct task_tss *tss = curr_ktss (cpu_number ()); + struct task_tss *tss = &ktss; /* NOTE(review): always the single ktss rather than a per-CPU TSS (see the commented-out line above) -- confirm before enabling more than one CPU */ + + wrmsr(MSR_IA32_SYSENTER_CS, KERNEL_ENTER_CS); + wrmsr(MSR_IA32_SYSENTER_ESP, + (unsigned long) tss->sysenter_stack + sizeof tss->sysenter_stack); /* address just past the end of the array, i.e. the initial top of a downward-growing stack */ + wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry); +} + +int +syscall_open(dev_t dev, int flag, io_req_t ior) +{ /* Device open hook: ignores its arguments and always returns 0. */ + return 0; +} + +void +syscall_close(dev_t dev, int flag) +{ /* Device close hook: nothing to release. */ + return; +} + +int +syscall_mmap(dev_t dev, vm_offset_t off, vm_prot_t prot) +{ /* Device mmap hook: returns -1 for a request that includes VM_PROT_WRITE, otherwise the i386_btop() of the physical address backing the stub page. */ + if (prot & VM_PROT_WRITE) + return (-1); + + return (i386_btop(pmap_extract(pmap_kernel(), + (vm_offset_t) msyscall))); /* NOTE(review): off is never consulted, so every offset resolves to the same page -- confirm this is intended */ +} diff --git a/i386/i386/syscall.h b/i386/i386/syscall.h new file mode 100644 index 0000000..de9670c --- /dev/null +++ b/i386/i386/syscall.h @@ -0,0 +1,7 @@ +// XXX + +void syscall_init(void); /* NOTE(review): this header has no include guard and includes nothing that declares dev_t, io_req_t, vm_offset_t or vm_prot_t */ +void syscall_init_cpu(void); +int syscall_open(dev_t dev, int flag, io_req_t ior); +void syscall_close(dev_t dev, int flag); +int syscall_mmap(dev_t dev, vm_offset_t off, vm_prot_t prot); diff --git a/i386/i386/tss.h b/i386/i386/tss.h index ff25f21..8c939c7 100644 --- a/i386/i386/tss.h +++ b/i386/i386/tss.h @@ -76,6 +76,7 @@ struct task_tss struct i386_tss tss; unsigned char iopb[IOPB_BYTES]; unsigned char barrier; + unsigned long sysenter_stack[64]; /* xxx */ /* SYSENTER entry stack; syscall_init_cpu() loads the address just past its end into MSR_IA32_SYSENTER_ESP */ }; diff --git a/i386/i386at/conf.c b/i386/i386at/conf.c index ab4f680..d7f9e6f 100644 --- a/i386/i386at/conf.c +++ b/i386/i386at/conf.c @@ -68,6 +68,9 @@ #define hypcnname "hyp" #endif /* MACH_HYP */ +#include +#define syscall_name "syscall" + /* * List of devices - console must be at slot 0 */ @@ -143,6 +146,11 @@ struct dev_ops dev_name_list[] = nodev }, #endif /* MACH_HYP */ + { syscall_name, syscall_open, syscall_close, nulldev_read, + nulldev_write, nulldev_getstat, nulldev_setstat, + syscall_mmap, + nodev, nulldev, nulldev_portdeath, 0, + nodev }, /* "syscall" device: only open, close and mmap have dedicated handlers; the remaining slots use the nulldev/nodev entries */ }; int dev_name_count = sizeof(dev_name_list)/sizeof(dev_name_list[0]); diff --git
a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c index bc34c9b..210e54d 100644 --- a/i386/i386at/model_dep.c +++ b/i386/i386at/model_dep.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include @@ -197,6 +198,7 @@ void machine_init(void) */ pmap_unmap_page_zero(); #endif + syscall_init(); /* set up the user syscall stub page and, when the CPU supports it, the SYSENTER MSRs */ } /* Conserve power on processor CPU. */ -- 2.1.4