From 55dbc2b5d857d35262ad3116803dfb31b733d031 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Wed, 16 Dec 2009 01:11:51 +0100 Subject: Add Xen support 2009-03-11 Samuel Thibault * i386/i386/vm_param.h (VM_MIN_KERNEL_ADDRESS) [MACH_XEN]: Set to 0x20000000. * i386/i386/i386asm.sym (pfn_list) [VM_MIN_KERNEL_ADDRESS == LINEAR_MIN_KERNEL_ADDRESS]: Define to constant PFN_LIST. 2009-02-27 Samuel Thibault * i386/intel/pmap.c [MACH_HYP] (INVALIDATE_TLB): Call hyp_invlpg instead of flush_tlb when e - s is compile-time known to be PAGE_SIZE. 2008-11-27 Samuel Thibault * i386/configfrag.ac (enable_pae): Enable by default on the Xen platform. 2007-11-14 Samuel Thibault * i386/i386at/model_dep.c (machine_relax): New function. (c_boot_entry): Refuse to boot as dom0. 2007-10-17 Samuel Thibault * i386/i386/fpu.c [MACH_XEN]: Disable unused fpintr(). 2007-08-12 Samuel Thibault * Makefile.am (clib_routines): Add _START. * i386/xen/xen_boothdr: Use _START for VIRT_BASE and PADDR_OFFSET. Add GUEST_VERSION and XEN_ELFNOTE_FEATURES. 2007-06-13 Samuel Thibault * i386/i386/user_ldt.h (user_ldt) [MACH_XEN]: Add alloc field. * i386/i386/user_ldt.c (i386_set_ldt) [MACH_XEN]: Round allocation of LDT to a page, set back LDT pages read/write before freeing them. (user_ldt_free) [MACH_XEN]: Likewise. 2007-04-18 Samuel Thibault * device/ds_routines.c [MACH_HYP]: Add hypervisor block and net devices. 2007-02-19 Thomas Schwinge * i386/xen/Makefrag.am [PLATFORM_xen] (gnumach_LINKFLAGS): Define. * Makefrag.am: Include `xen/Makefrag.am'. * configure.ac: Include `xen/configfrag.ac'. (--enable-platform): Support the `xen' platform. * i386/configfrag.ac: Likewise. * i386/Makefrag.am [PLATFORM_xen]: Include `i386/xen/Makefrag.am'. 2007-02-19 Samuel Thibault Thomas Schwinge * i386/xen/Makefrag.am: New file. * xen/Makefrag.am: Likewise. * xen/configfrag.ac: Likewise. 2007-02-11 (and later dates) Samuel Thibault Xen support * Makefile.am (clib_routines): Add _start. 
* Makefrag.am (include_mach_HEADERS): Add include/mach/xen.h. * device/cons.c (cnputc): Call hyp_console_write. * i386/Makefrag.am (libkernel_a_SOURCES): Move non-Xen source to [PLATFORM_at]. * i386/i386/debug_trace.S: Include * i386/i386/fpu.c [MACH_HYP] (init_fpu): Call set_ts() and clear_ts(), do not enable CR0_EM. * i386/i386/gdt.c: Include and . [MACH_XEN]: Make gdt array extern. [MACH_XEN] (gdt_init): Register gdt with hypervisor. Request 4gb segments assist. Shift la_shift. [MACH_PSEUDO_PHYS] (gdt_init): Shift pfn_list. * i386/i386/gdt.h [MACH_XEN]: Don't define KERNEL_LDT and LINEAR_DS. * i386/i386/i386asm.sym: Include . [MACH_XEN]: Remove KERNEL_LDT, Add shared_info's CPU_CLI, CPU_PENDING, CPU_PENDING_SEL, PENDING, EVTMASK and CR2. * i386/i386/idt.c [MACH_HYP] (idt_init): Register trap table with hypervisor. * i386/i386/idt_inittab.S: Include . [MACH_XEN]: Set IDT_ENTRY() for hypervisor. Set trap table terminator. * i386/i386/ktss.c [MACH_XEN] (ktss_init): Request exception task switch from hypervisor. * i386/i386/ldt.c: Include and [MACH_XEN]: Make ldt array extern. [MACH_XEN] (ldt_init): Set ldt readwrite. [MACH_HYP] (ldt_init): Register ldt with hypervisor. * i386/i386/locore.S: Include . Handle KERNEL_RING == 1 case. [MACH_XEN]: Read hyp_shared_info's CR2 instead of %cr2. [MACH_PSEUDO_PHYS]: Add mfn_to_pfn computation. [MACH_HYP]: Drop Cyrix I/O-based detection. Read cr3 instead of %cr3. Make hypervisor call for pte invalidation. * i386/i386/mp_desc.c: Include . [MACH_HYP] (mp_desc_init): Panic. * i386/i386/pcb.c: Include . [MACH_XEN] (switch_ktss): Request stack switch from hypervisor. [MACH_HYP] (switch_ktss): Request ldt and gdt switch from hypervisor. * i386/i386/phys.c: Include [MACH_PSEUDO_PHYS] (kvtophys): Do page translation. * i386/i386/proc_reg.h [MACH_HYP] (cr3): New declaration. (set_cr3, get_cr3, set_ts, clear_ts): Implement macros. * i386/i386/seg.h [MACH_HYP]: Define KERNEL_RING macro. 
Include [MACH_XEN] (fill_descriptor): Register descriptor with hypervisor. * i386/i386/spl.S: Include and [MACH_XEN] (pic_mask): #define to int_mask. [MACH_XEN] (SETMASK): Implement. * i386/i386/vm_param.h [MACH_XEN] (HYP_VIRT_START): New macro. [MACH_XEN]: Set VM_MAX_KERNEL_ADDRESS to HYP_VIRT_START- LINEAR_MIN_KERNEL_ADDRESS + VM_MIN_KERNEL_ADDRESS. Increase KERNEL_STACK_SIZE and INTSTACK_SIZE to 4 pages. * i386/i386at/conf.c [MACH_HYP]: Remove hardware devices, add hypervisor console device. * i386/i386at/cons_conf.c [MACH_HYP]: Add hypervisor console device. * i386/i386at/model_dep.c: Include , . [MACH_XEN] Include , , , . [MACH_PSEUDO_PHYS]: New boot_info, mfn_list, pfn_list variables. [MACH_XEN]: New la_shift variable. [MACH_HYP] (avail_next, mem_size_init): Drop BIOS skipping mechanism. [MACH_HYP] (machine_init): Call hyp_init(), drop hardware initialization. [MACH_HYP] (machine_idle): Call hyp_idle(). [MACH_HYP] (halt_cpu): Call hyp_halt(). [MACH_HYP] (halt_all_cpus): Call hyp_reboot() or hyp_halt(). [MACH_HYP] (i386at_init): Initialize with hypervisor. [MACH_XEN] (c_boot_entry): Add Xen-specific initialization. [MACH_HYP] (init_alloc_aligned, pmap_valid_page): Drop zones skipping mechanism. * i386/intel/pmap.c: Include . [MACH_PSEUDO_PHYS] (WRITE_PTE): Do page translation. [MACH_HYP] (INVALIDATE_TLB): Request invalidation from hypervisor. [MACH_XEN] (pmap_map_bd, pmap_create, pmap_destroy, pmap_remove_range) (pmap_page_protect, pmap_protect, pmap_enter, pmap_change_wiring) (pmap_attribute_clear, pmap_unmap_page_zero, pmap_collect): Request MMU update from hypervisor. [MACH_XEN] (pmap_bootstrap): Request pagetable initialization from hypervisor. [MACH_XEN] (pmap_set_page_readwrite, pmap_set_page_readonly) (pmap_set_page_readonly_init, pmap_clear_bootstrap_pagetable) (pmap_map_mfn): New functions. * i386/intel/pmap.h [MACH_XEN] (INTEL_PTE_GLOBAL): Disable global page support. [MACH_PSEUDO_PHYS] (pte_to_pa): Do page translation. 
[MACH_XEN] (pmap_set_page_readwrite, pmap_set_page_readonly) (pmap_set_page_readonly_init, pmap_clear_bootstrap_pagetable) (pmap_map_mfn): Declare functions. * i386/i386/xen.h: New file. * i386/xen/xen.c: New file. * i386/xen/xen_boothdr.S: New file. * i386/xen/xen_locore.S: New file. * include/mach/xen.h: New file. * kern/bootstrap.c [MACH_XEN] (boot_info): Declare variable. [MACH_XEN] (bootstrap_create): Rebase multiboot header. * kern/debug.c: Include . [MACH_HYP] (panic): Call hyp_crash() without delay. * linux/dev/include/asm-i386/segment.h [MACH_HYP] (KERNEL_CS) (KERNEL_DS): Use ring 1. * xen/block.c: New file. * xen/block.h: Likewise. * xen/console.c: Likewise. * xen/console.h: Likewise. * xen/evt.c: Likewise. * xen/evt.h: Likewise. * xen/grant.c: Likewise. * xen/grant.h: Likewise. * xen/net.c: Likewise. * xen/net.h: Likewise. * xen/ring.c: Likewise. * xen/ring.h: Likewise. * xen/store.c: Likewise. * xen/store.h: Likewise. * xen/time.c: Likewise. * xen/time.h: Likewise. * xen/xen.c: Likewise. * xen/xen.h: Likewise. * xen/public/COPYING: Import file from Xen. * xen/public/callback.h: Likewise. * xen/public/dom0_ops.h: Likewise. * xen/public/domctl.h: Likewise. * xen/public/elfnote.h: Likewise. * xen/public/elfstructs.h: Likewise. * xen/public/event_channel.h: Likewise. * xen/public/features.h: Likewise. * xen/public/grant_table.h: Likewise. * xen/public/kexec.h: Likewise. * xen/public/libelf.h: Likewise. * xen/public/memory.h: Likewise. * xen/public/nmi.h: Likewise. * xen/public/physdev.h: Likewise. * xen/public/platform.h: Likewise. * xen/public/sched.h: Likewise. * xen/public/sysctl.h: Likewise. * xen/public/trace.h: Likewise. * xen/public/vcpu.h: Likewise. * xen/public/version.h: Likewise. * xen/public/xen-compat.h: Likewise. * xen/public/xen.h: Likewise. * xen/public/xencomm.h: Likewise. * xen/public/xenoprof.h: Likewise. * xen/public/arch-x86/xen-mca.h: Likewise. * xen/public/arch-x86/xen-x86_32.h: Likewise. * xen/public/arch-x86/xen-x86_64.h: Likewise. 
* xen/public/arch-x86/xen.h: Likewise. * xen/public/arch-x86_32.h: Likewise. * xen/public/arch-x86_64.h: Likewise. * xen/public/io/blkif.h: Likewise. * xen/public/io/console.h: Likewise. * xen/public/io/fbif.h: Likewise. * xen/public/io/fsif.h: Likewise. * xen/public/io/kbdif.h: Likewise. * xen/public/io/netif.h: Likewise. * xen/public/io/pciif.h: Likewise. * xen/public/io/protocols.h: Likewise. * xen/public/io/ring.h: Likewise. * xen/public/io/tpmif.h: Likewise. * xen/public/io/xenbus.h: Likewise. * xen/public/io/xs_wire.h: Likewise. --- i386/intel/pmap.c | 388 +++++++++++++++++++++++++++++++++++++++++++++++++++++- i386/intel/pmap.h | 17 +++ 2 files changed, 401 insertions(+), 4 deletions(-) (limited to 'i386/intel') diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c index c633fd9..ee19c4b 100644 --- a/i386/intel/pmap.c +++ b/i386/intel/pmap.c @@ -77,13 +77,18 @@ #include #include +#include #include #include #include #include #include +#ifdef MACH_PSEUDO_PHYS +#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = pte_entry?pa_to_ma(pte_entry):0; +#else /* MACH_PSEUDO_PHYS */ #define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry); +#endif /* MACH_PSEUDO_PHYS */ /* * Private data structures. @@ -325,6 +330,19 @@ lock_data_t pmap_system_lock; #define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */ +#ifdef MACH_HYP +#if 1 +#define INVALIDATE_TLB(pmap, s, e) hyp_mmuext_op_void(MMUEXT_TLB_FLUSH_LOCAL) +#else +#define INVALIDATE_TLB(pmap, s, e) do { \ + if (__builtin_constant_p((e) - (s)) \ + && (e) - (s) == PAGE_SIZE) \ + hyp_invlpg((pmap) == kernel_pmap ? kvtolin(s) : (s)); \ + else \ + hyp_mmuext_op_void(MMUEXT_TLB_FLUSH_LOCAL); \ +} while(0) +#endif +#else /* MACH_HYP */ #if 0 /* It is hard to know when a TLB flush becomes less expensive than a bunch of * invlpgs. But it surely is more expensive than just one invlpg. 
*/ @@ -338,6 +356,7 @@ lock_data_t pmap_system_lock; #else #define INVALIDATE_TLB(pmap, s, e) flush_tlb() #endif +#endif /* MACH_HYP */ #if NCPUS > 1 @@ -507,6 +526,10 @@ vm_offset_t pmap_map_bd(virt, start, end, prot) register pt_entry_t template; register pt_entry_t *pte; int spl; +#ifdef MACH_XEN + int n, i = 0; + struct mmu_update update[HYP_BATCH_MMU_UPDATES]; +#endif /* MACH_XEN */ template = pa_to_pte(start) | INTEL_PTE_NCACHE|INTEL_PTE_WTHRU @@ -521,11 +544,30 @@ vm_offset_t pmap_map_bd(virt, start, end, prot) pte = pmap_pte(kernel_pmap, virt); if (pte == PT_ENTRY_NULL) panic("pmap_map_bd: Invalid kernel address\n"); +#ifdef MACH_XEN + update[i].ptr = kv_to_ma(pte); + update[i].val = pa_to_ma(template); + i++; + if (i == HYP_BATCH_MMU_UPDATES) { + hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF); + if (n != i) + panic("couldn't pmap_map_bd\n"); + i = 0; + } +#else /* MACH_XEN */ WRITE_PTE(pte, template) +#endif /* MACH_XEN */ pte_increment_pa(template); virt += PAGE_SIZE; start += PAGE_SIZE; } +#ifdef MACH_XEN + if (i > HYP_BATCH_MMU_UPDATES) + panic("overflowed array in pmap_map_bd"); + hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF); + if (n != i) + panic("couldn't pmap_map_bd\n"); +#endif /* MACH_XEN */ PMAP_READ_UNLOCK(pmap, spl); return(virt); } @@ -583,6 +625,8 @@ void pmap_bootstrap() /* * Allocate and clear a kernel page directory. */ + /* Note: initial Xen mapping holds at least 512kB free mapped page. + * We use that for directly building our linear mapping. */ #if PAE { vm_offset_t addr; @@ -604,6 +648,53 @@ void pmap_bootstrap() kernel_pmap->dirbase[i] = 0; } +#ifdef MACH_XEN + /* + * Xen may only provide as few as 512KB extra bootstrap linear memory, + * which is far from enough to map all available memory, so we need to + * map more bootstrap linear memory. We here map 1 (resp. 4 for PAE) + * other L1 table(s), thus 4MiB extra memory (resp. 8MiB), which is + * enough for a pagetable mapping 4GiB. 
+ */ +#ifdef PAE +#define NSUP_L1 4 +#else +#define NSUP_L1 1 +#endif + pt_entry_t *l1_map[NSUP_L1]; + { + pt_entry_t *base = (pt_entry_t*) boot_info.pt_base; + int i; + int n_l1map; +#ifdef PAE + pt_entry_t *l2_map = (pt_entry_t*) phystokv(pte_to_pa(base[0])); +#else /* PAE */ + pt_entry_t *l2_map = base; +#endif /* PAE */ + for (n_l1map = 0, i = lin2pdenum(VM_MIN_KERNEL_ADDRESS); i < NPTES; i++) { + if (!(l2_map[i] & INTEL_PTE_VALID)) { + struct mmu_update update; + int j, n; + + l1_map[n_l1map] = (pt_entry_t*) phystokv(pmap_grab_page()); + for (j = 0; j < NPTES; j++) + l1_map[n_l1map][j] = intel_ptob(pfn_to_mfn((i - lin2pdenum(VM_MIN_KERNEL_ADDRESS)) * NPTES + j)) | INTEL_PTE_VALID | INTEL_PTE_WRITE; + pmap_set_page_readonly_init(l1_map[n_l1map]); + if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn (l1_map[n_l1map]))) + panic("couldn't pin page %p(%p)", l1_map[n_l1map], kv_to_ma (l1_map[n_l1map])); + update.ptr = kv_to_ma(&l2_map[i]); + update.val = kv_to_ma(l1_map[n_l1map]) | INTEL_PTE_VALID | INTEL_PTE_WRITE; + hyp_mmu_update(kv_to_la(&update), 1, kv_to_la(&n), DOMID_SELF); + if (n != 1) + panic("couldn't complete bootstrap map"); + /* added the last L1 table, can stop */ + if (++n_l1map >= NSUP_L1) + break; + } + } + } +#endif /* MACH_XEN */ + /* * Allocate and set up the kernel page tables. 
*/ @@ -640,19 +731,42 @@ void pmap_bootstrap() WRITE_PTE(pte, 0); } else +#ifdef MACH_XEN + if (va == (vm_offset_t) &hyp_shared_info) + { + *pte = boot_info.shared_info | INTEL_PTE_VALID | INTEL_PTE_WRITE; + va += INTEL_PGBYTES; + } + else +#endif /* MACH_XEN */ { extern char _start[], etext[]; - if ((va >= (vm_offset_t)_start) + if (((va >= (vm_offset_t) _start) && (va + INTEL_PGBYTES <= (vm_offset_t)etext)) +#ifdef MACH_XEN + || (va >= (vm_offset_t) boot_info.pt_base + && (va + INTEL_PGBYTES <= + (vm_offset_t) ptable + INTEL_PGBYTES)) +#endif /* MACH_XEN */ + ) { WRITE_PTE(pte, pa_to_pte(_kvtophys(va)) | INTEL_PTE_VALID | global); } else { - WRITE_PTE(pte, pa_to_pte(_kvtophys(va)) - | INTEL_PTE_VALID | INTEL_PTE_WRITE | global); +#ifdef MACH_XEN + int i; + for (i = 0; i < NSUP_L1; i++) + if (va == (vm_offset_t) l1_map[i]) + WRITE_PTE(pte, pa_to_pte(_kvtophys(va)) + | INTEL_PTE_VALID | global); + if (i == NSUP_L1) +#endif /* MACH_XEN */ + WRITE_PTE(pte, pa_to_pte(_kvtophys(va)) + | INTEL_PTE_VALID | INTEL_PTE_WRITE | global) + } va += INTEL_PGBYTES; } @@ -662,6 +776,11 @@ void pmap_bootstrap() WRITE_PTE(pte, 0); va += INTEL_PGBYTES; } +#ifdef MACH_XEN + pmap_set_page_readonly_init(ptable); + if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn (ptable))) + panic("couldn't pin page %p(%p)\n", ptable, kv_to_ma (ptable)); +#endif /* MACH_XEN */ } } @@ -669,6 +788,100 @@ void pmap_bootstrap() soon after we return from here. 
*/ } +#ifdef MACH_XEN +/* These are only required because of Xen security policies */ + +/* Set back a page read write */ +void pmap_set_page_readwrite(void *_vaddr) { + vm_offset_t vaddr = (vm_offset_t) _vaddr; + vm_offset_t paddr = kvtophys(vaddr); + vm_offset_t canon_vaddr = phystokv(paddr); + if (hyp_do_update_va_mapping (kvtolin(vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID | INTEL_PTE_WRITE, UVMF_NONE)) + panic("couldn't set hiMMU readwrite for addr %p(%p)\n", vaddr, pa_to_ma (paddr)); + if (canon_vaddr != vaddr) + if (hyp_do_update_va_mapping (kvtolin(canon_vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID | INTEL_PTE_WRITE, UVMF_NONE)) + panic("couldn't set hiMMU readwrite for paddr %p(%p)\n", canon_vaddr, pa_to_ma (paddr)); +} + +/* Set a page read only (so as to pin it for instance) */ +void pmap_set_page_readonly(void *_vaddr) { + vm_offset_t vaddr = (vm_offset_t) _vaddr; + vm_offset_t paddr = kvtophys(vaddr); + vm_offset_t canon_vaddr = phystokv(paddr); + if (*pmap_pde(kernel_pmap, vaddr) & INTEL_PTE_VALID) { + if (hyp_do_update_va_mapping (kvtolin(vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID, UVMF_NONE)) + panic("couldn't set hiMMU readonly for vaddr %p(%p)\n", vaddr, pa_to_ma (paddr)); + } + if (canon_vaddr != vaddr && + *pmap_pde(kernel_pmap, canon_vaddr) & INTEL_PTE_VALID) { + if (hyp_do_update_va_mapping (kvtolin(canon_vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID, UVMF_NONE)) + panic("couldn't set hiMMU readonly for vaddr %p canon_vaddr %p paddr %p (%p)\n", vaddr, canon_vaddr, paddr, pa_to_ma (paddr)); + } +} + +/* This needs to be called instead of pmap_set_page_readonly as long as RC3 + * still points to the bootstrap dirbase. 
*/ +void pmap_set_page_readonly_init(void *_vaddr) { + vm_offset_t vaddr = (vm_offset_t) _vaddr; +#if PAE + pt_entry_t *pdpbase = (void*) boot_info.pt_base; + vm_offset_t dirbase = ptetokv(pdpbase[0]); +#else + vm_offset_t dirbase = boot_info.pt_base; +#endif + struct pmap linear_pmap = { + .dirbase = (void*) dirbase, + }; + /* Modify our future kernel map (can't use update_va_mapping for this)... */ + if (*pmap_pde(kernel_pmap, vaddr) & INTEL_PTE_VALID) + if (!hyp_mmu_update_la (kvtolin(vaddr), pa_to_pte (kv_to_ma(vaddr)) | INTEL_PTE_VALID)) + panic("couldn't set hiMMU readonly for vaddr %p(%p)\n", vaddr, kv_to_ma (vaddr)); + /* ... and the bootstrap map. */ + if (*pmap_pde(&linear_pmap, vaddr) & INTEL_PTE_VALID) + if (hyp_do_update_va_mapping (vaddr, pa_to_pte (kv_to_ma(vaddr)) | INTEL_PTE_VALID, UVMF_NONE)) + panic("couldn't set MMU readonly for vaddr %p(%p)\n", vaddr, kv_to_ma (vaddr)); +} + +void pmap_clear_bootstrap_pagetable(pt_entry_t *base) { + int i; + pt_entry_t *dir; + vm_offset_t va = 0; +#if PAE + int j; +#endif /* PAE */ + if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(base))) + panic("pmap_clear_bootstrap_pagetable: couldn't unpin page %p(%p)\n", base, kv_to_ma(base)); +#if PAE + for (j = 0; j < PDPNUM; j++) + { + pt_entry_t pdpe = base[j]; + if (pdpe & INTEL_PTE_VALID) { + dir = (pt_entry_t *) phystokv(pte_to_pa(pdpe)); +#else /* PAE */ + dir = base; +#endif /* PAE */ + for (i = 0; i < NPTES; i++) { + pt_entry_t pde = dir[i]; + unsigned long pfn = mfn_to_pfn(atop(pde)); + void *pgt = (void*) phystokv(ptoa(pfn)); + if (pde & INTEL_PTE_VALID) + hyp_free_page(pfn, pgt); + va += NPTES * INTEL_PGBYTES; + if (va >= HYP_VIRT_START) + break; + } +#if PAE + hyp_free_page(atop(_kvtophys(dir)), dir); + } else + va += NPTES * NPTES * INTEL_PGBYTES; + if (va >= HYP_VIRT_START) + break; + } +#endif /* PAE */ + hyp_free_page(atop(_kvtophys(base)), base); +} +#endif /* MACH_XEN */ + void pmap_virtual_space(startp, endp) vm_offset_t *startp; vm_offset_t 
*endp; @@ -823,6 +1036,29 @@ pmap_page_table_page_alloc() return pa; } +#ifdef MACH_XEN +void pmap_map_mfn(void *_addr, unsigned long mfn) { + vm_offset_t addr = (vm_offset_t) _addr; + pt_entry_t *pte, *pdp; + vm_offset_t ptp; + if ((pte = pmap_pte(kernel_pmap, addr)) == PT_ENTRY_NULL) { + ptp = phystokv(pmap_page_table_page_alloc()); + pmap_set_page_readonly((void*) ptp); + if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, pa_to_mfn(ptp))) + panic("couldn't pin page %p(%p)\n",ptp,kv_to_ma(ptp)); + pdp = pmap_pde(kernel_pmap, addr); + if (!hyp_mmu_update_pte(kv_to_ma(pdp), + pa_to_pte(kv_to_ma(ptp)) | INTEL_PTE_VALID + | INTEL_PTE_USER + | INTEL_PTE_WRITE)) + panic("%s:%d could not set pde %p(%p) to %p(%p)\n",__FILE__,__LINE__,kvtophys((vm_offset_t)pdp),kv_to_ma(pdp), ptp, pa_to_ma(ptp)); + pte = pmap_pte(kernel_pmap, addr); + } + if (!hyp_mmu_update_pte(kv_to_ma(pte), ptoa(mfn) | INTEL_PTE_VALID | INTEL_PTE_WRITE)) + panic("%s:%d could not set pte %p(%p) to %p(%p)\n",__FILE__,__LINE__,pte,kv_to_ma(pte), ptoa(mfn), pa_to_ma(ptoa(mfn))); +} +#endif /* MACH_XEN */ + /* * Deallocate a page-table page. 
* The page-table page must have all mappings removed, @@ -884,6 +1120,13 @@ pmap_t pmap_create(size) panic("pmap_create"); memcpy(p->dirbase, kernel_page_dir, PDPNUM * INTEL_PGBYTES); +#ifdef MACH_XEN + { + int i; + for (i = 0; i < PDPNUM; i++) + pmap_set_page_readonly((void*) p->dirbase + i * INTEL_PGBYTES); + } +#endif /* MACH_XEN */ #if PAE if (kmem_alloc_wired(kernel_map, @@ -895,6 +1138,9 @@ pmap_t pmap_create(size) for (i = 0; i < PDPNUM; i++) WRITE_PTE(&p->pdpbase[i], pa_to_pte(kvtophys((vm_offset_t) p->dirbase + i * INTEL_PGBYTES)) | INTEL_PTE_VALID); } +#ifdef MACH_XEN + pmap_set_page_readonly(p->pdpbase); +#endif /* MACH_XEN */ #endif /* PAE */ p->ref_count = 1; @@ -954,14 +1200,29 @@ void pmap_destroy(p) if (m == VM_PAGE_NULL) panic("pmap_destroy: pte page not in object"); vm_page_lock_queues(); +#ifdef MACH_XEN + if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa))) + panic("pmap_destroy: couldn't unpin page %p(%p)\n", pa, kv_to_ma(pa)); + pmap_set_page_readwrite((void*) phystokv(pa)); +#endif /* MACH_XEN */ vm_page_free(m); inuse_ptepages_count--; vm_page_unlock_queues(); vm_object_unlock(pmap_object); } } +#ifdef MACH_XEN + { + int i; + for (i = 0; i < PDPNUM; i++) + pmap_set_page_readwrite((void*) p->dirbase + i * INTEL_PGBYTES); + } +#endif /* MACH_XEN */ kmem_free(kernel_map, (vm_offset_t)p->dirbase, PDPNUM * INTEL_PGBYTES); #if PAE +#ifdef MACH_XEN + pmap_set_page_readwrite(p->pdpbase); +#endif /* MACH_XEN */ kmem_free(kernel_map, (vm_offset_t)p->pdpbase, INTEL_PGBYTES); #endif /* PAE */ zfree(pmap_zone, (vm_offset_t) p); @@ -1007,6 +1268,10 @@ void pmap_remove_range(pmap, va, spte, epte) int num_removed, num_unwired; int pai; vm_offset_t pa; +#ifdef MACH_XEN + int n, ii = 0; + struct mmu_update update[HYP_BATCH_MMU_UPDATES]; +#endif /* MACH_XEN */ #if DEBUG_PTE_PAGE if (pmap != kernel_pmap) @@ -1035,7 +1300,19 @@ void pmap_remove_range(pmap, va, spte, epte) register int i = ptes_per_vm_page; register pt_entry_t *lpte = cpte; do { +#ifdef 
MACH_XEN + update[ii].ptr = kv_to_ma(lpte); + update[ii].val = 0; + ii++; + if (ii == HYP_BATCH_MMU_UPDATES) { + hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF); + if (n != ii) + panic("couldn't pmap_remove_range\n"); + ii = 0; + } +#else /* MACH_XEN */ *lpte = 0; +#endif /* MACH_XEN */ lpte++; } while (--i > 0); continue; @@ -1056,7 +1333,19 @@ void pmap_remove_range(pmap, va, spte, epte) do { pmap_phys_attributes[pai] |= *lpte & (PHYS_MODIFIED|PHYS_REFERENCED); +#ifdef MACH_XEN + update[ii].ptr = kv_to_ma(lpte); + update[ii].val = 0; + ii++; + if (ii == HYP_BATCH_MMU_UPDATES) { + hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF); + if (n != ii) + panic("couldn't pmap_remove_range\n"); + ii = 0; + } +#else /* MACH_XEN */ *lpte = 0; +#endif /* MACH_XEN */ lpte++; } while (--i > 0); } @@ -1102,6 +1391,14 @@ void pmap_remove_range(pmap, va, spte, epte) } } +#ifdef MACH_XEN + if (ii > HYP_BATCH_MMU_UPDATES) + panic("overflowed array in pmap_remove_range"); + hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF); + if (n != ii) + panic("couldn't pmap_remove_range\n"); +#endif /* MACH_XEN */ + /* * Update the counts */ @@ -1246,7 +1543,12 @@ void pmap_page_protect(phys, prot) do { pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED); +#ifdef MACH_XEN + if (!hyp_mmu_update_pte(kv_to_ma(pte++), 0)) + panic("%s:%d could not clear pte %p\n",__FILE__,__LINE__,pte-1); +#else /* MACH_XEN */ *pte++ = 0; +#endif /* MACH_XEN */ } while (--i > 0); } @@ -1276,7 +1578,12 @@ void pmap_page_protect(phys, prot) register int i = ptes_per_vm_page; do { +#ifdef MACH_XEN + if (!hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~INTEL_PTE_WRITE)) + panic("%s:%d could not enable write on pte %p\n",__FILE__,__LINE__,pte); +#else /* MACH_XEN */ *pte &= ~INTEL_PTE_WRITE; +#endif /* MACH_XEN */ pte++; } while (--i > 0); @@ -1365,11 +1672,36 @@ void pmap_protect(map, s, e, prot) spte = &spte[ptenum(s)]; epte = &spte[intel_btop(l-s)]; +#ifdef MACH_XEN + int 
n, i = 0; + struct mmu_update update[HYP_BATCH_MMU_UPDATES]; +#endif /* MACH_XEN */ + while (spte < epte) { - if (*spte & INTEL_PTE_VALID) + if (*spte & INTEL_PTE_VALID) { +#ifdef MACH_XEN + update[i].ptr = kv_to_ma(spte); + update[i].val = *spte & ~INTEL_PTE_WRITE; + i++; + if (i == HYP_BATCH_MMU_UPDATES) { + hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF); + if (n != i) + panic("couldn't pmap_protect\n"); + i = 0; + } +#else /* MACH_XEN */ *spte &= ~INTEL_PTE_WRITE; +#endif /* MACH_XEN */ + } spte++; } +#ifdef MACH_XEN + if (i > HYP_BATCH_MMU_UPDATES) + panic("overflowed array in pmap_protect"); + hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF); + if (n != i) + panic("couldn't pmap_protect\n"); +#endif /* MACH_XEN */ } s = l; pde++; @@ -1412,6 +1744,8 @@ if (pmap_debug) printf("pmap(%x, %x)\n", v, pa); if (pmap == PMAP_NULL) return; + if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end)) + panic("pmap_enter(%p, %p) falls in physical memory area!\n", v, pa); if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0 && !wired /* hack for io_wire */ ) { /* @@ -1502,9 +1836,20 @@ Retry: /*XX pdp = &pmap->dirbase[pdenum(v) & ~(i-1)];*/ pdp = pmap_pde(pmap, v); do { +#ifdef MACH_XEN + pmap_set_page_readonly((void *) ptp); + if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn(ptp))) + panic("couldn't pin page %p(%p)\n",ptp,kv_to_ma(ptp)); + if (!hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdp)), + pa_to_pte(pa_to_ma(kvtophys(ptp))) | INTEL_PTE_VALID + | INTEL_PTE_USER + | INTEL_PTE_WRITE)) + panic("%s:%d could not set pde %p(%p,%p) to %p(%p,%p) %p\n",__FILE__,__LINE__, pdp, kvtophys((vm_offset_t)pdp), pa_to_ma(kvtophys((vm_offset_t)pdp)), ptp, kvtophys(ptp), pa_to_ma(kvtophys(ptp)), pa_to_pte(kv_to_ma(ptp))); +#else /* MACH_XEN */ *pdp = pa_to_pte(ptp) | INTEL_PTE_VALID | INTEL_PTE_USER | INTEL_PTE_WRITE; +#endif /* MACH_XEN */ pdp++; ptp += INTEL_PGBYTES; } while (--i > 0); @@ -1544,7 +1889,12 @@ Retry: do { 
if (*pte & INTEL_PTE_MOD) template |= INTEL_PTE_MOD; +#ifdef MACH_XEN + if (!hyp_mmu_update_pte(kv_to_ma(pte), pa_to_ma(template))) + panic("%s:%d could not set pte %p to %p\n",__FILE__,__LINE__,pte,template); +#else /* MACH_XEN */ WRITE_PTE(pte, template) +#endif /* MACH_XEN */ pte++; pte_increment_pa(template); } while (--i > 0); @@ -1649,7 +1999,12 @@ Retry: template |= INTEL_PTE_WIRED; i = ptes_per_vm_page; do { +#ifdef MACH_XEN + if (!(hyp_mmu_update_pte(kv_to_ma(pte), pa_to_ma(template)))) + panic("%s:%d could not set pte %p to %p\n",__FILE__,__LINE__,pte,template); +#else /* MACH_XEN */ WRITE_PTE(pte, template) +#endif /* MACH_XEN */ pte++; pte_increment_pa(template); } while (--i > 0); @@ -1704,7 +2059,12 @@ void pmap_change_wiring(map, v, wired) map->stats.wired_count--; i = ptes_per_vm_page; do { +#ifdef MACH_XEN + if (!(hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~INTEL_PTE_WIRED))) + panic("%s:%d could not wire down pte %p\n",__FILE__,__LINE__,pte); +#else /* MACH_XEN */ *pte &= ~INTEL_PTE_WIRED; +#endif /* MACH_XEN */ pte++; } while (--i > 0); } @@ -1835,7 +2195,17 @@ void pmap_collect(p) register int i = ptes_per_vm_page; register pt_entry_t *pdep = pdp; do { +#ifdef MACH_XEN + unsigned long pte = *pdep; + void *ptable = (void*) ptetokv(pte); + if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0))) + panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1); + if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable))) + panic("couldn't unpin page %p(%p)\n", ptable, pa_to_ma(kvtophys((vm_offset_t)ptable))); + pmap_set_page_readwrite(ptable); +#else /* MACH_XEN */ *pdep++ = 0; +#endif /* MACH_XEN */ } while (--i > 0); } @@ -2052,7 +2422,12 @@ phys_attribute_clear(phys, bits) { register int i = ptes_per_vm_page; do { +#ifdef MACH_XEN + if (!(hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~bits))) + panic("%s:%d could not clear bits %lx from pte %p\n",__FILE__,__LINE__,bits,pte); +#else /* MACH_XEN */ *pte &= ~bits; +#endif /* MACH_XEN 
*/ } while (--i > 0); } PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); @@ -2413,7 +2788,12 @@ pmap_unmap_page_zero () if (!pte) return; assert (pte); +#ifdef MACH_XEN + if (!hyp_mmu_update_pte(kv_to_ma(pte), 0)) + printf("couldn't unmap page 0\n"); +#else /* MACH_XEN */ *pte = 0; INVALIDATE_TLB(kernel_pmap, 0, PAGE_SIZE); +#endif /* MACH_XEN */ } #endif /* i386 */ diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h index 7354a0f..a2b6442 100644 --- a/i386/intel/pmap.h +++ b/i386/intel/pmap.h @@ -126,12 +126,21 @@ typedef unsigned int pt_entry_t; #define INTEL_PTE_NCACHE 0x00000010 #define INTEL_PTE_REF 0x00000020 #define INTEL_PTE_MOD 0x00000040 +#ifdef MACH_XEN +/* Not supported */ +#define INTEL_PTE_GLOBAL 0x00000000 +#else /* MACH_XEN */ #define INTEL_PTE_GLOBAL 0x00000100 +#endif /* MACH_XEN */ #define INTEL_PTE_WIRED 0x00000200 #define INTEL_PTE_PFN 0xfffff000 #define pa_to_pte(a) ((a) & INTEL_PTE_PFN) +#ifdef MACH_PSEUDO_PHYS +#define pte_to_pa(p) ma_to_pa((p) & INTEL_PTE_PFN) +#else /* MACH_PSEUDO_PHYS */ #define pte_to_pa(p) ((p) & INTEL_PTE_PFN) +#endif /* MACH_PSEUDO_PHYS */ #define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1) /* @@ -159,6 +168,14 @@ typedef struct pmap *pmap_t; #define PMAP_NULL ((pmap_t) 0) +#ifdef MACH_XEN +extern void pmap_set_page_readwrite(void *addr); +extern void pmap_set_page_readonly(void *addr); +extern void pmap_set_page_readonly_init(void *addr); +extern void pmap_map_mfn(void *addr, unsigned long mfn); +extern void pmap_clear_bootstrap_pagetable(pt_entry_t *addr); +#endif /* MACH_XEN */ + #if PAE #define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->pdpbase)) #else /* PAE */ -- cgit v1.2.3