summaryrefslogtreecommitdiff
path: root/i386/intel
diff options
context:
space:
mode:
authorSamuel Thibault <samuel.thibault@ens-lyon.org>2009-12-16 01:11:51 +0100
committerSamuel Thibault <samuel.thibault@ens-lyon.org>2009-12-16 01:11:51 +0100
commit55dbc2b5d857d35262ad3116803dfb31b733d031 (patch)
tree9b9a7d7952ff74855de2a6c348e9c856a531158d /i386/intel
parent870925205c78415dc4c594bfae9de8eb31745b81 (diff)
Add Xen support
2009-03-11 Samuel Thibault <samuel.thibault@ens-lyon.org> * i386/i386/vm_param.h (VM_MIN_KERNEL_ADDRESS) [MACH_XEN]: Set to 0x20000000. * i386/i386/i386asm.sym (pfn_list) [VM_MIN_KERNEL_ADDRESS == LINEAR_MIN_KERNEL_ADDRESS]: Define to constant PFN_LIST. 2009-02-27 Samuel Thibault <samuel.thibault@ens-lyon.org> * i386/intel/pmap.c [MACH_HYP] (INVALIDATE_TLB): Call hyp_invlpg instead of flush_tlb when e - s is compile-time known to be PAGE_SIZE. 2008-11-27 Samuel Thibault <samuel.thibault@ens-lyon.org> * i386/configfrag.ac (enable_pae): Enable by default on the Xen platform. 2007-11-14 Samuel Thibault <samuel.thibault@ens-lyon.org> * i386/i386at/model_dep.c (machine_relax): New function. (c_boot_entry): Refuse to boot as dom0. 2007-10-17 Samuel Thibault <samuel.thibault@ens-lyon.org> * i386/i386/fpu.c [MACH_XEN]: Disable unused fpintr(). 2007-08-12 Samuel Thibault <samuel.thibault@ens-lyon.org> * Makefile.am (clib_routines): Add _START. * i386/xen/xen_boothdr: Use _START for VIRT_BASE and PADDR_OFFSET. Add GUEST_VERSION and XEN_ELFNOTE_FEATURES. 2007-06-13 Samuel Thibault <samuel.thibault@ens-lyon.org> * i386/i386/user_ldt.h (user_ldt) [MACH_XEN]: Add alloc field. * i386/i386/user_ldt.c (i386_set_ldt) [MACH_XEN]: Round allocation of LDT to a page, set back LDT pages read/write before freeing them. (user_ldt_free) [MACH_XEN]: Likewise. 2007-04-18 Samuel Thibault <samuel.thibault@ens-lyon.org> * device/ds_routines.c [MACH_HYP]: Add hypervisor block and net devices. 2007-02-19 Thomas Schwinge <tschwinge@gnu.org> * i386/xen/Makefrag.am [PLATFORM_xen] (gnumach_LINKFLAGS): Define. * Makefrag.am: Include `xen/Makefrag.am'. * configure.ac: Include `xen/configfrag.ac'. (--enable-platform): Support the `xen' platform. * i386/configfrag.ac: Likewise. * i386/Makefrag.am [PLATFORM_xen]: Include `i386/xen/Makefrag.am'. 2007-02-19 Samuel Thibault <samuel.thibault@ens-lyon.org> Thomas Schwinge <tschwinge@gnu.org> * i386/xen/Makefrag.am: New file. * xen/Makefrag.am: Likewise. * xen/configfrag.ac: Likewise. 2007-02-11 (and later dates) Samuel Thibault <samuel.thibault@ens-lyon.org> Xen support * Makefile.am (clib_routines): Add _start. * Makefrag.am (include_mach_HEADERS): Add include/mach/xen.h. * device/cons.c (cnputc): Call hyp_console_write. * i386/Makefrag.am (libkernel_a_SOURCES): Move non-Xen source to [PLATFORM_at]. * i386/i386/debug_trace.S: Include <i386/xen.h> * i386/i386/fpu.c [MACH_HYP] (init_fpu): Call set_ts() and clear_ts(), do not enable CR0_EM. * i386/i386/gdt.c: Include <mach/xen.h> and <intel/pmap.h>. [MACH_XEN]: Make gdt array extern. [MACH_XEN] (gdt_init): Register gdt with hypervisor. Request 4gb segments assist. Shift la_shift. [MACH_PSEUDO_PHYS] (gdt_init): Shift pfn_list. * i386/i386/gdt.h [MACH_XEN]: Don't define KERNEL_LDT and LINEAR_DS. * i386/i386/i386asm.sym: Include <i386/xen.h>. [MACH_XEN]: Remove KERNEL_LDT, Add shared_info's CPU_CLI, CPU_PENDING, CPU_PENDING_SEL, PENDING, EVTMASK and CR2. * i386/i386/idt.c [MACH_HYP] (idt_init): Register trap table with hypervisor. * i386/i386/idt_inittab.S: Include <i386/i386asm.h>. [MACH_XEN]: Set IDT_ENTRY() for hypervisor. Set trap table terminator. * i386/i386/ktss.c [MACH_XEN] (ktss_init): Request exception task switch from hypervisor. * i386/i386/ldt.c: Include <mach/xen.h> and <intel/pmap.h> [MACH_XEN]: Make ldt array extern. [MACH_XEN] (ldt_init): Set ldt readwrite. [MACH_HYP] (ldt_init): Register ldt with hypervisor. * i386/i386/locore.S: Include <i386/xen.h>. Handle KERNEL_RING == 1 case. [MACH_XEN]: Read hyp_shared_info's CR2 instead of %cr2. [MACH_PSEUDO_PHYS]: Add mfn_to_pfn computation. [MACH_HYP]: Drop Cyrix I/O-based detection. Read cr3 instead of %cr3. Make hypervisor call for pte invalidation. * i386/i386/mp_desc.c: Include <mach/xen.h>. [MACH_HYP] (mp_desc_init): Panic. * i386/i386/pcb.c: Include <mach/xen.h>. [MACH_XEN] (switch_ktss): Request stack switch from hypervisor. [MACH_HYP] (switch_ktss): Request ldt and gdt switch from hypervisor. * i386/i386/phys.c: Include <mach/xen.h> [MACH_PSEUDO_PHYS] (kvtophys): Do page translation. * i386/i386/proc_reg.h [MACH_HYP] (cr3): New declaration. (set_cr3, get_cr3, set_ts, clear_ts): Implement macros. * i386/i386/seg.h [MACH_HYP]: Define KERNEL_RING macro. Include <mach/xen.h> [MACH_XEN] (fill_descriptor): Register descriptor with hypervisor. * i386/i386/spl.S: Include <i386/xen.h> and <i386/i386/asm.h> [MACH_XEN] (pic_mask): #define to int_mask. [MACH_XEN] (SETMASK): Implement. * i386/i386/vm_param.h [MACH_XEN] (HYP_VIRT_START): New macro. [MACH_XEN]: Set VM_MAX_KERNEL_ADDRESS to HYP_VIRT_START- LINEAR_MIN_KERNEL_ADDRESS + VM_MIN_KERNEL_ADDRESS. Increase KERNEL_STACK_SIZE and INTSTACK_SIZE to 4 pages. * i386/i386at/conf.c [MACH_HYP]: Remove hardware devices, add hypervisor console device. * i386/i386at/cons_conf.c [MACH_HYP]: Add hypervisor console device. * i386/i386at/model_dep.c: Include <sys/types.h>, <mach/xen.h>. [MACH_XEN] Include <xen/console.h>, <xen/store.h>, <xen/evt.h>, <xen/xen.h>. [MACH_PSEUDO_PHYS]: New boot_info, mfn_list, pfn_list variables. [MACH_XEN]: New la_shift variable. [MACH_HYP] (avail_next, mem_size_init): Drop BIOS skipping mecanism. [MACH_HYP] (machine_init): Call hyp_init(), drop hardware initialization. [MACH_HYP] (machine_idle): Call hyp_idle(). [MACH_HYP] (halt_cpu): Call hyp_halt(). [MACH_HYP] (halt_all_cpus): Call hyp_reboot() or hyp_halt(). [MACH_HYP] (i386at_init): Initialize with hypervisor. [MACH_XEN] (c_boot_entry): Add Xen-specific initialization. [MACH_HYP] (init_alloc_aligned, pmap_valid_page): Drop zones skipping mecanism. * i386/intel/pmap.c: Include <mach/xen.h>. [MACH_PSEUDO_PHYS] (WRITE_PTE): Do page translation. [MACH_HYP] (INVALIDATE_TLB): Request invalidation from hypervisor. [MACH_XEN] (pmap_map_bd, pmap_create, pmap_destroy, pmap_remove_range) (pmap_page_protect, pmap_protect, pmap_enter, pmap_change_wiring) (pmap_attribute_clear, pmap_unmap_page_zero, pmap_collect): Request MMU update from hypervisor. [MACH_XEN] (pmap_bootstrap): Request pagetable initialization from hypervisor. [MACH_XEN] (pmap_set_page_readwrite, pmap_set_page_readonly) (pmap_set_page_readonly_init, pmap_clear_bootstrap_pagetable) (pmap_map_mfn): New functions. * i386/intel/pmap.h [MACH_XEN] (INTEL_PTE_GLOBAL): Disable global page support. [MACH_PSEUDO_PHYS] (pte_to_pa): Do page translation. [MACH_XEN] (pmap_set_page_readwrite, pmap_set_page_readonly) (pmap_set_page_readonly_init, pmap_clear_bootstrap_pagetable) (pmap_map_mfn): Declare functions. * i386/i386/xen.h: New file. * i386/xen/xen.c: New file. * i386/xen/xen_boothdr.S: New file. * i386/xen/xen_locore.S: New file. * include/mach/xen.h: New file. * kern/bootstrap.c [MACH_XEN] (boot_info): Declare variable. [MACH_XEN] (bootstrap_create): Rebase multiboot header. * kern/debug.c: Include <mach/xen.h>. [MACH_HYP] (panic): Call hyp_crash() without delay. * linux/dev/include/asm-i386/segment.h [MACH_HYP] (KERNEL_CS) (KERNEL_DS): Use ring 1. * xen/block.c: New file. * xen/block.h: Likewise. * xen/console.c: Likewise. * xen/console.h: Likewise. * xen/evt.c: Likewise. * xen/evt.h: Likewise. * xen/grant.c: Likewise. * xen/grant.h: Likewise. * xen/net.c: Likewise. * xen/net.h: Likewise. * xen/ring.c: Likewise. * xen/ring.h: Likewise. * xen/store.c: Likewise. * xen/store.h: Likewise. * xen/time.c: Likewise. * xen/time.h: Likewise. * xen/xen.c: Likewise. * xen/xen.h: Likewise. * xen/public/COPYING: Import file from Xen. * xen/public/callback.h: Likewise. * xen/public/dom0_ops.h: Likewise. * xen/public/domctl.h: Likewise. * xen/public/elfnote.h: Likewise. * xen/public/elfstructs.h: Likewise. * xen/public/event_channel.h: Likewise. * xen/public/features.h: Likewise. * xen/public/grant_table.h: Likewise. * xen/public/kexec.h: Likewise. * xen/public/libelf.h: Likewise. * xen/public/memory.h: Likewise. * xen/public/nmi.h: Likewise. * xen/public/physdev.h: Likewise. * xen/public/platform.h: Likewise. * xen/public/sched.h: Likewise. * xen/public/sysctl.h: Likewise. * xen/public/trace.h: Likewise. * xen/public/vcpu.h: Likewise. * xen/public/version.h: Likewise. * xen/public/xen-compat.h: Likewise. * xen/public/xen.h: Likewise. * xen/public/xencomm.h: Likewise. * xen/public/xenoprof.h: Likewise. * xen/public/arch-x86/xen-mca.h: Likewise. * xen/public/arch-x86/xen-x86_32.h: Likewise. * xen/public/arch-x86/xen-x86_64.h: Likewise. * xen/public/arch-x86/xen.h: Likewise. * xen/public/arch-x86_32.h: Likewise. * xen/public/arch-x86_64.h: Likewise. * xen/public/io/blkif.h: Likewise. * xen/public/io/console.h: Likewise. * xen/public/io/fbif.h: Likewise. * xen/public/io/fsif.h: Likewise. * xen/public/io/kbdif.h: Likewise. * xen/public/io/netif.h: Likewise. * xen/public/io/pciif.h: Likewise. * xen/public/io/protocols.h: Likewise. * xen/public/io/ring.h: Likewise. * xen/public/io/tpmif.h: Likewise. * xen/public/io/xenbus.h: Likewise. * xen/public/io/xs_wire.h: Likewise.
Diffstat (limited to 'i386/intel')
-rw-r--r--i386/intel/pmap.c388
-rw-r--r--i386/intel/pmap.h17
2 files changed, 401 insertions, 4 deletions
diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
index c633fd9..ee19c4b 100644
--- a/i386/intel/pmap.c
+++ b/i386/intel/pmap.c
@@ -77,13 +77,18 @@
#include <vm/vm_user.h>
#include <mach/machine/vm_param.h>
+#include <mach/xen.h>
#include <machine/thread.h>
#include <i386/cpu_number.h>
#include <i386/proc_reg.h>
#include <i386/locore.h>
#include <i386/model_dep.h>
+#ifdef MACH_PSEUDO_PHYS
+#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = pte_entry?pa_to_ma(pte_entry):0;
+#else /* MACH_PSEUDO_PHYS */
#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry);
+#endif /* MACH_PSEUDO_PHYS */
/*
* Private data structures.
@@ -325,6 +330,19 @@ lock_data_t pmap_system_lock;
#define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
+#ifdef MACH_HYP
+#if 1
+#define INVALIDATE_TLB(pmap, s, e) hyp_mmuext_op_void(MMUEXT_TLB_FLUSH_LOCAL)
+#else
+#define INVALIDATE_TLB(pmap, s, e) do { \
+ if (__builtin_constant_p((e) - (s)) \
+ && (e) - (s) == PAGE_SIZE) \
+ hyp_invlpg((pmap) == kernel_pmap ? kvtolin(s) : (s)); \
+ else \
+ hyp_mmuext_op_void(MMUEXT_TLB_FLUSH_LOCAL); \
+} while(0)
+#endif
+#else /* MACH_HYP */
#if 0
/* It is hard to know when a TLB flush becomes less expensive than a bunch of
* invlpgs. But it surely is more expensive than just one invlpg. */
@@ -338,6 +356,7 @@ lock_data_t pmap_system_lock;
#else
#define INVALIDATE_TLB(pmap, s, e) flush_tlb()
#endif
+#endif /* MACH_HYP */
#if NCPUS > 1
@@ -507,6 +526,10 @@ vm_offset_t pmap_map_bd(virt, start, end, prot)
register pt_entry_t template;
register pt_entry_t *pte;
int spl;
+#ifdef MACH_XEN
+ int n, i = 0;
+ struct mmu_update update[HYP_BATCH_MMU_UPDATES];
+#endif /* MACH_XEN */
template = pa_to_pte(start)
| INTEL_PTE_NCACHE|INTEL_PTE_WTHRU
@@ -521,11 +544,30 @@ vm_offset_t pmap_map_bd(virt, start, end, prot)
pte = pmap_pte(kernel_pmap, virt);
if (pte == PT_ENTRY_NULL)
panic("pmap_map_bd: Invalid kernel address\n");
+#ifdef MACH_XEN
+ update[i].ptr = kv_to_ma(pte);
+ update[i].val = pa_to_ma(template);
+ i++;
+ if (i == HYP_BATCH_MMU_UPDATES) {
+ hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF);
+ if (n != i)
+ panic("couldn't pmap_map_bd\n");
+ i = 0;
+ }
+#else /* MACH_XEN */
WRITE_PTE(pte, template)
+#endif /* MACH_XEN */
pte_increment_pa(template);
virt += PAGE_SIZE;
start += PAGE_SIZE;
}
+#ifdef MACH_XEN
+ if (i > HYP_BATCH_MMU_UPDATES)
+ panic("overflowed array in pmap_map_bd");
+ hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF);
+ if (n != i)
+ panic("couldn't pmap_map_bd\n");
+#endif /* MACH_XEN */
PMAP_READ_UNLOCK(pmap, spl);
return(virt);
}
@@ -583,6 +625,8 @@ void pmap_bootstrap()
/*
* Allocate and clear a kernel page directory.
*/
+ /* Note: initial Xen mapping holds at least 512kB free mapped page.
+ * We use that for directly building our linear mapping. */
#if PAE
{
vm_offset_t addr;
@@ -604,6 +648,53 @@ void pmap_bootstrap()
kernel_pmap->dirbase[i] = 0;
}
+#ifdef MACH_XEN
+ /*
+ * Xen may only provide as few as 512KB extra bootstrap linear memory,
+ * which is far from enough to map all available memory, so we need to
+ * map more bootstrap linear memory. We here map 1 (resp. 4 for PAE)
+ * other L1 table(s), thus 4MiB extra memory (resp. 8MiB), which is
+ * enough for a pagetable mapping 4GiB.
+ */
+#ifdef PAE
+#define NSUP_L1 4
+#else
+#define NSUP_L1 1
+#endif
+ pt_entry_t *l1_map[NSUP_L1];
+ {
+ pt_entry_t *base = (pt_entry_t*) boot_info.pt_base;
+ int i;
+ int n_l1map;
+#ifdef PAE
+ pt_entry_t *l2_map = (pt_entry_t*) phystokv(pte_to_pa(base[0]));
+#else /* PAE */
+ pt_entry_t *l2_map = base;
+#endif /* PAE */
+ for (n_l1map = 0, i = lin2pdenum(VM_MIN_KERNEL_ADDRESS); i < NPTES; i++) {
+ if (!(l2_map[i] & INTEL_PTE_VALID)) {
+ struct mmu_update update;
+ int j, n;
+
+ l1_map[n_l1map] = (pt_entry_t*) phystokv(pmap_grab_page());
+ for (j = 0; j < NPTES; j++)
+ l1_map[n_l1map][j] = intel_ptob(pfn_to_mfn((i - lin2pdenum(VM_MIN_KERNEL_ADDRESS)) * NPTES + j)) | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+ pmap_set_page_readonly_init(l1_map[n_l1map]);
+ if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn (l1_map[n_l1map])))
+ panic("couldn't pin page %p(%p)", l1_map[n_l1map], kv_to_ma (l1_map[n_l1map]));
+ update.ptr = kv_to_ma(&l2_map[i]);
+ update.val = kv_to_ma(l1_map[n_l1map]) | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+ hyp_mmu_update(kv_to_la(&update), 1, kv_to_la(&n), DOMID_SELF);
+ if (n != 1)
+ panic("couldn't complete bootstrap map");
+ /* added the last L1 table, can stop */
+ if (++n_l1map >= NSUP_L1)
+ break;
+ }
+ }
+ }
+#endif /* MACH_XEN */
+
/*
* Allocate and set up the kernel page tables.
*/
@@ -640,19 +731,42 @@ void pmap_bootstrap()
WRITE_PTE(pte, 0);
}
else
+#ifdef MACH_XEN
+ if (va == (vm_offset_t) &hyp_shared_info)
+ {
+ *pte = boot_info.shared_info | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+ va += INTEL_PGBYTES;
+ }
+ else
+#endif /* MACH_XEN */
{
extern char _start[], etext[];
- if ((va >= (vm_offset_t)_start)
+ if (((va >= (vm_offset_t) _start)
&& (va + INTEL_PGBYTES <= (vm_offset_t)etext))
+#ifdef MACH_XEN
+ || (va >= (vm_offset_t) boot_info.pt_base
+ && (va + INTEL_PGBYTES <=
+ (vm_offset_t) ptable + INTEL_PGBYTES))
+#endif /* MACH_XEN */
+ )
{
WRITE_PTE(pte, pa_to_pte(_kvtophys(va))
| INTEL_PTE_VALID | global);
}
else
{
- WRITE_PTE(pte, pa_to_pte(_kvtophys(va))
- | INTEL_PTE_VALID | INTEL_PTE_WRITE | global);
+#ifdef MACH_XEN
+ int i;
+ for (i = 0; i < NSUP_L1; i++)
+ if (va == (vm_offset_t) l1_map[i])
+ WRITE_PTE(pte, pa_to_pte(_kvtophys(va))
+ | INTEL_PTE_VALID | global);
+ if (i == NSUP_L1)
+#endif /* MACH_XEN */
+ WRITE_PTE(pte, pa_to_pte(_kvtophys(va))
+ | INTEL_PTE_VALID | INTEL_PTE_WRITE | global)
+
}
va += INTEL_PGBYTES;
}
@@ -662,6 +776,11 @@ void pmap_bootstrap()
WRITE_PTE(pte, 0);
va += INTEL_PGBYTES;
}
+#ifdef MACH_XEN
+ pmap_set_page_readonly_init(ptable);
+ if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn (ptable)))
+ panic("couldn't pin page %p(%p)\n", ptable, kv_to_ma (ptable));
+#endif /* MACH_XEN */
}
}
@@ -669,6 +788,100 @@ void pmap_bootstrap()
soon after we return from here. */
}
+#ifdef MACH_XEN
+/* These are only required because of Xen security policies */
+
+/* Set back a page read write */
+void pmap_set_page_readwrite(void *_vaddr) {
+ vm_offset_t vaddr = (vm_offset_t) _vaddr;
+ vm_offset_t paddr = kvtophys(vaddr);
+ vm_offset_t canon_vaddr = phystokv(paddr);
+ if (hyp_do_update_va_mapping (kvtolin(vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID | INTEL_PTE_WRITE, UVMF_NONE))
+ panic("couldn't set hiMMU readwrite for addr %p(%p)\n", vaddr, pa_to_ma (paddr));
+ if (canon_vaddr != vaddr)
+ if (hyp_do_update_va_mapping (kvtolin(canon_vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID | INTEL_PTE_WRITE, UVMF_NONE))
+ panic("couldn't set hiMMU readwrite for paddr %p(%p)\n", canon_vaddr, pa_to_ma (paddr));
+}
+
+/* Set a page read only (so as to pin it for instance) */
+void pmap_set_page_readonly(void *_vaddr) {
+ vm_offset_t vaddr = (vm_offset_t) _vaddr;
+ vm_offset_t paddr = kvtophys(vaddr);
+ vm_offset_t canon_vaddr = phystokv(paddr);
+ if (*pmap_pde(kernel_pmap, vaddr) & INTEL_PTE_VALID) {
+ if (hyp_do_update_va_mapping (kvtolin(vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID, UVMF_NONE))
+ panic("couldn't set hiMMU readonly for vaddr %p(%p)\n", vaddr, pa_to_ma (paddr));
+ }
+ if (canon_vaddr != vaddr &&
+ *pmap_pde(kernel_pmap, canon_vaddr) & INTEL_PTE_VALID) {
+ if (hyp_do_update_va_mapping (kvtolin(canon_vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID, UVMF_NONE))
+ panic("couldn't set hiMMU readonly for vaddr %p canon_vaddr %p paddr %p (%p)\n", vaddr, canon_vaddr, paddr, pa_to_ma (paddr));
+ }
+}
+
+/* This needs to be called instead of pmap_set_page_readonly as long as RC3
+ * still points to the bootstrap dirbase. */
+void pmap_set_page_readonly_init(void *_vaddr) {
+ vm_offset_t vaddr = (vm_offset_t) _vaddr;
+#if PAE
+ pt_entry_t *pdpbase = (void*) boot_info.pt_base;
+ vm_offset_t dirbase = ptetokv(pdpbase[0]);
+#else
+ vm_offset_t dirbase = boot_info.pt_base;
+#endif
+ struct pmap linear_pmap = {
+ .dirbase = (void*) dirbase,
+ };
+ /* Modify our future kernel map (can't use update_va_mapping for this)... */
+ if (*pmap_pde(kernel_pmap, vaddr) & INTEL_PTE_VALID)
+ if (!hyp_mmu_update_la (kvtolin(vaddr), pa_to_pte (kv_to_ma(vaddr)) | INTEL_PTE_VALID))
+ panic("couldn't set hiMMU readonly for vaddr %p(%p)\n", vaddr, kv_to_ma (vaddr));
+ /* ... and the bootstrap map. */
+ if (*pmap_pde(&linear_pmap, vaddr) & INTEL_PTE_VALID)
+ if (hyp_do_update_va_mapping (vaddr, pa_to_pte (kv_to_ma(vaddr)) | INTEL_PTE_VALID, UVMF_NONE))
+ panic("couldn't set MMU readonly for vaddr %p(%p)\n", vaddr, kv_to_ma (vaddr));
+}
+
+void pmap_clear_bootstrap_pagetable(pt_entry_t *base) {
+ int i;
+ pt_entry_t *dir;
+ vm_offset_t va = 0;
+#if PAE
+ int j;
+#endif /* PAE */
+ if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(base)))
+ panic("pmap_clear_bootstrap_pagetable: couldn't unpin page %p(%p)\n", base, kv_to_ma(base));
+#if PAE
+ for (j = 0; j < PDPNUM; j++)
+ {
+ pt_entry_t pdpe = base[j];
+ if (pdpe & INTEL_PTE_VALID) {
+ dir = (pt_entry_t *) phystokv(pte_to_pa(pdpe));
+#else /* PAE */
+ dir = base;
+#endif /* PAE */
+ for (i = 0; i < NPTES; i++) {
+ pt_entry_t pde = dir[i];
+ unsigned long pfn = mfn_to_pfn(atop(pde));
+ void *pgt = (void*) phystokv(ptoa(pfn));
+ if (pde & INTEL_PTE_VALID)
+ hyp_free_page(pfn, pgt);
+ va += NPTES * INTEL_PGBYTES;
+ if (va >= HYP_VIRT_START)
+ break;
+ }
+#if PAE
+ hyp_free_page(atop(_kvtophys(dir)), dir);
+ } else
+ va += NPTES * NPTES * INTEL_PGBYTES;
+ if (va >= HYP_VIRT_START)
+ break;
+ }
+#endif /* PAE */
+ hyp_free_page(atop(_kvtophys(base)), base);
+}
+#endif /* MACH_XEN */
+
void pmap_virtual_space(startp, endp)
vm_offset_t *startp;
vm_offset_t *endp;
@@ -823,6 +1036,29 @@ pmap_page_table_page_alloc()
return pa;
}
+#ifdef MACH_XEN
+void pmap_map_mfn(void *_addr, unsigned long mfn) {
+ vm_offset_t addr = (vm_offset_t) _addr;
+ pt_entry_t *pte, *pdp;
+ vm_offset_t ptp;
+ if ((pte = pmap_pte(kernel_pmap, addr)) == PT_ENTRY_NULL) {
+ ptp = phystokv(pmap_page_table_page_alloc());
+ pmap_set_page_readonly((void*) ptp);
+ if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, pa_to_mfn(ptp)))
+ panic("couldn't pin page %p(%p)\n",ptp,kv_to_ma(ptp));
+ pdp = pmap_pde(kernel_pmap, addr);
+ if (!hyp_mmu_update_pte(kv_to_ma(pdp),
+ pa_to_pte(kv_to_ma(ptp)) | INTEL_PTE_VALID
+ | INTEL_PTE_USER
+ | INTEL_PTE_WRITE))
+ panic("%s:%d could not set pde %p(%p) to %p(%p)\n",__FILE__,__LINE__,kvtophys((vm_offset_t)pdp),kv_to_ma(pdp), ptp, pa_to_ma(ptp));
+ pte = pmap_pte(kernel_pmap, addr);
+ }
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), ptoa(mfn) | INTEL_PTE_VALID | INTEL_PTE_WRITE))
+ panic("%s:%d could not set pte %p(%p) to %p(%p)\n",__FILE__,__LINE__,pte,kv_to_ma(pte), ptoa(mfn), pa_to_ma(ptoa(mfn)));
+}
+#endif /* MACH_XEN */
+
/*
* Deallocate a page-table page.
* The page-table page must have all mappings removed,
@@ -884,6 +1120,13 @@ pmap_t pmap_create(size)
panic("pmap_create");
memcpy(p->dirbase, kernel_page_dir, PDPNUM * INTEL_PGBYTES);
+#ifdef MACH_XEN
+ {
+ int i;
+ for (i = 0; i < PDPNUM; i++)
+ pmap_set_page_readonly((void*) p->dirbase + i * INTEL_PGBYTES);
+ }
+#endif /* MACH_XEN */
#if PAE
if (kmem_alloc_wired(kernel_map,
@@ -895,6 +1138,9 @@ pmap_t pmap_create(size)
for (i = 0; i < PDPNUM; i++)
WRITE_PTE(&p->pdpbase[i], pa_to_pte(kvtophys((vm_offset_t) p->dirbase + i * INTEL_PGBYTES)) | INTEL_PTE_VALID);
}
+#ifdef MACH_XEN
+ pmap_set_page_readonly(p->pdpbase);
+#endif /* MACH_XEN */
#endif /* PAE */
p->ref_count = 1;
@@ -954,14 +1200,29 @@ void pmap_destroy(p)
if (m == VM_PAGE_NULL)
panic("pmap_destroy: pte page not in object");
vm_page_lock_queues();
+#ifdef MACH_XEN
+ if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa)))
+ panic("pmap_destroy: couldn't unpin page %p(%p)\n", pa, kv_to_ma(pa));
+ pmap_set_page_readwrite((void*) phystokv(pa));
+#endif /* MACH_XEN */
vm_page_free(m);
inuse_ptepages_count--;
vm_page_unlock_queues();
vm_object_unlock(pmap_object);
}
}
+#ifdef MACH_XEN
+ {
+ int i;
+ for (i = 0; i < PDPNUM; i++)
+ pmap_set_page_readwrite((void*) p->dirbase + i * INTEL_PGBYTES);
+ }
+#endif /* MACH_XEN */
kmem_free(kernel_map, (vm_offset_t)p->dirbase, PDPNUM * INTEL_PGBYTES);
#if PAE
+#ifdef MACH_XEN
+ pmap_set_page_readwrite(p->pdpbase);
+#endif /* MACH_XEN */
kmem_free(kernel_map, (vm_offset_t)p->pdpbase, INTEL_PGBYTES);
#endif /* PAE */
zfree(pmap_zone, (vm_offset_t) p);
@@ -1007,6 +1268,10 @@ void pmap_remove_range(pmap, va, spte, epte)
int num_removed, num_unwired;
int pai;
vm_offset_t pa;
+#ifdef MACH_XEN
+ int n, ii = 0;
+ struct mmu_update update[HYP_BATCH_MMU_UPDATES];
+#endif /* MACH_XEN */
#if DEBUG_PTE_PAGE
if (pmap != kernel_pmap)
@@ -1035,7 +1300,19 @@ void pmap_remove_range(pmap, va, spte, epte)
register int i = ptes_per_vm_page;
register pt_entry_t *lpte = cpte;
do {
+#ifdef MACH_XEN
+ update[ii].ptr = kv_to_ma(lpte);
+ update[ii].val = 0;
+ ii++;
+ if (ii == HYP_BATCH_MMU_UPDATES) {
+ hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF);
+ if (n != ii)
+ panic("couldn't pmap_remove_range\n");
+ ii = 0;
+ }
+#else /* MACH_XEN */
*lpte = 0;
+#endif /* MACH_XEN */
lpte++;
} while (--i > 0);
continue;
@@ -1056,7 +1333,19 @@ void pmap_remove_range(pmap, va, spte, epte)
do {
pmap_phys_attributes[pai] |=
*lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
+#ifdef MACH_XEN
+ update[ii].ptr = kv_to_ma(lpte);
+ update[ii].val = 0;
+ ii++;
+ if (ii == HYP_BATCH_MMU_UPDATES) {
+ hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF);
+ if (n != ii)
+ panic("couldn't pmap_remove_range\n");
+ ii = 0;
+ }
+#else /* MACH_XEN */
*lpte = 0;
+#endif /* MACH_XEN */
lpte++;
} while (--i > 0);
}
@@ -1102,6 +1391,14 @@ void pmap_remove_range(pmap, va, spte, epte)
}
}
+#ifdef MACH_XEN
+ if (ii > HYP_BATCH_MMU_UPDATES)
+ panic("overflowed array in pmap_remove_range");
+ hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF);
+ if (n != ii)
+ panic("couldn't pmap_remove_range\n");
+#endif /* MACH_XEN */
+
/*
* Update the counts
*/
@@ -1246,7 +1543,12 @@ void pmap_page_protect(phys, prot)
do {
pmap_phys_attributes[pai] |=
*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+#ifdef MACH_XEN
+ if (!hyp_mmu_update_pte(kv_to_ma(pte++), 0))
+ panic("%s:%d could not clear pte %p\n",__FILE__,__LINE__,pte-1);
+#else /* MACH_XEN */
*pte++ = 0;
+#endif /* MACH_XEN */
} while (--i > 0);
}
@@ -1276,7 +1578,12 @@ void pmap_page_protect(phys, prot)
register int i = ptes_per_vm_page;
do {
+#ifdef MACH_XEN
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~INTEL_PTE_WRITE))
+ panic("%s:%d could not enable write on pte %p\n",__FILE__,__LINE__,pte);
+#else /* MACH_XEN */
*pte &= ~INTEL_PTE_WRITE;
+#endif /* MACH_XEN */
pte++;
} while (--i > 0);
@@ -1365,11 +1672,36 @@ void pmap_protect(map, s, e, prot)
spte = &spte[ptenum(s)];
epte = &spte[intel_btop(l-s)];
+#ifdef MACH_XEN
+ int n, i = 0;
+ struct mmu_update update[HYP_BATCH_MMU_UPDATES];
+#endif /* MACH_XEN */
+
while (spte < epte) {
- if (*spte & INTEL_PTE_VALID)
+ if (*spte & INTEL_PTE_VALID) {
+#ifdef MACH_XEN
+ update[i].ptr = kv_to_ma(spte);
+ update[i].val = *spte & ~INTEL_PTE_WRITE;
+ i++;
+ if (i == HYP_BATCH_MMU_UPDATES) {
+ hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF);
+ if (n != i)
+ panic("couldn't pmap_protect\n");
+ i = 0;
+ }
+#else /* MACH_XEN */
*spte &= ~INTEL_PTE_WRITE;
+#endif /* MACH_XEN */
+ }
spte++;
}
+#ifdef MACH_XEN
+ if (i > HYP_BATCH_MMU_UPDATES)
+ panic("overflowed array in pmap_protect");
+ hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF);
+ if (n != i)
+ panic("couldn't pmap_protect\n");
+#endif /* MACH_XEN */
}
s = l;
pde++;
@@ -1412,6 +1744,8 @@ if (pmap_debug) printf("pmap(%x, %x)\n", v, pa);
if (pmap == PMAP_NULL)
return;
+ if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end))
+ panic("pmap_enter(%p, %p) falls in physical memory area!\n", v, pa);
if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
&& !wired /* hack for io_wire */ ) {
/*
@@ -1502,9 +1836,20 @@ Retry:
/*XX pdp = &pmap->dirbase[pdenum(v) & ~(i-1)];*/
pdp = pmap_pde(pmap, v);
do {
+#ifdef MACH_XEN
+ pmap_set_page_readonly((void *) ptp);
+ if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn(ptp)))
+ panic("couldn't pin page %p(%p)\n",ptp,kv_to_ma(ptp));
+ if (!hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdp)),
+ pa_to_pte(pa_to_ma(kvtophys(ptp))) | INTEL_PTE_VALID
+ | INTEL_PTE_USER
+ | INTEL_PTE_WRITE))
+ panic("%s:%d could not set pde %p(%p,%p) to %p(%p,%p) %p\n",__FILE__,__LINE__, pdp, kvtophys((vm_offset_t)pdp), pa_to_ma(kvtophys((vm_offset_t)pdp)), ptp, kvtophys(ptp), pa_to_ma(kvtophys(ptp)), pa_to_pte(kv_to_ma(ptp)));
+#else /* MACH_XEN */
*pdp = pa_to_pte(ptp) | INTEL_PTE_VALID
| INTEL_PTE_USER
| INTEL_PTE_WRITE;
+#endif /* MACH_XEN */
pdp++;
ptp += INTEL_PGBYTES;
} while (--i > 0);
@@ -1544,7 +1889,12 @@ Retry:
do {
if (*pte & INTEL_PTE_MOD)
template |= INTEL_PTE_MOD;
+#ifdef MACH_XEN
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), pa_to_ma(template)))
+ panic("%s:%d could not set pte %p to %p\n",__FILE__,__LINE__,pte,template);
+#else /* MACH_XEN */
WRITE_PTE(pte, template)
+#endif /* MACH_XEN */
pte++;
pte_increment_pa(template);
} while (--i > 0);
@@ -1649,7 +1999,12 @@ Retry:
template |= INTEL_PTE_WIRED;
i = ptes_per_vm_page;
do {
+#ifdef MACH_XEN
+ if (!(hyp_mmu_update_pte(kv_to_ma(pte), pa_to_ma(template))))
+ panic("%s:%d could not set pte %p to %p\n",__FILE__,__LINE__,pte,template);
+#else /* MACH_XEN */
WRITE_PTE(pte, template)
+#endif /* MACH_XEN */
pte++;
pte_increment_pa(template);
} while (--i > 0);
@@ -1704,7 +2059,12 @@ void pmap_change_wiring(map, v, wired)
map->stats.wired_count--;
i = ptes_per_vm_page;
do {
+#ifdef MACH_XEN
+ if (!(hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~INTEL_PTE_WIRED)))
+ panic("%s:%d could not wire down pte %p\n",__FILE__,__LINE__,pte);
+#else /* MACH_XEN */
*pte &= ~INTEL_PTE_WIRED;
+#endif /* MACH_XEN */
pte++;
} while (--i > 0);
}
@@ -1835,7 +2195,17 @@ void pmap_collect(p)
register int i = ptes_per_vm_page;
register pt_entry_t *pdep = pdp;
do {
+#ifdef MACH_XEN
+ unsigned long pte = *pdep;
+ void *ptable = (void*) ptetokv(pte);
+ if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0)))
+ panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1);
+ if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable)))
+ panic("couldn't unpin page %p(%p)\n", ptable, pa_to_ma(kvtophys((vm_offset_t)ptable)));
+ pmap_set_page_readwrite(ptable);
+#else /* MACH_XEN */
*pdep++ = 0;
+#endif /* MACH_XEN */
} while (--i > 0);
}
@@ -2052,7 +2422,12 @@ phys_attribute_clear(phys, bits)
{
register int i = ptes_per_vm_page;
do {
+#ifdef MACH_XEN
+ if (!(hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~bits)))
+ panic("%s:%d could not clear bits %lx from pte %p\n",__FILE__,__LINE__,bits,pte);
+#else /* MACH_XEN */
*pte &= ~bits;
+#endif /* MACH_XEN */
} while (--i > 0);
}
PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
@@ -2413,7 +2788,12 @@ pmap_unmap_page_zero ()
if (!pte)
return;
assert (pte);
+#ifdef MACH_XEN
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), 0))
+ printf("couldn't unmap page 0\n");
+#else /* MACH_XEN */
*pte = 0;
INVALIDATE_TLB(kernel_pmap, 0, PAGE_SIZE);
+#endif /* MACH_XEN */
}
#endif /* i386 */
diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
index 7354a0f..a2b6442 100644
--- a/i386/intel/pmap.h
+++ b/i386/intel/pmap.h
@@ -126,12 +126,21 @@ typedef unsigned int pt_entry_t;
#define INTEL_PTE_NCACHE 0x00000010
#define INTEL_PTE_REF 0x00000020
#define INTEL_PTE_MOD 0x00000040
+#ifdef MACH_XEN
+/* Not supported */
+#define INTEL_PTE_GLOBAL 0x00000000
+#else /* MACH_XEN */
#define INTEL_PTE_GLOBAL 0x00000100
+#endif /* MACH_XEN */
#define INTEL_PTE_WIRED 0x00000200
#define INTEL_PTE_PFN 0xfffff000
#define pa_to_pte(a) ((a) & INTEL_PTE_PFN)
+#ifdef MACH_PSEUDO_PHYS
+#define pte_to_pa(p) ma_to_pa((p) & INTEL_PTE_PFN)
+#else /* MACH_PSEUDO_PHYS */
#define pte_to_pa(p) ((p) & INTEL_PTE_PFN)
+#endif /* MACH_PSEUDO_PHYS */
#define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1)
/*
@@ -159,6 +168,14 @@ typedef struct pmap *pmap_t;
#define PMAP_NULL ((pmap_t) 0)
+#ifdef MACH_XEN
+extern void pmap_set_page_readwrite(void *addr);
+extern void pmap_set_page_readonly(void *addr);
+extern void pmap_set_page_readonly_init(void *addr);
+extern void pmap_map_mfn(void *addr, unsigned long mfn);
+extern void pmap_clear_bootstrap_pagetable(pt_entry_t *addr);
+#endif /* MACH_XEN */
+
#if PAE
#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->pdpbase))
#else /* PAE */