path: root/i386/intel
Diffstat (limited to 'i386/intel')
-rw-r--r--  i386/intel/pmap.c        2563
-rw-r--r--  i386/intel/pmap.h         401
-rw-r--r--  i386/intel/read_fault.c   178
3 files changed, 3142 insertions, 0 deletions
diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
new file mode 100644
index 0000000..3c2493b
--- /dev/null
+++ b/i386/intel/pmap.c
@@ -0,0 +1,2563 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: pmap.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * (These guys wrote the Vax version)
+ *
+ * Physical Map management code for Intel i386, i486, and i860.
+ *
+ * Manages physical address maps.
+ *
+ * In addition to hardware address maps, this
+ * module is called upon to provide software-use-only
+ * maps which may or may not be stored in the same
+ * form as hardware maps. These pseudo-maps are
+ * used to store intermediate results from copy
+ * operations to and from address spaces.
+ *
+ * Since the information managed by this module is
+ * also stored by the logical address mapping module,
+ * this module may throw away valid virtual-to-physical
+ * mappings at almost any time. However, invalidations
+ * of virtual-to-physical mappings must be done as
+ * requested.
+ *
+ * In order to cope with hardware architectures which
+ * make virtual-to-physical map invalidates expensive,
+ * this module may delay invalidate or reduce-protection
+ * operations until such time as they are actually
+ * necessary. This module is given full information as
+ * to which processors are currently using which maps,
+ * and to when physical maps must be made correct.
+ */
+
+#include <cpus.h>
+
+#include <mach/machine/vm_types.h>
+
+#include <mach/boolean.h>
+#include <kern/thread.h>
+#include <kern/zalloc.h>
+
+#include <kern/lock.h>
+
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+#include "vm_param.h"
+#include <mach/vm_prot.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_user.h>
+
+#include <mach/machine/vm_param.h>
+#include <machine/thread.h>
+#include "cpu_number.h"
+#if i860
+#include <i860ipsc/nodehw.h>
+#endif
+
+#ifdef ORC
+#define OLIVETTICACHE 1
+#endif ORC
+
+#ifndef OLIVETTICACHE
+#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry);
+#define WRITE_PTE_FAST(pte_p, pte_entry) *(pte_p) = (pte_entry);
+#else OLIVETTICACHE
+#error might not work anymore
+
+/* This gross kludgery is needed for Olivetti XP7 & XP9 boxes to get
+ * around an apparent hardware bug. Other than at startup it doesn't
+ * affect run-time performance very much, so we leave it in for all
+ * machines.
+ */
+extern unsigned *pstart();
+#define CACHE_LINE 8
+#define CACHE_SIZE 512
+#define CACHE_PAGE 0x1000
+
+#define WRITE_PTE(pte_p, pte_entry) { write_pte(pte_p, pte_entry); }
+
+write_pte(pte_p, pte_entry)
+pt_entry_t *pte_p, pte_entry;
+{
+ unsigned long count;
+ volatile unsigned long hold, *addr1, *addr2;
+
+ if ( pte_entry != *pte_p )
+ *pte_p = pte_entry;
+ else {
+ /* This isn't necessarily the optimal algorithm */
+ addr1 = (unsigned long *)pstart;
+ for (count = 0; count < CACHE_SIZE; count++) {
+ addr2 = addr1 + CACHE_PAGE;
+ hold = *addr1; /* clear cache bank - A - */
+ hold = *addr2; /* clear cache bank - B - */
+ addr1 += CACHE_LINE;
+ }
+ }
+}
+
+#define WRITE_PTE_FAST(pte_p, pte_entry)*pte_p = pte_entry;
+
+#endif OLIVETTICACHE
+
+/*
+ * Private data structures.
+ */
+
+/*
+ * For each vm_page_t, there is a list of all currently
+ * valid virtual mappings of that page. An entry is
+ * a pv_entry_t; the list is the pv_table.
+ */
+
+typedef struct pv_entry {
+ struct pv_entry *next; /* next pv_entry */
+ pmap_t pmap; /* pmap where mapping lies */
+ vm_offset_t va; /* virtual address for mapping */
+} *pv_entry_t;
+
+#define PV_ENTRY_NULL ((pv_entry_t) 0)
+
+pv_entry_t pv_head_table; /* array of entries, one per page */
+
+/*
+ * pv_list entries are kept on a list that can only be accessed
+ * with the pmap system locked (at SPLVM, not in the cpus_active set).
+ * The list is refilled from the pv_list_zone if it becomes empty.
+ */
+pv_entry_t pv_free_list; /* free list at SPLVM */
+decl_simple_lock_data(, pv_free_list_lock)
+
+#define PV_ALLOC(pv_e) { \
+ simple_lock(&pv_free_list_lock); \
+ if ((pv_e = pv_free_list) != 0) { \
+ pv_free_list = pv_e->next; \
+ } \
+ simple_unlock(&pv_free_list_lock); \
+}
+
+#define PV_FREE(pv_e) { \
+ simple_lock(&pv_free_list_lock); \
+ pv_e->next = pv_free_list; \
+ pv_free_list = pv_e; \
+ simple_unlock(&pv_free_list_lock); \
+}
+
+zone_t pv_list_zone; /* zone of pv_entry structures */
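+
+/*
+ * A minimal sketch of the free-list discipline above, with stand-in
+ * names (xpv_*): PV_ALLOC pops from the LIFO free list and leaves it
+ * to the caller to refill from the pv_list zone when the list runs
+ * dry; PV_FREE pushes an entry back.  The free-list lock is omitted
+ * here for brevity.
+ */
+struct xpv { struct xpv *next; };
+static struct xpv *xpv_free_list;
+
+static struct xpv *xpv_alloc(void)              /* cf. PV_ALLOC */
+{
+        struct xpv *e = xpv_free_list;
+        if (e != 0)
+                xpv_free_list = e->next;
+        return e;       /* 0 tells the caller to refill from the zone */
+}
+
+static void xpv_free(struct xpv *e)             /* cf. PV_FREE */
+{
+        e->next = xpv_free_list;
+        xpv_free_list = e;
+}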
+
+/*
+ * Each entry in the pv_head_table is locked by a bit in the
+ * pv_lock_table. The lock bits are accessed by the physical
+ * address of the page they lock.
+ */
+
+char *pv_lock_table; /* pointer to array of bits */
+#define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
+
+/* Has pmap_init completed? */
+boolean_t pmap_initialized = FALSE;
+
+/*
+ * More-specific code provides these;
+ * they indicate the total extent of physical memory
+ * that we know about and might ever have to manage.
+ */
+extern vm_offset_t phys_first_addr, phys_last_addr;
+
+/*
+ * Range of kernel virtual addresses available for kernel memory mapping.
+ * Does not include the virtual addresses used to map physical memory 1-1.
+ * Initialized by pmap_bootstrap.
+ */
+vm_offset_t kernel_virtual_start;
+vm_offset_t kernel_virtual_end;
+
+/* XXX stupid fixed limit - get rid */
+vm_size_t morevm = 40 * 1024 * 1024; /* VM space for kernel map */
+
+/*
+ * Index into pv_head table, its lock bits, and the modify/reference
+ * bits starting at phys_first_addr.
+ */
+#define pa_index(pa) (atop(pa - phys_first_addr))
+
+#define pai_to_pvh(pai) (&pv_head_table[pai])
+#define lock_pvh_pai(pai) (bit_lock(pai, pv_lock_table))
+#define unlock_pvh_pai(pai) (bit_unlock(pai, pv_lock_table))
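+
+/*
+ * A worked instance of the indexing above, under two assumptions made
+ * only for the example: 4 KB pages (so atop(x) is x >> 12) and
+ * phys_first_addr == 0x100000.  The page at physical address 0x123000
+ * then indexes slot 0x23 of pv_head_table, pv_lock_table and
+ * pmap_phys_attributes alike.
+ */
+static unsigned long example_pa_index(unsigned long pa)
+{
+        unsigned long first = 0x100000;         /* assumed phys_first_addr */
+        return (pa - first) >> 12;              /* atop() for 4 KB pages */
+        /* example_pa_index(0x123000) == 0x23 */
+}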
+
+/*
+ * Array of physical page attributes for managed pages.
+ * One byte per physical page.
+ */
+char *pmap_phys_attributes;
+
+/*
+ * Physical page attributes. Copy bits from PTE definition.
+ */
+#define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
+#define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
+
+/*
+ * Amount of virtual memory mapped by one
+ * page-directory entry.
+ */
+#define PDE_MAPPED_SIZE (pdenum2lin(1))
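+
+/*
+ * On the two-level i386 layout one page-directory entry covers
+ * 1024 PTEs * 4 KB = 4 MB, so PDE_MAPPED_SIZE works out to 0x400000.
+ * The loops in pmap_remove and pmap_protect below clip each step to
+ * the next such boundary; a sketch of that rounding, with the 4 MB
+ * size hard-coded as an assumption:
+ */
+static unsigned long example_next_pde_boundary(unsigned long s)
+{
+        unsigned long pde_size = 0x400000;      /* assumed PDE_MAPPED_SIZE */
+        return (s + pde_size) & ~(pde_size - 1);
+        /* e.g. 0x00123000 -> 0x00400000, 0x00400000 -> 0x00800000 */
+}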
+
+/*
+ * We allocate page table pages directly from the VM system
+ * through this object. It maps physical memory.
+ */
+vm_object_t pmap_object = VM_OBJECT_NULL;
+
+/*
+ * Locking and TLB invalidation
+ */
+
+/*
+ * Locking Protocols:
+ *
+ * There are two structures in the pmap module that need locking:
+ * the pmaps themselves, and the per-page pv_lists (which are locked
+ * by locking the pv_lock_table entry that corresponds to the pv_head
+ * for the list in question.) Most routines want to lock a pmap and
+ * then do operations in it that require pv_list locking -- however
+ * pmap_remove_all and pmap_copy_on_write operate on a physical page
+ * basis and want to do the locking in the reverse order, i.e. lock
+ * a pv_list and then go through all the pmaps referenced by that list.
+ * To protect against deadlock between these two cases, the pmap_lock
+ * is used. There are three different locking protocols as a result:
+ *
+ * 1. pmap operations only (pmap_extract, pmap_access, ...) Lock only
+ * the pmap.
+ *
+ * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read
+ * lock on the pmap_lock (shared read), then lock the pmap
+ * and finally the pv_lists as needed [i.e. pmap lock before
+ * pv_list lock.]
+ *
+ * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
+ * Get a write lock on the pmap_lock (exclusive write); this
+ * also guarantees exclusive access to the pv_lists. Lock the
+ * pmaps as needed.
+ *
+ * At no time may any routine hold more than one pmap lock or more than
+ * one pv_list lock. Because interrupt level routines can allocate
+ * mbufs and cause pmap_enter's, the pmap_lock and the lock on the
+ * kernel_pmap can only be held at splvm.
+ */
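+
+/*
+ * A user-space analogue of protocols 2 and 3, using POSIX locks as
+ * stand-ins (a pthread_rwlock_t for pmap_system_lock, mutexes for a
+ * pmap lock and a pv_list lock).  Only the lock ordering matters:
+ * readers take the system lock shared and then pmap before pv_list,
+ * while writers take the system lock exclusive, which already shuts
+ * out every pv_list user.
+ */
+#include <pthread.h>
+
+static pthread_rwlock_t x_sys_lock = PTHREAD_RWLOCK_INITIALIZER;
+static pthread_mutex_t x_pmap_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t x_pv_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void x_pmap_based_op(void)               /* protocol 2 */
+{
+        pthread_rwlock_rdlock(&x_sys_lock);     /* shared read */
+        pthread_mutex_lock(&x_pmap_lock);       /* pmap before pv_list */
+        pthread_mutex_lock(&x_pv_lock);
+        /* ... change mappings ... */
+        pthread_mutex_unlock(&x_pv_lock);
+        pthread_mutex_unlock(&x_pmap_lock);
+        pthread_rwlock_unlock(&x_sys_lock);
+}
+
+static void x_pv_based_op(void)                 /* protocol 3 */
+{
+        pthread_rwlock_wrlock(&x_sys_lock);     /* exclusive write */
+        pthread_mutex_lock(&x_pmap_lock);       /* lock pmaps as needed */
+        /* ... walk a pv_list, touching each pmap ... */
+        pthread_mutex_unlock(&x_pmap_lock);
+        pthread_rwlock_unlock(&x_sys_lock);
+}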
+
+#if NCPUS > 1
+/*
+ * We raise the interrupt level to splvm, to block interprocessor
+ * interrupts during pmap operations. We must take the CPU out of
+ * the cpus_active set while interrupts are blocked.
+ */
+#define SPLVM(spl) { \
+ spl = splvm(); \
+ i_bit_clear(cpu_number(), &cpus_active); \
+}
+
+#define SPLX(spl) { \
+ i_bit_set(cpu_number(), &cpus_active); \
+ splx(spl); \
+}
+
+/*
+ * Lock on pmap system
+ */
+lock_data_t pmap_system_lock;
+
+#define PMAP_READ_LOCK(pmap, spl) { \
+ SPLVM(spl); \
+ lock_read(&pmap_system_lock); \
+ simple_lock(&(pmap)->lock); \
+}
+
+#define PMAP_WRITE_LOCK(spl) { \
+ SPLVM(spl); \
+ lock_write(&pmap_system_lock); \
+}
+
+#define PMAP_READ_UNLOCK(pmap, spl) { \
+ simple_unlock(&(pmap)->lock); \
+ lock_read_done(&pmap_system_lock); \
+ SPLX(spl); \
+}
+
+#define PMAP_WRITE_UNLOCK(spl) { \
+ lock_write_done(&pmap_system_lock); \
+ SPLX(spl); \
+}
+
+#define PMAP_WRITE_TO_READ_LOCK(pmap) { \
+ simple_lock(&(pmap)->lock); \
+ lock_write_to_read(&pmap_system_lock); \
+}
+
+#define LOCK_PVH(index) (lock_pvh_pai(index))
+
+#define UNLOCK_PVH(index) (unlock_pvh_pai(index))
+
+#define PMAP_UPDATE_TLBS(pmap, s, e) \
+{ \
+ cpu_set cpu_mask = 1 << cpu_number(); \
+ cpu_set users; \
+ \
+ /* Since the pmap is locked, other updates are locked */ \
+ /* out, and any pmap_activate has finished. */ \
+ \
+ /* find other cpus using the pmap */ \
+ users = (pmap)->cpus_using & ~cpu_mask; \
+ if (users) { \
+ /* signal them, and wait for them to finish */ \
+ /* using the pmap */ \
+ signal_cpus(users, (pmap), (s), (e)); \
+ while ((pmap)->cpus_using & cpus_active & ~cpu_mask) \
+ continue; \
+ } \
+ \
+ /* invalidate our own TLB if pmap is in use */ \
+ if ((pmap)->cpus_using & cpu_mask) { \
+ INVALIDATE_TLB((s), (e)); \
+ } \
+}
+
+#else NCPUS > 1
+
+#define SPLVM(spl)
+#define SPLX(spl)
+
+#define PMAP_READ_LOCK(pmap, spl) SPLVM(spl)
+#define PMAP_WRITE_LOCK(spl) SPLVM(spl)
+#define PMAP_READ_UNLOCK(pmap, spl) SPLX(spl)
+#define PMAP_WRITE_UNLOCK(spl) SPLX(spl)
+#define PMAP_WRITE_TO_READ_LOCK(pmap)
+
+#define LOCK_PVH(index)
+#define UNLOCK_PVH(index)
+
+#define PMAP_UPDATE_TLBS(pmap, s, e) { \
+ /* invalidate our own TLB if pmap is in use */ \
+ if ((pmap)->cpus_using) { \
+ INVALIDATE_TLB((s), (e)); \
+ } \
+}
+
+#endif NCPUS > 1
+
+#define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
+
+#if i860
+/* Do a data cache flush until we find the caching bug XXX prp */
+#define INVALIDATE_TLB(s, e) { \
+ flush(); \
+ flush_tlb(); \
+}
+#else i860
+#define INVALIDATE_TLB(s, e) { \
+ flush_tlb(); \
+}
+#endif i860
+
+
+#if NCPUS > 1
+/*
+ * Structures to keep track of pending TLB invalidations
+ */
+
+#define UPDATE_LIST_SIZE 4
+
+struct pmap_update_item {
+ pmap_t pmap; /* pmap to invalidate */
+ vm_offset_t start; /* start address to invalidate */
+ vm_offset_t end; /* end address to invalidate */
+} ;
+
+typedef struct pmap_update_item *pmap_update_item_t;
+
+/*
+ * List of pmap updates. If the list overflows,
+ * the last entry is changed to invalidate all.
+ */
+struct pmap_update_list {
+ decl_simple_lock_data(, lock)
+ int count;
+ struct pmap_update_item item[UPDATE_LIST_SIZE];
+} ;
+typedef struct pmap_update_list *pmap_update_list_t;
+
+struct pmap_update_list cpu_update_list[NCPUS];
+
+#endif NCPUS > 1
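+
+/*
+ * Sketch of the overflow rule above with stand-in types: once the
+ * fixed-size list fills up, the last slot is widened to mean "flush
+ * everything", which is how signal_cpus() below handles it with
+ * kernel_pmap and VM_MIN_ADDRESS..VM_MAX_KERNEL_ADDRESS.
+ */
+#define X_LIST_SIZE 4
+struct x_item { unsigned long start, end; };
+struct x_list { int count; struct x_item item[X_LIST_SIZE]; };
+
+static void x_list_add(struct x_list *l, unsigned long s, unsigned long e)
+{
+        if (l->count >= X_LIST_SIZE) {
+                /* overflowed: collapse the last entry into a full flush */
+                l->item[X_LIST_SIZE-1].start = 0;
+                l->item[X_LIST_SIZE-1].end = ~0UL;
+        } else {
+                l->item[l->count].start = s;
+                l->item[l->count].end = e;
+                l->count++;
+        }
+}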
+
+/*
+ * Other useful macros.
+ */
+#define current_pmap() (vm_map_pmap(current_thread()->task->map))
+#define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0)
+
+struct pmap kernel_pmap_store;
+pmap_t kernel_pmap;
+
+struct zone *pmap_zone; /* zone of pmap structures */
+
+int pmap_debug = 0; /* flag for debugging prints */
+
+#if 0
+int ptes_per_vm_page; /* number of hardware ptes needed
+ to map one VM page. */
+#else
+#define ptes_per_vm_page 1
+#endif
+
+unsigned int inuse_ptepages_count = 0; /* debugging */
+
+extern char end;
+
+/*
+ * Pointer to the basic page directory for the kernel.
+ * Initialized by pmap_bootstrap().
+ */
+pt_entry_t *kernel_page_dir;
+
+void pmap_remove_range(); /* forward */
+#if NCPUS > 1
+void signal_cpus(); /* forward */
+#endif NCPUS > 1
+
+#if i860
+/*
+ * Paging flag
+ */
+int paging_enabled = 0;
+#endif
+
+static inline pt_entry_t *
+pmap_pde(pmap_t pmap, vm_offset_t addr)
+{
+ if (pmap == kernel_pmap)
+ addr = kvtolin(addr);
+ return &pmap->dirbase[lin2pdenum(addr)];
+}
+
+/*
+ * Given an offset and a map, compute the address of the
+ * pte. If the address is invalid with respect to the map
+ * then PT_ENTRY_NULL is returned (and the map may need to grow).
+ *
+ * This is only used internally.
+ */
+pt_entry_t *
+pmap_pte(pmap_t pmap, vm_offset_t addr)
+{
+ pt_entry_t *ptp;
+ pt_entry_t pte;
+
+ if (pmap->dirbase == 0)
+ return(PT_ENTRY_NULL);
+ pte = *pmap_pde(pmap, addr);
+ if ((pte & INTEL_PTE_VALID) == 0)
+ return(PT_ENTRY_NULL);
+ ptp = (pt_entry_t *)ptetokv(pte);
+ return(&ptp[ptenum(addr)]);
+}
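+
+/*
+ * The two lookups above mirror the i386 two-level split of a linear
+ * address: 10 bits of directory index, 10 bits of table index and a
+ * 12-bit page offset.  A sketch of that decomposition, with the usual
+ * i386 shifts and masks written out by hand rather than taken from
+ * the lin2pdenum()/ptenum() macros:
+ */
+static void example_split_linear(unsigned long la,
+                                 unsigned long *pde_idx,
+                                 unsigned long *pte_idx,
+                                 unsigned long *offset)
+{
+        *pde_idx = (la >> 22) & 0x3ff;          /* cf. lin2pdenum() */
+        *pte_idx = (la >> 12) & 0x3ff;          /* cf. ptenum() */
+        *offset  = la & 0xfff;
+        /* e.g. la == 0xc0123456 -> pde 0x300, pte 0x123, offset 0x456 */
+}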
+
+#define DEBUG_PTE_PAGE 0
+
+#if DEBUG_PTE_PAGE
+void ptep_check(ptep)
+ ptep_t ptep;
+{
+ register pt_entry_t *pte, *epte;
+ int ctu, ctw;
+
+ /* check the use and wired counts */
+ if (ptep == PTE_PAGE_NULL)
+ return;
+ pte = pmap_pte(ptep->pmap, ptep->va);
+ epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t);
+ ctu = 0;
+ ctw = 0;
+ while (pte < epte) {
+ if (pte->pfn != 0) {
+ ctu++;
+ if (pte->wired)
+ ctw++;
+ }
+ pte += ptes_per_vm_page;
+ }
+
+ if (ctu != ptep->use_count || ctw != ptep->wired_count) {
+ printf("use %d wired %d - actual use %d wired %d\n",
+ ptep->use_count, ptep->wired_count, ctu, ctw);
+ panic("pte count");
+ }
+}
+#endif DEBUG_PTE_PAGE
+
+/*
+ * Map memory at initialization. The physical addresses being
+ * mapped are not managed and are never unmapped.
+ *
+ * For now, VM is already on; we only need to map the
+ * specified memory.
+ */
+vm_offset_t pmap_map(virt, start, end, prot)
+ register vm_offset_t virt;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ register int prot;
+{
+ register int ps;
+
+ ps = PAGE_SIZE;
+ while (start < end) {
+ pmap_enter(kernel_pmap, virt, start, prot, FALSE);
+ virt += ps;
+ start += ps;
+ }
+ return(virt);
+}
+
+/*
+ * Back-door routine for mapping kernel VM at initialization.
+ * Useful for mapping memory outside the range
+ * [phys_first_addr, phys_last_addr) (i.e., devices).
+ * Otherwise like pmap_map.
+#if i860
+ * Sets no-cache bit.
+#endif
+ */
+vm_offset_t pmap_map_bd(virt, start, end, prot)
+ register vm_offset_t virt;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ vm_prot_t prot;
+{
+ register pt_entry_t template;
+ register pt_entry_t *pte;
+
+ template = pa_to_pte(start)
+#if i860
+ | INTEL_PTE_NCACHE
+#endif
+ | INTEL_PTE_VALID;
+ if (prot & VM_PROT_WRITE)
+ template |= INTEL_PTE_WRITE;
+
+ while (start < end) {
+ pte = pmap_pte(kernel_pmap, virt);
+ if (pte == PT_ENTRY_NULL)
+ panic("pmap_map_bd: Invalid kernel address\n");
+ WRITE_PTE_FAST(pte, template)
+ pte_increment_pa(template);
+ virt += PAGE_SIZE;
+ start += PAGE_SIZE;
+ }
+ return(virt);
+}
+
+/*
+ * Bootstrap the system enough to run with virtual memory.
+ * Allocate the kernel page directory and page tables,
+ * and direct-map all physical memory.
+ * Called with mapping off.
+ */
+void pmap_bootstrap()
+{
+ /*
+ * Mapping is turned off; we must reference only physical addresses.
+ * The load image of the system is to be mapped 1-1 physical = virtual.
+ */
+
+ /*
+ * Set ptes_per_vm_page for general use.
+ */
+#if 0
+ ptes_per_vm_page = PAGE_SIZE / INTEL_PGBYTES;
+#endif
+
+ /*
+ * The kernel's pmap is statically allocated so we don't
+ * have to use pmap_create, which is unlikely to work
+ * correctly at this part of the boot sequence.
+ */
+
+ kernel_pmap = &kernel_pmap_store;
+
+#if NCPUS > 1
+ lock_init(&pmap_system_lock, FALSE); /* NOT a sleep lock */
+#endif NCPUS > 1
+
+ simple_lock_init(&kernel_pmap->lock);
+
+ kernel_pmap->ref_count = 1;
+
+ /*
+ * Determine the kernel virtual address range.
+ * It starts at the end of the physical memory
+ * mapped into the kernel address space,
+ * and extends to a stupid arbitrary limit beyond that.
+ */
+ kernel_virtual_start = phys_last_addr;
+ kernel_virtual_end = phys_last_addr + morevm;
+
+ /*
+ * Allocate and clear a kernel page directory.
+ */
+ kernel_pmap->dirbase = kernel_page_dir = (pt_entry_t*)pmap_grab_page();
+ {
+ int i;
+ for (i = 0; i < NPDES; i++)
+ kernel_pmap->dirbase[i] = 0;
+ }
+
+ /*
+ * Allocate and set up the kernel page tables.
+ */
+ {
+ vm_offset_t va;
+
+ /*
+ * Map virtual memory for all known physical memory, 1-1,
+ * from phys_first_addr to phys_last_addr.
+ * Make any mappings completely in the kernel's text segment read-only.
+ *
+ * Also allocate some additional all-null page tables afterwards
+ * for kernel virtual memory allocation,
+ * because this PMAP module is too stupid
+ * to allocate new kernel page tables later.
+ * XX fix this
+ */
+ for (va = phys_first_addr; va < phys_last_addr + morevm; )
+ {
+ pt_entry_t *pde = kernel_page_dir + lin2pdenum(kvtolin(va));
+ pt_entry_t *ptable = (pt_entry_t*)pmap_grab_page();
+ pt_entry_t *pte;
+ vm_offset_t pteva;
+
+ /* Initialize the page directory entry. */
+ *pde = pa_to_pte((vm_offset_t)ptable)
+ | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+
+ /* Initialize the page table. */
+ for (pte = ptable; (va < phys_last_addr) && (pte < ptable+NPTES); pte++)
+ {
+ if ((pte - ptable) < ptenum(va))
+ {
+ WRITE_PTE_FAST(pte, 0);
+ }
+ else
+ {
+ extern char start[], etext[];
+
+ if ((va >= (vm_offset_t)start)
+ && (va + INTEL_PGBYTES <= (vm_offset_t)etext))
+ {
+ WRITE_PTE_FAST(pte, pa_to_pte(va)
+ | INTEL_PTE_VALID);
+ }
+ else
+ {
+ WRITE_PTE_FAST(pte, pa_to_pte(va)
+ | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+ }
+ va += INTEL_PGBYTES;
+ }
+ }
+ for (; pte < ptable+NPTES; pte++)
+ {
+ WRITE_PTE_FAST(pte, 0);
+ va += INTEL_PGBYTES;
+ }
+ }
+ }
+
+#if i860
+#error probably doesnt work anymore
+ XXX move to architecture-specific code just after the pmap_bootstrap call.
+
+ /* kvtophys should now work in phys range */
+
+ /*
+ * Mark page table pages non-cacheable
+ */
+
+ pt_pte = (pt_entry_t *)pte_to_pa(*(kpde + pdenum(sva))) + ptenum(sva);
+
+ for (va = load_start; va < tva; va += INTEL_PGBYTES*NPTES) {
+ /* Mark page table non-cacheable */
+ *pt_pte |= INTEL_PTE_NCACHE;
+ pt_pte++;
+ }
+
+ /*
+ * Map I/O space
+ */
+
+ ppde = kpde;
+ ppde += pdenum(IO_BASE);
+
+ if (pte_to_pa(*ppde) == 0) {
+ /* This pte has not been allocated */
+ ppte = (pt_entry_t *)kvtophys(virtual_avail);
+ ptend = ppte + NPTES;
+ virtual_avail = phystokv((vm_offset_t)ptend);
+ *ppde = pa_to_pte((vm_offset_t)ppte)
+ | INTEL_PTE_VALID
+ | INTEL_PTE_WRITE;
+ pte = ptend;
+
+ /* Mark page table non-cacheable */
+ *pt_pte |= INTEL_PTE_NCACHE;
+ pt_pte++;
+
+ bzero((char *)ppte, INTEL_PGBYTES);
+ } else {
+ ppte = (pt_entry_t *)(*ppde); /* first pte of page */
+ }
+ *ppde |= INTEL_PTE_USER;
+
+
+ WRITE_PTE(ppte + ptenum(FIFO_ADDR),
+ pa_to_pte(FIFO_ADDR_PH)
+ | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_NCACHE);
+
+ WRITE_PTE(ppte + ptenum(FIFO_ADDR + XEOD_OFF),
+ pa_to_pte(FIFO_ADDR_PH + XEOD_OFF_PH)
+ | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_NCACHE);
+
+/* XXX Allowed user access to control reg - cfj */
+ WRITE_PTE(ppte + ptenum(CSR_ADDR),
+ pa_to_pte(CSR_ADDR_PH)
+ | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_NCACHE | INTEL_PTE_USER);
+
+/* XXX Allowed user access to perf reg - cfj */
+ WRITE_PTE(ppte + ptenum(PERFCNT_ADDR),
+ pa_to_pte(PERFCNT_ADDR_PH)
+ | INTEL_PTE_VALID | INTEL_PTE_USER | INTEL_PTE_NCACHE | INTEL_PTE_USER);
+
+ WRITE_PTE(ppte + ptenum(UART_ADDR),
+ pa_to_pte(UART_ADDR_PH)
+ | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_NCACHE);
+
+ WRITE_PTE(ppte + ptenum(0xFFFFF000),
+ pa_to_pte(avail_end)
+ | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+ avail_start = kvtophys(virtual_avail);
+
+/*
+ * Turn on mapping
+ */
+
+ flush_and_ctxsw(kernel_pmap->dirbase);
+ paging_enabled = 1;
+
+ printf("Paging enabled.\n");
+#endif
+
+ /* Architecture-specific code will turn on paging
+ soon after we return from here. */
+}
+
+void pmap_virtual_space(startp, endp)
+ vm_offset_t *startp;
+ vm_offset_t *endp;
+{
+ *startp = kernel_virtual_start;
+ *endp = kernel_virtual_end;
+}
+
+/*
+ * Initialize the pmap module.
+ * Called by vm_init, to initialize any structures that the pmap
+ * system needs to map virtual memory.
+ */
+void pmap_init()
+{
+ register long npages;
+ vm_offset_t addr;
+ register vm_size_t s;
+ int i;
+
+ /*
+ * Allocate memory for the pv_head_table and its lock bits,
+ * the modify bit array, and the pte_page table.
+ */
+
+ npages = atop(phys_last_addr - phys_first_addr);
+ s = (vm_size_t) (sizeof(struct pv_entry) * npages
+ + pv_lock_table_size(npages)
+ + npages);
+
+ s = round_page(s);
+ if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
+ panic("pmap_init");
+ bzero((char *) addr, s);
+
+ /*
+ * Allocate the structures first to preserve word-alignment.
+ */
+ pv_head_table = (pv_entry_t) addr;
+ addr = (vm_offset_t) (pv_head_table + npages);
+
+ pv_lock_table = (char *) addr;
+ addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
+
+ pmap_phys_attributes = (char *) addr;
+
+ /*
+ * Create the zone of physical maps,
+ * and of the physical-to-virtual entries.
+ */
+ s = (vm_size_t) sizeof(struct pmap);
+ pmap_zone = zinit(s, 400*s, 4096, 0, "pmap"); /* XXX */
+ s = (vm_size_t) sizeof(struct pv_entry);
+ pv_list_zone = zinit(s, 10000*s, 4096, 0, "pv_list"); /* XXX */
+
+#if NCPUS > 1
+ /*
+ * Set up the pmap request lists
+ */
+ for (i = 0; i < NCPUS; i++) {
+ pmap_update_list_t up = &cpu_update_list[i];
+
+ simple_lock_init(&up->lock);
+ up->count = 0;
+ }
+#endif NCPUS > 1
+
+ /*
+ * Indicate that the PMAP module is now fully initialized.
+ */
+ pmap_initialized = TRUE;
+}
+
+#define valid_page(x) (pmap_initialized && pmap_valid_page(x))
+
+boolean_t pmap_verify_free(phys)
+ vm_offset_t phys;
+{
+ pv_entry_t pv_h;
+ int pai;
+ int spl;
+ boolean_t result;
+
+ assert(phys != vm_page_fictitious_addr);
+ if (!pmap_initialized)
+ return(TRUE);
+
+ if (!pmap_valid_page(phys))
+ return(FALSE);
+
+ PMAP_WRITE_LOCK(spl);
+
+ pai = pa_index(phys);
+ pv_h = pai_to_pvh(pai);
+
+ result = (pv_h->pmap == PMAP_NULL);
+ PMAP_WRITE_UNLOCK(spl);
+
+ return(result);
+}
+
+/*
+ * Routine: pmap_page_table_page_alloc
+ *
+ * Allocates a new physical page to be used as a page-table page.
+ *
+ * Must be called with the pmap system and the pmap unlocked,
+ * since these must be unlocked to use vm_page_grab.
+ */
+vm_offset_t
+pmap_page_table_page_alloc()
+{
+ register vm_page_t m;
+ register vm_offset_t pa;
+
+ check_simple_locks();
+
+ /*
+ * We cannot allocate the pmap_object in pmap_init,
+ * because it is called before the zone package is up.
+ * Allocate it now if it is missing.
+ */
+ if (pmap_object == VM_OBJECT_NULL)
+ pmap_object = vm_object_allocate(phys_last_addr - phys_first_addr);
+
+ /*
+ * Allocate a VM page for the level 2 page table entries.
+ */
+ while ((m = vm_page_grab()) == VM_PAGE_NULL)
+ VM_PAGE_WAIT((void (*)()) 0);
+
+ /*
+ * Map the page to its physical address so that it
+ * can be found later.
+ */
+ pa = m->phys_addr;
+ vm_object_lock(pmap_object);
+ vm_page_insert(m, pmap_object, pa);
+ vm_page_lock_queues();
+ vm_page_wire(m);
+ inuse_ptepages_count++;
+ vm_page_unlock_queues();
+ vm_object_unlock(pmap_object);
+
+ /*
+ * Zero the page.
+ */
+ bzero(phystokv(pa), PAGE_SIZE);
+
+#if i860
+ /*
+ * Mark the page table page(s) non-cacheable.
+ */
+ {
+ int i = ptes_per_vm_page;
+ pt_entry_t *pdp;
+
+ pdp = pmap_pte(kernel_pmap, pa);
+ do {
+ *pdp |= INTEL_PTE_NCACHE;
+ pdp++;
+ } while (--i > 0);
+ }
+#endif
+ return pa;
+}
+
+/*
+ * Deallocate a page-table page.
+ * The page-table page must have all mappings removed,
+ * and be removed from its page directory.
+ */
+void
+pmap_page_table_page_dealloc(pa)
+ vm_offset_t pa;
+{
+ vm_page_t m;
+
+ vm_object_lock(pmap_object);
+ m = vm_page_lookup(pmap_object, pa);
+ vm_page_lock_queues();
+ vm_page_free(m);
+ inuse_ptepages_count--;
+ vm_page_unlock_queues();
+ vm_object_unlock(pmap_object);
+}
+
+/*
+ * Create and return a physical map.
+ *
+ * If the size specified for the map
+ * is zero, the map is an actual physical
+ * map, and may be referenced by the
+ * hardware.
+ *
+ * If the size specified is non-zero,
+ * the map will be used in software only, and
+ * is bounded by that size.
+ */
+pmap_t pmap_create(size)
+ vm_size_t size;
+{
+ register pmap_t p;
+ register pmap_statistics_t stats;
+
+ /*
+ * A software use-only map doesn't even need a map.
+ */
+
+ if (size != 0) {
+ return(PMAP_NULL);
+ }
+
+/*
+ * Allocate a pmap struct from the pmap_zone. Then allocate
+ * the page descriptor table from the pd_zone.
+ */
+
+ p = (pmap_t) zalloc(pmap_zone);
+ if (p == PMAP_NULL)
+ panic("pmap_create");
+
+ if (kmem_alloc_wired(kernel_map,
+ (vm_offset_t *)&p->dirbase, INTEL_PGBYTES)
+ != KERN_SUCCESS)
+ panic("pmap_create");
+
+ bcopy(kernel_page_dir, p->dirbase, INTEL_PGBYTES);
+ p->ref_count = 1;
+
+ simple_lock_init(&p->lock);
+ p->cpus_using = 0;
+
+ /*
+ * Initialize statistics.
+ */
+
+ stats = &p->stats;
+ stats->resident_count = 0;
+ stats->wired_count = 0;
+
+ return(p);
+}
+
+/*
+ * Retire the given physical map from service.
+ * Should only be called if the map contains
+ * no valid mappings.
+ */
+
+void pmap_destroy(p)
+ register pmap_t p;
+{
+ register pt_entry_t *pdep;
+ register vm_offset_t pa;
+ register int c, s;
+ register vm_page_t m;
+
+ if (p == PMAP_NULL)
+ return;
+
+ SPLVM(s);
+ simple_lock(&p->lock);
+ c = --p->ref_count;
+ simple_unlock(&p->lock);
+ SPLX(s);
+
+ if (c != 0) {
+ return; /* still in use */
+ }
+
+ /*
+ * Free the memory maps, then the
+ * pmap structure.
+ */
+ for (pdep = p->dirbase;
+ pdep < &p->dirbase[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)];
+ pdep += ptes_per_vm_page) {
+ if (*pdep & INTEL_PTE_VALID) {
+ pa = pte_to_pa(*pdep);
+ vm_object_lock(pmap_object);
+ m = vm_page_lookup(pmap_object, pa);
+ if (m == VM_PAGE_NULL)
+ panic("pmap_destroy: pte page not in object");
+ vm_page_lock_queues();
+ vm_page_free(m);
+ inuse_ptepages_count--;
+ vm_page_unlock_queues();
+ vm_object_unlock(pmap_object);
+ }
+ }
+ kmem_free(kernel_map, p->dirbase, INTEL_PGBYTES);
+ zfree(pmap_zone, (vm_offset_t) p);
+}
+
+/*
+ * Add a reference to the specified pmap.
+ */
+
+void pmap_reference(p)
+ register pmap_t p;
+{
+ int s;
+ if (p != PMAP_NULL) {
+ SPLVM(s);
+ simple_lock(&p->lock);
+ p->ref_count++;
+ simple_unlock(&p->lock);
+ SPLX(s);
+ }
+}
+
+/*
+ * Remove a range of hardware page-table entries.
+ * The entries given are the first (inclusive)
+ * and last (exclusive) entries for the VM pages.
+ * The virtual address is the va for the first pte.
+ *
+ * The pmap must be locked.
+ * If the pmap is not the kernel pmap, the range must lie
+ * entirely within one pte-page. This is NOT checked.
+ * Assumes that the pte-page exists.
+ */
+
+/* static */
+void pmap_remove_range(pmap, va, spte, epte)
+ pmap_t pmap;
+ vm_offset_t va;
+ pt_entry_t *spte;
+ pt_entry_t *epte;
+{
+ register pt_entry_t *cpte;
+ int num_removed, num_unwired;
+ int pai;
+ vm_offset_t pa;
+
+#if DEBUG_PTE_PAGE
+ if (pmap != kernel_pmap)
+ ptep_check(get_pte_page(spte));
+#endif DEBUG_PTE_PAGE
+ num_removed = 0;
+ num_unwired = 0;
+
+ for (cpte = spte; cpte < epte;
+ cpte += ptes_per_vm_page, va += PAGE_SIZE) {
+
+ if (*cpte == 0)
+ continue;
+ pa = pte_to_pa(*cpte);
+
+ num_removed++;
+ if (*cpte & INTEL_PTE_WIRED)
+ num_unwired++;
+
+ if (!valid_page(pa)) {
+
+ /*
+ * Outside range of managed physical memory.
+ * Just remove the mappings.
+ */
+ register int i = ptes_per_vm_page;
+ register pt_entry_t *lpte = cpte;
+ do {
+ *lpte = 0;
+ lpte++;
+ } while (--i > 0);
+ continue;
+ }
+
+ pai = pa_index(pa);
+ LOCK_PVH(pai);
+
+ /*
+ * Get the modify and reference bits.
+ */
+ {
+ register int i;
+ register pt_entry_t *lpte;
+
+ i = ptes_per_vm_page;
+ lpte = cpte;
+ do {
+ pmap_phys_attributes[pai] |=
+ *lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
+ *lpte = 0;
+ lpte++;
+ } while (--i > 0);
+ }
+
+ /*
+ * Remove the mapping from the pvlist for
+ * this physical page.
+ */
+ {
+ register pv_entry_t pv_h, prev, cur;
+
+ pv_h = pai_to_pvh(pai);
+ if (pv_h->pmap == PMAP_NULL) {
+ panic("pmap_remove: null pv_list!");
+ }
+ if (pv_h->va == va && pv_h->pmap == pmap) {
+ /*
+ * Header is the pv_entry. Copy the next one
+ * to header and free the next one (we cannot
+ * free the header)
+ */
+ cur = pv_h->next;
+ if (cur != PV_ENTRY_NULL) {
+ *pv_h = *cur;
+ PV_FREE(cur);
+ }
+ else {
+ pv_h->pmap = PMAP_NULL;
+ }
+ }
+ else {
+ cur = pv_h;
+ do {
+ prev = cur;
+ if ((cur = prev->next) == PV_ENTRY_NULL) {
+ panic("pmap-remove: mapping not in pv_list!");
+ }
+ } while (cur->va != va || cur->pmap != pmap);
+ prev->next = cur->next;
+ PV_FREE(cur);
+ }
+ UNLOCK_PVH(pai);
+ }
+ }
+
+ /*
+ * Update the counts
+ */
+ pmap->stats.resident_count -= num_removed;
+ pmap->stats.wired_count -= num_unwired;
+}
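+
+/*
+ * The pv_list surgery above, reduced to stand-in types: the head
+ * entry lives in pv_head_table itself, so "removing" the head really
+ * copies the next entry over it (or clears the head's pmap when the
+ * mapping was the last one); any other entry is simply unlinked.  The
+ * kernel additionally returns the discarded entry via PV_FREE and
+ * panics if the mapping is not found.
+ */
+struct xpv_entry { struct xpv_entry *next; void *pmap; unsigned long va; };
+
+static void xpv_remove(struct xpv_entry *head, void *pmap, unsigned long va)
+{
+        struct xpv_entry *prev, *cur;
+
+        if (head->va == va && head->pmap == pmap) {
+                cur = head->next;
+                if (cur != 0)
+                        *head = *cur;           /* promote next into header */
+                else
+                        head->pmap = 0;         /* list is now empty */
+        } else {
+                for (prev = head; (cur = prev->next) != 0; prev = cur)
+                        if (cur->va == va && cur->pmap == pmap)
+                                break;
+                if (cur != 0)
+                        prev->next = cur->next; /* unlink it */
+        }
+}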
+
+/*
+ * Remove the given range of addresses
+ * from the specified map.
+ *
+ * It is assumed that the start and end are properly
+ * rounded to the hardware page size.
+ */
+
+void pmap_remove(map, s, e)
+ pmap_t map;
+ vm_offset_t s, e;
+{
+ int spl;
+ register pt_entry_t *pde;
+ register pt_entry_t *spte, *epte;
+ vm_offset_t l;
+
+ if (map == PMAP_NULL)
+ return;
+
+ PMAP_READ_LOCK(map, spl);
+
+ /*
+ * Invalidate the translation buffer first
+ */
+ PMAP_UPDATE_TLBS(map, s, e);
+
+ pde = pmap_pde(map, s);
+ while (s < e) {
+ l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
+ if (l > e)
+ l = e;
+ if (*pde & INTEL_PTE_VALID) {
+ spte = (pt_entry_t *)ptetokv(*pde);
+ spte = &spte[ptenum(s)];
+ epte = &spte[intel_btop(l-s)];
+ pmap_remove_range(map, s, spte, epte);
+ }
+ s = l;
+ pde++;
+ }
+
+ PMAP_READ_UNLOCK(map, spl);
+}
+
+/*
+ * Routine: pmap_page_protect
+ *
+ * Function:
+ * Lower the permission for all mappings to a given
+ * page.
+ */
+void pmap_page_protect(phys, prot)
+ vm_offset_t phys;
+ vm_prot_t prot;
+{
+ pv_entry_t pv_h, prev;
+ register pv_entry_t pv_e;
+ register pt_entry_t *pte;
+ int pai;
+ register pmap_t pmap;
+ int spl;
+ boolean_t remove;
+
+ assert(phys != vm_page_fictitious_addr);
+ if (!valid_page(phys)) {
+ /*
+ * Not a managed page.
+ */
+ return;
+ }
+
+ /*
+ * Determine the new protection.
+ */
+ switch (prot) {
+ case VM_PROT_READ:
+ case VM_PROT_READ|VM_PROT_EXECUTE:
+ remove = FALSE;
+ break;
+ case VM_PROT_ALL:
+ return; /* nothing to do */
+ default:
+ remove = TRUE;
+ break;
+ }
+
+ /*
+ * Lock the pmap system first, since we will be changing
+ * several pmaps.
+ */
+
+ PMAP_WRITE_LOCK(spl);
+
+ pai = pa_index(phys);
+ pv_h = pai_to_pvh(pai);
+
+ /*
+ * Walk down PV list, changing or removing all mappings.
+ * We do not have to lock the pv_list because we have
+ * the entire pmap system locked.
+ */
+ if (pv_h->pmap != PMAP_NULL) {
+
+ prev = pv_e = pv_h;
+ do {
+ pmap = pv_e->pmap;
+ /*
+ * Lock the pmap to block pmap_extract and similar routines.
+ */
+ simple_lock(&pmap->lock);
+
+ {
+ register vm_offset_t va;
+
+ va = pv_e->va;
+ pte = pmap_pte(pmap, va);
+
+ /*
+ * Consistency checks.
+ */
+ /* assert(*pte & INTEL_PTE_VALID); XXX */
+ /* assert(pte_to_phys(*pte) == phys); */
+
+ /*
+ * Invalidate TLBs for all CPUs using this mapping.
+ */
+ PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
+ }
+
+ /*
+ * Remove the mapping if new protection is NONE
+ * or if write-protecting a kernel mapping.
+ */
+ if (remove || pmap == kernel_pmap) {
+ /*
+ * Remove the mapping, collecting any modify bits.
+ */
+ if (*pte & INTEL_PTE_WIRED)
+ panic("pmap_remove_all removing a wired page");
+
+ {
+ register int i = ptes_per_vm_page;
+
+ do {
+ pmap_phys_attributes[pai] |=
+ *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+ *pte++ = 0;
+ } while (--i > 0);
+ }
+
+ pmap->stats.resident_count--;
+
+ /*
+ * Remove the pv_entry.
+ */
+ if (pv_e == pv_h) {
+ /*
+ * Fix up head later.
+ */
+ pv_h->pmap = PMAP_NULL;
+ }
+ else {
+ /*
+ * Delete this entry.
+ */
+ prev->next = pv_e->next;
+ PV_FREE(pv_e);
+ }
+ }
+ else {
+ /*
+ * Write-protect.
+ */
+ register int i = ptes_per_vm_page;
+
+ do {
+ *pte &= ~INTEL_PTE_WRITE;
+ pte++;
+ } while (--i > 0);
+
+ /*
+ * Advance prev.
+ */
+ prev = pv_e;
+ }
+
+ simple_unlock(&pmap->lock);
+
+ } while ((pv_e = prev->next) != PV_ENTRY_NULL);
+
+ /*
+ * If pv_head mapping was removed, fix it up.
+ */
+ if (pv_h->pmap == PMAP_NULL) {
+ pv_e = pv_h->next;
+ if (pv_e != PV_ENTRY_NULL) {
+ *pv_h = *pv_e;
+ PV_FREE(pv_e);
+ }
+ }
+ }
+
+ PMAP_WRITE_UNLOCK(spl);
+}
+
+/*
+ * Set the physical protection on the
+ * specified range of this map as requested.
+ * Will not increase permissions.
+ */
+void pmap_protect(map, s, e, prot)
+ pmap_t map;
+ vm_offset_t s, e;
+ vm_prot_t prot;
+{
+ register pt_entry_t *pde;
+ register pt_entry_t *spte, *epte;
+ vm_offset_t l;
+ int spl;
+
+ if (map == PMAP_NULL)
+ return;
+
+ /*
+ * Determine the new protection.
+ */
+ switch (prot) {
+ case VM_PROT_READ:
+ case VM_PROT_READ|VM_PROT_EXECUTE:
+ break;
+ case VM_PROT_READ|VM_PROT_WRITE:
+ case VM_PROT_ALL:
+ return; /* nothing to do */
+ default:
+ pmap_remove(map, s, e);
+ return;
+ }
+
+ /*
+ * If write-protecting in the kernel pmap,
+ * remove the mappings; the i386 ignores
+ * the write-permission bit in kernel mode.
+ *
+ * XXX should be #if'd for i386
+ */
+ if (map == kernel_pmap) {
+ pmap_remove(map, s, e);
+ return;
+ }
+
+ SPLVM(spl);
+ simple_lock(&map->lock);
+
+ /*
+ * Invalidate the translation buffer first
+ */
+ PMAP_UPDATE_TLBS(map, s, e);
+
+ pde = pmap_pde(map, s);
+ while (s < e) {
+ l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
+ if (l > e)
+ l = e;
+ if (*pde & INTEL_PTE_VALID) {
+ spte = (pt_entry_t *)ptetokv(*pde);
+ spte = &spte[ptenum(s)];
+ epte = &spte[intel_btop(l-s)];
+
+ while (spte < epte) {
+ if (*spte & INTEL_PTE_VALID)
+ *spte &= ~INTEL_PTE_WRITE;
+ spte++;
+ }
+ }
+ s = l;
+ pde++;
+ }
+
+ simple_unlock(&map->lock);
+ SPLX(spl);
+}
+
+/*
+ * Insert the given physical page (p) at
+ * the specified virtual address (v) in the
+ * target physical map with the protection requested.
+ *
+ * If specified, the page will be wired down, meaning
+ * that the related pte can not be reclaimed.
+ *
+ * NB: This is the only routine which MAY NOT lazy-evaluate
+ * or lose information. That is, this routine must actually
+ * insert this page into the given map NOW.
+ */
+void pmap_enter(pmap, v, pa, prot, wired)
+ register pmap_t pmap;
+ vm_offset_t v;
+ register vm_offset_t pa;
+ vm_prot_t prot;
+ boolean_t wired;
+{
+ register pt_entry_t *pte;
+ register pv_entry_t pv_h;
+ register int i, pai;
+ pv_entry_t pv_e;
+ pt_entry_t template;
+ int spl;
+ vm_offset_t old_pa;
+
+ assert(pa != vm_page_fictitious_addr);
+if (pmap_debug) printf("pmap(%x, %x)\n", v, pa);
+ if (pmap == PMAP_NULL)
+ return;
+
+ if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
+ && !wired /* hack for io_wire */ ) {
+ /*
+ * Because the 386 ignores write protection in kernel mode,
+ * we cannot enter a read-only kernel mapping, and must
+ * remove an existing mapping if changing it.
+ *
+ * XXX should be #if'd for i386
+ */
+ PMAP_READ_LOCK(pmap, spl);
+
+ pte = pmap_pte(pmap, v);
+ if (pte != PT_ENTRY_NULL && *pte != 0) {
+ /*
+ * Invalidate the translation buffer,
+ * then remove the mapping.
+ */
+ PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
+ pmap_remove_range(pmap, v, pte,
+ pte + ptes_per_vm_page);
+ }
+ PMAP_READ_UNLOCK(pmap, spl);
+ return;
+ }
+
+ /*
+ * Must allocate a new pvlist entry while we're unlocked;
+ * zalloc may cause pageout (which will lock the pmap system).
+ * If we determine we need a pvlist entry, we will unlock
+ * and allocate one. Then we will retry, throwing away
+ * the allocated entry later (if we no longer need it).
+ */
+ pv_e = PV_ENTRY_NULL;
+Retry:
+ PMAP_READ_LOCK(pmap, spl);
+
+ /*
+ * Expand pmap to include this pte. Assume that
+ * pmap is always expanded to include enough hardware
+ * pages to map one VM page.
+ */
+
+ while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
+ /*
+ * Need to allocate a new page-table page.
+ */
+ vm_offset_t ptp;
+ pt_entry_t *pdp;
+ int i;
+
+ if (pmap == kernel_pmap) {
+ /*
+ * Would have to enter the new page-table page in
+ * EVERY pmap.
+ */
+ panic("pmap_expand kernel pmap to %#x", v);
+ }
+
+ /*
+ * Unlock the pmap and allocate a new page-table page.
+ */
+ PMAP_READ_UNLOCK(pmap, spl);
+
+ ptp = pmap_page_table_page_alloc();
+
+ /*
+ * Re-lock the pmap and check that another thread has
+ * not already allocated the page-table page. If it
+ * has, discard the new page-table page (and try
+ * again to make sure).
+ */
+ PMAP_READ_LOCK(pmap, spl);
+
+ if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
+ /*
+ * Oops...
+ */
+ PMAP_READ_UNLOCK(pmap, spl);
+ pmap_page_table_page_dealloc(ptp);
+ PMAP_READ_LOCK(pmap, spl);
+ continue;
+ }
+
+ /*
+ * Enter the new page table page in the page directory.
+ */
+ i = ptes_per_vm_page;
+ /*XX pdp = &pmap->dirbase[pdenum(v) & ~(i-1)];*/
+ pdp = pmap_pde(pmap, v);
+ do {
+ *pdp = pa_to_pte(ptp) | INTEL_PTE_VALID
+ | INTEL_PTE_USER
+ | INTEL_PTE_WRITE;
+ pdp++;
+ ptp += INTEL_PGBYTES;
+ } while (--i > 0);
+#if i860
+ /*
+ * Flush the data cache.
+ */
+ flush();
+#endif /* i860 */
+
+ /*
+ * Now, get the address of the page-table entry.
+ */
+ continue;
+ }
+
+ /*
+ * Special case if the physical page is already mapped
+ * at this address.
+ */
+ old_pa = pte_to_pa(*pte);
+ if (*pte && old_pa == pa) {
+ /*
+ * May be changing its wired attribute or protection
+ */
+
+ if (wired && !(*pte & INTEL_PTE_WIRED))
+ pmap->stats.wired_count++;
+ else if (!wired && (*pte & INTEL_PTE_WIRED))
+ pmap->stats.wired_count--;
+
+ template = pa_to_pte(pa) | INTEL_PTE_VALID;
+ if (pmap != kernel_pmap)
+ template |= INTEL_PTE_USER;
+ if (prot & VM_PROT_WRITE)
+ template |= INTEL_PTE_WRITE;
+ if (wired)
+ template |= INTEL_PTE_WIRED;
+ PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
+ i = ptes_per_vm_page;
+ do {
+ if (*pte & INTEL_PTE_MOD)
+ template |= INTEL_PTE_MOD;
+ WRITE_PTE(pte, template)
+ pte++;
+ pte_increment_pa(template);
+ } while (--i > 0);
+ }
+ else {
+
+ /*
+ * Remove old mapping from the PV list if necessary.
+ */
+ if (*pte) {
+ /*
+ * Invalidate the translation buffer,
+ * then remove the mapping.
+ */
+ PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
+
+ /*
+ * Don't free the pte page if removing last
+ * mapping - we will immediately replace it.
+ */
+ pmap_remove_range(pmap, v, pte,
+ pte + ptes_per_vm_page);
+ }
+
+ if (valid_page(pa)) {
+
+ /*
+ * Enter the mapping in the PV list for this
+ * physical page.
+ */
+
+ pai = pa_index(pa);
+ LOCK_PVH(pai);
+ pv_h = pai_to_pvh(pai);
+
+ if (pv_h->pmap == PMAP_NULL) {
+ /*
+ * No mappings yet
+ */
+ pv_h->va = v;
+ pv_h->pmap = pmap;
+ pv_h->next = PV_ENTRY_NULL;
+ }
+ else {
+#if DEBUG
+ {
+ /* check that this mapping is not already there */
+ pv_entry_t e = pv_h;
+ while (e != PV_ENTRY_NULL) {
+ if (e->pmap == pmap && e->va == v)
+ panic("pmap_enter: already in pv_list");
+ e = e->next;
+ }
+ }
+#endif DEBUG
+
+ /*
+ * Add new pv_entry after header.
+ */
+ if (pv_e == PV_ENTRY_NULL) {
+ PV_ALLOC(pv_e);
+ if (pv_e == PV_ENTRY_NULL) {
+ UNLOCK_PVH(pai);
+ PMAP_READ_UNLOCK(pmap, spl);
+
+ /*
+ * Refill from zone.
+ */
+ pv_e = (pv_entry_t) zalloc(pv_list_zone);
+ goto Retry;
+ }
+ }
+ pv_e->va = v;
+ pv_e->pmap = pmap;
+ pv_e->next = pv_h->next;
+ pv_h->next = pv_e;
+ /*
+ * Remember that we used the pvlist entry.
+ */
+ pv_e = PV_ENTRY_NULL;
+ }
+ UNLOCK_PVH(pai);
+ }
+
+ /*
+ * And count the mapping.
+ */
+
+ pmap->stats.resident_count++;
+ if (wired)
+ pmap->stats.wired_count++;
+
+ /*
+ * Build a template to speed up entering -
+ * only the pfn changes.
+ */
+ template = pa_to_pte(pa) | INTEL_PTE_VALID;
+ if (pmap != kernel_pmap)
+ template |= INTEL_PTE_USER;
+ if (prot & VM_PROT_WRITE)
+ template |= INTEL_PTE_WRITE;
+ if (wired)
+ template |= INTEL_PTE_WIRED;
+ i = ptes_per_vm_page;
+ do {
+ WRITE_PTE(pte, template)
+ pte++;
+ pte_increment_pa(template);
+ } while (--i > 0);
+ }
+
+ if (pv_e != PV_ENTRY_NULL) {
+ PV_FREE(pv_e);
+ }
+
+ PMAP_READ_UNLOCK(pmap, spl);
+}
+
+/*
+ * Routine: pmap_change_wiring
+ * Function: Change the wiring attribute for a map/virtual-address
+ * pair.
+ * In/out conditions:
+ * The mapping must already exist in the pmap.
+ */
+void pmap_change_wiring(map, v, wired)
+ register pmap_t map;
+ vm_offset_t v;
+ boolean_t wired;
+{
+ register pt_entry_t *pte;
+ register int i;
+ int spl;
+
+ /*
+ * We must grab the pmap system lock because we may
+ * change a pte_page queue.
+ */
+ PMAP_READ_LOCK(map, spl);
+
+ if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL)
+ panic("pmap_change_wiring: pte missing");
+
+ if (wired && !(*pte & INTEL_PTE_WIRED)) {
+ /*
+ * wiring down mapping
+ */
+ map->stats.wired_count++;
+ i = ptes_per_vm_page;
+ do {
+ *pte++ |= INTEL_PTE_WIRED;
+ } while (--i > 0);
+ }
+ else if (!wired && (*pte & INTEL_PTE_WIRED)) {
+ /*
+ * unwiring mapping
+ */
+ map->stats.wired_count--;
+ i = ptes_per_vm_page;
+ do {
+ *pte &= ~INTEL_PTE_WIRED;
+ } while (--i > 0);
+ }
+
+ PMAP_READ_UNLOCK(map, spl);
+}
+
+/*
+ * Routine: pmap_extract
+ * Function:
+ * Extract the physical page address associated
+ * with the given map/virtual_address pair.
+ */
+
+vm_offset_t pmap_extract(pmap, va)
+ register pmap_t pmap;
+ vm_offset_t va;
+{
+ register pt_entry_t *pte;
+ register vm_offset_t pa;
+ int spl;
+
+ SPLVM(spl);
+ simple_lock(&pmap->lock);
+ if ((pte = pmap_pte(pmap, va)) == PT_ENTRY_NULL)
+ pa = (vm_offset_t) 0;
+ else if (!(*pte & INTEL_PTE_VALID))
+ pa = (vm_offset_t) 0;
+ else
+ pa = pte_to_pa(*pte) + (va & INTEL_OFFMASK);
+ simple_unlock(&pmap->lock);
+ SPLX(spl);
+ return(pa);
+}
+
+/*
+ * Copy the range specified by src_addr/len
+ * from the source map to the range dst_addr/len
+ * in the destination map.
+ *
+ * This routine is only advisory and need not do anything.
+ */
+#if 0
+void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
+ pmap_t dst_pmap;
+ pmap_t src_pmap;
+ vm_offset_t dst_addr;
+ vm_size_t len;
+ vm_offset_t src_addr;
+{
+#ifdef lint
+ dst_pmap++; src_pmap++; dst_addr++; len++; src_addr++;
+#endif lint
+}
+#endif 0
+
+/*
+ * Routine: pmap_collect
+ * Function:
+ * Garbage collects the physical map system for
+ * pages which are no longer used.
+ * Success need not be guaranteed -- that is, there
+ * may well be pages which are not referenced, but
+ * others may be collected.
+ * Usage:
+ * Called by the pageout daemon when pages are scarce.
+ */
+void pmap_collect(p)
+ pmap_t p;
+{
+ register pt_entry_t *pdp, *ptp;
+ pt_entry_t *eptp;
+ vm_offset_t pa;
+ int spl, wired;
+
+ if (p == PMAP_NULL)
+ return;
+
+ if (p == kernel_pmap)
+ return;
+
+ /*
+ * Garbage collect map.
+ */
+ PMAP_READ_LOCK(p, spl);
+ PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
+
+ for (pdp = p->dirbase;
+ pdp < &p->dirbase[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)];
+ pdp += ptes_per_vm_page)
+ {
+ if (*pdp & INTEL_PTE_VALID) {
+
+ pa = pte_to_pa(*pdp);
+ ptp = (pt_entry_t *)phystokv(pa);
+ eptp = ptp + NPTES*ptes_per_vm_page;
+
+ /*
+ * If the pte page has any wired mappings, we cannot
+ * free it.
+ */
+ wired = 0;
+ {
+ register pt_entry_t *ptep;
+ for (ptep = ptp; ptep < eptp; ptep++) {
+ if (*ptep & INTEL_PTE_WIRED) {
+ wired = 1;
+ break;
+ }
+ }
+ }
+ if (!wired) {
+ /*
+ * Remove the virtual addresses mapped by this pte page.
+ */
+ { /*XXX big hack*/
+ vm_offset_t va = pdenum2lin(pdp - p->dirbase);
+ if (p == kernel_pmap)
+ va = lintokv(va);
+ pmap_remove_range(p,
+ va,
+ ptp,
+ eptp);
+ }
+
+ /*
+ * Invalidate the page directory pointer.
+ */
+ {
+ register int i = ptes_per_vm_page;
+ register pt_entry_t *pdep = pdp;
+ do {
+ *pdep++ = 0;
+ } while (--i > 0);
+ }
+
+ PMAP_READ_UNLOCK(p, spl);
+
+ /*
+ * And free the pte page itself.
+ */
+ {
+ register vm_page_t m;
+
+ vm_object_lock(pmap_object);
+ m = vm_page_lookup(pmap_object, pa);
+ if (m == VM_PAGE_NULL)
+ panic("pmap_collect: pte page not in object");
+ vm_page_lock_queues();
+ vm_page_free(m);
+ inuse_ptepages_count--;
+ vm_page_unlock_queues();
+ vm_object_unlock(pmap_object);
+ }
+
+ PMAP_READ_LOCK(p, spl);
+ }
+ }
+ }
+ PMAP_READ_UNLOCK(p, spl);
+ return;
+
+}
+
+/*
+ * Routine: pmap_activate
+ * Function:
+ * Binds the given physical map to the given
+ * processor, and returns a hardware map description.
+ */
+#if 0
+void pmap_activate(my_pmap, th, my_cpu)
+ register pmap_t my_pmap;
+ thread_t th;
+ int my_cpu;
+{
+ PMAP_ACTIVATE(my_pmap, th, my_cpu);
+}
+#endif 0
+
+/*
+ * Routine: pmap_deactivate
+ * Function:
+ * Indicates that the given physical map is no longer
+ * in use on the specified processor. (This is a macro
+ * in pmap.h)
+ */
+#if 0
+void pmap_deactivate(pmap, th, which_cpu)
+ pmap_t pmap;
+ thread_t th;
+ int which_cpu;
+{
+#ifdef lint
+ pmap++; th++; which_cpu++;
+#endif lint
+ PMAP_DEACTIVATE(pmap, th, which_cpu);
+}
+#endif 0
+
+/*
+ * Routine: pmap_kernel
+ * Function:
+ * Returns the physical map handle for the kernel.
+ */
+#if 0
+pmap_t pmap_kernel()
+{
+ return (kernel_pmap);
+}
+#endif 0
+
+/*
+ * pmap_zero_page zeros the specified (machine independent) page.
+ * See machine/phys.c or machine/phys.s for implementation.
+ */
+#if 0
+pmap_zero_page(phys)
+ register vm_offset_t phys;
+{
+ register int i;
+
+ assert(phys != vm_page_fictitious_addr);
+ i = PAGE_SIZE / INTEL_PGBYTES;
+ phys = intel_pfn(phys);
+
+ while (i--)
+ zero_phys(phys++);
+}
+#endif 0
+
+/*
+ * pmap_copy_page copies the specified (machine independent) page.
+ * See machine/phys.c or machine/phys.s for implementation.
+ */
+#if 0
+pmap_copy_page(src, dst)
+ vm_offset_t src, dst;
+{
+ int i;
+
+ assert(src != vm_page_fictitious_addr);
+ assert(dst != vm_page_fictitious_addr);
+ i = PAGE_SIZE / INTEL_PGBYTES;
+
+ while (i--) {
+ copy_phys(intel_pfn(src), intel_pfn(dst));
+ src += INTEL_PGBYTES;
+ dst += INTEL_PGBYTES;
+ }
+}
+#endif 0
+
+/*
+ * Routine: pmap_pageable
+ * Function:
+ * Make the specified pages (by pmap, offset)
+ * pageable (or not) as requested.
+ *
+ * A page which is not pageable may not take
+ * a fault; therefore, its page table entry
+ * must remain valid for the duration.
+ *
+ * This routine is merely advisory; pmap_enter
+ * will specify that these pages are to be wired
+ * down (or not) as appropriate.
+ */
+pmap_pageable(pmap, start, end, pageable)
+ pmap_t pmap;
+ vm_offset_t start;
+ vm_offset_t end;
+ boolean_t pageable;
+{
+#ifdef lint
+ pmap++; start++; end++; pageable++;
+#endif lint
+}
+
+/*
+ * Clear specified attribute bits.
+ */
+void
+phys_attribute_clear(phys, bits)
+ vm_offset_t phys;
+ int bits;
+{
+ pv_entry_t pv_h;
+ register pv_entry_t pv_e;
+ register pt_entry_t *pte;
+ int pai;
+ register pmap_t pmap;
+ int spl;
+
+ assert(phys != vm_page_fictitious_addr);
+ if (!valid_page(phys)) {
+ /*
+ * Not a managed page.
+ */
+ return;
+ }
+
+ /*
+ * Lock the pmap system first, since we will be changing
+ * several pmaps.
+ */
+
+ PMAP_WRITE_LOCK(spl);
+
+ pai = pa_index(phys);
+ pv_h = pai_to_pvh(pai);
+
+ /*
+ * Walk down PV list, clearing all modify or reference bits.
+ * We do not have to lock the pv_list because we have
+ * the entire pmap system locked.
+ */
+ if (pv_h->pmap != PMAP_NULL) {
+ /*
+ * There are some mappings.
+ */
+ for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
+
+ pmap = pv_e->pmap;
+ /*
+ * Lock the pmap to block pmap_extract and similar routines.
+ */
+ simple_lock(&pmap->lock);
+
+ {
+ register vm_offset_t va;
+
+ va = pv_e->va;
+ pte = pmap_pte(pmap, va);
+
+#if 0
+ /*
+ * Consistency checks.
+ */
+ assert(*pte & INTEL_PTE_VALID);
+ /* assert(pte_to_phys(*pte) == phys); */
+#endif
+
+ /*
+ * Invalidate TLBs for all CPUs using this mapping.
+ */
+ PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
+ }
+
+ /*
+ * Clear modify or reference bits.
+ */
+ {
+ register int i = ptes_per_vm_page;
+ do {
+ *pte &= ~bits;
+ } while (--i > 0);
+ }
+ simple_unlock(&pmap->lock);
+ }
+ }
+
+ pmap_phys_attributes[pai] &= ~bits;
+
+ PMAP_WRITE_UNLOCK(spl);
+}
+
+/*
+ * Check specified attribute bits.
+ */
+boolean_t
+phys_attribute_test(phys, bits)
+ vm_offset_t phys;
+ int bits;
+{
+ pv_entry_t pv_h;
+ register pv_entry_t pv_e;
+ register pt_entry_t *pte;
+ int pai;
+ register pmap_t pmap;
+ int spl;
+
+ assert(phys != vm_page_fictitious_addr);
+ if (!valid_page(phys)) {
+ /*
+ * Not a managed page.
+ */
+ return (FALSE);
+ }
+
+ /*
+ * Lock the pmap system first, since we will be checking
+ * several pmaps.
+ */
+
+ PMAP_WRITE_LOCK(spl);
+
+ pai = pa_index(phys);
+ pv_h = pai_to_pvh(pai);
+
+ if (pmap_phys_attributes[pai] & bits) {
+ PMAP_WRITE_UNLOCK(spl);
+ return (TRUE);
+ }
+
+ /*
+ * Walk down PV list, checking all mappings.
+ * We do not have to lock the pv_list because we have
+ * the entire pmap system locked.
+ */
+ if (pv_h->pmap != PMAP_NULL) {
+ /*
+ * There are some mappings.
+ */
+ for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
+
+ pmap = pv_e->pmap;
+ /*
+ * Lock the pmap to block pmap_extract and similar routines.
+ */
+ simple_lock(&pmap->lock);
+
+ {
+ register vm_offset_t va;
+
+ va = pv_e->va;
+ pte = pmap_pte(pmap, va);
+
+#if 0
+ /*
+ * Consistency checks.
+ */
+ assert(*pte & INTEL_PTE_VALID);
+ /* assert(pte_to_phys(*pte) == phys); */
+#endif
+ }
+
+ /*
+ * Check modify or reference bits.
+ */
+ {
+ register int i = ptes_per_vm_page;
+
+ do {
+ if (*pte & bits) {
+ simple_unlock(&pmap->lock);
+ PMAP_WRITE_UNLOCK(spl);
+ return (TRUE);
+ }
+ } while (--i > 0);
+ }
+ simple_unlock(&pmap->lock);
+ }
+ }
+ PMAP_WRITE_UNLOCK(spl);
+ return (FALSE);
+}
+
+/*
+ * Clear the modify bits on the specified physical page.
+ */
+
+void pmap_clear_modify(phys)
+ register vm_offset_t phys;
+{
+ phys_attribute_clear(phys, PHYS_MODIFIED);
+}
+
+/*
+ * pmap_is_modified:
+ *
+ * Return whether or not the specified physical page is modified
+ * by any physical maps.
+ */
+
+boolean_t pmap_is_modified(phys)
+ register vm_offset_t phys;
+{
+ return (phys_attribute_test(phys, PHYS_MODIFIED));
+}
+
+/*
+ * pmap_clear_reference:
+ *
+ * Clear the reference bit on the specified physical page.
+ */
+
+void pmap_clear_reference(phys)
+ vm_offset_t phys;
+{
+ phys_attribute_clear(phys, PHYS_REFERENCED);
+}
+
+/*
+ * pmap_is_referenced:
+ *
+ * Return whether or not the specified physical page is referenced
+ * by any physical maps.
+ */
+
+boolean_t pmap_is_referenced(phys)
+ vm_offset_t phys;
+{
+ return (phys_attribute_test(phys, PHYS_REFERENCED));
+}
+
+#if NCPUS > 1
+/*
+* TLB Coherence Code (TLB "shootdown" code)
+*
+* Threads that belong to the same task share the same address space and
+* hence share a pmap. However, they may run on distinct cpus and thus
+* have distinct TLBs that cache page table entries. In order to guarantee
+* the TLBs are consistent, whenever a pmap is changed, all threads that
+* are active in that pmap must have their TLB updated. To keep track of
+* this information, the set of cpus that are currently using a pmap is
+* maintained within each pmap structure (cpus_using). Pmap_activate() and
+* pmap_deactivate() add and remove, respectively, a cpu from this set.
+* Since the TLBs are not addressable over the bus, each processor must
+* flush its own TLB; a processor that needs to invalidate another TLB
+* needs to interrupt the processor that owns that TLB to signal the
+* update.
+*
+* Whenever a pmap is updated, the lock on that pmap is locked, and all
+* cpus using the pmap are signaled to invalidate. All threads that need
+* to activate a pmap must wait for the lock to clear to await any updates
+* in progress before using the pmap. They must ACQUIRE the lock to add
+* their cpu to the cpus_using set. An implicit assumption made
+* throughout the TLB code is that all kernel code that runs at or higher
+* than splvm blocks out update interrupts, and that such code does not
+* touch pageable pages.
+*
+* A shootdown interrupt serves another function besides signaling a
+* processor to invalidate. The interrupt routine (pmap_update_interrupt)
+* waits for both the pmap lock (and the kernel pmap lock) to clear,
+* preventing user code from making implicit pmap updates while the
+* sending processor is performing its update. (This could happen via a
+* user data write reference that turns on the modify bit in the page
+* table). It must wait for any kernel updates that may have started
+* concurrently with a user pmap update because the IPC code
+* changes mappings.
+* Spinning on the VALUES of the locks is sufficient (rather than
+* having to acquire the locks) because any updates that occur subsequent
+* to finding the lock unlocked will be signaled via another interrupt.
+* (This assumes the interrupt is cleared before the low level interrupt code
+* calls pmap_update_interrupt()).
+*
+* The signaling processor must wait for any implicit updates in progress
+* to terminate before continuing with its update. Thus it must wait for an
+* acknowledgement of the interrupt from each processor for which such
+* references could be made. For maintaining this information, a set
+* cpus_active is used. A cpu is in this set if and only if it can
+* use a pmap. When pmap_update_interrupt() is entered, a cpu is removed from
+* this set; when all such cpus are removed, it is safe to update.
+*
+* Before attempting to acquire the update lock on a pmap, a cpu (A) must
+* be at least at the priority of the interprocessor interrupt
+* (splip<=splvm). Otherwise, A could grab a lock and be interrupted by a
+* kernel update; it would spin forever in pmap_update_interrupt() trying
+* to acquire the user pmap lock it had already acquired. Furthermore A
+* must remove itself from cpus_active. Otherwise, another cpu holding
+* the lock (B) could be in the process of sending an update signal to A,
+* and thus be waiting for A to remove itself from cpus_active. If A is
+* spinning on the lock at raised priority, this will never happen and a
+* deadlock will result.
+*/
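+
+/*
+ * A compressed model of the bookkeeping above, with the cpu sets as
+ * plain bit masks and stand-in names (x_*).  The initiator of an
+ * update spins while any *other* cpu is still both using the pmap and
+ * present in cpus_active; a target drops out of cpus_active inside
+ * pmap_update_interrupt() while it flushes, which is what releases
+ * the initiator.  Only the wait condition that PMAP_UPDATE_TLBS spins
+ * on is modelled here.
+ */
+struct x_pmap_model { unsigned cpus_using; };
+static volatile unsigned x_cpus_active;
+
+static int x_initiator_must_wait(struct x_pmap_model *p, unsigned my_cpu)
+{
+        unsigned me = 1u << my_cpu;
+        return (p->cpus_using & x_cpus_active & ~me) != 0;
+}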
+
+/*
+ * Signal another CPU that it must flush its TLB
+ */
+void signal_cpus(use_list, pmap, start, end)
+ cpu_set use_list;
+ pmap_t pmap;
+ vm_offset_t start, end;
+{
+ register int which_cpu, j;
+ register pmap_update_list_t update_list_p;
+
+ while ((which_cpu = ffs(use_list)) != 0) {
+ which_cpu -= 1; /* convert to 0 origin */
+
+ update_list_p = &cpu_update_list[which_cpu];
+ simple_lock(&update_list_p->lock);
+
+ j = update_list_p->count;
+ if (j >= UPDATE_LIST_SIZE) {
+ /*
+ * list overflowed. Change last item to
+ * indicate overflow.
+ */
+ update_list_p->item[UPDATE_LIST_SIZE-1].pmap = kernel_pmap;
+ update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
+ update_list_p->item[UPDATE_LIST_SIZE-1].end = VM_MAX_KERNEL_ADDRESS;
+ }
+ else {
+ update_list_p->item[j].pmap = pmap;
+ update_list_p->item[j].start = start;
+ update_list_p->item[j].end = end;
+ update_list_p->count = j+1;
+ }
+ cpu_update_needed[which_cpu] = TRUE;
+ simple_unlock(&update_list_p->lock);
+
+ if ((cpus_idle & (1 << which_cpu)) == 0)
+ interrupt_processor(which_cpu);
+ use_list &= ~(1 << which_cpu);
+ }
+}
+
+void process_pmap_updates(my_pmap)
+ register pmap_t my_pmap;
+{
+ register int my_cpu = cpu_number();
+ register pmap_update_list_t update_list_p;
+ register int j;
+ register pmap_t pmap;
+
+ update_list_p = &cpu_update_list[my_cpu];
+ simple_lock(&update_list_p->lock);
+
+ for (j = 0; j < update_list_p->count; j++) {
+ pmap = update_list_p->item[j].pmap;
+ if (pmap == my_pmap ||
+ pmap == kernel_pmap) {
+
+ INVALIDATE_TLB(update_list_p->item[j].start,
+ update_list_p->item[j].end);
+ }
+ }
+ update_list_p->count = 0;
+ cpu_update_needed[my_cpu] = FALSE;
+ simple_unlock(&update_list_p->lock);
+}
+
+/*
+ * Interrupt routine for TBIA requested from other processor.
+ */
+void pmap_update_interrupt()
+{
+ register int my_cpu;
+ register pmap_t my_pmap;
+ int s;
+
+ my_cpu = cpu_number();
+
+ /*
+ * Exit now if we're idle. We'll pick up the update request
+ * when we go active, and we must not put ourselves back in
+ * the active set because we'll never process the interrupt
+ * while we're idle (thus hanging the system).
+ */
+ if (cpus_idle & (1 << my_cpu))
+ return;
+
+ if (current_thread() == THREAD_NULL)
+ my_pmap = kernel_pmap;
+ else {
+ my_pmap = current_pmap();
+ if (!pmap_in_use(my_pmap, my_cpu))
+ my_pmap = kernel_pmap;
+ }
+
+ /*
+ * Raise spl to splvm (above splip) to block out pmap_extract
+ * from IO code (which would put this cpu back in the active
+ * set).
+ */
+ s = splvm();
+
+ do {
+
+ /*
+ * Indicate that we're not using either user or kernel
+ * pmap.
+ */
+ i_bit_clear(my_cpu, &cpus_active);
+
+ /*
+ * Wait for any pmap updates in progress, on either user
+ * or kernel pmap.
+ */
+ while (*(volatile int *)&my_pmap->lock.lock_data ||
+ *(volatile int *)&kernel_pmap->lock.lock_data)
+ continue;
+
+ process_pmap_updates(my_pmap);
+
+ i_bit_set(my_cpu, &cpus_active);
+
+ } while (cpu_update_needed[my_cpu]);
+
+ splx(s);
+}
+#else NCPUS > 1
+/*
+ * Dummy routine to satisfy external reference.
+ */
+void pmap_update_interrupt()
+{
+ /* should never be called. */
+}
+#endif NCPUS > 1
+
+#if i860 /* akp */
+void set_dirbase(dirbase)
+ register vm_offset_t dirbase;
+{
+ /*flush();*/
+ /*flush_tlb();*/
+ flush_and_ctxsw(dirbase);
+}
+#endif i860
+
+#ifdef i386
+/* Unmap page 0 to trap NULL references. */
+void
+pmap_unmap_page_zero ()
+{
+ int *pte;
+
+ pte = (int *) pmap_pte (kernel_pmap, 0);
+ assert (pte);
+ *pte = 0;
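+  /* Reloading CR3 flushes the TLB, discarding the stale mapping of page 0.  */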
+ asm volatile ("movl %%cr3,%%eax; movl %%eax,%%cr3" ::: "ax");
+}
+#endif /* i386 */
diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
new file mode 100644
index 0000000..6e759a6
--- /dev/null
+++ b/i386/intel/pmap.h
@@ -0,0 +1,401 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: pmap.h
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Machine-dependent structures for the physical map module.
+ */
+
+#ifndef _PMAP_MACHINE_
+#define _PMAP_MACHINE_ 1
+
+#ifndef ASSEMBLER
+
+#include <kern/zalloc.h>
+#include <kern/lock.h>
+#include <mach/machine/vm_param.h>
+#include <mach/vm_statistics.h>
+#include <mach/kern_return.h>
+
+/*
+ * Define the generic in terms of the specific
+ */
+
+#if i386
+#define INTEL_PGBYTES I386_PGBYTES
+#define INTEL_PGSHIFT I386_PGSHIFT
+#define intel_btop(x) i386_btop(x)
+#define intel_ptob(x) i386_ptob(x)
+#define intel_round_page(x) i386_round_page(x)
+#define intel_trunc_page(x) i386_trunc_page(x)
+#define trunc_intel_to_vm(x) trunc_i386_to_vm(x)
+#define round_intel_to_vm(x) round_i386_to_vm(x)
+#define vm_to_intel(x) vm_to_i386(x)
+#endif i386
+#if i860
+#define INTEL_PGBYTES I860_PGBYTES
+#define INTEL_PGSHIFT I860_PGSHIFT
+#define intel_btop(x) i860_btop(x)
+#define intel_ptob(x) i860_ptob(x)
+#define intel_round_page(x) i860_round_page(x)
+#define intel_trunc_page(x) i860_trunc_page(x)
+#define trunc_intel_to_vm(x) trunc_i860_to_vm(x)
+#define round_intel_to_vm(x) round_i860_to_vm(x)
+#define vm_to_intel(x) vm_to_i860(x)
+#endif i860
+
+/*
+ * i386/i486/i860 Page Table Entry
+ */
+
+typedef unsigned int pt_entry_t;
+#define PT_ENTRY_NULL ((pt_entry_t *) 0)
+
+#endif ASSEMBLER
+
+#define INTEL_OFFMASK 0xfff /* offset within page */
+#define PDESHIFT 22 /* page descriptor shift */
+#define PDEMASK 0x3ff /* mask for page descriptor index */
+#define PTESHIFT 12 /* page table shift */
+#define PTEMASK 0x3ff /* mask for page table index */
+
+/*
+ * Convert linear offset to page descriptor index
+ */
+#define lin2pdenum(a) (((a) >> PDESHIFT) & PDEMASK)
+
+/*
+ * Convert page descriptor index to linear address
+ */
+#define pdenum2lin(a) ((vm_offset_t)(a) << PDESHIFT)
+
+/*
+ * Convert linear offset to page table index
+ */
+#define ptenum(a) (((a) >> PTESHIFT) & PTEMASK)
+
+#define NPTES (intel_ptob(1)/sizeof(pt_entry_t))
+#define NPDES (intel_ptob(1)/sizeof(pt_entry_t))
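+
+/*
+ * Example (illustrative, not from the original sources): with 4Kbyte
+ * pages, the linear address 0xc0101234 decomposes as
+ *
+ *	lin2pdenum(0xc0101234)		== 0x300	(directory index)
+ *	ptenum(0xc0101234)		== 0x101	(table index)
+ *	0xc0101234 & INTEL_OFFMASK	== 0x234	(offset in page)
+ *
+ * and NPTES == NPDES == 1024, since one 4Kbyte page holds 1024
+ * four-byte entries.
+ */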
+
+/*
+ * Hardware pte bit definitions (to be used directly on the ptes
+ * without using the bit fields).
+ */
+
+#if i860
+#define INTEL_PTE_valid 0x00000001
+#else
+#define INTEL_PTE_VALID 0x00000001
+#endif
+#define INTEL_PTE_WRITE 0x00000002
+#define INTEL_PTE_USER 0x00000004
+#define INTEL_PTE_WTHRU 0x00000008
+#define INTEL_PTE_NCACHE 0x00000010
+#define INTEL_PTE_REF 0x00000020
+#define INTEL_PTE_MOD 0x00000040
+#define INTEL_PTE_WIRED 0x00000200
+#define INTEL_PTE_PFN 0xfffff000
+
+#if i860
+#if NOCACHE
+#define INTEL_PTE_VALID (INTEL_PTE_valid \
+ |INTEL_PTE_WTHRU \
+ |INTEL_PTE_NCACHE \
+ |INTEL_PTE_REF \
+ |INTEL_PTE_MOD \
+ )
+#else NOCACHE
+#define INTEL_PTE_VALID (INTEL_PTE_valid \
+ |INTEL_PTE_REF \
+ |INTEL_PTE_MOD \
+ )
+#endif NOCACHE
+#endif i860
+
+#define pa_to_pte(a) ((a) & INTEL_PTE_PFN)
+#define pte_to_pa(p) ((p) & INTEL_PTE_PFN)
+#define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1)
+
+/*
+ * Convert page table entry to kernel virtual address
+ */
+#define ptetokv(a) (phystokv(pte_to_pa(a)))
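+
+/*
+ * Example (illustrative, not from the original sources): a valid,
+ * writable, user-accessible mapping of the physical page containing
+ * address `pa' could be composed as
+ *
+ *	pte = pa_to_pte(pa) | INTEL_PTE_VALID
+ *			    | INTEL_PTE_WRITE | INTEL_PTE_USER;
+ *
+ * and stored into the page table slot with WRITE_PTE() (see pmap.c).
+ */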
+
+#ifndef ASSEMBLER
+typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */
+ /* changed by other processors */
+
+struct pmap {
+ pt_entry_t *dirbase; /* page directory pointer register */
+ int ref_count; /* reference count */
+ decl_simple_lock_data(,lock)
+ /* lock on map */
+ struct pmap_statistics stats; /* map statistics */
+ cpu_set cpus_using; /* bitmap of cpus using pmap */
+};
+
+typedef struct pmap *pmap_t;
+
+#define PMAP_NULL ((pmap_t) 0)
+
+#if i860
+/*#define set_dirbase(dirbase) flush_and_ctxsw(dirbase)*//*akp*/
+#else
+#define set_dirbase(dirbase) set_cr3(dirbase)
+#endif
+
+#if NCPUS > 1
+/*
+ * List of cpus that are actively using mapped memory. Any
+ * pmap update operation must wait for all cpus in this list.
+ * Update operations must still be queued to cpus not in this
+ * list.
+ */
+cpu_set cpus_active;
+
+/*
+ * List of cpus that are idle, but still operating, and will want
+ * to see any kernel pmap updates when they become active.
+ */
+cpu_set cpus_idle;
+
+/*
+ * Quick test for pmap update requests.
+ */
+volatile
+boolean_t cpu_update_needed[NCPUS];
+
+/*
+ * External declarations for PMAP_ACTIVATE.
+ */
+
+void process_pmap_updates();
+void pmap_update_interrupt();
+extern pmap_t kernel_pmap;
+
+#endif NCPUS > 1
+
+/*
+ * Machine dependent routines that are used only for i386/i486/i860.
+ */
+
+pt_entry_t *pmap_pte(pmap_t pmap, vm_offset_t addr);
+
+/*
+ * Macros for speed.
+ */
+
+#if NCPUS > 1
+
+/*
+ * For multiple CPUS, PMAP_ACTIVATE and PMAP_DEACTIVATE must manage
+ * fields to control TLB invalidation on other CPUS.
+ */
+
+#define PMAP_ACTIVATE_KERNEL(my_cpu) { \
+ \
+ /* \
+ * Let pmap updates proceed while we wait for this pmap. \
+ */ \
+ i_bit_clear((my_cpu), &cpus_active); \
+ \
+ /* \
+ * Lock the pmap to put this cpu in its active set. \
+ * Wait for updates here. \
+ */ \
+ simple_lock(&kernel_pmap->lock); \
+ \
+ /* \
+ * Process invalidate requests for the kernel pmap. \
+ */ \
+ if (cpu_update_needed[(my_cpu)]) \
+ process_pmap_updates(kernel_pmap); \
+ \
+ /* \
+ * Mark that this cpu is using the pmap. \
+ */ \
+ i_bit_set((my_cpu), &kernel_pmap->cpus_using); \
+ \
+ /* \
+ * Mark this cpu active - IPL will be lowered by \
+ * load_context(). \
+ */ \
+ i_bit_set((my_cpu), &cpus_active); \
+ \
+ simple_unlock(&kernel_pmap->lock); \
+}
+
+#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \
+ /* \
+ * Mark pmap no longer in use by this cpu even if \
+ * pmap is locked against updates. \
+ */ \
+ i_bit_clear((my_cpu), &kernel_pmap->cpus_using); \
+}
+
+#define PMAP_ACTIVATE_USER(pmap, th, my_cpu) { \
+ register pmap_t tpmap = (pmap); \
+ \
+ if (tpmap == kernel_pmap) { \
+ /* \
+ * If this is the kernel pmap, switch to its page tables. \
+ */ \
+ set_dirbase(kvtophys(tpmap->dirbase)); \
+ } \
+ else { \
+ /* \
+ * Let pmap updates proceed while we wait for this pmap. \
+ */ \
+ i_bit_clear((my_cpu), &cpus_active); \
+ \
+ /* \
+ * Lock the pmap to put this cpu in its active set. \
+ * Wait for updates here. \
+ */ \
+ simple_lock(&tpmap->lock); \
+ \
+ /* \
+ * No need to invalidate the TLB - the entire user pmap \
+ * will be invalidated by reloading dirbase. \
+ */ \
+ set_dirbase(kvtophys(tpmap->dirbase)); \
+ \
+ /* \
+ * Mark that this cpu is using the pmap. \
+ */ \
+ i_bit_set((my_cpu), &tpmap->cpus_using); \
+ \
+ /* \
+ * Mark this cpu active - IPL will be lowered by \
+ * load_context(). \
+ */ \
+ i_bit_set((my_cpu), &cpus_active); \
+ \
+ simple_unlock(&tpmap->lock); \
+ } \
+}
+
+#define PMAP_DEACTIVATE_USER(pmap, thread, my_cpu) { \
+ register pmap_t tpmap = (pmap); \
+ \
+ /* \
+ * Do nothing if this is the kernel pmap. \
+ */ \
+ if (tpmap != kernel_pmap) { \
+ /* \
+ * Mark pmap no longer in use by this cpu even if \
+ * pmap is locked against updates. \
+ */ \
+ i_bit_clear((my_cpu), &(pmap)->cpus_using); \
+ } \
+}
+
+#define MARK_CPU_IDLE(my_cpu) { \
+ /* \
+ * Mark this cpu idle, and remove it from the active set, \
+ * since it is not actively using any pmap. Signal_cpus \
+ * will notice that it is idle, and avoid signaling it, \
+ * but will queue the update request for when the cpu \
+ * becomes active. \
+ */ \
+ int s = splvm(); \
+ i_bit_set((my_cpu), &cpus_idle); \
+ i_bit_clear((my_cpu), &cpus_active); \
+ splx(s); \
+}
+
+#define MARK_CPU_ACTIVE(my_cpu) { \
+ \
+ int s = splvm(); \
+ /* \
+ * If a kernel_pmap update was requested while this cpu \
+ * was idle, process it as if we got the interrupt. \
+ * Before doing so, remove this cpu from the idle set. \
+ * Since we do not grab any pmap locks while we flush \
+ * our TLB, another cpu may start an update operation \
+ * before we finish. Removing this cpu from the idle \
+ * set assures that we will receive another update \
+ * interrupt if this happens. \
+ */ \
+ i_bit_clear((my_cpu), &cpus_idle); \
+ \
+ if (cpu_update_needed[(my_cpu)]) \
+ pmap_update_interrupt(); \
+ \
+ /* \
+ * Mark that this cpu is now active. \
+ */ \
+ i_bit_set((my_cpu), &cpus_active); \
+ splx(s); \
+}
+
+#else NCPUS > 1
+
+/*
+ * With only one CPU, we just have to indicate whether the pmap is
+ * in use.
+ */
+
+#define PMAP_ACTIVATE_KERNEL(my_cpu) { \
+ kernel_pmap->cpus_using = TRUE; \
+}
+
+#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \
+ kernel_pmap->cpus_using = FALSE; \
+}
+
+#define PMAP_ACTIVATE_USER(pmap, th, my_cpu) { \
+ register pmap_t tpmap = (pmap); \
+ \
+ set_dirbase(kvtophys(tpmap->dirbase)); \
+ if (tpmap != kernel_pmap) { \
+ tpmap->cpus_using = TRUE; \
+ } \
+}
+
+#define PMAP_DEACTIVATE_USER(pmap, thread, cpu) { \
+ if ((pmap) != kernel_pmap) \
+ (pmap)->cpus_using = FALSE; \
+}
+
+#endif NCPUS > 1
+
+#define PMAP_CONTEXT(pmap, thread)
+
+#define pmap_kernel() (kernel_pmap)
+#define pmap_resident_count(pmap) ((pmap)->stats.resident_count)
+#define pmap_phys_address(frame) ((vm_offset_t) (intel_ptob(frame)))
+#define pmap_phys_to_frame(phys) ((int) (intel_btop(phys)))
+#define pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr)
+#define pmap_attribute(pmap,addr,size,attr,value) \
+ (KERN_INVALID_ADDRESS)
+
+#endif ASSEMBLER
+
+#endif _PMAP_MACHINE_
diff --git a/i386/intel/read_fault.c b/i386/intel/read_fault.c
new file mode 100644
index 0000000..d0c03e8
--- /dev/null
+++ b/i386/intel/read_fault.c
@@ -0,0 +1,178 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#include <vm/vm_fault.h>
+#include <mach/kern_return.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+
+#include <kern/macro_help.h>
+
+/*
+ * Expansion of vm_fault for read fault in kernel mode.
+ * Must enter the mapping as writable, since the i386
+ * (and i860 in i386 compatibility mode) ignores write
+ * protection in kernel mode.
+ */
+kern_return_t
+intel_read_fault(map, vaddr)
+ vm_map_t map;
+ vm_offset_t vaddr;
+{
+ vm_map_version_t version; /* Map version for
+ verification */
+ vm_object_t object; /* Top-level object */
+ vm_offset_t offset; /* Top-level offset */
+ vm_prot_t prot; /* Protection for mapping */
+ vm_page_t result_page; /* Result of vm_fault_page */
+ vm_page_t top_page; /* Placeholder page */
+ boolean_t wired; /* Is map region wired? */
+ boolean_t su;
+ kern_return_t result;
+ register vm_page_t m;
+
+ RetryFault:
+
+ /*
+ * Find the backing store object and offset into it
+ * to begin search.
+ */
+ result = vm_map_lookup(&map, vaddr, VM_PROT_READ, &version,
+ &object, &offset, &prot, &wired, &su);
+ if (result != KERN_SUCCESS)
+ return (result);
+
+ /*
+ * Make a reference to this object to prevent its
+ * disposal while we are playing with it.
+ */
+ assert(object->ref_count > 0);
+ object->ref_count++;
+ vm_object_paging_begin(object);
+
+ result = vm_fault_page(object, offset, VM_PROT_READ, FALSE, TRUE,
+ &prot, &result_page, &top_page,
+ FALSE, (void (*)()) 0);
+
+ if (result != VM_FAULT_SUCCESS) {
+ vm_object_deallocate(object);
+
+ switch (result) {
+ case VM_FAULT_RETRY:
+ goto RetryFault;
+ case VM_FAULT_INTERRUPTED:
+ return (KERN_SUCCESS);
+ case VM_FAULT_MEMORY_SHORTAGE:
+ VM_PAGE_WAIT((void (*)()) 0);
+ goto RetryFault;
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ goto RetryFault;
+ case VM_FAULT_MEMORY_ERROR:
+ return (KERN_MEMORY_ERROR);
+ }
+ }
+
+ m = result_page;
+
+ /*
+ * How to clean up the result of vm_fault_page. This
+ * happens whether the mapping is entered or not.
+ */
+
+#define UNLOCK_AND_DEALLOCATE \
+ MACRO_BEGIN \
+ vm_fault_cleanup(m->object, top_page); \
+ vm_object_deallocate(object); \
+ MACRO_END
+
+ /*
+ * What to do with the resulting page from vm_fault_page
+ * if it doesn't get entered into the physical map:
+ */
+
+#define RELEASE_PAGE(m) \
+ MACRO_BEGIN \
+ PAGE_WAKEUP_DONE(m); \
+ vm_page_lock_queues(); \
+ if (!m->active && !m->inactive) \
+ vm_page_activate(m); \
+ vm_page_unlock_queues(); \
+ MACRO_END
+
+ /*
+ * We must verify that the maps have not changed.
+ */
+ vm_object_unlock(m->object);
+ while (!vm_map_verify(map, &version)) {
+ vm_object_t retry_object;
+ vm_offset_t retry_offset;
+ vm_prot_t retry_prot;
+
+ result = vm_map_lookup(&map, vaddr, VM_PROT_READ, &version,
+ &retry_object, &retry_offset, &retry_prot,
+ &wired, &su);
+ if (result != KERN_SUCCESS) {
+ vm_object_lock(m->object);
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ return (result);
+ }
+
+ vm_object_unlock(retry_object);
+
+ if (retry_object != object || retry_offset != offset) {
+ vm_object_lock(m->object);
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ goto RetryFault;
+ }
+ }
+
+ /*
+ * Put the page in the physical map.
+ */
+ PMAP_ENTER(map->pmap, vaddr, m, VM_PROT_READ|VM_PROT_WRITE, wired);
+
+ vm_object_lock(m->object);
+ vm_page_lock_queues();
+ if (!m->active && !m->inactive)
+ vm_page_activate(m);
+ m->reference = TRUE;
+ vm_page_unlock_queues();
+
+ vm_map_verify_done(map, &version);
+ PAGE_WAKEUP_DONE(m);
+
+ UNLOCK_AND_DEALLOCATE;
+
+#undef UNLOCK_AND_DEALLOCATE
+#undef RELEASE_PAGE
+
+ return (KERN_SUCCESS);
+}