path: root/vm/vm_fault.c
author Thomas Bushnell <thomas@gnu.org> 1997-02-25 21:28:37 +0000
committer Thomas Bushnell <thomas@gnu.org> 1997-02-25 21:28:37 +0000
commit f07a4c844da9f0ecae5bbee1ab94be56505f26f7 (patch)
tree 12b07c7e578fc1a5f53dbfde2632408491ff2a70 /vm/vm_fault.c
Initial source
Diffstat (limited to 'vm/vm_fault.c')
-rw-r--r-- vm/vm_fault.c 2182
1 files changed, 2182 insertions, 0 deletions
diff --git a/vm/vm_fault.c b/vm/vm_fault.c
new file mode 100644
index 0000000..e45687c
--- /dev/null
+++ b/vm/vm_fault.c
@@ -0,0 +1,2182 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1994,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm_fault.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Page fault handling module.
+ */
+#include <mach_pagemap.h>
+#include <mach_kdb.h>
+#include <mach_pcsample.h>
+
+
+#include <vm/vm_fault.h>
+#include <mach/kern_return.h>
+#include <mach/message.h> /* for error codes */
+#include <kern/counters.h>
+#include <kern/thread.h>
+#include <kern/sched_prim.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+#include <mach/vm_statistics.h>
+#include <vm/vm_pageout.h>
+#include <mach/vm_param.h>
+#include <mach/memory_object.h>
+#include "memory_object_user.h"
+ /* For memory_object_data_{request,unlock} */
+#include <kern/mach_param.h>
+#include <kern/macro_help.h>
+#include <kern/zalloc.h>
+
+#if MACH_PCSAMPLE
+#include <kern/pc_sample.h>
+#endif
+
+
+
+/*
+ * State needed by vm_fault_continue.
+ * This is a little hefty to drop directly
+ * into the thread structure.
+ */
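+/*
+ * The vmf_ fields are read back when vm_fault itself is resumed;
+ * the vmfp_ fields are read back when vm_fault_page is resumed.
+ */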
+typedef struct vm_fault_state {
+ struct vm_map *vmf_map;
+ vm_offset_t vmf_vaddr;
+ vm_prot_t vmf_fault_type;
+ boolean_t vmf_change_wiring;
+ void (*vmf_continuation)();
+ vm_map_version_t vmf_version;
+ boolean_t vmf_wired;
+ struct vm_object *vmf_object;
+ vm_offset_t vmf_offset;
+ vm_prot_t vmf_prot;
+
+ boolean_t vmfp_backoff;
+ struct vm_object *vmfp_object;
+ vm_offset_t vmfp_offset;
+ struct vm_page *vmfp_first_m;
+ vm_prot_t vmfp_access;
+} vm_fault_state_t;
+
+zone_t vm_fault_state_zone = 0;
+
+int vm_object_absent_max = 50;
+
+int vm_fault_debug = 0;
+
+boolean_t vm_fault_dirty_handling = FALSE;
+boolean_t vm_fault_interruptible = TRUE;
+
+boolean_t software_reference_bits = TRUE;
+
+#if MACH_KDB
+extern struct db_watchpoint *db_watchpoint_list;
+#endif /* MACH_KDB */
+
+/*
+ * Routine: vm_fault_init
+ * Purpose:
+ * Initialize our private data structures.
+ */
+void vm_fault_init()
+{
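+ /*
+ * At most one fault state record is in use per thread (it is
+ * hung off the thread's ith_other field), so the zone is sized
+ * for THREAD_MAX records.
+ */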
+ vm_fault_state_zone = zinit(sizeof(vm_fault_state_t),
+ THREAD_MAX * sizeof(vm_fault_state_t),
+ sizeof(vm_fault_state_t),
+ 0, "vm fault state");
+}
+
+/*
+ * Routine: vm_fault_cleanup
+ * Purpose:
+ * Clean up the result of vm_fault_page.
+ * Results:
+ * The paging reference for "object" is released.
+ * "object" is unlocked.
+ * If "top_page" is not null, "top_page" is
+ * freed and the paging reference for the object
+ * containing it is released.
+ *
+ * In/out conditions:
+ * "object" must be locked.
+ */
+void
+vm_fault_cleanup(object, top_page)
+ register vm_object_t object;
+ register vm_page_t top_page;
+{
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ if (top_page != VM_PAGE_NULL) {
+ object = top_page->object;
+ vm_object_lock(object);
+ VM_PAGE_FREE(top_page);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ }
+}
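+/*
+ * Typical use once the caller is done with the page returned by
+ * vm_fault_page (compare vm_fault_copy_cleanup below, which also
+ * reactivates the page):
+ *
+ *	vm_object_lock(result_page->object);
+ *	PAGE_WAKEUP_DONE(result_page);
+ *	vm_fault_cleanup(result_page->object, top_page);
+ */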
+
+
+#if MACH_PCSAMPLE
+/*
+ * Do PC sampling on current thread, assuming
+ * that it is the thread taking this page fault.
+ *
+ * Must check for THREAD_NULL, since faults
+ * can occur before threads are running.
+ */
+
+#define vm_stat_sample(flavor) \
+ MACRO_BEGIN \
+ thread_t _thread_ = current_thread(); \
+ \
+ if (_thread_ != THREAD_NULL) \
+ take_pc_sample_macro(_thread_, (flavor)); \
+ MACRO_END
+
+#else
+#define vm_stat_sample(x)
+#endif /* MACH_PCSAMPLE */
+
+
+
+/*
+ * Routine: vm_fault_page
+ * Purpose:
+ * Find the resident page for the virtual memory
+ * specified by the given virtual memory object
+ * and offset.
+ * Additional arguments:
+ * The required permissions for the page are given
+ * in "fault_type". Desired permissions are included
+ * in "protection".
+ *
+ * If the desired page is known to be resident (for
+ * example, because it was previously wired down), asserting
+ * the "must_be_resident" parameter will speed the search.
+ *
+ * If the operation can be interrupted (by thread_abort
+ * or thread_terminate), then the "interruptible"
+ * parameter should be asserted.
+ *
+ * Results:
+ * The page containing the proper data is returned
+ * in "result_page".
+ *
+ * In/out conditions:
+ * The source object must be locked and referenced,
+ * and must donate one paging reference. The reference
+ * is not affected. The paging reference and lock are
+ * consumed.
+ *
+ * If the call succeeds, the object in which "result_page"
+ * resides is left locked and holding a paging reference.
+ * If this is not the original object, a busy page in the
+ * original object is returned in "top_page", to prevent other
+ * callers from pursuing this same data, along with a paging
+ * reference for the original object. The "top_page" should
+ * be destroyed when this guarantee is no longer required.
+ * The "result_page" is also left busy. It is not removed
+ * from the pageout queues.
+ */
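+/*
+ * A minimal continuation-free call, as sketched here, mirrors the
+ * source-object fault in vm_fault_copy below; callers must be
+ * prepared to retry on VM_FAULT_RETRY and the shortage returns:
+ *
+ *	prot = VM_PROT_READ;
+ *	vm_object_lock(object);
+ *	vm_object_paging_begin(object);
+ *	result = vm_fault_page(object, offset, VM_PROT_READ, FALSE,
+ *			       interruptible, &prot, &result_page,
+ *			       &top_page, FALSE, (void (*)()) 0);
+ */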
+vm_fault_return_t vm_fault_page(first_object, first_offset,
+ fault_type, must_be_resident, interruptible,
+ protection,
+ result_page, top_page,
+ resume, continuation)
+ /* Arguments: */
+ vm_object_t first_object; /* Object to begin search */
+ vm_offset_t first_offset; /* Offset into object */
+ vm_prot_t fault_type; /* What access is requested */
+ boolean_t must_be_resident;/* Must page be resident? */
+ boolean_t interruptible; /* May fault be interrupted? */
+ /* Modifies in place: */
+ vm_prot_t *protection; /* Protection for mapping */
+ /* Returns: */
+ vm_page_t *result_page; /* Page found, if successful */
+ vm_page_t *top_page; /* Page in top object, if
+ * not result_page.
+ */
+ /* More arguments: */
+ boolean_t resume; /* We are restarting. */
+ void (*continuation)(); /* Continuation for blocking. */
+{
+ register
+ vm_page_t m;
+ register
+ vm_object_t object;
+ register
+ vm_offset_t offset;
+ vm_page_t first_m;
+ vm_object_t next_object;
+ vm_object_t copy_object;
+ boolean_t look_for_page;
+ vm_prot_t access_required;
+
+#ifdef CONTINUATIONS
+ if (resume) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ if (state->vmfp_backoff)
+ goto after_block_and_backoff;
+
+ object = state->vmfp_object;
+ offset = state->vmfp_offset;
+ first_m = state->vmfp_first_m;
+ access_required = state->vmfp_access;
+ goto after_thread_block;
+ }
+#else /* not CONTINUATIONS */
+ assert(continuation == 0);
+ assert(!resume);
+#endif /* not CONTINUATIONS */
+
+ vm_stat_sample(SAMPLED_PC_VM_FAULTS_ANY);
+ vm_stat.faults++; /* needs lock XXX */
+
+/*
+ * Recovery actions
+ */
+#define RELEASE_PAGE(m) \
+ MACRO_BEGIN \
+ PAGE_WAKEUP_DONE(m); \
+ vm_page_lock_queues(); \
+ if (!m->active && !m->inactive) \
+ vm_page_activate(m); \
+ vm_page_unlock_queues(); \
+ MACRO_END
+
+ if (vm_fault_dirty_handling
+#if MACH_KDB
+ /*
+ * If there are watchpoints set, then
+ * we don't want to give away write permission
+ * on a read fault. Make the task write fault,
+ * so that the watchpoint code notices the access.
+ */
+ || db_watchpoint_list
+#endif /* MACH_KDB */
+ ) {
+ /*
+ * If we aren't asking for write permission,
+ * then don't give it away. We're using write
+ * faults to set the dirty bit.
+ */
+ if (!(fault_type & VM_PROT_WRITE))
+ *protection &= ~VM_PROT_WRITE;
+ }
+
+ if (!vm_fault_interruptible)
+ interruptible = FALSE;
+
+ /*
+ * INVARIANTS (through entire routine):
+ *
+ * 1) At all times, we must either have the object
+ * lock or a busy page in some object to prevent
+ * some other thread from trying to bring in
+ * the same page.
+ *
+ * Note that we cannot hold any locks during the
+ * pager access or when waiting for memory, so
+ * we use a busy page then.
+ *
+ * Note also that we aren't as concerned about more than
+ * one thread attempting to memory_object_data_unlock
+ * the same page at once, so we don't hold the page
+ * as busy then, but do record the highest unlock
+ * value so far. [Unlock requests may also be delivered
+ * out of order.]
+ *
+ * 2) To prevent another thread from racing us down the
+ * shadow chain and entering a new page in the top
+ * object before we do, we must keep a busy page in
+ * the top object while following the shadow chain.
+ *
+ * 3) We must increment paging_in_progress on any object
+ * for which we have a busy page, to prevent
+ * vm_object_collapse from removing the busy page
+ * without our noticing.
+ *
+ * 4) We leave busy pages on the pageout queues.
+ * If the pageout daemon comes across a busy page,
+ * it will remove the page from the pageout queues.
+ */
+
+ /*
+ * Search for the page at object/offset.
+ */
+
+ object = first_object;
+ offset = first_offset;
+ first_m = VM_PAGE_NULL;
+ access_required = fault_type;
+
+ /*
+ * See whether this page is resident
+ */
+
+ while (TRUE) {
+ m = vm_page_lookup(object, offset);
+ if (m != VM_PAGE_NULL) {
+ /*
+ * If the page is being brought in,
+ * wait for it and then retry.
+ *
+ * A possible optimization: if the page
+ * is known to be resident, we can ignore
+ * pages that are absent (regardless of
+ * whether they're busy).
+ */
+
+ if (m->busy) {
+ kern_return_t wait_result;
+
+ PAGE_ASSERT_WAIT(m, interruptible);
+ vm_object_unlock(object);
+#ifdef CONTINUATIONS
+ if (continuation != (void (*)()) 0) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Save variables in case
+ * thread_block discards
+ * our kernel stack.
+ */
+
+ state->vmfp_backoff = FALSE;
+ state->vmfp_object = object;
+ state->vmfp_offset = offset;
+ state->vmfp_first_m = first_m;
+ state->vmfp_access =
+ access_required;
+ state->vmf_prot = *protection;
+
+ counter(c_vm_fault_page_block_busy_user++);
+ thread_block(continuation);
+ } else
+#endif /* CONTINUATIONS */
+ {
+ counter(c_vm_fault_page_block_busy_kernel++);
+ thread_block((void (*)()) 0);
+ }
+ after_thread_block:
+ wait_result = current_thread()->wait_result;
+ vm_object_lock(object);
+ if (wait_result != THREAD_AWAKENED) {
+ vm_fault_cleanup(object, first_m);
+ if (wait_result == THREAD_RESTART)
+ return(VM_FAULT_RETRY);
+ else
+ return(VM_FAULT_INTERRUPTED);
+ }
+ continue;
+ }
+
+ /*
+ * If the page is in error, give up now.
+ */
+
+ if (m->error) {
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_ERROR);
+ }
+
+ /*
+ * If the page isn't busy, but is absent,
+ * then it was deemed "unavailable".
+ */
+
+ if (m->absent) {
+ /*
+ * Remove the non-existent page (unless it's
+ * in the top object) and move on down to the
+ * next object (if there is one).
+ */
+
+ offset += object->shadow_offset;
+ access_required = VM_PROT_READ;
+ next_object = object->shadow;
+ if (next_object == VM_OBJECT_NULL) {
+ vm_page_t real_m;
+
+ assert(!must_be_resident);
+
+ /*
+ * Absent page at bottom of shadow
+ * chain; zero fill the page we left
+ * busy in the first object, and flush
+ * the absent page. But first we
+ * need to allocate a real page.
+ */
+
+ real_m = vm_page_grab();
+ if (real_m == VM_PAGE_NULL) {
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+
+ if (object != first_object) {
+ VM_PAGE_FREE(m);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ object = first_object;
+ offset = first_offset;
+ m = first_m;
+ first_m = VM_PAGE_NULL;
+ vm_object_lock(object);
+ }
+
+ VM_PAGE_FREE(m);
+ assert(real_m->busy);
+ vm_page_lock_queues();
+ vm_page_insert(real_m, object, offset);
+ vm_page_unlock_queues();
+ m = real_m;
+
+ /*
+ * Drop the lock while zero filling
+ * page. Then break because this
+ * is the page we wanted. Checking
+ * the page lock is a waste of time;
+ * this page was either absent or
+ * newly allocated -- in both cases
+ * it can't be page locked by a pager.
+ */
+ vm_object_unlock(object);
+
+ vm_page_zero_fill(m);
+
+ vm_stat_sample(SAMPLED_PC_VM_ZFILL_FAULTS);
+
+ vm_stat.zero_fill_count++;
+ vm_object_lock(object);
+ pmap_clear_modify(m->phys_addr);
+ break;
+ } else {
+ if (must_be_resident) {
+ vm_object_paging_end(object);
+ } else if (object != first_object) {
+ vm_object_paging_end(object);
+ VM_PAGE_FREE(m);
+ } else {
+ first_m = m;
+ m->absent = FALSE;
+ vm_object_absent_release(object);
+ m->busy = TRUE;
+
+ vm_page_lock_queues();
+ VM_PAGE_QUEUES_REMOVE(m);
+ vm_page_unlock_queues();
+ }
+ vm_object_lock(next_object);
+ vm_object_unlock(object);
+ object = next_object;
+ vm_object_paging_begin(object);
+ continue;
+ }
+ }
+
+ /*
+ * If the desired access to this page has
+ * been locked out, request that it be unlocked.
+ */
+
+ if (access_required & m->page_lock) {
+ if ((access_required & m->unlock_request) != access_required) {
+ vm_prot_t new_unlock_request;
+ kern_return_t rc;
+
+ if (!object->pager_ready) {
+ vm_object_assert_wait(object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ interruptible);
+ goto block_and_backoff;
+ }
+
+ new_unlock_request = m->unlock_request =
+ (access_required | m->unlock_request);
+ vm_object_unlock(object);
+ if ((rc = memory_object_data_unlock(
+ object->pager,
+ object->pager_request,
+ offset + object->paging_offset,
+ PAGE_SIZE,
+ new_unlock_request))
+ != KERN_SUCCESS) {
+ printf("vm_fault: memory_object_data_unlock failed\n");
+ vm_object_lock(object);
+ vm_fault_cleanup(object, first_m);
+ return((rc == MACH_SEND_INTERRUPTED) ?
+ VM_FAULT_INTERRUPTED :
+ VM_FAULT_MEMORY_ERROR);
+ }
+ vm_object_lock(object);
+ continue;
+ }
+
+ PAGE_ASSERT_WAIT(m, interruptible);
+ goto block_and_backoff;
+ }
+
+ /*
+ * We mark the page busy and leave it on
+ * the pageout queues. If the pageout
+ * daemon comes across it, then it will
+ * remove the page.
+ */
+
+ if (!software_reference_bits) {
+ vm_page_lock_queues();
+ if (m->inactive) {
+ vm_stat_sample(SAMPLED_PC_VM_REACTIVATION_FAULTS);
+ vm_stat.reactivations++;
+ }
+
+ VM_PAGE_QUEUES_REMOVE(m);
+ vm_page_unlock_queues();
+ }
+
+ assert(!m->busy);
+ m->busy = TRUE;
+ assert(!m->absent);
+ break;
+ }
+
+ look_for_page =
+ (object->pager_created)
+#if MACH_PAGEMAP
+ && (vm_external_state_get(object->existence_info, offset + object->paging_offset) !=
+ VM_EXTERNAL_STATE_ABSENT)
+#endif /* MACH_PAGEMAP */
+ ;
+
+ if ((look_for_page || (object == first_object))
+ && !must_be_resident) {
+ /*
+ * Allocate a new page for this object/offset
+ * pair.
+ */
+
+ m = vm_page_grab_fictitious();
+ if (m == VM_PAGE_NULL) {
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_FICTITIOUS_SHORTAGE);
+ }
+
+ vm_page_lock_queues();
+ vm_page_insert(m, object, offset);
+ vm_page_unlock_queues();
+ }
+
+ if (look_for_page && !must_be_resident) {
+ kern_return_t rc;
+
+ /*
+ * If the memory manager is not ready, we
+ * cannot make requests.
+ */
+ if (!object->pager_ready) {
+ vm_object_assert_wait(object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ interruptible);
+ VM_PAGE_FREE(m);
+ goto block_and_backoff;
+ }
+
+ if (object->internal) {
+ /*
+ * Requests to the default pager
+ * must reserve a real page in advance,
+ * because the pager's data_provided reply
+ * won't block for pages.
+ */
+
+ if (m->fictitious && !vm_page_convert(m)) {
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+ } else if (object->absent_count >
+ vm_object_absent_max) {
+ /*
+ * If there are too many outstanding page
+ * requests pending on this object, we
+ * wait for them to be resolved now.
+ */
+
+ vm_object_absent_assert_wait(object, interruptible);
+ VM_PAGE_FREE(m);
+ goto block_and_backoff;
+ }
+
+ /*
+ * Indicate that the page is waiting for data
+ * from the memory manager.
+ */
+
+ m->absent = TRUE;
+ object->absent_count++;
+
+ /*
+ * We have a busy page, so we can
+ * release the object lock.
+ */
+ vm_object_unlock(object);
+
+ /*
+ * Call the memory manager to retrieve the data.
+ */
+
+ vm_stat.pageins++;
+ vm_stat_sample(SAMPLED_PC_VM_PAGEIN_FAULTS);
+
+ if ((rc = memory_object_data_request(object->pager,
+ object->pager_request,
+ m->offset + object->paging_offset,
+ PAGE_SIZE, access_required)) != KERN_SUCCESS) {
+ if (rc != MACH_SEND_INTERRUPTED)
+ printf("%s(0x%x, 0x%x, 0x%x, 0x%x, 0x%x) failed, %d\n",
+ "memory_object_data_request",
+ object->pager,
+ object->pager_request,
+ m->offset + object->paging_offset,
+ PAGE_SIZE, access_required, rc);
+ /*
+ * Don't want to leave a busy page around,
+ * but the data request may have blocked,
+ * so check if it's still there and busy.
+ */
+ vm_object_lock(object);
+ if (m == vm_page_lookup(object,offset) &&
+ m->absent && m->busy)
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, first_m);
+ return((rc == MACH_SEND_INTERRUPTED) ?
+ VM_FAULT_INTERRUPTED :
+ VM_FAULT_MEMORY_ERROR);
+ }
+
+ /*
+ * Retry with same object/offset, since new data may
+ * be in a different page (i.e., m is meaningless at
+ * this point).
+ */
+ vm_object_lock(object);
+ continue;
+ }
+
+ /*
+ * For the XP (external pager) system, the only case in which we
+ * get here is if the object has no pager (or we are unwiring).
+ * If the pager doesn't have the page, that case is handled in the
+ * m->absent case above
+ * (and if you change things here you should look above).
+ */
+ if (object == first_object)
+ first_m = m;
+ else
+ {
+ assert(m == VM_PAGE_NULL);
+ }
+
+ /*
+ * Move on to the next object. Lock the next
+ * object before unlocking the current one.
+ */
+ access_required = VM_PROT_READ;
+
+ offset += object->shadow_offset;
+ next_object = object->shadow;
+ if (next_object == VM_OBJECT_NULL) {
+ assert(!must_be_resident);
+
+ /*
+ * If there's no object left, fill the page
+ * in the top object with zeros. But first we
+ * need to allocate a real page.
+ */
+
+ if (object != first_object) {
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ object = first_object;
+ offset = first_offset;
+ vm_object_lock(object);
+ }
+
+ m = first_m;
+ assert(m->object == object);
+ first_m = VM_PAGE_NULL;
+
+ if (m->fictitious && !vm_page_convert(m)) {
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, VM_PAGE_NULL);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+
+ vm_object_unlock(object);
+ vm_page_zero_fill(m);
+ vm_stat_sample(SAMPLED_PC_VM_ZFILL_FAULTS);
+ vm_stat.zero_fill_count++;
+ vm_object_lock(object);
+ pmap_clear_modify(m->phys_addr);
+ break;
+ }
+ else {
+ vm_object_lock(next_object);
+ if ((object != first_object) || must_be_resident)
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ object = next_object;
+ vm_object_paging_begin(object);
+ }
+ }
+
+ /*
+ * PAGE HAS BEEN FOUND.
+ *
+ * This page (m) is:
+ * busy, so that we can play with it;
+ * not absent, so that nobody else will fill it;
+ * possibly eligible for pageout;
+ *
+ * The top-level page (first_m) is:
+ * VM_PAGE_NULL if the page was found in the
+ * top-level object;
+ * busy, not absent, and ineligible for pageout.
+ *
+ * The current object (object) is locked. A paging
+ * reference is held for the current and top-level
+ * objects.
+ */
+
+#if EXTRA_ASSERTIONS
+ assert(m->busy && !m->absent);
+ assert((first_m == VM_PAGE_NULL) ||
+ (first_m->busy && !first_m->absent &&
+ !first_m->active && !first_m->inactive));
+#endif /* EXTRA_ASSERTIONS */
+
+ /*
+ * If the page is being written, but isn't
+ * already owned by the top-level object,
+ * we have to copy it into a new page owned
+ * by the top-level object.
+ */
+
+ if (object != first_object) {
+ /*
+ * We only really need to copy if we
+ * want to write it.
+ */
+
+ if (fault_type & VM_PROT_WRITE) {
+ vm_page_t copy_m;
+
+ assert(!must_be_resident);
+
+ /*
+ * If we try to collapse first_object at this
+ * point, we may deadlock when we try to get
+ * the lock on an intermediate object (since we
+ * have the bottom object locked). We can't
+ * unlock the bottom object, because the page
+ * we found may move (by collapse) if we do.
+ *
+ * Instead, we first copy the page. Then, when
+ * we have no more use for the bottom object,
+ * we unlock it and try to collapse.
+ *
+ * Note that we copy the page even if we didn't
+ * need to... that's the breaks.
+ */
+
+ /*
+ * Allocate a page for the copy
+ */
+ copy_m = vm_page_grab();
+ if (copy_m == VM_PAGE_NULL) {
+ RELEASE_PAGE(m);
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+
+ vm_object_unlock(object);
+ vm_page_copy(m, copy_m);
+ vm_object_lock(object);
+
+ /*
+ * If another map is truly sharing this
+ * page with us, we have to flush all
+ * uses of the original page, since we
+ * can't distinguish those which want the
+ * original from those which need the
+ * new copy.
+ *
+ * XXXO If we know that only one map has
+ * access to this page, then we could
+ * avoid the pmap_page_protect() call.
+ */
+
+ vm_page_lock_queues();
+ vm_page_deactivate(m);
+ pmap_page_protect(m->phys_addr, VM_PROT_NONE);
+ vm_page_unlock_queues();
+
+ /*
+ * We no longer need the old page or object.
+ */
+
+ PAGE_WAKEUP_DONE(m);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ vm_stat.cow_faults++;
+ vm_stat_sample(SAMPLED_PC_VM_COW_FAULTS);
+ object = first_object;
+ offset = first_offset;
+
+ vm_object_lock(object);
+ VM_PAGE_FREE(first_m);
+ first_m = VM_PAGE_NULL;
+ assert(copy_m->busy);
+ vm_page_lock_queues();
+ vm_page_insert(copy_m, object, offset);
+ vm_page_unlock_queues();
+ m = copy_m;
+
+ /*
+ * Now that we've gotten the copy out of the
+ * way, let's try to collapse the top object.
+ * But we have to play ugly games with
+ * paging_in_progress to do that...
+ */
+
+ vm_object_paging_end(object);
+ vm_object_collapse(object);
+ vm_object_paging_begin(object);
+ }
+ else {
+ *protection &= (~VM_PROT_WRITE);
+ }
+ }
+
+ /*
+ * Now check whether the page needs to be pushed into the
+ * copy object. The use of asymmetric copy on write for
+ * shared temporary objects means that we may do two copies to
+ * satisfy the fault; one above to get the page from a
+ * shadowed object, and one here to push it into the copy.
+ */
+
+ while ((copy_object = first_object->copy) != VM_OBJECT_NULL) {
+ vm_offset_t copy_offset;
+ vm_page_t copy_m;
+
+ /*
+ * If the page is being written, but hasn't been
+ * copied to the copy-object, we have to copy it there.
+ */
+
+ if ((fault_type & VM_PROT_WRITE) == 0) {
+ *protection &= ~VM_PROT_WRITE;
+ break;
+ }
+
+ /*
+ * If the page was guaranteed to be resident,
+ * we must have already performed the copy.
+ */
+
+ if (must_be_resident)
+ break;
+
+ /*
+ * Try to get the lock on the copy_object.
+ */
+ if (!vm_object_lock_try(copy_object)) {
+ vm_object_unlock(object);
+
+ simple_lock_pause(); /* wait a bit */
+
+ vm_object_lock(object);
+ continue;
+ }
+
+ /*
+ * Make another reference to the copy-object,
+ * to keep it from disappearing during the
+ * copy.
+ */
+ assert(copy_object->ref_count > 0);
+ copy_object->ref_count++;
+
+ /*
+ * Does the page exist in the copy?
+ */
+ copy_offset = first_offset - copy_object->shadow_offset;
+ copy_m = vm_page_lookup(copy_object, copy_offset);
+ if (copy_m != VM_PAGE_NULL) {
+ if (copy_m->busy) {
+ /*
+ * If the page is being brought
+ * in, wait for it and then retry.
+ */
+ PAGE_ASSERT_WAIT(copy_m, interruptible);
+ RELEASE_PAGE(m);
+ copy_object->ref_count--;
+ assert(copy_object->ref_count > 0);
+ vm_object_unlock(copy_object);
+ goto block_and_backoff;
+ }
+ }
+ else {
+ /*
+ * Allocate a page for the copy
+ */
+ copy_m = vm_page_alloc(copy_object, copy_offset);
+ if (copy_m == VM_PAGE_NULL) {
+ RELEASE_PAGE(m);
+ copy_object->ref_count--;
+ assert(copy_object->ref_count > 0);
+ vm_object_unlock(copy_object);
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+
+ /*
+ * Must copy page into copy-object.
+ */
+
+ vm_page_copy(m, copy_m);
+
+ /*
+ * If the old page was in use by any users
+ * of the copy-object, it must be removed
+ * from all pmaps. (We can't know which
+ * pmaps use it.)
+ */
+
+ vm_page_lock_queues();
+ pmap_page_protect(m->phys_addr, VM_PROT_NONE);
+ copy_m->dirty = TRUE;
+ vm_page_unlock_queues();
+
+ /*
+ * If there's a pager, then immediately
+ * page out this page, using the "initialize"
+ * option. Else, we use the copy.
+ */
+
+ if (!copy_object->pager_created) {
+ vm_page_lock_queues();
+ vm_page_activate(copy_m);
+ vm_page_unlock_queues();
+ PAGE_WAKEUP_DONE(copy_m);
+ } else {
+ /*
+ * The page is already ready for pageout:
+ * not on pageout queues and busy.
+ * Unlock everything except the
+ * copy_object itself.
+ */
+
+ vm_object_unlock(object);
+
+ /*
+ * Write the page to the copy-object,
+ * flushing it from the kernel.
+ */
+
+ vm_pageout_page(copy_m, TRUE, TRUE);
+
+ /*
+ * Since the pageout may have
+ * temporarily dropped the
+ * copy_object's lock, we
+ * check whether we'll have
+ * to deallocate the hard way.
+ */
+
+ if ((copy_object->shadow != object) ||
+ (copy_object->ref_count == 1)) {
+ vm_object_unlock(copy_object);
+ vm_object_deallocate(copy_object);
+ vm_object_lock(object);
+ continue;
+ }
+
+ /*
+ * Pick back up the old object's
+ * lock. [It is safe to do so,
+ * since it must be deeper in the
+ * object tree.]
+ */
+
+ vm_object_lock(object);
+ }
+
+ /*
+ * Because we're pushing a page upward
+ * in the object tree, we must restart
+ * any faults that are waiting here.
+ * [Note that this is an expansion of
+ * PAGE_WAKEUP that uses the THREAD_RESTART
+ * wait result]. Can't turn off the page's
+ * busy bit because we're not done with it.
+ */
+
+ if (m->wanted) {
+ m->wanted = FALSE;
+ thread_wakeup_with_result((event_t) m,
+ THREAD_RESTART);
+ }
+ }
+
+ /*
+ * The reference count on copy_object must be
+ * at least 2: one for our extra reference,
+ * and at least one from the outside world
+ * (we checked that when we last locked
+ * copy_object).
+ */
+ copy_object->ref_count--;
+ assert(copy_object->ref_count > 0);
+ vm_object_unlock(copy_object);
+
+ break;
+ }
+
+ *result_page = m;
+ *top_page = first_m;
+
+ /*
+ * If the page can be written, assume that it will be.
+ * [Earlier, we restricted the permission to allow write
+ * access only if the fault so required, so we don't
+ * mark read-only data as dirty.]
+ */
+
+ if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
+ m->dirty = TRUE;
+
+ return(VM_FAULT_SUCCESS);
+
+ block_and_backoff:
+ vm_fault_cleanup(object, first_m);
+
+#ifdef CONTINUATIONS
+ if (continuation != (void (*)()) 0) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Save variables in case we must restart.
+ */
+
+ state->vmfp_backoff = TRUE;
+ state->vmf_prot = *protection;
+
+ counter(c_vm_fault_page_block_backoff_user++);
+ thread_block(continuation);
+ } else
+#endif /* CONTINUATIONS */
+ {
+ counter(c_vm_fault_page_block_backoff_kernel++);
+ thread_block((void (*)()) 0);
+ }
+ after_block_and_backoff:
+ if (current_thread()->wait_result == THREAD_AWAKENED)
+ return VM_FAULT_RETRY;
+ else
+ return VM_FAULT_INTERRUPTED;
+
+#undef RELEASE_PAGE
+}
+
+/*
+ * Routine: vm_fault
+ * Purpose:
+ * Handle page faults, including pseudo-faults
+ * used to change the wiring status of pages.
+ * Returns:
+ * If an explicit (expression) continuation is supplied,
+ * then we call the continuation instead of returning.
+ * Implementation:
+ * Explicit continuations make this a little icky,
+ * because it hasn't been rewritten to embrace CPS.
+ * Instead, we have resume arguments for vm_fault and
+ * vm_fault_page, to let the fault computation continue.
+ *
+ * vm_fault and vm_fault_page save mucho state
+ * in the moral equivalent of a closure. The state
+ * structure is allocated when first entering vm_fault
+ * and deallocated when leaving vm_fault.
+ */
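+/*
+ * Without a continuation the call is an ordinary synchronous fault,
+ * as in vm_fault_wire below:
+ *
+ *	(void) vm_fault(map, va, VM_PROT_NONE, TRUE,
+ *			FALSE, (void (*)()) 0);
+ *
+ * With a continuation, vm_fault may never return to the caller; the
+ * continuation is eventually invoked with the kern_return_t result
+ * instead.
+ */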
+
+#ifdef CONTINUATIONS
+void
+vm_fault_continue()
+{
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ (void) vm_fault(state->vmf_map,
+ state->vmf_vaddr,
+ state->vmf_fault_type,
+ state->vmf_change_wiring,
+ TRUE, state->vmf_continuation);
+ /*NOTREACHED*/
+}
+#endif /* CONTINUATIONS */
+
+kern_return_t vm_fault(map, vaddr, fault_type, change_wiring,
+ resume, continuation)
+ vm_map_t map;
+ vm_offset_t vaddr;
+ vm_prot_t fault_type;
+ boolean_t change_wiring;
+ boolean_t resume;
+ void (*continuation)();
+{
+ vm_map_version_t version; /* Map version for verification */
+ boolean_t wired; /* Should mapping be wired down? */
+ vm_object_t object; /* Top-level object */
+ vm_offset_t offset; /* Top-level offset */
+ vm_prot_t prot; /* Protection for mapping */
+ vm_object_t old_copy_object; /* Saved copy object */
+ vm_page_t result_page; /* Result of vm_fault_page */
+ vm_page_t top_page; /* Placeholder page */
+ kern_return_t kr;
+
+ register
+ vm_page_t m; /* Fast access to result_page */
+
+#ifdef CONTINUATIONS
+ if (resume) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Retrieve cached variables and
+ * continue vm_fault_page.
+ */
+
+ object = state->vmf_object;
+ if (object == VM_OBJECT_NULL)
+ goto RetryFault;
+ version = state->vmf_version;
+ wired = state->vmf_wired;
+ offset = state->vmf_offset;
+ prot = state->vmf_prot;
+
+ kr = vm_fault_page(object, offset, fault_type,
+ (change_wiring && !wired), !change_wiring,
+ &prot, &result_page, &top_page,
+ TRUE, vm_fault_continue);
+ goto after_vm_fault_page;
+ }
+
+ if (continuation != (void (*)()) 0) {
+ /*
+ * We will probably need to save state.
+ */
+
+ char * state;
+
+ /*
+ * if this assignment stmt is written as
+ * 'active_threads[cpu_number()] = zalloc()',
+ * cpu_number may be evaluated before zalloc;
+ * if zalloc blocks, cpu_number will be wrong
+ */
+
+ state = (char *) zalloc(vm_fault_state_zone);
+ current_thread()->ith_other = state;
+
+ }
+#else /* not CONTINUATIONS */
+ assert(continuation == 0);
+ assert(!resume);
+#endif /* not CONTINUATIONS */
+
+ RetryFault: ;
+
+ /*
+ * Find the backing store object and offset into
+ * it to begin the search.
+ */
+
+ if ((kr = vm_map_lookup(&map, vaddr, fault_type, &version,
+ &object, &offset,
+ &prot, &wired)) != KERN_SUCCESS) {
+ goto done;
+ }
+
+ /*
+ * If the page is wired, we must fault for the current protection
+ * value, to avoid further faults.
+ */
+
+ if (wired)
+ fault_type = prot;
+
+ /*
+ * Make a reference to this object to
+ * prevent its disposal while we are messing with
+ * it. Once we have the reference, the map is free
+ * to be diddled. Since objects reference their
+ * shadows (and copies), they will stay around as well.
+ */
+
+ assert(object->ref_count > 0);
+ object->ref_count++;
+ vm_object_paging_begin(object);
+
+#ifdef CONTINUATIONS
+ if (continuation != (void (*)()) 0) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Save variables, in case vm_fault_page discards
+ * our kernel stack and we have to restart.
+ */
+
+ state->vmf_map = map;
+ state->vmf_vaddr = vaddr;
+ state->vmf_fault_type = fault_type;
+ state->vmf_change_wiring = change_wiring;
+ state->vmf_continuation = continuation;
+
+ state->vmf_version = version;
+ state->vmf_wired = wired;
+ state->vmf_object = object;
+ state->vmf_offset = offset;
+ state->vmf_prot = prot;
+
+ kr = vm_fault_page(object, offset, fault_type,
+ (change_wiring && !wired), !change_wiring,
+ &prot, &result_page, &top_page,
+ FALSE, vm_fault_continue);
+ } else
+#endif /* CONTINUATIONS */
+ {
+ kr = vm_fault_page(object, offset, fault_type,
+ (change_wiring && !wired), !change_wiring,
+ &prot, &result_page, &top_page,
+ FALSE, (void (*)()) 0);
+ }
+ after_vm_fault_page:
+
+ /*
+ * If we didn't succeed, lose the object reference immediately.
+ */
+
+ if (kr != VM_FAULT_SUCCESS)
+ vm_object_deallocate(object);
+
+ /*
+ * See why we failed, and take corrective action.
+ */
+
+ switch (kr) {
+ case VM_FAULT_SUCCESS:
+ break;
+ case VM_FAULT_RETRY:
+ goto RetryFault;
+ case VM_FAULT_INTERRUPTED:
+ kr = KERN_SUCCESS;
+ goto done;
+ case VM_FAULT_MEMORY_SHORTAGE:
+#ifdef CONTINUATIONS
+ if (continuation != (void (*)()) 0) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Save variables in case VM_PAGE_WAIT
+ * discards our kernel stack.
+ */
+
+ state->vmf_map = map;
+ state->vmf_vaddr = vaddr;
+ state->vmf_fault_type = fault_type;
+ state->vmf_change_wiring = change_wiring;
+ state->vmf_continuation = continuation;
+ state->vmf_object = VM_OBJECT_NULL;
+
+ VM_PAGE_WAIT(vm_fault_continue);
+ } else
+#endif /* CONTINUATIONS */
+ VM_PAGE_WAIT((void (*)()) 0);
+ goto RetryFault;
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ goto RetryFault;
+ case VM_FAULT_MEMORY_ERROR:
+ kr = KERN_MEMORY_ERROR;
+ goto done;
+ }
+
+ m = result_page;
+
+ assert((change_wiring && !wired) ?
+ (top_page == VM_PAGE_NULL) :
+ ((top_page == VM_PAGE_NULL) == (m->object == object)));
+
+ /*
+ * How to clean up the result of vm_fault_page. This
+ * happens whether the mapping is entered or not.
+ */
+
+#define UNLOCK_AND_DEALLOCATE \
+ MACRO_BEGIN \
+ vm_fault_cleanup(m->object, top_page); \
+ vm_object_deallocate(object); \
+ MACRO_END
+
+ /*
+ * What to do with the resulting page from vm_fault_page
+ * if it doesn't get entered into the physical map:
+ */
+
+#define RELEASE_PAGE(m) \
+ MACRO_BEGIN \
+ PAGE_WAKEUP_DONE(m); \
+ vm_page_lock_queues(); \
+ if (!m->active && !m->inactive) \
+ vm_page_activate(m); \
+ vm_page_unlock_queues(); \
+ MACRO_END
+
+ /*
+ * We must verify that the maps have not changed
+ * since our last lookup.
+ */
+
+ old_copy_object = m->object->copy;
+
+ vm_object_unlock(m->object);
+ while (!vm_map_verify(map, &version)) {
+ vm_object_t retry_object;
+ vm_offset_t retry_offset;
+ vm_prot_t retry_prot;
+
+ /*
+ * To avoid trying to write_lock the map while another
+ * thread has it read_locked (in vm_map_pageable), we
+ * do not try for write permission. If the page is
+ * still writable, we will get write permission. If it
+ * is not, or has been marked needs_copy, we enter the
+ * mapping without write permission, and will merely
+ * take another fault.
+ */
+ kr = vm_map_lookup(&map, vaddr,
+ fault_type & ~VM_PROT_WRITE, &version,
+ &retry_object, &retry_offset, &retry_prot,
+ &wired);
+
+ if (kr != KERN_SUCCESS) {
+ vm_object_lock(m->object);
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ goto done;
+ }
+
+ vm_object_unlock(retry_object);
+ vm_object_lock(m->object);
+
+ if ((retry_object != object) ||
+ (retry_offset != offset)) {
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ goto RetryFault;
+ }
+
+ /*
+ * Check whether the protection has changed or the object
+ * has been copied while we left the map unlocked.
+ */
+ prot &= retry_prot;
+ vm_object_unlock(m->object);
+ }
+ vm_object_lock(m->object);
+
+ /*
+ * If the copy object changed while the top-level object
+ * was unlocked, then we must take away write permission.
+ */
+
+ if (m->object->copy != old_copy_object)
+ prot &= ~VM_PROT_WRITE;
+
+ /*
+ * If we want to wire down this page, but no longer have
+ * adequate permissions, we must start all over.
+ */
+
+ if (wired && (prot != fault_type)) {
+ vm_map_verify_done(map, &version);
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ goto RetryFault;
+ }
+
+ /*
+ * It's critically important that a wired-down page be faulted
+ * only once in each map for which it is wired.
+ */
+
+ vm_object_unlock(m->object);
+
+ /*
+ * Put this page into the physical map.
+ * We had to do the unlock above because pmap_enter
+ * may cause other faults. The page may be on
+ * the pageout queues. If the pageout daemon comes
+ * across the page, it will remove it from the queues.
+ */
+
+ PMAP_ENTER(map->pmap, vaddr, m, prot, wired);
+
+ /*
+ * If the page is not wired down and isn't already
+ * on a pageout queue, then put it where the
+ * pageout daemon can find it.
+ */
+ vm_object_lock(m->object);
+ vm_page_lock_queues();
+ if (change_wiring) {
+ if (wired)
+ vm_page_wire(m);
+ else
+ vm_page_unwire(m);
+ } else if (software_reference_bits) {
+ if (!m->active && !m->inactive)
+ vm_page_activate(m);
+ m->reference = TRUE;
+ } else {
+ vm_page_activate(m);
+ }
+ vm_page_unlock_queues();
+
+ /*
+ * Unlock everything, and return
+ */
+
+ vm_map_verify_done(map, &version);
+ PAGE_WAKEUP_DONE(m);
+ kr = KERN_SUCCESS;
+ UNLOCK_AND_DEALLOCATE;
+
+#undef UNLOCK_AND_DEALLOCATE
+#undef RELEASE_PAGE
+
+ done:
+#ifdef CONTINUATIONS
+ if (continuation != (void (*)()) 0) {
+ register vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ zfree(vm_fault_state_zone, (vm_offset_t) state);
+ (*continuation)(kr);
+ /*NOTREACHED*/
+ }
+#endif /* CONTINUATIONS */
+
+ return(kr);
+}
+
+kern_return_t vm_fault_wire_fast();
+
+/*
+ * vm_fault_wire:
+ *
+ * Wire down a range of virtual addresses in a map.
+ */
+void vm_fault_wire(map, entry)
+ vm_map_t map;
+ vm_map_entry_t entry;
+{
+
+ register vm_offset_t va;
+ register pmap_t pmap;
+ register vm_offset_t end_addr = entry->vme_end;
+
+ pmap = vm_map_pmap(map);
+
+ /*
+ * Inform the physical mapping system that the
+ * range of addresses may not fault, so that
+ * page tables and such can be locked down as well.
+ */
+
+ pmap_pageable(pmap, entry->vme_start, end_addr, FALSE);
+
+ /*
+ * We simulate a fault to get the page and enter it
+ * in the physical map.
+ */
+
+ for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
+ if (vm_fault_wire_fast(map, va, entry) != KERN_SUCCESS)
+ (void) vm_fault(map, va, VM_PROT_NONE, TRUE,
+ FALSE, (void (*)()) 0);
+ }
+}
+
+/*
+ * vm_fault_unwire:
+ *
+ * Unwire a range of virtual addresses in a map.
+ */
+void vm_fault_unwire(map, entry)
+ vm_map_t map;
+ vm_map_entry_t entry;
+{
+ register vm_offset_t va;
+ register pmap_t pmap;
+ register vm_offset_t end_addr = entry->vme_end;
+ vm_object_t object;
+
+ pmap = vm_map_pmap(map);
+
+ object = (entry->is_sub_map)
+ ? VM_OBJECT_NULL : entry->object.vm_object;
+
+ /*
+ * Since the pages are wired down, we must be able to
+ * get their mappings from the physical map system.
+ */
+
+ for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
+ pmap_change_wiring(pmap, va, FALSE);
+
+ if (object == VM_OBJECT_NULL) {
+ vm_map_lock_set_recursive(map);
+ (void) vm_fault(map, va, VM_PROT_NONE, TRUE,
+ FALSE, (void (*)()) 0);
+ vm_map_lock_clear_recursive(map);
+ } else {
+ vm_prot_t prot;
+ vm_page_t result_page;
+ vm_page_t top_page;
+ vm_fault_return_t result;
+
+ do {
+ prot = VM_PROT_NONE;
+
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ result = vm_fault_page(object,
+ entry->offset +
+ (va - entry->vme_start),
+ VM_PROT_NONE, TRUE,
+ FALSE, &prot,
+ &result_page,
+ &top_page,
+ FALSE, (void (*)()) 0);
+ } while (result == VM_FAULT_RETRY);
+
+ if (result != VM_FAULT_SUCCESS)
+ panic("vm_fault_unwire: failure");
+
+ vm_page_lock_queues();
+ vm_page_unwire(result_page);
+ vm_page_unlock_queues();
+ PAGE_WAKEUP_DONE(result_page);
+
+ vm_fault_cleanup(result_page->object, top_page);
+ }
+ }
+
+ /*
+ * Inform the physical mapping system that the range
+ * of addresses may fault, so that page tables and
+ * such may be unwired themselves.
+ */
+
+ pmap_pageable(pmap, entry->vme_start, end_addr, TRUE);
+}
+
+/*
+ * vm_fault_wire_fast:
+ *
+ * Handle common case of a wire down page fault at the given address.
+ * If successful, the page is inserted into the associated physical map.
+ * The map entry is passed in to avoid the overhead of a map lookup.
+ *
+ * NOTE: the given address should be truncated to the
+ * proper page address.
+ *
+ * KERN_SUCCESS is returned if the page fault is handled; otherwise,
+ * a standard error specifying why the fault is fatal is returned.
+ *
+ * The map in question must be referenced, and remains so.
+ * Caller has a read lock on the map.
+ *
+ * This is a stripped version of vm_fault() for wiring pages. Anything
+ * other than the common case will return KERN_FAILURE, and the caller
+ * is expected to call vm_fault().
+ */
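+/*
+ * Typical use, as in vm_fault_wire above: try the fast path first
+ * and fall back to the full vm_fault on KERN_FAILURE:
+ *
+ *	if (vm_fault_wire_fast(map, va, entry) != KERN_SUCCESS)
+ *		(void) vm_fault(map, va, VM_PROT_NONE, TRUE,
+ *				FALSE, (void (*)()) 0);
+ */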
+kern_return_t vm_fault_wire_fast(map, va, entry)
+ vm_map_t map;
+ vm_offset_t va;
+ vm_map_entry_t entry;
+{
+ vm_object_t object;
+ vm_offset_t offset;
+ register vm_page_t m;
+ vm_prot_t prot;
+
+ vm_stat.faults++; /* needs lock XXX */
+/*
+ * Recovery actions
+ */
+
+#undef RELEASE_PAGE
+#define RELEASE_PAGE(m) { \
+ PAGE_WAKEUP_DONE(m); \
+ vm_page_lock_queues(); \
+ vm_page_unwire(m); \
+ vm_page_unlock_queues(); \
+}
+
+
+#undef UNLOCK_THINGS
+#define UNLOCK_THINGS { \
+ object->paging_in_progress--; \
+ vm_object_unlock(object); \
+}
+
+#undef UNLOCK_AND_DEALLOCATE
+#define UNLOCK_AND_DEALLOCATE { \
+ UNLOCK_THINGS; \
+ vm_object_deallocate(object); \
+}
+/*
+ * Give up and have caller do things the hard way.
+ */
+
+#define GIVE_UP { \
+ UNLOCK_AND_DEALLOCATE; \
+ return(KERN_FAILURE); \
+}
+
+
+ /*
+ * If this entry is not directly to a vm_object, bail out.
+ */
+ if (entry->is_sub_map)
+ return(KERN_FAILURE);
+
+ /*
+ * Find the backing store object and offset into it.
+ */
+
+ object = entry->object.vm_object;
+ offset = (va - entry->vme_start) + entry->offset;
+ prot = entry->protection;
+
+ /*
+ * Make a reference to this object to prevent its
+ * disposal while we are messing with it.
+ */
+
+ vm_object_lock(object);
+ assert(object->ref_count > 0);
+ object->ref_count++;
+ object->paging_in_progress++;
+
+ /*
+ * INVARIANTS (through entire routine):
+ *
+ * 1) At all times, we must either have the object
+ * lock or a busy page in some object to prevent
+ * some other thread from trying to bring in
+ * the same page.
+ *
+ * 2) Once we have a busy page, we must remove it from
+ * the pageout queues, so that the pageout daemon
+ * will not grab it away.
+ *
+ */
+
+ /*
+ * Look for page in top-level object. If it's not there or
+ * there's something going on, give up.
+ */
+ m = vm_page_lookup(object, offset);
+ if ((m == VM_PAGE_NULL) || (m->error) ||
+ (m->busy) || (m->absent) || (prot & m->page_lock)) {
+ GIVE_UP;
+ }
+
+ /*
+ * Wire the page down now. All bail outs beyond this
+ * point must unwire the page.
+ */
+
+ vm_page_lock_queues();
+ vm_page_wire(m);
+ vm_page_unlock_queues();
+
+ /*
+ * Mark page busy for other threads.
+ */
+ assert(!m->busy);
+ m->busy = TRUE;
+ assert(!m->absent);
+
+ /*
+ * Give up if the page is being written and there's a copy object
+ */
+ if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
+ RELEASE_PAGE(m);
+ GIVE_UP;
+ }
+
+ /*
+ * Put this page into the physical map.
+ * We have to unlock the object because pmap_enter
+ * may cause other faults.
+ */
+ vm_object_unlock(object);
+
+ PMAP_ENTER(map->pmap, va, m, prot, TRUE);
+
+ /*
+ * Must relock object so that paging_in_progress can be cleared.
+ */
+ vm_object_lock(object);
+
+ /*
+ * Unlock everything, and return
+ */
+
+ PAGE_WAKEUP_DONE(m);
+ UNLOCK_AND_DEALLOCATE;
+
+ return(KERN_SUCCESS);
+
+}
+
+/*
+ * Routine: vm_fault_copy_cleanup
+ * Purpose:
+ * Release a page used by vm_fault_copy.
+ */
+
+void vm_fault_copy_cleanup(page, top_page)
+ vm_page_t page;
+ vm_page_t top_page;
+{
+ vm_object_t object = page->object;
+
+ vm_object_lock(object);
+ PAGE_WAKEUP_DONE(page);
+ vm_page_lock_queues();
+ if (!page->active && !page->inactive)
+ vm_page_activate(page);
+ vm_page_unlock_queues();
+ vm_fault_cleanup(object, top_page);
+}
+
+/*
+ * Routine: vm_fault_copy
+ *
+ * Purpose:
+ * Copy pages from one virtual memory object to another --
+ * neither the source nor destination pages need be resident.
+ *
+ * Before actually copying a page, the version associated with
+ * the destination address map will be verified.
+ *
+ * In/out conditions:
+ * The caller must hold a reference, but not a lock, to
+ * each of the source and destination objects and to the
+ * destination map.
+ *
+ * Results:
+ * Returns KERN_SUCCESS if no errors were encountered in
+ * reading or writing the data. Returns KERN_INTERRUPTED if
+ * the operation was interrupted (only possible if the
+ * "interruptible" argument is asserted). Other return values
+ * indicate a permanent error in copying the data.
+ *
+ * The actual amount of data copied will be returned in the
+ * "src_size" argument. In the event that the destination map
+ * verification failed, this amount may be less than the amount
+ * requested.
+ */
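+/*
+ * A sketch of a call copying a single page (the local variable
+ * names here are only illustrative):
+ *
+ *	vm_size_t size = PAGE_SIZE;
+ *
+ *	kr = vm_fault_copy(src_object, src_offset, &size,
+ *			   dst_object, dst_offset,
+ *			   dst_map, &dst_version, FALSE);
+ */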
+kern_return_t vm_fault_copy(
+ src_object,
+ src_offset,
+ src_size,
+ dst_object,
+ dst_offset,
+ dst_map,
+ dst_version,
+ interruptible
+ )
+ vm_object_t src_object;
+ vm_offset_t src_offset;
+ vm_size_t *src_size; /* INOUT */
+ vm_object_t dst_object;
+ vm_offset_t dst_offset;
+ vm_map_t dst_map;
+ vm_map_version_t *dst_version;
+ boolean_t interruptible;
+{
+ vm_page_t result_page;
+ vm_prot_t prot;
+
+ vm_page_t src_page;
+ vm_page_t src_top_page;
+
+ vm_page_t dst_page;
+ vm_page_t dst_top_page;
+
+ vm_size_t amount_done;
+ vm_object_t old_copy_object;
+
+#define RETURN(x) \
+ MACRO_BEGIN \
+ *src_size = amount_done; \
+ MACRO_RETURN(x); \
+ MACRO_END
+
+ amount_done = 0;
+ do { /* while (amount_done != *src_size) */
+
+ RetrySourceFault: ;
+
+ if (src_object == VM_OBJECT_NULL) {
+ /*
+ * No source object. We will just
+ * zero-fill the page in dst_object.
+ */
+
+ src_page = VM_PAGE_NULL;
+ } else {
+ prot = VM_PROT_READ;
+
+ vm_object_lock(src_object);
+ vm_object_paging_begin(src_object);
+
+ switch (vm_fault_page(src_object, src_offset,
+ VM_PROT_READ, FALSE, interruptible,
+ &prot, &result_page, &src_top_page,
+ FALSE, (void (*)()) 0)) {
+
+ case VM_FAULT_SUCCESS:
+ break;
+ case VM_FAULT_RETRY:
+ goto RetrySourceFault;
+ case VM_FAULT_INTERRUPTED:
+ RETURN(MACH_SEND_INTERRUPTED);
+ case VM_FAULT_MEMORY_SHORTAGE:
+ VM_PAGE_WAIT((void (*)()) 0);
+ goto RetrySourceFault;
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ goto RetrySourceFault;
+ case VM_FAULT_MEMORY_ERROR:
+ return(KERN_MEMORY_ERROR);
+ }
+
+ src_page = result_page;
+
+ assert((src_top_page == VM_PAGE_NULL) ==
+ (src_page->object == src_object));
+
+ assert ((prot & VM_PROT_READ) != VM_PROT_NONE);
+
+ vm_object_unlock(src_page->object);
+ }
+
+ RetryDestinationFault: ;
+
+ prot = VM_PROT_WRITE;
+
+ vm_object_lock(dst_object);
+ vm_object_paging_begin(dst_object);
+
+ switch (vm_fault_page(dst_object, dst_offset, VM_PROT_WRITE,
+ FALSE, FALSE /* interruptible */,
+ &prot, &result_page, &dst_top_page,
+ FALSE, (void (*)()) 0)) {
+
+ case VM_FAULT_SUCCESS:
+ break;
+ case VM_FAULT_RETRY:
+ goto RetryDestinationFault;
+ case VM_FAULT_INTERRUPTED:
+ if (src_page != VM_PAGE_NULL)
+ vm_fault_copy_cleanup(src_page,
+ src_top_page);
+ RETURN(MACH_SEND_INTERRUPTED);
+ case VM_FAULT_MEMORY_SHORTAGE:
+ VM_PAGE_WAIT((void (*)()) 0);
+ goto RetryDestinationFault;
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ goto RetryDestinationFault;
+ case VM_FAULT_MEMORY_ERROR:
+ if (src_page != VM_PAGE_NULL)
+ vm_fault_copy_cleanup(src_page,
+ src_top_page);
+ return(KERN_MEMORY_ERROR);
+ }
+ assert ((prot & VM_PROT_WRITE) != VM_PROT_NONE);
+
+ dst_page = result_page;
+
+ old_copy_object = dst_page->object->copy;
+
+ vm_object_unlock(dst_page->object);
+
+ if (!vm_map_verify(dst_map, dst_version)) {
+
+ BailOut: ;
+
+ if (src_page != VM_PAGE_NULL)
+ vm_fault_copy_cleanup(src_page, src_top_page);
+ vm_fault_copy_cleanup(dst_page, dst_top_page);
+ break;
+ }
+
+
+ vm_object_lock(dst_page->object);
+ if (dst_page->object->copy != old_copy_object) {
+ vm_object_unlock(dst_page->object);
+ vm_map_verify_done(dst_map, dst_version);
+ goto BailOut;
+ }
+ vm_object_unlock(dst_page->object);
+
+ /*
+ * Copy the page, and note that it is dirty
+ * immediately.
+ */
+
+ if (src_page == VM_PAGE_NULL)
+ vm_page_zero_fill(dst_page);
+ else
+ vm_page_copy(src_page, dst_page);
+ dst_page->dirty = TRUE;
+
+ /*
+ * Unlock everything, and return
+ */
+
+ vm_map_verify_done(dst_map, dst_version);
+
+ if (src_page != VM_PAGE_NULL)
+ vm_fault_copy_cleanup(src_page, src_top_page);
+ vm_fault_copy_cleanup(dst_page, dst_top_page);
+
+ amount_done += PAGE_SIZE;
+ src_offset += PAGE_SIZE;
+ dst_offset += PAGE_SIZE;
+
+ } while (amount_done != *src_size);
+
+ RETURN(KERN_SUCCESS);
+#undef RETURN
+
+ /*NOTREACHED*/
+}
+
+
+
+
+
+#ifdef notdef
+
+/*
+ * Routine: vm_fault_page_overwrite
+ *
+ * Description:
+ * A form of vm_fault_page that assumes that the
+ * resulting page will be overwritten in its entirety,
+ * making it unnecessary to obtain the correct *contents*
+ * of the page.
+ *
+ * Implementation:
+ * XXX Untested. Also unused. Eventually, this technology
+ * could be used in vm_fault_copy() to advantage.
+ */
+vm_fault_return_t vm_fault_page_overwrite(dst_object, dst_offset, result_page)
+ register
+ vm_object_t dst_object;
+ vm_offset_t dst_offset;
+ vm_page_t *result_page; /* OUT */
+{
+ register
+ vm_page_t dst_page;
+
+#define interruptible FALSE /* XXX */
+
+ while (TRUE) {
+ /*
+ * Look for a page at this offset
+ */
+
+ while ((dst_page = vm_page_lookup(dst_object, dst_offset))
+ == VM_PAGE_NULL) {
+ /*
+ * No page, no problem... just allocate one.
+ */
+
+ dst_page = vm_page_alloc(dst_object, dst_offset);
+ if (dst_page == VM_PAGE_NULL) {
+ vm_object_unlock(dst_object);
+ VM_PAGE_WAIT((void (*)()) 0);
+ vm_object_lock(dst_object);
+ continue;
+ }
+
+ /*
+ * Pretend that the memory manager
+ * write-protected the page.
+ *
+ * Note that we will be asking for write
+ * permission without asking for the data
+ * first.
+ */
+
+ dst_page->overwriting = TRUE;
+ dst_page->page_lock = VM_PROT_WRITE;
+ dst_page->absent = TRUE;
+ dst_object->absent_count++;
+
+ break;
+
+ /*
+ * When we bail out, we might have to throw
+ * away the page created here.
+ */
+
+#define DISCARD_PAGE \
+ MACRO_BEGIN \
+ vm_object_lock(dst_object); \
+ dst_page = vm_page_lookup(dst_object, dst_offset); \
+ if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
+ VM_PAGE_FREE(dst_page); \
+ vm_object_unlock(dst_object); \
+ MACRO_END
+ }
+
+ /*
+ * If the page is write-protected...
+ */
+
+ if (dst_page->page_lock & VM_PROT_WRITE) {
+ /*
+ * ... and an unlock request hasn't been sent
+ */
+
+ if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
+ vm_prot_t u;
+ kern_return_t rc;
+
+ /*
+ * ... then send one now.
+ */
+
+ if (!dst_object->pager_ready) {
+ vm_object_assert_wait(dst_object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ interruptible);
+ vm_object_unlock(dst_object);
+ thread_block((void (*)()) 0);
+ if (current_thread()->wait_result !=
+ THREAD_AWAKENED) {
+ DISCARD_PAGE;
+ return(VM_FAULT_INTERRUPTED);
+ }
+ continue;
+ }
+
+ u = dst_page->unlock_request |= VM_PROT_WRITE;
+ vm_object_unlock(dst_object);
+
+ if ((rc = memory_object_data_unlock(
+ dst_object->pager,
+ dst_object->pager_request,
+ dst_offset + dst_object->paging_offset,
+ PAGE_SIZE,
+ u)) != KERN_SUCCESS) {
+ printf("vm_object_overwrite: memory_object_data_unlock failed\n");
+ DISCARD_PAGE;
+ return((rc == MACH_SEND_INTERRUPTED) ?
+ VM_FAULT_INTERRUPTED :
+ VM_FAULT_MEMORY_ERROR);
+ }
+ vm_object_lock(dst_object);
+ continue;
+ }
+
+ /* ... fall through to wait below */
+ } else {
+ /*
+ * If the page isn't being used for other
+ * purposes, then we're done.
+ */
+ if ( ! (dst_page->busy || dst_page->absent || dst_page->error) )
+ break;
+ }
+
+ PAGE_ASSERT_WAIT(dst_page, interruptible);
+ vm_object_unlock(dst_object);
+ thread_block((void (*)()) 0);
+ if (current_thread()->wait_result != THREAD_AWAKENED) {
+ DISCARD_PAGE;
+ return(VM_FAULT_INTERRUPTED);
+ }
+ }
+
+ *result_page = dst_page;
+ return(VM_FAULT_SUCCESS);
+
+#undef interruptible
+#undef DISCARD_PAGE
+}
+
+#endif /* notdef */