/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
 * Copyright (c) 1993,1994 The University of Utah and
 * the Computer Systems Laboratory (CSL).
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
 * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
 * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
 * THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/vm_resident.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#if MACH_VM_DEBUG
#include
#include
#endif

#if MACH_KDB
#include
#include
#endif /* MACH_KDB */

/*
 * Associated with each page of user-allocatable memory is a
 * page structure.
 */

/*
 * These variables record the values returned by vm_page_bootstrap,
 * for debugging purposes.  The implementation of pmap_steal_memory
 * and pmap_startup here also uses them internally.
 */
vm_offset_t	virtual_space_start;
vm_offset_t	virtual_space_end;

/*
 * Resident pages that represent real memory
 * are allocated from a free list.
 */
vm_page_t	vm_page_queue_free;
vm_page_t	vm_page_queue_fictitious;
decl_simple_lock_data(,vm_page_queue_free_lock)
unsigned int	vm_page_free_wanted;
int		vm_page_free_count;
int		vm_page_fictitious_count;
int		vm_page_external_count;

/*
 * Occasionally, the virtual memory system uses
 * resident page structures that do not refer to
 * real pages, for example to leave a page with
 * important state information in the VP table.
 *
 * These page structures are allocated the way
 * most other kernel structures are.
 */
struct kmem_cache	vm_page_cache;

/*
 * Fictitious pages don't have a physical address,
 * but we must initialize phys_addr to something.
 * For debugging, this should be a strange value
 * that the pmap module can recognize in assertions.
 */
vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;

/*
 * Resident page structures are also chained on
 * queues that are used by the page replacement
 * system (pageout daemon).  These queues are
 * defined here, but are shared by the pageout
 * module.
 */
queue_head_t	vm_page_queue_active;
queue_head_t	vm_page_queue_inactive;
struct lock	vm_page_queue_lock;
int		vm_page_active_count;
int		vm_page_inactive_count;
int		vm_page_wire_count;

/*
 * Several page replacement parameters are also
 * shared with this module, so that page allocation
 * (done here in vm_page_alloc) can trigger the
 * pageout daemon.
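 *
 * (The working values of these thresholds are computed by the
 * pageout daemon when it initializes; see vm_pageout.)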
 */
int	vm_page_free_target = 0;
int	vm_page_free_min = 0;
int	vm_page_inactive_target = 0;
int	vm_page_free_reserved = 0;
int	vm_page_laundry_count = 0;
int	vm_page_external_limit = 0;

/*
 * The VM system has a couple of heuristics for deciding
 * that pages are "uninteresting" and should be placed
 * on the inactive queue as likely candidates for replacement.
 * These variables let the heuristics be controlled at run-time
 * to make experimentation easier.
 */
boolean_t vm_page_deactivate_behind = TRUE;
boolean_t vm_page_deactivate_hint = TRUE;

/*
 * vm_page_bootstrap:
 *
 * Initializes the resident memory module.
 *
 * Returns the range of available kernel virtual memory.
 */
void vm_page_bootstrap(
	vm_offset_t	*startp,
	vm_offset_t	*endp)
{
	/*
	 * Initialize the page queues.
	 */
	simple_lock_init(&vm_page_queue_free_lock);
	lock_init(&vm_page_queue_lock, FALSE);

	vm_page_queue_free = VM_PAGE_NULL;
	vm_page_queue_fictitious = VM_PAGE_NULL;
	queue_init(&vm_page_queue_active);
	queue_init(&vm_page_queue_inactive);

	vm_page_free_wanted = 0;

	/*
	 * Machine-dependent code allocates the resident page table.
	 * It uses vm_page_init to initialize the page frames.
	 * The code also returns to us the virtual space available
	 * to the kernel.  We don't trust the pmap module
	 * to get the alignment right.
	 */
	pmap_startup(&virtual_space_start, &virtual_space_end);
	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);

	*startp = virtual_space_start;
	*endp = virtual_space_end;
	/* printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);*/
}

#ifndef MACHINE_PAGES
void pmap_startup(
	vm_offset_t	*startp,
	vm_offset_t	*endp)
{
	pmap_virtual_space(&virtual_space_start, &virtual_space_end);

	/*
	 * The initial values must be aligned properly, and
	 * we don't trust the pmap module to do it right.
	 */
	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);

	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
#endif /* MACHINE_PAGES */

/*
 * Routine:	vm_page_module_init
 * Purpose:
 *	Second initialization pass, to be done after
 *	the basic VM system is ready.
 */
void vm_page_module_init(void)
{
	kmem_cache_init (&vm_page_cache, "vm_page", sizeof(struct vm_page),
			 0, NULL, 0);
}

/*
 * Routine:	vm_page_create
 * Purpose:
 *	After the VM system is up, machine-dependent code
 *	may stumble across more physical memory.  For example,
 *	memory that it was reserving for a frame buffer.
 *	vm_page_create turns this memory into available pages.
 */
void vm_page_create(
	vm_offset_t	start,
	vm_offset_t	end)
{
	printf ("XXX: vm_page_create stubbed out\n");
	return;
	vm_offset_t paddr;
	vm_page_t m;

	for (paddr = round_page(start);
	     paddr < trunc_page(end);
	     paddr += PAGE_SIZE) {
		m = (vm_page_t) kmem_cache_alloc(&vm_page_cache);
		if (m == VM_PAGE_NULL)
			panic("vm_page_create");

		vm_page_init(m, paddr);
		vm_page_release(m, FALSE);
	}
}

static rdxtree_key_t
offset_key(vm_offset_t offset)
{
	return (rdxtree_key_t) atop(offset);
}

/*
 * vm_page_insert:	[ internal use only ]
 *
 * Inserts the given mem entry into the object/object-page
 * table and object list.
 *
 * The object and page must be locked.
 */
void vm_page_insert(
	vm_page_t	mem,
	vm_object_t	object,
	vm_offset_t	offset)
{
	assert(have_vm_object_lock(object));

	VM_PAGE_CHECK(mem);

	if (mem->tabled)
		panic("vm_page_insert");

	/*
	 * Record the object/offset pair in this page
	 */
	mem->object = object;
	mem->offset = offset;

	/*
	 * Insert it into the object's radix tree.
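	 * The tree is keyed by page offset (see offset_key above).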
	 */
	rdxtree_insert(&object->memt, offset_key(offset), mem);
	mem->tabled = TRUE;

	/*
	 * Show that the object has one more resident page.
	 */
	object->resident_page_count++;
	assert(object->resident_page_count >= 0);

	if (object->can_persist && (object->ref_count == 0))
		vm_object_cached_pages_update(1);

	/*
	 * Detect sequential access and inactivate previous page.
	 * We ignore busy pages.
	 */
	if (vm_page_deactivate_behind &&
	    (offset == object->last_alloc + PAGE_SIZE)) {
		vm_page_t last_mem;

		last_mem = vm_page_lookup(object, object->last_alloc);
		if ((last_mem != VM_PAGE_NULL) && !last_mem->busy)
			vm_page_deactivate(last_mem);
	}
	object->last_alloc = offset;
}

/*
 * vm_page_replace:
 *
 * Exactly like vm_page_insert, except that we first
 * remove any existing page at the given offset in object
 * and we don't do deactivate-behind.
 *
 * The object and page must be locked.
 */
void vm_page_replace(
	vm_page_t	mem,
	vm_object_t	object,
	vm_offset_t	offset)
{
	struct vm_page *old;
	void **slot;

	assert(have_vm_object_lock(object));

	VM_PAGE_CHECK(mem);

	if (mem->tabled)
		panic("vm_page_replace");

	/*
	 * Record the object/offset pair in this page
	 */
	mem->object = object;
	mem->offset = offset;

	/*
	 * Insert it into the object's radix tree, replacing any
	 * page that might have been there.
	 */
	slot = rdxtree_lookup_slot(&object->memt, offset_key(offset));
	old = rdxtree_replace_slot(slot, mem);

	if (old != VM_PAGE_NULL) {
		old->tabled = FALSE;
		object->resident_page_count--;

		if (object->can_persist && (object->ref_count == 0))
			vm_object_cached_pages_update(-1);

		/* And free it. */
		vm_page_free(old);
	}

	mem->tabled = TRUE;

	/*
	 * And show that the object has one more resident
	 * page.
	 */
	object->resident_page_count++;
	assert(object->resident_page_count >= 0);

	if (object->can_persist && (object->ref_count == 0))
		vm_object_cached_pages_update(1);
}

/*
 * vm_page_remove:	[ internal use only ]
 *
 * Removes the given mem entry from the object/offset-page
 * table and the object page list.
 *
 * The object and page must be locked.
 */
void vm_page_remove(
	vm_page_t	mem)
{
	assert(have_vm_object_lock(mem->object));
	assert(mem->tabled);

	VM_PAGE_CHECK(mem);

	/* Remove from the object's radix tree. */
	rdxtree_remove(&mem->object->memt, offset_key(mem->offset));

	/*
	 * And show that the object has one fewer resident
	 * page.
	 */
	mem->object->resident_page_count--;

	mem->tabled = FALSE;

	if (mem->object->can_persist && (mem->object->ref_count == 0))
		vm_object_cached_pages_update(-1);
}

/*
 * vm_page_lookup:
 *
 * Returns the page associated with the object/offset
 * pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 * The object must be locked.  No side effects.
 */
vm_page_t vm_page_lookup(
	vm_object_t	object,
	vm_offset_t	offset)
{
	assert(have_vm_object_lock(object));
	return rdxtree_lookup(&object->memt, offset_key(offset));
}

/*
 * vm_page_rename:
 *
 * Move the given memory entry from its
 * current object to the specified target object/offset.
 *
 * The object must be locked.
 */
void vm_page_rename(
	vm_page_t	mem,
	vm_object_t	new_object,
	vm_offset_t	new_offset)
{
	assert(have_vm_object_lock(new_object));

	/*
	 * Changes to mem->object require the page lock because
	 * the pageout daemon uses that lock to get the object.
	 */
	vm_page_lock_queues();
	vm_page_remove(mem);
	vm_page_insert(mem, new_object, new_offset);
	vm_page_unlock_queues();
}

/*
 * vm_page_grab_fictitious:
 *
 * Remove a fictitious page from the free list.
 * Returns VM_PAGE_NULL if there are no free pages.
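 * Callers may replenish the list with vm_page_more_fictitious,
 * which can block.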
 */
vm_page_t vm_page_grab_fictitious(void)
{
	vm_page_t m;

	simple_lock(&vm_page_queue_free_lock);
	m = vm_page_queue_fictitious;
	if (m != VM_PAGE_NULL) {
		vm_page_fictitious_count--;
		vm_page_queue_fictitious = (vm_page_t) m->pageq.next;
		/* XXX is this re-initialization really needed ? */
		vm_page_init(m, vm_page_fictitious_addr);
		m->fictitious = TRUE;
	}
	simple_unlock(&vm_page_queue_free_lock);

	return m;
}

/*
 * vm_page_release_fictitious:
 *
 * Release a fictitious page to the free list.
 */
void vm_page_release_fictitious(
	vm_page_t m)
{
	assert(m->fictitious);
	assert(! m->tabled);

	simple_lock(&vm_page_queue_free_lock);
	m->pageq.next = (queue_entry_t) vm_page_queue_fictitious;
	vm_page_queue_fictitious = m;
	vm_page_fictitious_count++;
	simple_unlock(&vm_page_queue_free_lock);
}

/*
 * vm_page_more_fictitious:
 *
 * Add more fictitious pages to the free list.
 * Allowed to block.
 */
int vm_page_fictitious_quantum = 5;

void vm_page_more_fictitious(void)
{
	vm_page_t m;
	int i;

	for (i = 0; i < vm_page_fictitious_quantum; i++) {
		m = (vm_page_t) kmem_cache_alloc(&vm_page_cache);
		if (m == VM_PAGE_NULL)
			panic("vm_page_more_fictitious");

		vm_page_init(m, vm_page_fictitious_addr);
		m->fictitious = TRUE;
		vm_page_release_fictitious(m);
	}
}

/*
 * vm_page_convert:
 *
 * Attempt to convert a fictitious page into a real page.
 */
boolean_t vm_page_convert(
	struct vm_page **mp,
	boolean_t external)
{
	struct vm_page *real_m, *fict_m, *old;
	void **slot;

	fict_m = *mp;

	assert(fict_m->fictitious);
	assert(fict_m->phys_addr == vm_page_fictitious_addr);
	assert(! fict_m->active);
	assert(! fict_m->inactive);
	assert(have_vm_object_lock((*mp)->object));

	real_m = vm_page_grab(external);
	if (real_m == VM_PAGE_NULL)
		return FALSE;

	memcpy(&real_m->vm_page_header,
	       &fict_m->vm_page_header,
	       sizeof *fict_m - VM_PAGE_HEADER_SIZE);
	real_m->fictitious = FALSE;
	fict_m->tabled = FALSE;

	/* Fix radix tree entry. */
	/* XXX is the object locked? */
	slot = rdxtree_lookup_slot(&fict_m->object->memt,
				   offset_key(fict_m->offset));
	old = rdxtree_replace_slot(slot, real_m);
	assert(old == fict_m);

	assert(real_m->phys_addr != vm_page_fictitious_addr);
	assert(fict_m->fictitious);
	assert(fict_m->phys_addr == vm_page_fictitious_addr);

	vm_page_release_fictitious(fict_m);

	*mp = real_m;
	return TRUE;
}

/*
 * vm_page_grab:
 *
 * Remove a page from the free list.
 * Returns VM_PAGE_NULL if the free list is too small.
 */
vm_page_t vm_page_grab(
	boolean_t external)
{
	vm_page_t	mem;

	simple_lock(&vm_page_queue_free_lock);

	/*
	 * Only let privileged threads (involved in pageout)
	 * dip into the reserved pool or exceed the limit
	 * for externally-managed pages.
	 */
	if (((vm_page_free_count < vm_page_free_reserved) ||
	     (external &&
	      (vm_page_external_count > vm_page_external_limit))) &&
	    !current_thread()->vm_privilege) {
		simple_unlock(&vm_page_queue_free_lock);
		return VM_PAGE_NULL;
	}

	if (external)
		vm_page_external_count++;

	mem = vm_page_alloc_p(0, VM_PAGE_SEL_DIRECTMAP, VM_PAGE_OBJECT);
	if (! mem) {
		simple_unlock(&vm_page_queue_free_lock);
		return VM_PAGE_NULL;
	}

	vm_page_init_mach(mem);
	mem->extcounted = mem->external = external;

	simple_unlock(&vm_page_queue_free_lock);

	/*
	 * Decide if we should poke the pageout daemon.
	 * We do this if the free count is less than the low
	 * water mark, or if the free count is less than the high
	 * water mark (but above the low water mark) and the inactive
	 * count is less than its target.
	 *
	 * We don't have the counts locked ... if they change a little,
	 * it doesn't really matter.
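	 * (A slightly stale count at worst delays or hastens the
	 * wakeup; it does not affect correctness.)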
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     (vm_page_inactive_count < vm_page_inactive_target)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	return mem;
}

vm_offset_t vm_page_grab_phys_addr(void)
{
	vm_page_t p = vm_page_grab(FALSE);
	if (p == VM_PAGE_NULL)
		return -1;
	else
		return p->phys_addr;
}

/*
 * vm_page_release:
 *
 * Return a page to the free list.
 */
void vm_page_release(
	vm_page_t	mem,
	boolean_t	external)
{
	simple_lock(&vm_page_queue_free_lock);
	vm_page_free_p(mem, 0);
	if (external)
		vm_page_external_count--;

	/*
	 * Check if we should wake up someone waiting for a page.
	 * But don't bother waking them unless they can allocate.
	 *
	 * We wake up only one thread, to prevent starvation.
	 * Because the scheduling system handles wait queues FIFO,
	 * if we wake up all waiting threads, one greedy thread
	 * can starve multiple niceguy threads.  When the threads
	 * all wake up, the greedy thread runs first, grabs the page,
	 * and waits for another page.  It will be the first to run
	 * when the next page is freed.
	 *
	 * However, there is a slight danger here.
	 * The thread we wake might not use the free page.
	 * Then the other threads could wait indefinitely
	 * while the page goes unused.  To forestall this,
	 * the pageout daemon will keep making free pages
	 * as long as vm_page_free_wanted is non-zero.
	 */
	if ((vm_page_free_wanted > 0) &&
	    (vm_page_free_count >= vm_page_free_reserved)) {
		vm_page_free_wanted--;
		thread_wakeup_one((event_t) &vm_page_free_count);
	}

	simple_unlock(&vm_page_queue_free_lock);
}

/*
 * vm_page_wait:
 *
 * Wait for a page to become available.
 * If there are plenty of free pages, then we don't sleep.
 */
void vm_page_wait(
	void (*continuation)(void))
{
	/*
	 * We can't use vm_page_free_reserved to make this
	 * determination.  Consider: some thread might
	 * need to allocate two pages.  The first allocation
	 * succeeds, the second fails.  After the first page is freed,
	 * a call to vm_page_wait must really block.
	 */
	simple_lock(&vm_page_queue_free_lock);
	if ((vm_page_free_count < vm_page_free_target) ||
	    (vm_page_external_count > vm_page_external_limit)) {
		if (vm_page_free_wanted++ == 0)
			thread_wakeup((event_t)&vm_page_free_wanted);
		assert_wait((event_t)&vm_page_free_count, FALSE);
		simple_unlock(&vm_page_queue_free_lock);
		if (continuation != 0) {
			counter(c_vm_page_wait_block_user++);
			thread_block(continuation);
		} else {
			counter(c_vm_page_wait_block_kernel++);
			thread_block((void (*)(void)) 0);
		}
	} else
		simple_unlock(&vm_page_queue_free_lock);
}

/*
 * vm_page_alloc:
 *
 * Allocate and return a memory cell associated
 * with this VM object/offset pair.
 *
 * Object must be locked.
 */
vm_page_t vm_page_alloc(
	vm_object_t	object,
	vm_offset_t	offset)
{
	vm_page_t	mem;

	assert(have_vm_object_lock(object));

	mem = vm_page_grab(!object->internal);
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_lock_queues();
	vm_page_insert(mem, object, offset);
	vm_page_unlock_queues();

	return mem;
}

/*
 * vm_page_free:
 *
 * Returns the given page to the free list,
 * disassociating it from any VM object.
 *
 * Object and page queues must be locked prior to entry.
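 *
 * The page is removed from its object's page table and from
 * the paging queues before it is released.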
 */
void vm_page_free(
	vm_page_t	mem)
{
	assert(have_vm_object_lock(mem->object));
	assert(have_vm_page_queue_lock());

	if (mem->tabled)
		vm_page_remove(mem);

	VM_PAGE_QUEUES_REMOVE(mem);

	if (mem->wire_count != 0) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		mem->wire_count = 0;
	}

	if (mem->laundry) {
		vm_page_laundry_count--;
		mem->laundry = FALSE;
	}

	PAGE_WAKEUP_DONE(mem);

	if (mem->absent)
		vm_object_absent_release(mem->object);

	if (mem->private || mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		int external = mem->external && mem->extcounted;
		vm_page_release(mem, external);
	}
}

/*
 * vm_page_wire:
 *
 * Mark this page as wired down by yet
 * another map, removing it from paging queues
 * as necessary.
 *
 * The page's object and the page queues must be locked.
 */
void vm_page_wire(
	vm_page_t	mem)
{
	assert(have_vm_object_lock(mem->object));
	assert(have_vm_page_queue_lock());

	VM_PAGE_CHECK(mem);

	if (mem->wire_count == 0) {
		VM_PAGE_QUEUES_REMOVE(mem);
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count++;
	}
	mem->wire_count++;
}

/*
 * vm_page_unwire:
 *
 * Release one wiring of this page, potentially
 * enabling it to be paged again.
 *
 * The page's object and the page queues must be locked.
 */
void vm_page_unwire(
	vm_page_t	mem)
{
	assert(have_vm_object_lock(mem->object));
	assert(have_vm_page_queue_lock());

	VM_PAGE_CHECK(mem);

	if (--mem->wire_count == 0) {
		queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
		vm_page_active_count++;
		mem->active = TRUE;
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
	}
}

/*
 * vm_page_deactivate:
 *
 * Returns the given page to the inactive list,
 * indicating that no physical maps have access
 * to this page.  [Used by the physical mapping system.]
 *
 * The page queues must be locked.
 */
void vm_page_deactivate(
	vm_page_t	m)
{
	assert(have_vm_page_queue_lock());

	VM_PAGE_CHECK(m);

	/*
	 * This page is no longer very interesting.  If it was
	 * interesting (active or inactive/referenced), then we
	 * clear the reference bit and (re)enter it in the
	 * inactive queue.  Note wired pages should not have
	 * their reference bit cleared.
	 */
	if (m->active || (m->inactive && m->reference)) {
		if (!m->fictitious && !m->absent)
			pmap_clear_reference(m->phys_addr);
		m->reference = FALSE;
		VM_PAGE_QUEUES_REMOVE(m);
	}
	if (m->wire_count == 0 && !m->inactive) {
		queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
		m->inactive = TRUE;
		vm_page_inactive_count++;
	}
}

/*
 * vm_page_activate:
 *
 * Put the specified page on the active list (if appropriate).
 *
 * The page queues must be locked.
 */
void vm_page_activate(
	vm_page_t	m)
{
	assert(have_vm_page_queue_lock());

	VM_PAGE_CHECK(m);

	if (m->inactive) {
		queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
		vm_page_inactive_count--;
		m->inactive = FALSE;
	}
	if (m->wire_count == 0) {
		if (m->active)
			panic("vm_page_activate: already active");

		queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
		m->active = TRUE;
		vm_page_active_count++;
	}
}

/*
 * vm_page_zero_fill:
 *
 * Zero-fill the specified page.
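 * The actual zeroing is done by the pmap module, given the
 * page's physical address.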
 */
void vm_page_zero_fill(
	vm_page_t	m)
{
	VM_PAGE_CHECK(m);

	pmap_zero_page(m->phys_addr);
}

/*
 * vm_page_copy:
 *
 * Copy one page to another
 */
void vm_page_copy(
	vm_page_t	src_m,
	vm_page_t	dest_m)
{
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dest_m);

	pmap_copy_page(src_m->phys_addr, dest_m->phys_addr);
}

#if MACH_KDB
#define printf kdbprintf

/*
 * Routine:	vm_page_print		[exported]
 */
void vm_page_print(p)
	const vm_page_t	p;
{
	iprintf("Page 0x%X: object 0x%X,", (vm_offset_t) p,
		(vm_offset_t) p->object);
	printf(" offset 0x%X", p->offset);
	printf("wire_count %d,", p->wire_count);
	printf(" %s",
	       (p->active ? "active" : (p->inactive ? "inactive" : "loose")));
	printf("%s ", (p->laundry ? " laundry" : ""));
	printf("%s", (p->dirty ? "dirty" : "clean"));
	printf("%s", (p->busy ? " busy" : ""));
	printf("%s", (p->absent ? " absent" : ""));
	printf("%s", (p->error ? " error" : ""));
	printf("%s", (p->fictitious ? " fictitious" : ""));
	printf("%s", (p->private ? " private" : ""));
	printf("%s", (p->wanted ? " wanted" : ""));
	printf("%s,", (p->tabled ? "" : "not_tabled"));
	printf("phys_addr = 0x%X, lock = 0x%X, unlock_request = 0x%X\n",
	       p->phys_addr,
	       (vm_offset_t) p->page_lock,
	       (vm_offset_t) p->unlock_request);
}
#endif /* MACH_KDB */