-rw-r--r--  Makefile.am                          |   2
-rw-r--r--  i386/Makefrag.am                     |   2
-rw-r--r--  i386/i386/db_trace.c                 |   4
-rw-r--r--  i386/i386/locore.S                   |  14
-rw-r--r--  i386/i386/vm_param.h                 |  61
-rw-r--r--  i386/i386at/biosmem.c                |  71
-rw-r--r--  i386/i386at/biosmem.h                |   4
-rw-r--r--  i386/i386at/model_dep.c              | 308
-rw-r--r--  i386/i386at/model_dep.h              |  14
-rw-r--r--  i386/include/mach/i386/vm_types.h    |   9
-rw-r--r--  i386/intel/pmap.c                    |  45
-rw-r--r--  i386/ldscript                        |   9
-rw-r--r--  i386/x15/boot.h                      | 127
-rw-r--r--  i386/x15/elf.h                       |  61
-rw-r--r--  i386/x15/multiboot.h                 | 111
-rw-r--r--  i386/x15/param.h                     | 185
-rw-r--r--  include/mach_debug/mach_debug.defs   |  13
-rw-r--r--  kern/bootstrap.c                     |   7
-rw-r--r--  kern/limits.h                        |  29
-rw-r--r--  kern/log2.h                          |  45
-rw-r--r--  kern/startup.c                       |   2
-rw-r--r--  kern/stdint.h                        |  30
-rw-r--r--  kern/thread.c                        |  14
-rw-r--r--  kern/thread.h                        |   2
-rw-r--r--  vm/pmap.h                            |  17
-rw-r--r--  vm/vm_debug.c                        |  85
-rw-r--r--  vm/vm_fault.c                        |   4
-rw-r--r--  vm/vm_init.c                         |   2
-rw-r--r--  vm/vm_object.c                       | 102
-rw-r--r--  vm/vm_object.h                       |   4
-rw-r--r--  vm/vm_page.c                         | 152
-rw-r--r--  vm/vm_page.h                         | 285
-rw-r--r--  vm/vm_resident.c                     | 722
33 files changed, 1322 insertions, 1220 deletions
diff --git a/Makefile.am b/Makefile.am index 76a192b..ad36b41 100644 --- a/Makefile.am +++ b/Makefile.am @@ -162,7 +162,7 @@ clib_routines := memcmp memcpy memmove \ udivdi3 __udivdi3 __umoddi3 \ __rel_iplt_start __rel_iplt_end \ __ffsdi2 \ - _START _start etext _edata end _end # actually ld magic, not libc. + _boot _START _start etext _edata end _end # actually ld magic, not libc. gnumach-undef: gnumach.$(OBJEXT) $(NM_V) $(NM) -u $< | sed 's/ *U *//' | sort -u > $@ MOSTLYCLEANFILES += gnumach-undef diff --git a/i386/Makefrag.am b/i386/Makefrag.am index 4dd6a9f..215318d 100644 --- a/i386/Makefrag.am +++ b/i386/Makefrag.am @@ -29,6 +29,8 @@ libkernel_a_SOURCES += \ if PLATFORM_at libkernel_a_SOURCES += \ + i386/i386at/biosmem.c \ + i386/i386at/biosmem.h \ i386/i386at/boothdr.S \ i386/i386at/com.c \ i386/i386at/com.h \ diff --git a/i386/i386/db_trace.c b/i386/i386/db_trace.c index ec33859..c8789e7 100644 --- a/i386/i386/db_trace.c +++ b/i386/i386/db_trace.c @@ -37,6 +37,7 @@ #include <machine/machspl.h> #include <machine/db_interface.h> #include <machine/db_trace.h> +#include <i386at/model_dep.h> #include <ddb/db_access.h> #include <ddb/db_command.h> @@ -129,7 +130,6 @@ db_i386_reg_value( long *dp = 0; db_expr_t null_reg = 0; thread_t thread = ap->thread; - extern unsigned int_stack_high; if (db_option(ap->modif, 'u')) { if (thread == THREAD_NULL) { @@ -139,7 +139,7 @@ db_i386_reg_value( if (thread == current_thread()) { if (ddb_regs.cs & 0x3) dp = vp->valuep; - else if (ddb_regs.ebp < int_stack_high) + else if (ON_INT_STACK(ddb_regs.ebp)) db_error("cannot get/set user registers in nested interrupt\n"); } } else { diff --git a/i386/i386/locore.S b/i386/i386/locore.S index cfda86f..3ff0044 100644 --- a/i386/i386/locore.S +++ b/i386/i386/locore.S @@ -542,8 +542,10 @@ trap_from_kernel: #if MACH_KDB || MACH_TTD movl %esp,%ebx /* save current stack */ - cmpl EXT(int_stack_high),%esp /* on an interrupt stack? */ - jb 1f /* OK if so */ + movl %esp,%edx /* on an interrupt stack? */ + and $(~(KERNEL_STACK_SIZE-1)),%edx + cmpl EXT(int_stack_base),%edx + je 1f /* OK if so */ CPU_NUMBER(%edx) /* get CPU number */ cmpl CX(EXT(kernel_stack),%edx),%esp @@ -647,8 +649,10 @@ ENTRY(all_intrs) pushl %edx cld /* clear direction flag */ - cmpl %ss:EXT(int_stack_high),%esp /* on an interrupt stack? */ - jb int_from_intstack /* if not: */ + movl %esp,%edx /* on an interrupt stack? */ + and $(~(KERNEL_STACK_SIZE-1)),%edx + cmpl %ss:EXT(int_stack_base),%edx + je int_from_intstack /* if not: */ pushl %ds /* save segment registers */ pushl %es @@ -707,7 +711,7 @@ LEXT(return_to_iret) /* ( label for kdb_kintr and hardclock) */ iret /* return to caller */ int_from_intstack: - cmpl $EXT(_intstack),%esp /* seemingly looping? */ + cmpl $EXT(int_stack_base),%esp /* seemingly looping? */ jb stack_overflowed /* if not: */ call EXT(interrupt) /* call interrupt routine */ _return_to_iret_i: /* ( label for kdb_kintr) */ diff --git a/i386/i386/vm_param.h b/i386/i386/vm_param.h index ffd91d6..16f9119 100644 --- a/i386/i386/vm_param.h +++ b/i386/i386/vm_param.h @@ -25,6 +25,7 @@ /* XXX use xu/vm_param.h */ #include <mach/vm_param.h> +#include <kern/macros.h> #ifdef MACH_PV_PAGETABLES #include <xen/public/xen.h> #endif @@ -54,19 +55,65 @@ #define VM_MAX_KERNEL_ADDRESS (LINEAR_MAX_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS + VM_MIN_KERNEL_ADDRESS) #endif /* MACH_PV_PAGETABLES */ -/* Reserve mapping room for kmem. 
*/ -#ifdef MACH_XEN -#define VM_KERNEL_MAP_SIZE (224 * 1024 * 1024) -#else -#define VM_KERNEL_MAP_SIZE (192 * 1024 * 1024) -#endif - /* The kernel virtual address space is actually located at high linear addresses. This is the kernel address range in linear addresses. */ #define LINEAR_MIN_KERNEL_ADDRESS (VM_MAX_ADDRESS) #define LINEAR_MAX_KERNEL_ADDRESS (0xffffffffUL) +/* + * Direct physical mapping boundaries. + */ +#ifdef __LP64__ +#define VM_MIN_DIRECTMAP_ADDRESS VM_MIN_KERNEL_ADDRESS +#define VM_MAX_DIRECTMAP_ADDRESS DECL_CONST(0xffffc00000000000, UL) +#else /* __LP64__ */ +#define VM_MIN_DIRECTMAP_ADDRESS VM_MAX_ADDRESS +#define VM_MAX_DIRECTMAP_ADDRESS DECL_CONST(0xf8000000, UL) +#endif /* __LP64__ */ + +/* Reserve mapping room for virtual kernel memory, mainly used for IPC + and temporary mappings. */ +#define VM_KERNEL_MAP_SIZE (128 * 1024 * 1024) + +/* + * Physical memory properties. + */ + +#define VM_PAGE_DMA_LIMIT DECL_CONST(0x1000000, UL) + +#ifdef __LP64__ +#define VM_PAGE_MAX_SEGS 4 +#define VM_PAGE_DMA32_LIMIT DECL_CONST(0x100000000, UL) +#define VM_PAGE_DIRECTMAP_LIMIT DECL_CONST(0x400000000000, UL) +#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0x10000000000000, UL) +#else /* __LP64__ */ +#define VM_PAGE_DIRECTMAP_LIMIT DECL_CONST(0x38000000, ULL) +#ifdef X86_PAE +#define VM_PAGE_MAX_SEGS 3 +#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0x10000000000000, ULL) +#else /* X86_PAE */ +#define VM_PAGE_MAX_SEGS 3 +#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0xfffff000, UL) +#endif /* X86_PAE */ +#endif /* __LP64__ */ + +/* + * Physical segment indexes. + */ +#define VM_PAGE_SEG_DMA 0 + +#ifdef __LP64__ +#define VM_PAGE_SEG_DMA32 1 +#define VM_PAGE_SEG_DIRECTMAP 2 +#define VM_PAGE_SEG_HIGHMEM 3 +#else /* __LP64__ */ +#define VM_PAGE_SEG_DMA32 1 /* Alias for the DIRECTMAP segment */ +#define VM_PAGE_SEG_DIRECTMAP 1 +#define VM_PAGE_SEG_HIGHMEM 2 +#endif /* __LP64__ */ + + #ifdef MACH_PV_PAGETABLES /* need room for mmu updates (2*8bytes) */ #define KERNEL_STACK_SIZE (4*I386_PGBYTES) diff --git a/i386/i386at/biosmem.c b/i386/i386at/biosmem.c index bdffc33..01dcd79 100644 --- a/i386/i386at/biosmem.c +++ b/i386/i386at/biosmem.c @@ -16,23 +16,33 @@ */ #include <kern/assert.h> -#include <kern/init.h> #include <kern/macros.h> -#include <kern/panic.h> -#include <kern/param.h> -#include <kern/printk.h> -#include <kern/stddef.h> -#include <kern/stdint.h> -#include <kern/string.h> -#include <kern/types.h> -#include <machine/biosmem.h> -#include <machine/boot.h> -#include <machine/cpu.h> -#include <machine/elf.h> -#include <machine/multiboot.h> -#include <vm/vm_kmem.h> +#include <kern/debug.h> +#include <kern/printf.h> +#include <stddef.h> +#include <string.h> +#include <sys/types.h> #include <vm/vm_page.h> +#include "biosmem.h" +#include "x15/elf.h" +#include "x15/multiboot.h" + +/* Mach glue. */ +#define __bootdata /* nothing */ +#define __boot /* nothing */ +#define __init /* nothing */ +#define boot_memmove memmove +#define boot_memset(P,C,S) memset((char *) phystokv(P), C, S) +#define boot_strlen(P) strlen((char *) phystokv(P)) +#define boot_panic panic +#define printk printf +#define BOOT_VTOP(addr) _kvtophys(addr) + +/* XXX */ +extern char _boot; +extern char _end; + /* * Maximum number of entries in the BIOS memory map. 
* @@ -115,7 +125,7 @@ biosmem_map_build(const struct multiboot_raw_info *mbi) struct biosmem_map_entry *start, *entry, *end; unsigned long addr; - addr = mbi->mmap_addr; + addr = phystokv(mbi->mmap_addr); mb_entry = (struct multiboot_raw_mmap_entry *)addr; mb_end = (struct multiboot_raw_mmap_entry *)(addr + mbi->mmap_length); start = biosmem_map; @@ -373,16 +383,16 @@ biosmem_save_cmdline_sizes(struct multiboot_raw_info *mbi) uint32_t i; if (mbi->flags & MULTIBOOT_LOADER_CMDLINE) - mbi->unused0 = boot_strlen((char *)(unsigned long)mbi->cmdline) + 1; + mbi->unused0 = boot_strlen((unsigned long)mbi->cmdline) + 1; if (mbi->flags & MULTIBOOT_LOADER_MODULES) { unsigned long addr; - addr = mbi->mods_addr; + addr = phystokv(mbi->mods_addr); for (i = 0; i < mbi->mods_count; i++) { mod = (struct multiboot_raw_module *)addr + i; - mod->reserved = boot_strlen((char *)(unsigned long)mod->string) + 1; + mod->reserved = boot_strlen((unsigned long)mod->string) + 1; } } } @@ -391,6 +401,8 @@ static void __boot biosmem_find_boot_data_update(uint32_t min, uint32_t *start, uint32_t *end, uint32_t data_start, uint32_t data_end) { + assert (data_start < data_end); + if ((min <= data_start) && (data_start < *start)) { *start = data_start; *end = data_end; @@ -419,21 +431,20 @@ biosmem_find_boot_data(const struct multiboot_raw_info *mbi, uint32_t min, struct elf_shdr *shdr; uint32_t i, start, end = end; unsigned long tmp; - start = max; - biosmem_find_boot_data_update(min, &start, &end, (unsigned long)&_boot, + biosmem_find_boot_data_update(min, &start, &end, + BOOT_VTOP((unsigned long)&_boot), BOOT_VTOP((unsigned long)&_end)); if ((mbi->flags & MULTIBOOT_LOADER_CMDLINE) && (mbi->cmdline != 0)) biosmem_find_boot_data_update(min, &start, &end, mbi->cmdline, mbi->cmdline + mbi->unused0); - if (mbi->flags & MULTIBOOT_LOADER_MODULES) { i = mbi->mods_count * sizeof(struct multiboot_raw_module); biosmem_find_boot_data_update(min, &start, &end, mbi->mods_addr, mbi->mods_addr + i); - tmp = mbi->mods_addr; + tmp = phystokv(mbi->mods_addr); for (i = 0; i < mbi->mods_count; i++) { mod = (struct multiboot_raw_module *)tmp + i; @@ -450,7 +461,7 @@ biosmem_find_boot_data(const struct multiboot_raw_info *mbi, uint32_t min, tmp = mbi->shdr_num * mbi->shdr_size; biosmem_find_boot_data_update(min, &start, &end, mbi->shdr_addr, mbi->shdr_addr + tmp); - tmp = mbi->shdr_addr; + tmp = phystokv(mbi->shdr_addr); for (i = 0; i < mbi->shdr_num; i++) { shdr = (struct elf_shdr *)(tmp + (i * mbi->shdr_size)); @@ -458,7 +469,6 @@ biosmem_find_boot_data(const struct multiboot_raw_info *mbi, uint32_t min, if ((shdr->type != ELF_SHT_SYMTAB) && (shdr->type != ELF_SHT_STRTAB)) continue; - biosmem_find_boot_data_update(min, &start, &end, shdr->addr, shdr->addr + shdr->size); } @@ -516,6 +526,10 @@ biosmem_setup_allocator(struct multiboot_raw_info *mbi) biosmem_heap_start = max_heap_start; biosmem_heap_end = max_heap_end; biosmem_heap_cur = biosmem_heap_end; + + /* Mach pmap glue. */ + extern vm_offset_t phys_last_addr; + phys_last_addr = (vm_offset_t) max_heap_end; } void __boot @@ -596,7 +610,7 @@ biosmem_bootalloc(unsigned int nr_pages) boot_panic(biosmem_panic_nomem_msg); biosmem_heap_cur = addr; - return boot_memset((void *)addr, 0, size); + return boot_memset(addr, 0, size); } phys_addr_t __boot @@ -688,10 +702,15 @@ biosmem_setup(void) biosmem_map_show(); +#if notyet cpu = cpu_current(); max_phys_end = (cpu->phys_addr_width == 0) ? 
(uint64_t)-1 : (uint64_t)1 << cpu->phys_addr_width; +#else + max_phys_end = (uint64_t)1 << 32; + (void) cpu; +#endif for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) { if (biosmem_segment_size(i) == 0) @@ -715,7 +734,7 @@ biosmem_free_usable_range(phys_addr_t start, phys_addr_t end) (unsigned long long)((end - start) >> 10)); while (start < end) { - page = vm_page_lookup(start); + page = vm_page_lookup_pa(start); assert(page != NULL); vm_page_manage(page); start += PAGE_SIZE; diff --git a/i386/i386at/biosmem.h b/i386/i386at/biosmem.h index b32e027..c4b59f5 100644 --- a/i386/i386at/biosmem.h +++ b/i386/i386at/biosmem.h @@ -18,8 +18,8 @@ #ifndef _X86_BIOSMEM_H #define _X86_BIOSMEM_H -#include <kern/types.h> -#include <machine/multiboot.h> +#include <sys/types.h> +#include "x15/multiboot.h" /* * Address where the address of the Extended BIOS Data Area segment can be diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c index fdf983b..c831549 100644 --- a/i386/i386at/model_dep.c +++ b/i386/i386at/model_dep.c @@ -49,6 +49,7 @@ #include <kern/mach_clock.h> #include <kern/printf.h> #include <kern/startup.h> +#include <kern/thread.h> #include <sys/time.h> #include <sys/types.h> #include <vm/vm_page.h> @@ -70,6 +71,12 @@ #include <i386at/rtc.h> #include <i386at/model_dep.h> #include <i386at/acpihalt.h> +#define multiboot_module x15_multiboot_module +#define multiboot_info x15_multiboot_info +#include <i386/x15/multiboot.h> +#include <i386at/biosmem.h> +#undef multiboot_module +#undef multiboot_info #ifdef MACH_XEN #include <xen/console.h> #include <xen/store.h> @@ -125,27 +132,13 @@ struct multiboot_info boot_info; /* Command line supplied to kernel. */ char *kernel_cmdline = ""; -/* This is used for memory initialization: - it gets bumped up through physical memory - that exists and is not occupied by boot gunk. - It is not necessarily page-aligned. */ -static vm_offset_t avail_next -#ifndef MACH_HYP - = 0x1000 /* XX end of BIOS data area */ -#endif /* MACH_HYP */ - ; - -/* Possibly overestimated amount of available memory - still remaining to be handed to the VM system. */ -static vm_size_t avail_remaining; - extern char version[]; /* If set, reboot the system on ctrl-alt-delete. */ boolean_t rebootflag = FALSE; /* exported to kdintr */ -/* XX interrupt stack pointer and highwater mark, for locore.S. */ -vm_offset_t int_stack_top, int_stack_high; +/* Interrupt stack. */ +vm_offset_t int_stack_top, int_stack_base; #ifdef LINUX_DEV extern void linux_init(void); @@ -273,7 +266,8 @@ void db_reset_cpu(void) halt_all_cpus(1); } - +#if 0 +/* XXX: Port XEN bits to biosmem. */ /* * Compute physical memory size and other parameters. */ @@ -357,6 +351,7 @@ mem_size_init(void) - 0x1000); #endif /* MACH_HYP */ } +#endif /* 0 */ /* * Basic PC VM initialization. @@ -368,7 +363,7 @@ i386at_init(void) /* XXX move to intel/pmap.h */ extern pt_entry_t *kernel_page_dir; int nb_direct, i; - vm_offset_t addr, delta; + vm_offset_t delta; /* * Initialize the PIC prior to any possible call to an spl. @@ -382,44 +377,8 @@ i386at_init(void) /* * Find memory size parameters. */ - mem_size_init(); - -#ifdef MACH_XEN - kernel_cmdline = (char*) boot_info.cmd_line; -#else /* MACH_XEN */ - /* Copy content pointed by boot_info before losing access to it when it - * is too far in physical memory. 
*/ - if (boot_info.flags & MULTIBOOT_CMDLINE) { - int len = strlen ((char*)phystokv(boot_info.cmdline)) + 1; - assert(init_alloc_aligned(round_page(len), &addr)); - kernel_cmdline = (char*) phystokv(addr); - memcpy(kernel_cmdline, (void *)phystokv(boot_info.cmdline), len); - boot_info.cmdline = addr; - } - - if (boot_info.flags & MULTIBOOT_MODS) { - struct multiboot_module *m; - int i; - - assert(init_alloc_aligned(round_page(boot_info.mods_count * sizeof(*m)), &addr)); - m = (void*) phystokv(addr); - memcpy(m, (void*) phystokv(boot_info.mods_addr), boot_info.mods_count * sizeof(*m)); - boot_info.mods_addr = addr; - - for (i = 0; i < boot_info.mods_count; i++) { - vm_size_t size = m[i].mod_end - m[i].mod_start; - assert(init_alloc_aligned(round_page(size), &addr)); - memcpy((void*) phystokv(addr), (void*) phystokv(m[i].mod_start), size); - m[i].mod_start = addr; - m[i].mod_end = addr + size; - - size = strlen((char*) phystokv(m[i].string)) + 1; - assert(init_alloc_aligned(round_page(size), &addr)); - memcpy((void*) phystokv(addr), (void*) phystokv(m[i].string), size); - m[i].string = addr; - } - } -#endif /* MACH_XEN */ + biosmem_bootstrap((struct multiboot_raw_info *) &boot_info); + biosmem_setup(); /* * Initialize kernel physical map, mapping the @@ -483,10 +442,39 @@ i386at_init(void) pmap_clear_bootstrap_pagetable((void *)boot_info.pt_base); #endif /* MACH_PV_PAGETABLES */ - /* Interrupt stacks are allocated in physical memory, - while kernel stacks are allocated in kernel virtual memory, - so phys_last_addr serves as a convenient dividing point. */ - int_stack_high = phystokv(phys_last_addr); + /* Initialize physical memory management. */ + vm_page_setup(); + vm_page_info(); + + /* Initialize the slab allocator. */ + slab_bootstrap(); + slab_init(); + + { +#ifdef MACH_XEN + kernel_cmdline = (char*) boot_info.cmd_line; +#else /* MACH_XEN */ + /* Copy content pointed by boot_info before losing + * access to it when it is too far in physical + * memory. */ + if (boot_info.flags & MULTIBOOT_CMDLINE) { + size_t len = strlen((const char *) + phystokv(boot_info.cmdline)); + kernel_cmdline = (char *) kalloc(len); + memcpy(kernel_cmdline, + (const void *) phystokv(boot_info.cmdline), + len + 1); + } +#endif + } + + /* Bootstrap the thread module so that we can allocate an + interrupt stack. */ + thread_bootstrap(); + + /* Allocate interrupt stack. */ + int_stack_base = thread_bootstrap_stack_alloc(); + int_stack_top = int_stack_base + KERNEL_STACK_SIZE - 4; /* * Initialize and activate the real i386 protected-mode structures. @@ -532,11 +520,6 @@ i386at_init(void) #ifdef MACH_XEN hyp_p2m_init(); #endif /* MACH_XEN */ - - /* XXX We'll just use the initialization stack we're already running on - as the interrupt stack for now. Later this will have to change, - because the init stack will get freed after bootup. */ - asm("movl %%esp,%0" : "=m" (int_stack_top)); } /* @@ -702,201 +685,6 @@ resettodr(void) writetodc(); } -unsigned int pmap_free_pages(void) -{ - return atop(avail_remaining); -} - -/* Always returns page-aligned regions. */ -boolean_t -init_alloc_aligned(vm_size_t size, vm_offset_t *addrp) -{ - vm_offset_t addr; - -#ifdef MACH_HYP - /* There is none */ - if (!avail_next) - avail_next = _kvtophys(boot_info.pt_base) + (boot_info.nr_pt_frames + 3) * 0x1000; -#else /* MACH_HYP */ - extern char start[], end[]; - int i; - static int wrapped = 0; - - /* Memory regions to skip. */ - vm_offset_t cmdline_start_pa = boot_info.flags & MULTIBOOT_CMDLINE - ? 
boot_info.cmdline : 0; - vm_offset_t cmdline_end_pa = cmdline_start_pa - ? cmdline_start_pa+strlen((char*)phystokv(cmdline_start_pa))+1 - : 0; - vm_offset_t mods_start_pa = boot_info.flags & MULTIBOOT_MODS - ? boot_info.mods_addr : 0; - vm_offset_t mods_end_pa = mods_start_pa - ? mods_start_pa - + boot_info.mods_count * sizeof(struct multiboot_module) - : 0; - - retry: -#endif /* MACH_HYP */ - - /* Page-align the start address. */ - avail_next = round_page(avail_next); - -#ifndef MACH_HYP - /* Start with memory above 16MB, reserving the low memory for later. */ - /* Don't care on Xen */ - if (!wrapped && phys_last_addr > 16 * 1024*1024) - { - if (avail_next < 16 * 1024*1024) - avail_next = 16 * 1024*1024; - else if (avail_next == phys_last_addr) - { - /* We have used all the memory above 16MB, so now start on - the low memory. This will wind up at the end of the list - of free pages, so it should not have been allocated to any - other use in early initialization before the Linux driver - glue initialization needs to allocate low memory. */ - avail_next = 0x1000; - wrapped = 1; - } - } -#endif /* MACH_HYP */ - - /* Check if we have reached the end of memory. */ - if (avail_next == - ( -#ifndef MACH_HYP - wrapped ? 16 * 1024*1024 : -#endif /* MACH_HYP */ - phys_last_addr)) - return FALSE; - - /* Tentatively assign the current location to the caller. */ - addr = avail_next; - - /* Bump the pointer past the newly allocated region - and see where that puts us. */ - avail_next += size; - -#ifndef MACH_HYP - /* Skip past the I/O and ROM area. */ - if (boot_info.flags & MULTIBOOT_MEM_MAP) - { - struct multiboot_mmap *map, *map_end, *current = NULL, *next = NULL; - unsigned long long minimum_next = ~0ULL; - - map = (void*) phystokv(boot_info.mmap_addr); - map_end = (void*) map + boot_info.mmap_count; - - /* Find both our current map, and the next one */ - while (map + 1 <= map_end) - { - if (map->Type == MB_ARD_MEMORY) - { - unsigned long long start = map->BaseAddr; - unsigned long long end = start + map->Length;; - - if (start <= addr && avail_next <= end) - { - /* Ok, fits in the current map */ - current = map; - break; - } - else if (avail_next <= start && start < minimum_next) - { - /* This map is not far from avail_next */ - next = map; - minimum_next = start; - } - } - map = (void*) map + map->size + sizeof(map->size); - } - - if (!current) { - /* Area does not fit in the current map, switch to next - * map if any */ - if (!next || next->BaseAddr >= phys_last_addr) - { - /* No further reachable map, we have reached - * the end of memory, but possibly wrap around - * 16MiB. */ - avail_next = phys_last_addr; - goto retry; - } - - /* Start from next map */ - avail_next = next->BaseAddr; - goto retry; - } - } - else if ((avail_next > (boot_info.mem_lower * 0x400)) && (addr < 0x100000)) - { - avail_next = 0x100000; - goto retry; - } - - /* Skip our own kernel code, data, and bss. */ - if ((phystokv(avail_next) > (vm_offset_t)start) && (phystokv(addr) < (vm_offset_t)end)) - { - avail_next = _kvtophys(end); - goto retry; - } - - /* Skip any areas occupied by valuable boot_info data. 
*/ - if ((avail_next > cmdline_start_pa) && (addr < cmdline_end_pa)) - { - avail_next = cmdline_end_pa; - goto retry; - } - if ((avail_next > mods_start_pa) && (addr < mods_end_pa)) - { - avail_next = mods_end_pa; - goto retry; - } - if ((phystokv(avail_next) > kern_sym_start) && (phystokv(addr) < kern_sym_end)) - { - avail_next = _kvtophys(kern_sym_end); - goto retry; - } - if (boot_info.flags & MULTIBOOT_MODS) - { - struct multiboot_module *m = (struct multiboot_module *) - phystokv(boot_info.mods_addr); - for (i = 0; i < boot_info.mods_count; i++) - { - if ((avail_next > m[i].mod_start) - && (addr < m[i].mod_end)) - { - avail_next = m[i].mod_end; - goto retry; - } - /* XXX string */ - } - } -#endif /* MACH_HYP */ - - avail_remaining -= size; - - *addrp = addr; - return TRUE; -} - -boolean_t pmap_next_page(vm_offset_t *addrp) -{ - return init_alloc_aligned(PAGE_SIZE, addrp); -} - -/* Grab a physical page: - the standard memory allocation mechanism - during system initialization. */ -vm_offset_t -pmap_grab_page(void) -{ - vm_offset_t addr; - if (!pmap_next_page(&addr)) - panic("Not enough memory to initialize Mach"); - return addr; -} - boolean_t pmap_valid_page(vm_offset_t x) { /* XXX is this OK? What does it matter for? */ diff --git a/i386/i386at/model_dep.h b/i386/i386at/model_dep.h index aa24032..a27d9b3 100644 --- a/i386/i386at/model_dep.h +++ b/i386/i386at/model_dep.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Free Software Foundation. + * Copyright (c) 2013-2015 Free Software Foundation. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,6 +25,16 @@ extern int timemmap(dev_t dev, vm_offset_t off, vm_prot_t prot); void inittodr(void); -boolean_t init_alloc_aligned(vm_size_t size, vm_offset_t *addrp); +/* + * Interrupt stack. + * + * We allocate the interrupt stacks from the kernels stack cache. As + * the stacks are naturally aligned, it is easy to find the base + * address given a stack pointer. + */ +extern vm_offset_t int_stack_top, int_stack_base; + +/* Check whether P points to the interrupt stack. */ +#define ON_INT_STACK(P) (((P) & ~(KERNEL_STACK_SIZE-1)) == int_stack_base) #endif /* _MODEL_DEP_H_ */ diff --git a/i386/include/mach/i386/vm_types.h b/i386/include/mach/i386/vm_types.h index 1439940..47badad 100644 --- a/i386/include/mach/i386/vm_types.h +++ b/i386/include/mach/i386/vm_types.h @@ -77,6 +77,15 @@ typedef unsigned long vm_offset_t; typedef vm_offset_t * vm_offset_array_t; /* + * A phys_addr_t is a physical address. + */ +#if PAE +typedef unsigned long long phys_addr_t; +#else /* PAE */ +typedef unsigned long phys_addr_t; +#endif /* PAE */ + +/* * A vm_size_t is the proper type for e.g. * expressing the difference between two * vm_offset_t entities. diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c index 102309f..5b9ccaf 100644 --- a/i386/intel/pmap.c +++ b/i386/intel/pmap.c @@ -83,6 +83,7 @@ #include <i386/proc_reg.h> #include <i386/locore.h> #include <i386/model_dep.h> +#include <i386at/biosmem.h> #ifdef MACH_PSEUDO_PHYS #define WRITE_PTE(pte_p, pte_entry) *(pte_p) = pte_entry?pa_to_ma(pte_entry):0; @@ -627,19 +628,16 @@ void pmap_bootstrap(void) /* Note: initial Xen mapping holds at least 512kB free mapped page. * We use that for directly building our linear mapping. 
*/ #if PAE - { - vm_offset_t addr; - init_alloc_aligned(PDPNUM * INTEL_PGBYTES, &addr); - kernel_pmap->dirbase = kernel_page_dir = (pt_entry_t*)phystokv(addr); - } - kernel_pmap->pdpbase = (pt_entry_t*)phystokv(pmap_grab_page()); + kernel_pmap->dirbase = kernel_page_dir = + (pt_entry_t*) biosmem_bootalloc (PDPNUM); + kernel_pmap->pdpbase = (pt_entry_t*) biosmem_bootalloc(1); { int i; for (i = 0; i < PDPNUM; i++) WRITE_PTE(&kernel_pmap->pdpbase[i], pa_to_pte(_kvtophys((void *) kernel_pmap->dirbase + i * INTEL_PGBYTES)) | INTEL_PTE_VALID); } #else /* PAE */ - kernel_pmap->dirbase = kernel_page_dir = (pt_entry_t*)phystokv(pmap_grab_page()); + kernel_pmap->dirbase = kernel_page_dir = biosmem_bootalloc(1); #endif /* PAE */ { int i; @@ -679,7 +677,7 @@ void pmap_bootstrap(void) struct mmu_update update; int j, n; - l1_map[n_l1map] = (pt_entry_t*) phystokv(pmap_grab_page()); + l1_map[n_l1map] = (pt_entry_t*) biosmem_bootalloc(1); for (j = 0; j < NPTES; j++) l1_map[n_l1map][j] = (((pt_entry_t)pfn_to_mfn(lin2pdenum(la - VM_MIN_KERNEL_ADDRESS) * NPTES + j)) << PAGE_SHIFT) | INTEL_PTE_VALID | INTEL_PTE_WRITE; pmap_set_page_readonly_init(l1_map[n_l1map]); @@ -719,7 +717,7 @@ void pmap_bootstrap(void) for (va = phystokv(phys_first_addr); va >= phystokv(phys_first_addr) && va < kernel_virtual_end; ) { pt_entry_t *pde = kernel_page_dir + lin2pdenum(kvtolin(va)); - pt_entry_t *ptable = (pt_entry_t*)phystokv(pmap_grab_page()); + pt_entry_t *ptable = (pt_entry_t*) biosmem_bootalloc(1); pt_entry_t *pte; /* Initialize the page directory entry. */ @@ -955,9 +953,8 @@ void pmap_init(void) s = (vm_size_t) (sizeof(struct pv_entry) * npages + pv_lock_table_size(npages) + npages); - - s = round_page(s); - if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS) + addr = kalloc(s); + if (! addr) panic("pmap_init"); memset((void *) addr, 0, s); @@ -1158,6 +1155,7 @@ pmap_page_table_page_dealloc(vm_offset_t pa) pmap_t pmap_create(vm_size_t size) { pmap_t p; + struct vm_page *mem; pmap_statistics_t stats; /* @@ -1177,10 +1175,11 @@ pmap_t pmap_create(vm_size_t size) if (p == PMAP_NULL) panic("pmap_create"); - if (kmem_alloc_wired(kernel_map, - (vm_offset_t *)&p->dirbase, PDPNUM * INTEL_PGBYTES) - != KERN_SUCCESS) - panic("pmap_create"); + mem = vm_page_alloc_p(iorder2(PDPNUM), VM_PAGE_SEL_DIRECTMAP, + VM_PAGE_KERNEL); + if (! mem) + return PMAP_NULL; + p->dirbase = (pt_entry_t *) phystokv(mem->phys_addr); memcpy(p->dirbase, kernel_page_dir, PDPNUM * INTEL_PGBYTES); #ifdef LINUX_DEV @@ -1198,10 +1197,10 @@ pmap_t pmap_create(vm_size_t size) #endif /* MACH_PV_PAGETABLES */ #if PAE - if (kmem_alloc_wired(kernel_map, - (vm_offset_t *)&p->pdpbase, INTEL_PGBYTES) - != KERN_SUCCESS) - panic("pmap_create"); + mem = vm_page_alloc_p(0, VM_PAGE_SEL_DIRECTMAP, VM_PAGE_KERNEL); + if (! 
mem) + return PMAP_NULL; + p->pdpbase = (pt_entry_t *) phystokv(mem->phys_addr); { int i; for (i = 0; i < PDPNUM; i++) @@ -1286,12 +1285,14 @@ void pmap_destroy(pmap_t p) pmap_set_page_readwrite((void*) p->dirbase + i * INTEL_PGBYTES); } #endif /* MACH_PV_PAGETABLES */ - kmem_free(kernel_map, (vm_offset_t)p->dirbase, PDPNUM * INTEL_PGBYTES); + m = vm_page_lookup_pa(_kvtophys(p->dirbase)); + vm_page_free_p(m, PDPNUM); #if PAE #ifdef MACH_PV_PAGETABLES pmap_set_page_readwrite(p->pdpbase); #endif /* MACH_PV_PAGETABLES */ - kmem_free(kernel_map, (vm_offset_t)p->pdpbase, INTEL_PGBYTES); + m = vm_page_lookup_pa(_kvtophys(p->pdpbase)); + vm_page_free_p(m, 0); #endif /* PAE */ kmem_cache_free(&pmap_cache, (vm_offset_t) p); } diff --git a/i386/ldscript b/i386/ldscript index ddbbf91..dcf47a9 100644 --- a/i386/ldscript +++ b/i386/ldscript @@ -12,6 +12,10 @@ SECTIONS * `gnumach_LINKFLAGS' in `i386/Makefrag.am'. */ . = _START; + + /* biosmem.c uses this. */ + _boot = .; + .text : AT (_START_MAP) { @@ -34,7 +38,10 @@ SECTIONS PROVIDE (_etext = .); PROVIDE (etext = .); - /* Read-only sections, merged into text segment: */ + /* biosmem.c uses this. */ + _end = .; + +/* Read-only sections, merged into text segment: */ PROVIDE (__executable_start = .); .interp : { *(.interp) } .note.gnu.build-id : { *(.note.gnu.build-id) } diff --git a/i386/x15/boot.h b/i386/x15/boot.h new file mode 100644 index 0000000..ab85be0 --- /dev/null +++ b/i386/x15/boot.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2010-2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _X86_BOOT_H +#define _X86_BOOT_H + +#include <kern/macros.h> +#include <machine/vm_param.h> + +#define VM_KERNEL_OFFSET VM_MIN_KERNEL_ADDRESS +#define STACK_SIZE PAGE_SIZE +#define pmap_pte_t void // XXX + +/* + * Macros used by the very early panic functions. + */ +#define BOOT_CGAMEM 0xb8000 +#define BOOT_CGACHARS (80 * 25) +#define BOOT_CGACOLOR 0x7 + +/* + * The kernel is physically loaded at BOOT_OFFSET by the boot loader. It + * is divided in two parts: the .boot section which uses physical addresses + * and the main kernel code and data at VM_KERNEL_OFFSET. + * + * See the linker script for more information. + */ +#define BOOT_OFFSET DECL_CONST(0x100000, UL) + +/* + * Virtual to physical address translation macro. + */ +#define BOOT_VTOP(addr) ((addr) - VM_KERNEL_OFFSET) + +/* + * Address where the MP trampoline code is copied and run at. + * + * It must reside at a free location in the first segment and be page + * aligned. + */ +#define BOOT_MP_TRAMPOLINE_ADDR 0x7000 + +#ifndef __ASSEMBLER__ + +#include "multiboot.h" +#include <machine/pmap.h> + +/* + * Functions and data used before paging is enabled must be part of the .boot + * and .bootdata sections respectively, so that they use physical addresses. + * Once paging is enabled, their access relies on the kernel identity mapping. 
+ */ +#define __boot __section(".boot.text") +#define __bootdata __section(".boot.data") + +/* + * Boundaries of the .boot section. + */ +extern char _boot; +extern char _eboot; + +extern char boot_stack[STACK_SIZE]; +extern char boot_ap_stack[STACK_SIZE]; + +/* + * This variable contains the CPU ID of an AP during early initialization. + */ +extern unsigned int boot_ap_id; + +/* + * Size of the trampoline code used for APs. + */ +extern uint32_t boot_mp_trampoline_size; + +/* + * Address of the MP trampoline code. + */ +void boot_mp_trampoline(void); + +/* + * Helper functions available before paging is enabled. + * + * Any memory passed to these must also be accessible without paging. + */ +void * boot_memmove(void *dest, const void *src, size_t n); +void * boot_memset(void *s, int c, size_t n); +size_t boot_strlen(const char *s); +void __noreturn boot_panic(const char *s); + +/* + * This function is called by the bootstrap code before paging is enabled. + * It establishes a direct mapping of the kernel at virtual addresses and + * returns the physical address of the page directory. It is up to the + * caller to actually enable paging. + * + * TODO Update comment. + */ +pmap_pte_t * boot_setup_paging(const struct multiboot_raw_info *mbi, + unsigned long eax); + +/* + * Main entry point, called directly after basic paging is initialized. + */ +void boot_main(void); + +/* + * Entry point for APs. + */ +void boot_ap_main(void); + +#endif /* __ASSEMBLER__ */ + +#endif /* _X86_BOOT_H */ diff --git a/i386/x15/elf.h b/i386/x15/elf.h new file mode 100644 index 0000000..e0ea260 --- /dev/null +++ b/i386/x15/elf.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2013 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _X86_ELF_H +#define _X86_ELF_H + +#define ELF_SHT_SYMTAB 2 +#define ELF_SHT_STRTAB 3 + +struct elf_shdr { + unsigned int name; + unsigned int type; + unsigned int flags; + unsigned long addr; + unsigned long offset; + unsigned int size; + unsigned int link; + unsigned int info; + unsigned int addralign; + unsigned int entsize; +}; + +#ifdef __LP64__ + +struct elf_sym { + unsigned int name; + unsigned char info; + unsigned char other; + unsigned short shndx; + unsigned long value; + unsigned long size; +}; + +#else /* __LP64__ */ + +struct elf_sym { + unsigned int name; + unsigned long value; + unsigned long size; + unsigned char info; + unsigned char other; + unsigned short shndx; +}; + +#endif /* __LP64__ */ + +#endif /* _X86_ELF_H */ diff --git a/i386/x15/multiboot.h b/i386/x15/multiboot.h new file mode 100644 index 0000000..4a0502c --- /dev/null +++ b/i386/x15/multiboot.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2010, 2012 Richard Braun. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _X86_MULTIBOOT_H +#define _X86_MULTIBOOT_H + +/* + * Magic number provided by the OS to the boot loader. + */ +#define MULTIBOOT_OS_MAGIC 0x1badb002 + +/* + * Multiboot flags requesting services from the boot loader. + */ +#define MULTIBOOT_OS_MEMORY_INFO 0x2 + +#define MULTIBOOT_OS_FLAGS MULTIBOOT_OS_MEMORY_INFO + +/* + * Magic number to identify a multiboot compliant boot loader. + */ +#define MULTIBOOT_LOADER_MAGIC 0x2badb002 + +/* + * Multiboot flags set by the boot loader. + */ +#define MULTIBOOT_LOADER_MEMORY 0x01 +#define MULTIBOOT_LOADER_CMDLINE 0x04 +#define MULTIBOOT_LOADER_MODULES 0x08 +#define MULTIBOOT_LOADER_SHDR 0x20 +#define MULTIBOOT_LOADER_MMAP 0x40 + +#ifndef __ASSEMBLER__ + +#include <kern/macros.h> +#include <kern/stdint.h> + +/* + * A multiboot module. + */ +struct multiboot_raw_module { + uint32_t mod_start; + uint32_t mod_end; + uint32_t string; + uint32_t reserved; +} __packed; + +/* + * Memory map entry. + */ +struct multiboot_raw_mmap_entry { + uint32_t size; + uint64_t base_addr; + uint64_t length; + uint32_t type; +} __packed; + +/* + * Multiboot information structure as passed by the boot loader. + */ +struct multiboot_raw_info { + uint32_t flags; + uint32_t mem_lower; + uint32_t mem_upper; + uint32_t unused0; + uint32_t cmdline; + uint32_t mods_count; + uint32_t mods_addr; + uint32_t shdr_num; + uint32_t shdr_size; + uint32_t shdr_addr; + uint32_t shdr_strndx; + uint32_t mmap_length; + uint32_t mmap_addr; + uint32_t unused1[9]; +} __packed; + +/* + * Versions of the multiboot structures suitable for use with 64-bit pointers. + */ + +struct multiboot_module { + void *mod_start; + void *mod_end; + char *string; +}; + +struct multiboot_info { + uint32_t flags; + char *cmdline; + struct multiboot_module *mods_addr; + uint32_t mods_count; +}; + +#endif /* __ASSEMBLER__ */ + +#endif /* _X86_MULTIBOOT_H */ diff --git a/i386/x15/param.h b/i386/x15/param.h new file mode 100644 index 0000000..f8f9c33 --- /dev/null +++ b/i386/x15/param.h @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2010-2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * This file is a top header in the inclusion hierarchy, and shouldn't include + * other headers that may cause circular dependencies. 
+ */ + +#ifndef _X86_PARAM_H +#define _X86_PARAM_H + +#include <kern/macros.h> + +#define __LITTLE_ENDIAN__ + +/* + * L1 cache line size. + * + * XXX Use this value until processor selection is available. + */ +#define CPU_L1_SIZE 64 + +/* + * Code/data alignment. + */ +#define TEXT_ALIGN 16 + +#ifdef __LP64__ +#define DATA_ALIGN 8 +#else /* __LP64__ */ +#define DATA_ALIGN 4 +#endif /* __LP64__ */ + +/* + * Attributes for variables that are mostly read and seldom changed. + */ +#define __read_mostly __section(".data.read_mostly") + +/* + * Provide architecture-specific string functions. + */ +#define ARCH_STRING_MEMCPY +#define ARCH_STRING_MEMMOVE +#define ARCH_STRING_MEMSET +#define ARCH_STRING_MEMCMP +#define ARCH_STRING_STRLEN +#define ARCH_STRING_STRCPY +#define ARCH_STRING_STRCMP + +/* + * System timer frequency. + * + * The selected value of 200 translates to a period of 5ms, small enough to + * provide low latency, and is practical as both a dividend and divisor. + */ +#define HZ 200 + +/* + * 4 KiB pages. + */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1 << PAGE_SHIFT) +#define PAGE_MASK (PAGE_SIZE - 1) + +/* + * Kernel stack size for threads and interrupt handlers. + */ +#define STACK_SIZE PAGE_SIZE + +/* + * Virtual memory properties. + */ + +/* + * User space boundaries. + */ +#define VM_MIN_ADDRESS DECL_CONST(0, UL) + +#ifdef __LP64__ +#define VM_MAX_ADDRESS DECL_CONST(0x800000000000, UL) +#else /* __LP64__ */ +#define VM_MAX_ADDRESS DECL_CONST(0xc0000000, UL) +#endif/* __LP64__ */ + +/* + * Kernel space boundaries. + */ +#ifdef __LP64__ +#define VM_MIN_KERNEL_ADDRESS DECL_CONST(0xffff800000000000, UL) +#define VM_MAX_KERNEL_ADDRESS DECL_CONST(0xfffffffffffff000, UL) +#else /* __LP64__ */ +#define VM_MIN_KERNEL_ADDRESS VM_MAX_ADDRESS +#define VM_MAX_KERNEL_ADDRESS DECL_CONST(0xfffff000, UL) +#endif /* __LP64__ */ + +/* + * Direct physical mapping boundaries. + */ +#ifdef __LP64__ +#define VM_MIN_DIRECTMAP_ADDRESS VM_MIN_KERNEL_ADDRESS +#define VM_MAX_DIRECTMAP_ADDRESS DECL_CONST(0xffffc00000000000, UL) +#else /* __LP64__ */ +#define VM_MIN_DIRECTMAP_ADDRESS VM_MAX_ADDRESS +#define VM_MAX_DIRECTMAP_ADDRESS DECL_CONST(0xf8000000, UL) +#endif /* __LP64__ */ + +/* + * Kernel mapping offset. + * + * On 32-bits systems, the kernel is linked at addresses included in the + * direct physical mapping, whereas on 64-bits systems, it is linked at + * -2 GiB because the "kernel" memory model is used when compiling (see + * the -mcmodel=kernel gcc option). + */ +#ifdef __LP64__ +#define VM_KERNEL_OFFSET DECL_CONST(0xffffffff80000000, UL) +#else /* __LP64__ */ +#define VM_KERNEL_OFFSET VM_MIN_DIRECTMAP_ADDRESS +#endif /* __LP64__ */ + +/* + * Kernel virtual space boundaries. + * + * In addition to the direct physical mapping, the kernel has its own virtual + * memory space. + */ +#define VM_MIN_KMEM_ADDRESS VM_MAX_DIRECTMAP_ADDRESS + +#ifdef __LP64__ +#define VM_MAX_KMEM_ADDRESS VM_KERNEL_OFFSET +#else /* __LP64__ */ +#define VM_MAX_KMEM_ADDRESS DECL_CONST(0xfffff000, UL) +#endif /* __LP64__ */ + +/* + * Physical memory properties. 
+ */ + +#define VM_PAGE_DMA_LIMIT DECL_CONST(0x1000000, UL) + +#ifdef __LP64__ +#define VM_PAGE_MAX_SEGS 4 +#define VM_PAGE_DMA32_LIMIT DECL_CONST(0x100000000, UL) +#define VM_PAGE_DIRECTMAP_LIMIT DECL_CONST(0x400000000000, UL) +#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0x10000000000000, UL) +#else /* __LP64__ */ +#define VM_PAGE_DIRECTMAP_LIMIT DECL_CONST(0x38000000, ULL) +#ifdef X86_PAE +#define VM_PAGE_MAX_SEGS 3 +#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0x10000000000000, ULL) +#else /* X86_PAE */ +#define VM_PAGE_MAX_SEGS 3 +#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0xfffff000, UL) +#endif /* X86_PAE */ +#endif /* __LP64__ */ + +/* + * Physical segment indexes. + */ +#define VM_PAGE_SEG_DMA 0 + +#ifdef __LP64__ +#define VM_PAGE_SEG_DMA32 1 +#define VM_PAGE_SEG_DIRECTMAP 2 +#define VM_PAGE_SEG_HIGHMEM 3 +#else /* __LP64__ */ +#define VM_PAGE_SEG_DMA32 1 /* Alias for the DIRECTMAP segment */ +#define VM_PAGE_SEG_DIRECTMAP 1 +#define VM_PAGE_SEG_HIGHMEM 2 +#endif /* __LP64__ */ + +#endif /* _X86_PARAM_H */ diff --git a/include/mach_debug/mach_debug.defs b/include/mach_debug/mach_debug.defs index c8e8b1b..c59436c 100644 --- a/include/mach_debug/mach_debug.defs +++ b/include/mach_debug/mach_debug.defs @@ -120,20 +120,7 @@ routine processor_set_stack_usage( out maxusage : vm_size_t; out maxstack : vm_offset_t); -#if !defined(MACH_VM_DEBUG) || MACH_VM_DEBUG - -/* - * Returns information about the global VP table. - */ - -routine host_virtual_physical_table_info( - host : host_t; - out info : hash_info_bucket_array_t, - CountInOut, Dealloc); - -#else /* !defined(MACH_VM_DEBUG) || MACH_VM_DEBUG */ skip; /* host_virtual_physical_table_info */ -#endif /* !defined(MACH_VM_DEBUG) || MACH_VM_DEBUG */ #if !defined(MACH_KDB) || MACH_KDB /* diff --git a/kern/bootstrap.c b/kern/bootstrap.c index a2ff8aa..93b104b 100644 --- a/kern/bootstrap.c +++ b/kern/bootstrap.c @@ -262,10 +262,9 @@ void bootstrap_create(void) panic ("ERROR in executing boot script: %s", boot_script_error_string (losers)); } - /* XXX we could free the memory used - by the boot loader's descriptors and such. */ - for (n = 0; n < boot_info.mods_count; n++) - vm_page_create(bmods[n].mod_start, bmods[n].mod_end); + + /* Free usable memory. */ + /* XXX biosmem_free_usable(); */ } static void diff --git a/kern/limits.h b/kern/limits.h new file mode 100644 index 0000000..fa46853 --- /dev/null +++ b/kern/limits.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2010-2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _KERN_LIMITS_H +#define _KERN_LIMITS_H + +#define CHAR_BIT 8 + +#ifdef __LP64__ +#define LONG_BIT 64 +#else /* __LP64__ */ +#define LONG_BIT 32 +#endif /* __LP64__ */ + +#endif /* _KERN_LIMITS_H */ diff --git a/kern/log2.h b/kern/log2.h new file mode 100644 index 0000000..c9cc5be --- /dev/null +++ b/kern/log2.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2014 Richard Braun. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Integer base 2 logarithm operations. + */ + +#ifndef _KERN_LOG2_H +#define _KERN_LOG2_H + +#include <kern/assert.h> +#include <kern/limits.h> + +static inline unsigned int +ilog2(unsigned long x) +{ + assert(x != 0); + return LONG_BIT - __builtin_clzl(x) - 1; +} + +static inline unsigned int +iorder2(unsigned long size) +{ + assert(size != 0); + + if (size == 1) + return 0; + + return ilog2(size - 1) + 1; +} + +#endif /* _KERN_LOG2_H */ diff --git a/kern/startup.c b/kern/startup.c index 30cff5c..bebb52c 100644 --- a/kern/startup.c +++ b/kern/startup.c @@ -110,8 +110,8 @@ void setup_main(void) panic_init(); sched_init(); - vm_mem_bootstrap(); rdxtree_cache_init(); + vm_mem_bootstrap(); ipc_bootstrap(); vm_mem_init(); ipc_init(); diff --git a/kern/stdint.h b/kern/stdint.h new file mode 100644 index 0000000..d6794c4 --- /dev/null +++ b/kern/stdint.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2010, 2011 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _KERN_STDINT_H +#define _KERN_STDINT_H + +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef signed short int16_t; +typedef unsigned short uint16_t; +typedef signed int int32_t; +typedef unsigned int uint32_t; +typedef signed long long int64_t; +typedef unsigned long long uint64_t; + +#endif /* _KERN_STDINT_H */ diff --git a/kern/thread.c b/kern/thread.c index 6137a33..e46035f 100644 --- a/kern/thread.c +++ b/kern/thread.c @@ -123,6 +123,15 @@ decl_simple_lock_data(, stack_lock_data)/* splsched only */ */ static struct kmem_cache stack_cache; +vm_offset_t +thread_bootstrap_stack_alloc(void) +{ + vm_offset_t stack; + stack = kmem_cache_alloc(&stack_cache); + assert ((stack & (KERNEL_STACK_SIZE-1)) == 0); + return stack; +} + /* * stack_alloc_try: * @@ -220,7 +229,7 @@ void stack_privilege( thread->stack_privilege = current_stack(); } -void thread_init(void) +void thread_bootstrap(void) { kmem_cache_init(&thread_cache, "thread", sizeof(struct thread), 0, NULL, 0); @@ -233,7 +242,10 @@ void thread_init(void) kmem_cache_init(&stack_cache, "stack", KERNEL_STACK_SIZE, KERNEL_STACK_SIZE, NULL, 0); +} +void thread_init(void) +{ /* * Fill in a template thread for fast initialization. 
* [Fields that must be (or are typically) reset at diff --git a/kern/thread.h b/kern/thread.h index adf8b86..9728e12 100644 --- a/kern/thread.h +++ b/kern/thread.h @@ -351,6 +351,8 @@ extern void stack_collect(void); * Kernel-only routines */ +extern void thread_bootstrap(void); +extern vm_offset_t thread_bootstrap_stack_alloc(void); extern void thread_init(void); extern void thread_reference(thread_t); extern void thread_deallocate(thread_t); @@ -63,13 +63,10 @@ * but it is not part of the interface. */ -/* During VM initialization, steal a chunk of memory. */ -extern vm_offset_t pmap_steal_memory(vm_size_t); -/* During VM initialization, report remaining unused physical pages. */ -extern unsigned int pmap_free_pages(void); /* During VM initialization, use remaining physical pages to allocate page * frames. */ extern void pmap_startup(vm_offset_t *, vm_offset_t *); + /* Initialization, after kernel runs in virtual memory. */ extern void pmap_init(void); @@ -78,20 +75,12 @@ extern void pmap_init(void); * If machine/pmap.h defines MACHINE_PAGES, it must implement * the above functions. The pmap module has complete control. * Otherwise, it must implement - * pmap_free_pages * pmap_virtual_space - * pmap_next_page * pmap_init - * and vm/vm_resident.c implements pmap_steal_memory and pmap_startup - * using pmap_free_pages, pmap_next_page, pmap_virtual_space, - * and pmap_enter. pmap_free_pages may over-estimate the number - * of unused physical pages, and pmap_next_page may return FALSE - * to indicate that there are no more unused pages to return. - * However, for best performance pmap_free_pages should be accurate. + * and vm/vm_resident.c implements pmap_startup using + * pmap_virtual_space, and pmap_enter. */ -/* During VM initialization, return the next unused physical page. */ -extern boolean_t pmap_next_page(vm_offset_t *); /* During VM initialization, report virtual space available for the kernel. */ extern void pmap_virtual_space(vm_offset_t *, vm_offset_t *); #endif /* MACHINE_PAGES */ diff --git a/vm/vm_debug.c b/vm/vm_debug.c index 227090e..1248da7 100644 --- a/vm/vm_debug.c +++ b/vm/vm_debug.c @@ -48,6 +48,7 @@ #include <vm/vm_object.h> #include <kern/task.h> #include <kern/host.h> +#include <kern/rdxtree.h> #include <ipc/ipc_port.h> @@ -318,7 +319,8 @@ mach_vm_object_pages( /* object is locked, we have enough wired memory */ count = 0; - queue_iterate(&object->memq, p, vm_page_t, listq) { + struct rdxtree_iter iter; + rdxtree_for_each(&object->memt, &iter, p) { vm_page_info_t *info = &pages[count++]; vm_page_info_state_t state = 0; @@ -362,8 +364,6 @@ mach_vm_object_pages( state |= VPI_STATE_ACTIVE; if (p->laundry) state |= VPI_STATE_LAUNDRY; - if (p->free) - state |= VPI_STATE_FREE; if (p->reference) state |= VPI_STATE_REFERENCE; @@ -418,82 +418,3 @@ mach_vm_object_pages( } #endif /* MACH_VM_DEBUG */ - -/* - * Routine: host_virtual_physical_table_info - * Purpose: - * Return information about the VP table. - * Conditions: - * Nothing locked. Obeys CountInOut protocol. - * Returns: - * KERN_SUCCESS Returned information. - * KERN_INVALID_HOST The host is null. - * KERN_RESOURCE_SHORTAGE Couldn't allocate memory. 
- */ - -kern_return_t -host_virtual_physical_table_info(host, infop, countp) - const host_t host; - hash_info_bucket_array_t *infop; - natural_t *countp; -{ - vm_offset_t addr; - vm_size_t size = 0;/* '=0' to quiet gcc warnings */ - hash_info_bucket_t *info; - unsigned int potential, actual; - kern_return_t kr; - - if (host == HOST_NULL) - return KERN_INVALID_HOST; - - /* start with in-line data */ - - info = *infop; - potential = *countp; - - for (;;) { - actual = vm_page_info(info, potential); - if (actual <= potential) - break; - - /* allocate more memory */ - - if (info != *infop) - kmem_free(ipc_kernel_map, addr, size); - - size = round_page(actual * sizeof *info); - kr = kmem_alloc_pageable(ipc_kernel_map, &addr, size); - if (kr != KERN_SUCCESS) - return KERN_RESOURCE_SHORTAGE; - - info = (hash_info_bucket_t *) addr; - potential = size/sizeof *info; - } - - if (info == *infop) { - /* data fit in-line; nothing to deallocate */ - - *countp = actual; - } else if (actual == 0) { - kmem_free(ipc_kernel_map, addr, size); - - *countp = 0; - } else { - vm_map_copy_t copy; - vm_size_t used; - - used = round_page(actual * sizeof *info); - - if (used != size) - kmem_free(ipc_kernel_map, addr + used, size - used); - - kr = vm_map_copyin(ipc_kernel_map, addr, used, - TRUE, ©); - assert(kr == KERN_SUCCESS); - - *infop = (hash_info_bucket_t *) copy; - *countp = actual; - } - - return KERN_SUCCESS; -} diff --git a/vm/vm_fault.c b/vm/vm_fault.c index aa5febc..96f53fb 100644 --- a/vm/vm_fault.c +++ b/vm/vm_fault.c @@ -609,7 +609,7 @@ vm_fault_return_t vm_fault_page( * won't block for pages. */ - if (m->fictitious && !vm_page_convert(m, FALSE)) { + if (m->fictitious && !vm_page_convert(&m, FALSE)) { VM_PAGE_FREE(m); vm_fault_cleanup(object, first_m); return(VM_FAULT_MEMORY_SHORTAGE); @@ -727,7 +727,7 @@ vm_fault_return_t vm_fault_page( assert(m->object == object); first_m = VM_PAGE_NULL; - if (m->fictitious && !vm_page_convert(m, !object->internal)) { + if (m->fictitious && !vm_page_convert(&m, !object->internal)) { VM_PAGE_FREE(m); vm_fault_cleanup(object, VM_PAGE_NULL); return(VM_FAULT_MEMORY_SHORTAGE); diff --git a/vm/vm_init.c b/vm/vm_init.c index 4fdcd83..6563410 100644 --- a/vm/vm_init.c +++ b/vm/vm_init.c @@ -66,12 +66,10 @@ void vm_mem_bootstrap(void) * Initialize other VM packages */ - slab_bootstrap(); vm_object_bootstrap(); vm_map_init(); kmem_init(start, end); pmap_init(); - slab_init(); vm_fault_init(); vm_page_module_init(); memory_manager_default_init(); diff --git a/vm/vm_object.c b/vm/vm_object.c index ee09e3b..3c7d73c 100644 --- a/vm/vm_object.c +++ b/vm/vm_object.c @@ -48,6 +48,7 @@ #include <kern/queue.h> #include <kern/xpr.h> #include <kern/slab.h> +#include <kern/rdxtree.h> #include <vm/memory_object.h> #include <vm/vm_fault.h> #include <vm/vm_map.h> @@ -219,7 +220,7 @@ static void _vm_object_setup( vm_size_t size) { *object = vm_object_template; - queue_init(&object->memq); + rdxtree_init(&object->memt); vm_object_lock_init(object); object->size = size; } @@ -585,18 +586,15 @@ void vm_object_terminate( * It is possible for us to find busy/absent pages, * if some faults on this object were aborted. 
*/ - + struct rdxtree_iter iter; if ((object->temporary) || (object->pager == IP_NULL)) { - while (!queue_empty(&object->memq)) { - p = (vm_page_t) queue_first(&object->memq); - + rdxtree_for_each(&object->memt, &iter, p) { VM_PAGE_CHECK(p); VM_PAGE_FREE(p); + rdxtree_iter_init(&iter); } - } else while (!queue_empty(&object->memq)) { - p = (vm_page_t) queue_first(&object->memq); - + } else rdxtree_for_each(&object->memt, &iter, p) { VM_PAGE_CHECK(p); vm_page_lock_queues(); @@ -625,6 +623,7 @@ void vm_object_terminate( } else { free_page: VM_PAGE_FREE(p); + rdxtree_iter_init(&iter); } } @@ -737,7 +736,6 @@ void vm_object_abort_activity( vm_object_t object) { vm_page_t p; - vm_page_t next; /* * Abort all activity that would be waiting @@ -748,10 +746,8 @@ void vm_object_abort_activity( * we don't. */ - p = (vm_page_t) queue_first(&object->memq); - while (!queue_end(&object->memq, (queue_entry_t) p)) { - next = (vm_page_t) queue_next(&p->listq); - + struct rdxtree_iter iter; + rdxtree_for_each(&object->memt, &iter, p) { /* * If it's being paged in, destroy it. * If an unlock has been requested, start it again. @@ -765,8 +761,7 @@ void vm_object_abort_activity( p->unlock_request = VM_PROT_NONE; PAGE_WAKEUP(p); } - - p = next; + rdxtree_iter_init(&iter); } /* @@ -874,7 +869,8 @@ void vm_object_deactivate_pages( { vm_page_t p; - queue_iterate(&object->memq, p, vm_page_t, listq) { + struct rdxtree_iter iter; + rdxtree_for_each(&object->memt, &iter, p) { vm_page_lock_queues(); if (!p->busy) vm_page_deactivate(p); @@ -937,7 +933,8 @@ void vm_object_pmap_protect( end = offset + size; - queue_iterate(&object->memq, p, vm_page_t, listq) { + struct rdxtree_iter iter; + rdxtree_for_each(&object->memt, &iter, p) { if (!p->fictitious && (offset <= p->offset) && (p->offset < end)) { @@ -1011,7 +1008,8 @@ void vm_object_pmap_remove( return; vm_object_lock(object); - queue_iterate(&object->memq, p, vm_page_t, listq) { + struct rdxtree_iter iter; + rdxtree_for_each(&object->memt, &iter, p) { if (!p->fictitious && (start <= p->offset) && (p->offset < end)) @@ -1381,7 +1379,8 @@ kern_return_t vm_object_copy_call( * the old memory object that we can. */ - queue_iterate(&src_object->memq, p, vm_page_t, listq) { + struct rdxtree_iter iter; + rdxtree_for_each(&src_object->memt, &iter, p) { if (!p->fictitious && (src_offset <= p->offset) && (p->offset < src_end) && @@ -1570,7 +1569,8 @@ vm_object_t vm_object_copy_delayed( * those pages will already be marked copy-on-write. */ - queue_iterate(&src_object->memq, p, vm_page_t, listq) { + struct rdxtree_iter iter; + rdxtree_for_each(&src_object->memt, &iter, p) { if (!p->fictitious) pmap_page_protect(p->phys_addr, (VM_PROT_ALL & ~VM_PROT_WRITE & @@ -2437,12 +2437,8 @@ void vm_object_collapse( * will be overwritten by any of the parent's * pages that shadow them. */ - - while (!queue_empty(&backing_object->memq)) { - - p = (vm_page_t) - queue_first(&backing_object->memq); - + struct rdxtree_iter iter; + rdxtree_for_each(&backing_object->memt, &iter, p) { new_offset = (p->offset - backing_offset); assert(!p->busy || p->absent); @@ -2504,6 +2500,7 @@ void vm_object_collapse( vm_page_rename(p, object, new_offset); } } + rdxtree_iter_init(&iter); } /* @@ -2637,8 +2634,8 @@ void vm_object_collapse( * of pages here. 
*/ - queue_iterate(&backing_object->memq, p, - vm_page_t, listq) + struct rdxtree_iter iter; + rdxtree_for_each(&backing_object->memt, &iter, p) { new_offset = (p->offset - backing_offset); @@ -2710,47 +2707,21 @@ void vm_object_collapse( * In/out conditions: * The object must be locked. */ -unsigned int vm_object_page_remove_lookup = 0; -unsigned int vm_object_page_remove_iterate = 0; void vm_object_page_remove( vm_object_t object, vm_offset_t start, vm_offset_t end) { - vm_page_t p, next; - - /* - * One and two page removals are most popular. - * The factor of 16 here is somewhat arbitrary. - * It balances vm_object_lookup vs iteration. - */ - - if (atop(end - start) < (unsigned)object->resident_page_count/16) { - vm_object_page_remove_lookup++; - - for (; start < end; start += PAGE_SIZE) { - p = vm_page_lookup(object, start); - if (p != VM_PAGE_NULL) { - if (!p->fictitious) - pmap_page_protect(p->phys_addr, - VM_PROT_NONE); - VM_PAGE_FREE(p); - } - } - } else { - vm_object_page_remove_iterate++; - - p = (vm_page_t) queue_first(&object->memq); - while (!queue_end(&object->memq, (queue_entry_t) p)) { - next = (vm_page_t) queue_next(&p->listq); - if ((start <= p->offset) && (p->offset < end)) { - if (!p->fictitious) - pmap_page_protect(p->phys_addr, - VM_PROT_NONE); - VM_PAGE_FREE(p); - } - p = next; + vm_page_t p; + struct rdxtree_iter iter; + rdxtree_for_each(&object->memt, &iter, p) { + if ((start <= p->offset) && (p->offset < end)) { + if (!p->fictitious) + pmap_page_protect(p->phys_addr, + VM_PROT_NONE); + VM_PAGE_FREE(p); + rdxtree_iter_init(&iter); } } } @@ -2977,15 +2948,14 @@ void vm_object_print( if (vm_object_print_pages) { count = 0; - p = (vm_page_t) queue_first(&object->memq); - while (!queue_end(&object->memq, (queue_entry_t) p)) { + struct rdxtree_iter iter; + rdxtree_for_each(&object->memt, &iter, p) { if (count == 0) iprintf("memory:="); else if (count == 4) {printf("\n"); iprintf(" ..."); count = 0;} else printf(","); count++; printf("(off=0x%X,page=0x%X)", p->offset, (vm_offset_t) p); - p = (vm_page_t) queue_next(&p->listq); } if (count != 0) printf("\n"); diff --git a/vm/vm_object.h b/vm/vm_object.h index 3c9055f..142404a 100644 --- a/vm/vm_object.h +++ b/vm/vm_object.h @@ -46,6 +46,7 @@ #include <kern/assert.h> #include <kern/debug.h> #include <kern/macros.h> +#include <kern/rdxtree.h> #include <vm/pmap.h> #include <ipc/ipc_types.h> @@ -62,7 +63,8 @@ typedef struct ipc_port * pager_request_t; */ struct vm_object { - queue_chain_t memq; /* Resident memory */ + /* Resident memory indexed by offset. Protected by LOCK. */ + struct rdxtree memt; struct lock Lock; /* Synchronization */ #if VM_OBJECT_DEBUG thread_t LockHolder; /* Thread holding Lock */ diff --git a/vm/vm_page.c b/vm/vm_page.c index cc184ca..f2a11e1 100644 --- a/vm/vm_page.c +++ b/vm/vm_page.c @@ -29,24 +29,27 @@ * The symmetric case is handled likewise. */ +#include <string.h> #include <kern/assert.h> -#include <kern/init.h> #include <kern/list.h> #include <kern/macros.h> -#include <kern/mutex.h> -#include <kern/panic.h> -#include <kern/param.h> -#include <kern/printk.h> -#include <kern/sprintf.h> -#include <kern/stddef.h> -#include <kern/string.h> +#include <kern/lock.h> +#include <kern/printf.h> #include <kern/thread.h> -#include <kern/types.h> -#include <machine/cpu.h> #include <machine/pmap.h> -#include <vm/vm_kmem.h> #include <vm/vm_page.h> +/* XXX Mach glue. 
*/ +#define CPU_L1_SIZE (1 << CPU_L1_SHIFT) +#define MAX_CPUS NCPUS +#define __read_mostly +#define __initdata +#define __init +#define cpu_id() cpu_number() +#define thread_pin() +#define thread_unpin() +#define printk printf + /* * Number of free block lists per segment. */ @@ -73,7 +76,7 @@ * Per-processor cache of pages. */ struct vm_page_cpu_pool { - struct mutex lock; + decl_simple_lock_data(,lock); int size; int transfer_size; int nr_pages; @@ -109,7 +112,7 @@ struct vm_page_seg { phys_addr_t end; struct vm_page *pages; struct vm_page *pages_end; - struct mutex lock; + decl_simple_lock_data(,lock); struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS]; unsigned long nr_free_pages; }; @@ -154,16 +157,66 @@ static struct vm_page_boot_seg vm_page_boot_segs[VM_PAGE_MAX_SEGS] __initdata; */ static unsigned int vm_page_segs_size __read_mostly; +/* + * Resident page structures are initialized from + * a template (see vm_page_initialize). + */ +static struct vm_page vm_page_template = + { + .type = VM_PAGE_RESERVED, + .order = VM_PAGE_ORDER_UNLISTED, + .object = VM_OBJECT_NULL, /* reset later */ + .offset = 0, /* reset later */ + .wire_count = 0, + + .inactive = FALSE, + .active = FALSE, + .laundry = FALSE, + .external = FALSE, + + .busy = TRUE, + .wanted = FALSE, + .tabled = FALSE, + .fictitious = FALSE, + .private = FALSE, + .absent = FALSE, + .error = FALSE, + .dirty = FALSE, + .precious = FALSE, + .reference = FALSE, + + .phys_addr = 0, /* reset later */ + + .page_lock = VM_PROT_NONE, + .unlock_request = VM_PROT_NONE, + }; + + static void __init -vm_page_init(struct vm_page *page, unsigned short seg_index, phys_addr_t pa) +vm_page_initialize(struct vm_page *page, unsigned short seg_index, + phys_addr_t pa) { - memset(page, 0, sizeof(*page)); - page->type = VM_PAGE_RESERVED; + memcpy(page, &vm_page_template, VM_PAGE_HEADER_SIZE); page->seg_index = seg_index; - page->order = VM_PAGE_ORDER_UNLISTED; page->phys_addr = pa; } +/* XXX legacy mach interface */ +void +vm_page_init_mach(struct vm_page *page) +{ + memcpy(&page->vm_page_header, + &vm_page_template.vm_page_header, + sizeof *page - VM_PAGE_HEADER_SIZE); +} + +void +vm_page_init(vm_page_t mem, + vm_offset_t phys_addr) +{ + vm_page_initialize(mem, mem->seg_index, phys_addr); +} + void vm_page_set_type(struct vm_page *page, unsigned int order, unsigned short type) { @@ -278,7 +331,7 @@ vm_page_seg_free_to_buddy(struct vm_page_seg *seg, struct vm_page *page, static void __init vm_page_cpu_pool_init(struct vm_page_cpu_pool *cpu_pool, int size) { - mutex_init(&cpu_pool->lock); + simple_lock_init(&cpu_pool->lock); cpu_pool->size = size; cpu_pool->transfer_size = (size + VM_PAGE_CPU_POOL_TRANSFER_RATIO - 1) / VM_PAGE_CPU_POOL_TRANSFER_RATIO; @@ -321,7 +374,7 @@ vm_page_cpu_pool_fill(struct vm_page_cpu_pool *cpu_pool, assert(cpu_pool->nr_pages == 0); - mutex_lock(&seg->lock); + simple_lock(&seg->lock); for (i = 0; i < cpu_pool->transfer_size; i++) { page = vm_page_seg_alloc_from_buddy(seg, 0); @@ -332,7 +385,7 @@ vm_page_cpu_pool_fill(struct vm_page_cpu_pool *cpu_pool, vm_page_cpu_pool_push(cpu_pool, page); } - mutex_unlock(&seg->lock); + simple_unlock(&seg->lock); return i; } @@ -346,14 +399,14 @@ vm_page_cpu_pool_drain(struct vm_page_cpu_pool *cpu_pool, assert(cpu_pool->nr_pages == cpu_pool->size); - mutex_lock(&seg->lock); + simple_lock(&seg->lock); for (i = cpu_pool->transfer_size; i > 0; i--) { page = vm_page_cpu_pool_pop(cpu_pool); vm_page_seg_free_to_buddy(seg, page, 0); } - mutex_unlock(&seg->lock); + simple_unlock(&seg->lock); } 
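The hunks above switch the per-CPU page pools from the x15 mutexes to Mach simple locks; the policy itself is unchanged: a pool refills a batch of transfer_size pages from its segment's buddy lists when it runs empty and drains a batch back when it is full, with transfer_size computed as size/VM_PAGE_CPU_POOL_TRANSFER_RATIO rounded up (see vm_page_cpu_pool_init). A minimal stand-alone C sketch of that fill/drain policy follows; every identifier in it is hypothetical and it is not kernel code.

#include <assert.h>
#include <stdio.h>

#define POOL_SIZE 8
#define TRANSFER_RATIO 2        /* stands in for VM_PAGE_CPU_POOL_TRANSFER_RATIO */

struct pool {
    int size;                   /* capacity, like cpu_pool->size */
    int transfer_size;          /* batch moved to/from the segment */
    int nr_pages;               /* pages currently cached */
};

static int segment_free = 64;   /* free pages left in the backing segment */

static void pool_init(struct pool *p, int size)
{
    p->size = size;
    /* round up, as vm_page_cpu_pool_init does */
    p->transfer_size = (size + TRANSFER_RATIO - 1) / TRANSFER_RATIO;
    p->nr_pages = 0;
}

/* Allocate one page, refilling a batch from the segment when empty. */
static int pool_alloc(struct pool *p)
{
    if (p->nr_pages == 0) {
        int n = p->transfer_size < segment_free ? p->transfer_size : segment_free;
        if (n == 0)
            return -1;          /* segment exhausted */
        segment_free -= n;
        p->nr_pages += n;
    }
    p->nr_pages--;
    return 0;
}

/* Free one page, draining a batch back to the segment when full. */
static void pool_free(struct pool *p)
{
    if (p->nr_pages == p->size) {
        p->nr_pages -= p->transfer_size;
        segment_free += p->transfer_size;
    }
    p->nr_pages++;
}

int main(void)
{
    struct pool p;
    int i;

    pool_init(&p, POOL_SIZE);
    for (i = 0; i < 20; i++)
        assert(pool_alloc(&p) == 0);
    for (i = 0; i < 20; i++)
        pool_free(&p);
    printf("cached %d, segment %d\n", p.nr_pages, segment_free);
    return 0;
}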
static phys_addr_t __init @@ -394,7 +447,7 @@ vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end, seg->pages = pages; seg->pages_end = pages + vm_page_atop(vm_page_seg_size(seg)); - mutex_init(&seg->lock); + simple_lock_init(&seg->lock); for (i = 0; i < ARRAY_SIZE(seg->free_lists); i++) vm_page_free_list_init(&seg->free_lists[i]); @@ -403,7 +456,7 @@ vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end, i = seg - vm_page_segs; for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE) - vm_page_init(&pages[vm_page_atop(pa - seg->start)], i, pa); + vm_page_initialize(&pages[vm_page_atop(pa - seg->start)], i, pa); } static struct vm_page * @@ -419,29 +472,30 @@ vm_page_seg_alloc(struct vm_page_seg *seg, unsigned int order, if (order == 0) { thread_pin(); cpu_pool = vm_page_cpu_pool_get(seg); - mutex_lock(&cpu_pool->lock); + simple_lock(&cpu_pool->lock); if (cpu_pool->nr_pages == 0) { filled = vm_page_cpu_pool_fill(cpu_pool, seg); if (!filled) { - mutex_unlock(&cpu_pool->lock); + simple_unlock(&cpu_pool->lock); thread_unpin(); return NULL; } } page = vm_page_cpu_pool_pop(cpu_pool); - mutex_unlock(&cpu_pool->lock); + simple_unlock(&cpu_pool->lock); thread_unpin(); } else { - mutex_lock(&seg->lock); + simple_lock(&seg->lock); page = vm_page_seg_alloc_from_buddy(seg, order); - mutex_unlock(&seg->lock); + simple_unlock(&seg->lock); } - assert(page->type == VM_PAGE_FREE); + assert(page->type == VM_PAGE_UNUSED); vm_page_set_type(page, order, type); + update_vm_page_counts(); return page; } @@ -451,27 +505,28 @@ vm_page_seg_free(struct vm_page_seg *seg, struct vm_page *page, { struct vm_page_cpu_pool *cpu_pool; - assert(page->type != VM_PAGE_FREE); + assert(page->type != VM_PAGE_UNUSED); assert(order < VM_PAGE_NR_FREE_LISTS); - vm_page_set_type(page, order, VM_PAGE_FREE); + vm_page_set_type(page, order, VM_PAGE_UNUSED); if (order == 0) { thread_pin(); cpu_pool = vm_page_cpu_pool_get(seg); - mutex_lock(&cpu_pool->lock); + simple_lock(&cpu_pool->lock); if (cpu_pool->nr_pages == cpu_pool->size) vm_page_cpu_pool_drain(cpu_pool, seg); vm_page_cpu_pool_push(cpu_pool, page); - mutex_unlock(&cpu_pool->lock); + simple_unlock(&cpu_pool->lock); thread_unpin(); } else { - mutex_lock(&seg->lock); + simple_lock(&seg->lock); vm_page_seg_free_to_buddy(seg, page, order); - mutex_unlock(&seg->lock); + simple_unlock(&seg->lock); } + update_vm_page_counts(); } void __init @@ -610,7 +665,7 @@ vm_page_setup(void) nr_pages += vm_page_atop(vm_page_boot_seg_size(&vm_page_boot_segs[i])); table_size = vm_page_round(nr_pages * sizeof(struct vm_page)); - printk("vm_page: page table size: %zu entries (%zuk)\n", nr_pages, + printk("vm_page: page table size: %u entries (%uk)\n", nr_pages, table_size >> 10); table = vm_page_bootalloc(table_size); va = (unsigned long)table; @@ -630,7 +685,7 @@ vm_page_setup(void) - boot_seg->start); while (page < end) { - page->type = VM_PAGE_FREE; + page->type = VM_PAGE_UNUSED; vm_page_seg_free_to_buddy(seg, page, 0); page++; } @@ -640,7 +695,7 @@ vm_page_setup(void) while (va < (unsigned long)table) { pa = vm_page_direct_pa(va); - page = vm_page_lookup(pa); + page = vm_page_lookup_pa(pa); assert((page != NULL) && (page->type == VM_PAGE_RESERVED)); page->type = VM_PAGE_TABLE; va += PAGE_SIZE; @@ -655,12 +710,12 @@ vm_page_manage(struct vm_page *page) assert(page->seg_index < ARRAY_SIZE(vm_page_segs)); assert(page->type == VM_PAGE_RESERVED); - vm_page_set_type(page, 0, VM_PAGE_FREE); + vm_page_set_type(page, 0, VM_PAGE_UNUSED); 
vm_page_seg_free_to_buddy(&vm_page_segs[page->seg_index], page, 0); } struct vm_page * -vm_page_lookup(phys_addr_t pa) +vm_page_lookup_pa(phys_addr_t pa) { struct vm_page_seg *seg; unsigned int i; @@ -676,7 +731,7 @@ vm_page_lookup(phys_addr_t pa) } struct vm_page * -vm_page_alloc(unsigned int order, unsigned int selector, unsigned short type) +vm_page_alloc_p(unsigned int order, unsigned int selector, unsigned short type) { struct vm_page *page; unsigned int i; @@ -695,7 +750,7 @@ vm_page_alloc(unsigned int order, unsigned int selector, unsigned short type) } void -vm_page_free(struct vm_page *page, unsigned int order) +vm_page_free_p(struct vm_page *page, unsigned int order) { assert(page->seg_index < ARRAY_SIZE(vm_page_segs)); @@ -733,3 +788,14 @@ vm_page_info(void) seg->nr_free_pages, seg->nr_free_pages >> (20 - PAGE_SHIFT)); } } + +void +update_vm_page_counts(void) +{ + unsigned long pages; + unsigned int i; + + for (i = 0, pages = 0; i < vm_page_segs_size; i++) + pages += vm_page_segs[i].nr_free_pages; + vm_page_free_count = pages; +} diff --git a/vm/vm_page.h b/vm/vm_page.h index 23c8c47..c401b25 100644 --- a/vm/vm_page.h +++ b/vm/vm_page.h @@ -25,11 +25,23 @@ #include <kern/list.h> #include <kern/log2.h> #include <kern/macros.h> -//#include <kern/param.h> -//#include <kern/stddef.h> -//#include <kern/types.h> +#include <mach/vm_param.h> +#include <machine/vm_param.h> #include <machine/pmap.h> +#include <kern/queue.h> +#include <kern/lock.h> + +#include <mach/boolean.h> +#include <mach/vm_prot.h> +#include <mach/vm_param.h> +#include <vm/vm_object.h> +#include <vm/vm_types.h> +#include <kern/queue.h> +#include <kern/lock.h> + +#include <kern/sched_prim.h> /* definitions of wait/wakeup */ + /* * Address/page conversion and rounding macros (not inline functions to * be easily usable on both virtual and physical addresses, which may not @@ -62,7 +74,7 @@ * TODO Obviously, this needs to be addressed, e.g. with a reserved pool of * pages. */ -#define VM_PAGE_FREE 0 /* Page unused */ +#define VM_PAGE_UNUSED 0 /* Page unused */ #define VM_PAGE_RESERVED 1 /* Page reserved at boot time */ #define VM_PAGE_TABLE 2 /* Page is part of the page table */ #define VM_PAGE_PMAP 3 /* Page stores pmap-specific data */ @@ -74,12 +86,51 @@ * Physical page descriptor. */ struct vm_page { - struct list node; - unsigned short type; - unsigned short seg_index; - unsigned short order; - phys_addr_t phys_addr; - void *slab_priv; + /* This is the data used by the vm_page module. */ + struct list node; + unsigned short type; + unsigned short seg_index; + unsigned short order; + phys_addr_t phys_addr; + void *slab_priv; + + /* We use an empty struct as the delimiter. */ + struct {} vm_page_header; +#define VM_PAGE_HEADER_SIZE offsetof(struct vm_page, vm_page_header) + + /* This is the data used by vm_resident and friends. */ + queue_chain_t pageq; /* queue info for FIFO queue */ + + vm_object_t object; /* which object am I in (O,P) */ + vm_offset_t offset; /* offset into that object (O,P) */ + + unsigned int wire_count:16, /* how many wired down maps use me? 
+ (O&P) */ + /* boolean_t */ inactive:1, /* page is in inactive list (P) */ + active:1, /* page is in active list (P) */ + laundry:1, /* page is being cleaned now (P)*/ + reference:1, /* page has been used (P) */ + external:1, /* page considered external (P) */ + extcounted:1, /* page counted in ext counts (P) */ + busy:1, /* page is in transit (O) */ + wanted:1, /* someone is waiting for page (O) */ + tabled:1, /* page is in VP table (O) */ + fictitious:1, /* Physical page doesn't exist (O) */ + private:1, /* Page should not be returned to + * the free list (O) */ + absent:1, /* Data has been requested, but is + * not yet available (O) */ + error:1, /* Data manager was unable to provide + * data due to error (O) */ + dirty:1, /* Page must be cleaned (O) */ + precious:1, /* Page is precious; data must be + * returned even if clean (O) */ + overwriting:1; /* Request to unlock has been made + * without having data. (O) + * [See vm_object_overwrite] */ + + vm_prot_t page_lock; /* Uses prohibited by data manager (O) */ + vm_prot_t unlock_request; /* Outstanding unlock request (O) */ }; static inline unsigned short @@ -166,7 +217,7 @@ void vm_page_manage(struct vm_page *page); /* * Return the page descriptor for the given physical address. */ -struct vm_page * vm_page_lookup(phys_addr_t pa); +struct vm_page * vm_page_lookup_pa(phys_addr_t pa); /* * Allocate a block of 2^order physical pages. @@ -174,13 +225,13 @@ struct vm_page * vm_page_lookup(phys_addr_t pa); * The selector is used to determine the segments from which allocation can * be attempted. */ -struct vm_page * vm_page_alloc(unsigned int order, unsigned int selector, +struct vm_page * vm_page_alloc_p(unsigned int order, unsigned int selector, unsigned short type); /* * Release a block of 2^order physical pages. */ -void vm_page_free(struct vm_page *page, unsigned int order); +void vm_page_free_p(struct vm_page *page, unsigned int order); /* * Return the name of the given segment. @@ -192,4 +243,212 @@ const char * vm_page_seg_name(unsigned int seg_index); */ void vm_page_info(void); +/* Mach stuff follows. */ + +/* + * Glue code. + */ +#define CPU_L1_SIZE (1 << CPU_L1_SHIFT) +#define MAX_CPUS NCPUS +#define __read_mostly +#define __initdata +#define __init +#define cpu_id() cpu_number() +#define thread_pin() +#define thread_unpin() +#define printk printf + +void update_vm_page_counts(void); + +/* + * For debugging, this macro can be defined to perform + * some useful check on a page structure. + */ + +#define VM_PAGE_CHECK(mem) + +/* + * Each pageable resident page falls into one of three lists: + * + * free + * Available for allocation now. + * inactive + * Not referenced in any map, but still has an + * object/offset-page mapping, and may be dirty. + * This is the list of pages that should be + * paged out next. + * active + * A list of pages which have been placed in + * at least one physical map. This list is + * ordered, in LRU-like fashion. + */ + +extern +vm_page_t vm_page_queue_free; /* memory free queue */ +extern +vm_page_t vm_page_queue_fictitious; /* fictitious free queue */ +extern +queue_head_t vm_page_queue_active; /* active memory queue */ +extern +queue_head_t vm_page_queue_inactive; /* inactive memory queue */ + +extern +int vm_page_free_count; /* How many pages are free? */ +extern +int vm_page_fictitious_count;/* How many fictitious pages are free? */ +extern +int vm_page_active_count; /* How many pages are active? */ +extern +int vm_page_inactive_count; /* How many pages are inactive? 
*/ +extern +int vm_page_wire_count; /* How many pages are wired? */ +extern +int vm_page_free_target; /* How many do we want free? */ +extern +int vm_page_free_min; /* When to wakeup pageout */ +extern +int vm_page_inactive_target;/* How many do we want inactive? */ +extern +int vm_page_free_reserved; /* How many pages reserved to do pageout */ +extern +int vm_page_laundry_count; /* How many pages being laundered? */ +extern +int vm_page_external_limit; /* Max number of pages for external objects */ + +/* Only objects marked with the extcounted bit are included in this total. + Pages which we scan for possible pageout, but which are not actually + dirty, don't get considered against the external page limits any more + in this way. */ +extern +int vm_page_external_count; /* How many pages for external objects? */ + + + +decl_simple_lock_data(extern,vm_page_queue_lock)/* lock on active and inactive + page queues */ +decl_simple_lock_data(extern,vm_page_queue_free_lock) + /* lock on free page queue */ + +extern unsigned int vm_page_free_wanted; + /* how many threads are waiting for memory */ + +extern vm_offset_t vm_page_fictitious_addr; + /* (fake) phys_addr of fictitious pages */ + +extern void vm_page_bootstrap( + vm_offset_t *startp, + vm_offset_t *endp); +extern void vm_page_module_init(void); + +extern void vm_page_create( + vm_offset_t start, + vm_offset_t end); +extern vm_page_t vm_page_lookup( + vm_object_t object, + vm_offset_t offset); +extern vm_page_t vm_page_grab_fictitious(void); +extern void vm_page_release_fictitious(vm_page_t); +extern boolean_t vm_page_convert(vm_page_t *, boolean_t); +extern void vm_page_more_fictitious(void); +extern vm_page_t vm_page_grab(boolean_t); +extern void vm_page_release(vm_page_t, boolean_t); +extern void vm_page_wait(void (*)(void)); +extern vm_page_t vm_page_alloc( + vm_object_t object, + vm_offset_t offset); +extern void vm_page_init( + vm_page_t mem, + vm_offset_t phys_addr); +extern void vm_page_init_mach(struct vm_page *); +extern void vm_page_free(vm_page_t); +extern void vm_page_activate(vm_page_t); +extern void vm_page_deactivate(vm_page_t); +extern void vm_page_rename( + vm_page_t mem, + vm_object_t new_object, + vm_offset_t new_offset); +extern void vm_page_insert( + vm_page_t mem, + vm_object_t object, + vm_offset_t offset); +extern void vm_page_remove( + vm_page_t mem); + +extern void vm_page_zero_fill(vm_page_t); +extern void vm_page_copy(vm_page_t src_m, vm_page_t dest_m); + +extern void vm_page_wire(vm_page_t); +extern void vm_page_unwire(vm_page_t); + +/* + * Functions implemented as macros + */ + +#define PAGE_ASSERT_WAIT(m, interruptible) \ + MACRO_BEGIN \ + (m)->wanted = TRUE; \ + assert_wait((event_t) (m), (interruptible)); \ + MACRO_END + +#define PAGE_WAKEUP_DONE(m) \ + MACRO_BEGIN \ + (m)->busy = FALSE; \ + if ((m)->wanted) { \ + (m)->wanted = FALSE; \ + thread_wakeup(((event_t) m)); \ + } \ + MACRO_END + +#define PAGE_WAKEUP(m) \ + MACRO_BEGIN \ + if ((m)->wanted) { \ + (m)->wanted = FALSE; \ + thread_wakeup((event_t) (m)); \ + } \ + MACRO_END + +#define VM_PAGE_FREE(p) \ + MACRO_BEGIN \ + vm_page_lock_queues(); \ + vm_page_free(p); \ + vm_page_unlock_queues(); \ + MACRO_END + +/* + * Macro to be used in place of pmap_enter() + */ + +#define PMAP_ENTER(pmap, virtual_address, page, protection, wired) \ + MACRO_BEGIN \ + pmap_enter( \ + (pmap), \ + (virtual_address), \ + (page)->phys_addr, \ + (protection) & ~(page)->page_lock, \ + (wired) \ + ); \ + MACRO_END + +#define VM_PAGE_WAIT(continuation) 
vm_page_wait(continuation) + +#define vm_page_lock_queues() simple_lock(&vm_page_queue_lock) +#define vm_page_unlock_queues() simple_unlock(&vm_page_queue_lock) + +#define VM_PAGE_QUEUES_REMOVE(mem) \ + MACRO_BEGIN \ + if (mem->active) { \ + queue_remove(&vm_page_queue_active, \ + mem, vm_page_t, pageq); \ + mem->active = FALSE; \ + vm_page_active_count--; \ + } \ + \ + if (mem->inactive) { \ + queue_remove(&vm_page_queue_inactive, \ + mem, vm_page_t, pageq); \ + mem->inactive = FALSE; \ + vm_page_inactive_count--; \ + } \ + MACRO_END + #endif /* _VM_VM_PAGE_H */ diff --git a/vm/vm_resident.c b/vm/vm_resident.c index d3b5a8e..88880ef 100644 --- a/vm/vm_resident.c +++ b/vm/vm_resident.c @@ -46,6 +46,7 @@ #include <machine/vm_param.h> #include <kern/xpr.h> #include <kern/slab.h> +#include <kern/rdxtree.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <vm/vm_page.h> @@ -54,7 +55,6 @@ #if MACH_VM_DEBUG #include <mach/kern_return.h> -#include <mach_debug/hash_info.h> #include <vm/vm_user.h> #endif @@ -79,33 +79,6 @@ vm_offset_t virtual_space_start; vm_offset_t virtual_space_end; /* - * The vm_page_lookup() routine, which provides for fast - * (virtual memory object, offset) to page lookup, employs - * the following hash table. The vm_page_{insert,remove} - * routines install and remove associations in the table. - * [This table is often called the virtual-to-physical, - * or VP, table.] - */ -typedef struct { - decl_simple_lock_data(,lock) - vm_page_t pages; -} vm_page_bucket_t; - -vm_page_bucket_t *vm_page_buckets; /* Array of buckets */ -unsigned int vm_page_bucket_count = 0; /* How big is array? */ -unsigned int vm_page_hash_mask; /* Mask for hash function */ - -/* - * Resident page structures are initialized from - * a template (see vm_page_alloc). - * - * When adding a new field to the virtual memory - * object structure, be sure to add initialization - * (see vm_page_bootstrap). - */ -struct vm_page vm_page_template; - -/* * Resident pages that represent real memory * are allocated from a free list. */ @@ -117,8 +90,6 @@ int vm_page_free_count; int vm_page_fictitious_count; int vm_page_external_count; -unsigned int vm_page_free_count_minimum; /* debugging */ - /* * Occasionally, the virtual memory system uses * resident page structures that do not refer to @@ -182,9 +153,6 @@ boolean_t vm_page_deactivate_hint = TRUE; * * Initializes the resident memory module. * - * Allocates memory for the page cells, and - * for the object/offset-to-page hash table headers. - * Each page cell is initialized and placed on the free list. * Returns the range of available kernel virtual memory. */ @@ -192,40 +160,6 @@ void vm_page_bootstrap( vm_offset_t *startp, vm_offset_t *endp) { - vm_page_t m; - int i; - - /* - * Initialize the vm_page template. - */ - - m = &vm_page_template; - m->object = VM_OBJECT_NULL; /* reset later */ - m->offset = 0; /* reset later */ - m->wire_count = 0; - - m->inactive = FALSE; - m->active = FALSE; - m->laundry = FALSE; - m->free = FALSE; - m->external = FALSE; - - m->busy = TRUE; - m->wanted = FALSE; - m->tabled = FALSE; - m->fictitious = FALSE; - m->private = FALSE; - m->absent = FALSE; - m->error = FALSE; - m->dirty = FALSE; - m->precious = FALSE; - m->reference = FALSE; - - m->phys_addr = 0; /* reset later */ - - m->page_lock = VM_PROT_NONE; - m->unlock_request = VM_PROT_NONE; - /* * Initialize the page queues. */ @@ -241,46 +175,6 @@ void vm_page_bootstrap( vm_page_free_wanted = 0; /* - * Steal memory for the kernel map entries. 
- */ - - kentry_data = pmap_steal_memory(kentry_data_size); - - /* - * Allocate (and initialize) the virtual-to-physical - * table hash buckets. - * - * The number of buckets should be a power of two to - * get a good hash function. The following computation - * chooses the first power of two that is greater - * than the number of physical pages in the system. - */ - - if (vm_page_bucket_count == 0) { - unsigned int npages = pmap_free_pages(); - - vm_page_bucket_count = 1; - while (vm_page_bucket_count < npages) - vm_page_bucket_count <<= 1; - } - - vm_page_hash_mask = vm_page_bucket_count - 1; - - if (vm_page_hash_mask & vm_page_bucket_count) - printf("vm_page_bootstrap: WARNING -- strange page hash\n"); - - vm_page_buckets = (vm_page_bucket_t *) - pmap_steal_memory(vm_page_bucket_count * - sizeof(vm_page_bucket_t)); - - for (i = 0; i < vm_page_bucket_count; i++) { - vm_page_bucket_t *bucket = &vm_page_buckets[i]; - - bucket->pages = VM_PAGE_NULL; - simple_lock_init(&bucket->lock); - } - - /* * Machine-dependent code allocates the resident page table. * It uses vm_page_init to initialize the page frames. * The code also returns to us the virtual space available @@ -296,125 +190,20 @@ void vm_page_bootstrap( *endp = virtual_space_end; /* printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);*/ - vm_page_free_count_minimum = vm_page_free_count; } #ifndef MACHINE_PAGES -/* - * We implement pmap_steal_memory and pmap_startup with the help - * of two simpler functions, pmap_virtual_space and pmap_next_page. - */ - -vm_offset_t pmap_steal_memory( - vm_size_t size) -{ - vm_offset_t addr, vaddr, paddr; - - /* - * We round the size to an integer multiple. - */ - - size = (size + 3) &~ 3; - - /* - * If this is the first call to pmap_steal_memory, - * we have to initialize ourself. - */ - - if (virtual_space_start == virtual_space_end) { - pmap_virtual_space(&virtual_space_start, &virtual_space_end); - - /* - * The initial values must be aligned properly, and - * we don't trust the pmap module to do it right. - */ - - virtual_space_start = round_page(virtual_space_start); - virtual_space_end = trunc_page(virtual_space_end); - } - - /* - * Allocate virtual memory for this request. - */ - - addr = virtual_space_start; - virtual_space_start += size; - - /* - * Allocate and map physical pages to back new virtual pages. - */ - - for (vaddr = round_page(addr); - vaddr < addr + size; - vaddr += PAGE_SIZE) { - if (!pmap_next_page(&paddr)) - panic("pmap_steal_memory"); - - /* - * XXX Logically, these mappings should be wired, - * but some pmap modules barf if they are. - */ - - pmap_enter(kernel_pmap, vaddr, paddr, - VM_PROT_READ|VM_PROT_WRITE, FALSE); - } - - return addr; -} - void pmap_startup( vm_offset_t *startp, vm_offset_t *endp) { - unsigned int i, npages, pages_initialized; - vm_page_t pages; - vm_offset_t paddr; - - /* - * We calculate how many page frames we will have - * and then allocate the page structures in one chunk. - */ - - npages = ((PAGE_SIZE * pmap_free_pages() + - (round_page(virtual_space_start) - virtual_space_start)) / - (PAGE_SIZE + sizeof *pages)); - - pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages); - - /* - * Initialize the page frames. 
- */ - - for (i = 0, pages_initialized = 0; i < npages; i++) { - if (!pmap_next_page(&paddr)) - break; - - vm_page_init(&pages[i], paddr); - pages_initialized++; - } - i = 0; - while (pmap_next_page(&paddr)) - i++; - if (i) - printf("%u memory page(s) left away\n", i); - - /* - * Release pages in reverse order so that physical pages - * initially get allocated in ascending addresses. This keeps - * the devices (which must address physical memory) happy if - * they require several consecutive pages. - */ - - for (i = pages_initialized; i > 0; i--) { - vm_page_release(&pages[i - 1], FALSE); - } - + pmap_virtual_space(&virtual_space_start, &virtual_space_end); /* - * We have to re-align virtual_space_start, - * because pmap_steal_memory has been using it. + * The initial values must be aligned properly, and + * we don't trust the pmap module to do it right. */ - virtual_space_start = round_page(virtual_space_start); + virtual_space_end = trunc_page(virtual_space_end); *startp = virtual_space_start; *endp = virtual_space_end; @@ -448,6 +237,8 @@ void vm_page_create( vm_offset_t start, vm_offset_t end) { + printf ("XXX: vm_page_create stubbed out\n"); + return; vm_offset_t paddr; vm_page_t m; @@ -463,17 +254,11 @@ void vm_page_create( } } -/* - * vm_page_hash: - * - * Distributes the object/offset key pair among hash buckets. - * - * NOTE: To get a good hash function, the bucket count should - * be a power of two. - */ -#define vm_page_hash(object, offset) \ - (((unsigned int)(vm_offset_t)object + (unsigned int)atop(offset)) \ - & vm_page_hash_mask) +static rdxtree_key_t +offset_key(vm_offset_t offset) +{ + return (rdxtree_key_t) atop(offset); +} /* * vm_page_insert: [ internal use only ] @@ -489,8 +274,6 @@ void vm_page_insert( vm_object_t object, vm_offset_t offset) { - vm_page_bucket_t *bucket; - VM_PAGE_CHECK(mem); if (mem->tabled) @@ -504,20 +287,10 @@ void vm_page_insert( mem->offset = offset; /* - * Insert it into the object_object/offset hash table - */ - - bucket = &vm_page_buckets[vm_page_hash(object, offset)]; - simple_lock(&bucket->lock); - mem->next = bucket->pages; - bucket->pages = mem; - simple_unlock(&bucket->lock); - - /* - * Now link into the object's list of backed pages. + * Insert it into the objects radix tree. */ - queue_enter(&object->memq, mem, vm_page_t, listq); + rdxtree_insert(&object->memt, offset_key(offset), mem); mem->tabled = TRUE; /* @@ -561,7 +334,8 @@ void vm_page_replace( vm_object_t object, vm_offset_t offset) { - vm_page_bucket_t *bucket; + struct vm_page *old; + void **slot; VM_PAGE_CHECK(mem); @@ -576,54 +350,23 @@ void vm_page_replace( mem->offset = offset; /* - * Insert it into the object_object/offset hash table, - * replacing any page that might have been there. + * Insert it into the objects radix tree, replacing any + * page that might have been there. */ + slot = rdxtree_lookup_slot(&object->memt, offset_key(offset)); + old = rdxtree_replace_slot(slot, mem); + if (old != VM_PAGE_NULL) { + old->tabled = FALSE; + object->resident_page_count--; - bucket = &vm_page_buckets[vm_page_hash(object, offset)]; - simple_lock(&bucket->lock); - if (bucket->pages) { - vm_page_t *mp = &bucket->pages; - vm_page_t m = *mp; - do { - if (m->object == object && m->offset == offset) { - /* - * Remove page from bucket and from object, - * and return it to the free list. 
- */ - *mp = m->next; - queue_remove(&object->memq, m, vm_page_t, - listq); - m->tabled = FALSE; - object->resident_page_count--; - - if (object->can_persist - && (object->ref_count == 0)) - vm_object_cached_pages_update(-1); - - /* - * Return page to the free list. - * Note the page is not tabled now, so this - * won't self-deadlock on the bucket lock. - */ - - vm_page_free(m); - break; - } - mp = &m->next; - } while ((m = *mp) != 0); - mem->next = bucket->pages; - } else { - mem->next = VM_PAGE_NULL; - } - bucket->pages = mem; - simple_unlock(&bucket->lock); + if (object->can_persist + && (object->ref_count == 0)) + vm_object_cached_pages_update(-1); - /* - * Now link into the object's list of backed pages. - */ + /* And free it. */ + vm_page_free(old); + } - queue_enter(&object->memq, mem, vm_page_t, listq); mem->tabled = TRUE; /* @@ -650,38 +393,11 @@ void vm_page_replace( void vm_page_remove( vm_page_t mem) { - vm_page_bucket_t *bucket; - vm_page_t this; - assert(mem->tabled); VM_PAGE_CHECK(mem); - /* - * Remove from the object_object/offset hash table - */ - - bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)]; - simple_lock(&bucket->lock); - if ((this = bucket->pages) == mem) { - /* optimize for common case */ - - bucket->pages = mem->next; - } else { - vm_page_t *prev; - - for (prev = &this->next; - (this = *prev) != mem; - prev = &this->next) - continue; - *prev = this->next; - } - simple_unlock(&bucket->lock); - - /* - * Now remove from the object's list of backed pages. - */ - - queue_remove(&mem->object->memq, mem, vm_page_t, listq); + /* Remove from the objects radix tree. */ + rdxtree_remove(&mem->object->memt, offset_key(mem->offset)); /* * And show that the object has one fewer resident @@ -709,23 +425,7 @@ vm_page_t vm_page_lookup( vm_object_t object, vm_offset_t offset) { - vm_page_t mem; - vm_page_bucket_t *bucket; - - /* - * Search the hash table for this object/offset pair - */ - - bucket = &vm_page_buckets[vm_page_hash(object, offset)]; - - simple_lock(&bucket->lock); - for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) { - VM_PAGE_CHECK(mem); - if ((mem->object == object) && (mem->offset == offset)) - break; - } - simple_unlock(&bucket->lock); - return mem; + return rdxtree_lookup(&object->memt, offset_key(offset)); } /* @@ -753,21 +453,6 @@ void vm_page_rename( } /* - * vm_page_init: - * - * Initialize the fields in a new page. - * This takes a structure with random values and initializes it - * so that it can be given to vm_page_release or vm_page_insert. - */ -void vm_page_init( - vm_page_t mem, - vm_offset_t phys_addr) -{ - *mem = vm_page_template; - mem->phys_addr = phys_addr; -} - -/* * vm_page_grab_fictitious: * * Remove a fictitious page from the free list. @@ -783,10 +468,10 @@ vm_page_t vm_page_grab_fictitious(void) if (m != VM_PAGE_NULL) { vm_page_fictitious_count--; vm_page_queue_fictitious = (vm_page_t) m->pageq.next; - m->free = FALSE; + assert(m->fictitious); + assert(! m->tabled); } simple_unlock(&vm_page_queue_free_lock); - return m; } @@ -799,10 +484,9 @@ vm_page_t vm_page_grab_fictitious(void) void vm_page_release_fictitious( vm_page_t m) { + assert(m->fictitious); + assert(! 
m->tabled); simple_lock(&vm_page_queue_free_lock); - if (m->free) - panic("vm_page_release_fictitious"); - m->free = TRUE; m->pageq.next = (queue_entry_t) vm_page_queue_fictitious; vm_page_queue_fictitious = m; vm_page_fictitious_count++; @@ -841,22 +525,43 @@ void vm_page_more_fictitious(void) */ boolean_t vm_page_convert( - vm_page_t m, + struct vm_page **mp, boolean_t external) { - vm_page_t real_m; + struct vm_page *real_m, *fict_m, *old; + void **slot; + + fict_m = *mp; + + assert(fict_m->fictitious); + assert(fict_m->phys_addr == vm_page_fictitious_addr); + assert(! fict_m->active); + assert(! fict_m->inactive); real_m = vm_page_grab(external); if (real_m == VM_PAGE_NULL) return FALSE; - m->phys_addr = real_m->phys_addr; - m->fictitious = FALSE; + memcpy(&real_m->vm_page_header, + &fict_m->vm_page_header, + sizeof *fict_m - VM_PAGE_HEADER_SIZE); + + real_m->fictitious = FALSE; + fict_m->tabled = FALSE; + + /* Fix radix tree entry. */ + /* XXX is the object locked? */ + slot = rdxtree_lookup_slot(&fict_m->object->memt, + offset_key(fict_m->offset)); + old = rdxtree_replace_slot(slot, real_m); + assert(old == fict_m); - real_m->phys_addr = vm_page_fictitious_addr; - real_m->fictitious = TRUE; + assert(real_m->phys_addr != vm_page_fictitious_addr); + assert(fict_m->fictitious); + assert(fict_m->phys_addr == vm_page_fictitious_addr); - vm_page_release_fictitious(real_m); + vm_page_release_fictitious(fict_m); + *mp = real_m; return TRUE; } @@ -888,16 +593,15 @@ vm_page_t vm_page_grab( return VM_PAGE_NULL; } - if (vm_page_queue_free == VM_PAGE_NULL) - panic("vm_page_grab"); - - if (--vm_page_free_count < vm_page_free_count_minimum) - vm_page_free_count_minimum = vm_page_free_count; if (external) vm_page_external_count++; - mem = vm_page_queue_free; - vm_page_queue_free = (vm_page_t) mem->pageq.next; - mem->free = FALSE; + + mem = vm_page_alloc_p(0, VM_PAGE_SEL_DIRECTMAP, VM_PAGE_OBJECT); + if (! mem) { + simple_unlock(&vm_page_queue_free_lock); + return VM_PAGE_NULL; + } + vm_page_init_mach(mem); mem->extcounted = mem->external = external; simple_unlock(&vm_page_queue_free_lock); @@ -930,237 +634,6 @@ vm_offset_t vm_page_grab_phys_addr(void) } /* - * vm_page_grab_contiguous_pages: - * - * Take N pages off the free list, the pages should - * cover a contiguous range of physical addresses. - * [Used by device drivers to cope with DMA limitations] - * - * Returns the page descriptors in ascending order, or - * Returns KERN_RESOURCE_SHORTAGE if it could not. - */ - -/* Biggest phys page number for the pages we handle in VM */ - -vm_size_t vm_page_big_pagenum = 0; /* Set this before call! */ - -kern_return_t -vm_page_grab_contiguous_pages( - int npages, - vm_page_t pages[], - natural_t *bits, - boolean_t external) -{ - int first_set; - int size, alloc_size; - kern_return_t ret; - vm_page_t mem, *prevmemp; - -#ifndef NBBY -#define NBBY 8 /* size in bits of sizeof()`s unity */ -#endif - -#define NBPEL (sizeof(natural_t)*NBBY) - - size = (vm_page_big_pagenum + NBPEL - 1) - & ~(NBPEL - 1); /* in bits */ - - size = size / NBBY; /* in bytes */ - - /* - * If we are called before the VM system is fully functional - * the invoker must provide us with the work space. 
[one bit - * per page starting at phys 0 and up to vm_page_big_pagenum] - */ - if (bits == 0) { - alloc_size = round_page(size); - if (kmem_alloc_wired(kernel_map, - (vm_offset_t *)&bits, - alloc_size) - != KERN_SUCCESS) - return KERN_RESOURCE_SHORTAGE; - } else - alloc_size = 0; - - memset(bits, 0, size); - - /* - * A very large granularity call, its rare so that is ok - */ - simple_lock(&vm_page_queue_free_lock); - - /* - * Do not dip into the reserved pool. - */ - - if ((vm_page_free_count < vm_page_free_reserved) - || (vm_page_external_count >= vm_page_external_limit)) { - printf_once("no more room for vm_page_grab_contiguous_pages"); - simple_unlock(&vm_page_queue_free_lock); - return KERN_RESOURCE_SHORTAGE; - } - - /* - * First pass through, build a big bit-array of - * the pages that are free. It is not going to - * be too large anyways, in 4k we can fit info - * for 32k pages. - */ - mem = vm_page_queue_free; - while (mem) { - int word_index, bit_index; - - bit_index = (mem->phys_addr >> PAGE_SHIFT); - word_index = bit_index / NBPEL; - bit_index = bit_index - (word_index * NBPEL); - bits[word_index] |= 1 << bit_index; - - mem = (vm_page_t) mem->pageq.next; - } - - /* - * Second loop. Scan the bit array for NPAGES - * contiguous bits. That gives us, if any, - * the range of pages we will be grabbing off - * the free list. - */ - { - int bits_so_far = 0, i; - - first_set = 0; - - for (i = 0; i < size; i += sizeof(natural_t)) { - - natural_t v = bits[i / sizeof(natural_t)]; - int bitpos; - - /* - * Bitscan this one word - */ - if (v) { - /* - * keep counting them beans ? - */ - bitpos = 0; - - if (bits_so_far) { -count_ones: - while (v & 1) { - bitpos++; - /* - * got enough beans ? - */ - if (++bits_so_far == npages) - goto found_em; - v >>= 1; - } - /* if we are being lucky, roll again */ - if (bitpos == NBPEL) - continue; - } - - /* - * search for beans here - */ - bits_so_far = 0; - while ((bitpos < NBPEL) && ((v & 1) == 0)) { - bitpos++; - v >>= 1; - } - if (v & 1) { - first_set = (i * NBBY) + bitpos; - goto count_ones; - } - } - /* - * No luck - */ - bits_so_far = 0; - } - } - - /* - * We could not find enough contiguous pages. - */ - simple_unlock(&vm_page_queue_free_lock); - - printf_once("no contiguous room for vm_page_grab_contiguous_pages"); - ret = KERN_RESOURCE_SHORTAGE; - goto out; - - /* - * Final pass. Now we know which pages we want. - * Scan the list until we find them all, grab - * pages as we go. FIRST_SET tells us where - * in the bit-array our pages start. - */ -found_em: - vm_page_free_count -= npages; - if (vm_page_free_count < vm_page_free_count_minimum) - vm_page_free_count_minimum = vm_page_free_count; - if (external) - vm_page_external_count += npages; - { - vm_offset_t first_phys, last_phys; - - /* cache values for compare */ - first_phys = first_set << PAGE_SHIFT; - last_phys = first_phys + (npages << PAGE_SHIFT);/* not included */ - - /* running pointers */ - mem = vm_page_queue_free; - prevmemp = &vm_page_queue_free; - - while (mem) { - - vm_offset_t addr; - - addr = mem->phys_addr; - - if ((addr >= first_phys) && - (addr < last_phys)) { - *prevmemp = (vm_page_t) mem->pageq.next; - pages[(addr - first_phys) >> PAGE_SHIFT] = mem; - mem->free = FALSE; - mem->extcounted = mem->external = external; - /* - * Got them all ? - */ - if (--npages == 0) break; - } else - prevmemp = (vm_page_t *) &mem->pageq.next; - - mem = (vm_page_t) mem->pageq.next; - } - } - - simple_unlock(&vm_page_queue_free_lock); - - /* - * Decide if we should poke the pageout daemon. 
- * We do this if the free count is less than the low - * water mark, or if the free count is less than the high - * water mark (but above the low water mark) and the inactive - * count is less than its target. - * - * We don't have the counts locked ... if they change a little, - * it doesn't really matter. - */ - - if ((vm_page_free_count < vm_page_free_min) || - ((vm_page_free_count < vm_page_free_target) && - (vm_page_inactive_count < vm_page_inactive_target))) - thread_wakeup(&vm_page_free_wanted); - - ret = KERN_SUCCESS; -out: - if (alloc_size) - kmem_free(kernel_map, (vm_offset_t) bits, alloc_size); - - return ret; -} - -/* * vm_page_release: * * Return a page to the free list. @@ -1171,12 +644,7 @@ void vm_page_release( boolean_t external) { simple_lock(&vm_page_queue_free_lock); - if (mem->free) - panic("vm_page_release"); - mem->free = TRUE; - mem->pageq.next = (queue_entry_t) vm_page_queue_free; - vm_page_queue_free = mem; - vm_page_free_count++; + vm_page_free_p(mem, 0); if (external) vm_page_external_count--; @@ -1283,9 +751,6 @@ vm_page_t vm_page_alloc( void vm_page_free( vm_page_t mem) { - if (mem->free) - panic("vm_page_free"); - if (mem->tabled) vm_page_remove(mem); VM_PAGE_QUEUES_REMOVE(mem); @@ -1459,47 +924,6 @@ void vm_page_copy( pmap_copy_page(src_m->phys_addr, dest_m->phys_addr); } -#if MACH_VM_DEBUG -/* - * Routine: vm_page_info - * Purpose: - * Return information about the global VP table. - * Fills the buffer with as much information as possible - * and returns the desired size of the buffer. - * Conditions: - * Nothing locked. The caller should provide - * possibly-pageable memory. - */ - -unsigned int -vm_page_info( - hash_info_bucket_t *info, - unsigned int count) -{ - int i; - - if (vm_page_bucket_count < count) - count = vm_page_bucket_count; - - for (i = 0; i < count; i++) { - vm_page_bucket_t *bucket = &vm_page_buckets[i]; - unsigned int bucket_count = 0; - vm_page_t m; - - simple_lock(&bucket->lock); - for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next) - bucket_count++; - simple_unlock(&bucket->lock); - - /* don't touch pageable memory while holding locks */ - info[i].hib_count = bucket_count; - } - - return vm_page_bucket_count; -} -#endif /* MACH_VM_DEBUG */ - - #if MACH_KDB #define printf kdbprintf @@ -1514,8 +938,6 @@ void vm_page_print(p) printf("wire_count %d,", p->wire_count); printf(" %s", (p->active ? "active" : (p->inactive ? "inactive" : "loose"))); - printf("%s", - (p->free ? " free" : "")); printf("%s ", (p->laundry ? " laundry" : "")); printf("%s", |
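Taken together, the vm_resident.c hunks drop the global VP hash buckets in favour of a per-object radix tree (object->memt) keyed by the page-aligned offset, offset_key(offset) == atop(offset), as seen in vm_page_insert, vm_page_lookup and vm_page_remove above. A minimal user-space sketch of that keying follows; a plain pointer array stands in for kern/rdxtree, and all identifiers in the sketch are hypothetical, not part of the patch.

/* Sketch only: resident pages indexed by page number within an object. */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define MAX_PAGES  1024                     /* toy capacity of the stand-in tree */

struct page {
    unsigned long offset;                   /* byte offset into the object */
};

struct object {
    struct page *memt[MAX_PAGES];           /* stands in for struct rdxtree memt */
};

static unsigned long offset_key(unsigned long offset)
{
    return offset >> PAGE_SHIFT;            /* same idea as atop(offset) */
}

static void obj_insert(struct object *o, struct page *p, unsigned long offset)
{
    p->offset = offset;
    o->memt[offset_key(offset)] = p;        /* cf. rdxtree_insert() */
}

static struct page *obj_lookup(struct object *o, unsigned long offset)
{
    return o->memt[offset_key(offset)];     /* cf. rdxtree_lookup() */
}

static void obj_remove(struct object *o, struct page *p)
{
    o->memt[offset_key(p->offset)] = NULL;  /* cf. rdxtree_remove() */
}

int main(void)
{
    struct object obj = { { NULL } };
    struct page a, b;

    obj_insert(&obj, &a, 0 * PAGE_SIZE);
    obj_insert(&obj, &b, 3 * PAGE_SIZE);
    assert(obj_lookup(&obj, 3 * PAGE_SIZE) == &b);
    obj_remove(&obj, &a);
    assert(obj_lookup(&obj, 0) == NULL);
    printf("lookup by page index works\n");
    return 0;
}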