-rw-r--r-- | Makefrag.am | 1
-rw-r--r-- | i386/Makefrag.am | 2
-rw-r--r-- | i386/i386/vm_param.h | 40
-rw-r--r-- | i386/i386at/biosmem.c | 844
-rw-r--r-- | i386/i386at/biosmem.h | 84
-rw-r--r-- | i386/i386at/elf.h | 61
-rw-r--r-- | i386/i386at/model_dep.c | 286
-rw-r--r-- | i386/include/mach/i386/multiboot.h | 105
-rw-r--r-- | i386/include/mach/i386/vm_types.h | 5
-rw-r--r-- | kern/bootstrap.c | 16
-rw-r--r-- | kern/cpu_number.h | 2
-rw-r--r-- | kern/log2.h | 50
-rw-r--r-- | kern/slab.h | 5
-rw-r--r-- | kern/startup.c | 2
-rw-r--r-- | linux/dev/glue/glue.h | 4
-rw-r--r-- | linux/dev/glue/kmem.c | 6
-rw-r--r-- | linux/dev/init/main.c | 143
-rw-r--r-- | linux/pcmcia-cs/glue/ds.c | 6
-rw-r--r-- | vm/pmap.h | 15
-rw-r--r-- | vm/vm_fault.c | 4
-rw-r--r-- | vm/vm_init.c | 1
-rw-r--r-- | vm/vm_object.c | 3
-rw-r--r-- | vm/vm_page.c | 762
-rw-r--r-- | vm/vm_page.h | 220
-rw-r--r-- | vm/vm_resident.c | 546 |
25 files changed, 2378 insertions, 835 deletions
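The new vm/vm_page.c added below manages each physical segment with a binary buddy system backed by per-CPU page pools. Two computations recur throughout that code: iorder2() (from the new kern/log2.h) turns a page count into an allocation order, and a block's buddy is found by flipping the single address bit that selects which half of the next-larger block it occupies (pa ^ vm_page_ptoa(1 << order) in vm_page_seg_free_to_buddy()). The following is a minimal user-space sketch of both, not kernel code; the 4 KiB page size and the sample values are assumptions chosen only for illustration:

/*
 * Sketch of the order and buddy computations used by vm/vm_page.c below.
 * PAGE_SIZE and the sample request are illustrative assumptions; the real
 * versions live in kern/log2.h and vm_page_seg_free_to_buddy().
 */
#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define LONG_BIT (sizeof(unsigned long) * 8)

/* Integer base-2 logarithm, as in kern/log2.h. */
static unsigned int
ilog2(unsigned long x)
{
    assert(x != 0);
    return LONG_BIT - __builtin_clzl(x) - 1;
}

/* Smallest order whose block (1 << order pages) covers size pages. */
static unsigned int
iorder2(unsigned long size)
{
    assert(size != 0);
    return (size == 1) ? 0 : ilog2(size - 1) + 1;
}

int
main(void)
{
    unsigned long nr_pages = 5;              /* e.g. a 20 KiB request */
    unsigned int order = iorder2(nr_pages);  /* -> 3, i.e. an 8-page block */

    /* A block's buddy differs only in the bit selecting its half. */
    unsigned long pa = 0x40000;              /* sample physical address */
    unsigned long buddy_pa = pa ^ (PAGE_SIZE << order);

    printf("order %u, block %#lx, buddy %#lx\n", order, pa, buddy_pa);
    return 0;
}

On free, the allocator checks whether the block at buddy_pa is free and of the same order, merges the pair, and repeats one order higher, which is exactly the loop visible in vm_page_seg_free_to_buddy() in the diff below.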
diff --git a/Makefrag.am b/Makefrag.am index 823ece5..bb600e0 100644 --- a/Makefrag.am +++ b/Makefrag.am @@ -259,6 +259,7 @@ libkernel_a_SOURCES += \ vm/vm_map.h \ vm/vm_object.c \ vm/vm_object.h \ + vm/vm_page.c \ vm/vm_page.h \ vm/vm_pageout.c \ vm/vm_pageout.h \ diff --git a/i386/Makefrag.am b/i386/Makefrag.am index ef393d5..e6cfedd 100644 --- a/i386/Makefrag.am +++ b/i386/Makefrag.am @@ -29,6 +29,8 @@ libkernel_a_SOURCES += \ if PLATFORM_at libkernel_a_SOURCES += \ + i386/i386at/biosmem.c \ + i386/i386at/biosmem.h \ i386/i386at/boothdr.S \ i386/i386at/com.c \ i386/i386at/com.h \ diff --git a/i386/i386/vm_param.h b/i386/i386/vm_param.h index 6292ca2..da3126c 100644 --- a/i386/i386/vm_param.h +++ b/i386/i386/vm_param.h @@ -23,6 +23,8 @@ #ifndef _I386_KERNEL_I386_VM_PARAM_ #define _I386_KERNEL_I386_VM_PARAM_ +#include <kern/macros.h> + /* XXX use xu/vm_param.h */ #include <mach/vm_param.h> #ifdef MACH_PV_PAGETABLES @@ -101,4 +103,42 @@ #define kvtolin(a) ((vm_offset_t)(a) - VM_MIN_KERNEL_ADDRESS + LINEAR_MIN_KERNEL_ADDRESS) #define lintokv(a) ((vm_offset_t)(a) - LINEAR_MIN_KERNEL_ADDRESS + VM_MIN_KERNEL_ADDRESS) +/* + * Physical memory properties. + */ +#define VM_PAGE_DMA_LIMIT DECL_CONST(0x1000000, UL) + +#ifdef __LP64__ +#define VM_PAGE_MAX_SEGS 4 +#define VM_PAGE_DMA32_LIMIT DECL_CONST(0x100000000, UL) +#define VM_PAGE_DIRECTMAP_LIMIT DECL_CONST(0x400000000000, UL) +#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0x10000000000000, UL) +#else /* __LP64__ */ +#define VM_PAGE_DIRECTMAP_LIMIT (VM_MAX_KERNEL_ADDRESS \ + - VM_MIN_KERNEL_ADDRESS \ + - VM_KERNEL_MAP_SIZE + 1) +#ifdef PAE +#define VM_PAGE_MAX_SEGS 3 +#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0x10000000000000, ULL) +#else /* PAE */ +#define VM_PAGE_MAX_SEGS 3 +#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0xfffff000, UL) +#endif /* PAE */ +#endif /* __LP64__ */ + +/* + * Physical segment indexes. + */ +#define VM_PAGE_SEG_DMA 0 + +#ifdef __LP64__ +#define VM_PAGE_SEG_DMA32 1 +#define VM_PAGE_SEG_DIRECTMAP 2 +#define VM_PAGE_SEG_HIGHMEM 3 +#else /* __LP64__ */ +#define VM_PAGE_SEG_DMA32 1 /* Alias for the DIRECTMAP segment */ +#define VM_PAGE_SEG_DIRECTMAP 1 +#define VM_PAGE_SEG_HIGHMEM 2 +#endif /* __LP64__ */ + #endif /* _I386_KERNEL_I386_VM_PARAM_ */ diff --git a/i386/i386at/biosmem.c b/i386/i386at/biosmem.c new file mode 100644 index 0000000..5b4fbdd --- /dev/null +++ b/i386/i386at/biosmem.c @@ -0,0 +1,844 @@ +/* + * Copyright (c) 2010-2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <string.h> +#include <i386/model_dep.h> +#include <i386at/biosmem.h> +#include <i386at/elf.h> +#include <kern/assert.h> +#include <kern/debug.h> +#include <kern/macros.h> +#include <kern/printf.h> +#include <mach/vm_param.h> +#include <mach/machine/multiboot.h> +#include <sys/types.h> +#include <vm/vm_page.h> + +#define __boot +#define __bootdata +#define __init + +#define boot_memmove memmove +#define boot_panic panic +#define boot_strlen strlen + +#define BOOT_CGAMEM phystokv(0xb8000) +#define BOOT_CGACHARS (80 * 25) +#define BOOT_CGACOLOR 0x7 + +extern char _start, _end; + +/* + * Maximum number of entries in the BIOS memory map. + * + * Because of adjustments of overlapping ranges, the memory map can grow + * to twice this size. + */ +#define BIOSMEM_MAX_MAP_SIZE 128 + +/* + * Memory range types. + */ +#define BIOSMEM_TYPE_AVAILABLE 1 +#define BIOSMEM_TYPE_RESERVED 2 +#define BIOSMEM_TYPE_ACPI 3 +#define BIOSMEM_TYPE_NVS 4 +#define BIOSMEM_TYPE_UNUSABLE 5 +#define BIOSMEM_TYPE_DISABLED 6 + +/* + * Memory map entry. + */ +struct biosmem_map_entry { + uint64_t base_addr; + uint64_t length; + unsigned int type; +}; + +/* + * Contiguous block of physical memory. + * + * Tha "available" range records what has been passed to the VM system as + * available inside the segment. + */ +struct biosmem_segment { + phys_addr_t start; + phys_addr_t end; + phys_addr_t avail_start; + phys_addr_t avail_end; +}; + +/* + * Memory map built from the information passed by the boot loader. + * + * If the boot loader didn't pass a valid memory map, a simple map is built + * based on the mem_lower and mem_upper multiboot fields. + */ +static struct biosmem_map_entry biosmem_map[BIOSMEM_MAX_MAP_SIZE * 2] + __bootdata; +static unsigned int biosmem_map_size __bootdata; + +/* + * Physical segment boundaries. + */ +static struct biosmem_segment biosmem_segments[VM_PAGE_MAX_SEGS] __bootdata; + +/* + * Boundaries of the simple bootstrap heap. + * + * This heap is located above BIOS memory. 
+ */ +static uint32_t biosmem_heap_start __bootdata; +static uint32_t biosmem_heap_cur __bootdata; +static uint32_t biosmem_heap_end __bootdata; + +static char biosmem_panic_toobig_msg[] __bootdata + = "biosmem: too many memory map entries"; +static char biosmem_panic_setup_msg[] __bootdata + = "biosmem: unable to set up the early memory allocator"; +static char biosmem_panic_noseg_msg[] __bootdata + = "biosmem: unable to find any memory segment"; +static char biosmem_panic_inval_msg[] __bootdata + = "biosmem: attempt to allocate 0 page"; +static char biosmem_panic_nomem_msg[] __bootdata + = "biosmem: unable to allocate memory"; + +static void __boot +biosmem_map_build(const struct multiboot_raw_info *mbi) +{ + struct multiboot_raw_mmap_entry *mb_entry, *mb_end; + struct biosmem_map_entry *start, *entry, *end; + unsigned long addr; + + addr = phystokv(mbi->mmap_addr); + mb_entry = (struct multiboot_raw_mmap_entry *)addr; + mb_end = (struct multiboot_raw_mmap_entry *)(addr + mbi->mmap_length); + start = biosmem_map; + entry = start; + end = entry + BIOSMEM_MAX_MAP_SIZE; + + while ((mb_entry < mb_end) && (entry < end)) { + entry->base_addr = mb_entry->base_addr; + entry->length = mb_entry->length; + entry->type = mb_entry->type; + + mb_entry = (void *)mb_entry + sizeof(mb_entry->size) + mb_entry->size; + entry++; + } + + biosmem_map_size = entry - start; +} + +static void __boot +biosmem_map_build_simple(const struct multiboot_raw_info *mbi) +{ + struct biosmem_map_entry *entry; + + entry = biosmem_map; + entry->base_addr = 0; + entry->length = mbi->mem_lower << 10; + entry->type = BIOSMEM_TYPE_AVAILABLE; + + entry++; + entry->base_addr = BIOSMEM_END; + entry->length = mbi->mem_upper << 10; + entry->type = BIOSMEM_TYPE_AVAILABLE; + + biosmem_map_size = 2; +} + +static int __boot +biosmem_map_entry_is_invalid(const struct biosmem_map_entry *entry) +{ + return (entry->base_addr + entry->length) <= entry->base_addr; +} + +static void __boot +biosmem_map_filter(void) +{ + struct biosmem_map_entry *entry; + unsigned int i; + + i = 0; + + while (i < biosmem_map_size) { + entry = &biosmem_map[i]; + + if (biosmem_map_entry_is_invalid(entry)) { + biosmem_map_size--; + boot_memmove(entry, entry + 1, + (biosmem_map_size - i) * sizeof(*entry)); + continue; + } + + i++; + } +} + +static void __boot +biosmem_map_sort(void) +{ + struct biosmem_map_entry tmp; + unsigned int i, j; + + /* + * Simple insertion sort. + */ + for (i = 1; i < biosmem_map_size; i++) { + tmp = biosmem_map[i]; + + for (j = i - 1; j < i; j--) { + if (biosmem_map[j].base_addr < tmp.base_addr) + break; + + biosmem_map[j + 1] = biosmem_map[j]; + } + + biosmem_map[j + 1] = tmp; + } +} + +static void __boot +biosmem_map_adjust(void) +{ + struct biosmem_map_entry tmp, *a, *b, *first, *second; + uint64_t a_end, b_end, last_end; + unsigned int i, j, last_type; + + biosmem_map_filter(); + + /* + * Resolve overlapping areas, giving priority to most restrictive + * (i.e. numerically higher) types. 
+ */ + for (i = 0; i < biosmem_map_size; i++) { + a = &biosmem_map[i]; + a_end = a->base_addr + a->length; + + j = i + 1; + + while (j < biosmem_map_size) { + b = &biosmem_map[j]; + b_end = b->base_addr + b->length; + + if ((a->base_addr >= b_end) || (a_end <= b->base_addr)) { + j++; + continue; + } + + if (a->base_addr < b->base_addr) { + first = a; + second = b; + } else { + first = b; + second = a; + } + + if (a_end > b_end) { + last_end = a_end; + last_type = a->type; + } else { + last_end = b_end; + last_type = b->type; + } + + tmp.base_addr = second->base_addr; + tmp.length = MIN(a_end, b_end) - tmp.base_addr; + tmp.type = MAX(a->type, b->type); + first->length = tmp.base_addr - first->base_addr; + second->base_addr += tmp.length; + second->length = last_end - second->base_addr; + second->type = last_type; + + /* + * Filter out invalid entries. + */ + if (biosmem_map_entry_is_invalid(a) + && biosmem_map_entry_is_invalid(b)) { + *a = tmp; + biosmem_map_size--; + memmove(b, b + 1, (biosmem_map_size - j) * sizeof(*b)); + continue; + } else if (biosmem_map_entry_is_invalid(a)) { + *a = tmp; + j++; + continue; + } else if (biosmem_map_entry_is_invalid(b)) { + *b = tmp; + j++; + continue; + } + + if (tmp.type == a->type) + first = a; + else if (tmp.type == b->type) + first = b; + else { + + /* + * If the overlapping area can't be merged with one of its + * neighbors, it must be added as a new entry. + */ + + if (biosmem_map_size >= ARRAY_SIZE(biosmem_map)) + boot_panic(biosmem_panic_toobig_msg); + + biosmem_map[biosmem_map_size] = tmp; + biosmem_map_size++; + j++; + continue; + } + + if (first->base_addr > tmp.base_addr) + first->base_addr = tmp.base_addr; + + first->length += tmp.length; + j++; + } + } + + biosmem_map_sort(); +} + +static int __boot +biosmem_map_find_avail(phys_addr_t *phys_start, phys_addr_t *phys_end) +{ + const struct biosmem_map_entry *entry, *map_end; + phys_addr_t seg_start, seg_end; + uint64_t start, end; + + seg_start = (phys_addr_t)-1; + seg_end = (phys_addr_t)-1; + map_end = biosmem_map + biosmem_map_size; + + for (entry = biosmem_map; entry < map_end; entry++) { + if (entry->type != BIOSMEM_TYPE_AVAILABLE) + continue; + + start = vm_page_round(entry->base_addr); + + if (start >= *phys_end) + break; + + end = vm_page_trunc(entry->base_addr + entry->length); + + if ((start < end) && (start < *phys_end) && (end > *phys_start)) { + if (seg_start == (phys_addr_t)-1) + seg_start = start; + + seg_end = end; + } + } + + if ((seg_start == (phys_addr_t)-1) || (seg_end == (phys_addr_t)-1)) + return -1; + + if (seg_start > *phys_start) + *phys_start = seg_start; + + if (seg_end < *phys_end) + *phys_end = seg_end; + + return 0; +} + +static void __boot +biosmem_set_segment(unsigned int seg_index, phys_addr_t start, phys_addr_t end) +{ + biosmem_segments[seg_index].start = start; + biosmem_segments[seg_index].end = end; +} + +static phys_addr_t __boot +biosmem_segment_end(unsigned int seg_index) +{ + return biosmem_segments[seg_index].end; +} + +static phys_addr_t __boot +biosmem_segment_size(unsigned int seg_index) +{ + return biosmem_segments[seg_index].end - biosmem_segments[seg_index].start; +} + +static void __boot +biosmem_save_cmdline_sizes(struct multiboot_raw_info *mbi) +{ + struct multiboot_raw_module *mod; + uint32_t i, va; + + if (mbi->flags & MULTIBOOT_LOADER_CMDLINE) { + va = phystokv(mbi->cmdline); + mbi->unused0 = boot_strlen((char *)va) + 1; + } + + if (mbi->flags & MULTIBOOT_LOADER_MODULES) { + unsigned long addr; + + addr = phystokv(mbi->mods_addr); + + 
for (i = 0; i < mbi->mods_count; i++) { + mod = (struct multiboot_raw_module *)addr + i; + va = phystokv(mod->string); + mod->reserved = boot_strlen((char *)va) + 1; + } + } +} + +static void __boot +biosmem_find_boot_data_update(uint32_t min, uint32_t *start, uint32_t *end, + uint32_t data_start, uint32_t data_end) +{ + if ((min <= data_start) && (data_start < *start)) { + *start = data_start; + *end = data_end; + } +} + +/* + * Find the first boot data in the given range, and return their containing + * area (start address is returned directly, end address is returned in end). + * The following are considered boot data : + * - the kernel + * - the kernel command line + * - the module table + * - the modules + * - the modules command lines + * - the ELF section header table + * - the ELF .shstrtab, .symtab and .strtab sections + * + * If no boot data was found, 0 is returned, and the end address isn't set. + */ +static uint32_t __boot +biosmem_find_boot_data(const struct multiboot_raw_info *mbi, uint32_t min, + uint32_t max, uint32_t *endp) +{ + struct multiboot_raw_module *mod; + struct elf_shdr *shdr; + uint32_t i, start, end = end; + unsigned long tmp; + + start = max; + + biosmem_find_boot_data_update(min, &start, &end, _kvtophys(&_start), + _kvtophys(&_end)); + + if ((mbi->flags & MULTIBOOT_LOADER_CMDLINE) && (mbi->cmdline != 0)) + biosmem_find_boot_data_update(min, &start, &end, mbi->cmdline, + mbi->cmdline + mbi->unused0); + + if (mbi->flags & MULTIBOOT_LOADER_MODULES) { + i = mbi->mods_count * sizeof(struct multiboot_raw_module); + biosmem_find_boot_data_update(min, &start, &end, mbi->mods_addr, + mbi->mods_addr + i); + tmp = phystokv(mbi->mods_addr); + + for (i = 0; i < mbi->mods_count; i++) { + mod = (struct multiboot_raw_module *)tmp + i; + biosmem_find_boot_data_update(min, &start, &end, mod->mod_start, + mod->mod_end); + + if (mod->string != 0) + biosmem_find_boot_data_update(min, &start, &end, mod->string, + mod->string + mod->reserved); + } + } + + if (mbi->flags & MULTIBOOT_LOADER_SHDR) { + tmp = mbi->shdr_num * mbi->shdr_size; + biosmem_find_boot_data_update(min, &start, &end, mbi->shdr_addr, + mbi->shdr_addr + tmp); + tmp = phystokv(mbi->shdr_addr); + + for (i = 0; i < mbi->shdr_num; i++) { + shdr = (struct elf_shdr *)(tmp + (i * mbi->shdr_size)); + + if ((shdr->type != ELF_SHT_SYMTAB) + && (shdr->type != ELF_SHT_STRTAB)) + continue; + + biosmem_find_boot_data_update(min, &start, &end, shdr->addr, + shdr->addr + shdr->size); + } + } + + if (start == max) + return 0; + + *endp = end; + return start; +} + +static void __boot +biosmem_setup_allocator(struct multiboot_raw_info *mbi) +{ + uint32_t heap_start, heap_end, max_heap_start, max_heap_end; + uint32_t mem_end, next; + + /* + * Find some memory for the heap. Look for the largest unused area in + * upper memory, carefully avoiding all boot data. 
+ */ + mem_end = vm_page_trunc((mbi->mem_upper + 1024) << 10); + +#ifndef __LP64__ + if (mem_end > VM_PAGE_DIRECTMAP_LIMIT) + mem_end = VM_PAGE_DIRECTMAP_LIMIT; +#endif /* __LP64__ */ + + max_heap_start = 0; + max_heap_end = 0; + next = BIOSMEM_END; + + do { + heap_start = next; + heap_end = biosmem_find_boot_data(mbi, heap_start, mem_end, &next); + + if (heap_end == 0) { + heap_end = mem_end; + next = 0; + } + + if ((heap_end - heap_start) > (max_heap_end - max_heap_start)) { + max_heap_start = heap_start; + max_heap_end = heap_end; + } + } while (next != 0); + + max_heap_start = vm_page_round(max_heap_start); + max_heap_end = vm_page_trunc(max_heap_end); + + if (max_heap_start >= max_heap_end) + boot_panic(biosmem_panic_setup_msg); + + biosmem_heap_start = max_heap_start; + biosmem_heap_end = max_heap_end; + biosmem_heap_cur = biosmem_heap_end; +} + +void __boot +biosmem_bootstrap(struct multiboot_raw_info *mbi) +{ + phys_addr_t phys_start, phys_end, last_addr; + int error; + + if (mbi->flags & MULTIBOOT_LOADER_MMAP) + biosmem_map_build(mbi); + else + biosmem_map_build_simple(mbi); + + biosmem_map_adjust(); + + phys_start = BIOSMEM_BASE; + phys_end = VM_PAGE_DMA_LIMIT; + error = biosmem_map_find_avail(&phys_start, &phys_end); + + if (error) + boot_panic(biosmem_panic_noseg_msg); + + biosmem_set_segment(VM_PAGE_SEG_DMA, phys_start, phys_end); + last_addr = phys_end; + + phys_start = VM_PAGE_DMA_LIMIT; +#ifdef VM_PAGE_DMA32_LIMIT + phys_end = VM_PAGE_DMA32_LIMIT; + error = biosmem_map_find_avail(&phys_start, &phys_end); + + if (error) + goto out; + + biosmem_set_segment(VM_PAGE_SEG_DMA32, phys_start, phys_end); + last_addr = phys_end; + + phys_start = VM_PAGE_DMA32_LIMIT; +#endif /* VM_PAGE_DMA32_LIMIT */ + phys_end = VM_PAGE_DIRECTMAP_LIMIT; + error = biosmem_map_find_avail(&phys_start, &phys_end); + + if (error) + goto out; + + biosmem_set_segment(VM_PAGE_SEG_DIRECTMAP, phys_start, phys_end); + last_addr = phys_end; + + phys_start = VM_PAGE_DIRECTMAP_LIMIT; + phys_end = VM_PAGE_HIGHMEM_LIMIT; + error = biosmem_map_find_avail(&phys_start, &phys_end); + + if (error) + goto out; + + biosmem_set_segment(VM_PAGE_SEG_HIGHMEM, phys_start, phys_end); + +out: + + /* + * The kernel and modules command lines will be memory mapped later + * during initialization. Their respective sizes must be saved. 
+ */ + biosmem_save_cmdline_sizes(mbi); + biosmem_setup_allocator(mbi); + + /* XXX phys_last_addr must be part of the direct physical mapping */ + phys_last_addr = last_addr; +} + +unsigned long __boot +biosmem_bootalloc(unsigned int nr_pages) +{ + unsigned long addr, size; + + assert(!vm_page_ready()); + + size = vm_page_ptoa(nr_pages); + + if (size == 0) + boot_panic(biosmem_panic_inval_msg); + + /* Top-down allocation to avoid unnecessarily filling DMA segments */ + addr = biosmem_heap_cur - size; + + if ((addr < biosmem_heap_start) || (addr > biosmem_heap_cur)) + boot_panic(biosmem_panic_nomem_msg); + + biosmem_heap_cur = addr; + return addr; +} + +phys_addr_t __boot +biosmem_directmap_size(void) +{ + if (biosmem_segment_size(VM_PAGE_SEG_DIRECTMAP) != 0) + return biosmem_segment_end(VM_PAGE_SEG_DIRECTMAP); + else if (biosmem_segment_size(VM_PAGE_SEG_DMA32) != 0) + return biosmem_segment_end(VM_PAGE_SEG_DMA32); + else + return biosmem_segment_end(VM_PAGE_SEG_DMA); +} + +static const char * __init +biosmem_type_desc(unsigned int type) +{ + switch (type) { + case BIOSMEM_TYPE_AVAILABLE: + return "available"; + case BIOSMEM_TYPE_RESERVED: + return "reserved"; + case BIOSMEM_TYPE_ACPI: + return "ACPI"; + case BIOSMEM_TYPE_NVS: + return "ACPI NVS"; + case BIOSMEM_TYPE_UNUSABLE: + return "unusable"; + default: + return "unknown (reserved)"; + } +} + +static void __init +biosmem_map_show(void) +{ + const struct biosmem_map_entry *entry, *end; + + printf("biosmem: physical memory map:\n"); + + for (entry = biosmem_map, end = entry + biosmem_map_size; + entry < end; + entry++) + printf("biosmem: %018llx:%018llx, %s\n", entry->base_addr, + entry->base_addr + entry->length, + biosmem_type_desc(entry->type)); + + printf("biosmem: heap: %x-%x\n", biosmem_heap_start, biosmem_heap_end); +} + +static void __init +biosmem_load_segment(struct biosmem_segment *seg, uint64_t max_phys_end, + phys_addr_t phys_start, phys_addr_t phys_end, + phys_addr_t avail_start, phys_addr_t avail_end) +{ + unsigned int seg_index; + + seg_index = seg - biosmem_segments; + + if (phys_end > max_phys_end) { + if (max_phys_end <= phys_start) { + printf("biosmem: warning: segment %s physically unreachable, " + "not loaded\n", vm_page_seg_name(seg_index)); + return; + } + + printf("biosmem: warning: segment %s truncated to %#llx\n", + vm_page_seg_name(seg_index), max_phys_end); + phys_end = max_phys_end; + } + + if ((avail_start < phys_start) || (avail_start >= phys_end)) + avail_start = phys_start; + + if ((avail_end <= phys_start) || (avail_end > phys_end)) + avail_end = phys_end; + + seg->avail_start = avail_start; + seg->avail_end = avail_end; + vm_page_load(seg_index, phys_start, phys_end, avail_start, avail_end); +} + +void __init +biosmem_setup(void) +{ + struct biosmem_segment *seg; + unsigned int i; + + biosmem_map_show(); + + for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) { + if (biosmem_segment_size(i) == 0) + break; + + seg = &biosmem_segments[i]; + biosmem_load_segment(seg, VM_PAGE_HIGHMEM_LIMIT, seg->start, seg->end, + biosmem_heap_start, biosmem_heap_cur); + } +} + +static void __init +biosmem_free_usable_range(phys_addr_t start, phys_addr_t end) +{ + struct vm_page *page; + + printf("biosmem: release to vm_page: %llx-%llx (%lluk)\n", + (unsigned long long)start, (unsigned long long)end, + (unsigned long long)((end - start) >> 10)); + + while (start < end) { + page = vm_page_lookup_pa(start); + assert(page != NULL); + vm_page_manage(page); + start += PAGE_SIZE; + } +} + +static void __init 
+biosmem_free_usable_update_start(phys_addr_t *start, phys_addr_t res_start, + phys_addr_t res_end) +{ + if ((*start >= res_start) && (*start < res_end)) + *start = res_end; +} + +static phys_addr_t __init +biosmem_free_usable_start(phys_addr_t start) +{ + const struct biosmem_segment *seg; + unsigned int i; + + biosmem_free_usable_update_start(&start, _kvtophys(&_start), + _kvtophys(&_end)); + biosmem_free_usable_update_start(&start, biosmem_heap_start, + biosmem_heap_end); + + for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) { + seg = &biosmem_segments[i]; + biosmem_free_usable_update_start(&start, seg->avail_start, + seg->avail_end); + } + + return start; +} + +static int __init +biosmem_free_usable_reserved(phys_addr_t addr) +{ + const struct biosmem_segment *seg; + unsigned int i; + + if ((addr >= _kvtophys(&_start)) + && (addr < _kvtophys(&_end))) + return 1; + + if ((addr >= biosmem_heap_start) && (addr < biosmem_heap_end)) + return 1; + + for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) { + seg = &biosmem_segments[i]; + + if ((addr >= seg->avail_start) && (addr < seg->avail_end)) + return 1; + } + + return 0; +} + +static phys_addr_t __init +biosmem_free_usable_end(phys_addr_t start, phys_addr_t entry_end) +{ + while (start < entry_end) { + if (biosmem_free_usable_reserved(start)) + break; + + start += PAGE_SIZE; + } + + return start; +} + +static void __init +biosmem_free_usable_entry(phys_addr_t start, phys_addr_t end) +{ + phys_addr_t entry_end; + + entry_end = end; + + for (;;) { + start = biosmem_free_usable_start(start); + + if (start >= entry_end) + return; + + end = biosmem_free_usable_end(start, entry_end); + biosmem_free_usable_range(start, end); + start = end; + } +} + +void __init +biosmem_free_usable(void) +{ + struct biosmem_map_entry *entry; + uint64_t start, end; + unsigned int i; + + for (i = 0; i < biosmem_map_size; i++) { + entry = &biosmem_map[i]; + + if (entry->type != BIOSMEM_TYPE_AVAILABLE) + continue; + + start = vm_page_round(entry->base_addr); + + if (start >= VM_PAGE_HIGHMEM_LIMIT) + break; + + end = vm_page_trunc(entry->base_addr + entry->length); + + if (start < BIOSMEM_BASE) + start = BIOSMEM_BASE; + + biosmem_free_usable_entry(start, end); + } +} diff --git a/i386/i386at/biosmem.h b/i386/i386at/biosmem.h new file mode 100644 index 0000000..0cc4f8a --- /dev/null +++ b/i386/i386at/biosmem.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2010-2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _X86_BIOSMEM_H +#define _X86_BIOSMEM_H + +#include <mach/machine/vm_types.h> +#include <mach/machine/multiboot.h> + +/* + * Address where the address of the Extended BIOS Data Area segment can be + * found. + */ +#define BIOSMEM_EBDA_PTR 0x40e + +/* + * Significant low memory addresses. + * + * The first 64 KiB are reserved for various reasons (e.g. to preserve BIOS + * data and to work around data corruption on some hardware). 
+ */ +#define BIOSMEM_BASE 0x010000 +#define BIOSMEM_BASE_END 0x0a0000 +#define BIOSMEM_EXT_ROM 0x0e0000 +#define BIOSMEM_ROM 0x0f0000 +#define BIOSMEM_END 0x100000 + +/* + * Early initialization of the biosmem module. + * + * This function processes the given multiboot data for BIOS-provided + * memory information, and sets up a bootstrap physical page allocator. + * + * It is called before paging is enabled. + */ +void biosmem_bootstrap(struct multiboot_raw_info *mbi); + +/* + * Allocate contiguous physical pages during bootstrap. + * + * This function is called before paging is enabled. It should only be used + * to allocate initial page table pages. Those pages are later loaded into + * the VM system (as reserved pages) which means they can be freed like other + * regular pages. Users should fix up the type of those pages once the VM + * system is initialized. + */ +unsigned long biosmem_bootalloc(unsigned int nr_pages); + +/* + * Return the amount of physical memory that can be directly mapped. + * + * This includes the size of both the DMA/DMA32 and DIRECTMAP segments. + */ +phys_addr_t biosmem_directmap_size(void); + +/* + * Set up physical memory based on the information obtained during bootstrap + * and load it in the VM system. + */ +void biosmem_setup(void); + +/* + * Free all usable memory. + * + * This includes ranges that weren't part of the bootstrap allocator initial + * heap, e.g. because they contained boot data. + */ +void biosmem_free_usable(void); + +#endif /* _X86_BIOSMEM_H */ diff --git a/i386/i386at/elf.h b/i386/i386at/elf.h new file mode 100644 index 0000000..26f4d87 --- /dev/null +++ b/i386/i386at/elf.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2013 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef _X86_ELF_H +#define _X86_ELF_H + +#define ELF_SHT_SYMTAB 2 +#define ELF_SHT_STRTAB 3 + +struct elf_shdr { + unsigned int name; + unsigned int type; + unsigned int flags; + unsigned long addr; + unsigned long offset; + unsigned int size; + unsigned int link; + unsigned int info; + unsigned int addralign; + unsigned int entsize; +}; + +#ifdef __LP64__ + +struct elf_sym { + unsigned int name; + unsigned char info; + unsigned char other; + unsigned short shndx; + unsigned long value; + unsigned long size; +}; + +#else /* __LP64__ */ + +struct elf_sym { + unsigned int name; + unsigned long value; + unsigned long size; + unsigned char info; + unsigned char other; + unsigned short shndx; +}; + +#endif /* __LP64__ */ + +#endif /* _X86_ELF_H */ diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c index 04cf695..dbb9d8b 100644 --- a/i386/i386at/model_dep.c +++ b/i386/i386at/model_dep.c @@ -64,6 +64,7 @@ #include <i386/locore.h> #include <i386/model_dep.h> #include <i386at/autoconf.h> +#include <i386at/biosmem.h> #include <i386at/idt.h> #include <i386at/int_init.h> #include <i386at/kd.h> @@ -127,20 +128,6 @@ struct multiboot_info boot_info; /* Command line supplied to kernel. */ char *kernel_cmdline = ""; -/* This is used for memory initialization: - it gets bumped up through physical memory - that exists and is not occupied by boot gunk. - It is not necessarily page-aligned. */ -static vm_offset_t avail_next -#ifndef MACH_HYP - = RESERVED_BIOS /* XX end of BIOS data area */ -#endif /* MACH_HYP */ - ; - -/* Possibly overestimated amount of available memory - still remaining to be handed to the VM system. */ -static vm_size_t avail_remaining; - extern char version[]; /* If set, reboot the system on ctrl-alt-delete. */ @@ -275,91 +262,6 @@ void db_reset_cpu(void) halt_all_cpus(1); } - -/* - * Compute physical memory size and other parameters. - */ -void -mem_size_init(void) -{ - vm_offset_t max_phys_size; - - /* Physical memory on all PCs starts at physical address 0. - XX make it a constant. */ - phys_first_addr = 0; - -#ifdef MACH_HYP - if (boot_info.nr_pages >= 0x100000) { - printf("Truncating memory size to 4GiB\n"); - phys_last_addr = 0xffffffffU; - } else - phys_last_addr = boot_info.nr_pages * 0x1000; -#else /* MACH_HYP */ - vm_size_t phys_last_kb; - - if (boot_info.flags & MULTIBOOT_MEM_MAP) { - struct multiboot_mmap *map, *map_end; - - map = (void*) phystokv(boot_info.mmap_addr); - map_end = (void*) map + boot_info.mmap_count; - - while (map + 1 <= map_end) { - if (map->Type == MB_ARD_MEMORY) { - unsigned long long start = map->BaseAddr, end = map->BaseAddr + map->Length;; - - if (start >= 0x100000000ULL) { - printf("Ignoring %luMiB RAM region above 4GiB\n", (unsigned long) (map->Length >> 20)); - } else { - if (end >= 0x100000000ULL) { - printf("Truncating memory region to 4GiB\n"); - end = 0x0ffffffffU; - } - if (end > phys_last_addr) - phys_last_addr = end; - - printf("AT386 boot: physical memory map from 0x%lx to 0x%lx\n", - (unsigned long) start, - (unsigned long) end); - } - } - map = (void*) map + map->size + sizeof(map->size); - } - } else { - phys_last_kb = 0x400 + boot_info.mem_upper; - /* Avoid 4GiB overflow. 
*/ - if (phys_last_kb < 0x400 || phys_last_kb >= 0x400000) { - printf("Truncating memory size to 4GiB\n"); - phys_last_addr = 0xffffffffU; - } else - phys_last_addr = phys_last_kb * 0x400; - } -#endif /* MACH_HYP */ - - printf("AT386 boot: physical memory from 0x%lx to 0x%lx\n", - phys_first_addr, phys_last_addr); - - /* Reserve room for virtual mappings. - * Yes, this loses memory. Blame i386. */ - max_phys_size = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS - VM_KERNEL_MAP_SIZE; - if (phys_last_addr - phys_first_addr > max_phys_size) { - phys_last_addr = phys_first_addr + max_phys_size; - printf("Truncating memory to %luMiB\n", (phys_last_addr - phys_first_addr) / (1024 * 1024)); - /* TODO Xen: be nice, free lost memory */ - } - - phys_first_addr = round_page(phys_first_addr); - phys_last_addr = trunc_page(phys_last_addr); - -#ifdef MACH_HYP - /* Memory is just contiguous */ - avail_remaining = phys_last_addr; -#else /* MACH_HYP */ - avail_remaining - = phys_last_addr - (0x100000 - (boot_info.mem_lower * 0x400) - - RESERVED_BIOS); -#endif /* MACH_HYP */ -} - /* * Basic PC VM initialization. * Turns on paging and changes the kernel segments to use high linear addresses. @@ -382,9 +284,9 @@ i386at_init(void) #endif /* MACH_HYP */ /* - * Find memory size parameters. + * Read memory map and load it into the physical page allocator. */ - mem_size_init(); + biosmem_bootstrap((struct multiboot_raw_info *) &boot_info); #ifdef MACH_XEN kernel_cmdline = (char*) boot_info.cmd_line; @@ -432,6 +334,13 @@ i386at_init(void) pmap_bootstrap(); /* + * Load physical segments into the VM system. + * The early allocation functions become unusable after + * this point. + */ + biosmem_setup(); + + /* * We'll have to temporarily install a direct mapping * between physical memory and low linear memory, * until we start using our new kernel segment descriptors. @@ -706,187 +615,20 @@ resettodr(void) unsigned int pmap_free_pages(void) { - return atop(avail_remaining); + return vm_page_atop(phys_last_addr); /* XXX */ } -/* Always returns page-aligned regions. */ boolean_t init_alloc_aligned(vm_size_t size, vm_offset_t *addrp) { - vm_offset_t addr; + *addrp = biosmem_bootalloc(vm_page_atop(vm_page_round(size))); -#ifdef MACH_HYP - /* There is none */ - if (!avail_next) - avail_next = _kvtophys(boot_info.pt_base) + (boot_info.nr_pt_frames + 3) * 0x1000; -#else /* MACH_HYP */ - extern char start[], end[]; - int i; - static int wrapped = 0; - - /* Memory regions to skip. */ - vm_offset_t cmdline_start_pa = boot_info.flags & MULTIBOOT_CMDLINE - ? boot_info.cmdline : 0; - vm_offset_t cmdline_end_pa = cmdline_start_pa - ? cmdline_start_pa+strlen((char*)phystokv(cmdline_start_pa))+1 - : 0; - vm_offset_t mods_start_pa = boot_info.flags & MULTIBOOT_MODS - ? boot_info.mods_addr : 0; - vm_offset_t mods_end_pa = mods_start_pa - ? mods_start_pa - + boot_info.mods_count * sizeof(struct multiboot_module) - : 0; - - retry: -#endif /* MACH_HYP */ - - /* Page-align the start address. */ - avail_next = round_page(avail_next); - -#ifndef MACH_HYP - /* Start with memory above 16MB, reserving the low memory for later. */ - /* Don't care on Xen */ - if (!wrapped && phys_last_addr > 16 * 1024*1024) - { - if (avail_next < 16 * 1024*1024) - avail_next = 16 * 1024*1024; - else if (avail_next == phys_last_addr) - { - /* We have used all the memory above 16MB, so now start on - the low memory. 
This will wind up at the end of the list - of free pages, so it should not have been allocated to any - other use in early initialization before the Linux driver - glue initialization needs to allocate low memory. */ - avail_next = RESERVED_BIOS; - wrapped = 1; - } - } -#endif /* MACH_HYP */ - - /* Check if we have reached the end of memory. */ - if (avail_next == - ( -#ifndef MACH_HYP - wrapped ? 16 * 1024*1024 : -#endif /* MACH_HYP */ - phys_last_addr)) + if (*addrp == 0) return FALSE; - /* Tentatively assign the current location to the caller. */ - addr = avail_next; - - /* Bump the pointer past the newly allocated region - and see where that puts us. */ - avail_next += size; - -#ifndef MACH_HYP - /* Skip past the I/O and ROM area. */ - if (boot_info.flags & MULTIBOOT_MEM_MAP) - { - struct multiboot_mmap *map, *map_end, *current = NULL, *next = NULL; - unsigned long long minimum_next = ~0ULL; - - map = (void*) phystokv(boot_info.mmap_addr); - map_end = (void*) map + boot_info.mmap_count; - - /* Find both our current map, and the next one */ - while (map + 1 <= map_end) - { - if (map->Type == MB_ARD_MEMORY) - { - unsigned long long start = map->BaseAddr; - unsigned long long end = start + map->Length;; - - if (start <= addr && avail_next <= end) - { - /* Ok, fits in the current map */ - current = map; - break; - } - else if (avail_next <= start && start < minimum_next) - { - /* This map is not far from avail_next */ - next = map; - minimum_next = start; - } - } - map = (void*) map + map->size + sizeof(map->size); - } - - if (!current) { - /* Area does not fit in the current map, switch to next - * map if any */ - if (!next || next->BaseAddr >= phys_last_addr) - { - /* No further reachable map, we have reached - * the end of memory, but possibly wrap around - * 16MiB. */ - avail_next = phys_last_addr; - goto retry; - } - - /* Start from next map */ - avail_next = next->BaseAddr; - goto retry; - } - } - else if ((avail_next > (boot_info.mem_lower * 0x400)) && (addr < 0x100000)) - { - avail_next = 0x100000; - goto retry; - } - - /* Skip our own kernel code, data, and bss. */ - if ((phystokv(avail_next) > (vm_offset_t)start) && (phystokv(addr) < (vm_offset_t)end)) - { - avail_next = _kvtophys(end); - goto retry; - } - - /* Skip any areas occupied by valuable boot_info data. */ - if ((avail_next > cmdline_start_pa) && (addr < cmdline_end_pa)) - { - avail_next = cmdline_end_pa; - goto retry; - } - if ((avail_next > mods_start_pa) && (addr < mods_end_pa)) - { - avail_next = mods_end_pa; - goto retry; - } - if ((phystokv(avail_next) > kern_sym_start) && (phystokv(addr) < kern_sym_end)) - { - avail_next = _kvtophys(kern_sym_end); - goto retry; - } - if (boot_info.flags & MULTIBOOT_MODS) - { - struct multiboot_module *m = (struct multiboot_module *) - phystokv(boot_info.mods_addr); - for (i = 0; i < boot_info.mods_count; i++) - { - if ((avail_next > m[i].mod_start) - && (addr < m[i].mod_end)) - { - avail_next = m[i].mod_end; - goto retry; - } - /* XXX string */ - } - } -#endif /* MACH_HYP */ - - avail_remaining -= size; - - *addrp = addr; return TRUE; } -boolean_t pmap_next_page(vm_offset_t *addrp) -{ - return init_alloc_aligned(PAGE_SIZE, addrp); -} - /* Grab a physical page: the standard memory allocation mechanism during system initialization. 
*/ @@ -894,7 +636,7 @@ vm_offset_t pmap_grab_page(void) { vm_offset_t addr; - if (!pmap_next_page(&addr)) + if (!init_alloc_aligned(PAGE_SIZE, &addr)) panic("Not enough memory to initialize Mach"); return addr; } diff --git a/i386/include/mach/i386/multiboot.h b/i386/include/mach/i386/multiboot.h index 8f1c47b..c66ca03 100644 --- a/i386/include/mach/i386/multiboot.h +++ b/i386/include/mach/i386/multiboot.h @@ -188,5 +188,110 @@ struct multiboot_mmap /* usable memory "Type", all others are reserved. */ #define MB_ARD_MEMORY 1 +/* + * Copyright (c) 2010, 2012 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Versions used by the biosmem module. + */ + +#include <kern/macros.h> + +/* + * Magic number provided by the OS to the boot loader. + */ +#define MULTIBOOT_OS_MAGIC 0x1badb002 + +/* + * Multiboot flags requesting services from the boot loader. + */ +#define MULTIBOOT_OS_MEMORY_INFO 0x2 + +#define MULTIBOOT_OS_FLAGS MULTIBOOT_OS_MEMORY_INFO + +/* + * Magic number to identify a multiboot compliant boot loader. + */ +#define MULTIBOOT_LOADER_MAGIC 0x2badb002 + +/* + * Multiboot flags set by the boot loader. + */ +#define MULTIBOOT_LOADER_MEMORY 0x01 +#define MULTIBOOT_LOADER_CMDLINE 0x04 +#define MULTIBOOT_LOADER_MODULES 0x08 +#define MULTIBOOT_LOADER_SHDR 0x20 +#define MULTIBOOT_LOADER_MMAP 0x40 + +/* + * A multiboot module. + */ +struct multiboot_raw_module { + uint32_t mod_start; + uint32_t mod_end; + uint32_t string; + uint32_t reserved; +} __packed; + +/* + * Memory map entry. + */ +struct multiboot_raw_mmap_entry { + uint32_t size; + uint64_t base_addr; + uint64_t length; + uint32_t type; +} __packed; + +/* + * Multiboot information structure as passed by the boot loader. + */ +struct multiboot_raw_info { + uint32_t flags; + uint32_t mem_lower; + uint32_t mem_upper; + uint32_t unused0; + uint32_t cmdline; + uint32_t mods_count; + uint32_t mods_addr; + uint32_t shdr_num; + uint32_t shdr_size; + uint32_t shdr_addr; + uint32_t shdr_strndx; + uint32_t mmap_length; + uint32_t mmap_addr; + uint32_t unused1[9]; +} __packed; + +/* + * Versions of the multiboot structures suitable for use with 64-bit pointers. + */ + +struct multiboot_os_module { + void *mod_start; + void *mod_end; + char *string; +}; + +struct multiboot_os_info { + uint32_t flags; + char *cmdline; + struct multiboot_module *mods_addr; + uint32_t mods_count; +}; #endif /* _MACH_I386_MULTIBOOT_H_ */ diff --git a/i386/include/mach/i386/vm_types.h b/i386/include/mach/i386/vm_types.h index 1439940..41272e3 100644 --- a/i386/include/mach/i386/vm_types.h +++ b/i386/include/mach/i386/vm_types.h @@ -77,6 +77,11 @@ typedef unsigned long vm_offset_t; typedef vm_offset_t * vm_offset_array_t; /* + * A type for physical addresses. + */ +typedef unsigned long phys_addr_t; + +/* * A vm_size_t is the proper type for e.g. * expressing the difference between two * vm_offset_t entities. 
diff --git a/kern/bootstrap.c b/kern/bootstrap.c index 249c605..0836276 100644 --- a/kern/bootstrap.c +++ b/kern/bootstrap.c @@ -107,6 +107,20 @@ task_insert_send_right( return name; } +static void +free_bootstrap_pages(phys_addr_t start, phys_addr_t end) +{ + struct vm_page *page; + + while (start < end) + { + page = vm_page_lookup_pa(start); + assert(page != NULL); + vm_page_manage(page); + start += PAGE_SIZE; + } +} + void bootstrap_create(void) { int compat; @@ -265,7 +279,7 @@ void bootstrap_create(void) /* XXX we could free the memory used by the boot loader's descriptors and such. */ for (n = 0; n < boot_info.mods_count; n++) - vm_page_create(bmods[n].mod_start, bmods[n].mod_end); + free_bootstrap_pages(bmods[n].mod_start, bmods[n].mod_end); } static void diff --git a/kern/cpu_number.h b/kern/cpu_number.h index 44bbd64..650f404 100644 --- a/kern/cpu_number.h +++ b/kern/cpu_number.h @@ -37,5 +37,7 @@ int master_cpu; /* 'master' processor - keeps time */ /* cpu number is always 0 on a single processor system */ #define cpu_number() (0) +#define CPU_L1_SIZE (1 << CPU_L1_SHIFT) + #endif /* NCPUS == 1 */ #endif /* _KERN_CPU_NUMBER_H_ */ diff --git a/kern/log2.h b/kern/log2.h new file mode 100644 index 0000000..0e67701 --- /dev/null +++ b/kern/log2.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Integer base 2 logarithm operations. + */ + +#ifndef _KERN_LOG2_H +#define _KERN_LOG2_H + +#include <kern/assert.h> + +#ifdef __LP64__ +#define LONG_BIT 64 +#else /* __LP64__ */ +#define LONG_BIT 32 +#endif /* __LP64__ */ + +static inline unsigned int +ilog2(unsigned long x) +{ + assert(x != 0); + return LONG_BIT - __builtin_clzl(x) - 1; +} + +static inline unsigned int +iorder2(unsigned long size) +{ + assert(size != 0); + + if (size == 1) + return 0; + + return ilog2(size - 1) + 1; +} + +#endif /* _KERN_LOG2_H */ diff --git a/kern/slab.h b/kern/slab.h index 77db7c1..5ff3960 100644 --- a/kern/slab.h +++ b/kern/slab.h @@ -48,6 +48,7 @@ #define _KERN_SLAB_H #include <cache.h> +#include <kern/cpu_number.h> #include <kern/lock.h> #include <kern/list.h> #include <kern/rbtree.h> @@ -56,10 +57,6 @@ #include <vm/vm_types.h> #if SLAB_USE_CPU_POOLS -/* - * L1 cache line size. - */ -#define CPU_L1_SIZE (1 << CPU_L1_SHIFT) /* * Per-processor cache of pre-constructed objects. 
diff --git a/kern/startup.c b/kern/startup.c index 30cff5c..bd29694 100644 --- a/kern/startup.c +++ b/kern/startup.c @@ -136,7 +136,7 @@ void setup_main(void) mapable_time_init(); machine_info.max_cpus = NCPUS; - machine_info.memory_size = phys_last_addr - phys_first_addr; /* XXX mem_size */ + machine_info.memory_size = vm_page_mem_size(); /* XXX phys_addr_t -> vm_size_t */ machine_info.avail_cpus = 0; machine_info.major_version = KERNEL_MAJOR_VERSION; machine_info.minor_version = KERNEL_MINOR_VERSION; diff --git a/linux/dev/glue/glue.h b/linux/dev/glue/glue.h index 5d4f6d8..8cb118c 100644 --- a/linux/dev/glue/glue.h +++ b/linux/dev/glue/glue.h @@ -25,8 +25,8 @@ extern int linux_auto_config; extern int linux_intr_pri; -extern void *alloc_contig_mem (unsigned, unsigned, unsigned, vm_page_t *); -extern void free_contig_mem (vm_page_t); +extern unsigned long alloc_contig_mem (unsigned, unsigned, unsigned, vm_page_t *); +extern void free_contig_mem (vm_page_t, unsigned); extern void init_IRQ (void); extern void restore_IRQ (void); extern void linux_kmem_init (void); diff --git a/linux/dev/glue/kmem.c b/linux/dev/glue/kmem.c index ff052ff..ed57610 100644 --- a/linux/dev/glue/kmem.c +++ b/linux/dev/glue/kmem.c @@ -111,10 +111,8 @@ linux_kmem_init () for (p = pages, j = 0; j < MEM_CHUNK_SIZE - PAGE_SIZE; j += PAGE_SIZE) { assert (p->phys_addr < MEM_DMA_LIMIT); - assert (p->phys_addr + PAGE_SIZE - == ((vm_page_t) p->pageq.next)->phys_addr); - - p = (vm_page_t) p->pageq.next; + assert (p->phys_addr + PAGE_SIZE == (p + 1)->phys_addr); + p++; } pages_free[i].end = pages_free[i].start + MEM_CHUNK_SIZE; diff --git a/linux/dev/init/main.c b/linux/dev/init/main.c index 8737b62..d69b3fc 100644 --- a/linux/dev/init/main.c +++ b/linux/dev/init/main.c @@ -98,7 +98,7 @@ void linux_init (void) { int addr; - unsigned memory_start, memory_end; + unsigned long memory_start, memory_end; vm_page_t pages; /* @@ -131,9 +131,7 @@ linux_init (void) /* * Allocate contiguous memory below 16 MB. */ - memory_start = (unsigned long) alloc_contig_mem (CONTIG_ALLOC, - 16 * 1024 * 1024, - 0, &pages); + memory_start = alloc_contig_mem (CONTIG_ALLOC, 16 * 1024 * 1024, 0, &pages); if (memory_start == 0) panic ("linux_init: alloc_contig_mem failed"); memory_end = memory_start + CONTIG_ALLOC; @@ -147,14 +145,6 @@ linux_init (void) panic ("linux_init: ran out memory"); /* - * Free unused memory. - */ - while (pages && phystokv(pages->phys_addr) < round_page (memory_start)) - pages = (vm_page_t) pages->pageq.next; - if (pages) - free_contig_mem (pages); - - /* * Initialize devices. */ #ifdef CONFIG_INET @@ -182,140 +172,31 @@ linux_init (void) /* * Allocate contiguous memory with the given constraints. - * This routine is horribly inefficient but it is presently - * only used during initialization so it's not that bad. */ -void * +unsigned long alloc_contig_mem (unsigned size, unsigned limit, unsigned mask, vm_page_t * pages) { - int i, j, bits_len; - unsigned *bits, len; - void *m; - vm_page_t p, page_list, tail, prev; - vm_offset_t addr = 0, max_addr; - - if (size == 0) - return (NULL); - size = round_page (size); - if ((size >> PAGE_SHIFT) > vm_page_free_count) - return (NULL); - - /* Allocate bit array. 
*/ - max_addr = phys_last_addr; - if (max_addr > limit) - max_addr = limit; - bits_len = ((((max_addr >> PAGE_SHIFT) + NBPW - 1) / NBPW) - * sizeof (unsigned)); - bits = (unsigned *) kalloc (bits_len); - if (!bits) - return (NULL); - memset (bits, 0, bits_len); + vm_page_t p; - /* - * Walk the page free list and set a bit for every usable page. - */ - simple_lock (&vm_page_queue_free_lock); - p = vm_page_queue_free; - while (p) - { - if (p->phys_addr < limit) - (bits[(p->phys_addr >> PAGE_SHIFT) / NBPW] - |= 1 << ((p->phys_addr >> PAGE_SHIFT) % NBPW)); - p = (vm_page_t) p->pageq.next; - } + p = vm_page_grab_contig(size, VM_PAGE_SEL_DMA); - /* - * Scan bit array for contiguous pages. - */ - len = 0; - m = NULL; - for (i = 0; len < size && i < bits_len / sizeof (unsigned); i++) - for (j = 0; len < size && j < NBPW; j++) - if (!(bits[i] & (1 << j))) - { - len = 0; - m = NULL; - } - else - { - if (len == 0) - { - addr = ((vm_offset_t) (i * NBPW + j) - << PAGE_SHIFT); - if ((addr & mask) == 0) - { - len += PAGE_SIZE; - m = (void *) addr; - } - } - else - len += PAGE_SIZE; - } - - if (len != size) - { - simple_unlock (&vm_page_queue_free_lock); - kfree ((vm_offset_t) bits, bits_len); - return (NULL); - } - - /* - * Remove pages from free list - * and construct list to return to caller. - */ - page_list = NULL; - for (len = 0; len < size; len += PAGE_SIZE, addr += PAGE_SIZE) - { - prev = NULL; - for (p = vm_page_queue_free; p; p = (vm_page_t) p->pageq.next) - { - if (p->phys_addr == addr) - break; - prev = p; - } - if (!p) - panic ("alloc_contig_mem: page not on free list"); - if (prev) - prev->pageq.next = p->pageq.next; - else - vm_page_queue_free = (vm_page_t) p->pageq.next; - p->free = FALSE; - p->pageq.next = NULL; - if (!page_list) - page_list = tail = p; - else - { - tail->pageq.next = (queue_entry_t) p; - tail = p; - } - vm_page_free_count--; - } + if (p == NULL) + return 0; - simple_unlock (&vm_page_queue_free_lock); - kfree ((vm_offset_t) bits, bits_len); if (pages) - *pages = page_list; - return (void *) phystokv(m); + *pages = p; + + return phystokv(vm_page_to_pa(p)); } /* * Free memory allocated by alloc_contig_mem. */ void -free_contig_mem (vm_page_t pages) +free_contig_mem (vm_page_t pages, unsigned size) { - int i; - vm_page_t p; - - for (p = pages, i = 0; p->pageq.next; p = (vm_page_t) p->pageq.next, i++) - p->free = TRUE; - p->free = TRUE; - simple_lock (&vm_page_queue_free_lock); - vm_page_free_count += i + 1; - p->pageq.next = (queue_entry_t) vm_page_queue_free; - vm_page_queue_free = pages; - simple_unlock (&vm_page_queue_free_lock); + vm_page_free_contig(pages, size); } /* This is the number of bits of precision for the loops_per_second. Each diff --git a/linux/pcmcia-cs/glue/ds.c b/linux/pcmcia-cs/glue/ds.c index 8f88b55..cc4b92b 100644 --- a/linux/pcmcia-cs/glue/ds.c +++ b/linux/pcmcia-cs/glue/ds.c @@ -24,12 +24,6 @@ /* This file is included from linux/pcmcia-cs/modules/ds.c. */ /* - * Prepare the namespace for inclusion of Mach header files. - */ - -#undef PAGE_SHIFT - -/* * This is really ugly. But this is glue code, so... It's about the `kfree' * symbols in <linux/malloc.h> and <kern/kalloc.h>. */ @@ -67,9 +67,6 @@ extern vm_offset_t pmap_steal_memory(vm_size_t); /* During VM initialization, report remaining unused physical pages. */ extern unsigned int pmap_free_pages(void); -/* During VM initialization, use remaining physical pages to allocate page - * frames. 
*/ -extern void pmap_startup(vm_offset_t *, vm_offset_t *); /* Initialization, after kernel runs in virtual memory. */ extern void pmap_init(void); @@ -80,18 +77,14 @@ extern void pmap_init(void); * Otherwise, it must implement * pmap_free_pages * pmap_virtual_space - * pmap_next_page * pmap_init - * and vm/vm_resident.c implements pmap_steal_memory and pmap_startup - * using pmap_free_pages, pmap_next_page, pmap_virtual_space, - * and pmap_enter. pmap_free_pages may over-estimate the number - * of unused physical pages, and pmap_next_page may return FALSE - * to indicate that there are no more unused pages to return. + * and vm/vm_resident.c implements pmap_steal_memory using + * pmap_free_pages, pmap_virtual_space, and pmap_enter. + * + * pmap_free_pages may over-estimate the number of unused physical pages. * However, for best performance pmap_free_pages should be accurate. */ -/* During VM initialization, return the next unused physical page. */ -extern boolean_t pmap_next_page(vm_offset_t *); /* During VM initialization, report virtual space available for the kernel. */ extern void pmap_virtual_space(vm_offset_t *, vm_offset_t *); #endif /* MACHINE_PAGES */ diff --git a/vm/vm_fault.c b/vm/vm_fault.c index 46779f6..4d67417 100644 --- a/vm/vm_fault.c +++ b/vm/vm_fault.c @@ -607,7 +607,7 @@ vm_fault_return_t vm_fault_page( * won't block for pages. */ - if (m->fictitious && !vm_page_convert(m, FALSE)) { + if (m->fictitious && !vm_page_convert(&m, FALSE)) { VM_PAGE_FREE(m); vm_fault_cleanup(object, first_m); return(VM_FAULT_MEMORY_SHORTAGE); @@ -725,7 +725,7 @@ vm_fault_return_t vm_fault_page( assert(m->object == object); first_m = VM_PAGE_NULL; - if (m->fictitious && !vm_page_convert(m, !object->internal)) { + if (m->fictitious && !vm_page_convert(&m, !object->internal)) { VM_PAGE_FREE(m); vm_fault_cleanup(object, VM_PAGE_NULL); return(VM_FAULT_MEMORY_SHORTAGE); diff --git a/vm/vm_init.c b/vm/vm_init.c index 3d1081c..23d5d46 100644 --- a/vm/vm_init.c +++ b/vm/vm_init.c @@ -83,4 +83,5 @@ void vm_mem_init(void) { vm_object_init(); memory_object_proxy_init(); + vm_page_info_all(); } diff --git a/vm/vm_object.c b/vm/vm_object.c index 6666fcb..eda03c6 100644 --- a/vm/vm_object.c +++ b/vm/vm_object.c @@ -2891,7 +2891,8 @@ vm_object_page_map( VM_PAGE_FREE(old_page); } - vm_page_init(m, addr); + vm_page_init(m); + m->phys_addr = addr; m->private = TRUE; /* don`t free page */ m->wire_count = 1; vm_page_lock_queues(); diff --git a/vm/vm_page.c b/vm/vm_page.c new file mode 100644 index 0000000..a539ab4 --- /dev/null +++ b/vm/vm_page.c @@ -0,0 +1,762 @@ +/* + * Copyright (c) 2010-2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * This implementation uses the binary buddy system to manage its heap. + * Descriptions of the buddy system can be found in the following works : + * - "UNIX Internals: The New Frontiers", by Uresh Vahalia. 
+ * - "Dynamic Storage Allocation: A Survey and Critical Review", + * by Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles. + * + * In addition, this allocator uses per-CPU pools of pages for order 0 + * (i.e. single page) allocations. These pools act as caches (but are named + * differently to avoid confusion with CPU caches) that reduce contention on + * multiprocessor systems. When a pool is empty and cannot provide a page, + * it is filled by transferring multiple pages from the backend buddy system. + * The symmetric case is handled likewise. + */ + +#include <string.h> +#include <kern/assert.h> +#include <kern/cpu_number.h> +#include <kern/debug.h> +#include <kern/list.h> +#include <kern/lock.h> +#include <kern/macros.h> +#include <kern/printf.h> +#include <kern/thread.h> +#include <mach/vm_param.h> +#include <machine/pmap.h> +#include <sys/types.h> +#include <vm/vm_page.h> + +#define __init +#define __initdata +#define __read_mostly + +#define thread_pin() +#define thread_unpin() + +/* + * Number of free block lists per segment. + */ +#define VM_PAGE_NR_FREE_LISTS 11 + +/* + * The size of a CPU pool is computed by dividing the number of pages in its + * containing segment by this value. + */ +#define VM_PAGE_CPU_POOL_RATIO 1024 + +/* + * Maximum number of pages in a CPU pool. + */ +#define VM_PAGE_CPU_POOL_MAX_SIZE 128 + +/* + * The transfer size of a CPU pool is computed by dividing the pool size by + * this value. + */ +#define VM_PAGE_CPU_POOL_TRANSFER_RATIO 2 + +/* + * Per-processor cache of pages. + */ +struct vm_page_cpu_pool { + simple_lock_data_t lock; + int size; + int transfer_size; + int nr_pages; + struct list pages; +} __aligned(CPU_L1_SIZE); + +/* + * Special order value for pages that aren't in a free list. Such pages are + * either allocated, or part of a free block of pages but not the head page. + */ +#define VM_PAGE_ORDER_UNLISTED ((unsigned short)-1) + +/* + * Doubly-linked list of free blocks. + */ +struct vm_page_free_list { + unsigned long size; + struct list blocks; +}; + +/* + * Segment name buffer size. + */ +#define VM_PAGE_NAME_SIZE 16 + +/* + * Segment of contiguous memory. + */ +struct vm_page_seg { + struct vm_page_cpu_pool cpu_pools[NCPUS]; + + phys_addr_t start; + phys_addr_t end; + struct vm_page *pages; + struct vm_page *pages_end; + simple_lock_data_t lock; + struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS]; + unsigned long nr_free_pages; +}; + +/* + * Bootstrap information about a segment. + */ +struct vm_page_boot_seg { + phys_addr_t start; + phys_addr_t end; + phys_addr_t avail_start; + phys_addr_t avail_end; +}; + +static int vm_page_is_ready __read_mostly; + +/* + * Segment table. + * + * The system supports a maximum of 4 segments : + * - DMA: suitable for DMA + * - DMA32: suitable for DMA when devices support 32-bits addressing + * - DIRECTMAP: direct physical mapping, allows direct access from + * the kernel with a simple offset translation + * - HIGHMEM: must be mapped before it can be accessed + * + * Segments are ordered by priority, 0 being the lowest priority. Their + * relative priorities are DMA < DMA32 < DIRECTMAP < HIGHMEM. Some segments + * may actually be aliases for others, e.g. if DMA is always possible from + * the direct physical mapping, DMA and DMA32 are aliases for DIRECTMAP, + * in which case the segment table contains DIRECTMAP and HIGHMEM only. + */ +static struct vm_page_seg vm_page_segs[VM_PAGE_MAX_SEGS]; + +/* + * Bootstrap segment table. 
+ */ +static struct vm_page_boot_seg vm_page_boot_segs[VM_PAGE_MAX_SEGS] __initdata; + +/* + * Number of loaded segments. + */ +static unsigned int vm_page_segs_size __read_mostly; + +static void __init +vm_page_init_pa(struct vm_page *page, unsigned short seg_index, phys_addr_t pa) +{ + memset(page, 0, sizeof(*page)); + vm_page_init(page); /* vm_resident members */ + page->type = VM_PT_RESERVED; + page->seg_index = seg_index; + page->order = VM_PAGE_ORDER_UNLISTED; + page->phys_addr = pa; +} + +void +vm_page_set_type(struct vm_page *page, unsigned int order, unsigned short type) +{ + unsigned int i, nr_pages; + + nr_pages = 1 << order; + + for (i = 0; i < nr_pages; i++) + page[i].type = type; +} + +static void __init +vm_page_free_list_init(struct vm_page_free_list *free_list) +{ + free_list->size = 0; + list_init(&free_list->blocks); +} + +static inline void +vm_page_free_list_insert(struct vm_page_free_list *free_list, + struct vm_page *page) +{ + assert(page->order == VM_PAGE_ORDER_UNLISTED); + + free_list->size++; + list_insert_head(&free_list->blocks, &page->node); +} + +static inline void +vm_page_free_list_remove(struct vm_page_free_list *free_list, + struct vm_page *page) +{ + assert(page->order != VM_PAGE_ORDER_UNLISTED); + + free_list->size--; + list_remove(&page->node); +} + +static struct vm_page * +vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order) +{ + struct vm_page_free_list *free_list = free_list; + struct vm_page *page, *buddy; + unsigned int i; + + assert(order < VM_PAGE_NR_FREE_LISTS); + + for (i = order; i < VM_PAGE_NR_FREE_LISTS; i++) { + free_list = &seg->free_lists[i]; + + if (free_list->size != 0) + break; + } + + if (i == VM_PAGE_NR_FREE_LISTS) + return NULL; + + page = list_first_entry(&free_list->blocks, struct vm_page, node); + vm_page_free_list_remove(free_list, page); + page->order = VM_PAGE_ORDER_UNLISTED; + + while (i > order) { + i--; + buddy = &page[1 << i]; + vm_page_free_list_insert(&seg->free_lists[i], buddy); + buddy->order = i; + } + + seg->nr_free_pages -= (1 << order); + return page; +} + +static void +vm_page_seg_free_to_buddy(struct vm_page_seg *seg, struct vm_page *page, + unsigned int order) +{ + struct vm_page *buddy; + phys_addr_t pa, buddy_pa; + unsigned int nr_pages; + + assert(page >= seg->pages); + assert(page < seg->pages_end); + assert(page->order == VM_PAGE_ORDER_UNLISTED); + assert(order < VM_PAGE_NR_FREE_LISTS); + + nr_pages = (1 << order); + pa = page->phys_addr; + + while (order < (VM_PAGE_NR_FREE_LISTS - 1)) { + buddy_pa = pa ^ vm_page_ptoa(1 << order); + + if ((buddy_pa < seg->start) || (buddy_pa >= seg->end)) + break; + + buddy = &seg->pages[vm_page_atop(buddy_pa - seg->start)]; + + if (buddy->order != order) + break; + + vm_page_free_list_remove(&seg->free_lists[order], buddy); + buddy->order = VM_PAGE_ORDER_UNLISTED; + order++; + pa &= -vm_page_ptoa(1 << order); + page = &seg->pages[vm_page_atop(pa - seg->start)]; + } + + vm_page_free_list_insert(&seg->free_lists[order], page); + page->order = order; + seg->nr_free_pages += nr_pages; +} + +static void __init +vm_page_cpu_pool_init(struct vm_page_cpu_pool *cpu_pool, int size) +{ + simple_lock_init(&cpu_pool->lock); + cpu_pool->size = size; + cpu_pool->transfer_size = (size + VM_PAGE_CPU_POOL_TRANSFER_RATIO - 1) + / VM_PAGE_CPU_POOL_TRANSFER_RATIO; + cpu_pool->nr_pages = 0; + list_init(&cpu_pool->pages); +} + +static inline struct vm_page_cpu_pool * +vm_page_cpu_pool_get(struct vm_page_seg *seg) +{ + return &seg->cpu_pools[cpu_number()]; +} + +static 
inline struct vm_page * +vm_page_cpu_pool_pop(struct vm_page_cpu_pool *cpu_pool) +{ + struct vm_page *page; + + assert(cpu_pool->nr_pages != 0); + cpu_pool->nr_pages--; + page = list_first_entry(&cpu_pool->pages, struct vm_page, node); + list_remove(&page->node); + return page; +} + +static inline void +vm_page_cpu_pool_push(struct vm_page_cpu_pool *cpu_pool, struct vm_page *page) +{ + assert(cpu_pool->nr_pages < cpu_pool->size); + cpu_pool->nr_pages++; + list_insert_head(&cpu_pool->pages, &page->node); +} + +static int +vm_page_cpu_pool_fill(struct vm_page_cpu_pool *cpu_pool, + struct vm_page_seg *seg) +{ + struct vm_page *page; + int i; + + assert(cpu_pool->nr_pages == 0); + + simple_lock(&seg->lock); + + for (i = 0; i < cpu_pool->transfer_size; i++) { + page = vm_page_seg_alloc_from_buddy(seg, 0); + + if (page == NULL) + break; + + vm_page_cpu_pool_push(cpu_pool, page); + } + + simple_unlock(&seg->lock); + + return i; +} + +static void +vm_page_cpu_pool_drain(struct vm_page_cpu_pool *cpu_pool, + struct vm_page_seg *seg) +{ + struct vm_page *page; + int i; + + assert(cpu_pool->nr_pages == cpu_pool->size); + + simple_lock(&seg->lock); + + for (i = cpu_pool->transfer_size; i > 0; i--) { + page = vm_page_cpu_pool_pop(cpu_pool); + vm_page_seg_free_to_buddy(seg, page, 0); + } + + simple_unlock(&seg->lock); +} + +static phys_addr_t __init +vm_page_seg_size(struct vm_page_seg *seg) +{ + return seg->end - seg->start; +} + +static int __init +vm_page_seg_compute_pool_size(struct vm_page_seg *seg) +{ + phys_addr_t size; + + size = vm_page_atop(vm_page_seg_size(seg)) / VM_PAGE_CPU_POOL_RATIO; + + if (size == 0) + size = 1; + else if (size > VM_PAGE_CPU_POOL_MAX_SIZE) + size = VM_PAGE_CPU_POOL_MAX_SIZE; + + return size; +} + +static void __init +vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end, + struct vm_page *pages) +{ + phys_addr_t pa; + int pool_size; + unsigned int i; + + seg->start = start; + seg->end = end; + pool_size = vm_page_seg_compute_pool_size(seg); + + for (i = 0; i < ARRAY_SIZE(seg->cpu_pools); i++) + vm_page_cpu_pool_init(&seg->cpu_pools[i], pool_size); + + seg->pages = pages; + seg->pages_end = pages + vm_page_atop(vm_page_seg_size(seg)); + simple_lock_init(&seg->lock); + + for (i = 0; i < ARRAY_SIZE(seg->free_lists); i++) + vm_page_free_list_init(&seg->free_lists[i]); + + seg->nr_free_pages = 0; + i = seg - vm_page_segs; + + for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE) + vm_page_init_pa(&pages[vm_page_atop(pa - seg->start)], i, pa); +} + +static struct vm_page * +vm_page_seg_alloc(struct vm_page_seg *seg, unsigned int order, + unsigned short type) +{ + struct vm_page_cpu_pool *cpu_pool; + struct vm_page *page; + int filled; + + assert(order < VM_PAGE_NR_FREE_LISTS); + + if (order == 0) { + thread_pin(); + cpu_pool = vm_page_cpu_pool_get(seg); + simple_lock(&cpu_pool->lock); + + if (cpu_pool->nr_pages == 0) { + filled = vm_page_cpu_pool_fill(cpu_pool, seg); + + if (!filled) { + simple_unlock(&cpu_pool->lock); + thread_unpin(); + return NULL; + } + } + + page = vm_page_cpu_pool_pop(cpu_pool); + simple_unlock(&cpu_pool->lock); + thread_unpin(); + } else { + simple_lock(&seg->lock); + page = vm_page_seg_alloc_from_buddy(seg, order); + simple_unlock(&seg->lock); + } + + assert(page->type == VM_PT_FREE); + vm_page_set_type(page, order, type); + return page; +} + +static void +vm_page_seg_free(struct vm_page_seg *seg, struct vm_page *page, + unsigned int order) +{ + struct vm_page_cpu_pool *cpu_pool; + + assert(page->type != VM_PT_FREE); + 
assert(order < VM_PAGE_NR_FREE_LISTS); + + vm_page_set_type(page, order, VM_PT_FREE); + + if (order == 0) { + thread_pin(); + cpu_pool = vm_page_cpu_pool_get(seg); + simple_lock(&cpu_pool->lock); + + if (cpu_pool->nr_pages == cpu_pool->size) + vm_page_cpu_pool_drain(cpu_pool, seg); + + vm_page_cpu_pool_push(cpu_pool, page); + simple_unlock(&cpu_pool->lock); + thread_unpin(); + } else { + simple_lock(&seg->lock); + vm_page_seg_free_to_buddy(seg, page, order); + simple_unlock(&seg->lock); + } +} + +void __init +vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end, + phys_addr_t avail_start, phys_addr_t avail_end) +{ + struct vm_page_boot_seg *seg; + + assert(seg_index < ARRAY_SIZE(vm_page_boot_segs)); + assert(vm_page_aligned(start)); + assert(vm_page_aligned(end)); + assert(vm_page_aligned(avail_start)); + assert(vm_page_aligned(avail_end)); + assert(start < end); + assert(start <= avail_start); + assert(avail_end <= end); + assert(vm_page_segs_size < ARRAY_SIZE(vm_page_boot_segs)); + + seg = &vm_page_boot_segs[seg_index]; + seg->start = start; + seg->end = end; + seg->avail_start = avail_start; + seg->avail_end = avail_end; + vm_page_segs_size++; +} + +int +vm_page_ready(void) +{ + return vm_page_is_ready; +} + +static unsigned int +vm_page_select_alloc_seg(unsigned int selector) +{ + unsigned int seg_index; + + switch (selector) { + case VM_PAGE_SEL_DMA: + seg_index = VM_PAGE_SEG_DMA; + break; + case VM_PAGE_SEL_DMA32: + seg_index = VM_PAGE_SEG_DMA32; + break; + case VM_PAGE_SEL_DIRECTMAP: + seg_index = VM_PAGE_SEG_DIRECTMAP; + break; + case VM_PAGE_SEL_HIGHMEM: + seg_index = VM_PAGE_SEG_HIGHMEM; + break; + default: + panic("vm_page: invalid selector"); + } + + return MIN(vm_page_segs_size - 1, seg_index); +} + +static int __init +vm_page_boot_seg_loaded(const struct vm_page_boot_seg *seg) +{ + return (seg->end != 0); +} + +static void __init +vm_page_check_boot_segs(void) +{ + unsigned int i; + int expect_loaded; + + if (vm_page_segs_size == 0) + panic("vm_page: no physical memory loaded"); + + for (i = 0; i < ARRAY_SIZE(vm_page_boot_segs); i++) { + expect_loaded = (i < vm_page_segs_size); + + if (vm_page_boot_seg_loaded(&vm_page_boot_segs[i]) == expect_loaded) + continue; + + panic("vm_page: invalid boot segment table"); + } +} + +static phys_addr_t __init +vm_page_boot_seg_size(struct vm_page_boot_seg *seg) +{ + return seg->end - seg->start; +} + +static phys_addr_t __init +vm_page_boot_seg_avail_size(struct vm_page_boot_seg *seg) +{ + return seg->avail_end - seg->avail_start; +} + +unsigned long __init +vm_page_bootalloc(size_t size) +{ + struct vm_page_boot_seg *seg; + phys_addr_t pa; + unsigned int i; + + for (i = vm_page_select_alloc_seg(VM_PAGE_SEL_DIRECTMAP); + i < vm_page_segs_size; + i--) { + seg = &vm_page_boot_segs[i]; + + if (size <= vm_page_boot_seg_avail_size(seg)) { + pa = seg->avail_start; + seg->avail_start += vm_page_round(size); + return pa; + } + } + + panic("vm_page: no physical memory available"); +} + +void __init +vm_page_setup(void) +{ + struct vm_page_boot_seg *boot_seg; + struct vm_page_seg *seg; + struct vm_page *table, *page, *end; + size_t nr_pages, table_size; + unsigned long va; + unsigned int i; + phys_addr_t pa; + + vm_page_check_boot_segs(); + + /* + * Compute the page table size. 
+ */ + nr_pages = 0; + + for (i = 0; i < vm_page_segs_size; i++) + nr_pages += vm_page_atop(vm_page_boot_seg_size(&vm_page_boot_segs[i])); + + table_size = vm_page_round(nr_pages * sizeof(struct vm_page)); + printf("vm_page: page table size: %lu entries (%luk)\n", nr_pages, + table_size >> 10); + table = (struct vm_page *)pmap_steal_memory(table_size); + va = (unsigned long)table; + + /* + * Initialize the segments, associating them to the page table. When + * the segments are initialized, all their pages are set allocated. + * Pages are then released, which populates the free lists. + */ + for (i = 0; i < vm_page_segs_size; i++) { + seg = &vm_page_segs[i]; + boot_seg = &vm_page_boot_segs[i]; + vm_page_seg_init(seg, boot_seg->start, boot_seg->end, table); + page = seg->pages + vm_page_atop(boot_seg->avail_start + - boot_seg->start); + end = seg->pages + vm_page_atop(boot_seg->avail_end + - boot_seg->start); + + while (page < end) { + page->type = VM_PT_FREE; + vm_page_seg_free_to_buddy(seg, page, 0); + page++; + + /* XXX */ + if (i <= VM_PAGE_SEG_DIRECTMAP) + vm_page_free_count++; + } + + table += vm_page_atop(vm_page_seg_size(seg)); + } + + while (va < (unsigned long)table) { + pa = pmap_extract(kernel_pmap, va); + page = vm_page_lookup_pa(pa); + assert((page != NULL) && (page->type == VM_PT_RESERVED)); + page->type = VM_PT_TABLE; + va += PAGE_SIZE; + } + + vm_page_is_ready = 1; +} + +void __init +vm_page_manage(struct vm_page *page) +{ + assert(page->seg_index < ARRAY_SIZE(vm_page_segs)); + assert(page->type == VM_PT_RESERVED); + + vm_page_set_type(page, 0, VM_PT_FREE); + vm_page_seg_free_to_buddy(&vm_page_segs[page->seg_index], page, 0); +} + +struct vm_page * +vm_page_lookup_pa(phys_addr_t pa) +{ + struct vm_page_seg *seg; + unsigned int i; + + for (i = 0; i < vm_page_segs_size; i++) { + seg = &vm_page_segs[i]; + + if ((pa >= seg->start) && (pa < seg->end)) + return &seg->pages[vm_page_atop(pa - seg->start)]; + } + + return NULL; +} + +struct vm_page * +vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type) +{ + struct vm_page *page; + unsigned int i; + + for (i = vm_page_select_alloc_seg(selector); i < vm_page_segs_size; i--) { + page = vm_page_seg_alloc(&vm_page_segs[i], order, type); + + if (page != NULL) + return page; + } + + if (type == VM_PT_PMAP) + panic("vm_page: unable to allocate pmap page"); + + return NULL; +} + +void +vm_page_free_pa(struct vm_page *page, unsigned int order) +{ + assert(page->seg_index < ARRAY_SIZE(vm_page_segs)); + + vm_page_seg_free(&vm_page_segs[page->seg_index], page, order); +} + +const char * +vm_page_seg_name(unsigned int seg_index) +{ + /* Don't use a switch statement since segments can be aliased */ + if (seg_index == VM_PAGE_SEG_HIGHMEM) + return "HIGHMEM"; + else if (seg_index == VM_PAGE_SEG_DIRECTMAP) + return "DIRECTMAP"; + else if (seg_index == VM_PAGE_SEG_DMA32) + return "DMA32"; + else if (seg_index == VM_PAGE_SEG_DMA) + return "DMA"; + else + panic("vm_page: invalid segment index"); +} + +void +vm_page_info_all(void) +{ + struct vm_page_seg *seg; + unsigned long pages; + unsigned int i; + + for (i = 0; i < vm_page_segs_size; i++) { + seg = &vm_page_segs[i]; + pages = (unsigned long)(seg->pages_end - seg->pages); + printf("vm_page: %s: pages: %lu (%luM), free: %lu (%luM)\n", + vm_page_seg_name(i), pages, pages >> (20 - PAGE_SHIFT), + seg->nr_free_pages, seg->nr_free_pages >> (20 - PAGE_SHIFT)); + } +} + +phys_addr_t +vm_page_mem_size(void) +{ + phys_addr_t total; + unsigned int i; + + total = 0; + + for (i = 0; i 
< vm_page_segs_size; i++) { + /* XXX */ + if (i > VM_PAGE_SEG_DIRECTMAP) + continue; + + total += vm_page_seg_size(&vm_page_segs[i]); + } + + return total; +} diff --git a/vm/vm_page.h b/vm/vm_page.h index e6a8c49..7607aad 100644 --- a/vm/vm_page.h +++ b/vm/vm_page.h @@ -36,11 +36,12 @@ #include <mach/boolean.h> #include <mach/vm_prot.h> -#include <mach/vm_param.h> +#include <machine/vm_param.h> #include <vm/vm_object.h> #include <vm/vm_types.h> #include <kern/queue.h> #include <kern/lock.h> +#include <kern/log2.h> #include <kern/macros.h> #include <kern/sched_prim.h> /* definitions of wait/wakeup */ @@ -76,6 +77,22 @@ */ struct vm_page { + /* Members used in the vm_page module only */ + struct list node; + unsigned short type; + unsigned short seg_index; + unsigned short order; + + /* + * This member is used throughout the code and may only change for + * fictitious pages. + */ + phys_addr_t phys_addr; + + /* We use an empty struct as the delimiter. */ + struct {} vm_page_header; +#define VM_PAGE_HEADER_SIZE offsetof(struct vm_page, vm_page_header) + queue_chain_t pageq; /* queue info for FIFO * queue or free list (P) */ queue_chain_t listq; /* all pages in same object (O) */ @@ -110,8 +127,6 @@ struct vm_page { * without having data. (O) * [See vm_object_overwrite] */ - vm_offset_t phys_addr; /* Physical address of page, passed - * to pmap_enter (read-only) */ vm_prot_t page_lock; /* Uses prohibited by data manager (O) */ vm_prot_t unlock_request; /* Outstanding unlock request (O) */ }; @@ -140,8 +155,6 @@ struct vm_page { */ extern -vm_page_t vm_page_queue_free; /* memory free queue */ -extern vm_page_t vm_page_queue_fictitious; /* fictitious free queue */ extern queue_head_t vm_page_queue_active; /* active memory queue */ @@ -196,25 +209,21 @@ extern void vm_page_bootstrap( vm_offset_t *endp); extern void vm_page_module_init(void); -extern void vm_page_create( - vm_offset_t start, - vm_offset_t end); extern vm_page_t vm_page_lookup( vm_object_t object, vm_offset_t offset); extern vm_page_t vm_page_grab_fictitious(void); -extern void vm_page_release_fictitious(vm_page_t); -extern boolean_t vm_page_convert(vm_page_t, boolean_t); +extern boolean_t vm_page_convert(vm_page_t *, boolean_t); extern void vm_page_more_fictitious(void); extern vm_page_t vm_page_grab(boolean_t); -extern void vm_page_release(vm_page_t, boolean_t); +extern vm_page_t vm_page_grab_contig(vm_size_t, unsigned int); +extern void vm_page_free_contig(vm_page_t, vm_size_t); extern void vm_page_wait(void (*)(void)); extern vm_page_t vm_page_alloc( vm_object_t object, vm_offset_t offset); extern void vm_page_init( - vm_page_t mem, - vm_offset_t phys_addr); + vm_page_t mem); extern void vm_page_free(vm_page_t); extern void vm_page_activate(vm_page_t); extern void vm_page_deactivate(vm_page_t); @@ -312,4 +321,189 @@ extern unsigned int vm_page_info( } \ MACRO_END +/* + * Copyright (c) 2010-2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. 
If not, see <http://www.gnu.org/licenses/>. + * + * + * Physical page management. + */ + +/* + * Address/page conversion and rounding macros (not inline functions to + * be easily usable on both virtual and physical addresses, which may not + * have the same type size). + */ +#define vm_page_atop(addr) ((addr) >> PAGE_SHIFT) +#define vm_page_ptoa(page) ((page) << PAGE_SHIFT) +#define vm_page_trunc(addr) P2ALIGN(addr, PAGE_SIZE) +#define vm_page_round(addr) P2ROUND(addr, PAGE_SIZE) +#define vm_page_aligned(addr) P2ALIGNED(addr, PAGE_SIZE) + +/* + * Segment selectors. + * + * Selector-to-segment-list translation table : + * DMA DMA + * DMA32 DMA32 DMA + * DIRECTMAP DIRECTMAP DMA32 DMA + * HIGHMEM HIGHMEM DIRECTMAP DMA32 DMA + */ +#define VM_PAGE_SEL_DMA 0 +#define VM_PAGE_SEL_DMA32 1 +#define VM_PAGE_SEL_DIRECTMAP 2 +#define VM_PAGE_SEL_HIGHMEM 3 + +/* + * Page usage types. + * + * Failing to allocate pmap pages will cause a kernel panic. + * TODO Obviously, this needs to be addressed, e.g. with a reserved pool of + * pages. + */ +#define VM_PT_FREE 0 /* Page unused */ +#define VM_PT_RESERVED 1 /* Page reserved at boot time */ +#define VM_PT_TABLE 2 /* Page is part of the page table */ +#define VM_PT_PMAP 3 /* Page stores pmap-specific data */ +#define VM_PT_KMEM 4 /* Page is part of a kmem slab */ +#define VM_PT_KERNEL 5 /* Type for generic kernel allocations */ + +static inline unsigned short +vm_page_type(const struct vm_page *page) +{ + return page->type; +} + +void vm_page_set_type(struct vm_page *page, unsigned int order, + unsigned short type); + +static inline unsigned int +vm_page_order(size_t size) +{ + return iorder2(vm_page_atop(vm_page_round(size))); +} + +static inline phys_addr_t +vm_page_to_pa(const struct vm_page *page) +{ + return page->phys_addr; +} + +#if 0 +static inline unsigned long +vm_page_direct_va(phys_addr_t pa) +{ + assert(pa < VM_PAGE_DIRECTMAP_LIMIT); + return ((unsigned long)pa + VM_MIN_DIRECTMAP_ADDRESS); +} + +static inline phys_addr_t +vm_page_direct_pa(unsigned long va) +{ + assert(va >= VM_MIN_DIRECTMAP_ADDRESS); + assert(va < VM_MAX_DIRECTMAP_ADDRESS); + return (va - VM_MIN_DIRECTMAP_ADDRESS); +} + +static inline void * +vm_page_direct_ptr(const struct vm_page *page) +{ + return (void *)vm_page_direct_va(vm_page_to_pa(page)); +} +#endif + +/* + * Load physical memory into the vm_page module at boot time. + * + * The avail_start and avail_end parameters are used to maintain a simple + * heap for bootstrap allocations. + * + * All addresses must be page-aligned. Segments can be loaded in any order. + */ +void vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end, + phys_addr_t avail_start, phys_addr_t avail_end); + +/* + * Return true if the vm_page module is completely initialized, false + * otherwise, in which case only vm_page_bootalloc() can be used for + * allocations. + */ +int vm_page_ready(void); + +/* + * Early allocation function. + * + * This function is used by the vm_resident module to implement + * pmap_steal_memory. It can be used after physical segments have been loaded + * and before the vm_page module is initialized. + */ +unsigned long vm_page_bootalloc(size_t size); + +/* + * Set up the vm_page module. + * + * Architecture-specific code must have loaded segments before calling this + * function. Segments must comply with the selector-to-segment-list table, + * e.g. HIGHMEM is loaded if and only if DIRECTMAP, DMA32 and DMA are loaded, + * notwithstanding segment aliasing. 
+ * + * Once this function returns, the vm_page module is ready, and normal + * allocation functions can be used. + */ +void vm_page_setup(void); + +/* + * Make the given page managed by the vm_page module. + * + * If additional memory can be made usable after the VM system is initialized, + * it should be reported through this function. + */ +void vm_page_manage(struct vm_page *page); + +/* + * Return the page descriptor for the given physical address. + */ +struct vm_page * vm_page_lookup_pa(phys_addr_t pa); + +/* + * Allocate a block of 2^order physical pages. + * + * The selector is used to determine the segments from which allocation can + * be attempted. + */ +struct vm_page * vm_page_alloc_pa(unsigned int order, unsigned int selector, + unsigned short type); + +/* + * Release a block of 2^order physical pages. + */ +void vm_page_free_pa(struct vm_page *page, unsigned int order); + +/* + * Return the name of the given segment. + */ +const char * vm_page_seg_name(unsigned int seg_index); + +/* + * Display internal information about the module. + */ +void vm_page_info_all(void); + +/* + * Return the total amount of physical memory. + */ +phys_addr_t vm_page_mem_size(void); + #endif /* _VM_VM_PAGE_H_ */ diff --git a/vm/vm_resident.c b/vm/vm_resident.c index c70fa73..9fd6491 100644 --- a/vm/vm_resident.c +++ b/vm/vm_resident.c @@ -72,7 +72,7 @@ /* * These variables record the values returned by vm_page_bootstrap, * for debugging purposes. The implementation of pmap_steal_memory - * and pmap_startup here also uses them internally. + * here also uses them internally. */ vm_offset_t virtual_space_start; @@ -95,21 +95,6 @@ vm_page_bucket_t *vm_page_buckets; /* Array of buckets */ unsigned int vm_page_bucket_count = 0; /* How big is array? */ unsigned int vm_page_hash_mask; /* Mask for hash function */ -/* - * Resident page structures are initialized from - * a template (see vm_page_alloc). - * - * When adding a new field to the virtual memory - * object structure, be sure to add initialization - * (see vm_page_bootstrap). - */ -struct vm_page vm_page_template; - -/* - * Resident pages that represent real memory - * are allocated from a free list. - */ -vm_page_t vm_page_queue_free; vm_page_t vm_page_queue_fictitious; decl_simple_lock_data(,vm_page_queue_free_lock) unsigned int vm_page_free_wanted; @@ -192,48 +177,15 @@ void vm_page_bootstrap( vm_offset_t *startp, vm_offset_t *endp) { - vm_page_t m; int i; /* - * Initialize the vm_page template. - */ - - m = &vm_page_template; - m->object = VM_OBJECT_NULL; /* reset later */ - m->offset = 0; /* reset later */ - m->wire_count = 0; - - m->inactive = FALSE; - m->active = FALSE; - m->laundry = FALSE; - m->free = FALSE; - m->external = FALSE; - - m->busy = TRUE; - m->wanted = FALSE; - m->tabled = FALSE; - m->fictitious = FALSE; - m->private = FALSE; - m->absent = FALSE; - m->error = FALSE; - m->dirty = FALSE; - m->precious = FALSE; - m->reference = FALSE; - - m->phys_addr = 0; /* reset later */ - - m->page_lock = VM_PROT_NONE; - m->unlock_request = VM_PROT_NONE; - - /* * Initialize the page queues. */ simple_lock_init(&vm_page_queue_free_lock); simple_lock_init(&vm_page_queue_lock); - vm_page_queue_free = VM_PAGE_NULL; vm_page_queue_fictitious = VM_PAGE_NULL; queue_init(&vm_page_queue_active); queue_init(&vm_page_queue_inactive); @@ -280,15 +232,8 @@ void vm_page_bootstrap( simple_lock_init(&bucket->lock); } - /* - * Machine-dependent code allocates the resident page table. - * It uses vm_page_init to initialize the page frames. 
- * The code also returns to us the virtual space available - * to the kernel. We don't trust the pmap module - * to get the alignment right. - */ + vm_page_setup(); - pmap_startup(&virtual_space_start, &virtual_space_end); virtual_space_start = round_page(virtual_space_start); virtual_space_end = trunc_page(virtual_space_end); @@ -301,8 +246,8 @@ void vm_page_bootstrap( #ifndef MACHINE_PAGES /* - * We implement pmap_steal_memory and pmap_startup with the help - * of two simpler functions, pmap_virtual_space and pmap_next_page. + * We implement pmap_steal_memory with the help + * of two simpler functions, pmap_virtual_space and vm_page_bootalloc. */ vm_offset_t pmap_steal_memory( @@ -310,11 +255,7 @@ vm_offset_t pmap_steal_memory( { vm_offset_t addr, vaddr, paddr; - /* - * We round the size to an integer multiple. - */ - - size = (size + 3) &~ 3; + size = round_page(size); /* * If this is the first call to pmap_steal_memory, @@ -347,8 +288,7 @@ vm_offset_t pmap_steal_memory( for (vaddr = round_page(addr); vaddr < addr + size; vaddr += PAGE_SIZE) { - if (!pmap_next_page(&paddr)) - panic("pmap_steal_memory"); + paddr = vm_page_bootalloc(PAGE_SIZE); /* * XXX Logically, these mappings should be wired, @@ -361,64 +301,6 @@ vm_offset_t pmap_steal_memory( return addr; } - -void pmap_startup( - vm_offset_t *startp, - vm_offset_t *endp) -{ - unsigned int i, npages, pages_initialized; - vm_page_t pages; - vm_offset_t paddr; - - /* - * We calculate how many page frames we will have - * and then allocate the page structures in one chunk. - */ - - npages = ((PAGE_SIZE * pmap_free_pages() + - (round_page(virtual_space_start) - virtual_space_start)) / - (PAGE_SIZE + sizeof *pages)); - - pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages); - - /* - * Initialize the page frames. - */ - - for (i = 0, pages_initialized = 0; i < npages; i++) { - if (!pmap_next_page(&paddr)) - break; - - vm_page_init(&pages[i], paddr); - pages_initialized++; - } - i = 0; - while (pmap_next_page(&paddr)) - i++; - if (i) - printf("%u memory page(s) left away\n", i); - - /* - * Release pages in reverse order so that physical pages - * initially get allocated in ascending addresses. This keeps - * the devices (which must address physical memory) happy if - * they require several consecutive pages. - */ - - for (i = pages_initialized; i > 0; i--) { - vm_page_release(&pages[i - 1], FALSE); - } - - /* - * We have to re-align virtual_space_start, - * because pmap_steal_memory has been using it. - */ - - virtual_space_start = round_page(virtual_space_start); - - *startp = virtual_space_start; - *endp = virtual_space_end; -} #endif /* MACHINE_PAGES */ /* @@ -434,34 +316,6 @@ void vm_page_module_init(void) } /* - * Routine: vm_page_create - * Purpose: - * After the VM system is up, machine-dependent code - * may stumble across more physical memory. For example, - * memory that it was reserving for a frame buffer. - * vm_page_create turns this memory into available pages. - */ - -void vm_page_create( - vm_offset_t start, - vm_offset_t end) -{ - vm_offset_t paddr; - vm_page_t m; - - for (paddr = round_page(start); - paddr < trunc_page(end); - paddr += PAGE_SIZE) { - m = (vm_page_t) kmem_cache_alloc(&vm_page_cache); - if (m == VM_PAGE_NULL) - panic("vm_page_create"); - - vm_page_init(m, paddr); - vm_page_release(m, FALSE); - } -} - -/* * vm_page_hash: * * Distributes the object/offset key pair among hash buckets. 
@@ -750,6 +604,33 @@ void vm_page_rename( vm_page_unlock_queues(); } +static void vm_page_init_template(vm_page_t m) +{ + m->object = VM_OBJECT_NULL; /* reset later */ + m->offset = 0; /* reset later */ + m->wire_count = 0; + + m->inactive = FALSE; + m->active = FALSE; + m->laundry = FALSE; + m->free = FALSE; + m->external = FALSE; + + m->busy = TRUE; + m->wanted = FALSE; + m->tabled = FALSE; + m->fictitious = FALSE; + m->private = FALSE; + m->absent = FALSE; + m->error = FALSE; + m->dirty = FALSE; + m->precious = FALSE; + m->reference = FALSE; + + m->page_lock = VM_PROT_NONE; + m->unlock_request = VM_PROT_NONE; +} + /* * vm_page_init: * @@ -758,11 +639,9 @@ void vm_page_rename( * so that it can be given to vm_page_release or vm_page_insert. */ void vm_page_init( - vm_page_t mem, - vm_offset_t phys_addr) + vm_page_t mem) { - *mem = vm_page_template; - mem->phys_addr = phys_addr; + vm_page_init_template(mem); } /* @@ -794,7 +673,7 @@ vm_page_t vm_page_grab_fictitious(void) * Release a fictitious page to the free list. */ -void vm_page_release_fictitious( +static void vm_page_release_fictitious( vm_page_t m) { simple_lock(&vm_page_queue_free_lock); @@ -826,7 +705,8 @@ void vm_page_more_fictitious(void) if (m == VM_PAGE_NULL) panic("vm_page_more_fictitious"); - vm_page_init(m, vm_page_fictitious_addr); + vm_page_init(m); + m->phys_addr = vm_page_fictitious_addr; m->fictitious = TRUE; vm_page_release_fictitious(m); } @@ -836,25 +716,46 @@ void vm_page_more_fictitious(void) * vm_page_convert: * * Attempt to convert a fictitious page into a real page. + * + * The object referenced by *MP must be locked. */ boolean_t vm_page_convert( - vm_page_t m, + struct vm_page **mp, boolean_t external) { - vm_page_t real_m; + struct vm_page *real_m, *fict_m; + vm_object_t object; + vm_offset_t offset; + + fict_m = *mp; + + assert(fict_m->fictitious); + assert(fict_m->phys_addr == vm_page_fictitious_addr); + assert(!fict_m->active); + assert(!fict_m->inactive); real_m = vm_page_grab(external); if (real_m == VM_PAGE_NULL) return FALSE; - m->phys_addr = real_m->phys_addr; - m->fictitious = FALSE; + object = fict_m->object; + offset = fict_m->offset; + vm_page_remove(fict_m); + + memcpy(&real_m->vm_page_header, + &fict_m->vm_page_header, + sizeof(*fict_m) - VM_PAGE_HEADER_SIZE); + real_m->fictitious = FALSE; - real_m->phys_addr = vm_page_fictitious_addr; - real_m->fictitious = TRUE; + vm_page_insert(real_m, object, offset); - vm_page_release_fictitious(real_m); + assert(real_m->phys_addr != vm_page_fictitious_addr); + assert(fict_m->fictitious); + assert(fict_m->phys_addr == vm_page_fictitious_addr); + + vm_page_release_fictitious(fict_m); + *mp = real_m; return TRUE; } @@ -886,15 +787,16 @@ vm_page_t vm_page_grab( return VM_PAGE_NULL; } - if (vm_page_queue_free == VM_PAGE_NULL) + mem = vm_page_alloc_pa(0, VM_PAGE_SEL_DIRECTMAP, VM_PT_KERNEL); + + if (mem == NULL) panic("vm_page_grab"); if (--vm_page_free_count < vm_page_free_count_minimum) vm_page_free_count_minimum = vm_page_free_count; if (external) vm_page_external_count++; - mem = vm_page_queue_free; - vm_page_queue_free = (vm_page_t) mem->pageq.next; + mem->free = FALSE; mem->extcounted = mem->external = external; simple_unlock(&vm_page_queue_free_lock); @@ -928,208 +830,97 @@ vm_offset_t vm_page_grab_phys_addr(void) } /* - * vm_page_grab_contiguous_pages: - * - * Take N pages off the free list, the pages should - * cover a contiguous range of physical addresses. 
- * [Used by device drivers to cope with DMA limitations] + * vm_page_release: * - * Returns the page descriptors in ascending order, or - * Returns KERN_RESOURCE_SHORTAGE if it could not. + * Return a page to the free list. */ -/* Biggest phys page number for the pages we handle in VM */ - -vm_size_t vm_page_big_pagenum = 0; /* Set this before call! */ - -kern_return_t -vm_page_grab_contiguous_pages( - int npages, - vm_page_t pages[], - natural_t *bits, - boolean_t external) +static void vm_page_release( + vm_page_t mem, + boolean_t external) { - int first_set; - int size, alloc_size; - kern_return_t ret; - vm_page_t mem, *prevmemp; + simple_lock(&vm_page_queue_free_lock); + if (mem->free) + panic("vm_page_release"); + mem->free = TRUE; + vm_page_free_pa(mem, 0); + vm_page_free_count++; + if (external) + vm_page_external_count--; -#ifndef NBBY -#define NBBY 8 /* size in bits of sizeof()`s unity */ -#endif + /* + * Check if we should wake up someone waiting for page. + * But don't bother waking them unless they can allocate. + * + * We wakeup only one thread, to prevent starvation. + * Because the scheduling system handles wait queues FIFO, + * if we wakeup all waiting threads, one greedy thread + * can starve multiple niceguy threads. When the threads + * all wakeup, the greedy threads runs first, grabs the page, + * and waits for another page. It will be the first to run + * when the next page is freed. + * + * However, there is a slight danger here. + * The thread we wake might not use the free page. + * Then the other threads could wait indefinitely + * while the page goes unused. To forestall this, + * the pageout daemon will keep making free pages + * as long as vm_page_free_wanted is non-zero. + */ -#define NBPEL (sizeof(natural_t)*NBBY) + if ((vm_page_free_wanted > 0) && + (vm_page_free_count >= vm_page_free_reserved)) { + vm_page_free_wanted--; + thread_wakeup_one((event_t) &vm_page_free_count); + } - size = (vm_page_big_pagenum + NBPEL - 1) - & ~(NBPEL - 1); /* in bits */ + simple_unlock(&vm_page_queue_free_lock); +} - size = size / NBBY; /* in bytes */ +/* + * vm_page_grab_contig: + * + * Remove a block of contiguous pages from the free list. + * Returns VM_PAGE_NULL if the request fails. + */ - /* - * If we are called before the VM system is fully functional - * the invoker must provide us with the work space. [one bit - * per page starting at phys 0 and up to vm_page_big_pagenum] - */ - if (bits == 0) { - alloc_size = round_page(size); - if (kmem_alloc_wired(kernel_map, - (vm_offset_t *)&bits, - alloc_size) - != KERN_SUCCESS) - return KERN_RESOURCE_SHORTAGE; - } else - alloc_size = 0; +vm_page_t vm_page_grab_contig( + vm_size_t size, + unsigned int selector) +{ + unsigned int i, order, nr_pages; + vm_page_t mem; - memset(bits, 0, size); + order = vm_page_order(size); + nr_pages = 1 << order; - /* - * A very large granularity call, its rare so that is ok - */ simple_lock(&vm_page_queue_free_lock); /* - * Do not dip into the reserved pool. + * Only let privileged threads (involved in pageout) + * dip into the reserved pool or exceed the limit + * for externally-managed pages. 
*/ - if ((vm_page_free_count < vm_page_free_reserved) - || (vm_page_external_count >= vm_page_external_limit)) { - printf_once("no more room for vm_page_grab_contiguous_pages"); + if (((vm_page_free_count - nr_pages) <= vm_page_free_reserved) + && !current_thread()->vm_privilege) { simple_unlock(&vm_page_queue_free_lock); - return KERN_RESOURCE_SHORTAGE; - } - - /* - * First pass through, build a big bit-array of - * the pages that are free. It is not going to - * be too large anyways, in 4k we can fit info - * for 32k pages. - */ - mem = vm_page_queue_free; - while (mem) { - int word_index, bit_index; - - bit_index = (mem->phys_addr >> PAGE_SHIFT); - word_index = bit_index / NBPEL; - bit_index = bit_index - (word_index * NBPEL); - bits[word_index] |= 1 << bit_index; - - mem = (vm_page_t) mem->pageq.next; + return VM_PAGE_NULL; } - /* - * Second loop. Scan the bit array for NPAGES - * contiguous bits. That gives us, if any, - * the range of pages we will be grabbing off - * the free list. - */ - { - int bits_so_far = 0, i; - - first_set = 0; - - for (i = 0; i < size; i += sizeof(natural_t)) { - - natural_t v = bits[i / sizeof(natural_t)]; - int bitpos; - - /* - * Bitscan this one word - */ - if (v) { - /* - * keep counting them beans ? - */ - bitpos = 0; - - if (bits_so_far) { -count_ones: - while (v & 1) { - bitpos++; - /* - * got enough beans ? - */ - if (++bits_so_far == npages) - goto found_em; - v >>= 1; - } - /* if we are being lucky, roll again */ - if (bitpos == NBPEL) - continue; - } - - /* - * search for beans here - */ - bits_so_far = 0; - while ((bitpos < NBPEL) && ((v & 1) == 0)) { - bitpos++; - v >>= 1; - } - if (v & 1) { - first_set = (i * NBBY) + bitpos; - goto count_ones; - } - } - /* - * No luck - */ - bits_so_far = 0; - } - } + mem = vm_page_alloc_pa(order, selector, VM_PT_KERNEL); - /* - * We could not find enough contiguous pages. - */ - simple_unlock(&vm_page_queue_free_lock); + if (mem == NULL) + panic("vm_page_grab_contig"); - printf_once("no contiguous room for vm_page_grab_contiguous_pages"); - ret = KERN_RESOURCE_SHORTAGE; - goto out; + vm_page_free_count -= nr_pages; - /* - * Final pass. Now we know which pages we want. - * Scan the list until we find them all, grab - * pages as we go. FIRST_SET tells us where - * in the bit-array our pages start. - */ -found_em: - vm_page_free_count -= npages; if (vm_page_free_count < vm_page_free_count_minimum) vm_page_free_count_minimum = vm_page_free_count; - if (external) - vm_page_external_count += npages; - { - vm_offset_t first_phys, last_phys; - - /* cache values for compare */ - first_phys = first_set << PAGE_SHIFT; - last_phys = first_phys + (npages << PAGE_SHIFT);/* not included */ - - /* running pointers */ - mem = vm_page_queue_free; - prevmemp = &vm_page_queue_free; - - while (mem) { - - vm_offset_t addr; - - addr = mem->phys_addr; - - if ((addr >= first_phys) && - (addr < last_phys)) { - *prevmemp = (vm_page_t) mem->pageq.next; - pages[(addr - first_phys) >> PAGE_SHIFT] = mem; - mem->free = FALSE; - mem->extcounted = mem->external = external; - /* - * Got them all ? 
- */ - if (--npages == 0) break; - } else - prevmemp = (vm_page_t *) &mem->pageq.next; - - mem = (vm_page_t) mem->pageq.next; - } + + for (i = 0; i < nr_pages; i++) { + mem[i].free = FALSE; + mem[i].extcounted = mem[i].external = 0; } simple_unlock(&vm_page_queue_free_lock); @@ -1148,55 +939,35 @@ found_em: if ((vm_page_free_count < vm_page_free_min) || ((vm_page_free_count < vm_page_free_target) && (vm_page_inactive_count < vm_page_inactive_target))) - thread_wakeup(&vm_page_free_wanted); - - ret = KERN_SUCCESS; -out: - if (alloc_size) - kmem_free(kernel_map, (vm_offset_t) bits, alloc_size); + thread_wakeup((event_t) &vm_page_free_wanted); - return ret; + return mem; } /* - * vm_page_release: + * vm_page_free_contig: * - * Return a page to the free list. + * Return a block of contiguous pages to the free list. */ -void vm_page_release( - vm_page_t mem, - boolean_t external) +void vm_page_free_contig(vm_page_t mem, vm_size_t size) { + unsigned int i, order, nr_pages; + + order = vm_page_order(size); + nr_pages = 1 << order; + simple_lock(&vm_page_queue_free_lock); - if (mem->free) - panic("vm_page_release"); - mem->free = TRUE; - mem->pageq.next = (queue_entry_t) vm_page_queue_free; - vm_page_queue_free = mem; - vm_page_free_count++; - if (external) - vm_page_external_count--; - /* - * Check if we should wake up someone waiting for page. - * But don't bother waking them unless they can allocate. - * - * We wakeup only one thread, to prevent starvation. - * Because the scheduling system handles wait queues FIFO, - * if we wakeup all waiting threads, one greedy thread - * can starve multiple niceguy threads. When the threads - * all wakeup, the greedy threads runs first, grabs the page, - * and waits for another page. It will be the first to run - * when the next page is freed. - * - * However, there is a slight danger here. - * The thread we wake might not use the free page. - * Then the other threads could wait indefinitely - * while the page goes unused. To forestall this, - * the pageout daemon will keep making free pages - * as long as vm_page_free_wanted is non-zero. - */ + for (i = 0; i < nr_pages; i++) { + if (mem[i].free) + panic("vm_page_free_contig"); + + mem[i].free = TRUE; + } + + vm_page_free_pa(mem, order); + vm_page_free_count += nr_pages; if ((vm_page_free_wanted > 0) && (vm_page_free_count >= vm_page_free_reserved)) { @@ -1310,12 +1081,13 @@ void vm_page_free( */ if (mem->private || mem->fictitious) { - vm_page_init(mem, vm_page_fictitious_addr); + vm_page_init(mem); + mem->phys_addr = vm_page_fictitious_addr; mem->fictitious = TRUE; vm_page_release_fictitious(mem); } else { int external = mem->external && mem->extcounted; - vm_page_init(mem, mem->phys_addr); + vm_page_init(mem); vm_page_release(mem, external); } } |
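
The following note is an illustrative aside, not part of the patch above: it sketches the buddy pairing rule that vm_page_seg_free_to_buddy() relies on (buddy_pa = pa ^ vm_page_ptoa(1 << order)). The helper buddy_of() and the standalone test program are hypothetical; only the XOR rule and the 4 KiB page-size assumption come from the code in the diff.

/*
 * Illustrative sketch, assuming 4 KiB pages: the buddy of a block of
 * 2^order pages starting at physical address pa is found by toggling
 * bit (PAGE_SHIFT + order) of pa. Blocks only merge with their buddy of
 * the same order, so merged blocks stay naturally aligned.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT   12
#define EX_PAGE_SIZE    (1ULL << EX_PAGE_SHIFT)

static uint64_t
buddy_of(uint64_t pa, unsigned int order)
{
    /* A block of order n must be aligned on 2^n pages. */
    assert((pa & ((EX_PAGE_SIZE << order) - 1)) == 0);
    return pa ^ (EX_PAGE_SIZE << order);
}

int
main(void)
{
    /* Order-0 blocks at 0x8000 and 0x9000 are buddies of each other. */
    printf("%#llx\n", (unsigned long long)buddy_of(0x8000, 0)); /* 0x9000 */

    /* Once merged, the order-1 block at 0x8000 pairs with 0xa000. */
    printf("%#llx\n", (unsigned long long)buddy_of(0x8000, 1)); /* 0xa000 */
    return 0;
}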
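
Also illustrative rather than part of the commit: a minimal usage sketch of the physical-page interface declared in vm/vm_page.h above, assuming the caller runs after vm_page_setup(). The function example_dma_buffer() is hypothetical; the vm_page_* calls, the selector and the page type are the ones introduced by this patch.

#include <mach/vm_param.h>
#include <vm/vm_page.h>

/*
 * Allocate 2^order = 4 physically contiguous pages from the DMA segment,
 * use their base physical address, then return the block to the buddy
 * allocator.
 */
static void
example_dma_buffer(void)
{
    struct vm_page *pages;
    unsigned int order;
    phys_addr_t base;

    order = vm_page_order(4 * PAGE_SIZE);   /* size -> block order */
    pages = vm_page_alloc_pa(order, VM_PAGE_SEL_DMA, VM_PT_KERNEL);

    if (pages == NULL)
        return;                             /* segment exhausted */

    base = vm_page_to_pa(pages);            /* contiguous from base on */
    (void)base;                             /* program the device here */

    vm_page_free_pa(pages, order);
}

Note that vm_page_alloc_pa()/vm_page_free_pa() are the low-level interface and do not touch vm_page_free_count; vm_page_grab_contig()/vm_page_free_contig() in the patch wrap them and maintain those counters.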