-rw-r--r-- | Makefrag.am           |   1 |
-rw-r--r-- | i386/i386at/biosmem.c | 831 |
-rw-r--r-- | i386/i386at/biosmem.h |  84 |
-rw-r--r-- | vm/vm_page.c          | 735 |
-rw-r--r-- | vm/vm_page.h          | 442 |
5 files changed, 1812 insertions(+), 281 deletions(-)
diff --git a/Makefrag.am b/Makefrag.am index b9d96c5..35a4884 100644 --- a/Makefrag.am +++ b/Makefrag.am @@ -259,6 +259,7 @@ libkernel_a_SOURCES += \ vm/vm_map.h \ vm/vm_object.c \ vm/vm_object.h \ + vm/vm_page.c \ vm/vm_page.h \ vm/vm_pageout.c \ vm/vm_pageout.h \ diff --git a/i386/i386at/biosmem.c b/i386/i386at/biosmem.c new file mode 100644 index 0000000..d666f1b --- /dev/null +++ b/i386/i386at/biosmem.c @@ -0,0 +1,831 @@ +/* + * Copyright (c) 2010-2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <kern/assert.h> +#include <kern/init.h> +#include <kern/macros.h> +#include <kern/panic.h> +#include <kern/param.h> +#include <kern/printk.h> +#include <kern/stddef.h> +#include <kern/stdint.h> +#include <kern/string.h> +#include <kern/types.h> +#include <machine/biosmem.h> +#include <machine/boot.h> +#include <machine/cpu.h> +#include <machine/elf.h> +#include <machine/multiboot.h> +#include <vm/vm_kmem.h> +#include <vm/vm_page.h> + +/* + * Maximum number of entries in the BIOS memory map. + * + * Because of adjustments of overlapping ranges, the memory map can grow + * to twice this size. + */ +#define BIOSMEM_MAX_MAP_SIZE 128 + +/* + * Memory range types. + */ +#define BIOSMEM_TYPE_AVAILABLE 1 +#define BIOSMEM_TYPE_RESERVED 2 +#define BIOSMEM_TYPE_ACPI 3 +#define BIOSMEM_TYPE_NVS 4 +#define BIOSMEM_TYPE_UNUSABLE 5 +#define BIOSMEM_TYPE_DISABLED 6 + +/* + * Memory map entry. + */ +struct biosmem_map_entry { + uint64_t base_addr; + uint64_t length; + unsigned int type; +}; + +/* + * Contiguous block of physical memory. + * + * Tha "available" range records what has been passed to the VM system as + * available inside the segment. + */ +struct biosmem_segment { + phys_addr_t start; + phys_addr_t end; + phys_addr_t avail_start; + phys_addr_t avail_end; +}; + +/* + * Memory map built from the information passed by the boot loader. + * + * If the boot loader didn't pass a valid memory map, a simple map is built + * based on the mem_lower and mem_upper multiboot fields. + */ +static struct biosmem_map_entry biosmem_map[BIOSMEM_MAX_MAP_SIZE * 2] + __bootdata; +static unsigned int biosmem_map_size __bootdata; + +/* + * Physical segment boundaries. + */ +static struct biosmem_segment biosmem_segments[VM_PAGE_MAX_SEGS] __bootdata; + +/* + * Boundaries of the simple bootstrap heap. + * + * This heap is located above BIOS memory. 
+ */ +static uint32_t biosmem_heap_start __bootdata; +static uint32_t biosmem_heap_cur __bootdata; +static uint32_t biosmem_heap_end __bootdata; + +static char biosmem_panic_toobig_msg[] __bootdata + = "biosmem: too many memory map entries"; +static char biosmem_panic_setup_msg[] __bootdata + = "biosmem: unable to set up the early memory allocator"; +static char biosmem_panic_noseg_msg[] __bootdata + = "biosmem: unable to find any memory segment"; +static char biosmem_panic_inval_msg[] __bootdata + = "biosmem: attempt to allocate 0 page"; +static char biosmem_panic_nomem_msg[] __bootdata + = "biosmem: unable to allocate memory"; + +static void __boot +biosmem_map_build(const struct multiboot_raw_info *mbi) +{ + struct multiboot_raw_mmap_entry *mb_entry, *mb_end; + struct biosmem_map_entry *start, *entry, *end; + unsigned long addr; + + addr = mbi->mmap_addr; + mb_entry = (struct multiboot_raw_mmap_entry *)addr; + mb_end = (struct multiboot_raw_mmap_entry *)(addr + mbi->mmap_length); + start = biosmem_map; + entry = start; + end = entry + BIOSMEM_MAX_MAP_SIZE; + + while ((mb_entry < mb_end) && (entry < end)) { + entry->base_addr = mb_entry->base_addr; + entry->length = mb_entry->length; + entry->type = mb_entry->type; + + mb_entry = (void *)mb_entry + sizeof(mb_entry->size) + mb_entry->size; + entry++; + } + + biosmem_map_size = entry - start; +} + +static void __boot +biosmem_map_build_simple(const struct multiboot_raw_info *mbi) +{ + struct biosmem_map_entry *entry; + + entry = biosmem_map; + entry->base_addr = 0; + entry->length = mbi->mem_lower << 10; + entry->type = BIOSMEM_TYPE_AVAILABLE; + + entry++; + entry->base_addr = BIOSMEM_END; + entry->length = mbi->mem_upper << 10; + entry->type = BIOSMEM_TYPE_AVAILABLE; + + biosmem_map_size = 2; +} + +static int __boot +biosmem_map_entry_is_invalid(const struct biosmem_map_entry *entry) +{ + return (entry->base_addr + entry->length) <= entry->base_addr; +} + +static void __boot +biosmem_map_filter(void) +{ + struct biosmem_map_entry *entry; + unsigned int i; + + i = 0; + + while (i < biosmem_map_size) { + entry = &biosmem_map[i]; + + if (biosmem_map_entry_is_invalid(entry)) { + biosmem_map_size--; + boot_memmove(entry, entry + 1, + (biosmem_map_size - i) * sizeof(*entry)); + continue; + } + + i++; + } +} + +static void __boot +biosmem_map_sort(void) +{ + struct biosmem_map_entry tmp; + unsigned int i, j; + + /* + * Simple insertion sort. + */ + for (i = 1; i < biosmem_map_size; i++) { + tmp = biosmem_map[i]; + + for (j = i - 1; j < i; j--) { + if (biosmem_map[j].base_addr < tmp.base_addr) + break; + + biosmem_map[j + 1] = biosmem_map[j]; + } + + biosmem_map[j + 1] = tmp; + } +} + +static void __boot +biosmem_map_adjust(void) +{ + struct biosmem_map_entry tmp, *a, *b, *first, *second; + uint64_t a_end, b_end, last_end; + unsigned int i, j, last_type; + + biosmem_map_filter(); + + /* + * Resolve overlapping areas, giving priority to most restrictive + * (i.e. numerically higher) types. 
+ */ + for (i = 0; i < biosmem_map_size; i++) { + a = &biosmem_map[i]; + a_end = a->base_addr + a->length; + + j = i + 1; + + while (j < biosmem_map_size) { + b = &biosmem_map[j]; + b_end = b->base_addr + b->length; + + if ((a->base_addr >= b_end) || (a_end <= b->base_addr)) { + j++; + continue; + } + + if (a->base_addr < b->base_addr) { + first = a; + second = b; + } else { + first = b; + second = a; + } + + if (a_end > b_end) { + last_end = a_end; + last_type = a->type; + } else { + last_end = b_end; + last_type = b->type; + } + + tmp.base_addr = second->base_addr; + tmp.length = MIN(a_end, b_end) - tmp.base_addr; + tmp.type = MAX(a->type, b->type); + first->length = tmp.base_addr - first->base_addr; + second->base_addr += tmp.length; + second->length = last_end - second->base_addr; + second->type = last_type; + + /* + * Filter out invalid entries. + */ + if (biosmem_map_entry_is_invalid(a) + && biosmem_map_entry_is_invalid(b)) { + *a = tmp; + biosmem_map_size--; + memmove(b, b + 1, (biosmem_map_size - j) * sizeof(*b)); + continue; + } else if (biosmem_map_entry_is_invalid(a)) { + *a = tmp; + j++; + continue; + } else if (biosmem_map_entry_is_invalid(b)) { + *b = tmp; + j++; + continue; + } + + if (tmp.type == a->type) + first = a; + else if (tmp.type == b->type) + first = b; + else { + + /* + * If the overlapping area can't be merged with one of its + * neighbors, it must be added as a new entry. + */ + + if (biosmem_map_size >= ARRAY_SIZE(biosmem_map)) + boot_panic(biosmem_panic_toobig_msg); + + biosmem_map[biosmem_map_size] = tmp; + biosmem_map_size++; + j++; + continue; + } + + if (first->base_addr > tmp.base_addr) + first->base_addr = tmp.base_addr; + + first->length += tmp.length; + j++; + } + } + + biosmem_map_sort(); +} + +static int __boot +biosmem_map_find_avail(phys_addr_t *phys_start, phys_addr_t *phys_end) +{ + const struct biosmem_map_entry *entry, *map_end; + phys_addr_t seg_start, seg_end; + uint64_t start, end; + + seg_start = (phys_addr_t)-1; + seg_end = (phys_addr_t)-1; + map_end = biosmem_map + biosmem_map_size; + + for (entry = biosmem_map; entry < map_end; entry++) { + if (entry->type != BIOSMEM_TYPE_AVAILABLE) + continue; + + start = vm_page_round(entry->base_addr); + + if (start >= *phys_end) + break; + + end = vm_page_trunc(entry->base_addr + entry->length); + + if ((start < end) && (start < *phys_end) && (end > *phys_start)) { + if (seg_start == (phys_addr_t)-1) + seg_start = start; + + seg_end = end; + } + } + + if ((seg_start == (phys_addr_t)-1) || (seg_end == (phys_addr_t)-1)) + return -1; + + if (seg_start > *phys_start) + *phys_start = seg_start; + + if (seg_end < *phys_end) + *phys_end = seg_end; + + return 0; +} + +static void __boot +biosmem_set_segment(unsigned int seg_index, phys_addr_t start, phys_addr_t end) +{ + biosmem_segments[seg_index].start = start; + biosmem_segments[seg_index].end = end; +} + +static phys_addr_t __boot +biosmem_segment_end(unsigned int seg_index) +{ + return biosmem_segments[seg_index].end; +} + +static phys_addr_t __boot +biosmem_segment_size(unsigned int seg_index) +{ + return biosmem_segments[seg_index].end - biosmem_segments[seg_index].start; +} + +static void __boot +biosmem_save_cmdline_sizes(struct multiboot_raw_info *mbi) +{ + struct multiboot_raw_module *mod; + uint32_t i; + + if (mbi->flags & MULTIBOOT_LOADER_CMDLINE) + mbi->unused0 = boot_strlen((char *)(unsigned long)mbi->cmdline) + 1; + + if (mbi->flags & MULTIBOOT_LOADER_MODULES) { + unsigned long addr; + + addr = mbi->mods_addr; + + for (i = 0; i < 
mbi->mods_count; i++) { + mod = (struct multiboot_raw_module *)addr + i; + mod->reserved = boot_strlen((char *)(unsigned long)mod->string) + 1; + } + } +} + +static void __boot +biosmem_find_boot_data_update(uint32_t min, uint32_t *start, uint32_t *end, + uint32_t data_start, uint32_t data_end) +{ + if ((min <= data_start) && (data_start < *start)) { + *start = data_start; + *end = data_end; + } +} + +/* + * Find the first boot data in the given range, and return their containing + * area (start address is returned directly, end address is returned in end). + * The following are considered boot data : + * - the kernel + * - the kernel command line + * - the module table + * - the modules + * - the modules command lines + * - the ELF section header table + * - the ELF .shstrtab, .symtab and .strtab sections + * + * If no boot data was found, 0 is returned, and the end address isn't set. + */ +static uint32_t __boot +biosmem_find_boot_data(const struct multiboot_raw_info *mbi, uint32_t min, + uint32_t max, uint32_t *endp) +{ + struct multiboot_raw_module *mod; + struct elf_shdr *shdr; + uint32_t i, start, end = end; + unsigned long tmp; + + start = max; + + biosmem_find_boot_data_update(min, &start, &end, (unsigned long)&_boot, + BOOT_VTOP((unsigned long)&_end)); + + if ((mbi->flags & MULTIBOOT_LOADER_CMDLINE) && (mbi->cmdline != 0)) + biosmem_find_boot_data_update(min, &start, &end, mbi->cmdline, + mbi->cmdline + mbi->unused0); + + if (mbi->flags & MULTIBOOT_LOADER_MODULES) { + i = mbi->mods_count * sizeof(struct multiboot_raw_module); + biosmem_find_boot_data_update(min, &start, &end, mbi->mods_addr, + mbi->mods_addr + i); + tmp = mbi->mods_addr; + + for (i = 0; i < mbi->mods_count; i++) { + mod = (struct multiboot_raw_module *)tmp + i; + biosmem_find_boot_data_update(min, &start, &end, mod->mod_start, + mod->mod_end); + + if (mod->string != 0) + biosmem_find_boot_data_update(min, &start, &end, mod->string, + mod->string + mod->reserved); + } + } + + if (mbi->flags & MULTIBOOT_LOADER_SHDR) { + tmp = mbi->shdr_num * mbi->shdr_size; + biosmem_find_boot_data_update(min, &start, &end, mbi->shdr_addr, + mbi->shdr_addr + tmp); + tmp = mbi->shdr_addr; + + for (i = 0; i < mbi->shdr_num; i++) { + shdr = (struct elf_shdr *)(tmp + (i * mbi->shdr_size)); + + if ((shdr->type != ELF_SHT_SYMTAB) + && (shdr->type != ELF_SHT_STRTAB)) + continue; + + biosmem_find_boot_data_update(min, &start, &end, shdr->addr, + shdr->addr + shdr->size); + } + } + + if (start == max) + return 0; + + *endp = end; + return start; +} + +static void __boot +biosmem_setup_allocator(struct multiboot_raw_info *mbi) +{ + uint32_t heap_start, heap_end, max_heap_start, max_heap_end; + uint32_t mem_end, next; + + /* + * Find some memory for the heap. Look for the largest unused area in + * upper memory, carefully avoiding all boot data. 
+ */ + mem_end = vm_page_trunc((mbi->mem_upper + 1024) << 10); + +#ifndef __LP64__ + if (mem_end > VM_PAGE_DIRECTMAP_LIMIT) + mem_end = VM_PAGE_DIRECTMAP_LIMIT; +#endif /* __LP64__ */ + + max_heap_start = 0; + max_heap_end = 0; + next = BIOSMEM_END; + + do { + heap_start = next; + heap_end = biosmem_find_boot_data(mbi, heap_start, mem_end, &next); + + if (heap_end == 0) { + heap_end = mem_end; + next = 0; + } + + if ((heap_end - heap_start) > (max_heap_end - max_heap_start)) { + max_heap_start = heap_start; + max_heap_end = heap_end; + } + } while (next != 0); + + max_heap_start = vm_page_round(max_heap_start); + max_heap_end = vm_page_trunc(max_heap_end); + + if (max_heap_start >= max_heap_end) + boot_panic(biosmem_panic_setup_msg); + + biosmem_heap_start = max_heap_start; + biosmem_heap_end = max_heap_end; + biosmem_heap_cur = biosmem_heap_end; +} + +void __boot +biosmem_bootstrap(struct multiboot_raw_info *mbi) +{ + phys_addr_t phys_start, phys_end; + int error; + + if (mbi->flags & MULTIBOOT_LOADER_MMAP) + biosmem_map_build(mbi); + else + biosmem_map_build_simple(mbi); + + biosmem_map_adjust(); + + phys_start = BIOSMEM_BASE; + phys_end = VM_PAGE_DMA_LIMIT; + error = biosmem_map_find_avail(&phys_start, &phys_end); + + if (error) + boot_panic(biosmem_panic_noseg_msg); + + biosmem_set_segment(VM_PAGE_SEG_DMA, phys_start, phys_end); + + phys_start = VM_PAGE_DMA_LIMIT; +#ifdef VM_PAGE_DMA32_LIMIT + phys_end = VM_PAGE_DMA32_LIMIT; + error = biosmem_map_find_avail(&phys_start, &phys_end); + + if (error) + goto out; + + biosmem_set_segment(VM_PAGE_SEG_DMA32, phys_start, phys_end); + + phys_start = VM_PAGE_DMA32_LIMIT; +#endif /* VM_PAGE_DMA32_LIMIT */ + phys_end = VM_PAGE_DIRECTMAP_LIMIT; + error = biosmem_map_find_avail(&phys_start, &phys_end); + + if (error) + goto out; + + biosmem_set_segment(VM_PAGE_SEG_DIRECTMAP, phys_start, phys_end); + + phys_start = VM_PAGE_DIRECTMAP_LIMIT; + phys_end = VM_PAGE_HIGHMEM_LIMIT; + error = biosmem_map_find_avail(&phys_start, &phys_end); + + if (error) + goto out; + + biosmem_set_segment(VM_PAGE_SEG_HIGHMEM, phys_start, phys_end); + +out: + + /* + * The kernel and modules command lines will be memory mapped later + * during initialization. Their respective sizes must be saved. 
+ */ + biosmem_save_cmdline_sizes(mbi); + biosmem_setup_allocator(mbi); +} + +void * __boot +biosmem_bootalloc(unsigned int nr_pages) +{ + unsigned long addr, size; + + size = vm_page_ptoa(nr_pages); + + if (size == 0) + boot_panic(biosmem_panic_inval_msg); + + /* Top-down allocation to avoid unnecessarily filling DMA segments */ + addr = biosmem_heap_cur - size; + + if ((addr < biosmem_heap_start) || (addr > biosmem_heap_cur)) + boot_panic(biosmem_panic_nomem_msg); + + biosmem_heap_cur = addr; + return boot_memset((void *)addr, 0, size); +} + +phys_addr_t __boot +biosmem_directmap_size(void) +{ + if (biosmem_segment_size(VM_PAGE_SEG_DIRECTMAP) != 0) + return biosmem_segment_end(VM_PAGE_SEG_DIRECTMAP); + else if (biosmem_segment_size(VM_PAGE_SEG_DMA32) != 0) + return biosmem_segment_end(VM_PAGE_SEG_DMA32); + else + return biosmem_segment_end(VM_PAGE_SEG_DMA); +} + +static const char * __init +biosmem_type_desc(unsigned int type) +{ + switch (type) { + case BIOSMEM_TYPE_AVAILABLE: + return "available"; + case BIOSMEM_TYPE_RESERVED: + return "reserved"; + case BIOSMEM_TYPE_ACPI: + return "ACPI"; + case BIOSMEM_TYPE_NVS: + return "ACPI NVS"; + case BIOSMEM_TYPE_UNUSABLE: + return "unusable"; + default: + return "unknown (reserved)"; + } +} + +static void __init +biosmem_map_show(void) +{ + const struct biosmem_map_entry *entry, *end; + + printk("biosmem: physical memory map:\n"); + + for (entry = biosmem_map, end = entry + biosmem_map_size; + entry < end; + entry++) + printk("biosmem: %018llx:%018llx, %s\n", entry->base_addr, + entry->base_addr + entry->length, + biosmem_type_desc(entry->type)); + + printk("biosmem: heap: %x-%x\n", biosmem_heap_start, biosmem_heap_end); +} + +static void __init +biosmem_load_segment(struct biosmem_segment *seg, uint64_t max_phys_end, + phys_addr_t phys_start, phys_addr_t phys_end, + phys_addr_t avail_start, phys_addr_t avail_end) +{ + unsigned int seg_index; + + seg_index = seg - biosmem_segments; + + if (phys_end > max_phys_end) { + if (max_phys_end <= phys_start) { + printk("biosmem: warning: segment %s physically unreachable, " + "not loaded\n", vm_page_seg_name(seg_index)); + return; + } + + printk("biosmem: warning: segment %s truncated to %#llx\n", + vm_page_seg_name(seg_index), max_phys_end); + phys_end = max_phys_end; + } + + if ((avail_start < phys_start) || (avail_start >= phys_end)) + avail_start = phys_start; + + if ((avail_end <= phys_start) || (avail_end > phys_end)) + avail_end = phys_end; + + seg->avail_start = avail_start; + seg->avail_end = avail_end; + vm_page_load(seg_index, phys_start, phys_end, avail_start, avail_end); +} + +void __init +biosmem_setup(void) +{ + uint64_t max_phys_end; + struct biosmem_segment *seg; + struct cpu *cpu; + unsigned int i; + + biosmem_map_show(); + + cpu = cpu_current(); + max_phys_end = (cpu->phys_addr_width == 0) + ? 
(uint64_t)-1 + : (uint64_t)1 << cpu->phys_addr_width; + + for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) { + if (biosmem_segment_size(i) == 0) + break; + + seg = &biosmem_segments[i]; + biosmem_load_segment(seg, max_phys_end, seg->start, seg->end, + biosmem_heap_start, biosmem_heap_cur); + } +} + +static void __init +biosmem_free_usable_range(phys_addr_t start, phys_addr_t end) +{ + struct vm_page *page; + + printk("biosmem: release to vm_page: %llx-%llx (%lluk)\n", + (unsigned long long)start, (unsigned long long)end, + (unsigned long long)((end - start) >> 10)); + + while (start < end) { + page = vm_page_lookup(start); + assert(page != NULL); + vm_page_manage(page); + start += PAGE_SIZE; + } +} + +static void __init +biosmem_free_usable_update_start(phys_addr_t *start, phys_addr_t res_start, + phys_addr_t res_end) +{ + if ((*start >= res_start) && (*start < res_end)) + *start = res_end; +} + +static phys_addr_t __init +biosmem_free_usable_start(phys_addr_t start) +{ + const struct biosmem_segment *seg; + unsigned int i; + + biosmem_free_usable_update_start(&start, (unsigned long)&_boot, + BOOT_VTOP((unsigned long)&_end)); + biosmem_free_usable_update_start(&start, biosmem_heap_start, + biosmem_heap_end); + + for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) { + seg = &biosmem_segments[i]; + biosmem_free_usable_update_start(&start, seg->avail_start, + seg->avail_end); + } + + return start; +} + +static int __init +biosmem_free_usable_reserved(phys_addr_t addr) +{ + const struct biosmem_segment *seg; + unsigned int i; + + if ((addr >= (unsigned long)&_boot) + && (addr < BOOT_VTOP((unsigned long)&_end))) + return 1; + + if ((addr >= biosmem_heap_start) && (addr < biosmem_heap_end)) + return 1; + + for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) { + seg = &biosmem_segments[i]; + + if ((addr >= seg->avail_start) && (addr < seg->avail_end)) + return 1; + } + + return 0; +} + +static phys_addr_t __init +biosmem_free_usable_end(phys_addr_t start, phys_addr_t entry_end) +{ + while (start < entry_end) { + if (biosmem_free_usable_reserved(start)) + break; + + start += PAGE_SIZE; + } + + return start; +} + +static void __init +biosmem_free_usable_entry(phys_addr_t start, phys_addr_t end) +{ + phys_addr_t entry_end; + + entry_end = end; + + for (;;) { + start = biosmem_free_usable_start(start); + + if (start >= entry_end) + return; + + end = biosmem_free_usable_end(start, entry_end); + biosmem_free_usable_range(start, end); + start = end; + } +} + +void __init +biosmem_free_usable(void) +{ + struct biosmem_map_entry *entry; + uint64_t start, end; + unsigned int i; + + for (i = 0; i < biosmem_map_size; i++) { + entry = &biosmem_map[i]; + + if (entry->type != BIOSMEM_TYPE_AVAILABLE) + continue; + + start = vm_page_round(entry->base_addr); + + if (start >= VM_PAGE_HIGHMEM_LIMIT) + break; + + end = vm_page_trunc(entry->base_addr + entry->length); + + if (start < BIOSMEM_BASE) + start = BIOSMEM_BASE; + + biosmem_free_usable_entry(start, end); + } +} diff --git a/i386/i386at/biosmem.h b/i386/i386at/biosmem.h new file mode 100644 index 0000000..b32e027 --- /dev/null +++ b/i386/i386at/biosmem.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2010-2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _X86_BIOSMEM_H +#define _X86_BIOSMEM_H + +#include <kern/types.h> +#include <machine/multiboot.h> + +/* + * Address where the address of the Extended BIOS Data Area segment can be + * found. + */ +#define BIOSMEM_EBDA_PTR 0x40e + +/* + * Significant low memory addresses. + * + * The first 64 KiB are reserved for various reasons (e.g. to preserve BIOS + * data and to work around data corruption on some hardware). + */ +#define BIOSMEM_BASE 0x010000 +#define BIOSMEM_BASE_END 0x0a0000 +#define BIOSMEM_EXT_ROM 0x0e0000 +#define BIOSMEM_ROM 0x0f0000 +#define BIOSMEM_END 0x100000 + +/* + * Early initialization of the biosmem module. + * + * This function processes the given multiboot data for BIOS-provided + * memory information, and sets up a bootstrap physical page allocator. + * + * It is called before paging is enabled. + */ +void biosmem_bootstrap(struct multiboot_raw_info *mbi); + +/* + * Allocate contiguous physical pages during bootstrap. + * + * This function is called before paging is enabled. It should only be used + * to allocate initial page table pages. Those pages are later loaded into + * the VM system (as reserved pages) which means they can be freed like other + * regular pages. Users should fix up the type of those pages once the VM + * system is initialized. + */ +void * biosmem_bootalloc(unsigned int nr_pages); + +/* + * Return the amount of physical memory that can be directly mapped. + * + * This includes the size of both the DMA/DMA32 and DIRECTMAP segments. + */ +phys_addr_t biosmem_directmap_size(void); + +/* + * Set up physical memory based on the information obtained during bootstrap + * and load it in the VM system. + */ +void biosmem_setup(void); + +/* + * Free all usable memory. + * + * This includes ranges that weren't part of the bootstrap allocator initial + * heap, e.g. because they contained boot data. + */ +void biosmem_free_usable(void); + +#endif /* _X86_BIOSMEM_H */ diff --git a/vm/vm_page.c b/vm/vm_page.c new file mode 100644 index 0000000..cc184ca --- /dev/null +++ b/vm/vm_page.c @@ -0,0 +1,735 @@ +/* + * Copyright (c) 2010-2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * This implementation uses the binary buddy system to manage its heap. + * Descriptions of the buddy system can be found in the following works : + * - "UNIX Internals: The New Frontiers", by Uresh Vahalia. + * - "Dynamic Storage Allocation: A Survey and Critical Review", + * by Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles. 
+ * + * In addition, this allocator uses per-CPU pools of pages for order 0 + * (i.e. single page) allocations. These pools act as caches (but are named + * differently to avoid confusion with CPU caches) that reduce contention on + * multiprocessor systems. When a pool is empty and cannot provide a page, + * it is filled by transferring multiple pages from the backend buddy system. + * The symmetric case is handled likewise. + */ + +#include <kern/assert.h> +#include <kern/init.h> +#include <kern/list.h> +#include <kern/macros.h> +#include <kern/mutex.h> +#include <kern/panic.h> +#include <kern/param.h> +#include <kern/printk.h> +#include <kern/sprintf.h> +#include <kern/stddef.h> +#include <kern/string.h> +#include <kern/thread.h> +#include <kern/types.h> +#include <machine/cpu.h> +#include <machine/pmap.h> +#include <vm/vm_kmem.h> +#include <vm/vm_page.h> + +/* + * Number of free block lists per segment. + */ +#define VM_PAGE_NR_FREE_LISTS 11 + +/* + * The size of a CPU pool is computed by dividing the number of pages in its + * containing segment by this value. + */ +#define VM_PAGE_CPU_POOL_RATIO 1024 + +/* + * Maximum number of pages in a CPU pool. + */ +#define VM_PAGE_CPU_POOL_MAX_SIZE 128 + +/* + * The transfer size of a CPU pool is computed by dividing the pool size by + * this value. + */ +#define VM_PAGE_CPU_POOL_TRANSFER_RATIO 2 + +/* + * Per-processor cache of pages. + */ +struct vm_page_cpu_pool { + struct mutex lock; + int size; + int transfer_size; + int nr_pages; + struct list pages; +} __aligned(CPU_L1_SIZE); + +/* + * Special order value for pages that aren't in a free list. Such pages are + * either allocated, or part of a free block of pages but not the head page. + */ +#define VM_PAGE_ORDER_UNLISTED ((unsigned short)-1) + +/* + * Doubly-linked list of free blocks. + */ +struct vm_page_free_list { + unsigned long size; + struct list blocks; +}; + +/* + * Segment name buffer size. + */ +#define VM_PAGE_NAME_SIZE 16 + +/* + * Segment of contiguous memory. + */ +struct vm_page_seg { + struct vm_page_cpu_pool cpu_pools[MAX_CPUS]; + + phys_addr_t start; + phys_addr_t end; + struct vm_page *pages; + struct vm_page *pages_end; + struct mutex lock; + struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS]; + unsigned long nr_free_pages; +}; + +/* + * Bootstrap information about a segment. + */ +struct vm_page_boot_seg { + phys_addr_t start; + phys_addr_t end; + phys_addr_t avail_start; + phys_addr_t avail_end; +}; + +static int vm_page_is_ready __read_mostly; + +/* + * Segment table. + * + * The system supports a maximum of 4 segments : + * - DMA: suitable for DMA + * - DMA32: suitable for DMA when devices support 32-bits addressing + * - DIRECTMAP: direct physical mapping, allows direct access from + * the kernel with a simple offset translation + * - HIGHMEM: must be mapped before it can be accessed + * + * Segments are ordered by priority, 0 being the lowest priority. Their + * relative priorities are DMA < DMA32 < DIRECTMAP < HIGHMEM. Some segments + * may actually be aliases for others, e.g. if DMA is always possible from + * the direct physical mapping, DMA and DMA32 are aliases for DIRECTMAP, + * in which case the segment table contains DIRECTMAP and HIGHMEM only. + */ +static struct vm_page_seg vm_page_segs[VM_PAGE_MAX_SEGS]; + +/* + * Bootstrap segment table. + */ +static struct vm_page_boot_seg vm_page_boot_segs[VM_PAGE_MAX_SEGS] __initdata; + +/* + * Number of loaded segments. 
+ */ +static unsigned int vm_page_segs_size __read_mostly; + +static void __init +vm_page_init(struct vm_page *page, unsigned short seg_index, phys_addr_t pa) +{ + memset(page, 0, sizeof(*page)); + page->type = VM_PAGE_RESERVED; + page->seg_index = seg_index; + page->order = VM_PAGE_ORDER_UNLISTED; + page->phys_addr = pa; +} + +void +vm_page_set_type(struct vm_page *page, unsigned int order, unsigned short type) +{ + unsigned int i, nr_pages; + + nr_pages = 1 << order; + + for (i = 0; i < nr_pages; i++) + page[i].type = type; +} + +static void __init +vm_page_free_list_init(struct vm_page_free_list *free_list) +{ + free_list->size = 0; + list_init(&free_list->blocks); +} + +static inline void +vm_page_free_list_insert(struct vm_page_free_list *free_list, + struct vm_page *page) +{ + assert(page->order == VM_PAGE_ORDER_UNLISTED); + + free_list->size++; + list_insert_head(&free_list->blocks, &page->node); +} + +static inline void +vm_page_free_list_remove(struct vm_page_free_list *free_list, + struct vm_page *page) +{ + assert(page->order != VM_PAGE_ORDER_UNLISTED); + + free_list->size--; + list_remove(&page->node); +} + +static struct vm_page * +vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order) +{ + struct vm_page_free_list *free_list = free_list; + struct vm_page *page, *buddy; + unsigned int i; + + assert(order < VM_PAGE_NR_FREE_LISTS); + + for (i = order; i < VM_PAGE_NR_FREE_LISTS; i++) { + free_list = &seg->free_lists[i]; + + if (free_list->size != 0) + break; + } + + if (i == VM_PAGE_NR_FREE_LISTS) + return NULL; + + page = list_first_entry(&free_list->blocks, struct vm_page, node); + vm_page_free_list_remove(free_list, page); + page->order = VM_PAGE_ORDER_UNLISTED; + + while (i > order) { + i--; + buddy = &page[1 << i]; + vm_page_free_list_insert(&seg->free_lists[i], buddy); + buddy->order = i; + } + + seg->nr_free_pages -= (1 << order); + return page; +} + +static void +vm_page_seg_free_to_buddy(struct vm_page_seg *seg, struct vm_page *page, + unsigned int order) +{ + struct vm_page *buddy; + phys_addr_t pa, buddy_pa; + unsigned int nr_pages; + + assert(page >= seg->pages); + assert(page < seg->pages_end); + assert(page->order == VM_PAGE_ORDER_UNLISTED); + assert(order < VM_PAGE_NR_FREE_LISTS); + + nr_pages = (1 << order); + pa = page->phys_addr; + + while (order < (VM_PAGE_NR_FREE_LISTS - 1)) { + buddy_pa = pa ^ vm_page_ptoa(1 << order); + + if ((buddy_pa < seg->start) || (buddy_pa >= seg->end)) + break; + + buddy = &seg->pages[vm_page_atop(buddy_pa - seg->start)]; + + if (buddy->order != order) + break; + + vm_page_free_list_remove(&seg->free_lists[order], buddy); + buddy->order = VM_PAGE_ORDER_UNLISTED; + order++; + pa &= -vm_page_ptoa(1 << order); + page = &seg->pages[vm_page_atop(pa - seg->start)]; + } + + vm_page_free_list_insert(&seg->free_lists[order], page); + page->order = order; + seg->nr_free_pages += nr_pages; +} + +static void __init +vm_page_cpu_pool_init(struct vm_page_cpu_pool *cpu_pool, int size) +{ + mutex_init(&cpu_pool->lock); + cpu_pool->size = size; + cpu_pool->transfer_size = (size + VM_PAGE_CPU_POOL_TRANSFER_RATIO - 1) + / VM_PAGE_CPU_POOL_TRANSFER_RATIO; + cpu_pool->nr_pages = 0; + list_init(&cpu_pool->pages); +} + +static inline struct vm_page_cpu_pool * +vm_page_cpu_pool_get(struct vm_page_seg *seg) +{ + return &seg->cpu_pools[cpu_id()]; +} + +static inline struct vm_page * +vm_page_cpu_pool_pop(struct vm_page_cpu_pool *cpu_pool) +{ + struct vm_page *page; + + assert(cpu_pool->nr_pages != 0); + cpu_pool->nr_pages--; + page = 
list_first_entry(&cpu_pool->pages, struct vm_page, node); + list_remove(&page->node); + return page; +} + +static inline void +vm_page_cpu_pool_push(struct vm_page_cpu_pool *cpu_pool, struct vm_page *page) +{ + assert(cpu_pool->nr_pages < cpu_pool->size); + cpu_pool->nr_pages++; + list_insert_head(&cpu_pool->pages, &page->node); +} + +static int +vm_page_cpu_pool_fill(struct vm_page_cpu_pool *cpu_pool, + struct vm_page_seg *seg) +{ + struct vm_page *page; + int i; + + assert(cpu_pool->nr_pages == 0); + + mutex_lock(&seg->lock); + + for (i = 0; i < cpu_pool->transfer_size; i++) { + page = vm_page_seg_alloc_from_buddy(seg, 0); + + if (page == NULL) + break; + + vm_page_cpu_pool_push(cpu_pool, page); + } + + mutex_unlock(&seg->lock); + + return i; +} + +static void +vm_page_cpu_pool_drain(struct vm_page_cpu_pool *cpu_pool, + struct vm_page_seg *seg) +{ + struct vm_page *page; + int i; + + assert(cpu_pool->nr_pages == cpu_pool->size); + + mutex_lock(&seg->lock); + + for (i = cpu_pool->transfer_size; i > 0; i--) { + page = vm_page_cpu_pool_pop(cpu_pool); + vm_page_seg_free_to_buddy(seg, page, 0); + } + + mutex_unlock(&seg->lock); +} + +static phys_addr_t __init +vm_page_seg_size(struct vm_page_seg *seg) +{ + return seg->end - seg->start; +} + +static int __init +vm_page_seg_compute_pool_size(struct vm_page_seg *seg) +{ + phys_addr_t size; + + size = vm_page_atop(vm_page_seg_size(seg)) / VM_PAGE_CPU_POOL_RATIO; + + if (size == 0) + size = 1; + else if (size > VM_PAGE_CPU_POOL_MAX_SIZE) + size = VM_PAGE_CPU_POOL_MAX_SIZE; + + return size; +} + +static void __init +vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end, + struct vm_page *pages) +{ + phys_addr_t pa; + int pool_size; + unsigned int i; + + seg->start = start; + seg->end = end; + pool_size = vm_page_seg_compute_pool_size(seg); + + for (i = 0; i < ARRAY_SIZE(seg->cpu_pools); i++) + vm_page_cpu_pool_init(&seg->cpu_pools[i], pool_size); + + seg->pages = pages; + seg->pages_end = pages + vm_page_atop(vm_page_seg_size(seg)); + mutex_init(&seg->lock); + + for (i = 0; i < ARRAY_SIZE(seg->free_lists); i++) + vm_page_free_list_init(&seg->free_lists[i]); + + seg->nr_free_pages = 0; + i = seg - vm_page_segs; + + for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE) + vm_page_init(&pages[vm_page_atop(pa - seg->start)], i, pa); +} + +static struct vm_page * +vm_page_seg_alloc(struct vm_page_seg *seg, unsigned int order, + unsigned short type) +{ + struct vm_page_cpu_pool *cpu_pool; + struct vm_page *page; + int filled; + + assert(order < VM_PAGE_NR_FREE_LISTS); + + if (order == 0) { + thread_pin(); + cpu_pool = vm_page_cpu_pool_get(seg); + mutex_lock(&cpu_pool->lock); + + if (cpu_pool->nr_pages == 0) { + filled = vm_page_cpu_pool_fill(cpu_pool, seg); + + if (!filled) { + mutex_unlock(&cpu_pool->lock); + thread_unpin(); + return NULL; + } + } + + page = vm_page_cpu_pool_pop(cpu_pool); + mutex_unlock(&cpu_pool->lock); + thread_unpin(); + } else { + mutex_lock(&seg->lock); + page = vm_page_seg_alloc_from_buddy(seg, order); + mutex_unlock(&seg->lock); + } + + assert(page->type == VM_PAGE_FREE); + vm_page_set_type(page, order, type); + return page; +} + +static void +vm_page_seg_free(struct vm_page_seg *seg, struct vm_page *page, + unsigned int order) +{ + struct vm_page_cpu_pool *cpu_pool; + + assert(page->type != VM_PAGE_FREE); + assert(order < VM_PAGE_NR_FREE_LISTS); + + vm_page_set_type(page, order, VM_PAGE_FREE); + + if (order == 0) { + thread_pin(); + cpu_pool = vm_page_cpu_pool_get(seg); + mutex_lock(&cpu_pool->lock); + + 
if (cpu_pool->nr_pages == cpu_pool->size) + vm_page_cpu_pool_drain(cpu_pool, seg); + + vm_page_cpu_pool_push(cpu_pool, page); + mutex_unlock(&cpu_pool->lock); + thread_unpin(); + } else { + mutex_lock(&seg->lock); + vm_page_seg_free_to_buddy(seg, page, order); + mutex_unlock(&seg->lock); + } +} + +void __init +vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end, + phys_addr_t avail_start, phys_addr_t avail_end) +{ + struct vm_page_boot_seg *seg; + + assert(seg_index < ARRAY_SIZE(vm_page_boot_segs)); + assert(vm_page_aligned(start)); + assert(vm_page_aligned(end)); + assert(vm_page_aligned(avail_start)); + assert(vm_page_aligned(avail_end)); + assert(start < end); + assert(start <= avail_start); + assert(avail_end <= end); + assert(vm_page_segs_size < ARRAY_SIZE(vm_page_boot_segs)); + + seg = &vm_page_boot_segs[seg_index]; + seg->start = start; + seg->end = end; + seg->avail_start = avail_start; + seg->avail_end = avail_end; + vm_page_segs_size++; +} + +int +vm_page_ready(void) +{ + return vm_page_is_ready; +} + +static unsigned int +vm_page_select_alloc_seg(unsigned int selector) +{ + unsigned int seg_index; + + switch (selector) { + case VM_PAGE_SEL_DMA: + seg_index = VM_PAGE_SEG_DMA; + break; + case VM_PAGE_SEL_DMA32: + seg_index = VM_PAGE_SEG_DMA32; + break; + case VM_PAGE_SEL_DIRECTMAP: + seg_index = VM_PAGE_SEG_DIRECTMAP; + break; + case VM_PAGE_SEL_HIGHMEM: + seg_index = VM_PAGE_SEG_HIGHMEM; + break; + default: + panic("vm_page: invalid selector"); + } + + return MIN(vm_page_segs_size - 1, seg_index); +} + +static int __init +vm_page_boot_seg_loaded(const struct vm_page_boot_seg *seg) +{ + return (seg->end != 0); +} + +static void __init +vm_page_check_boot_segs(void) +{ + unsigned int i; + int expect_loaded; + + if (vm_page_segs_size == 0) + panic("vm_page: no physical memory loaded"); + + for (i = 0; i < ARRAY_SIZE(vm_page_boot_segs); i++) { + expect_loaded = (i < vm_page_segs_size); + + if (vm_page_boot_seg_loaded(&vm_page_boot_segs[i]) == expect_loaded) + continue; + + panic("vm_page: invalid boot segment table"); + } +} + +static phys_addr_t __init +vm_page_boot_seg_size(struct vm_page_boot_seg *seg) +{ + return seg->end - seg->start; +} + +static phys_addr_t __init +vm_page_boot_seg_avail_size(struct vm_page_boot_seg *seg) +{ + return seg->avail_end - seg->avail_start; +} + +static void * __init +vm_page_bootalloc(size_t size) +{ + struct vm_page_boot_seg *seg; + phys_addr_t pa; + unsigned int i; + + for (i = vm_page_select_alloc_seg(VM_PAGE_SEL_DIRECTMAP); + i < vm_page_segs_size; + i--) { + seg = &vm_page_boot_segs[i]; + + if (size <= vm_page_boot_seg_avail_size(seg)) { + pa = seg->avail_start; + seg->avail_start += vm_page_round(size); + return (void *)vm_page_direct_va(pa); + } + } + + panic("vm_page: no physical memory available"); +} + +void __init +vm_page_setup(void) +{ + struct vm_page_boot_seg *boot_seg; + struct vm_page_seg *seg; + struct vm_page *table, *page, *end; + size_t nr_pages, table_size; + unsigned long va; + unsigned int i; + phys_addr_t pa; + + vm_page_check_boot_segs(); + + /* + * Compute the page table size. 
+ */ + nr_pages = 0; + + for (i = 0; i < vm_page_segs_size; i++) + nr_pages += vm_page_atop(vm_page_boot_seg_size(&vm_page_boot_segs[i])); + + table_size = vm_page_round(nr_pages * sizeof(struct vm_page)); + printk("vm_page: page table size: %zu entries (%zuk)\n", nr_pages, + table_size >> 10); + table = vm_page_bootalloc(table_size); + va = (unsigned long)table; + + /* + * Initialize the segments, associating them to the page table. When + * the segments are initialized, all their pages are set allocated. + * Pages are then released, which populates the free lists. + */ + for (i = 0; i < vm_page_segs_size; i++) { + seg = &vm_page_segs[i]; + boot_seg = &vm_page_boot_segs[i]; + vm_page_seg_init(seg, boot_seg->start, boot_seg->end, table); + page = seg->pages + vm_page_atop(boot_seg->avail_start + - boot_seg->start); + end = seg->pages + vm_page_atop(boot_seg->avail_end + - boot_seg->start); + + while (page < end) { + page->type = VM_PAGE_FREE; + vm_page_seg_free_to_buddy(seg, page, 0); + page++; + } + + table += vm_page_atop(vm_page_seg_size(seg)); + } + + while (va < (unsigned long)table) { + pa = vm_page_direct_pa(va); + page = vm_page_lookup(pa); + assert((page != NULL) && (page->type == VM_PAGE_RESERVED)); + page->type = VM_PAGE_TABLE; + va += PAGE_SIZE; + } + + vm_page_is_ready = 1; +} + +void __init +vm_page_manage(struct vm_page *page) +{ + assert(page->seg_index < ARRAY_SIZE(vm_page_segs)); + assert(page->type == VM_PAGE_RESERVED); + + vm_page_set_type(page, 0, VM_PAGE_FREE); + vm_page_seg_free_to_buddy(&vm_page_segs[page->seg_index], page, 0); +} + +struct vm_page * +vm_page_lookup(phys_addr_t pa) +{ + struct vm_page_seg *seg; + unsigned int i; + + for (i = 0; i < vm_page_segs_size; i++) { + seg = &vm_page_segs[i]; + + if ((pa >= seg->start) && (pa < seg->end)) + return &seg->pages[vm_page_atop(pa - seg->start)]; + } + + return NULL; +} + +struct vm_page * +vm_page_alloc(unsigned int order, unsigned int selector, unsigned short type) +{ + struct vm_page *page; + unsigned int i; + + for (i = vm_page_select_alloc_seg(selector); i < vm_page_segs_size; i--) { + page = vm_page_seg_alloc(&vm_page_segs[i], order, type); + + if (page != NULL) + return page; + } + + if (type == VM_PAGE_PMAP) + panic("vm_page: unable to allocate pmap page"); + + return NULL; +} + +void +vm_page_free(struct vm_page *page, unsigned int order) +{ + assert(page->seg_index < ARRAY_SIZE(vm_page_segs)); + + vm_page_seg_free(&vm_page_segs[page->seg_index], page, order); +} + +const char * +vm_page_seg_name(unsigned int seg_index) +{ + /* Don't use a switch statement since segments can be aliased */ + if (seg_index == VM_PAGE_SEG_HIGHMEM) + return "HIGHMEM"; + else if (seg_index == VM_PAGE_SEG_DIRECTMAP) + return "DIRECTMAP"; + else if (seg_index == VM_PAGE_SEG_DMA32) + return "DMA32"; + else if (seg_index == VM_PAGE_SEG_DMA) + return "DMA"; + else + panic("vm_page: invalid segment index"); +} + +void +vm_page_info(void) +{ + struct vm_page_seg *seg; + unsigned long pages; + unsigned int i; + + for (i = 0; i < vm_page_segs_size; i++) { + seg = &vm_page_segs[i]; + pages = (unsigned long)(seg->pages_end - seg->pages); + printk("vm_page: %s: pages: %lu (%luM), free: %lu (%luM)\n", + vm_page_seg_name(i), pages, pages >> (20 - PAGE_SHIFT), + seg->nr_free_pages, seg->nr_free_pages >> (20 - PAGE_SHIFT)); + } +} diff --git a/vm/vm_page.h b/vm/vm_page.h index dd571e2..23c8c47 100644 --- a/vm/vm_page.h +++ b/vm/vm_page.h @@ -1,315 +1,195 @@ -/* - * Mach Operating System - * Copyright (c) 1993-1988 Carnegie Mellon University - 
* All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ /* - * File: vm/vm_page.h - * Author: Avadis Tevanian, Jr., Michael Wayne Young - * Date: 1985 + * Copyright (c) 2010-2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. * - * Resident memory system definitions. + * + * Physical page management. */ -#ifndef _VM_VM_PAGE_H_ -#define _VM_VM_PAGE_H_ - -#include <mach/boolean.h> -#include <mach/vm_prot.h> -#include <mach/vm_param.h> -#include <vm/vm_object.h> -#include <vm/vm_types.h> -#include <kern/queue.h> -#include <kern/lock.h> +#ifndef _VM_VM_PAGE_H +#define _VM_VM_PAGE_H +#include <kern/assert.h> +#include <kern/list.h> +#include <kern/log2.h> #include <kern/macros.h> -#include <kern/sched_prim.h> /* definitions of wait/wakeup */ +//#include <kern/param.h> +//#include <kern/stddef.h> +//#include <kern/types.h> +#include <machine/pmap.h> -#if MACH_VM_DEBUG -#include <mach_debug/hash_info.h> -#endif +/* + * Address/page conversion and rounding macros (not inline functions to + * be easily usable on both virtual and physical addresses, which may not + * have the same type size). + */ +#define vm_page_atop(addr) ((addr) >> PAGE_SHIFT) +#define vm_page_ptoa(page) ((page) << PAGE_SHIFT) +#define vm_page_trunc(addr) P2ALIGN(addr, PAGE_SIZE) +#define vm_page_round(addr) P2ROUND(addr, PAGE_SIZE) +#define vm_page_aligned(addr) P2ALIGNED(addr, PAGE_SIZE) /* - * Management of resident (logical) pages. - * - * A small structure is kept for each resident - * page, indexed by page number. Each structure - * is an element of several lists: - * - * A hash table bucket used to quickly - * perform object/offset lookups - * - * A list of all pages for a given object, - * so they can be quickly deactivated at - * time of deallocation. + * Segment selectors. * - * An ordered list of pages due for pageout. - * - * In addition, the structure contains the object - * and offset to which this page belongs (for pageout), - * and sundry status bits. 
+ * Selector-to-segment-list translation table : + * DMA DMA + * DMA32 DMA32 DMA + * DIRECTMAP DIRECTMAP DMA32 DMA + * HIGHMEM HIGHMEM DIRECTMAP DMA32 DMA + */ +#define VM_PAGE_SEL_DMA 0 +#define VM_PAGE_SEL_DMA32 1 +#define VM_PAGE_SEL_DIRECTMAP 2 +#define VM_PAGE_SEL_HIGHMEM 3 + +/* + * Page usage types. * - * Fields in this structure are locked either by the lock on the - * object that the page belongs to (O) or by the lock on the page - * queues (P). [Some fields require that both locks be held to - * change that field; holding either lock is sufficient to read.] + * Failing to allocate pmap pages will cause a kernel panic. + * TODO Obviously, this needs to be addressed, e.g. with a reserved pool of + * pages. */ +#define VM_PAGE_FREE 0 /* Page unused */ +#define VM_PAGE_RESERVED 1 /* Page reserved at boot time */ +#define VM_PAGE_TABLE 2 /* Page is part of the page table */ +#define VM_PAGE_PMAP 3 /* Page stores pmap-specific data */ +#define VM_PAGE_KMEM 4 /* Page is part of a kmem slab */ +#define VM_PAGE_OBJECT 5 /* Page is part of a VM object */ +#define VM_PAGE_KERNEL 6 /* Type for generic kernel allocations */ +/* + * Physical page descriptor. + */ struct vm_page { - queue_chain_t pageq; /* queue info for FIFO - * queue or free list (P) */ - queue_chain_t listq; /* all pages in same object (O) */ - struct vm_page *next; /* VP bucket link (O) */ - - vm_object_t object; /* which object am I in (O,P) */ - vm_offset_t offset; /* offset into that object (O,P) */ - - unsigned int wire_count:15, /* how many wired down maps use me? - (O&P) */ - /* boolean_t */ inactive:1, /* page is in inactive list (P) */ - active:1, /* page is in active list (P) */ - laundry:1, /* page is being cleaned now (P)*/ - free:1, /* page is on free list (P) */ - reference:1, /* page has been used (P) */ - external:1, /* page considered external (P) */ - extcounted:1, /* page counted in ext counts (P) */ - busy:1, /* page is in transit (O) */ - wanted:1, /* someone is waiting for page (O) */ - tabled:1, /* page is in VP table (O) */ - fictitious:1, /* Physical page doesn't exist (O) */ - private:1, /* Page should not be returned to - * the free list (O) */ - absent:1, /* Data has been requested, but is - * not yet available (O) */ - error:1, /* Data manager was unable to provide - * data due to error (O) */ - dirty:1, /* Page must be cleaned (O) */ - precious:1, /* Page is precious; data must be - * returned even if clean (O) */ - overwriting:1; /* Request to unlock has been made - * without having data. 
(O) - * [See vm_object_overwrite] */ - - vm_offset_t phys_addr; /* Physical address of page, passed - * to pmap_enter (read-only) */ - vm_prot_t page_lock; /* Uses prohibited by data manager (O) */ - vm_prot_t unlock_request; /* Outstanding unlock request (O) */ + struct list node; + unsigned short type; + unsigned short seg_index; + unsigned short order; + phys_addr_t phys_addr; + void *slab_priv; }; +static inline unsigned short +vm_page_type(const struct vm_page *page) +{ + return page->type; +} + +void vm_page_set_type(struct vm_page *page, unsigned int order, + unsigned short type); + +static inline unsigned int +vm_page_order(size_t size) +{ + return iorder2(vm_page_atop(vm_page_round(size))); +} + +static inline phys_addr_t +vm_page_to_pa(const struct vm_page *page) +{ + return page->phys_addr; +} + +static inline unsigned long +vm_page_direct_va(phys_addr_t pa) +{ + assert(pa < VM_PAGE_DIRECTMAP_LIMIT); + return ((unsigned long)pa + VM_MIN_DIRECTMAP_ADDRESS); +} + +static inline phys_addr_t +vm_page_direct_pa(unsigned long va) +{ + assert(va >= VM_MIN_DIRECTMAP_ADDRESS); + assert(va < VM_MAX_DIRECTMAP_ADDRESS); + return (va - VM_MIN_DIRECTMAP_ADDRESS); +} + +static inline void * +vm_page_direct_ptr(const struct vm_page *page) +{ + return (void *)vm_page_direct_va(vm_page_to_pa(page)); +} + /* - * For debugging, this macro can be defined to perform - * some useful check on a page structure. + * Load physical memory into the vm_page module at boot time. + * + * The avail_start and avail_end parameters are used to maintain a simple + * heap for bootstrap allocations. + * + * All addresses must be page-aligned. Segments can be loaded in any order. */ +void vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end, + phys_addr_t avail_start, phys_addr_t avail_end); -#define VM_PAGE_CHECK(mem) +/* + * Return true if the vm_page module is completely initialized, false + * otherwise, in which case only vm_page_bootalloc() can be used for + * allocations. + */ +int vm_page_ready(void); /* - * Each pageable resident page falls into one of three lists: + * Set up the vm_page module. + * + * Architecture-specific code must have loaded segments before calling this + * function. Segments must comply with the selector-to-segment-list table, + * e.g. HIGHMEM is loaded if and only if DIRECTMAP, DMA32 and DMA are loaded, + * notwithstanding segment aliasing. * - * free - * Available for allocation now. - * inactive - * Not referenced in any map, but still has an - * object/offset-page mapping, and may be dirty. - * This is the list of pages that should be - * paged out next. - * active - * A list of pages which have been placed in - * at least one physical map. This list is - * ordered, in LRU-like fashion. + * Once this function returns, the vm_page module is ready, and normal + * allocation functions can be used. */ - -extern -vm_page_t vm_page_queue_free; /* memory free queue */ -extern -vm_page_t vm_page_queue_fictitious; /* fictitious free queue */ -extern -queue_head_t vm_page_queue_active; /* active memory queue */ -extern -queue_head_t vm_page_queue_inactive; /* inactive memory queue */ - -extern -int vm_page_free_count; /* How many pages are free? */ -extern -int vm_page_fictitious_count;/* How many fictitious pages are free? */ -extern -int vm_page_active_count; /* How many pages are active? */ -extern -int vm_page_inactive_count; /* How many pages are inactive? */ -extern -int vm_page_wire_count; /* How many pages are wired? 
*/ -extern -int vm_page_free_target; /* How many do we want free? */ -extern -int vm_page_free_min; /* When to wakeup pageout */ -extern -int vm_page_inactive_target;/* How many do we want inactive? */ -extern -int vm_page_free_reserved; /* How many pages reserved to do pageout */ -extern -int vm_page_laundry_count; /* How many pages being laundered? */ -extern -int vm_page_external_limit; /* Max number of pages for external objects */ - -/* Only objects marked with the extcounted bit are included in this total. - Pages which we scan for possible pageout, but which are not actually - dirty, don't get considered against the external page limits any more - in this way. */ -extern -int vm_page_external_count; /* How many pages for external objects? */ - - - -struct lock vm_page_queue_lock; /* lock on active and inactive page queues */ -decl_simple_lock_data(extern,vm_page_queue_free_lock) - /* lock on free page queue */ - -extern unsigned int vm_page_free_wanted; - /* how many threads are waiting for memory */ - -extern vm_offset_t vm_page_fictitious_addr; - /* (fake) phys_addr of fictitious pages */ - -extern void vm_page_bootstrap( - vm_offset_t *startp, - vm_offset_t *endp); -extern void vm_page_module_init(void); - -extern void vm_page_create( - vm_offset_t start, - vm_offset_t end); -extern vm_page_t vm_page_lookup( - vm_object_t object, - vm_offset_t offset); -extern vm_page_t vm_page_grab_fictitious(void); -extern void vm_page_release_fictitious(vm_page_t); -extern boolean_t vm_page_convert(vm_page_t, boolean_t); -extern void vm_page_more_fictitious(void); -extern vm_page_t vm_page_grab(boolean_t); -extern void vm_page_release(vm_page_t, boolean_t); -extern void vm_page_wait(void (*)(void)); -extern vm_page_t vm_page_alloc( - vm_object_t object, - vm_offset_t offset); -extern void vm_page_init( - vm_page_t mem, - vm_offset_t phys_addr); -extern void vm_page_free(vm_page_t); -extern void vm_page_activate(vm_page_t); -extern void vm_page_deactivate(vm_page_t); -extern void vm_page_rename( - vm_page_t mem, - vm_object_t new_object, - vm_offset_t new_offset); -extern void vm_page_insert( - vm_page_t mem, - vm_object_t object, - vm_offset_t offset); -extern void vm_page_remove( - vm_page_t mem); - -extern void vm_page_zero_fill(vm_page_t); -extern void vm_page_copy(vm_page_t src_m, vm_page_t dest_m); - -extern void vm_page_wire(vm_page_t); -extern void vm_page_unwire(vm_page_t); - -#if MACH_VM_DEBUG -extern unsigned int vm_page_info( - hash_info_bucket_t *info, - unsigned int count); -#endif +void vm_page_setup(void); /* - * Functions implemented as macros + * Make the given page managed by the vm_page module. + * + * If additional memory can be made usable after the VM system is initialized, + * it should be reported through this function. */ - -#define PAGE_ASSERT_WAIT(m, interruptible) \ - MACRO_BEGIN \ - (m)->wanted = TRUE; \ - assert_wait((event_t) (m), (interruptible)); \ - MACRO_END - -#define PAGE_WAKEUP_DONE(m) \ - MACRO_BEGIN \ - (m)->busy = FALSE; \ - if ((m)->wanted) { \ - (m)->wanted = FALSE; \ - thread_wakeup(((event_t) m)); \ - } \ - MACRO_END - -#define PAGE_WAKEUP(m) \ - MACRO_BEGIN \ - if ((m)->wanted) { \ - (m)->wanted = FALSE; \ - thread_wakeup((event_t) (m)); \ - } \ - MACRO_END - -#define VM_PAGE_FREE(p) \ - MACRO_BEGIN \ - vm_page_lock_queues(); \ - vm_page_free(p); \ - vm_page_unlock_queues(); \ - MACRO_END +void vm_page_manage(struct vm_page *page); /* - * Macro to be used in place of pmap_enter() + * Return the page descriptor for the given physical address. 
*/ +struct vm_page * vm_page_lookup(phys_addr_t pa); -#define PMAP_ENTER(pmap, virtual_address, page, protection, wired) \ - MACRO_BEGIN \ - pmap_enter( \ - (pmap), \ - (virtual_address), \ - (page)->phys_addr, \ - (protection) & ~(page)->page_lock, \ - (wired) \ - ); \ - MACRO_END +/* + * Allocate a block of 2^order physical pages. + * + * The selector is used to determine the segments from which allocation can + * be attempted. + */ +struct vm_page * vm_page_alloc(unsigned int order, unsigned int selector, + unsigned short type); -#define VM_PAGE_WAIT(continuation) vm_page_wait(continuation) +/* + * Release a block of 2^order physical pages. + */ +void vm_page_free(struct vm_page *page, unsigned int order); -#define vm_page_lock_queues() lock_write(&vm_page_queue_lock) -#define vm_page_unlock_queues() lock_write_done(&vm_page_queue_lock) -#define have_vm_page_queue_lock() have_write_lock(&vm_page_queue_lock) +/* + * Return the name of the given segment. + */ +const char * vm_page_seg_name(unsigned int seg_index); -#define VM_PAGE_QUEUES_REMOVE(mem) \ - MACRO_BEGIN \ - if (mem->active) { \ - queue_remove(&vm_page_queue_active, \ - mem, vm_page_t, pageq); \ - mem->active = FALSE; \ - vm_page_active_count--; \ - } \ - \ - if (mem->inactive) { \ - queue_remove(&vm_page_queue_inactive, \ - mem, vm_page_t, pageq); \ - mem->inactive = FALSE; \ - vm_page_inactive_count--; \ - } \ - MACRO_END +/* + * Display internal information about the module. + */ +void vm_page_info(void); -#endif /* _VM_VM_PAGE_H_ */ +#endif /* _VM_VM_PAGE_H */ |
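The buddy address arithmetic in vm_page_seg_free_to_buddy() is compact enough that a worked example may help. The standalone sketch below is hypothetical and not part of the patch; it assumes 4 KiB pages and a segment based at physical address 0. It reproduces the two manipulations used above: XOR with the block size to locate a block's buddy, and masking with the negated merged size to find the start of the coalesced block.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12                      /* assumed 4 KiB pages */

typedef uint64_t phys_addr_t;

/* Same page-to-address conversion as vm_page_ptoa() in the patch. */
static phys_addr_t
ptoa(phys_addr_t nr_pages)
{
    return nr_pages << PAGE_SHIFT;
}

int
main(void)
{
    /* Hypothetical free block: order 2 (4 pages) at 0x4000 in a segment based at 0. */
    phys_addr_t pa = 0x4000;
    unsigned int order = 2;

    /* The buddy of a 2^order block differs only in the bit selecting that block size. */
    phys_addr_t buddy_pa = pa ^ ptoa(1 << order);
    printf("order-%u block at %#llx has buddy at %#llx\n", order,
           (unsigned long long)pa, (unsigned long long)buddy_pa);

    /*
     * Once both buddies are free, they merge into an order+1 block; clearing
     * the low bits with the negated merged size yields its start address,
     * which is necessarily the lower of the two buddies.
     */
    order++;
    phys_addr_t merged_pa = pa & -ptoa(1 << order);
    assert(merged_pa == (pa < buddy_pa ? pa : buddy_pa));
    printf("merged order-%u block starts at %#llx\n", order,
           (unsigned long long)merged_pa);
    return 0;
}

In the patch itself, pa is an absolute physical address, and the computed buddy is additionally checked against the segment boundaries before any merge is attempted.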
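The new vm_page.h documents the allocation entry points, but the commit adds no caller yet. The fragment below is a hypothetical usage sketch only: example_buf_create(), example_buf_destroy() and the extra includes are assumptions, not part of the commit. It converts a byte count to a buddy order with vm_page_order(), restricts the allocation to directly mapped segments, and reaches the memory through the direct physical mapping.

#include <kern/stddef.h>   /* assumed to provide size_t */
#include <kern/string.h>   /* assumed to provide memset() */
#include <vm/vm_page.h>

/*
 * Hypothetical helper: allocate and zero a physically contiguous,
 * directly mapped buffer of at least `size` bytes.
 */
static void *
example_buf_create(size_t size)
{
    struct vm_page *page;
    unsigned int order;

    /* A byte count of e.g. 3 pages rounds up to an order-2 (4-page) block. */
    order = vm_page_order(size);

    /*
     * VM_PAGE_SEL_DIRECTMAP lets the DIRECTMAP, DMA32 and DMA segments be
     * tried, in that order, so the result is always directly mappable.
     */
    page = vm_page_alloc(order, VM_PAGE_SEL_DIRECTMAP, VM_PAGE_KERNEL);

    if (page == NULL)
        return NULL;

    return memset(vm_page_direct_ptr(page), 0, vm_page_ptoa(1 << order));
}

/*
 * Hypothetical counterpart: look the descriptor back up from the direct
 * mapping and release the block.
 */
static void
example_buf_destroy(void *buf, size_t size)
{
    vm_page_free(vm_page_lookup(vm_page_direct_pa((unsigned long)buf)),
                 vm_page_order(size));
}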