summaryrefslogtreecommitdiff
path: root/vm
diff options
context:
space:
mode:
Diffstat (limited to 'vm')
-rw-r--r--vm/vm_page.c735
-rw-r--r--vm/vm_page.h442
2 files changed, 896 insertions, 281 deletions
diff --git a/vm/vm_page.c b/vm/vm_page.c
new file mode 100644
index 0000000..cc184ca
--- /dev/null
+++ b/vm/vm_page.c
@@ -0,0 +1,735 @@
+/*
+ * Copyright (c) 2010-2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation uses the binary buddy system to manage its heap.
+ * Descriptions of the buddy system can be found in the following works :
+ * - "UNIX Internals: The New Frontiers", by Uresh Vahalia.
+ * - "Dynamic Storage Allocation: A Survey and Critical Review",
+ * by Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles.
+ *
+ * In addition, this allocator uses per-CPU pools of pages for order 0
+ * (i.e. single page) allocations. These pools act as caches (but are named
+ * differently to avoid confusion with CPU caches) that reduce contention on
+ * multiprocessor systems. When a pool is empty and cannot provide a page,
+ * it is filled by transferring multiple pages from the backend buddy system.
+ * The symmetric case is handled likewise.
+ */
+
+#include <kern/assert.h>
+#include <kern/init.h>
+#include <kern/list.h>
+#include <kern/macros.h>
+#include <kern/mutex.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/printk.h>
+#include <kern/sprintf.h>
+#include <kern/stddef.h>
+#include <kern/string.h>
+#include <kern/thread.h>
+#include <kern/types.h>
+#include <machine/cpu.h>
+#include <machine/pmap.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_page.h>
+
+/*
+ * Number of free block lists per segment.
+ */
+#define VM_PAGE_NR_FREE_LISTS 11
+
+/*
+ * The size of a CPU pool is computed by dividing the number of pages in its
+ * containing segment by this value.
+ */
+#define VM_PAGE_CPU_POOL_RATIO 1024
+
+/*
+ * Maximum number of pages in a CPU pool.
+ */
+#define VM_PAGE_CPU_POOL_MAX_SIZE 128
+
+/*
+ * The transfer size of a CPU pool is computed by dividing the pool size by
+ * this value.
+ */
+#define VM_PAGE_CPU_POOL_TRANSFER_RATIO 2
+
+/*
+ * Per-processor cache of pages.
+ *
+ * A pool holds order 0 pages so that single page allocations and
+ * releases can usually be served without taking the segment lock.
+ */
+struct vm_page_cpu_pool {
+ struct mutex lock; /* Serializes access to the pool */
+ int size; /* Capacity of the pool, in pages */
+ int transfer_size; /* Pages moved by one fill or drain */
+ int nr_pages; /* Pages currently in the pool */
+ struct list pages; /* List of the pooled pages */
+} __aligned(CPU_L1_SIZE);
+
+/*
+ * Special order value for pages that aren't in a free list. Such pages are
+ * either allocated, or part of a free block of pages but not the head page.
+ */
+#define VM_PAGE_ORDER_UNLISTED ((unsigned short)-1)
+
+/*
+ * Doubly-linked list of free blocks.
+ *
+ * A segment keeps one such list per block order.
+ */
+struct vm_page_free_list {
+ unsigned long size; /* Number of blocks in the list */
+ struct list blocks; /* Head pages of the free blocks */
+};
+
+/*
+ * Segment name buffer size.
+ */
+#define VM_PAGE_NAME_SIZE 16
+
+/*
+ * Segment of contiguous memory.
+ *
+ * The physical range is [start, end). The pages array holds one
+ * descriptor per page of that range.
+ */
+struct vm_page_seg {
+ struct vm_page_cpu_pool cpu_pools[MAX_CPUS]; /* One pool per processor */
+
+ phys_addr_t start; /* First physical address of the segment */
+ phys_addr_t end; /* Physical address right after the segment */
+ struct vm_page *pages; /* Descriptor of the first page */
+ struct vm_page *pages_end; /* One past the last descriptor */
+ struct mutex lock; /* Taken around buddy allocations and releases */
+ struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS]; /* By order */
+ unsigned long nr_free_pages; /* Pages held in the free lists */
+};
+
+/*
+ * Bootstrap information about a segment.
+ *
+ * The [avail_start, avail_end) range is the part of the segment that
+ * bootstrap allocations are carved from, and that is released to the
+ * buddy system when the module is set up.
+ */
+struct vm_page_boot_seg {
+ phys_addr_t start; /* First physical address of the segment */
+ phys_addr_t end; /* Physical address right after the segment */
+ phys_addr_t avail_start; /* Advanced by each bootstrap allocation */
+ phys_addr_t avail_end; /* End of the available range */
+};
+
+static int vm_page_is_ready __read_mostly;
+
+/*
+ * Segment table.
+ *
+ * The system supports a maximum of 4 segments :
+ * - DMA: suitable for DMA
+ * - DMA32: suitable for DMA when devices support 32-bit addressing
+ * - DIRECTMAP: direct physical mapping, allows direct access from
+ * the kernel with a simple offset translation
+ * - HIGHMEM: must be mapped before it can be accessed
+ *
+ * Segments are ordered by priority, 0 being the lowest priority. Their
+ * relative priorities are DMA < DMA32 < DIRECTMAP < HIGHMEM. Some segments
+ * may actually be aliases for others, e.g. if DMA is always possible from
+ * the direct physical mapping, DMA and DMA32 are aliases for DIRECTMAP,
+ * in which case the segment table contains DIRECTMAP and HIGHMEM only.
+ */
+static struct vm_page_seg vm_page_segs[VM_PAGE_MAX_SEGS];
+
+/*
+ * Bootstrap segment table.
+ */
+static struct vm_page_boot_seg vm_page_boot_segs[VM_PAGE_MAX_SEGS] __initdata;
+
+/*
+ * Number of loaded segments.
+ */
+static unsigned int vm_page_segs_size __read_mostly;
+
+/*
+ * Initialize a page descriptor at boot time.
+ *
+ * The descriptor is zeroed, bound to its segment and physical address,
+ * and marked as a reserved page that isn't in any free list.
+ */
+static void __init
+vm_page_init(struct vm_page *page, unsigned short seg_index, phys_addr_t pa)
+{
+    memset(page, 0, sizeof(*page));
+
+    page->phys_addr = pa;
+    page->seg_index = seg_index;
+    page->order = VM_PAGE_ORDER_UNLISTED;
+    page->type = VM_PAGE_RESERVED;
+}
+
+/*
+ * Set the usage type of every page in the block of 2^order pages
+ * starting at the given page.
+ */
+void
+vm_page_set_type(struct vm_page *page, unsigned int order, unsigned short type)
+{
+    struct vm_page *cur;
+    unsigned int remaining;
+
+    remaining = 1 << order;
+
+    for (cur = page; remaining != 0; cur++, remaining--)
+        cur->type = type;
+}
+
+/*
+ * Make a free list empty.
+ */
+static void __init
+vm_page_free_list_init(struct vm_page_free_list *free_list)
+{
+    list_init(&free_list->blocks);
+    free_list->size = 0;
+}
+
+/*
+ * Add a block, identified by its head page, at the head of a free list.
+ *
+ * The page must not already be listed. Setting its order is left to the
+ * caller.
+ */
+static inline void
+vm_page_free_list_insert(struct vm_page_free_list *free_list,
+                         struct vm_page *page)
+{
+    assert(page->order == VM_PAGE_ORDER_UNLISTED);
+
+    list_insert_head(&free_list->blocks, &page->node);
+    free_list->size += 1;
+}
+
+/*
+ * Remove a block, identified by its head page, from a free list.
+ *
+ * The page must currently be listed. Resetting its order is left to the
+ * caller.
+ */
+static inline void
+vm_page_free_list_remove(struct vm_page_free_list *free_list,
+                         struct vm_page *page)
+{
+    assert(page->order != VM_PAGE_ORDER_UNLISTED);
+
+    list_remove(&page->node);
+    free_list->size -= 1;
+}
+
+/*
+ * Allocate a block of 2^order pages from the buddy system of a segment.
+ *
+ * The block is taken from the first non-empty free list of sufficient
+ * order. If it is larger than requested, it is repeatedly split in two,
+ * the upper half being inserted in the free list of its order, until the
+ * remaining block has the requested order.
+ *
+ * Return the head page of the block, or NULL if no suitable block is
+ * available. Callers in this file hold the segment lock around this call.
+ */
+static struct vm_page *
+vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order)
+{
+    struct vm_page_free_list *free_list;
+    struct vm_page *page, *buddy;
+    unsigned int i;
+
+    assert(order < VM_PAGE_NR_FREE_LISTS);
+
+    /*
+     * Explicitly initialized instead of self-assigned: reading the
+     * indeterminate value of an automatic variable is undefined behavior.
+     * The loop below always runs at least once because of the assertion
+     * above, so the pointer is valid whenever it is dereferenced.
+     */
+    free_list = NULL;
+
+    for (i = order; i < VM_PAGE_NR_FREE_LISTS; i++) {
+        free_list = &seg->free_lists[i];
+
+        if (free_list->size != 0)
+            break;
+    }
+
+    if (i == VM_PAGE_NR_FREE_LISTS)
+        return NULL;
+
+    page = list_first_entry(&free_list->blocks, struct vm_page, node);
+    vm_page_free_list_remove(free_list, page);
+    page->order = VM_PAGE_ORDER_UNLISTED;
+
+    /* Split the block, giving back the upper half at each step */
+    while (i > order) {
+        i--;
+        buddy = &page[1 << i];
+        vm_page_free_list_insert(&seg->free_lists[i], buddy);
+        buddy->order = i;
+    }
+
+    seg->nr_free_pages -= (1 << order);
+    return page;
+}
+
+/*
+ * Release a block of 2^order pages to the buddy system of a segment.
+ *
+ * As long as the buddy of the block lies inside the segment and heads a
+ * free block of the same order, both are merged into a block of the next
+ * order. The resulting block is then inserted in the matching free list.
+ */
+static void
+vm_page_seg_free_to_buddy(struct vm_page_seg *seg, struct vm_page *page,
+                          unsigned int order)
+{
+    struct vm_page *buddy;
+    phys_addr_t pa, buddy_pa;
+    unsigned int nr_freed;
+
+    assert(page >= seg->pages);
+    assert(page < seg->pages_end);
+    assert(page->order == VM_PAGE_ORDER_UNLISTED);
+    assert(order < VM_PAGE_NR_FREE_LISTS);
+
+    pa = page->phys_addr;
+    nr_freed = (1 << order);
+
+    for (;;) {
+        /* Blocks of the highest order cannot be merged any further */
+        if (order >= (VM_PAGE_NR_FREE_LISTS - 1))
+            break;
+
+        buddy_pa = pa ^ vm_page_ptoa(1 << order);
+
+        /* The buddy must belong to this segment */
+        if (!((buddy_pa >= seg->start) && (buddy_pa < seg->end)))
+            break;
+
+        buddy = &seg->pages[vm_page_atop(buddy_pa - seg->start)];
+
+        /* The buddy must head a free block of the same order */
+        if (buddy->order != order)
+            break;
+
+        vm_page_free_list_remove(&seg->free_lists[order], buddy);
+        buddy->order = VM_PAGE_ORDER_UNLISTED;
+
+        /* The merged block starts at the lowest address of the pair */
+        order++;
+        pa &= -vm_page_ptoa(1 << order);
+        page = &seg->pages[vm_page_atop(pa - seg->start)];
+    }
+
+    vm_page_free_list_insert(&seg->free_lists[order], page);
+    page->order = order;
+    seg->nr_free_pages += nr_freed;
+}
+
+/*
+ * Initialize an empty CPU pool able to hold up to size pages.
+ *
+ * The transfer size is the pool size divided by
+ * VM_PAGE_CPU_POOL_TRANSFER_RATIO, rounded up.
+ */
+static void __init
+vm_page_cpu_pool_init(struct vm_page_cpu_pool *cpu_pool, int size)
+{
+    list_init(&cpu_pool->pages);
+    cpu_pool->nr_pages = 0;
+    cpu_pool->size = size;
+    cpu_pool->transfer_size = (size + VM_PAGE_CPU_POOL_TRANSFER_RATIO - 1)
+                              / VM_PAGE_CPU_POOL_TRANSFER_RATIO;
+    mutex_init(&cpu_pool->lock);
+}
+
+/*
+ * Return the pool of the given segment indexed by cpu_id().
+ */
+static inline struct vm_page_cpu_pool *
+vm_page_cpu_pool_get(struct vm_page_seg *seg)
+{
+    struct vm_page_cpu_pool *pools;
+
+    pools = seg->cpu_pools;
+    return pools + cpu_id();
+}
+
+/*
+ * Remove and return the first page of a pool, which must not be empty.
+ */
+static inline struct vm_page *
+vm_page_cpu_pool_pop(struct vm_page_cpu_pool *cpu_pool)
+{
+    struct vm_page *head;
+
+    assert(cpu_pool->nr_pages != 0);
+
+    head = list_first_entry(&cpu_pool->pages, struct vm_page, node);
+    list_remove(&head->node);
+    cpu_pool->nr_pages--;
+    return head;
+}
+
+/*
+ * Insert a page at the head of a pool, which must not be full.
+ */
+static inline void
+vm_page_cpu_pool_push(struct vm_page_cpu_pool *cpu_pool, struct vm_page *page)
+{
+    assert(cpu_pool->nr_pages < cpu_pool->size);
+
+    list_insert_head(&cpu_pool->pages, &page->node);
+    cpu_pool->nr_pages++;
+}
+
+/*
+ * Refill an empty pool with pages taken from the buddy system.
+ *
+ * Up to transfer_size pages are moved while holding the segment lock.
+ * Return the number of pages actually transferred, which is 0 if the
+ * buddy system couldn't provide any page.
+ */
+static int
+vm_page_cpu_pool_fill(struct vm_page_cpu_pool *cpu_pool,
+                      struct vm_page_seg *seg)
+{
+    struct vm_page *page;
+    int nr_filled;
+
+    assert(cpu_pool->nr_pages == 0);
+
+    nr_filled = 0;
+    mutex_lock(&seg->lock);
+
+    while (nr_filled < cpu_pool->transfer_size) {
+        page = vm_page_seg_alloc_from_buddy(seg, 0);
+
+        if (page == NULL)
+            break;
+
+        vm_page_cpu_pool_push(cpu_pool, page);
+        nr_filled++;
+    }
+
+    mutex_unlock(&seg->lock);
+
+    return nr_filled;
+}
+
+/*
+ * Give transfer_size pages of a full pool back to the buddy system.
+ *
+ * The segment lock is held during the whole transfer.
+ */
+static void
+vm_page_cpu_pool_drain(struct vm_page_cpu_pool *cpu_pool,
+                       struct vm_page_seg *seg)
+{
+    struct vm_page *page;
+    int remaining;
+
+    assert(cpu_pool->nr_pages == cpu_pool->size);
+
+    remaining = cpu_pool->transfer_size;
+    mutex_lock(&seg->lock);
+
+    while (remaining > 0) {
+        page = vm_page_cpu_pool_pop(cpu_pool);
+        vm_page_seg_free_to_buddy(seg, page, 0);
+        remaining--;
+    }
+
+    mutex_unlock(&seg->lock);
+}
+
+/*
+ * Return the size of a segment, in bytes.
+ */
+static phys_addr_t __init
+vm_page_seg_size(struct vm_page_seg *seg)
+{
+    phys_addr_t size;
+
+    size = seg->end - seg->start;
+    return size;
+}
+
+/*
+ * Compute the size of the CPU pools of a segment.
+ *
+ * The size is the number of pages in the segment divided by
+ * VM_PAGE_CPU_POOL_RATIO, clamped to [1, VM_PAGE_CPU_POOL_MAX_SIZE].
+ */
+static int __init
+vm_page_seg_compute_pool_size(struct vm_page_seg *seg)
+{
+    phys_addr_t nr_pages;
+
+    nr_pages = vm_page_atop(vm_page_seg_size(seg)) / VM_PAGE_CPU_POOL_RATIO;
+
+    if (nr_pages == 0)
+        return 1;
+
+    if (nr_pages > VM_PAGE_CPU_POOL_MAX_SIZE)
+        return VM_PAGE_CPU_POOL_MAX_SIZE;
+
+    return nr_pages;
+}
+
+/*
+ * Initialize a segment covering the [start, end) physical range.
+ *
+ * The pages argument is the part of the page table that describes the
+ * segment. The CPU pools and free lists start empty, and every page
+ * descriptor is initialized with vm_page_init().
+ */
+static void __init
+vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end,
+                 struct vm_page *pages)
+{
+    unsigned int i, seg_index;
+    phys_addr_t pa;
+    int pool_size;
+
+    seg->start = start;
+    seg->end = end;
+
+    pool_size = vm_page_seg_compute_pool_size(seg);
+
+    for (i = 0; i < ARRAY_SIZE(seg->cpu_pools); i++)
+        vm_page_cpu_pool_init(&seg->cpu_pools[i], pool_size);
+
+    seg->pages = pages;
+    seg->pages_end = pages + vm_page_atop(vm_page_seg_size(seg));
+    mutex_init(&seg->lock);
+
+    for (i = 0; i < ARRAY_SIZE(seg->free_lists); i++)
+        vm_page_free_list_init(&seg->free_lists[i]);
+
+    seg->nr_free_pages = 0;
+
+    /* The segment index is the position of the segment in the table */
+    seg_index = seg - vm_page_segs;
+
+    for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
+        vm_page_init(&pages[vm_page_atop(pa - seg->start)], seg_index, pa);
+}
+
+/*
+ * Allocate a block of 2^order pages from a segment and set its type.
+ *
+ * Single pages (order 0) are served from the pool returned by
+ * vm_page_cpu_pool_get(), with the thread pinned while the pool is
+ * accessed. An empty pool is refilled from the buddy system first.
+ * Larger blocks are taken directly from the buddy system under the
+ * segment lock.
+ *
+ * Return the head page of the block, or NULL if the segment cannot
+ * satisfy the request.
+ */
+static struct vm_page *
+vm_page_seg_alloc(struct vm_page_seg *seg, unsigned int order,
+                  unsigned short type)
+{
+    struct vm_page_cpu_pool *cpu_pool;
+    struct vm_page *page;
+    int filled;
+
+    assert(order < VM_PAGE_NR_FREE_LISTS);
+
+    if (order == 0) {
+        thread_pin();
+        cpu_pool = vm_page_cpu_pool_get(seg);
+        mutex_lock(&cpu_pool->lock);
+
+        if (cpu_pool->nr_pages == 0) {
+            filled = vm_page_cpu_pool_fill(cpu_pool, seg);
+
+            if (!filled) {
+                mutex_unlock(&cpu_pool->lock);
+                thread_unpin();
+                return NULL;
+            }
+        }
+
+        page = vm_page_cpu_pool_pop(cpu_pool);
+        mutex_unlock(&cpu_pool->lock);
+        thread_unpin();
+    } else {
+        mutex_lock(&seg->lock);
+        page = vm_page_seg_alloc_from_buddy(seg, order);
+        mutex_unlock(&seg->lock);
+
+        /*
+         * The buddy system may be unable to provide a block of this
+         * order. Report the failure instead of dereferencing NULL below.
+         */
+        if (page == NULL)
+            return NULL;
+    }
+
+    assert(page->type == VM_PAGE_FREE);
+    vm_page_set_type(page, order, type);
+    return page;
+}
+
+/*
+ * Release a block of 2^order pages to a segment.
+ *
+ * The pages are marked free. Blocks larger than a single page go
+ * straight to the buddy system under the segment lock. Single pages are
+ * pushed on the pool returned by vm_page_cpu_pool_get(), which is
+ * drained first if it is full.
+ */
+static void
+vm_page_seg_free(struct vm_page_seg *seg, struct vm_page *page,
+                 unsigned int order)
+{
+    struct vm_page_cpu_pool *cpu_pool;
+
+    assert(page->type != VM_PAGE_FREE);
+    assert(order < VM_PAGE_NR_FREE_LISTS);
+
+    vm_page_set_type(page, order, VM_PAGE_FREE);
+
+    if (order != 0) {
+        mutex_lock(&seg->lock);
+        vm_page_seg_free_to_buddy(seg, page, order);
+        mutex_unlock(&seg->lock);
+        return;
+    }
+
+    thread_pin();
+    cpu_pool = vm_page_cpu_pool_get(seg);
+    mutex_lock(&cpu_pool->lock);
+
+    if (cpu_pool->nr_pages == cpu_pool->size)
+        vm_page_cpu_pool_drain(cpu_pool, seg);
+
+    vm_page_cpu_pool_push(cpu_pool, page);
+    mutex_unlock(&cpu_pool->lock);
+    thread_unpin();
+}
+
+/*
+ * Record the boundaries of a segment in the bootstrap segment table.
+ *
+ * All addresses must be page-aligned, and the available range must lie
+ * within the segment. The number of loaded segments is incremented.
+ */
+void __init
+vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end,
+             phys_addr_t avail_start, phys_addr_t avail_end)
+{
+    struct vm_page_boot_seg *boot_seg;
+
+    assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
+    assert(vm_page_aligned(start));
+    assert(vm_page_aligned(end));
+    assert(vm_page_aligned(avail_start));
+    assert(vm_page_aligned(avail_end));
+    assert(start < end);
+    assert(start <= avail_start);
+    assert(avail_end <= end);
+    assert(vm_page_segs_size < ARRAY_SIZE(vm_page_boot_segs));
+
+    boot_seg = vm_page_boot_segs + seg_index;
+    boot_seg->avail_start = avail_start;
+    boot_seg->avail_end = avail_end;
+    boot_seg->start = start;
+    boot_seg->end = end;
+
+    vm_page_segs_size++;
+}
+
+/*
+ * Return the value of the readiness flag, which vm_page_setup() sets
+ * to 1 once the module is initialized.
+ */
+int
+vm_page_ready(void)
+{
+    int ready;
+
+    ready = vm_page_is_ready;
+    return ready;
+}
+
+/*
+ * Translate a selector into the index of the first segment from which
+ * allocation should be attempted.
+ *
+ * The result is capped to the index of the last loaded segment. An
+ * unknown selector causes a panic.
+ */
+static unsigned int
+vm_page_select_alloc_seg(unsigned int selector)
+{
+    unsigned int seg_index;
+
+    if (selector == VM_PAGE_SEL_DMA)
+        seg_index = VM_PAGE_SEG_DMA;
+    else if (selector == VM_PAGE_SEL_DMA32)
+        seg_index = VM_PAGE_SEG_DMA32;
+    else if (selector == VM_PAGE_SEL_DIRECTMAP)
+        seg_index = VM_PAGE_SEG_DIRECTMAP;
+    else if (selector == VM_PAGE_SEL_HIGHMEM)
+        seg_index = VM_PAGE_SEG_HIGHMEM;
+    else
+        panic("vm_page: invalid selector");
+
+    return MIN(vm_page_segs_size - 1, seg_index);
+}
+
+/*
+ * Return 1 if the given bootstrap segment has been loaded, i.e. if its
+ * end address is not zero, 0 otherwise.
+ */
+static int __init
+vm_page_boot_seg_loaded(const struct vm_page_boot_seg *seg)
+{
+    if (seg->end == 0)
+        return 0;
+
+    return 1;
+}
+
+/*
+ * Check the consistency of the bootstrap segment table.
+ *
+ * At least one segment must be loaded, and the loaded segments must be
+ * exactly the first vm_page_segs_size entries of the table.
+ */
+static void __init
+vm_page_check_boot_segs(void)
+{
+    unsigned int i;
+    int is_loaded, expect_loaded;
+
+    if (vm_page_segs_size == 0)
+        panic("vm_page: no physical memory loaded");
+
+    for (i = 0; i < ARRAY_SIZE(vm_page_boot_segs); i++) {
+        is_loaded = vm_page_boot_seg_loaded(&vm_page_boot_segs[i]);
+        expect_loaded = (i < vm_page_segs_size);
+
+        if (is_loaded != expect_loaded)
+            panic("vm_page: invalid boot segment table");
+    }
+}
+
+/*
+ * Return the size of a bootstrap segment, in bytes.
+ */
+static phys_addr_t __init
+vm_page_boot_seg_size(struct vm_page_boot_seg *seg)
+{
+    phys_addr_t size;
+
+    size = seg->end - seg->start;
+    return size;
+}
+
+/*
+ * Return the size of the available range of a bootstrap segment, in bytes.
+ */
+static phys_addr_t __init
+vm_page_boot_seg_avail_size(struct vm_page_boot_seg *seg)
+{
+    phys_addr_t avail_size;
+
+    avail_size = seg->avail_end - seg->avail_start;
+    return avail_size;
+}
+
+/*
+ * Allocate memory from the bootstrap heap.
+ *
+ * Segments are tried from the one selected for DIRECTMAP down to segment
+ * 0. The first segment with enough available memory has its available
+ * range shrunk by the page-rounded size. The returned address belongs to
+ * the direct physical mapping. Failure causes a panic.
+ */
+static void * __init
+vm_page_bootalloc(size_t size)
+{
+    struct vm_page_boot_seg *seg;
+    phys_addr_t pa;
+    unsigned int i;
+
+    i = vm_page_select_alloc_seg(VM_PAGE_SEL_DIRECTMAP);
+
+    /* The index is unsigned: decrementing it past 0 ends the loop */
+    while (i < vm_page_segs_size) {
+        seg = &vm_page_boot_segs[i];
+
+        if (vm_page_boot_seg_avail_size(seg) >= size) {
+            pa = seg->avail_start;
+            seg->avail_start += vm_page_round(size);
+            return (void *)vm_page_direct_va(pa);
+        }
+
+        i--;
+    }
+
+    panic("vm_page: no physical memory available");
+}
+
+/*
+ * Set up the vm_page module from the loaded bootstrap segments.
+ *
+ * The page table (one descriptor per page of every loaded segment) is
+ * allocated from the bootstrap heap, the segments are initialized, and
+ * the pages still available in each segment are released to the buddy
+ * system. The pages backing the table itself are then typed as such.
+ */
+void __init
+vm_page_setup(void)
+{
+    struct vm_page_boot_seg *boot_seg;
+    struct vm_page_seg *seg;
+    struct vm_page *table, *page, *end;
+    size_t nr_pages, table_size;
+    unsigned long va;
+    unsigned int i;
+    phys_addr_t pa;
+
+    vm_page_check_boot_segs();
+
+    /*
+     * Size the page table so that it describes all loaded segments.
+     */
+    nr_pages = 0;
+
+    for (i = 0; i < vm_page_segs_size; i++) {
+        boot_seg = &vm_page_boot_segs[i];
+        nr_pages += vm_page_atop(vm_page_boot_seg_size(boot_seg));
+    }
+
+    table_size = vm_page_round(nr_pages * sizeof(struct vm_page));
+    printk("vm_page: page table size: %zu entries (%zuk)\n", nr_pages,
+           table_size >> 10);
+    table = vm_page_bootalloc(table_size);
+    va = (unsigned long)table;
+
+    /*
+     * Initialize each segment with its slice of the page table. Segment
+     * initialization leaves all pages reserved. The pages of the range
+     * still available are then released one by one, which populates
+     * the free lists.
+     */
+    for (i = 0; i < vm_page_segs_size; i++) {
+        seg = &vm_page_segs[i];
+        boot_seg = &vm_page_boot_segs[i];
+        vm_page_seg_init(seg, boot_seg->start, boot_seg->end, table);
+
+        page = seg->pages + vm_page_atop(boot_seg->avail_start
+                                         - boot_seg->start);
+        end = seg->pages + vm_page_atop(boot_seg->avail_end
+                                        - boot_seg->start);
+
+        for (; page < end; page++) {
+            page->type = VM_PAGE_FREE;
+            vm_page_seg_free_to_buddy(seg, page, 0);
+        }
+
+        table += vm_page_atop(vm_page_seg_size(seg));
+    }
+
+    /*
+     * Mark the pages that store the page table.
+     */
+    for (; va < (unsigned long)table; va += PAGE_SIZE) {
+        pa = vm_page_direct_pa(va);
+        page = vm_page_lookup(pa);
+        assert((page != NULL) && (page->type == VM_PAGE_RESERVED));
+        page->type = VM_PAGE_TABLE;
+    }
+
+    vm_page_is_ready = 1;
+}
+
+/*
+ * Hand a reserved page over to the buddy system of its segment.
+ *
+ * The page is marked free before being released.
+ */
+void __init
+vm_page_manage(struct vm_page *page)
+{
+    struct vm_page_seg *seg;
+
+    assert(page->seg_index < ARRAY_SIZE(vm_page_segs));
+    assert(page->type == VM_PAGE_RESERVED);
+
+    vm_page_set_type(page, 0, VM_PAGE_FREE);
+
+    seg = &vm_page_segs[page->seg_index];
+    vm_page_seg_free_to_buddy(seg, page, 0);
+}
+
+/*
+ * Return the descriptor of the page containing the given physical
+ * address, or NULL if the address doesn't belong to any loaded segment.
+ */
+struct vm_page *
+vm_page_lookup(phys_addr_t pa)
+{
+    struct vm_page_seg *seg, *segs_end;
+
+    segs_end = vm_page_segs + vm_page_segs_size;
+
+    for (seg = vm_page_segs; seg < segs_end; seg++) {
+        if ((pa < seg->start) || (pa >= seg->end))
+            continue;
+
+        return &seg->pages[vm_page_atop(pa - seg->start)];
+    }
+
+    return NULL;
+}
+
+/*
+ * Allocate a block of 2^order pages of the given type.
+ *
+ * Segments are tried from the one matching the selector down to
+ * segment 0. Return the head page of the block, or NULL on failure,
+ * except for pmap pages, for which failure causes a panic.
+ */
+struct vm_page *
+vm_page_alloc(unsigned int order, unsigned int selector, unsigned short type)
+{
+    struct vm_page *page;
+    unsigned int i;
+
+    i = vm_page_select_alloc_seg(selector);
+
+    /* The index is unsigned: decrementing it past 0 ends the loop */
+    while (i < vm_page_segs_size) {
+        page = vm_page_seg_alloc(&vm_page_segs[i], order, type);
+
+        if (page != NULL)
+            return page;
+
+        i--;
+    }
+
+    if (type == VM_PAGE_PMAP)
+        panic("vm_page: unable to allocate pmap page");
+
+    return NULL;
+}
+
+/*
+ * Release a block of 2^order pages to the segment it belongs to.
+ */
+void
+vm_page_free(struct vm_page *page, unsigned int order)
+{
+    struct vm_page_seg *seg;
+
+    assert(page->seg_index < ARRAY_SIZE(vm_page_segs));
+
+    seg = &vm_page_segs[page->seg_index];
+    vm_page_seg_free(seg, page, order);
+}
+
+/*
+ * Return the name of the segment with the given index.
+ *
+ * Segment indexes can be aliased, so they are compared one at a time,
+ * from the highest priority segment to the lowest, instead of being used
+ * as case labels. An unknown index causes a panic.
+ */
+const char *
+vm_page_seg_name(unsigned int seg_index)
+{
+    if (seg_index == VM_PAGE_SEG_HIGHMEM)
+        return "HIGHMEM";
+
+    if (seg_index == VM_PAGE_SEG_DIRECTMAP)
+        return "DIRECTMAP";
+
+    if (seg_index == VM_PAGE_SEG_DMA32)
+        return "DMA32";
+
+    if (seg_index == VM_PAGE_SEG_DMA)
+        return "DMA";
+
+    panic("vm_page: invalid segment index");
+}
+
+/*
+ * Print, for each loaded segment, its total and free page counts along
+ * with the matching sizes in megabytes.
+ */
+void
+vm_page_info(void)
+{
+    struct vm_page_seg *seg;
+    const char *name;
+    unsigned long nr_pages;
+    unsigned int i;
+
+    for (i = 0; i < vm_page_segs_size; i++) {
+        seg = &vm_page_segs[i];
+        name = vm_page_seg_name(i);
+        nr_pages = (unsigned long)(seg->pages_end - seg->pages);
+
+        printk("vm_page: %s: pages: %lu (%luM), free: %lu (%luM)\n",
+               name, nr_pages, nr_pages >> (20 - PAGE_SHIFT),
+               seg->nr_free_pages, seg->nr_free_pages >> (20 - PAGE_SHIFT));
+    }
+}
diff --git a/vm/vm_page.h b/vm/vm_page.h
index dd571e2..23c8c47 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -1,315 +1,195 @@
-/*
- * Mach Operating System
- * Copyright (c) 1993-1988 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
/*
- * File: vm/vm_page.h
- * Author: Avadis Tevanian, Jr., Michael Wayne Young
- * Date: 1985
+ * Copyright (c) 2010-2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
*
- * Resident memory system definitions.
+ *
+ * Physical page management.
*/
-#ifndef _VM_VM_PAGE_H_
-#define _VM_VM_PAGE_H_
-
-#include <mach/boolean.h>
-#include <mach/vm_prot.h>
-#include <mach/vm_param.h>
-#include <vm/vm_object.h>
-#include <vm/vm_types.h>
-#include <kern/queue.h>
-#include <kern/lock.h>
+#ifndef _VM_VM_PAGE_H
+#define _VM_VM_PAGE_H
+#include <kern/assert.h>
+#include <kern/list.h>
+#include <kern/log2.h>
#include <kern/macros.h>
-#include <kern/sched_prim.h> /* definitions of wait/wakeup */
+//#include <kern/param.h>
+//#include <kern/stddef.h>
+//#include <kern/types.h>
+#include <machine/pmap.h>
-#if MACH_VM_DEBUG
-#include <mach_debug/hash_info.h>
-#endif
+/*
+ * Address/page conversion and rounding macros (not inline functions to
+ * be easily usable on both virtual and physical addresses, which may not
+ * have the same type size).
+ */
+#define vm_page_atop(addr) ((addr) >> PAGE_SHIFT)
+#define vm_page_ptoa(page) ((page) << PAGE_SHIFT)
+#define vm_page_trunc(addr) P2ALIGN(addr, PAGE_SIZE)
+#define vm_page_round(addr) P2ROUND(addr, PAGE_SIZE)
+#define vm_page_aligned(addr) P2ALIGNED(addr, PAGE_SIZE)
/*
- * Management of resident (logical) pages.
- *
- * A small structure is kept for each resident
- * page, indexed by page number. Each structure
- * is an element of several lists:
- *
- * A hash table bucket used to quickly
- * perform object/offset lookups
- *
- * A list of all pages for a given object,
- * so they can be quickly deactivated at
- * time of deallocation.
+ * Segment selectors.
*
- * An ordered list of pages due for pageout.
- *
- * In addition, the structure contains the object
- * and offset to which this page belongs (for pageout),
- * and sundry status bits.
+ * Selector-to-segment-list translation table :
+ * DMA DMA
+ * DMA32 DMA32 DMA
+ * DIRECTMAP DIRECTMAP DMA32 DMA
+ * HIGHMEM HIGHMEM DIRECTMAP DMA32 DMA
+ */
+#define VM_PAGE_SEL_DMA 0
+#define VM_PAGE_SEL_DMA32 1
+#define VM_PAGE_SEL_DIRECTMAP 2
+#define VM_PAGE_SEL_HIGHMEM 3
+
+/*
+ * Page usage types.
*
- * Fields in this structure are locked either by the lock on the
- * object that the page belongs to (O) or by the lock on the page
- * queues (P). [Some fields require that both locks be held to
- * change that field; holding either lock is sufficient to read.]
+ * Failing to allocate pmap pages will cause a kernel panic.
+ * TODO Obviously, this needs to be addressed, e.g. with a reserved pool of
+ * pages.
*/
+#define VM_PAGE_FREE 0 /* Page unused */
+#define VM_PAGE_RESERVED 1 /* Page reserved at boot time */
+#define VM_PAGE_TABLE 2 /* Page is part of the page table */
+#define VM_PAGE_PMAP 3 /* Page stores pmap-specific data */
+#define VM_PAGE_KMEM 4 /* Page is part of a kmem slab */
+#define VM_PAGE_OBJECT 5 /* Page is part of a VM object */
+#define VM_PAGE_KERNEL 6 /* Type for generic kernel allocations */
+/*
+ * Physical page descriptor.
+ *
+ * One descriptor exists for each page of every loaded segment.
+ */
struct vm_page {
- queue_chain_t pageq; /* queue info for FIFO
- * queue or free list (P) */
- queue_chain_t listq; /* all pages in same object (O) */
- struct vm_page *next; /* VP bucket link (O) */
-
- vm_object_t object; /* which object am I in (O,P) */
- vm_offset_t offset; /* offset into that object (O,P) */
-
- unsigned int wire_count:15, /* how many wired down maps use me?
- (O&P) */
- /* boolean_t */ inactive:1, /* page is in inactive list (P) */
- active:1, /* page is in active list (P) */
- laundry:1, /* page is being cleaned now (P)*/
- free:1, /* page is on free list (P) */
- reference:1, /* page has been used (P) */
- external:1, /* page considered external (P) */
- extcounted:1, /* page counted in ext counts (P) */
- busy:1, /* page is in transit (O) */
- wanted:1, /* someone is waiting for page (O) */
- tabled:1, /* page is in VP table (O) */
- fictitious:1, /* Physical page doesn't exist (O) */
- private:1, /* Page should not be returned to
- * the free list (O) */
- absent:1, /* Data has been requested, but is
- * not yet available (O) */
- error:1, /* Data manager was unable to provide
- * data due to error (O) */
- dirty:1, /* Page must be cleaned (O) */
- precious:1, /* Page is precious; data must be
- * returned even if clean (O) */
- overwriting:1; /* Request to unlock has been made
- * without having data. (O)
- * [See vm_object_overwrite] */
-
- vm_offset_t phys_addr; /* Physical address of page, passed
- * to pmap_enter (read-only) */
- vm_prot_t page_lock; /* Uses prohibited by data manager (O) */
- vm_prot_t unlock_request; /* Outstanding unlock request (O) */
+ struct list node; /* Links the page in a free list or a CPU pool */
+ unsigned short type; /* Usage type (VM_PAGE_FREE, ...) */
+ unsigned short seg_index; /* Index of the containing segment */
+ unsigned short order; /* Block order if head of a listed free block */
+ phys_addr_t phys_addr; /* Physical address of the page */
+ void *slab_priv; /* Presumably kmem slab private data; TODO confirm */
};
+/*
+ * Return the usage type of a page.
+ */
+static inline unsigned short
+vm_page_type(const struct vm_page *page)
+{
+    unsigned short type;
+
+    type = page->type;
+    return type;
+}
+
+void vm_page_set_type(struct vm_page *page, unsigned int order,
+ unsigned short type);
+
+/*
+ * Return the result of iorder2() applied to the number of pages needed
+ * to store size bytes.
+ */
+static inline unsigned int
+vm_page_order(size_t size)
+{
+    size_t rounded;
+
+    rounded = vm_page_round(size);
+    return iorder2(vm_page_atop(rounded));
+}
+
+/*
+ * Return the physical address of a page.
+ */
+static inline phys_addr_t
+vm_page_to_pa(const struct vm_page *page)
+{
+    phys_addr_t pa;
+
+    pa = page->phys_addr;
+    return pa;
+}
+
+/*
+ * Translate a physical address into its virtual address in the direct
+ * physical mapping.
+ *
+ * The physical address must be below VM_PAGE_DIRECTMAP_LIMIT.
+ */
+static inline unsigned long
+vm_page_direct_va(phys_addr_t pa)
+{
+    unsigned long va;
+
+    assert(pa < VM_PAGE_DIRECTMAP_LIMIT);
+
+    va = (unsigned long)pa + VM_MIN_DIRECTMAP_ADDRESS;
+    return va;
+}
+
+/*
+ * Translate a virtual address of the direct physical mapping into the
+ * physical address it maps.
+ *
+ * The virtual address must lie within
+ * [VM_MIN_DIRECTMAP_ADDRESS, VM_MAX_DIRECTMAP_ADDRESS).
+ */
+static inline phys_addr_t
+vm_page_direct_pa(unsigned long va)
+{
+    phys_addr_t pa;
+
+    assert(va >= VM_MIN_DIRECTMAP_ADDRESS);
+    assert(va < VM_MAX_DIRECTMAP_ADDRESS);
+
+    pa = va - VM_MIN_DIRECTMAP_ADDRESS;
+    return pa;
+}
+
+/*
+ * Return a pointer to the content of a page through the direct physical
+ * mapping.
+ */
+static inline void *
+vm_page_direct_ptr(const struct vm_page *page)
+{
+    phys_addr_t pa;
+
+    pa = vm_page_to_pa(page);
+    return (void *)vm_page_direct_va(pa);
+}
+
/*
- * For debugging, this macro can be defined to perform
- * some useful check on a page structure.
+ * Load physical memory into the vm_page module at boot time.
+ *
+ * The avail_start and avail_end parameters are used to maintain a simple
+ * heap for bootstrap allocations.
+ *
+ * All addresses must be page-aligned. Segments can be loaded in any order.
*/
+void vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end,
+ phys_addr_t avail_start, phys_addr_t avail_end);
-#define VM_PAGE_CHECK(mem)
+/*
+ * Return true if the vm_page module is completely initialized, false
+ * otherwise, in which case only vm_page_bootalloc() can be used for
+ * allocations.
+ */
+int vm_page_ready(void);
/*
- * Each pageable resident page falls into one of three lists:
+ * Set up the vm_page module.
+ *
+ * Architecture-specific code must have loaded segments before calling this
+ * function. Segments must comply with the selector-to-segment-list table,
+ * e.g. HIGHMEM is loaded if and only if DIRECTMAP, DMA32 and DMA are loaded,
+ * notwithstanding segment aliasing.
*
- * free
- * Available for allocation now.
- * inactive
- * Not referenced in any map, but still has an
- * object/offset-page mapping, and may be dirty.
- * This is the list of pages that should be
- * paged out next.
- * active
- * A list of pages which have been placed in
- * at least one physical map. This list is
- * ordered, in LRU-like fashion.
+ * Once this function returns, the vm_page module is ready, and normal
+ * allocation functions can be used.
*/
-
-extern
-vm_page_t vm_page_queue_free; /* memory free queue */
-extern
-vm_page_t vm_page_queue_fictitious; /* fictitious free queue */
-extern
-queue_head_t vm_page_queue_active; /* active memory queue */
-extern
-queue_head_t vm_page_queue_inactive; /* inactive memory queue */
-
-extern
-int vm_page_free_count; /* How many pages are free? */
-extern
-int vm_page_fictitious_count;/* How many fictitious pages are free? */
-extern
-int vm_page_active_count; /* How many pages are active? */
-extern
-int vm_page_inactive_count; /* How many pages are inactive? */
-extern
-int vm_page_wire_count; /* How many pages are wired? */
-extern
-int vm_page_free_target; /* How many do we want free? */
-extern
-int vm_page_free_min; /* When to wakeup pageout */
-extern
-int vm_page_inactive_target;/* How many do we want inactive? */
-extern
-int vm_page_free_reserved; /* How many pages reserved to do pageout */
-extern
-int vm_page_laundry_count; /* How many pages being laundered? */
-extern
-int vm_page_external_limit; /* Max number of pages for external objects */
-
-/* Only objects marked with the extcounted bit are included in this total.
- Pages which we scan for possible pageout, but which are not actually
- dirty, don't get considered against the external page limits any more
- in this way. */
-extern
-int vm_page_external_count; /* How many pages for external objects? */
-
-
-
-struct lock vm_page_queue_lock; /* lock on active and inactive page queues */
-decl_simple_lock_data(extern,vm_page_queue_free_lock)
- /* lock on free page queue */
-
-extern unsigned int vm_page_free_wanted;
- /* how many threads are waiting for memory */
-
-extern vm_offset_t vm_page_fictitious_addr;
- /* (fake) phys_addr of fictitious pages */
-
-extern void vm_page_bootstrap(
- vm_offset_t *startp,
- vm_offset_t *endp);
-extern void vm_page_module_init(void);
-
-extern void vm_page_create(
- vm_offset_t start,
- vm_offset_t end);
-extern vm_page_t vm_page_lookup(
- vm_object_t object,
- vm_offset_t offset);
-extern vm_page_t vm_page_grab_fictitious(void);
-extern void vm_page_release_fictitious(vm_page_t);
-extern boolean_t vm_page_convert(vm_page_t, boolean_t);
-extern void vm_page_more_fictitious(void);
-extern vm_page_t vm_page_grab(boolean_t);
-extern void vm_page_release(vm_page_t, boolean_t);
-extern void vm_page_wait(void (*)(void));
-extern vm_page_t vm_page_alloc(
- vm_object_t object,
- vm_offset_t offset);
-extern void vm_page_init(
- vm_page_t mem,
- vm_offset_t phys_addr);
-extern void vm_page_free(vm_page_t);
-extern void vm_page_activate(vm_page_t);
-extern void vm_page_deactivate(vm_page_t);
-extern void vm_page_rename(
- vm_page_t mem,
- vm_object_t new_object,
- vm_offset_t new_offset);
-extern void vm_page_insert(
- vm_page_t mem,
- vm_object_t object,
- vm_offset_t offset);
-extern void vm_page_remove(
- vm_page_t mem);
-
-extern void vm_page_zero_fill(vm_page_t);
-extern void vm_page_copy(vm_page_t src_m, vm_page_t dest_m);
-
-extern void vm_page_wire(vm_page_t);
-extern void vm_page_unwire(vm_page_t);
-
-#if MACH_VM_DEBUG
-extern unsigned int vm_page_info(
- hash_info_bucket_t *info,
- unsigned int count);
-#endif
+void vm_page_setup(void);
/*
- * Functions implemented as macros
+ * Make the given page managed by the vm_page module.
+ *
+ * If additional memory can be made usable after the VM system is initialized,
+ * it should be reported through this function.
*/
-
-#define PAGE_ASSERT_WAIT(m, interruptible) \
- MACRO_BEGIN \
- (m)->wanted = TRUE; \
- assert_wait((event_t) (m), (interruptible)); \
- MACRO_END
-
-#define PAGE_WAKEUP_DONE(m) \
- MACRO_BEGIN \
- (m)->busy = FALSE; \
- if ((m)->wanted) { \
- (m)->wanted = FALSE; \
- thread_wakeup(((event_t) m)); \
- } \
- MACRO_END
-
-#define PAGE_WAKEUP(m) \
- MACRO_BEGIN \
- if ((m)->wanted) { \
- (m)->wanted = FALSE; \
- thread_wakeup((event_t) (m)); \
- } \
- MACRO_END
-
-#define VM_PAGE_FREE(p) \
- MACRO_BEGIN \
- vm_page_lock_queues(); \
- vm_page_free(p); \
- vm_page_unlock_queues(); \
- MACRO_END
+void vm_page_manage(struct vm_page *page);
/*
- * Macro to be used in place of pmap_enter()
+ * Return the page descriptor for the given physical address.
*/
+struct vm_page * vm_page_lookup(phys_addr_t pa);
-#define PMAP_ENTER(pmap, virtual_address, page, protection, wired) \
- MACRO_BEGIN \
- pmap_enter( \
- (pmap), \
- (virtual_address), \
- (page)->phys_addr, \
- (protection) & ~(page)->page_lock, \
- (wired) \
- ); \
- MACRO_END
+/*
+ * Allocate a block of 2^order physical pages.
+ *
+ * The selector is used to determine the segments from which allocation can
+ * be attempted.
+ */
+struct vm_page * vm_page_alloc(unsigned int order, unsigned int selector,
+ unsigned short type);
-#define VM_PAGE_WAIT(continuation) vm_page_wait(continuation)
+/*
+ * Release a block of 2^order physical pages.
+ */
+void vm_page_free(struct vm_page *page, unsigned int order);
-#define vm_page_lock_queues() lock_write(&vm_page_queue_lock)
-#define vm_page_unlock_queues() lock_write_done(&vm_page_queue_lock)
-#define have_vm_page_queue_lock() have_write_lock(&vm_page_queue_lock)
+/*
+ * Return the name of the given segment.
+ */
+const char * vm_page_seg_name(unsigned int seg_index);
-#define VM_PAGE_QUEUES_REMOVE(mem) \
- MACRO_BEGIN \
- if (mem->active) { \
- queue_remove(&vm_page_queue_active, \
- mem, vm_page_t, pageq); \
- mem->active = FALSE; \
- vm_page_active_count--; \
- } \
- \
- if (mem->inactive) { \
- queue_remove(&vm_page_queue_inactive, \
- mem, vm_page_t, pageq); \
- mem->inactive = FALSE; \
- vm_page_inactive_count--; \
- } \
- MACRO_END
+/*
+ * Display internal information about the module.
+ */
+void vm_page_info(void);
-#endif /* _VM_VM_PAGE_H_ */
+#endif /* _VM_VM_PAGE_H */