author | Guillem Jover <guillem@debian.org> | 2006-02-02 04:29:43 +0000
---|---|---
committer | Guillem Jover <guillem@debian.org> | 2006-02-02 04:29:43 +0000
commit | 8e32f37752c300a27cffeb28c832982c51f845fa (patch) |
tree | 0aa08e60ec95512526704bf3370b61e56ea32989 /debian |
parent | 88f47da9bee3cdd5cebe4398d3ce3500ec0348de (diff) |
  * Make the Linux device drivers use dynamic memory allocation via the
    glue code.
    - debian/patches/61_vm_resident-zoned.patch: New file.
      Thanks to Gianluca Guida <glguida@gmail.com>.
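
For readers skimming the patch below, here is a minimal standalone sketch of the zone split it introduces: physical memory below 16 MB is treated as the DMA zone and everything above it as the normal zone. The predicate names mirror `pmap_is_dma()`/`pmap_is_normal()` from the patch; the `main()` test driver is purely illustrative and not part of the change.

```c
/* Sketch of the 16 MB DMA/normal zone split used by the patch.
 * pmap_is_dma()/pmap_is_normal() mirror the functions added to
 * i386/intel/pmap.c; the sample addresses are made up. */
#include <stdio.h>

typedef unsigned long vm_offset_t;

#define DMA_LIMIT (16UL * 1024 * 1024)   /* ISA DMA boundary */

static int pmap_is_dma(vm_offset_t phys)    { return phys <  DMA_LIMIT; }
static int pmap_is_normal(vm_offset_t phys) { return phys >= DMA_LIMIT; }

int main(void)
{
    /* A released page is queued on the free list matching its physical
     * address, so DMA-capable memory is never mixed with normal memory. */
    vm_offset_t samples[] = { 0x1000, DMA_LIMIT - 4096, DMA_LIMIT, 0x2000000 };
    for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++)
        printf("%#lx -> %s\n", samples[i],
               pmap_is_dma(samples[i]) ? "DMA zone"
                                       : pmap_is_normal(samples[i]) ? "normal zone"
                                                                    : "?");
    return 0;
}
```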
Diffstat (limited to 'debian')
-rw-r--r-- | debian/changelog | 4 |
-rw-r--r-- | debian/patches/61_vm_resident-zoned.patch | 1827 |
2 files changed, 1831 insertions, 0 deletions
diff --git a/debian/changelog b/debian/changelog index 6f279c6..fe13367 100644 --- a/debian/changelog +++ b/debian/changelog @@ -23,6 +23,10 @@ gnumach (1:20060201-1) UNRELEASED; urgency=low of hooking into SPL code. - debian/patches/60_glue_linux_soft_ints.patch: New file. Thanks to Gianluca Guida <glguida@gmail.com>. + * Make the Linux device drivers use dynamic memory allocation via the + glue code. + - debian/patches/61_vm_resident-zoned.patch: New file. + Thanks to Gianluca Guida <glguida@gmail.com>. -- Guillem Jover <guillem@debian.org> Wed, 1 Feb 2006 23:21:59 +0200 diff --git a/debian/patches/61_vm_resident-zoned.patch b/debian/patches/61_vm_resident-zoned.patch new file mode 100644 index 0000000..0290d14 --- /dev/null +++ b/debian/patches/61_vm_resident-zoned.patch @@ -0,0 +1,1827 @@ +#DPATCHLEVEL=1 + +2006-01-20 Gianluca Guida <glguida@gmail.com> + + * vm/pmap.h (pmap_is_dma, pmap_is_normal): New functions. + + * vm/page.h (VM_PAGE_DMA): New macro. + (vm_page_queue_free): Variable removed. + (vm_page_queue_free_dma, vm_page_queue_free_normal): New + variables. + (vm_page_physaddr_lookup, vm_page_grab_flags) + (vm_page_grab_contiguous_pages_flags): New functions. + (first_phys_addr, last_phys_addr): Declarations removed. + (phys_first_addr, phys_last_addr): New declarations. + + * vm/vm_resident.c (vm_page_free_bitmap) + (vm_page_free_bitmap_bitsz): New variables. + (vm_page_free_bitmap_set, vm_page_free_bitmap_unset) + (vm_page_free_bitmap_alloc, vm_page_array_init, vm_page_array_add) + (vm_page_physaddr_lookup): New functions. + (vm_page_bootstrap): Initialize vm_page_queue_free_normal and + vm_page_queue_free_dma. Call vm_page_free_bitmap_alloc. + (pmap_startup): Call vm_page_array_init. Add page to vm_page_array + for physaddress-to-vm_page_t resolution. Do not release page in + any particular order. + (vm_page_create): Add page to vm_page_array. + (vm_page_grab_flags, vm_page_grab_contiguous_pages_queue) + (vm_page_grab_contiguous_flags): New functions. + (vm_page_grab, vm_page_grab_contiguous_pages): Function rewritten. + (vm_page_release): Select proper free list based on page type (DMA + or non-DMA). Use Mach's queue handling funcitons not .next and + .prev pointers manually. Set vm_page_free_bitmap when freeing a + page. + + * i386/i386at/model_dep.c (use_all_mem): Always set to 1. + (init_alloc_aligned): Do not use any particular oder during + bootstrap allocation. + (alloc_dma_mem): Function removed. + + * kern/startup.c: Include <vm/pmap.h>. + (setup_main): Calculate memory size using phys_last_addr and + phys_first_addr. + + * i386/intel/pmap.c (vm_page_normal_first, vm_page_normal_last) + (vm_page_dma_first, vm_page_dma_last): New variables. + (pmap_bootstrap): Set proper values to vm_page_normal_* and + vm_page_dma_* based on system's memory. + (pmap_is_dma, pmap_is_normal): New functions. + + * kern/startup.c: Removed /* XXX */ comments, since phys_last_addr + and phys_first_addr are exported by pmap.c now. + + * linux/dev/glue/kmem.c: Include <mach/error.h> + (MEM_CHUNK_SIZE, MEM_CHUNKS, NBPW): Macro removed. + (pages_free, memlist, num_page_collect, linux_mem_avail): + Variables removed. + (memlist_dma, memlist_nml): New variables. + (linux_kmem_init): No initialization needed anymore (all code + removed). + (coalesce_blocks): Coalesce separately DMA memory blocks and + non-DMA memory blocks. + (linux_kmalloc): Check for DMA in priority argument. Select proper + memlist to get blocks from. Pass dma information to + __get_free_pages. 
+ (linux_kfree): Select proper memlist to free memory into. + (collect_kmalloc_pages): Function removed. + (collect_kmalloc_pages_dma, collect_kmalloc_pages_nml) + (linux_kmem_collect): New functions. + (__get_free_pages, free_pages): Functions rewritten. + + * linux/dev/init/main.c (CONTIG_ALLOC, NBPW): Macro removed. + (CONTIG_ALLOC_ORDER): New macro. + (linux_init): Use __getfreepages to allocate PCI initialization + memory and free_pages to free it. + (alloc_contig_mem, free_contig_mem): Functions removed. + + * vm/vm_pageout.c (vm_pageout_scan) [LINUX_DEV]: Call + linux_kmem_collect. + + +diff -ru gnumach-vanilla/i386/i386at/model_dep.c gnumach-vm_resident/i386/i386at/model_dep.c +--- gnumach-vanilla/i386/i386at/model_dep.c 2004-11-28 18:29:35.000000000 +0100 ++++ gnumach-vm_resident/i386/i386at/model_dep.c 2006-01-26 00:37:31.000000000 +0100 +@@ -86,16 +86,7 @@ + /* Configuration parameter: + if zero, only use physical memory in the low 16MB of addresses. + Only SCSI still has DMA problems. */ +-#ifdef LINUX_DEV +-#define use_all_mem 1 +-#else +-#include "nscsi.h" +-#if NSCSI > 0 +-#define use_all_mem 0 +-#else + #define use_all_mem 1 +-#endif +-#endif + + extern char version[]; + +@@ -468,7 +459,6 @@ + vm_offset_t addr; + extern char start[], end[]; + int i; +- static int wrapped = 0; + + /* Memory regions to skip. */ + vm_offset_t cmdline_start_pa = boot_info.flags & MULTIBOOT_CMDLINE +@@ -488,25 +478,8 @@ + /* Page-align the start address. */ + avail_next = round_page(avail_next); + +- /* Start with memory above 16MB, reserving the low memory for later. */ +- if (use_all_mem && !wrapped && phys_last_addr > 16 * 1024*1024) +- { +- if (avail_next < 16 * 1024*1024) +- avail_next = 16 * 1024*1024; +- else if (avail_next == phys_last_addr) +- { +- /* We have used all the memory above 16MB, so now start on +- the low memory. This will wind up at the end of the list +- of free pages, so it should not have been allocated to any +- other use in early initialization before the Linux driver +- glue initialization needs to allocate low memory. */ +- avail_next = 0x1000; +- wrapped = 1; +- } +- } +- + /* Check if we have reached the end of memory. */ +- if (avail_next == (wrapped ? 16 * 1024*1024 : phys_last_addr)) ++ if (avail_next == phys_last_addr) + return FALSE; + + /* Tentatively assign the current location to the caller. */ +@@ -599,107 +572,3 @@ + !(((boot_info.mem_lower * 1024) <= x) && (x < 1024*1024))); + } + +-#ifndef NBBY +-#define NBBY 8 +-#endif +-#ifndef NBPW +-#define NBPW (NBBY * sizeof(int)) +-#endif +-#define DMA_MAX (16*1024*1024) +- +-/* +- * Allocate contiguous pages below 16 MB +- * starting at specified boundary for DMA. +- */ +-vm_offset_t +-alloc_dma_mem(size, align) +- vm_size_t size; +- vm_offset_t align; +-{ +- int *bits, i, j, k, n; +- int npages, count, bit, mask; +- int first_page, last_page; +- vm_offset_t addr; +- vm_page_t p, prevp; +- +- npages = round_page(size) / PAGE_SIZE; +- mask = align ? (align - 1) / PAGE_SIZE : 0; +- +- /* +- * Allocate bit array. +- */ +- n = ((DMA_MAX / PAGE_SIZE) + NBPW - 1) / NBPW; +- i = n * NBPW; +- bits = (unsigned *)kalloc(i); +- if (bits == 0) { +- printf("alloc_dma_mem: unable alloc bit array\n"); +- return (0); +- } +- bzero((char *)bits, i); +- +- /* +- * Walk the page free list and set a bit for +- * every usable page in bit array. 
+- */ +- simple_lock(&vm_page_queue_free_lock); +- for (p = vm_page_queue_free; p; p = (vm_page_t)p->pageq.next) { +- if (p->phys_addr < DMA_MAX) { +- i = p->phys_addr / PAGE_SIZE; +- bits[i / NBPW] |= 1 << (i % NBPW); +- } +- } +- +- /* +- * Search for contiguous pages by scanning bit array. +- */ +- for (i = 0, first_page = -1; i < n; i++) { +- for (bit = 1, j = 0; j < NBPW; j++, bit <<= 1) { +- if (bits[i] & bit) { +- if (first_page < 0) { +- k = i * NBPW + j; +- if (!mask +- || (((k & mask) + npages) +- <= mask + 1)) { +- first_page = k; +- if (npages == 1) +- goto found; +- count = 1; +- } +- } else if (++count == npages) +- goto found; +- } else +- first_page = -1; +- } +- } +- addr = 0; +- goto out; +- +- found: +- /* +- * Remove pages from the free list. +- */ +- addr = first_page * PAGE_SIZE; +- last_page = first_page + npages; +- vm_page_free_count -= npages; +- p = vm_page_queue_free; +- prevp = 0; +- while (1) { +- i = p->phys_addr / PAGE_SIZE; +- if (i >= first_page && i < last_page) { +- if (prevp) +- prevp->pageq.next = p->pageq.next; +- else +- vm_page_queue_free = (vm_page_t)p->pageq.next; +- p->free = FALSE; +- if (--npages == 0) +- break; +- } else +- prevp = p; +- p = (vm_page_t)p->pageq.next; +- } +- +- out: +- simple_unlock(&vm_page_queue_free_lock); +- kfree((vm_offset_t)bits, n * NBPW); +- return (addr); +-} +diff -ru gnumach-vanilla/i386/intel/pmap.c gnumach-vm_resident/i386/intel/pmap.c +--- gnumach-vanilla/i386/intel/pmap.c 2001-04-05 08:39:21.000000000 +0200 ++++ gnumach-vm_resident/i386/intel/pmap.c 2006-01-26 00:37:31.000000000 +0100 +@@ -584,6 +584,11 @@ + return(virt); + } + ++unsigned long vm_page_normal_first = 16*1024*1024; ++unsigned long vm_page_normal_last = 0; ++unsigned long vm_page_dma_first = 0; ++unsigned long vm_page_dma_last = 16*1024*1024 - 1; ++ + /* + * Bootstrap the system enough to run with virtual memory. + * Allocate the kernel page directory and page tables, +@@ -703,6 +708,25 @@ + va += INTEL_PGBYTES; + } + } ++ ++ if (phys_last_addr <= 16*1024*1024) { ++ /* Set so to never get TRUE from isnormal(). */ ++ vm_page_normal_first = phys_last_addr + 1; ++ vm_page_normal_last = 0; ++ ++ /* Only DMA memory. */ ++ vm_page_dma_first = 0; ++ vm_page_dma_last = phys_last_addr; ++ } else { ++ vm_page_normal_first = 16*1024*1024; ++ vm_page_normal_last = phys_last_addr; ++ ++ vm_page_dma_first = 0; ++ vm_page_dma_last = 16*1024*1024 - 1; ++ } ++ ++ ++ + } + + #if i860 +@@ -2346,6 +2370,27 @@ + return (phys_attribute_test(phys, PHYS_REFERENCED)); + } + ++/* ++ * pmap_is_dma ++ * ++ * Return TRUE if PHYS is in the DMA zone range. ++ */ ++boolean_t pmap_is_dma (vm_offset_t phys) ++{ ++ return (phys < 16*1024*1024); ++} ++ ++/* ++ * pmap_is_normal: ++ * ++ * Return TRUE if PHYS is in the normal zone range. ++ */ ++boolean_t pmap_is_normal (vm_offset_t phys) ++{ ++ return (phys >= 16*1024*1024); ++} ++ ++ + #if NCPUS > 1 + /* + * TLB Coherence Code (TLB "shootdown" code) +diff -ru gnumach-vanilla/kern/startup.c gnumach-vm_resident/kern/startup.c +--- gnumach-vanilla/kern/startup.c 2001-04-05 08:39:20.000000000 +0200 ++++ gnumach-vm_resident/kern/startup.c 2006-01-26 00:37:31.000000000 +0100 +@@ -80,9 +80,6 @@ + extern void action_thread(); + #endif /* NCPUS > 1 */ + +-/* XX */ +-extern vm_offset_t phys_first_addr, phys_last_addr; +- + /* + * Running in virtual memory, on the interrupt stack. + * Does not return. Dispatches initial thread. 
+@@ -122,7 +119,7 @@ + machine_init(); + + machine_info.max_cpus = NCPUS; +- machine_info.memory_size = phys_last_addr - phys_first_addr; /* XXX mem_size */ ++ machine_info.memory_size = phys_last_addr - phys_first_addr; + machine_info.avail_cpus = 0; + machine_info.major_version = KERNEL_MAJOR_VERSION; + machine_info.minor_version = KERNEL_MINOR_VERSION; +diff -ru gnumach-vanilla/linux/dev/glue/kmem.c gnumach-vm_resident/linux/dev/glue/kmem.c +--- gnumach-vanilla/linux/dev/glue/kmem.c 1999-04-26 07:45:35.000000000 +0200 ++++ gnumach-vm_resident/linux/dev/glue/kmem.c 2006-01-26 04:10:52.000000000 +0100 +@@ -25,6 +25,7 @@ + #include <sys/types.h> + + #include <mach/mach_types.h> ++#include <mach/error.h> + #include <mach/vm_param.h> + + #include <kern/assert.h> +@@ -40,22 +41,11 @@ + + #include <asm/system.h> + +-extern void *alloc_contig_mem (unsigned, unsigned, unsigned, vm_page_t *); + extern int printf (const char *, ...); + +-/* Amount of memory to reserve for Linux memory allocator. +- We reserve 64K chunks to stay within DMA limits. +- Increase MEM_CHUNKS if the kernel is running out of memory. */ +-#define MEM_CHUNK_SIZE (64 * 1024) +-#define MEM_CHUNKS 7 +- + /* Mininum amount that linux_kmalloc will allocate. */ + #define MIN_ALLOC 12 + +-#ifndef NBPW +-#define NBPW 32 +-#endif +- + /* Memory block header. */ + struct blkhdr + { +@@ -70,62 +60,17 @@ + struct pagehdr *next; /* next header in list */ + }; + +-/* This structure describes a memory chunk. */ +-struct chunkhdr +-{ +- unsigned long start; /* start address */ +- unsigned long end; /* end address */ +- unsigned long bitmap; /* busy/free bitmap of pages */ +-}; +- +-/* Chunks from which pages are allocated. */ +-static struct chunkhdr pages_free[MEM_CHUNKS]; +- + /* Memory list maintained by linux_kmalloc. */ +-static struct pagehdr *memlist; ++static struct pagehdr *memlist_dma = NULL; ++static struct pagehdr *memlist_nml = NULL; + + /* Some statistics. */ + int num_block_coalesce = 0; +-int num_page_collect = 0; +-int linux_mem_avail; + + /* Initialize the Linux memory allocator. */ + void + linux_kmem_init () + { +- int i, j; +- vm_page_t p, pages; +- +- for (i = 0; i < MEM_CHUNKS; i++) +- { +- /* Allocate memory. */ +- pages_free[i].start = (unsigned long) alloc_contig_mem (MEM_CHUNK_SIZE, +- 16 * 1024 * 1024, +- 0xffff, &pages); +- +- assert (pages_free[i].start); +- assert ((pages_free[i].start & 0xffff) == 0); +- +- /* Sanity check: ensure pages are contiguous and within DMA limits. */ +- for (p = pages, j = 0; j < MEM_CHUNK_SIZE - PAGE_SIZE; j += PAGE_SIZE) +- { +- assert (p->phys_addr < 16 * 1024 * 1024); +- assert (p->phys_addr + PAGE_SIZE +- == ((vm_page_t) p->pageq.next)->phys_addr); +- +- p = (vm_page_t) p->pageq.next; +- } +- +- pages_free[i].end = pages_free[i].start + MEM_CHUNK_SIZE; +- +- /* Initialize free page bitmap. */ +- pages_free[i].bitmap = 0; +- j = MEM_CHUNK_SIZE >> PAGE_SHIFT; +- while (--j >= 0) +- pages_free[i].bitmap |= 1 << j; +- } +- +- linux_mem_avail = (MEM_CHUNKS * MEM_CHUNK_SIZE) >> PAGE_SHIFT; + } + + /* Return the number by which the page size should be +@@ -178,7 +123,40 @@ + + num_block_coalesce++; + +- for (ph = memlist; ph; ph = ph->next) ++ /* Coalesce DMA memory. */ ++ for (ph = memlist_dma; ph; ph = ph->next) ++ { ++ bh = (struct blkhdr *) (ph + 1); ++ ebh = (struct blkhdr *) ((void *) ph + ph->size); ++ while (1) ++ { ++ /* Skip busy blocks. 
*/ ++ while (bh < ebh && !bh->free) ++ bh = (struct blkhdr *) ((void *) (bh + 1) + bh->size); ++ if (bh == ebh) ++ break; ++ ++ /* Merge adjacent free blocks. */ ++ while (1) ++ { ++ bhp = (struct blkhdr *) ((void *) (bh + 1) + bh->size); ++ if (bhp == ebh) ++ { ++ bh = bhp; ++ break; ++ } ++ if (!bhp->free) ++ { ++ bh = (struct blkhdr *) ((void *) (bhp + 1) + bhp->size); ++ break; ++ } ++ bh->size += bhp->size + sizeof (struct blkhdr); ++ } ++ } ++ } ++ ++ /* Coalesce non-DMA memory. */ ++ for (ph = memlist_nml; ph; ph = ph->next) + { + bh = (struct blkhdr *) (ph + 1); + ebh = (struct blkhdr *) ((void *) ph + ph->size); +@@ -216,20 +194,26 @@ + void * + linux_kmalloc (unsigned int size, int priority) + { +- int order, coalesced = 0; ++ int order, coalesced = 0, dma = 0; + unsigned flags; +- struct pagehdr *ph; ++ struct pagehdr *ph, **memlistp; + struct blkhdr *bh, *new_bh; + ++ if (priority & GFP_DMA) ++ { ++ memlistp = &memlist_dma; ++ dma = 1; ++ } ++ else ++ { ++ memlistp = &memlist_nml; ++ dma = 0; ++ } + if (size < MIN_ALLOC) + size = MIN_ALLOC; + else + size = (size + sizeof (int) - 1) & ~(sizeof (int) - 1); + +- assert (size <= (MEM_CHUNK_SIZE +- - sizeof (struct pagehdr) +- - sizeof (struct blkhdr))); +- + save_flags (flags); + cli (); + +@@ -238,7 +222,7 @@ + + /* Walk the page list and find the first free block with size + greater than or equal to the one required. */ +- for (ph = memlist; ph; ph = ph->next) ++ for (ph = *memlistp; ph; ph = ph->next) + { + bh = (struct blkhdr *) (ph + 1); + while (bh < (struct blkhdr *) ((void *) ph + ph->size)) +@@ -278,16 +262,26 @@ + order = get_page_order (size + + sizeof (struct pagehdr) + + sizeof (struct blkhdr)); +- ph = (struct pagehdr *) __get_free_pages (GFP_KERNEL, order, ~0UL); ++ ph = (struct pagehdr *) __get_free_pages (GFP_KERNEL, order, dma); + if (!ph) + { + restore_flags (flags); + return NULL; + } + ++ /* __get_free_pages may return DMA memory if non-DMA memory is not ++ free so we check back here for mem type. */ ++ if (pmap_is_dma ((unsigned long)ph)) ++ { ++ memlistp = &memlist_dma; ++ } ++ else ++ { ++ memlistp = &memlist_nml; ++ } + ph->size = PAGE_SIZE << order; +- ph->next = memlist; +- memlist = ph; ++ ph->next = *memlistp; ++ *memlistp = ph; + bh = (struct blkhdr *) (ph + 1); + bh->free = 0; + bh->size = ph->size - sizeof (struct pagehdr) - sizeof (struct blkhdr); +@@ -310,17 +304,28 @@ + linux_kfree (void *p) + { + unsigned flags; ++ vm_offset_t addr; + struct blkhdr *bh; +- struct pagehdr *ph; ++ struct pagehdr *ph, **memlistp; + + assert (((int) p & (sizeof (int) - 1)) == 0); + ++ addr = (vm_offset_t) p; ++ ++ if (pmap_is_dma (addr)) ++ { ++ memlistp = &memlist_dma; ++ } ++ else ++ { ++ memlistp = &memlist_nml; ++ } + save_flags (flags); + cli (); + + check_page_list (__LINE__); + +- for (ph = memlist; ph; ph = ph->next) ++ for (ph = *memlistp; ph; ph = ph->next) + if (p >= (void *) ph && p < (void *) ph + ph->size) + break; + +@@ -339,10 +344,10 @@ + restore_flags (flags); + } + +-/* Free any pages that are not in use. ++/* Free any DMA page that are not in use. + Called by __get_free_pages when pages are running low. 
*/ + static void +-collect_kmalloc_pages () ++collect_kmalloc_pages_dma () + { + struct blkhdr *bh; + struct pagehdr *ph, **prev_ph; +@@ -353,8 +358,8 @@ + + check_page_list (__LINE__); + +- ph = memlist; +- prev_ph = &memlist; ++ ph = memlist_dma; ++ prev_ph = &memlist_dma; + while (ph) + { + bh = (struct blkhdr *) (ph + 1); +@@ -373,68 +378,91 @@ + + check_page_list (__LINE__); + } +- +-/* Allocate ORDER + 1 number of physically contiguous pages. +- PRIORITY and DMA are not used in Mach. +- +- XXX: This needs to be dynamic. To do that we need to make +- the Mach page manipulation routines interrupt safe and they +- must provide machine dependant hooks. */ +-unsigned long +-__get_free_pages (int priority, unsigned long order, int dma) ++/* Free any non-DMA page that are not in use. ++ Called by __get_free_pages when pages are running low. */ ++static void ++collect_kmalloc_pages_nml () + { +- int i, pages_collected = 0; +- unsigned flags, bits, off, j, len; ++ struct blkhdr *bh; ++ struct pagehdr *ph, **prev_ph; + +- assert ((PAGE_SIZE << order) <= MEM_CHUNK_SIZE); ++ check_page_list (__LINE__); + +- /* Construct bitmap of contiguous pages. */ +- bits = 0; +- j = 0; +- len = 0; +- while (len < (PAGE_SIZE << order)) +- { +- bits |= 1 << j++; +- len += PAGE_SIZE; +- } ++ coalesce_blocks (); + +- save_flags (flags); +- cli (); +-again: ++ check_page_list (__LINE__); + +- /* Search each chunk for the required number of contiguous pages. */ +- for (i = 0; i < MEM_CHUNKS; i++) ++ ph = memlist_nml; ++ prev_ph = &memlist_nml; ++ while (ph) + { +- off = 0; +- j = bits; +- while (MEM_CHUNK_SIZE - off >= (PAGE_SIZE << order)) ++ bh = (struct blkhdr *) (ph + 1); ++ if (bh->free && (void *) (bh + 1) + bh->size == (void *) ph + ph->size) + { +- if ((pages_free[i].bitmap & j) == j) +- { +- pages_free[i].bitmap &= ~j; +- linux_mem_avail -= order + 1; +- restore_flags (flags); +- return pages_free[i].start + off; +- } +- j <<= 1; +- off += PAGE_SIZE; ++ *prev_ph = ph->next; ++ free_pages ((unsigned long) ph, get_page_order (ph->size)); ++ ph = *prev_ph; ++ } ++ else ++ { ++ prev_ph = &ph->next; ++ ph = ph->next; + } + } + +- /* Allocation failed; collect kmalloc and buffer pages +- and try again. */ +- if (!pages_collected) +- { +- num_page_collect++; +- collect_kmalloc_pages (); +- pages_collected = 1; +- goto again; +- } ++ check_page_list (__LINE__); ++} + +- printf ("%s:%d: __get_free_pages: ran out of pages\n", __FILE__, __LINE__); ++/* Allocate ORDER + 1 number of physically contiguous pages. ++ PRIORITY and DMA are not used in Mach. */ ++unsigned long ++__get_free_pages (int priority, unsigned long order, int dma) ++{ ++ unsigned long pagenum; ++ unsigned vm_page_flags = 0; ++ unsigned long p; ++ ++ if (dma) ++ vm_page_flags |= VM_PAGE_DMA; ++ ++ pagenum = (1 << order); ++ ++ p = 0; ++ ++ if (pagenum > 1) ++ { ++ /* ++ * Contiguous grabbing is slow and may fail. ++ * We reserve it for special occasions. 
++ */ ++ mach_error_t err; ++ vm_offset_t vmo; ++ ++ err = vm_page_grab_contiguous_pages_flags (pagenum, &vmo, FALSE, ++ vm_page_flags, 0); ++ p = (unsigned long) vmo; ++ ++ if (err) ++ return 0; ++ } ++ else ++ { ++ vm_page_t m; ++ ++ m = vm_page_grab_flags (FALSE, vm_page_flags); ++ if (!m) ++ return 0; ++ ++ p = m->phys_addr; ++ ++ if (m->tabled) ++ { ++ printf ("Error while getting page of order %ld\n", order); ++ return 0; ++ } ++ }; + +- restore_flags (flags); +- return 0; ++ return p; + } + + /* Free ORDER + 1 number of physically +@@ -442,36 +470,20 @@ + void + free_pages (unsigned long addr, unsigned long order) + { +- int i; +- unsigned flags, bits, len, j; +- +- assert ((addr & PAGE_MASK) == 0); ++ unsigned long i, pagenum; + +- for (i = 0; i < MEM_CHUNKS; i++) +- if (addr >= pages_free[i].start && addr < pages_free[i].end) +- break; +- +- assert (i < MEM_CHUNKS); ++ pagenum = 1 << order; + +- /* Contruct bitmap of contiguous pages. */ +- len = 0; +- j = 0; +- bits = 0; +- while (len < (PAGE_SIZE << order)) ++ for (i = 0; i < pagenum; i++) + { +- bits |= 1 << j++; +- len += PAGE_SIZE; +- } +- bits <<= (addr - pages_free[i].start) >> PAGE_SHIFT; ++ vm_page_t m; + +- save_flags (flags); +- cli (); +- +- assert ((pages_free[i].bitmap & bits) == 0); ++ m = vm_page_physaddr_lookup (addr + (i * PAGE_SIZE)); ++ if (m == VM_PAGE_NULL) ++ panic ("couldn't lookup page for address %lx", addr + (i * PAGE_SIZE)); + +- pages_free[i].bitmap |= bits; +- linux_mem_avail += order + 1; +- restore_flags (flags); ++ vm_page_free (m); ++ } + } + + +@@ -579,3 +591,9 @@ + vmalloc_list_insert (addr, round_page (size)); + return (void *) addr; + } ++ ++void linux_kmem_collect (void) ++{ ++ collect_kmalloc_pages_nml (); ++ collect_kmalloc_pages_dma (); ++} +diff -ru gnumach-vanilla/linux/dev/init/main.c gnumach-vm_resident/linux/dev/init/main.c +--- gnumach-vanilla/linux/dev/init/main.c 1999-04-26 07:49:06.000000000 +0200 ++++ gnumach-vm_resident/linux/dev/init/main.c 2006-01-26 00:37:31.000000000 +0100 +@@ -82,9 +82,7 @@ + static void calibrate_delay (void); + + extern int hz; +-extern vm_offset_t phys_last_addr; + +-extern void *alloc_contig_mem (unsigned, unsigned, unsigned, vm_page_t *); + extern void free_contig_mem (vm_page_t); + extern void init_IRQ (void); + extern void restore_IRQ (void); +@@ -105,10 +103,8 @@ + extern void linux_sched_init (void); + + +-/* +- * Amount of contiguous memory to allocate for initialization. +- */ +-#define CONTIG_ALLOC (512 * 1024) ++/* Amount of contiguous memory to allocate for initialization. */ ++#define CONTIG_ALLOC_ORDER (7) /* 512kb. */ + + /* + * Initialize Linux drivers. +@@ -117,7 +113,7 @@ + linux_init (void) + { + int addr; +- unsigned memory_start, memory_end; ++ unsigned long memory_start, memory_end; + vm_page_t pages; + + /* +@@ -142,40 +138,34 @@ + memcpy ((char *) &drive_info + 16, + (void *) ((addr & 0xffff) + ((addr >> 12) & 0xffff0)), 16); + +- /* +- * Initialize Linux memory allocator. +- */ ++ /* Initialize Linux memory allocator. */ + linux_kmem_init (); + +- /* +- * Allocate contiguous memory below 16 MB. +- */ +- memory_start = (unsigned long) alloc_contig_mem (CONTIG_ALLOC, +- 16 * 1024 * 1024, +- 0, &pages); +- if (memory_start == 0) +- panic ("linux_init: alloc_contig_mem failed"); +- memory_end = memory_start + CONTIG_ALLOC; ++ /* Allocate contiguous memory below 16 MB. 
*/ ++ memory_start = __get_free_pages (GFP_ATOMIC, CONTIG_ALLOC_ORDER, 1); ++ if (!memory_start) ++ panic ("linux_init: alloc PCI memory failed"); ++ memory_end = memory_start + ((1 << CONTIG_ALLOC_ORDER) * PAGE_SIZE); + +- /* +- * Initialize PCI bus. +- */ ++ /* Initialize PCI bus. */ + memory_start = pci_init (memory_start, memory_end); + + if (memory_start > memory_end) + panic ("linux_init: ran out memory"); + +- /* +- * Free unused memory. +- */ +- while (pages && pages->phys_addr < round_page (memory_start)) +- pages = (vm_page_t) pages->pageq.next; +- if (pages) +- free_contig_mem (pages); ++ /* Free unused memory. */ ++ { ++ unsigned long memaddr; ++ ++ for (memaddr = round_page (memory_start); ++ memaddr < memory_end; ++ memaddr += PAGE_SIZE) ++ { ++ free_pages (memaddr, 0); ++ } ++ } + +- /* +- * Initialize devices. +- */ ++ /* Initialize devices. */ + #ifdef CONFIG_INET + linux_net_emulation_init (); + #endif +@@ -186,148 +176,6 @@ + linux_auto_config = 0; + } + +-#ifndef NBPW +-#define NBPW 32 +-#endif +- +-/* +- * Allocate contiguous memory with the given constraints. +- * This routine is horribly inefficient but it is presently +- * only used during initialization so it's not that bad. +- */ +-void * +-alloc_contig_mem (unsigned size, unsigned limit, +- unsigned mask, vm_page_t * pages) +-{ +- int i, j, bits_len; +- unsigned *bits, len; +- void *m; +- vm_page_t p, page_list, tail, prev; +- vm_offset_t addr, max_addr; +- +- if (size == 0) +- return (NULL); +- size = round_page (size); +- if ((size >> PAGE_SHIFT) > vm_page_free_count) +- return (NULL); +- +- /* Allocate bit array. */ +- max_addr = phys_last_addr; +- if (max_addr > limit) +- max_addr = limit; +- bits_len = ((((max_addr >> PAGE_SHIFT) + NBPW - 1) / NBPW) +- * sizeof (unsigned)); +- bits = (unsigned *) kalloc (bits_len); +- if (!bits) +- return (NULL); +- memset (bits, 0, bits_len); +- +- /* +- * Walk the page free list and set a bit for every usable page. +- */ +- simple_lock (&vm_page_queue_free_lock); +- p = vm_page_queue_free; +- while (p) +- { +- if (p->phys_addr < limit) +- (bits[(p->phys_addr >> PAGE_SHIFT) / NBPW] +- |= 1 << ((p->phys_addr >> PAGE_SHIFT) % NBPW)); +- p = (vm_page_t) p->pageq.next; +- } +- +- /* +- * Scan bit array for contiguous pages. +- */ +- len = 0; +- m = NULL; +- for (i = 0; len < size && i < bits_len / sizeof (unsigned); i++) +- for (j = 0; len < size && j < NBPW; j++) +- if (!(bits[i] & (1 << j))) +- { +- len = 0; +- m = NULL; +- } +- else +- { +- if (len == 0) +- { +- addr = ((vm_offset_t) (i * NBPW + j) +- << PAGE_SHIFT); +- if ((addr & mask) == 0) +- { +- len += PAGE_SIZE; +- m = (void *) addr; +- } +- } +- else +- len += PAGE_SIZE; +- } +- +- if (len != size) +- { +- simple_unlock (&vm_page_queue_free_lock); +- kfree ((vm_offset_t) bits, bits_len); +- return (NULL); +- } +- +- /* +- * Remove pages from free list +- * and construct list to return to caller. 
+- */ +- page_list = NULL; +- for (len = 0; len < size; len += PAGE_SIZE, addr += PAGE_SIZE) +- { +- prev = NULL; +- for (p = vm_page_queue_free; p; p = (vm_page_t) p->pageq.next) +- { +- if (p->phys_addr == addr) +- break; +- prev = p; +- } +- if (!p) +- panic ("alloc_contig_mem: page not on free list"); +- if (prev) +- prev->pageq.next = p->pageq.next; +- else +- vm_page_queue_free = (vm_page_t) p->pageq.next; +- p->free = FALSE; +- p->pageq.next = NULL; +- if (!page_list) +- page_list = tail = p; +- else +- { +- tail->pageq.next = (queue_entry_t) p; +- tail = p; +- } +- vm_page_free_count--; +- } +- +- simple_unlock (&vm_page_queue_free_lock); +- kfree ((vm_offset_t) bits, bits_len); +- if (pages) +- *pages = page_list; +- return (m); +-} +- +-/* +- * Free memory allocated by alloc_contig_mem. +- */ +-void +-free_contig_mem (vm_page_t pages) +-{ +- int i; +- vm_page_t p; +- +- for (p = pages, i = 0; p->pageq.next; p = (vm_page_t) p->pageq.next, i++) +- p->free = TRUE; +- p->free = TRUE; +- simple_lock (&vm_page_queue_free_lock); +- vm_page_free_count += i + 1; +- p->pageq.next = (queue_entry_t) vm_page_queue_free; +- vm_page_queue_free = pages; +- simple_unlock (&vm_page_queue_free_lock); +-} +- + /* This is the number of bits of precision for the loops_per_second. Each + * bit takes on average 1.5/HZ seconds. This (like the original) is a little + * better than 1% +diff -ru gnumach-vanilla/vm/pmap.h gnumach-vm_resident/vm/pmap.h +--- gnumach-vanilla/vm/pmap.h 2001-04-05 08:39:21.000000000 +0200 ++++ gnumach-vm_resident/vm/pmap.h 2006-01-26 00:37:31.000000000 +0100 +@@ -174,6 +174,15 @@ + /* Return modify bit */ + boolean_t pmap_is_modified(vm_offset_t pa); + ++/* ++ * Page Zones routines ++ */ ++ ++/* Physical address is in DMA capable zone. */ ++boolean_t pmap_is_dma (vm_offset_t pa); ++ ++/* Physical address is in non-DMA capable zone. */ ++boolean_t pmap_is_normal (vm_offset_t pa); + + /* + * Statistics routines +diff -ru gnumach-vanilla/vm/vm_page.h gnumach-vm_resident/vm/vm_page.h +--- gnumach-vanilla/vm/vm_page.h 1999-06-28 02:41:02.000000000 +0200 ++++ gnumach-vm_resident/vm/vm_page.h 2006-01-26 00:37:31.000000000 +0100 +@@ -152,22 +152,26 @@ + * ordered, in LRU-like fashion. + */ + ++#define VM_PAGE_DMA 0x1 ++ ++extern ++queue_head_t vm_page_queue_free_normal; /* normal memory free queue */ + extern +-vm_page_t vm_page_queue_free; /* memory free queue */ ++queue_head_t vm_page_queue_free_dma; /* DMA-capable memory free queue */ + extern +-vm_page_t vm_page_queue_fictitious; /* fictitious free queue */ ++vm_page_t vm_page_queue_fictitious; /* fictitious free queue */ + extern +-queue_head_t vm_page_queue_active; /* active memory queue */ ++queue_head_t vm_page_queue_active; /* active memory queue */ + extern + queue_head_t vm_page_queue_inactive; /* inactive memory queue */ + + extern +-vm_offset_t first_phys_addr; /* physical address for first_page */ ++vm_offset_t phys_first_addr;/* physical address for first_page */ + extern +-vm_offset_t last_phys_addr; /* physical address for last_page */ ++vm_offset_t phys_last_addr; /* physical address for last_page */ + + extern +-int vm_page_free_count; /* How many pages are free? */ ++int vm_page_free_count; /* How many pages are free? */ + extern + int vm_page_fictitious_count;/* How many fictitious pages are free? 
*/ + extern +@@ -220,11 +224,20 @@ + extern vm_page_t vm_page_lookup( + vm_object_t object, + vm_offset_t offset); ++extern vm_page_t vm_page_physaddr_lookup (vm_offset_t); + extern vm_page_t vm_page_grab_fictitious(void); + extern void vm_page_release_fictitious(vm_page_t); + extern boolean_t vm_page_convert(vm_page_t, boolean_t); + extern void vm_page_more_fictitious(void); + extern vm_page_t vm_page_grab(boolean_t); ++extern vm_page_t vm_page_grab_flags(boolean_t, unsigned); ++extern kern_return_t vm_page_grab_contiguous_pages_flags( ++ int npages, ++ vm_offset_t *phys_address, ++ boolean_t external, ++ unsigned flags, ++ unsigned long align); ++ + extern void vm_page_release(vm_page_t, boolean_t); + extern void vm_page_wait(void (*)(void)); + extern vm_page_t vm_page_alloc( +diff -ru gnumach-vanilla/vm/vm_pageout.c gnumach-vm_resident/vm/vm_pageout.c +--- gnumach-vanilla/vm/vm_pageout.c 2001-04-05 08:39:21.000000000 +0200 ++++ gnumach-vm_resident/vm/vm_pageout.c 2006-01-26 00:48:08.000000000 +0100 +@@ -559,6 +559,9 @@ + */ + + Restart: ++#ifdef LINUX_DEV ++ linux_kmem_collect(); ++#endif + stack_collect(); + net_kmsg_collect(); + consider_task_collect(); +diff -ru gnumach-vanilla/vm/vm_resident.c gnumach-vm_resident/vm/vm_resident.c +--- gnumach-vanilla/vm/vm_resident.c 1999-09-04 15:03:32.000000000 +0200 ++++ gnumach-vm_resident/vm/vm_resident.c 2006-01-26 00:37:31.000000000 +0100 +@@ -56,6 +56,9 @@ + #include <vm/vm_user.h> + #endif + ++extern unsigned long vm_page_normal_first, vm_page_normal_last; ++extern unsigned long vm_page_dma_first, vm_page_dma_last; ++ + /* in zalloc.c XXX */ + extern vm_offset_t zdata; + extern vm_size_t zdata_size; +@@ -105,14 +108,19 @@ + * Resident pages that represent real memory + * are allocated from a free list. + */ +-vm_page_t vm_page_queue_free; ++queue_head_t vm_page_queue_free_normal; ++queue_head_t vm_page_queue_free_dma; + vm_page_t vm_page_queue_fictitious; + decl_simple_lock_data(,vm_page_queue_free_lock) ++ + unsigned int vm_page_free_wanted; + int vm_page_free_count; + int vm_page_fictitious_count; + int vm_page_external_count; + ++natural_t *vm_page_free_bitmap; ++unsigned long vm_page_free_bitmap_bitsz; ++ + unsigned int vm_page_free_count_minimum; /* debugging */ + + /* +@@ -174,6 +182,102 @@ + boolean_t vm_page_deactivate_hint = TRUE; + + /* ++ * vm_page_free_bitmap_set and vm_page_free_bitmap_unset: ++ * FIXME: Free pages bitmap is SLOW! Make a decent multizone O(1) ++ * page allocator. ++ * ++ * Used to mark a page as free. ++ */ ++ ++#ifndef NBBY ++#define NBBY 8 /* size in bits of sizeof()`s unity */ ++#endif ++#define NBPEL (sizeof(natural_t)*NBBY) ++ ++void vm_page_free_bitmap_set(natural_t pageno) ++{ ++ register int word_index, bit_index; ++ ++ word_index = pageno / NBPEL; ++ bit_index = pageno - (word_index * NBPEL); ++ ++ vm_page_free_bitmap[word_index] |= 1 << bit_index; ++} ++void vm_page_free_bitmap_unset(natural_t pageno) ++{ ++ register int word_index, bit_index; ++ ++ word_index = pageno / NBPEL; ++ bit_index = pageno - (word_index * NBPEL); ++ ++ vm_page_free_bitmap[word_index] &= ~(1 << bit_index); ++} ++ ++/* ++ * vm_page_free_bitmap_alloc: ++ * ++ * Alloc space for bitmap at initializiation time. ++ * FIXME: Free pages bitmap is SLOW! Make a decent multizone O(1) ++ * page allocator. 
++ */ ++ ++void vm_page_free_bitmap_alloc(void) ++{ ++ register unsigned long size, bitsz; ++ unsigned long vm_page_big_pagenum; ++ int i; ++ ++ vm_page_big_pagenum = atop (phys_last_addr); ++ ++ bitsz = (vm_page_big_pagenum + NBPEL - 1) ++ & ~(NBPEL - 1); /* in bits */ ++ ++ size = bitsz / NBBY; /* in bytes */ ++ ++ vm_page_free_bitmap = (natural_t *) pmap_steal_memory (size); ++ bzero(vm_page_free_bitmap, size); ++ ++ vm_page_free_bitmap_bitsz = bitsz; ++} ++ ++ ++/* Fast phys_addr to vm_page_t lookup. */ ++ ++static vm_page_t *vm_page_array; ++static unsigned vm_page_array_size; ++ ++static void ++vm_page_array_init () ++{ ++ int i; ++ ++ vm_page_array_size = (phys_last_addr - phys_first_addr) >> PAGE_SHIFT; ++ vm_page_array = (vm_page_t *) pmap_steal_memory (sizeof(vm_page_t) ++ * (vm_page_array_size)); ++ ++ for (i = 0; i < vm_page_array_size; i++) ++ vm_page_array[i] = VM_PAGE_NULL; ++ ++} ++ ++static void ++vm_page_array_add (vm_offset_t phys_addr, vm_page_t pg) ++{ ++ assert (phys_addr < phys_last_addr && phys_addr >= phys_first_addr); ++ ++ vm_page_array [(phys_addr - phys_first_addr) >> PAGE_SHIFT] = pg; ++} ++ ++vm_page_t ++vm_page_physaddr_lookup (vm_offset_t phys_addr) ++{ ++ assert (phys_addr < phys_last_addr && phys_addr >= phys_first_addr); ++ ++ return vm_page_array [(phys_addr - phys_first_addr) >> PAGE_SHIFT]; ++} ++ ++ ++/* + * vm_page_bootstrap: + * + * Initializes the resident memory module. +@@ -229,7 +333,8 @@ + simple_lock_init(&vm_page_queue_free_lock); + simple_lock_init(&vm_page_queue_lock); + +- vm_page_queue_free = VM_PAGE_NULL; ++ queue_init (&vm_page_queue_free_normal); ++ queue_init (&vm_page_queue_free_dma); + vm_page_queue_fictitious = VM_PAGE_NULL; + queue_init(&vm_page_queue_active); + queue_init(&vm_page_queue_inactive); +@@ -279,6 +384,8 @@ + simple_lock_init(&bucket->lock); + } + ++ vm_page_free_bitmap_alloc(); ++ + /* + * Machine-dependent code allocates the resident page table. + * It uses vm_page_init to initialize the page frames. +@@ -294,7 +401,6 @@ + *startp = virtual_space_start; + *endp = virtual_space_end; + +- /* printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);*/ + vm_page_free_count_minimum = vm_page_free_count; + } + +@@ -380,6 +486,8 @@ + + pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages); + ++ vm_page_array_init (); ++ + /* + * Initialize the page frames. + */ +@@ -389,21 +497,12 @@ + break; + + vm_page_init(&pages[i], paddr); ++ vm_page_array_add (paddr, &pages[i]); ++ vm_page_release(&pages[i], FALSE); + pages_initialized++; + } + + /* +- * Release pages in reverse order so that physical pages +- * initially get allocated in ascending addresses. This keeps +- * the devices (which must address physical memory) happy if +- * they require several consecutive pages. +- */ +- +- for (i = pages_initialized; i > 0; i--) { +- vm_page_release(&pages[i - 1], FALSE); +- } +- +- /* + * We have to re-align virtual_space_start, + * because pmap_steal_memory has been using it. + */ +@@ -421,7 +520,7 @@ + * Second initialization pass, to be done after + * the basic VM system is ready. 
+ */ +-void vm_page_module_init(void) ++void vm_page_module_init(void) + { + vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page), + VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, +@@ -453,6 +552,7 @@ + panic("vm_page_create"); + + vm_page_init(m, paddr); ++ vm_page_array_add (paddr, m); + vm_page_release(m, FALSE); + } + } +@@ -840,16 +940,16 @@ + } + + /* +- * vm_page_grab: ++ * vm_page_grab_flags: + * +- * Remove a page from the free list. ++ * Remove a page specifying the memory zone to get the page from. + * Returns VM_PAGE_NULL if the free list is too small. + */ +- +-vm_page_t vm_page_grab( +- boolean_t external) ++vm_page_t vm_page_grab_flags( ++ boolean_t external, ++ unsigned flags) + { +- register vm_page_t mem; ++ register vm_page_t mem = VM_PAGE_NULL; + + simple_lock(&vm_page_queue_free_lock); + +@@ -867,17 +967,70 @@ + return VM_PAGE_NULL; + } + +- if (vm_page_queue_free == VM_PAGE_NULL) ++ /* ++ * If we put no flag, we request any page, so we search in ++ * the normal zone before. ++ */ ++ ++ if (!(flags & VM_PAGE_DMA) ++ && !(queue_empty(&vm_page_queue_free_normal))) ++ { ++ ++ if (--vm_page_free_count < vm_page_free_count_minimum) ++ vm_page_free_count_minimum = vm_page_free_count; ++ ++ if (external) ++ vm_page_external_count++; ++ ++ queue_remove_first (&vm_page_queue_free_normal, ++ mem, vm_page_t, pageq); ++ ++ mem->free = FALSE; ++ mem->extcounted = mem->external = external; ++ mem->pageq.next = 0; ++ mem->pageq.prev = 0; ++ ++ vm_page_free_bitmap_unset (atop (mem->phys_addr)); ++ } ++ ++ if (!mem) ++ { ++ ++ /* ++ * It is not necessarily a bug if we ask for a DMA page and we can't ++ * obtain it, despite of vm_page_free_count, since free pages can be ++ * in the normal zone. ++ */ ++ if (queue_empty(&vm_page_queue_free_dma)) ++ { ++ if (!(flags & VM_PAGE_DMA)) + panic("vm_page_grab"); ++ else ++ { ++ printf ("vm_page_grab: no dma anymore"); ++ simple_unlock(&vm_page_queue_free_lock); ++ return VM_PAGE_NULL; ++ } ++ } ++ ++ ++ if (--vm_page_free_count < vm_page_free_count_minimum) ++ vm_page_free_count_minimum = vm_page_free_count; ++ ++ if (external) ++ vm_page_external_count++; ++ ++ queue_remove_first (&vm_page_queue_free_dma, ++ mem, vm_page_t, pageq); ++ ++ mem->free = FALSE; ++ mem->extcounted = mem->external = external; ++ mem->pageq.next = 0; ++ mem->pageq.prev = 0; ++ ++ vm_page_free_bitmap_unset (atop (mem->phys_addr)); ++ } + +- if (--vm_page_free_count < vm_page_free_count_minimum) +- vm_page_free_count_minimum = vm_page_free_count; +- if (external) +- vm_page_external_count++; +- mem = vm_page_queue_free; +- vm_page_queue_free = (vm_page_t) mem->pageq.next; +- mem->free = FALSE; +- mem->extcounted = mem->external = external; + simple_unlock(&vm_page_queue_free_lock); + + /* +@@ -897,6 +1050,26 @@ + thread_wakeup((event_t) &vm_page_free_wanted); + + return mem; ++ ++} ++ ++ ++/* ++ * vm_page_grab: ++ * ++ * Remove a page from the free list. ++ * Returns VM_PAGE_NULL if the free list is too small. ++ */ ++ ++vm_page_t vm_page_grab( ++ boolean_t external) ++{ ++ register vm_page_t mem; ++ ++ /* Get any free page, no matter what zone. */ ++ mem = vm_page_grab_flags (external, 0); ++ ++ return mem; + } + + vm_offset_t vm_page_grab_phys_addr() +@@ -909,13 +1082,12 @@ + } + + /* +- * vm_page_grab_contiguous_pages: ++ * vm_page_grab_contiguous_pages_queue: + * +- * Take N pages off the free list, the pages should +- * cover a contiguous range of physical addresses. 
+- * [Used by device drivers to cope with DMA limitations] ++ * Take N pages off the free list FREEQUEUE, the pages ++ * should cover a contiguous range of physical addresses. + * +- * Returns the page descriptors in ascending order, or ++ * Returns the first page descriptor, or + * Returns KERN_RESOURCE_SHORTAGE if it could not. + */ + +@@ -924,44 +1096,32 @@ + vm_size_t vm_page_big_pagenum = 0; /* Set this before call! */ + + kern_return_t +-vm_page_grab_contiguous_pages( +- int npages, +- vm_page_t pages[], +- natural_t *bits, +- boolean_t external) ++vm_page_grab_contiguous_pages_queue( ++ int npages, ++ vm_offset_t *phys_addr, ++ boolean_t external, ++ queue_t freequeue, ++ unsigned long minbitidx, ++ unsigned long maxbitidx, ++ unsigned long align) ++ + { + register int first_set; + int size, alloc_size; + kern_return_t ret; + vm_page_t mem, prevmem; + +-#ifndef NBBY +-#define NBBY 8 /* size in bits of sizeof()`s unity */ +-#endif ++ if (!align) ++ align = 1; + +-#define NBPEL (sizeof(natural_t)*NBBY) ++ if (minbitidx >= vm_page_free_bitmap_bitsz) ++ panic ("minbitidx too high."); + +- size = (vm_page_big_pagenum + NBPEL - 1) +- & ~(NBPEL - 1); /* in bits */ +- +- size = size / NBBY; /* in bytes */ +- +- /* +- * If we are called before the VM system is fully functional +- * the invoker must provide us with the work space. [one bit +- * per page starting at phys 0 and up to vm_page_big_pagenum] +- */ +- if (bits == 0) { +- alloc_size = round_page(size); +- if (kmem_alloc_wired(kernel_map, +- (vm_offset_t *)&bits, +- alloc_size) +- != KERN_SUCCESS) +- return KERN_RESOURCE_SHORTAGE; +- } else +- alloc_size = 0; +- +- bzero(bits, size); ++ if (maxbitidx > vm_page_free_bitmap_bitsz) { ++ printf ("%s: maxbitidx exceeds bitmap size (%x > %x).\n", ++ __FUNCTION__, maxbitidx, vm_page_free_bitmap_bitsz); ++ maxbitidx = vm_page_free_bitmap_bitsz; ++ } + + /* + * A very large granularity call, its rare so that is ok +@@ -972,32 +1132,16 @@ + * Do not dip into the reserved pool. + */ + +- if ((vm_page_free_count < vm_page_free_reserved) +- || (vm_page_external_count >= vm_page_external_limit)) { ++ if (((vm_page_free_count < vm_page_free_reserved) ++ || (external ++ && (vm_page_external_count > vm_page_external_limit))) ++ && !current_thread()->vm_privilege) { + simple_unlock(&vm_page_queue_free_lock); + return KERN_RESOURCE_SHORTAGE; + } + + /* +- * First pass through, build a big bit-array of +- * the pages that are free. It is not going to +- * be too large anyways, in 4k we can fit info +- * for 32k pages. +- */ +- mem = vm_page_queue_free; +- while (mem) { +- register int word_index, bit_index; +- +- bit_index = (mem->phys_addr >> PAGE_SHIFT); +- word_index = bit_index / NBPEL; +- bit_index = bit_index - (word_index * NBPEL); +- bits[word_index] |= 1 << bit_index; +- +- mem = (vm_page_t) mem->pageq.next; +- } +- +- /* +- * Second loop. Scan the bit array for NPAGES ++ * First loop. Scan the bit array for NPAGES + * contiguous bits. That gives us, if any, + * the range of pages we will be grabbing off + * the free list. 
+@@ -1007,9 +1151,13 @@ + + first_set = 0; + +- for (i = 0; i < size; i += sizeof(natural_t)) { ++ for (i = (minbitidx/NBBY); ++ i < (maxbitidx/NBBY); ++ i += sizeof(natural_t)) ++ { + +- register natural_t v = bits[i / sizeof(natural_t)]; ++ register natural_t v = ++ vm_page_free_bitmap[i / sizeof(natural_t)]; + register int bitpos; + + /* +@@ -1042,14 +1190,20 @@ + */ + bits_so_far = 0; + count_zeroes: +- while ((bitpos < NBPEL) && ((v & 1) == 0)) { ++ while ((bitpos < NBPEL) && ++ (((v & 1) == 0) ++ || ((bitpos + i*NBBY) % align))) ++ { + bitpos++; + v >>= 1; + } +- if (v & 1) { ++ ++ if ((v & 1) ++ && (!((bitpos + i*NBBY) % align))) ++ { + first_set = (i * NBBY) + bitpos; + goto count_ones; +- } ++ } + } + /* + * No luck +@@ -1063,7 +1217,6 @@ + */ + not_found_em: + simple_unlock(&vm_page_queue_free_lock); +- + ret = KERN_RESOURCE_SHORTAGE; + goto out; + +@@ -1079,43 +1232,33 @@ + vm_page_free_count_minimum = vm_page_free_count; + if (external) + vm_page_external_count += npages; ++ + { +- register vm_offset_t first_phys, last_phys; +- +- /* cache values for compare */ +- first_phys = first_set << PAGE_SHIFT; +- last_phys = first_phys + (npages << PAGE_SHIFT);/* not included */ +- +- /* running pointers */ +- mem = vm_page_queue_free; +- prevmem = VM_PAGE_NULL; +- +- while (mem) { +- +- register vm_offset_t addr; +- +- addr = mem->phys_addr; +- +- if ((addr >= first_phys) && +- (addr < last_phys)) { +- if (prevmem) +- prevmem->pageq.next = mem->pageq.next; +- pages[(addr - first_phys) >> PAGE_SHIFT] = mem; +- mem->free = FALSE; +- mem->extcounted = mem->external = external; +- /* +- * Got them all ? +- */ +- if (--npages == 0) break; +- } else +- prevmem = mem; +- +- mem = (vm_page_t) mem->pageq.next; ++ vm_offset_t first_phys; ++ vm_page_t pg; ++ int i; ++ ++ first_phys = first_set << PAGE_SHIFT; ++ ++ if (phys_addr) ++ *phys_addr = first_phys; ++ ++ for (i = 0; i < npages; i++) ++ { ++ pg = vm_page_physaddr_lookup (first_phys + (i << PAGE_SHIFT)); ++ ++ assert (pg != VM_PAGE_NULL); ++ ++ queue_remove (freequeue, pg, vm_page_t, pageq); ++ ++ pg->free = FALSE; ++ pg->extcounted = pg->external = external; ++ vm_page_free_bitmap_unset (atop (pg->phys_addr)); + } + } +- ++ + simple_unlock(&vm_page_queue_free_lock); +- ++ + /* + * Decide if we should poke the pageout daemon. + * We do this if the free count is less than the low +@@ -1134,8 +1277,74 @@ + + ret = KERN_SUCCESS; + out: +- if (alloc_size) +- kmem_free(kernel_map, (vm_offset_t) bits, alloc_size); ++ ++ return ret; ++} ++ ++/* ++ * vm_page_grab_contiguous_pages_flags: ++ * ++ * Take N pages from specified zone, the pages should ++ * cover a contiguous range of physical addresses. ++ * [Used by device drivers to cope with DMA limitations] ++ * ++ * Returns the page descriptors in ascending order, or ++ * Returns KERN_RESOURCE_SHORTAGE if it could not. 
++ */ ++ ++kern_return_t ++vm_page_grab_contiguous_pages_flags( ++ int npages, ++ vm_offset_t *phys_addr, ++ boolean_t external, ++ unsigned flags, ++ unsigned long align) ++{ ++ kern_return_t ret; ++ ++ if (!(flags & VM_PAGE_DMA)) ++ { ++ ret = vm_page_grab_contiguous_pages_queue ( ++ npages, phys_addr, external, ++ &vm_page_queue_free_normal, ++ atop(vm_page_normal_first), ++ atop(vm_page_normal_last), ++ align); ++ ++ if (ret == KERN_SUCCESS) ++ return ret; ++ }; ++ ++ ret = vm_page_grab_contiguous_pages_queue ( ++ npages, phys_addr, external, ++ &vm_page_queue_free_dma, ++ atop(vm_page_dma_first), ++ atop(vm_page_dma_last), ++ align); ++ ++ return ret; ++} ++ ++/* ++ * vm_page_grab_contiguous_pages: ++ * ++ * Take N pages off the free list, the pages should ++ * cover a contiguous range of physical addresses. ++ * ++ * Returns the page descriptors in ascending order, or ++ * Returns KERN_RESOURCE_SHORTAGE if it could not. ++ * [Used by device drivers to cope with DMA limitations] ++ */ ++kern_return_t ++vm_page_grab_contiguous_pages( ++ int npages, ++ queue_t pages, ++ vm_offset_t *phys_addr, ++ boolean_t e) ++{ ++ kern_return_t ret; ++ ++ ret = vm_page_grab_contiguous_pages_flags (npages, phys_addr, e, 0, 0); + + return ret; + } +@@ -1150,16 +1359,36 @@ + register vm_page_t mem, + boolean_t external) + { ++ queue_t freequeue; ++ ++ if (pmap_is_dma (mem->phys_addr)) ++ freequeue = &vm_page_queue_free_dma; ++ else if (pmap_is_normal (mem->phys_addr)) ++ freequeue = &vm_page_queue_free_normal; ++ else { ++ /* XXX - Don't put a panic here. it's just for now. */ ++ panic ("vm_page_release (unknown page zone)"); ++ } ++ ++ /* UGLY: We skip the page 0, since it may cause problems ++ when returned to drivers. */ ++ if (mem->phys_addr == 0) ++ return; ++ + simple_lock(&vm_page_queue_free_lock); ++ + if (mem->free) + panic("vm_page_release"); + mem->free = TRUE; +- mem->pageq.next = (queue_entry_t) vm_page_queue_free; +- vm_page_queue_free = mem; ++ ++ queue_enter (freequeue, mem, vm_page_t, pageq); ++ + vm_page_free_count++; + if (external) + vm_page_external_count--; + ++ vm_page_free_bitmap_set (atop (mem->phys_addr)); ++ + /* + * Check if we should wake up someone waiting for page. + * But don't bother waking them unless they can allocate. |
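
Both the contiguous-page search and the free-page accounting in the patch rely on a per-page-frame free bitmap. A small self-contained sketch of that word/bit indexing, assuming the same `NBBY`/`NBPEL` layout as `vm_page_free_bitmap_set()`/`vm_page_free_bitmap_unset()` (the bitmap size and test page numbers here are illustrative):

```c
/* Sketch of the free-page bitmap indexing used by the patch:
 * one bit per page frame, NBPEL bits per word. */
#include <assert.h>
#include <stdio.h>

typedef unsigned long natural_t;

#define NBBY  8                            /* bits per byte */
#define NBPEL (sizeof(natural_t) * NBBY)   /* bits per bitmap word */

static natural_t free_bitmap[4];           /* room for 4 * NBPEL page frames */

static void bitmap_set(natural_t pageno)
{
    free_bitmap[pageno / NBPEL] |= 1UL << (pageno % NBPEL);
}

static void bitmap_unset(natural_t pageno)
{
    free_bitmap[pageno / NBPEL] &= ~(1UL << (pageno % NBPEL));
}

int main(void)
{
    bitmap_set(3);                         /* page 3 -> word 0, bit 3 */
    bitmap_set(NBPEL + 1);                 /* spills into word 1, bit 1 */
    assert(free_bitmap[0] == (1UL << 3));
    assert(free_bitmap[1] == (1UL << 1));
    bitmap_unset(3);
    assert(free_bitmap[0] == 0);
    puts("bitmap indexing matches vm_page_free_bitmap_{set,unset}");
    return 0;
}
```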