author     Guillem Jover <guillem@debian.org>  2006-02-02 04:29:43 +0000
committer  Guillem Jover <guillem@debian.org>  2006-02-02 04:29:43 +0000
commit     8e32f37752c300a27cffeb28c832982c51f845fa (patch)
tree       0aa08e60ec95512526704bf3370b61e56ea32989 /debian
parent     88f47da9bee3cdd5cebe4398d3ce3500ec0348de (diff)
* Make the Linux device drivers use dynamic memory allocation via the
  glue code.
  - debian/patches/61_vm_resident-zoned.patch: New file.
    Thanks to Gianluca Guida <glguida@gmail.com>.
Diffstat (limited to 'debian')
-rw-r--r--  debian/changelog                              4
-rw-r--r--  debian/patches/61_vm_resident-zoned.patch  1827
2 files changed, 1831 insertions, 0 deletions
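
The heart of the change is a zone-aware page allocator in vm/vm_resident.c that splits the free list into a DMA (low 16 MB) queue and a normal queue; the Linux glue code then allocates pages dynamically through it instead of carving them out of a static chunk pool. For orientation before the full diff, here is a minimal sketch of how the glue requests pages through the new interface. It is condensed from the rewritten __get_free_pages () in linux/dev/glue/kmem.c and the declarations the patch adds to vm/vm_page.h; the wrapper name glue_alloc_pages is invented for illustration and does not appear in the commit.

/* Illustrative sketch only: glue_alloc_pages is a made-up name; the real
   caller is the rewritten __get_free_pages () in the patch below.  */
unsigned long
glue_alloc_pages (unsigned long order, int dma)
{
  /* VM_PAGE_DMA requests a page from the low 16 MB (DMA) zone.  */
  unsigned flags = dma ? VM_PAGE_DMA : 0;
  int npages = 1 << order;

  if (npages > 1)
    {
      /* Contiguous grabbing is slower and may fail, so it is only
         used for multi-page requests.  */
      vm_offset_t phys;

      if (vm_page_grab_contiguous_pages_flags (npages, &phys, FALSE,
                                               flags, 0) != KERN_SUCCESS)
        return 0;
      return (unsigned long) phys;
    }
  else
    {
      /* Single page: take one descriptor off the proper free queue.  */
      vm_page_t m = vm_page_grab_flags (FALSE, flags);

      return m ? (unsigned long) m->phys_addr : 0;
    }
}

Freeing takes the reverse path: the new free_pages () looks each physical address up with vm_page_physaddr_lookup () and hands the descriptor back to vm_page_free ().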
diff --git a/debian/changelog b/debian/changelog
index 6f279c6..fe13367 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -23,6 +23,10 @@ gnumach (1:20060201-1) UNRELEASED; urgency=low
of hooking into SPL code.
- debian/patches/60_glue_linux_soft_ints.patch: New file.
Thanks to Gianluca Guida <glguida@gmail.com>.
+ * Make the Linux device drivers use dynamic memory allocation via the
+ glue code.
+ - debian/patches/61_vm_resident-zoned.patch: New file.
+ Thanks to Gianluca Guida <glguida@gmail.com>.
-- Guillem Jover <guillem@debian.org> Wed, 1 Feb 2006 23:21:59 +0200
diff --git a/debian/patches/61_vm_resident-zoned.patch b/debian/patches/61_vm_resident-zoned.patch
new file mode 100644
index 0000000..0290d14
--- /dev/null
+++ b/debian/patches/61_vm_resident-zoned.patch
@@ -0,0 +1,1827 @@
+#DPATCHLEVEL=1
+
+2006-01-20 Gianluca Guida <glguida@gmail.com>
+
+ * vm/pmap.h (pmap_is_dma, pmap_is_normal): New functions.
+
+ * vm/vm_page.h (VM_PAGE_DMA): New macro.
+ (vm_page_queue_free): Variable removed.
+ (vm_page_queue_free_dma, vm_page_queue_free_normal): New
+ variables.
+ (vm_page_physaddr_lookup, vm_page_grab_flags)
+ (vm_page_grab_contiguous_pages_flags): New functions.
+ (first_phys_addr, last_phys_addr): Declarations removed.
+ (phys_first_addr, phys_last_addr): New declarations.
+
+ * vm/vm_resident.c (vm_page_free_bitmap)
+ (vm_page_free_bitmap_bitsz): New variables.
+ (vm_page_free_bitmap_set, vm_page_free_bitmap_unset)
+ (vm_page_free_bitmap_alloc, vm_page_array_init, vm_page_array_add)
+ (vm_page_physaddr_lookup): New functions.
+ (vm_page_bootstrap): Initialize vm_page_queue_free_normal and
+ vm_page_queue_free_dma. Call vm_page_free_bitmap_alloc.
+ (pmap_startup): Call vm_page_array_init. Add pages to vm_page_array
+ for physical-address-to-vm_page_t resolution. Do not release pages
+ in any particular order.
+ (vm_page_create): Add page to vm_page_array.
+ (vm_page_grab_flags, vm_page_grab_contiguous_pages_queue)
+ (vm_page_grab_contiguous_pages_flags): New functions.
+ (vm_page_grab, vm_page_grab_contiguous_pages): Functions rewritten.
+ (vm_page_release): Select proper free list based on page type (DMA
+ or non-DMA). Use Mach's queue handling functions instead of
+ manipulating the .next and .prev pointers manually. Set
+ vm_page_free_bitmap when freeing a page.
+
+ * i386/i386at/model_dep.c (use_all_mem): Always set to 1.
+ (init_alloc_aligned): Do not use any particular order during
+ bootstrap allocation.
+ (alloc_dma_mem): Function removed.
+
+ * kern/startup.c: Include <vm/pmap.h>.
+ (setup_main): Calculate memory size using phys_last_addr and
+ phys_first_addr.
+
+ * i386/intel/pmap.c (vm_page_normal_first, vm_page_normal_last)
+ (vm_page_dma_first, vm_page_dma_last): New variables.
+ (pmap_bootstrap): Set proper values to vm_page_normal_* and
+ vm_page_dma_* based on system's memory.
+ (pmap_is_dma, pmap_is_normal): New functions.
+
+ * kern/startup.c: Removed /* XXX */ comments, since phys_last_addr
+ and phys_first_addr are exported by pmap.c now.
+
+ * linux/dev/glue/kmem.c: Include <mach/error.h>.
+ (MEM_CHUNK_SIZE, MEM_CHUNKS, NBPW): Macros removed.
+ (pages_free, memlist, num_page_collect, linux_mem_avail):
+ Variables removed.
+ (memlist_dma, memlist_nml): New variables.
+ (linux_kmem_init): No initialization needed anymore (all code
+ removed).
+ (coalesce_blocks): Coalesce DMA memory blocks and non-DMA memory
+ blocks separately.
+ (linux_kmalloc): Check for DMA in priority argument. Select proper
+ memlist to get blocks from. Pass dma information to
+ __get_free_pages.
+ (linux_kfree): Select proper memlist to free memory into.
+ (collect_kmalloc_pages): Function removed.
+ (collect_kmalloc_pages_dma, collect_kmalloc_pages_nml)
+ (linux_kmem_collect): New functions.
+ (__get_free_pages, free_pages): Functions rewritten.
+
+ * linux/dev/init/main.c (CONTIG_ALLOC, NBPW): Macros removed.
+ (CONTIG_ALLOC_ORDER): New macro.
+ (linux_init): Use __get_free_pages to allocate PCI initialization
+ memory and free_pages to free it.
+ (alloc_contig_mem, free_contig_mem): Functions removed.
+
+ * vm/vm_pageout.c (vm_pageout_scan) [LINUX_DEV]: Call
+ linux_kmem_collect.
+
+
+diff -ru gnumach-vanilla/i386/i386at/model_dep.c gnumach-vm_resident/i386/i386at/model_dep.c
+--- gnumach-vanilla/i386/i386at/model_dep.c 2004-11-28 18:29:35.000000000 +0100
++++ gnumach-vm_resident/i386/i386at/model_dep.c 2006-01-26 00:37:31.000000000 +0100
+@@ -86,16 +86,7 @@
+ /* Configuration parameter:
+ if zero, only use physical memory in the low 16MB of addresses.
+ Only SCSI still has DMA problems. */
+-#ifdef LINUX_DEV
+-#define use_all_mem 1
+-#else
+-#include "nscsi.h"
+-#if NSCSI > 0
+-#define use_all_mem 0
+-#else
+ #define use_all_mem 1
+-#endif
+-#endif
+
+ extern char version[];
+
+@@ -468,7 +459,6 @@
+ vm_offset_t addr;
+ extern char start[], end[];
+ int i;
+- static int wrapped = 0;
+
+ /* Memory regions to skip. */
+ vm_offset_t cmdline_start_pa = boot_info.flags & MULTIBOOT_CMDLINE
+@@ -488,25 +478,8 @@
+ /* Page-align the start address. */
+ avail_next = round_page(avail_next);
+
+- /* Start with memory above 16MB, reserving the low memory for later. */
+- if (use_all_mem && !wrapped && phys_last_addr > 16 * 1024*1024)
+- {
+- if (avail_next < 16 * 1024*1024)
+- avail_next = 16 * 1024*1024;
+- else if (avail_next == phys_last_addr)
+- {
+- /* We have used all the memory above 16MB, so now start on
+- the low memory. This will wind up at the end of the list
+- of free pages, so it should not have been allocated to any
+- other use in early initialization before the Linux driver
+- glue initialization needs to allocate low memory. */
+- avail_next = 0x1000;
+- wrapped = 1;
+- }
+- }
+-
+ /* Check if we have reached the end of memory. */
+- if (avail_next == (wrapped ? 16 * 1024*1024 : phys_last_addr))
++ if (avail_next == phys_last_addr)
+ return FALSE;
+
+ /* Tentatively assign the current location to the caller. */
+@@ -599,107 +572,3 @@
+ !(((boot_info.mem_lower * 1024) <= x) && (x < 1024*1024)));
+ }
+
+-#ifndef NBBY
+-#define NBBY 8
+-#endif
+-#ifndef NBPW
+-#define NBPW (NBBY * sizeof(int))
+-#endif
+-#define DMA_MAX (16*1024*1024)
+-
+-/*
+- * Allocate contiguous pages below 16 MB
+- * starting at specified boundary for DMA.
+- */
+-vm_offset_t
+-alloc_dma_mem(size, align)
+- vm_size_t size;
+- vm_offset_t align;
+-{
+- int *bits, i, j, k, n;
+- int npages, count, bit, mask;
+- int first_page, last_page;
+- vm_offset_t addr;
+- vm_page_t p, prevp;
+-
+- npages = round_page(size) / PAGE_SIZE;
+- mask = align ? (align - 1) / PAGE_SIZE : 0;
+-
+- /*
+- * Allocate bit array.
+- */
+- n = ((DMA_MAX / PAGE_SIZE) + NBPW - 1) / NBPW;
+- i = n * NBPW;
+- bits = (unsigned *)kalloc(i);
+- if (bits == 0) {
+- printf("alloc_dma_mem: unable alloc bit array\n");
+- return (0);
+- }
+- bzero((char *)bits, i);
+-
+- /*
+- * Walk the page free list and set a bit for
+- * every usable page in bit array.
+- */
+- simple_lock(&vm_page_queue_free_lock);
+- for (p = vm_page_queue_free; p; p = (vm_page_t)p->pageq.next) {
+- if (p->phys_addr < DMA_MAX) {
+- i = p->phys_addr / PAGE_SIZE;
+- bits[i / NBPW] |= 1 << (i % NBPW);
+- }
+- }
+-
+- /*
+- * Search for contiguous pages by scanning bit array.
+- */
+- for (i = 0, first_page = -1; i < n; i++) {
+- for (bit = 1, j = 0; j < NBPW; j++, bit <<= 1) {
+- if (bits[i] & bit) {
+- if (first_page < 0) {
+- k = i * NBPW + j;
+- if (!mask
+- || (((k & mask) + npages)
+- <= mask + 1)) {
+- first_page = k;
+- if (npages == 1)
+- goto found;
+- count = 1;
+- }
+- } else if (++count == npages)
+- goto found;
+- } else
+- first_page = -1;
+- }
+- }
+- addr = 0;
+- goto out;
+-
+- found:
+- /*
+- * Remove pages from the free list.
+- */
+- addr = first_page * PAGE_SIZE;
+- last_page = first_page + npages;
+- vm_page_free_count -= npages;
+- p = vm_page_queue_free;
+- prevp = 0;
+- while (1) {
+- i = p->phys_addr / PAGE_SIZE;
+- if (i >= first_page && i < last_page) {
+- if (prevp)
+- prevp->pageq.next = p->pageq.next;
+- else
+- vm_page_queue_free = (vm_page_t)p->pageq.next;
+- p->free = FALSE;
+- if (--npages == 0)
+- break;
+- } else
+- prevp = p;
+- p = (vm_page_t)p->pageq.next;
+- }
+-
+- out:
+- simple_unlock(&vm_page_queue_free_lock);
+- kfree((vm_offset_t)bits, n * NBPW);
+- return (addr);
+-}
+diff -ru gnumach-vanilla/i386/intel/pmap.c gnumach-vm_resident/i386/intel/pmap.c
+--- gnumach-vanilla/i386/intel/pmap.c 2001-04-05 08:39:21.000000000 +0200
++++ gnumach-vm_resident/i386/intel/pmap.c 2006-01-26 00:37:31.000000000 +0100
+@@ -584,6 +584,11 @@
+ return(virt);
+ }
+
++unsigned long vm_page_normal_first = 16*1024*1024;
++unsigned long vm_page_normal_last = 0;
++unsigned long vm_page_dma_first = 0;
++unsigned long vm_page_dma_last = 16*1024*1024 - 1;
++
+ /*
+ * Bootstrap the system enough to run with virtual memory.
+ * Allocate the kernel page directory and page tables,
+@@ -703,6 +708,25 @@
+ va += INTEL_PGBYTES;
+ }
+ }
++
++ if (phys_last_addr <= 16*1024*1024) {
++ /* Set so that pmap_is_normal() never returns TRUE. */
++ vm_page_normal_first = phys_last_addr + 1;
++ vm_page_normal_last = 0;
++
++ /* Only DMA memory. */
++ vm_page_dma_first = 0;
++ vm_page_dma_last = phys_last_addr;
++ } else {
++ vm_page_normal_first = 16*1024*1024;
++ vm_page_normal_last = phys_last_addr;
++
++ vm_page_dma_first = 0;
++ vm_page_dma_last = 16*1024*1024 - 1;
++ }
++
++
++
+ }
+
+ #if i860
+@@ -2346,6 +2370,27 @@
+ return (phys_attribute_test(phys, PHYS_REFERENCED));
+ }
+
++/*
++ * pmap_is_dma
++ *
++ * Return TRUE if PHYS is in the DMA zone range.
++ */
++boolean_t pmap_is_dma (vm_offset_t phys)
++{
++ return (phys < 16*1024*1024);
++}
++
++/*
++ * pmap_is_normal:
++ *
++ * Return TRUE if PHYS is in the normal zone range.
++ */
++boolean_t pmap_is_normal (vm_offset_t phys)
++{
++ return (phys >= 16*1024*1024);
++}
++
++
+ #if NCPUS > 1
+ /*
+ * TLB Coherence Code (TLB "shootdown" code)
+diff -ru gnumach-vanilla/kern/startup.c gnumach-vm_resident/kern/startup.c
+--- gnumach-vanilla/kern/startup.c 2001-04-05 08:39:20.000000000 +0200
++++ gnumach-vm_resident/kern/startup.c 2006-01-26 00:37:31.000000000 +0100
+@@ -80,9 +80,6 @@
+ extern void action_thread();
+ #endif /* NCPUS > 1 */
+
+-/* XX */
+-extern vm_offset_t phys_first_addr, phys_last_addr;
+-
+ /*
+ * Running in virtual memory, on the interrupt stack.
+ * Does not return. Dispatches initial thread.
+@@ -122,7 +119,7 @@
+ machine_init();
+
+ machine_info.max_cpus = NCPUS;
+- machine_info.memory_size = phys_last_addr - phys_first_addr; /* XXX mem_size */
++ machine_info.memory_size = phys_last_addr - phys_first_addr;
+ machine_info.avail_cpus = 0;
+ machine_info.major_version = KERNEL_MAJOR_VERSION;
+ machine_info.minor_version = KERNEL_MINOR_VERSION;
+diff -ru gnumach-vanilla/linux/dev/glue/kmem.c gnumach-vm_resident/linux/dev/glue/kmem.c
+--- gnumach-vanilla/linux/dev/glue/kmem.c 1999-04-26 07:45:35.000000000 +0200
++++ gnumach-vm_resident/linux/dev/glue/kmem.c 2006-01-26 04:10:52.000000000 +0100
+@@ -25,6 +25,7 @@
+ #include <sys/types.h>
+
+ #include <mach/mach_types.h>
++#include <mach/error.h>
+ #include <mach/vm_param.h>
+
+ #include <kern/assert.h>
+@@ -40,22 +41,11 @@
+
+ #include <asm/system.h>
+
+-extern void *alloc_contig_mem (unsigned, unsigned, unsigned, vm_page_t *);
+ extern int printf (const char *, ...);
+
+-/* Amount of memory to reserve for Linux memory allocator.
+- We reserve 64K chunks to stay within DMA limits.
+- Increase MEM_CHUNKS if the kernel is running out of memory. */
+-#define MEM_CHUNK_SIZE (64 * 1024)
+-#define MEM_CHUNKS 7
+-
+ /* Mininum amount that linux_kmalloc will allocate. */
+ #define MIN_ALLOC 12
+
+-#ifndef NBPW
+-#define NBPW 32
+-#endif
+-
+ /* Memory block header. */
+ struct blkhdr
+ {
+@@ -70,62 +60,17 @@
+ struct pagehdr *next; /* next header in list */
+ };
+
+-/* This structure describes a memory chunk. */
+-struct chunkhdr
+-{
+- unsigned long start; /* start address */
+- unsigned long end; /* end address */
+- unsigned long bitmap; /* busy/free bitmap of pages */
+-};
+-
+-/* Chunks from which pages are allocated. */
+-static struct chunkhdr pages_free[MEM_CHUNKS];
+-
+ /* Memory list maintained by linux_kmalloc. */
+-static struct pagehdr *memlist;
++static struct pagehdr *memlist_dma = NULL;
++static struct pagehdr *memlist_nml = NULL;
+
+ /* Some statistics. */
+ int num_block_coalesce = 0;
+-int num_page_collect = 0;
+-int linux_mem_avail;
+
+ /* Initialize the Linux memory allocator. */
+ void
+ linux_kmem_init ()
+ {
+- int i, j;
+- vm_page_t p, pages;
+-
+- for (i = 0; i < MEM_CHUNKS; i++)
+- {
+- /* Allocate memory. */
+- pages_free[i].start = (unsigned long) alloc_contig_mem (MEM_CHUNK_SIZE,
+- 16 * 1024 * 1024,
+- 0xffff, &pages);
+-
+- assert (pages_free[i].start);
+- assert ((pages_free[i].start & 0xffff) == 0);
+-
+- /* Sanity check: ensure pages are contiguous and within DMA limits. */
+- for (p = pages, j = 0; j < MEM_CHUNK_SIZE - PAGE_SIZE; j += PAGE_SIZE)
+- {
+- assert (p->phys_addr < 16 * 1024 * 1024);
+- assert (p->phys_addr + PAGE_SIZE
+- == ((vm_page_t) p->pageq.next)->phys_addr);
+-
+- p = (vm_page_t) p->pageq.next;
+- }
+-
+- pages_free[i].end = pages_free[i].start + MEM_CHUNK_SIZE;
+-
+- /* Initialize free page bitmap. */
+- pages_free[i].bitmap = 0;
+- j = MEM_CHUNK_SIZE >> PAGE_SHIFT;
+- while (--j >= 0)
+- pages_free[i].bitmap |= 1 << j;
+- }
+-
+- linux_mem_avail = (MEM_CHUNKS * MEM_CHUNK_SIZE) >> PAGE_SHIFT;
+ }
+
+ /* Return the number by which the page size should be
+@@ -178,7 +123,40 @@
+
+ num_block_coalesce++;
+
+- for (ph = memlist; ph; ph = ph->next)
++ /* Coalesce DMA memory. */
++ for (ph = memlist_dma; ph; ph = ph->next)
++ {
++ bh = (struct blkhdr *) (ph + 1);
++ ebh = (struct blkhdr *) ((void *) ph + ph->size);
++ while (1)
++ {
++ /* Skip busy blocks. */
++ while (bh < ebh && !bh->free)
++ bh = (struct blkhdr *) ((void *) (bh + 1) + bh->size);
++ if (bh == ebh)
++ break;
++
++ /* Merge adjacent free blocks. */
++ while (1)
++ {
++ bhp = (struct blkhdr *) ((void *) (bh + 1) + bh->size);
++ if (bhp == ebh)
++ {
++ bh = bhp;
++ break;
++ }
++ if (!bhp->free)
++ {
++ bh = (struct blkhdr *) ((void *) (bhp + 1) + bhp->size);
++ break;
++ }
++ bh->size += bhp->size + sizeof (struct blkhdr);
++ }
++ }
++ }
++
++ /* Coalesce non-DMA memory. */
++ for (ph = memlist_nml; ph; ph = ph->next)
+ {
+ bh = (struct blkhdr *) (ph + 1);
+ ebh = (struct blkhdr *) ((void *) ph + ph->size);
+@@ -216,20 +194,26 @@
+ void *
+ linux_kmalloc (unsigned int size, int priority)
+ {
+- int order, coalesced = 0;
++ int order, coalesced = 0, dma = 0;
+ unsigned flags;
+- struct pagehdr *ph;
++ struct pagehdr *ph, **memlistp;
+ struct blkhdr *bh, *new_bh;
+
++ if (priority & GFP_DMA)
++ {
++ memlistp = &memlist_dma;
++ dma = 1;
++ }
++ else
++ {
++ memlistp = &memlist_nml;
++ dma = 0;
++ }
+ if (size < MIN_ALLOC)
+ size = MIN_ALLOC;
+ else
+ size = (size + sizeof (int) - 1) & ~(sizeof (int) - 1);
+
+- assert (size <= (MEM_CHUNK_SIZE
+- - sizeof (struct pagehdr)
+- - sizeof (struct blkhdr)));
+-
+ save_flags (flags);
+ cli ();
+
+@@ -238,7 +222,7 @@
+
+ /* Walk the page list and find the first free block with size
+ greater than or equal to the one required. */
+- for (ph = memlist; ph; ph = ph->next)
++ for (ph = *memlistp; ph; ph = ph->next)
+ {
+ bh = (struct blkhdr *) (ph + 1);
+ while (bh < (struct blkhdr *) ((void *) ph + ph->size))
+@@ -278,16 +262,26 @@
+ order = get_page_order (size
+ + sizeof (struct pagehdr)
+ + sizeof (struct blkhdr));
+- ph = (struct pagehdr *) __get_free_pages (GFP_KERNEL, order, ~0UL);
++ ph = (struct pagehdr *) __get_free_pages (GFP_KERNEL, order, dma);
+ if (!ph)
+ {
+ restore_flags (flags);
+ return NULL;
+ }
+
++ /* __get_free_pages may return DMA memory when non-DMA memory is
++ exhausted, so check here which memory type we actually got. */
++ if (pmap_is_dma ((unsigned long)ph))
++ {
++ memlistp = &memlist_dma;
++ }
++ else
++ {
++ memlistp = &memlist_nml;
++ }
+ ph->size = PAGE_SIZE << order;
+- ph->next = memlist;
+- memlist = ph;
++ ph->next = *memlistp;
++ *memlistp = ph;
+ bh = (struct blkhdr *) (ph + 1);
+ bh->free = 0;
+ bh->size = ph->size - sizeof (struct pagehdr) - sizeof (struct blkhdr);
+@@ -310,17 +304,28 @@
+ linux_kfree (void *p)
+ {
+ unsigned flags;
++ vm_offset_t addr;
+ struct blkhdr *bh;
+- struct pagehdr *ph;
++ struct pagehdr *ph, **memlistp;
+
+ assert (((int) p & (sizeof (int) - 1)) == 0);
+
++ addr = (vm_offset_t) p;
++
++ if (pmap_is_dma (addr))
++ {
++ memlistp = &memlist_dma;
++ }
++ else
++ {
++ memlistp = &memlist_nml;
++ }
+ save_flags (flags);
+ cli ();
+
+ check_page_list (__LINE__);
+
+- for (ph = memlist; ph; ph = ph->next)
++ for (ph = *memlistp; ph; ph = ph->next)
+ if (p >= (void *) ph && p < (void *) ph + ph->size)
+ break;
+
+@@ -339,10 +344,10 @@
+ restore_flags (flags);
+ }
+
+-/* Free any pages that are not in use.
++/* Free any DMA pages that are not in use.
+ Called by __get_free_pages when pages are running low. */
+ static void
+-collect_kmalloc_pages ()
++collect_kmalloc_pages_dma ()
+ {
+ struct blkhdr *bh;
+ struct pagehdr *ph, **prev_ph;
+@@ -353,8 +358,8 @@
+
+ check_page_list (__LINE__);
+
+- ph = memlist;
+- prev_ph = &memlist;
++ ph = memlist_dma;
++ prev_ph = &memlist_dma;
+ while (ph)
+ {
+ bh = (struct blkhdr *) (ph + 1);
+@@ -373,68 +378,91 @@
+
+ check_page_list (__LINE__);
+ }
+-
+-/* Allocate ORDER + 1 number of physically contiguous pages.
+- PRIORITY and DMA are not used in Mach.
+-
+- XXX: This needs to be dynamic. To do that we need to make
+- the Mach page manipulation routines interrupt safe and they
+- must provide machine dependant hooks. */
+-unsigned long
+-__get_free_pages (int priority, unsigned long order, int dma)
++/* Free any non-DMA pages that are not in use.
++ Called by __get_free_pages when pages are running low. */
++static void
++collect_kmalloc_pages_nml ()
+ {
+- int i, pages_collected = 0;
+- unsigned flags, bits, off, j, len;
++ struct blkhdr *bh;
++ struct pagehdr *ph, **prev_ph;
+
+- assert ((PAGE_SIZE << order) <= MEM_CHUNK_SIZE);
++ check_page_list (__LINE__);
+
+- /* Construct bitmap of contiguous pages. */
+- bits = 0;
+- j = 0;
+- len = 0;
+- while (len < (PAGE_SIZE << order))
+- {
+- bits |= 1 << j++;
+- len += PAGE_SIZE;
+- }
++ coalesce_blocks ();
+
+- save_flags (flags);
+- cli ();
+-again:
++ check_page_list (__LINE__);
+
+- /* Search each chunk for the required number of contiguous pages. */
+- for (i = 0; i < MEM_CHUNKS; i++)
++ ph = memlist_nml;
++ prev_ph = &memlist_nml;
++ while (ph)
+ {
+- off = 0;
+- j = bits;
+- while (MEM_CHUNK_SIZE - off >= (PAGE_SIZE << order))
++ bh = (struct blkhdr *) (ph + 1);
++ if (bh->free && (void *) (bh + 1) + bh->size == (void *) ph + ph->size)
+ {
+- if ((pages_free[i].bitmap & j) == j)
+- {
+- pages_free[i].bitmap &= ~j;
+- linux_mem_avail -= order + 1;
+- restore_flags (flags);
+- return pages_free[i].start + off;
+- }
+- j <<= 1;
+- off += PAGE_SIZE;
++ *prev_ph = ph->next;
++ free_pages ((unsigned long) ph, get_page_order (ph->size));
++ ph = *prev_ph;
++ }
++ else
++ {
++ prev_ph = &ph->next;
++ ph = ph->next;
+ }
+ }
+
+- /* Allocation failed; collect kmalloc and buffer pages
+- and try again. */
+- if (!pages_collected)
+- {
+- num_page_collect++;
+- collect_kmalloc_pages ();
+- pages_collected = 1;
+- goto again;
+- }
++ check_page_list (__LINE__);
++}
+
+- printf ("%s:%d: __get_free_pages: ran out of pages\n", __FILE__, __LINE__);
++/* Allocate ORDER + 1 number of physically contiguous pages.
++ PRIORITY and DMA are not used in Mach. */
++unsigned long
++__get_free_pages (int priority, unsigned long order, int dma)
++{
++ unsigned long pagenum;
++ unsigned vm_page_flags = 0;
++ unsigned long p;
++
++ if (dma)
++ vm_page_flags |= VM_PAGE_DMA;
++
++ pagenum = (1 << order);
++
++ p = 0;
++
++ if (pagenum > 1)
++ {
++ /*
++ * Contiguous grabbing is slow and may fail.
++ * We reserve it for special occasions.
++ */
++ mach_error_t err;
++ vm_offset_t vmo;
++
++ err = vm_page_grab_contiguous_pages_flags (pagenum, &vmo, FALSE,
++ vm_page_flags, 0);
++ p = (unsigned long) vmo;
++
++ if (err)
++ return 0;
++ }
++ else
++ {
++ vm_page_t m;
++
++ m = vm_page_grab_flags (FALSE, vm_page_flags);
++ if (!m)
++ return 0;
++
++ p = m->phys_addr;
++
++ if (m->tabled)
++ {
++ printf ("Error while getting page of order %ld\n", order);
++ return 0;
++ }
++ };
+
+- restore_flags (flags);
+- return 0;
++ return p;
+ }
+
+ /* Free ORDER + 1 number of physically
+@@ -442,36 +470,20 @@
+ void
+ free_pages (unsigned long addr, unsigned long order)
+ {
+- int i;
+- unsigned flags, bits, len, j;
+-
+- assert ((addr & PAGE_MASK) == 0);
++ unsigned long i, pagenum;
+
+- for (i = 0; i < MEM_CHUNKS; i++)
+- if (addr >= pages_free[i].start && addr < pages_free[i].end)
+- break;
+-
+- assert (i < MEM_CHUNKS);
++ pagenum = 1 << order;
+
+- /* Contruct bitmap of contiguous pages. */
+- len = 0;
+- j = 0;
+- bits = 0;
+- while (len < (PAGE_SIZE << order))
++ for (i = 0; i < pagenum; i++)
+ {
+- bits |= 1 << j++;
+- len += PAGE_SIZE;
+- }
+- bits <<= (addr - pages_free[i].start) >> PAGE_SHIFT;
++ vm_page_t m;
+
+- save_flags (flags);
+- cli ();
+-
+- assert ((pages_free[i].bitmap & bits) == 0);
++ m = vm_page_physaddr_lookup (addr + (i * PAGE_SIZE));
++ if (m == VM_PAGE_NULL)
++ panic ("couldn't lookup page for address %lx", addr + (i * PAGE_SIZE));
+
+- pages_free[i].bitmap |= bits;
+- linux_mem_avail += order + 1;
+- restore_flags (flags);
++ vm_page_free (m);
++ }
+ }
+
+
+@@ -579,3 +591,9 @@
+ vmalloc_list_insert (addr, round_page (size));
+ return (void *) addr;
+ }
++
++void linux_kmem_collect (void)
++{
++ collect_kmalloc_pages_nml ();
++ collect_kmalloc_pages_dma ();
++}
+diff -ru gnumach-vanilla/linux/dev/init/main.c gnumach-vm_resident/linux/dev/init/main.c
+--- gnumach-vanilla/linux/dev/init/main.c 1999-04-26 07:49:06.000000000 +0200
++++ gnumach-vm_resident/linux/dev/init/main.c 2006-01-26 00:37:31.000000000 +0100
+@@ -82,9 +82,7 @@
+ static void calibrate_delay (void);
+
+ extern int hz;
+-extern vm_offset_t phys_last_addr;
+
+-extern void *alloc_contig_mem (unsigned, unsigned, unsigned, vm_page_t *);
+ extern void free_contig_mem (vm_page_t);
+ extern void init_IRQ (void);
+ extern void restore_IRQ (void);
+@@ -105,10 +103,8 @@
+ extern void linux_sched_init (void);
+
+
+-/*
+- * Amount of contiguous memory to allocate for initialization.
+- */
+-#define CONTIG_ALLOC (512 * 1024)
++/* Amount of contiguous memory to allocate for initialization. */
++#define CONTIG_ALLOC_ORDER (7) /* 512kb. */
+
+ /*
+ * Initialize Linux drivers.
+@@ -117,7 +113,7 @@
+ linux_init (void)
+ {
+ int addr;
+- unsigned memory_start, memory_end;
++ unsigned long memory_start, memory_end;
+ vm_page_t pages;
+
+ /*
+@@ -142,40 +138,34 @@
+ memcpy ((char *) &drive_info + 16,
+ (void *) ((addr & 0xffff) + ((addr >> 12) & 0xffff0)), 16);
+
+- /*
+- * Initialize Linux memory allocator.
+- */
++ /* Initialize Linux memory allocator. */
+ linux_kmem_init ();
+
+- /*
+- * Allocate contiguous memory below 16 MB.
+- */
+- memory_start = (unsigned long) alloc_contig_mem (CONTIG_ALLOC,
+- 16 * 1024 * 1024,
+- 0, &pages);
+- if (memory_start == 0)
+- panic ("linux_init: alloc_contig_mem failed");
+- memory_end = memory_start + CONTIG_ALLOC;
++ /* Allocate contiguous memory below 16 MB. */
++ memory_start = __get_free_pages (GFP_ATOMIC, CONTIG_ALLOC_ORDER, 1);
++ if (!memory_start)
++ panic ("linux_init: alloc PCI memory failed");
++ memory_end = memory_start + ((1 << CONTIG_ALLOC_ORDER) * PAGE_SIZE);
+
+- /*
+- * Initialize PCI bus.
+- */
++ /* Initialize PCI bus. */
+ memory_start = pci_init (memory_start, memory_end);
+
+ if (memory_start > memory_end)
+ panic ("linux_init: ran out memory");
+
+- /*
+- * Free unused memory.
+- */
+- while (pages && pages->phys_addr < round_page (memory_start))
+- pages = (vm_page_t) pages->pageq.next;
+- if (pages)
+- free_contig_mem (pages);
++ /* Free unused memory. */
++ {
++ unsigned long memaddr;
++
++ for (memaddr = round_page (memory_start);
++ memaddr < memory_end;
++ memaddr += PAGE_SIZE)
++ {
++ free_pages (memaddr, 0);
++ }
++ }
+
+- /*
+- * Initialize devices.
+- */
++ /* Initialize devices. */
+ #ifdef CONFIG_INET
+ linux_net_emulation_init ();
+ #endif
+@@ -186,148 +176,6 @@
+ linux_auto_config = 0;
+ }
+
+-#ifndef NBPW
+-#define NBPW 32
+-#endif
+-
+-/*
+- * Allocate contiguous memory with the given constraints.
+- * This routine is horribly inefficient but it is presently
+- * only used during initialization so it's not that bad.
+- */
+-void *
+-alloc_contig_mem (unsigned size, unsigned limit,
+- unsigned mask, vm_page_t * pages)
+-{
+- int i, j, bits_len;
+- unsigned *bits, len;
+- void *m;
+- vm_page_t p, page_list, tail, prev;
+- vm_offset_t addr, max_addr;
+-
+- if (size == 0)
+- return (NULL);
+- size = round_page (size);
+- if ((size >> PAGE_SHIFT) > vm_page_free_count)
+- return (NULL);
+-
+- /* Allocate bit array. */
+- max_addr = phys_last_addr;
+- if (max_addr > limit)
+- max_addr = limit;
+- bits_len = ((((max_addr >> PAGE_SHIFT) + NBPW - 1) / NBPW)
+- * sizeof (unsigned));
+- bits = (unsigned *) kalloc (bits_len);
+- if (!bits)
+- return (NULL);
+- memset (bits, 0, bits_len);
+-
+- /*
+- * Walk the page free list and set a bit for every usable page.
+- */
+- simple_lock (&vm_page_queue_free_lock);
+- p = vm_page_queue_free;
+- while (p)
+- {
+- if (p->phys_addr < limit)
+- (bits[(p->phys_addr >> PAGE_SHIFT) / NBPW]
+- |= 1 << ((p->phys_addr >> PAGE_SHIFT) % NBPW));
+- p = (vm_page_t) p->pageq.next;
+- }
+-
+- /*
+- * Scan bit array for contiguous pages.
+- */
+- len = 0;
+- m = NULL;
+- for (i = 0; len < size && i < bits_len / sizeof (unsigned); i++)
+- for (j = 0; len < size && j < NBPW; j++)
+- if (!(bits[i] & (1 << j)))
+- {
+- len = 0;
+- m = NULL;
+- }
+- else
+- {
+- if (len == 0)
+- {
+- addr = ((vm_offset_t) (i * NBPW + j)
+- << PAGE_SHIFT);
+- if ((addr & mask) == 0)
+- {
+- len += PAGE_SIZE;
+- m = (void *) addr;
+- }
+- }
+- else
+- len += PAGE_SIZE;
+- }
+-
+- if (len != size)
+- {
+- simple_unlock (&vm_page_queue_free_lock);
+- kfree ((vm_offset_t) bits, bits_len);
+- return (NULL);
+- }
+-
+- /*
+- * Remove pages from free list
+- * and construct list to return to caller.
+- */
+- page_list = NULL;
+- for (len = 0; len < size; len += PAGE_SIZE, addr += PAGE_SIZE)
+- {
+- prev = NULL;
+- for (p = vm_page_queue_free; p; p = (vm_page_t) p->pageq.next)
+- {
+- if (p->phys_addr == addr)
+- break;
+- prev = p;
+- }
+- if (!p)
+- panic ("alloc_contig_mem: page not on free list");
+- if (prev)
+- prev->pageq.next = p->pageq.next;
+- else
+- vm_page_queue_free = (vm_page_t) p->pageq.next;
+- p->free = FALSE;
+- p->pageq.next = NULL;
+- if (!page_list)
+- page_list = tail = p;
+- else
+- {
+- tail->pageq.next = (queue_entry_t) p;
+- tail = p;
+- }
+- vm_page_free_count--;
+- }
+-
+- simple_unlock (&vm_page_queue_free_lock);
+- kfree ((vm_offset_t) bits, bits_len);
+- if (pages)
+- *pages = page_list;
+- return (m);
+-}
+-
+-/*
+- * Free memory allocated by alloc_contig_mem.
+- */
+-void
+-free_contig_mem (vm_page_t pages)
+-{
+- int i;
+- vm_page_t p;
+-
+- for (p = pages, i = 0; p->pageq.next; p = (vm_page_t) p->pageq.next, i++)
+- p->free = TRUE;
+- p->free = TRUE;
+- simple_lock (&vm_page_queue_free_lock);
+- vm_page_free_count += i + 1;
+- p->pageq.next = (queue_entry_t) vm_page_queue_free;
+- vm_page_queue_free = pages;
+- simple_unlock (&vm_page_queue_free_lock);
+-}
+-
+ /* This is the number of bits of precision for the loops_per_second. Each
+ * bit takes on average 1.5/HZ seconds. This (like the original) is a little
+ * better than 1%
+diff -ru gnumach-vanilla/vm/pmap.h gnumach-vm_resident/vm/pmap.h
+--- gnumach-vanilla/vm/pmap.h 2001-04-05 08:39:21.000000000 +0200
++++ gnumach-vm_resident/vm/pmap.h 2006-01-26 00:37:31.000000000 +0100
+@@ -174,6 +174,15 @@
+ /* Return modify bit */
+ boolean_t pmap_is_modified(vm_offset_t pa);
+
++/*
++ * Page Zones routines
++ */
++
++/* Physical address is in DMA capable zone. */
++boolean_t pmap_is_dma (vm_offset_t pa);
++
++/* Physical address is in non-DMA capable zone. */
++boolean_t pmap_is_normal (vm_offset_t pa);
+
+ /*
+ * Statistics routines
+diff -ru gnumach-vanilla/vm/vm_page.h gnumach-vm_resident/vm/vm_page.h
+--- gnumach-vanilla/vm/vm_page.h 1999-06-28 02:41:02.000000000 +0200
++++ gnumach-vm_resident/vm/vm_page.h 2006-01-26 00:37:31.000000000 +0100
+@@ -152,22 +152,26 @@
+ * ordered, in LRU-like fashion.
+ */
+
++#define VM_PAGE_DMA 0x1
++
++extern
++queue_head_t vm_page_queue_free_normal; /* normal memory free queue */
+ extern
+-vm_page_t vm_page_queue_free; /* memory free queue */
++queue_head_t vm_page_queue_free_dma; /* DMA-capable memory free queue */
+ extern
+-vm_page_t vm_page_queue_fictitious; /* fictitious free queue */
++vm_page_t vm_page_queue_fictitious; /* fictitious free queue */
+ extern
+-queue_head_t vm_page_queue_active; /* active memory queue */
++queue_head_t vm_page_queue_active; /* active memory queue */
+ extern
+ queue_head_t vm_page_queue_inactive; /* inactive memory queue */
+
+ extern
+-vm_offset_t first_phys_addr; /* physical address for first_page */
++vm_offset_t phys_first_addr;/* physical address for first_page */
+ extern
+-vm_offset_t last_phys_addr; /* physical address for last_page */
++vm_offset_t phys_last_addr; /* physical address for last_page */
+
+ extern
+-int vm_page_free_count; /* How many pages are free? */
++int vm_page_free_count; /* How many pages are free? */
+ extern
+ int vm_page_fictitious_count;/* How many fictitious pages are free? */
+ extern
+@@ -220,11 +224,20 @@
+ extern vm_page_t vm_page_lookup(
+ vm_object_t object,
+ vm_offset_t offset);
++extern vm_page_t vm_page_physaddr_lookup (vm_offset_t);
+ extern vm_page_t vm_page_grab_fictitious(void);
+ extern void vm_page_release_fictitious(vm_page_t);
+ extern boolean_t vm_page_convert(vm_page_t, boolean_t);
+ extern void vm_page_more_fictitious(void);
+ extern vm_page_t vm_page_grab(boolean_t);
++extern vm_page_t vm_page_grab_flags(boolean_t, unsigned);
++extern kern_return_t vm_page_grab_contiguous_pages_flags(
++ int npages,
++ vm_offset_t *phys_address,
++ boolean_t external,
++ unsigned flags,
++ unsigned long align);
++
+ extern void vm_page_release(vm_page_t, boolean_t);
+ extern void vm_page_wait(void (*)(void));
+ extern vm_page_t vm_page_alloc(
+diff -ru gnumach-vanilla/vm/vm_pageout.c gnumach-vm_resident/vm/vm_pageout.c
+--- gnumach-vanilla/vm/vm_pageout.c 2001-04-05 08:39:21.000000000 +0200
++++ gnumach-vm_resident/vm/vm_pageout.c 2006-01-26 00:48:08.000000000 +0100
+@@ -559,6 +559,9 @@
+ */
+
+ Restart:
++#ifdef LINUX_DEV
++ linux_kmem_collect();
++#endif
+ stack_collect();
+ net_kmsg_collect();
+ consider_task_collect();
+diff -ru gnumach-vanilla/vm/vm_resident.c gnumach-vm_resident/vm/vm_resident.c
+--- gnumach-vanilla/vm/vm_resident.c 1999-09-04 15:03:32.000000000 +0200
++++ gnumach-vm_resident/vm/vm_resident.c 2006-01-26 00:37:31.000000000 +0100
+@@ -56,6 +56,9 @@
+ #include <vm/vm_user.h>
+ #endif
+
++extern unsigned long vm_page_normal_first, vm_page_normal_last;
++extern unsigned long vm_page_dma_first, vm_page_dma_last;
++
+ /* in zalloc.c XXX */
+ extern vm_offset_t zdata;
+ extern vm_size_t zdata_size;
+@@ -105,14 +108,19 @@
+ * Resident pages that represent real memory
+ * are allocated from a free list.
+ */
+-vm_page_t vm_page_queue_free;
++queue_head_t vm_page_queue_free_normal;
++queue_head_t vm_page_queue_free_dma;
+ vm_page_t vm_page_queue_fictitious;
+ decl_simple_lock_data(,vm_page_queue_free_lock)
++
+ unsigned int vm_page_free_wanted;
+ int vm_page_free_count;
+ int vm_page_fictitious_count;
+ int vm_page_external_count;
+
++natural_t *vm_page_free_bitmap;
++unsigned long vm_page_free_bitmap_bitsz;
++
+ unsigned int vm_page_free_count_minimum; /* debugging */
+
+ /*
+@@ -174,6 +182,102 @@
+ boolean_t vm_page_deactivate_hint = TRUE;
+
+ /*
++ * vm_page_free_bitmap_set and vm_page_free_bitmap_unset:
++ *
++ * Mark a page as free (set) or allocated (unset) in the free-page bitmap.
++ * FIXME: Free pages bitmap is SLOW! Make a decent multizone O(1)
++ * page allocator.
++ */
++
++#ifndef NBBY
++#define NBBY 8 /* size in bits of sizeof()`s unity */
++#endif
++#define NBPEL (sizeof(natural_t)*NBBY)
++
++void vm_page_free_bitmap_set(natural_t pageno)
++{
++ register int word_index, bit_index;
++
++ word_index = pageno / NBPEL;
++ bit_index = pageno - (word_index * NBPEL);
++
++ vm_page_free_bitmap[word_index] |= 1 << bit_index;
++}
++void vm_page_free_bitmap_unset(natural_t pageno)
++{
++ register int word_index, bit_index;
++
++ word_index = pageno / NBPEL;
++ bit_index = pageno - (word_index * NBPEL);
++
++ vm_page_free_bitmap[word_index] &= ~(1 << bit_index);
++}
++
++/*
++ * vm_page_free_bitmap_alloc:
++ *
++ * Allocate space for the bitmap at initialization time.
++ * FIXME: Free pages bitmap is SLOW! Make a decent multizone O(1)
++ * page allocator.
++ */
++
++void vm_page_free_bitmap_alloc(void)
++{
++ register unsigned long size, bitsz;
++ unsigned long vm_page_big_pagenum;
++ int i;
++
++ vm_page_big_pagenum = atop (phys_last_addr);
++
++ bitsz = (vm_page_big_pagenum + NBPEL - 1)
++ & ~(NBPEL - 1); /* in bits */
++
++ size = bitsz / NBBY; /* in bytes */
++
++ vm_page_free_bitmap = (natural_t *) pmap_steal_memory (size);
++ bzero(vm_page_free_bitmap, size);
++
++ vm_page_free_bitmap_bitsz = bitsz;
++}
++
++
++/* Fast phys_addr to vm_page_t lookup. */
++
++static vm_page_t *vm_page_array;
++static unsigned vm_page_array_size;
++
++static void
++vm_page_array_init ()
++{
++ int i;
++
++ vm_page_array_size = (phys_last_addr - phys_first_addr) >> PAGE_SHIFT;
++ vm_page_array = (vm_page_t *) pmap_steal_memory (sizeof(vm_page_t)
++ * (vm_page_array_size));
++
++ for (i = 0; i < vm_page_array_size; i++)
++ vm_page_array[i] = VM_PAGE_NULL;
++
++}
++
++static void
++vm_page_array_add (vm_offset_t phys_addr, vm_page_t pg)
++{
++ assert (phys_addr < phys_last_addr && phys_addr >= phys_first_addr);
++
++ vm_page_array [(phys_addr - phys_first_addr) >> PAGE_SHIFT] = pg;
++}
++
++vm_page_t
++vm_page_physaddr_lookup (vm_offset_t phys_addr)
++{
++ assert (phys_addr < phys_last_addr && phys_addr >= phys_first_addr);
++
++ return vm_page_array [(phys_addr - phys_first_addr) >> PAGE_SHIFT];
++}
++
++
++/*
+ * vm_page_bootstrap:
+ *
+ * Initializes the resident memory module.
+@@ -229,7 +333,8 @@
+ simple_lock_init(&vm_page_queue_free_lock);
+ simple_lock_init(&vm_page_queue_lock);
+
+- vm_page_queue_free = VM_PAGE_NULL;
++ queue_init (&vm_page_queue_free_normal);
++ queue_init (&vm_page_queue_free_dma);
+ vm_page_queue_fictitious = VM_PAGE_NULL;
+ queue_init(&vm_page_queue_active);
+ queue_init(&vm_page_queue_inactive);
+@@ -279,6 +384,8 @@
+ simple_lock_init(&bucket->lock);
+ }
+
++ vm_page_free_bitmap_alloc();
++
+ /*
+ * Machine-dependent code allocates the resident page table.
+ * It uses vm_page_init to initialize the page frames.
+@@ -294,7 +401,6 @@
+ *startp = virtual_space_start;
+ *endp = virtual_space_end;
+
+- /* printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);*/
+ vm_page_free_count_minimum = vm_page_free_count;
+ }
+
+@@ -380,6 +486,8 @@
+
+ pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages);
+
++ vm_page_array_init ();
++
+ /*
+ * Initialize the page frames.
+ */
+@@ -389,21 +497,12 @@
+ break;
+
+ vm_page_init(&pages[i], paddr);
++ vm_page_array_add (paddr, &pages[i]);
++ vm_page_release(&pages[i], FALSE);
+ pages_initialized++;
+ }
+
+ /*
+- * Release pages in reverse order so that physical pages
+- * initially get allocated in ascending addresses. This keeps
+- * the devices (which must address physical memory) happy if
+- * they require several consecutive pages.
+- */
+-
+- for (i = pages_initialized; i > 0; i--) {
+- vm_page_release(&pages[i - 1], FALSE);
+- }
+-
+- /*
+ * We have to re-align virtual_space_start,
+ * because pmap_steal_memory has been using it.
+ */
+@@ -421,7 +520,7 @@
+ * Second initialization pass, to be done after
+ * the basic VM system is ready.
+ */
+-void vm_page_module_init(void)
++void vm_page_module_init(void)
+ {
+ vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
+ VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
+@@ -453,6 +552,7 @@
+ panic("vm_page_create");
+
+ vm_page_init(m, paddr);
++ vm_page_array_add (paddr, m);
+ vm_page_release(m, FALSE);
+ }
+ }
+@@ -840,16 +940,16 @@
+ }
+
+ /*
+- * vm_page_grab:
++ * vm_page_grab_flags:
+ *
+- * Remove a page from the free list.
++ * Remove a page specifying the memory zone to get the page from.
+ * Returns VM_PAGE_NULL if the free list is too small.
+ */
+-
+-vm_page_t vm_page_grab(
+- boolean_t external)
++vm_page_t vm_page_grab_flags(
++ boolean_t external,
++ unsigned flags)
+ {
+- register vm_page_t mem;
++ register vm_page_t mem = VM_PAGE_NULL;
+
+ simple_lock(&vm_page_queue_free_lock);
+
+@@ -867,17 +967,70 @@
+ return VM_PAGE_NULL;
+ }
+
+- if (vm_page_queue_free == VM_PAGE_NULL)
++ /*
++ * If no flags are given, any page will do, so search the
++ * normal zone first.
++ */
++
++ if (!(flags & VM_PAGE_DMA)
++ && !(queue_empty(&vm_page_queue_free_normal)))
++ {
++
++ if (--vm_page_free_count < vm_page_free_count_minimum)
++ vm_page_free_count_minimum = vm_page_free_count;
++
++ if (external)
++ vm_page_external_count++;
++
++ queue_remove_first (&vm_page_queue_free_normal,
++ mem, vm_page_t, pageq);
++
++ mem->free = FALSE;
++ mem->extcounted = mem->external = external;
++ mem->pageq.next = 0;
++ mem->pageq.prev = 0;
++
++ vm_page_free_bitmap_unset (atop (mem->phys_addr));
++ }
++
++ if (!mem)
++ {
++
++ /*
++ * It is not necessarily a bug if a DMA page is requested and cannot
++ * be obtained even though vm_page_free_count is nonzero, since the
++ * remaining free pages may all be in the normal zone.
++ */
++ if (queue_empty(&vm_page_queue_free_dma))
++ {
++ if (!(flags & VM_PAGE_DMA))
+ panic("vm_page_grab");
++ else
++ {
++ printf ("vm_page_grab: no dma anymore");
++ simple_unlock(&vm_page_queue_free_lock);
++ return VM_PAGE_NULL;
++ }
++ }
++
++
++ if (--vm_page_free_count < vm_page_free_count_minimum)
++ vm_page_free_count_minimum = vm_page_free_count;
++
++ if (external)
++ vm_page_external_count++;
++
++ queue_remove_first (&vm_page_queue_free_dma,
++ mem, vm_page_t, pageq);
++
++ mem->free = FALSE;
++ mem->extcounted = mem->external = external;
++ mem->pageq.next = 0;
++ mem->pageq.prev = 0;
++
++ vm_page_free_bitmap_unset (atop (mem->phys_addr));
++ }
+
+- if (--vm_page_free_count < vm_page_free_count_minimum)
+- vm_page_free_count_minimum = vm_page_free_count;
+- if (external)
+- vm_page_external_count++;
+- mem = vm_page_queue_free;
+- vm_page_queue_free = (vm_page_t) mem->pageq.next;
+- mem->free = FALSE;
+- mem->extcounted = mem->external = external;
+ simple_unlock(&vm_page_queue_free_lock);
+
+ /*
+@@ -897,6 +1050,26 @@
+ thread_wakeup((event_t) &vm_page_free_wanted);
+
+ return mem;
++
++}
++
++
++/*
++ * vm_page_grab:
++ *
++ * Remove a page from the free list.
++ * Returns VM_PAGE_NULL if the free list is too small.
++ */
++
++vm_page_t vm_page_grab(
++ boolean_t external)
++{
++ register vm_page_t mem;
++
++ /* Get any free page, no matter what zone. */
++ mem = vm_page_grab_flags (external, 0);
++
++ return mem;
+ }
+
+ vm_offset_t vm_page_grab_phys_addr()
+@@ -909,13 +1082,12 @@
+ }
+
+ /*
+- * vm_page_grab_contiguous_pages:
++ * vm_page_grab_contiguous_pages_queue:
+ *
+- * Take N pages off the free list, the pages should
+- * cover a contiguous range of physical addresses.
+- * [Used by device drivers to cope with DMA limitations]
++ * Take N pages off the free list FREEQUEUE, the pages
++ * should cover a contiguous range of physical addresses.
+ *
+- * Returns the page descriptors in ascending order, or
++ * Returns the starting physical address in *PHYS_ADDR, or
+ * Returns KERN_RESOURCE_SHORTAGE if it could not.
+ */
+
+@@ -924,44 +1096,32 @@
+ vm_size_t vm_page_big_pagenum = 0; /* Set this before call! */
+
+ kern_return_t
+-vm_page_grab_contiguous_pages(
+- int npages,
+- vm_page_t pages[],
+- natural_t *bits,
+- boolean_t external)
++vm_page_grab_contiguous_pages_queue(
++ int npages,
++ vm_offset_t *phys_addr,
++ boolean_t external,
++ queue_t freequeue,
++ unsigned long minbitidx,
++ unsigned long maxbitidx,
++ unsigned long align)
++
+ {
+ register int first_set;
+ int size, alloc_size;
+ kern_return_t ret;
+ vm_page_t mem, prevmem;
+
+-#ifndef NBBY
+-#define NBBY 8 /* size in bits of sizeof()`s unity */
+-#endif
++ if (!align)
++ align = 1;
+
+-#define NBPEL (sizeof(natural_t)*NBBY)
++ if (minbitidx >= vm_page_free_bitmap_bitsz)
++ panic ("minbitidx too high.");
+
+- size = (vm_page_big_pagenum + NBPEL - 1)
+- & ~(NBPEL - 1); /* in bits */
+-
+- size = size / NBBY; /* in bytes */
+-
+- /*
+- * If we are called before the VM system is fully functional
+- * the invoker must provide us with the work space. [one bit
+- * per page starting at phys 0 and up to vm_page_big_pagenum]
+- */
+- if (bits == 0) {
+- alloc_size = round_page(size);
+- if (kmem_alloc_wired(kernel_map,
+- (vm_offset_t *)&bits,
+- alloc_size)
+- != KERN_SUCCESS)
+- return KERN_RESOURCE_SHORTAGE;
+- } else
+- alloc_size = 0;
+-
+- bzero(bits, size);
++ if (maxbitidx > vm_page_free_bitmap_bitsz) {
++ printf ("%s: maxbitidx exceeds bitmap size (%x > %x).\n",
++ __FUNCTION__, maxbitidx, vm_page_free_bitmap_bitsz);
++ maxbitidx = vm_page_free_bitmap_bitsz;
++ }
+
+ /*
+ * A very large granularity call, its rare so that is ok
+@@ -972,32 +1132,16 @@
+ * Do not dip into the reserved pool.
+ */
+
+- if ((vm_page_free_count < vm_page_free_reserved)
+- || (vm_page_external_count >= vm_page_external_limit)) {
++ if (((vm_page_free_count < vm_page_free_reserved)
++ || (external
++ && (vm_page_external_count > vm_page_external_limit)))
++ && !current_thread()->vm_privilege) {
+ simple_unlock(&vm_page_queue_free_lock);
+ return KERN_RESOURCE_SHORTAGE;
+ }
+
+ /*
+- * First pass through, build a big bit-array of
+- * the pages that are free. It is not going to
+- * be too large anyways, in 4k we can fit info
+- * for 32k pages.
+- */
+- mem = vm_page_queue_free;
+- while (mem) {
+- register int word_index, bit_index;
+-
+- bit_index = (mem->phys_addr >> PAGE_SHIFT);
+- word_index = bit_index / NBPEL;
+- bit_index = bit_index - (word_index * NBPEL);
+- bits[word_index] |= 1 << bit_index;
+-
+- mem = (vm_page_t) mem->pageq.next;
+- }
+-
+- /*
+- * Second loop. Scan the bit array for NPAGES
++ * First loop. Scan the bit array for NPAGES
+ * contiguous bits. That gives us, if any,
+ * the range of pages we will be grabbing off
+ * the free list.
+@@ -1007,9 +1151,13 @@
+
+ first_set = 0;
+
+- for (i = 0; i < size; i += sizeof(natural_t)) {
++ for (i = (minbitidx/NBBY);
++ i < (maxbitidx/NBBY);
++ i += sizeof(natural_t))
++ {
+
+- register natural_t v = bits[i / sizeof(natural_t)];
++ register natural_t v =
++ vm_page_free_bitmap[i / sizeof(natural_t)];
+ register int bitpos;
+
+ /*
+@@ -1042,14 +1190,20 @@
+ */
+ bits_so_far = 0;
+ count_zeroes:
+- while ((bitpos < NBPEL) && ((v & 1) == 0)) {
++ while ((bitpos < NBPEL) &&
++ (((v & 1) == 0)
++ || ((bitpos + i*NBBY) % align)))
++ {
+ bitpos++;
+ v >>= 1;
+ }
+- if (v & 1) {
++
++ if ((v & 1)
++ && (!((bitpos + i*NBBY) % align)))
++ {
+ first_set = (i * NBBY) + bitpos;
+ goto count_ones;
+- }
++ }
+ }
+ /*
+ * No luck
+@@ -1063,7 +1217,6 @@
+ */
+ not_found_em:
+ simple_unlock(&vm_page_queue_free_lock);
+-
+ ret = KERN_RESOURCE_SHORTAGE;
+ goto out;
+
+@@ -1079,43 +1232,33 @@
+ vm_page_free_count_minimum = vm_page_free_count;
+ if (external)
+ vm_page_external_count += npages;
++
+ {
+- register vm_offset_t first_phys, last_phys;
+-
+- /* cache values for compare */
+- first_phys = first_set << PAGE_SHIFT;
+- last_phys = first_phys + (npages << PAGE_SHIFT);/* not included */
+-
+- /* running pointers */
+- mem = vm_page_queue_free;
+- prevmem = VM_PAGE_NULL;
+-
+- while (mem) {
+-
+- register vm_offset_t addr;
+-
+- addr = mem->phys_addr;
+-
+- if ((addr >= first_phys) &&
+- (addr < last_phys)) {
+- if (prevmem)
+- prevmem->pageq.next = mem->pageq.next;
+- pages[(addr - first_phys) >> PAGE_SHIFT] = mem;
+- mem->free = FALSE;
+- mem->extcounted = mem->external = external;
+- /*
+- * Got them all ?
+- */
+- if (--npages == 0) break;
+- } else
+- prevmem = mem;
+-
+- mem = (vm_page_t) mem->pageq.next;
++ vm_offset_t first_phys;
++ vm_page_t pg;
++ int i;
++
++ first_phys = first_set << PAGE_SHIFT;
++
++ if (phys_addr)
++ *phys_addr = first_phys;
++
++ for (i = 0; i < npages; i++)
++ {
++ pg = vm_page_physaddr_lookup (first_phys + (i << PAGE_SHIFT));
++
++ assert (pg != VM_PAGE_NULL);
++
++ queue_remove (freequeue, pg, vm_page_t, pageq);
++
++ pg->free = FALSE;
++ pg->extcounted = pg->external = external;
++ vm_page_free_bitmap_unset (atop (pg->phys_addr));
+ }
+ }
+-
++
+ simple_unlock(&vm_page_queue_free_lock);
+-
++
+ /*
+ * Decide if we should poke the pageout daemon.
+ * We do this if the free count is less than the low
+@@ -1134,8 +1277,74 @@
+
+ ret = KERN_SUCCESS;
+ out:
+- if (alloc_size)
+- kmem_free(kernel_map, (vm_offset_t) bits, alloc_size);
++
++ return ret;
++}
++
++/*
++ * vm_page_grab_contiguous_pages_flags:
++ *
++ * Take N pages from specified zone, the pages should
++ * cover a contiguous range of physical addresses.
++ * [Used by device drivers to cope with DMA limitations]
++ *
++ * Returns the starting physical address in *PHYS_ADDR, or
++ * Returns KERN_RESOURCE_SHORTAGE if it could not.
++ */
++
++kern_return_t
++vm_page_grab_contiguous_pages_flags(
++ int npages,
++ vm_offset_t *phys_addr,
++ boolean_t external,
++ unsigned flags,
++ unsigned long align)
++{
++ kern_return_t ret;
++
++ if (!(flags & VM_PAGE_DMA))
++ {
++ ret = vm_page_grab_contiguous_pages_queue (
++ npages, phys_addr, external,
++ &vm_page_queue_free_normal,
++ atop(vm_page_normal_first),
++ atop(vm_page_normal_last),
++ align);
++
++ if (ret == KERN_SUCCESS)
++ return ret;
++ };
++
++ ret = vm_page_grab_contiguous_pages_queue (
++ npages, phys_addr, external,
++ &vm_page_queue_free_dma,
++ atop(vm_page_dma_first),
++ atop(vm_page_dma_last),
++ align);
++
++ return ret;
++}
++
++/*
++ * vm_page_grab_contiguous_pages:
++ *
++ * Take N pages off the free list, the pages should
++ * cover a contiguous range of physical addresses.
++ *
++ * Returns the starting physical address in *PHYS_ADDR, or
++ * Returns KERN_RESOURCE_SHORTAGE if it could not.
++ * [Used by device drivers to cope with DMA limitations]
++ */
++kern_return_t
++vm_page_grab_contiguous_pages(
++ int npages,
++ queue_t pages,
++ vm_offset_t *phys_addr,
++ boolean_t e)
++{
++ kern_return_t ret;
++
++ ret = vm_page_grab_contiguous_pages_flags (npages, phys_addr, e, 0, 0);
+
+ return ret;
+ }
+@@ -1150,16 +1359,36 @@
+ register vm_page_t mem,
+ boolean_t external)
+ {
++ queue_t freequeue;
++
++ if (pmap_is_dma (mem->phys_addr))
++ freequeue = &vm_page_queue_free_dma;
++ else if (pmap_is_normal (mem->phys_addr))
++ freequeue = &vm_page_queue_free_normal;
++ else {
++ /* XXX - This panic is temporary; it should not stay here. */
++ panic ("vm_page_release (unknown page zone)");
++ }
++
++ /* UGLY: We skip page 0, since it may cause problems
++ when returned to drivers. */
++ if (mem->phys_addr == 0)
++ return;
++
+ simple_lock(&vm_page_queue_free_lock);
++
+ if (mem->free)
+ panic("vm_page_release");
+ mem->free = TRUE;
+- mem->pageq.next = (queue_entry_t) vm_page_queue_free;
+- vm_page_queue_free = mem;
++
++ queue_enter (freequeue, mem, vm_page_t, pageq);
++
+ vm_page_free_count++;
+ if (external)
+ vm_page_external_count--;
+
++ vm_page_free_bitmap_set (atop (mem->phys_addr));
++
+ /*
+ * Check if we should wake up someone waiting for page.
+ * But don't bother waking them unless they can allocate.