Diffstat (limited to 'serverboot/default_pager.c')
-rw-r--r-- serverboot/default_pager.c 3844
1 file changed, 3844 insertions(+), 0 deletions(-)
diff --git a/serverboot/default_pager.c b/serverboot/default_pager.c
new file mode 100644
index 00000000..198bb224
--- /dev/null
+++ b/serverboot/default_pager.c
@@ -0,0 +1,3844 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1989-1993 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Default pager. Pages to paging partition.
+ *
+ * MUST BE ABLE TO ALLOCATE WIRED-DOWN MEMORY!!!
+ */
+
+#include <mach.h>
+#include <mach/message.h>
+#include <mach/notify.h>
+#include <mach/mig_errors.h>
+#include <mach/thread_switch.h>
+#include <mach/task_info.h>
+#include <mach/default_pager_types.h>
+
+#include <cthreads.h>
+
+#include <device/device_types.h>
+#include <device/device.h>
+
+#include <queue.h>
+#include <wiring.h>
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "file_io.h"
+
+#define debug 0
+
+extern void *kalloc();
+
+static char my_name[] = "(default pager):";
+
+static struct mutex printf_lock = MUTEX_INITIALIZER;
+
+#define dprintf(f, x...) \
+ ({ mutex_lock (&printf_lock); printf (f , ##x); fflush (stdout); mutex_unlock (&printf_lock); })
+#define ddprintf(f, x...) ((void)0)
+
+/*
+ * parallel vs serial switch
+ */
+#define PARALLEL 1
+
+#if 0
+#define CHECKSUM 1
+#endif
+
+#define USE_PRECIOUS 1
+
+#define ptoa(p) ((p)*vm_page_size)
+#define atop(a) ((a)/vm_page_size)
+
+/*
+
+ */
+/*
+ * Bitmap allocation.
+ */
+typedef unsigned int bm_entry_t;
+#define NB_BM 32
+#define BM_MASK 0xffffffff
+
+#define howmany(a,b) (((a) + (b) - 1)/(b))
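+/* E.g. (illustrative): howmany(100, 32) == 4, i.e. 100 blocks need
+   four 32-bit bitmap words; the last word is only partly used. */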
+
+/*
+ * Value to indicate no block assigned
+ */
+#define NO_BLOCK ((vm_offset_t)-1)
+
+/*
+ * 'Partition' structure for each paging area.
+ * Controls allocation of blocks within paging area.
+ */
+struct part {
+ struct mutex p_lock; /* for bitmap/free */
+ vm_size_t total_size; /* total number of blocks */
+ vm_size_t free; /* number of blocks free */
+ unsigned int id; /* named lookup */
+ bm_entry_t *bitmap; /* allocation map */
+ boolean_t going_away; /* destroy attempt in progress */
+ struct file_direct *file; /* file paged to */
+};
+typedef struct part *partition_t;
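+
+/*
+ * Sizing example (illustrative): a 512MB partition with 4k pages has
+ * total_size == 131072 blocks, so its bitmap needs 131072/32 == 4096
+ * bm_entry_t words, i.e. 16k of wired memory.
+ */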
+
+struct {
+ struct mutex lock;
+ int n_partitions;
+ partition_t *partition_list;/* array, for quick mapping */
+} all_partitions; /* list of all such */
+
+typedef unsigned char p_index_t;
+
+#define P_INDEX_INVALID ((p_index_t)-1)
+
+#define no_partition(x) ((x) == P_INDEX_INVALID)
+
+partition_t partition_of(x)
+ int x;
+{
+ if (x >= all_partitions.n_partitions || x < 0)
+ panic("partition_of x%x", x);
+ return all_partitions.partition_list[x];
+}
+
+void set_partition_of(x, p)
+ int x;
+ partition_t p;
+{
+ if (x >= all_partitions.n_partitions || x < 0)
+ panic("set_partition_of x%x", x);
+ all_partitions.partition_list[x] = p;
+}
+
+/*
+ * Simple mapping from a (file)name to a numeric id.
+ * Saves space, since filenames can be long.
+ */
+unsigned int
+part_id(name)
+ unsigned char *name;
+{
+ register unsigned int len, id, xorid;
+
+ len = strlen(name);
+ id = xorid = 0;
+ while (len--) {
+ xorid ^= *name;
+ id += *name++;
+ }
+ return (id << 8) | xorid;
+}
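+
+/*
+ * For illustration: part_id("hd0") is 0xfc3c -- the byte sum 0xfc
+ * shifted into the high bits, with the byte xor 0x3c in the low byte.
+ * Since both sum and xor are order-independent, names that are
+ * permutations of one another (e.g. "sd01" and "sd10") collide.
+ */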
+
+partition_init()
+{
+ mutex_init(&all_partitions.lock);
+ all_partitions.n_partitions = 0;
+}
+
+static partition_t
+new_partition (const char *name, struct file_direct *fdp,
+ int check_linux_signature)
+{
+ register partition_t part;
+ register vm_size_t size, bmsize;
+ vm_offset_t raddr;
+ mach_msg_type_number_t rsize;
+ int rc;
+ unsigned int id = part_id(name);
+
+ mutex_lock(&all_partitions.lock);
+ {
+ unsigned int i;
+ for (i = 0; i < all_partitions.n_partitions; i++)
+ {
+ part = partition_of(i);
+ if (part && part->id == id)
+ {
+ printf ("(default pager): Already paging to partition %s!\n",
+ name);
+ mutex_unlock(&all_partitions.lock);
+ return 0;
+ }
+ }
+ }
+ mutex_unlock(&all_partitions.lock);
+
+ size = atop(fdp->fd_size * fdp->fd_bsize);
+ bmsize = howmany(size, NB_BM) * sizeof(bm_entry_t);
+
+ part = (partition_t) kalloc(sizeof(struct part));
+ mutex_init(&part->p_lock);
+ part->total_size = size;
+ part->free = size;
+ part->id = id;
+ part->bitmap = (bm_entry_t *)kalloc(bmsize);
+ part->going_away= FALSE;
+ part->file = fdp;
+
+ bzero((char *)part->bitmap, bmsize);
+
+ if (check_linux_signature < 0)
+ {
+ printf("(default pager): "
+ "Paging to raw partition %s (%uk paging space)\n",
+ name, part->total_size * (vm_page_size / 1024));
+ return part;
+ }
+
+#define LINUX_PAGE_SIZE 4096 /* size of pages in Linux swap partitions */
+ rc = page_read_file_direct(part->file,
+ 0, LINUX_PAGE_SIZE,
+ &raddr,
+ &rsize);
+ if (rc)
+ panic("(default pager): cannot read first page of %s! rc=%#x\n",
+ name, rc);
+ while (rsize < LINUX_PAGE_SIZE)
+ {
+ /* Filesystem block size is smaller than page size,
+ so we must do several reads to get the whole page. */
+ vm_address_t baddr, bsize;
+ rc = page_read_file_direct(part->file,
+ rsize, LINUX_PAGE_SIZE-rsize,
+ &baddr,
+ &bsize);
+ if (rc)
+ panic("(default pager): "
+ "cannot read first page of %s! rc=%#x at %#x\n",
+ name, rc, rsize);
+
+ memcpy ((char *) raddr + rsize, (void *) baddr, bsize);
+ rsize += bsize;
+ vm_deallocate (mach_task_self (), baddr, bsize);
+ }
+
+ if (!memcmp("SWAP-SPACE", (char *) raddr + LINUX_PAGE_SIZE-10, 10))
+ {
+ /* The partition's first page has a Linux swap signature.
+ This means the beginning of the page contains a bitmap
+ of good pages, and all others are bad. */
+ unsigned int i, j, bad, max;
+ int waste;
+
+ printf("(default pager): Found Linux 2.0 swap signature in %s\n",
+ name);
+
+ /* The first page, and the pages corresponding to the bits
+ occupied by the signature in the final 10 bytes of the page,
+ are always unavailable ("bad"). */
+ *(u_int32_t *)raddr &= ~(u_int32_t) 1;
+ memset((char *) raddr + LINUX_PAGE_SIZE-10, 0, 10);
+
+ max = LINUX_PAGE_SIZE / sizeof(u_int32_t);
+ if (max > (part->total_size + 31) / 32)
+ max = (part->total_size + 31) / 32;
+
+ bad = 0;
+ for (i = 0; i < max; ++i)
+ {
+ u_int32_t bm = ((u_int32_t *) raddr)[i];
+ if (bm == ~(u_int32_t) 0)
+ continue;
+ /* There are some zero bits in this word. */
+ for (j = 0; j < 32; ++j)
+ if ((bm & (1 << j)) == 0)
+ {
+ unsigned int p = i*32 + j;
+ if (p >= part->total_size)
+ break;
+ ++bad;
+ part->bitmap[p / NB_BM] |= 1 << (p % NB_BM);
+ }
+ }
+ part->free -= bad;
+
+ --bad; /* Don't complain about first page. */
+ waste = part->total_size - (8 * (LINUX_PAGE_SIZE-10));
+ if (waste > 0)
+ {
+ /* The wasted pages were already marked "bad". */
+ bad -= waste;
+ if (bad > 0)
+ printf("\
+(default pager): Paging to %s, %dk swap-space (%dk bad, %dk wasted at end)\n",
+ name,
+ part->free * (LINUX_PAGE_SIZE / 1024),
+ bad * (LINUX_PAGE_SIZE / 1024),
+ waste * (LINUX_PAGE_SIZE / 1024));
+ else
+ printf("\
+(default pager): Paging to %s, %dk swap-space (%dk wasted at end)\n",
+ name,
+ part->free * (LINUX_PAGE_SIZE / 1024),
+ waste * (LINUX_PAGE_SIZE / 1024));
+ }
+ else if (bad > 0)
+ printf("\
+(default pager): Paging to %s, %dk swap-space (excludes %dk marked bad)\n",
+ name,
+ part->free * (LINUX_PAGE_SIZE / 1024),
+ bad * (LINUX_PAGE_SIZE / 1024));
+ else
+ printf("\
+(default pager): Paging to %s, %dk swap-space\n",
+ name,
+ part->free * (LINUX_PAGE_SIZE / 1024));
+ }
+ else if (!memcmp("SWAPSPACE2",
+ (char *) raddr + LINUX_PAGE_SIZE-10, 10))
+ {
+ struct
+ {
+ u_int8_t bootbits[1024];
+ u_int32_t version;
+ u_int32_t last_page;
+ u_int32_t nr_badpages;
+ u_int32_t padding[125];
+ u_int32_t badpages[1];
+ } *hdr = (void *) raddr;
+
+ printf("\
+(default pager): Found Linux 2.2 swap signature (v%u) in %s...",
+ hdr->version, name);
+
+ part->bitmap[0] |= 1; /* first page unusable */
+ part->free--;
+
+ switch (hdr->version)
+ {
+ default:
+ if (check_linux_signature)
+ {
+ printf ("version %u unknown! SKIPPING %s!\n",
+ hdr->version,
+ name);
+ vm_deallocate(mach_task_self(), raddr, rsize);
+ kfree(part->bitmap, bmsize);
+ kfree(part, sizeof *part);
+ return 0;
+ }
+ else
+ printf ("version %u unknown! IGNORING SIGNATURE PAGE!"
+ " %dk swap-space\n",
+ hdr->version,
+ part->free * (LINUX_PAGE_SIZE / 1024));
+ break;
+
+ case 1:
+ {
+ unsigned int waste, i;
+ if (hdr->last_page > part->total_size)
+ {
+ printf ("signature says %uk, partition has only %uk! ",
+ hdr->last_page * (LINUX_PAGE_SIZE / 1024),
+ part->total_size * (LINUX_PAGE_SIZE / 1024));
+ waste = 0;
+ }
+ else
+ {
+ waste = part->total_size - hdr->last_page;
+ part->total_size = hdr->last_page;
+ part->free = part->total_size - 1;
+ }
+ for (i = 0; i < hdr->nr_badpages; ++i)
+ {
+ const u_int32_t bad = hdr->badpages[i];
+ part->bitmap[bad / NB_BM] |= 1 << (bad % NB_BM);
+ part->free--;
+ }
+ printf ("%uk swap-space",
+ part->free * (LINUX_PAGE_SIZE / 1024));
+ if (hdr->nr_badpages != 0)
+ printf (" (excludes %uk marked bad)",
+ hdr->nr_badpages * (LINUX_PAGE_SIZE / 1024));
+ if (waste != 0)
+ printf (" (excludes %uk at end of partition)",
+ waste * (LINUX_PAGE_SIZE / 1024));
+ printf ("\n");
+ }
+ }
+ }
+ else if (check_linux_signature)
+ {
+ printf ("(default pager): "
+ "Cannot find Linux swap signature page! "
+ "SKIPPING %s (%uk partition)!",
+ name, part->total_size * (vm_page_size / 1024));
+ kfree(part->bitmap, bmsize);
+ kfree(part, sizeof *part);
+ part = 0;
+ }
+ else
+ printf("(default pager): "
+ "Paging to raw partition %s (%uk paging space)\n",
+ name, part->total_size * (vm_page_size / 1024));
+
+ vm_deallocate(mach_task_self(), raddr, rsize);
+
+ return part;
+}
+
+/*
+ * Create a partition descriptor,
+ * add it to the list of all such.
+ * size is in BYTES.
+ */
+void
+create_paging_partition(const char *name,
+ struct file_direct *fdp, int isa_file,
+ int linux_signature)
+{
+ register partition_t part;
+
+ part = new_partition (name, fdp, linux_signature);
+ if (!part)
+ return;
+
+ mutex_lock(&all_partitions.lock);
+ {
+ register int i;
+
+ for (i = 0; i < all_partitions.n_partitions; i++)
+ if (partition_of(i) == 0) break;
+
+ if (i == all_partitions.n_partitions) {
+ register partition_t *new_list, *old_list;
+ register int n;
+
+ n = i ? (i<<1) : 2;
+ new_list = (partition_t *)
+ kalloc( n * sizeof(partition_t) );
+ if (new_list == 0) no_paging_space(TRUE);
+ bzero(new_list, n*sizeof(partition_t));
+ if (i) {
+ old_list = all_partitions.partition_list;
+ bcopy(old_list, new_list, i*sizeof(partition_t));
+ }
+ all_partitions.partition_list = new_list;
+ all_partitions.n_partitions = n;
+ if (i) kfree(old_list, i*sizeof(partition_t));
+ }
+ set_partition_of(i, part);
+ }
+ mutex_unlock(&all_partitions.lock);
+
+#if 0
+ dprintf("%s Added paging %s %s\n", my_name,
+ (isa_file) ? "file" : "device", name);
+#endif
+ overcommitted(TRUE, part->free);
+}
+
+/*
+ * Choose the most appropriate default partition
+ * for an object of SIZE bytes.
+ * Return the partition locked, unless the object
+ * has no current partition (CUR_PART == P_INDEX_INVALID).
+ */
+p_index_t
+choose_partition(size, cur_part)
+ unsigned int size;
+ register p_index_t cur_part;
+{
+ register partition_t part;
+ register boolean_t found = FALSE;
+ register int i;
+
+ mutex_lock(&all_partitions.lock);
+ for (i = 0; i < all_partitions.n_partitions; i++) {
+
+ /* the undesirable one ? */
+ if (i == cur_part)
+ continue;
+
+ddprintf ("choose_partition(%x,%d,%d)\n",size,cur_part,i);
+ /* one that was removed ? */
+ if ((part = partition_of(i)) == 0)
+ continue;
+
+ /* one that is being removed ? */
+ if (part->going_away)
+ continue;
+
+ /* is it big enough ? */
+ mutex_lock(&part->p_lock);
+ if (ptoa(part->free) >= size) {
+ if (cur_part != P_INDEX_INVALID) {
+ mutex_unlock(&all_partitions.lock);
+ return (p_index_t)i;
+ } else
+ found = TRUE;
+ }
+ mutex_unlock(&part->p_lock);
+
+ if (found) break;
+ }
+ mutex_unlock(&all_partitions.lock);
+ return (found) ? (p_index_t)i : P_INDEX_INVALID;
+}
+
+/*
+ * Allocate a page in a paging partition
+ * The partition is returned unlocked.
+ */
+vm_offset_t
+pager_alloc_page(pindex, lock_it)
+ p_index_t pindex;
+{
+ register int bm_e;
+ register int bit;
+ register int limit;
+ register bm_entry_t *bm;
+ partition_t part;
+ static char here[] = "%spager_alloc_page";
+
+ if (no_partition(pindex))
+ return (NO_BLOCK);
+ddprintf ("pager_alloc_page(%d,%d)\n",pindex,lock_it);
+ part = partition_of(pindex);
+
+ /* unlikely, but possible deadlock against destroy_partition */
+ if (!part || part->going_away)
+ return (NO_BLOCK);
+
+ if (lock_it)
+ mutex_lock(&part->p_lock);
+
+ if (part->free == 0) {
+ /* out of paging space */
+ mutex_unlock(&part->p_lock);
+ return (NO_BLOCK);
+ }
+
+ limit = howmany(part->total_size, NB_BM);
+ bm = part->bitmap;
+ for (bm_e = 0; bm_e < limit; bm_e++, bm++)
+ if (*bm != BM_MASK)
+ break;
+
+ if (bm_e == limit)
+ panic(here,my_name);
+
+ /*
+ * Find and set the proper bit
+ */
+ {
+ register bm_entry_t b = *bm;
+
+ for (bit = 0; bit < NB_BM; bit++)
+ if ((b & (1<<bit)) == 0)
+ break;
+ if (bit == NB_BM)
+ panic(here,my_name);
+
+ *bm = b | (1<<bit);
+ part->free--;
+
+ }
+
+ mutex_unlock(&part->p_lock);
+
+ return (bm_e*NB_BM+bit);
+}
+
+/*
+ * Deallocate a page in a paging partition
+ */
+void
+pager_dealloc_page(pindex, page, lock_it)
+ p_index_t pindex;
+ register vm_offset_t page;
+ boolean_t lock_it;
+{
+ register partition_t part;
+ register int bit, bm_e;
+
+ /* be paranoid */
+ if (no_partition(pindex))
+ panic("%sdealloc_page",my_name);
+ddprintf ("pager_dealloc_page(%d,%x,%d)\n",pindex,page,lock_it);
+ part = partition_of(pindex);
+
+ if (page >= part->total_size)
+ panic("%sdealloc_page",my_name);
+
+ bm_e = page / NB_BM;
+ bit = page % NB_BM;
+
+ if (lock_it)
+ mutex_lock(&part->p_lock);
+
+ part->bitmap[bm_e] &= ~(1<<bit);
+ part->free++;
+
+ if (lock_it)
+ mutex_unlock(&part->p_lock);
+}
+
+/*
+
+ */
+/*
+ * Allocation info for each paging object.
+ *
+ * Most operations, even pager_write_offset and pager_put_checksum,
+ * just need a read lock. Higher-level considerations prevent
+ * conflicting operations on a single page. The lock really protects
+ * the underlying size and block map memory, so pager_extend needs a
+ * write lock.
+ *
+ * An object can now span multiple paging partitions. The allocation
+ * info we keep is a pair (offset,p_index) where the index is in the
+ * array of all partition ptrs, and the offset is partition-relative.
+ * Size-wise we are doing ok fitting the pair into a single integer:
+ * the offset really is in pages, which frees the low-order
+ * log2(vm_page_size) bits of a byte offset for the partition index.
+ */
+#define DEBUG_READER_CONFLICTS 0
+
+#if DEBUG_READER_CONFLICTS
+int default_pager_read_conflicts = 0;
+#endif
+
+union dp_map {
+
+ struct {
+ unsigned int p_offset : 24,
+ p_index : 8;
+ } block;
+
+ union dp_map *indirect;
+};
+typedef union dp_map *dp_map_t;
+
+/* quick check for part==block==invalid */
+#define no_block(e) ((e).indirect == (dp_map_t)NO_BLOCK)
+#define invalidate_block(e) ((e).indirect = (dp_map_t)NO_BLOCK)
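+
+/*
+ * Capacity note (illustrative): with 4k pages, the 24-bit p_offset
+ * addresses 2^24 pages == 64GB per partition, and the 8-bit p_index
+ * distinguishes up to 255 partitions (P_INDEX_INVALID is reserved).
+ */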
+
+struct dpager {
+ struct mutex lock; /* lock for extending block map */
+ /* XXX should be read-write lock */
+#if DEBUG_READER_CONFLICTS
+ int readers;
+ boolean_t writer;
+#endif
+ dp_map_t map; /* block map */
+ vm_size_t size; /* size of paging object, in pages */
+ p_index_t cur_partition;
+#ifdef CHECKSUM
+ vm_offset_t *checksum; /* checksum - parallel to block map */
+#define NO_CHECKSUM ((vm_offset_t)-1)
+#endif /* CHECKSUM */
+};
+typedef struct dpager *dpager_t;
+
+/*
+ * A paging object uses either a one- or a two-level map of offsets
+ * into a paging partition.
+ */
+#define PAGEMAP_ENTRIES 64
+ /* number of pages in a second-level map */
+#define PAGEMAP_SIZE(npgs) ((npgs)*sizeof(vm_offset_t))
+
+#define INDIRECT_PAGEMAP_ENTRIES(npgs) \
+ ((((npgs)-1)/PAGEMAP_ENTRIES) + 1)
+#define INDIRECT_PAGEMAP_SIZE(npgs) \
+ (INDIRECT_PAGEMAP_ENTRIES(npgs) * sizeof(vm_offset_t *))
+#define INDIRECT_PAGEMAP(size) \
+ ((size) > PAGEMAP_ENTRIES)
+
+#define ROUNDUP_TO_PAGEMAP(npgs) \
+ (((npgs) + PAGEMAP_ENTRIES - 1) & ~(PAGEMAP_ENTRIES - 1))
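+
+/*
+ * Worked example (illustrative): a 200-page object is indirect, with
+ * INDIRECT_PAGEMAP_ENTRIES(200) == 4 first-level pointers, each
+ * referring to a full 64-entry second-level block; a 64-page object
+ * fits in a single direct map.
+ */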
+
+/*
+ * Object sizes are rounded up to the next power of 2,
+ * unless they are bigger than a given maximum size.
+ */
+vm_size_t max_doubled_size = 4 * 1024 * 1024; /* 4 meg */
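+
+/*
+ * E.g. (illustrative, with 4k pages): a 5-page object gets an 8-page
+ * map, while a 2000-page object (over max_doubled_size == 1024 pages)
+ * is only rounded up to the next multiple of PAGEMAP_ENTRIES, 2048.
+ */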
+
+/*
+ * Attach a new paging object to a paging partition
+ */
+void
+pager_alloc(pager, part, size)
+ register dpager_t pager;
+ p_index_t part;
+ register vm_size_t size; /* in BYTES */
+{
+ register int i;
+ register dp_map_t mapptr, emapptr;
+
+ mutex_init(&pager->lock);
+#if DEBUG_READER_CONFLICTS
+ pager->readers = 0;
+ pager->writer = FALSE;
+#endif
+ pager->cur_partition = part;
+
+ /*
+ * Convert byte size to number of pages, then increase to the nearest
+ * power of 2.
+ */
+ size = atop(size);
+ if (size <= atop(max_doubled_size)) {
+ i = 1;
+ while (i < size)
+ i <<= 1;
+ size = i;
+ } else
+ size = ROUNDUP_TO_PAGEMAP(size);
+
+ /*
+ * Allocate and initialize the block map
+ */
+ {
+ register vm_size_t alloc_size;
+ dp_map_t init_value;
+
+ if (INDIRECT_PAGEMAP(size)) {
+ alloc_size = INDIRECT_PAGEMAP_SIZE(size);
+ init_value = (dp_map_t)0;
+ } else {
+ alloc_size = PAGEMAP_SIZE(size);
+ init_value = (dp_map_t)NO_BLOCK;
+ }
+
+ mapptr = (dp_map_t) kalloc(alloc_size);
+ for (emapptr = &mapptr[(alloc_size-1) / sizeof(vm_offset_t)];
+ emapptr >= mapptr;
+ emapptr--)
+ emapptr->indirect = init_value;
+
+ }
+ pager->map = mapptr;
+ pager->size = size;
+
+#ifdef CHECKSUM
+ if (INDIRECT_PAGEMAP(size)) {
+ mapptr = (vm_offset_t *)
+ kalloc(INDIRECT_PAGEMAP_SIZE(size));
+ for (i = INDIRECT_PAGEMAP_ENTRIES(size); --i >= 0;)
+ mapptr[i] = 0;
+ } else {
+ mapptr = (vm_offset_t *) kalloc(PAGEMAP_SIZE(size));
+ for (i = 0; i < size; i++)
+ mapptr[i] = NO_CHECKSUM;
+ }
+ pager->checksum = mapptr;
+#endif /* CHECKSUM */
+}
+
+/*
+ * Return size (in bytes) of space actually allocated to this pager.
+ * The pager is read-locked.
+ */
+
+vm_size_t
+pager_allocated(pager)
+ register dpager_t pager;
+{
+ vm_size_t size;
+ register dp_map_t map, emap;
+ vm_size_t asize;
+
+ size = pager->size; /* in pages */
+ asize = 0; /* allocated, in pages */
+ map = pager->map;
+
+ if (INDIRECT_PAGEMAP(size)) {
+ for (emap = &map[INDIRECT_PAGEMAP_ENTRIES(size)];
+ map < emap; map++) {
+
+ register dp_map_t map2, emap2;
+
+ if ((map2 = map->indirect) == 0)
+ continue;
+
+ for (emap2 = &map2[PAGEMAP_ENTRIES];
+ map2 < emap2; map2++)
+ if ( ! no_block(*map2) )
+ asize++;
+
+ }
+ } else {
+ for (emap = &map[size]; map < emap; map++)
+ if ( ! no_block(*map) )
+ asize++;
+ }
+
+ return ptoa(asize);
+}
+
+/*
+ * Find offsets (in the object) of pages actually allocated to this pager.
+ * Returns the number of allocated pages, whether or not they all fit.
+ * The pager is read-locked.
+ */
+
+unsigned int
+pager_pages(pager, pages, numpages)
+ dpager_t pager;
+ register default_pager_page_t *pages;
+ unsigned int numpages;
+{
+ vm_size_t size;
+ dp_map_t map, emap;
+ unsigned int actual;
+ vm_offset_t offset;
+
+ size = pager->size; /* in pages */
+ map = pager->map;
+ actual = 0;
+ offset = 0;
+
+ if (INDIRECT_PAGEMAP(size)) {
+ for (emap = &map[INDIRECT_PAGEMAP_ENTRIES(size)];
+ map < emap; map++) {
+
+ register dp_map_t map2, emap2;
+
+ if ((map2 = map->indirect) == 0) {
+ offset += vm_page_size * PAGEMAP_ENTRIES;
+ continue;
+ }
+ for (emap2 = &map2[PAGEMAP_ENTRIES];
+ map2 < emap2; map2++) {
+ if ( ! no_block(*map2) ) {
+ if (actual++ < numpages)
+ pages++->dpp_offset = offset;
+ }
+ offset += vm_page_size;
+ }
+ }
+ } else {
+ for (emap = &map[size]; map < emap; map++) {
+ if ( ! no_block(*map) ) {
+ if (actual++ < numpages)
+ pages++->dpp_offset = offset;
+ }
+ offset += vm_page_size;
+ }
+ }
+ return actual;
+}
+
+/*
+ * Extend the map for a paging object.
+ *
+ * XXX This implementation can allocate an arbitrarily large amount
+ * of wired memory when extending a big block map. Because vm-privileged
+ * threads call pager_extend, this can crash the system by exhausting
+ * system memory.
+ */
+void
+pager_extend(pager, new_size)
+ register dpager_t pager;
+ register vm_size_t new_size; /* in pages */
+{
+ register dp_map_t new_mapptr;
+ register dp_map_t old_mapptr;
+ register int i;
+ register vm_size_t old_size;
+
+ mutex_lock(&pager->lock); /* XXX lock_write */
+#if DEBUG_READER_CONFLICTS
+ pager->writer = TRUE;
+#endif
+ /*
+ * Double current size until we cover new size.
+ * If object is 'too big' just use new size.
+ */
+ old_size = pager->size;
+
+ if (new_size <= atop(max_doubled_size)) {
+ i = old_size;
+ while (i < new_size)
+ i <<= 1;
+ new_size = i;
+ } else
+ new_size = ROUNDUP_TO_PAGEMAP(new_size);
+
+ if (INDIRECT_PAGEMAP(old_size)) {
+ /*
+ * Pager already uses two levels. Allocate
+ * a larger indirect block.
+ */
+ new_mapptr = (dp_map_t)
+ kalloc(INDIRECT_PAGEMAP_SIZE(new_size));
+ old_mapptr = pager->map;
+ for (i = 0; i < INDIRECT_PAGEMAP_ENTRIES(old_size); i++)
+ new_mapptr[i] = old_mapptr[i];
+ for (; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++)
+ new_mapptr[i].indirect = (dp_map_t)0;
+ kfree((char *)old_mapptr, INDIRECT_PAGEMAP_SIZE(old_size));
+ pager->map = new_mapptr;
+ pager->size = new_size;
+#ifdef CHECKSUM
+ new_mapptr = (vm_offset_t *)
+ kalloc(INDIRECT_PAGEMAP_SIZE(new_size));
+ old_mapptr = pager->checksum;
+ for (i = 0; i < INDIRECT_PAGEMAP_ENTRIES(old_size); i++)
+ new_mapptr[i] = old_mapptr[i];
+ for (; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++)
+ new_mapptr[i] = 0;
+ kfree((char *)old_mapptr, INDIRECT_PAGEMAP_SIZE(old_size));
+ pager->checksum = new_mapptr;
+#endif /* CHECKSUM */
+#if DEBUG_READER_CONFLICTS
+ pager->writer = FALSE;
+#endif
+ mutex_unlock(&pager->lock);
+ ddprintf ("pager_extend 1 mapptr %x [3b] = %x\n", new_mapptr,
+ new_mapptr[0x3b]);
+ if (new_mapptr[0x3b].indirect > (dp_map_t)0x10000
+ && new_mapptr[0x3b].indirect != (dp_map_t)NO_BLOCK)
+ panic ("debug panic");
+ return;
+ }
+
+ if (INDIRECT_PAGEMAP(new_size)) {
+ /*
+ * Changing from direct map to indirect map.
+ * Allocate both indirect and direct map blocks,
+ * since second-level (direct) block must be
+ * full size (PAGEMAP_SIZE(PAGEMAP_ENTRIES)).
+ */
+
+ /*
+ * Allocate new second-level map first.
+ */
+ new_mapptr = (dp_map_t) kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES));
+ old_mapptr = pager->map;
+ for (i = 0; i < old_size; i++)
+ new_mapptr[i] = old_mapptr[i];
+ for (; i < PAGEMAP_ENTRIES; i++)
+ invalidate_block(new_mapptr[i]);
+ kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size));
+ old_mapptr = new_mapptr;
+
+ ddprintf ("pager_extend 2 mapptr %x [3b] = %x\n", new_mapptr,
+ new_mapptr[0x3b]);
+ if (new_mapptr[0x3b].indirect > (dp_map_t)0x10000
+ && new_mapptr[0x3b].indirect != (dp_map_t)NO_BLOCK)
+ panic ("debug panic");
+
+ /*
+ * Now allocate indirect map.
+ */
+ new_mapptr = (dp_map_t)
+ kalloc(INDIRECT_PAGEMAP_SIZE(new_size));
+ new_mapptr[0].indirect = old_mapptr;
+ for (i = 1; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++)
+ new_mapptr[i].indirect = 0;
+ pager->map = new_mapptr;
+ pager->size = new_size;
+#ifdef CHECKSUM
+ /*
+ * Allocate new second-level map first.
+ */
+ new_mapptr = (vm_offset_t *)kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES));
+ old_mapptr = pager->checksum;
+ for (i = 0; i < old_size; i++)
+ new_mapptr[i] = old_mapptr[i];
+ for (; i < PAGEMAP_ENTRIES; i++)
+ new_mapptr[i] = NO_CHECKSUM;
+ kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size));
+ old_mapptr = new_mapptr;
+
+ /*
+ * Now allocate indirect map.
+ */
+ new_mapptr = (vm_offset_t *)
+ kalloc(INDIRECT_PAGEMAP_SIZE(new_size));
+ new_mapptr[0] = (vm_offset_t) old_mapptr;
+ for (i = 1; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++)
+ new_mapptr[i] = 0;
+ pager->checksum = new_mapptr;
+#endif /* CHECKSUM */
+#if DEBUG_READER_CONFLICTS
+ pager->writer = FALSE;
+#endif
+ mutex_unlock(&pager->lock);
+ return;
+ }
+ /*
+ * Enlarging a direct block.
+ */
+ new_mapptr = (dp_map_t) kalloc(PAGEMAP_SIZE(new_size));
+ old_mapptr = pager->map;
+ for (i = 0; i < old_size; i++)
+ new_mapptr[i] = old_mapptr[i];
+ for (; i < new_size; i++)
+ invalidate_block(new_mapptr[i]);
+ kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size));
+ pager->map = new_mapptr;
+ pager->size = new_size;
+#ifdef CHECKSUM
+ new_mapptr = (vm_offset_t *)
+ kalloc(PAGEMAP_SIZE(new_size));
+ old_mapptr = pager->checksum;
+ for (i = 0; i < old_size; i++)
+ new_mapptr[i] = old_mapptr[i];
+ for (; i < new_size; i++)
+ new_mapptr[i] = NO_CHECKSUM;
+ kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size));
+ pager->checksum = new_mapptr;
+#endif /* CHECKSUM */
+#if DEBUG_READER_CONFLICTS
+ pager->writer = FALSE;
+#endif
+ mutex_unlock(&pager->lock);
+}
+
+/*
+ * Given an offset within a paging object, find the
+ * corresponding block within the paging partition.
+ * Return NO_BLOCK if none allocated.
+ */
+union dp_map
+pager_read_offset(pager, offset)
+ register dpager_t pager;
+ vm_offset_t offset;
+{
+ register vm_offset_t f_page;
+ union dp_map pager_offset;
+
+ f_page = atop(offset);
+
+#if DEBUG_READER_CONFLICTS
+ if (pager->readers > 0)
+ default_pager_read_conflicts++; /* would have proceeded with
+ read/write lock */
+#endif
+ mutex_lock(&pager->lock); /* XXX lock_read */
+#if DEBUG_READER_CONFLICTS
+ pager->readers++;
+#endif
+ if (f_page >= pager->size)
+ {
+ ddprintf ("%spager_read_offset pager %x: bad page %d >= size %d",
+ my_name, pager, f_page, pager->size);
+ return (union dp_map) (union dp_map *) NO_BLOCK;
+#if 0
+ panic("%spager_read_offset",my_name);
+#endif
+ }
+
+ if (INDIRECT_PAGEMAP(pager->size)) {
+ register dp_map_t mapptr;
+
+ mapptr = pager->map[f_page/PAGEMAP_ENTRIES].indirect;
+ if (mapptr == 0)
+ invalidate_block(pager_offset);
+ else
+ pager_offset = mapptr[f_page%PAGEMAP_ENTRIES];
+ }
+ else {
+ pager_offset = pager->map[f_page];
+ }
+
+#if DEBUG_READER_CONFLICTS
+ pager->readers--;
+#endif
+ mutex_unlock(&pager->lock);
+ return (pager_offset);
+}
+
+#if USE_PRECIOUS
+/*
+ * Release a single disk block.
+ */
+void
+pager_release_offset(pager, offset)
+ register dpager_t pager;
+ vm_offset_t offset;
+{
+ register union dp_map entry;
+
+ offset = atop(offset);
+
+ mutex_lock(&pager->lock); /* XXX lock_read */
+
+ if (INDIRECT_PAGEMAP(pager->size)) {
+ register dp_map_t mapptr;
+
+ mapptr = pager->map[offset / PAGEMAP_ENTRIES].indirect;
+ entry = mapptr[offset % PAGEMAP_ENTRIES];
+ invalidate_block(mapptr[offset % PAGEMAP_ENTRIES]);
+ } else {
+ entry = pager->map[offset];
+ invalidate_block(pager->map[offset]);
+ }
+
+ mutex_unlock(&pager->lock);
+
+ pager_dealloc_page(entry.block.p_index, entry.block.p_offset, TRUE);
+}
+#endif /*USE_PRECIOUS*/
+
+
+/*
+ * Move a page from one partition to another.
+ * The new partition is chosen and locked here;
+ * the old partition must already be locked by the caller.
+ */
+union dp_map
+pager_move_page(block)
+ union dp_map block;
+{
+ partition_t old_part, new_part;
+ p_index_t old_pindex, new_pindex;
+ union dp_map ret;
+ vm_size_t size;
+ vm_offset_t raddr, offset, new_offset;
+ kern_return_t rc;
+ static char here[] = "%spager_move_page";
+
+ old_pindex = block.block.p_index;
+ invalidate_block(ret);
+
+ /* See if we have room to put it anywhere else */
+ new_pindex = choose_partition( ptoa(1), old_pindex);
+ if (no_partition(new_pindex))
+ return ret;
+
+ /* this unlocks the new partition */
+ new_offset = pager_alloc_page(new_pindex, FALSE);
+ if (new_offset == NO_BLOCK)
+ panic(here,my_name);
+
+ /*
+ * Got the resources, now move the data
+ */
+ddprintf ("pager_move_page(%x,%d,%d)\n",block.block.p_offset,old_pindex,new_pindex);
+ old_part = partition_of(old_pindex);
+ offset = ptoa(block.block.p_offset);
+ rc = page_read_file_direct (old_part->file,
+ offset,
+ vm_page_size,
+ &raddr,
+ &size);
+ if (rc != 0)
+ panic(here,my_name);
+
+ /* release old */
+ pager_dealloc_page(old_pindex, block.block.p_offset, FALSE);
+
+ new_part = partition_of(new_pindex);
+ offset = ptoa(new_offset);
+ rc = page_write_file_direct (new_part->file,
+ offset,
+ raddr,
+ size,
+ &size);
+ if (rc != 0)
+ panic(here,my_name);
+
+ (void) vm_deallocate( mach_task_self(), raddr, size);
+
+ ret.block.p_offset = new_offset;
+ ret.block.p_index = new_pindex;
+
+ return ret;
+}
+
+#ifdef CHECKSUM
+/*
+ * Return the checksum for a block.
+ */
+int
+pager_get_checksum(pager, offset)
+ register dpager_t pager;
+ vm_offset_t offset;
+{
+ register vm_offset_t f_page;
+ int checksum;
+
+ f_page = atop(offset);
+
+ mutex_lock(&pager->lock); /* XXX lock_read */
+ if (f_page >= pager->size)
+ panic("%spager_get_checksum",my_name);
+
+ if (INDIRECT_PAGEMAP(pager->size)) {
+ register vm_offset_t *mapptr;
+
+ mapptr = (vm_offset_t *)pager->checksum[f_page/PAGEMAP_ENTRIES];
+ if (mapptr == 0)
+ checksum = NO_CHECKSUM;
+ else
+ checksum = mapptr[f_page%PAGEMAP_ENTRIES];
+ }
+ else {
+ checksum = pager->checksum[f_page];
+ }
+
+ mutex_unlock(&pager->lock);
+ return (checksum);
+}
+
+/*
+ * Remember the checksum for a block.
+ */
+void
+pager_put_checksum(pager, offset, checksum)
+ register dpager_t pager;
+ vm_offset_t offset;
+ int checksum;
+{
+ register vm_offset_t f_page;
+ static char here[] = "%spager_put_checksum";
+
+ f_page = atop(offset);
+
+ mutex_lock(&pager->lock); /* XXX lock_read */
+ if (f_page >= pager->size)
+ panic(here,my_name);
+
+ if (INDIRECT_PAGEMAP(pager->size)) {
+ register vm_offset_t *mapptr;
+
+ mapptr = (vm_offset_t *)pager->checksum[f_page/PAGEMAP_ENTRIES];
+ if (mapptr == 0)
+ panic(here,my_name);
+
+ mapptr[f_page%PAGEMAP_ENTRIES] = checksum;
+ }
+ else {
+ pager->checksum[f_page] = checksum;
+ }
+ mutex_unlock(&pager->lock);
+}
+
+/*
+ * Compute a checksum - XOR each 32-bit word.
+ */
+int
+compute_checksum(addr, size)
+ vm_offset_t addr;
+ vm_size_t size;
+{
+ register int checksum = NO_CHECKSUM;
+ register int *ptr;
+ register int count;
+
+ ptr = (int *)addr;
+ count = size / sizeof(int);
+
+ while (--count >= 0)
+ checksum ^= *ptr++;
+
+ return (checksum);
+}
+#endif /* CHECKSUM */
+
+/*
+ * Given an offset within a paging object, find the
+ * corresponding block within the paging partition.
+ * Allocate a new block if necessary.
+ *
+ * WARNING: paging objects apparently may be extended
+ * without notice!
+ */
+union dp_map
+pager_write_offset(pager, offset)
+ register dpager_t pager;
+ vm_offset_t offset;
+{
+ register vm_offset_t f_page;
+ register dp_map_t mapptr;
+ register union dp_map block;
+
+ invalidate_block(block);
+
+ f_page = atop(offset);
+
+#if DEBUG_READER_CONFLICTS
+ if (pager->readers > 0)
+ default_pager_read_conflicts++; /* would have proceeded with
+ read/write lock */
+#endif
+ mutex_lock(&pager->lock); /* XXX lock_read */
+#if DEBUG_READER_CONFLICTS
+ pager->readers++;
+#endif
+
+ /* Catch the case where we had no initial fit partition
+ for this object, but one was added later on */
+ if (no_partition(pager->cur_partition)) {
+ p_index_t new_part;
+ vm_size_t size;
+
+ size = (f_page > pager->size) ? f_page : pager->size;
+ new_part = choose_partition(ptoa(size), P_INDEX_INVALID);
+ if (no_partition(new_part))
+ new_part = choose_partition(ptoa(1), P_INDEX_INVALID);
+ if (no_partition(new_part))
+ /* give up right now to avoid confusion */
+ goto out;
+ else
+ pager->cur_partition = new_part;
+ }
+
+ while (f_page >= pager->size) {
+ ddprintf ("pager_write_offset: extending: %x %x\n", f_page, pager->size);
+
+ /*
+ * Paging object must be extended.
+ * Remember that offset is 0-based, but size is 1-based.
+ */
+#if DEBUG_READER_CONFLICTS
+ pager->readers--;
+#endif
+ mutex_unlock(&pager->lock);
+ pager_extend(pager, f_page + 1);
+#if DEBUG_READER_CONFLICTS
+ if (pager->readers > 0)
+ default_pager_read_conflicts++; /* would have proceeded with
+ read/write lock */
+#endif
+ mutex_lock(&pager->lock); /* XXX lock_read */
+#if DEBUG_READER_CONFLICTS
+ pager->readers++;
+#endif
+ ddprintf ("pager_write_offset: done extending: %x %x\n", f_page, pager->size);
+ }
+
+ if (INDIRECT_PAGEMAP(pager->size)) {
+ ddprintf ("pager_write_offset: indirect\n");
+ mapptr = pager->map[f_page/PAGEMAP_ENTRIES].indirect;
+ if (mapptr == 0) {
+ /*
+ * Allocate the indirect block
+ */
+ register int i;
+ ddprintf ("pager_write_offset: allocating indirect\n");
+
+ mapptr = (dp_map_t) kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES));
+ if (mapptr == 0) {
+ /* out of space! */
+ no_paging_space(TRUE);
+ goto out;
+ }
+ pager->map[f_page/PAGEMAP_ENTRIES].indirect = mapptr;
+ for (i = 0; i < PAGEMAP_ENTRIES; i++)
+ invalidate_block(mapptr[i]);
+#ifdef CHECKSUM
+ {
+ register vm_offset_t *cksumptr;
+ register int j;
+
+ cksumptr = (vm_offset_t *)
+ kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES));
+ if (cksumptr == 0) {
+ /* out of space! */
+ no_paging_space(TRUE);
+ goto out;
+ }
+ pager->checksum[f_page/PAGEMAP_ENTRIES]
+ = (vm_offset_t)cksumptr;
+ for (j = 0; j < PAGEMAP_ENTRIES; j++)
+ cksumptr[j] = NO_CHECKSUM;
+ }
+#endif /* CHECKSUM */
+ }
+ f_page %= PAGEMAP_ENTRIES;
+ }
+ else {
+ mapptr = pager->map;
+ }
+
+ block = mapptr[f_page];
+ ddprintf ("pager_write_offset: block starts as %x[%x] %x\n", mapptr, f_page, block);
+ if (no_block(block)) {
+ vm_offset_t off;
+
+ /* get room now */
+ off = pager_alloc_page(pager->cur_partition, TRUE);
+ if (off == NO_BLOCK) {
+ /*
+ * Before giving up, try all other partitions.
+ */
+ p_index_t new_part;
+
+ ddprintf ("pager_write_offset: could not allocate block\n");
+ /* returns it locked (if any one is non-full) */
+ new_part = choose_partition( ptoa(1), pager->cur_partition);
+ if ( ! no_partition(new_part) ) {
+
+#if debug
+dprintf("%s partition %x filled,", my_name, pager->cur_partition);
+dprintf("extending object %x (size %x) to %x.\n",
+ pager, pager->size, new_part);
+#endif
+
+ /* this one tastes better */
+ pager->cur_partition = new_part;
+
+ /* this unlocks the partition too */
+ off = pager_alloc_page(pager->cur_partition, FALSE);
+
+ }
+
+ if (off == NO_BLOCK) {
+ /*
+ * Oh well.
+ */
+ overcommitted(FALSE, 1);
+ goto out;
+ }
+ ddprintf ("pager_write_offset: decided to allocate block\n");
+ }
+ block.block.p_offset = off;
+ block.block.p_index = pager->cur_partition;
+ mapptr[f_page] = block;
+ ddprintf ("pager_write_offset: mapptr %x [3b] = %x\n", mapptr,
+ mapptr[0x3b]);
+ ddprintf ("pager_write_offset: block is finally %x\n", block);
+ }
+
+out:
+
+#if DEBUG_READER_CONFLICTS
+ pager->readers--;
+#endif
+ mutex_unlock(&pager->lock);
+ return (block);
+}
+
+/*
+ * Deallocate all of the blocks belonging to a paging object.
+ * No locking needed because no other operations can be in progress.
+ */
+void
+pager_dealloc(pager)
+ register dpager_t pager;
+{
+ register int i, j;
+ register dp_map_t mapptr;
+ register union dp_map block;
+
+ if (INDIRECT_PAGEMAP(pager->size)) {
+ for (i = INDIRECT_PAGEMAP_ENTRIES(pager->size); --i >= 0; ) {
+ mapptr = pager->map[i].indirect;
+ if (mapptr != 0) {
+ for (j = 0; j < PAGEMAP_ENTRIES; j++) {
+ block = mapptr[j];
+ if ( ! no_block(block) )
+ pager_dealloc_page(block.block.p_index,
+ block.block.p_offset, TRUE);
+ }
+ kfree((char *)mapptr, PAGEMAP_SIZE(PAGEMAP_ENTRIES));
+ }
+ }
+ kfree((char *)pager->map, INDIRECT_PAGEMAP_SIZE(pager->size));
+#ifdef CHECKSUM
+ for (i = INDIRECT_PAGEMAP_ENTRIES(pager->size); --i >= 0; ) {
+ mapptr = (vm_offset_t *)pager->checksum[i];
+ if (mapptr) {
+ kfree((char *)mapptr, PAGEMAP_SIZE(PAGEMAP_ENTRIES));
+ }
+ }
+ kfree((char *)pager->checksum,
+ INDIRECT_PAGEMAP_SIZE(pager->size));
+#endif /* CHECKSUM */
+ }
+ else {
+ mapptr = pager->map;
+ for (i = 0; i < pager->size; i++ ) {
+ block = mapptr[i];
+ if ( ! no_block(block) )
+ pager_dealloc_page(block.block.p_index,
+ block.block.p_offset, TRUE);
+ }
+ kfree((char *)pager->map, PAGEMAP_SIZE(pager->size));
+#ifdef CHECKSUM
+ kfree((char *)pager->checksum, PAGEMAP_SIZE(pager->size));
+#endif /* CHECKSUM */
+ }
+}
+
+/*
+ * Move all the pages of a PAGER that live in a
+ * partition PINDEX somewhere else.
+ * Pager should be write-locked, partition too.
+ * Returns FALSE if it could not do it, but
+ * some pages might have been moved nonetheless.
+ */
+boolean_t
+pager_realloc(pager, pindex)
+ register dpager_t pager;
+ p_index_t pindex;
+{
+ register dp_map_t map, emap;
+ vm_size_t size;
+ union dp_map block;
+
+ size = pager->size; /* in pages */
+ map = pager->map;
+
+ if (INDIRECT_PAGEMAP(size)) {
+ for (emap = &map[INDIRECT_PAGEMAP_ENTRIES(size)];
+ map < emap; map++) {
+
+ register dp_map_t map2, emap2;
+
+ if ((map2 = map->indirect) == 0)
+ continue;
+
+ for (emap2 = &map2[PAGEMAP_ENTRIES];
+ map2 < emap2; map2++)
+ if ( map2->block.p_index == pindex) {
+
+ block = pager_move_page(*map2);
+ if (!no_block(block))
+ *map2 = block;
+ else
+ return FALSE;
+ }
+
+ }
+ goto ok;
+ }
+
+ /* A small one */
+ for (emap = &map[size]; map < emap; map++)
+ if (map->block.p_index == pindex) {
+ block = pager_move_page(*map);
+ if (!no_block(block))
+ *map = block;
+ else
+ return FALSE;
+ }
+ok:
+ pager->cur_partition = choose_partition(0, P_INDEX_INVALID);
+ return TRUE;
+}
+
+/*
+
+ */
+
+/*
+ * Read/write routines.
+ */
+#define PAGER_SUCCESS 0
+#define PAGER_ABSENT 1
+#define PAGER_ERROR 2
+
+/*
+ * Read data from a default pager. Addr is the address of a buffer
+ * to fill. Out_addr returns the buffer that contains the data;
+ * if it is different from <addr>, it must be deallocated after use.
+ */
+int
+default_read(ds, addr, size, offset, out_addr, deallocate)
+ register dpager_t ds;
+ vm_offset_t addr; /* pointer to block to fill */
+ register vm_size_t size;
+ register vm_offset_t offset;
+ vm_offset_t *out_addr;
+ /* returns pointer to data */
+ boolean_t deallocate;
+{
+ register union dp_map block;
+ vm_offset_t raddr;
+ vm_size_t rsize;
+ register int rc;
+ boolean_t first_time;
+ register partition_t part;
+#ifdef CHECKSUM
+ vm_size_t original_size = size;
+#endif /* CHECKSUM */
+ vm_offset_t original_offset = offset;
+
+ /*
+ * Find the block in the paging partition
+ */
+ block = pager_read_offset(ds, offset);
+ if ( no_block(block) )
+ return (PAGER_ABSENT);
+
+ /*
+ * Read it, trying for the entire page.
+ */
+ offset = ptoa(block.block.p_offset);
+ddprintf ("default_read(%x,%x,%x,%d)\n",addr,size,offset,block.block.p_index);
+ part = partition_of(block.block.p_index);
+ first_time = TRUE;
+ *out_addr = addr;
+
+ do {
+ rc = page_read_file_direct(part->file,
+ offset,
+ size,
+ &raddr,
+ &rsize);
+ if (rc != 0)
+ return (PAGER_ERROR);
+
+ /*
+ * If we got the entire page on the first read, return it.
+ */
+ if (first_time && rsize == size) {
+ *out_addr = raddr;
+ break;
+ }
+ /*
+ * Otherwise, copy the data into the
+ * buffer we were passed, and try for
+ * the next piece.
+ */
+ first_time = FALSE;
+ bcopy((char *)raddr, (char *)addr, rsize);
+ addr += rsize;
+ offset += rsize;
+ size -= rsize;
+ } while (size != 0);
+
+#if USE_PRECIOUS
+ if (deallocate)
+ pager_release_offset(ds, original_offset);
+#endif /*USE_PRECIOUS*/
+
+#ifdef CHECKSUM
+ {
+ int write_checksum,
+ read_checksum;
+
+ write_checksum = pager_get_checksum(ds, original_offset);
+ read_checksum = compute_checksum(*out_addr, original_size);
+ if (write_checksum != read_checksum) {
+ panic(
+ "PAGER CHECKSUM ERROR: offset 0x%x, written 0x%x, read 0x%x",
+ original_offset, write_checksum, read_checksum);
+ }
+ }
+#endif /* CHECKSUM */
+ return (PAGER_SUCCESS);
+}
+
+int
+default_write(ds, addr, size, offset)
+ register dpager_t ds;
+ register vm_offset_t addr;
+ register vm_size_t size;
+ register vm_offset_t offset;
+{
+ register union dp_map block;
+ partition_t part;
+ vm_size_t wsize;
+ register int rc;
+
+ ddprintf ("default_write: pager offset %x\n", offset);
+
+ /*
+ * Find block in paging partition
+ */
+ block = pager_write_offset(ds, offset);
+ if ( no_block(block) )
+ return (PAGER_ERROR);
+
+#ifdef CHECKSUM
+ /*
+ * Save checksum
+ */
+ {
+ int checksum;
+
+ checksum = compute_checksum(addr, size);
+ pager_put_checksum(ds, offset, checksum);
+ }
+#endif /* CHECKSUM */
+ offset = ptoa(block.block.p_offset);
+ddprintf ("default_write(%x,%x,%x,%d)\n",addr,size,offset,block.block.p_index);
+ part = partition_of(block.block.p_index);
+
+ /*
+ * There are various assumptions made here; chiefly
+ * that we will not run into the next disk 'block' by
+ * accident, as it might well be non-contiguous.
+ */
+ do {
+ rc = page_write_file_direct(part->file,
+ offset,
+ addr,
+ size,
+ &wsize);
+ if (rc != 0) {
+ dprintf("*** PAGER ERROR: default_write: ");
+ dprintf("ds=0x%x addr=0x%x size=0x%x offset=0x%x resid=0x%x\n",
+ ds, addr, size, offset, wsize);
+ return (PAGER_ERROR);
+ }
+ addr += wsize;
+ offset += wsize;
+ size -= wsize;
+ } while (size != 0);
+ return (PAGER_SUCCESS);
+}
+
+boolean_t
+default_has_page(ds, offset)
+ dpager_t ds;
+ vm_offset_t offset;
+{
+ return ( ! no_block(pager_read_offset(ds, offset)) );
+}
+
+/*
+
+ */
+
+/*
+ * Mapping between pager port and paging object.
+ */
+struct dstruct {
+ queue_chain_t links; /* Link in pager-port list */
+
+ struct mutex lock; /* Lock for the structure */
+ struct condition
+ waiting_seqno, /* someone waiting on seqno */
+ waiting_read, /* someone waiting on readers */
+ waiting_write, /* someone waiting on writers */
+ waiting_refs; /* someone waiting on refs */
+
+ memory_object_t pager; /* Pager port */
+ mach_port_seqno_t seqno; /* Pager port sequence number */
+ mach_port_t pager_request; /* Request port */
+ mach_port_urefs_t request_refs; /* Request port user-refs */
+ mach_port_t pager_name; /* Name port */
+ mach_port_urefs_t name_refs; /* Name port user-refs */
+
+ unsigned int readers; /* Reads in progress */
+ unsigned int writers; /* Writes in progress */
+
+ unsigned int errors; /* Pageout error count */
+ struct dpager dpager; /* Actual pager */
+};
+typedef struct dstruct * default_pager_t;
+#define DEFAULT_PAGER_NULL ((default_pager_t)0)
+
+#if PARALLEL
+#define dstruct_lock_init(ds) mutex_init(&ds->lock)
+#define dstruct_lock(ds) mutex_lock(&ds->lock)
+#define dstruct_unlock(ds) mutex_unlock(&ds->lock)
+#else /* PARALLEL */
+#define dstruct_lock_init(ds)
+#define dstruct_lock(ds)
+#define dstruct_unlock(ds)
+#endif /* PARALLEL */
+
+/*
+ * List of all pagers. A specific pager is
+ * found directly via its port, this list is
+ * only used for monitoring purposes by the
+ * default_pager_object* calls
+ */
+struct pager_port {
+ queue_head_t queue;
+ struct mutex lock;
+ int count; /* saves code */
+ queue_head_t leak_queue;
+} all_pagers;
+
+#define pager_port_list_init() \
+{ \
+ mutex_init(&all_pagers.lock); \
+ queue_init(&all_pagers.queue); \
+ queue_init(&all_pagers.leak_queue); \
+ all_pagers.count = 0; \
+}
+
+void pager_port_list_insert(port, ds)
+ mach_port_t port;
+ default_pager_t ds;
+{
+ mutex_lock(&all_pagers.lock);
+ queue_enter(&all_pagers.queue, ds, default_pager_t, links);
+ all_pagers.count++;
+ mutex_unlock(&all_pagers.lock);
+}
+
+/* given a data structure return a good port-name to associate it to */
+#define pnameof(_x_) (((vm_offset_t)(_x_))+1)
+/* reverse, assumes no-odd-pointers */
+#define dnameof(_x_) (((vm_offset_t)(_x_))&~1)
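+
+/*
+ * E.g. (illustrative): a dstruct kalloc'ed at 0x40000 gets port name
+ * 0x40001, and dnameof(0x40001) masks the low bit to recover the
+ * pointer; this relies on kalloc never returning odd addresses.
+ */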
+
+/* The magic typecast */
+#define pager_port_lookup(_port_) \
+ ((! MACH_PORT_VALID(_port_) || \
+ ((default_pager_t)dnameof(_port_))->pager != (_port_)) ? \
+ DEFAULT_PAGER_NULL : (default_pager_t)dnameof(_port_))
+
+void pager_port_list_delete(ds)
+ default_pager_t ds;
+{
+ mutex_lock(&all_pagers.lock);
+ queue_remove(&all_pagers.queue, ds, default_pager_t, links);
+ all_pagers.count--;
+ mutex_unlock(&all_pagers.lock);
+}
+
+/*
+ * Destroy a paging partition.
+ * XXX this is not re-entrant XXX
+ */
+kern_return_t
+destroy_paging_partition(name, pp_private)
+ char *name;
+ void **pp_private;
+{
+ register unsigned int id = part_id(name);
+ register partition_t part;
+ boolean_t all_ok = TRUE;
+ default_pager_t entry;
+ int pindex;
+
+ /*
+ * Find and take partition out of list
+ * This prevents choose_partition from
+ * getting in the way.
+ */
+ mutex_lock(&all_partitions.lock);
+ for (pindex = 0; pindex < all_partitions.n_partitions; pindex++) {
+ part = partition_of(pindex);
+ if (part && (part->id == id)) break;
+ }
+ if (pindex == all_partitions.n_partitions) {
+ mutex_unlock(&all_partitions.lock);
+ return KERN_INVALID_ARGUMENT;
+ }
+ part->going_away = TRUE;
+ mutex_unlock(&all_partitions.lock);
+
+ /*
+ * This might take a while..
+ */
+all_over_again:
+#if debug
+dprintf("Partition x%x (id x%x) for %s, all_ok %d\n", part, id, name, all_ok);
+#endif
+ all_ok = TRUE;
+ mutex_lock(&part->p_lock);
+
+ mutex_lock(&all_pagers.lock);
+ queue_iterate(&all_pagers.queue, entry, default_pager_t, links) {
+
+ dstruct_lock(entry);
+
+ if (!mutex_try_lock(&entry->dpager.lock)) {
+
+ dstruct_unlock(entry);
+ mutex_unlock(&all_pagers.lock);
+ mutex_unlock(&part->p_lock);
+
+ /* yield the processor */
+ (void) thread_switch(MACH_PORT_NULL,
+ SWITCH_OPTION_NONE, 0);
+
+ goto all_over_again;
+
+ }
+
+ /*
+ * See if we can relocate all the pages of this object
+ * currently on this partition on some other partition
+ */
+ all_ok = pager_realloc(&entry->dpager, pindex);
+
+ mutex_unlock(&entry->dpager.lock);
+ dstruct_unlock(entry);
+
+ if (!all_ok) break;
+
+ }
+ mutex_unlock(&all_pagers.lock);
+
+ if (all_ok) {
+ /* No need to unlock partition, there are no refs left */
+
+ set_partition_of(pindex, 0);
+ *pp_private = part->file;
+ kfree(part->bitmap, howmany(part->total_size, NB_BM) * sizeof(bm_entry_t));
+ kfree(part, sizeof(struct part));
+ dprintf("%s Removed paging partition %s\n", my_name, name);
+ return KERN_SUCCESS;
+ }
+
+ /*
+ * Put partition back in.
+ */
+ part->going_away = FALSE;
+
+ return KERN_FAILURE;
+}
+
+
+/*
+ * We use the sequence numbers on requests to regulate
+ * our parallelism. In general, we allow multiple reads and writes
+ * to proceed in parallel, with the exception that reads must
+ * wait for previous writes to finish. (Because the kernel might
+ * generate a data-request for a page on the heels of a data-write
+ * for the same page, and we must avoid returning stale data.)
+ * Terminate requests wait for preceding reads and writes to finish.
+ */
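+
+/*
+ * E.g. (illustrative): if a data_write for some page is still in
+ * progress when a data_request for the same page arrives, the read
+ * side blocks in pager_port_wait_for_writers until the write is
+ * done, so the request is served from the freshly written block.
+ */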
+
+unsigned int default_pager_total = 0; /* debugging */
+unsigned int default_pager_wait_seqno = 0; /* debugging */
+unsigned int default_pager_wait_read = 0; /* debugging */
+unsigned int default_pager_wait_write = 0; /* debugging */
+unsigned int default_pager_wait_refs = 0; /* debugging */
+
+#if PARALLEL
+/*
+ * Waits for correct sequence number. Leaves pager locked.
+ */
+void pager_port_lock(ds, seqno)
+ default_pager_t ds;
+ mach_port_seqno_t seqno;
+{
+ default_pager_total++;
+ddprintf ("pager_port_lock <%p>: <%p>: %d: 1\n", &ds, ds, seqno);
+ dstruct_lock(ds);
+ddprintf ("pager_port_lock <%p>: <%p>: %d: 2\n", &ds, ds, seqno);
+ while (ds->seqno != seqno) {
+ddprintf ("pager_port_lock <%p>: <%p>: %d: 3\n", &ds, ds, seqno);
+ default_pager_wait_seqno++;
+ condition_wait(&ds->waiting_seqno, &ds->lock);
+ddprintf ("pager_port_lock <%p>: <%p>: %d: 4\n", &ds, ds, seqno);
+ }
+}
+
+/*
+ * Increments sequence number and unlocks pager.
+ */
+void pager_port_unlock(ds)
+ default_pager_t ds;
+{
+ ds->seqno++;
+ddprintf ("pager_port_unlock <%p>: <%p>: seqno => %d\n", &ds, ds, ds->seqno);
+ dstruct_unlock(ds);
+ddprintf ("pager_port_unlock <%p>: <%p>: 2\n", &ds, ds);
+ condition_broadcast(&ds->waiting_seqno);
+ddprintf ("pager_port_unlock <%p>: <%p>: 3\n", &ds, ds);
+}
+
+/*
+ * Start a read - one more reader. Pager must be locked.
+ */
+void pager_port_start_read(ds)
+ default_pager_t ds;
+{
+ ds->readers++;
+}
+
+/*
+ * Wait for readers. Unlocks and relocks pager if wait needed.
+ */
+void pager_port_wait_for_readers(ds)
+ default_pager_t ds;
+{
+ while (ds->readers != 0) {
+ default_pager_wait_read++;
+ condition_wait(&ds->waiting_read, &ds->lock);
+ }
+}
+
+/*
+ * Finish a read. Pager is unlocked and returns unlocked.
+ */
+void pager_port_finish_read(ds)
+ default_pager_t ds;
+{
+ dstruct_lock(ds);
+ if (--ds->readers == 0) {
+ dstruct_unlock(ds);
+ condition_broadcast(&ds->waiting_read);
+ }
+ else {
+ dstruct_unlock(ds);
+ }
+}
+
+/*
+ * Start a write - one more writer. Pager must be locked.
+ */
+void pager_port_start_write(ds)
+ default_pager_t ds;
+{
+ ds->writers++;
+}
+
+/*
+ * Wait for writers. Unlocks and relocks pager if wait needed.
+ */
+void pager_port_wait_for_writers(ds)
+ default_pager_t ds;
+{
+ while (ds->writers != 0) {
+ default_pager_wait_write++;
+ condition_wait(&ds->waiting_write, &ds->lock);
+ }
+}
+
+/*
+ * Finish a write. Pager is unlocked and returns unlocked.
+ */
+void pager_port_finish_write(ds)
+ default_pager_t ds;
+{
+ dstruct_lock(ds);
+ if (--ds->writers == 0) {
+ dstruct_unlock(ds);
+ condition_broadcast(&ds->waiting_write);
+ }
+ else {
+ dstruct_unlock(ds);
+ }
+}
+
+/*
+ * Wait for concurrent default_pager_objects.
+ * Unlocks and relocks pager if wait needed.
+ */
+void pager_port_wait_for_refs(ds)
+ default_pager_t ds;
+{
+ while (ds->name_refs == 0) {
+ default_pager_wait_refs++;
+ condition_wait(&ds->waiting_refs, &ds->lock);
+ }
+}
+
+/*
+ * Finished creating name refs - wake up waiters.
+ */
+void pager_port_finish_refs(ds)
+ default_pager_t ds;
+{
+ condition_broadcast(&ds->waiting_refs);
+}
+
+#else /* PARALLEL */
+
+#define pager_port_lock(ds,seqno)
+#define pager_port_unlock(ds)
+#define pager_port_start_read(ds)
+#define pager_port_wait_for_readers(ds)
+#define pager_port_finish_read(ds)
+#define pager_port_start_write(ds)
+#define pager_port_wait_for_writers(ds)
+#define pager_port_finish_write(ds)
+#define pager_port_wait_for_refs(ds)
+#define pager_port_finish_refs(ds)
+
+#endif /* PARALLEL */
+
+/*
+ * Default pager.
+ */
+task_t default_pager_self; /* Our task port. */
+
+mach_port_t default_pager_default_port; /* Port for memory_object_create. */
+
+/* We catch exceptions on ourself & startup using this port. */
+mach_port_t default_pager_exception_port;
+/* We receive bootstrap requests on this port. */
+mach_port_t default_pager_bootstrap_port;
+
+mach_port_t default_pager_internal_set; /* Port set for internal objects. */
+mach_port_t default_pager_external_set; /* Port set for external objects. */
+mach_port_t default_pager_default_set; /* Port set for "default" thread. */
+
+typedef struct default_pager_thread {
+ cthread_t dpt_thread; /* Server thread. */
+ vm_offset_t dpt_buffer; /* Read buffer. */
+ boolean_t dpt_internal; /* Do we handle internal objects? */
+} default_pager_thread_t;
+
+#if PARALLEL
+ /* determine number of threads at run time */
+#define DEFAULT_PAGER_INTERNAL_COUNT (0)
+
+#else /* PARALLEL */
+#define DEFAULT_PAGER_INTERNAL_COUNT (1)
+#endif /* PARALLEL */
+
+/* Memory created by default_pager_object_create should mostly be resident. */
+#define DEFAULT_PAGER_EXTERNAL_COUNT (1)
+
+unsigned int default_pager_internal_count = DEFAULT_PAGER_INTERNAL_COUNT;
+ /* Number of "internal" threads. */
+unsigned int default_pager_external_count = DEFAULT_PAGER_EXTERNAL_COUNT;
+ /* Number of "external" threads. */
+
+default_pager_t pager_port_alloc(size)
+ vm_size_t size;
+{
+ default_pager_t ds;
+ p_index_t part;
+
+ ds = (default_pager_t) kalloc(sizeof *ds);
+ if (ds == DEFAULT_PAGER_NULL)
+ panic("%spager_port_alloc",my_name);
+ bzero((char *) ds, sizeof *ds);
+
+ dstruct_lock_init(ds);
+
+ /*
+ * Get a suitable partition. If none big enough
+ * just pick one and overcommit. If no partitions
+ * at all... well, just fake one so that we will
+ * kill specific objects on pageouts rather than
+ * panicking the system now.
+ */
+ part = choose_partition(size, P_INDEX_INVALID);
+ if (no_partition(part)) {
+ overcommitted(FALSE, atop(size));
+ part = choose_partition(0,P_INDEX_INVALID);
+#if debug
+ if (no_partition(part))
+ dprintf("%s No paging space at all !!\n", my_name);
+#endif
+ }
+ pager_alloc(&ds->dpager, part, size);
+
+ return ds;
+}
+
+mach_port_urefs_t default_pager_max_urefs = 10000;
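+
+/*
+ * E.g. with the 10000 cap (illustrative): when the 10001st request
+ * reference accumulates, pager_port_check_request below returns the
+ * excess urefs to the kernel in one mach_port_mod_refs call with
+ * delta == 1 - 10001 == -10000, keeping exactly one reference.
+ */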
+
+/*
+ * Check user reference count on pager_request port.
+ * Pager must be locked.
+ * Unlocks and re-locks pager if needs to call kernel.
+ */
+void pager_port_check_request(ds, pager_request)
+ default_pager_t ds;
+ mach_port_t pager_request;
+{
+ mach_port_delta_t delta;
+ kern_return_t kr;
+
+ assert(ds->pager_request == pager_request);
+
+ if (++ds->request_refs > default_pager_max_urefs) {
+ delta = 1 - ds->request_refs;
+ ds->request_refs = 1;
+
+ dstruct_unlock(ds);
+
+ /*
+ * Deallocate excess user references.
+ */
+
+ kr = mach_port_mod_refs(default_pager_self, pager_request,
+ MACH_PORT_RIGHT_SEND, delta);
+ if (kr != KERN_SUCCESS)
+ panic("%spager_port_check_request",my_name);
+
+ dstruct_lock(ds);
+ }
+}
+
+void default_pager_add(ds, internal)
+ default_pager_t ds;
+ boolean_t internal;
+{
+ mach_port_t pager = ds->pager;
+ mach_port_t pset;
+ mach_port_mscount_t sync;
+ mach_port_t previous;
+ kern_return_t kr;
+ static char here[] = "%sdefault_pager_add";
+
+ /*
+ * The port currently has a make-send count of zero,
+ * because either we just created the port or we just
+ * received the port in a memory_object_create request.
+ */
+
+ if (internal) {
+ /* possibly generate an immediate no-senders notification */
+ sync = 0;
+ pset = default_pager_internal_set;
+ } else {
+ /* delay notification till send right is created */
+ sync = 1;
+ pset = default_pager_external_set;
+ }
+
+ kr = mach_port_request_notification(default_pager_self, pager,
+ MACH_NOTIFY_NO_SENDERS, sync,
+ pager, MACH_MSG_TYPE_MAKE_SEND_ONCE,
+ &previous);
+ if ((kr != KERN_SUCCESS) || (previous != MACH_PORT_NULL))
+ panic(here,my_name);
+
+ kr = mach_port_move_member(default_pager_self, pager, pset);
+ if (kr != KERN_SUCCESS)
+ panic(here,my_name);
+}
+
+/*
+ * Routine: memory_object_create
+ * Purpose:
+ * Handle requests for memory objects from the
+ * kernel.
+ * Notes:
+ * Because we only give out the default memory
+ * manager port to the kernel, we don't have to
+ * be so paranoid about the contents.
+ */
+kern_return_t
+seqnos_memory_object_create(old_pager, seqno, new_pager, new_size,
+ new_pager_request, new_pager_name, new_page_size)
+ mach_port_t old_pager;
+ mach_port_seqno_t seqno;
+ mach_port_t new_pager;
+ vm_size_t new_size;
+ mach_port_t new_pager_request;
+ mach_port_t new_pager_name;
+ vm_size_t new_page_size;
+{
+ register default_pager_t ds;
+ kern_return_t kr;
+
+ assert(old_pager == default_pager_default_port);
+ assert(MACH_PORT_VALID(new_pager_request));
+ assert(MACH_PORT_VALID(new_pager_name));
+ assert(new_page_size == vm_page_size);
+
+ ds = pager_port_alloc(new_size);
+rename_it:
+ kr = mach_port_rename( default_pager_self,
+ new_pager, (mach_port_t)pnameof(ds));
+ if (kr != KERN_SUCCESS) {
+ default_pager_t ds1;
+
+ if (kr != KERN_NAME_EXISTS)
+ panic("%s m_o_create", my_name);
+ ds1 = (default_pager_t) kalloc(sizeof *ds1);
+ *ds1 = *ds;
+ mutex_lock(&all_pagers.lock);
+ queue_enter(&all_pagers.leak_queue, ds, default_pager_t, links);
+ mutex_unlock(&all_pagers.lock);
+ ds = ds1;
+ goto rename_it;
+ }
+
+ new_pager = (mach_port_t) pnameof(ds);
+
+ /*
+ * Set up associations between these ports
+ * and this default_pager structure
+ */
+
+ ds->pager = new_pager;
+ ds->pager_request = new_pager_request;
+ ds->request_refs = 1;
+ ds->pager_name = new_pager_name;
+ ds->name_refs = 1;
+
+ /*
+ * After this, other threads might receive requests
+ * for this memory object or find it in the port list.
+ */
+
+ pager_port_list_insert(new_pager, ds);
+ default_pager_add(ds, TRUE);
+
+ return(KERN_SUCCESS);
+}
+
+memory_object_copy_strategy_t default_pager_copy_strategy =
+ MEMORY_OBJECT_COPY_DELAY;
+
+kern_return_t
+seqnos_memory_object_init(pager, seqno, pager_request, pager_name,
+ pager_page_size)
+ mach_port_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ mach_port_t pager_name;
+ vm_size_t pager_page_size;
+{
+ register default_pager_t ds;
+ kern_return_t kr;
+ static char here[] = "%sinit";
+
+ assert(MACH_PORT_VALID(pager_request));
+ assert(MACH_PORT_VALID(pager_name));
+ assert(pager_page_size == vm_page_size);
+
+ ds = pager_port_lookup(pager);
+ if (ds == DEFAULT_PAGER_NULL)
+ panic(here, my_name);
+ pager_port_lock(ds, seqno);
+
+ if (ds->pager_request != MACH_PORT_NULL)
+ panic(here, my_name);
+
+ ds->pager_request = pager_request;
+ ds->request_refs = 1;
+ ds->pager_name = pager_name;
+ ds->name_refs = 1;
+
+ /*
+ * Even if the kernel immediately terminates the object,
+ * the pager_request port won't be destroyed until
+ * we process the terminate request, which won't happen
+ * until we unlock the object.
+ */
+
+ kr = memory_object_set_attributes(pager_request,
+ TRUE,
+ FALSE, /* do not cache */
+ default_pager_copy_strategy);
+ if (kr != KERN_SUCCESS)
+ panic(here, my_name);
+
+ pager_port_unlock(ds);
+
+ return(KERN_SUCCESS);
+}
+
+kern_return_t
+seqnos_memory_object_terminate(pager, seqno, pager_request, pager_name)
+ mach_port_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ mach_port_t pager_name;
+{
+ register default_pager_t ds;
+ mach_port_urefs_t request_refs, name_refs;
+ kern_return_t kr;
+ static char here[] = "%sterminate";
+
+ /*
+ * pager_request and pager_name are receive rights,
+ * not send rights.
+ */
+
+ ds = pager_port_lookup(pager);
+ if (ds == DEFAULT_PAGER_NULL)
+ panic(here, my_name);
+ddprintf ("seqnos_memory_object_terminate <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n",
+ &kr, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno);
+ pager_port_lock(ds, seqno);
+
+ /*
+ * Wait for read and write requests to terminate.
+ */
+
+ pager_port_wait_for_readers(ds);
+ pager_port_wait_for_writers(ds);
+
+ /*
+ * After memory_object_terminate both memory_object_init
+ * and a no-senders notification are possible, so we need
+ * to clean up the request and name ports but leave
+ * the pager port.
+ *
+ * A concurrent default_pager_objects might be allocating
+ * more references for the name port. In this case,
+ * we must first wait for it to finish.
+ */
+
+ pager_port_wait_for_refs(ds);
+
+ ds->pager_request = MACH_PORT_NULL;
+ request_refs = ds->request_refs;
+ ds->request_refs = 0;
+ assert(ds->pager_name == pager_name);
+ ds->pager_name = MACH_PORT_NULL;
+ name_refs = ds->name_refs;
+ ds->name_refs = 0;
+ddprintf ("seqnos_memory_object_terminate <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n",
+ &kr, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held);
+ pager_port_unlock(ds);
+
+ /*
+ * Now we deallocate our various port rights.
+ */
+
+ kr = mach_port_mod_refs(default_pager_self, pager_request,
+ MACH_PORT_RIGHT_SEND, -request_refs);
+ if (kr != KERN_SUCCESS)
+ panic(here,my_name);
+
+ kr = mach_port_mod_refs(default_pager_self, pager_request,
+ MACH_PORT_RIGHT_RECEIVE, -1);
+ if (kr != KERN_SUCCESS)
+ panic(here,my_name);
+
+ kr = mach_port_mod_refs(default_pager_self, pager_name,
+ MACH_PORT_RIGHT_SEND, -name_refs);
+ if (kr != KERN_SUCCESS)
+ panic(here,my_name);
+
+ kr = mach_port_mod_refs(default_pager_self, pager_name,
+ MACH_PORT_RIGHT_RECEIVE, -1);
+ if (kr != KERN_SUCCESS)
+ panic(here,my_name);
+
+ return (KERN_SUCCESS);
+}
+
+void default_pager_no_senders(pager, seqno, mscount)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_mscount_t mscount;
+{
+ register default_pager_t ds;
+ kern_return_t kr;
+ static char here[] = "%sno_senders";
+
+ /*
+ * Because we don't give out multiple send rights
+ * for a memory object, there can't be a race
+ * between getting a no-senders notification
+ * and creating a new send right for the object.
+ * Hence we don't keep track of mscount.
+ */
+
+ ds = pager_port_lookup(pager);
+ if (ds == DEFAULT_PAGER_NULL)
+ panic(here,my_name);
+ pager_port_lock(ds, seqno);
+
+ /*
+ * We shouldn't get a no-senders notification
+ * when the kernel has the object cached.
+ */
+
+ if (ds->pager_request != MACH_PORT_NULL)
+ panic(here,my_name);
+
+ /*
+ * Unlock the pager (though there should be no one
+ * waiting for it).
+ */
+ dstruct_unlock(ds);
+
+ /*
+ * Remove the memory object port association, and then
+ * destroy the port itself. We must remove the object
+ * from the port list before deallocating the pager,
+ * because of default_pager_objects.
+ */
+
+ pager_port_list_delete(ds);
+ pager_dealloc(&ds->dpager);
+
+ kr = mach_port_mod_refs(default_pager_self, pager,
+ MACH_PORT_RIGHT_RECEIVE, -1);
+ if (kr != KERN_SUCCESS)
+ panic(here,my_name);
+
+ /*
+ * Do this *after* deallocating the port name
+ */
+ kfree((char *) ds, sizeof(*ds));
+
+ /*
+ * Recover memory that we might have wasted because
+ * of name conflicts
+ */
+ mutex_lock(&all_pagers.lock);
+
+ while (!queue_empty(&all_pagers.leak_queue)) {
+
+ ds = (default_pager_t) queue_first(&all_pagers.leak_queue);
+ queue_remove_first(&all_pagers.leak_queue, ds, default_pager_t, links);
+ kfree((char *) ds, sizeof(*ds));
+ }
+
+ mutex_unlock(&all_pagers.lock);
+}
+
+int default_pager_pagein_count = 0;
+int default_pager_pageout_count = 0;
+
+kern_return_t
+seqnos_memory_object_data_request(pager, seqno, reply_to, offset,
+ length, protection_required)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t reply_to;
+ vm_offset_t offset;
+ vm_size_t length;
+ vm_prot_t protection_required;
+{
+ default_pager_thread_t *dpt;
+ default_pager_t ds;
+ vm_offset_t addr;
+ unsigned int errors;
+ kern_return_t rc;
+ static char here[] = "%sdata_request";
+
+ dpt = (default_pager_thread_t *) cthread_data(cthread_self());
+
+ if (length != vm_page_size)
+ panic(here,my_name);
+
+ ds = pager_port_lookup(pager);
+ if (ds == DEFAULT_PAGER_NULL)
+ panic(here,my_name);
+ddprintf ("seqnos_memory_object_data_request <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n",
+ &ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno);
+ pager_port_lock(ds, seqno);
+ pager_port_check_request(ds, reply_to);
+ pager_port_wait_for_writers(ds);
+ pager_port_start_read(ds);
+
+ /*
+ * Get error count while pager locked.
+ */
+ errors = ds->errors;
+
+ddprintf ("seqnos_memory_object_data_request <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n",
+ &ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held);
+ pager_port_unlock(ds);
+
+ if (errors) {
+ dprintf("%s %s\n", my_name,
+ "dropping data_request because of previous paging errors");
+ (void) memory_object_data_error(reply_to,
+ offset, vm_page_size,
+ KERN_FAILURE);
+ goto done;
+ }
+
+ rc = default_read(&ds->dpager, dpt->dpt_buffer,
+ vm_page_size, offset,
+ &addr, protection_required & VM_PROT_WRITE);
+
+ switch (rc) {
+ case PAGER_SUCCESS:
+ if (addr != dpt->dpt_buffer) {
+ /*
+ * default_read returned the page in its own
+ * memory, not in our wired buffer; data_supply
+ * hands it to the kernel and deallocates it
+ * from our address space.
+ */
+ (void) memory_object_data_supply(
+ reply_to, offset,
+ addr, vm_page_size, TRUE,
+ VM_PROT_NONE,
+ FALSE, MACH_PORT_NULL);
+ } else {
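+ /*
+ * The page was read into this thread's wired
+ * buffer; data_provided copies it out to the
+ * kernel and leaves the buffer free for the
+ * next request.
+ */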
+ (void) memory_object_data_provided(
+ reply_to, offset,
+ addr, vm_page_size,
+ VM_PROT_NONE);
+ }
+ break;
+
+ case PAGER_ABSENT:
+ (void) memory_object_data_unavailable(
+ reply_to,
+ offset,
+ vm_page_size);
+ break;
+
+ case PAGER_ERROR:
+ (void) memory_object_data_error(
+ reply_to,
+ offset,
+ vm_page_size,
+ KERN_FAILURE);
+ break;
+ }
+
+ default_pager_pagein_count++;
+
+ done:
+ pager_port_finish_read(ds);
+ return(KERN_SUCCESS);
+}
+
+/*
+ * memory_object_data_initialize: check whether we already have each page, and
+ * write it if we do not. The implementation is far from optimized, and
+ * also assumes that the default_pager is single-threaded.
+ */
+kern_return_t
+seqnos_memory_object_data_initialize(pager, seqno, pager_request,
+ offset, addr, data_cnt)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ register
+ vm_offset_t offset;
+ register
+ pointer_t addr;
+ vm_size_t data_cnt;
+{
+ vm_offset_t amount_sent;
+ default_pager_t ds;
+ static char here[] = "%sdata_initialize";
+
+#ifdef lint
+ pager_request++;
+#endif /* lint */
+
+ ds = pager_port_lookup(pager);
+ if (ds == DEFAULT_PAGER_NULL)
+ panic(here,my_name);
+ddprintf ("seqnos_memory_object_data_initialize <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n",
+ &ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno);
+ pager_port_lock(ds, seqno);
+ pager_port_check_request(ds, pager_request);
+ pager_port_start_write(ds);
+ddprintf ("seqnos_memory_object_data_initialize <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n",
+ &ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held);
+ pager_port_unlock(ds);
+
+ for (amount_sent = 0;
+ amount_sent < data_cnt;
+ amount_sent += vm_page_size) {
+
+ if (!default_has_page(&ds->dpager, offset + amount_sent)) {
+ if (default_write(&ds->dpager,
+ addr + amount_sent,
+ vm_page_size,
+ offset + amount_sent)
+ != PAGER_SUCCESS) {
+ dprintf("%s%s write error\n", my_name, here);
+ dstruct_lock(ds);
+ ds->errors++;
+ dstruct_unlock(ds);
+ }
+ }
+ }
+
+ pager_port_finish_write(ds);
+ if (vm_deallocate(default_pager_self, addr, data_cnt) != KERN_SUCCESS)
+ panic(here,my_name);
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * memory_object_data_write: split up the stuff coming in from
+ * a memory_object_data_write call
+ * into individual pages and pass them off to default_write.
+ */
+kern_return_t
+seqnos_memory_object_data_write(pager, seqno, pager_request,
+ offset, addr, data_cnt)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ register
+ vm_offset_t offset;
+ register
+ pointer_t addr;
+ vm_size_t data_cnt;
+{
+ register
+ vm_size_t amount_sent;
+ default_pager_t ds;
+ static char here[] = "%sdata_write";
+ int err;
+
+#ifdef lint
+ pager_request++;
+#endif /* lint */
+
+ddprintf ("seqnos_memory_object_data_write <%p>: 1\n", &err);
+ if ((data_cnt % vm_page_size) != 0)
+ {
+ ddprintf ("fail 1: %d %d\n", data_cnt, vm_page_size);
+ panic(here,my_name);
+ }
+
+
+ddprintf ("seqnos_memory_object_data_write <%p>: 2\n", &err);
+ ds = pager_port_lookup(pager);
+ddprintf ("seqnos_memory_object_data_write <%p>: 3\n", &err);
+ if (ds == DEFAULT_PAGER_NULL)
+ {
+ ddprintf ("fail 2: %d %d\n", pager, ds);
+ panic(here,my_name);
+ }
+
+ddprintf ("seqnos_memory_object_data_write <%p>: 4\n", &err);
+ddprintf ("seqnos_memory_object_data_write <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n",
+ &err, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno);
+ pager_port_lock(ds, seqno);
+ddprintf ("seqnos_memory_object_data_write <%p>: 5\n", &err);
+ pager_port_check_request(ds, pager_request);
+ddprintf ("seqnos_memory_object_data_write <%p>: 6\n", &err);
+ pager_port_start_write(ds);
+ddprintf ("seqnos_memory_object_data_write <%p>: 7\n", &err);
+ddprintf ("seqnos_memory_object_data_write <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n",
+ &err, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held);
+ pager_port_unlock(ds);
+
+ddprintf ("seqnos_memory_object_data_write <%p>: 8\n", &err);
+ for (amount_sent = 0;
+ amount_sent < data_cnt;
+ amount_sent += vm_page_size) {
+
+ register int result;
+
+ddprintf ("seqnos_memory_object_data_write <%p>: 9\n", &err);
+ result = default_write(&ds->dpager,
+ addr + amount_sent,
+ vm_page_size,
+ offset + amount_sent);
+ddprintf ("seqnos_memory_object_data_write <%p>: 10\n", &err);
+ if (result != KERN_SUCCESS) {
+ddprintf ("seqnos_memory_object_data_write <%p>: 11\n", &err);
+#if debug
+ dprintf("%s WRITE ERROR on default_pageout:", my_name);
+ dprintf(" pager=%x, offset=0x%x, length=0x%x, result=%d\n",
+ pager, offset+amount_sent, vm_page_size, result);
+#endif
+ dstruct_lock(ds);
+ ds->errors++;
+ dstruct_unlock(ds);
+ }
+ default_pager_pageout_count++;
+ }
+
+ddprintf ("seqnos_memory_object_data_write <%p>: 12\n", &err);
+ pager_port_finish_write(ds);
+ddprintf ("seqnos_memory_object_data_write <%p>: 13\n", &err);
+ err = vm_deallocate(default_pager_self, addr, data_cnt);
+ddprintf ("seqnos_memory_object_data_write <%p>: 14\n", &err);
+ if (err != KERN_SUCCESS)
+ {
+ ddprintf ("fail 3: %s %s %s %s\n", default_pager_self, addr, data_cnt, &err);
+
+ panic(here,my_name);
+ }
+
+
+ddprintf ("seqnos_memory_object_data_write <%p>: 15\n", &err);
+ return(KERN_SUCCESS);
+}
+
+/*ARGSUSED*/
+kern_return_t
+seqnos_memory_object_copy(old_memory_object, seqno, old_memory_control,
+ offset, length, new_memory_object)
+ memory_object_t old_memory_object;
+ mach_port_seqno_t seqno;
+ memory_object_control_t
+ old_memory_control;
+ vm_offset_t offset;
+ vm_size_t length;
+ memory_object_t new_memory_object;
+{
+ panic("%scopy", my_name);
+ return KERN_FAILURE;
+}
+
+kern_return_t
+seqnos_memory_object_lock_completed(pager, seqno, pager_request,
+ offset, length)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ vm_offset_t offset;
+ vm_size_t length;
+{
+#ifdef lint
+ pager++; seqno++; pager_request++; offset++; length++;
+#endif /* lint */
+
+ panic("%slock_completed",my_name);
+ return(KERN_FAILURE);
+}
+
+kern_return_t
+seqnos_memory_object_data_unlock(pager, seqno, pager_request,
+ offset, addr, data_cnt)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ vm_offset_t offset;
+ pointer_t addr;
+ vm_size_t data_cnt;
+{
+ panic("%sdata_unlock",my_name);
+ return(KERN_FAILURE);
+}
+
+kern_return_t
+seqnos_memory_object_supply_completed(pager, seqno, pager_request,
+ offset, length,
+ result, error_offset)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ vm_offset_t offset;
+ vm_size_t length;
+ kern_return_t result;
+ vm_offset_t error_offset;
+{
+ panic("%ssupply_completed",my_name);
+ return(KERN_FAILURE);
+}
+
+kern_return_t
+seqnos_memory_object_data_return(pager, seqno, pager_request,
+ offset, addr, data_cnt,
+ dirty, kernel_copy)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ vm_offset_t offset;
+ pointer_t addr;
+ vm_size_t data_cnt;
+ boolean_t dirty;
+ boolean_t kernel_copy;
+{
+ panic("%sdata_return",my_name);
+ return(KERN_FAILURE);
+}
+
+kern_return_t
+seqnos_memory_object_change_completed(pager, seqno, may_cache, copy_strategy)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ boolean_t may_cache;
+ memory_object_copy_strategy_t copy_strategy;
+{
+ panic("%schange_completed",my_name);
+ return(KERN_FAILURE);
+}
+
+boolean_t default_pager_notify_server(in, out)
+ mach_msg_header_t *in, *out;
+{
+ register mach_no_senders_notification_t *n =
+ (mach_no_senders_notification_t *) in;
+
+ /*
+ * The only send-once rights we create are for
+ * receiving no-more-senders notifications.
+ * Hence, if we receive a message directed to
+ * a send-once right, we can assume it is
+ * a genuine no-senders notification from the kernel.
+ */
+
+ if ((n->not_header.msgh_bits !=
+ MACH_MSGH_BITS(0, MACH_MSG_TYPE_PORT_SEND_ONCE)) ||
+ (n->not_header.msgh_id != MACH_NOTIFY_NO_SENDERS))
+ return FALSE;
+
+ assert(n->not_header.msgh_size == sizeof *n);
+ assert(n->not_header.msgh_remote_port == MACH_PORT_NULL);
+
+ assert(n->not_type.msgt_name == MACH_MSG_TYPE_INTEGER_32);
+ assert(n->not_type.msgt_size == 32);
+ assert(n->not_type.msgt_number == 1);
+ assert(n->not_type.msgt_inline);
+ assert(! n->not_type.msgt_longform);
+
+ default_pager_no_senders(n->not_header.msgh_local_port,
+ n->not_header.msgh_seqno, n->not_count);
+
+ out->msgh_remote_port = MACH_PORT_NULL;
+ return TRUE;
+}
+
+extern boolean_t seqnos_memory_object_server();
+extern boolean_t seqnos_memory_object_default_server();
+extern boolean_t default_pager_server();
+extern boolean_t exc_server();
+extern boolean_t bootstrap_server();
+extern void bootstrap_compat();
+
+mach_msg_size_t default_pager_msg_size_object = 128;
+
+boolean_t
+default_pager_demux_object(in, out)
+ mach_msg_header_t *in;
+ mach_msg_header_t *out;
+{
+ /*
+ * We receive memory_object_data_initialize messages in
+ * the memory_object_default interface.
+ */
+
+ int rval;
+ ddprintf ("DPAGER DEMUX OBJECT <%p>: %d\n", in, in->msgh_id);
+ rval =
+ (seqnos_memory_object_server(in, out) ||
+ seqnos_memory_object_default_server(in, out) ||
+ default_pager_notify_server(in, out));
+ ddprintf ("DPAGER DEMUX OBJECT DONE <%p>: %d\n", in, in->msgh_id);
+ return rval;
+}
+
+mach_msg_size_t default_pager_msg_size_default = 8 * 1024;
+
+boolean_t
+default_pager_demux_default(in, out)
+ mach_msg_header_t *in;
+ mach_msg_header_t *out;
+{
+ if (in->msgh_local_port == default_pager_default_port) {
+ /*
+ * We receive memory_object_create messages in
+ * the memory_object_default interface.
+ */
+
+ int rval;
+ ddprintf ("DPAGER DEMUX DEFAULT <%p>: %d\n", in, in->msgh_id);
+ rval =
+ (seqnos_memory_object_default_server(in, out) ||
+ default_pager_server(in, out));
+ ddprintf ("DPAGER DEMUX DEFAULT DONE <%p>: %d\n", in, in->msgh_id);
+ return rval;
+ } else if (in->msgh_local_port == default_pager_exception_port) {
+ /*
+ * We receive exception messages for
+ * ourself and the startup task.
+ */
+
+ return exc_server(in, out);
+ } else if (in->msgh_local_port == default_pager_bootstrap_port) {
+ /*
+ * We receive bootstrap requests
+ * from the startup task.
+ */
+
+ if (in->msgh_id == 999999) {
+ /* compatibility for old bootstrap interface */
+
+ bootstrap_compat(in, out);
+ return TRUE;
+ }
+
+ return bootstrap_server(in, out);
+ } else {
+ panic(my_name);
+ return FALSE;
+ }
+}
+
+/*
+ * We use multiple threads, for two reasons.
+ *
+ * First, memory objects created by default_pager_object_create
+ * are "external", instead of "internal". This means the kernel
+ * sends data (memory_object_data_write) to the object in
+ * pageable memory.
+ * To prevent deadlocks, the external and internal objects must
+ * be managed by different threads.
+ *
+ * Second, the default pager uses synchronous IO operations.
+ * Spreading requests across multiple threads should
+ * recover some of the performance loss from synchronous IO.
+ *
+ * We have 3+ threads.
+ * One receives memory_object_create and
+ * default_pager_object_create requests.
+ * One or more manage internal objects.
+ * One or more manage external objects.
+ */
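+
+/*
+ * Editorial sketch of the resulting topology, assuming the
+ * thread counts computed in default_pager_initialize:
+ *
+ * default_pager_default_set - one thread (object creation,
+ * info requests, exceptions, bootstrap)
+ * default_pager_internal_set - several wired, privileged threads
+ * default_pager_external_set - one or more unprivileged threads
+ */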
+
+void
+default_pager_thread_privileges()
+{
+ /*
+ * Set thread privileges.
+ */
+ cthread_wire(); /* attach kernel thread to cthread */
+ wire_thread(); /* grab a kernel stack and memory allocation
+ privileges */
+}
+
+any_t
+default_pager_default_thread (arg)
+ any_t arg;
+{
+ kern_return_t kr;
+ default_pager_thread_privileges ();
+ for (;;) {
+ kr = mach_msg_server(default_pager_demux_default,
+ default_pager_msg_size_default,
+ default_pager_default_set);
+ panic("%smach_msg_server: %d", my_name, kr);
+ }
+}
+
+any_t
+default_pager_thread(arg)
+ any_t arg;
+{
+ default_pager_thread_t *dpt = (default_pager_thread_t *) arg;
+ mach_port_t pset;
+ kern_return_t kr;
+
+ cthread_set_data(cthread_self(), (any_t) dpt);
+
+ /*
+ * Threads handling external objects cannot have
+ * privileges. Otherwise a burst of data-requests for an
+ * external object could empty the free-page queue,
+ * because the fault code only reserves real pages for
+ * requests sent to internal objects.
+ */
+
+ if (dpt->dpt_internal) {
+ default_pager_thread_privileges();
+ pset = default_pager_internal_set;
+ } else {
+ pset = default_pager_external_set;
+ }
+
+ for (;;) {
+ kr = mach_msg_server(default_pager_demux_object,
+ default_pager_msg_size_object,
+ pset);
+ panic("%smach_msg_server: %d", my_name, kr);
+ }
+}
+
+void
+start_default_pager_thread(internal)
+ boolean_t internal;
+{
+ default_pager_thread_t *dpt;
+ kern_return_t kr;
+
+ dpt = (default_pager_thread_t *) kalloc(sizeof *dpt);
+ if (dpt == 0)
+ panic(my_name);
+
+ dpt->dpt_internal = internal;
+
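+ /*
+ * Each thread gets its own page-sized buffer, wired below,
+ * so that reading a page in from disk can never itself fault.
+ */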
+ kr = vm_allocate(default_pager_self, &dpt->dpt_buffer,
+ vm_page_size, TRUE);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+ wire_memory(dpt->dpt_buffer, vm_page_size,
+ VM_PROT_READ|VM_PROT_WRITE);
+
+ dpt->dpt_thread = cthread_fork(default_pager_thread, (any_t) dpt);
+}
+
+void
+default_pager_initialize(host_port)
+ mach_port_t host_port;
+{
+ memory_object_t DMM;
+ kern_return_t kr;
+
+ /*
+ * This task will become the default pager.
+ */
+ default_pager_self = mach_task_self();
+
+ /*
+ * Initialize the "default pager" port.
+ */
+ kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_RECEIVE,
+ &default_pager_default_port);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ DMM = default_pager_default_port;
+ kr = vm_set_default_memory_manager(host_port, &DMM);
+ if ((kr != KERN_SUCCESS) || MACH_PORT_VALID(DMM))
+ panic(my_name);
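+
+ /*
+ * (Editorial note: a privileged client can retrieve this
+ * port later by calling vm_set_default_memory_manager with
+ * MACH_PORT_NULL, which leaves the manager unchanged and
+ * returns the current one.)
+ */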
+
+ /*
+ * Initialize the exception port.
+ */
+ kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_RECEIVE,
+ &default_pager_exception_port);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ /*
+ * Initialize the bootstrap port.
+ */
+ kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_RECEIVE,
+ &default_pager_bootstrap_port);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ /*
+ * Arrange for wiring privileges.
+ */
+ wire_setup(host_port);
+
+ /*
+ * Find out how many CPUs we have, to determine the number
+ * of threads to create.
+ */
+ if (default_pager_internal_count == 0) {
+ host_basic_info_data_t h_info;
+ natural_t h_info_count;
+
+ h_info_count = HOST_BASIC_INFO_COUNT;
+ (void) host_info(host_port, HOST_BASIC_INFO,
+ (host_info_t)&h_info, &h_info_count);
+
+ /*
+ * Random computation to get more parallelism on
+ * multiprocessors.
+ */
+ default_pager_internal_count =
+ (h_info.avail_cpus > 32 ? 32 : h_info.avail_cpus) / 4 + 3;
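+ /*
+ * E.g. one CPU yields 3 internal threads, 4 CPUs yield 4,
+ * and 32 or more CPUs cap out at 32/4 + 3 = 11.
+ */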
+ }
+}
+
+/*
+ * Initialize and Run the default pager
+ */
+void
+default_pager()
+{
+ kern_return_t kr;
+ int i;
+
+ default_pager_thread_privileges();
+
+ /*
+ * Wire down code, data, stack
+ */
+ wire_all_memory();
+
+ /*
+ * Initialize the list of all pagers.
+ */
+ pager_port_list_init();
+
+ kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_PORT_SET,
+ &default_pager_internal_set);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_PORT_SET,
+ &default_pager_external_set);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_PORT_SET,
+ &default_pager_default_set);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ kr = mach_port_move_member(default_pager_self,
+ default_pager_default_port,
+ default_pager_default_set);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ kr = mach_port_move_member(default_pager_self,
+ default_pager_exception_port,
+ default_pager_default_set);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ kr = mach_port_move_member(default_pager_self,
+ default_pager_bootstrap_port,
+ default_pager_default_set);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ /*
+ * Now we create the threads that will actually
+ * manage objects.
+ */
+
+ for (i = 0; i < default_pager_internal_count; i++)
+ start_default_pager_thread(TRUE);
+
+ for (i = 0; i < default_pager_external_count; i++)
+ start_default_pager_thread(FALSE);
+
+ default_pager_default_thread(0); /* Become the default_pager server */
+#if 0
+ cthread_fork (default_pager_default_thread, 0);
+ /* cthread_exit (cthread_self ()); */
+ thread_suspend (mach_thread_self ());
+#endif
+}
+
+/*
+ * Create an external object.
+ */
+kern_return_t default_pager_object_create(pager, mem_obj, size)
+ mach_port_t pager;
+ mach_port_t *mem_obj;
+ vm_size_t size;
+{
+ default_pager_t ds;
+ mach_port_t port;
+ kern_return_t result;
+
+ if (pager != default_pager_default_port)
+ return KERN_INVALID_ARGUMENT;
+
+ ds = pager_port_alloc(size);
+rename_it:
+ port = (mach_port_t) pnameof(ds);
+ result = mach_port_allocate_name(default_pager_self,
+ MACH_PORT_RIGHT_RECEIVE, port);
+ if (result != KERN_SUCCESS) {
+ default_pager_t ds1;
+
+ if (result != KERN_NAME_EXISTS) return (result);
+
+ ds1 = (default_pager_t) kalloc(sizeof *ds1);
+ *ds1 = *ds;
+ mutex_lock(&all_pagers.lock);
+ queue_enter(&all_pagers.leak_queue, ds, default_pager_t, links);
+ mutex_unlock(&all_pagers.lock);
+ ds = ds1;
+ goto rename_it;
+ }
+
+ /*
+ * Set up associations between these ports
+ * and this default_pager structure
+ */
+
+ ds->pager = port;
+ pager_port_list_insert(port, ds);
+ default_pager_add(ds, FALSE);
+
+ *mem_obj = port;
+ return (KERN_SUCCESS);
+}
+
+kern_return_t default_pager_info(pager, infop)
+ mach_port_t pager;
+ default_pager_info_t *infop;
+{
+ vm_size_t total, free;
+
+ if (pager != default_pager_default_port)
+ return KERN_INVALID_ARGUMENT;
+
+ mutex_lock(&all_partitions.lock);
+ paging_space_info(&total, &free);
+ mutex_unlock(&all_partitions.lock);
+
+ infop->dpi_total_space = ptoa(total);
+ infop->dpi_free_space = ptoa(free);
+ infop->dpi_page_size = vm_page_size;
+ return KERN_SUCCESS;
+}
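+
+#if 0
+/*
+ * Editorial sketch, not part of the original code: how a client
+ * might use default_pager_info, assuming "pager" holds a send
+ * right to the default pager's port (e.g. obtained via
+ * vm_set_default_memory_manager).
+ */
+void
+print_paging_space(pager)
+ mach_port_t pager;
+{
+ default_pager_info_t info;
+
+ if (default_pager_info(pager, &info) == KERN_SUCCESS)
+ printf("paging space: %u free of %u bytes (page size %u)\n",
+ info.dpi_free_space, info.dpi_total_space,
+ info.dpi_page_size);
+}
+#endif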
+
+kern_return_t default_pager_objects(pager, objectsp, ocountp, portsp, pcountp)
+ mach_port_t pager;
+ default_pager_object_array_t *objectsp;
+ natural_t *ocountp;
+ mach_port_array_t *portsp;
+ natural_t *pcountp;
+{
+ vm_offset_t oaddr; /* memory for objects */
+ vm_size_t osize; /* current size */
+ default_pager_object_t *objects;
+ natural_t opotential;
+
+ vm_offset_t paddr; /* memory for ports */
+ vm_size_t psize; /* current size */
+ mach_port_t *ports;
+ natural_t ppotential;
+
+ unsigned int actual;
+ unsigned int num_pagers;
+ kern_return_t kr;
+ default_pager_t entry;
+
+ if (pager != default_pager_default_port)
+ return KERN_INVALID_ARGUMENT;
+
+ /* start with the inline memory */
+
+ num_pagers = 0;
+
+ objects = *objectsp;
+ opotential = *ocountp;
+
+ ports = *portsp;
+ ppotential = *pcountp;
+
+ mutex_lock(&all_pagers.lock);
+ /*
+ * We will send no more than this many
+ */
+ actual = all_pagers.count;
+ mutex_unlock(&all_pagers.lock);
+
+ if (opotential < actual) {
+ vm_offset_t newaddr;
+ vm_size_t newsize;
+
+ newsize = 2 * round_page(actual * sizeof *objects);
+
+ kr = vm_allocate(default_pager_self, &newaddr, newsize, TRUE);
+ if (kr != KERN_SUCCESS)
+ goto nomemory;
+
+ oaddr = newaddr;
+ osize = newsize;
+ opotential = osize/sizeof *objects;
+ objects = (default_pager_object_t *) oaddr;
+ }
+
+ if (ppotential < actual) {
+ vm_offset_t newaddr;
+ vm_size_t newsize;
+
+ newsize = 2 * round_page(actual * sizeof *ports);
+
+ kr = vm_allocate(default_pager_self, &newaddr, newsize, TRUE);
+ if (kr != KERN_SUCCESS)
+ goto nomemory;
+
+ paddr = newaddr;
+ psize = newsize;
+ ppotential = psize/sizeof *ports;
+ ports = (mach_port_t *) paddr;
+ }
+
+ /*
+ * Now scan the list.
+ */
+
+ mutex_lock(&all_pagers.lock);
+
+ num_pagers = 0;
+ queue_iterate(&all_pagers.queue, entry, default_pager_t, links) {
+
+ mach_port_t port;
+ vm_size_t size;
+
+ if ((num_pagers >= opotential) ||
+ (num_pagers >= ppotential)) {
+ /*
+ * This should be rare. In any case,
+ * we will only miss recent objects,
+ * because they are added at the end.
+ */
+ break;
+ }
+
+ /*
+ * Avoid interfering with normal operations
+ */
+ if (!mutex_try_lock(&entry->dpager.lock))
+ goto not_this_one;
+ size = pager_allocated(&entry->dpager);
+ mutex_unlock(&entry->dpager.lock);
+
+ dstruct_lock(entry);
+
+ port = entry->pager_name;
+ if (port == MACH_PORT_NULL) {
+ /*
+ * The object is waiting for no-senders
+ * or memory_object_init.
+ */
+ dstruct_unlock(entry);
+ goto not_this_one;
+ }
+
+ /*
+ * We need a reference for the reply message.
+ * While we are unlocked, the bucket queue
+ * can change and the object might be terminated.
+ * memory_object_terminate will wait for us,
+ * preventing deallocation of the entry.
+ */
+
+ if (--entry->name_refs == 0) {
+ dstruct_unlock(entry);
+
+ /* keep the list locked, won't take long */
+
+ kr = mach_port_mod_refs(default_pager_self,
+ port, MACH_PORT_RIGHT_SEND,
+ default_pager_max_urefs);
+ if (kr != KERN_SUCCESS)
+ panic("%sdefault_pager_objects",my_name);
+
+ dstruct_lock(entry);
+
+ entry->name_refs += default_pager_max_urefs;
+ pager_port_finish_refs(entry);
+ }
+ dstruct_unlock(entry);
+
+ /* the arrays are wired, so no deadlock worries */
+
+ objects[num_pagers].dpo_object = (vm_offset_t) entry;
+ objects[num_pagers].dpo_size = size;
+ ports [num_pagers++] = port;
+ continue;
+not_this_one:
+ /*
+ * Do not return garbage
+ */
+ objects[num_pagers].dpo_object = (vm_offset_t) 0;
+ objects[num_pagers].dpo_size = 0;
+ ports [num_pagers++] = MACH_PORT_NULL;
+
+ }
+
+ mutex_unlock(&all_pagers.lock);
+
+ /*
+ * Deallocate and clear unused memory.
+ * (Returned memory will automagically become pageable.)
+ */
+
+ if (objects == *objectsp) {
+ /*
+ * Our returned information fit inline.
+ * Nothing to deallocate.
+ */
+
+ *ocountp = num_pagers;
+ } else if (actual == 0) {
+ (void) vm_deallocate(default_pager_self, oaddr, osize);
+
+ /* return zero items inline */
+ *ocountp = 0;
+ } else {
+ vm_offset_t used;
+
+ used = round_page(actual * sizeof *objects);
+
+ if (used != osize)
+ (void) vm_deallocate(default_pager_self,
+ oaddr + used, osize - used);
+
+ *objectsp = objects;
+ *ocountp = num_pagers;
+ }
+
+ if (ports == *portsp) {
+ /*
+ * Our returned information fit inline.
+ * Nothing to deallocate.
+ */
+
+ *pcountp = num_pagers;
+ } else if (actual == 0) {
+ (void) vm_deallocate(default_pager_self, paddr, psize);
+
+ /* return zero items inline */
+ *pcountp = 0;
+ } else {
+ vm_offset_t used;
+
+ used = round_page(actual * sizeof *ports);
+
+ if (used != psize)
+ (void) vm_deallocate(default_pager_self,
+ paddr + used, psize - used);
+
+ *portsp = ports;
+ *pcountp = num_pagers;
+ }
+
+ return KERN_SUCCESS;
+
+ nomemory:
+
+ {
+ register int i;
+ for (i = 0; i < num_pagers; i++)
+ (void) mach_port_deallocate(default_pager_self, ports[i]);
+ }
+
+ if (objects != *objectsp)
+ (void) vm_deallocate(default_pager_self, oaddr, osize);
+
+ if (ports != *portsp)
+ (void) vm_deallocate(default_pager_self, paddr, psize);
+
+ return KERN_RESOURCE_SHORTAGE;
+}
+
+
+kern_return_t
+default_pager_object_pages(pager, object, pagesp, countp)
+ mach_port_t pager;
+ mach_port_t object;
+ default_pager_page_array_t *pagesp;
+ natural_t *countp;
+{
+ vm_offset_t addr; /* memory for page offsets */
+ vm_size_t size; /* current memory size */
+ default_pager_page_t *pages;
+ natural_t potential, actual;
+ kern_return_t kr;
+
+ if (pager != default_pager_default_port)
+ return KERN_INVALID_ARGUMENT;
+
+ /* we start with the inline space */
+
+ pages = *pagesp;
+ potential = *countp;
+
+ for (;;) {
+ default_pager_t entry;
+
+ mutex_lock(&all_pagers.lock);
+ queue_iterate(&all_pagers.queue, entry, default_pager_t, links) {
+ dstruct_lock(entry);
+ if (entry->pager_name == object) {
+ mutex_unlock(&all_pagers.lock);
+ goto found_object;
+ }
+ dstruct_unlock(entry);
+ }
+ mutex_unlock(&all_pagers.lock);
+
+ /* did not find the object */
+
+ if (pages != *pagesp)
+ (void) vm_deallocate(default_pager_self, addr, size);
+ return KERN_INVALID_ARGUMENT;
+
+ found_object:
+
+ if (!mutex_try_lock(&entry->dpager.lock)) {
+ /* oh well bad luck */
+
+ dstruct_unlock(entry);
+
+ /* yield the processor */
+ (void) thread_switch(MACH_PORT_NULL,
+ SWITCH_OPTION_NONE, 0);
+ continue;
+ }
+
+ actual = pager_pages(&entry->dpager, pages, potential);
+ mutex_unlock(&entry->dpager.lock);
+ dstruct_unlock(entry);
+
+ if (actual <= potential)
+ break;
+
+ /* allocate more memory */
+
+ if (pages != *pagesp)
+ (void) vm_deallocate(default_pager_self, addr, size);
+ size = round_page(actual * sizeof *pages);
+ kr = vm_allocate(default_pager_self, &addr, size, TRUE);
+ if (kr != KERN_SUCCESS)
+ return kr;
+ pages = (default_pager_page_t *) addr;
+ potential = size/sizeof *pages;
+ }
+
+ /*
+ * Deallocate and clear unused memory.
+ * (Returned memory will automagically become pageable.)
+ */
+
+ if (pages == *pagesp) {
+ /*
+ * Our returned information fit inline.
+ * Nothing to deallocate.
+ */
+
+ *countp = actual;
+ } else if (actual == 0) {
+ (void) vm_deallocate(default_pager_self, addr, size);
+
+ /* return zero items inline */
+ *countp = 0;
+ } else {
+ vm_offset_t used;
+
+ used = round_page(actual * sizeof *pages);
+
+ if (used != size)
+ (void) vm_deallocate(default_pager_self,
+ addr + used, size - used);
+
+ *pagesp = pages;
+ *countp = actual;
+ }
+ return KERN_SUCCESS;
+}
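+
+#if 0
+/*
+ * Editorial sketch, not part of the original code: listing the
+ * pages an object has touched via default_pager_object_pages.
+ * Assumes the usual MiG out-array convention (the reply buffer
+ * may come back out-of-line and then belongs to the caller) and
+ * a dpp_offset field in default_pager_page_t.
+ */
+void
+show_object_pages(pager, object)
+ mach_port_t pager;
+ mach_port_t object;
+{
+ default_pager_page_t *pages = 0;
+ natural_t count = 0;
+ natural_t i;
+
+ if (default_pager_object_pages(pager, object,
+ &pages, &count) != KERN_SUCCESS)
+ return;
+ for (i = 0; i < count; i++)
+ printf("page at offset 0x%x\n", pages[i].dpp_offset);
+ if (count != 0)
+ (void) vm_deallocate(mach_task_self(), (vm_offset_t) pages,
+ count * sizeof *pages);
+}
+#endif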
+
+/*
+ * Add/remove extra paging space
+ */
+
+extern mach_port_t bootstrap_master_device_port;
+extern mach_port_t bootstrap_master_host_port;
+
+kern_return_t
+default_pager_paging_file(pager, mdport, file_name, add)
+ mach_port_t pager;
+ mach_port_t mdport;
+ default_pager_filename_t file_name;
+ boolean_t add;
+{
+ kern_return_t kr;
+
+ if (pager != default_pager_default_port)
+ return KERN_INVALID_ARGUMENT;
+
+#if 0
+dprintf("bmd %x md %x\n", bootstrap_master_device_port, mdport);
+#endif
+ if (add) {
+ kr = add_paging_file(bootstrap_master_device_port,
+ file_name, 0);
+ } else {
+ kr = remove_paging_file(file_name);
+ }
+
+ /* XXXX more code needed */
+ if (mdport != bootstrap_master_device_port)
+ mach_port_deallocate( mach_task_self(), mdport);
+
+ return kr;
+}
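+
+#if 0
+/*
+ * Editorial sketch, not part of the original code: how a
+ * privileged client might ask the default pager to start paging
+ * to an additional file, assuming send rights to the default
+ * pager port and the master device port.
+ */
+kern_return_t
+add_paging_area(pager, master_device, name)
+ mach_port_t pager;
+ mach_port_t master_device;
+ default_pager_filename_t name;
+{
+ return default_pager_paging_file(pager, master_device,
+ name, TRUE);
+}
+#endif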
+
+kern_return_t
+default_pager_register_fileserver(pager, fileserver)
+ mach_port_t pager;
+ mach_port_t fileserver;
+{
+ if (pager != default_pager_default_port)
+ return KERN_INVALID_ARGUMENT;
+#if notyet
+ mach_port_deallocate(mach_task_self(), fileserver);
+ if (0) dp_helper_paging_space(0,0,0); /* just link it */
+#endif
+ return KERN_SUCCESS;
+}
+
+/*
+ * When things do not quite work out...
+ */
+no_paging_space(out_of_memory)
+ boolean_t out_of_memory;
+{
+ static char here[] = "%s *** NOT ENOUGH PAGING SPACE ***";
+
+ if (out_of_memory)
+ dprintf("*** OUT OF MEMORY *** ");
+ panic(here, my_name);
+}
+
+overcommitted(got_more_space, space)
+ boolean_t got_more_space;
+ vm_size_t space; /* in pages */
+{
+ vm_size_t pages_free, pages_total;
+
+ static boolean_t user_warned = FALSE;
+ static vm_size_t pages_shortage = 0;
+
+ paging_space_info(&pages_total, &pages_free);
+
+ /*
+ * If user added more space, see if it is enough
+ */
+ if (got_more_space) {
+ pages_free -= pages_shortage;
+ /* vm_size_t is unsigned: test the difference as signed */
+ if ((integer_t)pages_free > 0) {
+ pages_shortage = 0;
+ if (user_warned)
+ dprintf("%s paging space ok now.\n", my_name);
+ } else
+ pages_shortage = -(integer_t)pages_free;
+ user_warned = FALSE;
+ return;
+ }
+ /*
+ * We ran out of gas, let user know.
+ */
+ pages_free -= space;
+ pages_shortage = ((integer_t)pages_free > 0) ? 0 : -(integer_t)pages_free;
+ if (!user_warned && pages_shortage) {
+ user_warned = TRUE;
+ dprintf("%s paging space over-committed.\n", my_name);
+ }
+#if debug
+ user_warned = FALSE;
+ dprintf("%s paging space over-committed [+%d (%d) pages].\n",
+ my_name, space, pages_shortage);
+#endif
+}
+
+paging_space_info(totp, freep)
+ vm_size_t *totp, *freep;
+{
+ register vm_size_t total, free;
+ register partition_t part;
+ register int i;
+
+ total = free = 0;
+ for (i = 0; i < all_partitions.n_partitions; i++) {
+
+ if ((part = partition_of(i)) == 0) continue;
+
+ /* no need to lock: by the time this data
+ gets back to any remote requestor it
+ will be obsolete anyway */
+ total += part->total_size;
+ free += part->free;
+#if debug
+ dprintf("Partition %d: x%x total, x%x free\n",
+ i, part->total_size, part->free);
+#endif
+ }
+ *totp = total;
+ *freep = free;
+}
+
+/*
+ * Catch exceptions.
+ */
+
+kern_return_t
+catch_exception_raise(exception_port, thread, task, exception, code, subcode)
+ mach_port_t exception_port;
+ mach_port_t thread, task;
+ int exception, code, subcode;
+{
+ ddprintf ("(default_pager)catch_exception_raise(%d,%d,%d)\n",
+ exception, code, subcode);
+ panic(my_name);
+
+ /* mach_msg_server will deallocate thread/task for us */
+
+ return KERN_FAILURE;
+}
+
+/*
+ * Handle bootstrap requests.
+ */
+
+kern_return_t
+do_bootstrap_privileged_ports(bootstrap, hostp, devicep)
+ mach_port_t bootstrap;
+ mach_port_t *hostp, *devicep;
+{
+ *hostp = bootstrap_master_host_port;
+ *devicep = bootstrap_master_device_port;
+ return KERN_SUCCESS;
+}
+
+void
+bootstrap_compat(in, out)
+ mach_msg_header_t *in, *out;
+{
+ mig_reply_header_t *reply = (mig_reply_header_t *) out;
+ mach_msg_return_t mr;
+
+ struct imsg {
+ mach_msg_header_t hdr;
+ mach_msg_type_t port_desc_1;
+ mach_port_t port_1;
+ mach_msg_type_t port_desc_2;
+ mach_port_t port_2;
+ } imsg;
+
+ /*
+ * Send back the host and device ports.
+ */
+
+ imsg.hdr.msgh_bits = MACH_MSGH_BITS_COMPLEX |
+ MACH_MSGH_BITS(MACH_MSGH_BITS_REMOTE(in->msgh_bits), 0);
+ /* msgh_size doesn't need to be initialized */
+ imsg.hdr.msgh_remote_port = in->msgh_remote_port;
+ imsg.hdr.msgh_local_port = MACH_PORT_NULL;
+ /* msgh_seqno doesn't need to be initialized */
+ imsg.hdr.msgh_id = in->msgh_id + 100; /* this is a reply msg */
+
+ imsg.port_desc_1.msgt_name = MACH_MSG_TYPE_COPY_SEND;
+ imsg.port_desc_1.msgt_size = (sizeof(mach_port_t) * 8);
+ imsg.port_desc_1.msgt_number = 1;
+ imsg.port_desc_1.msgt_inline = TRUE;
+ imsg.port_desc_1.msgt_longform = FALSE;
+ imsg.port_desc_1.msgt_deallocate = FALSE;
+ imsg.port_desc_1.msgt_unused = 0;
+
+ imsg.port_1 = bootstrap_master_host_port;
+
+ imsg.port_desc_2 = imsg.port_desc_1;
+
+ imsg.port_2 = bootstrap_master_device_port;
+
+ /*
+ * Send the reply message.
+ * (mach_msg_server can not do this, because the reply
+ * is not in standard format.)
+ */
+
+ mr = mach_msg(&imsg.hdr, MACH_SEND_MSG,
+ sizeof imsg, 0, MACH_PORT_NULL,
+ MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
+ if (mr != MACH_MSG_SUCCESS)
+ (void) mach_port_deallocate(default_pager_self,
+ imsg.hdr.msgh_remote_port);
+
+ /*
+ * Tell mach_msg_server to do nothing.
+ */
+
+ reply->RetCode = MIG_NO_REPLY;
+}
+
+#ifdef mips
+/*
+ * set_ras_address for default pager
+ * Default pager does not have emulator support
+ * so it needs a local version of set_ras_address.
+ */
+int
+set_ras_address(basepc, boundspc)
+ vm_offset_t basepc;
+ vm_offset_t boundspc;
+{
+ kern_return_t status;
+
+ status = task_ras_control(mach_task_self(), basepc, boundspc,
+ TASK_RAS_CONTROL_INSTALL_ONE);
+ if (status != KERN_SUCCESS)
+ return -1;
+ return 0;
+}
+#endif