/*
 * Mach Operating System
 * Copyright (c) 1993-1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 * Default pager.  Pages to paging partition.
 *
 * MUST BE ABLE TO ALLOCATE WIRED-DOWN MEMORY!!!
 */

#include <mach.h>
#include <mach/message.h>
#include <mach/notify.h>
#include <mach/mig_errors.h>
#include <mach/thread_switch.h>
#include <mach/task_info.h>
#include <mach/default_pager_types.h>

#include <cthreads.h>

#include <device/device_types.h>
#include <device/device.h>

#include <queue.h>
#include <wiring.h>

#include <assert.h>
#include <stdio.h>

#include <file_io.h>

/* Debug switch; defined as 0 here. */
#define debug 0

/* Allocator provided elsewhere; declared without a prototype. */
extern void *kalloc();

/* Prefix used in panic and diagnostic messages. */
static char my_name[] = "(default pager):";

/* Serializes dprintf output between threads. */
static struct mutex printf_lock = MUTEX_INITIALIZER;

/*
 * dprintf: printf to stdout followed by a flush, all under printf_lock.
 * ddprintf: debugging trace, compiled out (its arguments are never
 * evaluated).
 */
#define dprintf(f, x...) \
  ({ mutex_lock (&printf_lock); printf (f , ##x); fflush (stdout); mutex_unlock (&printf_lock); })
#define ddprintf(f, x...) ((void)0)

/*
 * parallel vs serial switch
 */
#define	PARALLEL 1

#if	0
#define	CHECKSUM	1
#endif

#define	USE_PRECIOUS	1

/*
 * Page <-> byte conversions based on vm_page_size.
 * atop() truncates: a partial trailing page is not counted.
 */
#define	ptoa(p)	((p)*vm_page_size)
#define	atop(a)	((a)/vm_page_size)

/*

 */
/*
 * Bitmap allocation.
 */
typedef unsigned int	bm_entry_t;	/* one word of an allocation bitmap */
#define	NB_BM		32		/* number of bits used per bm_entry_t */
#define	BM_MASK		0xffffffff	/* bitmap word with no free bit left */

/* Number of B-sized units needed to hold A, i.e. A/B rounded up. */
#define	howmany(a,b)	(((a) + (b) - 1)/(b))

/*
 * Value to indicate no block assigned
 */
#define	NO_BLOCK	((vm_offset_t)-1)

/*
 * 'Partition' structure for each paging area.
 * Controls allocation of blocks within paging area.
 */
struct part {
	struct mutex	p_lock;		/* for bitmap/free */
	vm_size_t	total_size;	/* total number of blocks (pages) */
	vm_size_t	free;		/* number of blocks free */
	unsigned int	id;		/* named lookup: part_id() of the name */
	bm_entry_t	*bitmap;	/* allocation map, one bit per block;
					   a set bit means in use or unusable */
	boolean_t	going_away;	/* destroy attempt in progress */
	struct file_direct *file;	/* file paged to */
};
/* Handle used throughout the file; a null handle marks an empty slot. */
typedef	struct part	*partition_t;

/*
 * Table of every paging partition.  Slots are addressed by index
 * (see partition_of / set_partition_of); a slot holding 0 is empty.
 */
struct {
	struct mutex	lock;		/* held while the table is searched or changed */
	int		n_partitions;	/* number of slots in partition_list,
					   empty ones included */
	partition_t	*partition_list;/* array, for quick mapping */
} all_partitions;			/* list of all such */

typedef unsigned char	p_index_t;

#define	P_INDEX_INVALID	((p_index_t)-1)

#define	no_partition(x)	((x) == P_INDEX_INVALID)

partition_t partition_of(x)
      int x;
{
      partition_t *slot;

      /* An index outside [0, n_partitions) is a fatal error. */
      if (x < 0 || all_partitions.n_partitions <= x)
	  panic("partition_of x%x", x);

      /* Fetch the slot's content; 0 means the slot is empty. */
      slot = all_partitions.partition_list + x;
      return *slot;
}

void set_partition_of(x, p)
      int x;
      partition_t p;
{
      partition_t *slot;

      /* An index outside [0, n_partitions) is a fatal error. */
      if (x < 0 || all_partitions.n_partitions <= x)
	  panic("set_partition_of x%x", x);

      /* Store P (possibly 0, to empty the slot) at index X. */
      slot = all_partitions.partition_list + x;
      *slot = p;
}

/*
 * Simple mapping from (file)NAME to id
 * Saves space, filenames can be long.
 */
unsigned int
part_id(name)
	unsigned char	*name;
{
	register unsigned int	sum = 0;	/* additive part of the id */
	register unsigned int	parity = 0;	/* XOR of all the bytes */
	register unsigned char	*cp;

	/* Fold every byte up to the terminating NUL into both halves. */
	for (cp = name; *cp != '\0'; cp++) {
		parity ^= *cp;
		sum += *cp;
	}

	/* Byte sum in the upper bits, XOR in the low byte. */
	return (sum << 8) | parity;
}

partition_init()
{
	/* Begin with an empty partition table, then set up its lock. */
	all_partitions.n_partitions = 0;
	mutex_init(&all_partitions.lock);
}

/*
 * Build (but do not register) a partition descriptor for the paging
 * area FDP, known as NAME.
 *
 * CHECK_LINUX_SIGNATURE selects how the first page is treated:
 *   < 0   the first page is not read; the whole area is used as raw
 *	   paging space (-3 additionally suppresses the message);
 *   == 0  a Linux swap signature is honoured if present, otherwise
 *	   the area is used as raw paging space;
 *   > 0   a recognised Linux swap signature is required; without one
 *	   the partition is skipped.
 *
 * Returns the new descriptor, or 0 when a partition with the same id
 * is already registered or the partition was skipped.
 */
static partition_t
new_partition (const char *name, struct file_direct *fdp,
	       int check_linux_signature)
{
	register partition_t	part;
	register vm_size_t	size, bmsize;
	vm_offset_t raddr;
	mach_msg_type_number_t rsize;
	int rc;
	unsigned int id = part_id(name);

	/* Refuse to page twice to the same name (ids are compared).  */
	mutex_lock(&all_partitions.lock);
	{
	  unsigned int i;
	  for (i = 0; i < all_partitions.n_partitions; i++)
	    {
	      part = partition_of(i);
	      if (part && part->id == id)
		{
		  printf ("(default pager): Already paging to partition %s!\n",
			  name);
		  mutex_unlock(&all_partitions.lock);
		  return 0;
		}
	    }
	}
	mutex_unlock(&all_partitions.lock);

	/* Size in pages, and the bitmap needed to cover it.  */
	size = atop(fdp->fd_size * fdp->fd_bsize);
	bmsize = howmany(size, NB_BM) * sizeof(bm_entry_t);

	part = (partition_t) kalloc(sizeof(struct part));
	mutex_init(&part->p_lock);
	part->total_size = size;
	part->free	= size;
	part->id	= id;
	part->bitmap	= (bm_entry_t *)kalloc(bmsize);
	part->going_away= FALSE;
	part->file = fdp;

	/* Every block starts out free.  */
	bzero((char *)part->bitmap, bmsize);

	if (check_linux_signature < 0)
	  {
	    if (check_linux_signature != -3)
	      printf("(default pager): "
		     "Paging to raw partition %s (%uk paging space)\n",
		     name, part->total_size * (vm_page_size / 1024));
	    return part;
	  }

#define LINUX_PAGE_SIZE 4096	/* size of pages in Linux swap partitions */
	rc = page_read_file_direct(part->file,
				   0, LINUX_PAGE_SIZE,
				   &raddr,
				   &rsize);
	if (rc)
	  panic("(default pager): cannot read first page of %s! rc=%#x\n",
		name, rc);
	while (rsize < LINUX_PAGE_SIZE)
	  {
	    /* Filesystem block size is smaller than page size,
	       so we must do several reads to get the whole page.  */
	    vm_address_t baddr, bsize;
	    rc = page_read_file_direct(part->file,
				       rsize, LINUX_PAGE_SIZE-rsize,
				       &baddr,
				       &bsize);
	    if (rc)
	      panic("(default pager): "
		    "cannot read first page of %s! rc=%#x at %#x\n",
		    name, rc, rsize);

	    memcpy ((char *) raddr + rsize, (void *) baddr, bsize);
	    rsize += bsize;
	    vm_deallocate (mach_task_self (), baddr, bsize);
	  }

	if (!memcmp("SWAP-SPACE", (char *) raddr + LINUX_PAGE_SIZE-10, 10))
	  {
	    /* The partition's first page has a Linux swap signature.
	       This means the beginning of the page contains a bitmap
	       of good pages, and all others are bad.  */
	    unsigned int i, j, bad, max;
	    int waste;

	    printf("(default pager): Found Linux 2.0 swap signature in %s\n",
		   name);

	    /* The first page, and the pages corresponding to the bits
	       occupied by the signature in the final 10 bytes of the page,
	       are always unavailable ("bad").  */
	    *(u_int32_t *)raddr &= ~(u_int32_t) 1;
	    memset((char *) raddr + LINUX_PAGE_SIZE-10, 0, 10);

	    max = LINUX_PAGE_SIZE / sizeof(u_int32_t);
	    if (max > (part->total_size + 31) / 32)
	      max = (part->total_size + 31) / 32;

	    bad = 0;
	    for (i = 0; i < max; ++i)
	      {
		u_int32_t bm = ((u_int32_t *) raddr)[i];
		if (bm == ~(u_int32_t) 0)
		  continue;
		/* There are some zero bits in this word.  */
		for (j = 0; j < 32; ++j)
		  /* Unsigned one: shifting a signed 1 by 31 is undefined.  */
		  if ((bm & ((u_int32_t) 1 << j)) == 0)
		    {
		      unsigned int p = i*32 + j;
		      if (p >= part->total_size)
			break;
		      ++bad;
		      part->bitmap[p / NB_BM] |= (bm_entry_t) 1 << (p % NB_BM);
		    }
	      }
	    part->free -= bad;

	    /* NOTE(review): `bad' is unsigned, so the adjustments below can
	       wrap, and pages past the end of the on-disk bitmap are not
	       marked in part->bitmap by the loop above -- confirm the
	       intended accounting before relying on these figures.  */
	    --bad;		/* Don't complain about first page.  */
	    waste = part->total_size - (8 * (LINUX_PAGE_SIZE-10));
	    if (waste > 0)
	      {
		/* The wasted pages were already marked "bad".  */
		bad -= waste;
		if (bad > 0)
		  printf("\
(default pager): Paging to %s, %dk swap-space (%dk bad, %dk wasted at end)\n",
			 name,
			 part->free * (LINUX_PAGE_SIZE / 1024),
			 bad * (LINUX_PAGE_SIZE / 1024),
			 waste * (LINUX_PAGE_SIZE / 1024));
		else
		  printf("\
(default pager): Paging to %s, %dk swap-space (%dk wasted at end)\n",
			 name,
			 part->free * (LINUX_PAGE_SIZE / 1024),
			 waste * (LINUX_PAGE_SIZE / 1024));
	      }
	    else if (bad > 0)
	      printf("\
(default pager): Paging to %s, %dk swap-space (excludes %dk marked bad)\n",
		     name,
		     part->free * (LINUX_PAGE_SIZE / 1024),
		     bad * (LINUX_PAGE_SIZE / 1024));
	    else
	      printf("\
(default pager): Paging to %s, %dk swap-space\n",
		     name,
		     part->free * (LINUX_PAGE_SIZE / 1024));
	  }
	else if (!memcmp("SWAPSPACE2",
			 (char *) raddr + LINUX_PAGE_SIZE-10, 10))
	  {
	    struct
	    {
	      u_int8_t bootbits[1024];
	      u_int32_t version;
	      u_int32_t last_page;
	      u_int32_t nr_badpages;
	      u_int32_t padding[125];
	      u_int32_t badpages[1];
	    } *hdr = (void *) raddr;

	    printf("\
(default pager): Found Linux 2.2 swap signature (v%u) in %s...",
		   hdr->version, name);

	    part->bitmap[0] |= 1; /* first page unusable */
	    part->free--;

	    switch (hdr->version)
	      {
	      default:
		if (check_linux_signature)
		  {
		    printf ("version %u unknown!  SKIPPING %s!\n",
			    hdr->version,
			    name);
		    vm_deallocate(mach_task_self(), raddr, rsize);
		    kfree(part->bitmap, bmsize);
		    kfree(part, sizeof *part);
		    return 0;
		  }
		else
		  printf ("version %u unknown! IGNORING SIGNATURE PAGE!"
			  " %dk swap-space\n",
			  hdr->version,
			  part->free * (LINUX_PAGE_SIZE / 1024));
		break;

	      case 1:
		{
		  unsigned int waste, i, listed, limit, marked;
		  if (hdr->last_page > part->total_size)
		    {
		      printf ("signature says %uk, partition has only %uk! ",
			    hdr->last_page * (LINUX_PAGE_SIZE / 1024),
			    part->total_size * (LINUX_PAGE_SIZE / 1024));
		      waste = 0;
		    }
		  else
		    {
		      waste = part->total_size - hdr->last_page;
		      part->total_size = hdr->last_page;
		      /* Page 0 is unusable; do not wrap when the header
			 claims an empty area.  */
		      part->free = part->total_size
			? part->total_size - 1 : 0;
		    }

		  /* The bad-page list comes from disk, so do not trust it:
		     only the entries that lie inside the page we read
		     (before the 10-byte signature) exist at all.  */
		  limit = (LINUX_PAGE_SIZE - 10
			   - ((char *) hdr->badpages - (char *) hdr))
		    / sizeof hdr->badpages[0];
		  listed = hdr->nr_badpages;
		  if (listed > limit)
		    listed = limit;

		  marked = 0;
		  for (i = 0; i < listed; ++i)
		    {
		      const u_int32_t pg = hdr->badpages[i];
		      const bm_entry_t mask = (bm_entry_t) 1 << (pg % NB_BM);

		      /* Skip entries outside the partition (they would
			 index past the bitmap) and pages already marked,
			 so that `free' is decremented once per page.  */
		      if (pg >= part->total_size
			  || (part->bitmap[pg / NB_BM] & mask) != 0)
			continue;
		      part->bitmap[pg / NB_BM] |= mask;
		      part->free--;
		      ++marked;
		    }
		  printf ("%uk swap-space",
			  part->free * (LINUX_PAGE_SIZE / 1024));
		  if (marked != 0)
		    printf (" (excludes %uk marked bad)",
			    marked * (LINUX_PAGE_SIZE / 1024));
		  if (waste != 0)
		    printf (" (excludes %uk at end of partition)",
			    waste * (LINUX_PAGE_SIZE / 1024));
		  printf ("\n");
		}
	      }
	  }
	else if (check_linux_signature)
	  {
	    printf ("(default pager): "
		    "Cannot find Linux swap signature page!  "
		    "SKIPPING %s (%uk partition)!\n",
		    name, part->total_size * (vm_page_size / 1024));
	    kfree(part->bitmap, bmsize);
	    kfree(part, sizeof *part);
	    part = 0;
	  }
	else
	  printf("(default pager): "
		 "Paging to raw partition %s (%uk paging space)\n",
		 name, part->total_size * (vm_page_size / 1024));

	/* The signature page is no longer needed.  */
	vm_deallocate(mach_task_self(), raddr, rsize);

	return part;
}

/*
 * Create a partition descriptor for the paging area FDP, known as
 * NAME, and enter it into the table of all partitions.  The table is
 * doubled when it has no empty slot left.  Nothing is registered when
 * new_partition declines the area.
 */
void
create_paging_partition(const char *name,
			struct file_direct *fdp, int isa_file,
			int linux_signature)
{
	register partition_t	part;
	register int		slot;

	part = new_partition (name, fdp, linux_signature);
	if (part == 0)
	  return;

	mutex_lock(&all_partitions.lock);

	/* Reuse the first empty slot, if there is one. */
	slot = 0;
	while (slot < all_partitions.n_partitions
	       && partition_of(slot) != 0)
		slot++;

	if (slot == all_partitions.n_partitions) {
		/* No empty slot: move to a table twice as large
		   (two slots for the very first one). */
		register partition_t	*bigger, *previous;
		register int		capacity;

		capacity = (slot != 0) ? (slot << 1) : 2;
		bigger = (partition_t *)
			kalloc( capacity * sizeof(partition_t) );
		if (bigger == 0) no_paging_space(TRUE);
		bzero(bigger, capacity*sizeof(partition_t));
		if (slot != 0) {
		    previous = all_partitions.partition_list;
		    bcopy(previous, bigger, slot*sizeof(partition_t));
		}
		all_partitions.partition_list = bigger;
		all_partitions.n_partitions = capacity;
		if (slot != 0) kfree(previous, slot*sizeof(partition_t));
	}
	set_partition_of(slot, part);

	mutex_unlock(&all_partitions.lock);

#if 0
	dprintf("%s Added paging %s %s\n", my_name,
		(isa_file) ? "file" : "device",  name);
#endif
	/* Report the newly available blocks. */
	overcommitted(TRUE, part->free);
}

/*
 * Choose the most appropriate default partition
 * for an object of SIZE bytes.
 * Return the partition locked, unless
 * the object has no CUR_PARTition.
 */
p_index_t
choose_partition(size, cur_part)
	unsigned int		size;
	register p_index_t	cur_part;
{
	register partition_t	part;
	register boolean_t	found = FALSE;
	register int		i;

	mutex_lock(&all_partitions.lock);
	for (i = 0; i < all_partitions.n_partitions; i++) {

		/* the undesireable one ? */
		if (i == cur_part)
			continue;

ddprintf ("choose_partition(%x,%d,%d)\n",size,cur_part,i);
		/* one that was removed ? */
		if ((part = partition_of(i)) == 0)
			continue;

		/* one that is being removed ? */
		if (part->going_away)
			continue;

		/* is it big enough ? */
		mutex_lock(&part->p_lock);
		if (ptoa(part->free) >= size) {
			if (cur_part != P_INDEX_INVALID) {
				mutex_unlock(&all_partitions.lock);
				return (p_index_t)i;
			} else
				found = TRUE;
		}
		mutex_unlock(&part->p_lock);

		if (found) break;
	}
	mutex_unlock(&all_partitions.lock);
	return (found) ? (p_index_t)i : P_INDEX_INVALID;
}

/*
 * Allocate a page in a paging partition
 * The partition is returned unlocked.
 */
vm_offset_t
pager_alloc_page(pindex, lock_it)
	p_index_t	pindex;
{
	register int	bm_e;
	register int	bit;
	register int	limit;
	register bm_entry_t	*bm;
	partition_t	part;
	static char	here[] = "%spager_alloc_page";

	if (no_partition(pindex))
	    return (NO_BLOCK);
ddprintf ("pager_alloc_page(%d,%d)\n",pindex,lock_it);
	part = partition_of(pindex);

	/* unlikely, but possible deadlock against destroy_partition */
	if (!part || part->going_away)
	    return (NO_BLOCK);

	if (lock_it)
	    mutex_lock(&part->p_lock);

	if (part->free == 0) {
	    /* out of paging space */
	    mutex_unlock(&part->p_lock);
	    return (NO_BLOCK);
	}

	limit = howmany(part->total_size, NB_BM);
	bm = part->bitmap;
	for (bm_e = 0; bm_e < limit; bm_e++, bm++)
	    if (*bm != BM_MASK)
		break;

	if (bm_e == limit)
	    panic(here,my_name);

	/*
	 * Find and set the proper bit
	 */
	{
	    register bm_entry_t	b = *bm;

	    for (bit = 0; bit < NB_BM; bit++)
		if ((b & (1<<bit)) == 0)
		    break;
	    if (bit == NB_BM)
		panic(here,my_name);

	    *bm = b | (1<<bit);
	    part->free--;

	}

	mutex_unlock(&part->p_lock);

	return (bm_e*NB_BM+bit);
}

/*
 * Deallocate a page in a paging partition
 */
void
pager_dealloc_page(pindex, page, lock_it)
	p_index_t		pindex;
	register vm_offset_t	page;
{
	register partition_t	part;
	register int	bit, bm_e;

	/* be paranoid */
	if (no_partition(pindex))
	    panic("%sdealloc_page",my_name);
ddprintf ("pager_dealloc_page(%d,%x,%d)\n",pindex,page,lock_it);
	part = partition_of(pindex);

	if (page >= part->total_size)
	    panic("%sdealloc_page",my_name);

	bm_e = page / NB_BM;
	bit  = page % NB_BM;

	if (lock_it)
	    mutex_lock(&part->p_lock);

	part->bitmap[bm_e] &= ~(1<<bit);
	part->free++;

	if (lock_it)
	    mutex_unlock(&part->p_lock);
}

/*

 */
/*
 * Allocation info for each paging object.
 *
 * Most operations, even pager_write_offset and pager_put_checksum,
 * just need a read lock.  Higher-level considerations prevent
 * conflicting operations on a single page.  The lock really protects
 * the underlying size and block map memory, so pager_extend needs a
 * write lock.
 *
 * An object can now span multiple paging partitions.  The allocation
 * info we keep is a pair (offset,p_index) where the index is in the
 * array of all partition ptrs, and the offset is partition-relative.
 * Size-wise we are doing OK fitting the pair into a single integer:
 * the offset is counted in pages rather than bytes, so it needs fewer
 * bits, which leaves room for the partition index.
 */
#define	DEBUG_READER_CONFLICTS	0

#if	DEBUG_READER_CONFLICTS
int	default_pager_read_conflicts = 0;
#endif

/*
 * One block-map entry.  Two views share the same storage: a
 * (partition, page) pair for a data block, or a pointer to a
 * second-level map when the entry lives in the first level of a
 * two-level map.
 */
union dp_map {

	struct {
		unsigned int	p_offset : 24,	/* page number within the partition */
				p_index : 8;	/* index into the partition table */
	} block;

	union dp_map		*indirect;	/* second-level map, or 0 if not
						   allocated; also the view used by
						   the no_block test */
};
typedef union dp_map	*dp_map_t;

/* quick check for part==block==invalid */
#define	no_block(e)		((e).indirect == (dp_map_t)NO_BLOCK)
#define	invalidate_block(e)	((e).indirect = (dp_map_t)NO_BLOCK)

/*
 * Per-object paging state: the block map, its size and the partition
 * currently used for the object.
 */
struct dpager {
	struct mutex	lock;		/* lock for extending block map */
					/* XXX should be read-write lock */
#if	DEBUG_READER_CONFLICTS
	int		readers;	/* lookups currently holding the lock */
	boolean_t	writer;		/* TRUE while pager_extend is running */
#endif
	dp_map_t	map;		/* block map: one level while size is at
					   most PAGEMAP_ENTRIES, two levels above */
	vm_size_t	size;		/* size of paging object, in pages */
	p_index_t	cur_partition;	/* partition index given to pager_alloc;
					   P_INDEX_INVALID when there is none */
#ifdef	CHECKSUM
	vm_offset_t	*checksum;	/* checksum - parallel to block map */
#define	NO_CHECKSUM	((vm_offset_t)-1)
#endif	 /* CHECKSUM */
};
typedef struct dpager	*dpager_t;

/*
 * A paging object uses either a one- or a two-level map of offsets
 * into a paging partition.
 */
#define	PAGEMAP_ENTRIES		64
				/* number of pages in a second-level map */
/* bytes needed for a direct map of NPGS entries */
#define	PAGEMAP_SIZE(npgs)	((npgs)*sizeof(vm_offset_t))

/* number of second-level maps needed to cover NPGS pages */
#define	INDIRECT_PAGEMAP_ENTRIES(npgs) \
		((((npgs)-1)/PAGEMAP_ENTRIES) + 1)
/* bytes needed for the first-level map covering NPGS pages */
#define	INDIRECT_PAGEMAP_SIZE(npgs) \
		(INDIRECT_PAGEMAP_ENTRIES(npgs) * sizeof(vm_offset_t *))
/*
 * True when an object of SIZE pages uses a two-level map.  The
 * argument is parenthesized so that an expression such as `a & b'
 * is compared as a whole.
 */
#define	INDIRECT_PAGEMAP(size)	\
		((size) > PAGEMAP_ENTRIES)

/* NPGS rounded up to a multiple of PAGEMAP_ENTRIES */
#define	ROUNDUP_TO_PAGEMAP(npgs) \
		(((npgs) + PAGEMAP_ENTRIES - 1) & ~(PAGEMAP_ENTRIES - 1))

/*
 * Object sizes are rounded up to the next power of 2,
 * unless they are bigger than a given maximum size.
 */
/* In bytes; pager_alloc and pager_extend compare against atop() of it. */
vm_size_t	max_doubled_size = 4 * 1024 * 1024;	/* 4 meg */

/*
 * Attach a new paging object to a paging partition
 */
void
pager_alloc(pager, part, size)
	register dpager_t	pager;
	p_index_t		part;
	register vm_size_t	size;	/* in BYTES */
{
	register int    i;
	register dp_map_t mapptr, emapptr;

	mutex_init(&pager->lock);
#if	DEBUG_READER_CONFLICTS
	pager->readers = 0;
	pager->writer = FALSE;
#endif
	pager->cur_partition = part;

	/*
	 * Convert byte size to number of pages, then increase to the nearest
	 * power of 2.
	 */
	size = atop(size);
	if (size <= atop(max_doubled_size)) {
	    i = 1;
	    while (i < size)
		i <<= 1;
	    size = i;
	} else
	    size = ROUNDUP_TO_PAGEMAP(size);

	/*
	 * Allocate and initialize the block map
	 */
	{
		register vm_size_t	alloc_size;
		dp_map_t		init_value;

		if (INDIRECT_PAGEMAP(size)) {
			alloc_size = INDIRECT_PAGEMAP_SIZE(size);
			init_value = (dp_map_t)0;
		} else {
			alloc_size = PAGEMAP_SIZE(size);
			init_value = (dp_map_t)NO_BLOCK;
		}

		mapptr = (dp_map_t) kalloc(alloc_size);
		for (emapptr = &mapptr[(alloc_size-1) / sizeof(vm_offset_t)];
		     emapptr >= mapptr;
		     emapptr--)
			emapptr->indirect = init_value;

	}
	pager->map = mapptr;
	pager->size = size;

#ifdef	CHECKSUM
	if (INDIRECT_PAGEMAP(size)) {
		mapptr = (vm_offset_t *)
			kalloc(INDIRECT_PAGEMAP_SIZE(size));
		for (i = INDIRECT_PAGEMAP_ENTRIES(size); --i >= 0;)
			mapptr[i] = 0;
	} else {
		mapptr = (vm_offset_t *) kalloc(PAGEMAP_SIZE(size));
		for (i = 0; i < size; i++)
			mapptr[i] = NO_CHECKSUM;
	}
	pager->checksum = mapptr;
#endif	 /* CHECKSUM */
}

/*
 * Return size (in bytes) of space actually allocated to this pager.
 * The pager is read-locked.
 */

vm_size_t
pager_allocated(pager)
	register dpager_t	pager;
{
	vm_size_t	npages = pager->size;	/* object size, in pages */
	vm_size_t	in_use = 0;		/* blocks assigned, in pages */
	dp_map_t	top = pager->map;
	vm_size_t	i, j;

	if (!INDIRECT_PAGEMAP(npages)) {
		/* Single-level map: one entry per page. */
		for (i = 0; i < npages; i++)
			if ( ! no_block(top[i]) )
				in_use++;
		return ptoa(in_use);
	}

	/* Two-level map: visit every second-level map that exists. */
	for (i = 0; i < INDIRECT_PAGEMAP_ENTRIES(npages); i++) {
		dp_map_t	leaf = top[i].indirect;

		if (leaf == 0)
			continue;

		for (j = 0; j < PAGEMAP_ENTRIES; j++)
			if ( ! no_block(leaf[j]) )
				in_use++;
	}

	return ptoa(in_use);
}

/*
 * Find offsets (in the object) of pages actually allocated to this pager.
 * At most NUMPAGES offsets, in bytes, are stored into PAGES.
 * Returns the number of allocated pages, whether or not they all fit.
 * The pager is read-locked.
 */

unsigned int
pager_pages(pager, pages, numpages)
	dpager_t			pager;
	register default_pager_page_t	*pages;
	unsigned int			numpages;
{
	vm_size_t       size;
	dp_map_t        map, emap;
	unsigned int    actual;
	vm_offset_t     offset;

	size = pager->size;	/* in pages */
	map = pager->map;
	actual = 0;
	offset = 0;

	if (INDIRECT_PAGEMAP(size)) {
		for (emap = &map[INDIRECT_PAGEMAP_ENTRIES(size)];
		     map < emap; map++) {

			register dp_map_t	map2, emap2;

			if ((map2 = map->indirect) == 0) {
				/* a missing second-level map still
				   spans PAGEMAP_ENTRIES pages */
				offset += vm_page_size * PAGEMAP_ENTRIES;
				continue;
			}
			/* the offset advances with every entry, not
			   once per second-level map */
			for (emap2 = &map2[PAGEMAP_ENTRIES];
			     map2 < emap2;
			     map2++, offset += vm_page_size)
				if ( ! no_block(*map2) ) {
					if (actual++ < numpages)
						pages++->dpp_offset = offset;
				}
		}
	} else {
		/* the offset advances with every entry */
		for (emap = &map[size]; map < emap;
		     map++, offset += vm_page_size)
			if ( ! no_block(*map) ) {
				if (actual++ < numpages)
					pages++->dpp_offset = offset;
			}
	}
	return actual;
}

/*
 * Extend the map for a paging object.
 *
 * XXX This implementation can allocate an arbitrary large amount
 * of wired memory when extending a big block map.  Because vm-privileged
 * threads call pager_extend, this can crash the system by exhausting
 * system memory.
 */
void
pager_extend(pager, new_size)
	register dpager_t	pager;
	register vm_size_t	new_size;	/* in pages */
{
	register dp_map_t	new_mapptr;
	register dp_map_t	old_mapptr;
	register int		i;
	register vm_size_t	old_size;

	mutex_lock(&pager->lock);		/* XXX lock_write */
#if	DEBUG_READER_CONFLICTS
	pager->writer = TRUE;
#endif
	/*
	 * Double current size until we cover new size.
	 * If object is 'too big' just use new size.
	 */
	old_size = pager->size;

	if (new_size <= atop(max_doubled_size)) {
	    i = old_size;
	    while (i < new_size)
		i <<= 1;
	    new_size = i;
	} else
	    new_size = ROUNDUP_TO_PAGEMAP(new_size);

	if (INDIRECT_PAGEMAP(old_size)) {
	    /*
	     * Pager already uses two levels.  Allocate
	     * a larger indirect block.
	     */
	    new_mapptr = (dp_map_t)
			kalloc(INDIRECT_PAGEMAP_SIZE(new_size));
	    old_mapptr = pager->map;
	    for (i = 0; i < INDIRECT_PAGEMAP_ENTRIES(old_size); i++)
		new_mapptr[i] = old_mapptr[i];
	    for (; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++)
		new_mapptr[i].indirect = (dp_map_t)0;
	    kfree((char *)old_mapptr, INDIRECT_PAGEMAP_SIZE(old_size));
	    pager->map = new_mapptr;
	    pager->size = new_size;
#ifdef	CHECKSUM
	    new_mapptr = (vm_offset_t *)
			kalloc(INDIRECT_PAGEMAP_SIZE(new_size));
	    old_mapptr = pager->checksum;
	    for (i = 0; i < INDIRECT_PAGEMAP_ENTRIES(old_size); i++)
		new_mapptr[i] = old_mapptr[i];
	    for (; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++)
		new_mapptr[i] = 0;
	    kfree((char *)old_mapptr, INDIRECT_PAGEMAP_SIZE(old_size));
	    pager->checksum = new_mapptr;
#endif	 /* CHECKSUM */
#if	DEBUG_READER_CONFLICTS
	    pager->writer = FALSE;
#endif
	    mutex_unlock(&pager->lock);
	    ddprintf ("pager_extend 1 mapptr %x [3b] = %x\n", new_mapptr,
		     new_mapptr[0x3b]);
	    if (new_mapptr[0x3b].indirect > 0x10000
		&& new_mapptr[0x3b].indirect != NO_BLOCK)
	      panic ("debug panic");
	    return;
	}

	if (INDIRECT_PAGEMAP(new_size)) {
	    /*
	     * Changing from direct map to indirect map.
	     * Allocate both indirect and direct map blocks,
	     * since second-level (direct) block must be
	     * full size (PAGEMAP_SIZE(PAGEMAP_ENTRIES)).
	     */

	    /*
	     * Allocate new second-level map first.
	     */
	    new_mapptr = (dp_map_t) kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES));
	    old_mapptr = pager->map;
	    for (i = 0; i < old_size; i++)
		new_mapptr[i] = old_mapptr[i];
	    for (; i < PAGEMAP_ENTRIES; i++)
		invalidate_block(new_mapptr[i]);
	    kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size));
	    old_mapptr = new_mapptr;

	    ddprintf ("pager_extend 2 mapptr %x [3b] = %x\n", new_mapptr,
		     new_mapptr[0x3b]);
	    if (new_mapptr[0x3b].indirect > 0x10000
		&& new_mapptr[0x3b].indirect != NO_BLOCK)
	      panic ("debug panic");

	    /*
	     * Now allocate indirect map.
	     */
	    new_mapptr = (dp_map_t)
			kalloc(INDIRECT_PAGEMAP_SIZE(new_size));
	    new_mapptr[0].indirect = old_mapptr;
	    for (i = 1; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++)
		new_mapptr[i].indirect = 0;
	    pager->map = new_mapptr;
	    pager->size = new_size;
#ifdef	CHECKSUM
	    /*
	     * Allocate new second-level map first.
	     */
	    new_mapptr = (vm_offset_t *)kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES));
	    old_mapptr = pager->checksum;
	    for (i = 0; i < old_size; i++)
		new_mapptr[i] = old_mapptr[i];
	    for (; i < PAGEMAP_ENTRIES; i++)
		new_mapptr[i] = NO_CHECKSUM;
	    kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size));
	    old_mapptr = new_mapptr;

	    /*
	     * Now allocate indirect map.
	     */
	    new_mapptr = (vm_offset_t *)
			kalloc(INDIRECT_PAGEMAP_SIZE(new_size));
	    new_mapptr[0] = (vm_offset_t) old_mapptr;
	    for (i = 1; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++)
		new_mapptr[i] = 0;
	    pager->checksum = new_mapptr;
#endif	 /* CHECKSUM */
#if	DEBUG_READER_CONFLICTS
	    pager->writer = FALSE;
#endif
	    mutex_unlock(&pager->lock);
	    return;
	}
	/*
	 * Enlarging a direct block.
	 */
	new_mapptr = (dp_map_t)	kalloc(PAGEMAP_SIZE(new_size));
	old_mapptr = pager->map;
	for (i = 0; i < old_size; i++)
	    new_mapptr[i] = old_mapptr[i];
	for (; i < new_size; i++)
	    invalidate_block(new_mapptr[i]);
	kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size));
	pager->map = new_mapptr;
	pager->size = new_size;
#ifdef	CHECKSUM
	new_mapptr = (vm_offset_t *)
		kalloc(PAGEMAP_SIZE(new_size));
	old_mapptr = pager->checksum;
	for (i = 0; i < old_size; i++)
	    new_mapptr[i] = old_mapptr[i];
	for (; i < new_size; i++)
	    new_mapptr[i] = NO_CHECKSUM;
	kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size));
	pager->checksum = new_mapptr;
#endif	 /* CHECKSUM */
#if	DEBUG_READER_CONFLICTS
	pager->writer = FALSE;
#endif
	mutex_unlock(&pager->lock);
}

/*
 * Given an offset within a paging object, find the
 * corresponding block within the paging partition.
 * Return NO_BLOCK if none allocated, or if OFFSET lies
 * beyond the current size of the object.
 * The pager lock is taken and released on every path.
 */
union dp_map
pager_read_offset(pager, offset)
	register dpager_t	pager;
	vm_offset_t		offset;
{
	register vm_offset_t	f_page;
	union dp_map		pager_offset;

	f_page = atop(offset);

#if	DEBUG_READER_CONFLICTS
	if (pager->readers > 0)
	    default_pager_read_conflicts++;	/* would have proceeded with
						   read/write lock */
#endif
	mutex_lock(&pager->lock);		/* XXX lock_read */
#if	DEBUG_READER_CONFLICTS
	pager->readers++;
#endif
	if (f_page >= pager->size)
	  {
	    /*
	     * Past the end of the map: report "no block" and leave
	     * through the common exit, so the lock is released.
	     */
	    ddprintf ("%spager_read_offset pager %x: bad page %d >= size %d",
		    my_name, pager, f_page, pager->size);
	    invalidate_block(pager_offset);
	  }
	else if (INDIRECT_PAGEMAP(pager->size)) {
	    register dp_map_t	mapptr;

	    /* two-level map: the second level may not exist yet */
	    mapptr = pager->map[f_page/PAGEMAP_ENTRIES].indirect;
	    if (mapptr == 0)
		invalidate_block(pager_offset);
	    else
		pager_offset = mapptr[f_page%PAGEMAP_ENTRIES];
	}
	else {
	    pager_offset = pager->map[f_page];
	}

#if	DEBUG_READER_CONFLICTS
	pager->readers--;
#endif
	mutex_unlock(&pager->lock);
	return (pager_offset);
}

#if	USE_PRECIOUS
/*
 * Release a single disk block.
 */
pager_release_offset(pager, offset)
	register dpager_t	pager;
	vm_offset_t		offset;
{
	register union dp_map	entry;

	offset = atop(offset);

	mutex_lock(&pager->lock);	/* XXX lock_read */

	if (INDIRECT_PAGEMAP(pager->size)) {
		register dp_map_t	mapptr;

		mapptr = pager->map[offset / PAGEMAP_ENTRIES].indirect;
		entry = mapptr[offset % PAGEMAP_ENTRIES];
		invalidate_block(mapptr[offset % PAGEMAP_ENTRIES]);
	} else {
		entry = pager->map[offset];
		invalidate_block(pager->map[offset]);
	}

	mutex_unlock(&pager->lock);

	pager_dealloc_page(entry.block.p_index, entry.block.p_offset, TRUE);
}
#endif	/*USE_PRECIOUS*/


/*
 * Move a page from one partition to another
 * New partition is locked, old partition is
 * locked unless LOCK_OLD sez otherwise.
 */
union dp_map
pager_move_page(block)
	union dp_map	block;
{
	partition_t	old_part, new_part;
	p_index_t	old_pindex, new_pindex;
	union dp_map	ret;
	vm_size_t	size;
	vm_offset_t	raddr, offset, new_offset;
	kern_return_t	rc;
	static char	here[] = "%spager_move_page";

	old_pindex = block.block.p_index;
	invalidate_block(ret);

	/* See if we have room to put it anywhere else */
	new_pindex = choose_partition( ptoa(1), old_pindex);
	if (no_partition(new_pindex))
		return ret;

	/* this unlocks the new partition */
	new_offset = pager_alloc_page(new_pindex, FALSE);
	if (new_offset == NO_BLOCK)
		panic(here,my_name);

	/*
	 * Got the resources, now move the data
	 */
ddprintf ("pager_move_page(%x,%d,%d)\n",block.block.p_offset,old_pindex,new_pindex);
	old_part = partition_of(old_pindex);
	offset = ptoa(block.block.p_offset);
	rc = page_read_file_direct (old_part->file,
				    offset,
				    vm_page_size,
				    &raddr,
				    &size);
	if (rc != 0)
		panic(here,my_name);

	/* release old */
	pager_dealloc_page(old_pindex, block.block.p_offset, FALSE);

	new_part = partition_of(new_pindex);
	offset = ptoa(new_offset);
	rc = page_write_file_direct (new_part->file,
				     offset,
				     raddr,
				     size,
				     &size);
	if (rc != 0)
		panic(here,my_name);

	(void) vm_deallocate( mach_task_self(), raddr, size);

	ret.block.p_offset = new_offset;
	ret.block.p_index  = new_pindex;

	return ret;
}

#ifdef	CHECKSUM
/*
 * Return the checksum for a block.
 */
int
pager_get_checksum(pager, offset)
	register dpager_t	pager;
	vm_offset_t		offset;
{
	register vm_offset_t	page_no = atop(offset);
	int result;

	mutex_lock(&pager->lock);		/* XXX lock_read */

	/* asking about a page beyond the object is fatal */
	if (page_no >= pager->size)
	    panic("%spager_get_checksum",my_name);

	if (!INDIRECT_PAGEMAP(pager->size)) {
	    /* one-level table, indexed by page */
	    result = pager->checksum[page_no];
	}
	else {
	    /* two-level table: the second level may be missing */
	    register vm_offset_t *leaf;

	    leaf = (vm_offset_t *)pager->checksum[page_no/PAGEMAP_ENTRIES];
	    result = (leaf == 0) ? NO_CHECKSUM
				 : leaf[page_no%PAGEMAP_ENTRIES];
	}

	mutex_unlock(&pager->lock);
	return (result);
}

/*
 * Remember the checksum for a block.
 */
int
pager_put_checksum(pager, offset, checksum)
	register dpager_t	pager;
	vm_offset_t		offset;
	int			checksum;
{
	static char		here[] = "%spager_put_checksum";
	register vm_offset_t	page_no = atop(offset);
	register vm_offset_t	*table;

	mutex_lock(&pager->lock);		/* XXX lock_read */

	/* storing for a page beyond the object is fatal */
	if (page_no >= pager->size)
	    panic(here,my_name);

	if (!INDIRECT_PAGEMAP(pager->size)) {
	    /* one-level table, indexed by page */
	    pager->checksum[page_no] = checksum;
	}
	else {
	    /* two-level table: the second level must already exist */
	    table = (vm_offset_t *)pager->checksum[page_no/PAGEMAP_ENTRIES];
	    if (table == 0)
		panic(here,my_name);

	    table[page_no%PAGEMAP_ENTRIES] = checksum;
	}

	mutex_unlock(&pager->lock);
}

/*
 * Compute a checksum - XOR each 32-bit word.
 */
int
compute_checksum(addr, size)
	vm_offset_t	addr;
	vm_size_t	size;
{
	register int	*word = (int *)addr;
	register int	nwords = size / sizeof(int);
	register int	sum = NO_CHECKSUM;	/* starting value of the fold */
	register int	k;

	/* XOR together every whole int in the buffer. */
	for (k = 0; k < nwords; k++)
	    sum ^= word[k];

	return (sum);
}
#endif	 /* CHECKSUM */

/*
 * Given an offset within a paging object, find the
 * corresponding block within the paging partition.
 * Allocate a new block if necessary.
 *
 * WARNING: paging objects apparently may be extended
 * without notice!
 *
 * Returns the map entry for the page.  The entry is the
 * invalidated one set up on entry if no partition could be
 * chosen, if memory for an indirect block could not be
 * allocated, or if no free block could be found in any
 * partition.
 *
 * pager->lock is taken here and released before returning.
 * It is dropped around the call to pager_extend, which is
 * why the size test is a loop rather than a single check.
 */
union dp_map
pager_write_offset(pager, offset)
	register dpager_t	pager;
	vm_offset_t		offset;
{
	register vm_offset_t	f_page;
	register dp_map_t	mapptr;
	register union dp_map	block;

	/* every failure path below returns this invalid entry */
	invalidate_block(block);

	f_page = atop(offset);

#if	DEBUG_READER_CONFLICTS
	if (pager->readers > 0)
	    default_pager_read_conflicts++;	/* would have proceeded with
						   read/write lock */
#endif
	mutex_lock(&pager->lock);		/* XXX lock_read */
#if	DEBUG_READER_CONFLICTS
	pager->readers++;
#endif

	/* Catch the case where we had no initial fit partition
	   for this object, but one was added later on */
	if (no_partition(pager->cur_partition)) {
		p_index_t	new_part;
		vm_size_t	size;

		/* first try for room for the whole object, then for one page */
		size = (f_page > pager->size) ? f_page : pager->size;
		new_part = choose_partition(ptoa(size), P_INDEX_INVALID);
		if (no_partition(new_part))
			new_part = choose_partition(ptoa(1), P_INDEX_INVALID);
		if (no_partition(new_part))
			/* give up right now to avoid confusion */
			goto out;
		else
			pager->cur_partition = new_part;
	}

	while (f_page >= pager->size) {
	  ddprintf ("pager_write_offset: extending: %x %x\n", f_page, pager->size);

	    /*
	     * Paging object must be extended.
	     * Remember that offset is 0-based, but size is 1-based.
	     */
#if	DEBUG_READER_CONFLICTS
	    pager->readers--;
#endif
	    /* pager_extend is called with the pager lock released */
	    mutex_unlock(&pager->lock);
	    pager_extend(pager, f_page + 1);
#if	DEBUG_READER_CONFLICTS
	    if (pager->readers > 0)
		default_pager_read_conflicts++;	/* would have proceeded with
						   read/write lock */
#endif
	    mutex_lock(&pager->lock);		/* XXX lock_read */
#if	DEBUG_READER_CONFLICTS
	    pager->readers++;
#endif
	    ddprintf ("pager_write_offset: done extending: %x %x\n", f_page, pager->size);
	}

	if (INDIRECT_PAGEMAP(pager->size)) {
	  ddprintf ("pager_write_offset: indirect\n");
	    mapptr = pager->map[f_page/PAGEMAP_ENTRIES].indirect;
	    if (mapptr == 0) {
		/*
		 * Allocate the indirect block
		 */
		register int i;
		ddprintf ("pager_write_offset: allocating indirect\n");

		mapptr = (dp_map_t) kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES));
		if (mapptr == 0) {
		    /* out of space! */
		    no_paging_space(TRUE);
		    goto out;
		}
		pager->map[f_page/PAGEMAP_ENTRIES].indirect = mapptr;
		for (i = 0; i < PAGEMAP_ENTRIES; i++)
		    invalidate_block(mapptr[i]);
#ifdef	CHECKSUM
		{
		    /* matching leaf array for the checksum table */
		    register vm_offset_t *cksumptr;
		    register int j;

		    cksumptr = (vm_offset_t *)
				kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES));
		    if (cksumptr == 0) {
			/* out of space! */
			no_paging_space(TRUE);
			goto out;
		    }
		    pager->checksum[f_page/PAGEMAP_ENTRIES]
			= (vm_offset_t)cksumptr;
		    for (j = 0; j < PAGEMAP_ENTRIES; j++)
			cksumptr[j] = NO_CHECKSUM;
		}
#endif	 /* CHECKSUM */
	    }
	    /* from here on f_page indexes within the indirect block */
	    f_page %= PAGEMAP_ENTRIES;
	}
	else {
	    mapptr = pager->map;
	}

	block = mapptr[f_page];
	ddprintf ("pager_write_offset: block starts as %x[%x] %x\n", mapptr, f_page, block);
	if (no_block(block)) {
	    vm_offset_t	off;

	    /* get room now */
	    off = pager_alloc_page(pager->cur_partition, TRUE);
	    if (off == NO_BLOCK) {
		/*
		 * Before giving up, try all other partitions.
		 */
		p_index_t	new_part;

		ddprintf ("pager_write_offset: could not allocate block\n");
		/* returns it locked (if any one is non-full) */
		new_part = choose_partition( ptoa(1), pager->cur_partition);
		if ( ! no_partition(new_part) ) {

#if debug
dprintf("%s partition %x filled,", my_name, pager->cur_partition);
dprintf("extending object %x (size %x) to %x.\n",
	pager, pager->size, new_part);
#endif

		    /* this one tastes better */
		    pager->cur_partition = new_part;

		    /* this unlocks the partition too */
		    off = pager_alloc_page(pager->cur_partition, FALSE);

		}

		if (off == NO_BLOCK) {
		    /*
		     * Oh well.
		     */
		    overcommitted(FALSE, 1);
		    goto out;
		}
		ddprintf ("pager_write_offset: decided to allocate block\n");
	    }
	    /* record the new block in the map and return it */
	    block.block.p_offset = off;
	    block.block.p_index  = pager->cur_partition;
	    mapptr[f_page] = block;
	    ddprintf ("pager_write_offset: mapptr %x [3b] = %x\n", mapptr,
		     mapptr[0x3b]);
	    ddprintf ("pager_write_offset: block is finally %x\n", block);
	}

out:

#if	DEBUG_READER_CONFLICTS
	pager->readers--;
#endif
	mutex_unlock(&pager->lock);
	return (block);
}

/*
 * Deallocate all of the blocks belonging to a paging object.
 * No locking needed because no other operations can be in progress.
 */
void
pager_dealloc(pager)
	register dpager_t	pager;
{
	register int i, j;
	register dp_map_t	mapptr;
	register union dp_map	block;

	if (INDIRECT_PAGEMAP(pager->size)) {
	    for (i = INDIRECT_PAGEMAP_ENTRIES(pager->size); --i >= 0; ) {
		mapptr = pager->map[i].indirect;
		if (mapptr != 0) {
		    for (j = 0; j < PAGEMAP_ENTRIES; j++) {
			block = mapptr[j];
			if ( ! no_block(block) )
			    pager_dealloc_page(block.block.p_index,
			    			block.block.p_offset, TRUE);
		    }
		    kfree((char *)mapptr, PAGEMAP_SIZE(PAGEMAP_ENTRIES));
		}
	    }
	    kfree((char *)pager->map, INDIRECT_PAGEMAP_SIZE(pager->size));
#ifdef	CHECKSUM
	    for (i = INDIRECT_PAGEMAP_ENTRIES(pager->size); --i >= 0; ) {
		mapptr = (vm_offset_t *)pager->checksum[i];
		if (mapptr) {
		    kfree((char *)mapptr, PAGEMAP_SIZE(PAGEMAP_ENTRIES));
		}
	    }
	    kfree((char *)pager->checksum,
		  INDIRECT_PAGEMAP_SIZE(pager->size));
#endif	 /* CHECKSUM */
	}
	else {
	    mapptr = pager->map;
	    for (i = 0; i < pager->size; i++ ) {
		block = mapptr[i];
		if ( ! no_block(block) )
		    pager_dealloc_page(block.block.p_index,
		    			block.block.p_offset, TRUE);
	    }
	    kfree((char *)pager->map, PAGEMAP_SIZE(pager->size));
#ifdef	CHECKSUM
	    kfree((char *)pager->checksum, PAGEMAP_SIZE(pager->size));
#endif	 /* CHECKSUM */
	}
}

/*
 * Move all the pages of a PAGER that live in a
 * partition PINDEX somewhere else.
 * Pager should be write-locked, partition too.
 * Returns FALSE if it could not do it, but
 * some pages might have been moved nonetheless.
 */
boolean_t
pager_realloc(pager, pindex)
	register dpager_t	pager;
	p_index_t		pindex;
{
	register dp_map_t	map, emap;
	vm_size_t		size;
	union dp_map		block;

	size = pager->size;	/* in pages */
	map = pager->map;

	if (INDIRECT_PAGEMAP(size)) {
		for (emap = &map[INDIRECT_PAGEMAP_ENTRIES(size)];
		     map < emap; map++) {

			register dp_map_t	map2, emap2;

			if ((map2 = map->indirect) == 0)
				continue;

			for (emap2 = &map2[PAGEMAP_ENTRIES];
			     map2 < emap2; map2++)
				if ( map2->block.p_index == pindex) {

					block = pager_move_page(*map2);
					if (!no_block(block))
						*map2 = block;
					else
						return FALSE;
				}

		}
		goto ok;
	}

	/* A small one */
	for (emap = &map[size]; map < emap; map++)
		if (map->block.p_index == pindex) {
			block = pager_move_page(*map);
			if (!no_block(block))
				*map = block;
			else
				return FALSE;
		}
ok:
	pager->cur_partition = choose_partition(0, P_INDEX_INVALID);
	return TRUE;
}

/*

 */

/*
 * Read/write routines.
 *
 * Result codes returned by default_read and default_write.
 */
#define	PAGER_SUCCESS	0	/* transfer completed */
#define	PAGER_ABSENT	1	/* default_read: no block is mapped at the offset */
#define	PAGER_ERROR	2	/* no block could be obtained, or the I/O failed */

/*
 * Read data from a default pager.  Addr is the address of a buffer
 * to fill.  Out_addr returns the buffer that contains the data;
 * if it is different from <addr>, it must be deallocated after use.
 */
int
default_read(ds, addr, size, offset, out_addr, deallocate)
	register dpager_t	ds;
	vm_offset_t		addr;	/* pointer to block to fill */
	register vm_size_t	size;
	register vm_offset_t	offset;
	vm_offset_t		*out_addr;
				/* returns pointer to data */
	boolean_t		deallocate;
{
	register union dp_map	block;
	vm_offset_t	raddr;
	vm_size_t	rsize;
	register int	rc;
	boolean_t	first_time;
	register partition_t	part;
#ifdef	CHECKSUM
	vm_size_t	original_size = size;
#endif	 /* CHECKSUM */
	vm_offset_t	original_offset = offset;

	/*
	 * Find the block in the paging partition
	 */
	block = pager_read_offset(ds, offset);
	if ( no_block(block) )
	    return (PAGER_ABSENT);

	/*
	 * Read it, trying for the entire page.
	 */
	offset = ptoa(block.block.p_offset);
ddprintf ("default_read(%x,%x,%x,%d)\n",addr,size,offset,block.block.p_index);
	part   = partition_of(block.block.p_index);
	first_time = TRUE;
	*out_addr = addr;

	do {
	    rc = page_read_file_direct(part->file,
				       offset,
				       size,
				       &raddr,
				       &rsize);
	    if (rc != 0)
		return (PAGER_ERROR);

	    /*
	     * If we got the entire page on the first read, return it.
	     */
	    if (first_time && rsize == size) {
		*out_addr = raddr;
		break;
	    }
	    /*
	     * Otherwise, copy the data into the
	     * buffer we were passed, and try for
	     * the next piece.
	     */
	    first_time = FALSE;
	    bcopy((char *)raddr, (char *)addr, rsize);
	    addr += rsize;
	    offset += rsize;
	    size -= rsize;
	} while (size != 0);

#if	USE_PRECIOUS
	if (deallocate)
		pager_release_offset(ds, original_offset);
#endif	/*USE_PRECIOUS*/

#ifdef	CHECKSUM
	{
	    int	write_checksum,
		read_checksum;

	    write_checksum = pager_get_checksum(ds, original_offset);
	    read_checksum = compute_checksum(*out_addr, original_size);
	    if (write_checksum != read_checksum) {
		panic(
  "PAGER CHECKSUM ERROR: offset 0x%x, written 0x%x, read 0x%x",
		    original_offset, write_checksum, read_checksum);
	    }
	}
#endif	 /* CHECKSUM */
	return (PAGER_SUCCESS);
}

int
default_write(ds, addr, size, offset)
	register dpager_t	ds;
	register vm_offset_t	addr;
	register vm_size_t	size;
	register vm_offset_t	offset;
{
	register union dp_map	block;
	partition_t		part;
	vm_size_t		wsize;
	register int		rc;

	ddprintf ("default_write: pager offset %x\n", offset);

	/*
	 * Find block in paging partition
	 */
	block = pager_write_offset(ds, offset);
	if ( no_block(block) )
	    return (PAGER_ERROR);

#ifdef	CHECKSUM
	/*
	 * Save checksum
	 */
	{
	    int	checksum;

	    checksum = compute_checksum(addr, size);
	    pager_put_checksum(ds, offset, checksum);
	}
#endif	 /* CHECKSUM */
	offset = ptoa(block.block.p_offset);
ddprintf ("default_write(%x,%x,%x,%d)\n",addr,size,offset,block.block.p_index);
	part   = partition_of(block.block.p_index);

	/*
	 * There are various assumptions made here,we
	 * will not get into the next disk 'block' by
	 * accident. It might well be non-contiguous.
	 */
	do {
	    rc = page_write_file_direct(part->file,
					offset,
					addr,
					size,
					&wsize);
	    if (rc != 0) {
		dprintf("*** PAGER ERROR: default_write: ");
		dprintf("ds=0x%x addr=0x%x size=0x%x offset=0x%x resid=0x%x\n",
			ds, addr, size, offset, wsize);
		return (PAGER_ERROR);
	    }
	    addr += wsize;
	    offset += wsize;
	    size -= wsize;
	} while (size != 0);
	return (PAGER_SUCCESS);
}

/*
 * Tell whether paging object DS has a backing block mapped for
 * the page at OFFSET.
 */
boolean_t
default_has_page(ds, offset)
	dpager_t	ds;
	vm_offset_t	offset;
{
	if (no_block(pager_read_offset(ds, offset)))
	    return (0);
	return (1);
}

/*

 */

/*
 * Mapping between pager port and paging object.
 *
 * One of these exists per memory object.  The port name of the
 * pager port is derived from the address of the structure (see
 * pnameof/dnameof), so a port can be turned back into its
 * structure without a table lookup.
 */
struct dstruct {
	queue_chain_t	links;		/* Link in pager-port list */

	struct mutex	lock;		/* Lock for the structure */
	struct condition
			waiting_seqno,	/* someone waiting on seqno */
			waiting_read,	/* someone waiting on readers */
			waiting_write,	/* someone waiting on writers */
			waiting_refs;	/* someone waiting on refs */

	memory_object_t	pager;		/* Pager port */
	mach_port_seqno_t seqno;	/* Pager port sequence number */
	mach_port_t	pager_request;	/* Request port */
	mach_port_urefs_t request_refs;	/* Request port user-refs */
	mach_port_t	pager_name;	/* Name port */
	mach_port_urefs_t name_refs;	/* Name port user-refs */

	unsigned int	readers;	/* Reads in progress */
	unsigned int	writers;	/* Writes in progress */

	unsigned int	errors;		/* Pageout error count */
	struct dpager	dpager;		/* Actual pager */
};
typedef struct dstruct *	default_pager_t;
#define	DEFAULT_PAGER_NULL	((default_pager_t)0)

/*
 * Locking of a struct dstruct.  These expand to nothing when the
 * pager is built without PARALLEL.
 */
#if	PARALLEL
#define	dstruct_lock_init(ds)	mutex_init(&ds->lock)
#define	dstruct_lock(ds)	mutex_lock(&ds->lock)
#define	dstruct_unlock(ds)	mutex_unlock(&ds->lock)
#else	/* PARALLEL */
#define	dstruct_lock_init(ds)
#define	dstruct_lock(ds)
#define	dstruct_unlock(ds)
#endif	/* PARALLEL */

/*
 * List of all pagers.  A specific pager is
 * found directly via its port, this list is
 * only used for monitoring purposes by the
 * default_pager_object* calls
 */
struct pager_port {
	queue_head_t	queue;		/* every live struct dstruct */
	struct mutex	lock;		/* protects queue, count and leak_queue */
	int		count;	/* saves code */
	queue_head_t	leak_queue;	/* structures set aside after a port-name
					   collision in seqnos_memory_object_create;
					   freed in default_pager_no_senders */
} all_pagers;

/* Initialize all_pagers: empty queues, zero count. */
#define pager_port_list_init()					\
{								\
	mutex_init(&all_pagers.lock);				\
	queue_init(&all_pagers.queue);				\
	queue_init(&all_pagers.leak_queue);			\
	all_pagers.count = 0;					\
}

/*
 * Add DS to the list of all pagers and bump the pager count.
 * PORT is not used.
 */
void pager_port_list_insert(port, ds)
	mach_port_t port;
	default_pager_t	ds;
{
	mutex_lock(&all_pagers.lock);
	all_pagers.count += 1;
	queue_enter(&all_pagers.queue, ds, default_pager_t, links);
	mutex_unlock(&all_pagers.lock);
}

/* given a data structure return a good port-name to associate it to */
/* (the address of the structure plus one) */
#define	pnameof(_x_)	(((vm_offset_t)(_x_))+1)
/* reverse, assumes no-odd-pointers */
/* (clears the low bit of the port name) */
#define	dnameof(_x_)	(((vm_offset_t)(_x_))&~1)

/* The magic typecast */
/*
 * Turn a pager port name back into its default_pager_t.  Yields
 * DEFAULT_PAGER_NULL if the port is not valid or if the structure
 * the name decodes to does not record this port as its pager port.
 * NOTE(review): the decoded address is dereferenced without any
 * other validation, and _port_ is evaluated more than once.
 */
#define pager_port_lookup(_port_)					\
	((! MACH_PORT_VALID(_port_) ||					\
	 ((default_pager_t)dnameof(_port_))->pager != (_port_)) ?	\
		DEFAULT_PAGER_NULL : (default_pager_t)dnameof(_port_))

/*
 * Take DS off the list of all pagers and drop the pager count.
 */
void pager_port_list_delete(ds)
	default_pager_t ds;
{
	mutex_lock(&all_pagers.lock);
	all_pagers.count -= 1;
	queue_remove(&all_pagers.queue, ds, default_pager_t, links);
	mutex_unlock(&all_pagers.lock);
}

/*
 * Destroy a paging partition.
 * XXX this is not re-entrant XXX
 *
 * The partition is identified by NAME.  It is first marked as
 * going away, then every pager on the all_pagers list is asked to
 * move its pages off the partition.
 *
 * Returns KERN_INVALID_ARGUMENT if no partition matches NAME.
 * Returns KERN_SUCCESS if all pages were moved; the partition
 * structure and its bitmap are then freed and the partition's file
 * is handed back through PP_PRIVATE.
 * Returns KERN_FAILURE if some pager could not be fully relocated;
 * the partition is then put back into service.
 */
kern_return_t
destroy_paging_partition(name, pp_private)
	char		*name;
	void **pp_private;
{
	register unsigned int	id = part_id(name);
	register partition_t	part;
	boolean_t		all_ok = TRUE;
	default_pager_t		entry;
	int			pindex;

	/*
	 * Find and take partition out of list
	 * This prevents choose_partition from
	 * getting in the way.
	 */
	mutex_lock(&all_partitions.lock);
	for (pindex = 0; pindex < all_partitions.n_partitions; pindex++) {
		part = partition_of(pindex);
		if (part && (part->id == id)) break;
	}
	if (pindex == all_partitions.n_partitions) {
		mutex_unlock(&all_partitions.lock);
		return KERN_INVALID_ARGUMENT;
	}
	part->going_away = TRUE;
	mutex_unlock(&all_partitions.lock);

	/*
	 * This might take a while..
	 */
all_over_again:
#if debug
dprintf("Partition x%x (id x%x) for %s, all_ok %d\n", part, id, name, all_ok);
#endif
	all_ok = TRUE;
	mutex_lock(&part->p_lock);

	mutex_lock(&all_pagers.lock);
	queue_iterate(&all_pagers.queue, entry, default_pager_t, links) {

		dstruct_lock(entry);

		/*
		 * Only try for the pager lock.  If it is busy, drop
		 * everything we hold, yield, and restart the scan.
		 */
		if (!mutex_try_lock(&entry->dpager.lock)) {

			dstruct_unlock(entry);
			mutex_unlock(&all_pagers.lock);
			mutex_unlock(&part->p_lock);

			/* yield the processor */
			(void) thread_switch(MACH_PORT_NULL,
					     SWITCH_OPTION_NONE, 0);

			goto all_over_again;

		}

		/*
		 * See if we can relocate all the pages of this object
		 * currently on this partition on some other partition
		 */
		all_ok = pager_realloc(&entry->dpager, pindex);

		mutex_unlock(&entry->dpager.lock);
		dstruct_unlock(entry);

		if (!all_ok)  break;

	}
	mutex_unlock(&all_pagers.lock);

	if (all_ok) {
		/* No need to unlock partition, there are no refs left */

		set_partition_of(pindex, 0);
		*pp_private = part->file;
		kfree(part->bitmap, howmany(part->total_size, NB_BM) * sizeof(bm_entry_t));
		kfree(part, sizeof(struct part));
		dprintf("%s Removed paging partition %s\n", my_name, name);
		return KERN_SUCCESS;
	}

	/*
	 * Put partition back in.  The partition stays in use, so the
	 * lock taken above must be released here; otherwise the next
	 * attempt to lock this partition would block forever.
	 */
	part->going_away = FALSE;
	mutex_unlock(&part->p_lock);

	return KERN_FAILURE;
}


/*
 *	We use the sequence numbers on requests to regulate
 *	our parallelism.  In general, we allow multiple reads and writes
 *	to proceed in parallel, with the exception that reads must
 *	wait for previous writes to finish.  (Because the kernel might
 *	generate a data-request for a page on the heels of a data-write
 *	for the same page, and we must avoid returning stale data.)
 *	terminate requests wait for preceding reads and writes to finish.
 */

/*
 * Debugging counters.  default_pager_total counts calls to
 * pager_port_lock; each of the others counts the condition waits
 * done in the corresponding pager_port_* routine.
 */
unsigned int default_pager_total = 0;		/* debugging */
unsigned int default_pager_wait_seqno = 0;	/* debugging */
unsigned int default_pager_wait_read = 0;	/* debugging */
unsigned int default_pager_wait_write = 0;	/* debugging */
unsigned int default_pager_wait_refs = 0;	/* debugging */

#if	PARALLEL
/*
 * Lock the pager and block until its sequence number reaches
 * SEQNO.  Returns with the pager locked.
 */
void pager_port_lock(ds, seqno)
	default_pager_t ds;
	mach_port_seqno_t seqno;
{
	default_pager_total += 1;
ddprintf ("pager_port_lock <%p>: <%p>: %d: 1\n", &ds, ds, seqno);
	dstruct_lock(ds);
ddprintf ("pager_port_lock <%p>: <%p>: %d: 2\n", &ds, ds, seqno);
	for (;;) {
		if (ds->seqno == seqno)
			break;
ddprintf ("pager_port_lock <%p>: <%p>: %d: 3\n", &ds, ds, seqno);
		default_pager_wait_seqno += 1;
		condition_wait(&ds->waiting_seqno, &ds->lock);
ddprintf ("pager_port_lock <%p>: <%p>: %d: 4\n", &ds, ds, seqno);
	}
}

/*
 * Advance the pager's sequence number, unlock the pager, and
 * wake everybody waiting for a sequence number.
 */
void pager_port_unlock(ds)
	default_pager_t ds;
{
	ds->seqno += 1;
ddprintf ("pager_port_unlock <%p>: <%p>: seqno => %d\n", &ds, ds, ds->seqno);
	dstruct_unlock(ds);
ddprintf ("pager_port_unlock <%p>: <%p>: 2\n", &ds, ds);
	condition_broadcast(&ds->waiting_seqno);
ddprintf ("pager_port_unlock <%p>: <%p>: 3\n", &ds, ds);
}

/*
 * Account for one more read in progress.
 * The caller must hold the pager lock.
 */
void pager_port_start_read(ds)
	default_pager_t ds;
{
	ds->readers += 1;
}

/*
 * Block until no reads are in progress.  The pager lock is handed
 * to condition_wait for each wait, so it is released and retaken
 * whenever waiting is actually needed.
 */
void pager_port_wait_for_readers(ds)
	default_pager_t ds;
{
	for (;;) {
		if (ds->readers == 0)
			break;
		default_pager_wait_read += 1;
		condition_wait(&ds->waiting_read, &ds->lock);
	}
}

/*
 * Finish a read.  Pager is unlocked and returns unlocked.
 */
void pager_port_finish_read(ds)
	default_pager_t ds;
{
	dstruct_lock(ds);
	if (--ds->readers == 0) {
		dstruct_unlock(ds);
		condition_broadcast(&ds->waiting_read);
	}
	else {
		dstruct_unlock(ds);
	}
}

/*
 * Account for one more write in progress.
 * The caller must hold the pager lock.
 */
void pager_port_start_write(ds)
	default_pager_t ds;
{
	ds->writers += 1;
}

/*
 * Block until no writes are in progress.  The pager lock is handed
 * to condition_wait for each wait, so it is released and retaken
 * whenever waiting is actually needed.
 */
void pager_port_wait_for_writers(ds)
	default_pager_t ds;
{
	for (;;) {
		if (ds->writers == 0)
			break;
		default_pager_wait_write += 1;
		condition_wait(&ds->waiting_write, &ds->lock);
	}
}

/*
 * Finish a write.  Pager is unlocked and returns unlocked.
 */
void pager_port_finish_write(ds)
	default_pager_t ds;
{
	dstruct_lock(ds);
	if (--ds->writers == 0) {
		dstruct_unlock(ds);
		condition_broadcast(&ds->waiting_write);
	}
	else {
		dstruct_unlock(ds);
	}
}

/*
 * Wait for a concurrent default_pager_objects to finish with the
 * name port: block for as long as name_refs is zero.  The pager
 * lock is handed to condition_wait for each wait, so it is released
 * and retaken whenever waiting is actually needed.
 */
void pager_port_wait_for_refs(ds)
	default_pager_t ds;
{
	for (;;) {
		if (ds->name_refs != 0)
			break;
		default_pager_wait_refs += 1;
		condition_wait(&ds->waiting_refs, &ds->lock);
	}
}

/*
 * Name references have been created: wake every thread blocked
 * in pager_port_wait_for_refs.
 */
void pager_port_finish_refs(ds)
	default_pager_t ds;
{
	condition_broadcast(&(ds->waiting_refs));
}

#else	/* PARALLEL */

/*
 * Without PARALLEL, all of the pager_port synchronisation
 * operations expand to nothing.
 */
#define	pager_port_lock(ds,seqno)
#define	pager_port_unlock(ds)
#define	pager_port_start_read(ds)
#define	pager_port_wait_for_readers(ds)
#define	pager_port_finish_read(ds)
#define	pager_port_start_write(ds)
#define	pager_port_wait_for_writers(ds)
#define	pager_port_finish_write(ds)
#define pager_port_wait_for_refs(ds)
#define pager_port_finish_refs(ds)

#endif	/* PARALLEL */

/*
 *	Default pager.
 *
 *	Task-wide ports and port sets used by the server threads.
 */
task_t		default_pager_self;	/* Our task port. */

mach_port_t default_pager_default_port;	/* Port for memory_object_create. */

/* We catch exceptions on ourself & startup using this port. */
mach_port_t default_pager_exception_port;
/* We receive bootstrap requests on this port. */
mach_port_t default_pager_bootstrap_port;

/* default_pager_add moves each pager port into one of the first two sets. */
mach_port_t default_pager_internal_set;	/* Port set for internal objects. */
mach_port_t default_pager_external_set;	/* Port set for external objects. */
mach_port_t default_pager_default_set;	/* Port set for "default" thread. */

/*
 * Per-server-thread state.  seqnos_memory_object_data_request
 * fetches it with cthread_data(cthread_self()) and passes
 * dpt_buffer to default_read as the buffer to fill.
 */
typedef struct default_pager_thread {
	cthread_t	dpt_thread;	/* Server thread. */
	vm_offset_t	dpt_buffer;	/* Read buffer. */
	boolean_t	dpt_internal;	/* Do we handle internal objects? */
} default_pager_thread_t;

/*
 * Initial number of server threads of each kind.
 */
#if	PARALLEL
	/* determine number of threads at run time */
#define DEFAULT_PAGER_INTERNAL_COUNT	(0)

#else	/* PARALLEL */
#define	DEFAULT_PAGER_INTERNAL_COUNT	(1)
#endif	/* PARALLEL */

/* Memory created by default_pager_object_create should mostly be resident. */
#define DEFAULT_PAGER_EXTERNAL_COUNT	(1)

unsigned int default_pager_internal_count = DEFAULT_PAGER_INTERNAL_COUNT;
					/* Number of "internal" threads. */
unsigned int default_pager_external_count = DEFAULT_PAGER_EXTERNAL_COUNT;
					/* Number of "external" threads. */

/*
 * Allocate and initialize the structure for a new memory object
 * of SIZE bytes.  The structure is zero-filled, its lock is
 * initialized, and its paging object is set up on a partition.
 * Panics if the structure cannot be allocated.
 */
default_pager_t pager_port_alloc(size)
	vm_size_t size;
{
	default_pager_t	fresh;
	p_index_t	where;

	fresh = (default_pager_t) kalloc(sizeof *fresh);
	if (fresh == DEFAULT_PAGER_NULL)
	    panic("%spager_port_alloc",my_name);
	bzero((char *) fresh, sizeof *fresh);

	dstruct_lock_init(fresh);

	/*
	 * Prefer a partition with room for the whole object.
	 * Failing that, note the overcommitment and take any
	 * partition.  With no partition at all we carry on with
	 * an invalid index, so that individual objects fail at
	 * pageout time instead of the whole system panicking now.
	 */
	where = choose_partition(size, P_INDEX_INVALID);
	if (no_partition(where)) {
		overcommitted(FALSE, atop(size));
		where = choose_partition(0,P_INDEX_INVALID);
#if debug
		if (no_partition(where))
			dprintf("%s No paging space at all !!\n", my_name);
#endif
	}
	pager_alloc(&fresh->dpager, where, size);

	return fresh;
}

/*
 * Limit on the user references pager_port_check_request lets
 * accumulate on a request port before trimming them back to one.
 */
mach_port_urefs_t default_pager_max_urefs = 10000;

/*
 * Count one more user reference on the pager_request port and,
 * once the count exceeds default_pager_max_urefs, give all but one
 * of them back to the kernel.
 * The pager must be locked.  It is unlocked and locked again
 * around the kernel call when one is needed.
 */
void pager_port_check_request(ds, pager_request)
	default_pager_t ds;
	mach_port_t pager_request;
{
	mach_port_delta_t excess;
	kern_return_t kr;

	assert(ds->pager_request == pager_request);

	ds->request_refs++;
	if (ds->request_refs <= default_pager_max_urefs)
		return;

	/* Keep a single reference; everything above that goes. */
	excess = 1 - ds->request_refs;
	ds->request_refs = 1;

	dstruct_unlock(ds);

	kr = mach_port_mod_refs(default_pager_self, pager_request,
				MACH_PORT_RIGHT_SEND, excess);
	if (kr != KERN_SUCCESS)
		panic("%spager_port_check_request",my_name);

	dstruct_lock(ds);
}

/*
 * Start serving the pager port of DS: request a no-senders
 * notification on it and move it into the internal or external
 * port set, as selected by INTERNAL.  Panics if either kernel
 * call fails or if a notification was already registered.
 */
void default_pager_add(ds, internal)
	default_pager_t ds;
	boolean_t internal;
{
	static char		here[] = "%sdefault_pager_add";
	mach_port_t		port = ds->pager;
	mach_port_t		target_set;
	mach_port_t		old_notify;
	mach_port_mscount_t 	threshold;
	kern_return_t		kr;

	/*
	 *	The make-send count of the port is zero at this point:
	 *	we either just created the port or just received it
	 *	in a memory_object_create request.
	 *
	 *	Internal: sync 0, which may generate a no-senders
	 *	notification immediately.
	 *	External: sync 1, which delays the notification until
	 *	a send right has been created.
	 */
	threshold = internal ? 0 : 1;
	target_set = internal ? default_pager_internal_set
			      : default_pager_external_set;

	kr = mach_port_request_notification(default_pager_self, port,
				MACH_NOTIFY_NO_SENDERS, threshold,
				port, MACH_MSG_TYPE_MAKE_SEND_ONCE,
				&old_notify);
	if (kr != KERN_SUCCESS || old_notify != MACH_PORT_NULL)
		panic(here,my_name);

	kr = mach_port_move_member(default_pager_self, port, target_set);
	if (kr != KERN_SUCCESS)
		panic(here,my_name);
}

/*
 *	Routine:	memory_object_create
 *	Purpose:
 *		Handle requests for memory objects from the
 *		kernel.
 *	Notes:
 *		Because we only give out the default memory
 *		manager port to the kernel, we don't have to
 *		be so paranoid about the contents.
 */
kern_return_t
seqnos_memory_object_create(old_pager, seqno, new_pager, new_size,
			    new_pager_request, new_pager_name, new_page_size)
	mach_port_t	old_pager;
	mach_port_seqno_t seqno;
	mach_port_t	new_pager;
	vm_size_t	new_size;
	mach_port_t	new_pager_request;
	mach_port_t	new_pager_name;
	vm_size_t	new_page_size;
{
	register default_pager_t	ds;
	kern_return_t			kr;

	assert(old_pager == default_pager_default_port);
	assert(MACH_PORT_VALID(new_pager_request));
	assert(MACH_PORT_VALID(new_pager_name));
	assert(new_page_size == vm_page_size);

	ds = pager_port_alloc(new_size);
rename_it:
	kr = mach_port_rename(	default_pager_self,
				new_pager, (mach_port_t)pnameof(ds));
	if (kr != KERN_SUCCESS) {
		default_pager_t	ds1;

		if (kr != KERN_NAME_EXISTS)
			panic("%s m_o_create", my_name);
		ds1 = (default_pager_t) kalloc(sizeof *ds1);
		*ds1 = *ds;
		mutex_lock(&all_pagers.lock);
		queue_enter(&all_pagers.leak_queue, ds, default_pager_t, links);
		mutex_unlock(&all_pagers.lock);
		ds = ds1;
		goto rename_it;
	}

	new_pager = (mach_port_t) pnameof(ds);

	/*
	 *	Set up associations between these ports
	 *	and this default_pager structure
	 */

	ds->pager = new_pager;
	ds->pager_request = new_pager_request;
	ds->request_refs = 1;
	ds->pager_name = new_pager_name;
	ds->name_refs = 1;

	/*
	 *	After this, other threads might receive requests
	 *	for this memory object or find it in the port list.
	 */

	pager_port_list_insert(new_pager, ds);
	default_pager_add(ds, TRUE);

	return(KERN_SUCCESS);
}

/*
 * Copy strategy passed to memory_object_set_attributes by
 * seqnos_memory_object_init.
 */
memory_object_copy_strategy_t default_pager_copy_strategy =
					MEMORY_OBJECT_COPY_DELAY;

/*
 * memory_object_init: attach request and name ports to an existing
 * pager and tell the kernel the object's attributes.
 * Panics if PAGER is not one of ours, if it already has a request
 * port, or if setting the attributes fails.
 */
kern_return_t
seqnos_memory_object_init(pager, seqno, pager_request, pager_name,
			  pager_page_size)
	mach_port_t	pager;
	mach_port_seqno_t seqno;
	mach_port_t	pager_request;
	mach_port_t	pager_name;
	vm_size_t	pager_page_size;
{
	static char		 here[] = "%sinit";
	register default_pager_t ds;
	kern_return_t		 status;

	assert(MACH_PORT_VALID(pager_request));
	assert(MACH_PORT_VALID(pager_name));
	assert(pager_page_size == vm_page_size);

	ds = pager_port_lookup(pager);
	if (ds == DEFAULT_PAGER_NULL)
	    panic(here, my_name);
	pager_port_lock(ds, seqno);

	/* An object may only be initialized while it has no request port. */
	if (ds->pager_request != MACH_PORT_NULL)
	    panic(here, my_name);

	/* One user reference is held on each of the two new ports. */
	ds->pager_name = pager_name;
	ds->name_refs = 1;
	ds->pager_request = pager_request;
	ds->request_refs = 1;

	/*
	 *	Should the kernel terminate the object right away,
	 *	the pager_request port still stays around until the
	 *	terminate request is processed, and that cannot
	 *	happen before the object is unlocked below.
	 */

	status = memory_object_set_attributes(pager_request,
					      TRUE,
					      FALSE,	/* do not cache */
					      default_pager_copy_strategy);
	if (status != KERN_SUCCESS)
	    panic(here, my_name);

	pager_port_unlock(ds);

	return(KERN_SUCCESS);
}

/*
 * memory_object_terminate: the kernel is done with the object.
 * Waits for reads and writes in progress to drain, detaches the
 * request and name ports from the pager, and then gives up all of
 * our send and receive rights on those two ports.  The pager port
 * itself is left alone.  Panics if PAGER is not one of ours or if
 * any of the port operations fails.
 */
kern_return_t
seqnos_memory_object_terminate(pager, seqno, pager_request, pager_name)
	mach_port_t	pager;
	mach_port_seqno_t seqno;
	mach_port_t	pager_request;
	mach_port_t	pager_name;
{
	register default_pager_t	ds;
	mach_port_urefs_t		request_refs, name_refs;
	kern_return_t			kr;
	static char			here[] = "%sterminate";

	/*
	 *	pager_request and pager_name are receive rights,
	 *	not send rights.
	 */

	ds = pager_port_lookup(pager);
	if (ds == DEFAULT_PAGER_NULL)
		panic(here, my_name);
ddprintf ("seqnos_memory_object_terminate <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n",
	&kr, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno);
	pager_port_lock(ds, seqno);

	/*
	 *	Wait for read and write requests to terminate.
	 */

	pager_port_wait_for_readers(ds);
	pager_port_wait_for_writers(ds);

	/*
	 *	After memory_object_terminate both memory_object_init
	 *	and a no-senders notification are possible, so we need
	 *	to clean up the request and name ports but leave
	 *	the pager port.
	 *
	 *	A concurrent default_pager_objects might be allocating
	 *	more references for the name port.  In this case,
	 *	we must first wait for it to finish.
	 */

	pager_port_wait_for_refs(ds);

	/*
	 *	Remember how many user references we hold on each
	 *	port, and clear the pager's record of both, while
	 *	the pager is still locked.
	 */
	ds->pager_request = MACH_PORT_NULL;
	request_refs = ds->request_refs;
	ds->request_refs = 0;
	assert(ds->pager_name == pager_name);
	ds->pager_name = MACH_PORT_NULL;
	name_refs = ds->name_refs;
	ds->name_refs = 0;
ddprintf ("seqnos_memory_object_terminate <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n",
	&kr, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held);
	pager_port_unlock(ds);

	/*
	 *	Now we deallocate our various port rights.
	 */

	kr = mach_port_mod_refs(default_pager_self, pager_request,
				MACH_PORT_RIGHT_SEND, -request_refs);
	if (kr != KERN_SUCCESS)
	    panic(here,my_name);

	kr = mach_port_mod_refs(default_pager_self, pager_request,
				MACH_PORT_RIGHT_RECEIVE, -1);
	if (kr != KERN_SUCCESS)
	    panic(here,my_name);

	kr = mach_port_mod_refs(default_pager_self, pager_name,
				MACH_PORT_RIGHT_SEND, -name_refs);
	if (kr != KERN_SUCCESS)
	    panic(here,my_name);

	kr = mach_port_mod_refs(default_pager_self, pager_name,
				MACH_PORT_RIGHT_RECEIVE, -1);
	if (kr != KERN_SUCCESS)
	    panic(here,my_name);

	return (KERN_SUCCESS);
}

/*
 * No-senders notification for a pager port: nobody can name the
 * memory object any more, so destroy it.  The object is taken off
 * the pager list, its backing store is released, the receive right
 * for the port is dropped, and the structure is freed.  Structures
 * parked on all_pagers.leak_queue are freed as well.
 * Panics if PAGER is not one of ours, if the object still has a
 * request port, or if dropping the receive right fails.
 */
void default_pager_no_senders(pager, seqno, mscount)
	memory_object_t pager;
	mach_port_seqno_t seqno;
	mach_port_mscount_t mscount;
{
	register default_pager_t ds;
	kern_return_t		 kr;
	static char		 here[] = "%sno_senders";

	/*
	 *	Because we don't give out multiple send rights
	 *	for a memory object, there can't be a race
	 *	between getting a no-senders notification
	 *	and creating a new send right for the object.
	 *	Hence we don't keep track of mscount.
	 */


	ds = pager_port_lookup(pager);
	if (ds == DEFAULT_PAGER_NULL)
		panic(here,my_name);
	pager_port_lock(ds, seqno);

	/*
	 *	We shouldn't get a no-senders notification
	 *	when the kernel has the object cached.
	 */

	if (ds->pager_request != MACH_PORT_NULL)
		panic(here,my_name);

	/*
	 *	Unlock the pager (though there should be no one
	 *	waiting for it).
	 */
	/* plain unlock: the sequence number is not advanced here */
	dstruct_unlock(ds);

	/*
	 *	Remove the memory object port association, and then
	 *	the destroy the port itself.  We must remove the object
	 *	from the port list before deallocating the pager,
	 *	because of default_pager_objects.
	 */

	pager_port_list_delete(ds);
	pager_dealloc(&ds->dpager);

	kr = mach_port_mod_refs(default_pager_self, pager,
				MACH_PORT_RIGHT_RECEIVE, -1);
	if (kr != KERN_SUCCESS)
		panic(here,my_name);

	/*
	 * Do this *after* deallocating the port name
	 */
	kfree((char *) ds, sizeof(*ds));

	/*
	 * Recover memory that we might have wasted because
	 * of name conflicts
	 */
	mutex_lock(&all_pagers.lock);

	/* ds is reused below as the cursor over the leak queue */
	while (!queue_empty(&all_pagers.leak_queue)) {

		ds = (default_pager_t) queue_first(&all_pagers.leak_queue);
		queue_remove_first(&all_pagers.leak_queue, ds, default_pager_t, links);
		kfree((char *) ds, sizeof(*ds));
	}

	mutex_unlock(&all_pagers.lock);
}

/*
 * Statistics.  default_pager_pagein_count is bumped by
 * seqnos_memory_object_data_request after each request it passes
 * to default_read, whatever the outcome.
 */
int		default_pager_pagein_count = 0;
int		default_pager_pageout_count = 0;

kern_return_t
seqnos_memory_object_data_request(pager, seqno, reply_to, offset,
				  length, protection_required)
	memory_object_t	pager;
	mach_port_seqno_t seqno;
	mach_port_t	reply_to;
	vm_offset_t	offset;
	vm_size_t	length;
	vm_prot_t	protection_required;
{
	default_pager_thread_t	*dpt;
	default_pager_t		ds;
	vm_offset_t		addr;
	unsigned int 		errors;
	kern_return_t		rc;
	static char		here[] = "%sdata_request";

	dpt = (default_pager_thread_t *) cthread_data(cthread_self());

	if (length != vm_page_size)
	    panic(here,my_name);

	ds = pager_port_lookup(pager);
	if (ds == DEFAULT_PAGER_NULL)
	    panic(here,my_name);
ddprintf ("seqnos_memory_object_data_request <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n",
	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno);
	pager_port_lock(ds, seqno);
	pager_port_check_request(ds, reply_to);
	pager_port_wait_for_writers(ds);
	pager_port_start_read(ds);

	/*
	 * Get error count while pager locked.
	 */
	errors = ds->errors;

ddprintf ("seqnos_memory_object_data_request <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n",
	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held);
	pager_port_unlock(ds);

	if (errors) {
	    dprintf("%s %s\n", my_name,
		   "dropping data_request because of previous paging errors");
	    (void) memory_object_data_error(reply_to,
				offset, vm_page_size,
				KERN_FAILURE);
	    goto done;
	}

	rc = default_read(&ds->dpager, dpt->dpt_buffer,
			  vm_page_size, offset,
			  &addr, protection_required & VM_PROT_WRITE);

	switch (rc) {
	    case PAGER_SUCCESS:
		if (addr != dpt->dpt_buffer) {
		    /*
		     *	Deallocates data buffer
		     */
		    (void) memory_object_data_supply(
		        reply_to, offset,
			addr, vm_page_size, TRUE,
			VM_PROT_NONE,
			FALSE, MACH_PORT_NULL);
		} else {
		    (void) memory_object_data_provided(
			reply_to, offset,
			addr, vm_page_size,
			VM_PROT_NONE);
		}
		break;

	    case PAGER_ABSENT:
		(void) memory_object_data_unavailable(
			reply_to,
			offset,
			vm_page_size);
		break;

	    case PAGER_ERROR:
		(void) memory_object_data_error(
			reply_to,
			offset,
			vm_page_size,
			KERN_FAILURE);
		break;
	}

	default_pager_pagein_count++;

    done:
	pager_port_finish_read(ds);
	return(KERN_SUCCESS);
}

/*
 * memory_object_data_initialize: for each page of the data handed
 * in, write it to backing store only if the object has no copy of
 * that page yet.  Write failures are logged and counted in the
 * object's error count.  The data region is deallocated before
 * returning.  The implementation is far from optimized, and also
 * assumes that the default_pager is single-threaded.
 */
kern_return_t
seqnos_memory_object_data_initialize(pager, seqno, pager_request,
				     offset, addr, data_cnt)
	memory_object_t	pager;
	mach_port_seqno_t seqno;
	mach_port_t	pager_request;
	register
	vm_offset_t	offset;
	register
	pointer_t	addr;
	vm_size_t	data_cnt;
{
	static char	here[] = "%sdata_initialize";
	default_pager_t	ds;
	vm_offset_t	done;

#ifdef	lint
	pager_request++;
#endif	 /* lint */

	ds = pager_port_lookup(pager);
	if (ds == DEFAULT_PAGER_NULL)
	    panic(here,my_name);
ddprintf ("seqnos_memory_object_data_initialize <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n",
	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno);
	pager_port_lock(ds, seqno);
	pager_port_check_request(ds, pager_request);
	pager_port_start_write(ds);
ddprintf ("seqnos_memory_object_data_initialize <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n",
	&ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held);
	pager_port_unlock(ds);

	/* One page at a time; pages we already have are left alone. */
	done = 0;
	while (done < data_cnt) {
	    if (!default_has_page(&ds->dpager, offset + done) &&
		default_write(&ds->dpager,
			      addr + done,
			      vm_page_size,
			      offset + done) != PAGER_SUCCESS) {
		dprintf("%s%s write error\n", my_name, here);
		dstruct_lock(ds);
		ds->errors++;
		dstruct_unlock(ds);
	    }
	    done += vm_page_size;
	}

	pager_port_finish_write(ds);
	if (vm_deallocate(default_pager_self, addr, data_cnt) != KERN_SUCCESS)
	    panic(here,my_name);

	return(KERN_SUCCESS);
}

/*
 * memory_object_data_write: split up the stuff coming in from
 * a memory_object_data_write call
 * into individual pages and pass them off to default_write.
 */
kern_return_t
seqnos_memory_object_data_write(pager, seqno, pager_request,
				offset, addr, data_cnt)
	memory_object_t	pager;
	mach_port_seqno_t seqno;
	mach_port_t	pager_request;
	register
	vm_offset_t	offset;
	register
	pointer_t	addr;
	vm_size_t	data_cnt;
{
	register
	vm_size_t	amount_sent;
	default_pager_t	ds;
	static char	here[] = "%sdata_write";
	int err;

#ifdef	lint
	pager_request++;
#endif	 /* lint */

ddprintf ("seqnos_memory_object_data_write <%p>: 1\n", &err);
	if ((data_cnt % vm_page_size) != 0)
	  {
	    ddprintf ("fail 1: %d %d\n", data_cnt, vm_page_size);
	    panic(here,my_name);
	  }


ddprintf ("seqnos_memory_object_data_write <%p>: 2\n", &err);
	ds = pager_port_lookup(pager);
ddprintf ("seqnos_memory_object_data_write <%p>: 3\n", &err);
	if (ds == DEFAULT_PAGER_NULL)
	  {
	    ddprintf ("fail 2: %d %d\n", pager, ds);
	    panic(here,my_name);
	  }

ddprintf ("seqnos_memory_object_data_write <%p>: 4\n", &err);
ddprintf ("seqnos_memory_object_data_write <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n",
	&err, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno);
	pager_port_lock(ds, seqno);
ddprintf ("seqnos_memory_object_data_write <%p>: 5\n", &err);
	pager_port_check_request(ds, pager_request);
ddprintf ("seqnos_memory_object_data_write <%p>: 6\n", &err);
	pager_port_start_write(ds);
ddprintf ("seqnos_memory_object_data_write <%p>: 7\n", &err);
ddprintf ("seqnos_memory_object_data_write <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n",
	&err, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held);
	pager_port_unlock(ds);

ddprintf ("seqnos_memory_object_data_write <%p>: 8\n", &err);
	for (amount_sent = 0;
	     amount_sent < data_cnt;
	     amount_sent += vm_page_size) {

	    register int result;

ddprintf ("seqnos_memory_object_data_write <%p>: 9\n", &err);
	    result = default_write(&ds->dpager,
			      addr + amount_sent,
			      vm_page_size,
			      offset + amount_sent);
ddprintf ("seqnos_memory_object_data_write <%p>: 10\n", &err);
	    if (result != KERN_SUCCESS) {
ddprintf ("seqnos_memory_object_data_write <%p>: 11\n", &err);
#if debug
		dprintf("%s WRITE ERROR on default_pageout:", my_name);
		dprintf(" pager=%x, offset=0x%x, length=0x%x, result=%d\n",
			pager, offset+amount_sent, vm_page_size, result);
#endif
		dstruct_lock(ds);
		ds->errors++;
		dstruct_unlock(ds);
	    }
	    default_pager_pageout_count++;
	}

ddprintf ("seqnos_memory_object_data_write <%p>: 12\n", &err);
	pager_port_finish_write(ds);
ddprintf ("seqnos_memory_object_data_write <%p>: 13\n", &err);
	err = vm_deallocate(default_pager_self, addr, data_cnt);
ddprintf ("seqnos_memory_object_data_write <%p>: 14\n", &err);
	if (err != KERN_SUCCESS)
	  {
	    ddprintf ("fail 3: %s %s %s %s\n", default_pager_self, addr, data_cnt, &err);

	      panic(here,my_name);
	  }


ddprintf ("seqnos_memory_object_data_write <%p>: 15\n", &err);
	return(KERN_SUCCESS);
}

/*
 * memory_object_copy: not handled by the default pager.
 * Any such message panics; no argument is examined.
 */
/*ARGSUSED*/
kern_return_t
seqnos_memory_object_copy(old_memory_object, seqno, old_memory_control,
			  offset, length, new_memory_object)
	memory_object_t	old_memory_object;
	mach_port_seqno_t seqno;
	memory_object_control_t
			old_memory_control;
	vm_offset_t	offset;
	vm_size_t	length;
	memory_object_t	new_memory_object;
{
	static char	here[] = "%scopy";

	panic(here, my_name);
	return(KERN_FAILURE);
}

/*
 * memory_object_lock_completed: not handled by the default pager.
 * Any such message panics; no argument is examined.
 */
kern_return_t
seqnos_memory_object_lock_completed(pager, seqno, pager_request,
				    offset, length)
	memory_object_t	pager;
	mach_port_seqno_t seqno;
	mach_port_t	pager_request;
	vm_offset_t	offset;
	vm_size_t	length;
{
	static char	here[] = "%slock_completed";

#ifdef	lint
	pager++; seqno++; pager_request++; offset++; length++;
#endif	 /* lint */

	panic(here, my_name);
	return KERN_FAILURE;
}

/*
 * memory_object_data_unlock: not handled by the default pager.
 * Any such message panics; no argument is examined.
 */
kern_return_t
seqnos_memory_object_data_unlock(pager, seqno, pager_request,
				 offset, addr, data_cnt)
	memory_object_t	pager;
	mach_port_seqno_t seqno;
	mach_port_t	pager_request;
	vm_offset_t	offset;
	pointer_t	addr;
	vm_size_t	data_cnt;
{
	static char	here[] = "%sdata_unlock";

	panic(here, my_name);
	return KERN_FAILURE;
}

/*
 * memory_object_supply_completed: not handled by the default pager.
 * Any such message panics; no argument is examined.
 */
kern_return_t
seqnos_memory_object_supply_completed(pager, seqno, pager_request,
				      offset, length,
				      result, error_offset)
	memory_object_t	pager;
	mach_port_seqno_t seqno;
	mach_port_t	pager_request;
	vm_offset_t	offset;
	vm_size_t	length;
	kern_return_t	result;
	vm_offset_t	error_offset;
{
	static char	here[] = "%ssupply_completed";

	panic(here, my_name);
	return KERN_FAILURE;
}

/*
 * memory_object_data_return: not handled by the default pager.
 * Any such message panics; no argument is examined.
 */
kern_return_t
seqnos_memory_object_data_return(pager, seqno, pager_request,
				 offset, addr, data_cnt,
				 dirty, kernel_copy)
	memory_object_t	pager;
	mach_port_seqno_t seqno;
	mach_port_t	pager_request;
	vm_offset_t	offset;
	pointer_t	addr;
	vm_size_t	data_cnt;
	boolean_t	dirty;
	boolean_t	kernel_copy;
{
	static char	here[] = "%sdata_return";

	panic(here, my_name);
	return KERN_FAILURE;
}

/*
 * memory_object_change_completed: not handled by the default pager.
 * Any such message panics; no argument is examined.
 */
kern_return_t
seqnos_memory_object_change_completed(pager, seqno, may_cache, copy_strategy)
	memory_object_t	pager;
	mach_port_seqno_t seqno;
	boolean_t	may_cache;
	memory_object_copy_strategy_t copy_strategy;
{
	static char	here[] = "%schange_completed";

	panic(here, my_name);
	return KERN_FAILURE;
}


/*
 * Demultiplexer for no-senders notifications.
 *
 * Returns FALSE, leaving the message untouched, unless it was sent to
 * a send-once right and carries the MACH_NOTIFY_NO_SENDERS id.
 * Otherwise the notification is passed to default_pager_no_senders,
 * the reply is suppressed by clearing out->msgh_remote_port, and TRUE
 * is returned.
 */
boolean_t default_pager_notify_server(in, out)
	mach_msg_header_t *in, *out;
{
	register mach_no_senders_notification_t *n;

	n = (mach_no_senders_notification_t *) in;

	/*
	 *	The only send-once rights we create are for
	 *	receiving no-more-senders notifications.
	 *	Hence, if we receive a message directed to
	 *	a send-once right, we can assume it is
	 *	a genuine no-senders notification from the kernel.
	 */

	if (n->not_header.msgh_bits !=
			MACH_MSGH_BITS(0, MACH_MSG_TYPE_PORT_SEND_ONCE))
		return FALSE;
	if (n->not_header.msgh_id != MACH_NOTIFY_NO_SENDERS)
		return FALSE;

	/* Sanity-check the header ... */
	assert(n->not_header.msgh_size == sizeof *n);
	assert(n->not_header.msgh_remote_port == MACH_PORT_NULL);

	/* ... and the type descriptor of the sender count. */
	assert(n->not_type.msgt_name == MACH_MSG_TYPE_INTEGER_32);
	assert(n->not_type.msgt_size == 32);
	assert(n->not_type.msgt_number == 1);
	assert(n->not_type.msgt_inline);
	assert(! n->not_type.msgt_longform);

	default_pager_no_senders(n->not_header.msgh_local_port,
				 n->not_header.msgh_seqno,
				 n->not_count);

	/* No reply is sent for a notification. */
	out->msgh_remote_port = MACH_PORT_NULL;
	return TRUE;
}

extern boolean_t seqnos_memory_object_server();
extern boolean_t seqnos_memory_object_default_server();
extern boolean_t default_pager_server();
extern boolean_t exc_server();
extern boolean_t bootstrap_server();
extern void bootstrap_compat();

/* Message buffer size handed to mach_msg_server by the object threads. */
mach_msg_size_t default_pager_msg_size_object = 128;

/*
 * Demultiplexer for messages arriving on memory object ports.
 * The message is offered, in order, to the memory_object server, the
 * memory_object_default server and the notification server; the first
 * one that accepts it wins.  Returns TRUE if some server accepted it.
 */
boolean_t
default_pager_demux_object(in, out)
	mach_msg_header_t	*in;
	mach_msg_header_t	*out;
{
	int	handled;

	/*
	 *	We receive memory_object_data_initialize messages in
	 *	the memory_object_default interface.
	 */

	ddprintf ("DPAGER DEMUX OBJECT <%p>: %d\n", in, in->msgh_id);

	if (seqnos_memory_object_server(in, out))
		handled = 1;
	else if (seqnos_memory_object_default_server(in, out))
		handled = 1;
	else if (default_pager_notify_server(in, out))
		handled = 1;
	else
		handled = 0;

	ddprintf ("DPAGER DEMUX OBJECT DONE <%p>: %d\n", in, in->msgh_id);
	return handled;
}

mach_msg_size_t default_pager_msg_size_default = 8 * 1024;

boolean_t
default_pager_demux_default(in, out)
	mach_msg_header_t	*in;
	mach_msg_header_t	*out;
{
	if (in->msgh_local_port == default_pager_default_port) {
		/*
		 *	We receive memory_object_create messages in
		 *	the memory_object_default interface.
		 */

int rval;
ddprintf ("DPAGER DEMUX DEFAULT <%p>: %d\n", in, in->msgh_id);
rval =
		 (seqnos_memory_object_default_server(in, out) ||
			default_pager_server(in, out));
ddprintf ("DPAGER DEMUX DEFAULT DONE <%p>: %d\n", in, in->msgh_id);
return rval;
	} else if (in->msgh_local_port == default_pager_exception_port) {
		/*
		 *	We receive exception messages for
		 *	ourself and the startup task.
		 */

		return exc_server(in, out);
	} else if (in->msgh_local_port == default_pager_bootstrap_port) {
		/*
		 *	We receive bootstrap requests
		 *	from the startup task.
		 */

		if (in->msgh_id == 999999) {
			/* compatibility for old bootstrap interface */

			bootstrap_compat(in, out);
			return TRUE;
		}

		return bootstrap_server(in, out);
	} else {
		panic(my_name);
		return FALSE;
	}
}

/*
 *	We use multiple threads, for two reasons.
 *
 *	First, memory objects created by default_pager_object_create
 *	are "external", instead of "internal".  This means the kernel
 *	sends data (memory_object_data_write) to the object pageable.
 *	To prevent deadlocks, the external and internal objects must
 *	be managed by different threads.
 *
 *	Second, the default pager uses synchronous IO operations.
 *	Spreading requests across multiple threads should
 *	recover some of the performance loss from synchronous IO.
 *
 *	We have 3+ threads.
 *	One receives memory_object_create and
 *	default_pager_object_create requests.
 *	One or more manage internal objects.
 *	One or more manage external objects.
 */

/*
 * Give the calling thread the privileges a pager thread needs:
 * it is bound to its cthread and then wired.
 */
void
default_pager_thread_privileges()
{
	/* attach kernel thread to cthread */
	cthread_wire();

	/* grab a kernel stack and memory allocation privileges */
	wire_thread();
}

/*
 * Body of the thread serving default_pager_default_set.
 * The thread takes pager privileges and then runs mach_msg_server with
 * default_pager_demux_default.  Any return from mach_msg_server is
 * treated as fatal.  arg is unused.
 */
any_t
default_pager_default_thread (arg)
     any_t arg;
{
	kern_return_t kr;

	default_pager_thread_privileges ();
	for (;;) {
		kr = mach_msg_server(default_pager_demux_default,
				     default_pager_msg_size_default,
				     default_pager_default_set);
		/*
		 * my_name holds no conversion, so passing kr after it
		 * would silently drop the error code; format it.
		 */
		panic("%s mach_msg_server: %d", my_name, kr);
	}
}



/*
 * Body of a thread that manages memory objects.
 *
 *	arg	the default_pager_thread_t describing this thread; it is
 *		also stored as the cthread's private data.
 *
 * Depending on dpt_internal the thread serves either the internal or
 * the external port set with default_pager_demux_object.  Any return
 * from mach_msg_server is treated as fatal.
 */
any_t
default_pager_thread(arg)
	any_t	arg;
{
	default_pager_thread_t *dpt = (default_pager_thread_t *) arg;
	mach_port_t pset;
	kern_return_t kr;

	cthread_set_data(cthread_self(), (any_t) dpt);


	/*
	 *	Threads handling external objects cannot have
	 *	privileges.  Otherwise a burst of data-requests for an
	 *	external object could empty the free-page queue,
	 *	because the fault code only reserves real pages for
	 *	requests sent to internal objects.
	 */

	if (dpt->dpt_internal) {
		default_pager_thread_privileges();
		pset = default_pager_internal_set;
	} else {
		pset = default_pager_external_set;
	}

	for (;;) {
		kr = mach_msg_server(default_pager_demux_object,
				     default_pager_msg_size_object,
				     pset);
		/*
		 * my_name holds no conversion, so passing kr after it
		 * would silently drop the error code; format it.
		 */
		panic("%s mach_msg_server: %d", my_name, kr);
	}
}

void
start_default_pager_thread(internal)
	boolean_t internal;
{
	default_pager_thread_t *dpt;
	kern_return_t kr;

	dpt = (default_pager_thread_t *) kalloc(sizeof *dpt);
	if (dpt == 0)
		panic(my_name);

	dpt->dpt_internal = internal;

	kr = vm_allocate(default_pager_self, &dpt->dpt_buffer,
			 vm_page_size, TRUE);
	if (kr != KERN_SUCCESS)
		panic(my_name);
	wire_memory(dpt->dpt_buffer, vm_page_size,
		    VM_PROT_READ|VM_PROT_WRITE);

	dpt->dpt_thread = cthread_fork(default_pager_thread, (any_t) dpt);
}

/*
 * One-time setup of the default pager task.
 *
 *	host_port	host port, used to register as default memory
 *			manager, to set up wiring and to query the host.
 *
 * Allocates the default, exception and bootstrap receive rights,
 * registers the default port with the kernel, arranges for wiring
 * privileges and, unless already set, chooses the number of
 * internal-object threads.  Any failure in port setup or registration
 * causes a panic.
 */
void
default_pager_initialize(host_port)
	mach_port_t	host_port;
{
	memory_object_t		DMM;
	kern_return_t		kr;

	/*
	 *	This task will become the default pager.
	 */
	default_pager_self = mach_task_self();

	/*
	 *	Initialize the "default pager" port.
	 */
	kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_RECEIVE,
				&default_pager_default_port);
	if (kr != KERN_SUCCESS)
		panic(my_name);

	/*
	 *	DMM is in/out: a valid port coming back is treated
	 *	as a fatal error.
	 */
	DMM = default_pager_default_port;
	kr = vm_set_default_memory_manager(host_port, &DMM);
	if ((kr != KERN_SUCCESS) || MACH_PORT_VALID(DMM))
		panic(my_name);

	/*
	 *	Initialize the exception port.
	 */
	kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_RECEIVE,
				&default_pager_exception_port);
	if (kr != KERN_SUCCESS)
		panic(my_name);

	/*
	 *	Initialize the bootstrap port.
	 */
	kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_RECEIVE,
				&default_pager_bootstrap_port);
	if (kr != KERN_SUCCESS)
		panic(my_name);

	/*
	 * Arrange for wiring privileges.
	 */
	wire_setup(host_port);

	/*
	 * Find out how many CPUs we have, to determine the number
	 * of threads to create.
	 */
	if (default_pager_internal_count == 0) {
		host_basic_info_data_t h_info;
		natural_t h_info_count;

		h_info_count = HOST_BASIC_INFO_COUNT;
		kr = host_info(host_port, HOST_BASIC_INFO,
			       (host_info_t)&h_info, &h_info_count);
		if (kr != KERN_SUCCESS) {
			/*
			 * h_info was not filled in; assume a single
			 * CPU rather than reading garbage.
			 */
			h_info.avail_cpus = 1;
		}

		/*
		 *	Random computation to get more parallelism on
		 *	multiprocessors.
		 */
		default_pager_internal_count =
		    (h_info.avail_cpus > 32 ? 32 : h_info.avail_cpus) / 4 + 3;
	}
}

/*
 * Initialize and Run the default pager
 */
void
default_pager()
{
	kern_return_t kr;
	int i;

	default_pager_thread_privileges();

	/*
	 * Wire down code, data, stack
	 */
	wire_all_memory();


	/*
	 *	Initialize the list of all pagers.
	 */
	pager_port_list_init();

	kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_PORT_SET,
				&default_pager_internal_set);
	if (kr != KERN_SUCCESS)
		panic(my_name);

	kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_PORT_SET,
				&default_pager_external_set);
	if (kr != KERN_SUCCESS)
		panic(my_name);

	kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_PORT_SET,
				&default_pager_default_set);
	if (kr != KERN_SUCCESS)
		panic(my_name);

	kr = mach_port_move_member(default_pager_self,
				   default_pager_default_port,
				   default_pager_default_set);
	if (kr != KERN_SUCCESS)
		panic(my_name);

	kr = mach_port_move_member(default_pager_self,
				   default_pager_exception_port,
				   default_pager_default_set);
	if (kr != KERN_SUCCESS)
		panic(my_name);

	kr = mach_port_move_member(default_pager_self,
				   default_pager_bootstrap_port,
				   default_pager_default_set);
	if (kr != KERN_SUCCESS)
		panic(my_name);

	/*
	 *	Now we create the threads that will actually
	 *	manage objects.
	 */

	for (i = 0; i < default_pager_internal_count; i++)
		start_default_pager_thread(TRUE);

	for (i = 0; i < default_pager_external_count; i++)
		start_default_pager_thread(FALSE);

	default_pager_default_thread(0); /* Become the default_pager server */
#if 0
	cthread_fork (default_pager_default_thread, 0);
	/*	cthread_exit (cthread_self ()); */
	thread_suspend (mach_thread_self ());
#endif
}

/*
 * Create an external object.
 */
kern_return_t default_pager_object_create(pager, mem_obj, size)
	mach_port_t pager;
	mach_port_t *mem_obj;
	vm_size_t size;
{
	default_pager_t ds;
	mach_port_t port;
	kern_return_t result;

	if (pager != default_pager_default_port)
		return KERN_INVALID_ARGUMENT;

	ds = pager_port_alloc(size);
rename_it:
	port = (mach_port_t) pnameof(ds);
	result = mach_port_allocate_name(default_pager_self,
				    MACH_PORT_RIGHT_RECEIVE, port);
	if (result != KERN_SUCCESS) {
		default_pager_t	ds1;

		if (result != KERN_NAME_EXISTS) return (result);

		ds1 = (default_pager_t) kalloc(sizeof *ds1);
		*ds1 = *ds;
		mutex_lock(&all_pagers.lock);
		queue_enter(&all_pagers.leak_queue, ds, default_pager_t, links);
		mutex_unlock(&all_pagers.lock);
		ds = ds1;
		goto rename_it;
	}

	/*
	 *	Set up associations between these ports
	 *	and this default_pager structure
	 */

	ds->pager = port;
	pager_port_list_insert(port, ds);
	default_pager_add(ds, FALSE);

	*mem_obj = port;
	return (KERN_SUCCESS);
}

/*
 * Report paging space usage.
 *
 *	pager	must be the default pager port
 *	infop	out: total and free space (page counts from
 *		paging_space_info run through ptoa) and the page size
 *
 * Returns KERN_INVALID_ARGUMENT for a wrong pager port.
 */
kern_return_t default_pager_info(pager, infop)
	mach_port_t pager;
	default_pager_info_t *infop;
{
	vm_size_t	pages_total;
	vm_size_t	pages_free;

	if (pager != default_pager_default_port)
		return KERN_INVALID_ARGUMENT;

	/* Sample the partition totals under the partition lock. */
	mutex_lock(&all_partitions.lock);
	paging_space_info(&pages_total, &pages_free);
	mutex_unlock(&all_partitions.lock);

	infop->dpi_page_size = vm_page_size;
	infop->dpi_total_space = ptoa(pages_total);
	infop->dpi_free_space = ptoa(pages_free);

	return KERN_SUCCESS;
}

/*
 * Return a description of every pager object and a port for each.
 *
 *	pager			must be the default pager port
 *	objectsp, ocountp	in: inline array and its capacity;
 *				out: array actually used and entry count
 *	portsp, pcountp		likewise for the port array
 *
 * If the inline arrays are smaller than the number of pagers sampled
 * at entry, out-of-line arrays are allocated with room for twice that
 * many entries.  Objects that cannot be examined right now are
 * reported as zeroed entries with a null port.
 *
 * Returns KERN_INVALID_ARGUMENT for a wrong pager port and
 * KERN_RESOURCE_SHORTAGE if an array cannot be allocated.
 */
kern_return_t default_pager_objects(pager, objectsp, ocountp, portsp, pcountp)
	mach_port_t			pager;
	default_pager_object_array_t	*objectsp;
	natural_t			*ocountp;
	mach_port_array_t		*portsp;
	natural_t			*pcountp;
{
	vm_offset_t			oaddr = 0;	/* memory for objects */
	vm_size_t			osize = 0;	/* current size */
	default_pager_object_t		*objects;
	natural_t			opotential;

	vm_offset_t			paddr = 0;	/* memory for ports */
	vm_size_t			psize = 0;	/* current size */
	mach_port_t			*ports;
	natural_t			ppotential;

	unsigned int			actual;
	unsigned int			num_pagers;
	kern_return_t			kr;
	default_pager_t			entry;

	if (pager != default_pager_default_port)
		return KERN_INVALID_ARGUMENT;

	/* start with the inline memory */

	num_pagers = 0;

	objects = *objectsp;
	opotential = *ocountp;

	ports = *portsp;
	ppotential = *pcountp;

	mutex_lock(&all_pagers.lock);
	/*
	 * Sample the pager count; it is only used to size the
	 * arrays, the list may grow before it is scanned.
	 */
	actual = all_pagers.count;
	mutex_unlock(&all_pagers.lock);

	if (opotential < actual) {
		vm_offset_t	newaddr;
		vm_size_t	newsize;

		newsize = 2 * round_page(actual * sizeof *objects);

		kr = vm_allocate(default_pager_self, &newaddr, newsize, TRUE);
		if (kr != KERN_SUCCESS)
			goto nomemory;

		oaddr = newaddr;
		osize = newsize;
		opotential = osize/sizeof *objects;
		objects = (default_pager_object_t *) oaddr;
	}

	if (ppotential < actual) {
		vm_offset_t	newaddr;
		vm_size_t	newsize;

		newsize = 2 * round_page(actual * sizeof *ports);

		kr = vm_allocate(default_pager_self, &newaddr, newsize, TRUE);
		if (kr != KERN_SUCCESS)
			goto nomemory;

		paddr = newaddr;
		psize = newsize;
		ppotential = psize/sizeof *ports;
		ports = (mach_port_t *) paddr;
	}

	/*
	 * Now scan the list.
	 */

	mutex_lock(&all_pagers.lock);

	num_pagers = 0;
	queue_iterate(&all_pagers.queue, entry, default_pager_t, links) {

		mach_port_t		port;
		vm_size_t		size;

		if ((num_pagers >= opotential) ||
		    (num_pagers >= ppotential)) {
			/*
			 *	This should be rare.  In any case,
			 *	we will only miss recent objects,
			 *	because they are added at the end.
			 */
			break;
		}

		/*
		 * Avoid interfering with normal operations
		 */
		if (!mutex_try_lock(&entry->dpager.lock))
			goto not_this_one;
		size = pager_allocated(&entry->dpager);
		mutex_unlock(&entry->dpager.lock);

		dstruct_lock(entry);

		port = entry->pager_name;
		if (port == MACH_PORT_NULL) {
			/*
			 *	The object is waiting for no-senders
			 *	or memory_object_init.
			 */
			dstruct_unlock(entry);
			goto not_this_one;
		}

		/*
		 *	We need a reference for the reply message.
		 *	While we are unlocked, the bucket queue
		 *	can change and the object might be terminated.
		 *	memory_object_terminate will wait for us,
		 *	preventing deallocation of the entry.
		 */

		if (--entry->name_refs == 0) {
			dstruct_unlock(entry);

			/* keep the list locked, wont take long */

			kr = mach_port_mod_refs(default_pager_self,
					port, MACH_PORT_RIGHT_SEND,
					default_pager_max_urefs);
			if (kr != KERN_SUCCESS)
				panic("%sdefault_pager_objects",my_name);

			dstruct_lock(entry);

			entry->name_refs += default_pager_max_urefs;
			pager_port_finish_refs(entry);
		}
		dstruct_unlock(entry);

		/* the arrays are wired, so no deadlock worries */

		objects[num_pagers].dpo_object = (vm_offset_t) entry;
		objects[num_pagers].dpo_size = size;
		ports  [num_pagers++] = port;
		continue;
not_this_one:
		/*
		 * Do not return garbage
		 */
		objects[num_pagers].dpo_object = (vm_offset_t) 0;
		objects[num_pagers].dpo_size = 0;
		ports  [num_pagers++] = MACH_PORT_NULL;

	}

	mutex_unlock(&all_pagers.lock);

	/*
	 *	Deallocate and clear unused memory.
	 *	(Returned memory will automagically become pageable.)
	 *
	 *	The amount kept is computed from num_pagers, the
	 *	number of entries actually written and reported.  The
	 *	out-of-line arrays hold up to twice "actual" entries,
	 *	so trimming to "actual" could free memory that still
	 *	holds returned entries.
	 */

	if (objects == *objectsp) {
		/*
		 *	Our returned information fit inline.
		 *	Nothing to deallocate.
		 */

		*ocountp = num_pagers;
	} else if (num_pagers == 0) {
		(void) vm_deallocate(default_pager_self, oaddr, osize);

		/* return zero items inline */
		*ocountp = 0;
	} else {
		vm_offset_t used;

		used = round_page(num_pagers * sizeof *objects);

		if (used != osize)
			(void) vm_deallocate(default_pager_self,
					     oaddr + used, osize - used);

		*objectsp = objects;
		*ocountp = num_pagers;
	}

	if (ports == *portsp) {
		/*
		 *	Our returned information fit inline.
		 *	Nothing to deallocate.
		 */

		*pcountp = num_pagers;
	} else if (num_pagers == 0) {
		(void) vm_deallocate(default_pager_self, paddr, psize);

		/* return zero items inline */
		*pcountp = 0;
	} else {
		vm_offset_t used;

		used = round_page(num_pagers * sizeof *ports);

		if (used != psize)
			(void) vm_deallocate(default_pager_self,
					     paddr + used, psize - used);

		*portsp = ports;
		*pcountp = num_pagers;
	}

	return KERN_SUCCESS;

    nomemory:

	/*
	 * Only reached before the scan, so num_pagers is still zero
	 * and there are no port references to give back yet.
	 */
	{
		register unsigned int	i;
		for (i = 0; i < num_pagers; i++)
		    (void) mach_port_deallocate(default_pager_self, ports[i]);
	}

	if (objects != *objectsp)
		(void) vm_deallocate(default_pager_self, oaddr, osize);

	if (ports != *portsp)
		(void) vm_deallocate(default_pager_self, paddr, psize);

	return KERN_RESOURCE_SHORTAGE;
}


/*
 * Return the pages held by one pager object.
 *
 *	pager		must be the default pager port
 *	object		compared with the pager_name of each known pager
 *	pagesp, countp	in: inline array and its capacity;
 *			out: array actually used and entry count
 *
 * Returns KERN_INVALID_ARGUMENT for a wrong pager port or an unknown
 * object, the vm_allocate error if a larger array cannot be obtained,
 * and KERN_SUCCESS otherwise.
 */
kern_return_t
default_pager_object_pages(pager, object, pagesp, countp)
	mach_port_t			pager;
	mach_port_t			object;
	default_pager_page_array_t	*pagesp;
	natural_t			*countp;
{
	vm_offset_t			addr;	/* memory for page offsets */
	vm_size_t			size;	/* current memory size */
	default_pager_page_t		*pages;
	natural_t 			potential, actual;
	kern_return_t			kr;

	if (pager != default_pager_default_port)
		return KERN_INVALID_ARGUMENT;

	/* we start with the inline space */

	pages = *pagesp;
	potential = *countp;

	/*
	 * Each pass looks the object up again, since no lock is held
	 * between passes.  The loop ends once the array is large
	 * enough for everything pager_pages reports.
	 */
	for (;;) {
		default_pager_t		entry;

		mutex_lock(&all_pagers.lock);
		queue_iterate(&all_pagers.queue, entry, default_pager_t, links) {
			dstruct_lock(entry);
			if (entry->pager_name == object) {
				/* leave with the entry still locked */
				mutex_unlock(&all_pagers.lock);
				goto found_object;
			}
			dstruct_unlock(entry);
		}
		mutex_unlock(&all_pagers.lock);

		/* did not find the object */

		if (pages != *pagesp)
			(void) vm_deallocate(default_pager_self, addr, size);
		return KERN_INVALID_ARGUMENT;

	    found_object:

		if (!mutex_try_lock(&entry->dpager.lock)) {
			/* oh well bad luck */

			dstruct_unlock(entry);

			/* yield the processor */
			(void) thread_switch(MACH_PORT_NULL,
					     SWITCH_OPTION_NONE, 0);
			continue;
		}

		actual = pager_pages(&entry->dpager, pages, potential);
		mutex_unlock(&entry->dpager.lock);
		dstruct_unlock(entry);

		/* everything fit: done */
		if (actual <= potential)
			break;

		/* allocate more memory */

		if (pages != *pagesp)
			(void) vm_deallocate(default_pager_self, addr, size);
		size = round_page(actual * sizeof *pages);
		kr = vm_allocate(default_pager_self, &addr, size, TRUE);
		if (kr != KERN_SUCCESS)
			return kr;
		pages = (default_pager_page_t *) addr;
		potential = size/sizeof *pages;
	}

	/*
	 *	Deallocate and clear unused memory.
	 *	(Returned memory will automagically become pageable.)
	 */

	if (pages == *pagesp) {
		/*
		 *	Our returned information fit inline.
		 *	Nothing to deallocate.
		 */

		*countp = actual;
	} else if (actual == 0) {
		(void) vm_deallocate(default_pager_self, addr, size);

		/* return zero items inline */
		*countp = 0;
	} else {
		vm_offset_t used;

		/* keep only the pages that hold returned entries */
		used = round_page(actual * sizeof *pages);

		if (used != size)
			(void) vm_deallocate(default_pager_self,
					     addr + used, size - used);

		*pagesp = pages;
		*countp = actual;
	}
	return KERN_SUCCESS;
}

/*
 * Add/remove extra paging space
 */

extern mach_port_t bootstrap_master_device_port;
extern mach_port_t bootstrap_master_host_port;

/*
 * Add or remove a paging file.
 *
 *	pager		must be the default pager port
 *	mdport		device port supplied by the caller; it is only
 *			deallocated here (unless it is the bootstrap
 *			master device port), never used for the lookup
 *	file_name	name of the paging file
 *	add		TRUE to add the file, FALSE to remove it
 *
 * Returns KERN_INVALID_ARGUMENT for a wrong pager port, otherwise the
 * result of add_paging_file or remove_paging_file.
 */
kern_return_t
default_pager_paging_file(pager, mdport, file_name, add)
	mach_port_t			pager;
	mach_port_t			mdport;
	default_pager_filename_t	file_name;
	boolean_t			add;
{
	kern_return_t   rc;

	if (pager != default_pager_default_port)
		return KERN_INVALID_ARGUMENT;

#if 0
dprintf("bmd %x md %x\n", bootstrap_master_device_port, mdport);
#endif
	if (!add)
		rc = remove_paging_file(file_name);
	else
		rc = add_paging_file(bootstrap_master_device_port,
				     file_name, 0);

	/* XXXX more code needed */
	if (mdport != bootstrap_master_device_port)
		mach_port_deallocate( mach_task_self(), mdport);

	return rc;
}

default_pager_register_fileserver(pager, fileserver)
	mach_port_t			pager;
	mach_port_t			fileserver;
{
	if (pager != default_pager_default_port)
		return KERN_INVALID_ARGUMENT;
#if	notyet
	mach_port_deallocate(mach_task_self(), fileserver);
	if (0) dp_helper_paging_space(0,0,0);/*just linkit*/
#endif
	return KERN_SUCCESS;
}

/*
 * When things do not quite work out...
 */
/*
 * Report that paging space is exhausted and panic.
 *
 *	out_of_memory	if TRUE, an additional "OUT OF MEMORY" note is
 *			printed first
 *
 * The return type is spelled out as int so it stays compatible with
 * callers that use the function without a prototype.  The value
 * carries no meaning.
 */
int
no_paging_space(out_of_memory)
	boolean_t		out_of_memory;
{
	static char		here[] = "%s *** NOT ENOUGH PAGING SPACE ***";

	if (out_of_memory)
		dprintf("*** OUT OF MEMORY *** ");
	panic(here, my_name);
	return 0;
}

/*
 * Track paging space over-commitment and warn the user about it.
 *
 *	got_more_space	TRUE when paging space has just been added;
 *			the recorded shortage is then re-evaluated
 *	space		(in pages) amount that could not be satisfied,
 *			used when got_more_space is FALSE
 *
 * The shortage is kept in a static between calls.  All arithmetic is
 * done with comparisons before subtracting, so it stays correct when
 * vm_size_t is an unsigned type and cannot hold a negative difference.
 *
 * The return type is spelled out as int so it stays compatible with
 * callers that use the function without a prototype.  The value
 * carries no meaning.
 */
int
overcommitted(got_more_space, space)
	boolean_t	got_more_space;
	vm_size_t	space;		/* in pages */
{
	vm_size_t	pages_free, pages_total;

	static boolean_t user_warned = FALSE;
	static vm_size_t pages_shortage = 0;

	paging_space_info(&pages_total, &pages_free);

	/*
	 * If user added more space, see if it is enough
	 */
	if (got_more_space) {
		if (pages_free > pages_shortage) {
			pages_shortage = 0;
			if (user_warned)
				dprintf("%s paging space ok now.\n", my_name);
		} else {
			/* still short by whatever is not covered */
			pages_shortage -= pages_free;
		}
		user_warned = FALSE;
		return 0;
	}
	/*
	 * We ran out of gas, let user know.
	 */
	pages_shortage = (pages_free > space) ? 0 : space - pages_free;
	if (!user_warned && pages_shortage) {
		user_warned = TRUE;
		dprintf("%s paging space over-committed.\n", my_name);
	}
#if debug
	user_warned = FALSE;
	dprintf("%s paging space over-committed [+%d (%d) pages].\n",
			my_name, space, pages_shortage);
#endif
	return 0;
}

/*
 * Sum the size and free space of all paging partitions.
 *
 *	totp	out: sum of total_size over all partitions
 *	freep	out: sum of free over all partitions
 *
 * Slots for which partition_of yields 0 are skipped.
 *
 * The return type is spelled out as int so it stays compatible with
 * callers that use the function without a prototype.  The value
 * carries no meaning.
 */
int
paging_space_info(totp, freep)
	vm_size_t	*totp, *freep;
{
	register vm_size_t	total_pages, free_pages;
	register partition_t	part;
	register int		i;

	total_pages = free_pages = 0;
	for (i = 0; i < all_partitions.n_partitions; i++) {

		if ((part = partition_of(i)) == 0) continue;

		/* no need to lock: by the time this data
		   gets back to any remote requestor it
		   will be obsolete anyways */
		total_pages += part->total_size;
		free_pages += part->free;
#if debug
		dprintf("Partition %d: x%x total, x%x free\n",
		       i, part->total_size, part->free);
#endif
	}
	*totp = total_pages;
	*freep = free_pages;
	return 0;
}

/*
 *	Catch exceptions.
 *
 *	Any exception delivered here is fatal: the routine panics
 *	without examining the thread or task.
 */

kern_return_t
catch_exception_raise(exception_port, thread, task, exception, code, subcode)
	mach_port_t exception_port;
	mach_port_t thread, task;
	int exception, code, subcode;
{
	ddprintf ("(default_pager)catch_exception_raise(%d,%d,%d)\n",
	       exception, code, subcode);

	panic(my_name);

	/* mach_msg_server will deallocate thread/task for us */
	return(KERN_FAILURE);
}

/*
 *	Handle bootstrap requests.
 */

/*
 * Bootstrap request: hand out the master host and device ports.
 *
 *	bootstrap	unused
 *	hostp		out: bootstrap_master_host_port
 *	devicep		out: bootstrap_master_device_port
 *
 * Always returns KERN_SUCCESS.
 */
kern_return_t
do_bootstrap_privileged_ports(bootstrap, hostp, devicep)
	mach_port_t bootstrap;
	mach_port_t *hostp, *devicep;
{
	*devicep = bootstrap_master_device_port;
	*hostp = bootstrap_master_host_port;

	return(KERN_SUCCESS);
}

/*
 * Answer a request made through the old bootstrap interface.
 *
 *	in	the request; its reply port and id shape the answer
 *	out	reply buffer of mach_msg_server; only its RetCode is
 *		set, to MIG_NO_REPLY
 *
 * The answer carries the master host and device ports and is sent
 * directly from here.  If the send fails, the reply port taken from
 * the request is deallocated.
 */
void
bootstrap_compat(in, out)
	mach_msg_header_t *in, *out;
{
	struct imsg {
		mach_msg_header_t	hdr;
		mach_msg_type_t		port_desc_1;
		mach_port_t		port_1;
		mach_msg_type_t		port_desc_2;
		mach_port_t		port_2;
	} imsg;

	mig_reply_header_t *reply;
	mach_msg_type_t port_desc;
	mach_msg_return_t send_rc;

	/*
	 * Both ports travel with the same descriptor:
	 * one inline send right, copied from ours.
	 */
	port_desc.msgt_name = MACH_MSG_TYPE_COPY_SEND;
	port_desc.msgt_size = (sizeof(mach_port_t) * 8);
	port_desc.msgt_number = 1;
	port_desc.msgt_inline = TRUE;
	port_desc.msgt_longform = FALSE;
	port_desc.msgt_deallocate = FALSE;
	port_desc.msgt_unused = 0;

	/*
	 * Send back the host and device ports.
	 */

	/* msgh_size and msgh_seqno don't need to be initialized */
	imsg.hdr.msgh_bits = MACH_MSGH_BITS_COMPLEX |
		MACH_MSGH_BITS(MACH_MSGH_BITS_REMOTE(in->msgh_bits), 0);
	imsg.hdr.msgh_remote_port = in->msgh_remote_port;
	imsg.hdr.msgh_local_port = MACH_PORT_NULL;
	imsg.hdr.msgh_id = in->msgh_id + 100;	/* this is a reply msg */

	imsg.port_desc_1 = port_desc;
	imsg.port_1 = bootstrap_master_host_port;

	imsg.port_desc_2 = port_desc;
	imsg.port_2 = bootstrap_master_device_port;

	/*
	 * Send the reply message.
	 * (mach_msg_server can not do this, because the reply
	 * is not in standard format.)
	 */

	send_rc = mach_msg(&imsg.hdr, MACH_SEND_MSG,
			   sizeof imsg, 0, MACH_PORT_NULL,
			   MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
	if (send_rc != MACH_MSG_SUCCESS)
		(void) mach_port_deallocate(default_pager_self,
					    imsg.hdr.msgh_remote_port);

	/*
	 * Tell mach_msg_server to do nothing.
	 */

	reply = (mig_reply_header_t *) out;
	reply->RetCode = MIG_NO_REPLY;
}

#ifdef	mips
/*
 * set_ras_address for default pager
 * Default pager does not have emulator support
 * so it needs a local version of set_ras_address.
 *
 * Installs one restartable sequence [basepc, boundspc) for this task
 * through task_ras_control.  Returns 0 on success, -1 on failure.
 */
int
set_ras_address(basepc, boundspc)
	vm_offset_t basepc;
	vm_offset_t boundspc;
{
	kern_return_t rc;

	rc = task_ras_control(mach_task_self(), basepc, boundspc,
			      TASK_RAS_CONTROL_INSTALL_ONE);

	return (rc == KERN_SUCCESS) ? 0 : -1;
}
#endif