summaryrefslogtreecommitdiff
path: root/ext2fs/pager.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext2fs/pager.c')
-rw-r--r--ext2fs/pager.c1044
1 files changed, 1044 insertions, 0 deletions
diff --git a/ext2fs/pager.c b/ext2fs/pager.c
new file mode 100644
index 00000000..bf57d9ed
--- /dev/null
+++ b/ext2fs/pager.c
@@ -0,0 +1,1044 @@
+/* Pager for ext2fs
+
+ Copyright (C) 1994, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
+
+ Converted for ext2fs by Miles Bader <miles@gnu.ai.mit.edu>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <string.h>
+#include <hurd/store.h>
+#include "ext2fs.h"
+
+/* A ports bucket to hold pager ports. */
+struct port_bucket *pager_bucket;
+
+/* Mapped image of the disk. */
+void *disk_image;
+
+spin_lock_t node_to_page_lock = SPIN_LOCK_INITIALIZER;
+
+#ifdef DONT_CACHE_MEMORY_OBJECTS
+#define MAY_CACHE 0
+#else
+#define MAY_CACHE 1
+#endif
+
+#define STATS
+
+#ifdef STATS
+struct ext2fs_pager_stats
+{
+ spin_lock_t lock;
+
+ unsigned long disk_pageins;
+ unsigned long disk_pageouts;
+
+ unsigned long file_pageins;
+ unsigned long file_pagein_reads; /* Device reads done by file pagein */
+ unsigned long file_pagein_freed_bufs; /* Discarded pages */
+ unsigned long file_pagein_alloced_bufs; /* Allocated pages */
+
+ unsigned long file_pageouts;
+
+ unsigned long file_page_unlocks;
+ unsigned long file_grows;
+};
+
+static struct ext2fs_pager_stats ext2s_pager_stats;
+
+#define STAT_INC(field) \
+do { spin_lock (&ext2s_pager_stats.lock); \
+ ext2s_pager_stats.field++; \
+ spin_unlock (&ext2s_pager_stats.lock); } while (0)
+
+#else /* !STATS */
+#define STAT_INC(field) /* nop */0
+#endif /* STATS */
+
+#define MAX_FREE_PAGE_BUFS 32
+
+static spin_lock_t free_page_bufs_lock = SPIN_LOCK_INITIALIZER;
+static void *free_page_bufs = 0;
+static int num_free_page_bufs = 0;
+
+/* Returns a single page page-aligned buffer. */
+static void *
+get_page_buf ()
+{
+ void *buf;
+
+ spin_lock (&free_page_bufs_lock);
+
+ buf = free_page_bufs;
+ if (buf == 0)
+ {
+ spin_unlock (&free_page_bufs_lock);
+ buf = mmap (0, vm_page_size, PROT_READ|PROT_WRITE, MAP_ANON, 0, 0);
+ if (buf == (void *) -1)
+ buf = 0;
+ }
+ else
+ {
+ free_page_bufs = *(void **)buf;
+ num_free_page_bufs--;
+ spin_unlock (&free_page_bufs_lock);
+ }
+
+ return buf;
+}
+
+/* Frees a block returned by get_page_buf. */
+static void
+free_page_buf (void *buf)
+{
+ spin_lock (&free_page_bufs_lock);
+ if (num_free_page_bufs < MAX_FREE_PAGE_BUFS)
+ {
+ *(void **)buf = free_page_bufs;
+ free_page_bufs = buf;
+ num_free_page_bufs++;
+ spin_unlock (&free_page_bufs_lock);
+ }
+ else
+ {
+ spin_unlock (&free_page_bufs_lock);
+ munmap (buf, vm_page_size);
+ }
+}
+
+/* Find the location on disk of page OFFSET in NODE. Return the disk block
+ in BLOCK (if unallocated, then return 0). If *LOCK is 0, then it a reader
+ lock is aquired on NODE's ALLOC_LOCK before doing anything, and left
+ locked after return -- even if an error is returned. 0 on success or an
+ error code otherwise is returned. */
+static error_t
+find_block (struct node *node, vm_offset_t offset,
+ block_t *block, struct rwlock **lock)
+{
+ error_t err;
+
+ if (!*lock)
+ {
+ *lock = &node->dn->alloc_lock;
+ rwlock_reader_lock (*lock);
+ }
+
+ if (offset + block_size > node->allocsize)
+ return EIO;
+
+ err = ext2_getblk (node, offset >> log2_block_size, 0, block);
+ if (err == EINVAL)
+ /* Don't barf yet if the node is unallocated. */
+ {
+ *block = 0;
+ err = 0;
+ }
+
+ return err;
+}
+
+/* Read one page for the pager backing NODE at offset PAGE, into BUF. This
+ may need to read several filesystem blocks to satisfy one page, and tries
+ to consolidate the i/o if possible. */
+static error_t
+file_pager_read_page (struct node *node, vm_offset_t page,
+ void **buf, int *writelock)
+{
+ error_t err;
+ int offs = 0;
+ int partial = 0; /* A page truncated by the EOF. */
+ struct rwlock *lock = NULL;
+ int left = vm_page_size;
+ block_t pending_blocks = 0;
+ int num_pending_blocks = 0;
+
+ /* Read the NUM_PENDING_BLOCKS blocks in PENDING_BLOCKS, into the buffer
+ pointed to by BUF (allocating it if necessary) at offset OFFS. OFFS in
+ adjusted by the amount read, and NUM_PENDING_BLOCKS is zeroed. Any read
+ error is returned. */
+ error_t do_pending_reads ()
+ {
+ if (num_pending_blocks > 0)
+ {
+ block_t dev_block = pending_blocks << log2_dev_blocks_per_fs_block;
+ size_t amount = num_pending_blocks << log2_block_size;
+ /* The buffer we try to read into; on the first read, we pass in a
+ size of zero, so that the read is guaranteed to allocate a new
+ buffer, otherwise, we try to read directly into the tail of the
+ buffer we've already got. */
+ void *new_buf = *buf + offs;
+ size_t new_len = offs == 0 ? 0 : vm_page_size - offs;
+
+ STAT_INC (file_pagein_reads);
+
+ err = store_read (store, dev_block, amount, &new_buf, &new_len);
+ if (err)
+ return err;
+ else if (amount != new_len)
+ return EIO;
+
+ if (new_buf != *buf + offs)
+ {
+ /* The read went into a different buffer than the one we
+ passed. */
+ if (offs == 0)
+ /* First read, make the returned page be our buffer. */
+ *buf = new_buf;
+ else
+ /* We've already got some buffer, so copy into it. */
+ {
+ bcopy (new_buf, *buf + offs, new_len);
+ free_page_buf (new_buf); /* Return NEW_BUF to our pool. */
+ STAT_INC (file_pagein_freed_bufs);
+ }
+ }
+
+ offs += new_len;
+ num_pending_blocks = 0;
+ }
+
+ return 0;
+ }
+
+ STAT_INC (file_pageins);
+
+ *writelock = 0;
+
+ if (page >= node->allocsize)
+ {
+ err = EIO;
+ left = 0;
+ }
+ else if (page + left > node->allocsize)
+ {
+ left = node->allocsize - page;
+ partial = 1;
+ }
+
+ while (left > 0)
+ {
+ block_t block;
+
+ err = find_block (node, page, &block, &lock);
+ if (err)
+ break;
+
+ if (block != pending_blocks + num_pending_blocks)
+ {
+ err = do_pending_reads ();
+ if (err)
+ break;
+ pending_blocks = block;
+ }
+
+ if (block == 0)
+ /* Reading unallocated block, just make a zero-filled one. */
+ {
+ *writelock = 1;
+ if (offs == 0)
+ /* No page allocated to read into yet. */
+ {
+ *buf = get_page_buf ();
+ if (! *buf)
+ break;
+ STAT_INC (file_pagein_alloced_bufs);
+ }
+ bzero (*buf + offs, block_size);
+ offs += block_size;
+ }
+ else
+ num_pending_blocks++;
+
+ page += block_size;
+ left -= block_size;
+ }
+
+ if (!err && num_pending_blocks > 0)
+ err = do_pending_reads();
+
+ if (!err && partial && !*writelock)
+ node->dn->last_page_partially_writable = 1;
+
+ if (lock)
+ rwlock_reader_unlock (lock);
+
+ return err;
+}
+
+struct pending_blocks
+{
+ /* The block number of the first of the blocks. */
+ block_t block;
+ /* How many blocks we have. */
+ off_t num;
+ /* A (page-aligned) buffer pointing to the data we're dealing with. */
+ void *buf;
+ /* And an offset into BUF. */
+ int offs;
+};
+
+/* Write the any pending blocks in PB. */
+static error_t
+pending_blocks_write (struct pending_blocks *pb)
+{
+ if (pb->num > 0)
+ {
+ error_t err;
+ block_t dev_block = pb->block << log2_dev_blocks_per_fs_block;
+ size_t length = pb->num << log2_block_size, amount;
+
+ ext2_debug ("writing block %lu[%d]", pb->block, pb->num);
+
+ if (pb->offs > 0)
+ /* Put what we're going to write into a page-aligned buffer. */
+ {
+ void *page_buf = get_page_buf ();
+ bcopy (pb->buf + pb->offs, (void *)page_buf, length);
+ err = store_write (store, dev_block, page_buf, length, &amount);
+ free_page_buf (page_buf);
+ }
+ else
+ err = store_write (store, dev_block, pb->buf, length, &amount);
+ if (err)
+ return err;
+ else if (amount != length)
+ return EIO;
+
+ pb->offs += length;
+ pb->num = 0;
+ }
+
+ return 0;
+}
+
+static void
+pending_blocks_init (struct pending_blocks *pb, void *buf)
+{
+ pb->buf = buf;
+ pb->block = 0;
+ pb->num = 0;
+ pb->offs = 0;
+}
+
+/* Skip writing the next block in PB's buffer (writing out any previous
+ blocks if necessary). */
+static error_t
+pending_blocks_skip (struct pending_blocks *pb)
+{
+ error_t err = pending_blocks_write (pb);
+ pb->offs += block_size;
+ return err;
+}
+
+/* Add the disk block BLOCK to the list of destination disk blocks pending in
+ PB. */
+static error_t
+pending_blocks_add (struct pending_blocks *pb, block_t block)
+{
+ if (block != pb->block + pb->num)
+ {
+ error_t err = pending_blocks_write (pb);
+ if (err)
+ return err;
+ pb->block = block;
+ }
+ pb->num++;
+ return 0;
+}
+
+/* Write one page for the pager backing NODE, at offset PAGE, into BUF. This
+ may need to write several filesystem blocks to satisfy one page, and tries
+ to consolidate the i/o if possible. */
+static error_t
+file_pager_write_page (struct node *node, vm_offset_t offset, void *buf)
+{
+ error_t err = 0;
+ struct pending_blocks pb;
+ struct rwlock *lock = &node->dn->alloc_lock;
+ block_t block;
+ int left = vm_page_size;
+
+ pending_blocks_init (&pb, buf);
+
+ /* Holding NODE->dn->alloc_lock effectively locks NODE->allocsize,
+ at least for the cases we care about: pager_unlock_page,
+ diskfs_grow and diskfs_truncate. */
+ rwlock_reader_lock (&node->dn->alloc_lock);
+
+ if (offset >= node->allocsize)
+ left = 0;
+ else if (offset + left > node->allocsize)
+ left = node->allocsize - offset;
+
+ ext2_debug ("writing inode %d page %d[%d]", node->cache_id, offset, left);
+
+ STAT_INC (file_pageouts);
+
+ while (left > 0)
+ {
+ err = find_block (node, offset, &block, &lock);
+ if (err)
+ break;
+ assert (block);
+ pending_blocks_add (&pb, block);
+ offset += block_size;
+ left -= block_size;
+ }
+
+ if (!err)
+ pending_blocks_write (&pb);
+
+ rwlock_reader_unlock (&node->dn->alloc_lock);
+
+ return err;
+}
+
+static error_t
+disk_pager_read_page (vm_offset_t page, void **buf, int *writelock)
+{
+ error_t err;
+ size_t length = vm_page_size, read = 0;
+ vm_size_t dev_end = store->size;
+
+ if (page + vm_page_size > dev_end)
+ length = dev_end - page;
+
+ err = store_read (store, page >> store->log2_block_size, length, buf, &read);
+ if (read != length)
+ return EIO;
+ if (!err && length != vm_page_size)
+ bzero ((void *)(*buf + length), vm_page_size - length);
+
+ *writelock = 0;
+
+ return err;
+}
+
+static error_t
+disk_pager_write_page (vm_offset_t page, void *buf)
+{
+ error_t err = 0;
+ size_t length = vm_page_size, amount;
+ vm_size_t dev_end = store->size;
+
+ if (page + vm_page_size > dev_end)
+ length = dev_end - page;
+
+ ext2_debug ("writing disk page %d[%d]", page, length);
+
+ STAT_INC (disk_pageouts);
+
+ if (modified_global_blocks)
+ /* Be picky about which blocks in a page that we write. */
+ {
+ vm_offset_t offs = page;
+ struct pending_blocks pb;
+
+ pending_blocks_init (&pb, buf);
+
+ while (length > 0 && !err)
+ {
+ block_t block = boffs_block (offs);
+
+ /* We don't clear the block modified bit here because this paging
+ write request may not be the same one that actually set the bit,
+ and our copy of the page may be out of date; we have to leave
+ the bit on in case a paging write request corresponding to the
+ modification comes along later. The bit is only actually ever
+ cleared if the block is allocated to a file, so this results in
+ excess writes of blocks from modified pages. Unfortunately I
+ know of no way to get arount this given the current external
+ paging interface. XXXX */
+ if (test_bit (block, modified_global_blocks))
+ /* This block may have been modified, so write it out. */
+ err = pending_blocks_add (&pb, block);
+ else
+ /* Otherwise just skip it. */
+ err = pending_blocks_skip (&pb);
+
+ offs += block_size;
+ length -= block_size;
+ }
+
+ if (!err)
+ err = pending_blocks_write (&pb);
+ }
+ else
+ {
+ err = store_write (store, page >> store->log2_block_size,
+ buf, length, &amount);
+ if (!err && length != amount)
+ err = EIO;
+ }
+
+ return err;
+}
+
+/* Satisfy a pager read request for either the disk pager or file pager
+ PAGER, to the page at offset PAGE into BUF. WRITELOCK should be set if
+ the pager should make the page writeable. */
+error_t
+pager_read_page (struct user_pager_info *pager, vm_offset_t page,
+ vm_address_t *buf, int *writelock)
+{
+ if (pager->type == DISK)
+ return disk_pager_read_page (page, (void **)buf, writelock);
+ else
+ return file_pager_read_page (pager->node, page, (void **)buf, writelock);
+}
+
+/* Satisfy a pager write request for either the disk pager or file pager
+ PAGER, from the page at offset PAGE from BUF. */
+error_t
+pager_write_page (struct user_pager_info *pager, vm_offset_t page,
+ vm_address_t buf)
+{
+ if (pager->type == DISK)
+ return disk_pager_write_page (page, (void *)buf);
+ else
+ return file_pager_write_page (pager->node, page, (void *)buf);
+}
+
+/* Make page PAGE writable, at least up to ALLOCSIZE. This function and
+ diskfs_grow are the only places that blocks are actually added to the
+ file. */
+error_t
+pager_unlock_page (struct user_pager_info *pager, vm_offset_t page)
+{
+ if (pager->type == DISK)
+ return 0;
+ else
+ {
+ error_t err;
+ volatile int partial_page;
+ struct node *node = pager->node;
+ struct disknode *dn = node->dn;
+
+ rwlock_writer_lock (&dn->alloc_lock);
+
+ partial_page = (page + vm_page_size > node->allocsize);
+
+ err = diskfs_catch_exception ();
+ if (!err)
+ {
+ block_t block = page >> log2_block_size;
+ int left = (partial_page ? node->allocsize - page : vm_page_size);
+
+ while (left > 0)
+ {
+ block_t disk_block;
+ err = ext2_getblk (node, block++, 1, &disk_block);
+ if (err)
+ break;
+ left -= block_size;
+ }
+ }
+ diskfs_end_catch_exception ();
+
+ if (partial_page)
+ /* If an error occurred, this page still isn't writable; otherwise,
+ since it's at the end of the file, it's now partially writable. */
+ dn->last_page_partially_writable = !err;
+ else if (page + vm_page_size == node->allocsize)
+ /* This makes the last page writable, which ends exactly at the end
+ of the file. If any error occurred, the page still isn't
+ writable, and if not, then the whole thing is writable. */
+ dn->last_page_partially_writable = 0;
+
+#ifdef EXT2FS_DEBUG
+ if (dn->last_page_partially_writable)
+ ext2_debug ("made page %u[%lu] in inode %d partially writable",
+ page, node->allocsize - page, node->cache_id);
+ else
+ ext2_debug ("made page %u[%u] in inode %d writable",
+ page, vm_page_size, node->cache_id);
+#endif
+
+ STAT_INC (file_page_unlocks);
+
+ rwlock_writer_unlock (&dn->alloc_lock);
+
+ if (err == ENOSPC)
+ ext2_warning ("This filesystem is out of space, and will now crash. Bye!");
+ else if (err)
+ ext2_warning ("inode=%d, page=0x%x: %s",
+ node->cache_id, page, strerror (err));
+
+ return err;
+ }
+}
+
+/* Grow the disk allocated to locked node NODE to be at least SIZE bytes, and
+ set NODE->allocsize to the actual allocated size. (If the allocated size
+ is already SIZE bytes, do nothing.) CRED identifies the user responsible
+ for the call. */
+error_t
+diskfs_grow (struct node *node, off_t size, struct protid *cred)
+{
+ diskfs_check_readonly ();
+ assert (!diskfs_readonly);
+
+ if (size > node->allocsize)
+ {
+ error_t err = 0;
+ off_t old_size;
+ volatile off_t new_size;
+ volatile block_t end_block;
+ block_t new_end_block;
+ struct disknode *dn = node->dn;
+
+ rwlock_writer_lock (&dn->alloc_lock);
+
+ old_size = node->allocsize;
+ new_size = round_block (size);
+
+ /* The first unallocated blocks after the old and new ends of the
+ file, respectively. */
+ end_block = old_size >> log2_block_size;
+ new_end_block = new_size >> log2_block_size;
+
+ if (new_end_block > end_block)
+ {
+ /* The first block of the first unallocate page after the old end
+ of the file. If LAST_PAGE_PARTIALLY_WRITABLE is true, any
+ blocks between this and END_BLOCK were unallocated, but are
+ considered `unlocked' -- that is pager_unlock_page has been
+ called on the page they're in. Since after this grow the pager
+ will expect them to be writable, we'd better allocate them. */
+ block_t old_page_end_block =
+ round_page (old_size) >> log2_block_size;
+
+ ext2_debug ("growing inode %d to %lu bytes (from %lu)", node->cache_id,
+ new_size, old_size);
+
+ if (dn->last_page_partially_writable
+ && old_page_end_block > end_block)
+ {
+ volatile block_t writable_end =
+ (old_page_end_block > new_end_block
+ ? new_end_block
+ : old_page_end_block);
+
+ ext2_debug ("extending writable page %u by %ld blocks"
+ "; first new block = %lu",
+ trunc_page (old_size),
+ writable_end - end_block,
+ end_block);
+
+ err = diskfs_catch_exception ();
+ while (!err && end_block < writable_end)
+ {
+ block_t disk_block;
+ err = ext2_getblk (node, end_block++, 1, &disk_block);
+ }
+ diskfs_end_catch_exception ();
+
+ if (err)
+ /* Reflect how much we allocated successfully. */
+ new_size = (end_block - 1) << log2_block_size;
+ else
+ /* See if it's still valid to say this. */
+ dn->last_page_partially_writable =
+ (old_page_end_block > end_block);
+ }
+ }
+
+ STAT_INC (file_grows);
+
+ ext2_debug ("new size: %ld%s.", new_size,
+ dn->last_page_partially_writable
+ ? " (last page writable)": "");
+ if (err)
+ ext2_warning ("inode=%d, target=%ld: %s",
+ node->cache_id, new_size, strerror (err));
+
+ node->allocsize = new_size;
+
+ rwlock_writer_unlock (&dn->alloc_lock);
+
+ return err;
+ }
+ else
+ return 0;
+}
+
+/* This syncs a single file (NODE) to disk. Wait for all I/O to complete
+ if WAIT is set. NODE->lock must be held. */
+void
+diskfs_file_update (struct node *node, int wait)
+{
+ struct pager *pager;
+
+ spin_lock (&node_to_page_lock);
+ pager = node->dn->pager;
+ if (pager)
+ ports_port_ref (pager);
+ spin_unlock (&node_to_page_lock);
+
+ if (pager)
+ {
+ pager_sync (pager, wait);
+ ports_port_deref (pager);
+ }
+
+ pokel_sync (&node->dn->indir_pokel, wait);
+
+ diskfs_node_update (node, wait);
+}
+
+/* Invalidate any pager data associated with NODE. */
+void
+flush_node_pager (struct node *node)
+{
+ struct pager *pager;
+ struct disknode *dn = node->dn;
+
+ spin_lock (&node_to_page_lock);
+ pager = dn->pager;
+ if (pager)
+ ports_port_ref (pager);
+ spin_unlock (&node_to_page_lock);
+
+ if (pager)
+ {
+ pager_flush (pager, 1);
+ ports_port_deref (pager);
+ }
+}
+
+
+/* Return in *OFFSET and *SIZE the minimum valid address the pager will
+ accept and the size of the object. */
+inline error_t
+pager_report_extent (struct user_pager_info *pager,
+ vm_address_t *offset, vm_size_t *size)
+{
+ assert (pager->type == DISK || pager->type == FILE_DATA);
+
+ *offset = 0;
+
+ if (pager->type == DISK)
+ *size = store->size;
+ else
+ *size = pager->node->allocsize;
+
+ return 0;
+}
+
+/* This is called when a pager is being deallocated after all extant send
+ rights have been destroyed. */
+void
+pager_clear_user_data (struct user_pager_info *upi)
+{
+ if (upi->type == FILE_DATA)
+ {
+ struct pager *pager;
+
+ spin_lock (&node_to_page_lock);
+ pager = upi->node->dn->pager;
+ if (pager && pager_get_upi (pager) == upi)
+ upi->node->dn->pager = 0;
+ spin_unlock (&node_to_page_lock);
+
+ diskfs_nrele_light (upi->node);
+ }
+
+ free (upi);
+}
+
+/* This will be called when the ports library wants to drop weak references.
+ The pager library creates no weak references itself. If the user doesn't
+ either, then it's OK for this function to do nothing. */
+void
+pager_dropweak (struct user_pager_info *p __attribute__ ((unused)))
+{
+}
+
+/* Create the DISK pager. */
+void
+create_disk_pager (void)
+{
+ struct user_pager_info *upi = malloc (sizeof (struct user_pager_info));
+ upi->type = DISK;
+ pager_bucket = ports_create_bucket ();
+ diskfs_start_disk_pager (upi, pager_bucket, MAY_CACHE, store->size,
+ &disk_image);
+}
+
+/* Call this to create a FILE_DATA pager and return a send right.
+ NODE must be locked. */
+mach_port_t
+diskfs_get_filemap (struct node *node, vm_prot_t prot)
+{
+ mach_port_t right;
+
+ assert (S_ISDIR (node->dn_stat.st_mode)
+ || S_ISREG (node->dn_stat.st_mode)
+ || (S_ISLNK (node->dn_stat.st_mode)));
+
+ spin_lock (&node_to_page_lock);
+ do
+ {
+ struct pager *pager = node->dn->pager;
+ if (pager)
+ {
+ /* Because PAGER is not a real reference,
+ this might be nearly deallocated. If that's so, then
+ the port right will be null. In that case, clear here
+ and loop. The deallocation will complete separately. */
+ right = pager_get_port (pager);
+ if (right == MACH_PORT_NULL)
+ node->dn->pager = 0;
+ else
+ pager_get_upi (pager)->max_prot |= prot;
+ }
+ else
+ {
+ struct user_pager_info *upi =
+ malloc (sizeof (struct user_pager_info));
+ upi->type = FILE_DATA;
+ upi->node = node;
+ upi->max_prot = 0;
+ diskfs_nref_light (node);
+ node->dn->pager =
+ pager_create (upi, pager_bucket, MAY_CACHE,
+ MEMORY_OBJECT_COPY_DELAY);
+ if (node->dn->pager == 0)
+ {
+ diskfs_nrele_light (node);
+ free (upi);
+ spin_unlock (&node_to_page_lock);
+ return MACH_PORT_NULL;
+ }
+
+ right = pager_get_port (node->dn->pager);
+ ports_port_deref (node->dn->pager);
+ }
+ }
+ while (right == MACH_PORT_NULL);
+ spin_unlock (&node_to_page_lock);
+
+ mach_port_insert_right (mach_task_self (), right, right,
+ MACH_MSG_TYPE_MAKE_SEND);
+
+ return right;
+}
+
+/* Call this when we should turn off caching so that unused memory object
+ ports get freed. */
+void
+drop_pager_softrefs (struct node *node)
+{
+ struct pager *pager;
+
+ spin_lock (&node_to_page_lock);
+ pager = node->dn->pager;
+ if (pager)
+ ports_port_ref (pager);
+ spin_unlock (&node_to_page_lock);
+
+ if (MAY_CACHE && pager)
+ pager_change_attributes (pager, 0, MEMORY_OBJECT_COPY_DELAY, 0);
+ if (pager)
+ ports_port_deref (pager);
+}
+
+/* Call this when we should turn on caching because it's no longer
+ important for unused memory object ports to get freed. */
+void
+allow_pager_softrefs (struct node *node)
+{
+ struct pager *pager;
+
+ spin_lock (&node_to_page_lock);
+ pager = node->dn->pager;
+ if (pager)
+ ports_port_ref (pager);
+ spin_unlock (&node_to_page_lock);
+
+ if (MAY_CACHE && pager)
+ pager_change_attributes (pager, 1, MEMORY_OBJECT_COPY_DELAY, 0);
+ if (pager)
+ ports_port_deref (pager);
+}
+
+/* Call this to find out the struct pager * corresponding to the
+ FILE_DATA pager of inode IP. This should be used *only* as a subsequent
+ argument to register_memory_fault_area, and will be deleted when
+ the kernel interface is fixed. NODE must be locked. */
+struct pager *
+diskfs_get_filemap_pager_struct (struct node *node)
+{
+ /* This is safe because pager can't be cleared; there must be
+ an active mapping for this to be called. */
+ return node->dn->pager;
+}
+
+/* Shutdown all the pagers (except the disk pager). */
+void
+diskfs_shutdown_pager ()
+{
+ error_t shutdown_one (void *v_p)
+ {
+ struct pager *p = v_p;
+ if (p != diskfs_disk_pager)
+ pager_shutdown (p);
+ return 0;
+ }
+
+ write_all_disknodes ();
+
+ ports_bucket_iterate (pager_bucket, shutdown_one);
+
+ /* Sync everything on the the disk pager. */
+ sync_global (1);
+
+ /* Despite the name of this function, we never actually shutdown the disk
+ pager, just make sure it's synced. */
+}
+
+/* Sync all the pagers. */
+void
+diskfs_sync_everything (int wait)
+{
+ error_t sync_one (void *v_p)
+ {
+ struct pager *p = v_p;
+ if (p != diskfs_disk_pager)
+ pager_sync (p, wait);
+ return 0;
+ }
+
+ write_all_disknodes ();
+ ports_bucket_iterate (pager_bucket, sync_one);
+
+ /* Do things on the the disk pager. */
+ sync_global (wait);
+}
+
+static void
+disable_caching ()
+{
+ error_t block_cache (void *arg)
+ {
+ struct pager *p = arg;
+
+ pager_change_attributes (p, 0, MEMORY_OBJECT_COPY_DELAY, 1);
+ return 0;
+ }
+
+ /* Loop through the pagers and turn off caching one by one,
+ synchronously. That should cause termination of each pager. */
+ ports_bucket_iterate (pager_bucket, block_cache);
+}
+
+static void
+enable_caching ()
+{
+ error_t enable_cache (void *arg)
+ {
+ struct pager *p = arg;
+ struct user_pager_info *upi = pager_get_upi (p);
+
+ pager_change_attributes (p, 1, MEMORY_OBJECT_COPY_DELAY, 0);
+
+ /* It's possible that we didn't have caching on before, because
+ the user here is the only reference to the underlying node
+ (actually, that's quite likely inside this particular
+ routine), and if that node has no links. So dinkle the node
+ ref counting scheme here, which will cause caching to be
+ turned off, if that's really necessary. */
+ if (upi->type == FILE_DATA)
+ {
+ diskfs_nref (upi->node);
+ diskfs_nrele (upi->node);
+ }
+
+ return 0;
+ }
+
+ ports_bucket_iterate (pager_bucket, enable_cache);
+}
+
+/* Tell diskfs if there are pagers exported, and if none, then
+ prevent any new ones from showing up. */
+int
+diskfs_pager_users ()
+{
+ int npagers = ports_count_bucket (pager_bucket);
+
+ if (npagers <= 1)
+ return 0;
+
+ if (MAY_CACHE)
+ {
+ disable_caching ();
+
+ /* Give it a second; the kernel doesn't actually shutdown
+ immediately. XXX */
+ sleep (1);
+
+ npagers = ports_count_bucket (pager_bucket);
+ if (npagers <= 1)
+ return 0;
+
+ /* Darn, there are actual honest users. Turn caching back on,
+ and return failure. */
+ enable_caching ();
+ }
+
+ ports_enable_bucket (pager_bucket);
+
+ return 1;
+}
+
+/* Return the bitwise or of the maximum prot parameter (the second arg to
+ diskfs_get_filemap) for all active user pagers. */
+vm_prot_t
+diskfs_max_user_pager_prot ()
+{
+ vm_prot_t max_prot = 0;
+ int npagers = ports_count_bucket (pager_bucket);
+
+ if (npagers > 1)
+ /* More than just the disk pager. */
+ {
+ error_t add_pager_max_prot (void *v_p)
+ {
+ struct pager *p = v_p;
+ struct user_pager_info *upi = pager_get_upi (p);
+ if (upi->type == FILE_DATA)
+ max_prot |= upi->max_prot;
+ /* Stop iterating if MAX_PROT is as filled as it's going to get. */
+ return
+ (max_prot == (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)) ? 1 : 0;
+ }
+
+ disable_caching (); /* Make any silly pagers go away. */
+
+ /* Give it a second; the kernel doesn't actually shutdown
+ immediately. XXX */
+ sleep (1);
+
+ ports_bucket_iterate (pager_bucket, add_pager_max_prot);
+
+ enable_caching ();
+ }
+
+ ports_enable_bucket (pager_bucket);
+
+ return max_prot;
+}