summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Braun <rbraun@sceen.net>2013-05-03 19:56:51 +0200
committerSamuel Thibault <samuel.thibault@ens-lyon.org>2013-09-16 01:22:25 +0200
commitea4802bc0975218544cb447df37b704f60ef2fde (patch)
treec129132f171f570bf89760e4bddc14317a440f79
parent0e847864cef404f555387d7fcc97f7dbe769e1e9 (diff)
Large store support for ext2fs
This is a revised version of the large store patch for ext2fs, written by Ognyan Kulev. It provides support for stores larger than 2 GiB. * ext2fs/balloc.c: Use the new disk_cache_block_ref and disk_cache_block_deref functions to access blocks from the disk cache. * ext2fs/ext2fs.c (main): Update initialization call to pokel_init, and call map_hypermetadata instead of get_hypermetadata. * ext2fs/ext2fs.h: Include <hurd/store.h> and <hurd/ihash.h>. (DISK_CACHE_BLOCKS): New macro. (DC_INCORE): Likewise. (DC_UNTOUCHED): Likewise. (DC_FIXED): Likewise. (DC_DONT_REUSE): Likewise. (DC_NO_BLOCK): Likewise. (DISK_CACHE_LAST_READ_XOR) [!NDEBUG]: Likewise. (struct disk_cache_info): New structure. (disk_cache): New external variable. (disk_cache_size): Likewise. (disk_cache_blocks): Likewise. (disk_cache_bptr): Likewise. (disk_cache_info): Likewise. (disk_cache_lock): Likewise. (disk_cache_reassociation): Likewise. (disk_cache_block_ref): New declaration. (disk_cache_block_ref_ptr): Likewise. (disk_cache_block_deref): Likewise. (disk_cache_block_is_ref): Likewise. (map_hypermetadata): Likewise. (trunc_block): Cast to off_t. (round_block): Likewise. (boffs): Likewise. (bptr_index): New macro. (boffs_ptr): Rewrite as an inline function to make it look up a block from the disk cache. (bptr_offs): Likewise. (dino): Remove function, replaced with ... (dino_ref): ... this one, which adds a reference to the inode block. (dino_deref): New inline function. (record_global_poke): Make sure block is referenced. (record_indir_poke): Likewise. (sync_global_ptr): Remove block reference, and adjust call to pager_sync_some. (sync_global): Add debug call to print wait parameter. * ext2fs/getblk.c: Use the new disk_cache_block_ref and disk_cache_block_deref functions to access blocks from the disk cache. * ext2fs/hyper.c (get_hypermetadata): Read the superblock from the store now that it's not directly mapped in memory. Move the initialization of zeroblock here from ... (map_hypermetadata): ... here. Also, set the superblock pointer. (diskfs_set_hypermetadata): Add a reference to the superblock. (diskfs_readonly_changed): Update call to mprotect. * ext2fs/ialloc.c: Use the new disk_cache_block_ref, disk_cache_block_ref_ptr and disk_cache_block_deref functions to access blocks from the disk cache. * ext2fs/inode.c: Update calls that used the disk image to use the disk cache, and use the new reference handling functions where appropriate. * ext2fs/pager.c: Include <unistd.h> and "../libpager/priv.h". (disk_image): Remove global variable. (disk_pager_read_page): Update cache information. (disk_pager_write_page): Likewise. (disk_pager_notify_evict): New function. (pager_notify_evict): Call disk_pager_notify_evict appropriately. (disk_cache): New global variable. (disk_cache_size): Likewise. (disk_cache_blocks): Likewise. (disk_cache_bptr): Likewise. (disk_cache_info): Likewise. (disk_cache_hint): Likewise. (disk_cache_lock): Likewise. (disk_cache_reassociation): Likewise. (disk_cache_init): New function. (disk_cache_return_unused): Likewise. (disk_cache_block_ref): Likewise. (disk_cache_block_ref_ptr): Likewise. (disk_cache_block_deref): Likewise. (disk_cache_block_is_ref): Likewise. (create_disk_pager): Update initialization of the disk pager. * ext2fs/pokel.c (pokel_add): Drop block references with disk_cache_block_deref. (_pokel_exec): Likewise. * ext2fs/truncate.c (trunc_indirect): Use the new disk_cache_block_ref and disk_cache_block_deref functions to access blocks from the disk cache.
-rw-r--r--ext2fs/balloc.c31
-rw-r--r--ext2fs/ext2fs.c6
-rw-r--r--ext2fs/ext2fs.h145
-rw-r--r--ext2fs/getblk.c21
-rw-r--r--ext2fs/hyper.c35
-rw-r--r--ext2fs/ialloc.c37
-rw-r--r--ext2fs/inode.c40
-rw-r--r--ext2fs/pager.c461
-rw-r--r--ext2fs/pokel.c39
-rw-r--r--ext2fs/truncate.c9
10 files changed, 733 insertions, 91 deletions
diff --git a/ext2fs/balloc.c b/ext2fs/balloc.c
index b2d2eab9..efef8ae4 100644
--- a/ext2fs/balloc.c
+++ b/ext2fs/balloc.c
@@ -92,7 +92,7 @@ ext2_free_blocks (block_t block, unsigned long count)
block, count);
}
gdp = group_desc (block_group);
- bh = bptr (gdp->bg_block_bitmap);
+ bh = disk_cache_block_ref (gdp->bg_block_bitmap);
if (in_range (gdp->bg_block_bitmap, block, gcount) ||
in_range (gdp->bg_inode_bitmap, block, gcount) ||
@@ -114,6 +114,7 @@ ext2_free_blocks (block_t block, unsigned long count)
}
record_global_poke (bh);
+ disk_cache_block_ref_ptr (gdp);
record_global_poke (gdp);
block += gcount;
@@ -139,7 +140,7 @@ ext2_new_block (block_t goal,
block_t prealloc_goal,
block_t *prealloc_count, block_t *prealloc_block)
{
- char *bh;
+ char *bh = NULL;
char *p, *r;
int i, j, k, tmp;
unsigned long lmap;
@@ -165,6 +166,7 @@ ext2_new_block (block_t goal,
ext2_debug ("goal=%u", goal);
repeat:
+ assert (bh == NULL);
/*
* First, test whether the goal block is free.
*/
@@ -179,7 +181,7 @@ repeat:
if (j)
goal_attempts++;
#endif
- bh = bptr (gdp->bg_block_bitmap);
+ bh = disk_cache_block_ref (gdp->bg_block_bitmap);
ext2_debug ("goal is at %d:%d", i, j);
@@ -245,6 +247,9 @@ repeat:
j = k;
goto got_block;
}
+
+ disk_cache_block_deref (bh);
+ bh = NULL;
}
ext2_debug ("bit not found in block group %d", i);
@@ -267,7 +272,8 @@ repeat:
pthread_spin_unlock (&global_lock);
return 0;
}
- bh = bptr (gdp->bg_block_bitmap);
+ assert (bh == NULL);
+ bh = disk_cache_block_ref (gdp->bg_block_bitmap);
r = memscan (bh, 0, sblock->s_blocks_per_group >> 3);
j = (r - bh) << 3;
if (j < sblock->s_blocks_per_group)
@@ -277,12 +283,15 @@ repeat:
sblock->s_blocks_per_group);
if (j >= sblock->s_blocks_per_group)
{
+ disk_cache_block_deref (bh);
+ bh = NULL;
ext2_error ("free blocks count corrupted for block group %d", i);
pthread_spin_unlock (&global_lock);
return 0;
}
search_back:
+ assert (bh != NULL);
/*
* We have succeeded in finding a free byte in the block
* bitmap. Now search backwards up to 7 bits to find the
@@ -291,6 +300,7 @@ search_back:
for (k = 0; k < 7 && j > 0 && !test_bit (j - 1, bh); k++, j--);
got_block:
+ assert (bh != NULL);
ext2_debug ("using block group %d (%d)", i, gdp->bg_free_blocks_count);
@@ -304,6 +314,8 @@ got_block:
if (set_bit (j, bh))
{
ext2_warning ("bit already set for block %d", j);
+ disk_cache_block_deref (bh);
+ bh = NULL;
goto repeat;
}
@@ -351,6 +363,7 @@ got_block:
j = tmp;
record_global_poke (bh);
+ bh = NULL;
if (j >= sblock->s_blocks_count)
{
@@ -363,12 +376,14 @@ got_block:
j, goal_hits, goal_attempts);
gdp->bg_free_blocks_count--;
+ disk_cache_block_ref_ptr (gdp);
record_global_poke (gdp);
sblock->s_free_blocks_count--;
sblock_dirty = 1;
sync_out:
+ assert (bh == NULL);
pthread_spin_unlock (&global_lock);
alloc_sync (0);
@@ -390,9 +405,12 @@ ext2_count_free_blocks ()
gdp = NULL;
for (i = 0; i < groups_count; i++)
{
+ void *bh;
gdp = group_desc (i);
desc_count += gdp->bg_free_blocks_count;
- x = count_free (bptr (gdp->bg_block_bitmap), block_size);
+ bh = disk_cache_block_ref (gdp->bg_block_bitmap);
+ x = count_free (bh, block_size);
+ disk_cache_block_deref (bh);
printf ("group %d: stored = %d, counted = %lu",
i, gdp->bg_free_blocks_count, x);
bitmap_count += x;
@@ -453,7 +471,7 @@ ext2_check_blocks_bitmap ()
gdp = group_desc (i);
desc_count += gdp->bg_free_blocks_count;
- bh = bptr (gdp->bg_block_bitmap);
+ bh = disk_cache_block_ref (gdp->bg_block_bitmap);
if (!EXT2_HAS_RO_COMPAT_FEATURE (sblock,
EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)
@@ -479,6 +497,7 @@ ext2_check_blocks_bitmap ()
ext2_error ("block #%d of the inode table in group %d is marked free", j, i);
x = count_free (bh, block_size);
+ disk_cache_block_deref (bh);
if (gdp->bg_free_blocks_count != x)
ext2_error ("wrong free blocks count for group %d,"
" stored = %d, counted = %lu",
diff --git a/ext2fs/ext2fs.c b/ext2fs/ext2fs.c
index 993f1997..128b6edd 100644
--- a/ext2fs/ext2fs.c
+++ b/ext2fs/ext2fs.c
@@ -181,9 +181,9 @@ main (int argc, char **argv)
/* Map the entire disk. */
create_disk_pager ();
- pokel_init (&global_pokel, diskfs_disk_pager, disk_image);
+ pokel_init (&global_pokel, diskfs_disk_pager, disk_cache);
- get_hypermetadata();
+ map_hypermetadata ();
inode_init ();
@@ -211,6 +211,8 @@ diskfs_reload_global_state ()
{
pokel_flush (&global_pokel);
pager_flush (diskfs_disk_pager, 1);
+ sblock = NULL;
get_hypermetadata ();
+ map_hypermetadata ();
return 0;
}
diff --git a/ext2fs/ext2fs.h b/ext2fs/ext2fs.h
index 52bf2b19..e01d1a59 100644
--- a/ext2fs/ext2fs.h
+++ b/ext2fs/ext2fs.h
@@ -23,7 +23,9 @@
#include <hurd/pager.h>
#include <hurd/fshelp.h>
#include <hurd/iohelp.h>
+#include <hurd/store.h>
#include <hurd/diskfs.h>
+#include <hurd/ihash.h>
#include <assert.h>
#include <pthread.h>
#include <sys/mman.h>
@@ -195,6 +197,8 @@ struct user_pager_info
/* ---------------------------------------------------------------- */
/* pager.c */
+#define DISK_CACHE_BLOCKS 65536
+
#include <hurd/diskfs-pager.h>
/* Set up the disk pager. */
@@ -218,10 +222,54 @@ extern struct store *store;
/* What the user specified. */
extern struct store_parsed *store_parsed;
-/* Mapped image of the disk. */
-extern void *disk_image;
+/* Mapped image of cached blocks of the disk. */
+extern void *disk_cache;
+extern store_offset_t disk_cache_size;
+extern int disk_cache_blocks;
+
+#define DC_INCORE 0x01 /* Not in core. */
+#define DC_UNTOUCHED 0x02 /* Not touched by disk_pager_read_paged
+ or disk_cache_block_ref. */
+#define DC_FIXED 0x04 /* Must not be re-associated. */
+
+/* Flags that forbid re-association of page. DC_UNTOUCHED is included
+ because this flag is used only when page is already to be
+ re-associated, so it's not good candidate for another
+ remapping. */
+#define DC_DONT_REUSE (DC_INCORE | DC_UNTOUCHED | DC_FIXED)
+
+#define DC_NO_BLOCK ((block_t) -1L)
+
+#ifndef NDEBUG
+#define DISK_CACHE_LAST_READ_XOR 0xDEADBEEF
+#endif
+
+/* Disk cache blocks' meta info. */
+struct disk_cache_info
+{
+ block_t block;
+ uint16_t flags;
+ uint16_t ref_count;
+#ifndef NDEBUG
+ block_t last_read, last_read_xor;
+#endif
+};
-/* Our in-core copy of the super-block (pointer into the disk_image). */
+/* block num --> pointer to in-memory block */
+extern hurd_ihash_t disk_cache_bptr;
+/* Metadata about cached block. */
+extern struct disk_cache_info *disk_cache_info;
+/* Lock for these mappings */
+extern pthread_mutex_t disk_cache_lock;
+/* Fired when a re-association is done. */
+extern pthread_cond_t disk_cache_reassociation;
+
+void *disk_cache_block_ref (block_t block);
+void disk_cache_block_ref_ptr (void *ptr);
+void disk_cache_block_deref (void *ptr);
+int disk_cache_block_is_ref (block_t block);
+
+/* Our in-core copy of the super-block (pointer into the disk_cache). */
struct ext2_super_block *sblock;
/* True if sblock has been modified. */
int sblock_dirty;
@@ -251,6 +299,9 @@ vm_address_t zeroblock;
/* Get the superblock from the disk, & setup various global info from it. */
void get_hypermetadata ();
+
+/* Map `sblock' and `group_desc_image' pointers to disk cache. */
+void map_hypermetadata ();
/* ---------------------------------------------------------------- */
/* Random stuff calculated from the super block. */
@@ -274,21 +325,51 @@ pthread_spinlock_t generation_lock;
unsigned long next_generation;
/* ---------------------------------------------------------------- */
-/* Functions for looking inside disk_image */
+/* Functions for looking inside disk_cache */
-#define trunc_block(offs) (((offs) >> log2_block_size) << log2_block_size)
+#define trunc_block(offs) \
+ ((off_t) ((offs) >> log2_block_size) << log2_block_size)
#define round_block(offs) \
- ((((offs) + block_size - 1) >> log2_block_size) << log2_block_size)
+ ((off_t) (((offs) + block_size - 1) >> log2_block_size) << log2_block_size)
/* block num --> byte offset on disk */
-#define boffs(block) ((block) << log2_block_size)
+#define boffs(block) ((off_t) (block) << log2_block_size)
/* byte offset on disk --> block num */
#define boffs_block(offs) ((offs) >> log2_block_size)
+/* pointer to in-memory block -> index in disk_cache_info */
+#define bptr_index(ptr) (((char *)ptr - (char *)disk_cache) >> log2_block_size)
+
/* byte offset on disk --> pointer to in-memory block */
-#define boffs_ptr(offs) (((char *)disk_image) + (offs))
+EXT2FS_EI char *
+boffs_ptr (off_t offset)
+{
+ block_t block = boffs_block (offset);
+ pthread_mutex_lock (&disk_cache_lock);
+ char *ptr = hurd_ihash_find (disk_cache_bptr, block);
+ pthread_mutex_unlock (&disk_cache_lock);
+ assert (ptr);
+ ptr += offset % block_size;
+ ext2_debug ("(%lld) = %p", offset, ptr);
+ return ptr;
+}
+
/* pointer to in-memory block --> byte offset on disk */
-#define bptr_offs(ptr) ((char *)(ptr) - ((char *)disk_image))
+EXT2FS_EI off_t
+bptr_offs (void *ptr)
+{
+ vm_offset_t mem_offset = (char *)ptr - (char *)disk_cache;
+ off_t offset;
+ assert (mem_offset < disk_cache_size);
+ pthread_mutex_lock (&disk_cache_lock);
+ offset = (off_t) disk_cache_info[boffs_block (mem_offset)].block
+ << log2_block_size;
+ assert (offset || mem_offset < block_size);
+ offset += mem_offset % block_size;
+ pthread_mutex_unlock (&disk_cache_lock);
+ ext2_debug ("(%p) = %lld", ptr, offset);
+ return offset;
+}
/* block num --> pointer to in-memory block */
#define bptr(block) boffs_ptr(boffs(block))
@@ -308,14 +389,24 @@ extern struct ext2_inode *dino (ino_t inum);
#if defined(__USE_EXTERN_INLINES) || defined(EXT2FS_DEFINE_EI)
/* Convert an inode number to the dinode on disk. */
EXT2FS_EI struct ext2_inode *
-dino (ino_t inum)
+dino_ref (ino_t inum)
{
unsigned long inodes_per_group = sblock->s_inodes_per_group;
unsigned long bg_num = (inum - 1) / inodes_per_group;
unsigned long group_inum = (inum - 1) % inodes_per_group;
- struct ext2_group_desc *bg = group_desc(bg_num);
+ struct ext2_group_desc *bg = group_desc (bg_num);
block_t block = bg->bg_inode_table + (group_inum / inodes_per_block);
- return ((struct ext2_inode *)bptr(block)) + group_inum % inodes_per_block;
+ struct ext2_inode *inode = disk_cache_block_ref (block);
+ inode += group_inum % inodes_per_block;
+ ext2_debug ("(%llu) = %p", inum, inode);
+ return inode;
+}
+
+EXT2FS_EI void
+dino_deref (struct ext2_inode *inode)
+{
+ ext2_debug ("(%p)", inode);
+ disk_cache_block_deref (inode);
}
#endif /* Use extern inlines. */
@@ -377,27 +468,38 @@ global_block_modified (block_t block)
EXT2FS_EI void
record_global_poke (void *ptr)
{
- int boffs = trunc_block (bptr_offs (ptr));
- global_block_modified (boffs_block (boffs));
- pokel_add (&global_pokel, boffs_ptr(boffs), block_size);
+ block_t block = boffs_block (bptr_offs (ptr));
+ void *block_ptr = bptr (block);
+ ext2_debug ("(%p = %p)", ptr, block_ptr);
+ assert (disk_cache_block_is_ref (block));
+ global_block_modified (block);
+ pokel_add (&global_pokel, block_ptr, block_size);
}
/* This syncs a modification to a non-file block. */
EXT2FS_EI void
sync_global_ptr (void *bptr, int wait)
{
- vm_offset_t boffs = trunc_block (bptr_offs (bptr));
- global_block_modified (boffs_block (boffs));
- pager_sync_some (diskfs_disk_pager, trunc_page (boffs), vm_page_size, wait);
+ block_t block = boffs_block (bptr_offs (bptr));
+ void *block_ptr = bptr (block);
+ ext2_debug ("(%p -> %u)", bptr, block);
+ global_block_modified (block);
+ disk_cache_block_deref (block_ptr);
+ pager_sync_some (diskfs_disk_pager,
+ block_ptr - disk_cache, block_size, wait);
+
}
/* This records a modification to one of a file's indirect blocks. */
EXT2FS_EI void
record_indir_poke (struct node *node, void *ptr)
{
- int boffs = trunc_block (bptr_offs (ptr));
- global_block_modified (boffs_block (boffs));
- pokel_add (&node->dn->indir_pokel, boffs_ptr(boffs), block_size);
+ block_t block = boffs_block (bptr_offs (ptr));
+ void *block_ptr = bptr (block);
+ ext2_debug ("(%llu, %p)", node->cache_id, ptr);
+ assert (disk_cache_block_is_ref (block));
+ global_block_modified (block);
+ pokel_add (&node->dn->indir_pokel, block_ptr, block_size);
}
/* ---------------------------------------------------------------- */
@@ -405,6 +507,7 @@ record_indir_poke (struct node *node, void *ptr)
EXT2FS_EI void
sync_global (int wait)
{
+ ext2_debug ("%d", wait);
pokel_sync (&global_pokel, wait);
}
diff --git a/ext2fs/getblk.c b/ext2fs/getblk.c
index 23ba6459..bde66e1c 100644
--- a/ext2fs/getblk.c
+++ b/ext2fs/getblk.c
@@ -104,7 +104,7 @@ ext2_alloc_block (struct node *node, block_t goal, int zero)
if (result && zero)
{
- char *bh = bptr (result);
+ char *bh = disk_cache_block_ref (result);
bzero (bh, block_size);
record_indir_poke (node, bh);
}
@@ -122,6 +122,8 @@ inode_getblk (struct node *node, int nr, int create, int zero,
block_t hint;
#endif
+ assert (0 <= nr && nr < EXT2_N_BLOCKS);
+
*result = node->dn->info.i_data[nr];
if (*result)
return 0;
@@ -180,14 +182,20 @@ block_getblk (struct node *node, block_t block, int nr, int create, int zero,
{
int i;
block_t goal = 0;
- block_t *bh = (block_t *)bptr (block);
+ block_t *bh = (block_t *)disk_cache_block_ref (block);
*result = bh[nr];
if (*result)
- return 0;
+ {
+ disk_cache_block_deref (bh);
+ return 0;
+ }
if (!create)
- return EINVAL;
+ {
+ disk_cache_block_deref (bh);
+ return EINVAL;
+ }
if (node->dn->info.i_next_alloc_block == new_block)
goal = node->dn->info.i_next_alloc_goal;
@@ -207,7 +215,10 @@ block_getblk (struct node *node, block_t block, int nr, int create, int zero,
*result = ext2_alloc_block (node, goal, zero);
if (!*result)
- return ENOSPC;
+ {
+ disk_cache_block_deref (bh);
+ return ENOSPC;
+ }
bh[nr] = *result;
diff --git a/ext2fs/hyper.c b/ext2fs/hyper.c
index bee4175f..5bcc2abe 100644
--- a/ext2fs/hyper.c
+++ b/ext2fs/hyper.c
@@ -58,11 +58,14 @@ static int ext2fs_clean; /* fs clean before we started writing? */
void
get_hypermetadata (void)
{
- error_t err = diskfs_catch_exception ();
- if (err)
- ext2_panic ("can't read superblock: %s", strerror (err));
+ error_t err;
+ size_t read = 0;
- sblock = (struct ext2_super_block *) boffs_ptr (SBLOCK_OFFS);
+ assert (! sblock);
+ err = store_read (store, SBLOCK_OFFS >> store->log2_block_size,
+ SBLOCK_SIZE, (void **)&sblock, &read);
+ if (err || read != SBLOCK_SIZE)
+ ext2_panic ("Cannot read hypermetadata");
if (sblock->s_magic != EXT2_SUPER_MAGIC
#ifdef EXT2FS_PRE_02B_COMPAT
@@ -152,15 +155,25 @@ get_hypermetadata (void)
allocate_mod_map ();
- diskfs_end_catch_exception ();
+ /* A handy source of page-aligned zeros. */
+ if (zeroblock == 0)
+ {
+ zeroblock = (vm_address_t) mmap (0, block_size, PROT_READ, MAP_ANON, 0, 0);
+ assert (zeroblock != (vm_address_t) MAP_FAILED);
+ }
+
+ munmap (sblock, SBLOCK_SIZE);
+ sblock = NULL;
+}
+
+void
+map_hypermetadata (void)
+{
+ sblock = (struct ext2_super_block *) boffs_ptr (SBLOCK_OFFS);
/* Cache a convenient pointer to the block group descriptors for allocation.
These are stored in the filesystem blocks following the superblock. */
group_desc_image = (struct ext2_group_desc *) bptr (bptr_block (sblock) + 1);
-
- /* A handy source of page-aligned zeros. */
- if (zeroblock == 0)
- zeroblock = (vm_address_t) mmap (0, block_size, PROT_READ, MAP_ANON, 0, 0);
}
error_t
@@ -183,6 +196,7 @@ diskfs_set_hypermetadata (int wait, int clean)
if (sblock_dirty)
{
sblock_dirty = 0;
+ disk_cache_block_ref_ptr (sblock);
record_global_poke (sblock);
}
@@ -199,7 +213,8 @@ diskfs_readonly_changed (int readonly)
(*(readonly ? store_set_flags : store_clear_flags)) (store, STORE_READONLY);
- mprotect (disk_image, store->size, PROT_READ | (readonly ? 0 : PROT_WRITE));
+ mprotect (disk_cache, disk_cache_size,
+ PROT_READ | (readonly ? 0 : PROT_WRITE));
if (!readonly && !(sblock->s_state & EXT2_VALID_FS))
ext2_warning ("UNCLEANED FILESYSTEM NOW WRITABLE");
diff --git a/ext2fs/ialloc.c b/ext2fs/ialloc.c
index aa018d94..2d8e51e0 100644
--- a/ext2fs/ialloc.c
+++ b/ext2fs/ialloc.c
@@ -75,22 +75,25 @@ diskfs_free_node (struct node *np, mode_t old_mode)
bit = (inum - 1) % sblock->s_inodes_per_group;
gdp = group_desc (block_group);
- bh = bptr (gdp->bg_inode_bitmap);
+ bh = disk_cache_block_ref (gdp->bg_inode_bitmap);
if (!clear_bit (bit, bh))
ext2_warning ("bit already cleared for inode %Ld", inum);
else
{
+ disk_cache_block_ref_ptr (bh);
record_global_poke (bh);
gdp->bg_free_inodes_count++;
if (S_ISDIR (old_mode))
gdp->bg_used_dirs_count--;
+ disk_cache_block_ref_ptr (gdp);
record_global_poke (gdp);
sblock->s_free_inodes_count++;
}
+ disk_cache_block_deref (bh);
sblock_dirty = 1;
pthread_spin_unlock (&global_lock);
alloc_sync(0);
@@ -111,7 +114,7 @@ diskfs_free_node (struct node *np, mode_t old_mode)
ino_t
ext2_alloc_inode (ino_t dir_inum, mode_t mode)
{
- char *bh;
+ char *bh = NULL;
int i, j, inum, avefreei;
struct ext2_group_desc *gdp;
struct ext2_group_desc *tmp;
@@ -119,6 +122,7 @@ ext2_alloc_inode (ino_t dir_inum, mode_t mode)
pthread_spin_lock (&global_lock);
repeat:
+ assert (bh == NULL);
gdp = NULL;
i = 0;
@@ -213,7 +217,7 @@ repeat:
return 0;
}
- bh = bptr (gdp->bg_inode_bitmap);
+ bh = disk_cache_block_ref (gdp->bg_inode_bitmap);
if ((inum =
find_first_zero_bit ((unsigned long *) bh, sblock->s_inodes_per_group))
< sblock->s_inodes_per_group)
@@ -221,12 +225,17 @@ repeat:
if (set_bit (inum, bh))
{
ext2_warning ("bit already set for inode %d", inum);
+ disk_cache_block_deref (bh);
+ bh = NULL;
goto repeat;
}
record_global_poke (bh);
+ bh = NULL;
}
else
{
+ disk_cache_block_deref (bh);
+ bh = NULL;
if (gdp->bg_free_inodes_count != 0)
{
ext2_error ("free inodes count corrupted in group %d", i);
@@ -248,15 +257,25 @@ repeat:
gdp->bg_free_inodes_count--;
if (S_ISDIR (mode))
gdp->bg_used_dirs_count++;
+ disk_cache_block_ref_ptr (gdp);
record_global_poke (gdp);
sblock->s_free_inodes_count--;
sblock_dirty = 1;
sync_out:
+ assert (bh == NULL);
pthread_spin_unlock (&global_lock);
alloc_sync (0);
+ /* Make sure the coming read_node won't complain about bad
+ fields. */
+ {
+ struct ext2_inode *di = dino_ref (inum);
+ memset (di, 0, sizeof *di);
+ dino_deref (di);
+ }
+
return inum;
}
@@ -353,10 +372,12 @@ ext2_count_free_inodes ()
gdp = NULL;
for (i = 0; i < groups_count; i++)
{
+ void *bh;
gdp = group_desc (i);
desc_count += gdp->bg_free_inodes_count;
- x = count_free (bptr (gdp->bg_inode_bitmap),
- sblock->s_inodes_per_group / 8);
+ bh = disk_cache_block_ref (gdp->bg_inode_bitmap);
+ x = count_free (bh, sblock->s_inodes_per_group / 8);
+ disk_cache_block_deref (bh);
ext2_debug ("group %d: stored = %d, counted = %lu",
i, gdp->bg_free_inodes_count, x);
bitmap_count += x;
@@ -386,10 +407,12 @@ ext2_check_inodes_bitmap ()
gdp = NULL;
for (i = 0; i < groups_count; i++)
{
+ void *bh;
gdp = group_desc (i);
desc_count += gdp->bg_free_inodes_count;
- x = count_free (bptr (gdp->bg_inode_bitmap),
- sblock->s_inodes_per_group / 8);
+ bh = disk_cache_block_ref (gdp->bg_inode_bitmap);
+ x = count_free (bh, sblock->s_inodes_per_group / 8);
+ disk_cache_block_deref (bh);
if (gdp->bg_free_inodes_count != x)
ext2_error ("wrong free inodes count in group %d, "
"stored = %d, counted = %lu",
diff --git a/ext2fs/inode.c b/ext2fs/inode.c
index 2c442795..e75c63f9 100644
--- a/ext2fs/inode.c
+++ b/ext2fs/inode.c
@@ -92,7 +92,7 @@ diskfs_cached_lookup (ino_t inum, struct node **npp)
dn->dir_idx = 0;
dn->pager = 0;
pthread_rwlock_init (&dn->alloc_lock, NULL);
- pokel_init (&dn->indir_pokel, diskfs_disk_pager, disk_image);
+ pokel_init (&dn->indir_pokel, diskfs_disk_pager, disk_cache);
/* Create the new node. */
np = diskfs_make_node (dn);
@@ -201,13 +201,17 @@ read_node (struct node *np)
error_t err;
struct stat *st = &np->dn_stat;
struct disknode *dn = np->dn;
- struct ext2_inode *di = dino (np->cache_id);
+ struct ext2_inode *di;
struct ext2_inode_info *info = &dn->info;
+ ext2_debug ("(%llu)", np->cache_id);
+
err = diskfs_catch_exception ();
if (err)
return err;
+ di = dino_ref (np->cache_id);
+
st->st_fstype = FSTYPE_EXT2FS;
st->st_fsid = getpid (); /* This call is very cheap. */
st->st_ino = np->cache_id;
@@ -285,7 +289,9 @@ read_node (struct node *np)
info->i_high_size = di->i_size_high;
if (info->i_high_size) /* XXX */
{
+ dino_deref (di);
ext2_warning ("cannot handle large file inode %Ld", np->cache_id);
+ diskfs_end_catch_exception ();
return EFBIG;
}
}
@@ -307,6 +313,7 @@ read_node (struct node *np)
}
dn->info_i_translator = di->i_translator;
+ dino_deref (di);
diskfs_end_catch_exception ();
if (S_ISREG (st->st_mode) || S_ISDIR (st->st_mode)
@@ -408,7 +415,9 @@ write_node (struct node *np)
{
error_t err;
struct stat *st = &np->dn_stat;
- struct ext2_inode *di = dino (np->cache_id);
+ struct ext2_inode *di;
+
+ ext2_debug ("(%llu)", np->cache_id);
if (np->dn->info.i_prealloc_count)
ext2_discard_prealloc (np);
@@ -425,6 +434,8 @@ write_node (struct node *np)
if (err)
return NULL;
+ di = dino_ref (np->cache_id);
+
di->i_generation = st->st_gen;
/* We happen to know that the stat mode bits are the same
@@ -505,6 +516,7 @@ write_node (struct node *np)
diskfs_end_catch_exception ();
np->dn_stat_dirty = 0;
+ /* Leave invoking dino_deref (di) to the caller. */
return di;
}
else
@@ -674,7 +686,7 @@ diskfs_set_translator (struct node *np, const char *name, unsigned namelen,
if (err)
return err;
- di = dino (np->cache_id);
+ di = dino_ref (np->cache_id);
blkno = di->i_translator;
if (namelen && !blkno)
@@ -687,6 +699,7 @@ diskfs_set_translator (struct node *np, const char *name, unsigned namelen,
0, 0, 0);
if (blkno == 0)
{
+ dino_deref (di);
diskfs_end_catch_exception ();
return ENOSPC;
}
@@ -710,15 +723,20 @@ diskfs_set_translator (struct node *np, const char *name, unsigned namelen,
np->dn_stat.st_mode &= ~S_IPTRANS;
np->dn_set_ctime = 1;
}
+ else
+ dino_deref (di);
if (namelen)
{
+ void *blkptr;
+
buf[0] = namelen & 0xFF;
buf[1] = (namelen >> 8) & 0xFF;
bcopy (name, buf + 2, namelen);
- bcopy (buf, bptr (blkno), block_size);
- record_global_poke (bptr (blkno));
+ blkptr = disk_cache_block_ref (blkno);
+ memcpy (blkptr, buf, block_size);
+ record_global_poke (blkptr);
np->dn_stat.st_mode |= S_IPTRANS;
np->dn_set_ctime = 1;
@@ -736,7 +754,8 @@ diskfs_get_translator (struct node *np, char **namep, unsigned *namelen)
error_t err = 0;
daddr_t blkno;
unsigned datalen;
- const void *transloc;
+ void *transloc;
+ struct ext2_inode *di;
assert (sblock->s_creator_os == EXT2_OS_HURD);
@@ -744,9 +763,11 @@ diskfs_get_translator (struct node *np, char **namep, unsigned *namelen)
if (err)
return err;
- blkno = (dino (np->cache_id))->i_translator;
+ di = dino_ref (np->cache_id);
+ blkno = di->i_translator;
+ dino_deref (di);
assert (blkno);
- transloc = bptr (blkno);
+ transloc = disk_cache_block_ref (blkno);
datalen =
((unsigned char *)transloc)[0] + (((unsigned char *)transloc)[1] << 8);
@@ -761,6 +782,7 @@ diskfs_get_translator (struct node *np, char **namep, unsigned *namelen)
memcpy (*namep, transloc + 2, datalen);
}
+ disk_cache_block_deref (transloc);
diskfs_end_catch_exception ();
*namelen = datalen;
diff --git a/ext2fs/pager.c b/ext2fs/pager.c
index 92137112..6e99c837 100644
--- a/ext2fs/pager.c
+++ b/ext2fs/pager.c
@@ -18,17 +18,18 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <hurd/store.h>
#include "ext2fs.h"
+/* XXX */
+#include "../libpager/priv.h"
+
/* A ports bucket to hold pager ports. */
struct port_bucket *pager_bucket;
-/* Mapped image of the disk. */
-void *disk_image;
-
pthread_spinlock_t node_to_page_lock = PTHREAD_SPINLOCK_INITIALIZER;
@@ -165,6 +166,9 @@ file_pager_read_page (struct node *node, vm_offset_t page,
block_t pending_blocks = 0;
int num_pending_blocks = 0;
+ ext2_debug ("reading inode %llu page %lu[%u]",
+ node->cache_id, page, vm_page_size);
+
/* Read the NUM_PENDING_BLOCKS blocks in PENDING_BLOCKS, into the buffer
pointed to by BUF (allocating it if necessary) at offset OFFS. OFFS in
adjusted by the amount read, and NUM_PENDING_BLOCKS is zeroed. Any read
@@ -173,7 +177,8 @@ file_pager_read_page (struct node *node, vm_offset_t page,
{
if (num_pending_blocks > 0)
{
- block_t dev_block = pending_blocks << log2_dev_blocks_per_fs_block;
+ store_offset_t dev_block = (store_offset_t) pending_blocks
+ << log2_dev_blocks_per_fs_block;
size_t amount = num_pending_blocks << log2_block_size;
/* The buffer we try to read into; on the first read, we pass in a
size of zero, so that the read is guaranteed to allocate a new
@@ -297,7 +302,8 @@ pending_blocks_write (struct pending_blocks *pb)
if (pb->num > 0)
{
error_t err;
- block_t dev_block = pb->block << log2_dev_blocks_per_fs_block;
+ store_offset_t dev_block = (store_offset_t) pb->block
+ << log2_dev_blocks_per_fs_block;
size_t length = pb->num << log2_block_size, amount;
ext2_debug ("writing block %u[%ld]", pb->block, pb->num);
@@ -359,7 +365,7 @@ pending_blocks_add (struct pending_blocks *pb, block_t block)
return 0;
}
-/* Write one page for the pager backing NODE, at offset PAGE, into BUF. This
+/* Write one page for the pager backing NODE, at OFFSET, into BUF. This
may need to write several filesystem blocks to satisfy one page, and tries
to consolidate the i/o if possible. */
static error_t
@@ -411,12 +417,28 @@ disk_pager_read_page (vm_offset_t page, void **buf, int *writelock)
{
error_t err;
size_t length = vm_page_size, read = 0;
- vm_size_t dev_end = store->size;
+ store_offset_t offset = page, dev_end = store->size;
+ int index = offset >> log2_block_size;
+
+ pthread_mutex_lock (&disk_cache_lock);
+ offset = ((store_offset_t) disk_cache_info[index].block << log2_block_size)
+ + offset % block_size;
+ disk_cache_info[index].flags |= DC_INCORE;
+ disk_cache_info[index].flags &=~ DC_UNTOUCHED;
+#ifndef NDEBUG
+ disk_cache_info[index].last_read = disk_cache_info[index].block;
+ disk_cache_info[index].last_read_xor
+ = disk_cache_info[index].block ^ DISK_CACHE_LAST_READ_XOR;
+#endif
+ pthread_mutex_unlock (&disk_cache_lock);
+
+ ext2_debug ("(%lld)", offset >> log2_block_size);
- if (page + vm_page_size > dev_end)
- length = dev_end - page;
+ if (offset + vm_page_size > dev_end)
+ length = dev_end - offset;
- err = store_read (store, page >> store->log2_block_size, length, buf, &read);
+ err = store_read (store, offset >> store->log2_block_size, length,
+ buf, &read);
if (read != length)
return EIO;
if (!err && length != vm_page_size)
@@ -432,26 +454,38 @@ disk_pager_write_page (vm_offset_t page, void *buf)
{
error_t err = 0;
size_t length = vm_page_size, amount;
- vm_size_t dev_end = store->size;
+ store_offset_t offset = page, dev_end = store->size;
+ int index = offset >> log2_block_size;
+
+ pthread_mutex_lock (&disk_cache_lock);
+ assert (disk_cache_info[index].block != DC_NO_BLOCK);
+ offset = ((store_offset_t) disk_cache_info[index].block << log2_block_size)
+ + offset % block_size;
+#ifndef NDEBUG /* Not strictly needed. */
+ assert ((disk_cache_info[index].last_read ^ DISK_CACHE_LAST_READ_XOR)
+ == disk_cache_info[index].last_read_xor);
+ assert (disk_cache_info[index].last_read
+ == disk_cache_info[index].block);
+#endif
+ pthread_mutex_unlock (&disk_cache_lock);
- if (page + vm_page_size > dev_end)
- length = dev_end - page;
+ if (offset + vm_page_size > dev_end)
+ length = dev_end - offset;
- ext2_debug ("writing disk page %d[%d]", page, length);
+ ext2_debug ("writing disk page %lld[%zu]", offset, length);
STAT_INC (disk_pageouts);
if (modified_global_blocks)
/* Be picky about which blocks in a page that we write. */
{
- vm_offset_t offs = page;
struct pending_blocks pb;
pending_blocks_init (&pb, buf);
while (length > 0 && !err)
{
- block_t block = boffs_block (offs);
+ block_t block = boffs_block (offset);
/* We don't clear the block modified bit here because this paging
write request may not be the same one that actually set the bit,
@@ -469,7 +503,7 @@ disk_pager_write_page (vm_offset_t page, void *buf)
/* Otherwise just skip it. */
err = pending_blocks_skip (&pb);
- offs += block_size;
+ offset += block_size;
length -= block_size;
}
@@ -478,7 +512,7 @@ disk_pager_write_page (vm_offset_t page, void *buf)
}
else
{
- err = store_write (store, page >> store->log2_block_size,
+ err = store_write (store, offset >> store->log2_block_size,
buf, length, &amount);
if (!err && length != amount)
err = EIO;
@@ -486,6 +520,18 @@ disk_pager_write_page (vm_offset_t page, void *buf)
return err;
}
+
+static void
+disk_pager_notify_evict (vm_offset_t page)
+{
+ unsigned long index = page >> log2_block_size;
+
+ ext2_debug ("(block %lu)", index);
+
+ pthread_mutex_lock (&disk_cache_lock);
+ disk_cache_info[index].flags &= ~DC_INCORE;
+ pthread_mutex_unlock (&disk_cache_lock);
+}
/* Satisfy a pager read request for either the disk pager or file pager
PAGER, to the page at offset PAGE into BUF. WRITELOCK should be set if
@@ -515,7 +561,8 @@ pager_write_page (struct user_pager_info *pager, vm_offset_t page,
void
pager_notify_evict (struct user_pager_info *pager, vm_offset_t page)
{
- assert (!"unrequested notification on eviction");
+ if (pager->type == DISK)
+ disk_pager_notify_evict (page);
}
@@ -774,6 +821,373 @@ pager_dropweak (struct user_pager_info *p __attribute__ ((unused)))
{
}
+/* Cached blocks from disk. */
+void *disk_cache;
+
+/* DISK_CACHE size in bytes and blocks. */
+store_offset_t disk_cache_size;
+int disk_cache_blocks;
+
+/* block num --> pointer to in-memory block */
+hurd_ihash_t disk_cache_bptr;
+/* Cached blocks' info. */
+struct disk_cache_info *disk_cache_info;
+/* Hint index for which cache block to reuse next. */
+int disk_cache_hint;
+/* Lock for these structures. */
+pthread_mutex_t disk_cache_lock;
+/* Fired when a re-association is done. */
+pthread_cond_t disk_cache_reassociation;
+
+/* Finish mapping initialization. */
+static void
+disk_cache_init (void)
+{
+ if (block_size != vm_page_size)
+ ext2_panic ("Block size %u != vm_page_size %u",
+ block_size, vm_page_size);
+
+ pthread_mutex_init (&disk_cache_lock, NULL);
+ pthread_cond_init (&disk_cache_reassociation, NULL);
+
+ /* Allocate space for block num -> in-memory pointer mapping. */
+ if (hurd_ihash_create (&disk_cache_bptr, HURD_IHASH_NO_LOCP))
+ ext2_panic ("Can't allocate memory for disk_pager_bptr");
+
+ /* Allocate space for disk cache blocks' info. */
+ disk_cache_info = malloc ((sizeof *disk_cache_info) * disk_cache_blocks);
+ if (!disk_cache_info)
+ ext2_panic ("Cannot allocate space for disk cache info");
+
+ /* Initialize disk_cache_info. */
+ for (int i = 0; i < disk_cache_blocks; i++)
+ {
+ disk_cache_info[i].block = DC_NO_BLOCK;
+ disk_cache_info[i].flags = 0;
+ disk_cache_info[i].ref_count = 0;
+#ifndef NDEBUG
+ disk_cache_info[i].last_read = DC_NO_BLOCK;
+ disk_cache_info[i].last_read_xor
+ = DC_NO_BLOCK ^ DISK_CACHE_LAST_READ_XOR;
+#endif
+ }
+ disk_cache_hint = 0;
+
+ /* Map the superblock and the block group descriptors. */
+ block_t fixed_first = boffs_block (SBLOCK_OFFS);
+ block_t fixed_last = fixed_first
+ + (round_block ((sizeof *group_desc_image) * groups_count)
+ >> log2_block_size);
+ ext2_debug ("%u-%u\n", fixed_first, fixed_last);
+ assert (fixed_last - fixed_first + 1 <= (block_t)disk_cache_blocks + 3);
+ for (block_t i = fixed_first; i <= fixed_last; i++)
+ {
+ disk_cache_block_ref (i);
+ assert (disk_cache_info[i-fixed_first].block == i);
+ disk_cache_info[i-fixed_first].flags |= DC_FIXED;
+ }
+}
+
+static void
+disk_cache_return_unused (void)
+{
+ int index;
+
+ /* XXX: Touch all pages. It seems that sometimes GNU Mach "forgets"
+ to notify us about evicted pages. Disk cache must be
+ unlocked. */
+ for (vm_offset_t i = 0; i < disk_cache_size; i += vm_page_size)
+ *(volatile char *)(disk_cache + i);
+
+ /* Release some references to cached blocks. */
+ pokel_sync (&global_pokel, 1);
+
+ /* Return unused pages that are in core. */
+ int pending_begin = -1, pending_end = -1;
+ pthread_mutex_lock (&disk_cache_lock);
+ for (index = 0; index < disk_cache_blocks; index++)
+ if (! (disk_cache_info[index].flags & (DC_DONT_REUSE & ~DC_INCORE))
+ && ! disk_cache_info[index].ref_count)
+ {
+ ext2_debug ("return %u -> %d",
+ disk_cache_info[index].block, index);
+ if (index != pending_end)
+ {
+ /* Return previous region, if there is such, ... */
+ if (pending_end >= 0)
+ {
+ pthread_mutex_unlock (&disk_cache_lock);
+ pager_return_some (diskfs_disk_pager,
+ pending_begin * vm_page_size,
+ (pending_end - pending_begin)
+ * vm_page_size, 1);
+ pthread_mutex_lock (&disk_cache_lock);
+ }
+ /* ... and start new region. */
+ pending_begin = index;
+ }
+ pending_end = index + 1;
+ }
+
+ pthread_mutex_unlock (&disk_cache_lock);
+
+ /* Return last region, if there is such. */
+ if (pending_end >= 0)
+ pager_return_some (diskfs_disk_pager,
+ pending_begin * vm_page_size,
+ (pending_end - pending_begin) * vm_page_size,
+ 1);
+ else
+ {
+ printf ("ext2fs: disk cache is starving\n");
+
+ /* Give it some time. This should happen rarely. */
+ sleep (1);
+ }
+}
+
+/* Map block and return pointer to it. */
+void *
+disk_cache_block_ref (block_t block)
+{
+ int index;
+ void *bptr;
+
+ assert (0 <= block && block < store->size >> log2_block_size);
+
+ ext2_debug ("(%u)", block);
+
+retry_ref:
+ pthread_mutex_lock (&disk_cache_lock);
+
+ bptr = hurd_ihash_find (disk_cache_bptr, block);
+ if (bptr)
+ /* Already mapped. */
+ {
+ index = bptr_index (bptr);
+
+ /* In process of re-associating? */
+ if (disk_cache_info[index].flags & DC_UNTOUCHED)
+ {
+ /* Wait re-association to finish. */
+ pthread_cond_wait (&disk_cache_reassociation, &disk_cache_lock);
+ pthread_mutex_unlock (&disk_cache_lock);
+
+#if 0
+ printf ("Re-association -- wait finished.\n");
+#endif
+
+ goto retry_ref;
+ }
+
+ /* Just increment reference and return. */
+ assert (disk_cache_info[index].ref_count + 1
+ > disk_cache_info[index].ref_count);
+ disk_cache_info[index].ref_count++;
+
+ ext2_debug ("cached %u -> %d (ref_count = %hu, flags = %#hx, ptr = %p)",
+ disk_cache_info[index].block, index,
+ disk_cache_info[index].ref_count,
+ disk_cache_info[index].flags, bptr);
+
+ pthread_mutex_unlock (&disk_cache_lock);
+
+ return bptr;
+ }
+
+ /* Search for a block that is not in core and is not referenced. */
+ index = disk_cache_hint;
+ while ((disk_cache_info[index].flags & DC_DONT_REUSE)
+ || (disk_cache_info[index].ref_count))
+ {
+ ext2_debug ("reject %u -> %d (ref_count = %hu, flags = %#hx)",
+ disk_cache_info[index].block, index,
+ disk_cache_info[index].ref_count,
+ disk_cache_info[index].flags);
+
+ /* Just move to next block. */
+ index++;
+ if (index >= disk_cache_blocks)
+ index -= disk_cache_blocks;
+
+ /* If we return to where we started, than there is no suitable
+ block. */
+ if (index == disk_cache_hint)
+ break;
+ }
+
+ /* The next place in the disk cache becomes the current hint. */
+ disk_cache_hint = index + 1;
+ if (disk_cache_hint >= disk_cache_blocks)
+ disk_cache_hint -= disk_cache_blocks;
+
+ /* Is suitable place found? */
+ if ((disk_cache_info[index].flags & DC_DONT_REUSE)
+ || disk_cache_info[index].ref_count)
+ /* No place is found. Try to release some blocks and try
+ again. */
+ {
+ ext2_debug ("flush %u -> %d", disk_cache_info[index].block, index);
+
+ pthread_mutex_unlock (&disk_cache_lock);
+
+ disk_cache_return_unused ();
+
+ goto retry_ref;
+ }
+
+ /* Suitable place is found. */
+
+ /* Calculate pointer to data. */
+ bptr = (char *)disk_cache + (index << log2_block_size);
+ ext2_debug ("map %u -> %d (%p)", block, index, bptr);
+
+ /* This pager_return_some is used only to set PM_FORCEREAD for the
+ page. DC_UNTOUCHED is set so that we catch if someone has
+ referenced the block while we didn't hold disk_cache_lock. */
+ disk_cache_info[index].flags |= DC_UNTOUCHED;
+
+#if 0 /* XXX: Let's see if this is needed at all. */
+
+ pthread_mutex_unlock (&disk_cache_lock);
+ pager_return_some (diskfs_disk_pager, bptr - disk_cache, vm_page_size, 1);
+ pthread_mutex_lock (&disk_cache_lock);
+
+ /* Has someone used our bptr? Has someone mapped requested block
+ while we have unlocked disk_cache_lock? If so, environment has
+ changed and we have to restart operation. */
+ if ((! (disk_cache_info[index].flags & DC_UNTOUCHED))
+ || hurd_ihash_find (disk_cache_bptr, block))
+ {
+ pthread_mutex_unlock (&disk_cache_lock);
+ goto retry_ref;
+ }
+
+#elif 0
+
+ /* XXX: Use libpager internals. */
+
+ pthread_mutex_lock (&diskfs_disk_pager->interlock);
+ int page = (bptr - disk_cache) / vm_page_size;
+ assert (page >= 0);
+ int is_incore = (page < diskfs_disk_pager->pagemapsize
+ && (diskfs_disk_pager->pagemap[page] & PM_INCORE));
+ pthread_mutex_unlock (&diskfs_disk_pager->interlock);
+ if (is_incore)
+ {
+ pthread_mutex_unlock (&disk_cache_lock);
+ printf ("INCORE\n");
+ goto retry_ref;
+ }
+
+#endif
+
+ /* Re-associate. */
+ if (disk_cache_info[index].block != DC_NO_BLOCK)
+ /* Remove old association. */
+ hurd_ihash_remove (disk_cache_bptr, disk_cache_info[index].block);
+ /* New association. */
+ if (hurd_ihash_add (disk_cache_bptr, block, bptr))
+ ext2_panic ("Couldn't hurd_ihash_add new disk block");
+ assert (! (disk_cache_info[index].flags & DC_DONT_REUSE & ~DC_UNTOUCHED));
+ disk_cache_info[index].block = block;
+ assert (! disk_cache_info[index].ref_count);
+ disk_cache_info[index].ref_count = 1;
+
+ /* All data structures are set up. */
+ pthread_mutex_unlock (&disk_cache_lock);
+
+ /* Try to read page. */
+ *(volatile char *) bptr;
+
+ /* Check if it's actually read. */
+ pthread_mutex_lock (&disk_cache_lock);
+ if (disk_cache_info[index].flags & DC_UNTOUCHED)
+ /* It's not read. */
+ {
+ /* Remove newly created association. */
+ hurd_ihash_remove (disk_cache_bptr, block);
+ disk_cache_info[index].block = DC_NO_BLOCK;
+ disk_cache_info[index].flags &=~ DC_UNTOUCHED;
+ disk_cache_info[index].ref_count = 0;
+ pthread_mutex_unlock (&disk_cache_lock);
+
+ /* Prepare next time association of this page to succeed. */
+ pager_flush_some (diskfs_disk_pager, bptr - disk_cache,
+ vm_page_size, 0);
+
+#if 0
+ printf ("Re-association failed.\n");
+#endif
+
+ goto retry_ref;
+ }
+
+ /* Re-association was successful. */
+ pthread_cond_broadcast (&disk_cache_reassociation);
+
+ pthread_mutex_unlock (&disk_cache_lock);
+
+ ext2_debug ("(%u) = %p", block, bptr);
+ return bptr;
+}
+
+void
+disk_cache_block_ref_ptr (void *ptr)
+{
+ int index;
+
+ pthread_mutex_lock (&disk_cache_lock);
+ index = bptr_index (ptr);
+ assert (disk_cache_info[index].ref_count >= 1);
+ assert (disk_cache_info[index].ref_count + 1
+ > disk_cache_info[index].ref_count);
+ disk_cache_info[index].ref_count++;
+ assert (! (disk_cache_info[index].flags & DC_UNTOUCHED));
+ ext2_debug ("(%p) (ref_count = %hu, flags = %#hx)",
+ ptr,
+ disk_cache_info[index].ref_count,
+ disk_cache_info[index].flags);
+ pthread_mutex_unlock (&disk_cache_lock);
+}
+
+void
+disk_cache_block_deref (void *ptr)
+{
+ int index;
+
+ assert (disk_cache <= ptr && ptr <= disk_cache + disk_cache_size);
+
+ pthread_mutex_lock (&disk_cache_lock);
+ index = bptr_index (ptr);
+ ext2_debug ("(%p) (ref_count = %hu, flags = %#hx)",
+ ptr,
+ disk_cache_info[index].ref_count - 1,
+ disk_cache_info[index].flags);
+ assert (! (disk_cache_info[index].flags & DC_UNTOUCHED));
+ assert (disk_cache_info[index].ref_count >= 1);
+ disk_cache_info[index].ref_count--;
+ pthread_mutex_unlock (&disk_cache_lock);
+}
+
+/* Not used. */
+int
+disk_cache_block_is_ref (block_t block)
+{
+ int ref;
+ void *ptr;
+
+ pthread_mutex_lock (&disk_cache_lock);
+ ptr = hurd_ihash_find (disk_cache_bptr, block);
+ if (ptr == NULL)
+ ref = 0;
+ else /* XXX: Should check for DC_UNTOUCHED too. */
+ ref = disk_cache_info[bptr_index (ptr)].ref_count;
+ pthread_mutex_unlock (&disk_cache_lock);
+
+ return ref;
+}
+
/* Create the DISK pager. */
void
create_disk_pager (void)
@@ -783,9 +1197,12 @@ create_disk_pager (void)
ext2_panic ("can't create disk pager: %s", strerror (errno));
upi->type = DISK;
pager_bucket = ports_create_bucket ();
- diskfs_start_disk_pager (upi, pager_bucket, MAY_CACHE, 0,
- store->size, &disk_image);
-
+ get_hypermetadata ();
+ disk_cache_blocks = DISK_CACHE_BLOCKS;
+ disk_cache_size = disk_cache_blocks << log2_block_size;
+ diskfs_start_disk_pager (upi, pager_bucket, MAY_CACHE, 1,
+ disk_cache_size, &disk_cache);
+ disk_cache_init ();
}
/* Call this to create a FILE_DATA pager and return a send right.
diff --git a/ext2fs/pokel.c b/ext2fs/pokel.c
index a8b16c97..3afb32e4 100644
--- a/ext2fs/pokel.c
+++ b/ext2fs/pokel.c
@@ -67,12 +67,27 @@ pokel_add (struct pokel *pokel, void *loc, vm_size_t length)
vm_offset_t p_offs = pl->offset;
vm_size_t p_end = p_offs + pl->length;
- if (p_offs == offset && p_end == end)
- break;
+ if (p_offs <= offset && end <= p_end)
+ {
+ if (pokel->image == disk_cache)
+ for (vm_offset_t i = offset; i < end; i += block_size)
+ disk_cache_block_deref (disk_cache + i);
+
+ break;
+ }
else if (p_end >= offset && end >= p_offs)
{
pl->offset = offset < p_offs ? offset : p_offs;
pl->length = (end > p_end ? end : p_end) - pl->offset;
+
+ if (pokel->image == disk_cache)
+ {
+ vm_offset_t i_begin = p_offs > offset ? p_offs : offset;
+ vm_offset_t i_end = p_end < end ? p_end : end;
+ for (vm_offset_t i = i_begin; i < i_end; i += block_size)
+ disk_cache_block_deref (disk_cache + i);
+ }
+
ext2_debug ("extended 0x%x[%ul] to 0x%x[%ul]",
p_offs, p_end - p_offs, pl->offset, pl->length);
break;
@@ -113,11 +128,21 @@ _pokel_exec (struct pokel *pokel, int sync, int wait)
pthread_spin_unlock (&pokel->lock);
for (pl = pokes; pl; last = pl, pl = pl->next)
- if (sync)
- {
- ext2_debug ("syncing 0x%x[%ul]", pl->offset, pl->length);
- pager_sync_some (pokel->pager, pl->offset, pl->length, wait);
- }
+ {
+ if (sync)
+ {
+ ext2_debug ("syncing 0x%lx[%ul]", pl->offset, pl->length);
+ pager_sync_some (pokel->pager, pl->offset, pl->length, wait);
+ }
+
+ if (pokel->image == disk_cache)
+ {
+ vm_offset_t begin = trunc_block (pl->offset);
+ vm_offset_t end = round_block (pl->offset + pl->length);
+ for (vm_offset_t i = begin; i != end; i += block_size)
+ disk_cache_block_deref (pokel->image + i);
+ }
+ }
if (last)
{
diff --git a/ext2fs/truncate.c b/ext2fs/truncate.c
index 37e360bb..63d22955 100644
--- a/ext2fs/truncate.c
+++ b/ext2fs/truncate.c
@@ -124,7 +124,7 @@ trunc_indirect (struct node *node, block_t end,
{
unsigned index;
int modified = 0, all_freed = 1;
- block_t *ind_bh = (block_t *)bptr (*p);
+ block_t *ind_bh = (block_t *) disk_cache_block_ref (*p);
unsigned first = end < offset ? 0 : end - offset;
for (index = first; index < addr_per_block; index++)
@@ -139,11 +139,16 @@ trunc_indirect (struct node *node, block_t end,
if (first == 0 && all_freed)
{
- pager_flush_some (diskfs_disk_pager, boffs (*p), block_size, 1);
+ pager_flush_some (diskfs_disk_pager,
+ bptr_index (ind_bh) << log2_block_size,
+ block_size, 1);
free_block_run_free_ptr (fbr, p);
+ disk_cache_block_deref (ind_bh);
}
else if (modified)
record_indir_poke (node, ind_bh);
+ else
+ disk_cache_block_deref (ind_bh);
}
}