From ea4802bc0975218544cb447df37b704f60ef2fde Mon Sep 17 00:00:00 2001 From: Richard Braun Date: Fri, 3 May 2013 19:56:51 +0200 Subject: Large store support for ext2fs This is a revised version of the large store patch for ext2fs, written by Ognyan Kulev. It provides support for stores larger than 2 GiB. * ext2fs/balloc.c: Use the new disk_cache_block_ref and disk_cache_block_deref functions to access blocks from the disk cache. * ext2fs/ext2fs.c (main): Update initialization call to pokel_init, and call map_hypermetadata instead of get_hypermetadata. * ext2fs/ext2fs.h: Include two additional headers (their angle-bracketed names were stripped from this copy of the patch). (DISK_CACHE_BLOCKS): New macro. (DC_INCORE): Likewise. (DC_UNTOUCHED): Likewise. (DC_FIXED): Likewise. (DC_DONT_REUSE): Likewise. (DC_NO_BLOCK): Likewise. (DISK_CACHE_LAST_READ_XOR) [!NDEBUG]: Likewise. (struct disk_cache_info): New structure. (disk_cache): New external variable. (disk_cache_size): Likewise. (disk_cache_blocks): Likewise. (disk_cache_bptr): Likewise. (disk_cache_info): Likewise. (disk_cache_lock): Likewise. (disk_cache_reassociation): Likewise. (disk_cache_block_ref): New declaration. (disk_cache_block_ref_ptr): Likewise. (disk_cache_block_deref): Likewise. (disk_cache_block_is_ref): Likewise. (map_hypermetadata): Likewise. (trunc_block): Cast to off_t. (round_block): Likewise. (boffs): Likewise. (bptr_index): New macro. (boffs_ptr): Rewrite as an inline function to make it look up a block from the disk cache. (bptr_offs): Likewise. (dino): Remove function, replaced with ... (dino_ref): ... this one, which adds a reference to the inode block. (dino_deref): New inline function. (record_global_poke): Make sure block is referenced. (record_indir_poke): Likewise. (sync_global_ptr): Remove block reference, and adjust call to pager_sync_some. (sync_global): Add debug call to print wait parameter. * ext2fs/getblk.c: Use the new disk_cache_block_ref and disk_cache_block_deref functions to access blocks from the disk cache. 
* ext2fs/hyper.c (get_hypermetadata): Read the superblock from the store now that it's not directly mapped in memory. Move the initialization of zeroblock here from ... (map_hypermetadata): ... here. Also, set the superblock pointer. (diskfs_set_hypermetadata): Add a reference to the superblock. (diskfs_readonly_changed): Update call to mprotect. * ext2fs/ialloc.c: Use the new disk_cache_block_ref, disk_cache_block_ref_ptr and disk_cache_block_deref functions to access blocks from the disk cache. * ext2fs/inode.c: Update calls that used the disk image to use the disk cache, and use the new reference handling functions where appropriate. * ext2fs/pager.c: Include one additional system header (its angle-bracketed name was stripped from this copy of the patch) and "../libpager/priv.h". (disk_image): Remove global variable. (disk_pager_read_page): Update cache information. (disk_pager_write_page): Likewise. (disk_pager_notify_evict): New function. (pager_notify_evict): Call disk_pager_notify_evict appropriately. (disk_cache): New global variable. (disk_cache_size): Likewise. (disk_cache_blocks): Likewise. (disk_cache_bptr): Likewise. (disk_cache_info): Likewise. (disk_cache_hint): Likewise. (disk_cache_lock): Likewise. (disk_cache_reassociation): Likewise. (disk_cache_init): New function. (disk_cache_return_unused): Likewise. (disk_cache_block_ref): Likewise. (disk_cache_block_ref_ptr): Likewise. (disk_cache_block_deref): Likewise. (disk_cache_block_is_ref): Likewise. (create_disk_pager): Update initialization of the disk pager. * ext2fs/pokel.c (pokel_add): Drop block references with disk_cache_block_deref. (_pokel_exec): Likewise. * ext2fs/truncate.c (trunc_indirect): Use the new disk_cache_block_ref and disk_cache_block_deref functions to access blocks from the disk cache. 
--- ext2fs/balloc.c | 31 +++- ext2fs/ext2fs.c | 6 +- ext2fs/ext2fs.h | 145 ++++++++++++++--- ext2fs/getblk.c | 21 ++- ext2fs/hyper.c | 35 +++-- ext2fs/ialloc.c | 37 ++++- ext2fs/inode.c | 40 +++-- ext2fs/pager.c | 461 +++++++++++++++++++++++++++++++++++++++++++++++++++--- ext2fs/pokel.c | 39 ++++- ext2fs/truncate.c | 9 +- 10 files changed, 733 insertions(+), 91 deletions(-) (limited to 'ext2fs') diff --git a/ext2fs/balloc.c b/ext2fs/balloc.c index b2d2eab9..efef8ae4 100644 --- a/ext2fs/balloc.c +++ b/ext2fs/balloc.c @@ -92,7 +92,7 @@ ext2_free_blocks (block_t block, unsigned long count) block, count); } gdp = group_desc (block_group); - bh = bptr (gdp->bg_block_bitmap); + bh = disk_cache_block_ref (gdp->bg_block_bitmap); if (in_range (gdp->bg_block_bitmap, block, gcount) || in_range (gdp->bg_inode_bitmap, block, gcount) || @@ -114,6 +114,7 @@ ext2_free_blocks (block_t block, unsigned long count) } record_global_poke (bh); + disk_cache_block_ref_ptr (gdp); record_global_poke (gdp); block += gcount; @@ -139,7 +140,7 @@ ext2_new_block (block_t goal, block_t prealloc_goal, block_t *prealloc_count, block_t *prealloc_block) { - char *bh; + char *bh = NULL; char *p, *r; int i, j, k, tmp; unsigned long lmap; @@ -165,6 +166,7 @@ ext2_new_block (block_t goal, ext2_debug ("goal=%u", goal); repeat: + assert (bh == NULL); /* * First, test whether the goal block is free. 
*/ @@ -179,7 +181,7 @@ repeat: if (j) goal_attempts++; #endif - bh = bptr (gdp->bg_block_bitmap); + bh = disk_cache_block_ref (gdp->bg_block_bitmap); ext2_debug ("goal is at %d:%d", i, j); @@ -245,6 +247,9 @@ repeat: j = k; goto got_block; } + + disk_cache_block_deref (bh); + bh = NULL; } ext2_debug ("bit not found in block group %d", i); @@ -267,7 +272,8 @@ repeat: pthread_spin_unlock (&global_lock); return 0; } - bh = bptr (gdp->bg_block_bitmap); + assert (bh == NULL); + bh = disk_cache_block_ref (gdp->bg_block_bitmap); r = memscan (bh, 0, sblock->s_blocks_per_group >> 3); j = (r - bh) << 3; if (j < sblock->s_blocks_per_group) @@ -277,12 +283,15 @@ repeat: sblock->s_blocks_per_group); if (j >= sblock->s_blocks_per_group) { + disk_cache_block_deref (bh); + bh = NULL; ext2_error ("free blocks count corrupted for block group %d", i); pthread_spin_unlock (&global_lock); return 0; } search_back: + assert (bh != NULL); /* * We have succeeded in finding a free byte in the block * bitmap. Now search backwards up to 7 bits to find the @@ -291,6 +300,7 @@ search_back: for (k = 0; k < 7 && j > 0 && !test_bit (j - 1, bh); k++, j--); got_block: + assert (bh != NULL); ext2_debug ("using block group %d (%d)", i, gdp->bg_free_blocks_count); @@ -304,6 +314,8 @@ got_block: if (set_bit (j, bh)) { ext2_warning ("bit already set for block %d", j); + disk_cache_block_deref (bh); + bh = NULL; goto repeat; } @@ -351,6 +363,7 @@ got_block: j = tmp; record_global_poke (bh); + bh = NULL; if (j >= sblock->s_blocks_count) { @@ -363,12 +376,14 @@ got_block: j, goal_hits, goal_attempts); gdp->bg_free_blocks_count--; + disk_cache_block_ref_ptr (gdp); record_global_poke (gdp); sblock->s_free_blocks_count--; sblock_dirty = 1; sync_out: + assert (bh == NULL); pthread_spin_unlock (&global_lock); alloc_sync (0); @@ -390,9 +405,12 @@ ext2_count_free_blocks () gdp = NULL; for (i = 0; i < groups_count; i++) { + void *bh; gdp = group_desc (i); desc_count += gdp->bg_free_blocks_count; - x = count_free 
(bptr (gdp->bg_block_bitmap), block_size); + bh = disk_cache_block_ref (gdp->bg_block_bitmap); + x = count_free (bh, block_size); + disk_cache_block_deref (bh); printf ("group %d: stored = %d, counted = %lu", i, gdp->bg_free_blocks_count, x); bitmap_count += x; @@ -453,7 +471,7 @@ ext2_check_blocks_bitmap () gdp = group_desc (i); desc_count += gdp->bg_free_blocks_count; - bh = bptr (gdp->bg_block_bitmap); + bh = disk_cache_block_ref (gdp->bg_block_bitmap); if (!EXT2_HAS_RO_COMPAT_FEATURE (sblock, EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) @@ -479,6 +497,7 @@ ext2_check_blocks_bitmap () ext2_error ("block #%d of the inode table in group %d is marked free", j, i); x = count_free (bh, block_size); + disk_cache_block_deref (bh); if (gdp->bg_free_blocks_count != x) ext2_error ("wrong free blocks count for group %d," " stored = %d, counted = %lu", diff --git a/ext2fs/ext2fs.c b/ext2fs/ext2fs.c index 993f1997..128b6edd 100644 --- a/ext2fs/ext2fs.c +++ b/ext2fs/ext2fs.c @@ -181,9 +181,9 @@ main (int argc, char **argv) /* Map the entire disk. */ create_disk_pager (); - pokel_init (&global_pokel, diskfs_disk_pager, disk_image); + pokel_init (&global_pokel, diskfs_disk_pager, disk_cache); - get_hypermetadata(); + map_hypermetadata (); inode_init (); @@ -211,6 +211,8 @@ diskfs_reload_global_state () { pokel_flush (&global_pokel); pager_flush (diskfs_disk_pager, 1); + sblock = NULL; get_hypermetadata (); + map_hypermetadata (); return 0; } diff --git a/ext2fs/ext2fs.h b/ext2fs/ext2fs.h index 52bf2b19..e01d1a59 100644 --- a/ext2fs/ext2fs.h +++ b/ext2fs/ext2fs.h @@ -23,7 +23,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -195,6 +197,8 @@ struct user_pager_info /* ---------------------------------------------------------------- */ /* pager.c */ +#define DISK_CACHE_BLOCKS 65536 + #include /* Set up the disk pager. */ @@ -218,10 +222,54 @@ extern struct store *store; /* What the user specified. 
*/ extern struct store_parsed *store_parsed; -/* Mapped image of the disk. */ -extern void *disk_image; +/* Mapped image of cached blocks of the disk. */ +extern void *disk_cache; +extern store_offset_t disk_cache_size; +extern int disk_cache_blocks; + +#define DC_INCORE 0x01 /* Not in core. */ +#define DC_UNTOUCHED 0x02 /* Not touched by disk_pager_read_paged + or disk_cache_block_ref. */ +#define DC_FIXED 0x04 /* Must not be re-associated. */ + +/* Flags that forbid re-association of page. DC_UNTOUCHED is included + because this flag is used only when page is already to be + re-associated, so it's not good candidate for another + remapping. */ +#define DC_DONT_REUSE (DC_INCORE | DC_UNTOUCHED | DC_FIXED) + +#define DC_NO_BLOCK ((block_t) -1L) + +#ifndef NDEBUG +#define DISK_CACHE_LAST_READ_XOR 0xDEADBEEF +#endif + +/* Disk cache blocks' meta info. */ +struct disk_cache_info +{ + block_t block; + uint16_t flags; + uint16_t ref_count; +#ifndef NDEBUG + block_t last_read, last_read_xor; +#endif +}; -/* Our in-core copy of the super-block (pointer into the disk_image). */ +/* block num --> pointer to in-memory block */ +extern hurd_ihash_t disk_cache_bptr; +/* Metadata about cached block. */ +extern struct disk_cache_info *disk_cache_info; +/* Lock for these mappings */ +extern pthread_mutex_t disk_cache_lock; +/* Fired when a re-association is done. */ +extern pthread_cond_t disk_cache_reassociation; + +void *disk_cache_block_ref (block_t block); +void disk_cache_block_ref_ptr (void *ptr); +void disk_cache_block_deref (void *ptr); +int disk_cache_block_is_ref (block_t block); + +/* Our in-core copy of the super-block (pointer into the disk_cache). */ struct ext2_super_block *sblock; /* True if sblock has been modified. */ int sblock_dirty; @@ -251,6 +299,9 @@ vm_address_t zeroblock; /* Get the superblock from the disk, & setup various global info from it. */ void get_hypermetadata (); + +/* Map `sblock' and `group_desc_image' pointers to disk cache. 
*/ +void map_hypermetadata (); /* ---------------------------------------------------------------- */ /* Random stuff calculated from the super block. */ @@ -274,21 +325,51 @@ pthread_spinlock_t generation_lock; unsigned long next_generation; /* ---------------------------------------------------------------- */ -/* Functions for looking inside disk_image */ +/* Functions for looking inside disk_cache */ -#define trunc_block(offs) (((offs) >> log2_block_size) << log2_block_size) +#define trunc_block(offs) \ + ((off_t) ((offs) >> log2_block_size) << log2_block_size) #define round_block(offs) \ - ((((offs) + block_size - 1) >> log2_block_size) << log2_block_size) + ((off_t) (((offs) + block_size - 1) >> log2_block_size) << log2_block_size) /* block num --> byte offset on disk */ -#define boffs(block) ((block) << log2_block_size) +#define boffs(block) ((off_t) (block) << log2_block_size) /* byte offset on disk --> block num */ #define boffs_block(offs) ((offs) >> log2_block_size) +/* pointer to in-memory block -> index in disk_cache_info */ +#define bptr_index(ptr) (((char *)ptr - (char *)disk_cache) >> log2_block_size) + /* byte offset on disk --> pointer to in-memory block */ -#define boffs_ptr(offs) (((char *)disk_image) + (offs)) +EXT2FS_EI char * +boffs_ptr (off_t offset) +{ + block_t block = boffs_block (offset); + pthread_mutex_lock (&disk_cache_lock); + char *ptr = hurd_ihash_find (disk_cache_bptr, block); + pthread_mutex_unlock (&disk_cache_lock); + assert (ptr); + ptr += offset % block_size; + ext2_debug ("(%lld) = %p", offset, ptr); + return ptr; +} + /* pointer to in-memory block --> byte offset on disk */ -#define bptr_offs(ptr) ((char *)(ptr) - ((char *)disk_image)) +EXT2FS_EI off_t +bptr_offs (void *ptr) +{ + vm_offset_t mem_offset = (char *)ptr - (char *)disk_cache; + off_t offset; + assert (mem_offset < disk_cache_size); + pthread_mutex_lock (&disk_cache_lock); + offset = (off_t) disk_cache_info[boffs_block (mem_offset)].block + << log2_block_size; + 
assert (offset || mem_offset < block_size); + offset += mem_offset % block_size; + pthread_mutex_unlock (&disk_cache_lock); + ext2_debug ("(%p) = %lld", ptr, offset); + return offset; +} /* block num --> pointer to in-memory block */ #define bptr(block) boffs_ptr(boffs(block)) @@ -308,14 +389,24 @@ extern struct ext2_inode *dino (ino_t inum); #if defined(__USE_EXTERN_INLINES) || defined(EXT2FS_DEFINE_EI) /* Convert an inode number to the dinode on disk. */ EXT2FS_EI struct ext2_inode * -dino (ino_t inum) +dino_ref (ino_t inum) { unsigned long inodes_per_group = sblock->s_inodes_per_group; unsigned long bg_num = (inum - 1) / inodes_per_group; unsigned long group_inum = (inum - 1) % inodes_per_group; - struct ext2_group_desc *bg = group_desc(bg_num); + struct ext2_group_desc *bg = group_desc (bg_num); block_t block = bg->bg_inode_table + (group_inum / inodes_per_block); - return ((struct ext2_inode *)bptr(block)) + group_inum % inodes_per_block; + struct ext2_inode *inode = disk_cache_block_ref (block); + inode += group_inum % inodes_per_block; + ext2_debug ("(%llu) = %p", inum, inode); + return inode; +} + +EXT2FS_EI void +dino_deref (struct ext2_inode *inode) +{ + ext2_debug ("(%p)", inode); + disk_cache_block_deref (inode); } #endif /* Use extern inlines. */ @@ -377,27 +468,38 @@ global_block_modified (block_t block) EXT2FS_EI void record_global_poke (void *ptr) { - int boffs = trunc_block (bptr_offs (ptr)); - global_block_modified (boffs_block (boffs)); - pokel_add (&global_pokel, boffs_ptr(boffs), block_size); + block_t block = boffs_block (bptr_offs (ptr)); + void *block_ptr = bptr (block); + ext2_debug ("(%p = %p)", ptr, block_ptr); + assert (disk_cache_block_is_ref (block)); + global_block_modified (block); + pokel_add (&global_pokel, block_ptr, block_size); } /* This syncs a modification to a non-file block. 
*/ EXT2FS_EI void sync_global_ptr (void *bptr, int wait) { - vm_offset_t boffs = trunc_block (bptr_offs (bptr)); - global_block_modified (boffs_block (boffs)); - pager_sync_some (diskfs_disk_pager, trunc_page (boffs), vm_page_size, wait); + block_t block = boffs_block (bptr_offs (bptr)); + void *block_ptr = bptr (block); + ext2_debug ("(%p -> %u)", bptr, block); + global_block_modified (block); + disk_cache_block_deref (block_ptr); + pager_sync_some (diskfs_disk_pager, + block_ptr - disk_cache, block_size, wait); + } /* This records a modification to one of a file's indirect blocks. */ EXT2FS_EI void record_indir_poke (struct node *node, void *ptr) { - int boffs = trunc_block (bptr_offs (ptr)); - global_block_modified (boffs_block (boffs)); - pokel_add (&node->dn->indir_pokel, boffs_ptr(boffs), block_size); + block_t block = boffs_block (bptr_offs (ptr)); + void *block_ptr = bptr (block); + ext2_debug ("(%llu, %p)", node->cache_id, ptr); + assert (disk_cache_block_is_ref (block)); + global_block_modified (block); + pokel_add (&node->dn->indir_pokel, block_ptr, block_size); } /* ---------------------------------------------------------------- */ @@ -405,6 +507,7 @@ record_indir_poke (struct node *node, void *ptr) EXT2FS_EI void sync_global (int wait) { + ext2_debug ("%d", wait); pokel_sync (&global_pokel, wait); } diff --git a/ext2fs/getblk.c b/ext2fs/getblk.c index 23ba6459..bde66e1c 100644 --- a/ext2fs/getblk.c +++ b/ext2fs/getblk.c @@ -104,7 +104,7 @@ ext2_alloc_block (struct node *node, block_t goal, int zero) if (result && zero) { - char *bh = bptr (result); + char *bh = disk_cache_block_ref (result); bzero (bh, block_size); record_indir_poke (node, bh); } @@ -122,6 +122,8 @@ inode_getblk (struct node *node, int nr, int create, int zero, block_t hint; #endif + assert (0 <= nr && nr < EXT2_N_BLOCKS); + *result = node->dn->info.i_data[nr]; if (*result) return 0; @@ -180,14 +182,20 @@ block_getblk (struct node *node, block_t block, int nr, int create, int zero, { 
int i; block_t goal = 0; - block_t *bh = (block_t *)bptr (block); + block_t *bh = (block_t *)disk_cache_block_ref (block); *result = bh[nr]; if (*result) - return 0; + { + disk_cache_block_deref (bh); + return 0; + } if (!create) - return EINVAL; + { + disk_cache_block_deref (bh); + return EINVAL; + } if (node->dn->info.i_next_alloc_block == new_block) goal = node->dn->info.i_next_alloc_goal; @@ -207,7 +215,10 @@ block_getblk (struct node *node, block_t block, int nr, int create, int zero, *result = ext2_alloc_block (node, goal, zero); if (!*result) - return ENOSPC; + { + disk_cache_block_deref (bh); + return ENOSPC; + } bh[nr] = *result; diff --git a/ext2fs/hyper.c b/ext2fs/hyper.c index bee4175f..5bcc2abe 100644 --- a/ext2fs/hyper.c +++ b/ext2fs/hyper.c @@ -58,11 +58,14 @@ static int ext2fs_clean; /* fs clean before we started writing? */ void get_hypermetadata (void) { - error_t err = diskfs_catch_exception (); - if (err) - ext2_panic ("can't read superblock: %s", strerror (err)); + error_t err; + size_t read = 0; - sblock = (struct ext2_super_block *) boffs_ptr (SBLOCK_OFFS); + assert (! sblock); + err = store_read (store, SBLOCK_OFFS >> store->log2_block_size, + SBLOCK_SIZE, (void **)&sblock, &read); + if (err || read != SBLOCK_SIZE) + ext2_panic ("Cannot read hypermetadata"); if (sblock->s_magic != EXT2_SUPER_MAGIC #ifdef EXT2FS_PRE_02B_COMPAT @@ -152,15 +155,25 @@ get_hypermetadata (void) allocate_mod_map (); - diskfs_end_catch_exception (); + /* A handy source of page-aligned zeros. */ + if (zeroblock == 0) + { + zeroblock = (vm_address_t) mmap (0, block_size, PROT_READ, MAP_ANON, 0, 0); + assert (zeroblock != (vm_address_t) MAP_FAILED); + } + + munmap (sblock, SBLOCK_SIZE); + sblock = NULL; +} + +void +map_hypermetadata (void) +{ + sblock = (struct ext2_super_block *) boffs_ptr (SBLOCK_OFFS); /* Cache a convenient pointer to the block group descriptors for allocation. These are stored in the filesystem blocks following the superblock. 
*/ group_desc_image = (struct ext2_group_desc *) bptr (bptr_block (sblock) + 1); - - /* A handy source of page-aligned zeros. */ - if (zeroblock == 0) - zeroblock = (vm_address_t) mmap (0, block_size, PROT_READ, MAP_ANON, 0, 0); } error_t @@ -183,6 +196,7 @@ diskfs_set_hypermetadata (int wait, int clean) if (sblock_dirty) { sblock_dirty = 0; + disk_cache_block_ref_ptr (sblock); record_global_poke (sblock); } @@ -199,7 +213,8 @@ diskfs_readonly_changed (int readonly) (*(readonly ? store_set_flags : store_clear_flags)) (store, STORE_READONLY); - mprotect (disk_image, store->size, PROT_READ | (readonly ? 0 : PROT_WRITE)); + mprotect (disk_cache, disk_cache_size, + PROT_READ | (readonly ? 0 : PROT_WRITE)); if (!readonly && !(sblock->s_state & EXT2_VALID_FS)) ext2_warning ("UNCLEANED FILESYSTEM NOW WRITABLE"); diff --git a/ext2fs/ialloc.c b/ext2fs/ialloc.c index aa018d94..2d8e51e0 100644 --- a/ext2fs/ialloc.c +++ b/ext2fs/ialloc.c @@ -75,22 +75,25 @@ diskfs_free_node (struct node *np, mode_t old_mode) bit = (inum - 1) % sblock->s_inodes_per_group; gdp = group_desc (block_group); - bh = bptr (gdp->bg_inode_bitmap); + bh = disk_cache_block_ref (gdp->bg_inode_bitmap); if (!clear_bit (bit, bh)) ext2_warning ("bit already cleared for inode %Ld", inum); else { + disk_cache_block_ref_ptr (bh); record_global_poke (bh); gdp->bg_free_inodes_count++; if (S_ISDIR (old_mode)) gdp->bg_used_dirs_count--; + disk_cache_block_ref_ptr (gdp); record_global_poke (gdp); sblock->s_free_inodes_count++; } + disk_cache_block_deref (bh); sblock_dirty = 1; pthread_spin_unlock (&global_lock); alloc_sync(0); @@ -111,7 +114,7 @@ diskfs_free_node (struct node *np, mode_t old_mode) ino_t ext2_alloc_inode (ino_t dir_inum, mode_t mode) { - char *bh; + char *bh = NULL; int i, j, inum, avefreei; struct ext2_group_desc *gdp; struct ext2_group_desc *tmp; @@ -119,6 +122,7 @@ ext2_alloc_inode (ino_t dir_inum, mode_t mode) pthread_spin_lock (&global_lock); repeat: + assert (bh == NULL); gdp = NULL; i = 0; @@ 
-213,7 +217,7 @@ repeat: return 0; } - bh = bptr (gdp->bg_inode_bitmap); + bh = disk_cache_block_ref (gdp->bg_inode_bitmap); if ((inum = find_first_zero_bit ((unsigned long *) bh, sblock->s_inodes_per_group)) < sblock->s_inodes_per_group) @@ -221,12 +225,17 @@ repeat: if (set_bit (inum, bh)) { ext2_warning ("bit already set for inode %d", inum); + disk_cache_block_deref (bh); + bh = NULL; goto repeat; } record_global_poke (bh); + bh = NULL; } else { + disk_cache_block_deref (bh); + bh = NULL; if (gdp->bg_free_inodes_count != 0) { ext2_error ("free inodes count corrupted in group %d", i); @@ -248,15 +257,25 @@ repeat: gdp->bg_free_inodes_count--; if (S_ISDIR (mode)) gdp->bg_used_dirs_count++; + disk_cache_block_ref_ptr (gdp); record_global_poke (gdp); sblock->s_free_inodes_count--; sblock_dirty = 1; sync_out: + assert (bh == NULL); pthread_spin_unlock (&global_lock); alloc_sync (0); + /* Make sure the coming read_node won't complain about bad + fields. */ + { + struct ext2_inode *di = dino_ref (inum); + memset (di, 0, sizeof *di); + dino_deref (di); + } + return inum; } @@ -353,10 +372,12 @@ ext2_count_free_inodes () gdp = NULL; for (i = 0; i < groups_count; i++) { + void *bh; gdp = group_desc (i); desc_count += gdp->bg_free_inodes_count; - x = count_free (bptr (gdp->bg_inode_bitmap), - sblock->s_inodes_per_group / 8); + bh = disk_cache_block_ref (gdp->bg_inode_bitmap); + x = count_free (bh, sblock->s_inodes_per_group / 8); + disk_cache_block_deref (bh); ext2_debug ("group %d: stored = %d, counted = %lu", i, gdp->bg_free_inodes_count, x); bitmap_count += x; @@ -386,10 +407,12 @@ ext2_check_inodes_bitmap () gdp = NULL; for (i = 0; i < groups_count; i++) { + void *bh; gdp = group_desc (i); desc_count += gdp->bg_free_inodes_count; - x = count_free (bptr (gdp->bg_inode_bitmap), - sblock->s_inodes_per_group / 8); + bh = disk_cache_block_ref (gdp->bg_inode_bitmap); + x = count_free (bh, sblock->s_inodes_per_group / 8); + disk_cache_block_deref (bh); if 
(gdp->bg_free_inodes_count != x) ext2_error ("wrong free inodes count in group %d, " "stored = %d, counted = %lu", diff --git a/ext2fs/inode.c b/ext2fs/inode.c index 2c442795..e75c63f9 100644 --- a/ext2fs/inode.c +++ b/ext2fs/inode.c @@ -92,7 +92,7 @@ diskfs_cached_lookup (ino_t inum, struct node **npp) dn->dir_idx = 0; dn->pager = 0; pthread_rwlock_init (&dn->alloc_lock, NULL); - pokel_init (&dn->indir_pokel, diskfs_disk_pager, disk_image); + pokel_init (&dn->indir_pokel, diskfs_disk_pager, disk_cache); /* Create the new node. */ np = diskfs_make_node (dn); @@ -201,13 +201,17 @@ read_node (struct node *np) error_t err; struct stat *st = &np->dn_stat; struct disknode *dn = np->dn; - struct ext2_inode *di = dino (np->cache_id); + struct ext2_inode *di; struct ext2_inode_info *info = &dn->info; + ext2_debug ("(%llu)", np->cache_id); + err = diskfs_catch_exception (); if (err) return err; + di = dino_ref (np->cache_id); + st->st_fstype = FSTYPE_EXT2FS; st->st_fsid = getpid (); /* This call is very cheap. 
*/ st->st_ino = np->cache_id; @@ -285,7 +289,9 @@ read_node (struct node *np) info->i_high_size = di->i_size_high; if (info->i_high_size) /* XXX */ { + dino_deref (di); ext2_warning ("cannot handle large file inode %Ld", np->cache_id); + diskfs_end_catch_exception (); return EFBIG; } } @@ -307,6 +313,7 @@ read_node (struct node *np) } dn->info_i_translator = di->i_translator; + dino_deref (di); diskfs_end_catch_exception (); if (S_ISREG (st->st_mode) || S_ISDIR (st->st_mode) @@ -408,7 +415,9 @@ write_node (struct node *np) { error_t err; struct stat *st = &np->dn_stat; - struct ext2_inode *di = dino (np->cache_id); + struct ext2_inode *di; + + ext2_debug ("(%llu)", np->cache_id); if (np->dn->info.i_prealloc_count) ext2_discard_prealloc (np); @@ -425,6 +434,8 @@ write_node (struct node *np) if (err) return NULL; + di = dino_ref (np->cache_id); + di->i_generation = st->st_gen; /* We happen to know that the stat mode bits are the same @@ -505,6 +516,7 @@ write_node (struct node *np) diskfs_end_catch_exception (); np->dn_stat_dirty = 0; + /* Leave invoking dino_deref (di) to the caller. 
*/ return di; } else @@ -674,7 +686,7 @@ diskfs_set_translator (struct node *np, const char *name, unsigned namelen, if (err) return err; - di = dino (np->cache_id); + di = dino_ref (np->cache_id); blkno = di->i_translator; if (namelen && !blkno) @@ -687,6 +699,7 @@ diskfs_set_translator (struct node *np, const char *name, unsigned namelen, 0, 0, 0); if (blkno == 0) { + dino_deref (di); diskfs_end_catch_exception (); return ENOSPC; } @@ -710,15 +723,20 @@ diskfs_set_translator (struct node *np, const char *name, unsigned namelen, np->dn_stat.st_mode &= ~S_IPTRANS; np->dn_set_ctime = 1; } + else + dino_deref (di); if (namelen) { + void *blkptr; + buf[0] = namelen & 0xFF; buf[1] = (namelen >> 8) & 0xFF; bcopy (name, buf + 2, namelen); - bcopy (buf, bptr (blkno), block_size); - record_global_poke (bptr (blkno)); + blkptr = disk_cache_block_ref (blkno); + memcpy (blkptr, buf, block_size); + record_global_poke (blkptr); np->dn_stat.st_mode |= S_IPTRANS; np->dn_set_ctime = 1; @@ -736,7 +754,8 @@ diskfs_get_translator (struct node *np, char **namep, unsigned *namelen) error_t err = 0; daddr_t blkno; unsigned datalen; - const void *transloc; + void *transloc; + struct ext2_inode *di; assert (sblock->s_creator_os == EXT2_OS_HURD); @@ -744,9 +763,11 @@ diskfs_get_translator (struct node *np, char **namep, unsigned *namelen) if (err) return err; - blkno = (dino (np->cache_id))->i_translator; + di = dino_ref (np->cache_id); + blkno = di->i_translator; + dino_deref (di); assert (blkno); - transloc = bptr (blkno); + transloc = disk_cache_block_ref (blkno); datalen = ((unsigned char *)transloc)[0] + (((unsigned char *)transloc)[1] << 8); @@ -761,6 +782,7 @@ diskfs_get_translator (struct node *np, char **namep, unsigned *namelen) memcpy (*namep, transloc + 2, datalen); } + disk_cache_block_deref (transloc); diskfs_end_catch_exception (); *namelen = datalen; diff --git a/ext2fs/pager.c b/ext2fs/pager.c index 92137112..6e99c837 100644 --- a/ext2fs/pager.c +++ b/ext2fs/pager.c @@ 
-18,17 +18,18 @@ along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include #include #include #include "ext2fs.h" +/* XXX */ +#include "../libpager/priv.h" + /* A ports bucket to hold pager ports. */ struct port_bucket *pager_bucket; -/* Mapped image of the disk. */ -void *disk_image; - pthread_spinlock_t node_to_page_lock = PTHREAD_SPINLOCK_INITIALIZER; @@ -165,6 +166,9 @@ file_pager_read_page (struct node *node, vm_offset_t page, block_t pending_blocks = 0; int num_pending_blocks = 0; + ext2_debug ("reading inode %llu page %lu[%u]", + node->cache_id, page, vm_page_size); + /* Read the NUM_PENDING_BLOCKS blocks in PENDING_BLOCKS, into the buffer pointed to by BUF (allocating it if necessary) at offset OFFS. OFFS in adjusted by the amount read, and NUM_PENDING_BLOCKS is zeroed. Any read @@ -173,7 +177,8 @@ file_pager_read_page (struct node *node, vm_offset_t page, { if (num_pending_blocks > 0) { - block_t dev_block = pending_blocks << log2_dev_blocks_per_fs_block; + store_offset_t dev_block = (store_offset_t) pending_blocks + << log2_dev_blocks_per_fs_block; size_t amount = num_pending_blocks << log2_block_size; /* The buffer we try to read into; on the first read, we pass in a size of zero, so that the read is guaranteed to allocate a new @@ -297,7 +302,8 @@ pending_blocks_write (struct pending_blocks *pb) if (pb->num > 0) { error_t err; - block_t dev_block = pb->block << log2_dev_blocks_per_fs_block; + store_offset_t dev_block = (store_offset_t) pb->block + << log2_dev_blocks_per_fs_block; size_t length = pb->num << log2_block_size, amount; ext2_debug ("writing block %u[%ld]", pb->block, pb->num); @@ -359,7 +365,7 @@ pending_blocks_add (struct pending_blocks *pb, block_t block) return 0; } -/* Write one page for the pager backing NODE, at offset PAGE, into BUF. This +/* Write one page for the pager backing NODE, at OFFSET, into BUF. 
This may need to write several filesystem blocks to satisfy one page, and tries to consolidate the i/o if possible. */ static error_t @@ -411,12 +417,28 @@ disk_pager_read_page (vm_offset_t page, void **buf, int *writelock) { error_t err; size_t length = vm_page_size, read = 0; - vm_size_t dev_end = store->size; + store_offset_t offset = page, dev_end = store->size; + int index = offset >> log2_block_size; + + pthread_mutex_lock (&disk_cache_lock); + offset = ((store_offset_t) disk_cache_info[index].block << log2_block_size) + + offset % block_size; + disk_cache_info[index].flags |= DC_INCORE; + disk_cache_info[index].flags &=~ DC_UNTOUCHED; +#ifndef NDEBUG + disk_cache_info[index].last_read = disk_cache_info[index].block; + disk_cache_info[index].last_read_xor + = disk_cache_info[index].block ^ DISK_CACHE_LAST_READ_XOR; +#endif + pthread_mutex_unlock (&disk_cache_lock); + + ext2_debug ("(%lld)", offset >> log2_block_size); - if (page + vm_page_size > dev_end) - length = dev_end - page; + if (offset + vm_page_size > dev_end) + length = dev_end - offset; - err = store_read (store, page >> store->log2_block_size, length, buf, &read); + err = store_read (store, offset >> store->log2_block_size, length, + buf, &read); if (read != length) return EIO; if (!err && length != vm_page_size) @@ -432,26 +454,38 @@ disk_pager_write_page (vm_offset_t page, void *buf) { error_t err = 0; size_t length = vm_page_size, amount; - vm_size_t dev_end = store->size; + store_offset_t offset = page, dev_end = store->size; + int index = offset >> log2_block_size; + + pthread_mutex_lock (&disk_cache_lock); + assert (disk_cache_info[index].block != DC_NO_BLOCK); + offset = ((store_offset_t) disk_cache_info[index].block << log2_block_size) + + offset % block_size; +#ifndef NDEBUG /* Not strictly needed. 
*/ + assert ((disk_cache_info[index].last_read ^ DISK_CACHE_LAST_READ_XOR) + == disk_cache_info[index].last_read_xor); + assert (disk_cache_info[index].last_read + == disk_cache_info[index].block); +#endif + pthread_mutex_unlock (&disk_cache_lock); - if (page + vm_page_size > dev_end) - length = dev_end - page; + if (offset + vm_page_size > dev_end) + length = dev_end - offset; - ext2_debug ("writing disk page %d[%d]", page, length); + ext2_debug ("writing disk page %lld[%zu]", offset, length); STAT_INC (disk_pageouts); if (modified_global_blocks) /* Be picky about which blocks in a page that we write. */ { - vm_offset_t offs = page; struct pending_blocks pb; pending_blocks_init (&pb, buf); while (length > 0 && !err) { - block_t block = boffs_block (offs); + block_t block = boffs_block (offset); /* We don't clear the block modified bit here because this paging write request may not be the same one that actually set the bit, @@ -469,7 +503,7 @@ disk_pager_write_page (vm_offset_t page, void *buf) /* Otherwise just skip it. */ err = pending_blocks_skip (&pb); - offs += block_size; + offset += block_size; length -= block_size; } @@ -478,7 +512,7 @@ disk_pager_write_page (vm_offset_t page, void *buf) } else { - err = store_write (store, page >> store->log2_block_size, + err = store_write (store, offset >> store->log2_block_size, buf, length, &amount); if (!err && length != amount) err = EIO; @@ -486,6 +520,18 @@ disk_pager_write_page (vm_offset_t page, void *buf) return err; } + +static void +disk_pager_notify_evict (vm_offset_t page) +{ + unsigned long index = page >> log2_block_size; + + ext2_debug ("(block %lu)", index); + + pthread_mutex_lock (&disk_cache_lock); + disk_cache_info[index].flags &= ~DC_INCORE; + pthread_mutex_unlock (&disk_cache_lock); +} /* Satisfy a pager read request for either the disk pager or file pager PAGER, to the page at offset PAGE into BUF. 
WRITELOCK should be set if @@ -515,7 +561,8 @@ pager_write_page (struct user_pager_info *pager, vm_offset_t page, void pager_notify_evict (struct user_pager_info *pager, vm_offset_t page) { - assert (!"unrequested notification on eviction"); + if (pager->type == DISK) + disk_pager_notify_evict (page); } @@ -774,6 +821,373 @@ pager_dropweak (struct user_pager_info *p __attribute__ ((unused))) { } +/* Cached blocks from disk. */ +void *disk_cache; + +/* DISK_CACHE size in bytes and blocks. */ +store_offset_t disk_cache_size; +int disk_cache_blocks; + +/* block num --> pointer to in-memory block */ +hurd_ihash_t disk_cache_bptr; +/* Cached blocks' info. */ +struct disk_cache_info *disk_cache_info; +/* Hint index for which cache block to reuse next. */ +int disk_cache_hint; +/* Lock for these structures. */ +pthread_mutex_t disk_cache_lock; +/* Fired when a re-association is done. */ +pthread_cond_t disk_cache_reassociation; + +/* Finish mapping initialization. */ +static void +disk_cache_init (void) +{ + if (block_size != vm_page_size) + ext2_panic ("Block size %u != vm_page_size %u", + block_size, vm_page_size); + + pthread_mutex_init (&disk_cache_lock, NULL); + pthread_cond_init (&disk_cache_reassociation, NULL); + + /* Allocate space for block num -> in-memory pointer mapping. */ + if (hurd_ihash_create (&disk_cache_bptr, HURD_IHASH_NO_LOCP)) + ext2_panic ("Can't allocate memory for disk_pager_bptr"); + + /* Allocate space for disk cache blocks' info. */ + disk_cache_info = malloc ((sizeof *disk_cache_info) * disk_cache_blocks); + if (!disk_cache_info) + ext2_panic ("Cannot allocate space for disk cache info"); + + /* Initialize disk_cache_info. 
*/ + for (int i = 0; i < disk_cache_blocks; i++) + { + disk_cache_info[i].block = DC_NO_BLOCK; + disk_cache_info[i].flags = 0; + disk_cache_info[i].ref_count = 0; +#ifndef NDEBUG + disk_cache_info[i].last_read = DC_NO_BLOCK; + disk_cache_info[i].last_read_xor + = DC_NO_BLOCK ^ DISK_CACHE_LAST_READ_XOR; +#endif + } + disk_cache_hint = 0; + + /* Map the superblock and the block group descriptors. */ + block_t fixed_first = boffs_block (SBLOCK_OFFS); + block_t fixed_last = fixed_first + + (round_block ((sizeof *group_desc_image) * groups_count) + >> log2_block_size); + ext2_debug ("%u-%u\n", fixed_first, fixed_last); + assert (fixed_last - fixed_first + 1 <= (block_t)disk_cache_blocks + 3); + for (block_t i = fixed_first; i <= fixed_last; i++) + { + disk_cache_block_ref (i); + assert (disk_cache_info[i-fixed_first].block == i); + disk_cache_info[i-fixed_first].flags |= DC_FIXED; + } +} + +static void +disk_cache_return_unused (void) +{ + int index; + + /* XXX: Touch all pages. It seems that sometimes GNU Mach "forgets" + to notify us about evicted pages. Disk cache must be + unlocked. */ + for (vm_offset_t i = 0; i < disk_cache_size; i += vm_page_size) + *(volatile char *)(disk_cache + i); + + /* Release some references to cached blocks. */ + pokel_sync (&global_pokel, 1); + + /* Return unused pages that are in core. */ + int pending_begin = -1, pending_end = -1; + pthread_mutex_lock (&disk_cache_lock); + for (index = 0; index < disk_cache_blocks; index++) + if (! (disk_cache_info[index].flags & (DC_DONT_REUSE & ~DC_INCORE)) + && ! disk_cache_info[index].ref_count) + { + ext2_debug ("return %u -> %d", + disk_cache_info[index].block, index); + if (index != pending_end) + { + /* Return previous region, if there is such, ... 
*/ + if (pending_end >= 0) + { + pthread_mutex_unlock (&disk_cache_lock); + pager_return_some (diskfs_disk_pager, + pending_begin * vm_page_size, + (pending_end - pending_begin) + * vm_page_size, 1); + pthread_mutex_lock (&disk_cache_lock); + } + /* ... and start new region. */ + pending_begin = index; + } + pending_end = index + 1; + } + + pthread_mutex_unlock (&disk_cache_lock); + + /* Return last region, if there is such. */ + if (pending_end >= 0) + pager_return_some (diskfs_disk_pager, + pending_begin * vm_page_size, + (pending_end - pending_begin) * vm_page_size, + 1); + else + { + printf ("ext2fs: disk cache is starving\n"); + + /* Give it some time. This should happen rarely. */ + sleep (1); + } +} + +/* Map BLOCK, take a reference on it and return a pointer to it. */ +void * +disk_cache_block_ref (block_t block) +{ + int index; + void *bptr; + + assert (0 <= block && block < store->size >> log2_block_size); + + ext2_debug ("(%u)", block); + +retry_ref: + pthread_mutex_lock (&disk_cache_lock); + + bptr = hurd_ihash_find (disk_cache_bptr, block); + if (bptr) + /* Already mapped. */ + { + index = bptr_index (bptr); + + /* In process of re-associating? */ + if (disk_cache_info[index].flags & DC_UNTOUCHED) + { + /* Wait for re-association to finish. */ + pthread_cond_wait (&disk_cache_reassociation, &disk_cache_lock); + pthread_mutex_unlock (&disk_cache_lock); + +#if 0 + printf ("Re-association -- wait finished.\n"); +#endif + + goto retry_ref; + } + + /* Just increment reference and return. */ + assert (disk_cache_info[index].ref_count + 1 + > disk_cache_info[index].ref_count); + disk_cache_info[index].ref_count++; + + ext2_debug ("cached %u -> %d (ref_count = %hu, flags = %#hx, ptr = %p)", + disk_cache_info[index].block, index, + disk_cache_info[index].ref_count, + disk_cache_info[index].flags, bptr); + + pthread_mutex_unlock (&disk_cache_lock); + + return bptr; + } + + /* Search for a block that is not in core and is not referenced. 
*/ + index = disk_cache_hint; + while ((disk_cache_info[index].flags & DC_DONT_REUSE) + || (disk_cache_info[index].ref_count)) + { + ext2_debug ("reject %u -> %d (ref_count = %hu, flags = %#hx)", + disk_cache_info[index].block, index, + disk_cache_info[index].ref_count, + disk_cache_info[index].flags); + + /* Just move to next block. */ + index++; + if (index >= disk_cache_blocks) + index -= disk_cache_blocks; + + /* If we return to where we started, then there is no suitable + block. */ + if (index == disk_cache_hint) + break; + } + + /* The next place in the disk cache becomes the current hint. */ + disk_cache_hint = index + 1; + if (disk_cache_hint >= disk_cache_blocks) + disk_cache_hint -= disk_cache_blocks; + + /* Is a suitable place found? */ + if ((disk_cache_info[index].flags & DC_DONT_REUSE) + || disk_cache_info[index].ref_count) + /* No place is found. Try to release some blocks and try + again. */ + { + ext2_debug ("flush %u -> %d", disk_cache_info[index].block, index); + + pthread_mutex_unlock (&disk_cache_lock); + + disk_cache_return_unused (); + + goto retry_ref; + } + + /* Suitable place is found. */ + + /* Calculate pointer to data. */ + bptr = (char *)disk_cache + (index << log2_block_size); + ext2_debug ("map %u -> %d (%p)", block, index, bptr); + + /* This pager_return_some is used only to set PM_FORCEREAD for the + page. DC_UNTOUCHED is set so that we catch if someone has + referenced the block while we didn't hold disk_cache_lock. */ + disk_cache_info[index].flags |= DC_UNTOUCHED; + +#if 0 /* XXX: Let's see if this is needed at all. */ + + pthread_mutex_unlock (&disk_cache_lock); + pager_return_some (diskfs_disk_pager, bptr - disk_cache, vm_page_size, 1); + pthread_mutex_lock (&disk_cache_lock); + + /* Has someone used our bptr? Has someone mapped requested block + while we have unlocked disk_cache_lock? If so, environment has + changed and we have to restart operation. */ + if ((! 
(disk_cache_info[index].flags & DC_UNTOUCHED)) + || hurd_ihash_find (disk_cache_bptr, block)) + { + pthread_mutex_unlock (&disk_cache_lock); + goto retry_ref; + } + +#elif 0 + + /* XXX: Use libpager internals. */ + + pthread_mutex_lock (&diskfs_disk_pager->interlock); + int page = (bptr - disk_cache) / vm_page_size; + assert (page >= 0); + int is_incore = (page < diskfs_disk_pager->pagemapsize + && (diskfs_disk_pager->pagemap[page] & PM_INCORE)); + pthread_mutex_unlock (&diskfs_disk_pager->interlock); + if (is_incore) + { + pthread_mutex_unlock (&disk_cache_lock); + printf ("INCORE\n"); + goto retry_ref; + } + +#endif + + /* Re-associate. */ + if (disk_cache_info[index].block != DC_NO_BLOCK) + /* Remove old association. */ + hurd_ihash_remove (disk_cache_bptr, disk_cache_info[index].block); + /* New association. */ + if (hurd_ihash_add (disk_cache_bptr, block, bptr)) + ext2_panic ("Couldn't hurd_ihash_add new disk block"); + assert (! (disk_cache_info[index].flags & DC_DONT_REUSE & ~DC_UNTOUCHED)); + disk_cache_info[index].block = block; + assert (! disk_cache_info[index].ref_count); + disk_cache_info[index].ref_count = 1; + + /* All data structures are set up. */ + pthread_mutex_unlock (&disk_cache_lock); + + /* Try to read page. */ + *(volatile char *) bptr; + + /* Check if it's actually read. */ + pthread_mutex_lock (&disk_cache_lock); + if (disk_cache_info[index].flags & DC_UNTOUCHED) + /* It's not read. */ + { + /* Remove newly created association. */ + hurd_ihash_remove (disk_cache_bptr, block); + disk_cache_info[index].block = DC_NO_BLOCK; + disk_cache_info[index].flags &=~ DC_UNTOUCHED; + disk_cache_info[index].ref_count = 0; + pthread_mutex_unlock (&disk_cache_lock); + + /* Prepare next time association of this page to succeed. */ + pager_flush_some (diskfs_disk_pager, bptr - disk_cache, + vm_page_size, 0); + +#if 0 + printf ("Re-association failed.\n"); +#endif + + goto retry_ref; + } + + /* Re-association was successful. 
*/ + pthread_cond_broadcast (&disk_cache_reassociation); + + pthread_mutex_unlock (&disk_cache_lock); + + ext2_debug ("(%u) = %p", block, bptr); + return bptr; +} + +void +disk_cache_block_ref_ptr (void *ptr) +{ + int index; + + pthread_mutex_lock (&disk_cache_lock); + index = bptr_index (ptr); + assert (disk_cache_info[index].ref_count >= 1); + assert (disk_cache_info[index].ref_count + 1 + > disk_cache_info[index].ref_count); + disk_cache_info[index].ref_count++; + assert (! (disk_cache_info[index].flags & DC_UNTOUCHED)); + ext2_debug ("(%p) (ref_count = %hu, flags = %#hx)", + ptr, + disk_cache_info[index].ref_count, + disk_cache_info[index].flags); + pthread_mutex_unlock (&disk_cache_lock); +} + +void +disk_cache_block_deref (void *ptr) +{ + int index; + + assert (disk_cache <= ptr && ptr < disk_cache + disk_cache_size); + + pthread_mutex_lock (&disk_cache_lock); + index = bptr_index (ptr); + ext2_debug ("(%p) (ref_count = %hu, flags = %#hx)", + ptr, + disk_cache_info[index].ref_count - 1, + disk_cache_info[index].flags); + assert (! (disk_cache_info[index].flags & DC_UNTOUCHED)); + assert (disk_cache_info[index].ref_count >= 1); + disk_cache_info[index].ref_count--; + pthread_mutex_unlock (&disk_cache_lock); +} + +/* Return BLOCK's reference count, or 0 if not cached. Not used. */ +int +disk_cache_block_is_ref (block_t block) +{ + int ref; + void *ptr; + + pthread_mutex_lock (&disk_cache_lock); + ptr = hurd_ihash_find (disk_cache_bptr, block); + if (ptr == NULL) + ref = 0; + else /* XXX: Should check for DC_UNTOUCHED too. */ + ref = disk_cache_info[bptr_index (ptr)].ref_count; + pthread_mutex_unlock (&disk_cache_lock); + + return ref; +} + /* Create the DISK pager. 
*/ void create_disk_pager (void) @@ -783,9 +1197,12 @@ create_disk_pager (void) ext2_panic ("can't create disk pager: %s", strerror (errno)); upi->type = DISK; pager_bucket = ports_create_bucket (); - diskfs_start_disk_pager (upi, pager_bucket, MAY_CACHE, 0, - store->size, &disk_image); - + get_hypermetadata (); + disk_cache_blocks = DISK_CACHE_BLOCKS; + disk_cache_size = disk_cache_blocks << log2_block_size; + diskfs_start_disk_pager (upi, pager_bucket, MAY_CACHE, 1, + disk_cache_size, &disk_cache); + disk_cache_init (); } /* Call this to create a FILE_DATA pager and return a send right. diff --git a/ext2fs/pokel.c b/ext2fs/pokel.c index a8b16c97..3afb32e4 100644 --- a/ext2fs/pokel.c +++ b/ext2fs/pokel.c @@ -67,12 +67,27 @@ pokel_add (struct pokel *pokel, void *loc, vm_size_t length) vm_offset_t p_offs = pl->offset; vm_size_t p_end = p_offs + pl->length; - if (p_offs == offset && p_end == end) - break; + if (p_offs <= offset && end <= p_end) + { + if (pokel->image == disk_cache) + for (vm_offset_t i = offset; i < end; i += block_size) + disk_cache_block_deref (disk_cache + i); + + break; + } else if (p_end >= offset && end >= p_offs) { pl->offset = offset < p_offs ? offset : p_offs; pl->length = (end > p_end ? end : p_end) - pl->offset; + + if (pokel->image == disk_cache) + { + vm_offset_t i_begin = p_offs > offset ? p_offs : offset; + vm_offset_t i_end = p_end < end ? 
p_end : end; + for (vm_offset_t i = i_begin; i < i_end; i += block_size) + disk_cache_block_deref (disk_cache + i); + } + ext2_debug ("extended 0x%x[%ul] to 0x%x[%ul]", p_offs, p_end - p_offs, pl->offset, pl->length); break; @@ -113,11 +128,21 @@ _pokel_exec (struct pokel *pokel, int sync, int wait) pthread_spin_unlock (&pokel->lock); for (pl = pokes; pl; last = pl, pl = pl->next) - if (sync) - { - ext2_debug ("syncing 0x%x[%ul]", pl->offset, pl->length); - pager_sync_some (pokel->pager, pl->offset, pl->length, wait); - } + { + if (sync) + { + ext2_debug ("syncing 0x%lx[%lu]", pl->offset, pl->length); + pager_sync_some (pokel->pager, pl->offset, pl->length, wait); + } + + if (pokel->image == disk_cache) + { + vm_offset_t begin = trunc_block (pl->offset); + vm_offset_t end = round_block (pl->offset + pl->length); + for (vm_offset_t i = begin; i != end; i += block_size) + disk_cache_block_deref (pokel->image + i); + } + } if (last) { diff --git a/ext2fs/truncate.c b/ext2fs/truncate.c index 37e360bb..63d22955 100644 --- a/ext2fs/truncate.c +++ b/ext2fs/truncate.c @@ -124,7 +124,7 @@ trunc_indirect (struct node *node, block_t end, { unsigned index; int modified = 0, all_freed = 1; - block_t *ind_bh = (block_t *)bptr (*p); + block_t *ind_bh = (block_t *) disk_cache_block_ref (*p); unsigned first = end < offset ? 0 : end - offset; for (index = first; index < addr_per_block; index++) @@ -139,11 +139,16 @@ trunc_indirect (struct node *node, block_t end, if (first == 0 && all_freed) { - pager_flush_some (diskfs_disk_pager, boffs (*p), block_size, 1); + pager_flush_some (diskfs_disk_pager, + bptr_index (ind_bh) << log2_block_size, + block_size, 1); free_block_run_free_ptr (fbr, p); + disk_cache_block_deref (ind_bh); } else if (modified) record_indir_poke (node, ind_bh); + else + disk_cache_block_deref (ind_bh); } } -- cgit v1.2.3