From c234e34ad80801acd902c6d4892a7722fd084a87 Mon Sep 17 00:00:00 2001 From: Justus Winter <4winter@informatik.uni-hamburg.de> Date: Wed, 15 Apr 2015 13:17:06 +0200 Subject: libdiskfs: implement a node cache Previously, all users of libdiskfs implemented a node cache on their own. Move the node cache from ext2fs into libdiskfs. We preserve the previous API by marking all functions that we pull from ext2fs as weak, so that users like tmpfs can still implement their own node cache. * ext2fs/dir.c (diskfs_lookup_hard): Adjust accordingly. * ext2fs/ext2fs.c (main): Don't call `inode_init'. * ext2fs/ext2fs.h (struct disknode): Drop `hnext', `hprevp'. * ext2fs/inode.c: Move the node cache into diskfs. (diskfs_user_make_node): New function. (diskfs_try_dropping_softrefs): Rename to `diskfs_user_try_dropping_softrefs'. (read_node): Rename to `diskfs_user_read_node'. Also move a chunk of code dealing with generations from `diskfs_cached_lookup' here. * libdiskfs/Makefile (OTHERSRCS): Add `node-cache.c'. * libdiskfs/diskfs.h (struct node): Add `hnext', `hprevp'. Amend existing comments, add forward declarations. * libdiskfs/node-cache.c: New file. --- ext2fs/dir.c | 4 +- ext2fs/ext2fs.c | 2 - ext2fs/ext2fs.h | 9 --- ext2fs/inode.c | 239 ++++++-------------------------------------------------- 4 files changed, 28 insertions(+), 226 deletions(-) (limited to 'ext2fs') diff --git a/ext2fs/dir.c b/ext2fs/dir.c index 2dfe1d76..6cdfba27 100644 --- a/ext2fs/dir.c +++ b/ext2fs/dir.c @@ -306,7 +306,7 @@ diskfs_lookup_hard (struct node *dp, const char *name, enum lookup_type type, /* Here below are the spec dotdot cases. */ else if (type == RENAME || type == REMOVE) - np = ifind (inum); + np = diskfs_cached_ifind (inum); else if (type == LOOKUP) { @@ -359,7 +359,7 @@ diskfs_lookup_hard (struct node *dp, const char *name, enum lookup_type type, diskfs_nput (np); } else if (type == RENAME || type == REMOVE) - /* We just did ifind to get np; that allocates + /* We just did diskfs_cached_ifind to get np; that allocates no new references, so we don't have anything to do */ ; else if (type == LOOKUP) diff --git a/ext2fs/ext2fs.c b/ext2fs/ext2fs.c index beb7cad9..d0fdfe7a 100644 --- a/ext2fs/ext2fs.c +++ b/ext2fs/ext2fs.c @@ -185,8 +185,6 @@ main (int argc, char **argv) map_hypermetadata (); - inode_init (); - /* Set diskfs_root_node to the root inode. */ err = diskfs_cached_lookup (EXT2_ROOT_INO, &diskfs_root_node); if (err) diff --git a/ext2fs/ext2fs.h b/ext2fs/ext2fs.h index 9667b6f6..96d8e9db 100644 --- a/ext2fs/ext2fs.h +++ b/ext2fs/ext2fs.h @@ -159,9 +159,6 @@ struct disknode each DIRBLKSIZE piece of the directory. */ int *dirents; - /* Links on hash list. */ - struct node *hnext, **hprevp; - /* Lock to lock while fiddling with this inode's block allocation info. */ pthread_rwlock_t alloc_lock; @@ -419,12 +416,6 @@ dino_deref (struct ext2_inode *inode) /* Write all active disknodes into the inode pager. */ void write_all_disknodes (); - -/* Lookup node INUM (which must have a reference already) and return it - without allocating any new references. */ -struct node *ifind (ino_t inum); - -void inode_init (void); /* ---------------------------------------------------------------- */ diff --git a/ext2fs/inode.c b/ext2fs/inode.c index 7af617c5..d83bedca 100644 --- a/ext2fs/inode.c +++ b/ext2fs/inode.c @@ -39,144 +39,33 @@ #define UF_IMMUTABLE 0 #endif -#define INOHSZ 8192 -#if ((INOHSZ&(INOHSZ-1)) == 0) -#define INOHASH(ino) ((ino)&(INOHSZ-1)) -#else -#define INOHASH(ino) (((unsigned)(ino))%INOHSZ) -#endif - -/* The nodehash is a cache of nodes. - - Access to nodehash and nodehash_nr_items is protected by - nodecache_lock. - - Every node in the nodehash carries a light reference. When we are - asked to give up that light reference, we reacquire our lock - momentarily to check whether someone else reacquired a reference - through the nodehash. */ -static struct node *nodehash[INOHSZ]; -static size_t nodehash_nr_items; -static pthread_rwlock_t nodecache_lock = PTHREAD_RWLOCK_INITIALIZER; - -static error_t read_node (struct node *np); - pthread_spinlock_t generation_lock = PTHREAD_SPINLOCK_INITIALIZER; -/* Initialize the inode hash table. */ -void -inode_init () -{ - int n; - for (n = 0; n < INOHSZ; n++) - nodehash[n] = 0; -} - -/* Lookup node with inode number INUM. Returns NULL if the node is - not found in the node cache. */ -static struct node * -lookup (ino_t inum) -{ - struct node *np; - for (np = nodehash[INOHASH(inum)]; np; np = diskfs_node_disknode (np)->hnext) - if (np->cache_id == inum) - return np; - return NULL; -} - -/* Fetch inode INUM, set *NPP to the node structure; - gain one user reference and lock the node. */ +/* The user must define this function if she wants to use the node + cache. Create and initialize a node. */ error_t -diskfs_cached_lookup (ino_t inum, struct node **npp) +diskfs_user_make_node (struct node **npp, struct lookup_context *ctx) { - error_t err; - struct node *np, *tmp; + struct node *np; struct disknode *dn; - pthread_rwlock_rdlock (&nodecache_lock); - np = lookup (inum); - if (np) - goto gotit; - pthread_rwlock_unlock (&nodecache_lock); - /* Create the new node. */ np = diskfs_make_node_alloc (sizeof *dn); - dn = diskfs_node_disknode (np); - np->cache_id = inum; + if (np == NULL) + return ENOMEM; /* Format specific data for the new node. */ + dn = diskfs_node_disknode (np); dn->dirents = 0; dn->dir_idx = 0; dn->pager = 0; pthread_rwlock_init (&dn->alloc_lock, NULL); pokel_init (&dn->indir_pokel, diskfs_disk_pager, disk_cache); - pthread_mutex_lock (&np->lock); - - /* Put NP in NODEHASH. */ - pthread_rwlock_wrlock (&nodecache_lock); - tmp = lookup (inum); - if (tmp) - { - /* We lost a race. */ - diskfs_nput (np); - np = tmp; - goto gotit; - } - - dn->hnext = nodehash[INOHASH(inum)]; - if (dn->hnext) - diskfs_node_disknode (dn->hnext)->hprevp = &dn->hnext; - dn->hprevp = &nodehash[INOHASH(inum)]; - nodehash[INOHASH(inum)] = np; - diskfs_nref_light (np); - nodehash_nr_items += 1; - pthread_rwlock_unlock (&nodecache_lock); - - /* Get the contents of NP off disk. */ - err = read_node (np); - - if (!diskfs_check_readonly () && !np->dn_stat.st_gen) - { - pthread_spin_lock (&generation_lock); - if (++next_generation < diskfs_mtime->seconds) - next_generation = diskfs_mtime->seconds; - np->dn_stat.st_gen = next_generation; - pthread_spin_unlock (&generation_lock); - np->dn_set_ctime = 1; - } - - if (err) - return err; - else - { - *npp = np; - return 0; - } - - gotit: - diskfs_nref (np); - pthread_rwlock_unlock (&nodecache_lock); - pthread_mutex_lock (&np->lock); *npp = np; return 0; } -/* Lookup node INUM (which must have a reference already) and return it - without allocating any new references. */ -struct node * -ifind (ino_t inum) -{ - struct node *np; - - pthread_rwlock_rdlock (&nodecache_lock); - np = lookup (inum); - pthread_rwlock_unlock (&nodecache_lock); - - assert (np); - return np; -} - /* The last reference to a node has gone away; drop it from the hash table and clean all state in the dn structure. */ void @@ -193,38 +82,12 @@ diskfs_node_norefs (struct node *np) free (np); } -/* The last hard reference to a node has gone away; arrange to have - all the weak references dropped that can be. */ +/* The user must define this function if she wants to use the node + cache. The last hard reference to a node has gone away; arrange to + have all the weak references dropped that can be. */ void -diskfs_try_dropping_softrefs (struct node *np) +diskfs_user_try_dropping_softrefs (struct node *np) { - pthread_rwlock_wrlock (&nodecache_lock); - if (diskfs_node_disknode (np)->hprevp != NULL) - { - /* Check if someone reacquired a reference through the - nodehash. */ - struct references result; - refcounts_references (&np->refcounts, &result); - - if (result.hard > 0) - { - /* A reference was reacquired through a hash table lookup. - It's fine, we didn't touch anything yet. */ - pthread_rwlock_unlock (&nodecache_lock); - return; - } - - *diskfs_node_disknode (np)->hprevp = diskfs_node_disknode (np)->hnext; - if (diskfs_node_disknode (np)->hnext) - diskfs_node_disknode (diskfs_node_disknode (np)->hnext)->hprevp = - diskfs_node_disknode (np)->hprevp; - diskfs_node_disknode (np)->hnext = NULL; - diskfs_node_disknode (np)->hprevp = NULL; - nodehash_nr_items -= 1; - diskfs_nrele_light (np); - } - pthread_rwlock_unlock (&nodecache_lock); - drop_pager_softrefs (np); } @@ -242,9 +105,10 @@ diskfs_new_hardrefs (struct node *np) allow_pager_softrefs (np); } -/* Read stat information out of the ext2_inode. */ -static error_t -read_node (struct node *np) +/* The user must define this function if she wants to use the node + cache. Read stat information out of the on-disk node. */ +error_t +diskfs_user_read_node (struct node *np, struct lookup_context *ctx) { error_t err; struct stat *st = &np->dn_stat; @@ -384,6 +248,16 @@ read_node (struct node *np) linux, some devices). */ np->allocsize = 0; + if (!diskfs_check_readonly () && !np->dn_stat.st_gen) + { + pthread_spin_lock (&generation_lock); + if (++next_generation < diskfs_mtime->seconds) + next_generation = diskfs_mtime->seconds; + np->dn_stat.st_gen = next_generation; + pthread_spin_unlock (&generation_lock); + np->dn_set_ctime = 1; + } + return 0; } @@ -585,72 +459,11 @@ diskfs_node_reload (struct node *node) } pokel_flush (&dn->indir_pokel); flush_node_pager (node); - read_node (node); + diskfs_user_read_node (node, NULL); return 0; } -/* For each active node, call FUN. The node is to be locked around the call - to FUN. If FUN returns non-zero for any node, then immediately stop, and - return that value. */ -error_t -diskfs_node_iterate (error_t (*fun)(struct node *)) -{ - error_t err = 0; - int n; - size_t num_nodes; - struct node *node, **node_list, **p; - - pthread_rwlock_rdlock (&nodecache_lock); - - /* We must copy everything from the hash table into another data structure - to avoid running into any problems with the hash-table being modified - during processing (normally we delegate access to hash-table with - nodecache_lock, but we can't hold this while locking the - individual node locks). */ - num_nodes = nodehash_nr_items; - - /* TODO This method doesn't scale beyond a few dozen nodes and should be - replaced. */ - node_list = malloc (num_nodes * sizeof (struct node *)); - if (node_list == NULL) - { - pthread_rwlock_unlock (&nodecache_lock); - ext2_debug ("unable to allocate temporary node table"); - return ENOMEM; - } - - p = node_list; - for (n = 0; n < INOHSZ; n++) - for (node = nodehash[n]; node; node = diskfs_node_disknode (node)->hnext) - { - *p++ = node; - - /* We acquire a hard reference for node, but without using - diskfs_nref. We do this so that diskfs_new_hardrefs will not - get called. */ - refcounts_ref (&node->refcounts, NULL); - } - - pthread_rwlock_unlock (&nodecache_lock); - - p = node_list; - while (num_nodes-- > 0) - { - node = *p++; - if (!err) - { - pthread_mutex_lock (&node->lock); - err = (*fun)(node); - pthread_mutex_unlock (&node->lock); - } - diskfs_nrele (node); - } - - free (node_list); - return err; -} - /* Write all active disknodes into the ext2_inode pager. */ void write_all_disknodes () -- cgit v1.2.3