diff options
-rw-r--r-- | ext2fs/dir.c | 4 | ||||
-rw-r--r-- | ext2fs/ext2fs.c | 2 | ||||
-rw-r--r-- | ext2fs/ext2fs.h | 9 | ||||
-rw-r--r-- | ext2fs/inode.c | 239 | ||||
-rw-r--r-- | libdiskfs/Makefile | 2 | ||||
-rw-r--r-- | libdiskfs/diskfs.h | 50 | ||||
-rw-r--r-- | libdiskfs/node-cache.c | 260 |
7 files changed, 336 insertions, 230 deletions
diff --git a/ext2fs/dir.c b/ext2fs/dir.c index 2dfe1d76..6cdfba27 100644 --- a/ext2fs/dir.c +++ b/ext2fs/dir.c @@ -306,7 +306,7 @@ diskfs_lookup_hard (struct node *dp, const char *name, enum lookup_type type, /* Here below are the spec dotdot cases. */ else if (type == RENAME || type == REMOVE) - np = ifind (inum); + np = diskfs_cached_ifind (inum); else if (type == LOOKUP) { @@ -359,7 +359,7 @@ diskfs_lookup_hard (struct node *dp, const char *name, enum lookup_type type, diskfs_nput (np); } else if (type == RENAME || type == REMOVE) - /* We just did ifind to get np; that allocates + /* We just did diskfs_cached_ifind to get np; that allocates no new references, so we don't have anything to do */ ; else if (type == LOOKUP) diff --git a/ext2fs/ext2fs.c b/ext2fs/ext2fs.c index beb7cad9..d0fdfe7a 100644 --- a/ext2fs/ext2fs.c +++ b/ext2fs/ext2fs.c @@ -185,8 +185,6 @@ main (int argc, char **argv) map_hypermetadata (); - inode_init (); - /* Set diskfs_root_node to the root inode. */ err = diskfs_cached_lookup (EXT2_ROOT_INO, &diskfs_root_node); if (err) diff --git a/ext2fs/ext2fs.h b/ext2fs/ext2fs.h index 9667b6f6..96d8e9db 100644 --- a/ext2fs/ext2fs.h +++ b/ext2fs/ext2fs.h @@ -159,9 +159,6 @@ struct disknode each DIRBLKSIZE piece of the directory. */ int *dirents; - /* Links on hash list. */ - struct node *hnext, **hprevp; - /* Lock to lock while fiddling with this inode's block allocation info. */ pthread_rwlock_t alloc_lock; @@ -419,12 +416,6 @@ dino_deref (struct ext2_inode *inode) /* Write all active disknodes into the inode pager. */ void write_all_disknodes (); - -/* Lookup node INUM (which must have a reference already) and return it - without allocating any new references. */ -struct node *ifind (ino_t inum); - -void inode_init (void); /* ---------------------------------------------------------------- */ diff --git a/ext2fs/inode.c b/ext2fs/inode.c index 7af617c5..d83bedca 100644 --- a/ext2fs/inode.c +++ b/ext2fs/inode.c @@ -39,144 +39,33 @@ #define UF_IMMUTABLE 0 #endif -#define INOHSZ 8192 -#if ((INOHSZ&(INOHSZ-1)) == 0) -#define INOHASH(ino) ((ino)&(INOHSZ-1)) -#else -#define INOHASH(ino) (((unsigned)(ino))%INOHSZ) -#endif - -/* The nodehash is a cache of nodes. - - Access to nodehash and nodehash_nr_items is protected by - nodecache_lock. - - Every node in the nodehash carries a light reference. When we are - asked to give up that light reference, we reacquire our lock - momentarily to check whether someone else reacquired a reference - through the nodehash. */ -static struct node *nodehash[INOHSZ]; -static size_t nodehash_nr_items; -static pthread_rwlock_t nodecache_lock = PTHREAD_RWLOCK_INITIALIZER; - -static error_t read_node (struct node *np); - pthread_spinlock_t generation_lock = PTHREAD_SPINLOCK_INITIALIZER; -/* Initialize the inode hash table. */ -void -inode_init () -{ - int n; - for (n = 0; n < INOHSZ; n++) - nodehash[n] = 0; -} - -/* Lookup node with inode number INUM. Returns NULL if the node is - not found in the node cache. */ -static struct node * -lookup (ino_t inum) -{ - struct node *np; - for (np = nodehash[INOHASH(inum)]; np; np = diskfs_node_disknode (np)->hnext) - if (np->cache_id == inum) - return np; - return NULL; -} - -/* Fetch inode INUM, set *NPP to the node structure; - gain one user reference and lock the node. */ +/* The user must define this function if she wants to use the node + cache. Create and initialize a node. */ error_t -diskfs_cached_lookup (ino_t inum, struct node **npp) +diskfs_user_make_node (struct node **npp, struct lookup_context *ctx) { - error_t err; - struct node *np, *tmp; + struct node *np; struct disknode *dn; - pthread_rwlock_rdlock (&nodecache_lock); - np = lookup (inum); - if (np) - goto gotit; - pthread_rwlock_unlock (&nodecache_lock); - /* Create the new node. */ np = diskfs_make_node_alloc (sizeof *dn); - dn = diskfs_node_disknode (np); - np->cache_id = inum; + if (np == NULL) + return ENOMEM; /* Format specific data for the new node. */ + dn = diskfs_node_disknode (np); dn->dirents = 0; dn->dir_idx = 0; dn->pager = 0; pthread_rwlock_init (&dn->alloc_lock, NULL); pokel_init (&dn->indir_pokel, diskfs_disk_pager, disk_cache); - pthread_mutex_lock (&np->lock); - - /* Put NP in NODEHASH. */ - pthread_rwlock_wrlock (&nodecache_lock); - tmp = lookup (inum); - if (tmp) - { - /* We lost a race. */ - diskfs_nput (np); - np = tmp; - goto gotit; - } - - dn->hnext = nodehash[INOHASH(inum)]; - if (dn->hnext) - diskfs_node_disknode (dn->hnext)->hprevp = &dn->hnext; - dn->hprevp = &nodehash[INOHASH(inum)]; - nodehash[INOHASH(inum)] = np; - diskfs_nref_light (np); - nodehash_nr_items += 1; - pthread_rwlock_unlock (&nodecache_lock); - - /* Get the contents of NP off disk. */ - err = read_node (np); - - if (!diskfs_check_readonly () && !np->dn_stat.st_gen) - { - pthread_spin_lock (&generation_lock); - if (++next_generation < diskfs_mtime->seconds) - next_generation = diskfs_mtime->seconds; - np->dn_stat.st_gen = next_generation; - pthread_spin_unlock (&generation_lock); - np->dn_set_ctime = 1; - } - - if (err) - return err; - else - { - *npp = np; - return 0; - } - - gotit: - diskfs_nref (np); - pthread_rwlock_unlock (&nodecache_lock); - pthread_mutex_lock (&np->lock); *npp = np; return 0; } -/* Lookup node INUM (which must have a reference already) and return it - without allocating any new references. */ -struct node * -ifind (ino_t inum) -{ - struct node *np; - - pthread_rwlock_rdlock (&nodecache_lock); - np = lookup (inum); - pthread_rwlock_unlock (&nodecache_lock); - - assert (np); - return np; -} - /* The last reference to a node has gone away; drop it from the hash table and clean all state in the dn structure. */ void @@ -193,38 +82,12 @@ diskfs_node_norefs (struct node *np) free (np); } -/* The last hard reference to a node has gone away; arrange to have - all the weak references dropped that can be. */ +/* The user must define this function if she wants to use the node + cache. The last hard reference to a node has gone away; arrange to + have all the weak references dropped that can be. */ void -diskfs_try_dropping_softrefs (struct node *np) +diskfs_user_try_dropping_softrefs (struct node *np) { - pthread_rwlock_wrlock (&nodecache_lock); - if (diskfs_node_disknode (np)->hprevp != NULL) - { - /* Check if someone reacquired a reference through the - nodehash. */ - struct references result; - refcounts_references (&np->refcounts, &result); - - if (result.hard > 0) - { - /* A reference was reacquired through a hash table lookup. - It's fine, we didn't touch anything yet. */ - pthread_rwlock_unlock (&nodecache_lock); - return; - } - - *diskfs_node_disknode (np)->hprevp = diskfs_node_disknode (np)->hnext; - if (diskfs_node_disknode (np)->hnext) - diskfs_node_disknode (diskfs_node_disknode (np)->hnext)->hprevp = - diskfs_node_disknode (np)->hprevp; - diskfs_node_disknode (np)->hnext = NULL; - diskfs_node_disknode (np)->hprevp = NULL; - nodehash_nr_items -= 1; - diskfs_nrele_light (np); - } - pthread_rwlock_unlock (&nodecache_lock); - drop_pager_softrefs (np); } @@ -242,9 +105,10 @@ diskfs_new_hardrefs (struct node *np) allow_pager_softrefs (np); } -/* Read stat information out of the ext2_inode. */ -static error_t -read_node (struct node *np) +/* The user must define this function if she wants to use the node + cache. Read stat information out of the on-disk node. */ +error_t +diskfs_user_read_node (struct node *np, struct lookup_context *ctx) { error_t err; struct stat *st = &np->dn_stat; @@ -384,6 +248,16 @@ read_node (struct node *np) linux, some devices). */ np->allocsize = 0; + if (!diskfs_check_readonly () && !np->dn_stat.st_gen) + { + pthread_spin_lock (&generation_lock); + if (++next_generation < diskfs_mtime->seconds) + next_generation = diskfs_mtime->seconds; + np->dn_stat.st_gen = next_generation; + pthread_spin_unlock (&generation_lock); + np->dn_set_ctime = 1; + } + return 0; } @@ -585,72 +459,11 @@ diskfs_node_reload (struct node *node) } pokel_flush (&dn->indir_pokel); flush_node_pager (node); - read_node (node); + diskfs_user_read_node (node, NULL); return 0; } -/* For each active node, call FUN. The node is to be locked around the call - to FUN. If FUN returns non-zero for any node, then immediately stop, and - return that value. */ -error_t -diskfs_node_iterate (error_t (*fun)(struct node *)) -{ - error_t err = 0; - int n; - size_t num_nodes; - struct node *node, **node_list, **p; - - pthread_rwlock_rdlock (&nodecache_lock); - - /* We must copy everything from the hash table into another data structure - to avoid running into any problems with the hash-table being modified - during processing (normally we delegate access to hash-table with - nodecache_lock, but we can't hold this while locking the - individual node locks). */ - num_nodes = nodehash_nr_items; - - /* TODO This method doesn't scale beyond a few dozen nodes and should be - replaced. */ - node_list = malloc (num_nodes * sizeof (struct node *)); - if (node_list == NULL) - { - pthread_rwlock_unlock (&nodecache_lock); - ext2_debug ("unable to allocate temporary node table"); - return ENOMEM; - } - - p = node_list; - for (n = 0; n < INOHSZ; n++) - for (node = nodehash[n]; node; node = diskfs_node_disknode (node)->hnext) - { - *p++ = node; - - /* We acquire a hard reference for node, but without using - diskfs_nref. We do this so that diskfs_new_hardrefs will not - get called. */ - refcounts_ref (&node->refcounts, NULL); - } - - pthread_rwlock_unlock (&nodecache_lock); - - p = node_list; - while (num_nodes-- > 0) - { - node = *p++; - if (!err) - { - pthread_mutex_lock (&node->lock); - err = (*fun)(node); - pthread_mutex_unlock (&node->lock); - } - diskfs_nrele (node); - } - - free (node_list); - return err; -} - /* Write all active disknodes into the ext2_inode pager. */ void write_all_disknodes () diff --git a/libdiskfs/Makefile b/libdiskfs/Makefile index 996e86a0..47b93390 100644 --- a/libdiskfs/Makefile +++ b/libdiskfs/Makefile @@ -41,7 +41,7 @@ OTHERSRCS = conch-fetch.c conch-set.c dir-clear.c dir-init.c dir-renamed.c \ extern-inline.c \ node-create.c node-drop.c node-make.c node-rdwr.c node-update.c \ node-nref.c node-nput.c node-nrele.c node-nrefl.c node-nputl.c \ - node-nrelel.c \ + node-nrelel.c node-cache.c \ peropen-make.c peropen-rele.c protid-make.c protid-rele.c \ init-init.c init-startup.c init-first.c init-main.c \ rdwr-internal.c boot-start.c demuxer.c node-times.c shutdown.c \ diff --git a/libdiskfs/diskfs.h b/libdiskfs/diskfs.h index 8ab61420..82a16b4f 100644 --- a/libdiskfs/diskfs.h +++ b/libdiskfs/diskfs.h @@ -80,6 +80,9 @@ struct peropen filesystem. */ struct node { + /* Links on hash list. */ + struct node *hnext, **hprevp; + struct disknode *dn; io_statbuf_t dn_stat; @@ -451,7 +454,8 @@ void diskfs_free_node (struct node *np, mode_t mode); if it isn't to be retained. */ void diskfs_node_norefs (struct node *np); -/* The user must define this function. Node NP has some light +/* The user must define this function unless she wants to use the node + cache. See the section `Node cache' below. Node NP has some light references, but has just lost its last hard references. Take steps so that if any light references can be freed, they are. NP is locked as is the pager refcount lock. This function will be called after @@ -515,7 +519,8 @@ void diskfs_write_disknode (struct node *np, int wait); then return only after the physical media has been completely updated. */ void diskfs_file_update (struct node *np, int wait); -/* The user must define this function. For each active node, call +/* The user must define this function unless she wants to use the node + cache. See the section `Node cache' below. For each active node, call FUN. The node is to be locked around the call to FUN. If FUN returns non-zero for any node, then immediately stop, and return that value. */ @@ -587,6 +592,36 @@ error_t (*diskfs_read_symlink_hook)(struct node *np, char *target); error_t diskfs_get_source (struct protid *cred, char *source, size_t source_len); +/* Libdiskfs contains a node cache. + + Using it relieves the user of implementing diskfs_cached_lookup, + diskfs_node_iterate, and diskfs_try_dropping_softrefs. + + In order to use it, she must implement the following functions with + the prefix `diskfs_user_'. */ + +/* This can be used to provide additional context to + diskfs_user_make_node and diskfs_user_read_node in case of cache + misses. */ +struct lookup_context; + +/* The user must define this function if she wants to use the node + cache. Create and initialize a node. */ +error_t diskfs_user_make_node (struct node **npp, struct lookup_context *ctx); + +/* The user must define this function if she wants to use the node + cache. Read stat information out of the on-disk node. */ +error_t diskfs_user_read_node (struct node *np, struct lookup_context *ctx); + +/* The user must define this function if she wants to use the node + cache. The last hard reference to a node has gone away; arrange to + have all the weak references dropped that can be. */ +void diskfs_user_try_dropping_softrefs (struct node *np); + +/* Lookup node INUM (which must have a reference already) and return it + without allocating any new references. */ +struct node *diskfs_cached_ifind (ino_t inum); + /* The library exports the following functions for general use */ /* Call this after arguments have been parsed to initialize the library. @@ -808,9 +843,18 @@ error_t diskfs_dirrewrite (struct node *dp, struct node *oldnp, error_t diskfs_dirremove (struct node *dp, struct node *np, const char *name, struct dirstat *ds); -/* Return the node corresponding to CACHE_ID in *NPP. */ +/* The user must define this function unless she wants to use the node + cache. See the section `Node cache' above. Return the node + corresponding to CACHE_ID in *NPP. */ error_t diskfs_cached_lookup (ino64_t cache_id, struct node **npp); +/* Return the node corresponding to CACHE_ID in *NPP. In case of a + cache miss, use CTX to create it and load it from the disk. See + the section `Node cache' above. */ +error_t diskfs_cached_lookup_context (ino_t inum, struct node **npp, + struct lookup_context *ctx); + + /* Create a new node. Give it MODE; if that includes IFDIR, also initialize `.' and `..' in the new directory. Return the node in NPP. CRED identifies the user responsible for the call. If NAME is nonzero, diff --git a/libdiskfs/node-cache.c b/libdiskfs/node-cache.c new file mode 100644 index 00000000..6b70da83 --- /dev/null +++ b/libdiskfs/node-cache.c @@ -0,0 +1,260 @@ +/* Inode cache. + + Copyright (C) 1994-2015 Free Software Foundation, Inc. + + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the GNU Hurd. If not, see <http://www.gnu.org/licenses/>. */ + +#include "priv.h" + +#define INOHSZ 8192 +#if ((INOHSZ&(INOHSZ-1)) == 0) +#define INOHASH(ino) ((ino)&(INOHSZ-1)) +#else +#define INOHASH(ino) (((unsigned)(ino))%INOHSZ) +#endif + +/* The nodehash is a cache of nodes. + + Access to nodehash and nodehash_nr_items is protected by + nodecache_lock. + + Every node in the nodehash carries a light reference. When we are + asked to give up that light reference, we reacquire our lock + momentarily to check whether someone else reacquired a reference + through the nodehash. */ +static struct node *nodehash[INOHSZ]; +static size_t nodehash_nr_items; +static pthread_rwlock_t nodecache_lock = PTHREAD_RWLOCK_INITIALIZER; + +/* Initialize the inode hash table. */ +static void __attribute__ ((constructor)) +nodecache_init () +{ +} + +/* Lookup node with inode number INUM. Returns NULL if the node is + not found in the node cache. */ +static struct node * +lookup (ino_t inum) +{ + struct node *np; + for (np = nodehash[INOHASH(inum)]; np; np = np->hnext) + if (np->cache_id == inum) + return np; + return NULL; +} + +/* Fetch inode INUM, set *NPP to the node structure; + gain one user reference and lock the node. */ +error_t __attribute__ ((weak)) +diskfs_cached_lookup (ino_t inum, struct node **npp) +{ + return diskfs_cached_lookup_context (inum, npp, NULL); +} + +/* Fetch inode INUM, set *NPP to the node structure; + gain one user reference and lock the node. */ +error_t +diskfs_cached_lookup_context (ino_t inum, struct node **npp, + struct lookup_context *ctx) +{ + error_t err; + struct node *np, *tmp; + + pthread_rwlock_rdlock (&nodecache_lock); + np = lookup (inum); + if (np) + goto gotit; + pthread_rwlock_unlock (&nodecache_lock); + + err = diskfs_user_make_node (&np, ctx); + if (err) + return err; + + np->cache_id = inum; + pthread_mutex_lock (&np->lock); + + /* Put NP in NODEHASH. */ + pthread_rwlock_wrlock (&nodecache_lock); + tmp = lookup (inum); + if (tmp) + { + /* We lost a race. */ + diskfs_nput (np); + np = tmp; + goto gotit; + } + + np->hnext = nodehash[INOHASH(inum)]; + if (np->hnext) + np->hnext->hprevp = &np->hnext; + np->hprevp = &nodehash[INOHASH(inum)]; + nodehash[INOHASH(inum)] = np; + diskfs_nref_light (np); + nodehash_nr_items += 1; + pthread_rwlock_unlock (&nodecache_lock); + + /* Get the contents of NP off disk. */ + err = diskfs_user_read_node (np, ctx); + if (err) + return err; + else + { + *npp = np; + return 0; + } + + gotit: + diskfs_nref (np); + pthread_rwlock_unlock (&nodecache_lock); + pthread_mutex_lock (&np->lock); + *npp = np; + return 0; +} + +/* Lookup node INUM (which must have a reference already) and return it + without allocating any new references. */ +struct node * +diskfs_cached_ifind (ino_t inum) +{ + struct node *np; + + pthread_rwlock_rdlock (&nodecache_lock); + np = lookup (inum); + pthread_rwlock_unlock (&nodecache_lock); + + assert (np); + return np; +} + +void __attribute__ ((weak)) +diskfs_try_dropping_softrefs (struct node *np) +{ + pthread_rwlock_wrlock (&nodecache_lock); + if (np->hprevp != NULL) + { + /* Check if someone reacquired a reference through the + nodehash. */ + struct references result; + refcounts_references (&np->refcounts, &result); + + if (result.hard > 0) + { + /* A reference was reacquired through a hash table lookup. + It's fine, we didn't touch anything yet. */ + pthread_rwlock_unlock (&nodecache_lock); + return; + } + + *np->hprevp = np->hnext; + if (np->hnext) + np->hnext->hprevp = np->hprevp; + np->hnext = NULL; + np->hprevp = NULL; + nodehash_nr_items -= 1; + diskfs_nrele_light (np); + } + pthread_rwlock_unlock (&nodecache_lock); + + diskfs_user_try_dropping_softrefs (np); +} + +/* For each active node, call FUN. The node is to be locked around the call + to FUN. If FUN returns non-zero for any node, then immediately stop, and + return that value. */ +error_t __attribute__ ((weak)) +diskfs_node_iterate (error_t (*fun)(struct node *)) +{ + error_t err = 0; + int n; + size_t num_nodes; + struct node *node, **node_list, **p; + + pthread_rwlock_rdlock (&nodecache_lock); + + /* We must copy everything from the hash table into another data structure + to avoid running into any problems with the hash-table being modified + during processing (normally we delegate access to hash-table with + nodecache_lock, but we can't hold this while locking the + individual node locks). */ + /* XXX: Can we? */ + num_nodes = nodehash_nr_items; + + /* TODO This method doesn't scale beyond a few dozen nodes and should be + replaced. */ + node_list = malloc (num_nodes * sizeof (struct node *)); + if (node_list == NULL) + { + pthread_rwlock_unlock (&nodecache_lock); + error (0, 0, "unable to allocate temporary node table"); + return ENOMEM; + } + + p = node_list; + for (n = 0; n < INOHSZ; n++) + for (node = nodehash[n]; node; node = node->hnext) + { + *p++ = node; + + /* We acquire a hard reference for node, but without using + diskfs_nref. We do this so that diskfs_new_hardrefs will not + get called. */ + refcounts_ref (&node->refcounts, NULL); + } + + pthread_rwlock_unlock (&nodecache_lock); + + p = node_list; + while (num_nodes-- > 0) + { + node = *p++; + if (!err) + { + pthread_mutex_lock (&node->lock); + err = (*fun)(node); + pthread_mutex_unlock (&node->lock); + } + diskfs_nrele (node); + } + + free (node_list); + return err; +} + +/* The user must define this function if she wants to use the node + cache. Create and initialize a node. */ +error_t __attribute__ ((weak)) +diskfs_user_make_node (struct node **npp, struct lookup_context *ctx) +{ + assert (! "diskfs_user_make_node not implemented"); +} + +/* The user must define this function if she wants to use the node + cache. Read stat information out of the on-disk node. */ +error_t __attribute__ ((weak)) +diskfs_user_read_node (struct node *np, struct lookup_context *ctx) +{ + assert (! "diskfs_user_read_node not implemented"); +} + +/* The user must define this function if she wants to use the node + cache. The last hard reference to a node has gone away; arrange to + have all the weak references dropped that can be. */ +void __attribute__ ((weak)) +diskfs_user_try_dropping_softrefs (struct node *np) +{ + assert (! "diskfs_user_try_dropping_softrefs not implemented"); +} |