diff options
author | Justus Winter <4winter@informatik.uni-hamburg.de> | 2014-06-20 14:34:49 +0200 |
---|---|---|
committer | Justus Winter <4winter@informatik.uni-hamburg.de> | 2014-06-20 14:34:49 +0200 |
commit | 10b4d684f95e1b0d5a0c3fbe60b83bf7ea44b2e1 (patch) | |
tree | 29f3f309aa091c6400e6d1a7b6e5c5b407759b2a /debian/patches | |
parent | d6f2282b6e994d815f460e214cdee8ec17a1fffa (diff) |
add patch series
Diffstat (limited to 'debian/patches')
11 files changed, 3006 insertions, 0 deletions
diff --git a/debian/patches/0001-ext2fs-use-correct-type-for-block-numbers.patch b/debian/patches/0001-ext2fs-use-correct-type-for-block-numbers.patch new file mode 100644 index 00000000..5b37b2e7 --- /dev/null +++ b/debian/patches/0001-ext2fs-use-correct-type-for-block-numbers.patch @@ -0,0 +1,38 @@ +From eceaf73731410c9544d06a657ce1afb2c728cdfe Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Wed, 4 Jun 2014 17:26:06 +0200 +Subject: [PATCH 01/10] ext2fs: use correct type for block numbers + +* ext2fs/dir.c (count_dirents): Use block_t for nb. +(diskfs_get_directs): Likewise for blkno, nblks. +--- + ext2fs/dir.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/ext2fs/dir.c b/ext2fs/dir.c +index 337314c..470b7e9 100644 +--- a/ext2fs/dir.c ++++ b/ext2fs/dir.c +@@ -823,7 +823,7 @@ diskfs_drop_dirstat (struct node *dp, struct dirstat *ds) + write the answer down in its dirents array. As a side affect + fill BUF with the block. 
*/ + static error_t +-count_dirents (struct node *dp, int nb, char *buf) ++count_dirents (struct node *dp, block_t nb, char *buf) + { + size_t amt; + char *offinblk; +@@ -868,8 +868,8 @@ diskfs_get_directs (struct node *dp, + vm_size_t bufsiz, + int *amt) + { +- int blkno; +- int nblks; ++ block_t blkno; ++ block_t nblks; + int curentry; + char buf[DIRBLKSIZ]; + char *bufp; +-- +2.0.0 + diff --git a/debian/patches/0002-libdiskfs-fix-reference-counting-of-peropen-objects.patch b/debian/patches/0002-libdiskfs-fix-reference-counting-of-peropen-objects.patch new file mode 100644 index 00000000..92289b1e --- /dev/null +++ b/debian/patches/0002-libdiskfs-fix-reference-counting-of-peropen-objects.patch @@ -0,0 +1,88 @@ +From d6bfa7bf45e77670f5e8150137ff95fd591c0e8c Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Fri, 20 Jun 2014 13:54:28 +0200 +Subject: [PATCH 02/10] libdiskfs: fix reference counting of peropen objects + +Previously, peropen objects were created with a reference count of +zero. Therefore, if diskfs_create_protid fails, passing such an +object to diskfs_release_peropen would lead to a reference count +underflow. + +* libdiskfs/peropen-make.c (diskfs_make_peropen): Initialize reference +count to one. +* libdiskfs/protid-make.c (diskfs_start_protid): And consume this +reference on success. Update comment. +(diskfs_create_protid): Update comment. +* libdiskfs/diskfs.h: Update comments. +--- + libdiskfs/diskfs.h | 4 ++-- + libdiskfs/peropen-make.c | 2 +- + libdiskfs/protid-make.c | 6 +++--- + 3 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/libdiskfs/diskfs.h b/libdiskfs/diskfs.h +index e328527..e59ba99 100644 +--- a/libdiskfs/diskfs.h ++++ b/libdiskfs/diskfs.h +@@ -820,12 +820,12 @@ diskfs_create_node (struct node *dir, const char *name, mode_t mode, + struct dirstat *ds); + + /* Create and return a protid for an existing peropen PO in CRED, +- referring to user USER. 
On success, consume a reference to PO. */ + error_t diskfs_create_protid (struct peropen *po, struct iouser *user, + struct protid **cred); + + /* Build and return in CRED a protid which has no user identification, for +- peropen PO. */ ++ peropen PO. On success, consume a reference to PO. */ + error_t diskfs_start_protid (struct peropen *po, struct protid **cred); + + /* Finish building protid CRED started with diskfs_start_protid; +diff --git a/libdiskfs/peropen-make.c b/libdiskfs/peropen-make.c +index 6d5ca01..788b9a7 100644 +--- a/libdiskfs/peropen-make.c ++++ b/libdiskfs/peropen-make.c +@@ -31,7 +31,7 @@ diskfs_make_peropen (struct node *np, int flags, struct peropen *context, + + po->filepointer = 0; + po->lock_status = LOCK_UN; +- refcount_init (&po->refcnt, 0); ++ refcount_init (&po->refcnt, 1); + po->openstat = flags; + po->np = np; + po->path = NULL; +diff --git a/libdiskfs/protid-make.c b/libdiskfs/protid-make.c +index 22aaa2e..bd13bde 100644 +--- a/libdiskfs/protid-make.c ++++ b/libdiskfs/protid-make.c +@@ -20,7 +20,7 @@ + #include <assert.h> + + /* Build and return in CRED a protid which has no user identification, for +- peropen PO. */ ++ peropen PO. On success, consume a reference to PO. */ + error_t + diskfs_start_protid (struct peropen *po, struct protid **cred) + { +@@ -29,7 +29,7 @@ diskfs_start_protid (struct peropen *po, struct protid **cred) + sizeof (struct protid), cred); + if (! err) + { +- refcount_ref (&po->refcnt); ++ /* Consume a reference. */ + (*cred)->po = po; + (*cred)->shared_object = MACH_PORT_NULL; + (*cred)->mapped = 0; +@@ -56,7 +56,7 @@ diskfs_finish_protid (struct protid *cred, struct iouser *user) + } + + /* Create and return a protid for an existing peropen PO in CRED for +- USER. */ ++ USER. On success, consume a reference to PO. 
*/ + error_t + diskfs_create_protid (struct peropen *po, struct iouser *user, + struct protid **cred) +-- +2.0.0 + diff --git a/debian/patches/0003-include-detect-use-after-free-errors-using-the-refer.patch b/debian/patches/0003-include-detect-use-after-free-errors-using-the-refer.patch new file mode 100644 index 00000000..1690e1fd --- /dev/null +++ b/debian/patches/0003-include-detect-use-after-free-errors-using-the-refer.patch @@ -0,0 +1,76 @@ +From 6a36e42db8d32e66a1057d894a8d9c6da77f07a7 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Fri, 20 Jun 2014 14:27:59 +0200 +Subject: [PATCH 03/10] include: detect use-after-free errors using the + reference counts + +* include/refcount.h (refcount_init): There must be at least one +reference at initialization time. +(refcounts_init): Likewise. +(refcount_ref): Detect use-after-free errors. +(refcounts_ref): Likewise. +(refcounts_ref_weak): Likewise. +--- + include/refcount.h | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/include/refcount.h b/include/refcount.h +index 785b052..1efe865 100644 +--- a/include/refcount.h ++++ b/include/refcount.h +@@ -31,10 +31,11 @@ + /* An opaque type. You must not access these values directly. */ + typedef unsigned int refcount_t; + +-/* Initialize REF with REFERENCES. */ ++/* Initialize REF with REFERENCES. REFERENCES must not be zero. */ + static inline void + refcount_init (refcount_t *ref, unsigned int references) + { ++ assert (references > 0 || !"references must not be zero!"); + *ref = references; + } + +@@ -47,6 +48,7 @@ refcount_ref (refcount_t *ref) + unsigned int r; + r = __atomic_add_fetch (ref, 1, __ATOMIC_RELAXED); + assert (r != UINT_MAX || !"refcount overflowed!"); ++ assert (r > 1 || !"refcount detected use after free!"); + return r; + } + +@@ -101,10 +103,12 @@ union _references { + uint64_t value; + }; + +-/* Initialize REF with HARD and WEAK references. 
*/ ++/* Initialize REF with HARD and WEAK references. HARD and WEAK must ++ not both be zero. */ + static inline void + refcounts_init (refcounts_t *ref, uint32_t hard, uint32_t weak) + { ++ assert ((hard != 0 || weak != 0) || !"references must not both be zero!"); + ref->references = (struct references) { .hard = hard, .weak = weak }; + } + +@@ -119,6 +123,8 @@ refcounts_ref (refcounts_t *ref, struct references *result) + union _references r; + r.value = __atomic_add_fetch (&ref->value, op.value, __ATOMIC_RELAXED); + assert (r.references.hard != UINT32_MAX || !"refcount overflowed!"); ++ assert (! (r.references.hard == 1 && r.references.weak == 0) ++ || !"refcount detected use after free!"); + if (result) + *result = r.references; + } +@@ -208,6 +214,8 @@ refcounts_ref_weak (refcounts_t *ref, struct references *result) + union _references r; + r.value = __atomic_add_fetch (&ref->value, op.value, __ATOMIC_RELAXED); + assert (r.references.weak != UINT32_MAX || !"refcount overflowed!"); ++ assert (! (r.references.hard == 0 && r.references.weak == 1) ++ || !"refcount detected use after free!"); + if (result) + *result = r.references; + } +-- +2.0.0 + diff --git a/debian/patches/0004-ext2fs-use-a-seperate-lock-to-protect-nodehash.patch b/debian/patches/0004-ext2fs-use-a-seperate-lock-to-protect-nodehash.patch new file mode 100644 index 00000000..08c92855 --- /dev/null +++ b/debian/patches/0004-ext2fs-use-a-seperate-lock-to-protect-nodehash.patch @@ -0,0 +1,259 @@ +From 2efa153d904224ca71711e688ccf635d94baf7b6 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Tue, 13 May 2014 13:09:15 +0200 +Subject: [PATCH 04/10] ext2fs: use a seperate lock to protect nodehash + +Previously, ext2fs used diskfs_node_refcnt_lock to serialize access to +the nodehash. + +Use a separate lock to protect nodehash. Adjust the reference +counting accordingly. Every node in the nodehash carries a light +reference. 
When we are asked to give up that light reference, we +reacquire our lock momentarily to check whether someone else +reacquired a reference through the nodehash. + +* ext2fs/inode.c (nodecache_lock): New lock. +(diskfs_cached_lookup): Use a separate lock to protect nodehash. +Adjust the reference counting accordingly. +(ifind): Likewise. +(diskfs_node_iterate): Likewise. +(diskfs_node_norefs): Move the code removing the node from nodehash... +(diskfs_try_dropping_softrefs): ... here, where we check whether +someone reacquired a reference, and if so hold on to our light +reference. +--- + ext2fs/inode.c | 127 +++++++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 91 insertions(+), 36 deletions(-) + +diff --git a/ext2fs/inode.c b/ext2fs/inode.c +index ed78265..67c502a 100644 +--- a/ext2fs/inode.c ++++ b/ext2fs/inode.c +@@ -46,8 +46,19 @@ + #define INOHASH(ino) (((unsigned)(ino))%INOHSZ) + #endif + ++/* The nodehash is a cache of nodes. ++ ++ Access to nodehash and nodehash_nr_items is protected by ++ nodecache_lock. ++ ++ Every node in the nodehash carries a light reference. When we are ++ asked to give up that light reference, we reacquire our lock ++ momentarily to check whether someone else reacquired a reference ++ through the nodehash. */ + static struct node *nodehash[INOHSZ]; + static size_t nodehash_nr_items; ++/* nodecache_lock must be acquired before diskfs_node_refcnt_lock. */ ++static pthread_rwlock_t nodecache_lock = PTHREAD_RWLOCK_INITIALIZER; + + static error_t read_node (struct node *np); + +@@ -62,33 +73,37 @@ inode_init () + nodehash[n] = 0; + } + ++/* Lookup node with inode number INUM. Returns NULL if the node is ++ not found in the node cache. 
*/ ++static struct node * ++lookup (ino_t inum) ++{ ++ struct node *np; ++ for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) ++ if (np->cache_id == inum) ++ return np; ++ return NULL; ++} ++ + /* Fetch inode INUM, set *NPP to the node structure; + gain one user reference and lock the node. */ + error_t + diskfs_cached_lookup (ino_t inum, struct node **npp) + { + error_t err; +- struct node *np; ++ struct node *np, *tmp; + struct disknode *dn; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) +- if (np->cache_id == inum) +- { +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- pthread_mutex_lock (&np->lock); +- *npp = np; +- return 0; +- } ++ pthread_rwlock_rdlock (&nodecache_lock); ++ np = lookup (inum); ++ if (np) ++ goto gotit; ++ pthread_rwlock_unlock (&nodecache_lock); + + /* Format specific data for the new node. */ + dn = malloc (sizeof (struct disknode)); + if (! dn) +- { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- return ENOMEM; +- } ++ return ENOMEM; + dn->dirents = 0; + dn->dir_idx = 0; + dn->pager = 0; +@@ -102,14 +117,24 @@ diskfs_cached_lookup (ino_t inum, struct node **npp) + pthread_mutex_lock (&np->lock); + + /* Put NP in NODEHASH. */ ++ pthread_rwlock_wrlock (&nodecache_lock); ++ tmp = lookup (inum); ++ if (tmp) ++ { ++ /* We lost a race. */ ++ diskfs_nput (np); ++ np = tmp; ++ goto gotit; ++ } ++ + dn->hnext = nodehash[INOHASH(inum)]; + if (dn->hnext) + dn->hnext->dn->hprevp = &dn->hnext; + dn->hprevp = &nodehash[INOHASH(inum)]; + nodehash[INOHASH(inum)] = np; ++ diskfs_nref_light (np); + nodehash_nr_items += 1; +- +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_unlock (&nodecache_lock); + + /* Get the contents of NP off disk. 
*/ + err = read_node (np); +@@ -131,6 +156,13 @@ diskfs_cached_lookup (ino_t inum, struct node **npp) + *npp = np; + return 0; + } ++ ++ gotit: ++ diskfs_nref (np); ++ pthread_rwlock_unlock (&nodecache_lock); ++ pthread_mutex_lock (&np->lock); ++ *npp = np; ++ return 0; + } + + /* Lookup node INUM (which must have a reference already) and return it +@@ -140,17 +172,12 @@ ifind (ino_t inum) + { + struct node *np; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) +- { +- if (np->cache_id != inum) +- continue; ++ pthread_rwlock_rdlock (&nodecache_lock); ++ np = lookup (inum); ++ pthread_rwlock_unlock (&nodecache_lock); + +- assert (np->references); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- return np; +- } +- assert (0); ++ assert (np); ++ return np; + } + + /* The last reference to a node has gone away; drop +@@ -158,11 +185,6 @@ ifind (ino_t inum) + void + diskfs_node_norefs (struct node *np) + { +- *np->dn->hprevp = np->dn->hnext; +- if (np->dn->hnext) +- np->dn->hnext->dn->hprevp = np->dn->hprevp; +- nodehash_nr_items -= 1; +- + if (np->dn->dirents) + free (np->dn->dirents); + assert (!np->dn->pager); +@@ -180,6 +202,35 @@ diskfs_node_norefs (struct node *np) + void + diskfs_try_dropping_softrefs (struct node *np) + { ++ pthread_rwlock_wrlock (&nodecache_lock); ++ if (np->dn->hnext != NULL) ++ { ++ /* Check if someone reacquired a reference through the ++ nodehash. */ ++ unsigned int references; ++ pthread_spin_lock (&diskfs_node_refcnt_lock); ++ references = np->references; ++ pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ ++ /* An additional reference is acquired by libdiskfs across calls ++ to diskfs_try_dropping_softrefs. */ ++ if (references > 1) ++ { ++ /* A reference was reacquired through a hash table lookup. ++ It's fine, we didn't touch anything yet. 
*/ ++ pthread_rwlock_unlock (&nodecache_lock); ++ return; ++ } ++ ++ *np->dn->hprevp = np->dn->hnext; ++ if (np->dn->hnext) ++ np->dn->hnext->dn->hprevp = np->dn->hprevp; ++ np->dn->hnext = NULL; ++ nodehash_nr_items -= 1; ++ diskfs_nrele_light (np); ++ } ++ pthread_rwlock_unlock (&nodecache_lock); ++ + drop_pager_softrefs (np); + } + +@@ -556,12 +607,12 @@ diskfs_node_iterate (error_t (*fun)(struct node *)) + size_t num_nodes; + struct node *node, **node_list, **p; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_rdlock (&nodecache_lock); + + /* We must copy everything from the hash table into another data structure + to avoid running into any problems with the hash-table being modified + during processing (normally we delegate access to hash-table with +- diskfs_node_refcnt_lock, but we can't hold this while locking the ++ nodecache_lock, but we can't hold this while locking the + individual node locks). */ + num_nodes = nodehash_nr_items; + +@@ -570,7 +621,7 @@ diskfs_node_iterate (error_t (*fun)(struct node *)) + node_list = malloc (num_nodes * sizeof (struct node *)); + if (node_list == NULL) + { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_unlock (&nodecache_lock); + ext2_debug ("unable to allocate temporary node table"); + return ENOMEM; + } +@@ -580,10 +631,14 @@ diskfs_node_iterate (error_t (*fun)(struct node *)) + for (node = nodehash[n]; node; node = node->dn->hnext) + { + *p++ = node; ++ ++ /* We acquire a hard reference for node, but without using ++ diskfs_nref. We do this so that diskfs_new_hardrefs will not ++ get called. 
*/ + node->references++; + } + +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_unlock (&nodecache_lock); + + p = node_list; + while (num_nodes-- > 0) +-- +2.0.0 + diff --git a/debian/patches/0005-fatfs-use-a-seperate-lock-to-protect-nodehash.patch b/debian/patches/0005-fatfs-use-a-seperate-lock-to-protect-nodehash.patch new file mode 100644 index 00000000..44f6c0fa --- /dev/null +++ b/debian/patches/0005-fatfs-use-a-seperate-lock-to-protect-nodehash.patch @@ -0,0 +1,303 @@ +From d67477d73bfab206c55625f3760a05a081787fe9 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Tue, 13 May 2014 15:14:53 +0200 +Subject: [PATCH 05/10] fatfs: use a seperate lock to protect nodehash + +Previously, fatfs used diskfs_node_refcnt_lock to serialize access to +the nodehash. + +Use a separate lock to protect nodehash. Adjust the reference +counting accordingly. Every node in the nodehash carries a light +reference. When we are asked to give up that light reference, we +reacquire our lock momentarily to check whether someone else +reacquired a reference through the nodehash. + +* fatfs/inode.c (nodecache_lock): New lock. +(diskfs_cached_lookup): Use a separate lock to protect nodehash. +Adjust the reference counting accordingly. +(ifind): Likewise. +(diskfs_node_iterate): Likewise. +(diskfs_node_norefs): Move the code removing the node from nodehash... +(diskfs_try_dropping_softrefs): ... here, where we check whether +someone reacquired a reference, and if so hold on to our light +reference. +--- + fatfs/inode.c | 146 ++++++++++++++++++++++++++++++++++++++++------------------ + 1 file changed, 101 insertions(+), 45 deletions(-) + +diff --git a/fatfs/inode.c b/fatfs/inode.c +index ed6f3f0..c3997d0 100644 +--- a/fatfs/inode.c ++++ b/fatfs/inode.c +@@ -44,8 +44,19 @@ + #define INOHASH(ino) (((unsigned)(ino))%INOHSZ) + #endif + ++/* The nodehash is a cache of nodes. 
++ ++ Access to nodehash and nodehash_nr_items is protected by ++ nodecache_lock. ++ ++ Every node in the nodehash carries a light reference. When we are ++ asked to give up that light reference, we reacquire our lock ++ momentarily to check whether someone else reacquired a reference ++ through the nodehash. */ + static struct node *nodehash[INOHSZ]; + static size_t nodehash_nr_items; ++/* nodecache_lock must be acquired before diskfs_node_refcnt_lock. */ ++static pthread_rwlock_t nodecache_lock = PTHREAD_RWLOCK_INITIALIZER; + + static error_t read_node (struct node *np, vm_address_t buf); + +@@ -58,33 +69,38 @@ inode_init () + nodehash[n] = 0; + } + ++/* Lookup node with inode number INUM. Returns NULL if the node is ++ not found in the node cache. */ ++static struct node * ++lookup (ino_t inum) ++{ ++ struct node *np; ++ for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) ++ if (np->cache_id == inum) ++ return np; ++ return NULL; ++} ++ + /* Fetch inode INUM, set *NPP to the node structure; gain one user + reference and lock the node. */ + error_t + diskfs_cached_lookup (ino64_t inum, struct node **npp) + { + error_t err; +- struct node *np; ++ struct node *np, *tmp; + struct disknode *dn; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) +- if (np->cache_id == inum) +- { +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- pthread_mutex_lock (&np->lock); +- *npp = np; +- return 0; +- } ++ pthread_rwlock_rdlock (&nodecache_lock); ++ np = lookup (inum); ++ if (np) ++ goto gotit; ++ pthread_rwlock_unlock (&nodecache_lock); + + /* Format specific data for the new node. */ + dn = malloc (sizeof (struct disknode)); + if (! 
dn) +- { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- return ENOMEM; +- } ++ return ENOMEM; ++ + dn->pager = 0; + dn->first = 0; + dn->last = 0; +@@ -102,15 +118,25 @@ diskfs_cached_lookup (ino64_t inum, struct node **npp) + pthread_mutex_lock (&np->lock); + + /* Put NP in NODEHASH. */ ++ pthread_rwlock_wrlock (&nodecache_lock); ++ tmp = lookup (inum); ++ if (tmp) ++ { ++ /* We lost a race. */ ++ diskfs_nput (np); ++ np = tmp; ++ goto gotit; ++ } ++ + dn->hnext = nodehash[INOHASH(inum)]; + if (dn->hnext) + dn->hnext->dn->hprevp = &dn->hnext; + dn->hprevp = &nodehash[INOHASH(inum)]; + nodehash[INOHASH(inum)] = np; ++ diskfs_nref_light (np); + nodehash_nr_items += 1; ++ pthread_rwlock_unlock (&nodecache_lock); + +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- + /* Get the contents of NP off disk. */ + err = read_node (np, 0); + +@@ -121,6 +147,13 @@ diskfs_cached_lookup (ino64_t inum, struct node **npp) + *npp = np; + return 0; + } ++ ++ gotit: ++ diskfs_nref (np); ++ pthread_rwlock_unlock (&nodecache_lock); ++ pthread_mutex_lock (&np->lock); ++ *npp = np; ++ return 0; + } + + /* Fetch inode INUM, set *NPP to the node structure; +@@ -133,24 +166,23 @@ diskfs_cached_lookup_in_dirbuf (int inum, struct node **npp, vm_address_t buf) + struct node *np; + struct disknode *dn; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_rdlock (&nodecache_lock); + for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) + if (np->cache_id == inum) + { +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ diskfs_nref (np); ++ pthread_rwlock_unlock (&nodecache_lock); + pthread_mutex_lock (&np->lock); + *npp = np; + return 0; + } ++ pthread_rwlock_unlock (&nodecache_lock); + + /* Format specific data for the new node. */ + dn = malloc (sizeof (struct disknode)); + if (! 
dn) +- { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- return ENOMEM; +- } ++ return ENOMEM; ++ + dn->pager = 0; + dn->first = 0; + dn->last = 0; +@@ -168,15 +200,16 @@ diskfs_cached_lookup_in_dirbuf (int inum, struct node **npp, vm_address_t buf) + pthread_mutex_lock (&np->lock); + + /* Put NP in NODEHASH. */ ++ pthread_rwlock_wrlock (&nodecache_lock); + dn->hnext = nodehash[INOHASH(inum)]; + if (dn->hnext) + dn->hnext->dn->hprevp = &dn->hnext; + dn->hprevp = &nodehash[INOHASH(inum)]; + nodehash[INOHASH(inum)] = np; ++ diskfs_nref_light (np); + nodehash_nr_items += 1; ++ pthread_rwlock_unlock (&nodecache_lock); + +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- + /* Get the contents of NP off disk. */ + err = read_node (np, buf); + +@@ -196,17 +229,12 @@ ifind (ino_t inum) + { + struct node *np; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) +- { +- if (np->cache_id != inum) +- continue; ++ pthread_rwlock_rdlock (&nodecache_lock); ++ np = lookup (inum); ++ pthread_rwlock_unlock (&nodecache_lock); + +- assert (np->references); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- return np; +- } +- assert (0); ++ assert (np); ++ return np; + } + + /* The last reference to a node has gone away; drop it from the hash +@@ -216,11 +244,6 @@ diskfs_node_norefs (struct node *np) + { + struct cluster_chain *last = np->dn->first; + +- *np->dn->hprevp = np->dn->hnext; +- if (np->dn->hnext) +- np->dn->hnext->dn->hprevp = np->dn->hprevp; +- nodehash_nr_items -= 1; +- + while (last) + { + struct cluster_chain *next = last->next; +@@ -251,6 +274,35 @@ diskfs_node_norefs (struct node *np) + void + diskfs_try_dropping_softrefs (struct node *np) + { ++ pthread_rwlock_wrlock (&nodecache_lock); ++ if (np->dn->hnext != NULL) ++ { ++ /* Check if someone reacquired a reference through the ++ nodehash. 
*/ ++ unsigned int references; ++ pthread_spin_lock (&diskfs_node_refcnt_lock); ++ references = np->references; ++ pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ ++ /* An additional reference is acquired by libdiskfs across calls ++ to diskfs_try_dropping_softrefs. */ ++ if (references > 1) ++ { ++ /* A reference was reacquired through a hash table lookup. ++ It's fine, we didn't touch anything yet. */ ++ pthread_rwlock_unlock (&nodecache_lock); ++ return; ++ } ++ ++ *np->dn->hprevp = np->dn->hnext; ++ if (np->dn->hnext) ++ np->dn->hnext->dn->hprevp = np->dn->hprevp; ++ np->dn->hnext = NULL; ++ nodehash_nr_items -= 1; ++ diskfs_nrele_light (np); ++ } ++ pthread_rwlock_unlock (&nodecache_lock); ++ + drop_pager_softrefs (np); + } + +@@ -554,12 +606,12 @@ diskfs_node_iterate (error_t (*fun)(struct node *)) + size_t num_nodes; + struct node *node, **node_list, **p; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_rdlock (&nodecache_lock); + + /* We must copy everything from the hash table into another data structure + to avoid running into any problems with the hash-table being modified + during processing (normally we delegate access to hash-table with +- diskfs_node_refcnt_lock, but we can't hold this while locking the ++ nodecache_lock, but we can't hold this while locking the + individual node locks). */ + + num_nodes = nodehash_nr_items; +@@ -570,10 +622,14 @@ diskfs_node_iterate (error_t (*fun)(struct node *)) + for (node = nodehash[n]; node; node = node->dn->hnext) + { + *p++ = node; +- node->references++; ++ ++ /* We acquire a hard reference for node, but without using ++ diskfs_nref. We do this so that diskfs_new_hardrefs will not ++ get called. 
*/ ++ node->references++; + } + +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_unlock (&nodecache_lock); + + p = node_list; + while (num_nodes-- > 0) +-- +2.0.0 + diff --git a/debian/patches/0006-isofs-use-a-seperate-lock-to-protect-node_cache.patch b/debian/patches/0006-isofs-use-a-seperate-lock-to-protect-node_cache.patch new file mode 100644 index 00000000..54f9ee25 --- /dev/null +++ b/debian/patches/0006-isofs-use-a-seperate-lock-to-protect-node_cache.patch @@ -0,0 +1,310 @@ +From a020ab648c11990935fd26e3b935ab969c184c09 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Tue, 13 May 2014 15:16:31 +0200 +Subject: [PATCH 06/10] isofs: use a seperate lock to protect node_cache + +Previously, isofs used diskfs_node_refcnt_lock to serialize access to +the node_cache. + +Use a separate lock to protect node_cache. Adjust the reference +counting accordingly. Every node in the node_cache carries a light +reference. When we are asked to give up that light reference, we +reacquire our lock momentarily to check whether someone else +reacquired a reference through the node_cache. + +* isofs/inode.c (nodecache_lock): New lock. +(inode_cache_find): Use a separate lock to protect node_cache. +Adjust the reference counting accordingly. +(diskfs_cached_lookup): Likewise. +(load_inode): Likewise. +(cache_inode): Update comment accordingly. +(diskfs_node_iterate): Likewise. +(diskfs_node_norefs): Move the code removing the node from node_cache... +(diskfs_try_dropping_softrefs): ... here, where we check whether +someone reacquired a reference, and if so hold on to our light +reference. 
+--- + isofs/inode.c | 146 +++++++++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 105 insertions(+), 41 deletions(-) + +diff --git a/isofs/inode.c b/isofs/inode.c +index cdc05ae..3941580 100644 +--- a/isofs/inode.c ++++ b/isofs/inode.c +@@ -48,35 +48,53 @@ struct node_cache + struct node *np; /* if live */ + }; + ++/* The node_cache is a cache of nodes. ++ ++ Access to node_cache, node_cache_size, and node_cache_alloced is ++ protected by nodecache_lock. ++ ++ Every node in the node_cache carries a light reference. When we ++ are asked to give up that light reference, we reacquire our lock ++ momentarily to check whether someone else reacquired a reference ++ through the node_cache. */ + static int node_cache_size = 0; + static int node_cache_alloced = 0; + struct node_cache *node_cache = 0; ++/* nodecache_lock must be acquired before diskfs_node_refcnt_lock. */ ++static pthread_rwlock_t nodecache_lock = PTHREAD_RWLOCK_INITIALIZER; + + /* Forward */ + static error_t read_disknode (struct node *, + struct dirrect *, struct rrip_lookup *); + + ++/* Lookup node with id ID. Returns NULL if the node is not found in ++ the node cache. */ ++static struct node * ++lookup (off_t id) ++{ ++ int i; ++ for (i = 0; i < node_cache_size; i++) ++ if (node_cache[i].id == id ++ && node_cache[i].np) ++ return node_cache[i].np; ++ return NULL; ++} ++ + /* See if node with identifier ID is in the cache. If so, return it, +- with one additional reference. diskfs_node_refcnt_lock must be held ++ with one additional reference. nodecache_lock must be held + on entry to the call, and will be released iff the node was found + in the cache. 
*/ + void + inode_cache_find (off_t id, struct node **npp) + { +- int i; +- +- for (i = 0; i < node_cache_size; i++) +- if (node_cache[i].id == id +- && node_cache[i].np) +- { +- *npp = node_cache[i].np; +- (*npp)->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- pthread_mutex_lock (&(*npp)->lock); +- return; +- } +- *npp = 0; ++ *npp = lookup (id); ++ if (*npp) ++ { ++ diskfs_nref (*npp); ++ pthread_rwlock_unlock (&nodecache_lock); ++ pthread_mutex_lock (&(*npp)->lock); ++ } + } + + +@@ -92,7 +110,7 @@ use_file_start_id (struct dirrect *record, struct rrip_lookup *rr) + } + + /* Enter NP into the cache. The directory entry we used is DR, the +- cached Rock-Ridge info RR. diskfs_node_refcnt_lock must be held. */ ++ cached Rock-Ridge info RR. nodecache_lock must be held. */ + void + cache_inode (struct node *np, struct dirrect *record, + struct rrip_lookup *rr) +@@ -137,6 +155,7 @@ cache_inode (struct node *np, struct dirrect *record, + c->id = id; + c->dr = record; + c->file_start = np->dn->file_start; ++ diskfs_nref_light (np); + c->np = np; + + /* PLUS 1 so that we don't store zero cache ID's (not allowed by diskfs) */ +@@ -155,7 +174,7 @@ diskfs_cached_lookup (ino_t id, struct node **npp) + to avoid presenting zero cache ID's. 
*/ + id--; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_rdlock (&nodecache_lock); + assert (id < node_cache_size); + + np = node_cache[id].np; +@@ -166,6 +185,8 @@ diskfs_cached_lookup (ino_t id, struct node **npp) + struct rrip_lookup rr; + struct disknode *dn; + ++ pthread_rwlock_unlock (&nodecache_lock); ++ + rrip_lookup (node_cache[id].dr, &rr, 1); + + /* We should never cache the wrong directory entry */ +@@ -174,7 +195,7 @@ diskfs_cached_lookup (ino_t id, struct node **npp) + dn = malloc (sizeof (struct disknode)); + if (!dn) + { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_unlock (&nodecache_lock); + release_rrip (&rr); + return ENOMEM; + } +@@ -185,16 +206,26 @@ diskfs_cached_lookup (ino_t id, struct node **npp) + if (!np) + { + free (dn); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_unlock (&nodecache_lock); + release_rrip (&rr); + return ENOMEM; + } + np->cache_id = id + 1; /* see above for rationale for increment */ + pthread_mutex_lock (&np->lock); ++ ++ pthread_rwlock_wrlock (&nodecache_lock); ++ if (c->np != NULL) ++ { ++ /* We lost a race. 
*/ ++ diskfs_nput (np); ++ np = c->np; ++ goto gotit; ++ } + c->np = np; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ diskfs_nref_light (np); ++ pthread_rwlock_unlock (&nodecache_lock); + +- err = read_disknode (np, node_cache[id].dr, &rr); ++ err = read_disknode (np, dn->dr, &rr); + if (!err) + *npp = np; + +@@ -203,9 +234,9 @@ diskfs_cached_lookup (ino_t id, struct node **npp) + return err; + } + +- +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ gotit: ++ diskfs_nref (np); ++ pthread_rwlock_unlock (&nodecache_lock); + pthread_mutex_lock (&np->lock); + *npp = np; + return 0; +@@ -307,7 +338,8 @@ load_inode (struct node **npp, struct dirrect *record, + error_t err; + off_t file_start; + struct disknode *dn; +- struct node *np; ++ struct node *np, *tmp; ++ off_t id; + + err = calculate_file_start (record, &file_start, rr); + if (err) +@@ -315,27 +347,23 @@ load_inode (struct node **npp, struct dirrect *record, + if (rr->valid & VALID_CL) + record = rr->realdirent; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- + /* First check the cache */ + if (use_file_start_id (record, rr)) +- inode_cache_find (file_start << store->log2_block_size, npp); ++ id = file_start << store->log2_block_size; + else +- inode_cache_find ((off_t) ((void *) record - (void *) disk_image), npp); ++ id = (off_t) ((void *) record - (void *) disk_image); + ++ pthread_rwlock_rdlock (&nodecache_lock); ++ inode_cache_find (id, npp); + if (*npp) +- { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- return 0; +- } ++ return 0; ++ pthread_rwlock_unlock (&nodecache_lock); + + /* Create a new node */ + dn = malloc (sizeof (struct disknode)); + if (!dn) +- { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- return ENOMEM; +- } ++ return ENOMEM; ++ + dn->fileinfo = 0; + dn->dr = record; + dn->file_start = file_start; +@@ -344,14 +372,25 @@ load_inode (struct node **npp, struct dirrect *record, + if (!np) + { + free (dn); +- pthread_spin_unlock 
(&diskfs_node_refcnt_lock); + return ENOMEM; + } + + pthread_mutex_lock (&np->lock); + ++ pthread_rwlock_wrlock (&nodecache_lock); ++ tmp = lookup (id); ++ if (tmp) ++ { ++ /* We lost a race. */ ++ diskfs_nput (np); ++ diskfs_nref (tmp); ++ *npp = tmp; ++ pthread_rwlock_unlock (&nodecache_lock); ++ return 0; ++ } ++ + cache_inode (np, record, rr); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_unlock (&nodecache_lock); + + err = read_disknode (np, record, rr); + *npp = np; +@@ -505,9 +544,6 @@ error_t (*diskfs_read_symlink_hook) (struct node *, char *) + void + diskfs_node_norefs (struct node *np) + { +- assert (node_cache[np->cache_id - 1].np == np); +- node_cache[np->cache_id - 1].np = 0; +- + if (np->dn->translator) + free (np->dn->translator); + +@@ -521,6 +557,34 @@ diskfs_node_norefs (struct node *np) + void + diskfs_try_dropping_softrefs (struct node *np) + { ++ pthread_rwlock_wrlock (&nodecache_lock); ++ if (np->cache_id != 0) ++ { ++ assert (node_cache[np->cache_id - 1].np == np); ++ ++ /* Check if someone reacquired a reference through the ++ node_cache. */ ++ unsigned int references; ++ pthread_spin_lock (&diskfs_node_refcnt_lock); ++ references = np->references; ++ pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ ++ /* An additional reference is acquired by libdiskfs across calls ++ to diskfs_try_dropping_softrefs. */ ++ if (references > 1) ++ { ++ /* A reference was reacquired through a hash table lookup. ++ It's fine, we didn't touch anything yet. 
*/ ++ pthread_rwlock_unlock (&nodecache_lock); ++ return; ++ } ++ ++ node_cache[np->cache_id - 1].np = 0; ++ np->cache_id = 0; ++ diskfs_nrele_light (np); ++ } ++ pthread_rwlock_unlock (&nodecache_lock); ++ + drop_pager_softrefs (np); + } + +-- +2.0.0 + diff --git a/debian/patches/0007-tmpfs-use-a-seperate-lock-to-protect-all_nodes.patch b/debian/patches/0007-tmpfs-use-a-seperate-lock-to-protect-all_nodes.patch new file mode 100644 index 00000000..c9e46828 --- /dev/null +++ b/debian/patches/0007-tmpfs-use-a-seperate-lock-to-protect-all_nodes.patch @@ -0,0 +1,314 @@ +From 27c815234a8e21d79ab07a53b4d1d4bbacfcde6c Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Tue, 13 May 2014 15:35:42 +0200 +Subject: [PATCH 07/10] tmpfs: use a separate lock to protect all_nodes + +Previously, tmpfs used diskfs_node_refcnt_lock to serialize access to +all_nodes and to other global state related to memory +consumption. + +Use a separate lock to protect all_nodes, and atomic operations to +access the state related to memory consumption. Adjust the reference +counting accordingly. Every node in all_nodes carries a light +reference. When we are asked to give up that light reference, we +reacquire our lock momentarily to check whether someone else +reacquired a reference through all_nodes. + +* tmpfs/tmpfs.h (num_files, tmpfs_space_used): Use atomic operations +for these variables. +(adjust_used): Use atomic operations. +(get_used): New convenience function to atomically retrieve +tmpfs_space_used. +* tmpfs/node.c (all_nodes_lock): New lock. +(diskfs_alloc_node): Use a separate lock to protect all_nodes. +Adjust the reference counting accordingly. +(diskfs_free_node): Likewise. +(diskfs_cached_lookup): Likewise. +(diskfs_node_iterate): Likewise. +(diskfs_node_norefs): Do not remove the node from all_nodes. This +actually looks like a mistake; I do not know why they did that here as +well as in diskfs_free_node. 
+(diskfs_try_dropping_softrefs): Check whether someone reacquired a +reference, and if so hold on to our light reference. +(diskfs_grow): Use atomic operations. +* tmpfs/tmpfs.c (diskfs_set_statfs): Likewise. +--- + tmpfs/node.c | 107 ++++++++++++++++++++++++++++++++++++++++++---------------- + tmpfs/tmpfs.c | 6 ++-- + tmpfs/tmpfs.h | 20 +++++++---- + 3 files changed, 94 insertions(+), 39 deletions(-) + +diff --git a/tmpfs/node.c b/tmpfs/node.c +index acc029a..24ad0bd 100644 +--- a/tmpfs/node.c ++++ b/tmpfs/node.c +@@ -29,8 +29,19 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + unsigned int num_files; + static unsigned int gen; + ++/* all_nodes is a list of all nodes. ++ ++ Access to all_nodes and all_nodes_nr_items is protected by ++ all_nodes_lock. ++ ++ Every node in all_nodes carries a light reference. When we are ++ asked to give up that light reference, we reacquire our lock ++ momentarily to check whether someone else reacquired a ++ reference. */ + struct node *all_nodes; + static size_t all_nodes_nr_items; ++/* all_nodes_lock must be acquired before diskfs_node_refcnt_lock. 
*/ ++pthread_rwlock_t all_nodes_lock = PTHREAD_RWLOCK_INITIALIZER; + + error_t + diskfs_alloc_node (struct node *dp, mode_t mode, struct node **npp) +@@ -40,18 +51,17 @@ diskfs_alloc_node (struct node *dp, mode_t mode, struct node **npp) + dn = calloc (1, sizeof *dn); + if (dn == 0) + return ENOSPC; +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- if (round_page (tmpfs_space_used + sizeof *dn) / vm_page_size ++ ++ if (round_page (get_used () + sizeof *dn) / vm_page_size + > tmpfs_page_limit) + { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_unlock (&all_nodes_lock); + free (dn); + return ENOSPC; + } + dn->gen = gen++; +- ++num_files; +- tmpfs_space_used += sizeof *dn; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ __atomic_add_fetch (&num_files, 1, __ATOMIC_RELAXED); ++ adjust_used (sizeof *dn); + + dn->type = IFTODT (mode & S_IFMT); + return diskfs_cached_lookup ((ino_t) (uintptr_t) dn, npp); +@@ -75,15 +85,19 @@ diskfs_free_node (struct node *np, mode_t mode) + free (np->dn->u.lnk); + break; + } ++ ++ pthread_rwlock_wrlock (&all_nodes_lock); + *np->dn->hprevp = np->dn->hnext; + if (np->dn->hnext != 0) + np->dn->hnext->dn->hprevp = np->dn->hprevp; + all_nodes_nr_items -= 1; ++ pthread_rwlock_unlock (&all_nodes_lock); ++ + free (np->dn); + np->dn = 0; + +- --num_files; +- tmpfs_space_used -= sizeof *np->dn; ++ __atomic_sub_fetch (&num_files, 1, __ATOMIC_RELAXED); ++ adjust_used (-sizeof *np->dn); + } + + void +@@ -117,14 +131,6 @@ diskfs_node_norefs (struct node *np) + np->dn->u.chr = np->dn_stat.st_rdev; + break; + } +- +- /* Remove this node from the cache list rooted at `all_nodes'. 
*/ +- *np->dn->hprevp = np->dn->hnext; +- if (np->dn->hnext != 0) +- np->dn->hnext->dn->hprevp = np->dn->hprevp; +- all_nodes_nr_items -= 1; +- np->dn->hnext = 0; +- np->dn->hprevp = 0; + } + + free (np); +@@ -167,30 +173,34 @@ diskfs_cached_lookup (ino_t inum, struct node **npp) + + assert (npp); + ++ pthread_rwlock_rdlock (&all_nodes_lock); + if (dn->hprevp != 0) /* There is already a node. */ +- { +- np = *dn->hprevp; +- assert (np->dn == dn); +- assert (*dn->hprevp == np); +- +- diskfs_nref (np); +- } ++ goto gotit; + else + /* Create the new node. */ + { + struct stat *st; ++ pthread_rwlock_unlock (&all_nodes_lock); + + np = diskfs_make_node (dn); + np->cache_id = (ino_t) (uintptr_t) dn; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_wrlock (&all_nodes_lock); ++ if (dn->hprevp != NULL) ++ { ++ /* We lost a race. */ ++ diskfs_nrele (np); ++ goto gotit; ++ } ++ + dn->hnext = all_nodes; + if (dn->hnext) + dn->hnext->dn->hprevp = &dn->hnext; + dn->hprevp = &all_nodes; + all_nodes = np; + all_nodes_nr_items += 1; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ diskfs_nref_light (np); ++ pthread_rwlock_unlock (&all_nodes_lock); + + st = &np->dn_stat; + memset (st, 0, sizeof *st); +@@ -220,6 +230,16 @@ diskfs_cached_lookup (ino_t inum, struct node **npp) + pthread_mutex_lock (&np->lock); + *npp = np; + return 0; ++ ++ gotit: ++ np = *dn->hprevp; ++ assert (np->dn == dn); ++ assert (*dn->hprevp == np); ++ diskfs_nref (np); ++ pthread_rwlock_unlock (&all_nodes_lock); ++ pthread_mutex_lock (&np->lock); ++ *npp = np; ++ return 0; + } + + error_t +@@ -229,12 +249,12 @@ diskfs_node_iterate (error_t (*fun) (struct node *)) + size_t num_nodes; + struct node *node, **node_list, **p; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_rdlock (&all_nodes_lock); + + /* We must copy everything from the hash table into another data structure + to avoid running into any problems with the hash-table being modified + during processing 
(normally we delegate access to hash-table with +- diskfs_node_refcnt_lock, but we can't hold this while locking the ++ all_nodes_lock, but we can't hold this while locking the + individual node locks). */ + + num_nodes = all_nodes_nr_items; +@@ -243,10 +263,14 @@ diskfs_node_iterate (error_t (*fun) (struct node *)) + for (node = all_nodes; node != 0; node = node->dn->hnext) + { + *p++ = node; ++ ++ /* We acquire a hard reference for node, but without using ++ diskfs_nref. We do this so that diskfs_new_hardrefs will not ++ get called. */ + node->references++; + } + +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_rwlock_unlock (&all_nodes_lock); + + p = node_list; + while (num_nodes-- > 0) +@@ -272,6 +296,31 @@ diskfs_node_iterate (error_t (*fun) (struct node *)) + void + diskfs_try_dropping_softrefs (struct node *np) + { ++ pthread_rwlock_wrlock (&all_nodes_lock); ++ if (np->cache_id != 0) ++ { ++ /* Check if someone reacquired a reference. */ ++ unsigned int references; ++ pthread_spin_lock (&diskfs_node_refcnt_lock); ++ references = np->references; ++ pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ ++ /* An additional reference is acquired by libdiskfs across calls ++ to diskfs_try_dropping_softrefs. */ ++ if (references > 1) ++ { ++ /* A reference was reacquired. It's fine, we didn't touch ++ anything yet. */ ++ pthread_rwlock_unlock (&all_nodes_lock); ++ return; ++ } ++ ++ /* Just let go of the weak reference. The node will be removed ++ from all_nodes in diskfs_free_node. */ ++ np->cache_id = 0; ++ diskfs_nrele_light (np); ++ } ++ pthread_rwlock_unlock (&all_nodes_lock); + } + + /* The user must define this funcction. 
Node NP has some light +@@ -447,7 +496,7 @@ diskfs_grow (struct node *np, off_t size, struct protid *cred) + + off_t set_size = size; + size = round_page (size); +- if (round_page (tmpfs_space_used + size - np->allocsize) ++ if (round_page (get_used () + size - np->allocsize) + / vm_page_size > tmpfs_page_limit) + return ENOSPC; + +diff --git a/tmpfs/tmpfs.c b/tmpfs/tmpfs.c +index 718c6d8..0aace25 100644 +--- a/tmpfs/tmpfs.c ++++ b/tmpfs/tmpfs.c +@@ -67,10 +67,8 @@ diskfs_set_statfs (struct statfs *st) + st->f_bsize = vm_page_size; + st->f_blocks = tmpfs_page_limit; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- st->f_files = num_files; +- pages = round_page (tmpfs_space_used) / vm_page_size; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ st->f_files = __atomic_load_n (&num_files, __ATOMIC_RELAXED); ++ pages = round_page (get_used ()) / vm_page_size; + + st->f_bfree = pages < tmpfs_page_limit ? tmpfs_page_limit - pages : 0; + st->f_bavail = st->f_bfree; +diff --git a/tmpfs/tmpfs.h b/tmpfs/tmpfs.h +index b3c636d..ad47200 100644 +--- a/tmpfs/tmpfs.h ++++ b/tmpfs/tmpfs.h +@@ -69,17 +69,25 @@ struct tmpfs_dirent + char name[0]; + }; + +-extern unsigned int num_files; +-extern off_t tmpfs_page_limit, tmpfs_space_used; +- ++extern off_t tmpfs_page_limit; + extern mach_port_t default_pager; + ++/* These two must be accessed using atomic operations. */ ++extern unsigned int num_files; ++extern off_t tmpfs_space_used; ++ ++/* Convenience function to adjust tmpfs_space_used. */ + static inline void + adjust_used (off_t change) + { +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- tmpfs_space_used += change; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ __atomic_add_fetch (&num_files, change, __ATOMIC_RELAXED); ++} ++ ++/* Convenience function to get tmpfs_space_used. 
*/ ++static inline off_t ++get_used (void) ++{ ++ return __atomic_load_n (&num_files, __ATOMIC_RELAXED); + } + + #endif +-- +2.0.0 + diff --git a/debian/patches/0008-libdiskfs-lock-less-reference-counting-of-nodes.patch b/debian/patches/0008-libdiskfs-lock-less-reference-counting-of-nodes.patch new file mode 100644 index 00000000..54ee5cab --- /dev/null +++ b/debian/patches/0008-libdiskfs-lock-less-reference-counting-of-nodes.patch @@ -0,0 +1,608 @@ +From 311f71cb70dbe335ad8f8e8bf1141ee52bb3ae4f Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Wed, 14 May 2014 11:19:35 +0200 +Subject: [PATCH 08/10] libdiskfs: lock-less reference counting of nodes + +* libdiskfs/diskfs.h (struct node): Use refcounts_t for reference counting. +(diskfs_node_refcnt_lock): Remove. +(diskfs_node_norefs,diskfs_drop_node): Change comments accordingly. +* libdiskfs/init-init.c: Likewise. +* libdiskfs/node-drop.c: Likewise. +* libdiskfs/node-make.c: Likewise. +* libdiskfs/node-nput.c: Likewise. +* libdiskfs/node-nputl.c: Likewise. +* libdiskfs/node-nref.c: Likewise. +* libdiskfs/node-nrefl.c: Likewise. +* libdiskfs/node-nrele.c: Likewise. +* libdiskfs/node-nrelel.c: Likewise. +* ext2fs/inode.c: Likewise. +* fatfs/inode.c: Likewise. +* isofs/inode.c: Likewise. +* tmpfs/node.c: Likewise. +* doc/hurd.texi: Likewise. 
+--- + doc/hurd.texi | 11 ++-------- + ext2fs/inode.c | 15 +++++--------- + fatfs/inode.c | 27 ++++++++----------------- + isofs/inode.c | 13 ++++-------- + libdiskfs/diskfs.h | 15 ++++++-------- + libdiskfs/init-init.c | 2 -- + libdiskfs/node-drop.c | 9 +++------ + libdiskfs/node-make.c | 3 +-- + libdiskfs/node-nput.c | 54 +++++++++++++++++++------------------------------ + libdiskfs/node-nputl.c | 12 ++++------- + libdiskfs/node-nref.c | 9 +++------ + libdiskfs/node-nrefl.c | 4 +--- + libdiskfs/node-nrele.c | 48 +++++++++++++++++++++---------------------- + libdiskfs/node-nrelel.c | 9 +++------ + tmpfs/node.c | 15 +++++--------- + 15 files changed, 89 insertions(+), 157 deletions(-) + +diff --git a/doc/hurd.texi b/doc/hurd.texi +index 07ddfb4..6cafdb9 100644 +--- a/doc/hurd.texi ++++ b/doc/hurd.texi +@@ -3780,10 +3780,6 @@ new thread and (eventually) get rid of the old one; the old thread won't + do any more syncs, regardless. + @end deftypefun + +-@deftypevar spin_lock_t diskfs_node_refcnt_lock +-Pager reference count lock. +-@end deftypevar +- + @deftypevar int diskfs_readonly + Set to zero if the filesystem is currently writable. + @end deftypevar +@@ -3818,9 +3814,7 @@ Every file or directory is a diskfs @dfn{node}. The following functions + help your diskfs callbacks manage nodes and their references: + + @deftypefun void diskfs_drop_node (@w{struct node *@var{np}}) +-Node @var{np} now has no more references; clean all state. The +-@var{diskfs_node_refcnt_lock} must be held, and will be released upon +-return. @var{np} must be locked. ++Node @var{np} now has no more references; clean all state. + @end deftypefun + + @deftypefun void diskfs_node_update (@w{struct node *@var{np}}, @w{int @var{wait}}) +@@ -4236,14 +4230,13 @@ without real users. + @deftypefun void diskfs_try_dropping_softrefs (@w{struct node *@var{np}}) + Node @var{np} has some light references, but has just lost its last hard + references. 
Take steps so that if any light references can be freed, +-they are. Both @var{diskfs_node_refcnt_lock} and @var{np} are locked. ++they are. @var{np} is locked. + This function will be called after @code{diskfs_lost_hardrefs}. + @end deftypefun + + @deftypefun void diskfs_node_norefs (@w{struct node *@var{np}}) + Node @var{np} has no more references; free local state, including + @code{*@var{np}} if it shouldn't be retained. +-@var{diskfs_node_refcnt_lock} is held. + @end deftypefun + + @deftypefun error_t diskfs_set_hypermetadata (@w{int @var{wait}}, @w{int @var{clean}}) +diff --git a/ext2fs/inode.c b/ext2fs/inode.c +index 67c502a..714d1a2 100644 +--- a/ext2fs/inode.c ++++ b/ext2fs/inode.c +@@ -57,7 +57,6 @@ + through the nodehash. */ + static struct node *nodehash[INOHSZ]; + static size_t nodehash_nr_items; +-/* nodecache_lock must be acquired before diskfs_node_refcnt_lock. */ + static pthread_rwlock_t nodecache_lock = PTHREAD_RWLOCK_INITIALIZER; + + static error_t read_node (struct node *np); +@@ -207,14 +206,10 @@ diskfs_try_dropping_softrefs (struct node *np) + { + /* Check if someone reacquired a reference through the + nodehash. */ +- unsigned int references; +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- references = np->references; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- +- /* An additional reference is acquired by libdiskfs across calls +- to diskfs_try_dropping_softrefs. */ +- if (references > 1) ++ struct references result; ++ refcounts_references (&np->refcounts, &result); ++ ++ if (result.hard > 0) + { + /* A reference was reacquired through a hash table lookup. + It's fine, we didn't touch anything yet. */ +@@ -635,7 +630,7 @@ diskfs_node_iterate (error_t (*fun)(struct node *)) + /* We acquire a hard reference for node, but without using + diskfs_nref. We do this so that diskfs_new_hardrefs will not + get called. 
*/ +- node->references++; ++ refcounts_ref (&node->refcounts, NULL); + } + + pthread_rwlock_unlock (&nodecache_lock); +diff --git a/fatfs/inode.c b/fatfs/inode.c +index c3997d0..0cdca5e 100644 +--- a/fatfs/inode.c ++++ b/fatfs/inode.c +@@ -55,7 +55,6 @@ + through the nodehash. */ + static struct node *nodehash[INOHSZ]; + static size_t nodehash_nr_items; +-/* nodecache_lock must be acquired before diskfs_node_refcnt_lock. */ + static pthread_rwlock_t nodecache_lock = PTHREAD_RWLOCK_INITIALIZER; + + static error_t read_node (struct node *np, vm_address_t buf); +@@ -254,14 +253,8 @@ diskfs_node_norefs (struct node *np) + if (np->dn->translator) + free (np->dn->translator); + +- /* It is safe to unlock diskfs_node_refcnt_lock here for a while because +- all references to the node have been deleted. */ + if (np->dn->dirnode) +- { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- diskfs_nrele (np->dn->dirnode); +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- } ++ diskfs_nrele (np->dn->dirnode); + + assert (!np->dn->pager); + +@@ -279,14 +272,10 @@ diskfs_try_dropping_softrefs (struct node *np) + { + /* Check if someone reacquired a reference through the + nodehash. */ +- unsigned int references; +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- references = np->references; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- +- /* An additional reference is acquired by libdiskfs across calls +- to diskfs_try_dropping_softrefs. */ +- if (references > 1) ++ struct references result; ++ refcounts_references (&np->refcounts, &result); ++ ++ if (result.hard > 0) + { + /* A reference was reacquired through a hash table lookup. + It's fine, we didn't touch anything yet. */ +@@ -391,7 +380,7 @@ read_node (struct node *np, vm_address_t buf) + /* Files in fatfs depend on the directory that hold the file. 
*/ + np->dn->dirnode = dp; + if (dp) +- dp->references++; ++ refcounts_ref (&dp->refcounts, NULL); + + pthread_rwlock_rdlock (&np->dn->dirent_lock); + +@@ -626,7 +615,7 @@ diskfs_node_iterate (error_t (*fun)(struct node *)) + /* We acquire a hard reference for node, but without using + diskfs_nref. We do this so that diskfs_new_hardrefs will not + get called. */ +- node->references++; ++ refcounts_ref (&node->refcounts, NULL); + } + + pthread_rwlock_unlock (&nodecache_lock); +@@ -837,7 +826,7 @@ diskfs_alloc_node (struct node *dir, mode_t mode, struct node **node) + + /* FIXME: We know that readnode couldn't put this in. */ + np->dn->dirnode = dir; +- dir->references++; ++ refcounts_ref (&dir->refcounts, NULL); + + *node = np; + return 0; +diff --git a/isofs/inode.c b/isofs/inode.c +index 3941580..4c75510 100644 +--- a/isofs/inode.c ++++ b/isofs/inode.c +@@ -60,7 +60,6 @@ struct node_cache + static int node_cache_size = 0; + static int node_cache_alloced = 0; + struct node_cache *node_cache = 0; +-/* nodecache_lock must be acquired before diskfs_node_refcnt_lock. */ + static pthread_rwlock_t nodecache_lock = PTHREAD_RWLOCK_INITIALIZER; + + /* Forward */ +@@ -564,14 +563,10 @@ diskfs_try_dropping_softrefs (struct node *np) + + /* Check if someone reacquired a reference through the + node_cache. */ +- unsigned int references; +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- references = np->references; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- +- /* An additional reference is acquired by libdiskfs across calls +- to diskfs_try_dropping_softrefs. */ +- if (references > 1) ++ struct references result; ++ refcounts_references (&np->refcounts, &result); ++ ++ if (result.hard > 0) + { + /* A reference was reacquired through a hash table lookup. + It's fine, we didn't touch anything yet. 
*/ +diff --git a/libdiskfs/diskfs.h b/libdiskfs/diskfs.h +index e59ba99..c756eed 100644 +--- a/libdiskfs/diskfs.h ++++ b/libdiskfs/diskfs.h +@@ -96,8 +96,7 @@ struct node + + pthread_mutex_t lock; + +- int references; /* hard references */ +- int light_references; /* light references */ ++ refcounts_t refcounts; + + mach_port_t sockaddr; /* address for S_IFSOCK shortcut */ + +@@ -198,8 +197,6 @@ extern volatile struct mapped_time_value *diskfs_mtime; + be done by format independent code. */ + extern int diskfs_synchronous; + +-extern pthread_spinlock_t diskfs_node_refcnt_lock; +- + extern int pager_port_type; + + /* Whether the filesystem is currently writable or not. */ +@@ -448,14 +445,15 @@ error_t diskfs_alloc_node (struct node *dp, mode_t mode, struct node **np); + void diskfs_free_node (struct node *np, mode_t mode); + + /* Node NP has no more references; free local state, including *NP +- if it isn't to be retained. diskfs_node_refcnt_lock is held. */ ++ if it isn't to be retained. */ + void diskfs_node_norefs (struct node *np); + + /* The user must define this function. Node NP has some light + references, but has just lost its last hard references. Take steps + so that if any light references can be freed, they are. NP is locked + as is the pager refcount lock. This function will be called after +- diskfs_lost_hardrefs. */ ++ diskfs_lost_hardrefs. An additional light reference is acquired by ++ libdiskfs across calls to this function. */ + void diskfs_try_dropping_softrefs (struct node *np); + + /* The user must define this funcction. Node NP has some light +@@ -611,9 +609,8 @@ void diskfs_spawn_first_thread (ports_demuxer_type demuxer); + diskfs_init_completed once it has a valid proc and auth port. */ + void diskfs_start_bootstrap (); + +-/* Node NP now has no more references; clean all state. The +- _diskfs_node_refcnt_lock must be held, and will be released +- upon return. NP must be locked. */ ++/* Node NP now has no more references; clean all state. 
NP must be ++ locked. */ + void diskfs_drop_node (struct node *np); + + /* Set on disk fields from NP->dn_stat; update ctime, atime, and mtime +diff --git a/libdiskfs/init-init.c b/libdiskfs/init-init.c +index 7a7f248..6c94faa 100644 +--- a/libdiskfs/init-init.c ++++ b/libdiskfs/init-init.c +@@ -41,8 +41,6 @@ int _diskfs_noatime; + + struct hurd_port _diskfs_exec_portcell; + +-pthread_spinlock_t diskfs_node_refcnt_lock = PTHREAD_SPINLOCK_INITIALIZER; +- + pthread_spinlock_t _diskfs_control_lock = PTHREAD_SPINLOCK_INITIALIZER; + int _diskfs_ncontrol_ports; + +diff --git a/libdiskfs/node-drop.c b/libdiskfs/node-drop.c +index 83eb590..fab3cfa 100644 +--- a/libdiskfs/node-drop.c ++++ b/libdiskfs/node-drop.c +@@ -31,9 +31,8 @@ free_modreqs (struct modreq *mr) + } + + +-/* Node NP now has no more references; clean all state. The +- diskfs_node_refcnt_lock must be held, and will be released +- upon return. NP must be locked. */ ++/* Node NP now has no more references; clean all state. NP must be ++ locked. */ + void + diskfs_drop_node (struct node *np) + { +@@ -60,8 +59,7 @@ diskfs_drop_node (struct node *np) + and an nput. The next time through, this routine + will notice that the size is zero, and not have to + do anything. 
*/ +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ refcounts_ref (&np->refcounts, NULL); + diskfs_truncate (np, 0); + + /* Force allocsize to zero; if truncate consistently fails this +@@ -94,5 +92,4 @@ diskfs_drop_node (struct node *np) + assert (!np->sockaddr); + + diskfs_node_norefs (np); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); + } +diff --git a/libdiskfs/node-make.c b/libdiskfs/node-make.c +index ff0cc0d..c7ca3b0 100644 +--- a/libdiskfs/node-make.c ++++ b/libdiskfs/node-make.c +@@ -29,8 +29,7 @@ init_node (struct node *np, struct disknode *dn) + np->dn_stat_dirty = 0; + + pthread_mutex_init (&np->lock, NULL); +- np->references = 1; +- np->light_references = 0; ++ refcounts_init (&np->refcounts, 1, 0); + np->owner = 0; + np->sockaddr = MACH_PORT_NULL; + +diff --git a/libdiskfs/node-nput.c b/libdiskfs/node-nput.c +index 5043ad1..2935ae2 100644 +--- a/libdiskfs/node-nput.c ++++ b/libdiskfs/node-nput.c +@@ -26,56 +26,44 @@ + void + diskfs_nput (struct node *np) + { +- int tried_drop_softrefs = 0; ++ struct references result; + +- loop: +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- assert (np->references); +- np->references--; +- if (np->references + np->light_references == 0) +- diskfs_drop_node (np); +- else if (np->references == 0 && !tried_drop_softrefs) +- { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ /* While we call the diskfs_try_dropping_softrefs, we need to hold ++ one reference. We use a weak reference for this purpose, which ++ we acquire by demoting our hard reference to a weak one. */ ++ refcounts_demote (&np->refcounts, &result); + ++ if (result.hard == 0) ++ { + /* This is our cue that something akin to "last process closes file" + in the POSIX.1 sense happened, so make sure any pending node time + updates now happen in a timely fashion. */ + diskfs_set_node_times (np); +- + diskfs_lost_hardrefs (np); + if (!np->dn_stat.st_nlink) + { +- /* There are no links. 
If there are soft references that +- can be dropped, we can't let them postpone deallocation. +- So attempt to drop them. But that's a user-supplied +- routine, which might result in further recursive calls to +- the ref-counting system. So we have to reacquire our +- reference around the call to forestall disaster. */ +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- + if (np->sockaddr != MACH_PORT_NULL) + { + mach_port_deallocate (mach_task_self (), np->sockaddr); + np->sockaddr = MACH_PORT_NULL; + } + ++ /* There are no links. If there are soft references that ++ can be dropped, we can't let them postpone deallocation. ++ So attempt to drop them. But that's a user-supplied ++ routine, which might result in further recursive calls to ++ the ref-counting system. This is not a problem, as we ++ hold a weak reference ourselves. */ + diskfs_try_dropping_softrefs (np); +- +- /* But there's no value in looping forever in this +- routine; only try to drop soft refs once. */ +- tried_drop_softrefs = 1; +- +- /* Now we can drop the reference back... */ +- goto loop; + } + pthread_mutex_unlock (&np->lock); + } +- else +- { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- pthread_mutex_unlock (&np->lock); +- } ++ ++ /* Finally get rid of our reference. 
*/ ++ refcounts_deref_weak (&np->refcounts, &result); ++ ++ if (result.hard == 0 && result.weak == 0) ++ diskfs_drop_node (np); ++ ++ pthread_mutex_unlock (&np->lock); + } +diff --git a/libdiskfs/node-nputl.c b/libdiskfs/node-nputl.c +index 1959665..8dac16e 100644 +--- a/libdiskfs/node-nputl.c ++++ b/libdiskfs/node-nputl.c +@@ -25,14 +25,10 @@ + void + diskfs_nput_light (struct node *np) + { +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- assert (np->light_references); +- np->light_references--; +- if (np->references + np->light_references == 0) ++ struct references result; ++ refcounts_deref_weak (&np->refcounts, &result); ++ if (result.hard == 0 && result.weak == 0) + diskfs_drop_node (np); + else +- { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- pthread_mutex_unlock (&np->lock); +- } ++ pthread_mutex_unlock (&np->lock); + } +diff --git a/libdiskfs/node-nref.c b/libdiskfs/node-nref.c +index 13cea05..766a69c 100644 +--- a/libdiskfs/node-nref.c ++++ b/libdiskfs/node-nref.c +@@ -26,12 +26,9 @@ + void + diskfs_nref (struct node *np) + { +- int new_hardref; +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- np->references++; +- new_hardref = (np->references == 1); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- if (new_hardref) ++ struct references result; ++ refcounts_ref (&np->refcounts, &result); ++ if (result.hard == 1) + { + pthread_mutex_lock (&np->lock); + diskfs_new_hardrefs (np); +diff --git a/libdiskfs/node-nrefl.c b/libdiskfs/node-nrefl.c +index 9692247..f7a823d 100644 +--- a/libdiskfs/node-nrefl.c ++++ b/libdiskfs/node-nrefl.c +@@ -24,7 +24,5 @@ + void + diskfs_nref_light (struct node *np) + { +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- np->light_references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ refcounts_ref_weak (&np->refcounts, NULL); + } +diff --git a/libdiskfs/node-nrele.c b/libdiskfs/node-nrele.c +index cc68089..d962846 100644 +--- a/libdiskfs/node-nrele.c ++++ b/libdiskfs/node-nrele.c +@@ -28,38 +28,36 
@@ + void + diskfs_nrele (struct node *np) + { +- int tried_drop_softrefs = 0; ++ struct references result; + +- loop: +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- assert (np->references); +- np->references--; +- if (np->references + np->light_references == 0) +- { +- pthread_mutex_lock (&np->lock); +- diskfs_drop_node (np); +- } +- else if (np->references == 0) ++ /* While we call the diskfs_try_dropping_softrefs, we need to hold ++ one reference. We use a weak reference for this purpose, which ++ we acquire by demoting our hard reference to a weak one. */ ++ refcounts_demote (&np->refcounts, &result); ++ ++ if (result.hard == 0) + { + pthread_mutex_lock (&np->lock); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); + diskfs_lost_hardrefs (np); +- if (!np->dn_stat.st_nlink && !tried_drop_softrefs) ++ if (!np->dn_stat.st_nlink) + { +- /* Same issue here as in nput; see that for explanation */ +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- ++ /* There are no links. If there are soft references that ++ can be dropped, we can't let them postpone deallocation. ++ So attempt to drop them. But that's a user-supplied ++ routine, which might result in further recursive calls to ++ the ref-counting system. This is not a problem, as we ++ hold a weak reference ourselves. */ + diskfs_try_dropping_softrefs (np); +- tried_drop_softrefs = 1; +- +- /* Now we can drop the reference back... */ +- pthread_mutex_unlock (&np->lock); +- goto loop; + } + pthread_mutex_unlock (&np->lock); + } +- else +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ ++ /* Finally get rid of our reference. 
*/ ++ refcounts_deref_weak (&np->refcounts, &result); ++ ++ if (result.hard == 0 && result.weak == 0) ++ { ++ pthread_mutex_lock (&np->lock); ++ diskfs_drop_node (np); ++ } + } +diff --git a/libdiskfs/node-nrelel.c b/libdiskfs/node-nrelel.c +index ee53b22..dc4f920 100644 +--- a/libdiskfs/node-nrelel.c ++++ b/libdiskfs/node-nrelel.c +@@ -26,14 +26,11 @@ + void + diskfs_nrele_light (struct node *np) + { +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- assert (np->light_references); +- np->light_references--; +- if (np->references + np->light_references == 0) ++ struct references result; ++ refcounts_deref_weak (&np->refcounts, &result); ++ if (result.hard == 0 && result.weak == 0) + { + pthread_mutex_lock (&np->lock); + diskfs_drop_node (np); + } +- else +- pthread_spin_unlock (&diskfs_node_refcnt_lock); + } +diff --git a/tmpfs/node.c b/tmpfs/node.c +index 24ad0bd..bbc5ba8 100644 +--- a/tmpfs/node.c ++++ b/tmpfs/node.c +@@ -40,7 +40,6 @@ static unsigned int gen; + reference. */ + struct node *all_nodes; + static size_t all_nodes_nr_items; +-/* all_nodes_lock must be acquired before diskfs_node_refcnt_lock. */ + pthread_rwlock_t all_nodes_lock = PTHREAD_RWLOCK_INITIALIZER; + + error_t +@@ -267,7 +266,7 @@ diskfs_node_iterate (error_t (*fun) (struct node *)) + /* We acquire a hard reference for node, but without using + diskfs_nref. We do this so that diskfs_new_hardrefs will not + get called. */ +- node->references++; ++ refcounts_ref (&node->refcounts, NULL); + } + + pthread_rwlock_unlock (&all_nodes_lock); +@@ -300,14 +299,10 @@ diskfs_try_dropping_softrefs (struct node *np) + if (np->cache_id != 0) + { + /* Check if someone reacquired a reference. */ +- unsigned int references; +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- references = np->references; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- +- /* An additional reference is acquired by libdiskfs across calls +- to diskfs_try_dropping_softrefs. 
*/ +- if (references > 1) ++ struct references result; ++ refcounts_references (&np->refcounts, &result); ++ ++ if (result.hard > 0) + { + /* A reference was reacquired. It's fine, we didn't touch + anything yet. */ +-- +2.0.0 + diff --git a/debian/patches/0009-libports-use-a-global-hash-table-for-the-lookups.patch b/debian/patches/0009-libports-use-a-global-hash-table-for-the-lookups.patch new file mode 100644 index 00000000..c0ace80f --- /dev/null +++ b/debian/patches/0009-libports-use-a-global-hash-table-for-the-lookups.patch @@ -0,0 +1,660 @@ +From 649249db6e93964996fb7d3867904346fed5af00 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Sat, 3 May 2014 03:53:41 +0200 +Subject: [PATCH 09/10] libports: use a global hash table for the lookups + +Previously, libports used a hash table per port bucket. This makes +looking up a port difficult if one does not know the port bucket, as +one has to iterate over all buckets and do a hash table lookup each. + +Having to iterate over the buckets makes it necessary to keep a list +of all buckets, which has to be updated and protected by a lock as +well. + +Also, the current code in _ports_bucket_class_iterate iterates over +the hash table associated with the bucket given. When +ports_class_iterate calls this common function, it obtains a reference +to the bucket from one of the ports in the given class. This will not +work if a class contains ports in different port buckets. This +limitation is not documented as far as I can see. Again, having to +maintain this list has its cost and requires serialization. + +Use a global hash table for lookups instead. Keep the per-bucket hash +tables for efficient iteration over buckets. Furthermore, serialize +access to all hash tables using a separate lock. Remove the linked +lists of all buckets and all ports in a class. + +* libports/bucket-iterate.c (ports_bucket_iterate): Acquire +_ports_htable_lock. 
Also, generalize _ports_bucket_class_iterate so that it +takes a pointer to a hash table as first argument. +(ports_bucket_iterate): Adjust call to former function accordingly. +* libports/class-iterate.c (ports_class_iterate): Just call the +generalized _ports_bucket_class_iterate with the global hash table as +argument. +* libports/ports.h (struct port_info): Remove the port class links. +(struct port_bucket): Remove the hash table, and the all buckets link. +(_ports_all_buckets): Remove declaration. +(_ports_htable): New global hash table. +(_ports_htable_lock): Protected by this lock. +* libports/claim-right.c: Adjust accordingly. +* libports/complete-deallocate.c: Likewise. +* libports/create-bucket.c: Likewise. +* libports/create-class.c: Likewise. +* libports/create-internal.c: Likewise. +* libports/destroy-right.c: Likewise. +* libports/import-port.c: Likewise. +* libports/lookup-port.c: Likewise. +* libports/reallocate-from-external.c: Likewise. +* libports/reallocate-port.c: Likewise. +* libports/transfer-right.c: Likewise. +* libports/inhibit-all-rpcs.c: Iterate over the hash table. +* libports/inhibit-bucket-rpcs.c: Likewise, but filter using bucket. +* libports/inhibit-class-rpcs.c: Likewise, but filter using class. +* libports/init.c (_ports_htable): Initialize. +(_ports_htable_lock): Likewise. 
+--- + libports/bucket-iterate.c | 16 +++++++++------- + libports/claim-right.c | 5 ++++- + libports/class-iterate.c | 10 +--------- + libports/complete-deallocate.c | 7 +++---- + libports/create-bucket.c | 6 ------ + libports/create-class.c | 1 - + libports/create-internal.c | 19 +++++++++++++------ + libports/destroy-right.c | 5 +++-- + libports/import-port.c | 19 +++++++++++++------ + libports/inhibit-all-rpcs.c | 27 +++++++++++++-------------- + libports/inhibit-bucket-rpcs.c | 3 ++- + libports/inhibit-class-rpcs.c | 27 ++++++++++++++++++--------- + libports/init.c | 7 ++++++- + libports/lookup-port.c | 23 +++++++++-------------- + libports/ports.h | 22 +++++++++++++++++----- + libports/reallocate-from-external.c | 15 +++++++++++---- + libports/reallocate-port.c | 9 ++++++++- + libports/transfer-right.c | 18 ++++++++++++++---- + 18 files changed, 144 insertions(+), 95 deletions(-) + +diff --git a/libports/bucket-iterate.c b/libports/bucket-iterate.c +index 2d1b00d..79b6d72 100644 +--- a/libports/bucket-iterate.c ++++ b/libports/bucket-iterate.c +@@ -25,7 +25,7 @@ + /* Internal entrypoint for both ports_bucket_iterate and ports_class_iterate. + If CLASS is non-null, call FUN only for ports in that class. 
*/ + error_t +-_ports_bucket_class_iterate (struct port_bucket *bucket, ++_ports_bucket_class_iterate (struct hurd_ihash *ht, + struct port_class *class, + error_t (*fun)(void *)) + { +@@ -36,23 +36,24 @@ _ports_bucket_class_iterate (struct port_bucket *bucket, + error_t err; + + pthread_mutex_lock (&_ports_lock); ++ pthread_rwlock_rdlock (&_ports_htable_lock); + +- if (bucket->htable.nr_items == 0) ++ if (ht->nr_items == 0) + { +- pthread_mutex_unlock (&_ports_lock); ++ pthread_rwlock_unlock (&_ports_htable_lock); + return 0; + } + +- nr_items = bucket->htable.nr_items; ++ nr_items = ht->nr_items; + p = malloc (nr_items * sizeof *p); + if (p == NULL) + { +- pthread_mutex_unlock (&_ports_lock); ++ pthread_rwlock_unlock (&_ports_htable_lock); + return ENOMEM; + } + + n = 0; +- HURD_IHASH_ITERATE (&bucket->htable, arg) ++ HURD_IHASH_ITERATE (ht, arg) + { + struct port_info *const pi = arg; + +@@ -63,6 +64,7 @@ _ports_bucket_class_iterate (struct port_bucket *bucket, + n++; + } + } ++ pthread_rwlock_unlock (&_ports_htable_lock); + pthread_mutex_unlock (&_ports_lock); + + if (n != 0 && n != nr_items) +@@ -89,5 +91,5 @@ error_t + ports_bucket_iterate (struct port_bucket *bucket, + error_t (*fun)(void *)) + { +- return _ports_bucket_class_iterate (bucket, 0, fun); ++ return _ports_bucket_class_iterate (&bucket->htable, NULL, fun); + } +diff --git a/libports/claim-right.c b/libports/claim-right.c +index 4851ea3..85592ff 100644 +--- a/libports/claim-right.c ++++ b/libports/claim-right.c +@@ -34,10 +34,13 @@ ports_claim_right (void *portstruct) + if (ret == MACH_PORT_NULL) + return ret; + +- pthread_mutex_lock (&_ports_lock); ++ pthread_rwlock_wrlock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); + hurd_ihash_locp_remove (&pi->bucket->htable, pi->hentry); ++ pthread_rwlock_unlock (&_ports_htable_lock); + err = mach_port_move_member (mach_task_self (), ret, MACH_PORT_NULL); + assert_perror (err); ++ pthread_mutex_lock 
(&_ports_lock); + pi->port_right = MACH_PORT_NULL; + if (pi->flags & PORT_HAS_SENDRIGHTS) + { +diff --git a/libports/class-iterate.c b/libports/class-iterate.c +index 1f8878a..df33818 100644 +--- a/libports/class-iterate.c ++++ b/libports/class-iterate.c +@@ -23,13 +23,5 @@ error_t + ports_class_iterate (struct port_class *class, + error_t (*fun)(void *)) + { +- pthread_mutex_lock (&_ports_lock); +- if (class->ports != 0) +- { +- struct port_bucket *bucket = class->ports->bucket; +- pthread_mutex_unlock (&_ports_lock); +- return _ports_bucket_class_iterate (bucket, class, fun); +- } +- pthread_mutex_unlock (&_ports_lock); +- return 0; ++ return _ports_bucket_class_iterate (&_ports_htable, class, fun); + } +diff --git a/libports/complete-deallocate.c b/libports/complete-deallocate.c +index 8ce095b..4768dab 100644 +--- a/libports/complete-deallocate.c ++++ b/libports/complete-deallocate.c +@@ -29,16 +29,15 @@ _ports_complete_deallocate (struct port_info *pi) + + if (pi->port_right) + { ++ pthread_rwlock_wrlock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); + hurd_ihash_locp_remove (&pi->bucket->htable, pi->hentry); ++ pthread_rwlock_unlock (&_ports_htable_lock); + mach_port_mod_refs (mach_task_self (), pi->port_right, + MACH_PORT_RIGHT_RECEIVE, -1); + pi->port_right = MACH_PORT_NULL; + } + +- *pi->prevp = pi->next; +- if (pi->next) +- pi->next->prevp = pi->prevp; +- + pi->bucket->count--; + pi->class->count--; + +diff --git a/libports/create-bucket.c b/libports/create-bucket.c +index 52d50c3..2c5f1b6 100644 +--- a/libports/create-bucket.c ++++ b/libports/create-bucket.c +@@ -48,11 +48,5 @@ ports_create_bucket () + + hurd_ihash_init (&ret->htable, offsetof (struct port_info, hentry)); + ret->rpcs = ret->flags = ret->count = 0; +- +- pthread_mutex_lock (&_ports_lock); +- ret->next = _ports_all_buckets; +- _ports_all_buckets = ret; +- pthread_mutex_unlock (&_ports_lock); +- + return ret; + } +diff --git 
a/libports/create-class.c b/libports/create-class.c +index 12c8add..782f52b 100644 +--- a/libports/create-class.c ++++ b/libports/create-class.c +@@ -39,7 +39,6 @@ ports_create_class (void (*clean_routine)(void *), + cl->dropweak_routine = dropweak_routine; + cl->flags = 0; + cl->rpcs = 0; +- cl->ports = NULL; + cl->count = 0; + cl->uninhibitable_rpcs = ports_default_uninhibitable_rpcs; + +diff --git a/libports/create-internal.c b/libports/create-internal.c +index 8551297..8543986 100644 +--- a/libports/create-internal.c ++++ b/libports/create-internal.c +@@ -81,15 +81,22 @@ _ports_create_port_internal (struct port_class *class, + goto loop; + } + ++ pthread_rwlock_wrlock (&_ports_htable_lock); ++ err = hurd_ihash_add (&_ports_htable, port, pi); ++ if (err) ++ { ++ pthread_rwlock_unlock (&_ports_htable_lock); ++ goto lose; ++ } + err = hurd_ihash_add (&bucket->htable, port, pi); + if (err) +- goto lose; ++ { ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); ++ pthread_rwlock_unlock (&_ports_htable_lock); ++ goto lose; ++ } ++ pthread_rwlock_unlock (&_ports_htable_lock); + +- pi->next = class->ports; +- pi->prevp = &class->ports; +- if (class->ports) +- class->ports->prevp = &pi->next; +- class->ports = pi; + bucket->count++; + class->count++; + pthread_mutex_unlock (&_ports_lock); +diff --git a/libports/destroy-right.c b/libports/destroy-right.c +index 65e19c7..448b379 100644 +--- a/libports/destroy-right.c ++++ b/libports/destroy-right.c +@@ -30,12 +30,13 @@ ports_destroy_right (void *portstruct) + + if (pi->port_right != MACH_PORT_NULL) + { +- pthread_mutex_lock (&_ports_lock); ++ pthread_rwlock_wrlock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); + hurd_ihash_locp_remove (&pi->bucket->htable, pi->hentry); ++ pthread_rwlock_unlock (&_ports_htable_lock); + err = mach_port_mod_refs (mach_task_self (), pi->port_right, + MACH_PORT_RIGHT_RECEIVE, -1); + assert_perror (err); +- pthread_mutex_unlock 
(&_ports_lock); + + pi->port_right = MACH_PORT_NULL; + +diff --git a/libports/import-port.c b/libports/import-port.c +index 226f47e..2660672 100644 +--- a/libports/import-port.c ++++ b/libports/import-port.c +@@ -75,15 +75,22 @@ ports_import_port (struct port_class *class, struct port_bucket *bucket, + goto loop; + } + ++ pthread_rwlock_wrlock (&_ports_htable_lock); ++ err = hurd_ihash_add (&_ports_htable, port, pi); ++ if (err) ++ { ++ pthread_rwlock_unlock (&_ports_htable_lock); ++ goto lose; ++ } + err = hurd_ihash_add (&bucket->htable, port, pi); + if (err) +- goto lose; ++ { ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); ++ pthread_rwlock_unlock (&_ports_htable_lock); ++ goto lose; ++ } ++ pthread_rwlock_unlock (&_ports_htable_lock); + +- pi->next = class->ports; +- pi->prevp = &class->ports; +- if (class->ports) +- class->ports->prevp = &pi->next; +- class->ports = pi; + bucket->count++; + class->count++; + pthread_mutex_unlock (&_ports_lock); +diff --git a/libports/inhibit-all-rpcs.c b/libports/inhibit-all-rpcs.c +index d4a54ba..27e2ec5 100644 +--- a/libports/inhibit-all-rpcs.c ++++ b/libports/inhibit-all-rpcs.c +@@ -36,24 +36,23 @@ ports_inhibit_all_rpcs () + struct port_bucket *bucket; + int this_one = 0; + +- for (bucket = _ports_all_buckets; bucket; bucket = bucket->next) ++ pthread_rwlock_rdlock (&_ports_htable_lock); ++ HURD_IHASH_ITERATE (&_ports_htable, portstruct) + { +- HURD_IHASH_ITERATE (&bucket->htable, portstruct) ++ struct rpc_info *rpc; ++ struct port_info *pi = portstruct; ++ ++ for (rpc = pi->current_rpcs; rpc; rpc = rpc->next) + { +- struct rpc_info *rpc; +- struct port_info *pi = portstruct; +- +- for (rpc = pi->current_rpcs; rpc; rpc = rpc->next) +- { +- /* Avoid cancelling the calling thread if it's currently +- handling a RPC. */ +- if (rpc->thread == hurd_thread_self ()) +- this_one = 1; +- else +- hurd_thread_cancel (rpc->thread); +- } ++ /* Avoid cancelling the calling thread if it's currently ++ handling a RPC. 
*/ ++ if (rpc->thread == hurd_thread_self ()) ++ this_one = 1; ++ else ++ hurd_thread_cancel (rpc->thread); + } + } ++ pthread_rwlock_unlock (&_ports_htable_lock); + + while (_ports_total_rpcs > this_one) + { +diff --git a/libports/inhibit-bucket-rpcs.c b/libports/inhibit-bucket-rpcs.c +index 965aa03..82efdf5 100644 +--- a/libports/inhibit-bucket-rpcs.c ++++ b/libports/inhibit-bucket-rpcs.c +@@ -35,6 +35,7 @@ ports_inhibit_bucket_rpcs (struct port_bucket *bucket) + { + int this_one = 0; + ++ pthread_rwlock_rdlock (&_ports_htable_lock); + HURD_IHASH_ITERATE (&bucket->htable, portstruct) + { + struct rpc_info *rpc; +@@ -49,7 +50,7 @@ ports_inhibit_bucket_rpcs (struct port_bucket *bucket) + hurd_thread_cancel (rpc->thread); + } + } +- ++ pthread_rwlock_unlock (&_ports_htable_lock); + + while (bucket->rpcs > this_one) + { +diff --git a/libports/inhibit-class-rpcs.c b/libports/inhibit-class-rpcs.c +index 7ee8653..9a87a5f 100644 +--- a/libports/inhibit-class-rpcs.c ++++ b/libports/inhibit-class-rpcs.c +@@ -36,15 +36,24 @@ ports_inhibit_class_rpcs (struct port_class *class) + struct rpc_info *rpc; + int this_one = 0; + +- for (pi = class->ports; pi; pi = pi->next) +- for (rpc = pi->current_rpcs; rpc; rpc = rpc->next) +- { +- /* Avoid cancelling the calling thread. */ +- if (rpc->thread == hurd_thread_self ()) +- this_one = 1; +- else +- hurd_thread_cancel (rpc->thread); +- } ++ pthread_rwlock_rdlock (&_ports_htable_lock); ++ HURD_IHASH_ITERATE (&_ports_htable, portstruct) ++ { ++ struct rpc_info *rpc; ++ struct port_info *pi = portstruct; ++ if (pi->class != class) ++ continue; ++ ++ for (rpc = pi->current_rpcs; rpc; rpc = rpc->next) ++ { ++ /* Avoid cancelling the calling thread. 
*/ ++ if (rpc->thread == hurd_thread_self ()) ++ this_one = 1; ++ else ++ hurd_thread_cancel (rpc->thread); ++ } ++ } ++ pthread_rwlock_unlock (&_ports_htable_lock); + + while (class->rpcs > this_one) + { +diff --git a/libports/init.c b/libports/init.c +index 3ef5388..4a68cb8 100644 +--- a/libports/init.c ++++ b/libports/init.c +@@ -19,9 +19,14 @@ + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + + #include "ports.h" ++#include <stddef.h> + + pthread_mutex_t _ports_lock = PTHREAD_MUTEX_INITIALIZER; + pthread_cond_t _ports_block = PTHREAD_COND_INITIALIZER; +-struct port_bucket *_ports_all_buckets; ++ ++struct hurd_ihash _ports_htable = ++ HURD_IHASH_INITIALIZER (offsetof (struct port_info, ports_htable_entry)); ++pthread_rwlock_t _ports_htable_lock = PTHREAD_RWLOCK_INITIALIZER; ++ + int _ports_total_rpcs; + int _ports_flags; +diff --git a/libports/lookup-port.c b/libports/lookup-port.c +index f79f6f0..858ee11 100644 +--- a/libports/lookup-port.c ++++ b/libports/lookup-port.c +@@ -26,27 +26,22 @@ ports_lookup_port (struct port_bucket *bucket, + mach_port_t port, + struct port_class *class) + { +- struct port_info *pi = 0; +- ++ struct port_info *pi; ++ + pthread_mutex_lock (&_ports_lock); ++ pthread_rwlock_rdlock (&_ports_htable_lock); + +- if (bucket) +- pi = hurd_ihash_find (&bucket->htable, port); +- else +- for (bucket = _ports_all_buckets; bucket; bucket = bucket->next) +- { +- pi = hurd_ihash_find (&bucket->htable, port); +- if (pi) +- break; +- } +- +- if (pi && class && pi->class != class) ++ pi = hurd_ihash_find (&_ports_htable, port); ++ if (pi ++ && ((class && pi->class != class) ++ || (bucket && pi->bucket != bucket))) + pi = 0; + + if (pi) + pi->refcnt++; + ++ pthread_rwlock_unlock (&_ports_htable_lock); + pthread_mutex_unlock (&_ports_lock); +- ++ + return pi; + } +diff --git a/libports/ports.h b/libports/ports.h +index 7f13124..6922162 100644 +--- a/libports/ports.h ++++ b/libports/ports.h +@@ -48,7 +48,7 @@ struct port_info + struct 
rpc_info *current_rpcs; + struct port_bucket *bucket; + hurd_ihash_locp_t hentry; +- struct port_info *next, **prevp; /* links on port_class list */ ++ hurd_ihash_locp_t ports_htable_entry; + }; + typedef struct port_info *port_info_t; + +@@ -61,11 +61,12 @@ typedef struct port_info *port_info_t; + struct port_bucket + { + mach_port_t portset; ++ /* Per-bucket hash table used for fast iteration. Access must be ++ serialized using _ports_htable_lock. */ + struct hurd_ihash htable; + int rpcs; + int flags; + int count; +- struct port_bucket *next; + }; + /* FLAGS above are the following: */ + #define PORT_BUCKET_INHIBITED PORTS_INHIBITED +@@ -78,7 +79,6 @@ struct port_class + { + int flags; + int rpcs; +- struct port_info *ports; + int count; + void (*clean_routine) (void *); + void (*dropweak_routine) (void *); +@@ -277,7 +277,7 @@ error_t ports_class_iterate (struct port_class *class, + error_t (*fun)(void *port)); + + /* Internal entrypoint for above two. */ +-error_t _ports_bucket_class_iterate (struct port_bucket *bucket, ++error_t _ports_bucket_class_iterate (struct hurd_ihash *ht, + struct port_class *class, + error_t (*fun)(void *port)); + +@@ -402,7 +402,19 @@ extern kern_return_t + /* Private data */ + extern pthread_mutex_t _ports_lock; + extern pthread_cond_t _ports_block; +-extern struct port_bucket *_ports_all_buckets; ++ ++/* A global hash table mapping port names to port_info objects. This ++ table is used for port lookups and to iterate over classes. ++ ++ A port in this hash table carries an implicit light reference. ++ When the reference counts reach zero, we call ++ _ports_complete_deallocate. There we reacquire our lock ++ momentarily to check whether someone else reacquired a reference ++ through the hash table. */ ++extern struct hurd_ihash _ports_htable; ++/* Access to the hash table is protected by this lock. 
*/ ++extern pthread_rwlock_t _ports_htable_lock; ++ + extern int _ports_total_rpcs; + extern int _ports_flags; + #define _PORTS_INHIBITED PORTS_INHIBITED +diff --git a/libports/reallocate-from-external.c b/libports/reallocate-from-external.c +index 8cccb2a..9944b39 100644 +--- a/libports/reallocate-from-external.c ++++ b/libports/reallocate-from-external.c +@@ -43,8 +43,11 @@ ports_reallocate_from_external (void *portstruct, mach_port_t receive) + MACH_PORT_RIGHT_RECEIVE, -1); + assert_perror (err); + ++ pthread_rwlock_wrlock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); + hurd_ihash_locp_remove (&pi->bucket->htable, pi->hentry); +- ++ pthread_rwlock_unlock (&_ports_htable_lock); ++ + if ((pi->flags & PORT_HAS_SENDRIGHTS) && !stat.mps_srights) + { + dropref = 1; +@@ -59,11 +62,15 @@ ports_reallocate_from_external (void *portstruct, mach_port_t receive) + pi->port_right = receive; + pi->cancel_threshold = 0; + pi->mscount = stat.mps_mscount; +- +- err = hurd_ihash_add (&pi->bucket->htable, receive, pi); ++ ++ pthread_rwlock_wrlock (&_ports_htable_lock); ++ err = hurd_ihash_add (&_ports_htable, receive, pi); + assert_perror (err); ++ err = hurd_ihash_add (&pi->bucket->htable, receive, pi); ++ pthread_rwlock_unlock (&_ports_htable_lock); + pthread_mutex_unlock (&_ports_lock); +- ++ assert_perror (err); ++ + mach_port_move_member (mach_task_self (), receive, pi->bucket->portset); + + if (stat.mps_srights) +diff --git a/libports/reallocate-port.c b/libports/reallocate-port.c +index d2adaeb..cc534eb 100644 +--- a/libports/reallocate-port.c ++++ b/libports/reallocate-port.c +@@ -36,7 +36,10 @@ ports_reallocate_port (void *portstruct) + MACH_PORT_RIGHT_RECEIVE, -1); + assert_perror (err); + ++ pthread_rwlock_wrlock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); + hurd_ihash_locp_remove (&pi->bucket->htable, pi->hentry); ++ pthread_rwlock_unlock (&_ports_htable_lock); + + err = 
mach_port_allocate (mach_task_self (), MACH_PORT_RIGHT_RECEIVE, + &pi->port_right); +@@ -48,9 +51,13 @@ ports_reallocate_port (void *portstruct) + } + pi->cancel_threshold = 0; + pi->mscount = 0; +- err = hurd_ihash_add (&pi->bucket->htable, pi->port_right, pi); ++ pthread_rwlock_wrlock (&_ports_htable_lock); ++ err = hurd_ihash_add (&_ports_htable, pi->port_right, pi); + assert_perror (err); ++ err = hurd_ihash_add (&pi->bucket->htable, pi->port_right, pi); ++ pthread_rwlock_unlock (&_ports_htable_lock); + pthread_mutex_unlock (&_ports_lock); ++ assert_perror (err); + + err = mach_port_move_member (mach_task_self (), pi->port_right, + pi->bucket->portset); +diff --git a/libports/transfer-right.c b/libports/transfer-right.c +index 72488a9..3f48290 100644 +--- a/libports/transfer-right.c ++++ b/libports/transfer-right.c +@@ -41,7 +41,10 @@ ports_transfer_right (void *tostruct, + port = frompi->port_right; + if (port != MACH_PORT_NULL) + { ++ pthread_rwlock_wrlock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, frompi->ports_htable_entry); + hurd_ihash_locp_remove (&frompi->bucket->htable, frompi->hentry); ++ pthread_rwlock_unlock (&_ports_htable_lock); + frompi->port_right = MACH_PORT_NULL; + if (frompi->flags & PORT_HAS_SENDRIGHTS) + { +@@ -54,7 +57,10 @@ ports_transfer_right (void *tostruct, + /* Destroy the existing right in TOPI. 
*/ + if (topi->port_right != MACH_PORT_NULL) + { ++ pthread_rwlock_wrlock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, topi->ports_htable_entry); + hurd_ihash_locp_remove (&topi->bucket->htable, topi->hentry); ++ pthread_rwlock_unlock (&_ports_htable_lock); + err = mach_port_mod_refs (mach_task_self (), topi->port_right, + MACH_PORT_RIGHT_RECEIVE, -1); + assert_perror (err); +@@ -74,10 +80,16 @@ ports_transfer_right (void *tostruct, + topi->port_right = port; + topi->cancel_threshold = frompi->cancel_threshold; + topi->mscount = frompi->mscount; +- ++ ++ pthread_mutex_unlock (&_ports_lock); ++ + if (port) + { ++ pthread_rwlock_wrlock (&_ports_htable_lock); ++ err = hurd_ihash_add (&_ports_htable, port, topi); ++ assert_perror (err); + err = hurd_ihash_add (&topi->bucket->htable, port, topi); ++ pthread_rwlock_unlock (&_ports_htable_lock); + assert_perror (err); + if (topi->bucket != frompi->bucket) + { +@@ -86,9 +98,7 @@ ports_transfer_right (void *tostruct, + assert_perror (err); + } + } +- +- pthread_mutex_unlock (&_ports_lock); +- ++ + /* Take care of any lowered reference counts. */ + if (dereffrompi) + ports_port_deref (frompi); +-- +2.0.0 + diff --git a/debian/patches/0010-libports-lock-less-reference-counting-for-port_info-.patch b/debian/patches/0010-libports-lock-less-reference-counting-for-port_info-.patch new file mode 100644 index 00000000..9c3b7d27 --- /dev/null +++ b/debian/patches/0010-libports-lock-less-reference-counting-for-port_info-.patch @@ -0,0 +1,340 @@ +From e900dbf2e8d6b7bb98b6e78f7c9440e20c31928d Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Sat, 3 May 2014 01:02:35 +0200 +Subject: [PATCH 10/10] libports: lock-less reference counting for port_info + objects + +* libports/ports.h (struct port_info): Use the new type. +* libports/lookup-port.c: No need to lock _ports_lock anymore. +* libports/bucket-iterate.c: Likewise. 
+* libports/complete-deallocate.c: Check if someone reacquired a +reference through a hash table lookup. +* libports/create-internal.c: Use the new reference counting primitives. +* libports/get-right.c: Likewise. +* libports/import-port.c: Likewise. +* libports/port-deref-weak.c: Likewise. +* libports/port-deref.c: Likewise. +* libports/port-ref-weak.c: Likewise. +* libports/port-ref.c: Likewise. +* libports/reallocate-from-external.c: Likewise. +* libports/transfer-right.c: Likewise. +* utils/rpctrace.c: Likewise. +--- + libports/bucket-iterate.c | 4 +--- + libports/complete-deallocate.c | 14 ++++++++++++++ + libports/create-internal.c | 3 +-- + libports/get-right.c | 2 +- + libports/import-port.c | 3 +-- + libports/lookup-port.c | 4 +--- + libports/port-deref-weak.c | 10 +++------- + libports/port-deref.c | 34 ++++++++++++++++------------------ + libports/port-ref-weak.c | 6 +----- + libports/port-ref.c | 6 +----- + libports/ports.h | 4 ++-- + libports/reallocate-from-external.c | 2 +- + libports/transfer-right.c | 2 +- + utils/rpctrace.c | 10 ++++++++-- + 14 files changed, 52 insertions(+), 52 deletions(-) + +diff --git a/libports/bucket-iterate.c b/libports/bucket-iterate.c +index 79b6d72..b021b99 100644 +--- a/libports/bucket-iterate.c ++++ b/libports/bucket-iterate.c +@@ -35,7 +35,6 @@ _ports_bucket_class_iterate (struct hurd_ihash *ht, + size_t i, n, nr_items; + error_t err; + +- pthread_mutex_lock (&_ports_lock); + pthread_rwlock_rdlock (&_ports_htable_lock); + + if (ht->nr_items == 0) +@@ -59,13 +58,12 @@ _ports_bucket_class_iterate (struct hurd_ihash *ht, + + if (class == 0 || pi->class == class) + { +- pi->refcnt++; ++ refcounts_ref (&pi->refcounts, NULL); + p[n] = pi; + n++; + } + } + pthread_rwlock_unlock (&_ports_htable_lock); +- pthread_mutex_unlock (&_ports_lock); + + if (n != 0 && n != nr_items) + { +diff --git a/libports/complete-deallocate.c b/libports/complete-deallocate.c +index 4768dab..0d852f5 100644 +--- a/libports/complete-deallocate.c 
++++ b/libports/complete-deallocate.c +@@ -29,15 +29,29 @@ _ports_complete_deallocate (struct port_info *pi) + + if (pi->port_right) + { ++ struct references result; ++ + pthread_rwlock_wrlock (&_ports_htable_lock); ++ refcounts_references (&pi->refcounts, &result); ++ if (result.hard > 0 || result.weak > 0) ++ { ++ /* A reference was reacquired through a hash table lookup. ++ It's fine, we didn't touch anything yet. */ ++ pthread_rwlock_unlock (&_ports_htable_lock); ++ return; ++ } ++ + hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); + hurd_ihash_locp_remove (&pi->bucket->htable, pi->hentry); + pthread_rwlock_unlock (&_ports_htable_lock); ++ + mach_port_mod_refs (mach_task_self (), pi->port_right, + MACH_PORT_RIGHT_RECEIVE, -1); + pi->port_right = MACH_PORT_NULL; + } + ++ pthread_mutex_lock (&_ports_lock); ++ + pi->bucket->count--; + pi->class->count--; + +diff --git a/libports/create-internal.c b/libports/create-internal.c +index 8543986..2d85931 100644 +--- a/libports/create-internal.c ++++ b/libports/create-internal.c +@@ -54,8 +54,7 @@ _ports_create_port_internal (struct port_class *class, + } + + pi->class = class; +- pi->refcnt = 1; +- pi->weakrefcnt = 0; ++ refcounts_init (&pi->refcounts, 1, 0); + pi->cancel_threshold = 0; + pi->mscount = 0; + pi->flags = 0; +diff --git a/libports/get-right.c b/libports/get-right.c +index 89050c6..8681f46 100644 +--- a/libports/get-right.c ++++ b/libports/get-right.c +@@ -41,7 +41,7 @@ ports_get_right (void *port) + if ((pi->flags & PORT_HAS_SENDRIGHTS) == 0) + { + pi->flags |= PORT_HAS_SENDRIGHTS; +- pi->refcnt++; ++ refcounts_ref (&pi->refcounts, NULL); + err = mach_port_request_notification (mach_task_self (), + pi->port_right, + MACH_NOTIFY_NO_SENDERS, +diff --git a/libports/import-port.c b/libports/import-port.c +index 2660672..c337c85 100644 +--- a/libports/import-port.c ++++ b/libports/import-port.c +@@ -48,8 +48,7 @@ ports_import_port (struct port_class *class, struct port_bucket *bucket, + return 
ENOMEM; + + pi->class = class; +- pi->refcnt = 1 + !!stat.mps_srights; +- pi->weakrefcnt = 0; ++ refcounts_init (&pi->refcounts, 1 + !!stat.mps_srights, 0); + pi->cancel_threshold = 0; + pi->mscount = stat.mps_mscount; + pi->flags = stat.mps_srights ? PORT_HAS_SENDRIGHTS : 0; +diff --git a/libports/lookup-port.c b/libports/lookup-port.c +index 858ee11..cff0546 100644 +--- a/libports/lookup-port.c ++++ b/libports/lookup-port.c +@@ -28,7 +28,6 @@ ports_lookup_port (struct port_bucket *bucket, + { + struct port_info *pi; + +- pthread_mutex_lock (&_ports_lock); + pthread_rwlock_rdlock (&_ports_htable_lock); + + pi = hurd_ihash_find (&_ports_htable, port); +@@ -38,10 +37,9 @@ ports_lookup_port (struct port_bucket *bucket, + pi = 0; + + if (pi) +- pi->refcnt++; ++ ports_port_ref (pi); + + pthread_rwlock_unlock (&_ports_htable_lock); +- pthread_mutex_unlock (&_ports_lock); + + return pi; + } +diff --git a/libports/port-deref-weak.c b/libports/port-deref-weak.c +index beb4842..8432660 100644 +--- a/libports/port-deref-weak.c ++++ b/libports/port-deref-weak.c +@@ -25,12 +25,8 @@ void + ports_port_deref_weak (void *portstruct) + { + struct port_info *pi = portstruct; +- +- pthread_mutex_lock (&_ports_lock); +- assert (pi->weakrefcnt); +- pi->weakrefcnt--; +- if (pi->refcnt == 0 && pi->weakrefcnt == 0) ++ struct references result; ++ refcounts_deref_weak (&pi->refcounts, &result); ++ if (result.hard == 0 && result.weak == 0) + _ports_complete_deallocate (pi); +- else +- pthread_mutex_unlock (&_ports_lock); + } +diff --git a/libports/port-deref.c b/libports/port-deref.c +index cf9b238..b97dd13 100644 +--- a/libports/port-deref.c ++++ b/libports/port-deref.c +@@ -25,26 +25,24 @@ void + ports_port_deref (void *portstruct) + { + struct port_info *pi = portstruct; +- int trieddroppingweakrefs = 0; +- +- retry: +- +- pthread_mutex_lock (&_ports_lock); +- +- if (pi->refcnt == 1 && pi->weakrefcnt +- && pi->class->dropweak_routine && !trieddroppingweakrefs) ++ struct references 
result; ++ ++ if (pi->class->dropweak_routine) + { +- pthread_mutex_unlock (&_ports_lock); +- (*pi->class->dropweak_routine) (pi); +- trieddroppingweakrefs = 1; +- goto retry; ++ /* If we need to call the dropweak routine, we need to hold one ++ reference while doing so. We use a weak reference for this ++ purpose, which we acquire by demoting our hard reference to a ++ weak one. */ ++ refcounts_demote (&pi->refcounts, &result); ++ ++ if (result.hard == 0 && result.weak > 1) ++ (*pi->class->dropweak_routine) (pi); ++ ++ refcounts_deref_weak (&pi->refcounts, &result); + } +- +- assert (pi->refcnt); ++ else ++ refcounts_deref (&pi->refcounts, &result); + +- pi->refcnt--; +- if (pi->refcnt == 0 && pi->weakrefcnt == 0) ++ if (result.hard == 0 && result.weak == 0) + _ports_complete_deallocate (pi); +- else +- pthread_mutex_unlock (&_ports_lock); + } +diff --git a/libports/port-ref-weak.c b/libports/port-ref-weak.c +index c7d3c69..3f62dfe 100644 +--- a/libports/port-ref-weak.c ++++ b/libports/port-ref-weak.c +@@ -25,9 +25,5 @@ void + ports_port_ref_weak (void *portstruct) + { + struct port_info *pi = portstruct; +- +- pthread_mutex_lock (&_ports_lock); +- assert (pi->refcnt || pi->weakrefcnt); +- pi->weakrefcnt++; +- pthread_mutex_unlock (&_ports_lock); ++ refcounts_ref_weak (&pi->refcounts, NULL); + } +diff --git a/libports/port-ref.c b/libports/port-ref.c +index 92b7118..9a1c71e 100644 +--- a/libports/port-ref.c ++++ b/libports/port-ref.c +@@ -25,9 +25,5 @@ void + ports_port_ref (void *portstruct) + { + struct port_info *pi = portstruct; +- +- pthread_mutex_lock (&_ports_lock); +- assert (pi->refcnt || pi->weakrefcnt); +- pi->refcnt++; +- pthread_mutex_unlock (&_ports_lock); ++ refcounts_ref (&pi->refcounts, NULL); + } +diff --git a/libports/ports.h b/libports/ports.h +index 6922162..40d3b43 100644 +--- a/libports/ports.h ++++ b/libports/ports.h +@@ -27,6 +27,7 @@ + #include <hurd/ihash.h> + #include <mach/notify.h> + #include <pthread.h> ++#include <refcount.h> + + /* 
These are global values for common flags used in the various structures. + Not all of these are meaningful in all flag fields. */ +@@ -39,8 +40,7 @@ + struct port_info + { + struct port_class *class; +- int refcnt; +- int weakrefcnt; ++ refcounts_t refcounts; + mach_port_mscount_t mscount; + mach_msg_seqno_t cancel_threshold; + int flags; +diff --git a/libports/reallocate-from-external.c b/libports/reallocate-from-external.c +index 9944b39..7205bd9 100644 +--- a/libports/reallocate-from-external.c ++++ b/libports/reallocate-from-external.c +@@ -56,7 +56,7 @@ ports_reallocate_from_external (void *portstruct, mach_port_t receive) + else if (((pi->flags & PORT_HAS_SENDRIGHTS) == 0) && stat.mps_srights) + { + pi->flags |= PORT_HAS_SENDRIGHTS; +- pi->refcnt++; ++ refcounts_ref (&pi->refcounts, NULL); + } + + pi->port_right = receive; +diff --git a/libports/transfer-right.c b/libports/transfer-right.c +index 3f48290..776a8d2 100644 +--- a/libports/transfer-right.c ++++ b/libports/transfer-right.c +@@ -72,7 +72,7 @@ ports_transfer_right (void *tostruct, + else if (((topi->flags & PORT_HAS_SENDRIGHTS) == 0) && hassendrights) + { + topi->flags |= PORT_HAS_SENDRIGHTS; +- topi->refcnt++; ++ refcounts_ref (&topi->refcounts, NULL); + } + } + +diff --git a/utils/rpctrace.c b/utils/rpctrace.c +index fc913e3..b11fea4 100644 +--- a/utils/rpctrace.c ++++ b/utils/rpctrace.c +@@ -431,7 +431,9 @@ destroy_receiver_info (struct receiver_info *info) + while (send_wrapper) + { + struct sender_info *next = send_wrapper->next; +- assert (TRACED_INFO (send_wrapper)->pi.refcnt == 1); ++ assert ( ++ refcounts_hard_references (&TRACED_INFO (send_wrapper)->pi.refcounts) ++ == 1); + /* Reset the receive_right of the send wrapper in advance to avoid + * destroy_receiver_info is called when the port info is destroyed. 
*/ + send_wrapper->receive_right = NULL; +@@ -848,7 +850,11 @@ rewrite_right (mach_port_t *right, mach_msg_type_name_t *type, + hurd_ihash_locp_remove (&traced_names, receiver_info->locp); + + send_wrapper2 = get_send_wrapper (receiver_info, dest, &rr); +- assert (TRACED_INFO (send_wrapper2)->pi.refcnt == 1); ++ assert ( ++ refcounts_hard_references ( ++ &TRACED_INFO (send_wrapper2)->pi.refcounts) ++ == 1); ++ + name = TRACED_INFO (send_wrapper2)->name; + TRACED_INFO (send_wrapper2)->name = NULL; + /* send_wrapper2 isn't destroyed normally, so we need to unlink +-- +2.0.0 + diff --git a/debian/patches/series b/debian/patches/series index 91cedd09..7e167966 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -51,3 +51,13 @@ fix-net_rcv_msg.patch #pp.patch #pp-dde.patch #pp-random.patch +0001-ext2fs-use-correct-type-for-block-numbers.patch +0002-libdiskfs-fix-reference-counting-of-peropen-objects.patch +0003-include-detect-use-after-free-errors-using-the-refer.patch +0004-ext2fs-use-a-seperate-lock-to-protect-nodehash.patch +0005-fatfs-use-a-seperate-lock-to-protect-nodehash.patch +0006-isofs-use-a-seperate-lock-to-protect-node_cache.patch +0007-tmpfs-use-a-seperate-lock-to-protect-all_nodes.patch +0008-libdiskfs-lock-less-reference-counting-of-nodes.patch +0009-libports-use-a-global-hash-table-for-the-lookups.patch +0010-libports-lock-less-reference-counting-for-port_info-.patch |