diff options
28 files changed, 7653 insertions, 0 deletions
diff --git a/debian/patches/0001-libdiskfs-add-diskfs_make_node_alloc-to-allocate-fat.patch b/debian/patches/0001-libdiskfs-add-diskfs_make_node_alloc-to-allocate-fat.patch new file mode 100644 index 00000000..e4e7179f --- /dev/null +++ b/debian/patches/0001-libdiskfs-add-diskfs_make_node_alloc-to-allocate-fat.patch @@ -0,0 +1,141 @@ +From dbe60bff776af39833b50cf6ead4ac0f58805fda Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Fri, 16 May 2014 23:06:33 +0200 +Subject: [PATCH 01/27] libdiskfs: add diskfs_make_node_alloc to allocate fat + nodes + +libdiskfs has two kind of nodes, struct node and struct netnode. +struct node is used to store libdiskfs specific data, while struct +netnode contains user supplied data. Previously, both objects were +allocated separatly, and a pointer from the node to the netnode +provided a mapping from the former to the latter. + +Provide a function diskfs_make_node_alloc that allocates both nodes in +a contiguous region. + +This reduces the memory allocation overhead when creating nodes. It +also makes the relation between node and netnode a simple offset +calculation. Provide two functions to compute the netnode address +from the node address and vice-versa. + +Most notably, this makes implementing a cache on top of libdiskfs +easier. Auxiliary data for the cache can be stored in the +user-defined netnode, and the fat node can be used as the value. + +* libdiskfs/node-make.c (init_node): Move initialization here. +(diskfs_make_node): Use init_node. +(diskfs_make_node_alloc): New function to allocate fat nodes. +* libdiskfs/diskfs.h (diskfs_make_node_alloc): New declaration. +(diskfs_node_netnode): Compute netnode address from node address. +(diskfs_netnode_node): And vice-versa. +--- + libdiskfs/diskfs.h | 29 +++++++++++++++++++++++++++-- + libdiskfs/node-make.c | 39 ++++++++++++++++++++++++++++++--------- + 2 files changed, 57 insertions(+), 11 deletions(-) + +diff --git a/libdiskfs/diskfs.h b/libdiskfs/diskfs.h +index 8151ddc..a3f860d 100644 +--- a/libdiskfs/diskfs.h ++++ b/libdiskfs/diskfs.h +@@ -116,9 +116,12 @@ struct node + + loff_t allocsize; + +- ino64_t cache_id; +- + int author_tracks_uid; ++ ++ /* This is the last field. We do this so that if the node is ++ allocated with the disknode in an contiguous block, it sits right ++ next to the user supplied data. */ ++ ino64_t cache_id; + }; + + struct diskfs_control +@@ -685,6 +688,28 @@ diskfs_notice_filechange (struct node *np, enum file_changed_type type, + The new node will have one hard reference and no light references. */ + struct node *diskfs_make_node (struct disknode *dn); + ++/* Create a new node structure. Also allocate SIZE bytes for the ++ disknode. The address of the disknode can be obtained using ++ diskfs_node_disknode. The new node will have one hard reference ++ and no light references. */ ++struct node *diskfs_make_node_alloc (size_t size); ++ ++/* Return the address of the disknode for NODE. NODE must have been ++ allocated using diskfs_make_node_alloc. */ ++static inline struct disknode * ++diskfs_node_disknode (struct node *node) ++{ ++ return (struct disknode *) ((char *) node + sizeof (struct node)); ++} ++ ++/* Return the address of the node for DISKNODE. DISKNODE must have ++ been allocated using diskfs_make_node_alloc. */ ++static inline struct node * ++diskfs_disknode_node (struct disknode *disknode) ++{ ++ return (struct node *) ((char *) disknode - sizeof (struct node)); ++} ++ + + /* The library also exports the following functions; they are not generally + useful unless you are redefining other functions the library provides. */ +diff --git a/libdiskfs/node-make.c b/libdiskfs/node-make.c +index 2b6ef2a..ff0cc0d 100644 +--- a/libdiskfs/node-make.c ++++ b/libdiskfs/node-make.c +@@ -19,16 +19,9 @@ + #include <fcntl.h> + + +-/* Create a and return new node structure with DN as its physical disknode. +- The node will have one hard reference and no light references. */ +-struct node * +-diskfs_make_node (struct disknode *dn) ++static struct node * ++init_node (struct node *np, struct disknode *dn) + { +- struct node *np = malloc (sizeof (struct node)); +- +- if (np == 0) +- return 0; +- + np->dn = dn; + np->dn_set_ctime = 0; + np->dn_set_atime = 0; +@@ -52,3 +45,31 @@ diskfs_make_node (struct disknode *dn) + + return np; + } ++ ++/* Create a and return new node structure with DN as its physical disknode. ++ The node will have one hard reference and no light references. */ ++struct node * ++diskfs_make_node (struct disknode *dn) ++{ ++ struct node *np = malloc (sizeof (struct node)); ++ ++ if (np == 0) ++ return 0; ++ ++ return init_node (np, dn); ++} ++ ++/* Create a new node structure. Also allocate SIZE bytes for the ++ disknode. The address of the disknode can be obtained using ++ diskfs_node_disknode. The new node will have one hard reference ++ and no light references. */ ++struct node * ++diskfs_make_node_alloc (size_t size) ++{ ++ struct node *np = malloc (sizeof (struct node) + size); ++ ++ if (np == NULL) ++ return NULL; ++ ++ return init_node (np, diskfs_node_disknode (np)); ++} +-- +2.0.0.rc2 + diff --git a/debian/patches/0002-libnetfs-add-netfs_make_node_alloc-to-allocate-fat-n.patch b/debian/patches/0002-libnetfs-add-netfs_make_node_alloc-to-allocate-fat-n.patch new file mode 100644 index 00000000..e013c1e3 --- /dev/null +++ b/debian/patches/0002-libnetfs-add-netfs_make_node_alloc-to-allocate-fat-n.patch @@ -0,0 +1,116 @@ +From b28098f0eb0637d1c06aa38ce6041c8e0916199d Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Sun, 18 May 2014 13:34:12 +0200 +Subject: [PATCH 02/27] libnetfs: add netfs_make_node_alloc to allocate fat + nodes + +libnetfs has two kind of nodes, struct node and struct netnode. +struct node is used to store libnetfs specific data, while struct +netnode contains user supplied data. Previously, both objects were +allocated separatly, and a pointer from the node to the netnode +provided a mapping from the former to the latter. + +Provide a function netfs_make_node_alloc that allocates both nodes in +a contiguous region. + +This reduces the memory allocation overhead when creating nodes. It +also makes the relation between node and netnode a simple offset +calculation. Provide two functions to compute the netnode address +from the node address and vice-versa. + +Most notably, this makes implementing a cache on top of libnetfs +easier. Auxiliary data for the cache can be stored in the +user-defined netnode, and the fat node can be used as the value. + +* libnetfs/make-node.c (init_node): Move initialization here. +(netfs_make_node): Use init_node. +(netfs_make_node_alloc): New function to allocate fat nodes. +* libnetfs/netfs.h (netfs_make_node_alloc): New declaration. +(netfs_node_netnode): Compute netnode address from node address. +(netfs_netnode_node): And vice-versa. +--- + libnetfs/make-node.c | 29 +++++++++++++++++++++++------ + libnetfs/netfs.h | 22 ++++++++++++++++++++++ + 2 files changed, 45 insertions(+), 6 deletions(-) + +diff --git a/libnetfs/make-node.c b/libnetfs/make-node.c +index f20ada1..6bd8109 100644 +--- a/libnetfs/make-node.c ++++ b/libnetfs/make-node.c +@@ -21,13 +21,9 @@ + #include "netfs.h" + #include <hurd/fshelp.h> + +-struct node * +-netfs_make_node (struct netnode *nn) ++static struct node * ++init_node (struct node *np, struct netnode *nn) + { +- struct node *np = malloc (sizeof (struct node)); +- if (! np) +- return NULL; +- + np->nn = nn; + + pthread_mutex_init (&np->lock, NULL); +@@ -40,3 +36,24 @@ netfs_make_node (struct netnode *nn) + + return np; + } ++ ++struct node * ++netfs_make_node (struct netnode *nn) ++{ ++ struct node *np = malloc (sizeof (struct node)); ++ if (! np) ++ return NULL; ++ ++ return init_node (np, nn); ++} ++ ++struct node * ++netfs_make_node_alloc (size_t size) ++{ ++ struct node *np = malloc (sizeof (struct node) + size); ++ ++ if (np == NULL) ++ return NULL; ++ ++ return init_node (np, netfs_node_netnode (np)); ++} +diff --git a/libnetfs/netfs.h b/libnetfs/netfs.h +index aef4a3d..e3c3b3e 100644 +--- a/libnetfs/netfs.h ++++ b/libnetfs/netfs.h +@@ -372,6 +372,28 @@ extern int netfs_maxsymlinks; + If an error occurs, NULL is returned. */ + struct node *netfs_make_node (struct netnode *); + ++/* Create a new node structure. Also allocate SIZE bytes for the ++ netnode. The address of the netnode can be obtained using ++ netfs_node_netnode. The new node will have one hard reference and ++ no light references. If an error occurs, NULL is returned. */ ++struct node *netfs_make_node_alloc (size_t size); ++ ++/* Return the address of the netnode for NODE. NODE must have been ++ allocated using netfs_make_node_alloc. */ ++static inline struct netnode * ++netfs_node_netnode (struct node *node) ++{ ++ return (struct netnode *) ((char *) node + sizeof (struct node)); ++} ++ ++/* Return the address of the node for NETNODE. NETNODE must have been ++ allocated using netfs_make_node_alloc. */ ++static inline struct node * ++netfs_netnode_node (struct netnode *netnode) ++{ ++ return (struct node *) ((char *) netnode - sizeof (struct node)); ++} ++ + /* Whenever node->references is to be touched, this lock must be + held. Cf. netfs_nrele, netfs_nput, netfs_nref and netfs_drop_node. */ + extern pthread_spinlock_t netfs_node_refcnt_lock; +-- +2.0.0.rc2 + diff --git a/debian/patches/0003-trans-fakeroot-use-fat-nodes-to-simplify-the-node-ca.patch b/debian/patches/0003-trans-fakeroot-use-fat-nodes-to-simplify-the-node-ca.patch new file mode 100644 index 00000000..3923896a --- /dev/null +++ b/debian/patches/0003-trans-fakeroot-use-fat-nodes-to-simplify-the-node-ca.patch @@ -0,0 +1,149 @@ +From 4053f08472930d523d2f0392e565f0650eb32802 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Sun, 18 May 2014 13:45:14 +0200 +Subject: [PATCH 03/27] trans/fakeroot: use fat nodes to simplify the node + cache + +Previously, fakeroot stored netnodes in the hash table. But we are +not interested in a cache for netnodes, we need a node cache. So +fakeroot kept pointers to the associated node object in each netnode +object. + +Use fat netfs nodes, which combine node and netnode objects. + +* trans/fakeroot.c (struct netnode): Remove np. +(idport_ihash): Fix ihash location pointer offset. +(new_node): Allocate fat nodes, store the node pointer in the hash +table. +(netfs_node_norefs): Adjust accordingly. +(netfs_S_dir_lookup): Likewise. +--- + trans/fakeroot.c | 36 ++++++++++++------------------------ + 1 file changed, 12 insertions(+), 24 deletions(-) + +diff --git a/trans/fakeroot.c b/trans/fakeroot.c +index 4175b55..59f8a86 100644 +--- a/trans/fakeroot.c ++++ b/trans/fakeroot.c +@@ -47,7 +47,6 @@ static auth_t fakeroot_auth_port; + + struct netnode + { +- struct node *np; /* our node */ + hurd_ihash_locp_t idport_locp;/* easy removal pointer in idport ihash */ + mach_port_t idport; /* port from io_identity */ + int openmodes; /* O_READ | O_WRITE | O_EXEC */ +@@ -64,7 +63,8 @@ struct netnode + + pthread_mutex_t idport_ihash_lock = PTHREAD_MUTEX_INITIALIZER; + struct hurd_ihash idport_ihash +- = HURD_IHASH_INITIALIZER (offsetof (struct netnode, idport_locp)); ++= HURD_IHASH_INITIALIZER (sizeof (struct node) ++ + offsetof (struct netnode, idport_locp)); + + + /* Make a new virtual node. Always consumes the ports. If +@@ -74,8 +74,9 @@ new_node (file_t file, mach_port_t idport, int locked, int openmodes, + struct node **np) + { + error_t err; +- struct netnode *nn = calloc (1, sizeof *nn); +- if (nn == 0) ++ struct netnode *nn; ++ *np = netfs_make_node_alloc (sizeof *nn); ++ if (*np == 0) + { + mach_port_deallocate (mach_task_self (), file); + if (idport != MACH_PORT_NULL) +@@ -84,6 +85,7 @@ new_node (file_t file, mach_port_t idport, int locked, int openmodes, + pthread_mutex_unlock (&idport_ihash_lock); + return ENOMEM; + } ++ nn = netfs_node_netnode (*np); + nn->file = file; + nn->openmodes = openmodes; + if (idport != MACH_PORT_NULL) +@@ -97,35 +99,26 @@ new_node (file_t file, mach_port_t idport, int locked, int openmodes, + if (err) + { + mach_port_deallocate (mach_task_self (), file); +- free (nn); ++ free (*np); + return err; + } + } + + if (!locked) + pthread_mutex_lock (&idport_ihash_lock); +- err = hurd_ihash_add (&idport_ihash, nn->idport, nn); ++ err = hurd_ihash_add (&idport_ihash, nn->idport, *np); + if (err) + goto lose; + +- *np = nn->np = netfs_make_node (nn); +- if (*np == 0) +- { +- err = ENOMEM; +- goto lose_hash; +- } +- + pthread_mutex_lock (&(*np)->lock); + pthread_mutex_unlock (&idport_ihash_lock); + return 0; + +- lose_hash: +- hurd_ihash_locp_remove (&idport_ihash, nn->idport_locp); + lose: + pthread_mutex_unlock (&idport_ihash_lock); + mach_port_deallocate (mach_task_self (), nn->idport); + mach_port_deallocate (mach_task_self (), file); +- free (nn); ++ free (*np); + return err; + } + +@@ -161,8 +154,6 @@ set_faked_attribute (struct node *np, unsigned int faked) + void + netfs_node_norefs (struct node *np) + { +- assert (np->nn->np == np); +- + pthread_mutex_unlock (&np->lock); + pthread_spin_unlock (&netfs_node_refcnt_lock); + +@@ -172,7 +163,6 @@ netfs_node_norefs (struct node *np) + + mach_port_deallocate (mach_task_self (), np->nn->file); + mach_port_deallocate (mach_task_self (), np->nn->idport); +- free (np->nn); + free (np); + + pthread_spin_lock (&netfs_node_refcnt_lock); +@@ -358,13 +348,12 @@ netfs_S_dir_lookup (struct protid *diruser, + refcount lock so that, if a node is found, its reference counter cannot + drop to 0 before we get our own reference. */ + pthread_spin_lock (&netfs_node_refcnt_lock); +- struct netnode *nn = hurd_ihash_find (&idport_ihash, idport); +- if (nn != NULL) ++ np = hurd_ihash_find (&idport_ihash, idport); ++ if (np != NULL) + { +- assert (nn->np->nn == nn); + /* We already know about this node. */ + +- if (nn->np->references == 0) ++ if (np->references == 0) + { + /* But it might be in the process of being released. If so, + unlock the hash table to give the node a chance to actually +@@ -376,7 +365,6 @@ netfs_S_dir_lookup (struct protid *diruser, + } + + /* Otherwise, reference it right away. */ +- np = nn->np; + np->references++; + pthread_spin_unlock (&netfs_node_refcnt_lock); + +-- +2.0.0.rc2 + diff --git a/debian/patches/0004-trans-fakeroot-use-netfs_node_netnode-instead-of-np-.patch b/debian/patches/0004-trans-fakeroot-use-netfs_node_netnode-instead-of-np-.patch new file mode 100644 index 00000000..6e1b285c --- /dev/null +++ b/debian/patches/0004-trans-fakeroot-use-netfs_node_netnode-instead-of-np-.patch @@ -0,0 +1,386 @@ +From 1823cd5e2bcf983498b15969c6a5c9f0f5aa0a5f Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Sun, 18 May 2014 14:06:30 +0200 +Subject: [PATCH 04/27] trans/fakeroot: use netfs_node_netnode instead of + np->nn + +When using fat nodes, expressions of the form E->nn can be rewritten +as netfs_node_netnode (E). This is much faster as it only involves a +offset calculation. For reference, I used the following semantic +patch to create the patch: + +@@ +expression E; +@@ + +- E->nn ++ netfs_node_netnode (E) + +* trans/fakeroot.c: Use netfs_node_netnode instead of np->nn. +--- + trans/fakeroot.c | 99 ++++++++++++++++++++++++++++++-------------------------- + 1 file changed, 53 insertions(+), 46 deletions(-) + +diff --git a/trans/fakeroot.c b/trans/fakeroot.c +index 59f8a86..32a34ec 100644 +--- a/trans/fakeroot.c ++++ b/trans/fakeroot.c +@@ -125,7 +125,7 @@ new_node (file_t file, mach_port_t idport, int locked, int openmodes, + static void + set_default_attributes (struct node *np) + { +- np->nn->faked = FAKE_UID | FAKE_GID | FAKE_DEFAULT; ++ netfs_node_netnode (np)->faked = FAKE_UID | FAKE_GID | FAKE_DEFAULT; + np->nn_stat.st_uid = 0; + np->nn_stat.st_gid = 0; + } +@@ -133,9 +133,9 @@ set_default_attributes (struct node *np) + static void + set_faked_attribute (struct node *np, unsigned int faked) + { +- np->nn->faked |= faked; ++ netfs_node_netnode (np)->faked |= faked; + +- if (np->nn->faked & FAKE_DEFAULT) ++ if (netfs_node_netnode (np)->faked & FAKE_DEFAULT) + { + /* Now that the node has non-default faked attributes, they have to be + retained for future accesses. Account for the hash table reference. +@@ -146,7 +146,7 @@ set_faked_attribute (struct node *np, unsigned int faked) + easy enough if it's ever needed, although scalability could be + improved. */ + netfs_nref (np); +- np->nn->faked &= ~FAKE_DEFAULT; ++ netfs_node_netnode (np)->faked &= ~FAKE_DEFAULT; + } + } + +@@ -158,11 +158,11 @@ netfs_node_norefs (struct node *np) + pthread_spin_unlock (&netfs_node_refcnt_lock); + + pthread_mutex_lock (&idport_ihash_lock); +- hurd_ihash_locp_remove (&idport_ihash, np->nn->idport_locp); ++ hurd_ihash_locp_remove (&idport_ihash, netfs_node_netnode (np)->idport_locp); + pthread_mutex_unlock (&idport_ihash_lock); + +- mach_port_deallocate (mach_task_self (), np->nn->file); +- mach_port_deallocate (mach_task_self (), np->nn->idport); ++ mach_port_deallocate (mach_task_self (), netfs_node_netnode (np)->file); ++ mach_port_deallocate (mach_task_self (), netfs_node_netnode (np)->idport); + free (np); + + pthread_spin_lock (&netfs_node_refcnt_lock); +@@ -245,7 +245,8 @@ error_t + netfs_check_open_permissions (struct iouser *user, struct node *np, + int flags, int newnode) + { +- return check_openmodes (np->nn, flags & (O_RDWR|O_EXEC), MACH_PORT_NULL); ++ return check_openmodes (netfs_node_netnode (np), ++ flags & (O_RDWR|O_EXEC), MACH_PORT_NULL); + } + + error_t +@@ -271,12 +272,12 @@ netfs_S_dir_lookup (struct protid *diruser, + + dnp = diruser->po->np; + +- mach_port_t dir = dnp->nn->file; ++ mach_port_t dir = netfs_node_netnode (dnp)->file; + redo_lookup: + err = dir_lookup (dir, filename, + flags & (O_NOLINK|O_RDWR|O_EXEC|O_CREAT|O_EXCL|O_NONBLOCK), + mode, do_retry, retry_name, &file); +- if (dir != dnp->nn->file) ++ if (dir != netfs_node_netnode (dnp)->file) + mach_port_deallocate (mach_task_self (), dir); + if (err) + return err; +@@ -380,7 +381,8 @@ netfs_S_dir_lookup (struct protid *diruser, + pthread_mutex_unlock (&dnp->lock); + } + +- err = check_openmodes (np->nn, (flags & (O_RDWR|O_EXEC)), file); ++ err = check_openmodes (netfs_node_netnode (np), ++ (flags & (O_RDWR|O_EXEC)), file); + pthread_mutex_unlock (&idport_ihash_lock); + } + else +@@ -448,17 +450,17 @@ error_t + netfs_validate_stat (struct node *np, struct iouser *cred) + { + struct stat st; +- error_t err = io_stat (np->nn->file, &st); ++ error_t err = io_stat (netfs_node_netnode (np)->file, &st); + if (err) + return err; + +- if (np->nn->faked & FAKE_UID) ++ if (netfs_node_netnode (np)->faked & FAKE_UID) + st.st_uid = np->nn_stat.st_uid; +- if (np->nn->faked & FAKE_GID) ++ if (netfs_node_netnode (np)->faked & FAKE_GID) + st.st_gid = np->nn_stat.st_gid; +- if (np->nn->faked & FAKE_AUTHOR) ++ if (netfs_node_netnode (np)->faked & FAKE_AUTHOR) + st.st_author = np->nn_stat.st_author; +- if (np->nn->faked & FAKE_MODE) ++ if (netfs_node_netnode (np)->faked & FAKE_MODE) + st.st_mode = np->nn_stat.st_mode; + + np->nn_stat = st; +@@ -528,7 +530,7 @@ netfs_attempt_chmod (struct iouser *cred, struct node *np, mode_t mode) + + /* We don't bother with error checking since the fake mode change should + always succeed--worst case a later open will get EACCES. */ +- (void) file_chmod (np->nn->file, mode); ++ (void) file_chmod (netfs_node_netnode (np)->file, mode); + set_faked_attribute (np, FAKE_MODE); + np->nn_stat.st_mode = mode; + return 0; +@@ -543,7 +545,7 @@ netfs_attempt_mksymlink (struct iouser *cred, struct node *np, char *name) + char trans[sizeof _HURD_SYMLINK + namelen]; + memcpy (trans, _HURD_SYMLINK, sizeof _HURD_SYMLINK); + memcpy (&trans[sizeof _HURD_SYMLINK], name, namelen); +- return file_set_translator (np->nn->file, ++ return file_set_translator (netfs_node_netnode (np)->file, + FS_TRANS_EXCL|FS_TRANS_SET, + FS_TRANS_EXCL|FS_TRANS_SET, 0, + trans, sizeof trans, +@@ -562,7 +564,7 @@ netfs_attempt_mkdev (struct iouser *cred, struct node *np, + return ENOMEM; + else + { +- error_t err = file_set_translator (np->nn->file, ++ error_t err = file_set_translator (netfs_node_netnode (np)->file, + FS_TRANS_EXCL|FS_TRANS_SET, + FS_TRANS_EXCL|FS_TRANS_SET, 0, + trans, translen + 1, +@@ -576,7 +578,7 @@ netfs_attempt_mkdev (struct iouser *cred, struct node *np, + error_t + netfs_attempt_chflags (struct iouser *cred, struct node *np, int flags) + { +- return file_chflags (np->nn->file, flags); ++ return file_chflags (netfs_node_netnode (np)->file, flags); + } + + error_t +@@ -602,25 +604,25 @@ netfs_attempt_utimes (struct iouser *cred, struct node *np, + else + m.tv.tv_sec = m.tv.tv_usec = -1; + +- return file_utimes (np->nn->file, a.tvt, m.tvt); ++ return file_utimes (netfs_node_netnode (np)->file, a.tvt, m.tvt); + } + + error_t + netfs_attempt_set_size (struct iouser *cred, struct node *np, off_t size) + { +- return file_set_size (np->nn->file, size); ++ return file_set_size (netfs_node_netnode (np)->file, size); + } + + error_t + netfs_attempt_statfs (struct iouser *cred, struct node *np, struct statfs *st) + { +- return file_statfs (np->nn->file, st); ++ return file_statfs (netfs_node_netnode (np)->file, st); + } + + error_t + netfs_attempt_sync (struct iouser *cred, struct node *np, int wait) + { +- return file_sync (np->nn->file, wait, 0); ++ return file_sync (netfs_node_netnode (np)->file, wait, 0); + } + + error_t +@@ -633,7 +635,7 @@ error_t + netfs_attempt_mkdir (struct iouser *user, struct node *dir, + char *name, mode_t mode) + { +- return dir_mkdir (dir->nn->file, name, mode | S_IRWXU); ++ return dir_mkdir (netfs_node_netnode (dir)->file, name, mode | S_IRWXU); + } + + +@@ -645,7 +647,7 @@ netfs_attempt_mkdir (struct iouser *user, struct node *dir, + error_t + netfs_attempt_unlink (struct iouser *user, struct node *dir, char *name) + { +- return dir_unlink (dir->nn->file, name); ++ return dir_unlink (netfs_node_netnode (dir)->file, name); + } + + error_t +@@ -653,22 +655,22 @@ netfs_attempt_rename (struct iouser *user, struct node *fromdir, + char *fromname, struct node *todir, + char *toname, int excl) + { +- return dir_rename (fromdir->nn->file, fromname, +- todir->nn->file, toname, excl); ++ return dir_rename (netfs_node_netnode (fromdir)->file, fromname, ++ netfs_node_netnode (todir)->file, toname, excl); + } + + error_t + netfs_attempt_rmdir (struct iouser *user, + struct node *dir, char *name) + { +- return dir_rmdir (dir->nn->file, name); ++ return dir_rmdir (netfs_node_netnode (dir)->file, name); + } + + error_t + netfs_attempt_link (struct iouser *user, struct node *dir, + struct node *file, char *name, int excl) + { +- return dir_link (dir->nn->file, file->nn->file, name, excl); ++ return dir_link (netfs_node_netnode (dir)->file, netfs_node_netnode (file)->file, name, excl); + } + + error_t +@@ -676,7 +678,7 @@ netfs_attempt_mkfile (struct iouser *user, struct node *dir, + mode_t mode, struct node **np) + { + file_t newfile; +- error_t err = dir_mkfile (dir->nn->file, O_RDWR|O_EXEC, ++ error_t err = dir_mkfile (netfs_node_netnode (dir)->file, O_RDWR|O_EXEC, + real_from_fake_mode (mode), &newfile); + pthread_mutex_unlock (&dir->lock); + if (err == 0) +@@ -692,7 +694,8 @@ netfs_attempt_readlink (struct iouser *user, struct node *np, char *buf) + char transbuf[sizeof _HURD_SYMLINK + np->nn_stat.st_size + 1]; + char *trans = transbuf; + size_t translen = sizeof transbuf; +- error_t err = file_get_translator (np->nn->file, &trans, &translen); ++ error_t err = file_get_translator (netfs_node_netnode (np)->file, ++ &trans, &translen); + if (err == 0) + { + if (translen < sizeof _HURD_SYMLINK +@@ -715,7 +718,8 @@ netfs_attempt_read (struct iouser *cred, struct node *np, + off_t offset, size_t *len, void *data) + { + char *buf = data; +- error_t err = io_read (np->nn->file, &buf, len, offset, *len); ++ error_t err = io_read (netfs_node_netnode (np)->file, ++ &buf, len, offset, *len); + if (err == 0 && buf != data) + { + memcpy (data, buf, *len); +@@ -728,7 +732,7 @@ error_t + netfs_attempt_write (struct iouser *cred, struct node *np, + off_t offset, size_t *len, void *data) + { +- return io_write (np->nn->file, data, *len, offset, len); ++ return io_write (netfs_node_netnode (np)->file, data, *len, offset, len); + } + + error_t +@@ -744,7 +748,7 @@ netfs_get_dirents (struct iouser *cred, struct node *dir, + mach_msg_type_number_t *datacnt, + vm_size_t bufsize, int *amt) + { +- return dir_readdir (dir->nn->file, data, datacnt, ++ return dir_readdir (netfs_node_netnode (dir)->file, data, datacnt, + entry, nentries, bufsize, amt); + } + +@@ -762,7 +766,7 @@ netfs_file_get_storage_info (struct iouser *cred, + mach_msg_type_number_t *data_len) + { + *ports_type = MACH_MSG_TYPE_MOVE_SEND; +- return file_get_storage_info (np->nn->file, ++ return file_get_storage_info (netfs_node_netnode (np)->file, + ports, num_ports, + ints, num_ints, + offsets, num_offsets, +@@ -795,8 +799,9 @@ netfs_S_file_exec (struct protid *user, + return EOPNOTSUPP; + + pthread_mutex_lock (&user->po->np->lock); +- err = check_openmodes (user->po->np->nn, O_EXEC, MACH_PORT_NULL); +- file = user->po->np->nn->file; ++ err = check_openmodes (netfs_node_netnode (user->po->np), ++ O_EXEC, MACH_PORT_NULL); ++ file = netfs_node_netnode (user->po->np)->file; + if (!err) + err = mach_port_mod_refs (mach_task_self (), + file, MACH_PORT_RIGHT_SEND, 1); +@@ -806,7 +811,8 @@ netfs_S_file_exec (struct protid *user, + { + /* We cannot use MACH_MSG_TYPE_MOVE_SEND because we might need to + retry an interrupted call that would have consumed the rights. */ +- err = file_exec (user->po->np->nn->file, task, flags, argv, argvlen, ++ err = file_exec (netfs_node_netnode (user->po->np)->file, ++ task, flags, argv, argvlen, + envp, envplen, fds, MACH_MSG_TYPE_COPY_SEND, fdslen, + portarray, MACH_MSG_TYPE_COPY_SEND, portarraylen, + intarray, intarraylen, deallocnames, deallocnameslen, +@@ -838,7 +844,7 @@ netfs_S_io_map (struct protid *user, + *rdobjtype = *wrobjtype = MACH_MSG_TYPE_MOVE_SEND; + + pthread_mutex_lock (&user->po->np->lock); +- err = io_map (user->po->np->nn->file, rdobj, wrobj); ++ err = io_map (netfs_node_netnode (user->po->np)->file, rdobj, wrobj); + pthread_mutex_unlock (&user->po->np->lock); + return err; + } +@@ -855,7 +861,7 @@ netfs_S_io_map_cntl (struct protid *user, + *objtype = MACH_MSG_TYPE_MOVE_SEND; + + pthread_mutex_lock (&user->po->np->lock); +- err = io_map_cntl (user->po->np->nn->file, obj); ++ err = io_map_cntl (netfs_node_netnode (user->po->np)->file, obj); + pthread_mutex_unlock (&user->po->np->lock); + return err; + } +@@ -876,7 +882,8 @@ netfs_S_io_identity (struct protid *user, + *idtype = *fsystype = MACH_MSG_TYPE_MOVE_SEND; + + pthread_mutex_lock (&user->po->np->lock); +- err = io_identity (user->po->np->nn->file, id, fsys, fileno); ++ err = io_identity (netfs_node_netnode (user->po->np)->file, ++ id, fsys, fileno); + pthread_mutex_unlock (&user->po->np->lock); + return err; + } +@@ -891,7 +898,7 @@ netfs_S_##name (struct protid *user) \ + return EOPNOTSUPP; \ + \ + pthread_mutex_lock (&user->po->np->lock); \ +- err = name (user->po->np->nn->file); \ ++ err = name (netfs_node_netnode (user->po->np)->file); \ + pthread_mutex_unlock (&user->po->np->lock); \ + return err; \ + } +@@ -913,7 +920,7 @@ netfs_S_io_prenotify (struct protid *user, + return EOPNOTSUPP; + + pthread_mutex_lock (&user->po->np->lock); +- err = io_prenotify (user->po->np->nn->file, start, stop); ++ err = io_prenotify (netfs_node_netnode (user->po->np)->file, start, stop); + pthread_mutex_unlock (&user->po->np->lock); + return err; + } +@@ -928,7 +935,7 @@ netfs_S_io_postnotify (struct protid *user, + return EOPNOTSUPP; + + pthread_mutex_lock (&user->po->np->lock); +- err = io_postnotify (user->po->np->nn->file, start, stop); ++ err = io_postnotify (netfs_node_netnode (user->po->np)->file, start, stop); + pthread_mutex_unlock (&user->po->np->lock); + return err; + } +@@ -971,7 +978,7 @@ netfs_demuxer (mach_msg_header_t *inp, + | MACH_MSGH_BITS (MACH_MSG_TYPE_COPY_SEND, + MACH_MSGH_BITS_REMOTE (inp->msgh_bits)); + inp->msgh_local_port = inp->msgh_remote_port; /* reply port */ +- inp->msgh_remote_port = cred->po->np->nn->file; ++ inp->msgh_remote_port = netfs_node_netnode (cred->po->np)->file; + err = mach_msg (inp, MACH_SEND_MSG, inp->msgh_size, 0, + MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL); +-- +2.0.0.rc2 + diff --git a/debian/patches/0005-exec-add-missing-includes.patch b/debian/patches/0005-exec-add-missing-includes.patch new file mode 100644 index 00000000..42d69201 --- /dev/null +++ b/debian/patches/0005-exec-add-missing-includes.patch @@ -0,0 +1,39 @@ +From 10e79d1c7e5ac4e601fcb39c568b996234f0d7a1 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Tue, 20 May 2014 16:02:59 +0200 +Subject: [PATCH 05/27] exec: add missing includes + +* exec/exec.c: Include mach/gnumach.h. +* exec/main.c: Include device/device.h. +--- + exec/exec.c | 1 + + exec/main.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/exec/exec.c b/exec/exec.c +index b068f5e..2fc1e44 100644 +--- a/exec/exec.c ++++ b/exec/exec.c +@@ -24,6 +24,7 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + + + #include "priv.h" ++#include <mach/gnumach.h> + #include <hurd.h> + #include <hurd/exec.h> + #include <sys/stat.h> +diff --git a/exec/main.c b/exec/main.c +index 27f33b1..78faebd 100644 +--- a/exec/main.c ++++ b/exec/main.c +@@ -21,6 +21,7 @@ + + #include "priv.h" + #include <error.h> ++#include <device/device.h> + #include <hurd/paths.h> + #include <hurd/startup.h> + #include <argp.h> +-- +2.0.0.rc2 + diff --git a/debian/patches/0006-term-fix-memory-leak.patch b/debian/patches/0006-term-fix-memory-leak.patch new file mode 100644 index 00000000..ce4f74ee --- /dev/null +++ b/debian/patches/0006-term-fix-memory-leak.patch @@ -0,0 +1,30 @@ +From 04e1f62116a793fdc68ed94708ff0e19dd606b84 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Tue, 20 May 2014 16:17:17 +0200 +Subject: [PATCH 06/27] term: fix memory leak + +I hope someone fixed that bug. + +* term/users.c (pi_destroy_hook): Fix memory leak. +--- + term/users.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/term/users.c b/term/users.c +index 97bc22c..9bd51d0 100644 +--- a/term/users.c ++++ b/term/users.c +@@ -259,9 +259,7 @@ pi_destroy_hook (struct trivfs_protid *cred) + { + assert (((struct protid_hook *)cred->hook)->refcnt > 0); + if (--((struct protid_hook *)cred->hook)->refcnt == 0) +- /* XXX don't free for now, so we can try and catch a multiple-freeing +- bug. */ +- /* free (cred->hook) */; ++ free (cred->hook); + } + pthread_mutex_unlock (&global_lock); + } +-- +2.0.0.rc2 + diff --git a/debian/patches/0007-libdiskfs-fix-type-of-dir_cache_id-node_cache_id.patch b/debian/patches/0007-libdiskfs-fix-type-of-dir_cache_id-node_cache_id.patch new file mode 100644 index 00000000..624cb6d4 --- /dev/null +++ b/debian/patches/0007-libdiskfs-fix-type-of-dir_cache_id-node_cache_id.patch @@ -0,0 +1,27 @@ +From b14a5c98c9a8dd1193daeef5af60fe4ea1851d4f Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Wed, 21 May 2014 13:20:29 +0200 +Subject: [PATCH 07/27] libdiskfs: fix type of dir_cache_id, node_cache_id + +* libdiskfs/name-cache.c (struct lookup_cache): Fix type of +dir_cache_id, node_cache_id. +--- + libdiskfs/name-cache.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libdiskfs/name-cache.c b/libdiskfs/name-cache.c +index 8424ffe..c113692 100644 +--- a/libdiskfs/name-cache.c ++++ b/libdiskfs/name-cache.c +@@ -37,7 +37,7 @@ struct lookup_cache + /* Used to indentify nodes to the fs dependent code. 0 for NODE_CACHE_ID + means a `negative' entry -- recording that there's definitely no node with + this name. */ +- int dir_cache_id, node_cache_id; ++ ino64_t dir_cache_id, node_cache_id; + + /* Name of the node NODE_CACHE_ID in the directory DIR_CACHE_ID. Entries + with names too long to fit in this buffer aren't cached at all. */ +-- +2.0.0.rc2 + diff --git a/debian/patches/0008-libstore-provide-function-declaration-until-availabl.patch b/debian/patches/0008-libstore-provide-function-declaration-until-availabl.patch new file mode 100644 index 00000000..1a73d7f2 --- /dev/null +++ b/debian/patches/0008-libstore-provide-function-declaration-until-availabl.patch @@ -0,0 +1,38 @@ +From 5f913e1d0d5edef2e4f9abf8ca078abf2a72ba23 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Wed, 21 May 2014 13:30:24 +0200 +Subject: [PATCH 08/27] libstore: provide function declaration until available + upstream + +Until the Hurd specific header is available, provide a local +declaration of ped_device_new_from_store. + +* libstore/part.c (ped_device_new_from_store): New declaration. +--- + libstore/part.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/libstore/part.c b/libstore/part.c +index 56e904e..fb7f07e 100644 +--- a/libstore/part.c ++++ b/libstore/part.c +@@ -26,6 +26,16 @@ + + #include <parted/parted.h> + /*#include <parted/device_gnu.h>*/ ++ ++/* XXX Until the Hurd specific header is available, provide the ++ declaration of ped_device_new_from_store here. */ ++ ++/* Initialize a PedDevice using SOURCE. The SOURCE will NOT be destroyed; ++ the caller created it, it is the caller's responsilbility to free it ++ after it calls ped_device_destory. SOURCE is not registered in Parted's ++ list of devices. */ ++PedDevice* ped_device_new_from_store (struct store *source); ++ + #include <string.h> + #include <error.h> + +-- +2.0.0.rc2 + diff --git a/debian/patches/0009-Avoid-compiler-warning-about-empty-bodies.patch b/debian/patches/0009-Avoid-compiler-warning-about-empty-bodies.patch new file mode 100644 index 00000000..322e05f8 --- /dev/null +++ b/debian/patches/0009-Avoid-compiler-warning-about-empty-bodies.patch @@ -0,0 +1,135 @@ +From 63e87438a543b11fa407dc6e705c1416b4c58efe Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Tue, 20 May 2014 16:07:44 +0200 +Subject: [PATCH 09/27] Avoid compiler warning about empty bodies + +Make empty bodies of control flow statements more explicit. Doing so +will allow us to use stricter compiler settings. This would have +cought 4ece292c. + +* console-client/xkb/xkb.c: Make empty bodies more explicit +* libpipe/pipe.c: Likewise. +* mach-defpager/default_pager.c: Likewise. +* pfinet/linux-src/include/net/addrconf.h: Likewise. +* pfinet/linux-src/net/ipv4/fib_hash.c: Likewise. +* pflocal/connq.c: Likewise. +* pflocal/socket.c: Likewise. +--- + console-client/xkb/xkb.c | 3 ++- + libpipe/pipe.c | 4 +++- + mach-defpager/default_pager.c | 4 ++-- + pfinet/linux-src/include/net/addrconf.h | 2 ++ + pfinet/linux-src/net/ipv4/fib_hash.c | 2 +- + pflocal/connq.c | 2 +- + pflocal/socket.c | 4 +++- + 7 files changed, 14 insertions(+), 7 deletions(-) + +diff --git a/console-client/xkb/xkb.c b/console-client/xkb/xkb.c +index 0b43913..220701b 100644 +--- a/console-client/xkb/xkb.c ++++ b/console-client/xkb/xkb.c +@@ -606,10 +606,11 @@ clearcontrols (keypress_t key, boolctrls ctrls, int flags) + static void + lockcontrols (keypress_t key, boolctrls ctrls, int flags) + { ++ /* XXX this needs a closer look. */ + if (!key.rel) + { + //setcontrols (key, boolctrls, flags); +- if (!(flags & noLock)); ++ //if (!(flags & noLock)); + // lboolctrls |= boolctrls; + } + else +diff --git a/libpipe/pipe.c b/libpipe/pipe.c +index 1080b5d..f9300e7 100644 +--- a/libpipe/pipe.c ++++ b/libpipe/pipe.c +@@ -352,7 +352,9 @@ pipe_send (struct pipe *pipe, int noblock, void *source, + if (!err) + err = packet_set_ports (control_packet, ports, num_ports); + if (err) +- /* Trash CONTROL_PACKET somehow XXX */; ++ { ++ /* Trash CONTROL_PACKET somehow XXX */ ++ } + } + } + +diff --git a/mach-defpager/default_pager.c b/mach-defpager/default_pager.c +index f514ea6..380c724 100644 +--- a/mach-defpager/default_pager.c ++++ b/mach-defpager/default_pager.c +@@ -88,13 +88,13 @@ synchronized_printf (const char *fmt, ...) + #if 0 + #define dprintf(f, x...) synchronized_printf (f, ##x) + #else +-#define dprintf(f, x...) ++#define dprintf(f, x...) (void) 0 + #endif + + #if 0 + #define ddprintf(f, x...) synchronized_printf (f, ##x) + #else +-#define ddprintf(f, x...) ++#define ddprintf(f, x...) (void) 0 + #endif + + /* +diff --git a/pfinet/linux-src/include/net/addrconf.h b/pfinet/linux-src/include/net/addrconf.h +index d711d0d..4b27077 100644 +--- a/pfinet/linux-src/include/net/addrconf.h ++++ b/pfinet/linux-src/include/net/addrconf.h +@@ -1,6 +1,8 @@ + #ifndef _ADDRCONF_H + #define _ADDRCONF_H + ++#include "ipv6.h" ++ + #define RETRANS_TIMER HZ + + #define MAX_RTR_SOLICITATIONS 3 +diff --git a/pfinet/linux-src/net/ipv4/fib_hash.c b/pfinet/linux-src/net/ipv4/fib_hash.c +index d9e029c..e3987ea 100644 +--- a/pfinet/linux-src/net/ipv4/fib_hash.c ++++ b/pfinet/linux-src/net/ipv4/fib_hash.c +@@ -406,7 +406,7 @@ static void rtmsg_fib(int, struct fib_node*, int, int, + struct nlmsghdr *n, + struct netlink_skb_parms *); + #else +-#define rtmsg_fib(a, b, c, d, e, f) ++#define rtmsg_fib(a, b, c, d, e, f) (void) 0 + #endif + + +diff --git a/pflocal/connq.c b/pflocal/connq.c +index d88711e..d86f9a2 100644 +--- a/pflocal/connq.c ++++ b/pflocal/connq.c +@@ -212,7 +212,7 @@ connq_listen (struct connq *cq, struct timespec *tsp, struct sock **sock) + request and another listener (should) eventually come along. + (In fact it is very probably as the caller has likely done a + select and will now follow up with an accept.) */ +- ; ++ { /* Do nothing. */ } + + out: + pthread_mutex_unlock (&cq->lock); +diff --git a/pflocal/socket.c b/pflocal/socket.c +index 7142c6e..792c637 100644 +--- a/pflocal/socket.c ++++ b/pflocal/socket.c +@@ -198,7 +198,9 @@ S_socket_accept (struct sock_user *user, + ports_port_deref (peer_addr); + } + else +- /* TEAR DOWN THE CONNECTION XXX */; ++ { ++ /* TEAR DOWN THE CONNECTION XXX */ ++ } + } + } + +-- +2.0.0.rc2 + diff --git a/debian/patches/0010-librbtree-add-a-red-black-tree-implementation.patch b/debian/patches/0010-librbtree-add-a-red-black-tree-implementation.patch new file mode 100644 index 00000000..b34df7bb --- /dev/null +++ b/debian/patches/0010-librbtree-add-a-red-black-tree-implementation.patch @@ -0,0 +1,1181 @@ +From 07e6865e754ab651465651c5d9bff7d5c504b447 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Fri, 16 May 2014 20:21:48 +0200 +Subject: [PATCH 10/27] librbtree: add a red-black tree implementation + +Red-black trees offer search, insert, and delete operations with worst +case runtime in O(log n). They are also very space efficient. + +This is an verbatim copy from git://git.sceen.net/rbraun/librbraun.git +as of 45fef4ad. Only macros.h has been slightly amended. This file +defines two macros, which are also defined in cthreads.h. Undefine +the macros first to work around this problem. + +* librbtree/Makefile: New file. +* librbtree/rbtree.c: Likewise. +* librbtree/rbtree.h: Likewise. +* librbtree/rbtree_i.h: Likewise. +* librbtree/macros.h: Likewise. Also, undefine MACRO_{BEGIN,END}. +* Makefile (lib-subdirs): Add librbtree. +--- + Makefile | 3 + + librbtree/Makefile | 27 +++ + librbtree/macros.h | 70 ++++++++ + librbtree/rbtree.c | 498 +++++++++++++++++++++++++++++++++++++++++++++++++++ + librbtree/rbtree.h | 310 ++++++++++++++++++++++++++++++++ + librbtree/rbtree_i.h | 196 ++++++++++++++++++++ + 6 files changed, 1104 insertions(+) + create mode 100644 librbtree/Makefile + create mode 100644 librbtree/macros.h + create mode 100644 librbtree/rbtree.c + create mode 100644 librbtree/rbtree.h + create mode 100644 librbtree/rbtree_i.h + +diff --git a/Makefile b/Makefile +index 106f2f6..8a0be83 100644 +--- a/Makefile ++++ b/Makefile +@@ -46,6 +46,9 @@ endif + # Other directories + other-subdirs = hurd doc config release include + ++# XXX ++lib-subdirs += librbtree ++ + # All the subdirectories together + subdirs = $(lib-subdirs) $(prog-subdirs) $(other-subdirs) + +diff --git a/librbtree/Makefile b/librbtree/Makefile +new file mode 100644 +index 0000000..0f33358 +--- /dev/null ++++ b/librbtree/Makefile +@@ -0,0 +1,27 @@ ++# Copyright (C) 2014 Free Software Foundation, Inc. ++# ++# This file is part of the GNU Hurd. ++# ++# The GNU Hurd is free software; you can redistribute it and/or ++# modify it under the terms of the GNU General Public License as ++# published by the Free Software Foundation; either version 2, or (at ++# your option) any later version. ++# ++# The GNU Hurd is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with the GNU Hurd. If not, see <http://www.gnu.org/licenses/>. ++ ++dir := librbtree ++makemode := library ++ ++libname := librbtree ++SRCS = rbtree.c ++installhdrs = rbtree.h rbtree_i.h macros.h ++ ++OBJS = $(SRCS:.c=.o) ++ ++include ../Makeconf +diff --git a/librbtree/macros.h b/librbtree/macros.h +new file mode 100644 +index 0000000..f0960a3 +--- /dev/null ++++ b/librbtree/macros.h +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (c) 2009, 2010 Richard Braun. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES ++ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ++ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, ++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * ++ * Helper macros. ++ */ ++ ++#ifndef _MACROS_H ++#define _MACROS_H ++ ++#include <stddef.h> ++ ++#undef MACRO_BEGIN ++#undef MACRO_END ++#define MACRO_BEGIN ({ ++#define MACRO_END }) ++ ++#define XQUOTE(x) #x ++#define QUOTE(x) XQUOTE(x) ++ ++#define STRLEN(x) (sizeof(x) - 1) ++#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) ++ ++#define MIN(a, b) ((a) < (b) ? (a) : (b)) ++#define MAX(a, b) ((a) > (b) ? (a) : (b)) ++ ++#define P2ALIGNED(x, a) (((x) & ((a) - 1)) == 0) ++#define ISP2(x) P2ALIGNED(x, x) ++#define P2ALIGN(x, a) ((x) & -(a)) ++#define P2ROUND(x, a) (-(-(x) & -(a))) ++#define P2END(x, a) (-(~(x) & -(a))) ++ ++#define structof(ptr, type, member) \ ++ ((type *)((char *)(ptr) - offsetof(type, member))) ++ ++#define alignof(x) __alignof__(x) ++ ++#define likely(expr) __builtin_expect(!!(expr), 1) ++#define unlikely(expr) __builtin_expect(!!(expr), 0) ++ ++#define barrier() asm volatile("" : : : "memory") ++ ++#define __noreturn __attribute__((noreturn)) ++#define __aligned(x) __attribute__((aligned(x))) ++ ++#define __format_printf(fmt, args) \ ++ __attribute__((format(printf, fmt, args))) ++ ++#endif /* _MACROS_H */ +diff --git a/librbtree/rbtree.c b/librbtree/rbtree.c +new file mode 100644 +index 0000000..969a18d +--- /dev/null ++++ b/librbtree/rbtree.c +@@ -0,0 +1,498 @@ ++/* ++ * Copyright (c) 2010, 2012 Richard Braun. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES ++ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ++ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, ++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include <assert.h> ++#include <stddef.h> ++ ++#include "macros.h" ++#include "rbtree.h" ++#include "rbtree_i.h" ++ ++/* ++ * Return the index of a node in the children array of its parent. ++ * ++ * The parent parameter must not be NULL, and must be the parent of the ++ * given node. ++ */ ++static inline int ++rbtree_node_index(const struct rbtree_node *node, ++ const struct rbtree_node *parent) ++{ ++ assert(parent != NULL); ++ assert((node == NULL) || (rbtree_node_parent(node) == parent)); ++ ++ if (parent->children[RBTREE_LEFT] == node) ++ return RBTREE_LEFT; ++ ++ assert(parent->children[RBTREE_RIGHT] == node); ++ ++ return RBTREE_RIGHT; ++} ++ ++/* ++ * Return the color of a node. ++ */ ++static inline int ++rbtree_node_color(const struct rbtree_node *node) ++{ ++ return node->parent & RBTREE_COLOR_MASK; ++} ++ ++/* ++ * Return true if the node is red. ++ */ ++static inline int ++rbtree_node_is_red(const struct rbtree_node *node) ++{ ++ return rbtree_node_color(node) == RBTREE_COLOR_RED; ++} ++ ++/* ++ * Return true if the node is black. ++ */ ++static inline int ++rbtree_node_is_black(const struct rbtree_node *node) ++{ ++ return rbtree_node_color(node) == RBTREE_COLOR_BLACK; ++} ++ ++/* ++ * Set the parent of a node, retaining its current color. ++ */ ++static inline void ++rbtree_node_set_parent(struct rbtree_node *node, struct rbtree_node *parent) ++{ ++ assert(rbtree_node_check_alignment(node)); ++ assert(rbtree_node_check_alignment(parent)); ++ ++ node->parent = (unsigned long)parent | (node->parent & RBTREE_COLOR_MASK); ++} ++ ++/* ++ * Set the color of a node, retaining its current parent. ++ */ ++static inline void ++rbtree_node_set_color(struct rbtree_node *node, int color) ++{ ++ assert((color & ~RBTREE_COLOR_MASK) == 0); ++ node->parent = (node->parent & RBTREE_PARENT_MASK) | color; ++} ++ ++/* ++ * Set the color of a node to red, retaining its current parent. ++ */ ++static inline void ++rbtree_node_set_red(struct rbtree_node *node) ++{ ++ rbtree_node_set_color(node, RBTREE_COLOR_RED); ++} ++ ++/* ++ * Set the color of a node to black, retaining its current parent. ++ */ ++static inline void ++rbtree_node_set_black(struct rbtree_node *node) ++{ ++ rbtree_node_set_color(node, RBTREE_COLOR_BLACK); ++} ++ ++/* ++ * Return the left-most deepest child node of the given node. ++ */ ++static struct rbtree_node * ++rbtree_node_find_deepest(struct rbtree_node *node) ++{ ++ struct rbtree_node *parent; ++ ++ assert(node != NULL); ++ ++ for (;;) { ++ parent = node; ++ node = node->children[RBTREE_LEFT]; ++ ++ if (node == NULL) { ++ node = parent->children[RBTREE_RIGHT]; ++ ++ if (node == NULL) ++ return parent; ++ } ++ } ++} ++ ++/* ++ * Perform a tree rotation, rooted at the given node. ++ * ++ * The direction parameter defines the rotation direction and is either ++ * RBTREE_LEFT or RBTREE_RIGHT. ++ */ ++static void ++rbtree_rotate(struct rbtree *tree, struct rbtree_node *node, int direction) ++{ ++ struct rbtree_node *parent, *rnode; ++ int left, right; ++ ++ left = direction; ++ right = 1 - left; ++ parent = rbtree_node_parent(node); ++ rnode = node->children[right]; ++ ++ node->children[right] = rnode->children[left]; ++ ++ if (rnode->children[left] != NULL) ++ rbtree_node_set_parent(rnode->children[left], node); ++ ++ rnode->children[left] = node; ++ rbtree_node_set_parent(rnode, parent); ++ ++ if (unlikely(parent == NULL)) ++ tree->root = rnode; ++ else ++ parent->children[rbtree_node_index(node, parent)] = rnode; ++ ++ rbtree_node_set_parent(node, rnode); ++} ++ ++void ++rbtree_insert_rebalance(struct rbtree *tree, struct rbtree_node *parent, ++ int index, struct rbtree_node *node) ++{ ++ struct rbtree_node *grand_parent, *uncle, *tmp; ++ int left, right; ++ ++ assert(rbtree_node_check_alignment(parent)); ++ assert(rbtree_node_check_alignment(node)); ++ ++ node->parent = (unsigned long)parent | RBTREE_COLOR_RED; ++ node->children[RBTREE_LEFT] = NULL; ++ node->children[RBTREE_RIGHT] = NULL; ++ ++ if (unlikely(parent == NULL)) ++ tree->root = node; ++ else ++ parent->children[index] = node; ++ ++ for (;;) { ++ if (parent == NULL) { ++ rbtree_node_set_black(node); ++ break; ++ } ++ ++ if (rbtree_node_is_black(parent)) ++ break; ++ ++ grand_parent = rbtree_node_parent(parent); ++ assert(grand_parent != NULL); ++ ++ left = rbtree_node_index(parent, grand_parent); ++ right = 1 - left; ++ ++ uncle = grand_parent->children[right]; ++ ++ /* ++ * Uncle is red. Flip colors and repeat at grand parent. ++ */ ++ if ((uncle != NULL) && rbtree_node_is_red(uncle)) { ++ rbtree_node_set_black(uncle); ++ rbtree_node_set_black(parent); ++ rbtree_node_set_red(grand_parent); ++ node = grand_parent; ++ parent = rbtree_node_parent(node); ++ continue; ++ } ++ ++ /* ++ * Node is the right child of its parent. Rotate left at parent. ++ */ ++ if (parent->children[right] == node) { ++ rbtree_rotate(tree, parent, left); ++ tmp = node; ++ node = parent; ++ parent = tmp; ++ } ++ ++ /* ++ * Node is the left child of its parent. Handle colors, rotate right ++ * at grand parent, and leave. ++ */ ++ rbtree_node_set_black(parent); ++ rbtree_node_set_red(grand_parent); ++ rbtree_rotate(tree, grand_parent, right); ++ break; ++ } ++ ++ assert(rbtree_node_is_black(tree->root)); ++} ++ ++void ++rbtree_remove(struct rbtree *tree, struct rbtree_node *node) ++{ ++ struct rbtree_node *child, *parent, *brother; ++ int color, left, right; ++ ++ if (node->children[RBTREE_LEFT] == NULL) ++ child = node->children[RBTREE_RIGHT]; ++ else if (node->children[RBTREE_RIGHT] == NULL) ++ child = node->children[RBTREE_LEFT]; ++ else { ++ struct rbtree_node *successor; ++ ++ /* ++ * Two-children case: replace the node with its successor. ++ */ ++ ++ successor = node->children[RBTREE_RIGHT]; ++ ++ while (successor->children[RBTREE_LEFT] != NULL) ++ successor = successor->children[RBTREE_LEFT]; ++ ++ color = rbtree_node_color(successor); ++ child = successor->children[RBTREE_RIGHT]; ++ parent = rbtree_node_parent(node); ++ ++ if (unlikely(parent == NULL)) ++ tree->root = successor; ++ else ++ parent->children[rbtree_node_index(node, parent)] = successor; ++ ++ parent = rbtree_node_parent(successor); ++ ++ /* ++ * Set parent directly to keep the original color. ++ */ ++ successor->parent = node->parent; ++ successor->children[RBTREE_LEFT] = node->children[RBTREE_LEFT]; ++ rbtree_node_set_parent(successor->children[RBTREE_LEFT], successor); ++ ++ if (node == parent) ++ parent = successor; ++ else { ++ successor->children[RBTREE_RIGHT] = node->children[RBTREE_RIGHT]; ++ rbtree_node_set_parent(successor->children[RBTREE_RIGHT], ++ successor); ++ parent->children[RBTREE_LEFT] = child; ++ ++ if (child != NULL) ++ rbtree_node_set_parent(child, parent); ++ } ++ ++ goto update_color; ++ } ++ ++ /* ++ * Node has at most one child. ++ */ ++ ++ color = rbtree_node_color(node); ++ parent = rbtree_node_parent(node); ++ ++ if (child != NULL) ++ rbtree_node_set_parent(child, parent); ++ ++ if (unlikely(parent == NULL)) ++ tree->root = child; ++ else ++ parent->children[rbtree_node_index(node, parent)] = child; ++ ++ /* ++ * The node has been removed, update the colors. The child pointer can ++ * be NULL, in which case it is considered a black leaf. ++ */ ++update_color: ++ if (color == RBTREE_COLOR_RED) ++ return; ++ ++ for (;;) { ++ if ((child != NULL) && rbtree_node_is_red(child)) { ++ rbtree_node_set_black(child); ++ break; ++ } ++ ++ if (parent == NULL) ++ break; ++ ++ left = rbtree_node_index(child, parent); ++ right = 1 - left; ++ ++ brother = parent->children[right]; ++ ++ /* ++ * Brother is red. Recolor and rotate left at parent so that brother ++ * becomes black. ++ */ ++ if (rbtree_node_is_red(brother)) { ++ rbtree_node_set_black(brother); ++ rbtree_node_set_red(parent); ++ rbtree_rotate(tree, parent, left); ++ brother = parent->children[right]; ++ } ++ ++ /* ++ * Brother has no red child. Recolor and repeat at parent. ++ */ ++ if (((brother->children[RBTREE_LEFT] == NULL) ++ || rbtree_node_is_black(brother->children[RBTREE_LEFT])) ++ && ((brother->children[RBTREE_RIGHT] == NULL) ++ || rbtree_node_is_black(brother->children[RBTREE_RIGHT]))) { ++ rbtree_node_set_red(brother); ++ child = parent; ++ parent = rbtree_node_parent(child); ++ continue; ++ } ++ ++ /* ++ * Brother's right child is black. Recolor and rotate right at brother. ++ */ ++ if ((brother->children[right] == NULL) ++ || rbtree_node_is_black(brother->children[right])) { ++ rbtree_node_set_black(brother->children[left]); ++ rbtree_node_set_red(brother); ++ rbtree_rotate(tree, brother, right); ++ brother = parent->children[right]; ++ } ++ ++ /* ++ * Brother's left child is black. Exchange parent and brother colors ++ * (we already know brother is black), set brother's right child black, ++ * rotate left at parent and leave. ++ */ ++ rbtree_node_set_color(brother, rbtree_node_color(parent)); ++ rbtree_node_set_black(parent); ++ rbtree_node_set_black(brother->children[right]); ++ rbtree_rotate(tree, parent, left); ++ break; ++ } ++ ++ assert((tree->root == NULL) || rbtree_node_is_black(tree->root)); ++} ++ ++struct rbtree_node * ++rbtree_nearest(struct rbtree_node *parent, int index, int direction) ++{ ++ assert(rbtree_check_index(direction)); ++ ++ if (parent == NULL) ++ return NULL; ++ ++ assert(rbtree_check_index(index)); ++ ++ if (index != direction) ++ return parent; ++ ++ return rbtree_walk(parent, direction); ++} ++ ++struct rbtree_node * ++rbtree_firstlast(const struct rbtree *tree, int direction) ++{ ++ struct rbtree_node *prev, *cur; ++ ++ assert(rbtree_check_index(direction)); ++ ++ prev = NULL; ++ ++ for (cur = tree->root; cur != NULL; cur = cur->children[direction]) ++ prev = cur; ++ ++ return prev; ++} ++ ++struct rbtree_node * ++rbtree_walk(struct rbtree_node *node, int direction) ++{ ++ int left, right; ++ ++ assert(rbtree_check_index(direction)); ++ ++ left = direction; ++ right = 1 - left; ++ ++ if (node == NULL) ++ return NULL; ++ ++ if (node->children[left] != NULL) { ++ node = node->children[left]; ++ ++ while (node->children[right] != NULL) ++ node = node->children[right]; ++ } else { ++ struct rbtree_node *parent; ++ int index; ++ ++ for (;;) { ++ parent = rbtree_node_parent(node); ++ ++ if (parent == NULL) ++ return NULL; ++ ++ index = rbtree_node_index(node, parent); ++ node = parent; ++ ++ if (index == right) ++ break; ++ } ++ } ++ ++ return node; ++} ++ ++struct rbtree_node * ++rbtree_postwalk_deepest(const struct rbtree *tree) ++{ ++ struct rbtree_node *node; ++ ++ node = tree->root; ++ ++ if (node == NULL) ++ return NULL; ++ ++ return rbtree_node_find_deepest(node); ++} ++ ++struct rbtree_node * ++rbtree_postwalk_unlink(struct rbtree_node *node) ++{ ++ struct rbtree_node *parent; ++ int index; ++ ++ if (node == NULL) ++ return NULL; ++ ++ assert(node->children[RBTREE_LEFT] == NULL); ++ assert(node->children[RBTREE_RIGHT] == NULL); ++ ++ parent = rbtree_node_parent(node); ++ ++ if (parent == NULL) ++ return NULL; ++ ++ index = rbtree_node_index(node, parent); ++ parent->children[index] = NULL; ++ node = parent->children[RBTREE_RIGHT]; ++ ++ if (node == NULL) ++ return parent; ++ ++ return rbtree_node_find_deepest(node); ++} +diff --git a/librbtree/rbtree.h b/librbtree/rbtree.h +new file mode 100644 +index 0000000..cbb519e +--- /dev/null ++++ b/librbtree/rbtree.h +@@ -0,0 +1,310 @@ ++/* ++ * Copyright (c) 2010, 2011, 2012 Richard Braun. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES ++ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ++ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, ++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * ++ * Red-black tree. ++ */ ++ ++#ifndef _RBTREE_H ++#define _RBTREE_H ++ ++#include <assert.h> ++#include <stddef.h> ++ ++#include "macros.h" ++ ++/* ++ * Indexes of the left and right nodes in the children array of a node. ++ */ ++#define RBTREE_LEFT 0 ++#define RBTREE_RIGHT 1 ++ ++/* ++ * Red-black node. ++ */ ++struct rbtree_node; ++ ++/* ++ * Red-black tree. ++ */ ++struct rbtree; ++ ++/* ++ * Static tree initializer. ++ */ ++#define RBTREE_INITIALIZER { NULL } ++ ++#include "rbtree_i.h" ++ ++/* ++ * Initialize a tree. ++ */ ++static inline void ++rbtree_init(struct rbtree *tree) ++{ ++ tree->root = NULL; ++} ++ ++/* ++ * Initialize a node. ++ * ++ * A node is in no tree when its parent points to itself. ++ */ ++static inline void ++rbtree_node_init(struct rbtree_node *node) ++{ ++ assert(rbtree_node_check_alignment(node)); ++ ++ node->parent = (unsigned long)node | RBTREE_COLOR_RED; ++ node->children[RBTREE_LEFT] = NULL; ++ node->children[RBTREE_RIGHT] = NULL; ++} ++ ++/* ++ * Return true if node is in no tree. ++ */ ++static inline int ++rbtree_node_unlinked(const struct rbtree_node *node) ++{ ++ return rbtree_node_parent(node) == node; ++} ++ ++/* ++ * Macro that evaluates to the address of the structure containing the ++ * given node based on the given type and member. ++ */ ++#define rbtree_entry(node, type, member) structof(node, type, member) ++ ++/* ++ * Return true if tree is empty. ++ */ ++static inline int ++rbtree_empty(const struct rbtree *tree) ++{ ++ return tree->root == NULL; ++} ++ ++/* ++ * Look up a node in a tree. ++ * ++ * Note that implementing the lookup algorithm as a macro gives two benefits: ++ * First, it avoids the overhead of a callback function. Next, the type of the ++ * cmp_fn parameter isn't rigid. The only guarantee offered by this ++ * implementation is that the key parameter is the first parameter given to ++ * cmp_fn. This way, users can pass only the value they need for comparison ++ * instead of e.g. allocating a full structure on the stack. ++ * ++ * See rbtree_insert(). ++ */ ++#define rbtree_lookup(tree, key, cmp_fn) \ ++MACRO_BEGIN \ ++ struct rbtree_node *___cur; \ ++ int ___diff; \ ++ \ ++ ___cur = (tree)->root; \ ++ \ ++ while (___cur != NULL) { \ ++ ___diff = cmp_fn(key, ___cur); \ ++ \ ++ if (___diff == 0) \ ++ break; \ ++ \ ++ ___cur = ___cur->children[rbtree_d2i(___diff)]; \ ++ } \ ++ \ ++ ___cur; \ ++MACRO_END ++ ++/* ++ * Look up a node or one of its nearest nodes in a tree. ++ * ++ * This macro essentially acts as rbtree_lookup() but if no entry matched ++ * the key, an additional step is performed to obtain the next or previous ++ * node, depending on the direction (left or right). ++ * ++ * The constraints that apply to the key parameter are the same as for ++ * rbtree_lookup(). ++ */ ++#define rbtree_lookup_nearest(tree, key, cmp_fn, dir) \ ++MACRO_BEGIN \ ++ struct rbtree_node *___cur, *___prev; \ ++ int ___diff, ___index; \ ++ \ ++ ___prev = NULL; \ ++ ___index = -1; \ ++ ___cur = (tree)->root; \ ++ \ ++ while (___cur != NULL) { \ ++ ___diff = cmp_fn(key, ___cur); \ ++ \ ++ if (___diff == 0) \ ++ break; \ ++ \ ++ ___prev = ___cur; \ ++ ___index = rbtree_d2i(___diff); \ ++ ___cur = ___cur->children[___index]; \ ++ } \ ++ \ ++ if (___cur == NULL) \ ++ ___cur = rbtree_nearest(___prev, ___index, dir); \ ++ \ ++ ___cur; \ ++MACRO_END ++ ++/* ++ * Insert a node in a tree. ++ * ++ * This macro performs a standard lookup to obtain the insertion point of ++ * the given node in the tree (it is assumed that the inserted node never ++ * compares equal to any other entry in the tree) and links the node. It ++ * then checks red-black rules violations, and rebalances the tree if ++ * necessary. ++ * ++ * Unlike rbtree_lookup(), the cmp_fn parameter must compare two complete ++ * entries, so it is suggested to use two different comparison inline ++ * functions, such as myobj_cmp_lookup() and myobj_cmp_insert(). There is no ++ * guarantee about the order of the nodes given to the comparison function. ++ * ++ * See rbtree_lookup(). ++ */ ++#define rbtree_insert(tree, node, cmp_fn) \ ++MACRO_BEGIN \ ++ struct rbtree_node *___cur, *___prev; \ ++ int ___diff, ___index; \ ++ \ ++ ___prev = NULL; \ ++ ___index = -1; \ ++ ___cur = (tree)->root; \ ++ \ ++ while (___cur != NULL) { \ ++ ___diff = cmp_fn(node, ___cur); \ ++ assert(___diff != 0); \ ++ ___prev = ___cur; \ ++ ___index = rbtree_d2i(___diff); \ ++ ___cur = ___cur->children[___index]; \ ++ } \ ++ \ ++ rbtree_insert_rebalance(tree, ___prev, ___index, node); \ ++MACRO_END ++ ++/* ++ * Look up a node/slot pair in a tree. ++ * ++ * This macro essentially acts as rbtree_lookup() but in addition to a node, ++ * it also returns a slot, which identifies an insertion point in the tree. ++ * If the returned node is NULL, the slot can be used by rbtree_insert_slot() ++ * to insert without the overhead of an additional lookup. The slot is a ++ * simple unsigned long integer. ++ * ++ * The constraints that apply to the key parameter are the same as for ++ * rbtree_lookup(). ++ */ ++#define rbtree_lookup_slot(tree, key, cmp_fn, slot) \ ++MACRO_BEGIN \ ++ struct rbtree_node *___cur, *___prev; \ ++ int ___diff, ___index; \ ++ \ ++ ___prev = NULL; \ ++ ___index = 0; \ ++ ___cur = (tree)->root; \ ++ \ ++ while (___cur != NULL) { \ ++ ___diff = cmp_fn(key, ___cur); \ ++ \ ++ if (___diff == 0) \ ++ break; \ ++ \ ++ ___prev = ___cur; \ ++ ___index = rbtree_d2i(___diff); \ ++ ___cur = ___cur->children[___index]; \ ++ } \ ++ \ ++ (slot) = rbtree_slot(___prev, ___index); \ ++ ___cur; \ ++MACRO_END ++ ++/* ++ * Insert a node at an insertion point in a tree. ++ * ++ * This macro essentially acts as rbtree_insert() except that it doesn't ++ * obtain the insertion point with a standard lookup. The insertion point ++ * is obtained by calling rbtree_lookup_slot(). In addition, the new node ++ * must not compare equal to an existing node in the tree (i.e. the slot ++ * must denote a NULL node). ++ */ ++static inline void ++rbtree_insert_slot(struct rbtree *tree, unsigned long slot, ++ struct rbtree_node *node) ++{ ++ struct rbtree_node *parent; ++ int index; ++ ++ parent = rbtree_slot_parent(slot); ++ index = rbtree_slot_index(slot); ++ rbtree_insert_rebalance(tree, parent, index, node); ++} ++ ++/* ++ * Remove a node from a tree. ++ * ++ * After completion, the node is stale. ++ */ ++void rbtree_remove(struct rbtree *tree, struct rbtree_node *node); ++ ++/* ++ * Return the first node of a tree. ++ */ ++#define rbtree_first(tree) rbtree_firstlast(tree, RBTREE_LEFT) ++ ++/* ++ * Return the last node of a tree. ++ */ ++#define rbtree_last(tree) rbtree_firstlast(tree, RBTREE_RIGHT) ++ ++/* ++ * Return the node previous to the given node. ++ */ ++#define rbtree_prev(node) rbtree_walk(node, RBTREE_LEFT) ++ ++/* ++ * Return the node next to the given node. ++ */ ++#define rbtree_next(node) rbtree_walk(node, RBTREE_RIGHT) ++ ++/* ++ * Forge a loop to process all nodes of a tree, removing them when visited. ++ * ++ * This macro can only be used to destroy a tree, so that the resources used ++ * by the entries can be released by the user. It basically removes all nodes ++ * without doing any color checking. ++ * ++ * After completion, all nodes and the tree root member are stale. ++ */ ++#define rbtree_for_each_remove(tree, node, tmp) \ ++for (node = rbtree_postwalk_deepest(tree), \ ++ tmp = rbtree_postwalk_unlink(node); \ ++ node != NULL; \ ++ node = tmp, tmp = rbtree_postwalk_unlink(node)) \ ++ ++#endif /* _RBTREE_H */ +diff --git a/librbtree/rbtree_i.h b/librbtree/rbtree_i.h +new file mode 100644 +index 0000000..eb1619f +--- /dev/null ++++ b/librbtree/rbtree_i.h +@@ -0,0 +1,196 @@ ++/* ++ * Copyright (c) 2010, 2011, 2012 Richard Braun. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES ++ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ++ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, ++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#ifndef _RBTREE_I_H ++#define _RBTREE_I_H ++ ++#include <assert.h> ++#include <stddef.h> ++ ++#include "macros.h" ++ ++/* ++ * Red-black node structure. ++ * ++ * To reduce the number of branches and the instruction cache footprint, ++ * the left and right child pointers are stored in an array, and the symmetry ++ * of most tree operations is exploited by using left/right variables when ++ * referring to children. ++ * ++ * In addition, this implementation assumes that all nodes are 4-byte aligned, ++ * so that the least significant bit of the parent member can be used to store ++ * the color of the node. This is true for all modern 32 and 64 bits ++ * architectures, as long as the nodes aren't embedded in structures with ++ * special alignment constraints such as member packing. ++ */ ++struct rbtree_node { ++ unsigned long parent; ++ struct rbtree_node *children[2]; ++}; ++ ++/* ++ * Red-black tree structure. ++ */ ++struct rbtree { ++ struct rbtree_node *root; ++}; ++ ++/* ++ * Masks applied on the parent member of a node to obtain either the ++ * color or the parent address. ++ */ ++#define RBTREE_COLOR_MASK 0x1UL ++#define RBTREE_PARENT_MASK (~0x3UL) ++ ++/* ++ * Node colors. ++ */ ++#define RBTREE_COLOR_RED 0 ++#define RBTREE_COLOR_BLACK 1 ++ ++/* ++ * Masks applied on slots to obtain either the child index or the parent ++ * address. ++ */ ++#define RBTREE_SLOT_INDEX_MASK 0x1UL ++#define RBTREE_SLOT_PARENT_MASK (~RBTREE_SLOT_INDEX_MASK) ++ ++/* ++ * Return true if the given index is a valid child index. ++ */ ++static inline int ++rbtree_check_index(int index) ++{ ++ return index == (index & 1); ++} ++ ++/* ++ * Convert the result of a comparison into an index in the children array ++ * (0 or 1). ++ * ++ * This function is mostly used when looking up a node. ++ */ ++static inline int ++rbtree_d2i(int diff) ++{ ++ return !(diff <= 0); ++} ++ ++/* ++ * Return true if the given pointer is suitably aligned. ++ */ ++static inline int ++rbtree_node_check_alignment(const struct rbtree_node *node) ++{ ++ return ((unsigned long)node & (~RBTREE_PARENT_MASK)) == 0; ++} ++ ++/* ++ * Return the parent of a node. ++ */ ++static inline struct rbtree_node * ++rbtree_node_parent(const struct rbtree_node *node) ++{ ++ return (struct rbtree_node *)(node->parent & RBTREE_PARENT_MASK); ++} ++ ++/* ++ * Translate an insertion point into a slot. ++ */ ++static inline unsigned long ++rbtree_slot(struct rbtree_node *parent, int index) ++{ ++ assert(rbtree_node_check_alignment(parent)); ++ assert(rbtree_check_index(index)); ++ return (unsigned long)parent | index; ++} ++ ++/* ++ * Extract the parent address from a slot. ++ */ ++static inline struct rbtree_node * ++rbtree_slot_parent(unsigned long slot) ++{ ++ return (struct rbtree_node *)(slot & RBTREE_SLOT_PARENT_MASK); ++} ++ ++/* ++ * Extract the index from a slot. ++ */ ++static inline int ++rbtree_slot_index(unsigned long slot) ++{ ++ return slot & RBTREE_SLOT_INDEX_MASK; ++} ++ ++/* ++ * Insert a node in a tree, rebalancing it if necessary. ++ * ++ * The index parameter is the index in the children array of the parent where ++ * the new node is to be inserted. It is ignored if the parent is NULL. ++ * ++ * This function is intended to be used by the rbtree_insert() macro only. ++ */ ++void rbtree_insert_rebalance(struct rbtree *tree, struct rbtree_node *parent, ++ int index, struct rbtree_node *node); ++ ++/* ++ * Return the previous or next node relative to a location in a tree. ++ * ++ * The parent and index parameters define the location, which can be empty. ++ * The direction parameter is either RBTREE_LEFT (to obtain the previous ++ * node) or RBTREE_RIGHT (to obtain the next one). ++ */ ++struct rbtree_node * rbtree_nearest(struct rbtree_node *parent, int index, ++ int direction); ++ ++/* ++ * Return the first or last node of a tree. ++ * ++ * The direction parameter is either RBTREE_LEFT (to obtain the first node) ++ * or RBTREE_RIGHT (to obtain the last one). ++ */ ++struct rbtree_node * rbtree_firstlast(const struct rbtree *tree, int direction); ++ ++/* ++ * Return the node next to, or previous to the given node. ++ * ++ * The direction parameter is either RBTREE_LEFT (to obtain the previous node) ++ * or RBTREE_RIGHT (to obtain the next one). ++ */ ++struct rbtree_node * rbtree_walk(struct rbtree_node *node, int direction); ++ ++/* ++ * Return the left-most deepest node of a tree, which is the starting point of ++ * the postorder traversal performed by rbtree_for_each_remove(). ++ */ ++struct rbtree_node * rbtree_postwalk_deepest(const struct rbtree *tree); ++ ++/* ++ * Unlink a node from its tree and return the next (right) node in postorder. ++ */ ++struct rbtree_node * rbtree_postwalk_unlink(struct rbtree_node *node); ++ ++#endif /* _RBTREE_I_H */ +-- +2.0.0.rc2 + diff --git a/debian/patches/0011-librdxtree-add-a-radix-tree-implementation.patch b/debian/patches/0011-librdxtree-add-a-radix-tree-implementation.patch new file mode 100644 index 00000000..5f784ae8 --- /dev/null +++ b/debian/patches/0011-librdxtree-add-a-radix-tree-implementation.patch @@ -0,0 +1,1439 @@ +From faf8b98c69b66785246651ed3852e98243b4db0c Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Mon, 19 May 2014 18:42:30 +0200 +Subject: [PATCH 11/27] librdxtree: add a radix tree implementation + +Radix trees offer search, insert, and delete operations in time linear +in the size of the key. They are also very cache efficient. + +This is an verbatim copy from git://git.sceen.net/rbraun/librbraun.git +as of 45fef4ad. Only macros.h has been slightly amended. This file +defines two macros, which are also defined in cthreads.h. Undefine +the macros first to work around this problem. + +* librdxtree/Makefile: New file. +* librdxtree/rdxtree.c: Likewise. +* librdxtree/rdxtree.h: Likewise. +* librdxtree/rdxtree_i.h: Likewise. +* librdxtree/error.c: Likewise. +* librdxtree/error.h: Likewise. +* librdxtree/macros.h: Likewise. Also, undefine MACRO_{BEGIN,END}. +* Makefile (lib-subdirs): Add librdxtree. +--- + Makefile | 1 + + librdxtree/Makefile | 27 ++ + librdxtree/error.c | 94 ++++++ + librdxtree/error.h | 84 +++++ + librdxtree/macros.h | 70 +++++ + librdxtree/rdxtree.c | 808 +++++++++++++++++++++++++++++++++++++++++++++++++ + librdxtree/rdxtree.h | 195 ++++++++++++ + librdxtree/rdxtree_i.h | 65 ++++ + 8 files changed, 1344 insertions(+) + create mode 100644 librdxtree/Makefile + create mode 100644 librdxtree/error.c + create mode 100644 librdxtree/error.h + create mode 100644 librdxtree/macros.h + create mode 100644 librdxtree/rdxtree.c + create mode 100644 librdxtree/rdxtree.h + create mode 100644 librdxtree/rdxtree_i.h + +diff --git a/Makefile b/Makefile +index 8a0be83..53dac72 100644 +--- a/Makefile ++++ b/Makefile +@@ -48,6 +48,7 @@ other-subdirs = hurd doc config release include + + # XXX + lib-subdirs += librbtree ++lib-subdirs += librdxtree + + # All the subdirectories together + subdirs = $(lib-subdirs) $(prog-subdirs) $(other-subdirs) +diff --git a/librdxtree/Makefile b/librdxtree/Makefile +new file mode 100644 +index 0000000..c9d3c9d +--- /dev/null ++++ b/librdxtree/Makefile +@@ -0,0 +1,27 @@ ++# Copyright (C) 2014 Free Software Foundation, Inc. ++# ++# This file is part of the GNU Hurd. ++# ++# The GNU Hurd is free software; you can redistribute it and/or ++# modify it under the terms of the GNU General Public License as ++# published by the Free Software Foundation; either version 2, or (at ++# your option) any later version. ++# ++# The GNU Hurd is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with the GNU Hurd. If not, see <http://www.gnu.org/licenses/>. ++ ++dir := librdxtree ++makemode := library ++ ++libname := librdxtree ++SRCS = rdxtree.c ++installhdrs = rdxtree.h rdxtree_i.h macros.h ++ ++OBJS = $(SRCS:.c=.o) ++ ++include ../Makeconf +diff --git a/librdxtree/error.c b/librdxtree/error.c +new file mode 100644 +index 0000000..dcc574f +--- /dev/null ++++ b/librdxtree/error.c +@@ -0,0 +1,94 @@ ++/* ++ * Copyright (c) 2009, 2010 Richard Braun. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES ++ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ++ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, ++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include <errno.h> ++#include <stdio.h> ++#include <stdlib.h> ++ ++#include "error.h" ++#include "macros.h" ++ ++/* ++ * Error message table. ++ * ++ * This table must be consistent with the enum defined in error.h. ++ */ ++static const char *errormsg_table[] = { ++ "success", ++ "unknown error", ++ "invalid argument", ++ "not enough space", ++ "invalid format", ++ "not enough resources", ++ "operation not permitted", ++ "resource busy", ++ "memory limit exceeded", ++ "operation timed out", ++ "operation would block", ++ "entry not found", ++ "internal memory allocator failure" ++}; ++ ++#define ERRORMSG_TABLE_SIZE ARRAY_SIZE(errormsg_table) ++ ++const char * ++error_str(unsigned int error) ++{ ++ if (error >= ERRORMSG_TABLE_SIZE) ++ return "invalid error code"; ++ ++ return errormsg_table[error]; ++} ++ ++unsigned int ++error_from_errno(int errno_code) ++{ ++ switch (errno_code) { ++ case 0: ++ return ERR_SUCCESS; ++ case EINVAL: ++ return ERR_INVAL; ++ case ENOMEM: ++ return ERR_NOMEM; ++ case EAGAIN: ++ return ERR_NORES; ++ case EPERM: ++ return ERR_PERM; ++ case EBUSY: ++ return ERR_BUSY; ++ case ETIMEDOUT: ++ return ERR_TIMEDOUT; ++ default: ++ fprintf(stderr, "unable to translate errno code (%d)\n", errno_code); ++ return ERR_UNKNOWN; ++ } ++} ++ ++void ++error_die(unsigned int error) ++{ ++ fprintf(stderr, "process terminating, reason: %s\n", error_str(error)); ++ abort(); ++} +diff --git a/librdxtree/error.h b/librdxtree/error.h +new file mode 100644 +index 0000000..81babf7 +--- /dev/null ++++ b/librdxtree/error.h +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 2009, 2010 Richard Braun. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES ++ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ++ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, ++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#ifndef _ERROR_H ++#define _ERROR_H ++ ++#include "macros.h" ++ ++/* ++ * List of errors this library can return. ++ * ++ * ERR_SUCCESS is guaranteed to be 0, allowing code such as : ++ * ++ * error = do_smth(); ++ * ++ * if (error) { ++ * ...; ++ * } ++ */ ++enum { ++ ERR_SUCCESS, ++ ERR_UNKNOWN, ++ ERR_INVAL, ++ ERR_NOMEM, ++ ERR_FORMAT, ++ ERR_NORES, ++ ERR_PERM, ++ ERR_BUSY, ++ ERR_MEMLIM, ++ ERR_TIMEDOUT, ++ ERR_WOULDBLOCK, ++ ERR_LOOKUP, ++ ERR_MEM_CACHE ++}; ++ ++/* ++ * Return the message matching the given error. ++ * ++ * The returned address points to a statically allocated, read only, ++ * null-terminated string literal. The caller must not attempt to use it ++ * for anything else than error reporting. ++ */ ++const char * error_str(unsigned int error); ++ ++/* ++ * Map standard error codes to error values. ++ * ++ * This function accepts a subset of the standard error codes in errno.h. ++ * When called, and if the errno value is handled, it will return the ++ * corresponding ERR_xxx code. Otherwise ERR_UNKNOWN is returned. ++ */ ++unsigned int error_from_errno(int errno_code); ++ ++/* ++ * Exit the current process, reporting an error. ++ * ++ * This function will report the given error and make the process exit, ++ * using the error code as the exit() parameter. ++ */ ++void __noreturn error_die(unsigned int error); ++ ++#endif /* _ERROR_H */ +diff --git a/librdxtree/macros.h b/librdxtree/macros.h +new file mode 100644 +index 0000000..f0960a3 +--- /dev/null ++++ b/librdxtree/macros.h +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (c) 2009, 2010 Richard Braun. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES ++ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ++ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, ++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * ++ * Helper macros. ++ */ ++ ++#ifndef _MACROS_H ++#define _MACROS_H ++ ++#include <stddef.h> ++ ++#undef MACRO_BEGIN ++#undef MACRO_END ++#define MACRO_BEGIN ({ ++#define MACRO_END }) ++ ++#define XQUOTE(x) #x ++#define QUOTE(x) XQUOTE(x) ++ ++#define STRLEN(x) (sizeof(x) - 1) ++#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) ++ ++#define MIN(a, b) ((a) < (b) ? (a) : (b)) ++#define MAX(a, b) ((a) > (b) ? (a) : (b)) ++ ++#define P2ALIGNED(x, a) (((x) & ((a) - 1)) == 0) ++#define ISP2(x) P2ALIGNED(x, x) ++#define P2ALIGN(x, a) ((x) & -(a)) ++#define P2ROUND(x, a) (-(-(x) & -(a))) ++#define P2END(x, a) (-(~(x) & -(a))) ++ ++#define structof(ptr, type, member) \ ++ ((type *)((char *)(ptr) - offsetof(type, member))) ++ ++#define alignof(x) __alignof__(x) ++ ++#define likely(expr) __builtin_expect(!!(expr), 1) ++#define unlikely(expr) __builtin_expect(!!(expr), 0) ++ ++#define barrier() asm volatile("" : : : "memory") ++ ++#define __noreturn __attribute__((noreturn)) ++#define __aligned(x) __attribute__((aligned(x))) ++ ++#define __format_printf(fmt, args) \ ++ __attribute__((format(printf, fmt, args))) ++ ++#endif /* _MACROS_H */ +diff --git a/librdxtree/rdxtree.c b/librdxtree/rdxtree.c +new file mode 100644 +index 0000000..36fd09f +--- /dev/null ++++ b/librdxtree/rdxtree.c +@@ -0,0 +1,808 @@ ++/* ++ * Copyright (c) 2013 Richard Braun. ++ * ++ * This program is free software: you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, either version 3 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see <http://www.gnu.org/licenses/>. ++ */ ++ ++/* ++ * Copyright (c) 2011 Richard Braun. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES ++ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ++ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, ++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include <assert.h> ++#include <limits.h> ++#include <stddef.h> ++#include <stdlib.h> ++#include <string.h> ++ ++#include "error.h" ++#include "macros.h" ++#include "rdxtree.h" ++#include "rdxtree_i.h" ++ ++/* ++ * Mask applied on an entry to obtain its address. ++ */ ++#define RDXTREE_ENTRY_ADDR_MASK (~0x3UL) ++ ++/* ++ * Global properties used to shape radix trees. ++ */ ++#define RDXTREE_RADIX 6 ++#define RDXTREE_RADIX_SIZE (1UL << RDXTREE_RADIX) ++#define RDXTREE_RADIX_MASK (RDXTREE_RADIX_SIZE - 1) ++ ++#if RDXTREE_RADIX < 6 ++typedef unsigned long rdxtree_bm_t; ++#define rdxtree_ffs(x) __builtin_ffsl(x) ++#elif RDXTREE_RADIX == 6 /* RDXTREE_RADIX < 6 */ ++typedef unsigned long long rdxtree_bm_t; ++#define rdxtree_ffs(x) __builtin_ffsll(x) ++#else /* RDXTREE_RADIX < 6 */ ++#error "radix too high" ++#endif /* RDXTREE_RADIX < 6 */ ++ ++/* ++ * Allocation bitmap size in bits. ++ */ ++#define RDXTREE_BM_SIZE (sizeof(rdxtree_bm_t) * CHAR_BIT) ++ ++/* ++ * Empty/full allocation bitmap words. ++ */ ++#define RDXTREE_BM_EMPTY ((rdxtree_bm_t)0) ++#define RDXTREE_BM_FULL \ ++ ((~(rdxtree_bm_t)0) >> (RDXTREE_BM_SIZE - RDXTREE_RADIX_SIZE)) ++ ++/* ++ * These macros can be replaced by actual functions in an environment ++ * that provides lockless synchronization such as RCU. ++ */ ++#define llsync_assign_ptr(ptr, value) ((ptr) = (value)) ++#define llsync_read_ptr(ptr) (ptr) ++ ++/* ++ * Radix tree node. ++ * ++ * The height of a tree is the number of nodes to traverse until stored ++ * pointers are reached. A height of 0 means the entries of a node (or the ++ * tree root) directly point to stored pointers. ++ * ++ * The index is valid if and only if the parent isn't NULL. ++ * ++ * Concerning the allocation bitmap, a bit is set when the node it denotes, ++ * or one of its children, can be used to allocate an entry. Conversely, a bit ++ * is clear when the matching node and all of its children have no free entry. ++ * ++ * In order to support safe lockless lookups, in particular during a resize, ++ * each node includes the height of its subtree, which is invariant during ++ * the entire node lifetime. Since the tree height does vary, it can't be ++ * used to determine whether the tree root is a node or a stored pointer. ++ * This implementation assumes that all nodes and stored pointers are at least ++ * 4-byte aligned, and uses the least significant bit of entries to indicate ++ * the pointer type. This bit is set for internal nodes, and clear for stored ++ * pointers so that they can be accessed from slots without conversion. ++ */ ++struct rdxtree_node { ++ struct rdxtree_node *parent; ++ unsigned int index; ++ unsigned int height; ++ unsigned int nr_entries; ++ rdxtree_bm_t alloc_bm; ++ void *entries[RDXTREE_RADIX_SIZE]; ++}; ++ ++#ifdef RDXTREE_ENABLE_NODE_CREATION_FAILURES ++unsigned int rdxtree_fail_node_creation_threshold; ++unsigned int rdxtree_nr_node_creations; ++#endif /* RDXTREE_ENABLE_NODE_CREATION_FAILURES */ ++ ++static inline int ++rdxtree_check_alignment(const void *ptr) ++{ ++ return ((unsigned long)ptr & ~RDXTREE_ENTRY_ADDR_MASK) == 0; ++} ++ ++static inline void * ++rdxtree_entry_addr(void *entry) ++{ ++ return (void *)((unsigned long)entry & RDXTREE_ENTRY_ADDR_MASK); ++} ++ ++static inline int ++rdxtree_entry_is_node(const void *entry) ++{ ++ return ((unsigned long)entry & 1) != 0; ++} ++ ++static inline void * ++rdxtree_node_to_entry(struct rdxtree_node *node) ++{ ++ return (void *)((unsigned long)node | 1); ++} ++ ++static int ++rdxtree_node_create(struct rdxtree_node **nodep, unsigned int height) ++{ ++ struct rdxtree_node *node; ++ ++#ifdef RDXTREE_ENABLE_NODE_CREATION_FAILURES ++ if (rdxtree_fail_node_creation_threshold != 0) { ++ rdxtree_nr_node_creations++; ++ ++ if (rdxtree_nr_node_creations == rdxtree_fail_node_creation_threshold) ++ return ERR_NOMEM; ++ } ++#endif /* RDXTREE_ENABLE_NODE_CREATION_FAILURES */ ++ ++ node = malloc(sizeof(*node)); ++ ++ if (node == NULL) ++ return ERR_NOMEM; ++ ++ assert(rdxtree_check_alignment(node)); ++ node->parent = NULL; ++ node->height = height; ++ node->nr_entries = 0; ++ node->alloc_bm = RDXTREE_BM_FULL; ++ memset(node->entries, 0, sizeof(node->entries)); ++ *nodep = node; ++ return 0; ++} ++ ++static void ++rdxtree_node_schedule_destruction(struct rdxtree_node *node) ++{ ++ /* ++ * This function is intended to use the appropriate interface to defer ++ * destruction until all read-side references are dropped in an ++ * environment that provides lockless synchronization. ++ * ++ * Otherwise, it simply "schedules" destruction immediately. ++ */ ++ free(node); ++} ++ ++static inline void ++rdxtree_node_link(struct rdxtree_node *node, struct rdxtree_node *parent, ++ unsigned int index) ++{ ++ node->parent = parent; ++ node->index = index; ++} ++ ++static inline void ++rdxtree_node_unlink(struct rdxtree_node *node) ++{ ++ assert(node->parent != NULL); ++ node->parent = NULL; ++} ++ ++static inline int ++rdxtree_node_full(struct rdxtree_node *node) ++{ ++ return (node->nr_entries == ARRAY_SIZE(node->entries)); ++} ++ ++static inline int ++rdxtree_node_empty(struct rdxtree_node *node) ++{ ++ return (node->nr_entries == 0); ++} ++ ++static inline void ++rdxtree_node_insert(struct rdxtree_node *node, unsigned int index, ++ void *entry) ++{ ++ assert(index < ARRAY_SIZE(node->entries)); ++ assert(node->entries[index] == NULL); ++ ++ node->nr_entries++; ++ llsync_assign_ptr(node->entries[index], entry); ++} ++ ++static inline void ++rdxtree_node_insert_node(struct rdxtree_node *node, unsigned int index, ++ struct rdxtree_node *child) ++{ ++ rdxtree_node_insert(node, index, rdxtree_node_to_entry(child)); ++} ++ ++static inline void ++rdxtree_node_remove(struct rdxtree_node *node, unsigned int index) ++{ ++ assert(index < ARRAY_SIZE(node->entries)); ++ assert(node->entries[index] != NULL); ++ ++ node->nr_entries--; ++ llsync_assign_ptr(node->entries[index], NULL); ++} ++ ++static inline void * ++rdxtree_node_find(struct rdxtree_node *node, unsigned int index, int get_slot) ++{ ++ void *ptr; ++ ++ while (index < ARRAY_SIZE(node->entries)) { ++ ptr = rdxtree_entry_addr(node->entries[index]); ++ ++ if (ptr != NULL) ++ return get_slot ? (void *)&node->entries[index] : ptr; ++ ++ index++; ++ } ++ ++ return NULL; ++} ++ ++static inline void ++rdxtree_node_bm_set(struct rdxtree_node *node, unsigned int index) ++{ ++ node->alloc_bm |= (rdxtree_bm_t)1 << index; ++} ++ ++static inline void ++rdxtree_node_bm_clear(struct rdxtree_node *node, unsigned int index) ++{ ++ node->alloc_bm &= ~((rdxtree_bm_t)1 << index); ++} ++ ++static inline int ++rdxtree_node_bm_is_set(struct rdxtree_node *node, unsigned int index) ++{ ++ return (node->alloc_bm & ((rdxtree_bm_t)1 << index)); ++} ++ ++static inline int ++rdxtree_node_bm_empty(struct rdxtree_node *node) ++{ ++ return (node->alloc_bm == RDXTREE_BM_EMPTY); ++} ++ ++static inline unsigned int ++rdxtree_node_bm_first(struct rdxtree_node *node) ++{ ++ return rdxtree_ffs(node->alloc_bm) - 1; ++} ++ ++static inline unsigned long long ++rdxtree_max_key(unsigned int height) ++{ ++ size_t shift; ++ ++ shift = RDXTREE_RADIX * height; ++ ++ if (likely(shift < (sizeof(unsigned long long) * CHAR_BIT))) ++ return (1ULL << shift) - 1; ++ else ++ return ~0ULL; ++} ++ ++static void ++rdxtree_shrink(struct rdxtree *tree) ++{ ++ struct rdxtree_node *node; ++ void *entry; ++ ++ while (tree->height > 0) { ++ node = rdxtree_entry_addr(tree->root); ++ ++ if (node->nr_entries != 1) ++ break; ++ ++ entry = node->entries[0]; ++ ++ if (entry == NULL) ++ break; ++ ++ tree->height--; ++ ++ if (tree->height > 0) ++ rdxtree_node_unlink(rdxtree_entry_addr(entry)); ++ ++ llsync_assign_ptr(tree->root, entry); ++ rdxtree_node_schedule_destruction(node); ++ } ++} ++ ++static int ++rdxtree_grow(struct rdxtree *tree, unsigned long long key) ++{ ++ struct rdxtree_node *root, *node; ++ unsigned int new_height; ++ int error; ++ ++ new_height = tree->height + 1; ++ ++ while (key > rdxtree_max_key(new_height)) ++ new_height++; ++ ++ if (tree->root == NULL) { ++ tree->height = new_height; ++ return ERR_SUCCESS; ++ } ++ ++ root = rdxtree_entry_addr(tree->root); ++ ++ do { ++ error = rdxtree_node_create(&node, tree->height); ++ ++ if (error) { ++ rdxtree_shrink(tree); ++ return error; ++ } ++ ++ if (tree->height == 0) ++ rdxtree_node_bm_clear(node, 0); ++ else { ++ rdxtree_node_link(root, node, 0); ++ ++ if (rdxtree_node_bm_empty(root)) ++ rdxtree_node_bm_clear(node, 0); ++ } ++ ++ rdxtree_node_insert(node, 0, tree->root); ++ tree->height++; ++ llsync_assign_ptr(tree->root, rdxtree_node_to_entry(node)); ++ root = node; ++ } while (new_height > tree->height); ++ ++ return ERR_SUCCESS; ++} ++ ++static void ++rdxtree_cleanup(struct rdxtree *tree, struct rdxtree_node *node) ++{ ++ struct rdxtree_node *prev; ++ ++ for (;;) { ++ if (likely(!rdxtree_node_empty(node))) { ++ if (unlikely(node->parent == NULL)) ++ rdxtree_shrink(tree); ++ ++ break; ++ } ++ ++ if (node->parent == NULL) { ++ tree->height = 0; ++ llsync_assign_ptr(tree->root, NULL); ++ rdxtree_node_schedule_destruction(node); ++ break; ++ } ++ ++ prev = node; ++ node = node->parent; ++ rdxtree_node_unlink(prev); ++ rdxtree_node_remove(node, prev->index); ++ rdxtree_node_schedule_destruction(prev); ++ } ++} ++ ++static void ++rdxtree_insert_bm_clear(struct rdxtree_node *node, unsigned int index) ++{ ++ for (;;) { ++ rdxtree_node_bm_clear(node, index); ++ ++ if (!rdxtree_node_full(node) || (node->parent == NULL)) ++ break; ++ ++ index = node->index; ++ node = node->parent; ++ } ++} ++ ++int ++rdxtree_insert_common(struct rdxtree *tree, unsigned long long key, ++ void *ptr, void ***slotp) ++{ ++ struct rdxtree_node *node, *prev; ++ unsigned int height, shift, index = index; ++ int error; ++ ++ assert(ptr != NULL); ++ assert(rdxtree_check_alignment(ptr)); ++ ++ if (unlikely(key > rdxtree_max_key(tree->height))) { ++ error = rdxtree_grow(tree, key); ++ ++ if (error) ++ return error; ++ } ++ ++ height = tree->height; ++ ++ if (unlikely(height == 0)) { ++ if (tree->root != NULL) ++ return ERR_BUSY; ++ ++ llsync_assign_ptr(tree->root, ptr); ++ ++ if (slotp != NULL) ++ *slotp = &tree->root; ++ ++ return ERR_SUCCESS; ++ } ++ ++ node = rdxtree_entry_addr(tree->root); ++ shift = (height - 1) * RDXTREE_RADIX; ++ prev = NULL; ++ ++ do { ++ if (node == NULL) { ++ error = rdxtree_node_create(&node, height - 1); ++ ++ if (error) { ++ if (prev == NULL) ++ tree->height = 0; ++ else ++ rdxtree_cleanup(tree, prev); ++ ++ return error; ++ } ++ ++ if (prev == NULL) ++ llsync_assign_ptr(tree->root, rdxtree_node_to_entry(node)); ++ else { ++ rdxtree_node_link(node, prev, index); ++ rdxtree_node_insert_node(prev, index, node); ++ } ++ } ++ ++ prev = node; ++ index = (unsigned int)(key >> shift) & RDXTREE_RADIX_MASK; ++ node = rdxtree_entry_addr(prev->entries[index]); ++ shift -= RDXTREE_RADIX; ++ height--; ++ } while (height > 0); ++ ++ if (unlikely(node != NULL)) ++ return ERR_BUSY; ++ ++ rdxtree_node_insert(prev, index, ptr); ++ rdxtree_insert_bm_clear(prev, index); ++ ++ if (slotp != NULL) ++ *slotp = &prev->entries[index]; ++ ++ return ERR_SUCCESS; ++} ++ ++int ++rdxtree_insert_alloc_common(struct rdxtree *tree, void *ptr, ++ unsigned long long *keyp, void ***slotp) ++{ ++ struct rdxtree_node *node, *prev; ++ unsigned long long key; ++ unsigned int height, shift, index = index; ++ int error; ++ ++ assert(ptr != NULL); ++ assert(rdxtree_check_alignment(ptr)); ++ ++ height = tree->height; ++ ++ if (unlikely(height == 0)) { ++ if (tree->root == NULL) { ++ llsync_assign_ptr(tree->root, ptr); ++ *keyp = 0; ++ ++ if (slotp != NULL) ++ *slotp = &tree->root; ++ ++ return ERR_SUCCESS; ++ } ++ ++ goto grow; ++ } ++ ++ node = rdxtree_entry_addr(tree->root); ++ key = 0; ++ shift = (height - 1) * RDXTREE_RADIX; ++ prev = NULL; ++ ++ do { ++ if (node == NULL) { ++ error = rdxtree_node_create(&node, height - 1); ++ ++ if (error) { ++ rdxtree_cleanup(tree, prev); ++ return error; ++ } ++ ++ rdxtree_node_link(node, prev, index); ++ rdxtree_node_insert_node(prev, index, node); ++ } ++ ++ prev = node; ++ index = rdxtree_node_bm_first(node); ++ ++ if (index == (unsigned int)-1) ++ goto grow; ++ ++ key |= (unsigned long long)index << shift; ++ node = rdxtree_entry_addr(node->entries[index]); ++ shift -= RDXTREE_RADIX; ++ height--; ++ } while (height > 0); ++ ++ rdxtree_node_insert(prev, index, ptr); ++ rdxtree_insert_bm_clear(prev, index); ++ ++ if (slotp != NULL) ++ *slotp = &prev->entries[index]; ++ ++ goto out; ++ ++grow: ++ key = rdxtree_max_key(height) + 1; ++ error = rdxtree_insert_common(tree, key, ptr, slotp); ++ ++ if (error) ++ return error; ++ ++out: ++ *keyp = key; ++ return ERR_SUCCESS; ++} ++ ++static void ++rdxtree_remove_bm_set(struct rdxtree_node *node, unsigned int index) ++{ ++ do { ++ rdxtree_node_bm_set(node, index); ++ ++ if (node->parent == NULL) ++ break; ++ ++ index = node->index; ++ node = node->parent; ++ } while (!rdxtree_node_bm_is_set(node, index)); ++} ++ ++void * ++rdxtree_remove(struct rdxtree *tree, unsigned long long key) ++{ ++ struct rdxtree_node *node, *prev; ++ unsigned int height, shift, index; ++ ++ height = tree->height; ++ ++ if (unlikely(key > rdxtree_max_key(height))) ++ return NULL; ++ ++ node = rdxtree_entry_addr(tree->root); ++ ++ if (unlikely(height == 0)) { ++ llsync_assign_ptr(tree->root, NULL); ++ return node; ++ } ++ ++ shift = (height - 1) * RDXTREE_RADIX; ++ ++ do { ++ if (node == NULL) ++ return NULL; ++ ++ prev = node; ++ index = (unsigned int)(key >> shift) & RDXTREE_RADIX_MASK; ++ node = rdxtree_entry_addr(node->entries[index]); ++ shift -= RDXTREE_RADIX; ++ height--; ++ } while (height > 0); ++ ++ if (node == NULL) ++ return NULL; ++ ++ rdxtree_node_remove(prev, index); ++ rdxtree_remove_bm_set(prev, index); ++ rdxtree_cleanup(tree, prev); ++ return node; ++} ++ ++void * ++rdxtree_lookup_common(const struct rdxtree *tree, unsigned long long key, ++ int get_slot) ++{ ++ struct rdxtree_node *node, *prev; ++ unsigned int height, shift, index; ++ void *entry; ++ ++ entry = llsync_read_ptr(tree->root); ++ ++ if (entry == NULL) { ++ node = NULL; ++ height = 0; ++ } else { ++ node = rdxtree_entry_addr(entry); ++ height = rdxtree_entry_is_node(entry) ? node->height + 1 : 0; ++ } ++ ++ if (key > rdxtree_max_key(height)) ++ return NULL; ++ ++ if (height == 0) { ++ if (node == NULL) ++ return NULL; ++ ++ return get_slot ? (void *)&tree->root : node; ++ } ++ ++ shift = (height - 1) * RDXTREE_RADIX; ++ ++ do { ++ if (node == NULL) ++ return NULL; ++ ++ prev = node; ++ index = (unsigned int)(key >> shift) & RDXTREE_RADIX_MASK; ++ entry = llsync_read_ptr(node->entries[index]); ++ node = rdxtree_entry_addr(entry); ++ shift -= RDXTREE_RADIX; ++ height--; ++ } while (height > 0); ++ ++ if (node == NULL) ++ return NULL; ++ ++ return get_slot ? (void *)&prev->entries[index] : node; ++} ++ ++void * ++rdxtree_replace_slot(void **slot, void *ptr) ++{ ++ void *old; ++ ++ assert(ptr != NULL); ++ assert(rdxtree_check_alignment(ptr)); ++ ++ old = *slot; ++ assert(old != NULL); ++ assert(rdxtree_check_alignment(old)); ++ llsync_assign_ptr(*slot, ptr); ++ return old; ++} ++ ++static struct rdxtree_node * ++rdxtree_walk(struct rdxtree *tree, struct rdxtree_node *node) ++{ ++ struct rdxtree_node *prev, *child; ++ unsigned int height, index; ++ ++ if (node == NULL) { ++ height = tree->height; ++ node = rdxtree_entry_addr(tree->root); ++ ++ while (height > 1) { ++ node = rdxtree_node_find(node, 0, 0); ++ height--; ++ } ++ ++ return node; ++ } ++ ++ height = 0; ++ ++ for (;;) { ++ prev = node->parent; ++ ++ if (prev == NULL) ++ return NULL; ++ ++ index = node->index; ++ child = rdxtree_node_find(prev, index + 1, 0); ++ ++ if (child != NULL) ++ break; ++ ++ height++; ++ node = prev; ++ } ++ ++ node = child; ++ ++ while (height > 0) { ++ node = rdxtree_node_find(node, 0, 0); ++ height--; ++ } ++ ++ return node; ++} ++ ++void * ++rdxtree_iter_next(struct rdxtree *tree, struct rdxtree_iter *iter) ++{ ++ unsigned int index; ++ ++ if (tree->height == 0) { ++ if (iter->slot != NULL) ++ return NULL; ++ ++ iter->slot = &tree->root; ++ return *iter->slot; ++ } ++ ++ if (iter->node != NULL) { ++ index = iter->slot - ((struct rdxtree_node *)iter->node)->entries; ++ iter->slot = rdxtree_node_find(iter->node, index + 1, 1); ++ } ++ ++ if (iter->slot == NULL) { ++ iter->node = rdxtree_walk(tree, iter->node); ++ ++ if (iter->node != NULL) ++ iter->slot = rdxtree_node_find(iter->node, 0, 1); ++ } ++ ++ if (iter->slot == NULL) ++ return NULL; ++ ++ return *iter->slot; ++} ++ ++void ++rdxtree_remove_all(struct rdxtree *tree) ++{ ++ struct rdxtree_node *node, *parent, *next; ++ unsigned int height, index; ++ ++ height = tree->height; ++ ++ if (height == 0) { ++ if (tree->root != NULL) ++ llsync_assign_ptr(tree->root, NULL); ++ ++ return; ++ } ++ ++ node = rdxtree_walk(tree, NULL); ++ ++ do { ++ next = rdxtree_walk(tree, node); ++ ++ parent = node->parent; ++ ++ if (parent != NULL) { ++ index = node->index; ++ rdxtree_node_remove(parent, index); ++ rdxtree_remove_bm_set(parent, index); ++ rdxtree_cleanup(tree, parent); ++ node->parent = NULL; ++ } ++ ++ rdxtree_node_schedule_destruction(node); ++ ++ node = next; ++ } while (node != NULL); ++} +diff --git a/librdxtree/rdxtree.h b/librdxtree/rdxtree.h +new file mode 100644 +index 0000000..8ccfa25 +--- /dev/null ++++ b/librdxtree/rdxtree.h +@@ -0,0 +1,195 @@ ++/* ++ * Copyright (c) 2013 Richard Braun. ++ * ++ * This program is free software: you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, either version 3 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see <http://www.gnu.org/licenses/>. ++ */ ++ ++/* ++ * Copyright (c) 2011 Richard Braun. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES ++ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ++ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, ++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * ++ * Radix tree. ++ * ++ * In addition to the standard insertion operation, this implementation ++ * can allocate keys for the caller at insertion time. ++ */ ++ ++#ifndef _RDXTREE_H ++#define _RDXTREE_H ++ ++#include <stddef.h> ++ ++/* ++ * Radix tree. ++ */ ++struct rdxtree; ++ ++/* ++ * Radix tree iterator. ++ */ ++struct rdxtree_iter; ++ ++/* ++ * Static tree initializer. ++ */ ++#define RDXTREE_INITIALIZER { 0, NULL } ++ ++#include "rdxtree_i.h" ++ ++/* ++ * Initialize a tree. ++ */ ++static inline void ++rdxtree_init(struct rdxtree *tree) ++{ ++ tree->height = 0; ++ tree->root = NULL; ++} ++ ++/* ++ * Insert a pointer in a tree. ++ * ++ * The ptr parameter must not be NULL. ++ */ ++static inline int ++rdxtree_insert(struct rdxtree *tree, unsigned long long key, void *ptr) ++{ ++ return rdxtree_insert_common(tree, key, ptr, NULL); ++} ++ ++/* ++ * Insert a pointer in a tree and obtain its slot. ++ * ++ * The ptr and slotp parameters must not be NULL. If successful, the slot of ++ * the newly inserted pointer is stored at the address pointed to by the slotp ++ * parameter. ++ */ ++static inline int ++rdxtree_insert_slot(struct rdxtree *tree, unsigned long long key, void *ptr, ++ void ***slotp) ++{ ++ return rdxtree_insert_common(tree, key, ptr, slotp); ++} ++ ++/* ++ * Insert a pointer in a tree, for which a new key is allocated. ++ * ++ * The ptr and keyp parameters must not be NULL. The newly allocated key is ++ * stored at the address pointed to by the keyp parameter. ++ */ ++static inline int ++rdxtree_insert_alloc(struct rdxtree *tree, void *ptr, unsigned long long *keyp) ++{ ++ return rdxtree_insert_alloc_common(tree, ptr, keyp, NULL); ++} ++ ++/* ++ * Insert a pointer in a tree, for which a new key is allocated, and obtain ++ * its slot. ++ * ++ * The ptr, keyp and slotp parameters must not be NULL. The newly allocated ++ * key is stored at the address pointed to by the keyp parameter while the ++ * slot of the inserted pointer is stored at the address pointed to by the ++ * slotp parameter. ++ */ ++static inline int ++rdxtree_insert_alloc_slot(struct rdxtree *tree, void *ptr, ++ unsigned long long *keyp, void ***slotp) ++{ ++ return rdxtree_insert_alloc_common(tree, ptr, keyp, slotp); ++} ++ ++/* ++ * Remove a pointer from a tree. ++ * ++ * The matching pointer is returned if successful, NULL otherwise. ++ */ ++void * rdxtree_remove(struct rdxtree *tree, unsigned long long key); ++ ++/* ++ * Look up a pointer in a tree. ++ * ++ * The matching pointer is returned if successful, NULL otherwise. ++ */ ++static inline void * ++rdxtree_lookup(const struct rdxtree *tree, unsigned long long key) ++{ ++ return rdxtree_lookup_common(tree, key, 0); ++} ++ ++/* ++ * Look up a slot in a tree. ++ * ++ * A slot is a pointer to a stored pointer in a tree. It can be used as ++ * a placeholder for fast replacements to avoid multiple lookups on the same ++ * key. ++ * ++ * A slot for the matching pointer is returned if successful, NULL otherwise. ++ * ++ * See rdxtree_replace_slot(). ++ */ ++static inline void ** ++rdxtree_lookup_slot(const struct rdxtree *tree, unsigned long long key) ++{ ++ return rdxtree_lookup_common(tree, key, 1); ++} ++ ++/* ++ * Replace a pointer in a tree. ++ * ++ * The ptr parameter must not be NULL. The previous pointer is returned. ++ * ++ * See rdxtree_lookup_slot(). ++ */ ++void * rdxtree_replace_slot(void **slot, void *ptr); ++ ++/* ++ * Forge a loop to process all pointers of a tree. ++ */ ++#define rdxtree_for_each(tree, iter, ptr) \ ++for (rdxtree_iter_init(iter), ptr = rdxtree_iter_next(tree, iter); \ ++ ptr != NULL; \ ++ ptr = rdxtree_iter_next(tree, iter)) ++ ++/* ++ * Remove all pointers from a tree. ++ * ++ * The common way to destroy a tree and its pointers is to loop over all ++ * the pointers using rdxtree_for_each(), freeing them, then call this ++ * function. ++ */ ++void rdxtree_remove_all(struct rdxtree *tree); ++ ++#endif /* _RDXTREE_H */ +diff --git a/librdxtree/rdxtree_i.h b/librdxtree/rdxtree_i.h +new file mode 100644 +index 0000000..4761beb +--- /dev/null ++++ b/librdxtree/rdxtree_i.h +@@ -0,0 +1,65 @@ ++/* ++ * Copyright (c) 2013 Richard Braun. ++ * ++ * This program is free software: you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, either version 3 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see <http://www.gnu.org/licenses/>. ++ */ ++ ++#ifndef _RDXTREE_I_H ++#define _RDXTREE_I_H ++ ++/* ++ * Radix tree. ++ */ ++struct rdxtree { ++ unsigned int height; ++ void *root; ++}; ++ ++/* ++ * Radix tree iterator. ++ */ ++struct rdxtree_iter { ++ void *node; ++ void **slot; ++}; ++ ++/* ++ * Initialize an iterator. ++ */ ++static inline void ++rdxtree_iter_init(struct rdxtree_iter *iter) ++{ ++ iter->node = NULL; ++ iter->slot = NULL; ++} ++ ++int rdxtree_insert_common(struct rdxtree *tree, unsigned long long key, ++ void *ptr, void ***slotp); ++ ++int rdxtree_insert_alloc_common(struct rdxtree *tree, void *ptr, ++ unsigned long long *keyp, void ***slotp); ++ ++void * rdxtree_lookup_common(const struct rdxtree *tree, unsigned long long key, ++ int get_slot); ++ ++/* ++ * Walk over pointers in a tree. ++ * ++ * Move the iterator to the next pointer in the given tree. ++ * ++ * The next pointer is returned if there is one, NULL otherwise. ++ */ ++void * rdxtree_iter_next(struct rdxtree *tree, struct rdxtree_iter *iter); ++ ++#endif /* _RDXTREE_I_H */ +-- +2.0.0.rc2 + diff --git a/debian/patches/0012-libdiskfs-lock-less-reference-counting-for-peropen-o.patch b/debian/patches/0012-libdiskfs-lock-less-reference-counting-for-peropen-o.patch new file mode 100644 index 00000000..9c11c45f --- /dev/null +++ b/debian/patches/0012-libdiskfs-lock-less-reference-counting-for-peropen-o.patch @@ -0,0 +1,143 @@ +From b3d8db87232be02a0812f420668627fc9bda10d1 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Tue, 6 May 2014 18:58:10 +0200 +Subject: [PATCH 12/27] libdiskfs: lock-less reference counting for peropen + objects + +* libdiskfs/diskfs.h (struct peropen): Use refcount_t for field refcnt. +* libdiskfs/peropen-make.c (diskfs_make_peropen): Initialize refcnt. +* libdiskfs/peropen-rele.c (diskfs_release_peropen): Adjust accordingly. +* libdiskfs/protid-make.c (diskfs_start_protid): Likewise. Also, the +node must no longer be locked, adjust comment accordingly. +(diskfs_create_protid): Likewise. +--- + libdiskfs/diskfs.h | 7 ++++--- + libdiskfs/peropen-make.c | 2 +- + libdiskfs/peropen-rele.c | 21 ++++++++++----------- + libdiskfs/protid-make.c | 8 ++++---- + 4 files changed, 19 insertions(+), 19 deletions(-) + +diff --git a/libdiskfs/diskfs.h b/libdiskfs/diskfs.h +index a3f860d..acd1910 100644 +--- a/libdiskfs/diskfs.h ++++ b/libdiskfs/diskfs.h +@@ -28,6 +28,7 @@ + #include <hurd/iohelp.h> + #include <idvec.h> + #include <features.h> ++#include <refcount.h> + + #ifdef DISKFS_DEFINE_EXTERN_INLINE + #define DISKFS_EXTERN_INLINE +@@ -57,7 +58,7 @@ struct peropen + { + int filepointer; + int lock_status; +- int refcnt; ++ refcount_t refcnt; + int openstat; + + struct node *np; +@@ -817,12 +818,12 @@ diskfs_create_node (struct node *dir, const char *name, mode_t mode, + struct dirstat *ds); + + /* Create and return a protid for an existing peropen PO in CRED, +- referring to user USER. The node PO->np must be locked. */ ++ referring to user USER. */ + error_t diskfs_create_protid (struct peropen *po, struct iouser *user, + struct protid **cred); + + /* Build and return in CRED a protid which has no user identification, for +- peropen PO. The node PO->np must be locked. */ ++ peropen PO. */ + error_t diskfs_start_protid (struct peropen *po, struct protid **cred); + + /* Finish building protid CRED started with diskfs_start_protid; +diff --git a/libdiskfs/peropen-make.c b/libdiskfs/peropen-make.c +index eba037f..6d5ca01 100644 +--- a/libdiskfs/peropen-make.c ++++ b/libdiskfs/peropen-make.c +@@ -31,7 +31,7 @@ diskfs_make_peropen (struct node *np, int flags, struct peropen *context, + + po->filepointer = 0; + po->lock_status = LOCK_UN; +- po->refcnt = 0; ++ refcount_init (&po->refcnt, 0); + po->openstat = flags; + po->np = np; + po->path = NULL; +diff --git a/libdiskfs/peropen-rele.c b/libdiskfs/peropen-rele.c +index d3f7492..877137b 100644 +--- a/libdiskfs/peropen-rele.c ++++ b/libdiskfs/peropen-rele.c +@@ -22,13 +22,8 @@ + void + diskfs_release_peropen (struct peropen *po) + { +- pthread_mutex_lock (&po->np->lock); +- +- if (--po->refcnt) +- { +- pthread_mutex_unlock (&po->np->lock); +- return; +- } ++ if (refcount_deref (&po->refcnt) > 0) ++ return; + + if (po->root_parent) + mach_port_deallocate (mach_task_self (), po->root_parent); +@@ -40,10 +35,14 @@ diskfs_release_peropen (struct peropen *po) + mach_port_deallocate (mach_task_self (), po->shadow_root_parent); + + if (po->lock_status != LOCK_UN) +- fshelp_acquire_lock (&po->np->userlock, &po->lock_status, +- &po->np->lock, LOCK_UN); +- +- diskfs_nput (po->np); ++ { ++ pthread_mutex_lock (&po->np->lock); ++ fshelp_acquire_lock (&po->np->userlock, &po->lock_status, ++ &po->np->lock, LOCK_UN); ++ diskfs_nput (po->np); ++ } ++ else ++ diskfs_nrele (po->np); + + free (po->path); + free (po); +diff --git a/libdiskfs/protid-make.c b/libdiskfs/protid-make.c +index b39b92a..22aaa2e 100644 +--- a/libdiskfs/protid-make.c ++++ b/libdiskfs/protid-make.c +@@ -20,7 +20,7 @@ + #include <assert.h> + + /* Build and return in CRED a protid which has no user identification, for +- peropen PO. The node PO->np must be locked. */ ++ peropen PO. */ + error_t + diskfs_start_protid (struct peropen *po, struct protid **cred) + { +@@ -29,7 +29,7 @@ diskfs_start_protid (struct peropen *po, struct protid **cred) + sizeof (struct protid), cred); + if (! err) + { +- po->refcnt++; ++ refcount_ref (&po->refcnt); + (*cred)->po = po; + (*cred)->shared_object = MACH_PORT_NULL; + (*cred)->mapped = 0; +@@ -55,8 +55,8 @@ diskfs_finish_protid (struct protid *cred, struct iouser *user) + assert_perror (err); + } + +-/* Create and return a protid for an existing peropen PO in CRED for USER. +- The node PO->np must be locked. */ ++/* Create and return a protid for an existing peropen PO in CRED for ++ USER. */ + error_t + diskfs_create_protid (struct peropen *po, struct iouser *user, + struct protid **cred) +-- +2.0.0.rc2 + diff --git a/debian/patches/0013-libtrivfs-lock-less-reference-counting-for-trivfs_pe.patch b/debian/patches/0013-libtrivfs-lock-less-reference-counting-for-trivfs_pe.patch new file mode 100644 index 00000000..c0ed644b --- /dev/null +++ b/debian/patches/0013-libtrivfs-lock-less-reference-counting-for-trivfs_pe.patch @@ -0,0 +1,172 @@ +From fca1db1c349da81edccd46f719918a40447613da Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Tue, 6 May 2014 19:07:13 +0200 +Subject: [PATCH 13/27] libtrivfs: lock-less reference counting for + trivfs_peropen objects + +* libtrivfs/trivfs.h (struct trivfs_peropen): Use refcount_t for field +refcnt. +(struct trivfs_control): Remove unused field lock. +* libtrivfs/cntl-create.c (trivfs_create_control): Drop the mutex +initialization. +* libtrivfs/io-reauthenticate.c (trivfs_S_io_reauthenticate): Adjust +accordingly. +* libtrivfs/io-restrict-auth.c (trivfs_S_io_restrict_auth): Likewise. +* libtrivfs/open.c (trivfs_open): Initialize refcnt. +* libtrivfs/protid-clean.c (trivfs_clean_protid): Likewise. +* libtrivfs/protid-dup.c (trivfs_protid_dup): Likewise. +--- + libtrivfs/cntl-create.c | 1 - + libtrivfs/io-reauthenticate.c | 5 +---- + libtrivfs/io-restrict-auth.c | 4 +--- + libtrivfs/open.c | 2 +- + libtrivfs/protid-clean.c | 26 +++++++++++++++----------- + libtrivfs/protid-dup.c | 5 +---- + libtrivfs/trivfs.h | 4 ++-- + 7 files changed, 21 insertions(+), 26 deletions(-) + +diff --git a/libtrivfs/cntl-create.c b/libtrivfs/cntl-create.c +index 910daf3..eb9a834 100644 +--- a/libtrivfs/cntl-create.c ++++ b/libtrivfs/cntl-create.c +@@ -85,7 +85,6 @@ trivfs_create_control (mach_port_t underlying, + } + + (*control)->hook = 0; +- pthread_mutex_init (&(*control)->lock, NULL); + } + + out: +diff --git a/libtrivfs/io-reauthenticate.c b/libtrivfs/io-reauthenticate.c +index 7677697..df0ed2e 100644 +--- a/libtrivfs/io-reauthenticate.c ++++ b/libtrivfs/io-reauthenticate.c +@@ -62,11 +62,8 @@ trivfs_S_io_reauthenticate (struct trivfs_protid *cred, + newcred->isroot = 1; + + newcred->hook = cred->hook; +- +- pthread_mutex_lock (&cred->po->cntl->lock); + newcred->po = cred->po; +- newcred->po->refcnt++; +- pthread_mutex_unlock (&cred->po->cntl->lock); ++ refcount_ref (&newcred->po->refcnt); + + do + err = io_restrict_auth (newcred->po->cntl->underlying, &newcred->realnode, +diff --git a/libtrivfs/io-restrict-auth.c b/libtrivfs/io-restrict-auth.c +index 65b4fd6..39670fe 100644 +--- a/libtrivfs/io-restrict-auth.c ++++ b/libtrivfs/io-restrict-auth.c +@@ -110,10 +110,8 @@ trivfs_S_io_restrict_auth (struct trivfs_protid *cred, + } + + newcred->isroot = 0; +- pthread_mutex_lock (&cred->po->cntl->lock); + newcred->po = cred->po; +- newcred->po->refcnt++; +- pthread_mutex_unlock (&cred->po->cntl->lock); ++ refcount_ref (&newcred->po->refcnt); + if (cred->isroot && idvec_contains (user->uids, 0)) + newcred->isroot = 1; + newcred->user = user; +diff --git a/libtrivfs/open.c b/libtrivfs/open.c +index f64d2ff..97e70a1 100644 +--- a/libtrivfs/open.c ++++ b/libtrivfs/open.c +@@ -40,7 +40,7 @@ trivfs_open (struct trivfs_control *cntl, + + ports_port_ref (cntl); + +- po->refcnt = 1; ++ refcount_init (&po->refcnt, 1); + po->cntl = cntl; + po->openmodes = flags; + po->hook = 0; +diff --git a/libtrivfs/protid-clean.c b/libtrivfs/protid-clean.c +index f98da6a..cce736d 100644 +--- a/libtrivfs/protid-clean.c ++++ b/libtrivfs/protid-clean.c +@@ -31,19 +31,23 @@ trivfs_clean_protid (void *arg) + (*trivfs_protid_destroy_hook) (cred); + + /* If we hold the only reference to the peropen, try to get rid of it. */ +- pthread_mutex_lock (&cntl->lock); +- if (cred->po->refcnt == 1 && trivfs_peropen_destroy_hook) ++ if (refcount_deref (&cred->po->refcnt) == 0) + { +- pthread_mutex_unlock (&cntl->lock); +- (*trivfs_peropen_destroy_hook) (cred->po); +- pthread_mutex_lock (&cntl->lock); ++ if (trivfs_peropen_destroy_hook) ++ { ++ /* Reaquire a reference while we call the hook. */ ++ refcount_ref (&cred->po->refcnt); ++ (*trivfs_peropen_destroy_hook) (cred->po); ++ if (refcount_deref (&cred->po->refcnt) == 0) ++ goto free_po; ++ } ++ else ++ { ++ free_po: ++ ports_port_deref (cntl); ++ free (cred->po); ++ } + } +- if (--cred->po->refcnt == 0) +- { +- ports_port_deref (cntl); +- free (cred->po); +- } +- pthread_mutex_unlock (&cntl->lock); + + iohelp_free_iouser (cred->user); + +diff --git a/libtrivfs/protid-dup.c b/libtrivfs/protid-dup.c +index 6169603..75f3ca8 100644 +--- a/libtrivfs/protid-dup.c ++++ b/libtrivfs/protid-dup.c +@@ -35,11 +35,8 @@ trivfs_protid_dup (struct trivfs_protid *cred, struct trivfs_protid **dup) + + if (! err) + { +- pthread_mutex_lock (&cred->po->cntl->lock); + new->po = cred->po; +- new->po->refcnt++; +- pthread_mutex_unlock (&cred->po->cntl->lock); +- ++ refcount_ref (&new->po->refcnt); + new->isroot = cred->isroot; + + err = iohelp_dup_iouser (&new->user, cred->user); +diff --git a/libtrivfs/trivfs.h b/libtrivfs/trivfs.h +index bb456ff..8902338 100644 +--- a/libtrivfs/trivfs.h ++++ b/libtrivfs/trivfs.h +@@ -24,6 +24,7 @@ + #include <mach/mach.h> + #include <hurd/ports.h> + #include <hurd/iohelp.h> ++#include <refcount.h> + + struct trivfs_protid + { +@@ -41,14 +42,13 @@ struct trivfs_peropen + { + void *hook; /* for user use */ + int openmodes; +- int refcnt; ++ refcount_t refcnt; + struct trivfs_control *cntl; + }; + + struct trivfs_control + { + struct port_info pi; +- pthread_mutex_t lock; + struct port_class *protid_class; + struct port_bucket *protid_bucket; + mach_port_t filesys_id; +-- +2.0.0.rc2 + diff --git a/debian/patches/0014-libports-use-a-single-hash-table.patch b/debian/patches/0014-libports-use-a-single-hash-table.patch new file mode 100644 index 00000000..18cd45a4 --- /dev/null +++ b/debian/patches/0014-libports-use-a-single-hash-table.patch @@ -0,0 +1,650 @@ +From cbcc40d7a04cd3334ee4784411a578611c7d7f00 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Sat, 3 May 2014 03:53:41 +0200 +Subject: [PATCH 14/27] libports: use a single hash table + +Previously, libports used a hash table per port bucket. This makes +looking up a port difficult if one does not know the port bucket, as +one has to iterate over all buckets and do a hash table lookup each. + +Having to iterate over the buckets makes it necessary to keep a list +of all buckets, which has to be updated and protected by a lock as +well. + +Also, the current code in _ports_bucket_class_iterate iterates over +the hash table associated with the bucket given. When +ports_class_iterate calls this common function, it obtains a reference +to the bucket from one of the ports in the given class. This will not +work if a class contains ports in different port buckets. This +limitation is not documented as far as I can see. Again, having to +maintain this list has its cost and requires serialization. + +Use a single hash table instead. Furthermore, serialize access to it +using a separate lock. Remove the linked lists of all buckets and all +ports in a class. + +* libports/bucket-iterate.c (ports_bucket_iterate): Acquire +_ports_htable_lock. Also, generalize ports_bucket_iterate to treat +the bucket argument the same way as the class argument. +* libports/class-iterate.c (ports_class_iterate): Just call the +generalized _ports_bucket_class_iterate with NULL as class. +* libports/ports.h (struct port_info): Remove the port class links. +(struct port_bucket): Remove the hash table, and the all buckets link. +(_ports_all_buckets): Remove declaration. +(_ports_htable): New global hash table. +(_ports_htable_lock): Protected by this lock. +* libports/claim-right.c: Adjust accordingly. +* libports/complete-deallocate.c: Likewise. +* libports/create-bucket.c: Likewise. +* libports/create-class.c: Likewise. +* libports/create-internal.c: Likewise. +* libports/destroy-right.c: Likewise. +* libports/import-port.c: Likewise. +* libports/lookup-port.c: Likewise. +* libports/reallocate-from-external.c: Likewise. +* libports/reallocate-port.c: Likewise. +* libports/transfer-right.c: Likewise. +* libports/inhibit-all-rpcs.c: Iterate over the hash table. +* libports/inhibit-bucket-rpcs.c: Likewise, but filter using bucket. +* libports/inhibit-class-rpcs.c: Likewise, but filter using class. +* libports/init.c (_ports_htable): Initialize. +(_ports_htable_lock): Likewise. +--- + libports/bucket-iterate.c | 22 +++++++++++++++------- + libports/claim-right.c | 5 ++++- + libports/class-iterate.c | 10 +--------- + libports/complete-deallocate.c | 7 +++---- + libports/create-bucket.c | 6 ------ + libports/create-class.c | 1 - + libports/create-internal.c | 19 +++++++++++++------ + libports/destroy-right.c | 5 +++-- + libports/import-port.c | 19 +++++++++++++------ + libports/inhibit-all-rpcs.c | 27 +++++++++++++-------------- + libports/inhibit-bucket-rpcs.c | 3 ++- + libports/inhibit-class-rpcs.c | 27 ++++++++++++++++++--------- + libports/init.c | 7 ++++++- + libports/lookup-port.c | 21 ++++++++------------- + libports/ports.h | 13 ++++++++----- + libports/reallocate-from-external.c | 15 +++++++++++---- + libports/reallocate-port.c | 9 ++++++++- + libports/transfer-right.c | 18 ++++++++++++++---- + 18 files changed, 140 insertions(+), 94 deletions(-) + +diff --git a/libports/bucket-iterate.c b/libports/bucket-iterate.c +index babc204..06d8f7d 100644 +--- a/libports/bucket-iterate.c ++++ b/libports/bucket-iterate.c +@@ -25,7 +25,7 @@ + /* Internal entrypoint for both ports_bucket_iterate and ports_class_iterate. + If CLASS is non-null, call FUN only for ports in that class. */ + error_t +-_ports_bucket_class_iterate (struct port_bucket *bucket, ++_ports_bucket_class_iterate (struct hurd_ihash *ht, + struct port_class *class, + error_t (*fun)(void *)) + { +@@ -36,23 +36,24 @@ _ports_bucket_class_iterate (struct port_bucket *bucket, + error_t err; + + pthread_mutex_lock (&_ports_lock); ++ pthread_mutex_lock (&_ports_htable_lock); + +- if (bucket->htable.nr_items == 0) ++ if (ht->nr_items == 0) + { +- pthread_mutex_unlock (&_ports_lock); ++ pthread_mutex_unlock (&_ports_htable_lock); + return 0; + } + +- nr_items = bucket->htable.nr_items; ++ nr_items = ht->nr_items; + p = malloc (nr_items * sizeof *p); + if (p == NULL) + { +- pthread_mutex_unlock (&_ports_lock); ++ pthread_mutex_unlock (&_ports_htable_lock); + return ENOMEM; + } + + n = 0; +- HURD_IHASH_ITERATE (&bucket->htable, arg) ++ HURD_IHASH_ITERATE (ht, arg) + { + struct port_info *const pi = arg; + +@@ -63,8 +64,15 @@ _ports_bucket_class_iterate (struct port_bucket *bucket, + n++; + } + } ++ pthread_mutex_unlock (&_ports_htable_lock); + pthread_mutex_unlock (&_ports_lock); + ++ if (n == 0) ++ { ++ free (p); ++ return 0; ++ } ++ + if (n != nr_items) + { + /* We allocated too much. Release unused memory. */ +@@ -89,5 +97,5 @@ error_t + ports_bucket_iterate (struct port_bucket *bucket, + error_t (*fun)(void *)) + { +- return _ports_bucket_class_iterate (bucket, 0, fun); ++ return _ports_bucket_class_iterate (&bucket->htable, NULL, fun); + } +diff --git a/libports/claim-right.c b/libports/claim-right.c +index 4851ea3..11cc3ad 100644 +--- a/libports/claim-right.c ++++ b/libports/claim-right.c +@@ -34,10 +34,13 @@ ports_claim_right (void *portstruct) + if (ret == MACH_PORT_NULL) + return ret; + +- pthread_mutex_lock (&_ports_lock); ++ pthread_mutex_lock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); + hurd_ihash_locp_remove (&pi->bucket->htable, pi->hentry); ++ pthread_mutex_unlock (&_ports_htable_lock); + err = mach_port_move_member (mach_task_self (), ret, MACH_PORT_NULL); + assert_perror (err); ++ pthread_mutex_lock (&_ports_lock); + pi->port_right = MACH_PORT_NULL; + if (pi->flags & PORT_HAS_SENDRIGHTS) + { +diff --git a/libports/class-iterate.c b/libports/class-iterate.c +index 1f8878a..df33818 100644 +--- a/libports/class-iterate.c ++++ b/libports/class-iterate.c +@@ -23,13 +23,5 @@ error_t + ports_class_iterate (struct port_class *class, + error_t (*fun)(void *)) + { +- pthread_mutex_lock (&_ports_lock); +- if (class->ports != 0) +- { +- struct port_bucket *bucket = class->ports->bucket; +- pthread_mutex_unlock (&_ports_lock); +- return _ports_bucket_class_iterate (bucket, class, fun); +- } +- pthread_mutex_unlock (&_ports_lock); +- return 0; ++ return _ports_bucket_class_iterate (&_ports_htable, class, fun); + } +diff --git a/libports/complete-deallocate.c b/libports/complete-deallocate.c +index 8ce095b..50548ec 100644 +--- a/libports/complete-deallocate.c ++++ b/libports/complete-deallocate.c +@@ -29,16 +29,15 @@ _ports_complete_deallocate (struct port_info *pi) + + if (pi->port_right) + { ++ pthread_mutex_lock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); + hurd_ihash_locp_remove (&pi->bucket->htable, pi->hentry); ++ pthread_mutex_unlock (&_ports_htable_lock); + mach_port_mod_refs (mach_task_self (), pi->port_right, + MACH_PORT_RIGHT_RECEIVE, -1); + pi->port_right = MACH_PORT_NULL; + } + +- *pi->prevp = pi->next; +- if (pi->next) +- pi->next->prevp = pi->prevp; +- + pi->bucket->count--; + pi->class->count--; + +diff --git a/libports/create-bucket.c b/libports/create-bucket.c +index 52d50c3..2c5f1b6 100644 +--- a/libports/create-bucket.c ++++ b/libports/create-bucket.c +@@ -48,11 +48,5 @@ ports_create_bucket () + + hurd_ihash_init (&ret->htable, offsetof (struct port_info, hentry)); + ret->rpcs = ret->flags = ret->count = 0; +- +- pthread_mutex_lock (&_ports_lock); +- ret->next = _ports_all_buckets; +- _ports_all_buckets = ret; +- pthread_mutex_unlock (&_ports_lock); +- + return ret; + } +diff --git a/libports/create-class.c b/libports/create-class.c +index 12c8add..782f52b 100644 +--- a/libports/create-class.c ++++ b/libports/create-class.c +@@ -39,7 +39,6 @@ ports_create_class (void (*clean_routine)(void *), + cl->dropweak_routine = dropweak_routine; + cl->flags = 0; + cl->rpcs = 0; +- cl->ports = NULL; + cl->count = 0; + cl->uninhibitable_rpcs = ports_default_uninhibitable_rpcs; + +diff --git a/libports/create-internal.c b/libports/create-internal.c +index 8551297..2981e74 100644 +--- a/libports/create-internal.c ++++ b/libports/create-internal.c +@@ -81,15 +81,22 @@ _ports_create_port_internal (struct port_class *class, + goto loop; + } + ++ pthread_mutex_lock (&_ports_htable_lock); ++ err = hurd_ihash_add (&_ports_htable, port, pi); ++ if (err) ++ { ++ pthread_mutex_unlock (&_ports_htable_lock); ++ goto lose; ++ } + err = hurd_ihash_add (&bucket->htable, port, pi); + if (err) +- goto lose; ++ { ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); ++ pthread_mutex_unlock (&_ports_htable_lock); ++ goto lose; ++ } ++ pthread_mutex_unlock (&_ports_htable_lock); + +- pi->next = class->ports; +- pi->prevp = &class->ports; +- if (class->ports) +- class->ports->prevp = &pi->next; +- class->ports = pi; + bucket->count++; + class->count++; + pthread_mutex_unlock (&_ports_lock); +diff --git a/libports/destroy-right.c b/libports/destroy-right.c +index 65e19c7..aff7c02 100644 +--- a/libports/destroy-right.c ++++ b/libports/destroy-right.c +@@ -30,12 +30,13 @@ ports_destroy_right (void *portstruct) + + if (pi->port_right != MACH_PORT_NULL) + { +- pthread_mutex_lock (&_ports_lock); ++ pthread_mutex_lock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); + hurd_ihash_locp_remove (&pi->bucket->htable, pi->hentry); ++ pthread_mutex_unlock (&_ports_htable_lock); + err = mach_port_mod_refs (mach_task_self (), pi->port_right, + MACH_PORT_RIGHT_RECEIVE, -1); + assert_perror (err); +- pthread_mutex_unlock (&_ports_lock); + + pi->port_right = MACH_PORT_NULL; + +diff --git a/libports/import-port.c b/libports/import-port.c +index 226f47e..91afaa7 100644 +--- a/libports/import-port.c ++++ b/libports/import-port.c +@@ -75,15 +75,22 @@ ports_import_port (struct port_class *class, struct port_bucket *bucket, + goto loop; + } + ++ pthread_mutex_lock (&_ports_htable_lock); ++ err = hurd_ihash_add (&_ports_htable, port, pi); ++ if (err) ++ { ++ pthread_mutex_unlock (&_ports_htable_lock); ++ goto lose; ++ } + err = hurd_ihash_add (&bucket->htable, port, pi); + if (err) +- goto lose; ++ { ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); ++ pthread_mutex_unlock (&_ports_htable_lock); ++ goto lose; ++ } ++ pthread_mutex_unlock (&_ports_htable_lock); + +- pi->next = class->ports; +- pi->prevp = &class->ports; +- if (class->ports) +- class->ports->prevp = &pi->next; +- class->ports = pi; + bucket->count++; + class->count++; + pthread_mutex_unlock (&_ports_lock); +diff --git a/libports/inhibit-all-rpcs.c b/libports/inhibit-all-rpcs.c +index d4a54ba..83c291f 100644 +--- a/libports/inhibit-all-rpcs.c ++++ b/libports/inhibit-all-rpcs.c +@@ -36,24 +36,23 @@ ports_inhibit_all_rpcs () + struct port_bucket *bucket; + int this_one = 0; + +- for (bucket = _ports_all_buckets; bucket; bucket = bucket->next) ++ pthread_mutex_lock (&_ports_htable_lock); ++ HURD_IHASH_ITERATE (&_ports_htable, portstruct) + { +- HURD_IHASH_ITERATE (&bucket->htable, portstruct) ++ struct rpc_info *rpc; ++ struct port_info *pi = portstruct; ++ ++ for (rpc = pi->current_rpcs; rpc; rpc = rpc->next) + { +- struct rpc_info *rpc; +- struct port_info *pi = portstruct; +- +- for (rpc = pi->current_rpcs; rpc; rpc = rpc->next) +- { +- /* Avoid cancelling the calling thread if it's currently +- handling a RPC. */ +- if (rpc->thread == hurd_thread_self ()) +- this_one = 1; +- else +- hurd_thread_cancel (rpc->thread); +- } ++ /* Avoid cancelling the calling thread if it's currently ++ handling a RPC. */ ++ if (rpc->thread == hurd_thread_self ()) ++ this_one = 1; ++ else ++ hurd_thread_cancel (rpc->thread); + } + } ++ pthread_mutex_unlock (&_ports_htable_lock); + + while (_ports_total_rpcs > this_one) + { +diff --git a/libports/inhibit-bucket-rpcs.c b/libports/inhibit-bucket-rpcs.c +index 965aa03..8c5a486 100644 +--- a/libports/inhibit-bucket-rpcs.c ++++ b/libports/inhibit-bucket-rpcs.c +@@ -35,6 +35,7 @@ ports_inhibit_bucket_rpcs (struct port_bucket *bucket) + { + int this_one = 0; + ++ pthread_mutex_lock (&_ports_htable_lock); + HURD_IHASH_ITERATE (&bucket->htable, portstruct) + { + struct rpc_info *rpc; +@@ -49,7 +50,7 @@ ports_inhibit_bucket_rpcs (struct port_bucket *bucket) + hurd_thread_cancel (rpc->thread); + } + } +- ++ pthread_mutex_unlock (&_ports_htable_lock); + + while (bucket->rpcs > this_one) + { +diff --git a/libports/inhibit-class-rpcs.c b/libports/inhibit-class-rpcs.c +index 7ee8653..1580bdb 100644 +--- a/libports/inhibit-class-rpcs.c ++++ b/libports/inhibit-class-rpcs.c +@@ -36,15 +36,24 @@ ports_inhibit_class_rpcs (struct port_class *class) + struct rpc_info *rpc; + int this_one = 0; + +- for (pi = class->ports; pi; pi = pi->next) +- for (rpc = pi->current_rpcs; rpc; rpc = rpc->next) +- { +- /* Avoid cancelling the calling thread. */ +- if (rpc->thread == hurd_thread_self ()) +- this_one = 1; +- else +- hurd_thread_cancel (rpc->thread); +- } ++ pthread_mutex_lock (&_ports_htable_lock); ++ HURD_IHASH_ITERATE (&_ports_htable, portstruct) ++ { ++ struct rpc_info *rpc; ++ struct port_info *pi = portstruct; ++ if (pi->class != class) ++ continue; ++ ++ for (rpc = pi->current_rpcs; rpc; rpc = rpc->next) ++ { ++ /* Avoid cancelling the calling thread. */ ++ if (rpc->thread == hurd_thread_self ()) ++ this_one = 1; ++ else ++ hurd_thread_cancel (rpc->thread); ++ } ++ } ++ pthread_mutex_unlock (&_ports_htable_lock); + + while (class->rpcs > this_one) + { +diff --git a/libports/init.c b/libports/init.c +index 3ef5388..d3aa90c 100644 +--- a/libports/init.c ++++ b/libports/init.c +@@ -19,9 +19,14 @@ + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + + #include "ports.h" ++#include <stddef.h> + + pthread_mutex_t _ports_lock = PTHREAD_MUTEX_INITIALIZER; + pthread_cond_t _ports_block = PTHREAD_COND_INITIALIZER; +-struct port_bucket *_ports_all_buckets; ++ ++struct hurd_ihash _ports_htable = ++ HURD_IHASH_INITIALIZER (offsetof (struct port_info, ports_htable_entry)); ++pthread_mutex_t _ports_htable_lock = PTHREAD_MUTEX_INITIALIZER; ++ + int _ports_total_rpcs; + int _ports_flags; +diff --git a/libports/lookup-port.c b/libports/lookup-port.c +index f79f6f0..fbb13ef 100644 +--- a/libports/lookup-port.c ++++ b/libports/lookup-port.c +@@ -26,26 +26,21 @@ ports_lookup_port (struct port_bucket *bucket, + mach_port_t port, + struct port_class *class) + { +- struct port_info *pi = 0; +- ++ struct port_info *pi; ++ + pthread_mutex_lock (&_ports_lock); ++ pthread_mutex_lock (&_ports_htable_lock); + +- if (bucket) +- pi = hurd_ihash_find (&bucket->htable, port); +- else +- for (bucket = _ports_all_buckets; bucket; bucket = bucket->next) +- { +- pi = hurd_ihash_find (&bucket->htable, port); +- if (pi) +- break; +- } +- +- if (pi && class && pi->class != class) ++ pi = hurd_ihash_find (&_ports_htable, port); ++ if (pi ++ && ((class && pi->class != class) ++ || (bucket && pi->bucket != bucket))) + pi = 0; + + if (pi) + pi->refcnt++; + ++ pthread_mutex_unlock (&_ports_htable_lock); + pthread_mutex_unlock (&_ports_lock); + + return pi; +diff --git a/libports/ports.h b/libports/ports.h +index 7f13124..c29d78a 100644 +--- a/libports/ports.h ++++ b/libports/ports.h +@@ -48,7 +48,7 @@ struct port_info + struct rpc_info *current_rpcs; + struct port_bucket *bucket; + hurd_ihash_locp_t hentry; +- struct port_info *next, **prevp; /* links on port_class list */ ++ hurd_ihash_locp_t ports_htable_entry; + }; + typedef struct port_info *port_info_t; + +@@ -65,7 +65,6 @@ struct port_bucket + int rpcs; + int flags; + int count; +- struct port_bucket *next; + }; + /* FLAGS above are the following: */ + #define PORT_BUCKET_INHIBITED PORTS_INHIBITED +@@ -78,7 +77,6 @@ struct port_class + { + int flags; + int rpcs; +- struct port_info *ports; + int count; + void (*clean_routine) (void *); + void (*dropweak_routine) (void *); +@@ -277,7 +275,7 @@ error_t ports_class_iterate (struct port_class *class, + error_t (*fun)(void *port)); + + /* Internal entrypoint for above two. */ +-error_t _ports_bucket_class_iterate (struct port_bucket *bucket, ++error_t _ports_bucket_class_iterate (struct hurd_ihash *ht, + struct port_class *class, + error_t (*fun)(void *port)); + +@@ -402,7 +400,12 @@ extern kern_return_t + /* Private data */ + extern pthread_mutex_t _ports_lock; + extern pthread_cond_t _ports_block; +-extern struct port_bucket *_ports_all_buckets; ++ ++/* A hash table mapping port names to port_info objects. */ ++extern struct hurd_ihash _ports_htable; ++/* Access to the hash table is protected by this lock. */ ++extern pthread_mutex_t _ports_htable_lock; ++ + extern int _ports_total_rpcs; + extern int _ports_flags; + #define _PORTS_INHIBITED PORTS_INHIBITED +diff --git a/libports/reallocate-from-external.c b/libports/reallocate-from-external.c +index 8cccb2a..678b5d9 100644 +--- a/libports/reallocate-from-external.c ++++ b/libports/reallocate-from-external.c +@@ -43,8 +43,11 @@ ports_reallocate_from_external (void *portstruct, mach_port_t receive) + MACH_PORT_RIGHT_RECEIVE, -1); + assert_perror (err); + ++ pthread_mutex_lock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); + hurd_ihash_locp_remove (&pi->bucket->htable, pi->hentry); +- ++ pthread_mutex_unlock (&_ports_htable_lock); ++ + if ((pi->flags & PORT_HAS_SENDRIGHTS) && !stat.mps_srights) + { + dropref = 1; +@@ -59,11 +62,15 @@ ports_reallocate_from_external (void *portstruct, mach_port_t receive) + pi->port_right = receive; + pi->cancel_threshold = 0; + pi->mscount = stat.mps_mscount; +- +- err = hurd_ihash_add (&pi->bucket->htable, receive, pi); ++ ++ pthread_mutex_lock (&_ports_htable_lock); ++ err = hurd_ihash_add (&_ports_htable, receive, pi); + assert_perror (err); ++ err = hurd_ihash_add (&pi->bucket->htable, receive, pi); ++ pthread_mutex_unlock (&_ports_htable_lock); + pthread_mutex_unlock (&_ports_lock); +- ++ assert_perror (err); ++ + mach_port_move_member (mach_task_self (), receive, pi->bucket->portset); + + if (stat.mps_srights) +diff --git a/libports/reallocate-port.c b/libports/reallocate-port.c +index d2adaeb..4a719e4 100644 +--- a/libports/reallocate-port.c ++++ b/libports/reallocate-port.c +@@ -36,7 +36,10 @@ ports_reallocate_port (void *portstruct) + MACH_PORT_RIGHT_RECEIVE, -1); + assert_perror (err); + ++ pthread_mutex_lock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); + hurd_ihash_locp_remove (&pi->bucket->htable, pi->hentry); ++ pthread_mutex_unlock (&_ports_htable_lock); + + err = mach_port_allocate (mach_task_self (), MACH_PORT_RIGHT_RECEIVE, + &pi->port_right); +@@ -48,9 +51,13 @@ ports_reallocate_port (void *portstruct) + } + pi->cancel_threshold = 0; + pi->mscount = 0; +- err = hurd_ihash_add (&pi->bucket->htable, pi->port_right, pi); ++ pthread_mutex_lock (&_ports_htable_lock); ++ err = hurd_ihash_add (&_ports_htable, pi->port_right, pi); + assert_perror (err); ++ err = hurd_ihash_add (&pi->bucket->htable, pi->port_right, pi); ++ pthread_mutex_unlock (&_ports_htable_lock); + pthread_mutex_unlock (&_ports_lock); ++ assert_perror (err); + + err = mach_port_move_member (mach_task_self (), pi->port_right, + pi->bucket->portset); +diff --git a/libports/transfer-right.c b/libports/transfer-right.c +index 72488a9..3f23d93 100644 +--- a/libports/transfer-right.c ++++ b/libports/transfer-right.c +@@ -41,7 +41,10 @@ ports_transfer_right (void *tostruct, + port = frompi->port_right; + if (port != MACH_PORT_NULL) + { ++ pthread_mutex_lock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, frompi->ports_htable_entry); + hurd_ihash_locp_remove (&frompi->bucket->htable, frompi->hentry); ++ pthread_mutex_unlock (&_ports_htable_lock); + frompi->port_right = MACH_PORT_NULL; + if (frompi->flags & PORT_HAS_SENDRIGHTS) + { +@@ -54,7 +57,10 @@ ports_transfer_right (void *tostruct, + /* Destroy the existing right in TOPI. */ + if (topi->port_right != MACH_PORT_NULL) + { ++ pthread_mutex_lock (&_ports_htable_lock); ++ hurd_ihash_locp_remove (&_ports_htable, topi->ports_htable_entry); + hurd_ihash_locp_remove (&topi->bucket->htable, topi->hentry); ++ pthread_mutex_unlock (&_ports_htable_lock); + err = mach_port_mod_refs (mach_task_self (), topi->port_right, + MACH_PORT_RIGHT_RECEIVE, -1); + assert_perror (err); +@@ -74,10 +80,16 @@ ports_transfer_right (void *tostruct, + topi->port_right = port; + topi->cancel_threshold = frompi->cancel_threshold; + topi->mscount = frompi->mscount; +- ++ ++ pthread_mutex_unlock (&_ports_lock); ++ + if (port) + { ++ pthread_mutex_lock (&_ports_htable_lock); ++ err = hurd_ihash_add (&_ports_htable, port, topi); ++ assert_perror (err); + err = hurd_ihash_add (&topi->bucket->htable, port, topi); ++ pthread_mutex_unlock (&_ports_htable_lock); + assert_perror (err); + if (topi->bucket != frompi->bucket) + { +@@ -86,9 +98,7 @@ ports_transfer_right (void *tostruct, + assert_perror (err); + } + } +- +- pthread_mutex_unlock (&_ports_lock); +- ++ + /* Take care of any lowered reference counts. */ + if (dereffrompi) + ports_port_deref (frompi); +-- +2.0.0.rc2 + diff --git a/debian/patches/0015-libports-lock-less-reference-counting-for-port_info-.patch b/debian/patches/0015-libports-lock-less-reference-counting-for-port_info-.patch new file mode 100644 index 00000000..de7fcbd2 --- /dev/null +++ b/debian/patches/0015-libports-lock-less-reference-counting-for-port_info-.patch @@ -0,0 +1,345 @@ +From 84aefd4c5ccdbc0330963b6a0c099696997bd84d Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Sat, 3 May 2014 01:02:35 +0200 +Subject: [PATCH 15/27] libports: lock-less reference counting for port_info + objects + +* libports/ports.h (struct port_info): Use the new type. +* libports/lookup-port.c: No need to lock _ports_lock anymore. +* libports/bucket-iterate.c: Likewise. +* libports/complete-deallocate.c: Check if someone reacquired a +reference through a hash table lookup. +* libports/create-internal.c: Use the new reference counting primitives. +* libports/get-right.c: Likewise. +* libports/import-port.c: Likewise. +* libports/port-deref-weak.c: Likewise. +* libports/port-deref.c: Likewise. +* libports/port-ref-weak.c: Likewise. +* libports/port-ref.c: Likewise. +* libports/reallocate-from-external.c: Likewise. +* libports/transfer-right.c: Likewise. +* utils/rpctrace.c: Likewise. +--- + libports/bucket-iterate.c | 4 +--- + libports/complete-deallocate.c | 14 ++++++++++++++ + libports/create-internal.c | 3 +-- + libports/get-right.c | 2 +- + libports/import-port.c | 3 +-- + libports/lookup-port.c | 6 ++---- + libports/port-deref-weak.c | 10 +++------- + libports/port-deref.c | 34 ++++++++++++++++------------------ + libports/port-ref-weak.c | 8 +++----- + libports/port-ref.c | 8 +++----- + libports/ports.h | 4 ++-- + libports/reallocate-from-external.c | 2 +- + libports/transfer-right.c | 2 +- + utils/rpctrace.c | 10 ++++++++-- + 14 files changed, 57 insertions(+), 53 deletions(-) + +diff --git a/libports/bucket-iterate.c b/libports/bucket-iterate.c +index 06d8f7d..8a28827 100644 +--- a/libports/bucket-iterate.c ++++ b/libports/bucket-iterate.c +@@ -35,7 +35,6 @@ _ports_bucket_class_iterate (struct hurd_ihash *ht, + size_t i, n, nr_items; + error_t err; + +- pthread_mutex_lock (&_ports_lock); + pthread_mutex_lock (&_ports_htable_lock); + + if (ht->nr_items == 0) +@@ -59,13 +58,12 @@ _ports_bucket_class_iterate (struct hurd_ihash *ht, + + if (class == 0 || pi->class == class) + { +- pi->refcnt++; ++ refcounts_ref (&pi->refcounts, NULL); + p[n] = pi; + n++; + } + } + pthread_mutex_unlock (&_ports_htable_lock); +- pthread_mutex_unlock (&_ports_lock); + + if (n == 0) + { +diff --git a/libports/complete-deallocate.c b/libports/complete-deallocate.c +index 50548ec..a239957 100644 +--- a/libports/complete-deallocate.c ++++ b/libports/complete-deallocate.c +@@ -29,15 +29,29 @@ _ports_complete_deallocate (struct port_info *pi) + + if (pi->port_right) + { ++ struct references result; ++ + pthread_mutex_lock (&_ports_htable_lock); ++ refcounts_references (&pi->refcounts, &result); ++ if (result.hard > 0 || result.weak > 0) ++ { ++ /* A reference was reacquired through a hash table lookup. ++ It's fine, we didn't touch anything yet. */ ++ pthread_mutex_unlock (&_ports_htable_lock); ++ return; ++ } ++ + hurd_ihash_locp_remove (&_ports_htable, pi->ports_htable_entry); + hurd_ihash_locp_remove (&pi->bucket->htable, pi->hentry); + pthread_mutex_unlock (&_ports_htable_lock); ++ + mach_port_mod_refs (mach_task_self (), pi->port_right, + MACH_PORT_RIGHT_RECEIVE, -1); + pi->port_right = MACH_PORT_NULL; + } + ++ pthread_mutex_lock (&_ports_lock); ++ + pi->bucket->count--; + pi->class->count--; + +diff --git a/libports/create-internal.c b/libports/create-internal.c +index 2981e74..4f1e512 100644 +--- a/libports/create-internal.c ++++ b/libports/create-internal.c +@@ -54,8 +54,7 @@ _ports_create_port_internal (struct port_class *class, + } + + pi->class = class; +- pi->refcnt = 1; +- pi->weakrefcnt = 0; ++ refcounts_init (&pi->refcounts, 1, 0); + pi->cancel_threshold = 0; + pi->mscount = 0; + pi->flags = 0; +diff --git a/libports/get-right.c b/libports/get-right.c +index 89050c6..8681f46 100644 +--- a/libports/get-right.c ++++ b/libports/get-right.c +@@ -41,7 +41,7 @@ ports_get_right (void *port) + if ((pi->flags & PORT_HAS_SENDRIGHTS) == 0) + { + pi->flags |= PORT_HAS_SENDRIGHTS; +- pi->refcnt++; ++ refcounts_ref (&pi->refcounts, NULL); + err = mach_port_request_notification (mach_task_self (), + pi->port_right, + MACH_NOTIFY_NO_SENDERS, +diff --git a/libports/import-port.c b/libports/import-port.c +index 91afaa7..8601d0e 100644 +--- a/libports/import-port.c ++++ b/libports/import-port.c +@@ -48,8 +48,7 @@ ports_import_port (struct port_class *class, struct port_bucket *bucket, + return ENOMEM; + + pi->class = class; +- pi->refcnt = 1 + !!stat.mps_srights; +- pi->weakrefcnt = 0; ++ refcounts_init (&pi->refcounts, 1 + !!stat.mps_srights, 0); + pi->cancel_threshold = 0; + pi->mscount = stat.mps_mscount; + pi->flags = stat.mps_srights ? PORT_HAS_SENDRIGHTS : 0; +diff --git a/libports/lookup-port.c b/libports/lookup-port.c +index fbb13ef..1bf012f 100644 +--- a/libports/lookup-port.c ++++ b/libports/lookup-port.c +@@ -28,7 +28,6 @@ ports_lookup_port (struct port_bucket *bucket, + { + struct port_info *pi; + +- pthread_mutex_lock (&_ports_lock); + pthread_mutex_lock (&_ports_htable_lock); + + pi = hurd_ihash_find (&_ports_htable, port); +@@ -38,10 +37,9 @@ ports_lookup_port (struct port_bucket *bucket, + pi = 0; + + if (pi) +- pi->refcnt++; ++ ports_port_ref (pi); + + pthread_mutex_unlock (&_ports_htable_lock); +- pthread_mutex_unlock (&_ports_lock); +- ++ + return pi; + } +diff --git a/libports/port-deref-weak.c b/libports/port-deref-weak.c +index beb4842..8432660 100644 +--- a/libports/port-deref-weak.c ++++ b/libports/port-deref-weak.c +@@ -25,12 +25,8 @@ void + ports_port_deref_weak (void *portstruct) + { + struct port_info *pi = portstruct; +- +- pthread_mutex_lock (&_ports_lock); +- assert (pi->weakrefcnt); +- pi->weakrefcnt--; +- if (pi->refcnt == 0 && pi->weakrefcnt == 0) ++ struct references result; ++ refcounts_deref_weak (&pi->refcounts, &result); ++ if (result.hard == 0 && result.weak == 0) + _ports_complete_deallocate (pi); +- else +- pthread_mutex_unlock (&_ports_lock); + } +diff --git a/libports/port-deref.c b/libports/port-deref.c +index cf9b238..b97dd13 100644 +--- a/libports/port-deref.c ++++ b/libports/port-deref.c +@@ -25,26 +25,24 @@ void + ports_port_deref (void *portstruct) + { + struct port_info *pi = portstruct; +- int trieddroppingweakrefs = 0; +- +- retry: +- +- pthread_mutex_lock (&_ports_lock); +- +- if (pi->refcnt == 1 && pi->weakrefcnt +- && pi->class->dropweak_routine && !trieddroppingweakrefs) ++ struct references result; ++ ++ if (pi->class->dropweak_routine) + { +- pthread_mutex_unlock (&_ports_lock); +- (*pi->class->dropweak_routine) (pi); +- trieddroppingweakrefs = 1; +- goto retry; ++ /* If we need to call the dropweak routine, we need to hold one ++ reference while doing so. We use a weak reference for this ++ purpose, which we acquire by demoting our hard reference to a ++ weak one. */ ++ refcounts_demote (&pi->refcounts, &result); ++ ++ if (result.hard == 0 && result.weak > 1) ++ (*pi->class->dropweak_routine) (pi); ++ ++ refcounts_deref_weak (&pi->refcounts, &result); + } +- +- assert (pi->refcnt); ++ else ++ refcounts_deref (&pi->refcounts, &result); + +- pi->refcnt--; +- if (pi->refcnt == 0 && pi->weakrefcnt == 0) ++ if (result.hard == 0 && result.weak == 0) + _ports_complete_deallocate (pi); +- else +- pthread_mutex_unlock (&_ports_lock); + } +diff --git a/libports/port-ref-weak.c b/libports/port-ref-weak.c +index c7d3c69..e4b7fc8 100644 +--- a/libports/port-ref-weak.c ++++ b/libports/port-ref-weak.c +@@ -25,9 +25,7 @@ void + ports_port_ref_weak (void *portstruct) + { + struct port_info *pi = portstruct; +- +- pthread_mutex_lock (&_ports_lock); +- assert (pi->refcnt || pi->weakrefcnt); +- pi->weakrefcnt++; +- pthread_mutex_unlock (&_ports_lock); ++ struct references result; ++ refcounts_ref_weak (&pi->refcounts, &result); ++ assert (result.hard > 0 || result.weak > 1); + } +diff --git a/libports/port-ref.c b/libports/port-ref.c +index 92b7118..761c50f 100644 +--- a/libports/port-ref.c ++++ b/libports/port-ref.c +@@ -25,9 +25,7 @@ void + ports_port_ref (void *portstruct) + { + struct port_info *pi = portstruct; +- +- pthread_mutex_lock (&_ports_lock); +- assert (pi->refcnt || pi->weakrefcnt); +- pi->refcnt++; +- pthread_mutex_unlock (&_ports_lock); ++ struct references result; ++ refcounts_ref (&pi->refcounts, &result); ++ assert (result.hard > 1 || result.weak > 0); + } +diff --git a/libports/ports.h b/libports/ports.h +index c29d78a..3439443 100644 +--- a/libports/ports.h ++++ b/libports/ports.h +@@ -27,6 +27,7 @@ + #include <hurd/ihash.h> + #include <mach/notify.h> + #include <pthread.h> ++#include <refcount.h> + + /* These are global values for common flags used in the various structures. + Not all of these are meaningful in all flag fields. */ +@@ -39,8 +40,7 @@ + struct port_info + { + struct port_class *class; +- int refcnt; +- int weakrefcnt; ++ refcounts_t refcounts; + mach_port_mscount_t mscount; + mach_msg_seqno_t cancel_threshold; + int flags; +diff --git a/libports/reallocate-from-external.c b/libports/reallocate-from-external.c +index 678b5d9..a912725 100644 +--- a/libports/reallocate-from-external.c ++++ b/libports/reallocate-from-external.c +@@ -56,7 +56,7 @@ ports_reallocate_from_external (void *portstruct, mach_port_t receive) + else if (((pi->flags & PORT_HAS_SENDRIGHTS) == 0) && stat.mps_srights) + { + pi->flags |= PORT_HAS_SENDRIGHTS; +- pi->refcnt++; ++ refcounts_ref (&pi->refcounts, NULL); + } + + pi->port_right = receive; +diff --git a/libports/transfer-right.c b/libports/transfer-right.c +index 3f23d93..0e898ba 100644 +--- a/libports/transfer-right.c ++++ b/libports/transfer-right.c +@@ -72,7 +72,7 @@ ports_transfer_right (void *tostruct, + else if (((topi->flags & PORT_HAS_SENDRIGHTS) == 0) && hassendrights) + { + topi->flags |= PORT_HAS_SENDRIGHTS; +- topi->refcnt++; ++ refcounts_ref (&topi->refcounts, NULL); + } + } + +diff --git a/utils/rpctrace.c b/utils/rpctrace.c +index fc913e3..b11fea4 100644 +--- a/utils/rpctrace.c ++++ b/utils/rpctrace.c +@@ -431,7 +431,9 @@ destroy_receiver_info (struct receiver_info *info) + while (send_wrapper) + { + struct sender_info *next = send_wrapper->next; +- assert (TRACED_INFO (send_wrapper)->pi.refcnt == 1); ++ assert ( ++ refcounts_hard_references (&TRACED_INFO (send_wrapper)->pi.refcounts) ++ == 1); + /* Reset the receive_right of the send wrapper in advance to avoid + * destroy_receiver_info is called when the port info is destroyed. */ + send_wrapper->receive_right = NULL; +@@ -848,7 +850,11 @@ rewrite_right (mach_port_t *right, mach_msg_type_name_t *type, + hurd_ihash_locp_remove (&traced_names, receiver_info->locp); + + send_wrapper2 = get_send_wrapper (receiver_info, dest, &rr); +- assert (TRACED_INFO (send_wrapper2)->pi.refcnt == 1); ++ assert ( ++ refcounts_hard_references ( ++ &TRACED_INFO (send_wrapper2)->pi.refcounts) ++ == 1); ++ + name = TRACED_INFO (send_wrapper2)->name; + TRACED_INFO (send_wrapper2)->name = NULL; + /* send_wrapper2 isn't destroyed normally, so we need to unlink +-- +2.0.0.rc2 + diff --git a/debian/patches/0016-ext2fs-use-a-seperate-lock-to-protect-nodehash.patch b/debian/patches/0016-ext2fs-use-a-seperate-lock-to-protect-nodehash.patch new file mode 100644 index 00000000..e35ebf4b --- /dev/null +++ b/debian/patches/0016-ext2fs-use-a-seperate-lock-to-protect-nodehash.patch @@ -0,0 +1,190 @@ +From 0e33b5953e70f4cae1ec443c2825ce649e379d05 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Tue, 13 May 2014 13:09:15 +0200 +Subject: [PATCH 16/27] ext2fs: use a seperate lock to protect nodehash + +Previously, ext2fs used diskfs_node_refcnt_lock to serialize access to +the nodehash. + +Use a separate lock to protect nodehash. Adjust the reference +counting accordingly. Every node in the nodehash carries a light +reference. When we are asked to give up that light reference, we +reacquire our lock momentarily to check whether someone else +reacquired a reference through the nodehash. + +* ext2fs/inode.c (nodehash_lock): New lock. +(diskfs_cached_lookup): Use a separate lock to protect nodehash. +Adjust the reference counting accordingly. +(ifind): Likewise. +(diskfs_node_iterate): Likewise. +(diskfs_node_norefs): Move the code removing the node from nodehash... +(diskfs_try_dropping_softrefs): ... here, where we check whether +someone reacquired a reference, and if so hold on to our light +reference. +--- + ext2fs/inode.c | 68 ++++++++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 50 insertions(+), 18 deletions(-) + +diff --git a/ext2fs/inode.c b/ext2fs/inode.c +index ed78265..aa070a2 100644 +--- a/ext2fs/inode.c ++++ b/ext2fs/inode.c +@@ -46,8 +46,18 @@ + #define INOHASH(ino) (((unsigned)(ino))%INOHSZ) + #endif + ++/* The nodehash is a cache of nodes. ++ ++ Access to nodehash and nodehash_nr_items is protected by ++ nodehash_lock. ++ ++ Every node in the nodehash carries a light reference. When we are ++ asked to give up that light reference, we reacquire our lock ++ momentarily to check whether someone else reacquired a reference ++ through the nodehash. */ + static struct node *nodehash[INOHSZ]; + static size_t nodehash_nr_items; ++static pthread_mutex_t nodehash_lock = PTHREAD_MUTEX_INITIALIZER; + + static error_t read_node (struct node *np); + +@@ -71,12 +81,12 @@ diskfs_cached_lookup (ino_t inum, struct node **npp) + struct node *np; + struct disknode *dn; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_mutex_lock (&nodehash_lock); + for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) + if (np->cache_id == inum) + { +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ diskfs_nref (np); ++ pthread_mutex_unlock (&nodehash_lock); + pthread_mutex_lock (&np->lock); + *npp = np; + return 0; +@@ -86,7 +96,7 @@ diskfs_cached_lookup (ino_t inum, struct node **npp) + dn = malloc (sizeof (struct disknode)); + if (! dn) + { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&nodehash_lock); + return ENOMEM; + } + dn->dirents = 0; +@@ -107,9 +117,10 @@ diskfs_cached_lookup (ino_t inum, struct node **npp) + dn->hnext->dn->hprevp = &dn->hnext; + dn->hprevp = &nodehash[INOHASH(inum)]; + nodehash[INOHASH(inum)] = np; ++ diskfs_nref_light (np); + nodehash_nr_items += 1; + +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&nodehash_lock); + + /* Get the contents of NP off disk. */ + err = read_node (np); +@@ -140,14 +151,13 @@ ifind (ino_t inum) + { + struct node *np; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_mutex_lock (&nodehash_lock); + for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) + { + if (np->cache_id != inum) + continue; + +- assert (np->references); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&nodehash_lock); + return np; + } + assert (0); +@@ -158,11 +168,6 @@ ifind (ino_t inum) + void + diskfs_node_norefs (struct node *np) + { +- *np->dn->hprevp = np->dn->hnext; +- if (np->dn->hnext) +- np->dn->hnext->dn->hprevp = np->dn->hprevp; +- nodehash_nr_items -= 1; +- + if (np->dn->dirents) + free (np->dn->dirents); + assert (!np->dn->pager); +@@ -180,6 +185,33 @@ diskfs_node_norefs (struct node *np) + void + diskfs_try_dropping_softrefs (struct node *np) + { ++ pthread_mutex_lock (&nodehash_lock); ++ if (np->dn->hnext != NULL) ++ { ++ /* Check if someone reacquired a reference through the ++ nodehash. */ ++ unsigned int references; ++ pthread_spin_lock (&diskfs_node_refcnt_lock); ++ references = np->references; ++ pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ ++ if (references > 0) ++ { ++ /* A reference was reacquired through a hash table lookup. ++ It's fine, we didn't touch anything yet. */ ++ pthread_mutex_unlock (&nodehash_lock); ++ return; ++ } ++ ++ *np->dn->hprevp = np->dn->hnext; ++ if (np->dn->hnext) ++ np->dn->hnext->dn->hprevp = np->dn->hprevp; ++ np->dn->hnext = NULL; ++ nodehash_nr_items -= 1; ++ diskfs_nrele_light (np); ++ } ++ pthread_mutex_unlock (&nodehash_lock); ++ + drop_pager_softrefs (np); + } + +@@ -556,12 +588,12 @@ diskfs_node_iterate (error_t (*fun)(struct node *)) + size_t num_nodes; + struct node *node, **node_list, **p; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_mutex_lock (&nodehash_lock); + + /* We must copy everything from the hash table into another data structure + to avoid running into any problems with the hash-table being modified + during processing (normally we delegate access to hash-table with +- diskfs_node_refcnt_lock, but we can't hold this while locking the ++ nodehash_lock, but we can't hold this while locking the + individual node locks). */ + num_nodes = nodehash_nr_items; + +@@ -570,7 +602,7 @@ diskfs_node_iterate (error_t (*fun)(struct node *)) + node_list = malloc (num_nodes * sizeof (struct node *)); + if (node_list == NULL) + { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&nodehash_lock); + ext2_debug ("unable to allocate temporary node table"); + return ENOMEM; + } +@@ -580,10 +612,10 @@ diskfs_node_iterate (error_t (*fun)(struct node *)) + for (node = nodehash[n]; node; node = node->dn->hnext) + { + *p++ = node; +- node->references++; ++ diskfs_nref (node); + } + +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&nodehash_lock); + + p = node_list; + while (num_nodes-- > 0) +-- +2.0.0.rc2 + diff --git a/debian/patches/0017-fatfs-use-a-seperate-lock-to-protect-nodehash.patch b/debian/patches/0017-fatfs-use-a-seperate-lock-to-protect-nodehash.patch new file mode 100644 index 00000000..339b2613 --- /dev/null +++ b/debian/patches/0017-fatfs-use-a-seperate-lock-to-protect-nodehash.patch @@ -0,0 +1,222 @@ +From 68ad61182dd79be1e67cf191bab28c1e6af610aa Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Tue, 13 May 2014 15:14:53 +0200 +Subject: [PATCH 17/27] fatfs: use a seperate lock to protect nodehash + +Previously, fatfs used diskfs_node_refcnt_lock to serialize access to +the nodehash. + +Use a separate lock to protect nodehash. Adjust the reference +counting accordingly. Every node in the nodehash carries a light +reference. When we are asked to give up that light reference, we +reacquire our lock momentarily to check whether someone else +reacquired a reference through the nodehash. + +* fatfs/inode.c (nodehash_lock): New lock. +(diskfs_cached_lookup): Use a separate lock to protect nodehash. +Adjust the reference counting accordingly. +(ifind): Likewise. +(diskfs_node_iterate): Likewise. +(diskfs_node_norefs): Move the code removing the node from nodehash... +(diskfs_try_dropping_softrefs): ... here, where we check whether +someone reacquired a reference, and if so hold on to our light +reference. +--- + fatfs/inode.c | 81 +++++++++++++++++++++++++++++++++++++++++------------------ + 1 file changed, 57 insertions(+), 24 deletions(-) + +diff --git a/fatfs/inode.c b/fatfs/inode.c +index ed6f3f0..8b3385b 100644 +--- a/fatfs/inode.c ++++ b/fatfs/inode.c +@@ -44,8 +44,18 @@ + #define INOHASH(ino) (((unsigned)(ino))%INOHSZ) + #endif + ++/* The nodehash is a cache of nodes. ++ ++ Access to nodehash and nodehash_nr_items is protected by ++ nodehash_lock. ++ ++ Every node in the nodehash carries a light reference. When we are ++ asked to give up that light reference, we reacquire our lock ++ momentarily to check whether someone else reacquired a reference ++ through the nodehash. */ + static struct node *nodehash[INOHSZ]; + static size_t nodehash_nr_items; ++static pthread_mutex_t nodehash_lock = PTHREAD_MUTEX_INITIALIZER; + + static error_t read_node (struct node *np, vm_address_t buf); + +@@ -67,12 +77,12 @@ diskfs_cached_lookup (ino64_t inum, struct node **npp) + struct node *np; + struct disknode *dn; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_mutex_lock (&nodehash_lock); + for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) + if (np->cache_id == inum) + { +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ diskfs_nref (np); ++ pthread_mutex_unlock (&nodehash_lock); + pthread_mutex_lock (&np->lock); + *npp = np; + return 0; +@@ -82,7 +92,7 @@ diskfs_cached_lookup (ino64_t inum, struct node **npp) + dn = malloc (sizeof (struct disknode)); + if (! dn) + { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&nodehash_lock); + return ENOMEM; + } + dn->pager = 0; +@@ -107,10 +117,11 @@ diskfs_cached_lookup (ino64_t inum, struct node **npp) + dn->hnext->dn->hprevp = &dn->hnext; + dn->hprevp = &nodehash[INOHASH(inum)]; + nodehash[INOHASH(inum)] = np; ++ diskfs_nref_light (np); + nodehash_nr_items += 1; + +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- ++ pthread_mutex_unlock (&nodehash_lock); ++ + /* Get the contents of NP off disk. */ + err = read_node (np, 0); + +@@ -133,12 +144,12 @@ diskfs_cached_lookup_in_dirbuf (int inum, struct node **npp, vm_address_t buf) + struct node *np; + struct disknode *dn; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_mutex_lock (&nodehash_lock); + for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) + if (np->cache_id == inum) + { +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ diskfs_nref (np); ++ pthread_mutex_unlock (&nodehash_lock); + pthread_mutex_lock (&np->lock); + *npp = np; + return 0; +@@ -148,7 +159,7 @@ diskfs_cached_lookup_in_dirbuf (int inum, struct node **npp, vm_address_t buf) + dn = malloc (sizeof (struct disknode)); + if (! dn) + { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&nodehash_lock); + return ENOMEM; + } + dn->pager = 0; +@@ -173,10 +184,11 @@ diskfs_cached_lookup_in_dirbuf (int inum, struct node **npp, vm_address_t buf) + dn->hnext->dn->hprevp = &dn->hnext; + dn->hprevp = &nodehash[INOHASH(inum)]; + nodehash[INOHASH(inum)] = np; ++ diskfs_nref_light (np); + nodehash_nr_items += 1; + +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- ++ pthread_mutex_unlock (&nodehash_lock); ++ + /* Get the contents of NP off disk. */ + err = read_node (np, buf); + +@@ -196,14 +208,13 @@ ifind (ino_t inum) + { + struct node *np; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_mutex_lock (&nodehash_lock); + for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) + { + if (np->cache_id != inum) + continue; + +- assert (np->references); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&nodehash_lock); + return np; + } + assert (0); +@@ -216,11 +227,6 @@ diskfs_node_norefs (struct node *np) + { + struct cluster_chain *last = np->dn->first; + +- *np->dn->hprevp = np->dn->hnext; +- if (np->dn->hnext) +- np->dn->hnext->dn->hprevp = np->dn->hprevp; +- nodehash_nr_items -= 1; +- + while (last) + { + struct cluster_chain *next = last->next; +@@ -251,6 +257,33 @@ diskfs_node_norefs (struct node *np) + void + diskfs_try_dropping_softrefs (struct node *np) + { ++ pthread_mutex_lock (&nodehash_lock); ++ if (np->dn->hnext != NULL) ++ { ++ /* Check if someone reacquired a reference through the ++ nodehash. */ ++ unsigned int references; ++ pthread_spin_lock (&diskfs_node_refcnt_lock); ++ references = np->references; ++ pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ ++ if (references > 0) ++ { ++ /* A reference was reacquired through a hash table lookup. ++ It's fine, we didn't touch anything yet. */ ++ pthread_mutex_unlock (&nodehash_lock); ++ return; ++ } ++ ++ *np->dn->hprevp = np->dn->hnext; ++ if (np->dn->hnext) ++ np->dn->hnext->dn->hprevp = np->dn->hprevp; ++ np->dn->hnext = NULL; ++ nodehash_nr_items -= 1; ++ diskfs_nrele_light (np); ++ } ++ pthread_mutex_unlock (&nodehash_lock); ++ + drop_pager_softrefs (np); + } + +@@ -554,12 +587,12 @@ diskfs_node_iterate (error_t (*fun)(struct node *)) + size_t num_nodes; + struct node *node, **node_list, **p; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_mutex_lock (&nodehash_lock); + + /* We must copy everything from the hash table into another data structure + to avoid running into any problems with the hash-table being modified + during processing (normally we delegate access to hash-table with +- diskfs_node_refcnt_lock, but we can't hold this while locking the ++ nodehash_lock, but we can't hold this while locking the + individual node locks). */ + + num_nodes = nodehash_nr_items; +@@ -570,10 +603,10 @@ diskfs_node_iterate (error_t (*fun)(struct node *)) + for (node = nodehash[n]; node; node = node->dn->hnext) + { + *p++ = node; +- node->references++; ++ diskfs_nref (node); + } + +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&nodehash_lock); + + p = node_list; + while (num_nodes-- > 0) +-- +2.0.0.rc2 + diff --git a/debian/patches/0018-isofs-use-a-seperate-lock-to-protect-node_cache.patch b/debian/patches/0018-isofs-use-a-seperate-lock-to-protect-node_cache.patch new file mode 100644 index 00000000..423d6f49 --- /dev/null +++ b/debian/patches/0018-isofs-use-a-seperate-lock-to-protect-node_cache.patch @@ -0,0 +1,222 @@ +From bde66c2b4cf268fbf3c0aeca66d3187320706673 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Tue, 13 May 2014 15:16:31 +0200 +Subject: [PATCH 18/27] isofs: use a seperate lock to protect node_cache + +Previously, isofs used diskfs_node_refcnt_lock to serialize access to +the node_cache. + +Use a separate lock to protect node_cache. Adjust the reference +counting accordingly. Every node in the node_cache carries a light +reference. When we are asked to give up that light reference, we +reacquire our lock momentarily to check whether someone else +reacquired a reference through the node_cache. + +* isofs/inode.c (node_cache_lock): New lock. +(inode_cache_find): Use a separate lock to protect node_cache. +Adjust the reference counting accordingly. +(diskfs_cached_lookup): Likewise. +(load_inode): Likewise. +(cache_inode): Update comment accordingly. +(diskfs_node_iterate): Likewise. +(diskfs_node_norefs): Move the code removing the node from node_cache... +(diskfs_try_dropping_softrefs): ... here, where we check whether +someone reacquired a reference, and if so hold on to our light +reference. +--- + isofs/inode.c | 72 +++++++++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 53 insertions(+), 19 deletions(-) + +diff --git a/isofs/inode.c b/isofs/inode.c +index cdc05ae..4f22086 100644 +--- a/isofs/inode.c ++++ b/isofs/inode.c +@@ -48,9 +48,19 @@ struct node_cache + struct node *np; /* if live */ + }; + ++/* The node_cache is a cache of nodes. ++ ++ Access to node_cache, node_cache_size, and node_cache_alloced is ++ protected by node_cache_lock. ++ ++ Every node in the node_cache carries a light reference. When we ++ are asked to give up that light reference, we reacquire our lock ++ momentarily to check whether someone else reacquired a reference ++ through the node_cache. */ + static int node_cache_size = 0; + static int node_cache_alloced = 0; + struct node_cache *node_cache = 0; ++static pthread_mutex_t node_cache_lock = PTHREAD_MUTEX_INITIALIZER; + + /* Forward */ + static error_t read_disknode (struct node *, +@@ -58,7 +68,7 @@ static error_t read_disknode (struct node *, + + + /* See if node with identifier ID is in the cache. If so, return it, +- with one additional reference. diskfs_node_refcnt_lock must be held ++ with one additional reference. node_cache_lock must be held + on entry to the call, and will be released iff the node was found + in the cache. */ + void +@@ -71,8 +81,8 @@ inode_cache_find (off_t id, struct node **npp) + && node_cache[i].np) + { + *npp = node_cache[i].np; +- (*npp)->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ diskfs_nref (*npp); ++ pthread_mutex_unlock (&node_cache_lock); + pthread_mutex_lock (&(*npp)->lock); + return; + } +@@ -92,7 +102,7 @@ use_file_start_id (struct dirrect *record, struct rrip_lookup *rr) + } + + /* Enter NP into the cache. The directory entry we used is DR, the +- cached Rock-Ridge info RR. diskfs_node_refcnt_lock must be held. */ ++ cached Rock-Ridge info RR. node_cache_lock must be held. */ + void + cache_inode (struct node *np, struct dirrect *record, + struct rrip_lookup *rr) +@@ -155,7 +165,7 @@ diskfs_cached_lookup (ino_t id, struct node **npp) + to avoid presenting zero cache ID's. */ + id--; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_mutex_lock (&node_cache_lock); + assert (id < node_cache_size); + + np = node_cache[id].np; +@@ -174,7 +184,7 @@ diskfs_cached_lookup (ino_t id, struct node **npp) + dn = malloc (sizeof (struct disknode)); + if (!dn) + { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&node_cache_lock); + release_rrip (&rr); + return ENOMEM; + } +@@ -185,16 +195,17 @@ diskfs_cached_lookup (ino_t id, struct node **npp) + if (!np) + { + free (dn); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&node_cache_lock); + release_rrip (&rr); + return ENOMEM; + } + np->cache_id = id + 1; /* see above for rationale for increment */ + pthread_mutex_lock (&np->lock); + c->np = np; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ diskfs_nref_light (np); ++ pthread_mutex_unlock (&node_cache_lock); + +- err = read_disknode (np, node_cache[id].dr, &rr); ++ err = read_disknode (np, dn->dr, &rr); + if (!err) + *npp = np; + +@@ -204,8 +215,8 @@ diskfs_cached_lookup (ino_t id, struct node **npp) + } + + +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ diskfs_nref (np); ++ pthread_mutex_unlock (&node_cache_lock); + pthread_mutex_lock (&np->lock); + *npp = np; + return 0; +@@ -315,7 +326,7 @@ load_inode (struct node **npp, struct dirrect *record, + if (rr->valid & VALID_CL) + record = rr->realdirent; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_mutex_lock (&node_cache_lock); + + /* First check the cache */ + if (use_file_start_id (record, rr)) +@@ -325,7 +336,7 @@ load_inode (struct node **npp, struct dirrect *record, + + if (*npp) + { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&node_cache_lock); + return 0; + } + +@@ -333,7 +344,7 @@ load_inode (struct node **npp, struct dirrect *record, + dn = malloc (sizeof (struct disknode)); + if (!dn) + { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&node_cache_lock); + return ENOMEM; + } + dn->fileinfo = 0; +@@ -344,14 +355,14 @@ load_inode (struct node **npp, struct dirrect *record, + if (!np) + { + free (dn); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&node_cache_lock); + return ENOMEM; + } + + pthread_mutex_lock (&np->lock); + + cache_inode (np, record, rr); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&node_cache_lock); + + err = read_disknode (np, record, rr); + *npp = np; +@@ -505,9 +516,6 @@ error_t (*diskfs_read_symlink_hook) (struct node *, char *) + void + diskfs_node_norefs (struct node *np) + { +- assert (node_cache[np->cache_id - 1].np == np); +- node_cache[np->cache_id - 1].np = 0; +- + if (np->dn->translator) + free (np->dn->translator); + +@@ -521,6 +529,32 @@ diskfs_node_norefs (struct node *np) + void + diskfs_try_dropping_softrefs (struct node *np) + { ++ pthread_mutex_lock (&node_cache_lock); ++ if (np->cache_id != 0) ++ { ++ assert (node_cache[np->cache_id - 1].np == np); ++ ++ /* Check if someone reacquired a reference through the ++ node_cache. */ ++ unsigned int references; ++ pthread_spin_lock (&diskfs_node_refcnt_lock); ++ references = np->references; ++ pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ ++ if (references > 0) ++ { ++ /* A reference was reacquired through a hash table lookup. ++ It's fine, we didn't touch anything yet. */ ++ pthread_mutex_unlock (&node_cache_lock); ++ return; ++ } ++ ++ node_cache[np->cache_id - 1].np = 0; ++ np->cache_id = 0; ++ diskfs_nrele_light (np); ++ } ++ pthread_mutex_unlock (&node_cache_lock); ++ + drop_pager_softrefs (np); + } + +-- +2.0.0.rc2 + diff --git a/debian/patches/0019-tmpfs-use-a-seperate-lock-to-protect-all_nodes.patch b/debian/patches/0019-tmpfs-use-a-seperate-lock-to-protect-all_nodes.patch new file mode 100644 index 00000000..c168cb57 --- /dev/null +++ b/debian/patches/0019-tmpfs-use-a-seperate-lock-to-protect-all_nodes.patch @@ -0,0 +1,263 @@ +From 84f3fbd92b28f4d61baa93361f49817df2e0e482 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Tue, 13 May 2014 15:35:42 +0200 +Subject: [PATCH 19/27] tmpfs: use a seperate lock to protect all_nodes + +Previously, tmpfs used diskfs_node_refcnt_lock to serialize access to +the all_nodes and some other related global state related to memory +consumption. + +Use a separate lock to protect all_nodes, and to the state related to +memory consumption as this is updated during insertion and removal +operations. Adjust the reference counting accordingly. Every node in +the all_nodes carries a light reference. When we are asked to give up +that light reference, we reacquire our lock momentarily to check +whether someone else reacquired a reference through the all_nodes. + +* tmpfs/node.c (all_nodes_lock): New lock. +(diskfs_alloc_node): Use a separate lock to protect all_nodes. +Adjust the reference counting accordingly. +(diskfs_free_node): Likewise. +(diskfs_cached_lookup):Likewise. +(diskfs_node_iterate): Likewise. +(diskfs_node_norefs): Move the code removing the node from all_nodes... +(diskfs_try_dropping_softrefs): ... here, where we check whether +someone reacquired a reference, and if so hold on to our light +reference. +* tmpfs/tmpfs.c (diskfs_set_statfs): Use all_nodes_lock. +* tmpfs/tmpfs.h (all_nodes_lock): New declaration. +(adjust_used): Use all_nodes_lock. +--- + tmpfs/node.c | 76 +++++++++++++++++++++++++++++++++++++++++++---------------- + tmpfs/tmpfs.c | 4 ++-- + tmpfs/tmpfs.h | 6 +++-- + 3 files changed, 62 insertions(+), 24 deletions(-) + +diff --git a/tmpfs/node.c b/tmpfs/node.c +index acc029a..74971ec 100644 +--- a/tmpfs/node.c ++++ b/tmpfs/node.c +@@ -29,8 +29,18 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + unsigned int num_files; + static unsigned int gen; + ++/* all_nodes is a cache of nodes. ++ ++ Access to all_nodes and all_nodes_nr_items is protected by ++ all_nodes_lock. ++ ++ Every node in the all_nodes carries a light reference. When we are ++ asked to give up that light reference, we reacquire our lock ++ momentarily to check whether someone else reacquired a reference ++ through the all_nodes. */ + struct node *all_nodes; + static size_t all_nodes_nr_items; ++pthread_mutex_t all_nodes_lock = PTHREAD_MUTEX_INITIALIZER; + + error_t + diskfs_alloc_node (struct node *dp, mode_t mode, struct node **npp) +@@ -40,18 +50,18 @@ diskfs_alloc_node (struct node *dp, mode_t mode, struct node **npp) + dn = calloc (1, sizeof *dn); + if (dn == 0) + return ENOSPC; +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_mutex_lock (&all_nodes_lock); + if (round_page (tmpfs_space_used + sizeof *dn) / vm_page_size + > tmpfs_page_limit) + { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&all_nodes_lock); + free (dn); + return ENOSPC; + } + dn->gen = gen++; + ++num_files; + tmpfs_space_used += sizeof *dn; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&all_nodes_lock); + + dn->type = IFTODT (mode & S_IFMT); + return diskfs_cached_lookup ((ino_t) (uintptr_t) dn, npp); +@@ -75,15 +85,18 @@ diskfs_free_node (struct node *np, mode_t mode) + free (np->dn->u.lnk); + break; + } ++ ++ pthread_mutex_lock (&all_nodes_lock); + *np->dn->hprevp = np->dn->hnext; + if (np->dn->hnext != 0) + np->dn->hnext->dn->hprevp = np->dn->hprevp; + all_nodes_nr_items -= 1; +- free (np->dn); +- np->dn = 0; +- + --num_files; + tmpfs_space_used -= sizeof *np->dn; ++ pthread_mutex_unlock (&all_nodes_lock); ++ ++ free (np->dn); ++ np->dn = 0; + } + + void +@@ -117,14 +130,6 @@ diskfs_node_norefs (struct node *np) + np->dn->u.chr = np->dn_stat.st_rdev; + break; + } +- +- /* Remove this node from the cache list rooted at `all_nodes'. */ +- *np->dn->hprevp = np->dn->hnext; +- if (np->dn->hnext != 0) +- np->dn->hnext->dn->hprevp = np->dn->hprevp; +- all_nodes_nr_items -= 1; +- np->dn->hnext = 0; +- np->dn->hprevp = 0; + } + + free (np); +@@ -167,11 +172,14 @@ diskfs_cached_lookup (ino_t inum, struct node **npp) + + assert (npp); + ++ pthread_mutex_lock (&all_nodes_lock); ++ + if (dn->hprevp != 0) /* There is already a node. */ + { + np = *dn->hprevp; + assert (np->dn == dn); + assert (*dn->hprevp == np); ++ pthread_mutex_unlock (&all_nodes_lock); + + diskfs_nref (np); + } +@@ -183,14 +191,14 @@ diskfs_cached_lookup (ino_t inum, struct node **npp) + np = diskfs_make_node (dn); + np->cache_id = (ino_t) (uintptr_t) dn; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); + dn->hnext = all_nodes; + if (dn->hnext) + dn->hnext->dn->hprevp = &dn->hnext; + dn->hprevp = &all_nodes; + all_nodes = np; + all_nodes_nr_items += 1; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ diskfs_nref_light (np); ++ pthread_mutex_unlock (&all_nodes_lock); + + st = &np->dn_stat; + memset (st, 0, sizeof *st); +@@ -229,12 +237,12 @@ diskfs_node_iterate (error_t (*fun) (struct node *)) + size_t num_nodes; + struct node *node, **node_list, **p; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_mutex_lock (&all_nodes_lock); + + /* We must copy everything from the hash table into another data structure + to avoid running into any problems with the hash-table being modified + during processing (normally we delegate access to hash-table with +- diskfs_node_refcnt_lock, but we can't hold this while locking the ++ all_nodes_lock, but we can't hold this while locking the + individual node locks). */ + + num_nodes = all_nodes_nr_items; +@@ -243,10 +251,10 @@ diskfs_node_iterate (error_t (*fun) (struct node *)) + for (node = all_nodes; node != 0; node = node->dn->hnext) + { + *p++ = node; +- node->references++; ++ diskfs_nref (node); + } + +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&all_nodes_lock); + + p = node_list; + while (num_nodes-- > 0) +@@ -272,6 +280,34 @@ diskfs_node_iterate (error_t (*fun) (struct node *)) + void + diskfs_try_dropping_softrefs (struct node *np) + { ++ pthread_mutex_lock (&all_nodes_lock); ++ if (np->dn->hnext != NULL) ++ { ++ /* Check if someone reacquired a reference through the ++ all_nodes. */ ++ unsigned int references; ++ pthread_spin_lock (&diskfs_node_refcnt_lock); ++ references = np->references; ++ pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ ++ if (references > 0) ++ { ++ /* A reference was reacquired through a hash table lookup. ++ It's fine, we didn't touch anything yet. */ ++ pthread_mutex_unlock (&all_nodes_lock); ++ return; ++ } ++ ++ /* Remove this node from the cache list rooted at `all_nodes'. */ ++ *np->dn->hprevp = np->dn->hnext; ++ if (np->dn->hnext != 0) ++ np->dn->hnext->dn->hprevp = np->dn->hprevp; ++ all_nodes_nr_items -= 1; ++ np->dn->hnext = NULL; ++ np->dn->hprevp = NULL; ++ diskfs_nrele_light (np); ++ } ++ pthread_mutex_unlock (&all_nodes_lock); + } + + /* The user must define this funcction. Node NP has some light +diff --git a/tmpfs/tmpfs.c b/tmpfs/tmpfs.c +index a45d343..024b818 100644 +--- a/tmpfs/tmpfs.c ++++ b/tmpfs/tmpfs.c +@@ -67,10 +67,10 @@ diskfs_set_statfs (struct statfs *st) + st->f_bsize = vm_page_size; + st->f_blocks = tmpfs_page_limit; + +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_mutex_lock (&all_nodes_lock); + st->f_files = num_files; + pages = round_page (tmpfs_space_used) / vm_page_size; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&all_nodes_lock); + + st->f_bfree = pages < tmpfs_page_limit ? tmpfs_page_limit - pages : 0; + st->f_bavail = st->f_bfree; +diff --git a/tmpfs/tmpfs.h b/tmpfs/tmpfs.h +index b3c636d..187249e 100644 +--- a/tmpfs/tmpfs.h ++++ b/tmpfs/tmpfs.h +@@ -24,6 +24,7 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <sys/types.h> + #include <dirent.h> + #include <stdint.h> ++#include <pthread.h> + + struct disknode + { +@@ -71,15 +72,16 @@ struct tmpfs_dirent + + extern unsigned int num_files; + extern off_t tmpfs_page_limit, tmpfs_space_used; ++extern pthread_mutex_t all_nodes_lock; + + extern mach_port_t default_pager; + + static inline void + adjust_used (off_t change) + { +- pthread_spin_lock (&diskfs_node_refcnt_lock); ++ pthread_mutex_lock (&all_nodes_lock); + tmpfs_space_used += change; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ pthread_mutex_unlock (&all_nodes_lock); + } + + #endif +-- +2.0.0.rc2 + diff --git a/debian/patches/0020-libdiskfs-lock-less-reference-counting-of-nodes.patch b/debian/patches/0020-libdiskfs-lock-less-reference-counting-of-nodes.patch new file mode 100644 index 00000000..2142eb34 --- /dev/null +++ b/debian/patches/0020-libdiskfs-lock-less-reference-counting-of-nodes.patch @@ -0,0 +1,531 @@ +From 2bbb125dfe14d575401a924f4d5ac74c507cfc4b Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Wed, 14 May 2014 11:19:35 +0200 +Subject: [PATCH 20/27] libdiskfs: lock-less reference counting of nodes + +* libdiskfs/diskfs.h (struct node): Use refcounts_t for reference counting. +(diskfs_node_refcnt_lock): Remove. +(diskfs_node_norefs,diskfs_drop_node): Change comments accordingly. +* libdiskfs/init-init.c: Likewise. +* libdiskfs/node-drop.c: Likewise. +* libdiskfs/node-make.c: Likewise. +* libdiskfs/node-nput.c: Likewise. +* libdiskfs/node-nputl.c: Likewise. +* libdiskfs/node-nref.c: Likewise. +* libdiskfs/node-nrefl.c: Likewise. +* libdiskfs/node-nrele.c: Likewise. +* libdiskfs/node-nrelel.c: Likewise. +* ext2fs/inode.c: Likewise. +* fatfs/inode.c: Likewise. +* isofs/inode.c: Likewise. +* tmpfs/node.c: Likewise. +* doc/hurd.texi: Likewise. +--- + doc/hurd.texi | 11 ++-------- + ext2fs/inode.c | 9 +++------ + fatfs/inode.c | 21 ++++++------------- + isofs/inode.c | 9 +++------ + libdiskfs/diskfs.h | 12 ++++------- + libdiskfs/init-init.c | 2 -- + libdiskfs/node-drop.c | 9 +++------ + libdiskfs/node-make.c | 3 +-- + libdiskfs/node-nput.c | 54 +++++++++++++++++++------------------------------ + libdiskfs/node-nputl.c | 12 ++++------- + libdiskfs/node-nref.c | 10 ++++----- + libdiskfs/node-nrefl.c | 6 +++--- + libdiskfs/node-nrele.c | 48 +++++++++++++++++++++---------------------- + libdiskfs/node-nrelel.c | 9 +++------ + tmpfs/node.c | 9 +++------ + 15 files changed, 83 insertions(+), 141 deletions(-) + +diff --git a/doc/hurd.texi b/doc/hurd.texi +index 07ddfb4..6cafdb9 100644 +--- a/doc/hurd.texi ++++ b/doc/hurd.texi +@@ -3780,10 +3780,6 @@ new thread and (eventually) get rid of the old one; the old thread won't + do any more syncs, regardless. + @end deftypefun + +-@deftypevar spin_lock_t diskfs_node_refcnt_lock +-Pager reference count lock. +-@end deftypevar +- + @deftypevar int diskfs_readonly + Set to zero if the filesystem is currently writable. + @end deftypevar +@@ -3818,9 +3814,7 @@ Every file or directory is a diskfs @dfn{node}. The following functions + help your diskfs callbacks manage nodes and their references: + + @deftypefun void diskfs_drop_node (@w{struct node *@var{np}}) +-Node @var{np} now has no more references; clean all state. The +-@var{diskfs_node_refcnt_lock} must be held, and will be released upon +-return. @var{np} must be locked. ++Node @var{np} now has no more references; clean all state. + @end deftypefun + + @deftypefun void diskfs_node_update (@w{struct node *@var{np}}, @w{int @var{wait}}) +@@ -4236,14 +4230,13 @@ without real users. + @deftypefun void diskfs_try_dropping_softrefs (@w{struct node *@var{np}}) + Node @var{np} has some light references, but has just lost its last hard + references. Take steps so that if any light references can be freed, +-they are. Both @var{diskfs_node_refcnt_lock} and @var{np} are locked. ++they are. @var{np} is locked. + This function will be called after @code{diskfs_lost_hardrefs}. + @end deftypefun + + @deftypefun void diskfs_node_norefs (@w{struct node *@var{np}}) + Node @var{np} has no more references; free local state, including + @code{*@var{np}} if it shouldn't be retained. +-@var{diskfs_node_refcnt_lock} is held. + @end deftypefun + + @deftypefun error_t diskfs_set_hypermetadata (@w{int @var{wait}}, @w{int @var{clean}}) +diff --git a/ext2fs/inode.c b/ext2fs/inode.c +index aa070a2..7ec343f 100644 +--- a/ext2fs/inode.c ++++ b/ext2fs/inode.c +@@ -190,12 +190,9 @@ diskfs_try_dropping_softrefs (struct node *np) + { + /* Check if someone reacquired a reference through the + nodehash. */ +- unsigned int references; +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- references = np->references; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- +- if (references > 0) ++ struct references result; ++ refcounts_references (&np->refcounts, &result); ++ if (result.hard > 0 || result.weak > 1) + { + /* A reference was reacquired through a hash table lookup. + It's fine, we didn't touch anything yet. */ +diff --git a/fatfs/inode.c b/fatfs/inode.c +index 8b3385b..d5750b4 100644 +--- a/fatfs/inode.c ++++ b/fatfs/inode.c +@@ -237,14 +237,8 @@ diskfs_node_norefs (struct node *np) + if (np->dn->translator) + free (np->dn->translator); + +- /* It is safe to unlock diskfs_node_refcnt_lock here for a while because +- all references to the node have been deleted. */ + if (np->dn->dirnode) +- { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- diskfs_nrele (np->dn->dirnode); +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- } ++ diskfs_nrele (np->dn->dirnode); + + assert (!np->dn->pager); + +@@ -262,12 +256,9 @@ diskfs_try_dropping_softrefs (struct node *np) + { + /* Check if someone reacquired a reference through the + nodehash. */ +- unsigned int references; +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- references = np->references; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- +- if (references > 0) ++ struct references result; ++ refcounts_references (&np->refcounts, &result); ++ if (result.hard > 0 || result.weak > 1) + { + /* A reference was reacquired through a hash table lookup. + It's fine, we didn't touch anything yet. */ +@@ -372,7 +363,7 @@ read_node (struct node *np, vm_address_t buf) + /* Files in fatfs depend on the directory that hold the file. */ + np->dn->dirnode = dp; + if (dp) +- dp->references++; ++ refcounts_ref (&dp->refcounts, NULL); + + pthread_rwlock_rdlock (&np->dn->dirent_lock); + +@@ -814,7 +805,7 @@ diskfs_alloc_node (struct node *dir, mode_t mode, struct node **node) + + /* FIXME: We know that readnode couldn't put this in. */ + np->dn->dirnode = dir; +- dir->references++; ++ refcounts_ref (&dir->refcounts, NULL); + + *node = np; + return 0; +diff --git a/isofs/inode.c b/isofs/inode.c +index 4f22086..b36a289 100644 +--- a/isofs/inode.c ++++ b/isofs/inode.c +@@ -536,12 +536,9 @@ diskfs_try_dropping_softrefs (struct node *np) + + /* Check if someone reacquired a reference through the + node_cache. */ +- unsigned int references; +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- references = np->references; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- +- if (references > 0) ++ struct references result; ++ refcounts_references (&np->refcounts, &result); ++ if (result.hard > 0 || result.weak > 1) + { + /* A reference was reacquired through a hash table lookup. + It's fine, we didn't touch anything yet. */ +diff --git a/libdiskfs/diskfs.h b/libdiskfs/diskfs.h +index acd1910..e10e75e 100644 +--- a/libdiskfs/diskfs.h ++++ b/libdiskfs/diskfs.h +@@ -96,8 +96,7 @@ struct node + + pthread_mutex_t lock; + +- int references; /* hard references */ +- int light_references; /* light references */ ++ refcounts_t refcounts; + + mach_port_t sockaddr; /* address for S_IFSOCK shortcut */ + +@@ -201,8 +200,6 @@ extern volatile struct mapped_time_value *diskfs_mtime; + be done by format independent code. */ + extern int diskfs_synchronous; + +-extern pthread_spinlock_t diskfs_node_refcnt_lock; +- + extern int pager_port_type; + + /* Whether the filesystem is currently writable or not. */ +@@ -451,7 +448,7 @@ error_t diskfs_alloc_node (struct node *dp, mode_t mode, struct node **np); + void diskfs_free_node (struct node *np, mode_t mode); + + /* Node NP has no more references; free local state, including *NP +- if it isn't to be retained. diskfs_node_refcnt_lock is held. */ ++ if it isn't to be retained. */ + void diskfs_node_norefs (struct node *np); + + /* The user must define this function. Node NP has some light +@@ -614,9 +611,8 @@ void diskfs_spawn_first_thread (ports_demuxer_type demuxer); + diskfs_init_completed once it has a valid proc and auth port. */ + void diskfs_start_bootstrap (); + +-/* Node NP now has no more references; clean all state. The +- _diskfs_node_refcnt_lock must be held, and will be released +- upon return. NP must be locked. */ ++/* Node NP now has no more references; clean all state. NP must be ++ locked. */ + void diskfs_drop_node (struct node *np); + + /* Set on disk fields from NP->dn_stat; update ctime, atime, and mtime +diff --git a/libdiskfs/init-init.c b/libdiskfs/init-init.c +index 35be7ed..976b4e8 100644 +--- a/libdiskfs/init-init.c ++++ b/libdiskfs/init-init.c +@@ -37,8 +37,6 @@ int _diskfs_noatime; + + struct hurd_port _diskfs_exec_portcell; + +-pthread_spinlock_t diskfs_node_refcnt_lock = PTHREAD_SPINLOCK_INITIALIZER; +- + pthread_spinlock_t _diskfs_control_lock = PTHREAD_SPINLOCK_INITIALIZER; + int _diskfs_ncontrol_ports; + +diff --git a/libdiskfs/node-drop.c b/libdiskfs/node-drop.c +index 83eb590..fab3cfa 100644 +--- a/libdiskfs/node-drop.c ++++ b/libdiskfs/node-drop.c +@@ -31,9 +31,8 @@ free_modreqs (struct modreq *mr) + } + + +-/* Node NP now has no more references; clean all state. The +- diskfs_node_refcnt_lock must be held, and will be released +- upon return. NP must be locked. */ ++/* Node NP now has no more references; clean all state. NP must be ++ locked. */ + void + diskfs_drop_node (struct node *np) + { +@@ -60,8 +59,7 @@ diskfs_drop_node (struct node *np) + and an nput. The next time through, this routine + will notice that the size is zero, and not have to + do anything. */ +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ refcounts_ref (&np->refcounts, NULL); + diskfs_truncate (np, 0); + + /* Force allocsize to zero; if truncate consistently fails this +@@ -94,5 +92,4 @@ diskfs_drop_node (struct node *np) + assert (!np->sockaddr); + + diskfs_node_norefs (np); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); + } +diff --git a/libdiskfs/node-make.c b/libdiskfs/node-make.c +index ff0cc0d..c7ca3b0 100644 +--- a/libdiskfs/node-make.c ++++ b/libdiskfs/node-make.c +@@ -29,8 +29,7 @@ init_node (struct node *np, struct disknode *dn) + np->dn_stat_dirty = 0; + + pthread_mutex_init (&np->lock, NULL); +- np->references = 1; +- np->light_references = 0; ++ refcounts_init (&np->refcounts, 1, 0); + np->owner = 0; + np->sockaddr = MACH_PORT_NULL; + +diff --git a/libdiskfs/node-nput.c b/libdiskfs/node-nput.c +index 5043ad1..2935ae2 100644 +--- a/libdiskfs/node-nput.c ++++ b/libdiskfs/node-nput.c +@@ -26,56 +26,44 @@ + void + diskfs_nput (struct node *np) + { +- int tried_drop_softrefs = 0; ++ struct references result; + +- loop: +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- assert (np->references); +- np->references--; +- if (np->references + np->light_references == 0) +- diskfs_drop_node (np); +- else if (np->references == 0 && !tried_drop_softrefs) +- { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ /* While we call the diskfs_try_dropping_softrefs, we need to hold ++ one reference. We use a weak reference for this purpose, which ++ we acquire by demoting our hard reference to a weak one. */ ++ refcounts_demote (&np->refcounts, &result); + ++ if (result.hard == 0) ++ { + /* This is our cue that something akin to "last process closes file" + in the POSIX.1 sense happened, so make sure any pending node time + updates now happen in a timely fashion. */ + diskfs_set_node_times (np); +- + diskfs_lost_hardrefs (np); + if (!np->dn_stat.st_nlink) + { +- /* There are no links. If there are soft references that +- can be dropped, we can't let them postpone deallocation. +- So attempt to drop them. But that's a user-supplied +- routine, which might result in further recursive calls to +- the ref-counting system. So we have to reacquire our +- reference around the call to forestall disaster. */ +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- + if (np->sockaddr != MACH_PORT_NULL) + { + mach_port_deallocate (mach_task_self (), np->sockaddr); + np->sockaddr = MACH_PORT_NULL; + } + ++ /* There are no links. If there are soft references that ++ can be dropped, we can't let them postpone deallocation. ++ So attempt to drop them. But that's a user-supplied ++ routine, which might result in further recursive calls to ++ the ref-counting system. This is not a problem, as we ++ hold a weak reference ourselves. */ + diskfs_try_dropping_softrefs (np); +- +- /* But there's no value in looping forever in this +- routine; only try to drop soft refs once. */ +- tried_drop_softrefs = 1; +- +- /* Now we can drop the reference back... */ +- goto loop; + } + pthread_mutex_unlock (&np->lock); + } +- else +- { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- pthread_mutex_unlock (&np->lock); +- } ++ ++ /* Finally get rid of our reference. */ ++ refcounts_deref_weak (&np->refcounts, &result); ++ ++ if (result.hard == 0 && result.weak == 0) ++ diskfs_drop_node (np); ++ ++ pthread_mutex_unlock (&np->lock); + } +diff --git a/libdiskfs/node-nputl.c b/libdiskfs/node-nputl.c +index 1959665..8dac16e 100644 +--- a/libdiskfs/node-nputl.c ++++ b/libdiskfs/node-nputl.c +@@ -25,14 +25,10 @@ + void + diskfs_nput_light (struct node *np) + { +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- assert (np->light_references); +- np->light_references--; +- if (np->references + np->light_references == 0) ++ struct references result; ++ refcounts_deref_weak (&np->refcounts, &result); ++ if (result.hard == 0 && result.weak == 0) + diskfs_drop_node (np); + else +- { +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- pthread_mutex_unlock (&np->lock); +- } ++ pthread_mutex_unlock (&np->lock); + } +diff --git a/libdiskfs/node-nref.c b/libdiskfs/node-nref.c +index 13cea05..89ffa4f 100644 +--- a/libdiskfs/node-nref.c ++++ b/libdiskfs/node-nref.c +@@ -26,12 +26,10 @@ + void + diskfs_nref (struct node *np) + { +- int new_hardref; +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- np->references++; +- new_hardref = (np->references == 1); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- if (new_hardref) ++ struct references result; ++ refcounts_ref (&np->refcounts, &result); ++ assert (result.hard > 1 || result.weak > 0); ++ if (result.hard == 1) + { + pthread_mutex_lock (&np->lock); + diskfs_new_hardrefs (np); +diff --git a/libdiskfs/node-nrefl.c b/libdiskfs/node-nrefl.c +index 9692247..b7af409 100644 +--- a/libdiskfs/node-nrefl.c ++++ b/libdiskfs/node-nrefl.c +@@ -24,7 +24,7 @@ + void + diskfs_nref_light (struct node *np) + { +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- np->light_references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ struct references result; ++ refcounts_ref_weak (&np->refcounts, &result); ++ assert (result.hard > 0 || result.weak > 1); + } +diff --git a/libdiskfs/node-nrele.c b/libdiskfs/node-nrele.c +index cc68089..d962846 100644 +--- a/libdiskfs/node-nrele.c ++++ b/libdiskfs/node-nrele.c +@@ -28,38 +28,36 @@ + void + diskfs_nrele (struct node *np) + { +- int tried_drop_softrefs = 0; ++ struct references result; + +- loop: +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- assert (np->references); +- np->references--; +- if (np->references + np->light_references == 0) +- { +- pthread_mutex_lock (&np->lock); +- diskfs_drop_node (np); +- } +- else if (np->references == 0) ++ /* While we call the diskfs_try_dropping_softrefs, we need to hold ++ one reference. We use a weak reference for this purpose, which ++ we acquire by demoting our hard reference to a weak one. */ ++ refcounts_demote (&np->refcounts, &result); ++ ++ if (result.hard == 0) + { + pthread_mutex_lock (&np->lock); +- pthread_spin_unlock (&diskfs_node_refcnt_lock); + diskfs_lost_hardrefs (np); +- if (!np->dn_stat.st_nlink && !tried_drop_softrefs) ++ if (!np->dn_stat.st_nlink) + { +- /* Same issue here as in nput; see that for explanation */ +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- np->references++; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- ++ /* There are no links. If there are soft references that ++ can be dropped, we can't let them postpone deallocation. ++ So attempt to drop them. But that's a user-supplied ++ routine, which might result in further recursive calls to ++ the ref-counting system. This is not a problem, as we ++ hold a weak reference ourselves. */ + diskfs_try_dropping_softrefs (np); +- tried_drop_softrefs = 1; +- +- /* Now we can drop the reference back... */ +- pthread_mutex_unlock (&np->lock); +- goto loop; + } + pthread_mutex_unlock (&np->lock); + } +- else +- pthread_spin_unlock (&diskfs_node_refcnt_lock); ++ ++ /* Finally get rid of our reference. */ ++ refcounts_deref_weak (&np->refcounts, &result); ++ ++ if (result.hard == 0 && result.weak == 0) ++ { ++ pthread_mutex_lock (&np->lock); ++ diskfs_drop_node (np); ++ } + } +diff --git a/libdiskfs/node-nrelel.c b/libdiskfs/node-nrelel.c +index ee53b22..dc4f920 100644 +--- a/libdiskfs/node-nrelel.c ++++ b/libdiskfs/node-nrelel.c +@@ -26,14 +26,11 @@ + void + diskfs_nrele_light (struct node *np) + { +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- assert (np->light_references); +- np->light_references--; +- if (np->references + np->light_references == 0) ++ struct references result; ++ refcounts_deref_weak (&np->refcounts, &result); ++ if (result.hard == 0 && result.weak == 0) + { + pthread_mutex_lock (&np->lock); + diskfs_drop_node (np); + } +- else +- pthread_spin_unlock (&diskfs_node_refcnt_lock); + } +diff --git a/tmpfs/node.c b/tmpfs/node.c +index 74971ec..275bb3c 100644 +--- a/tmpfs/node.c ++++ b/tmpfs/node.c +@@ -285,12 +285,9 @@ diskfs_try_dropping_softrefs (struct node *np) + { + /* Check if someone reacquired a reference through the + all_nodes. */ +- unsigned int references; +- pthread_spin_lock (&diskfs_node_refcnt_lock); +- references = np->references; +- pthread_spin_unlock (&diskfs_node_refcnt_lock); +- +- if (references > 0) ++ struct references result; ++ refcounts_references (&np->refcounts, &result); ++ if (result.hard > 0 || result.weak > 1) + { + /* A reference was reacquired through a hash table lookup. + It's fine, we didn't touch anything yet. */ +-- +2.0.0.rc2 + diff --git a/debian/patches/0021-ext2fs-use-librdxtree-for-the-nodehash.patch b/debian/patches/0021-ext2fs-use-librdxtree-for-the-nodehash.patch new file mode 100644 index 00000000..dbba86c4 --- /dev/null +++ b/debian/patches/0021-ext2fs-use-librdxtree-for-the-nodehash.patch @@ -0,0 +1,308 @@ +From 285dd318d324914f16e8ed858cd92b81870d68a8 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Wed, 14 May 2014 15:30:19 +0200 +Subject: [PATCH 21/27] ext2fs: use librdxtree for the nodehash + +Previously, a lookup of a node through nodehash took O(N / INOHSZ). +With INOHSZ being a constant (512) this is linear in N. Use a radix +tree instead, which gives us a lookup in time linear with the size of +keys. + +* ext2fs/inode.c (INOHSZ, INOHASH): Remove. +(nodehash): Use a radix tree. +(inode_init): Remove. +(diskfs_cached_lookup): Adjust accordingly. +(ifind, diskfs_try_dropping_softrefs, diskfs_node_iterate): Likewise. +* ext2fs/ext2fs.h (struct disknode): Remove list pointers. +* ext2fs/ext2fs.c (main): Drop inode_init. +--- + ext2fs/Makefile | 2 +- + ext2fs/ext2fs.c | 2 - + ext2fs/ext2fs.h | 4 +- + ext2fs/inode.c | 121 +++++++++++++++++++++----------------------------------- + 4 files changed, 48 insertions(+), 81 deletions(-) + +diff --git a/ext2fs/Makefile b/ext2fs/Makefile +index 8d2e68c..f93d696 100644 +--- a/ext2fs/Makefile ++++ b/ext2fs/Makefile +@@ -23,7 +23,7 @@ target = ext2fs + SRCS = balloc.c dir.c ext2fs.c getblk.c hyper.c ialloc.c \ + inode.c pager.c pokel.c truncate.c storeinfo.c msg.c xinl.c + OBJS = $(SRCS:.c=.o) +-HURDLIBS = diskfs pager iohelp fshelp store ports ihash shouldbeinlibc ++HURDLIBS = diskfs pager iohelp fshelp store ports ihash rdxtree shouldbeinlibc + OTHERLIBS = -lpthread $(and $(HAVE_LIBBZ2),-lbz2) $(and $(HAVE_LIBZ),-lz) + + include ../Makeconf +diff --git a/ext2fs/ext2fs.c b/ext2fs/ext2fs.c +index 128b6ed..0409dfb 100644 +--- a/ext2fs/ext2fs.c ++++ b/ext2fs/ext2fs.c +@@ -185,8 +185,6 @@ main (int argc, char **argv) + + map_hypermetadata (); + +- inode_init (); +- + /* Set diskfs_root_node to the root inode. */ + err = diskfs_cached_lookup (EXT2_ROOT_INO, &diskfs_root_node); + if (err) +diff --git a/ext2fs/ext2fs.h b/ext2fs/ext2fs.h +index 3422af2..8eaa846 100644 +--- a/ext2fs/ext2fs.h ++++ b/ext2fs/ext2fs.h +@@ -26,6 +26,7 @@ + #include <hurd/store.h> + #include <hurd/diskfs.h> + #include <hurd/ihash.h> ++#include <hurd/rdxtree.h> + #include <assert.h> + #include <pthread.h> + #include <sys/mman.h> +@@ -159,9 +160,6 @@ struct disknode + each DIRBLKSIZE piece of the directory. */ + int *dirents; + +- /* Links on hash list. */ +- struct node *hnext, **hprevp; +- + /* Lock to lock while fiddling with this inode's block allocation info. */ + pthread_rwlock_t alloc_lock; + +diff --git a/ext2fs/inode.c b/ext2fs/inode.c +index 7ec343f..fea39ae 100644 +--- a/ext2fs/inode.c ++++ b/ext2fs/inode.c +@@ -20,6 +20,7 @@ + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + + #include "ext2fs.h" ++#include <stddef.h> + #include <string.h> + #include <unistd.h> + #include <stdio.h> +@@ -39,13 +40,6 @@ + #define UF_IMMUTABLE 0 + #endif + +-#define INOHSZ 512 +-#if ((INOHSZ&(INOHSZ-1)) == 0) +-#define INOHASH(ino) ((ino)&(INOHSZ-1)) +-#else +-#define INOHASH(ino) (((unsigned)(ino))%INOHSZ) +-#endif +- + /* The nodehash is a cache of nodes. + + Access to nodehash and nodehash_nr_items is protected by +@@ -55,23 +49,14 @@ + asked to give up that light reference, we reacquire our lock + momentarily to check whether someone else reacquired a reference + through the nodehash. */ +-static struct node *nodehash[INOHSZ]; ++static struct rdxtree nodecache = RDXTREE_INITIALIZER; + static size_t nodehash_nr_items; +-static pthread_mutex_t nodehash_lock = PTHREAD_MUTEX_INITIALIZER; ++static pthread_mutex_t nodecache_lock = PTHREAD_RWLOCK_INITIALIZER; + + static error_t read_node (struct node *np); + + pthread_spinlock_t generation_lock = PTHREAD_SPINLOCK_INITIALIZER; + +-/* Initialize the inode hash table. */ +-void +-inode_init () +-{ +- int n; +- for (n = 0; n < INOHSZ; n++) +- nodehash[n] = 0; +-} +- + /* Fetch inode INUM, set *NPP to the node structure; + gain one user reference and lock the node. */ + error_t +@@ -81,46 +66,40 @@ diskfs_cached_lookup (ino_t inum, struct node **npp) + struct node *np; + struct disknode *dn; + +- pthread_mutex_lock (&nodehash_lock); +- for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) +- if (np->cache_id == inum) +- { +- diskfs_nref (np); +- pthread_mutex_unlock (&nodehash_lock); +- pthread_mutex_lock (&np->lock); +- *npp = np; +- return 0; +- } +- +- /* Format specific data for the new node. */ +- dn = malloc (sizeof (struct disknode)); +- if (! dn) ++ pthread_rwlock_rdlock (&nodecache_lock); ++ np = rdxtree_lookup (&nodecache, inum); ++ if (np != NULL) ++ { ++ diskfs_nref (np); ++ pthread_rwlock_unlock (&nodecache_lock); ++ pthread_mutex_lock (&np->lock); ++ *npp = np; ++ return 0; ++ } ++ pthread_rwlock_unlock (&nodecache_lock); ++ ++ /* Create the new node. */ ++ np = diskfs_make_node_alloc (sizeof *dn); ++ if (np == NULL) + { +- pthread_mutex_unlock (&nodehash_lock); ++ pthread_mutex_unlock (&nodecache_lock); + return ENOMEM; + } ++ np->cache_id = inum; ++ dn = diskfs_node_disknode (np); + dn->dirents = 0; + dn->dir_idx = 0; + dn->pager = 0; + pthread_rwlock_init (&dn->alloc_lock, NULL); + pokel_init (&dn->indir_pokel, diskfs_disk_pager, disk_cache); +- +- /* Create the new node. */ +- np = diskfs_make_node (dn); +- np->cache_id = inum; +- + pthread_mutex_lock (&np->lock); + + /* Put NP in NODEHASH. */ +- dn->hnext = nodehash[INOHASH(inum)]; +- if (dn->hnext) +- dn->hnext->dn->hprevp = &dn->hnext; +- dn->hprevp = &nodehash[INOHASH(inum)]; +- nodehash[INOHASH(inum)] = np; + diskfs_nref_light (np); ++ pthread_rwlock_wrlock (&nodecache_lock); ++ rdxtree_insert (&nodecache, inum, np); + nodehash_nr_items += 1; +- +- pthread_mutex_unlock (&nodehash_lock); ++ pthread_rwlock_unlock (&nodecache_lock); + + /* Get the contents of NP off disk. */ + err = read_node (np); +@@ -151,16 +130,12 @@ ifind (ino_t inum) + { + struct node *np; + +- pthread_mutex_lock (&nodehash_lock); +- for (np = nodehash[INOHASH(inum)]; np; np = np->dn->hnext) +- { +- if (np->cache_id != inum) +- continue; ++ pthread_rwlock_rdlock (&nodecache_lock); ++ np = rdxtree_lookup (&nodecache, inum); ++ pthread_rwlock_unlock (&nodecache_lock); + +- pthread_mutex_unlock (&nodehash_lock); +- return np; +- } +- assert (0); ++ assert (np); ++ return np; + } + + /* The last reference to a node has gone away; drop +@@ -176,7 +151,6 @@ diskfs_node_norefs (struct node *np) + pokel_inherit (&global_pokel, &np->dn->indir_pokel); + pokel_finalize (&np->dn->indir_pokel); + +- free (np->dn); + free (np); + } + +@@ -185,8 +159,8 @@ diskfs_node_norefs (struct node *np) + void + diskfs_try_dropping_softrefs (struct node *np) + { +- pthread_mutex_lock (&nodehash_lock); +- if (np->dn->hnext != NULL) ++ pthread_rwlock_wrlock (&nodecache_lock); ++ if (np->cache_id != 0) + { + /* Check if someone reacquired a reference through the + nodehash. */ +@@ -196,18 +170,16 @@ diskfs_try_dropping_softrefs (struct node *np) + { + /* A reference was reacquired through a hash table lookup. + It's fine, we didn't touch anything yet. */ +- pthread_mutex_unlock (&nodehash_lock); ++ pthread_rwlock_unlock (&nodecache_lock); + return; + } + +- *np->dn->hprevp = np->dn->hnext; +- if (np->dn->hnext) +- np->dn->hnext->dn->hprevp = np->dn->hprevp; +- np->dn->hnext = NULL; ++ rdxtree_remove (&nodecache, np->cache_id); + nodehash_nr_items -= 1; ++ np->cache_id = 0; + diskfs_nrele_light (np); + } +- pthread_mutex_unlock (&nodehash_lock); ++ pthread_rwlock_unlock (&nodecache_lock); + + drop_pager_softrefs (np); + } +@@ -581,16 +553,16 @@ error_t + diskfs_node_iterate (error_t (*fun)(struct node *)) + { + error_t err = 0; +- int n; ++ struct rdxtree_iter iter; + size_t num_nodes; + struct node *node, **node_list, **p; + +- pthread_mutex_lock (&nodehash_lock); ++ pthread_rwlock_rdlock (&nodecache_lock); + + /* We must copy everything from the hash table into another data structure + to avoid running into any problems with the hash-table being modified + during processing (normally we delegate access to hash-table with +- nodehash_lock, but we can't hold this while locking the ++ nodecache_lock, but we can't hold this while locking the + individual node locks). */ + num_nodes = nodehash_nr_items; + +@@ -599,20 +571,19 @@ diskfs_node_iterate (error_t (*fun)(struct node *)) + node_list = malloc (num_nodes * sizeof (struct node *)); + if (node_list == NULL) + { +- pthread_mutex_unlock (&nodehash_lock); ++ pthread_rwlock_unlock (&nodecache_lock); + ext2_debug ("unable to allocate temporary node table"); + return ENOMEM; + } + + p = node_list; +- for (n = 0; n < INOHSZ; n++) +- for (node = nodehash[n]; node; node = node->dn->hnext) +- { +- *p++ = node; +- diskfs_nref (node); +- } +- +- pthread_mutex_unlock (&nodehash_lock); ++ rdxtree_for_each (&nodecache, &iter, node) ++ { ++ *p++ = node; ++ diskfs_nref (node); ++ } ++ ++ pthread_rwlock_unlock (&nodecache_lock); + + p = node_list; + while (num_nodes-- > 0) +-- +2.0.0.rc2 + diff --git a/debian/patches/0022-hurd-add-an-Hurd-server-introspection-protocol.patch b/debian/patches/0022-hurd-add-an-Hurd-server-introspection-protocol.patch new file mode 100644 index 00000000..3fd3631a --- /dev/null +++ b/debian/patches/0022-hurd-add-an-Hurd-server-introspection-protocol.patch @@ -0,0 +1,134 @@ +From 82b61c9901b19248e47628d4f79d60336b2f35a4 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Wed, 21 May 2014 16:40:12 +0200 +Subject: [PATCH 22/27] hurd: add an Hurd server introspection protocol + +Most Hurd servers use libports to manage receive rights and the +associated objects. These procedures can be used to query the state +associated with receive rights managed by libports. + +The procedures are not specific to libports. Any Hurd server can +implement this protocol. To do so, a server installs a send right in +the array of well-known ports, under the key +HURD_PORT_REGISTER_INTROSPECTION. + +* hurd/hurd_port.defs: New file. +* hurd/hurd_types.h (HURD_PORT_REGISTER_INTROSPECTION): New macro. +(HURD_PORT_REGISTER_MAX): Likewise. +* hurd/subsystems: Add hurd_port subsystem. +--- + hurd/hurd_port.defs | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++ + hurd/hurd_types.h | 6 +++++ + hurd/subsystems | 1 + + 3 files changed, 80 insertions(+) + create mode 100644 hurd/hurd_port.defs + +diff --git a/hurd/hurd_port.defs b/hurd/hurd_port.defs +new file mode 100644 +index 0000000..0d82072 +--- /dev/null ++++ b/hurd/hurd_port.defs +@@ -0,0 +1,73 @@ ++/* Hurd server introspection. ++ ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ ++ This file is part of the GNU Hurd. ++ ++ The GNU Hurd is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2, or (at ++ your option) any later version. ++ ++ The GNU Hurd is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with the GNU Hurd. If not, see <http://www.gnu.org/licenses/>. */ ++ ++subsystem hurd_port 39000; ++ ++/* Hurd server introspection. ++ ++ Most Hurd servers use libports to manage receive rights and the ++ associated objects. These procedures can be used to query the ++ state associated with receive rights managed by libports. ++ ++ The procedures are not specific to libports. Any Hurd server can ++ implement this protocol. To do so, a server installs a send right ++ in the array of well-known ports, under the key ++ HURD_PORT_REGISTER_INTROSPECTION. ++ ++ A client in possession of the servers task port can retrieve a copy ++ of this send right using mach_ports_lookup. */ ++ ++#include <hurd/hurd_types.defs> ++ ++#ifdef HURD_PORT_IMPORTS ++HURD_PORT_IMPORTS ++#endif ++ ++INTR_INTERFACE ++ ++/* Return the number of hard and weak references of the object ++ directly associated with the receive right NAME. ++ ++ Return EINVAL if NAME does not denote a receive right managed by ++ the port-to-object mapper, or if the concept of reference counting ++ simply does not apply. */ ++routine hurd_port_get_refcounts ( ++ introspection: mach_port_t; ++ name: mach_port_name_t; ++ waittime timeout: natural_t; ++ RPT ++ out hard: natural_t; ++ out weak: natural_t); ++ ++/* Return a compact, human-readable description of the object related ++ with the receive right NAME. ++ ++ This description is meant for debugging purposes and should include ++ relevant internal state. If possible, it should include ++ information that is meaningful in other contexts (like a file name, ++ or the inode number). ++ ++ Return EINVAL if NAME does not denote a receive right managed by ++ the port-to-object mapper. */ ++routine hurd_port_debug_info ( ++ introspection: mach_port_t; ++ port: mach_port_name_t; ++ waittime timeout: natural_t; ++ RPT ++ out debug_info: string_t); +diff --git a/hurd/hurd_types.h b/hurd/hurd_types.h +index 8eac206..7875bb5 100644 +--- a/hurd/hurd_types.h ++++ b/hurd/hurd_types.h +@@ -371,4 +371,10 @@ enum + INIT_INT_MAX, + }; + ++/* Define the well-known ports available via mach_ports_lookup. */ ++#define HURD_PORT_REGISTER_INTROSPECTION 0 ++ ++/* This is a fixed limit. */ ++#define HURD_PORT_REGISTER_MAX TASK_PORT_REGISTER_MAX ++ + #endif +diff --git a/hurd/subsystems b/hurd/subsystems +index c05895c..59893b2 100644 +--- a/hurd/subsystems ++++ b/hurd/subsystems +@@ -36,6 +36,7 @@ tape 35000 Special control operations for magtapes + login 36000 Database of logged-in users + pfinet 37000 Internet configuration calls + password 38000 Password checker ++hurd_port 39000 Port debugging and introspection + <ioctl space> 100000- First subsystem of ioctl class 'f' (lowest class) + tioctl 156000 Ioctl class 't' (terminals) + tioctl 156200 (continued) +-- +2.0.0.rc2 + diff --git a/debian/patches/0023-libports-implement-the-Hurd-server-introspection-pro.patch b/debian/patches/0023-libports-implement-the-Hurd-server-introspection-pro.patch new file mode 100644 index 00000000..243f5a96 --- /dev/null +++ b/debian/patches/0023-libports-implement-the-Hurd-server-introspection-pro.patch @@ -0,0 +1,328 @@ +From d494a068b98f79a7c454802a52eddbff3350acbc Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Wed, 21 May 2014 16:47:14 +0200 +Subject: [PATCH 23/27] libports: implement the Hurd server introspection + protocol + +Add a compact and self-contained introspection server to libports. +Add functions to to label port buckets and classes. Make it possible +to provide a function that given an object of a class, returns a +human-readable representation for it. + +* libports/introspection.c: New file. +* libports/create-bucket.c (ports_label_bucket): New function. +* libports/create-class.c (ports_set_debug_info): Likewise. +* libports/ports.h (struct port_bucket): Add label. +(struct port_class): Add debug_info and label. +(ports_label_bucket): New declaration. +(ports_set_debug_info): Likewise. +* libports/Makefile (SRCS): Add introspection.c. +(OBJS): Add hurd_portServer.o. +--- + libports/Makefile | 4 +- + libports/create-bucket.c | 7 ++ + libports/create-class.c | 15 ++++ + libports/introspection.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++ + libports/ports.h | 12 ++++ + 5 files changed, 218 insertions(+), 2 deletions(-) + create mode 100644 libports/introspection.c + +diff --git a/libports/Makefile b/libports/Makefile +index 30da1c1..6c7f5df 100644 +--- a/libports/Makefile ++++ b/libports/Makefile +@@ -36,13 +36,13 @@ SRCS = create-bucket.c create-class.c \ + interrupt-operation.c interrupt-on-notify.c interrupt-notified-rpcs.c \ + dead-name.c create-port.c import-port.c default-uninhibitable-rpcs.c \ + claim-right.c transfer-right.c create-port-noinstall.c create-internal.c \ +- interrupted.c ++ interrupted.c introspection.c + + installhdrs = ports.h + + HURDLIBS= ihash + LDLIBS += -lpthread +-OBJS = $(SRCS:.c=.o) notifyServer.o interruptServer.o ++OBJS = $(SRCS:.c=.o) notifyServer.o interruptServer.o hurd_portServer.o + + MIGCOMSFLAGS = -prefix ports_ + MIGSFLAGS = -imacros $(srcdir)/mig-mutate.h +diff --git a/libports/create-bucket.c b/libports/create-bucket.c +index 2c5f1b6..51168cb 100644 +--- a/libports/create-bucket.c ++++ b/libports/create-bucket.c +@@ -48,5 +48,12 @@ ports_create_bucket () + + hurd_ihash_init (&ret->htable, offsetof (struct port_info, hentry)); + ret->rpcs = ret->flags = ret->count = 0; ++ ret->label = "unlabeled bucket"; + return ret; + } ++ ++/* Label BUCKET with LABEL. */ ++void ports_label_bucket (struct port_bucket *bucket, const char *label) ++{ ++ bucket->label = label; ++} +diff --git a/libports/create-class.c b/libports/create-class.c +index 782f52b..fbce9d9 100644 +--- a/libports/create-class.c ++++ b/libports/create-class.c +@@ -41,6 +41,21 @@ ports_create_class (void (*clean_routine)(void *), + cl->rpcs = 0; + cl->count = 0; + cl->uninhibitable_rpcs = ports_default_uninhibitable_rpcs; ++ cl->debug_info = NULL; ++ cl->label = "unlabeled class"; + + return cl; + } ++ ++/* Label BUCKET with LABEL. Use DEBUG_INFO to format human-readable ++ information about a given object belonging to CLASS into an buffer, ++ or the default formatting function if DEBUG_INFO is NULL. */ ++void ++ports_set_debug_info (struct port_class *class, ++ const char *label, ++ error_t (*debug_info) (const void *, char *, size_t)) ++{ ++ class->label = label; ++ if (debug_info) ++ class->debug_info = debug_info; ++} +diff --git a/libports/introspection.c b/libports/introspection.c +new file mode 100644 +index 0000000..dcebd6b +--- /dev/null ++++ b/libports/introspection.c +@@ -0,0 +1,182 @@ ++/* Hurd server introspection. ++ ++ Copyright (C) 2014 Free Software Foundation, Inc. ++ ++ This file is part of the GNU Hurd. ++ ++ The GNU Hurd is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2, or (at ++ your option) any later version. ++ ++ The GNU Hurd is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with the GNU Hurd. If not, see <http://www.gnu.org/licenses/>. */ ++ ++#include <error.h> ++#include <mach/mach_param.h> ++#include <pthread.h> ++#include <stdio.h> ++#include <string.h> ++ ++#include "ports.h" ++#include "hurd_port_S.h" ++ ++/* We service introspection requests on this port. */ ++static mach_port_t introspection_port; ++ ++/* We use a separate thread to service the introspection requests. It ++ is a straight forward Mach server for the hurd_port protocol. */ ++static void * ++service_introspection_requests (void *arg) ++{ ++ error_t err; ++ mach_port_t *ports; ++ size_t ports_len; ++ ++ err = mach_port_allocate (mach_task_self (), MACH_PORT_RIGHT_RECEIVE, ++ &introspection_port); ++ if (err) ++ { ++ error (0, err, "mach_port_allocate"); ++ return NULL; ++ } ++ ++ err = mach_port_insert_right (mach_task_self (), ++ introspection_port, introspection_port, ++ MACH_MSG_TYPE_MAKE_SEND); ++ if (err) ++ { ++ error (0, err, "mach_port_insert_right"); ++ return NULL; ++ } ++ ++ ++ err = mach_ports_lookup (mach_task_self (), ++ &ports, &ports_len); ++ if (err) ++ { ++ error (0, err, "mach_ports_lookup"); ++ return NULL; ++ } ++ ++ ports[HURD_PORT_REGISTER_INTROSPECTION] = introspection_port; ++ ++ err = mach_ports_register (mach_task_self (), ++ ports, ++ HURD_PORT_REGISTER_MAX); ++ if (err) ++ { ++ error (0, err, "mach_ports_register"); ++ return NULL; ++ } ++ ++ /* XXX mig should emit this declaration. */ ++ boolean_t ports_hurd_port_server (mach_msg_header_t *InHeadP, ++ mach_msg_header_t *OutHeadP); ++ ++ while (1) ++ mach_msg_server (ports_hurd_port_server, 0, introspection_port); ++ ++ /* Not reached. */ ++ return NULL; ++} ++ ++/* Start the introspection server before main is called. */ ++static void __attribute__ ((constructor)) ++init (void) ++{ ++ error_t err; ++ ++ pthread_t thread; ++ pthread_attr_t attr; ++#define STACK_SIZE (64 * 1024) ++ pthread_attr_init (&attr); ++ pthread_attr_setstacksize (&attr, STACK_SIZE); ++#undef STACK_SIZE ++ ++ err = pthread_create (&thread, &attr, ++ service_introspection_requests, NULL); ++ if (err) ++ error (1, err, "pthread_create"); ++ pthread_detach (thread); ++} ++ ++/* Return the number of hard and weak references of the object ++ directly associated with the receive right NAME. ++ ++ Return EINVAL if NAME does not denote a receive right managed by ++ the port-to-object mapper, or if the concept of reference counting ++ simply does not apply. */ ++error_t ++ports_S_hurd_port_get_refcounts (mach_port_t port, ++ mach_port_t name, ++ natural_t *hard, ++ natural_t *weak) ++{ ++ struct references result; ++ struct port_info *pi; ++ ++ if (port != introspection_port) ++ return EOPNOTSUPP; ++ ++ pi = ports_lookup_port (0, name, 0); ++ if (pi == NULL) ++ return EINVAL; ++ ++ refcounts_references (&pi->refcounts, &result); ++ ++ *hard = result.hard - 1; ++ *weak = result.weak; ++ ports_port_deref (pi); ++ return 0; ++} ++ ++static error_t ++default_debug_info (const void *port, char *buffer, size_t size) ++{ ++ const struct port_info *pi = port; ++ snprintf (buffer, size, ++ "bucket: %s, class: %s", ++ pi->bucket->label, pi->class->label); ++ return 0; ++} ++ ++/* Return a compact, human-readable description of the object related ++ with the receive right NAME. ++ ++ This description is meant for debugging purposes and should include ++ relevant internal state. If possible, it should include ++ information that is meaningful in other contexts (like a file name, ++ or the inode number). ++ ++ Return EINVAL if NAME does not denote a receive right managed by ++ the port-to-object mapper. */ ++error_t ++ports_S_hurd_port_debug_info (mach_port_t port, ++ mach_port_t name, ++ char *info) ++{ ++ error_t err; ++ struct port_info *pi; ++ ++ if (port != introspection_port) ++ return EOPNOTSUPP; ++ ++ pi = ports_lookup_port (0, name, 0); ++ if (pi == NULL) ++ return EINVAL; ++ ++ if (pi->class->debug_info) ++ err = pi->class->debug_info (pi, info, 1024 /* XXX */); ++ else ++ err = default_debug_info (pi, info, 1024 /* XXX */); ++ info[1023] = 0; ++ ++ ports_port_deref (pi); ++ return err; ++} +diff --git a/libports/ports.h b/libports/ports.h +index 3439443..18eb5cb 100644 +--- a/libports/ports.h ++++ b/libports/ports.h +@@ -65,6 +65,7 @@ struct port_bucket + int rpcs; + int flags; + int count; ++ const char *label; + }; + /* FLAGS above are the following: */ + #define PORT_BUCKET_INHIBITED PORTS_INHIBITED +@@ -80,7 +81,9 @@ struct port_class + int count; + void (*clean_routine) (void *); + void (*dropweak_routine) (void *); ++ error_t (*debug_info) (const void *, char *, size_t); + struct ports_msg_id_range *uninhibitable_rpcs; ++ const char *label; + }; + /* FLAGS are the following: */ + #define PORT_CLASS_INHIBITED PORTS_INHIBITED +@@ -149,6 +152,9 @@ extern struct ports_msg_id_range *ports_default_uninhibitable_rpcs; + /* Create and return a new bucket. */ + struct port_bucket *ports_create_bucket (void); + ++/* Label BUCKET with LABEL. */ ++void ports_label_bucket (struct port_bucket *bucket, const char *label); ++ + /* Create and return a new port class. If nonzero, CLEAN_ROUTINE will + be called for each allocated port object in this class when it is + being destroyed. If nonzero, DROPWEAK_ROUTINE will be called +@@ -158,6 +164,12 @@ struct port_bucket *ports_create_bucket (void); + struct port_class *ports_create_class (void (*clean_routine)(void *), + void (*dropweak_routine)(void *)); + ++/* Label BUCKET with LABEL. Use DEBUG_INFO to format human-readable ++ information about a given object belonging to CLASS into an buffer, ++ or the default formatting function if DEBUG_INFO is NULL. */ ++void ports_set_debug_info (struct port_class *class, const char *label, ++ error_t (*debug_info) (const void *, char *, size_t)); ++ + /* Create and return in RESULT a new port in CLASS and BUCKET; SIZE bytes + will be allocated to hold the port structure and whatever private data the + user desires. */ +-- +2.0.0.rc2 + diff --git a/debian/patches/0024-utils-implement-portinfo-query-process.patch b/debian/patches/0024-utils-implement-portinfo-query-process.patch new file mode 100644 index 00000000..7db120e7 --- /dev/null +++ b/debian/patches/0024-utils-implement-portinfo-query-process.patch @@ -0,0 +1,168 @@ +From 9eef073dafdaeca49be2013e27b86687b6161d05 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Wed, 21 May 2014 17:38:46 +0200 +Subject: [PATCH 24/27] utils: implement portinfo --query-process + +Implement portinfo --query-process (hopefully) as envisaged by a +comment in portinfo.c. We use the new Hurd server introspection +protocol to obtain information about the objects related to ports: + +% utils/portinfo --receive --query-process 5586 77 + 77: receive [bucket: diskfs_port_bucket, class: diskfs_protid_class, + node{inode: 48194, hard: 1, weak: 1}, + path: hello/hurd/developers_:)] + +* libshouldbeinlibc/Makefile (OBJS): Add hurd_portUser.o. +* libshouldbeinlibc/portinfo.c (show_portinfo_query): New function. +(print_port_info): Use show_portinfo_query if desired. +* libshouldbeinlibc/portinfo.h (PORTINFO_QUERY): New macro. +* utils/portinfo.c (argp_option): Drop #if 0. +(parse_opt): Handle --query-process. +--- + libshouldbeinlibc/Makefile | 2 +- + libshouldbeinlibc/portinfo.c | 69 ++++++++++++++++++++++++++++++++++++++++++++ + libshouldbeinlibc/portinfo.h | 1 + + utils/portinfo.c | 3 +- + 4 files changed, 72 insertions(+), 3 deletions(-) + +diff --git a/libshouldbeinlibc/Makefile b/libshouldbeinlibc/Makefile +index 14a7939..cedd2c8 100644 +--- a/libshouldbeinlibc/Makefile ++++ b/libshouldbeinlibc/Makefile +@@ -32,6 +32,6 @@ installhdrs = idvec.h timefmt.h maptime.h \ + wire.h portinfo.h portxlate.h cacheq.h ugids.h nullauth.h + installhdrsubdir = . + +-OBJS = $(SRCS:.c=.o) ++OBJS = $(SRCS:.c=.o) hurd_portUser.o + + include ../Makeconf +diff --git a/libshouldbeinlibc/portinfo.c b/libshouldbeinlibc/portinfo.c +index e6305c6..f99b789 100644 +--- a/libshouldbeinlibc/portinfo.c ++++ b/libshouldbeinlibc/portinfo.c +@@ -17,10 +17,77 @@ + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + ++#include <assert.h> ++#include <error.h> ++#include <string.h> + #include <sys/types.h> + #include <sys/mman.h> + + #include "portinfo.h" ++#include "hurd_port_U.h" ++ ++static void ++show_portinfo_query (mach_port_t task, mach_port_t name, ++ unsigned show, FILE *stream) ++{ ++ error_t err; ++ static mach_port_t introspection_port; ++ static mach_port_t for_task; ++ ++ if (task != for_task) ++ { ++ mach_port_t *ports; ++ size_t ports_len; ++ ++ err = mach_ports_lookup (task, &ports, &ports_len); ++ if (! err) ++ { ++ size_t i; ++ if (MACH_PORT_VALID (introspection_port)) ++ mach_port_deallocate (mach_task_self (), introspection_port); ++ ++ for (i = 0; i < ports_len; i++) ++ if (i == HURD_PORT_REGISTER_INTROSPECTION) ++ introspection_port = ports[i]; ++ else ++ { ++ if (MACH_PORT_VALID (ports[i])) ++ mach_port_deallocate (mach_task_self (), ports[i]); ++ } ++ } ++ else ++ introspection_port = MACH_PORT_DEAD; ++ ++ for_task = task; ++ } ++ ++ if (! MACH_PORT_VALID (introspection_port)) ++ return; ++ ++ string_t info; /* XXX */ ++ err = hurd_port_debug_info (introspection_port, name, 100, info); ++ if (err) ++ { ++ if (err != EINVAL) ++ error (0, err, "hurd_port_debug_info"); ++ return; ++ } ++ ++ if (strlen (info) > 0) ++ fprintf (stream, " [%s", info); ++ ++ if (show & PORTINFO_DETAILS) ++ { ++ unsigned int hard, weak; ++ err = hurd_port_get_refcounts (introspection_port, name, 100, ++ &hard, &weak); ++ if (! err) ++ fprintf (stream, ", hard: %u, weak: %u", hard, weak); ++ } ++ ++ fprintf (stream, "]"); ++} ++ + + /* Prints info about NAME in TASK to STREAM, in a way described by the flags + in SHOW. If TYPE is non-zero, it should be what mach_port_type returns +@@ -83,6 +150,8 @@ print_port_info (mach_port_t name, mach_port_type_t type, task_t task, + status.mps_nsrequest ? ", ns-req" : ""); + } + } ++ if (show & PORTINFO_QUERY) ++ show_portinfo_query (task, name, show, stream); + } + if (type & MACH_PORT_TYPE_SEND) + { +diff --git a/libshouldbeinlibc/portinfo.h b/libshouldbeinlibc/portinfo.h +index 143c289..bd96eb8 100644 +--- a/libshouldbeinlibc/portinfo.h ++++ b/libshouldbeinlibc/portinfo.h +@@ -31,6 +31,7 @@ + #define PORTINFO_DETAILS 0x1 + #define PORTINFO_MEMBERS 0x4 + #define PORTINFO_HEX_NAMES 0x8 ++#define PORTINFO_QUERY 0x10 + + /* Prints info about NAME in TASK to STREAM, in a way described by the flags + in SHOW. If TYPE is non-zero, it should be what mach_port_type returns +diff --git a/utils/portinfo.c b/utils/portinfo.c +index 4c40352..27998db 100644 +--- a/utils/portinfo.c ++++ b/utils/portinfo.c +@@ -44,10 +44,8 @@ static const struct argp_option options[] = { + {"verbose", 'v', 0, 0, "Give more detailed information"}, + {"members", 'm', 0, 0, "Show members of port-sets"}, + {"hex-names", 'x', 0, 0, "Show port names in hexadecimal"}, +-#if 0 /* XXX implement this */ + {"query-process", 'q', 0, 0, "Query the process itself for the identity of" + " the ports in question -- requires the process be in a sane state"}, +-#endif + {"hold", '*', 0, OPTION_HIDDEN}, + + {0,0,0,0, "Selecting which names to show:", 2}, +@@ -249,6 +247,7 @@ main (int argc, char **argv) + case 'v': show |= PORTINFO_DETAILS; break; + case 'm': show |= PORTINFO_MEMBERS; break; + case 'x': show |= PORTINFO_HEX_NAMES; break; ++ case 'q': show |= PORTINFO_QUERY; break; + + case 'r': only |= MACH_PORT_TYPE_RECEIVE; break; + case 's': only |= MACH_PORT_TYPE_SEND; break; +-- +2.0.0.rc2 + diff --git a/debian/patches/0025-libdiskfs-annotate-objects-managed-by-libports.patch b/debian/patches/0025-libdiskfs-annotate-objects-managed-by-libports.patch new file mode 100644 index 00000000..7bc5edd6 --- /dev/null +++ b/debian/patches/0025-libdiskfs-annotate-objects-managed-by-libports.patch @@ -0,0 +1,107 @@ +From a2e4f7f47159eb668f16e76e0856aa81b09656e3 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Wed, 21 May 2014 18:17:33 +0200 +Subject: [PATCH 25/27] libdiskfs: annotate objects managed by libports + +Label all port classes and diskfs_port_bucket. Provide +diskfs_format_debug_info which prints a human-readable description of +a protid object, which notably includes the path and the inode number. + +* libdiskfs/diskfs.h (diskfs_format_debug_info): New declaration. +* libdiskfs/init-init.c (diskfs_format_debug_info): New function. +(diskfs_init_diskfs): Add annotations to classes and bucket. +--- + libdiskfs/diskfs.h | 8 ++++++++ + libdiskfs/init-init.c | 41 ++++++++++++++++++++++++++++++++++++----- + 2 files changed, 44 insertions(+), 5 deletions(-) + +diff --git a/libdiskfs/diskfs.h b/libdiskfs/diskfs.h +index e10e75e..bb12b4a 100644 +--- a/libdiskfs/diskfs.h ++++ b/libdiskfs/diskfs.h +@@ -585,6 +585,14 @@ error_t (*diskfs_read_symlink_hook)(struct node *np, char *target); + default function always returns EOPNOTSUPP. */ + error_t diskfs_get_source (struct protid *cred, + char *source, size_t source_len); ++ ++/* The user may define this function. The function must provide a ++ human-readable description of PROTID in BUFFER of size SIZE. The ++ default implementation generates a reasonable amount of ++ information. */ ++error_t diskfs_format_debug_info (const void *protid, ++ char *buffer, size_t size); ++ + + /* The library exports the following functions for general use */ + +diff --git a/libdiskfs/init-init.c b/libdiskfs/init-init.c +index 976b4e8..c7c1af7 100644 +--- a/libdiskfs/init-init.c ++++ b/libdiskfs/init-init.c +@@ -24,6 +24,7 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include <hurd/fsys.h> + #include <stdio.h> + #include <maptime.h> ++#include <inttypes.h> + + mach_port_t diskfs_default_pager; + mach_port_t diskfs_auth_server_port; +@@ -48,6 +49,29 @@ struct port_class *diskfs_shutdown_notification_class; + + struct port_bucket *diskfs_port_bucket; + ++/* Provide a human-readable description of the given protid object. */ ++error_t ++diskfs_format_debug_info (const void *port, char *buffer, size_t size) ++{ ++ const struct protid *protid = port; ++ const struct port_info *pi = port; ++ struct references result; ++ ++ refcounts_references (&protid->po->np->refcounts, &result); ++ ++ snprintf (buffer, size, ++ "bucket: %s, class: %s" ++ ", node{inode: %"PRIu64", hard: %u, weak: %u}, path: %s", ++ pi->bucket->label, ++ pi->class->label, ++ protid->po->np->cache_id, ++ result.hard, ++ result.weak, ++ protid->po->path); ++ ++ return 0; ++} ++ + /* Call this after arguments have been parsed to initialize the + library. */ + error_t +@@ -83,13 +107,20 @@ diskfs_init_diskfs (void) + + diskfs_auth_server_port = getauth (); + +- diskfs_protid_class = ports_create_class (diskfs_protid_rele, 0); +- diskfs_control_class = ports_create_class (_diskfs_control_clean, 0); +- diskfs_initboot_class = ports_create_class (0, 0); +- diskfs_execboot_class = ports_create_class (0, 0); +- diskfs_shutdown_notification_class = ports_create_class (0, 0); ++#define MAKE_CLASS(NAME, FN, ARG, DBG) \ ++ NAME = ports_create_class ((FN), (ARG)); \ ++ ports_set_debug_info (NAME, #NAME, (DBG)) ++ ++ MAKE_CLASS (diskfs_protid_class, diskfs_protid_rele, NULL, ++ diskfs_format_debug_info); ++ MAKE_CLASS (diskfs_control_class, _diskfs_control_clean, NULL, NULL); ++ MAKE_CLASS (diskfs_initboot_class, NULL, NULL, NULL); ++ MAKE_CLASS (diskfs_execboot_class, NULL, NULL, NULL); ++ MAKE_CLASS (diskfs_shutdown_notification_class, NULL, NULL, NULL); ++#undef MAKE_CLASS + + diskfs_port_bucket = ports_create_bucket (); ++ ports_label_bucket (diskfs_port_bucket, "diskfs_port_bucket"); + + _hurd_port_init (&_diskfs_exec_portcell, MACH_PORT_NULL); + +-- +2.0.0.rc2 + diff --git a/debian/patches/0026-libpager-annotate-objects-managed-by-libports.patch b/debian/patches/0026-libpager-annotate-objects-managed-by-libports.patch new file mode 100644 index 00000000..3927b0d9 --- /dev/null +++ b/debian/patches/0026-libpager-annotate-objects-managed-by-libports.patch @@ -0,0 +1,61 @@ +From b3ee4c42475efa8d6b14379212af18b4cfbf84a4 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Wed, 21 May 2014 18:33:14 +0200 +Subject: [PATCH 26/27] libpager: annotate objects managed by libports + +Label _pager_class and provide a function which prints a +human-readable description of a pager object. + +* libpager/pager-create.c (format_debug_info): New function. +(create_class): Label _pager_class. +--- + libpager/pager-create.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/libpager/pager-create.c b/libpager/pager-create.c +index 1fc15b8..fa6b67e 100644 +--- a/libpager/pager-create.c ++++ b/libpager/pager-create.c +@@ -15,6 +15,8 @@ + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + ++#include <stdio.h> ++ + #include "priv.h" + + /* Create and return a new pager with user info UPI. */ +@@ -52,6 +54,23 @@ pager_create (struct user_pager_info *upi, + return p; + } + ++/* Provide a human-readable description of the given pager object. */ ++static error_t ++format_debug_info (const void *port, char *buffer, size_t size) ++{ ++ const struct pager *pager = port; ++ const struct port_info *pi = port; ++ ++ snprintf (buffer, size, ++ "bucket: %s, class: %s, may_cache: %d", ++ pi->bucket->label, ++ pi->class->label, ++ /* XXX I have no idea what might be interesting to print ++ here, but it is straight forward to add stuff. */ ++ pager->may_cache); ++ ++ return 0; ++} + + /* This causes the function to be run at startup by compiler magic. */ + static void create_class (void) __attribute__ ((constructor)); +@@ -60,5 +79,6 @@ static void + create_class () + { + _pager_class = ports_create_class (_pager_clean, _pager_real_dropweak); ++ ports_set_debug_info (_pager_class, "_pager_class", format_debug_info); + (void) &create_class; /* Avoid warning */ + } +-- +2.0.0.rc2 + diff --git a/debian/patches/0027-ext2fs-annotate-objects-managed-by-libports.patch b/debian/patches/0027-ext2fs-annotate-objects-managed-by-libports.patch new file mode 100644 index 00000000..c650dddc --- /dev/null +++ b/debian/patches/0027-ext2fs-annotate-objects-managed-by-libports.patch @@ -0,0 +1,101 @@ +From f1d5a4bb6ab41f576487d3d3acbf7f192e946a1d Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Wed, 21 May 2014 18:39:38 +0200 +Subject: [PATCH 27/27] ext2fs: annotate objects managed by libports + +Install a specialized version of libpagers format_debug_info which +prints more detailed information, like the nodes inode number for file +pager objects. Also label both pager buckets. + +* ext2fs/pager-create.c (format_debug_info): New function. +(create_disk_pager): Install our own format_debug_info function. +Label both pager buckets. +--- + ext2fs/pager.c | 40 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/ext2fs/pager.c b/ext2fs/pager.c +index 017efcc..aba7f27 100644 +--- a/ext2fs/pager.c ++++ b/ext2fs/pager.c +@@ -19,10 +19,12 @@ + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + + #include <unistd.h> ++#include <stdio.h> + #include <string.h> + #include <errno.h> + #include <error.h> + #include <hurd/store.h> ++#include <inttypes.h> + #include "ext2fs.h" + + /* XXX */ +@@ -1207,6 +1209,40 @@ service_paging_requests (void *arg) + return NULL; + } + ++/* Provide a human-readable description of the given pager object. */ ++static error_t ++format_debug_info (const void *port, char *buffer, size_t size) ++{ ++ const struct pager *pager = port; ++ const struct port_info *pi = port; ++ ++ if (pager->upi->type == FILE_DATA) ++ { ++ struct references result; ++ ++ refcounts_references (&pager->upi->node->refcounts, &result); ++ ++ snprintf (buffer, size, ++ "bucket: %s, class: %s" ++ ", node{inode: %"PRIu64", hard: %u, weak: %u}", ++ pi->bucket->label, ++ pi->class->label, ++ pager->upi->node->cache_id, ++ result.hard, ++ result.weak); ++ } ++ else ++ snprintf (buffer, size, ++ "bucket: %s, class: %s, may_cache: %d", ++ pi->bucket->label, ++ pi->class->label, ++ /* XXX I have no idea what might be interesting to print ++ here, but it is straight forward to add stuff. */ ++ pager->may_cache); ++ ++ return 0; ++} ++ + /* Create the disk pager, and the file pager. */ + void + create_disk_pager (void) +@@ -1215,12 +1251,15 @@ create_disk_pager (void) + pthread_attr_t attr; + error_t err; + ++ ports_set_debug_info (_pager_class, "_pager_class", format_debug_info); ++ + /* The disk pager. */ + struct user_pager_info *upi = malloc (sizeof (struct user_pager_info)); + if (!upi) + ext2_panic ("can't create disk pager: %s", strerror (errno)); + upi->type = DISK; + disk_pager_bucket = ports_create_bucket (); ++ ports_label_bucket (disk_pager_bucket, "disk_pager_bucket"); + get_hypermetadata (); + disk_cache_blocks = DISK_CACHE_BLOCKS; + disk_cache_size = disk_cache_blocks << log2_block_size; +@@ -1230,6 +1269,7 @@ create_disk_pager (void) + + /* The file pager. */ + file_pager_bucket = ports_create_bucket (); ++ ports_label_bucket (file_pager_bucket, "file_pager_bucket"); + + #define STACK_SIZE (64 * 1024) + pthread_attr_init (&attr); +-- +2.0.0.rc2 + diff --git a/debian/patches/series b/debian/patches/series index 6f1da5d7..0306b2c5 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -42,3 +42,30 @@ mach-defpager-protected-payload.patch #0010-libihash-make-the-locp-offset-mechanism-more-flexibl.patch #0011-trans-fakeroot-use-the-indirection-mechanism-for-loc.patch +0001-libdiskfs-add-diskfs_make_node_alloc-to-allocate-fat.patch +0002-libnetfs-add-netfs_make_node_alloc-to-allocate-fat-n.patch +0003-trans-fakeroot-use-fat-nodes-to-simplify-the-node-ca.patch +0004-trans-fakeroot-use-netfs_node_netnode-instead-of-np-.patch +0005-exec-add-missing-includes.patch +0006-term-fix-memory-leak.patch +0007-libdiskfs-fix-type-of-dir_cache_id-node_cache_id.patch +0008-libstore-provide-function-declaration-until-availabl.patch +0009-Avoid-compiler-warning-about-empty-bodies.patch +0010-librbtree-add-a-red-black-tree-implementation.patch +0011-librdxtree-add-a-radix-tree-implementation.patch +0012-libdiskfs-lock-less-reference-counting-for-peropen-o.patch +0013-libtrivfs-lock-less-reference-counting-for-trivfs_pe.patch +0014-libports-use-a-single-hash-table.patch +0015-libports-lock-less-reference-counting-for-port_info-.patch +0016-ext2fs-use-a-seperate-lock-to-protect-nodehash.patch +0017-fatfs-use-a-seperate-lock-to-protect-nodehash.patch +0018-isofs-use-a-seperate-lock-to-protect-node_cache.patch +0019-tmpfs-use-a-seperate-lock-to-protect-all_nodes.patch +0020-libdiskfs-lock-less-reference-counting-of-nodes.patch +0021-ext2fs-use-librdxtree-for-the-nodehash.patch +0022-hurd-add-an-Hurd-server-introspection-protocol.patch +0023-libports-implement-the-Hurd-server-introspection-pro.patch +0024-utils-implement-portinfo-query-process.patch +0025-libdiskfs-annotate-objects-managed-by-libports.patch +0026-libpager-annotate-objects-managed-by-libports.patch +0027-ext2fs-annotate-objects-managed-by-libports.patch |