diff options
-rw-r--r-- | debian/patches/0011-XXX-libports-cuckoo-hashing.patch | 278 |
1 files changed, 164 insertions, 114 deletions
diff --git a/debian/patches/0011-XXX-libports-cuckoo-hashing.patch b/debian/patches/0011-XXX-libports-cuckoo-hashing.patch index 74d91a91..28c1f53e 100644 --- a/debian/patches/0011-XXX-libports-cuckoo-hashing.patch +++ b/debian/patches/0011-XXX-libports-cuckoo-hashing.patch @@ -1,18 +1,38 @@ -From 86c0c31e918babde0c3e75d62a1ad1b59c494995 Mon Sep 17 00:00:00 2001 +From b02b304f483a1e0fe489f1c88cffba0a22d2f27b Mon Sep 17 00:00:00 2001 From: Justus Winter <4winter@informatik.uni-hamburg.de> Date: Sat, 10 May 2014 10:05:07 +0200 Subject: [PATCH 11/11] XXX libports: cuckoo hashing --- - libihash/ihash.c | 208 +++++++++++++++++++++++++++++++------------------------ - libihash/ihash.h | 38 +++------- - 2 files changed, 128 insertions(+), 118 deletions(-) + libihash/Makefile | 1 + + libihash/ihash.c | 228 +++++++++++++++++++++++++++++++++++------------------- + libihash/ihash.h | 37 +++++---- + 3 files changed, 173 insertions(+), 93 deletions(-) +diff --git a/libihash/Makefile b/libihash/Makefile +index 09bb136..ca528f7 100644 +--- a/libihash/Makefile ++++ b/libihash/Makefile +@@ -24,5 +24,6 @@ SRCS = ihash.c + installhdrs = ihash.h + + OBJS = $(SRCS:.c=.o) ++LDLIBS += -lm + + include ../Makeconf diff --git a/libihash/ihash.c b/libihash/ihash.c -index e74a2c5..cb7d765 100644 +index e74a2c5..38b809b 100644 --- a/libihash/ihash.c +++ b/libihash/ihash.c -@@ -69,6 +69,18 @@ index_valid (hurd_ihash_t ht, unsigned int idx, hurd_ihash_key_t key) +@@ -26,6 +26,7 @@ + #endif + + #include <errno.h> ++#include <math.h> + #include <stdlib.h> + #include <stdint.h> + #include <assert.h> +@@ -69,6 +70,18 @@ index_valid (hurd_ihash_t ht, unsigned int idx, hurd_ihash_key_t key) return !index_empty (ht, idx) && ht->items[idx].key == key; } @@ -26,12 +46,12 @@ index e74a2c5..cb7d765 100644 +hash1 (hurd_ihash_t ht, hurd_ihash_key_t key) +{ + return hash_int32 (key ^ ht->salt1, ht->size - 1) -+ | (1U << (ht->size - 2)); /* add offset */ ++ | (1U << (ht->size - 1)); /* add offset */ +} /* Given a hash table HT, and a key KEY, find the index in the table of that key. You must subsequently check with index_valid() if the -@@ -76,29 +88,13 @@ index_valid (hurd_ihash_t ht, unsigned int idx, hurd_ihash_key_t key) +@@ -76,29 +89,13 @@ index_valid (hurd_ihash_t ht, unsigned int idx, hurd_ihash_key_t key) static inline int find_index (hurd_ihash_t ht, hurd_ihash_key_t key) { @@ -40,7 +60,10 @@ index e74a2c5..cb7d765 100644 - unsigned int mask = ht->size - 1; - - idx = hash_int32 (key, 32) & mask; -- ++ unsigned int idx0, idx1; ++ idx0 = hash0 (ht, key); ++ idx1 = hash1 (ht, key); + - if (ht->items[idx].value == _HURD_IHASH_EMPTY || ht->items[idx].key == key) - return idx; - @@ -54,10 +77,7 @@ index e74a2c5..cb7d765 100644 - return up_idx; - } - while (up_idx != idx); -+ unsigned int idx0, idx1; -+ idx0 = hash0 (ht, key); -+ idx1 = hash1 (ht, key); - +- - /* If we end up here, the item could not be found. Return any - invalid index. */ - return idx; @@ -67,23 +87,49 @@ index e74a2c5..cb7d765 100644 } -@@ -124,8 +120,9 @@ hurd_ihash_init (hurd_ihash_t ht, intptr_t locp_offs) - ht->nr_items = 0; - ht->size = 0; +@@ -126,6 +123,9 @@ hurd_ihash_init (hurd_ihash_t ht, intptr_t locp_offs) ht->locp_offset = locp_offs; -- ht->max_load = HURD_IHASH_MAX_LOAD_DEFAULT; + ht->max_load = HURD_IHASH_MAX_LOAD_DEFAULT; ht->cleanup = 0; + ht->salt0 = 0; + ht->salt1 = ~0; ++ ht->max_loop = 0; } -@@ -184,27 +181,57 @@ hurd_ihash_set_cleanup (hurd_ihash_t ht, hurd_ihash_cleanup_t cleanup, - ht->cleanup = cleanup; - ht->cleanup_data = cleanup_data; +@@ -186,14 +186,15 @@ hurd_ihash_set_cleanup (hurd_ihash_t ht, hurd_ihash_cleanup_t cleanup, + } + + +-/* Set the maximum load factor in percent to MAX_LOAD, which should be +- between 1 and 100. The default is HURD_IHASH_MAX_LOAD_DEFAULT. ++ /* Set the maximum load factor in percent to MAX_LOAD, which must be ++ between 1 and HURD_IHASH_MAX_LOAD_DEFAULT. The default is ++ HURD_IHASH_MAX_LOAD_DEFAULT. If MAX_LOAD is invalid, the maximum ++ load factor is not changed. ++ + New elements are only added to the hash table while the number of + hashed elements is that much percent of the total size of the hash + table. If more elements are added, the hash table is first +- expanded and reorganized. A MAX_LOAD of 100 will always fill the +- whole table before enlarging it, but note that this will increase +- the cost of operations significantly when the table is almost full. ++ expanded and reorganized. + + If the value is set to a smaller value than the current load + factor, the next reorganization will happen when a new item is +@@ -201,10 +202,67 @@ hurd_ihash_set_cleanup (hurd_ihash_t ht, hurd_ihash_cleanup_t cleanup, + void + hurd_ihash_set_max_load (hurd_ihash_t ht, unsigned int max_load) + { +- ht->max_load = max_load; ++ if (0 < max_load && max_load < HURD_IHASH_MAX_LOAD_DEFAULT) ++ ht->max_load = max_load; } -+ -+/* XXX */ + + ++/* Fill the IDXth slot of HT with ITEM. Updates the location pointer ++ if necessary. */ +static inline void +set_item (hurd_ihash_t ht, unsigned int idx, struct _hurd_ihash_item *item) +{ @@ -92,67 +138,56 @@ index e74a2c5..cb7d765 100644 + *((hurd_ihash_locp_t *) (((char *) item->value) + ht->locp_offset)) + = &ht->items[idx].value; +} - -+/* XXX */ ++ ++/* Insert ITEM into HT at its preferred location. If this location is ++ currently occupied, move that item to it's alternate location. ++ This process is repeated until no item has to be moved. ++ ++ If this succeeds within MAX_LOOP steps, return 1. Otherwise return ++ 0. On failure, ITEM contains an item (not the same as the one ++ supplied by the caller). It is the responsibility of the callee to ++ add this item to HT. */ +static inline int -+add_item (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t value) ++add_item (hurd_ihash_t ht, struct _hurd_ihash_item *item, ++ unsigned int max_loop) +{ -+ struct _hurd_ihash_item item, old; -+ unsigned int idx0, idx1; - --/* Set the maximum load factor in percent to MAX_LOAD, which should be -- between 1 and 100. The default is HURD_IHASH_MAX_LOAD_DEFAULT. -- New elements are only added to the hash table while the number of -- hashed elements is that much percent of the total size of the hash -- table. If more elements are added, the hash table is first -- expanded and reorganized. A MAX_LOAD of 100 will always fill the -- whole table before enlarging it, but note that this will increase -- the cost of operations significantly when the table is almost full. -+ item = (struct _hurd_ihash_item){ .key = key, .value = value}; - -- If the value is set to a smaller value than the current load -- factor, the next reorganization will happen when a new item is -- added to the hash table. */ --void --hurd_ihash_set_max_load (hurd_ihash_t ht, unsigned int max_load) --{ -- ht->max_load = max_load; -+ /* xxx max_loop = round_up (3 * log_{1+epsilon}(n)) */ + unsigned int i; -+ for (i = 0; i < 3 * ht->size; i++) ++ struct _hurd_ihash_item old; ++ unsigned int idx0, idx1; ++ ++ for (i = 0; i < max_loop; i++) + { -+ idx0 = hash0 (ht, item.key); ++ idx0 = hash0 (ht, item->key); + if (index_empty (ht, idx0)) + { -+ set_item (ht, idx0, &item); ++ set_item (ht, idx0, item); + return 1; + } + + old = ht->items[idx0]; -+ set_item (ht, idx0, &item); -+ item = old; ++ set_item (ht, idx0, item); ++ *item = old; + -+ idx1 = hash1 (ht, item.key); ++ idx1 = hash1 (ht, item->key); + if (index_empty (ht, idx1)) + { -+ set_item (ht, idx1, &item); ++ set_item (ht, idx1, item); + return 1; + } + + old = ht->items[idx1]; -+ set_item (ht, idx1, &item); -+ item = old; ++ set_item (ht, idx1, item); ++ *item = old; + } + + return 0; - } - -- ++} ++ + /* Helper function for hurd_ihash_add. Return 1 if the item was added, and 0 if it could not be added because no empty slot was found. The arguments are identical to hurd_ihash_add. -@@ -215,53 +242,55 @@ hurd_ihash_set_max_load (hurd_ihash_t ht, unsigned int max_load) +@@ -215,53 +273,62 @@ hurd_ihash_set_max_load (hurd_ihash_t ht, unsigned int max_load) static inline int add_one (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t value) { @@ -162,7 +197,12 @@ index e74a2c5..cb7d765 100644 - - idx = hash_int32 (key, 32) & mask; - first_free = idx; -- ++ /* The item that needs to be added. Also, if we remove an item from ++ the table, we store it here. If we fail to insert it again, we ++ start over. */ ++ struct _hurd_ihash_item item = ++ (struct _hurd_ihash_item){ .key = key, .value = value}; + - if (ht->items[idx].value != _HURD_IHASH_EMPTY && ht->items[idx].key != key) - { - unsigned int up_idx = idx; @@ -179,12 +219,7 @@ index e74a2c5..cb7d765 100644 - } - while (up_idx != idx); - } -+ /* The item that needs to be added. Also, if we remove an item from -+ the table, we store it here. If we fail to insert it again, we -+ start over. */ -+ struct _hurd_ihash_item item = -+ (struct _hurd_ihash_item){ .key = key, .value = value}; - +- - /* Remove the old entry for this key if necessary. */ + unsigned int idx = find_index (ht, key); if (index_valid (ht, idx, key)) @@ -208,39 +243,46 @@ index e74a2c5..cb7d765 100644 + /* Update the entry. */ + locp_remove (ht, &ht->items[idx].value); + set_item (ht, idx, &item); ++ ht->nr_items += 1; return 1; } - return 0; -+ int added = add_item (ht, key, value); ++ /* No matter what, we will insert this item. */ ++ ht->nr_items += 1; ++ ++ int added = add_item (ht, &item, ht->max_loop); + if (added) + return 1; + + /* We failed. */ ++ size_t size = 1U << ht->size; + + retry: + /* Pick different hash functions. */ + ht->salt0 = hash_int32 (ht->salt0, 32); + ht->salt1 = hash_int32 (ht->salt1, 32); + ++ error (0, 0, "XXXXXXXXXXXXXXXXx We failed! salts %x %x", ++ ht->salt0, ht->salt1); ++ + /* Try to insert the new or stashed item. */ -+ added = add_item (ht, item.key, item.value); ++ added = add_item (ht, &item, size); + if (! added) + goto retry; + + /* Rehash the table. */ -+ for (idx = 0; idx < (1U << ht->size); idx++) ++ for (idx = 0; idx < size; idx++) + if (! index_empty (ht, idx) + /* For each non-empty slot, check if the item there is not at + the correct (new) index. */ -+ && (idx < (1U << (ht->size - 1)) /* select the appropriate -+ hash function */ ++ && (idx < (size >> 1) /* select the appropriate function */ + ? hash0 (ht, ht->items[idx].key) != idx + : hash1 (ht, ht->items[idx].key) != idx)) + { + item = ht->items[idx]; + ht->items[idx].value = _HURD_IHASH_EMPTY; -+ added = add_item (ht, item.key, item.value); ++ added = add_item (ht, &item, size); + if (! added) + goto retry; + } @@ -249,31 +291,39 @@ index e74a2c5..cb7d765 100644 } -@@ -278,8 +307,8 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t item) +@@ -274,12 +341,11 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t item) + { + struct hurd_ihash old_ht = *ht; + int was_added; +- unsigned int i; if (ht->size) { -- /* Only fill the hash table up to its maximum load factor. */ + /* Only fill the hash table up to its maximum load factor. */ - if (ht->nr_items * 100 / ht->size <= ht->max_load) -+ /* Keep the load below 0.5. */ -+ if (ht->nr_items < (1U << (ht->size - 1))) ++ if ((ht->nr_items * 100) >> ht->size <= ht->max_load) if (add_one (ht, key, item)) return 0; } -@@ -289,10 +318,10 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t item) +@@ -289,10 +355,15 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t item) if (ht->size == 0) ht->size = HURD_IHASH_MIN_SIZE; else - ht->size <<= 1; + ht->size += 1; /* double the size */ ++ ++ /* Recompute the threshold for rehashing. */ ++ size_t size = 1U << ht->size; ++ float epsilon = (float) (50 - ht->max_load) / 2; ++ ht->max_loop = ceilf (3 * logf (size) / log1pf (epsilon)); /* calloc() will initialize all values to _HURD_IHASH_EMPTY implicitely. */ - ht->items = calloc (ht->size, sizeof (struct _hurd_ihash_item)); -+ ht->items = calloc (1U << ht->size, sizeof (struct _hurd_ihash_item)); ++ ht->items = calloc (size, sizeof (struct _hurd_ihash_item)); if (ht->items == NULL) { -@@ -301,12 +330,11 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t item) +@@ -301,12 +372,11 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t item) } /* We have to rehash the old entries. */ @@ -292,32 +342,28 @@ index e74a2c5..cb7d765 100644 /* Finally add the new element! */ was_added = add_one (ht, key, item); diff --git a/libihash/ihash.h b/libihash/ihash.h -index 057babc..27045cb 100644 +index 057babc..5327442 100644 --- a/libihash/ihash.h +++ b/libihash/ihash.h -@@ -73,32 +73,29 @@ struct hurd_ihash +@@ -73,7 +73,7 @@ struct hurd_ihash /* An array of (key, value) pairs. */ _hurd_ihash_item_t items; - /* The length of the array ITEMS. */ -+ /* Log2 of the length of the array ITEMS, or 0 indicating that ITEMS -+ has not been allocated. */ ++ /* Log2 of the length of the array ITEMS. */ size_t size; /* The offset of the location pointer from the hash value. */ - intptr_t locp_offset; - -- /* The maximum load factor in percent. */ -- int max_load; -- - /* When freeing or overwriting an element, this function is called - with the value as the first argument, and CLEANUP_DATA as the +@@ -87,18 +87,24 @@ struct hurd_ihash second argument. This does not happen if CLEANUP is NULL. */ hurd_ihash_cleanup_t cleanup; void *cleanup_data; + + /* Salts for the hash function. */ + uint32_t salt0, salt1; ++ ++ /* Rehash threshold. */ ++ unsigned int max_loop; }; typedef struct hurd_ihash *hurd_ihash_t; @@ -327,60 +373,64 @@ index 057babc..27045cb 100644 -/* The size of the initial allocation in number of items. This must - be a power of two. */ -#define HURD_IHASH_MIN_SIZE 32 -- ++/* Log2 of the size of the initial allocation in number of items. */ ++#define HURD_IHASH_MIN_SIZE 5 + -/* The default value for the maximum load factor in percent. */ -#define HURD_IHASH_MAX_LOAD_DEFAULT 75 -+/* The size of the initial allocation in log2(number of items). */ -+#define HURD_IHASH_MIN_SIZE 5 ++/* The default value for the maximum load factor in percent. Must be ++ below 50. */ ++#define HURD_IHASH_MAX_LOAD_DEFAULT 40 /* The LOCP_OFFS to use if no location pointer is available. */ #define HURD_IHASH_NO_LOCP INTPTR_MIN -@@ -106,8 +103,7 @@ typedef struct hurd_ihash *hurd_ihash_t; - /* The static initializer for a struct hurd_ihash. */ +@@ -107,7 +113,8 @@ typedef struct hurd_ihash *hurd_ihash_t; #define HURD_IHASH_INITIALIZER(locp_offs) \ { .nr_items = 0, .size = 0, .cleanup = (hurd_ihash_cleanup_t) 0, \ -- .max_load = HURD_IHASH_MAX_LOAD_DEFAULT, \ + .max_load = HURD_IHASH_MAX_LOAD_DEFAULT, \ - .locp_offset = (locp_offs)} -+ .locp_offset = (locp_offs), .salt0 = 0, .salt1 = ~0 } ++ .locp_offset = (locp_offs), \ ++ .salt0 = 0, .salt1 = ~0, .max_loop = 0 } /* Initialize the hash table at address HT. If LOCP_OFFSET is not HURD_IHASH_NO_LOCP, then this is an offset (in bytes) from the -@@ -143,20 +139,6 @@ void hurd_ihash_free (hurd_ihash_t ht); +@@ -143,14 +150,16 @@ void hurd_ihash_free (hurd_ihash_t ht); void hurd_ihash_set_cleanup (hurd_ihash_t ht, hurd_ihash_cleanup_t cleanup, void *cleanup_data); -/* Set the maximum load factor in percent to MAX_LOAD, which should be - between 50 and 100. The default is HURD_IHASH_MAX_LOAD_DEFAULT. -- New elements are only added to the hash table while the number of -- hashed elements is that much percent of the total size of the hash -- table. If more elements are added, the hash table is first ++ ++/* Set the maximum load factor in percent to MAX_LOAD, which must be ++ at least 1, and at most HURD_IHASH_MAX_LOAD_DEFAULT. The default ++ is HURD_IHASH_MAX_LOAD_DEFAULT. If MAX_LOAD is invalid, the ++ maximum load factor is not changed. ++ + New elements are only added to the hash table while the number of + hashed elements is that much percent of the total size of the hash + table. If more elements are added, the hash table is first - expanded and reorganized. A MAX_LOAD of 100 will always fill the - whole table before enlarging it, but note that this will increase - the cost of operations significantly when the table is almost full. -- -- If the value is set to a smaller value than the current load -- factor, the next reorganization will happen when a new item is -- added to the hash table. */ --void hurd_ihash_set_max_load (hurd_ihash_t ht, unsigned int max_load); -- - - /* Add ITEM to the hash table HT under the key KEY. If there already - is an item under this key, call the cleanup function (if any) for -@@ -216,7 +198,7 @@ hurd_ihash_value_t hurd_ihash_find (hurd_ihash_t ht, hurd_ihash_key_t key); ++ expanded and reorganized. + + If the value is set to a smaller value than the current load + factor, the next reorganization will happen when a new item is +@@ -216,7 +225,7 @@ hurd_ihash_value_t hurd_ihash_find (hurd_ihash_t ht, hurd_ihash_key_t key); *_hurd_ihash_valuep = (ht)->size ? &(ht)->items[0].value : 0; \ (ht)->size \ && ((_hurd_ihash_item_t) _hurd_ihash_valuep) - &(ht)->items[0] \ - < (ht)->size \ -+ < (1U << (ht)->size) \ ++ < (1 << (ht)->size) \ && (val = *_hurd_ihash_valuep, 1); \ _hurd_ihash_valuep = (hurd_ihash_value_t *) \ (((_hurd_ihash_item_t) _hurd_ihash_valuep) + 1)) \ -@@ -234,7 +216,7 @@ hurd_ihash_value_t hurd_ihash_find (hurd_ihash_t ht, hurd_ihash_key_t key); +@@ -234,7 +243,7 @@ hurd_ihash_value_t hurd_ihash_find (hurd_ihash_t ht, hurd_ihash_key_t key); ITEM->value. */ #define HURD_IHASH_ITERATE_ITEMS(ht, item) \ for (_hurd_ihash_item_t item = (ht)->size? &(ht)->items[0]: 0; \ - (ht)->size && item - &(ht)->items[0] < (ht)->size; \ -+ (ht)->size && item - &(ht)->items[0] < (1U << (ht)->size); \ ++ (ht)->size && item - &(ht)->items[0] < (1 << (ht)->size); \ item++) \ if (item->value != _HURD_IHASH_EMPTY && \ item->value != _HURD_IHASH_DELETED) |