diff options
-rw-r--r-- | debian/patches/0008-libihash-use-linear-probing-and-fast-modulo-operatio.patch | 228 | ||||
-rw-r--r-- | debian/patches/series | 1 |
2 files changed, 229 insertions, 0 deletions
diff --git a/debian/patches/0008-libihash-use-linear-probing-and-fast-modulo-operatio.patch b/debian/patches/0008-libihash-use-linear-probing-and-fast-modulo-operatio.patch new file mode 100644 index 00000000..84c09fc3 --- /dev/null +++ b/debian/patches/0008-libihash-use-linear-probing-and-fast-modulo-operatio.patch @@ -0,0 +1,228 @@ +From 6ad3d3f3558391f02468dbac2d1d73daa8e2f7e1 Mon Sep 17 00:00:00 2001 +From: Justus Winter <4winter@informatik.uni-hamburg.de> +Date: Thu, 8 May 2014 18:33:57 +0200 +Subject: [PATCH 8/8] libihash: use linear probing and fast modulo operation + +libihash uses open addressing. Previously, quadratic probing in both +directions was used to resolve collisions. Quadratic probing might +result in a less efficient use of caches. + +Also, prime numbers of the form 4 * i + 3 were used as array sizes. +This was used in combination with the integer modulo operation for +hashing. It has been known for some time that libihash suffers from +collisions, so a integer hash function is now applied to the keys to +derive the index. + +Use linear probing instead. Also, use powers of two for the array +sizes, so a bit mask can be used for the modulo operation. + +* libihash/ihash.c (ihash_sizes, ihash_nsizes): Remove. +(find_index): Use linear probing and fast modulo operation. +(add_one): Likewise. +* libihash/ihash.h (HURD_IHASH_MIN_SIZE): New macro. +--- + libihash/ihash.c | 121 ++++++------------------------------------------------- + libihash/ihash.h | 3 ++ + 2 files changed, 16 insertions(+), 108 deletions(-) + +diff --git a/libihash/ihash.c b/libihash/ihash.c +index 1de4c35..e74a2c5 100644 +--- a/libihash/ihash.c ++++ b/libihash/ihash.c +@@ -31,55 +31,6 @@ + #include <assert.h> + + #include "ihash.h" +- +- +-/* The prime numbers of the form 4 * i + 3 for some i, all greater +- than twice the previous one and smaller than 2^40 (for now). */ +-static const uint64_t ihash_sizes[] = +-{ +- 3, +- 7, +- 19, +- 43, +- 103, +- 211, +- 431, +- 863, +- 1747, +- 3499, +- 7019, +- 14051, +- 28111, +- 56239, +- 112507, +- 225023, +- 450067, +- 900139, +- 1800311, +- 3600659, +- 7201351, +- 14402743, +- 28805519, +- 57611039, +- 115222091, +- 230444239, +- 460888499, +- 921777067, +- 1843554151, +- UINT64_C (3687108307), +- UINT64_C (7374216631), +- UINT64_C (14748433279), +- UINT64_C (29496866579), +- UINT64_C (58993733159), +- UINT64_C (117987466379), +- UINT64_C (235974932759), +- UINT64_C (471949865531), +- UINT64_C (943899731087) +-}; +- +-static const unsigned int ihash_nsizes = (sizeof ihash_sizes +- / sizeof ihash_sizes[0]); +- + + /* Integer hashing follows Thomas Wang's paper about his 32/64-bits + mix functions : +@@ -126,40 +77,24 @@ static inline int + find_index (hurd_ihash_t ht, hurd_ihash_key_t key) + { + unsigned int idx; +- unsigned int i; + unsigned int up_idx; +- unsigned int down_idx; ++ unsigned int mask = ht->size - 1; + +- idx = hash_int32 (key, 32) % ht->size; ++ idx = hash_int32 (key, 32) & mask; + + if (ht->items[idx].value == _HURD_IHASH_EMPTY || ht->items[idx].key == key) + return idx; + +- /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx + i^2, +- we add 1, 3, 5, 7, etc to the previous index. We do this in both +- directions separately. */ +- i = 1; + up_idx = idx; +- down_idx = idx; + + do + { +- up_idx = (up_idx + i) % ht->size; ++ up_idx = (up_idx + 1) & mask; + if (ht->items[up_idx].value == _HURD_IHASH_EMPTY + || ht->items[up_idx].key == key) + return up_idx; +- +- if (down_idx < i) +- down_idx += ht->size; +- down_idx = (down_idx - i) % ht->size; +- if (ht->items[down_idx].value == _HURD_IHASH_EMPTY +- || ht->items[down_idx].key == key) +- return down_idx; +- +- /* After (ht->size - 1) / 2 iterations, this will be 0. */ +- i = (i + 2) % ht->size; + } +- while (i); ++ while (up_idx != idx); + + /* If we end up here, the item could not be found. Return any + invalid index. */ +@@ -282,53 +217,26 @@ add_one (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t value) + { + unsigned int idx; + unsigned int first_free; ++ unsigned int mask = ht->size - 1; + +- idx = hash_int32 (key, 32) % ht->size; ++ idx = hash_int32 (key, 32) & mask; + first_free = idx; + + if (ht->items[idx].value != _HURD_IHASH_EMPTY && ht->items[idx].key != key) + { +- /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx + +- i^2, we add 1, 3, 5, 7, ... 2 * i - 1 to the previous index. +- We do this in both directions separately. */ +- unsigned int i = 1; + unsigned int up_idx = idx; +- unsigned int down_idx = idx; + + do + { +- up_idx = (up_idx + i) % ht->size; ++ up_idx = (up_idx + 1) & mask; + if (ht->items[up_idx].value == _HURD_IHASH_EMPTY + || ht->items[up_idx].key == key) + { + idx = up_idx; + break; + } +- if (first_free == idx +- && ht->items[up_idx].value == _HURD_IHASH_DELETED) +- first_free = up_idx; +- +- if (down_idx < i) +- down_idx += ht->size; +- down_idx = (down_idx - i) % ht->size; +- if (down_idx < 0) +- down_idx += ht->size; +- else +- down_idx %= ht->size; +- if (ht->items[down_idx].value == _HURD_IHASH_EMPTY +- || ht->items[down_idx].key == key) +- { +- idx = down_idx; +- break; +- } +- if (first_free == idx +- && ht->items[down_idx].value == _HURD_IHASH_DELETED) +- first_free = down_idx; +- +- /* After (ht->size - 1) / 2 iterations, this will be 0. */ +- i = (i + 2) % ht->size; + } +- while (i); ++ while (up_idx != idx); + } + + /* Remove the old entry for this key if necessary. */ +@@ -377,15 +285,12 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t item) + } + + /* The hash table is too small, and we have to increase it. */ +- for (i = 0; i < ihash_nsizes; i++) +- if (ihash_sizes[i] > old_ht.size) +- break; +- if (i == ihash_nsizes +- || ihash_sizes[i] > SIZE_MAX / sizeof (struct _hurd_ihash_item)) +- return ENOMEM; /* Surely will be true momentarily. */ +- + ht->nr_items = 0; +- ht->size = ihash_sizes[i]; ++ if (ht->size == 0) ++ ht->size = HURD_IHASH_MIN_SIZE; ++ else ++ ht->size <<= 1; ++ + /* calloc() will initialize all values to _HURD_IHASH_EMPTY implicitely. */ + ht->items = calloc (ht->size, sizeof (struct _hurd_ihash_item)); + +diff --git a/libihash/ihash.h b/libihash/ihash.h +index 6bdc925..04c957f 100644 +--- a/libihash/ihash.h ++++ b/libihash/ihash.h +@@ -93,6 +93,9 @@ typedef struct hurd_ihash *hurd_ihash_t; + + /* Construction and destruction of hash tables. */ + ++/* The size of the initial allocation in number of items. */ ++#define HURD_IHASH_MIN_SIZE 32 ++ + /* The default value for the maximum load factor in percent. */ + #define HURD_IHASH_MAX_LOAD_DEFAULT 75 + +-- +2.0.0.rc0 + diff --git a/debian/patches/series b/debian/patches/series index 9a7a2c9a..ef352455 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -48,3 +48,4 @@ mach-defpager-protected-payload.patch 0005-libtrivfs-lock-less-reference-counting-for-trivfs_pr.patch 0006-libihash-use-an-integer-hash-function-on-the-keys.patch 0007-libihash-reduce-the-default-maximum-load-factor-to-7.patch +0008-libihash-use-linear-probing-and-fast-modulo-operatio.patch |