summaryrefslogtreecommitdiff
path: root/libihash/ihash.c
diff options
context:
space:
mode:
authorJustus Winter <4winter@informatik.uni-hamburg.de>2014-05-08 18:33:57 +0200
committerJustus Winter <4winter@informatik.uni-hamburg.de>2014-05-13 21:36:18 +0200
commit57341e5be12f79ee1916369679bb6507a10fcac9 (patch)
treebbdbcb1e17aef48410a9ea2140a4783e34570459 /libihash/ihash.c
parent2d898893815a980f1b821fcec267eb8e7ded678e (diff)
libihash: use linear probing and fast modulo operation
libihash uses open addressing. Previously, quadratic probing in both directions was used to resolve collisions. Quadratic probing might result in a less efficient use of caches. Also, prime numbers of the form 4 * i + 3 were used as array sizes. This was used in combination with the integer modulo operation for hashing. It has been known for some time that libihash suffers from collisions, so a integer hash function is now applied to the keys to derive the index. Use linear probing instead. Also, use powers of two for the array sizes, so a bit mask can be used for the modulo operation. * libihash/ihash.c (ihash_sizes, ihash_nsizes): Remove. (find_index): Use linear probing and fast modulo operation. (add_one): Likewise. * libihash/ihash.h (HURD_IHASH_MIN_SIZE): New macro.
Diffstat (limited to 'libihash/ihash.c')
-rw-r--r--libihash/ihash.c125
1 files changed, 15 insertions, 110 deletions
diff --git a/libihash/ihash.c b/libihash/ihash.c
index 71ddc26d..249f4884 100644
--- a/libihash/ihash.c
+++ b/libihash/ihash.c
@@ -31,55 +31,6 @@
#include <assert.h>
#include "ihash.h"
-
-
-/* The prime numbers of the form 4 * i + 3 for some i, all greater
- than twice the previous one and smaller than 2^40 (for now). */
-static const uint64_t ihash_sizes[] =
-{
- 3,
- 7,
- 19,
- 43,
- 103,
- 211,
- 431,
- 863,
- 1747,
- 3499,
- 7019,
- 14051,
- 28111,
- 56239,
- 112507,
- 225023,
- 450067,
- 900139,
- 1800311,
- 3600659,
- 7201351,
- 14402743,
- 28805519,
- 57611039,
- 115222091,
- 230444239,
- 460888499,
- 921777067,
- 1843554151,
- UINT64_C (3687108307),
- UINT64_C (7374216631),
- UINT64_C (14748433279),
- UINT64_C (29496866579),
- UINT64_C (58993733159),
- UINT64_C (117987466379),
- UINT64_C (235974932759),
- UINT64_C (471949865531),
- UINT64_C (943899731087)
-};
-
-static const unsigned int ihash_nsizes = (sizeof ihash_sizes
- / sizeof ihash_sizes[0]);
-
/* This is the integer finalizer from MurmurHash3. */
static inline uint32_t
@@ -120,40 +71,24 @@ static inline int
find_index (hurd_ihash_t ht, hurd_ihash_key_t key)
{
unsigned int idx;
- unsigned int i;
unsigned int up_idx;
- unsigned int down_idx;
+ unsigned int mask = ht->size - 1;
- idx = murmur3_mix32 (key, 32) % ht->size;
+ idx = murmur3_mix32 (key, __builtin_ctzl (ht->size));
if (ht->items[idx].value == _HURD_IHASH_EMPTY || ht->items[idx].key == key)
return idx;
- /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx + i^2,
- we add 1, 3, 5, 7, etc to the previous index. We do this in both
- directions separately. */
- i = 1;
up_idx = idx;
- down_idx = idx;
do
{
- up_idx = (up_idx + i) % ht->size;
+ up_idx = (up_idx + 1) & mask;
if (ht->items[up_idx].value == _HURD_IHASH_EMPTY
|| ht->items[up_idx].key == key)
return up_idx;
-
- if (down_idx < i)
- down_idx += ht->size;
- down_idx = (down_idx - i) % ht->size;
- if (ht->items[down_idx].value == _HURD_IHASH_EMPTY
- || ht->items[down_idx].key == key)
- return down_idx;
-
- /* After (ht->size - 1) / 2 iterations, this will be 0. */
- i = (i + 2) % ht->size;
}
- while (i);
+ while (up_idx != idx);
/* If we end up here, the item could not be found. Return any
invalid index. */
@@ -277,52 +212,25 @@ add_one (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t value)
unsigned int idx;
unsigned int first_free;
- idx = murmur3_mix32 (key, 32) % ht->size;
+ idx = murmur3_mix32 (key, __builtin_ctzl (ht->size));
first_free = idx;
if (ht->items[idx].value != _HURD_IHASH_EMPTY && ht->items[idx].key != key)
{
- /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx +
- i^2, we add 1, 3, 5, 7, ... 2 * i - 1 to the previous index.
- We do this in both directions separately. */
- unsigned int i = 1;
+ unsigned int mask = ht->size - 1;
unsigned int up_idx = idx;
- unsigned int down_idx = idx;
-
+
do
{
- up_idx = (up_idx + i) % ht->size;
+ up_idx = (up_idx + 1) & mask;
if (ht->items[up_idx].value == _HURD_IHASH_EMPTY
|| ht->items[up_idx].key == key)
{
idx = up_idx;
break;
}
- if (first_free == idx
- && ht->items[up_idx].value == _HURD_IHASH_DELETED)
- first_free = up_idx;
-
- if (down_idx < i)
- down_idx += ht->size;
- down_idx = (down_idx - i) % ht->size;
- if (down_idx < 0)
- down_idx += ht->size;
- else
- down_idx %= ht->size;
- if (ht->items[down_idx].value == _HURD_IHASH_EMPTY
- || ht->items[down_idx].key == key)
- {
- idx = down_idx;
- break;
- }
- if (first_free == idx
- && ht->items[down_idx].value == _HURD_IHASH_DELETED)
- first_free = down_idx;
-
- /* After (ht->size - 1) / 2 iterations, this will be 0. */
- i = (i + 2) % ht->size;
}
- while (i);
+ while (up_idx != idx);
}
/* Remove the old entry for this key if necessary. */
@@ -365,21 +273,18 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t item)
if (ht->size)
{
/* Only fill the hash table up to its maximum load factor. */
- if (ht->nr_items * 100 / ht->size <= ht->max_load)
+ if ((ht->nr_items * 100) >> __builtin_ctzl (ht->size) <= ht->max_load)
if (add_one (ht, key, item))
return 0;
}
/* The hash table is too small, and we have to increase it. */
- for (i = 0; i < ihash_nsizes; i++)
- if (ihash_sizes[i] > old_ht.size)
- break;
- if (i == ihash_nsizes
- || ihash_sizes[i] > SIZE_MAX / sizeof (struct _hurd_ihash_item))
- return ENOMEM; /* Surely will be true momentarily. */
-
ht->nr_items = 0;
- ht->size = ihash_sizes[i];
+ if (ht->size == 0)
+ ht->size = HURD_IHASH_MIN_SIZE;
+ else
+ ht->size <<= 1;
+
/* calloc() will initialize all values to _HURD_IHASH_EMPTY implicitely. */
ht->items = calloc (ht->size, sizeof (struct _hurd_ihash_item));