diff options
author | Justus Winter <4winter@informatik.uni-hamburg.de> | 2014-05-08 18:33:57 +0200 |
---|---|---|
committer | Justus Winter <4winter@informatik.uni-hamburg.de> | 2014-05-13 21:36:18 +0200 |
commit | 57341e5be12f79ee1916369679bb6507a10fcac9 (patch) | |
tree | bbdbcb1e17aef48410a9ea2140a4783e34570459 /libihash/ihash.c | |
parent | 2d898893815a980f1b821fcec267eb8e7ded678e (diff) |
libihash: use linear probing and fast modulo operation
libihash uses open addressing. Previously, quadratic probing in both
directions was used to resolve collisions. Quadratic probing might
result in a less efficient use of caches.
Also, prime numbers of the form 4 * i + 3 were used as array sizes.
This was used in combination with the integer modulo operation for
hashing. It has been known for some time that libihash suffers from
collisions, so a integer hash function is now applied to the keys to
derive the index.
Use linear probing instead. Also, use powers of two for the array
sizes, so a bit mask can be used for the modulo operation.
* libihash/ihash.c (ihash_sizes, ihash_nsizes): Remove.
(find_index): Use linear probing and fast modulo operation.
(add_one): Likewise.
* libihash/ihash.h (HURD_IHASH_MIN_SIZE): New macro.
Diffstat (limited to 'libihash/ihash.c')
-rw-r--r-- | libihash/ihash.c | 125 |
1 files changed, 15 insertions, 110 deletions
diff --git a/libihash/ihash.c b/libihash/ihash.c index 71ddc26d..249f4884 100644 --- a/libihash/ihash.c +++ b/libihash/ihash.c @@ -31,55 +31,6 @@ #include <assert.h> #include "ihash.h" - - -/* The prime numbers of the form 4 * i + 3 for some i, all greater - than twice the previous one and smaller than 2^40 (for now). */ -static const uint64_t ihash_sizes[] = -{ - 3, - 7, - 19, - 43, - 103, - 211, - 431, - 863, - 1747, - 3499, - 7019, - 14051, - 28111, - 56239, - 112507, - 225023, - 450067, - 900139, - 1800311, - 3600659, - 7201351, - 14402743, - 28805519, - 57611039, - 115222091, - 230444239, - 460888499, - 921777067, - 1843554151, - UINT64_C (3687108307), - UINT64_C (7374216631), - UINT64_C (14748433279), - UINT64_C (29496866579), - UINT64_C (58993733159), - UINT64_C (117987466379), - UINT64_C (235974932759), - UINT64_C (471949865531), - UINT64_C (943899731087) -}; - -static const unsigned int ihash_nsizes = (sizeof ihash_sizes - / sizeof ihash_sizes[0]); - /* This is the integer finalizer from MurmurHash3. */ static inline uint32_t @@ -120,40 +71,24 @@ static inline int find_index (hurd_ihash_t ht, hurd_ihash_key_t key) { unsigned int idx; - unsigned int i; unsigned int up_idx; - unsigned int down_idx; + unsigned int mask = ht->size - 1; - idx = murmur3_mix32 (key, 32) % ht->size; + idx = murmur3_mix32 (key, __builtin_ctzl (ht->size)); if (ht->items[idx].value == _HURD_IHASH_EMPTY || ht->items[idx].key == key) return idx; - /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx + i^2, - we add 1, 3, 5, 7, etc to the previous index. We do this in both - directions separately. */ - i = 1; up_idx = idx; - down_idx = idx; do { - up_idx = (up_idx + i) % ht->size; + up_idx = (up_idx + 1) & mask; if (ht->items[up_idx].value == _HURD_IHASH_EMPTY || ht->items[up_idx].key == key) return up_idx; - - if (down_idx < i) - down_idx += ht->size; - down_idx = (down_idx - i) % ht->size; - if (ht->items[down_idx].value == _HURD_IHASH_EMPTY - || ht->items[down_idx].key == key) - return down_idx; - - /* After (ht->size - 1) / 2 iterations, this will be 0. */ - i = (i + 2) % ht->size; } - while (i); + while (up_idx != idx); /* If we end up here, the item could not be found. Return any invalid index. */ @@ -277,52 +212,25 @@ add_one (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t value) unsigned int idx; unsigned int first_free; - idx = murmur3_mix32 (key, 32) % ht->size; + idx = murmur3_mix32 (key, __builtin_ctzl (ht->size)); first_free = idx; if (ht->items[idx].value != _HURD_IHASH_EMPTY && ht->items[idx].key != key) { - /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx + - i^2, we add 1, 3, 5, 7, ... 2 * i - 1 to the previous index. - We do this in both directions separately. */ - unsigned int i = 1; + unsigned int mask = ht->size - 1; unsigned int up_idx = idx; - unsigned int down_idx = idx; - + do { - up_idx = (up_idx + i) % ht->size; + up_idx = (up_idx + 1) & mask; if (ht->items[up_idx].value == _HURD_IHASH_EMPTY || ht->items[up_idx].key == key) { idx = up_idx; break; } - if (first_free == idx - && ht->items[up_idx].value == _HURD_IHASH_DELETED) - first_free = up_idx; - - if (down_idx < i) - down_idx += ht->size; - down_idx = (down_idx - i) % ht->size; - if (down_idx < 0) - down_idx += ht->size; - else - down_idx %= ht->size; - if (ht->items[down_idx].value == _HURD_IHASH_EMPTY - || ht->items[down_idx].key == key) - { - idx = down_idx; - break; - } - if (first_free == idx - && ht->items[down_idx].value == _HURD_IHASH_DELETED) - first_free = down_idx; - - /* After (ht->size - 1) / 2 iterations, this will be 0. */ - i = (i + 2) % ht->size; } - while (i); + while (up_idx != idx); } /* Remove the old entry for this key if necessary. */ @@ -365,21 +273,18 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t item) if (ht->size) { /* Only fill the hash table up to its maximum load factor. */ - if (ht->nr_items * 100 / ht->size <= ht->max_load) + if ((ht->nr_items * 100) >> __builtin_ctzl (ht->size) <= ht->max_load) if (add_one (ht, key, item)) return 0; } /* The hash table is too small, and we have to increase it. */ - for (i = 0; i < ihash_nsizes; i++) - if (ihash_sizes[i] > old_ht.size) - break; - if (i == ihash_nsizes - || ihash_sizes[i] > SIZE_MAX / sizeof (struct _hurd_ihash_item)) - return ENOMEM; /* Surely will be true momentarily. */ - ht->nr_items = 0; - ht->size = ihash_sizes[i]; + if (ht->size == 0) + ht->size = HURD_IHASH_MIN_SIZE; + else + ht->size <<= 1; + /* calloc() will initialize all values to _HURD_IHASH_EMPTY implicitely. */ ht->items = calloc (ht->size, sizeof (struct _hurd_ihash_item)); |