summary | refs | log | tree | commit | diff
path: root/debian/patches/0006-libihash-do-not-use-an-integer-hash-function-by-defa.patch
diff options
context:
space:
mode:
author Justus Winter <4winter@informatik.uni-hamburg.de> 2014-05-26 14:01:13 +0200
committer Justus Winter <4winter@informatik.uni-hamburg.de> 2014-05-26 14:01:13 +0200
commit 014461fef384771fa359cab675c25d197a3d8e68 (patch)
tree d0d8d1b5fd4fdfa7540bbc5d36a73a23370ad408 /debian/patches/0006-libihash-do-not-use-an-integer-hash-function-by-defa.patch
parent 2b512be2e9b7421d9b2c95a8cf1b87e0e2eee43e (diff)
add patch series
Diffstat (limited to 'debian/patches/0006-libihash-do-not-use-an-integer-hash-function-by-defa.patch')
-rw-r--r-- debian/patches/0006-libihash-do-not-use-an-integer-hash-function-by-defa.patch | 106
1 files changed, 106 insertions, 0 deletions
diff --git a/debian/patches/0006-libihash-do-not-use-an-integer-hash-function-by-defa.patch b/debian/patches/0006-libihash-do-not-use-an-integer-hash-function-by-defa.patch
new file mode 100644
index 00000000..fa5702d2
--- /dev/null
+++ b/debian/patches/0006-libihash-do-not-use-an-integer-hash-function-by-defa.patch
@@ -0,0 +1,106 @@
+From 5b039a12bf5cfc9c65b8e169ed4503e306f971f3 Mon Sep 17 00:00:00 2001
+From: Justus Winter <4winter@informatik.uni-hamburg.de>
+Date: Mon, 26 May 2014 12:18:08 +0200
+Subject: [PATCH 06/20] libihash: do not use an integer hash function by
+ default
+
+Recently libihash was changed to use an integer hash function on the
+keys in an attempt to reduce the rate of collisions (2d898893), which
+has long been assumed to be high.
+
+Richard Braun was kind enough to run some benchmarks. He observed:
+
+"1/ Using an extremely simple microbenchmark [1] that merely inserts
+keys, either random integers or sequential ones to match the way port
+names are managed, it seems that the previous code, despite its
+apparent flaws, did quite well.
+
+[1] http://darnassus.sceen.net/gitweb/rbraun/ihtest.git
+
+Using an integer hashing function actually reduces performance on the
+sequential integer test case. It makes sense because, considering a
+set of consecutive integers starting from 0, and a hash table that
+always has more slots than items, a modulo is a perfect hash
+function. Even when taking into account that only names for receive
+rights are normally managed with libihash, i.e. that keys aren't
+actually sequential, they are almost all equally distributed, leading
+to very few collisions.
+
+Therefore, as a third option, I've removed the hashing function,
+leaving only a fast modulo (an AND) and this variant provided the best
+raw results.
+
+2/ I've also built hurd packages multiple times and got average build
+times with each variant (previous, new, new without hash function) and
+here are the results I obtained respectively : 52m59s, 52m31s, 52m22s."
+
+Do not use the integer hash function on the keys by default.
+
+* libihash/ihash.c (murmur3_mix32): Remove now unused function.
+(find_index): Use the fast division method to derive the index.
+(add_one): Likewise. Also, update the comment to reflect the current
+hashing method.
+---
+ libihash/ihash.c | 22 ++++------------------
+ 1 file changed, 4 insertions(+), 18 deletions(-)
+
+diff --git a/libihash/ihash.c b/libihash/ihash.c
+index 4d9cc18..fa29257 100644
+--- a/libihash/ihash.c
++++ b/libihash/ihash.c
+@@ -32,19 +32,6 @@
+
+ #include "ihash.h"
+
+-/* This is the integer finalizer from MurmurHash3. */
+-static inline uint32_t
+-murmur3_mix32 (uint32_t h, unsigned int bits)
+-{
+- h ^= h >> 16;
+- h *= 0x85ebca6b;
+- h ^= h >> 13;
+- h *= 0xc2b2ae35;
+- h ^= h >> 16;
+-
+- return h >> (32 - bits);
+-}
+-
+ /* Return 1 if the slot with the index IDX in the hash table HT is
+ empty, and 0 otherwise. */
+ static inline int
+@@ -74,7 +61,7 @@ find_index (hurd_ihash_t ht, hurd_ihash_key_t key)
+ unsigned int up_idx;
+ unsigned int mask = ht->size - 1;
+
+- idx = murmur3_mix32 (key, __builtin_ctzl (ht->size));
++ idx = key & mask;
+
+ if (ht->items[idx].value == _HURD_IHASH_EMPTY || ht->items[idx].key == key)
+ return idx;
+@@ -205,20 +192,19 @@ hurd_ihash_set_max_load (hurd_ihash_t ht, unsigned int max_load)
+ found. The arguments are identical to hurd_ihash_add.
+
+ We are using open address hashing. As the hash function we use the
+- division method with quadratic probe. This is guaranteed to try
+- all slots in the hash table if the prime number is 3 mod 4. */
++ division method with linear probe. */
+ static inline int
+ add_one (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t value)
+ {
+ unsigned int idx;
+ unsigned int first_free;
++ unsigned int mask = ht->size - 1;
+
+- idx = murmur3_mix32 (key, __builtin_ctzl (ht->size));
++ idx = key & mask;
+ first_free = idx;
+
+ if (ht->items[idx].value != _HURD_IHASH_EMPTY && ht->items[idx].key != key)
+ {
+- unsigned int mask = ht->size - 1;
+ unsigned int up_idx = idx;
+
+ do
+--
+2.0.0.rc2
+