1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
|
From b18240df917ef17a7877164e098092728c139bb6 Mon Sep 17 00:00:00 2001
From: Justus Winter <4winter@informatik.uni-hamburg.de>
Date: Thu, 8 May 2014 18:33:57 +0200
Subject: [PATCH 08/11] libihash: use linear probing and fast modulo operation
libihash uses open addressing. Previously, quadratic probing in both
directions was used to resolve collisions. Quadratic probing might
result in a less efficient use of caches.
Also, prime numbers of the form 4 * i + 3 were used as array sizes.
This was used in combination with the integer modulo operation for
hashing. It has been known for some time that libihash suffers from
collisions, so an integer hash function is now applied to the keys to
derive the index.
Use linear probing instead. Also, use powers of two for the array
sizes, so a bit mask can be used for the modulo operation.
* libihash/ihash.c (ihash_sizes, ihash_nsizes): Remove.
(find_index): Use linear probing and fast modulo operation.
(add_one): Likewise.
* libihash/ihash.h (HURD_IHASH_MIN_SIZE): New macro.
---
libihash/ihash.c | 121 ++++++-------------------------------------------------
libihash/ihash.h | 4 ++
2 files changed, 17 insertions(+), 108 deletions(-)
diff --git a/libihash/ihash.c b/libihash/ihash.c
index 1de4c35..e74a2c5 100644
--- a/libihash/ihash.c
+++ b/libihash/ihash.c
@@ -31,55 +31,6 @@
#include <assert.h>
#include "ihash.h"
-
-
-/* The prime numbers of the form 4 * i + 3 for some i, all greater
- than twice the previous one and smaller than 2^40 (for now). */
-static const uint64_t ihash_sizes[] =
-{
- 3,
- 7,
- 19,
- 43,
- 103,
- 211,
- 431,
- 863,
- 1747,
- 3499,
- 7019,
- 14051,
- 28111,
- 56239,
- 112507,
- 225023,
- 450067,
- 900139,
- 1800311,
- 3600659,
- 7201351,
- 14402743,
- 28805519,
- 57611039,
- 115222091,
- 230444239,
- 460888499,
- 921777067,
- 1843554151,
- UINT64_C (3687108307),
- UINT64_C (7374216631),
- UINT64_C (14748433279),
- UINT64_C (29496866579),
- UINT64_C (58993733159),
- UINT64_C (117987466379),
- UINT64_C (235974932759),
- UINT64_C (471949865531),
- UINT64_C (943899731087)
-};
-
-static const unsigned int ihash_nsizes = (sizeof ihash_sizes
- / sizeof ihash_sizes[0]);
-
/* Integer hashing follows Thomas Wang's paper about his 32/64-bits
mix functions :
@@ -126,40 +77,24 @@ static inline int
find_index (hurd_ihash_t ht, hurd_ihash_key_t key)
{
unsigned int idx;
- unsigned int i;
unsigned int up_idx;
- unsigned int down_idx;
+ unsigned int mask = ht->size - 1;
- idx = hash_int32 (key, 32) % ht->size;
+ idx = hash_int32 (key, 32) & mask;
if (ht->items[idx].value == _HURD_IHASH_EMPTY || ht->items[idx].key == key)
return idx;
- /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx + i^2,
- we add 1, 3, 5, 7, etc to the previous index. We do this in both
- directions separately. */
- i = 1;
up_idx = idx;
- down_idx = idx;
do
{
- up_idx = (up_idx + i) % ht->size;
+ up_idx = (up_idx + 1) & mask;
if (ht->items[up_idx].value == _HURD_IHASH_EMPTY
|| ht->items[up_idx].key == key)
return up_idx;
-
- if (down_idx < i)
- down_idx += ht->size;
- down_idx = (down_idx - i) % ht->size;
- if (ht->items[down_idx].value == _HURD_IHASH_EMPTY
- || ht->items[down_idx].key == key)
- return down_idx;
-
- /* After (ht->size - 1) / 2 iterations, this will be 0. */
- i = (i + 2) % ht->size;
}
- while (i);
+ while (up_idx != idx);
/* If we end up here, the item could not be found. Return any
invalid index. */
@@ -282,53 +217,26 @@ add_one (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t value)
{
unsigned int idx;
unsigned int first_free;
+ unsigned int mask = ht->size - 1;
- idx = hash_int32 (key, 32) % ht->size;
+ idx = hash_int32 (key, 32) & mask;
first_free = idx;
if (ht->items[idx].value != _HURD_IHASH_EMPTY && ht->items[idx].key != key)
{
- /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx +
- i^2, we add 1, 3, 5, 7, ... 2 * i - 1 to the previous index.
- We do this in both directions separately. */
- unsigned int i = 1;
unsigned int up_idx = idx;
- unsigned int down_idx = idx;
do
{
- up_idx = (up_idx + i) % ht->size;
+ up_idx = (up_idx + 1) & mask;
if (ht->items[up_idx].value == _HURD_IHASH_EMPTY
|| ht->items[up_idx].key == key)
{
idx = up_idx;
break;
}
- if (first_free == idx
- && ht->items[up_idx].value == _HURD_IHASH_DELETED)
- first_free = up_idx;
-
- if (down_idx < i)
- down_idx += ht->size;
- down_idx = (down_idx - i) % ht->size;
- if (down_idx < 0)
- down_idx += ht->size;
- else
- down_idx %= ht->size;
- if (ht->items[down_idx].value == _HURD_IHASH_EMPTY
- || ht->items[down_idx].key == key)
- {
- idx = down_idx;
- break;
- }
- if (first_free == idx
- && ht->items[down_idx].value == _HURD_IHASH_DELETED)
- first_free = down_idx;
-
- /* After (ht->size - 1) / 2 iterations, this will be 0. */
- i = (i + 2) % ht->size;
}
- while (i);
+ while (up_idx != idx);
}
/* Remove the old entry for this key if necessary. */
@@ -377,15 +285,12 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t item)
}
/* The hash table is too small, and we have to increase it. */
- for (i = 0; i < ihash_nsizes; i++)
- if (ihash_sizes[i] > old_ht.size)
- break;
- if (i == ihash_nsizes
- || ihash_sizes[i] > SIZE_MAX / sizeof (struct _hurd_ihash_item))
- return ENOMEM; /* Surely will be true momentarily. */
-
ht->nr_items = 0;
- ht->size = ihash_sizes[i];
+ if (ht->size == 0)
+ ht->size = HURD_IHASH_MIN_SIZE;
+ else
+ ht->size <<= 1;
+
/* calloc() will initialize all values to _HURD_IHASH_EMPTY implicitely. */
ht->items = calloc (ht->size, sizeof (struct _hurd_ihash_item));
diff --git a/libihash/ihash.h b/libihash/ihash.h
index 6bdc925..057babc 100644
--- a/libihash/ihash.h
+++ b/libihash/ihash.h
@@ -93,6 +93,10 @@ typedef struct hurd_ihash *hurd_ihash_t;
/* Construction and destruction of hash tables. */
+/* The size of the initial allocation in number of items. This must
+ be a power of two. */
+#define HURD_IHASH_MIN_SIZE 32
+
/* The default value for the maximum load factor in percent. */
#define HURD_IHASH_MAX_LOAD_DEFAULT 75
--
2.0.0.rc0
|