khash.h
Go to the documentation of this file.
1 /* The MIT License
2 
3  Copyright (c) 2008, 2009, 2011 by Attractive Chaos <[email protected]>
4 
5  Permission is hereby granted, free of charge, to any person obtaining
6  a copy of this software and associated documentation files (the
7  "Software"), to deal in the Software without restriction, including
8  without limitation the rights to use, copy, modify, merge, publish,
9  distribute, sublicense, and/or sell copies of the Software, and to
10  permit persons to whom the Software is furnished to do so, subject to
11  the following conditions:
12 
13  The above copyright notice and this permission notice shall be
14  included in all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  SOFTWARE.
24 */
25 
26 /*
27  Example:
28 
29 #include "nvim/khash.h"
30 KHASH_MAP_INIT_INT(32, char)
31 int main() {
32  int ret, is_missing;
33  khiter_t k;
34  khash_t(32) *h = kh_init(32);
35  k = kh_put(32, h, 5, &ret);
36  kh_value(h, k) = 10;
37  k = kh_get(32, h, 10);
38  is_missing = (k == kh_end(h));
39  k = kh_get(32, h, 5);
40  kh_del(32, h, k);
41  for (k = kh_begin(h); k != kh_end(h); ++k)
42  if (kh_exist(h, k)) kh_value(h, k) = 1;
43  kh_destroy(32, h);
44  return 0;
45 }
46 */
47 
48 /*
49  2013-05-02 (0.2.8):
50 
51  * Use quadratic probing. When the capacity is power of 2, stepping function
52  i*(i+1)/2 guarantees to traverse each bucket. It is better than double
53  hashing on cache performance and is more robust than linear probing.
54 
55  In theory, double hashing should be more robust than quadratic probing.
56  However, my implementation is probably not for large hash tables, because
57  the second hash function is closely tied to the first hash function,
58  which reduce the effectiveness of double hashing.
59 
60  Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
61 
62  2011-12-29 (0.2.7):
63 
64  * Minor code clean up; no actual effect.
65 
66  2011-09-16 (0.2.6):
67 
68  * The capacity is a power of 2. This seems to dramatically improve the
69  speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
70 
71  - http://code.google.com/p/ulib/
72  - http://nothings.org/computer/judy/
73 
74  * Allow to optionally use linear probing which usually has better
75  performance for random input. Double hashing is still the default as it
76  is more robust to certain non-random input.
77 
78  * Added Wang's integer hash function (not used by default). This hash
79  function is more robust to certain non-random input.
80 
81  2011-02-14 (0.2.5):
82 
83  * Allow to declare global functions.
84 
85  2009-09-26 (0.2.4):
86 
87  * Improve portability
88 
89  2008-09-19 (0.2.3):
90 
91  * Corrected the example
92  * Improved interfaces
93 
94  2008-09-11 (0.2.2):
95 
96  * Improved speed a little in kh_put()
97 
98  2008-09-10 (0.2.1):
99 
100  * Added kh_clear()
101  * Fixed a compiling error
102 
103  2008-09-02 (0.2.0):
104 
105  * Changed to token concatenation which increases flexibility.
106 
107  2008-08-31 (0.1.2):
108 
109  * Fixed a bug in kh_get(), which has not been tested previously.
110 
111  2008-08-31 (0.1.1):
112 
113  * Added destructor
114 */
115 
116 
117 #ifndef NVIM_LIB_KHASH_H
118 #define NVIM_LIB_KHASH_H
119 
126 #define AC_VERSION_KHASH_H "0.2.8"
127 
128 #include <stdlib.h>
129 #include <string.h>
130 #include <limits.h>
131 #include <stdint.h>
132 
133 #include "nvim/memory.h"
134 
135 #include "nvim/func_attr.h"
136 
137 /* compiler specific configuration */
138 
139 #if UINT_MAX == 0xffffffffu
140 typedef unsigned int khint32_t;
141 #elif ULONG_MAX == 0xffffffffu
142 typedef unsigned long khint32_t;
143 #endif
144 
145 #if ULONG_MAX == ULLONG_MAX
146 typedef unsigned long khint64_t;
147 #else
148 typedef unsigned long long khint64_t;
149 #endif
150 
151 #ifdef _MSC_VER
152 #define kh_inline __inline
153 #else
154 #define kh_inline inline
155 #endif
156 
157 typedef khint32_t khint_t;
159 
160 #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
161 #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
162 #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
163 #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(khint_t)(1ul<<((i&0xfU)<<1)))
164 #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(khint_t)(2ul<<((i&0xfU)<<1)))
165 #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(khint_t)(3ul<<((i&0xfU)<<1)))
166 #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=(khint_t)1ul<<((i&0xfU)<<1))
167 
168 #define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
169 
170 #ifndef kroundup32
171 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
172 #endif
173 
174 #ifndef kcalloc
175 #define kcalloc(N,Z) xcalloc(N,Z)
176 #endif
177 #ifndef kmalloc
178 #define kmalloc(Z) xmalloc(Z)
179 #endif
180 #ifndef krealloc
181 #define krealloc(P,Z) xrealloc(P,Z)
182 #endif
183 #ifndef kfree
184 #define kfree(P) XFREE_CLEAR(P)
185 #endif
186 
187 #define __ac_HASH_UPPER 0.77
188 
189 #define __KHASH_TYPE(name, khkey_t, khval_t) \
190  typedef struct { \
191  khint_t n_buckets, size, n_occupied, upper_bound; \
192  khint32_t *flags; \
193  khkey_t *keys; \
194  khval_t *vals; \
195  } kh_##name##_t;
196 
197 #define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
198  extern kh_##name##_t *kh_init_##name(void); \
199  extern void kh_dealloc_##name(kh_##name##_t *h); \
200  extern void kh_destroy_##name(kh_##name##_t *h); \
201  extern void kh_clear_##name(kh_##name##_t *h); \
202  extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
203  extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
204  extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
205  extern void kh_del_##name(kh_##name##_t *h, khint_t x);
206 
207 #define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, \
208  __hash_equal) \
209  SCOPE kh_##name##_t *kh_init_##name(void) \
210  REAL_FATTR_UNUSED; \
211  SCOPE kh_##name##_t *kh_init_##name(void) { \
212  return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \
213  } \
214  SCOPE void kh_dealloc_##name(kh_##name##_t *h) \
215  REAL_FATTR_UNUSED; \
216  SCOPE void kh_dealloc_##name(kh_##name##_t *h) \
217  { \
218  kfree(h->keys); \
219  kfree(h->flags); \
220  kfree(h->vals); \
221  } \
222  SCOPE void kh_destroy_##name(kh_##name##_t *h) \
223  REAL_FATTR_UNUSED; \
224  SCOPE void kh_destroy_##name(kh_##name##_t *h) \
225  { \
226  if (h) { \
227  kh_dealloc_##name(h); \
228  kfree(h); \
229  } \
230  } \
231  SCOPE void kh_clear_##name(kh_##name##_t *h) \
232  REAL_FATTR_UNUSED; \
233  SCOPE void kh_clear_##name(kh_##name##_t *h) \
234  { \
235  if (h && h->flags) { \
236  memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
237  h->size = h->n_occupied = 0; \
238  } \
239  } \
240  SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
241  REAL_FATTR_UNUSED; \
242  SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
243  { \
244  if (h->n_buckets) { \
245  khint_t k, i, last, mask, step = 0; \
246  mask = h->n_buckets - 1; \
247  k = __hash_func(key); i = k & mask; \
248  last = i; \
249  while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || \
250  !__hash_equal(h->keys[i], key))) { \
251  i = (i + (++step)) & mask; \
252  if (i == last) { \
253  return h->n_buckets; \
254  } \
255  } \
256  return __ac_iseither(h->flags, i) ? h->n_buckets : i; \
257  } else { \
258  return 0; \
259  } \
260  } \
261  SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
262  REAL_FATTR_UNUSED; \
263  SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
264  { /* This function uses 0.25*n_buckets bytes of working space instead of */ \
265  /* [sizeof(key_t+val_t)+.25]*n_buckets. */ \
266  khint32_t *new_flags = 0; \
267  khint_t j = 1; \
268  { \
269  kroundup32(new_n_buckets); \
270  if (new_n_buckets < 4) { \
271  new_n_buckets = 4; \
272  } \
273  /* requested size is too small */ \
274  if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) { \
275  j = 0; \
276  } else { /* hash table size to be changed (shrink or expand); rehash */ \
277  new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) \
278  * sizeof(khint32_t)); \
279  memset(new_flags, 0xaa, \
280  __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
281  if (h->n_buckets < new_n_buckets) { /* expand */ \
282  khkey_t *new_keys = (khkey_t*)krealloc( \
283  (void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
284  h->keys = new_keys; \
285  if (kh_is_map) { \
286  khval_t *new_vals = (khval_t*)krealloc( \
287  (void *)h->vals, new_n_buckets * sizeof(khval_t)); \
288  h->vals = new_vals; \
289  } \
290  } /* otherwise shrink */ \
291  } \
292  } \
293  if (j) { /* rehashing is needed */ \
294  for (j = 0; j != h->n_buckets; ++j) { \
295  if (__ac_iseither(h->flags, j) == 0) { \
296  khkey_t key = h->keys[j]; \
297  khval_t val; \
298  khint_t new_mask; \
299  new_mask = new_n_buckets - 1; \
300  if (kh_is_map) { \
301  val = h->vals[j]; \
302  } \
303  __ac_set_isdel_true(h->flags, j); \
304  /* kick-out process; sort of like in Cuckoo hashing */ \
305  while (1) { \
306  khint_t k, i, step = 0; \
307  k = __hash_func(key); \
308  i = k & new_mask; \
309  while (!__ac_isempty(new_flags, i)) { \
310  i = (i + (++step)) & new_mask; \
311  } \
312  __ac_set_isempty_false(new_flags, i); \
313  /* kick out the existing element */ \
314  if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \
315  { \
316  khkey_t tmp = h->keys[i]; \
317  h->keys[i] = key; \
318  key = tmp; \
319  } \
320  if (kh_is_map) { \
321  khval_t tmp = h->vals[i]; \
322  h->vals[i] = val; \
323  val = tmp; \
324  } \
325  /* mark it as deleted in the old hash table */ \
326  __ac_set_isdel_true(h->flags, i); \
327  } else { /* write the element and jump out of the loop */ \
328  h->keys[i] = key; \
329  if (kh_is_map) { \
330  h->vals[i] = val; \
331  } \
332  break; \
333  } \
334  } \
335  } \
336  } \
337  if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
338  h->keys = (khkey_t*)krealloc((void *)h->keys, \
339  new_n_buckets * sizeof(khkey_t)); \
340  if (kh_is_map) { \
341  h->vals = (khval_t*)krealloc((void *)h->vals, \
342  new_n_buckets * sizeof(khval_t)); \
343  } \
344  } \
345  kfree(h->flags); /* free the working space */ \
346  h->flags = new_flags; \
347  h->n_buckets = new_n_buckets; \
348  h->n_occupied = h->size; \
349  h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
350  } \
351  } \
352  SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
353  REAL_FATTR_UNUSED; \
354  SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
355  { \
356  khint_t x; \
357  if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
358  if (h->n_buckets > (h->size << 1)) { \
359  kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \
360  } else { \
361  kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \
362  } \
363  } /* TODO: implement automatically shrinking; */ \
364  /* resize() already support shrinking */ \
365  { \
366  khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
367  x = site = h->n_buckets; \
368  k = __hash_func(key); \
369  i = k & mask; \
370  if (__ac_isempty(h->flags, i)) { \
371  x = i; /* for speed up */ \
372  } else { \
373  last = i; \
374  while (!__ac_isempty(h->flags, i) \
375  && (__ac_isdel(h->flags, i) \
376  || !__hash_equal(h->keys[i], key))) { \
377  if (__ac_isdel(h->flags, i)) { \
378  site = i; \
379  } \
380  i = (i + (++step)) & mask; \
381  if (i == last) { \
382  x = site; \
383  break; \
384  } \
385  } \
386  if (x == h->n_buckets) { \
387  if (__ac_isempty(h->flags, i) && site != h->n_buckets) { \
388  x = site; \
389  } else { \
390  x = i; \
391  } \
392  } \
393  } \
394  } \
395  if (__ac_isempty(h->flags, x)) { /* not present at all */ \
396  h->keys[x] = key; \
397  __ac_set_isboth_false(h->flags, x); \
398  h->size++; \
399  h->n_occupied++; \
400  *ret = 1; \
401  } else if (__ac_isdel(h->flags, x)) { /* deleted */ \
402  h->keys[x] = key; \
403  __ac_set_isboth_false(h->flags, x); \
404  h->size++; \
405  *ret = 2; \
406  } else { \
407  *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
408  } \
409  return x; \
410  } \
411  SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
412  REAL_FATTR_UNUSED; \
413  SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
414  { \
415  if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
416  __ac_set_isdel_true(h->flags, x); \
417  --h->size; \
418  } \
419  }
420 
421 #define KHASH_DECLARE(name, khkey_t, khval_t) \
422  __KHASH_TYPE(name, khkey_t, khval_t) \
423  __KHASH_PROTOTYPES(name, khkey_t, khval_t)
424 
425 #define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
426  __KHASH_TYPE(name, khkey_t, khval_t) \
427  __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
428 
429 #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
430  KHASH_INIT2(name, static kh_inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
431 
432 /* --- BEGIN OF HASH FUNCTIONS --- */
433 
439 #define kh_int_hash_func(key) (khint32_t)(key)
440 
443 #define kh_int_hash_equal(a, b) ((a) == (b))
444 
449 #define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
450 
453 #define kh_int64_hash_equal(a, b) ((a) == (b))
454 
459 static kh_inline khint_t __ac_X31_hash_string(const char *s)
460 {
461  khint_t h = (khint_t)*s;
462  if (h) for (++s ; *s; ++s) h = (h << 5) - h + (uint8_t)*s;
463  return h;
464 }
470 #define kh_str_hash_func(key) __ac_X31_hash_string(key)
471 
474 #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
475 
476 static kh_inline khint_t __ac_Wang_hash(khint_t key)
477 {
478  key += ~(key << 15);
479  key ^= (key >> 10);
480  key += (key << 3);
481  key ^= (key >> 6);
482  key += ~(key << 11);
483  key ^= (key >> 16);
484  return key;
485 }
486 #define kh_int_hash_func2(k) __ac_Wang_hash((khint_t)key)
487 
488 /* --- END OF HASH FUNCTIONS --- */
489 
490 /* Other convenient macros... */
491 
496 #define khash_t(name) kh_##name##_t
497 
503 #define kh_init(name) kh_init_##name()
504 
510 #define kh_destroy(name, h) kh_destroy_##name(h)
511 
517 #define kh_dealloc(name, h) kh_dealloc_##name(h)
518 
524 #define kh_clear(name, h) kh_clear_##name(h)
525 
532 #define kh_resize(name, h, s) kh_resize_##name(h, s)
533 
545 #define kh_put(name, h, k, r) kh_put_##name(h, k, r)
546 
554 #define kh_get(name, h, k) kh_get_##name(h, k)
555 
562 #define kh_del(name, h, k) kh_del_##name(h, k)
563 
570 #define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
571 
578 #define kh_key(h, x) ((h)->keys[x])
579 
587 #define kh_val(h, x) ((h)->vals[x])
588 
592 #define kh_value(h, x) ((h)->vals[x])
593 
599 #define kh_begin(h) (khint_t)(0)
600 
606 #define kh_end(h) ((h)->n_buckets)
607 
613 #define kh_size(h) ((h)->size)
614 
620 #define kh_n_buckets(h) ((h)->n_buckets)
621 
629 #define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
630  for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
631  if (!kh_exist(h,__i)) continue; \
632  (kvar) = kh_key(h,__i); \
633  (vvar) = kh_val(h,__i); \
634  code; \
635  } }
636 
643 #define kh_foreach_value(h, vvar, code) { khint_t __i; \
644  for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
645  if (!kh_exist(h,__i)) continue; \
646  (vvar) = kh_val(h,__i); \
647  code; \
648  } }
649 
656 #define kh_foreach_key(h, kvar, code) \
657  { \
658  khint_t __i; \
659  for (__i = kh_begin(h); __i != kh_end(h); __i++) { \
660  if (!kh_exist(h, __i)) { \
661  continue; \
662  } \
663  (kvar) = kh_key(h, __i); \
664  code; \
665  } \
666  }
667 
668 /* More conenient interfaces */
669 
674 #define KHASH_SET_INIT_INT(name) \
675  KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
676 
682 #define KHASH_MAP_INIT_INT(name, khval_t) \
683  KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
684 
689 #define KHASH_SET_INIT_INT64(name) \
690  KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
691 
697 #define KHASH_MAP_INIT_INT64(name, khval_t) \
698  KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
699 
700 typedef const char *kh_cstr_t;
705 #define KHASH_SET_INIT_STR(name) \
706  KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
707 
713 #define KHASH_MAP_INIT_STR(name, khval_t) \
714  KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
715 
720 #define KHASH_EMPTY_TABLE(name) \
721  ((kh_##name##_t) { \
722  .n_buckets = 0, \
723  .size = 0, \
724  .n_occupied = 0, \
725  .upper_bound = 0, \
726  .flags = NULL, \
727  .keys = NULL, \
728  .vals = NULL, \
729  })
730 #endif // NVIM_LIB_KHASH_H
const char * kh_cstr_t
Definition: khash.h:700
khint32_t khint_t
Definition: khash.h:157
char * s
Definition: message.c:739
khint_t khiter_t
Definition: khash.h:158
#define kh_inline
Definition: khash.h:154
unsigned long khint64_t
Definition: khash.h:146