khash.h
Go to the documentation of this file.
1 /* The MIT License
2 
3  Copyright (c) 2008, 2009, 2011 by Attractive Chaos <[email protected]>
4 
5  Permission is hereby granted, free of charge, to any person obtaining
6  a copy of this software and associated documentation files (the
7  "Software"), to deal in the Software without restriction, including
8  without limitation the rights to use, copy, modify, merge, publish,
9  distribute, sublicense, and/or sell copies of the Software, and to
10  permit persons to whom the Software is furnished to do so, subject to
11  the following conditions:
12 
13  The above copyright notice and this permission notice shall be
14  included in all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  SOFTWARE.
24  */
25 
26 /*
27  Example:
28 
29  #include "nvim/khash.h"
30  KHASH_MAP_INIT_INT(32, char)
31  int main() {
32  int ret, is_missing;
33  khiter_t k;
34  khash_t(32) *h = kh_init(32);
35  k = kh_put(32, h, 5, &ret);
36  kh_value(h, k) = 10;
37  k = kh_get(32, h, 10);
38  is_missing = (k == kh_end(h));
39  k = kh_get(32, h, 5);
40  kh_del(32, h, k);
41  for (k = kh_begin(h); k != kh_end(h); ++k)
42  if (kh_exist(h, k)) kh_value(h, k) = 1;
43  kh_destroy(32, h);
44  return 0;
45  }
46  */
47 
48 /*
49  2013-05-02 (0.2.8):
50 
51  * Use quadratic probing. When the capacity is power of 2, stepping function
52  i*(i+1)/2 guarantees to traverse each bucket. It is better than double
53  hashing on cache performance and is more robust than linear probing.
54 
55  In theory, double hashing should be more robust than quadratic probing.
56  However, my implementation is probably not for large hash tables, because
57  the second hash function is closely tied to the first hash function,
58  which reduce the effectiveness of double hashing.
59 
60  Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
61 
62  2011-12-29 (0.2.7):
63 
64  * Minor code clean up; no actual effect.
65 
66  2011-09-16 (0.2.6):
67 
68  * The capacity is a power of 2. This seems to dramatically improve the
69  speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
70 
71  - http://code.google.com/p/ulib/
72  - http://nothings.org/computer/judy/
73 
74  * Allow to optionally use linear probing which usually has better
75  performance for random input. Double hashing is still the default as it
76  is more robust to certain non-random input.
77 
78  * Added Wang's integer hash function (not used by default). This hash
79  function is more robust to certain non-random input.
80 
81  2011-02-14 (0.2.5):
82 
83  * Allow to declare global functions.
84 
85  2009-09-26 (0.2.4):
86 
87  * Improve portability
88 
89  2008-09-19 (0.2.3):
90 
91  * Corrected the example
92  * Improved interfaces
93 
94  2008-09-11 (0.2.2):
95 
96  * Improved speed a little in kh_put()
97 
98  2008-09-10 (0.2.1):
99 
100  * Added kh_clear()
101  * Fixed a compiling error
102 
103  2008-09-02 (0.2.0):
104 
105  * Changed to token concatenation which increases flexibility.
106 
107  2008-08-31 (0.1.2):
108 
109  * Fixed a bug in kh_get(), which has not been tested previously.
110 
111  2008-08-31 (0.1.1):
112 
113  * Added destructor
114  */
115 
116 
117 #ifndef NVIM_LIB_KHASH_H
118 #define NVIM_LIB_KHASH_H
119 
126 #define AC_VERSION_KHASH_H "0.2.8"
127 
128 #include <limits.h>
129 #include <stdint.h>
130 #include <stdlib.h>
131 #include <string.h>
132 
133 #include "nvim/func_attr.h"
134 #include "nvim/memory.h"
135 
136 // compiler specific configuration
137 
138 #if UINT_MAX == 0xffffffffu
139 typedef unsigned int khint32_t;
140 #elif ULONG_MAX == 0xffffffffu
141 typedef unsigned long khint32_t;
142 #endif
143 
144 #if ULONG_MAX == ULLONG_MAX
145 typedef unsigned long khint64_t;
146 #else
147 typedef unsigned long long khint64_t;
148 #endif
149 
150 #ifdef _MSC_VER
151 # define kh_inline __inline
152 #else
153 # define kh_inline inline
154 #endif
155 
156 typedef khint32_t khint_t;
158 
159 #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
160 #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
161 #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
162 #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(khint_t)(1ul<<((i&0xfU)<<1)))
163 #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(khint_t)(2ul<<((i&0xfU)<<1)))
164 #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(khint_t)(3ul<<((i&0xfU)<<1)))
165 #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=(khint_t)1ul<<((i&0xfU)<<1))
166 
167 #define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
168 
169 #ifndef kroundup32
170 # define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, \
171  ++(x))
172 #endif
173 
174 #ifndef kcalloc
175 # define kcalloc(N, Z) xcalloc(N, Z)
176 #endif
177 #ifndef kmalloc
178 # define kmalloc(Z) xmalloc(Z)
179 #endif
180 #ifndef krealloc
181 # define krealloc(P, Z) xrealloc(P, Z)
182 #endif
183 #ifndef kfree
184 # define kfree(P) XFREE_CLEAR(P)
185 #endif
186 
187 #define __ac_HASH_UPPER 0.77
188 
189 #define __KHASH_TYPE(name, khkey_t, khval_t) \
190  typedef struct { \
191  khint_t n_buckets, size, n_occupied, upper_bound; \
192  khint32_t *flags; \
193  khkey_t *keys; \
194  khval_t *vals; \
195  } kh_##name##_t;
196 
197 #define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
198  extern kh_##name##_t *kh_init_##name(void); \
199  extern void kh_dealloc_##name(kh_##name##_t *h); \
200  extern void kh_destroy_##name(kh_##name##_t *h); \
201  extern void kh_clear_##name(kh_##name##_t *h); \
202  extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
203  extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
204  extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
205  extern void kh_del_##name(kh_##name##_t *h, khint_t x);
206 
207 #define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, \
208  __hash_equal) \
209  SCOPE kh_##name##_t *kh_init_##name(void) \
210  REAL_FATTR_UNUSED; \
211  SCOPE kh_##name##_t *kh_init_##name(void) { \
212  return (kh_##name##_t *)kcalloc(1, sizeof(kh_##name##_t)); \
213  } \
214  SCOPE void kh_dealloc_##name(kh_##name##_t *h) \
215  REAL_FATTR_UNUSED; \
216  SCOPE void kh_dealloc_##name(kh_##name##_t *h) \
217  { \
218  kfree(h->keys); \
219  kfree(h->flags); \
220  kfree(h->vals); \
221  } \
222  SCOPE void kh_destroy_##name(kh_##name##_t *h) \
223  REAL_FATTR_UNUSED; \
224  SCOPE void kh_destroy_##name(kh_##name##_t *h) \
225  { \
226  if (h) { \
227  kh_dealloc_##name(h); \
228  kfree(h); \
229  } \
230  } \
231  SCOPE void kh_clear_##name(kh_##name##_t *h) \
232  REAL_FATTR_UNUSED; \
233  SCOPE void kh_clear_##name(kh_##name##_t *h) \
234  { \
235  if (h && h->flags) { \
236  memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
237  h->size = h->n_occupied = 0; \
238  } \
239  } \
240  SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
241  REAL_FATTR_UNUSED; \
242  SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
243  { \
244  if (h->n_buckets) { \
245  khint_t k, i, last, mask, step = 0; \
246  mask = h->n_buckets - 1; \
247  k = __hash_func(key); i = k & mask; \
248  last = i; \
249  while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || \
250  !__hash_equal(h->keys[i], key))) { \
251  i = (i + (++step)) & mask; \
252  if (i == last) { \
253  return h->n_buckets; \
254  } \
255  } \
256  return __ac_iseither(h->flags, i) ? h->n_buckets : i; \
257  } else { \
258  return 0; \
259  } \
260  } \
261  SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
262  REAL_FATTR_UNUSED; \
263  SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
264  { /* This function uses 0.25*n_buckets bytes of working space instead of */ \
265  /* [sizeof(key_t+val_t)+.25]*n_buckets. */ \
266  khint32_t *new_flags = 0; \
267  khint_t j = 1; \
268  { \
269  kroundup32(new_n_buckets); \
270  if (new_n_buckets < 4) { \
271  new_n_buckets = 4; \
272  } \
273  /* requested size is too small */ \
274  if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) { \
275  j = 0; \
276  } else { /* hash table size to be changed (shrink or expand); rehash */ \
277  new_flags = (khint32_t *)kmalloc(__ac_fsize(new_n_buckets) \
278  * sizeof(khint32_t)); \
279  memset(new_flags, 0xaa, \
280  __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
281  if (h->n_buckets < new_n_buckets) { /* expand */ \
282  khkey_t *new_keys = (khkey_t *)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
283  h->keys = new_keys; \
284  if (kh_is_map) { \
285  khval_t *new_vals = \
286  (khval_t *)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
287  h->vals = new_vals; \
288  } \
289  } /* otherwise shrink */ \
290  } \
291  } \
292  if (j) { /* rehashing is needed */ \
293  for (j = 0; j != h->n_buckets; ++j) { \
294  if (__ac_iseither(h->flags, j) == 0) { \
295  khkey_t key = h->keys[j]; \
296  khval_t val; \
297  khint_t new_mask; \
298  new_mask = new_n_buckets - 1; \
299  if (kh_is_map) { \
300  val = h->vals[j]; \
301  } \
302  __ac_set_isdel_true(h->flags, j); \
303  /* kick-out process; sort of like in Cuckoo hashing */ \
304  while (1) { \
305  khint_t k, i, step = 0; \
306  k = __hash_func(key); \
307  i = k & new_mask; \
308  while (!__ac_isempty(new_flags, i)) { \
309  i = (i + (++step)) & new_mask; \
310  } \
311  __ac_set_isempty_false(new_flags, i); \
312  /* kick out the existing element */ \
313  if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \
314  { \
315  khkey_t tmp = h->keys[i]; \
316  h->keys[i] = key; \
317  key = tmp; \
318  } \
319  if (kh_is_map) { \
320  khval_t tmp = h->vals[i]; \
321  h->vals[i] = val; \
322  val = tmp; \
323  } \
324  /* mark it as deleted in the old hash table */ \
325  __ac_set_isdel_true(h->flags, i); \
326  } else { /* write the element and jump out of the loop */ \
327  h->keys[i] = key; \
328  if (kh_is_map) { \
329  h->vals[i] = val; \
330  } \
331  break; \
332  } \
333  } \
334  } \
335  } \
336  if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
337  h->keys = (khkey_t *)krealloc((void *)h->keys, \
338  new_n_buckets * sizeof(khkey_t)); \
339  if (kh_is_map) { \
340  h->vals = (khval_t *)krealloc((void *)h->vals, \
341  new_n_buckets * sizeof(khval_t)); \
342  } \
343  } \
344  kfree(h->flags); /* free the working space */ \
345  h->flags = new_flags; \
346  h->n_buckets = new_n_buckets; \
347  h->n_occupied = h->size; \
348  h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
349  } \
350  } \
351  SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
352  REAL_FATTR_UNUSED; \
353  SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
354  { \
355  khint_t x; \
356  if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
357  if (h->n_buckets > (h->size << 1)) { \
358  kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \
359  } else { \
360  kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \
361  } \
362  } /* TODO: implement automatically shrinking; */ \
363  /* resize() already support shrinking */ \
364  { \
365  khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
366  x = site = h->n_buckets; \
367  k = __hash_func(key); \
368  i = k & mask; \
369  if (__ac_isempty(h->flags, i)) { \
370  x = i; /* for speed up */ \
371  } else { \
372  last = i; \
373  while (!__ac_isempty(h->flags, i) \
374  && (__ac_isdel(h->flags, i) \
375  || !__hash_equal(h->keys[i], key))) { \
376  if (__ac_isdel(h->flags, i)) { \
377  site = i; \
378  } \
379  i = (i + (++step)) & mask; \
380  if (i == last) { \
381  x = site; \
382  break; \
383  } \
384  } \
385  if (x == h->n_buckets) { \
386  if (__ac_isempty(h->flags, i) && site != h->n_buckets) { \
387  x = site; \
388  } else { \
389  x = i; \
390  } \
391  } \
392  } \
393  } \
394  if (__ac_isempty(h->flags, x)) { /* not present at all */ \
395  h->keys[x] = key; \
396  __ac_set_isboth_false(h->flags, x); \
397  h->size++; \
398  h->n_occupied++; \
399  *ret = 1; \
400  } else if (__ac_isdel(h->flags, x)) { /* deleted */ \
401  h->keys[x] = key; \
402  __ac_set_isboth_false(h->flags, x); \
403  h->size++; \
404  *ret = 2; \
405  } else { \
406  *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
407  } \
408  return x; \
409  } \
410  SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
411  REAL_FATTR_UNUSED; \
412  SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
413  { \
414  if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
415  __ac_set_isdel_true(h->flags, x); \
416  --h->size; \
417  } \
418  }
419 
420 #define KHASH_DECLARE(name, khkey_t, khval_t) \
421  __KHASH_TYPE(name, khkey_t, khval_t) \
422  __KHASH_PROTOTYPES(name, khkey_t, khval_t)
423 
424 #define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
425  __KHASH_TYPE(name, khkey_t, khval_t) \
426  __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
427 
428 #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
429  KHASH_INIT2(name, static kh_inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
430 
431 // --- BEGIN OF HASH FUNCTIONS ---
432 
438 #define kh_int_hash_func(key) (khint32_t)(key)
439 
442 #define kh_int_hash_equal(a, b) ((a) == (b))
443 
448 #define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
449 
452 #define kh_int64_hash_equal(a, b) ((a) == (b))
453 
458 static kh_inline khint_t __ac_X31_hash_string(const char *s)
459 {
460  khint_t h = (khint_t)*s;
461  if (h) {
462  for (++s; *s; ++s) { h = (h << 5) - h + (uint8_t)*s; }
463  }
464  return h;
465 }
471 #define kh_str_hash_func(key) __ac_X31_hash_string(key)
472 
475 #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
476 
477 static kh_inline khint_t __ac_Wang_hash(khint_t key)
478 {
479  key += ~(key << 15);
480  key ^= (key >> 10);
481  key += (key << 3);
482  key ^= (key >> 6);
483  key += ~(key << 11);
484  key ^= (key >> 16);
485  return key;
486 }
487 #define kh_int_hash_func2(k) __ac_Wang_hash((khint_t)key)
488 
489 // --- END OF HASH FUNCTIONS ---
490 
491 // Other convenient macros...
492 
497 #define khash_t(name) kh_##name##_t
498 
504 #define kh_init(name) kh_init_##name()
505 
511 #define kh_destroy(name, h) kh_destroy_##name(h)
512 
518 #define kh_dealloc(name, h) kh_dealloc_##name(h)
519 
525 #define kh_clear(name, h) kh_clear_##name(h)
526 
533 #define kh_resize(name, h, s) kh_resize_##name(h, s)
534 
546 #define kh_put(name, h, k, r) kh_put_##name(h, k, r)
547 
555 #define kh_get(name, h, k) kh_get_##name(h, k)
556 
563 #define kh_del(name, h, k) kh_del_##name(h, k)
564 
571 #define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
572 
579 #define kh_key(h, x) ((h)->keys[x])
580 
588 #define kh_val(h, x) ((h)->vals[x])
589 
593 #define kh_value(h, x) ((h)->vals[x])
594 
600 #define kh_begin(h) (khint_t)(0)
601 
607 #define kh_end(h) ((h)->n_buckets)
608 
614 #define kh_size(h) ((h)->size)
615 
621 #define kh_n_buckets(h) ((h)->n_buckets)
622 
630 #define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
631  for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
632  if (!kh_exist(h, __i)) continue; \
633  (kvar) = kh_key(h, __i); \
634  (vvar) = kh_val(h, __i); \
635  code; \
636  } }
637 
644 #define kh_foreach_value(h, vvar, code) { khint_t __i; \
645  for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
646  if (!kh_exist(h, __i)) continue; \
647  (vvar) = kh_val(h, __i); \
648  code; \
649  } }
650 
657 #define kh_foreach_key(h, kvar, code) \
658  { \
659  khint_t __i; \
660  for (__i = kh_begin(h); __i != kh_end(h); __i++) { \
661  if (!kh_exist(h, __i)) { \
662  continue; \
663  } \
664  (kvar) = kh_key(h, __i); \
665  code; \
666  } \
667  }
668 
669 // More convenient interfaces
670 
675 #define KHASH_SET_INIT_INT(name) \
676  KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
677 
683 #define KHASH_MAP_INIT_INT(name, khval_t) \
684  KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
685 
690 #define KHASH_SET_INIT_INT64(name) \
691  KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
692 
698 #define KHASH_MAP_INIT_INT64(name, khval_t) \
699  KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
700 
701 typedef const char *kh_cstr_t;
706 #define KHASH_SET_INIT_STR(name) \
707  KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
708 
714 #define KHASH_MAP_INIT_STR(name, khval_t) \
715  KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
716 
721 #define KHASH_EMPTY_TABLE(name) \
722  ((kh_##name##_t) { \
723  .n_buckets = 0, \
724  .size = 0, \
725  .n_occupied = 0, \
726  .upper_bound = 0, \
727  .flags = NULL, \
728  .keys = NULL, \
729  .vals = NULL, \
730  })
731 #endif // NVIM_LIB_KHASH_H
khint_t
khint32_t khint_t
Definition: khash.h:156
khiter_t
khint_t khiter_t
Definition: khash.h:157
s
char_u * s
Definition: eval.c:764
kh_cstr_t
const typedef char * kh_cstr_t
Definition: khash.h:701
khint64_t
unsigned long khint64_t
Definition: khash.h:145
kh_inline
#define kh_inline
Definition: khash.h:153
stdlib.h
func_attr.h
memory.h