Chameleon

Chameleon SVN Source Tree

Root/branches/cparm/i386/libsaio/uthash.h

/*
Copyright (c) 2003-2012, Troy D. Hanson   http://uthash.sourceforge.net
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef UTHASH_H
#define UTHASH_H

#include "libsaio.h"

/* These macros use decltype or the earlier __typeof GNU extension.
   As decltype is only available in newer compilers (VS2010 or gcc 4.3+
   when compiling c++ source) this code uses whatever method is needed
   or, for VS2008 where neither is available, uses casting workarounds. */
#ifdef _MSC_VER            /* MS compiler */
#if _MSC_VER >= 1600 && defined(__cplusplus)  /* VS2010 or newer in C++ mode */
#define DECLTYPE(x) (decltype(x))
#else                      /* VS2008 or older (or VS2010 in C mode) */
#define NO_DECLTYPE
#define DECLTYPE(x)
#endif
#else                      /* GNU, Sun and other compilers */
#define DECLTYPE(x) (__typeof(x))
#endif

#ifdef NO_DECLTYPE
#define DECLTYPE_ASSIGN(dst,src)                                                 \
do {                                                                             \
  char **_da_dst = (char**)(&(dst));                                             \
  *_da_dst = (char*)(src);                                                       \
} while(0)
#else
#define DECLTYPE_ASSIGN(dst,src)                                                 \
do {                                                                             \
  (dst) = DECLTYPE(dst)(src);                                                    \
} while(0)
#endif

#define UTHASH_VERSION 1.9.6

#ifndef uthash_fatal
#define uthash_fatal(msg) longjmp(uterror,-1)  /* fatal error (out of memory,etc) */
#endif
#ifndef uthash_malloc
#define uthash_malloc(sz) malloc(sz)           /* malloc fcn */
#endif
#ifndef uthash_free
#define uthash_free(ptr,sz) free(ptr)          /* free fcn */
#endif
#ifndef uthash_noexpand_fyi
#define uthash_noexpand_fyi(tbl)               /* can be defined to log noexpand */
#endif
#ifndef uthash_expand_fyi
#define uthash_expand_fyi(tbl)                 /* can be defined to log expands */
#endif
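
/* Example: the hooks above can be overridden before this header is included.
 * A minimal sketch, assuming hypothetical pool_alloc()/pool_free() functions
 * supplied by the application:
 *
 *   #define uthash_malloc(sz)   pool_alloc(sz)
 *   #define uthash_free(ptr,sz) pool_free(ptr)
 *   #include "uthash.h"
 */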

/* initial number of buckets */
#define HASH_INITIAL_NUM_BUCKETS 32      /* initial number of buckets        */
#define HASH_INITIAL_NUM_BUCKETS_LOG2 5  /* lg2 of initial number of buckets */
#define HASH_BKT_CAPACITY_THRESH 10      /* expand when bucket count reaches */

/* calculate the element whose hash handle address is hhp */
#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))

#define HASH_FIND(hh,head,keyptr,keylen,out)                                     \
do {                                                                             \
  unsigned _hf_bkt,_hf_hashv;                                                    \
  out=NULL;                                                                      \
  if (head) {                                                                    \
     HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt);   \
     if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) {                           \
       HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ],  \
                        keyptr,keylen,out);                                      \
     }                                                                           \
  }                                                                              \
} while (0)

#ifdef HASH_BLOOM
#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
#define HASH_BLOOM_BYTELEN ((HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0))
#define HASH_BLOOM_MAKE(tbl)                                                     \
do {                                                                             \
  (tbl)->bloom_nbits = HASH_BLOOM;                                               \
  (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN);                 \
  if (!((tbl)->bloom_bv))  { uthash_fatal( "out of memory"); }                   \
  memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN);                                \
  (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE;                                       \
} while (0)

#define HASH_BLOOM_FREE(tbl)                                                     \
do {                                                                             \
  uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN);                              \
} while (0)

#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))

#define HASH_BLOOM_ADD(tbl,hashv)                                                \
  HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))

#define HASH_BLOOM_TEST(tbl,hashv)                                               \
  HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))

#else
#define HASH_BLOOM_MAKE(tbl)
#define HASH_BLOOM_FREE(tbl)
#define HASH_BLOOM_ADD(tbl,hashv)
#define HASH_BLOOM_TEST(tbl,hashv) (1)
#endif
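
/* Example: the optional bloom filter is enabled at compile time by defining
 * HASH_BLOOM to the log2 of the desired bit length. A sketch: a 1 KB filter
 * (2^13 bits) that lets most failed lookups skip the bucket scan entirely:
 *
 *   cc -DHASH_BLOOM=13 app.c
 */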

#define HASH_MAKE_TABLE(hh,head)                                                 \
do {                                                                             \
  (head)->hh.tbl = (UT_hash_table*)uthash_malloc(                                \
                  sizeof(UT_hash_table));                                        \
  if (!((head)->hh.tbl))  { uthash_fatal( "out of memory"); }                    \
  memset((head)->hh.tbl, 0, sizeof(UT_hash_table));                              \
  (head)->hh.tbl->tail = &((head)->hh);                                          \
  (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS;                        \
  (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2;              \
  (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head);                    \
  (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc(                      \
          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
  if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); }             \
  memset((head)->hh.tbl->buckets, 0,                                             \
          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
  HASH_BLOOM_MAKE((head)->hh.tbl);                                               \
  (head)->hh.tbl->signature = HASH_SIGNATURE;                                    \
} while(0)

#define HASH_ADD(hh,head,fieldname,keylen_in,add)                                \
  HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)

#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add)                            \
do {                                                                             \
  unsigned _ha_bkt;                                                              \
  (add)->hh.next = NULL;                                                         \
  (add)->hh.key = (char*)keyptr;                                                 \
  (add)->hh.keylen = (unsigned)keylen_in;                                        \
  if (!(head)) {                                                                 \
     head = (add);                                                               \
     (head)->hh.prev = NULL;                                                     \
     HASH_MAKE_TABLE(hh,head);                                                   \
  } else {                                                                       \
     (head)->hh.tbl->tail->next = (add);                                         \
     (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail);        \
     (head)->hh.tbl->tail = &((add)->hh);                                        \
  }                                                                              \
  (head)->hh.tbl->num_items++;                                                   \
  (add)->hh.tbl = (head)->hh.tbl;                                                \
  HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets,                        \
          (add)->hh.hashv, _ha_bkt);                                             \
  HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh);                  \
  HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv);                                \
  HASH_EMIT_KEY(hh,head,keyptr,keylen_in);                                       \
  HASH_FSCK(hh,head);                                                            \
} while(0)

#define HASH_TO_BKT( hashv, num_bkts, bkt )                                      \
do {                                                                             \
  bkt = ((hashv) & ((num_bkts) - 1));                                            \
} while(0)

/* delete "delptr" from the hash table.
 * "the usual" patch-up process for the app-order doubly-linked-list.
 * The use of _hd_hh_del below deserves special explanation.
 * These used to be expressed using (delptr) but that led to a bug
 * if someone used the same symbol for the head and deletee, like
 *   HASH_DELETE(hh,users,users);
 * We want that to work, but by changing the head (users) below
 * we were forfeiting our ability to further refer to the deletee (users)
 * in the patch-up process. Solution: use scratch space to
 * copy the deletee pointer, then the latter references are via that
 * scratch pointer rather than through the repointed (users) symbol.
 */
#define HASH_DELETE(hh,head,delptr)                                              \
do {                                                                             \
    unsigned _hd_bkt;                                                            \
    struct UT_hash_handle *_hd_hh_del;                                           \
    if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) )  {         \
        uthash_free((head)->hh.tbl->buckets,                                     \
                    (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
        HASH_BLOOM_FREE((head)->hh.tbl);                                         \
        uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                      \
        head = NULL;                                                             \
    } else {                                                                     \
        _hd_hh_del = &((delptr)->hh);                                            \
        if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) {     \
            (head)->hh.tbl->tail =                                               \
                (UT_hash_handle*)((char*)((delptr)->hh.prev) +                   \
                (head)->hh.tbl->hho);                                            \
        }                                                                        \
        if ((delptr)->hh.prev) {                                                 \
            ((UT_hash_handle*)((char*)((delptr)->hh.prev) +                      \
                    (head)->hh.tbl->hho))->next = (delptr)->hh.next;             \
        } else {                                                                 \
            DECLTYPE_ASSIGN(head,(delptr)->hh.next);                             \
        }                                                                        \
        if (_hd_hh_del->next) {                                                  \
            ((UT_hash_handle*)((char*)_hd_hh_del->next +                         \
                    (head)->hh.tbl->hho))->prev =                                \
                    _hd_hh_del->prev;                                            \
        }                                                                        \
        HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt);   \
        HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del);        \
        (head)->hh.tbl->num_items--;                                             \
    }                                                                            \
    HASH_FSCK(hh,head);                                                          \
} while (0)


/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
#define HASH_FIND_STR(head,findstr,out)                                          \
    HASH_FIND(hh,head,findstr,strlen(findstr),out)
#define HASH_ADD_STR(head,strfield,add)                                          \
    HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
#define HASH_FIND_INT(head,findint,out)                                          \
    HASH_FIND(hh,head,findint,sizeof(int),out)
#define HASH_ADD_INT(head,intfield,add)                                          \
    HASH_ADD(hh,head,intfield,sizeof(int),add)
#define HASH_FIND_PTR(head,findptr,out)                                          \
    HASH_FIND(hh,head,findptr,sizeof(void *),out)
#define HASH_ADD_PTR(head,ptrfield,add)                                          \
    HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
#define HASH_DEL(head,delptr)                                                    \
    HASH_DELETE(hh,head,delptr)
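
/* Example: a minimal usage sketch of the convenience macros above, assuming a
 * hypothetical application structure keyed on an int. A structure becomes
 * hashable simply by embedding a UT_hash_handle field named hh:
 *
 *   typedef struct user {
 *       int id;               // key field
 *       char name[16];
 *       UT_hash_handle hh;    // makes this structure hashable
 *   } user;
 *
 *   user *users = NULL;       // the hash head; must start out NULL
 *
 *   user *u = (user*)malloc(sizeof(user));
 *   u->id = 42;
 *   HASH_ADD_INT(users, id, u);         // "id" names the key field
 *
 *   int key = 42;
 *   user *found;
 *   HASH_FIND_INT(users, &key, found);  // note: pass &key, not key
 *
 *   if (found) { HASH_DEL(users, found); free(found); }
 */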

/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
 * This is for uthash developers only; it compiles away if HASH_DEBUG isn't defined.
 */
#ifdef HASH_DEBUG
#define HASH_OOPS(...) do { printf(__VA_ARGS__); longjmp(THIS_BUF_ERROR,-1); } while (0)
#define HASH_FSCK(hh,head)                                                       \
do {                                                                             \
    unsigned _bkt_i;                                                             \
    unsigned _count, _bkt_count;                                                 \
    char *_prev;                                                                 \
    struct UT_hash_handle *_thh;                                                 \
    if (head) {                                                                  \
        _count = 0;                                                              \
        for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) {       \
            _bkt_count = 0;                                                      \
            _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head;                      \
            _prev = NULL;                                                        \
            while (_thh) {                                                       \
               if (_prev != (char*)(_thh->hh_prev)) {                            \
                   HASH_OOPS("invalid hh_prev %p, actual %p\n",                  \
                    _thh->hh_prev, _prev );                                      \
               }                                                                 \
               _bkt_count++;                                                     \
               _prev = (char*)(_thh);                                            \
               _thh = _thh->hh_next;                                             \
            }                                                                    \
            _count += _bkt_count;                                                \
            if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) {           \
               HASH_OOPS("invalid bucket count %d, actual %d\n",                 \
                (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count);              \
            }                                                                    \
        }                                                                        \
        if (_count != (head)->hh.tbl->num_items) {                               \
            HASH_OOPS("invalid hh item count %d, actual %d\n",                   \
                (head)->hh.tbl->num_items, _count );                             \
        }                                                                        \
        /* traverse hh in app order; check next/prev integrity, count */         \
        _count = 0;                                                              \
        _prev = NULL;                                                            \
        _thh = &(head)->hh;                                                      \
        while (_thh) {                                                           \
           _count++;                                                             \
           if (_prev !=(char*)(_thh->prev)) {                                    \
              HASH_OOPS("invalid prev %p, actual %p\n",                          \
                    _thh->prev, _prev );                                         \
           }                                                                     \
           _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh);                    \
           _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) +         \
                                  (head)->hh.tbl->hho) : NULL );                 \
        }                                                                        \
        if (_count != (head)->hh.tbl->num_items) {                               \
            HASH_OOPS("invalid app item count %d, actual %d\n",                  \
                (head)->hh.tbl->num_items, _count );                             \
        }                                                                        \
    }                                                                            \
} while (0)
#else
#define HASH_FSCK(hh,head)
#endif

/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
 * the file descriptor given by HASH_EMIT_KEYS, for tuning the hash function.
 * The app can #include <unistd.h> to get the prototype for write(2). */
#ifdef HASH_EMIT_KEYS
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)                                   \
do {                                                                             \
    unsigned _klen = fieldlen;                                                   \
    write(HASH_EMIT_KEYS, &_klen, sizeof(_klen));                                \
    write(HASH_EMIT_KEYS, keyptr, fieldlen);                                     \
} while (0)
#else
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
#endif
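
/* Example: capturing keys from a running program for offline hash-function
 * tuning. A sketch, assuming a POSIX shell: compile with HASH_EMIT_KEYS set
 * to an unused file descriptor, then redirect that descriptor at run time:
 *
 *   cc -DHASH_EMIT_KEYS=3 app.c -o app
 *   ./app 3>keys.bin
 */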

/* default to Jenkins's hash unless overridden, e.g. -DHASH_FUNCTION=HASH_SAX */
#ifdef HASH_FUNCTION
#define HASH_FCN HASH_FUNCTION
#else
#define HASH_FCN HASH_JEN
#endif
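
/* Example: selecting one of the alternate built-in hash functions defined
 * below (HASH_BER, HASH_SAX, HASH_FNV, HASH_OAT or HASH_SFH) at compile time:
 *
 *   cc -DHASH_FUNCTION=HASH_BER app.c
 */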

/* The Bernstein hash function, used in Perl prior to v5.6 */
#define HASH_BER(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  unsigned _hb_keylen=keylen;                                                    \
  char *_hb_key=(char*)(key);                                                    \
  (hashv) = 0;                                                                   \
  while (_hb_keylen--)  { (hashv) = ((hashv) * 33) + *_hb_key++; }               \
  bkt = (hashv) & (num_bkts-1);                                                  \
} while (0)


/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
#define HASH_SAX(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  unsigned _sx_i;                                                                \
  char *_hs_key=(char*)(key);                                                    \
  hashv = 0;                                                                     \
  for(_sx_i=0; _sx_i < keylen; _sx_i++)                                          \
      hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i];                     \
  bkt = hashv & (num_bkts-1);                                                    \
} while (0)

#define HASH_FNV(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  unsigned _fn_i;                                                                \
  char *_hf_key=(char*)(key);                                                    \
  hashv = 2166136261UL;                                                          \
  for(_fn_i=0; _fn_i < keylen; _fn_i++)                                          \
      hashv = (hashv * 16777619) ^ _hf_key[_fn_i];                               \
  bkt = hashv & (num_bkts-1);                                                    \
} while(0)

#define HASH_OAT(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  unsigned _ho_i;                                                                \
  char *_ho_key=(char*)(key);                                                    \
  hashv = 0;                                                                     \
  for(_ho_i=0; _ho_i < keylen; _ho_i++) {                                        \
      hashv += _ho_key[_ho_i];                                                   \
      hashv += (hashv << 10);                                                    \
      hashv ^= (hashv >> 6);                                                     \
  }                                                                              \
  hashv += (hashv << 3);                                                         \
  hashv ^= (hashv >> 11);                                                        \
  hashv += (hashv << 15);                                                        \
  bkt = hashv & (num_bkts-1);                                                    \
} while(0)

#define HASH_JEN_MIX(a,b,c)                                                      \
do {                                                                             \
  a -= b; a -= c; a ^= ( c >> 13 );                                              \
  b -= c; b -= a; b ^= ( a << 8 );                                               \
  c -= a; c -= b; c ^= ( b >> 13 );                                              \
  a -= b; a -= c; a ^= ( c >> 12 );                                              \
  b -= c; b -= a; b ^= ( a << 16 );                                              \
  c -= a; c -= b; c ^= ( b >> 5 );                                               \
  a -= b; a -= c; a ^= ( c >> 3 );                                               \
  b -= c; b -= a; b ^= ( a << 10 );                                              \
  c -= a; c -= b; c ^= ( b >> 15 );                                              \
} while (0)

#define HASH_JEN(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  unsigned _hj_i,_hj_j,_hj_k;                                                    \
  char *_hj_key=(char*)(key);                                                    \
  hashv = 0xfeedbeef;                                                            \
  _hj_i = _hj_j = 0x9e3779b9;                                                    \
  _hj_k = (unsigned)keylen;                                                      \
  while (_hj_k >= 12) {                                                          \
    _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 )                         \
        + ( (unsigned)_hj_key[2] << 16 )                                         \
        + ( (unsigned)_hj_key[3] << 24 ) );                                      \
    _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 )                         \
        + ( (unsigned)_hj_key[6] << 16 )                                         \
        + ( (unsigned)_hj_key[7] << 24 ) );                                      \
    hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 )                         \
        + ( (unsigned)_hj_key[10] << 16 )                                        \
        + ( (unsigned)_hj_key[11] << 24 ) );                                     \
                                                                                 \
    HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                           \
                                                                                 \
    _hj_key += 12;                                                               \
    _hj_k -= 12;                                                                 \
  }                                                                              \
  hashv += keylen;                                                               \
  switch ( _hj_k ) {                                                             \
     case 11: hashv += ( (unsigned)_hj_key[10] << 24 );                          \
     case 10: hashv += ( (unsigned)_hj_key[9] << 16 );                           \
     case 9:  hashv += ( (unsigned)_hj_key[8] << 8 );                            \
     case 8:  _hj_j += ( (unsigned)_hj_key[7] << 24 );                           \
     case 7:  _hj_j += ( (unsigned)_hj_key[6] << 16 );                           \
     case 6:  _hj_j += ( (unsigned)_hj_key[5] << 8 );                            \
     case 5:  _hj_j += _hj_key[4];                                               \
     case 4:  _hj_i += ( (unsigned)_hj_key[3] << 24 );                           \
     case 3:  _hj_i += ( (unsigned)_hj_key[2] << 16 );                           \
     case 2:  _hj_i += ( (unsigned)_hj_key[1] << 8 );                            \
     case 1:  _hj_i += _hj_key[0];                                               \
  }                                                                              \
  HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                             \
  bkt = hashv & (num_bkts-1);                                                    \
} while(0)

/* The Paul Hsieh hash function */
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__)             \
  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
#define get16bits(d) (*((const uint16_t *) (d)))
#endif

#if !defined (get16bits)
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)             \
                       +(uint32_t)(((const uint8_t *)(d))[0]) )
#endif
#define HASH_SFH(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  char *_sfh_key=(char*)(key);                                                   \
  uint32_t _sfh_tmp, _sfh_len = keylen;                                          \
                                                                                 \
  int _sfh_rem = _sfh_len & 3;                                                   \
  _sfh_len >>= 2;                                                                \
  hashv = 0xcafebabe;                                                            \
                                                                                 \
  /* Main loop */                                                                \
  for (;_sfh_len > 0; _sfh_len--) {                                              \
    hashv    += get16bits (_sfh_key);                                            \
    _sfh_tmp  = (get16bits (_sfh_key+2) << 11) ^ hashv;                          \
    hashv     = (hashv << 16) ^ _sfh_tmp;                                        \
    _sfh_key += 2*sizeof (uint16_t);                                             \
    hashv    += hashv >> 11;                                                     \
  }                                                                              \
                                                                                 \
  /* Handle end cases */                                                         \
  switch (_sfh_rem) {                                                            \
    case 3: hashv += get16bits (_sfh_key);                                       \
            hashv ^= hashv << 16;                                                \
            hashv ^= _sfh_key[sizeof (uint16_t)] << 18;                          \
            hashv += hashv >> 11;                                                \
            break;                                                               \
    case 2: hashv += get16bits (_sfh_key);                                       \
            hashv ^= hashv << 11;                                                \
            hashv += hashv >> 17;                                                \
            break;                                                               \
    case 1: hashv += *_sfh_key;                                                  \
            hashv ^= hashv << 10;                                                \
            hashv += hashv >> 1;                                                 \
  }                                                                              \
                                                                                 \
  /* Force "avalanching" of final 127 bits */                                    \
  hashv ^= hashv << 3;                                                           \
  hashv += hashv >> 5;                                                           \
  hashv ^= hashv << 4;                                                           \
  hashv += hashv >> 17;                                                          \
  hashv ^= hashv << 25;                                                          \
  hashv += hashv >> 6;                                                           \
  bkt = hashv & (num_bkts-1);                                                    \
} while(0)

#ifdef HASH_USING_NO_STRICT_ALIASING
/* The MurmurHash exploits some CPUs' (x86, x86_64) tolerance for unaligned reads.
 * For other types of CPU (e.g. SPARC) an unaligned read causes a bus error.
 * MurmurHash uses the faster approach only on CPUs where it is known to be safe.
 *
 * Note the preprocessor built-in defines can be emitted using:
 *
 *   gcc -m64 -dM -E - < /dev/null                 (on gcc)
 *   cc -## a.c (where a.c is a simple test file)  (Sun Studio)
 */
#if (defined(__i386__) || defined(__x86_64__))
#define MUR_GETBLOCK(p,i) p[i]
#else /* non intel */
#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0)
#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1)
#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2)
#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3)
#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
#define MUR_TWO_TWO(p)   ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >>  8))
#else /* assume little endian non-intel */
#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
#define MUR_TWO_TWO(p)   ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) <<  8))
#endif
#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) :                     \
                            (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) :           \
                             (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) :            \
                                                      MUR_ONE_THREE(p))))
#endif
#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#define MUR_FMIX(_h)                                                             \
do {                                                                             \
  _h ^= _h >> 16;                                                                \
  _h *= 0x85ebca6b;                                                              \
  _h ^= _h >> 13;                                                                \
  _h *= 0xc2b2ae35;                                                              \
  _h ^= _h >> 16;                                                                \
} while(0)

#define HASH_MUR(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  const uint8_t *_mur_data = (const uint8_t*)(key);                              \
  const int _mur_nblocks = (keylen) / 4;                                         \
  uint32_t _mur_h1 = 0xf88D5353;                                                 \
  uint32_t _mur_c1 = 0xcc9e2d51;                                                 \
  uint32_t _mur_c2 = 0x1b873593;                                                 \
  const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4);     \
  int _mur_i;                                                                    \
  for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) {                                \
    uint32_t _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i);                         \
    _mur_k1 *= _mur_c1;                                                          \
    _mur_k1 = MUR_ROTL32(_mur_k1,15);                                            \
    _mur_k1 *= _mur_c2;                                                          \
                                                                                 \
    _mur_h1 ^= _mur_k1;                                                          \
    _mur_h1 = MUR_ROTL32(_mur_h1,13);                                            \
    _mur_h1 = _mur_h1*5+0xe6546b64;                                              \
  }                                                                              \
  const uint8_t *_mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4);       \
  uint32_t _mur_k1=0;                                                            \
  switch((keylen) & 3) {                                                         \
    case 3: _mur_k1 ^= _mur_tail[2] << 16;                                       \
    case 2: _mur_k1 ^= _mur_tail[1] << 8;                                        \
    case 1: _mur_k1 ^= _mur_tail[0];                                             \
            _mur_k1 *= _mur_c1;                                                  \
            _mur_k1 = MUR_ROTL32(_mur_k1,15);                                    \
            _mur_k1 *= _mur_c2;                                                  \
            _mur_h1 ^= _mur_k1;                                                  \
  }                                                                              \
  _mur_h1 ^= (keylen);                                                           \
  MUR_FMIX(_mur_h1);                                                             \
  hashv = _mur_h1;                                                               \
  bkt = hashv & (num_bkts-1);                                                    \
} while(0)
#endif  /* HASH_USING_NO_STRICT_ALIASING */

/* key comparison function; return 0 if keys equal */
#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)

/* iterate over items in a known bucket to find desired item */
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out)                       \
do {                                                                             \
  if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head));         \
  else out=NULL;                                                                 \
  while (out) {                                                                  \
     if ((out)->hh.keylen == keylen_in) {                                        \
         if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break;          \
     }                                                                           \
     if ((out)->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \
     else out = NULL;                                                            \
  }                                                                              \
} while(0)

/* add an item to a bucket */
#define HASH_ADD_TO_BKT(head,addhh)                                              \
do {                                                                             \
  head.count++;                                                                  \
  (addhh)->hh_next = head.hh_head;                                               \
  (addhh)->hh_prev = NULL;                                                       \
  if (head.hh_head) { (head).hh_head->hh_prev = (addhh); }                       \
  (head).hh_head=addhh;                                                          \
  if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH)            \
      && (addhh)->tbl->noexpand != 1) {                                          \
        HASH_EXPAND_BUCKETS((addhh)->tbl);                                       \
  }                                                                              \
} while(0)

/* remove an item from a given bucket */
#define HASH_DEL_IN_BKT(hh,head,hh_del)                                          \
    (head).count--;                                                              \
    if ((head).hh_head == hh_del) {                                              \
      (head).hh_head = hh_del->hh_next;                                          \
    }                                                                            \
    if (hh_del->hh_prev) {                                                       \
        hh_del->hh_prev->hh_next = hh_del->hh_next;                              \
    }                                                                            \
    if (hh_del->hh_next) {                                                       \
        hh_del->hh_next->hh_prev = hh_del->hh_prev;                              \
    }

/* Bucket expansion has the effect of doubling the number of buckets
 * and redistributing the items into the new buckets. Ideally the
 * items will distribute more or less evenly into the new buckets
 * (the extent to which this is true is a measure of the quality of
 * the hash function as it applies to the key domain).
 *
 * With the items distributed into more buckets, the chain length
 * (item count) in each bucket is reduced. Thus by expanding buckets
 * the hash keeps a bound on the chain length. This bounded chain
 * length is the essence of how a hash provides constant time lookup.
 *
 * The calculation of tbl->ideal_chain_maxlen below deserves some
 * explanation. First, keep in mind that we're calculating the ideal
 * maximum chain length based on the *new* (doubled) bucket count.
 * In fractions this is just n/b (n=number of items,b=new num buckets).
 * Since the ideal chain length is an integer, we want to calculate
 * ceil(n/b). We don't depend on floating point arithmetic in this
 * hash, so to calculate ceil(n/b) with integers we could write
 *
 *      ceil(n/b) = (n/b) + ((n%b)?1:0)
 *
 * and in fact a previous version of this hash did just that.
 * But now we have improved things a bit by recognizing that b is
 * always a power of two. We keep its base 2 log handy (call it lb),
 * so now we can write this with a bit shift and logical AND:
 *
 *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
 *
 */
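/* A worked example of the formula above: rehashing n = 100 items into
 * b = 64 buckets (lb = 6) gives ceil(100/64) = (100>>6) + ((100 & 63) ? 1:0)
 * = 1 + 1 = 2, so the ideal chain length after that expansion is 2. */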
#define HASH_EXPAND_BUCKETS(tbl)                                                 \
do {                                                                             \
    unsigned _he_bkt;                                                            \
    unsigned _he_bkt_i;                                                          \
    struct UT_hash_handle *_he_thh, *_he_hh_nxt;                                 \
    UT_hash_bucket *_he_new_buckets, *_he_newbkt;                                \
    unsigned long _he_bkt_size = 2 * tbl->num_buckets                            \
             * sizeof(struct UT_hash_bucket);                                    \
    if (!(_he_bkt_size > 0)) { uthash_fatal( "unknown error"); }                 \
    _he_new_buckets = (UT_hash_bucket*)uthash_malloc(_he_bkt_size);              \
    if (!_he_new_buckets) { uthash_fatal( "out of memory"); }                    \
    memset(_he_new_buckets, 0,                                                   \
            2 * tbl->num_buckets * sizeof(struct UT_hash_bucket));               \
    tbl->ideal_chain_maxlen =                                                    \
       (tbl->num_items >> (tbl->log2_num_buckets+1)) +                           \
       ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0);                    \
    tbl->nonideal_items = 0;                                                     \
    for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++)                \
    {                                                                            \
        _he_thh = tbl->buckets[ _he_bkt_i ].hh_head;                             \
        while (_he_thh) {                                                        \
           _he_hh_nxt = _he_thh->hh_next;                                        \
           HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt);            \
           _he_newbkt = &(_he_new_buckets[ _he_bkt ]);                           \
           if (_he_newbkt) {                                                     \
               if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) {            \
                 tbl->nonideal_items++;                                          \
                 _he_newbkt->expand_mult = _he_newbkt->count /                   \
                                            tbl->ideal_chain_maxlen;             \
               }                                                                 \
               _he_thh->hh_prev = NULL;                                          \
               _he_thh->hh_next = _he_newbkt->hh_head;                           \
               if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev =           \
                    _he_thh;                                                     \
               _he_newbkt->hh_head = _he_thh;                                    \
               _he_thh = _he_hh_nxt;                                             \
           }                                                                     \
           else { uthash_fatal( "out of memory"); }                              \
        }                                                                        \
    }                                                                            \
    uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
    tbl->num_buckets *= 2;                                                       \
    tbl->log2_num_buckets++;                                                     \
    tbl->buckets = _he_new_buckets;                                              \
    tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ?         \
        (tbl->ineff_expands+1) : 0;                                              \
    if (tbl->ineff_expands > 1) {                                                \
        tbl->noexpand=1;                                                         \
        uthash_noexpand_fyi(tbl);                                                \
    }                                                                            \
    uthash_expand_fyi(tbl);                                                      \
} while(0)


/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
/* Note that HASH_SORT assumes the hash handle name to be hh.
 * HASH_SRT was added to allow the hash handle name to be passed in. */
#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
#define HASH_SRT(hh,head,cmpfcn)                                                 \
do {                                                                             \
  unsigned _hs_i;                                                                \
  unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize;               \
  struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail;            \
  if (head) {                                                                    \
      _hs_insize = 1;                                                            \
      _hs_looping = 1;                                                           \
      _hs_list = &((head)->hh);                                                  \
      while (_hs_looping) {                                                      \
          _hs_p = _hs_list;                                                      \
          _hs_list = NULL;                                                       \
          _hs_tail = NULL;                                                       \
          _hs_nmerges = 0;                                                       \
          while (_hs_p) {                                                        \
              _hs_nmerges++;                                                     \
              _hs_q = _hs_p;                                                     \
              _hs_psize = 0;                                                     \
              for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) {                   \
                  _hs_psize++;                                                   \
                  _hs_q = (UT_hash_handle*)((_hs_q->next) ?                      \
                          ((void*)((char*)(_hs_q->next) +                        \
                          (head)->hh.tbl->hho)) : NULL);                         \
                  if (! (_hs_q) ) break;                                         \
              }                                                                  \
              _hs_qsize = _hs_insize;                                            \
              while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) {           \
                  if (_hs_psize == 0) {                                          \
                      _hs_e = _hs_q;                                             \
                      _hs_q = (UT_hash_handle*)((_hs_q->next) ?                  \
                              ((void*)((char*)(_hs_q->next) +                    \
                              (head)->hh.tbl->hho)) : NULL);                     \
                      _hs_qsize--;                                               \
                  } else if ( (_hs_qsize == 0) || !(_hs_q) ) {                   \
                      _hs_e = _hs_p;                                             \
                      _hs_p = (UT_hash_handle*)((_hs_p->next) ?                  \
                              ((void*)((char*)(_hs_p->next) +                    \
                              (head)->hh.tbl->hho)) : NULL);                     \
                      _hs_psize--;                                               \
                  } else if ((                                                   \
                      cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
                             DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
                             ) <= 0) {                                           \
                      _hs_e = _hs_p;                                             \
                      _hs_p = (UT_hash_handle*)((_hs_p->next) ?                  \
                              ((void*)((char*)(_hs_p->next) +                    \
                              (head)->hh.tbl->hho)) : NULL);                     \
                      _hs_psize--;                                               \
                  } else {                                                       \
                      _hs_e = _hs_q;                                             \
                      _hs_q = (UT_hash_handle*)((_hs_q->next) ?                  \
                              ((void*)((char*)(_hs_q->next) +                    \
                              (head)->hh.tbl->hho)) : NULL);                     \
                      _hs_qsize--;                                               \
                  }                                                              \
                  if ( _hs_tail ) {                                              \
                      _hs_tail->next = ((_hs_e) ?                                \
                            ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL);          \
                  } else {                                                       \
                      _hs_list = _hs_e;                                          \
                  }                                                              \
                  _hs_e->prev = ((_hs_tail) ?                                    \
                     ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL);              \
                  _hs_tail = _hs_e;                                              \
              }                                                                  \
              _hs_p = _hs_q;                                                     \
          }                                                                      \
          _hs_tail->next = NULL;                                                 \
          if ( _hs_nmerges <= 1 ) {                                              \
              _hs_looping=0;                                                     \
              (head)->hh.tbl->tail = _hs_tail;                                   \
              DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list));      \
          }                                                                      \
          _hs_insize *= 2;                                                       \
      }                                                                          \
      HASH_FSCK(hh,head);                                                        \
  }                                                                              \
} while (0)
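
/* Example: sorting the hash into application order with HASH_SORT. A minimal
 * sketch, reusing the hypothetical user structure from the earlier example:
 *
 *   int by_id(user *a, user *b) { return (a->id - b->id); }
 *   ...
 *   HASH_SORT(users, by_id);   // items now iterate in ascending id order
 */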

/* This function selects items from one hash into another hash.
 * The end result is that the selected items have dual presence
 * in both hashes. There is no copy of the items made; rather
 * they are added into the new hash through a secondary hash
 * handle that must be present in the structure. */
#define HASH_SELECT(hh_dst, dst, hh_src, src, cond)                              \
do {                                                                             \
  unsigned _src_bkt, _dst_bkt;                                                   \
  void *_last_elt=NULL, *_elt;                                                   \
  UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL;                         \
  ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst));                 \
  if (src) {                                                                     \
    for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) {     \
      for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head;                \
          _src_hh;                                                               \
          _src_hh = _src_hh->hh_next) {                                          \
          _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh);                       \
          if (cond(_elt)) {                                                      \
            _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho);               \
            _dst_hh->key = _src_hh->key;                                         \
            _dst_hh->keylen = _src_hh->keylen;                                   \
            _dst_hh->hashv = _src_hh->hashv;                                     \
            _dst_hh->prev = _last_elt;                                           \
            _dst_hh->next = NULL;                                                \
            if (_last_elt_hh) { _last_elt_hh->next = _elt; }                     \
            if (!dst) {                                                          \
              DECLTYPE_ASSIGN(dst,_elt);                                         \
              HASH_MAKE_TABLE(hh_dst,dst);                                       \
            } else {                                                             \
              _dst_hh->tbl = (dst)->hh_dst.tbl;                                  \
            }                                                                    \
            HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt);    \
            HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh);            \
            (dst)->hh_dst.tbl->num_items++;                                      \
            _last_elt = _elt;                                                    \
            _last_elt_hh = _dst_hh;                                              \
          }                                                                      \
      }                                                                          \
    }                                                                            \
  }                                                                              \
  HASH_FSCK(hh_dst,dst);                                                         \
} while (0)
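
/* Example: a minimal HASH_SELECT sketch. It assumes a hypothetical structure
 * carrying two hash handles (hh and ah), so selected items can be present in
 * both hashes at once, plus a hypothetical selection test:
 *
 *   typedef struct user {
 *       int id;
 *       int age;
 *       UT_hash_handle hh;   // handle for the "users" hash
 *       UT_hash_handle ah;   // second handle for the "adults" hash
 *   } user;
 *
 *   #define is_adult(x) (((user*)(x))->age >= 18)
 *   user *adults = NULL;
 *   HASH_SELECT(ah, adults, hh, users, is_adult);
 */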

#define HASH_CLEAR(hh,head)                                                      \
do {                                                                             \
  if (head) {                                                                    \
    uthash_free((head)->hh.tbl->buckets,                                         \
                (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket));      \
    HASH_BLOOM_FREE((head)->hh.tbl);                                             \
    uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                          \
    (head)=NULL;                                                                 \
  }                                                                              \
} while(0)

#ifdef NO_DECLTYPE
#define HASH_ITER(hh,head,el,tmp)                                                \
for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL);       \
  el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
#else
#define HASH_ITER(hh,head,el,tmp)                                                \
for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL);                 \
  el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
#endif
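
/* Example: HASH_ITER walks the hash in app order and, because it keeps a
 * lookahead pointer, stays safe when the current item is deleted. A sketch,
 * again assuming the hypothetical user structure from above:
 *
 *   user *el, *tmp;
 *   HASH_ITER(hh, users, el, tmp) {
 *       HASH_DEL(users, el);   // deletion mid-iteration is safe here
 *       free(el);
 *   }
 */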

/* obtain a count of items in the hash */
#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)

typedef struct UT_hash_bucket {
   struct UT_hash_handle *hh_head;
   unsigned count;

   /* expand_mult is normally set to 0. In this situation, the max chain length
    * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
    * the bucket's chain exceeds this length, bucket expansion is triggered).
    * However, setting expand_mult to a non-zero value delays bucket expansion
    * (that would be triggered by additions to this particular bucket)
    * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
    * (The multiplier is simply expand_mult+1). The whole idea of this
    * multiplier is to reduce bucket expansions, since they are expensive, in
    * situations where we know that a particular bucket tends to be overused.
    * It is better to let its chain length grow to a longer yet-still-bounded
    * value, than to do an O(n) bucket expansion too often.
    */
   unsigned expand_mult;

} UT_hash_bucket;

/* random signature used only to find hash tables in external analysis */
#define HASH_SIGNATURE 0xa0111fe1
#define HASH_BLOOM_SIGNATURE 0xb12220f2

typedef struct UT_hash_table {
   UT_hash_bucket *buckets;
   unsigned num_buckets, log2_num_buckets;
   unsigned num_items;
   struct UT_hash_handle *tail;  /* tail hh in app order, for fast append */
   ptrdiff_t hho;  /* hash handle offset (byte pos of hash handle in element) */

   /* in an ideal situation (all buckets used equally), no bucket would have
    * more than ceil(#items/#buckets) items. that's the ideal chain length. */
   unsigned ideal_chain_maxlen;

   /* nonideal_items is the number of items in the hash whose chain position
    * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
    * hash distribution; reaching them in a chain traversal takes >ideal steps */
   unsigned nonideal_items;

   /* ineffective expands occur when a bucket doubling was performed, but
    * afterward, more than half the items in the hash had nonideal chain
    * positions. If this happens on two consecutive expansions we inhibit any
    * further expansion, as it's not helping; this happens when the hash
    * function isn't a good fit for the key domain. When expansion is inhibited
    * the hash will still work, albeit no longer in constant time. */
   unsigned ineff_expands, noexpand;

   uint32_t signature; /* used only to find hash tables in external analysis */
#ifdef HASH_BLOOM
   uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
   uint8_t *bloom_bv;
   char bloom_nbits;
#endif

} UT_hash_table;

typedef struct UT_hash_handle {
   struct UT_hash_table *tbl;
   void *prev;                       /* prev element in app order      */
   void *next;                       /* next element in app order      */
   struct UT_hash_handle *hh_prev;   /* previous hh in bucket order    */
   struct UT_hash_handle *hh_next;   /* next hh in bucket order        */
   void *key;                        /* ptr to enclosing struct's key  */
   unsigned keylen;                  /* enclosing struct's key len     */
   unsigned hashv;                   /* result of hash-fcn(key)        */
} UT_hash_handle;

#endif /* UTHASH_H */


Revision: 2115