Chameleon

Chameleon Svn Source Tree

Root/branches/cparm/i386/libsaio/uthash.h

1/*
2Copyright (c) 2003-2012, Troy D. Hanson http://uthash.sourceforge.net
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
12IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
13TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
15OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
16EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
17PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
18PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
19LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
20NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22*/
23
24#ifndef UTHASH_H
25#define UTHASH_H
26
27#include "libsaio.h"
28
29/* These macros use decltype or the earlier __typeof GNU extension.
30 As decltype is only available in newer compilers (VS2010 or gcc 4.3+
31 when compiling c++ source) this code uses whatever method is needed
32 or, for VS2008 where neither is available, uses casting workarounds. */
33#ifdef _MSC_VER /* MS compiler */
34#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */
35#define DECLTYPE(x) (decltype(x))
36#else /* VS2008 or older (or VS2010 in C mode) */
37#define NO_DECLTYPE
38#define DECLTYPE(x)
39#endif
40#else /* GNU, Sun and other compilers */
41#define DECLTYPE(x) (__typeof(x))
42#endif
43
44#ifdef NO_DECLTYPE
45#define DECLTYPE_ASSIGN(dst,src) \
46do { \
47 char **_da_dst = (char**)(&(dst)); \
48 *_da_dst = (char*)(src); \
49} while(0)
50#else
51#define DECLTYPE_ASSIGN(dst,src) \
52do { \
53 (dst) = DECLTYPE(dst)(src); \
54} while(0)
55#endif
56
57#define UTHASH_VERSION 1.9.6
58
59#ifndef uthash_fatal
60#define uthash_fatal(msg) longjmp(uterror,-1) /* fatal error (out of memory,etc) */
61#endif
62#ifndef uthash_malloc
63#define uthash_malloc(sz) malloc(sz) /* malloc fcn */
64#endif
65#ifndef uthash_free
66#define uthash_free(ptr,sz) free(ptr) /* free fcn */
67#endif
68#ifndef uthash_noexpand_fyi
69#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */
70#endif
71#ifndef uthash_expand_fyi
72#define uthash_expand_fyi(tbl) /* can be defined to log expands */
73#endif
74
75/* initial number of buckets */
76#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */
77#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */
78#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */
79
80/* calculate the element whose hash handle address is hhe */
81#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
82
83#define HASH_FIND(hh,head,keyptr,keylen,out) \
84do { \
85 unsigned _hf_bkt,_hf_hashv; \
86 out=NULL; \
87 if (head) { \
88 HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
89 if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \
90 HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \
91 keyptr,keylen,out); \
92 } \
93 } \
94} while (0)
95
96#ifdef HASH_BLOOM
97#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
98#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0)
99#define HASH_BLOOM_MAKE(tbl) \
100do { \
101 (tbl)->bloom_nbits = HASH_BLOOM; \
102 (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \
103 if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
104 memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
105 (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
106} while (0)
107
108#define HASH_BLOOM_FREE(tbl) \
109do { \
110 uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
111} while (0)
112
113#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
114#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
115
116#define HASH_BLOOM_ADD(tbl,hashv) \
117 HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
118
119#define HASH_BLOOM_TEST(tbl,hashv) \
120 HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
121
122#else
123#define HASH_BLOOM_MAKE(tbl)
124#define HASH_BLOOM_FREE(tbl)
125#define HASH_BLOOM_ADD(tbl,hashv)
126#define HASH_BLOOM_TEST(tbl,hashv) (1)
127#endif
128
129#define HASH_MAKE_TABLE(hh,head) \
130do { \
131 (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
132 sizeof(UT_hash_table)); \
133 if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \
134 memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
135 (head)->hh.tbl->tail = &((head)->hh); \
136 (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
137 (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
138 (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \
139 (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
140 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
141 if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
142 memset((head)->hh.tbl->buckets, 0, \
143 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
144 HASH_BLOOM_MAKE((head)->hh.tbl); \
145 (head)->hh.tbl->signature = HASH_SIGNATURE; \
146} while(0)
147
148#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
149 HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
150
151#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
152do { \
153 unsigned _ha_bkt; \
154 (add)->hh.next = NULL; \
155 (add)->hh.key = (char*)keyptr; \
156 (add)->hh.keylen = (unsigned)keylen_in; \
157 if (!(head)) { \
158 head = (add); \
159 (head)->hh.prev = NULL; \
160 HASH_MAKE_TABLE(hh,head); \
161 } else { \
162 (head)->hh.tbl->tail->next = (add); \
163 (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
164 (head)->hh.tbl->tail = &((add)->hh); \
165 } \
166 (head)->hh.tbl->num_items++; \
167 (add)->hh.tbl = (head)->hh.tbl; \
168 HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
169 (add)->hh.hashv, _ha_bkt); \
170 HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
171 HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
172 HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
173 HASH_FSCK(hh,head); \
174} while(0)
175
176#define HASH_TO_BKT( hashv, num_bkts, bkt ) \
177do { \
178 bkt = ((hashv) & ((num_bkts) - 1)); \
179} while(0)
180
181/* delete "delptr" from the hash table.
182 * "the usual" patch-up process for the app-order doubly-linked-list.
183 * The use of _hd_hh_del below deserves special explanation.
184 * These used to be expressed using (delptr) but that led to a bug
185 * if someone used the same symbol for the head and deletee, like
186 * HASH_DELETE(hh,users,users);
187 * We want that to work, but by changing the head (users) below
188 * we were forfeiting our ability to further refer to the deletee (users)
189 * in the patch-up process. Solution: use scratch space to
190 * copy the deletee pointer, then the latter references are via that
191 * scratch pointer rather than through the repointed (users) symbol.
192 */
193#define HASH_DELETE(hh,head,delptr) \
194do { \
195 unsigned _hd_bkt; \
196 struct UT_hash_handle *_hd_hh_del; \
197 if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
198 uthash_free((head)->hh.tbl->buckets, \
199 (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
200 HASH_BLOOM_FREE((head)->hh.tbl); \
201 uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
202 head = NULL; \
203 } else { \
204 _hd_hh_del = &((delptr)->hh); \
205 if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
206 (head)->hh.tbl->tail = \
207 (UT_hash_handle*)((char*)((delptr)->hh.prev) + \
208 (head)->hh.tbl->hho); \
209 } \
210 if ((delptr)->hh.prev) { \
211 ((UT_hash_handle*)((char*)((delptr)->hh.prev) + \
212 (head)->hh.tbl->hho))->next = (delptr)->hh.next; \
213 } else { \
214 DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
215 } \
216 if (_hd_hh_del->next) { \
217 ((UT_hash_handle*)((char*)_hd_hh_del->next + \
218 (head)->hh.tbl->hho))->prev = \
219 _hd_hh_del->prev; \
220 } \
221 HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
222 HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
223 (head)->hh.tbl->num_items--; \
224 } \
225 HASH_FSCK(hh,head); \
226} while (0)
227
228
229/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
230#define HASH_FIND_STR(head,findstr,out) \
231 HASH_FIND(hh,head,findstr,strlen(findstr),out)
232#define HASH_ADD_STR(head,strfield,add) \
233 HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
234#define HASH_FIND_INT(head,findint,out) \
235 HASH_FIND(hh,head,findint,sizeof(int),out)
236#define HASH_ADD_INT(head,intfield,add) \
237 HASH_ADD(hh,head,intfield,sizeof(int),add)
238#define HASH_FIND_PTR(head,findptr,out) \
239 HASH_FIND(hh,head,findptr,sizeof(void *),out)
240#define HASH_ADD_PTR(head,ptrfield,add) \
241 HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
242#define HASH_DEL(head,delptr) \
243 HASH_DELETE(hh,head,delptr)
244
245/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
246 * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
247 */
248#ifdef HASH_DEBUG
249#define HASH_OOPS(...) do { printf("%s\n"__VA_ARGS__); longjmp(THIS_BUF_ERROR,-1); } while (0)
250#define HASH_FSCK(hh,head) \
251do { \
252 unsigned _bkt_i; \
253 unsigned _count, _bkt_count; \
254 char *_prev; \
255 struct UT_hash_handle *_thh; \
256 if (head) { \
257 _count = 0; \
258 for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
259 _bkt_count = 0; \
260 _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
261 _prev = NULL; \
262 while (_thh) { \
263 if (_prev != (char*)(_thh->hh_prev)) { \
264 HASH_OOPS("invalid hh_prev %p, actual %p\n", \
265 _thh->hh_prev, _prev ); \
266 } \
267 _bkt_count++; \
268 _prev = (char*)(_thh); \
269 _thh = _thh->hh_next; \
270 } \
271 _count += _bkt_count; \
272 if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
273 HASH_OOPS("invalid bucket count %d, actual %d\n", \
274 (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
275 } \
276 } \
277 if (_count != (head)->hh.tbl->num_items) { \
278 HASH_OOPS("invalid hh item count %d, actual %d\n", \
279 (head)->hh.tbl->num_items, _count ); \
280 } \
281 /* traverse hh in app order; check next/prev integrity, count */ \
282 _count = 0; \
283 _prev = NULL; \
284 _thh = &(head)->hh; \
285 while (_thh) { \
286 _count++; \
287 if (_prev !=(char*)(_thh->prev)) { \
288 HASH_OOPS("invalid prev %p, actual %p\n", \
289 _thh->prev, _prev ); \
290 } \
291 _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
292 _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \
293 (head)->hh.tbl->hho) : NULL ); \
294 } \
295 if (_count != (head)->hh.tbl->num_items) { \
296 HASH_OOPS("invalid app item count %d, actual %d\n", \
297 (head)->hh.tbl->num_items, _count ); \
298 } \
299 } \
300} while (0)
301#else
302#define HASH_FSCK(hh,head)
303#endif
304
305/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
306 * the descriptor to which this macro is defined for tuning the hash function.
307 * The app can #include <unistd.h> to get the prototype for write(2). */
308#ifdef HASH_EMIT_KEYS
309#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
310do { \
311 unsigned _klen = fieldlen; \
312 write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
313 write(HASH_EMIT_KEYS, keyptr, fieldlen); \
314} while (0)
315#else
316#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
317#endif
318
319/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */
320#ifdef HASH_FUNCTION
321#define HASH_FCN HASH_FUNCTION
322#else
323#define HASH_FCN HASH_JEN
324#endif
325
326/* The Bernstein hash function, used in Perl prior to v5.6 */
327#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
328do { \
329 unsigned _hb_keylen=keylen; \
330 char *_hb_key=(char*)(key); \
331 (hashv) = 0; \
332 while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \
333 bkt = (hashv) & (num_bkts-1); \
334} while (0)
335
336
337/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
338 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
339#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
340do { \
341 unsigned _sx_i; \
342 char *_hs_key=(char*)(key); \
343 hashv = 0; \
344 for(_sx_i=0; _sx_i < keylen; _sx_i++) \
345 hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
346 bkt = hashv & (num_bkts-1); \
347} while (0)
348
349#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
350do { \
351 unsigned _fn_i; \
352 char *_hf_key=(char*)(key); \
353 hashv = 2166136261UL; \
354 for(_fn_i=0; _fn_i < keylen; _fn_i++) \
355 hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \
356 bkt = hashv & (num_bkts-1); \
357} while(0)
358
359#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
360do { \
361 unsigned _ho_i; \
362 char *_ho_key=(char*)(key); \
363 hashv = 0; \
364 for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
365 hashv += _ho_key[_ho_i]; \
366 hashv += (hashv << 10); \
367 hashv ^= (hashv >> 6); \
368 } \
369 hashv += (hashv << 3); \
370 hashv ^= (hashv >> 11); \
371 hashv += (hashv << 15); \
372 bkt = hashv & (num_bkts-1); \
373} while(0)
374
375#define HASH_JEN_MIX(a,b,c) \
376do { \
377 a -= b; a -= c; a ^= ( c >> 13 ); \
378 b -= c; b -= a; b ^= ( a << 8 ); \
379 c -= a; c -= b; c ^= ( b >> 13 ); \
380 a -= b; a -= c; a ^= ( c >> 12 ); \
381 b -= c; b -= a; b ^= ( a << 16 ); \
382 c -= a; c -= b; c ^= ( b >> 5 ); \
383 a -= b; a -= c; a ^= ( c >> 3 ); \
384 b -= c; b -= a; b ^= ( a << 10 ); \
385 c -= a; c -= b; c ^= ( b >> 15 ); \
386} while (0)
387
388#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
389do { \
390 unsigned _hj_i,_hj_j,_hj_k; \
391 char *_hj_key=(char*)(key); \
392 hashv = 0xfeedbeef; \
393 _hj_i = _hj_j = 0x9e3779b9; \
394 _hj_k = (unsigned)keylen; \
395 while (_hj_k >= 12) { \
396 _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
397 + ( (unsigned)_hj_key[2] << 16 ) \
398 + ( (unsigned)_hj_key[3] << 24 ) ); \
399 _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
400 + ( (unsigned)_hj_key[6] << 16 ) \
401 + ( (unsigned)_hj_key[7] << 24 ) ); \
402 hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
403 + ( (unsigned)_hj_key[10] << 16 ) \
404 + ( (unsigned)_hj_key[11] << 24 ) ); \
405 \
406 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
407 \
408 _hj_key += 12; \
409 _hj_k -= 12; \
410 } \
411 hashv += keylen; \
412 switch ( _hj_k ) { \
413 case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \
414 case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \
415 case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \
416 case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \
417 case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \
418 case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \
419 case 5: _hj_j += _hj_key[4]; \
420 case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \
421 case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \
422 case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \
423 case 1: _hj_i += _hj_key[0]; \
424 } \
425 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
426 bkt = hashv & (num_bkts-1); \
427} while(0)
428
429/* The Paul Hsieh hash function */
430#undef get16bits
431#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
432 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
433#define get16bits(d) (*((const uint16_t *) (d)))
434#endif
435
436#if !defined (get16bits)
437#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
438 +(uint32_t)(((const uint8_t *)(d))[0]) )
439#endif
440#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \
441do { \
442 char *_sfh_key=(char*)(key); \
443 uint32_t _sfh_tmp, _sfh_len = keylen; \
444 \
445 int _sfh_rem = _sfh_len & 3; \
446 _sfh_len >>= 2; \
447 hashv = 0xcafebabe; \
448 \
449 /* Main loop */ \
450 for (;_sfh_len > 0; _sfh_len--) { \
451 hashv += get16bits (_sfh_key); \
452 _sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \
453 hashv = (hashv << 16) ^ _sfh_tmp; \
454 _sfh_key += 2*sizeof (uint16_t); \
455 hashv += hashv >> 11; \
456 } \
457 \
458 /* Handle end cases */ \
459 switch (_sfh_rem) { \
460 case 3: hashv += get16bits (_sfh_key); \
461 hashv ^= hashv << 16; \
462 hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \
463 hashv += hashv >> 11; \
464 break; \
465 case 2: hashv += get16bits (_sfh_key); \
466 hashv ^= hashv << 11; \
467 hashv += hashv >> 17; \
468 break; \
469 case 1: hashv += *_sfh_key; \
470 hashv ^= hashv << 10; \
471 hashv += hashv >> 1; \
472 } \
473 \
474 /* Force "avalanching" of final 127 bits */ \
475 hashv ^= hashv << 3; \
476 hashv += hashv >> 5; \
477 hashv ^= hashv << 4; \
478 hashv += hashv >> 17; \
479 hashv ^= hashv << 25; \
480 hashv += hashv >> 6; \
481 bkt = hashv & (num_bkts-1); \
482} while(0)
483
484#ifdef HASH_USING_NO_STRICT_ALIASING
485/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
486 * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
487 * MurmurHash uses the faster approach only on CPU's where we know it's safe.
488 *
489 * Note the preprocessor built-in defines can be emitted using:
490 *
491 * gcc -m64 -dM -E - < /dev/null (on gcc)
492 * cc -## a.c (where a.c is a simple test file) (Sun Studio)
493 */
494#if (defined(__i386__) || defined(__x86_64__))
495#define MUR_GETBLOCK(p,i) p[i]
496#else /* non intel */
497#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0)
498#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1)
499#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2)
500#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3)
501#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
502#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
503#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
504#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
505#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8))
506#else /* assume little endian non-intel */
507#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
508#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
509#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8))
510#endif
511#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \
512 (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
513 (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
514 MUR_ONE_THREE(p))))
515#endif
516#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
517#define MUR_FMIX(_h) \
518do { \
519 _h ^= _h >> 16; \
520 _h *= 0x85ebca6b; \
521 _h ^= _h >> 13; \
522 _h *= 0xc2b2ae35l; \
523 _h ^= _h >> 16; \
524} while(0)
525
526#define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \
527do { \
528 const uint8_t *_mur_data = (const uint8_t*)(key); \
529 const int _mur_nblocks = (keylen) / 4; \
530 uint32_t _mur_h1 = 0xf88D5353; \
531 uint32_t _mur_c1 = 0xcc9e2d51; \
532 uint32_t _mur_c2 = 0x1b873593; \
533 const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \
534 int _mur_i; \
535 for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \
536 uint32_t _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
537 _mur_k1 *= _mur_c1; \
538 _mur_k1 = MUR_ROTL32(_mur_k1,15); \
539 _mur_k1 *= _mur_c2; \
540 \
541 _mur_h1 ^= _mur_k1; \
542 _mur_h1 = MUR_ROTL32(_mur_h1,13); \
543 _mur_h1 = _mur_h1*5+0xe6546b64; \
544 } \
545 const uint8_t *_mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \
546 uint32_t _mur_k1=0; \
547 switch((keylen) & 3) { \
548 case 3: _mur_k1 ^= _mur_tail[2] << 16; \
549 case 2: _mur_k1 ^= _mur_tail[1] << 8; \
550 case 1: _mur_k1 ^= _mur_tail[0]; \
551 _mur_k1 *= _mur_c1; \
552 _mur_k1 = MUR_ROTL32(_mur_k1,15); \
553 _mur_k1 *= _mur_c2; \
554 _mur_h1 ^= _mur_k1; \
555 } \
556 _mur_h1 ^= (keylen); \
557 MUR_FMIX(_mur_h1); \
558 hashv = _mur_h1; \
559 bkt = hashv & (num_bkts-1); \
560} while(0)
561#endif /* HASH_USING_NO_STRICT_ALIASING */
562
563/* key comparison function; return 0 if keys equal */
564#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
565
566/* iterate over items in a known bucket to find desired item */
567#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
568do { \
569 if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \
570 else out=NULL; \
571 while (out) { \
572 if ((out)->hh.keylen == keylen_in) { \
573 if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \
574 } \
575 if ((out)->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \
576 else out = NULL; \
577 } \
578} while(0)
579
580/* add an item to a bucket */
581#define HASH_ADD_TO_BKT(head,addhh) \
582do { \
583 head.count++; \
584 (addhh)->hh_next = head.hh_head; \
585 (addhh)->hh_prev = NULL; \
586 if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \
587 (head).hh_head=addhh; \
588 if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
589 && (addhh)->tbl->noexpand != 1) { \
590 HASH_EXPAND_BUCKETS((addhh)->tbl); \
591 } \
592} while(0)
593
594/* remove an item from a given bucket */
595#define HASH_DEL_IN_BKT(hh,head,hh_del) \
596 (head).count--; \
597 if ((head).hh_head == hh_del) { \
598 (head).hh_head = hh_del->hh_next; \
599 } \
600 if (hh_del->hh_prev) { \
601 hh_del->hh_prev->hh_next = hh_del->hh_next; \
602 } \
603 if (hh_del->hh_next) { \
604 hh_del->hh_next->hh_prev = hh_del->hh_prev; \
605 }
606
607/* Bucket expansion has the effect of doubling the number of buckets
608 * and redistributing the items into the new buckets. Ideally the
609 * items will distribute more or less evenly into the new buckets
610 * (the extent to which this is true is a measure of the quality of
611 * the hash function as it applies to the key domain).
612 *
613 * With the items distributed into more buckets, the chain length
614 * (item count) in each bucket is reduced. Thus by expanding buckets
615 * the hash keeps a bound on the chain length. This bounded chain
616 * length is the essence of how a hash provides constant time lookup.
617 *
618 * The calculation of tbl->ideal_chain_maxlen below deserves some
619 * explanation. First, keep in mind that we're calculating the ideal
620 * maximum chain length based on the *new* (doubled) bucket count.
621 * In fractions this is just n/b (n=number of items,b=new num buckets).
622 * Since the ideal chain length is an integer, we want to calculate
623 * ceil(n/b). We don't depend on floating point arithmetic in this
624 * hash, so to calculate ceil(n/b) with integers we could write
625 *
626 * ceil(n/b) = (n/b) + ((n%b)?1:0)
627 *
628 * and in fact a previous version of this hash did just that.
629 * But now we have improved things a bit by recognizing that b is
630 * always a power of two. We keep its base 2 log handy (call it lb),
631 * so now we can write this with a bit shift and logical AND:
632 *
633 * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
634 *
635 */
636#define HASH_EXPAND_BUCKETS(tbl) \
637do { \
638 unsigned _he_bkt; \
639 unsigned _he_bkt_i; \
640 struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
641 UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
642 _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
643 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
644 if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
645 memset(_he_new_buckets, 0, \
646 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
647 tbl->ideal_chain_maxlen = \
648 (tbl->num_items >> (tbl->log2_num_buckets+1)) + \
649 ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \
650 tbl->nonideal_items = 0; \
651 for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
652 { \
653 _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
654 while (_he_thh) { \
655 _he_hh_nxt = _he_thh->hh_next; \
656 HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \
657 _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
658 if (_he_newbkt) { \
659 if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
660 tbl->nonideal_items++; \
661 _he_newbkt->expand_mult = _he_newbkt->count / \
662 tbl->ideal_chain_maxlen; \
663 } \
664 _he_thh->hh_prev = NULL; \
665 _he_thh->hh_next = _he_newbkt->hh_head; \
666 if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \
667 _he_thh; \
668 _he_newbkt->hh_head = _he_thh; \
669 _he_thh = _he_hh_nxt; \
670 } \
671 else { uthash_fatal( "out of memory"); } \
672 } \
673 } \
674 uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
675 tbl->num_buckets *= 2; \
676 tbl->log2_num_buckets++; \
677 tbl->buckets = _he_new_buckets; \
678 tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
679 (tbl->ineff_expands+1) : 0; \
680 if (tbl->ineff_expands > 1) { \
681 tbl->noexpand=1; \
682 uthash_noexpand_fyi(tbl); \
683 } \
684 uthash_expand_fyi(tbl); \
685} while(0)
686
687
688/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
689/* Note that HASH_SORT assumes the hash handle name to be hh.
690 * HASH_SRT was added to allow the hash handle name to be passed in. */
691#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
692#define HASH_SRT(hh,head,cmpfcn) \
693do { \
694 unsigned _hs_i; \
695 unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
696 struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
697 if (head) { \
698 _hs_insize = 1; \
699 _hs_looping = 1; \
700 _hs_list = &((head)->hh); \
701 while (_hs_looping) { \
702 _hs_p = _hs_list; \
703 _hs_list = NULL; \
704 _hs_tail = NULL; \
705 _hs_nmerges = 0; \
706 while (_hs_p) { \
707 _hs_nmerges++; \
708 _hs_q = _hs_p; \
709 _hs_psize = 0; \
710 for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
711 _hs_psize++; \
712 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
713 ((void*)((char*)(_hs_q->next) + \
714 (head)->hh.tbl->hho)) : NULL); \
715 if (! (_hs_q) ) break; \
716 } \
717 _hs_qsize = _hs_insize; \
718 while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \
719 if (_hs_psize == 0) { \
720 _hs_e = _hs_q; \
721 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
722 ((void*)((char*)(_hs_q->next) + \
723 (head)->hh.tbl->hho)) : NULL); \
724 _hs_qsize--; \
725 } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \
726 _hs_e = _hs_p; \
727 _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
728 ((void*)((char*)(_hs_p->next) + \
729 (head)->hh.tbl->hho)) : NULL); \
730 _hs_psize--; \
731 } else if (( \
732 cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
733 DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
734 ) <= 0) { \
735 _hs_e = _hs_p; \
736 _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
737 ((void*)((char*)(_hs_p->next) + \
738 (head)->hh.tbl->hho)) : NULL); \
739 _hs_psize--; \
740 } else { \
741 _hs_e = _hs_q; \
742 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
743 ((void*)((char*)(_hs_q->next) + \
744 (head)->hh.tbl->hho)) : NULL); \
745 _hs_qsize--; \
746 } \
747 if ( _hs_tail ) { \
748 _hs_tail->next = ((_hs_e) ? \
749 ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
750 } else { \
751 _hs_list = _hs_e; \
752 } \
753 _hs_e->prev = ((_hs_tail) ? \
754 ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
755 _hs_tail = _hs_e; \
756 } \
757 _hs_p = _hs_q; \
758 } \
759 _hs_tail->next = NULL; \
760 if ( _hs_nmerges <= 1 ) { \
761 _hs_looping=0; \
762 (head)->hh.tbl->tail = _hs_tail; \
763 DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
764 } \
765 _hs_insize *= 2; \
766 } \
767 HASH_FSCK(hh,head); \
768 } \
769} while (0)
770
771/* This function selects items from one hash into another hash.
772 * The end result is that the selected items have dual presence
773 * in both hashes. There is no copy of the items made; rather
774 * they are added into the new hash through a secondary hash
775 * hash handle that must be present in the structure. */
776#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
777do { \
778 unsigned _src_bkt, _dst_bkt; \
779 void *_last_elt=NULL, *_elt; \
780 UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
781 ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
782 if (src) { \
783 for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
784 for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
785 _src_hh; \
786 _src_hh = _src_hh->hh_next) { \
787 _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
788 if (cond(_elt)) { \
789 _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
790 _dst_hh->key = _src_hh->key; \
791 _dst_hh->keylen = _src_hh->keylen; \
792 _dst_hh->hashv = _src_hh->hashv; \
793 _dst_hh->prev = _last_elt; \
794 _dst_hh->next = NULL; \
795 if (_last_elt_hh) { _last_elt_hh->next = _elt; } \
796 if (!dst) { \
797 DECLTYPE_ASSIGN(dst,_elt); \
798 HASH_MAKE_TABLE(hh_dst,dst); \
799 } else { \
800 _dst_hh->tbl = (dst)->hh_dst.tbl; \
801 } \
802 HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
803 HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
804 (dst)->hh_dst.tbl->num_items++; \
805 _last_elt = _elt; \
806 _last_elt_hh = _dst_hh; \
807 } \
808 } \
809 } \
810 } \
811 HASH_FSCK(hh_dst,dst); \
812} while (0)
813
814#define HASH_CLEAR(hh,head) \
815do { \
816 if (head) { \
817 uthash_free((head)->hh.tbl->buckets, \
818 (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
819 HASH_BLOOM_FREE((head)->hh.tbl); \
820 uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
821 (head)=NULL; \
822 } \
823} while(0)
824
825#ifdef NO_DECLTYPE
826#define HASH_ITER(hh,head,el,tmp) \
827for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
828 el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
829#else
830#define HASH_ITER(hh,head,el,tmp) \
831for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
832 el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
833#endif
834
835/* obtain a count of items in the hash */
836#define HASH_COUNT(head) HASH_CNT(hh,head)
837#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
838
839typedef struct UT_hash_bucket {
840 struct UT_hash_handle *hh_head;
841 unsigned count;
842
843 /* expand_mult is normally set to 0. In this situation, the max chain length
844 * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
845 * the bucket's chain exceeds this length, bucket expansion is triggered).
846 * However, setting expand_mult to a non-zero value delays bucket expansion
847 * (that would be triggered by additions to this particular bucket)
848 * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
849 * (The multiplier is simply expand_mult+1). The whole idea of this
850 * multiplier is to reduce bucket expansions, since they are expensive, in
851 * situations where we know that a particular bucket tends to be overused.
852 * It is better to let its chain length grow to a longer yet-still-bounded
853 * value, than to do an O(n) bucket expansion too often.
854 */
855 unsigned expand_mult;
856
857} UT_hash_bucket;
858
859/* random signature used only to find hash tables in external analysis */
860#define HASH_SIGNATURE 0xa0111fe1
861#define HASH_BLOOM_SIGNATURE 0xb12220f2
862
863typedef struct UT_hash_table {
864 UT_hash_bucket *buckets;
865 unsigned num_buckets, log2_num_buckets;
866 unsigned num_items;
867 struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
868 ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */
869
870 /* in an ideal situation (all buckets used equally), no bucket would have
871 * more than ceil(#items/#buckets) items. that's the ideal chain length. */
872 unsigned ideal_chain_maxlen;
873
874 /* nonideal_items is the number of items in the hash whose chain position
875 * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
876 * hash distribution; reaching them in a chain traversal takes >ideal steps */
877 unsigned nonideal_items;
878
879 /* ineffective expands occur when a bucket doubling was performed, but
880 * afterward, more than half the items in the hash had nonideal chain
881 * positions. If this happens on two consecutive expansions we inhibit any
882 * further expansion, as it's not helping; this happens when the hash
883 * function isn't a good fit for the key domain. When expansion is inhibited
884 * the hash will still work, albeit no longer in constant time. */
885 unsigned ineff_expands, noexpand;
886
887 uint32_t signature; /* used only to find hash tables in external analysis */
888#ifdef HASH_BLOOM
889 uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
890 uint8_t *bloom_bv;
891 char bloom_nbits;
892#endif
893
894} UT_hash_table;
895
896typedef struct UT_hash_handle {
897 struct UT_hash_table *tbl;
898 void *prev; /* prev element in app order */
899 void *next; /* next element in app order */
900 struct UT_hash_handle *hh_prev; /* previous hh in bucket order */
901 struct UT_hash_handle *hh_next; /* next hh in bucket order */
902 void *key; /* ptr to enclosing struct's key */
903 unsigned keylen; /* enclosing struct's key len */
904 unsigned hashv; /* result of hash-fcn(key) */
905} UT_hash_handle;
906
907#endif /* UTHASH_H */
908

Archive Download this file

Revision: 2057