更快的 bobhash，比 time33 更快（memcached 也在使用）
参考：http://burtleburtle.net/bob/hash/doobs.html 。Bob 后来优化出了第二版 hash，速度提高了 3 倍，源码见 http://burtleburtle.net/bob/c/lookup3.c
下面是我从中提取的变长 key、小端版本（Intel 机器）：
#include <stddef.h> /* defines size_t */
#include <stdint.h> /* defines uint32_t etc */
#include <sys/param.h>/* attempt to define endianness */
#ifdef linux
# include <endian.h> /* attempt to define endianness */
#endif
/*
 * Compile-time guess at the byte order of the target.
 *
 * Exactly one of HASH_LITTLE_ENDIAN / HASH_BIG_ENDIAN is set to 1 when the
 * byte order can be determined; both are 0 when it cannot, in which case
 * the hash falls back to reading the key one byte at a time.
 *
 * Three sources are consulted, in this order:
 *   1. __BYTE_ORDER__ / __ORDER_*_ENDIAN__, predefined by GCC and Clang,
 *      so no system header is required;
 *   2. __BYTE_ORDER / __LITTLE_ENDIAN / __BIG_ENDIAN from <endian.h>;
 *   3. a handful of well-known CPU macros.
 * This may still need adjustment for unusual toolchains.
 */
#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || \
    (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
     __BYTE_ORDER == __LITTLE_ENDIAN) || \
    (defined(i386) || defined(__i386__) || defined(__i486__) || \
     defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
# define HASH_LITTLE_ENDIAN 1
# define HASH_BIG_ENDIAN 0
#elif (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
       __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || \
      (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
       __BYTE_ORDER == __BIG_ENDIAN) || \
      (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
#else
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 0
#endif
/* Size of a table addressed by a `bits`-bit hash: 1 shifted left by `bits`. */
#define hashsize(bits) (((uint32_t)1) << (bits))
/* Mask selecting the low `bits` bits of a hash value: hashsize(bits) - 1. */
#define hashmask(bits) (hashsize(bits) - 1)
/*
 * Combine `word` shifted left by `count` with `word` shifted right by
 * (32 - count).  For a uint32_t operand and 0 < count < 32 this is a
 * 32-bit left rotation; both arguments are evaluated twice.
 */
#define rot(word,count) (((word) >> (32 - (count))) | ((word) << (count)))
/*
 * mix(a,b,c) -- scramble three 32-bit state words in place.
 *
 * Runs six subtract / xor-with-rotation / add rounds over a, b and c.
 * Every argument is assigned to and evaluated several times, so each must
 * be a side-effect-free modifiable lvalue (in practice a uint32_t variable).
 *
 * Wrapped in do { } while (0) so that `mix(a,b,c);` is a single statement
 * and stays correct inside an unbraced if/else.
 */
#define mix(a,b,c) \
do { \
    a -= c;  a ^= rot(c, 4);  c += b; \
    b -= a;  b ^= rot(a, 6);  a += c; \
    c -= b;  c ^= rot(b, 8);  b += a; \
    a -= c;  a ^= rot(c,16);  c += b; \
    b -= a;  b ^= rot(a,19);  a += c; \
    c -= b;  c ^= rot(b, 4);  b += a; \
} while (0)
/*
 * final(a,b,c) -- last scrambling pass over the three 32-bit state words.
 *
 * Runs seven xor / subtract-rotation steps, cycling through c, a, b and
 * ending on c, which is the word the hash function returns.  Every
 * argument is assigned to and evaluated several times, so each must be a
 * side-effect-free modifiable lvalue (in practice a uint32_t variable).
 *
 * Wrapped in do { } while (0) so that `final(a,b,c);` is a single
 * statement and stays correct inside an unbraced if/else.
 */
#define final(a,b,c) \
do { \
    c ^= b; c -= rot(b,14); \
    a ^= c; a -= rot(c,11); \
    b ^= a; b -= rot(a,25); \
    c ^= b; c -= rot(b,16); \
    a ^= c; a -= rot(c, 4); \
    b ^= a; b -= rot(a,14); \
    c ^= b; c -= rot(b,24); \
} while (0)
/*
 * bob_hash() -- hash a variable-length key into a 32-bit value.
 *
 * This is the little-endian-oriented routine taken from Bob Jenkins'
 * lookup3.c.
 *
 *   key     : pointer to the key, treated as an array of `length` bytes;
 *             any alignment is accepted
 *   length  : length of the key in bytes
 *   initval : seed folded into the initial state (e.g. a previous hash)
 *
 * Returns the 32-bit hash.  A zero-length key returns
 * 0xdeadbeef + initval without any mixing.
 *
 * The key is consumed 12 bytes at a time into three 32-bit accumulators
 * (a, b, c).  Depending on the alignment of `key`, the bytes are fetched
 * as 32-bit words, 16-bit words or single bytes; all three paths assemble
 * the bytes in little-endian order, so they feed the same values into the
 * accumulators.  The word-wise paths are only taken when
 * HASH_LITTLE_ENDIAN is nonzero.
 *
 * NOTE: unless VALGRIND is defined, the 32-bit path may read up to three
 * bytes past the end of the key (within the same aligned word) and mask
 * them off; see the comment inside.
 */
uint32_t bob_hash( const void *key, size_t length, uint32_t initval)
{
    uint32_t a,b,c;                                        /* internal state */
    union { const void *ptr; size_t i; } u;   /* needed for Mac Powerbook G4 */

    /* Set up the internal state */
    a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

    u.ptr = key;            /* view the address as an integer to test alignment */
    if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
        const uint32_t *k = (const uint32_t *)key;     /* read 32-bit chunks */

        /*-- all but last block: aligned reads and affect 32 bits of (a,b,c) */
        while (length > 12)
        {
            a += k[0];
            b += k[1];
            c += k[2];
            mix(a,b,c);
            length -= 12;
            k += 3;
        }

        /*------------------------- handle the last (probably partial) block */
        /*
         * "k[2]&0xffffff" actually reads beyond the end of the string, but
         * then masks off the part it's not allowed to read.  Because the
         * string is aligned, the masked-off tail is in the same word as the
         * rest of the string.  Every machine with memory protection I've
         * seen does it on word boundaries, so is OK with this.  But VALGRIND
         * will still catch it and complain.  The masking trick does make the
         * hash noticeably faster for short strings (like English words).
         */
#ifndef VALGRIND
        switch(length)
        {
        case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
        case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
        case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
        case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
        case 8 : b+=k[1]; a+=k[0]; break;
        case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
        case 6 : b+=k[1]&0xffff; a+=k[0]; break;
        case 5 : b+=k[1]&0xff; a+=k[0]; break;
        case 4 : a+=k[0]; break;
        case 3 : a+=k[0]&0xffffff; break;
        case 2 : a+=k[0]&0xffff; break;
        case 1 : a+=k[0]&0xff; break;
        case 0 : return c;          /* zero length strings require no mixing */
        }
#else /* make valgrind happy */
        /* Fetch the trailing 1-3 bytes individually so nothing past the key
         * is ever read. */
        const uint8_t *k8 = (const uint8_t *)k;
        switch(length)
        {
        case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
        case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
        case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
        case 9 : c+=k8[8];                   /* fall through */
        case 8 : b+=k[1]; a+=k[0]; break;
        case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
        case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
        case 5 : b+=k8[4];                   /* fall through */
        case 4 : a+=k[0]; break;
        case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
        case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
        case 1 : a+=k8[0]; break;
        case 0 : return c;
        }
#endif /* !valgrind */
    } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
        const uint16_t *k = (const uint16_t *)key;     /* read 16-bit chunks */
        const uint8_t  *k8;

        /*----------- all but last block: aligned reads and different mixing */
        while (length > 12)
        {
            a += k[0] + (((uint32_t)k[1])<<16);
            b += k[2] + (((uint32_t)k[3])<<16);
            c += k[4] + (((uint32_t)k[5])<<16);
            mix(a,b,c);
            length -= 12;
            k += 6;
        }

        /*------------------------- handle the last (probably partial) block */
        k8 = (const uint8_t *)k;       /* byte view for an odd trailing byte */
        switch(length)
        {
        case 12: c+=k[4]+(((uint32_t)k[5])<<16);
                 b+=k[2]+(((uint32_t)k[3])<<16);
                 a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
        case 10: c+=k[4];
                 b+=k[2]+(((uint32_t)k[3])<<16);
                 a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 9 : c+=k8[8];                      /* fall through */
        case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
                 a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
        case 6 : b+=k[2];
                 a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 5 : b+=k8[4];                      /* fall through */
        case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
        case 2 : a+=k[0];
                 break;
        case 1 : a+=k8[0];
                 break;
        case 0 : return c;                 /* zero length requires no mixing */
        }
    } else {                      /* need to read the key one byte at a time */
        const uint8_t *k = (const uint8_t *)key;

        /*----------- all but the last block: affect some 32 bits of (a,b,c) */
        while (length > 12)
        {
            a += k[0];
            a += ((uint32_t)k[1])<<8;
            a += ((uint32_t)k[2])<<16;
            a += ((uint32_t)k[3])<<24;
            b += k[4];
            b += ((uint32_t)k[5])<<8;
            b += ((uint32_t)k[6])<<16;
            b += ((uint32_t)k[7])<<24;
            c += k[8];
            c += ((uint32_t)k[9])<<8;
            c += ((uint32_t)k[10])<<16;
            c += ((uint32_t)k[11])<<24;
            mix(a,b,c);
            length -= 12;
            k += 12;
        }

        /*---------------------------- last block: affect all 32 bits of (c) */
        switch(length)               /* all the case statements fall through */
        {
        case 12: c+=((uint32_t)k[11])<<24;      /* fall through */
        case 11: c+=((uint32_t)k[10])<<16;      /* fall through */
        case 10: c+=((uint32_t)k[9])<<8;        /* fall through */
        case 9 : c+=k[8];                       /* fall through */
        case 8 : b+=((uint32_t)k[7])<<24;       /* fall through */
        case 7 : b+=((uint32_t)k[6])<<16;       /* fall through */
        case 6 : b+=((uint32_t)k[5])<<8;        /* fall through */
        case 5 : b+=k[4];                       /* fall through */
        case 4 : a+=((uint32_t)k[3])<<24;       /* fall through */
        case 3 : a+=((uint32_t)k[2])<<16;       /* fall through */
        case 2 : a+=((uint32_t)k[1])<<8;        /* fall through */
        case 1 : a+=k[0];
                 break;
        case 0 : return c;
        }
    }

    final(a,b,c);
    return c;
}
测试
对 1000 万（1000w）个 53 字节长的 key 计算 hash，bobhash 的结果：
real 0m0.790s
user 0m0.788s
sys 0m0.000s
time33是:
real 0m1.041s
user 0m1.028s
sys 0m0.004s
页:
[1]