更快bobhash, 比time33快 (memcached也使用)

熬死你的 发表于 2015-9-1 04:01:18

　　http://burtleburtle.net/bob/hash/doobs.html
　　Bob 对他的第二版 hash 做了优化（即 lookup3），速度提高了3倍：http://burtleburtle.net/bob/c/lookup3.c
　　下面是我从中提取的版本：支持变长 key，针对小端机器（如 Intel）。
#include <stddef.h>    /* defines size_t */
#include <stdint.h>    /* defines uint32_t etc */
#include <sys/param.h> /* attempt to define endianness */
#ifdef linux
# include <endian.h>   /* attempt to define endianness */
#endif
/*
 * Best-effort compile-time guess at the byte order of the target.
 *
 * Three sources are consulted, in this order:
 *   1. __BYTE_ORDER__ / __ORDER_*_ENDIAN__  (predefined by GCC and Clang),
 *   2. __BYTE_ORDER / __LITTLE_ENDIAN / __BIG_ENDIAN  (from <endian.h>),
 *   3. a short list of well-known architecture macros.
 *
 * If none of them matches, both macros are 0 and bob_hash() uses its
 * byte-at-a-time path, which gives the same result on any machine.
 * This may need adjustment for unusual targets.
 */
#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || \
    (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
     __BYTE_ORDER == __LITTLE_ENDIAN) || \
    (defined(i386) || defined(__i386__) || defined(__i486__) || \
     defined(__i586__) || defined(__i686__) || defined(__x86_64__) || \
     defined(__amd64__) || defined(vax) || defined(MIPSEL))
# define HASH_LITTLE_ENDIAN 1
# define HASH_BIG_ENDIAN 0
#elif (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
       __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || \
      (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
       __BYTE_ORDER == __BIG_ENDIAN) || \
      (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
#else
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 0
#endif

/* Number of buckets in a table indexed by n hash bits, and the matching mask. */
#define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1)

/* Rotate the 32-bit value x left by k bits; k must be in the range 1..31. */
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

/*
 * mix -- reversibly mix three 32-bit values in place.
 * The arguments must be uint32_t lvalues.  Wrapped in do/while(0) so that
 * "mix(a,b,c);" is a single statement, safe in an unbraced if/else.
 */
#define mix(a,b,c) \
do { \
    a -= c;  a ^= rot(c, 4);  c += b; \
    b -= a;  b ^= rot(a, 6);  a += c; \
    c -= b;  c ^= rot(b, 8);  b += a; \
    a -= c;  a ^= rot(c,16);  c += b; \
    b -= a;  b ^= rot(a,19);  a += c; \
    c -= b;  c ^= rot(b, 4);  b += a; \
} while (0)

/*
 * final -- final mixing of three 32-bit values (a,b,c) into c.
 * The arguments must be uint32_t lvalues; same statement-safety as mix().
 */
#define final(a,b,c) \
do { \
    c ^= b; c -= rot(b,14); \
    a ^= c; a -= rot(c,11); \
    b ^= a; b -= rot(a,25); \
    c ^= b; c -= rot(b,16); \
    a ^= c; a -= rot(c,4);  \
    b ^= a; b -= rot(a,14); \
    c ^= b; c -= rot(b,24); \
} while (0)

/*
 * bob_hash -- hash a variable-length key into a 32-bit value.
 *
 *   key     : pointer to the key bytes (any alignment)
 *   length  : length of the key in bytes
 *   initval : seed; any 32-bit value
 *
 * Returns the 32-bit hash.  A zero-length key returns
 * 0xdeadbeef + initval without any mixing.
 *
 * Key bytes are always combined into 32-bit words in little-endian order.
 * When HASH_LITTLE_ENDIAN is set and the key is 4-byte or 2-byte aligned,
 * the words are loaded directly; otherwise they are assembled one byte at
 * a time.  All three paths produce the same value for the same key bytes.
 */
uint32_t bob_hash( const void *key, size_t length, uint32_t initval)
{
    uint32_t a,b,c;                                  /* internal state */
    union { const void *ptr; size_t i; } u;   /* pointer as integer, to test alignment */

    /* Set up the internal state */
    a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

    u.ptr = key;
    if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
        const uint32_t *k = (const uint32_t *)key;   /* read 32-bit chunks */

        /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
        while (length > 12)
        {
            a += k[0];
            b += k[1];
            c += k[2];
            mix(a,b,c);
            length -= 12;
            k += 3;
        }

        /*----------------------------- handle the last (probably partial) block */
        /*
         * "k[2]&0xffffff" actually reads beyond the end of the string, but
         * then masks off the part it's not allowed to read.  Because the
         * string is aligned, the masked-off tail is in the same word as the
         * rest of the string.  Every machine with memory protection I've seen
         * does it on word boundaries, so is OK with this.  But VALGRIND will
         * still catch it and complain.  The masking trick does make the hash
         * noticeably faster for short strings (like English words).
         */
#ifndef VALGRIND

        switch(length)
        {
        case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
        case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
        case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
        case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
        case 8 : b+=k[1]; a+=k[0]; break;
        case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
        case 6 : b+=k[1]&0xffff; a+=k[0]; break;
        case 5 : b+=k[1]&0xff; a+=k[0]; break;
        case 4 : a+=k[0]; break;
        case 3 : a+=k[0]&0xffffff; break;
        case 2 : a+=k[0]&0xffff; break;
        case 1 : a+=k[0]&0xff; break;
        case 0 : return c;          /* zero length strings require no mixing */
        }

#else /* make valgrind happy */

        {
            const uint8_t *k8 = (const uint8_t *)k;  /* byte view of the tail */

            switch(length)
            {
            case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
            case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
            case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
            case 9 : c+=k8[8];                   /* fall through */
            case 8 : b+=k[1]; a+=k[0]; break;
            case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
            case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
            case 5 : b+=k8[4];                   /* fall through */
            case 4 : a+=k[0]; break;
            case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
            case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
            case 1 : a+=k8[0]; break;
            case 0 : return c;
            }
        }

#endif /* !valgrind */

    } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
        const uint16_t *k = (const uint16_t *)key;   /* read 16-bit chunks */
        const uint8_t  *k8;

        /*--------------- all but last block: aligned reads and different mixing */
        while (length > 12)
        {
            a += k[0] + (((uint32_t)k[1])<<16);
            b += k[2] + (((uint32_t)k[3])<<16);
            c += k[4] + (((uint32_t)k[5])<<16);
            mix(a,b,c);
            length -= 12;
            k += 6;
        }

        /*----------------------------- handle the last (probably partial) block */
        k8 = (const uint8_t *)k;
        switch(length)
        {
        case 12: c+=k[4]+(((uint32_t)k[5])<<16);
                 b+=k[2]+(((uint32_t)k[3])<<16);
                 a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
        case 10: c+=k[4];
                 b+=k[2]+(((uint32_t)k[3])<<16);
                 a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 9 : c+=k8[8];                      /* fall through */
        case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
                 a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
        case 6 : b+=k[2];
                 a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 5 : b+=k8[4];                      /* fall through */
        case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
                 break;
        case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
        case 2 : a+=k[0];
                 break;
        case 1 : a+=k8[0];
                 break;
        case 0 : return c;                      /* zero length requires no mixing */
        }

    } else {                      /* need to read the key one byte at a time */
        const uint8_t *k = (const uint8_t *)key;

        /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
        while (length > 12)
        {
            a += k[0];
            a += ((uint32_t)k[1])<<8;
            a += ((uint32_t)k[2])<<16;
            a += ((uint32_t)k[3])<<24;
            b += k[4];
            b += ((uint32_t)k[5])<<8;
            b += ((uint32_t)k[6])<<16;
            b += ((uint32_t)k[7])<<24;
            c += k[8];
            c += ((uint32_t)k[9])<<8;
            c += ((uint32_t)k[10])<<16;
            c += ((uint32_t)k[11])<<24;
            mix(a,b,c);
            length -= 12;
            k += 12;
        }

        /*-------------------------------- last block: affect all 32 bits of (c) */
        switch(length)                /* all the case statements fall through */
        {
        case 12: c+=((uint32_t)k[11])<<24;      /* fall through */
        case 11: c+=((uint32_t)k[10])<<16;      /* fall through */
        case 10: c+=((uint32_t)k[9])<<8;        /* fall through */
        case 9 : c+=k[8];                       /* fall through */
        case 8 : b+=((uint32_t)k[7])<<24;       /* fall through */
        case 7 : b+=((uint32_t)k[6])<<16;       /* fall through */
        case 6 : b+=((uint32_t)k[5])<<8;        /* fall through */
        case 5 : b+=k[4];                       /* fall through */
        case 4 : a+=((uint32_t)k[3])<<24;       /* fall through */
        case 3 : a+=((uint32_t)k[2])<<16;       /* fall through */
        case 2 : a+=((uint32_t)k[1])<<8;        /* fall through */
        case 1 : a+=k[0];
                 break;
        case 0 : return c;
        }
    }

    final(a,b,c);
    return c;
}
测试
　　对 1000 万个 53 字节长的 key 做 hash，bob_hash 的结果：
　　real 0m0.790s
user 0m0.788s
sys 0m0.000s
　　time33是：
　　real 0m1.041s
user 0m1.028s
sys 0m0.004s
　　
　　

页: [1]

运维网's Archiver

更快bobhash, 比time33快 (memcached也使用)