更快bobhash, 比time33快 (memcached也使用)

熬死你的 · 发表于 2015-9-1 04:01:18

　　http://burtleburtle.net/bob/hash/doobs.html
　　Bob 对他的 hash 做了优化，第二版的速度提高了3倍：http://burtleburtle.net/bob/c/lookup3.c
　　下面是我从中提取的变长 key、小端(Intel 机器)版本：

#include <stddef.h>     /* defines size_t */
#include <stdint.h>     /* defines uint32_t etc */
#include <sys/param.h>  /* attempt to define endianness */
#ifdef linux
# include <endian.h>    /* attempt to define endianness */
#endif
/*
 * Compile-time byte-order detection.
 *
 * HASH_LITTLE_ENDIAN is 1 on targets known to be little-endian and
 * HASH_BIG_ENDIAN is 1 on targets known to be big-endian.  When the byte
 * order cannot be determined both are 0; bob_hash() then reads the key one
 * byte at a time, which is correct on any machine.
 *
 * Checked in order:
 *   1. __BYTE_ORDER__ / __ORDER_*_ENDIAN__, predefined by GCC and Clang, so
 *      detection does not depend on which system headers were included;
 *   2. __BYTE_ORDER / __LITTLE_ENDIAN / __BIG_ENDIAN from <endian.h>;
 *   3. a list of architecture macros (best guess, may need adjustment).
 */
#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || \
    (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
     __BYTE_ORDER == __LITTLE_ENDIAN) || \
    (defined(i386) || defined(__i386__) || defined(__i486__) || \
     defined(__i586__) || defined(__i686__) || defined(__x86_64__) || \
     defined(vax) || defined(MIPSEL))
# define HASH_LITTLE_ENDIAN 1
# define HASH_BIG_ENDIAN 0
#elif (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
       __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || \
      (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
       __BYTE_ORDER == __BIG_ENDIAN) || \
      (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
#else
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 0
#endif
/* Number of slots in a table indexed by an n-bit hash value (0 <= n < 32). */
#define hashsize(n) ((uint32_t)1<<(n))
/* Mask that keeps the low n bits of a hash value. */
#define hashmask(n) (hashsize(n)-1)
/*
 * Rotate x left by k bits.  x must be a uint32_t and k must be in 1..31:
 * k == 0 or k == 32 would shift by the full width, which is undefined.
 */
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

/*
 * mix -- stir the three 32-bit state words a, b, c in place.
 * bob_hash() applies it once per complete 12-byte block of the key.
 * a, b and c must be modifiable uint32_t lvalues; each is evaluated more
 * than once, so do not pass expressions with side effects.
 * Wrapped in do { } while (0) so that "mix(a,b,c);" is a single statement
 * and is safe in an unbraced if/else.
 */
#define mix(a,b,c) \
do { \
  (a) -= (c);  (a) ^= rot((c), 4);  (c) += (b); \
  (b) -= (a);  (b) ^= rot((a), 6);  (a) += (c); \
  (c) -= (b);  (c) ^= rot((b), 8);  (b) += (a); \
  (a) -= (c);  (a) ^= rot((c),16);  (c) += (b); \
  (b) -= (a);  (b) ^= rot((a),19);  (a) += (c); \
  (c) -= (b);  (c) ^= rot((b), 4);  (b) += (a); \
} while (0)

/*
 * final -- last scramble of the three state words; bob_hash() returns c
 * after applying it to the final (possibly partial) block.
 * Same argument requirements and statement-safety as mix().
 */
#define final(a,b,c) \
do { \
  (c) ^= (b); (c) -= rot((b),14); \
  (a) ^= (c); (a) -= rot((c),11); \
  (b) ^= (a); (b) -= rot((a),25); \
  (c) ^= (b); (c) -= rot((b),16); \
  (a) ^= (c); (a) -= rot((c),4);  \
  (b) ^= (a); (b) -= rot((a),14); \
  (c) ^= (b); (c) -= rot((b),24); \
} while (0)
/*
 * bob_hash -- hash a variable-length key into a 32-bit value.
 *
 *   key     : pointer to the key bytes; any alignment is accepted
 *   length  : length of the key in bytes (only its low 32 bits seed the state)
 *   initval : caller-chosen seed, added into the initial state
 *
 * Returns the final value of the state word c.  A zero-length key returns
 * 0xdeadbeef + initval without any mixing.
 *
 * The key is consumed in 12-byte blocks (three 32-bit words a, b, c), with
 * mix() applied after every full block and final() after the last one.
 * Three read strategies are selected from the pointer's alignment:
 *   - 4-byte aligned on a little-endian host: 32-bit loads;
 *   - 2-byte aligned on a little-endian host: 16-bit loads;
 *   - otherwise: byte loads, assembled least-significant byte first.
 *
 * Unless VALGRIND is defined, the 32-bit path loads the whole last word and
 * masks off the bytes past the key, i.e. it reads up to 3 bytes beyond
 * key+length (within the same aligned word).  Define VALGRIND to use the
 * tail code that touches only bytes inside the key.
 */
uint32_t bob_hash( const void *key, size_t length, uint32_t initval)
{
  uint32_t a,b,c;                                        /* internal state */
  /* Lets the pointer be inspected as an integer for the alignment tests
   * (original note: needed for Mac Powerbook G4). */
  union { const void *ptr; size_t i; } u;

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

  u.ptr = key;
  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
    const uint32_t *k = (const uint32_t *)key;       /* read 32-bit chunks */

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]&0xffffff" actually reads beyond the end of the string, but
     * then masks off the part it's not allowed to read.  Because the
     * string is aligned, the masked-off tail is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticeably faster for short strings (like English words).
     */
#ifndef VALGRIND

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
    case 5 : b+=k[1]&0xff; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff; break;
    case 2 : a+=k[0]&0xffff; break;
    case 1 : a+=k[0]&0xff; break;
    case 0 : return c;             /* zero length strings require no mixing */
    }

#else /* make valgrind happy */

    /* Only needed here, so it is declared here: the masked-read variant
     * above never touches individual bytes. */
    const uint8_t *k8 = (const uint8_t *)k;

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
    case 9 : c+=k8[8];                   /* fall through */
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
    case 5 : b+=k8[4];                   /* fall through */
    case 4 : a+=k[0]; break;
    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
    case 1 : a+=k8[0]; break;
    case 0 : return c;
    }

#endif /* !valgrind */

  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
    const uint16_t *k = (const uint16_t *)key;       /* read 16-bit chunks */
    const uint8_t  *k8;

    /*--------------- all but last block: aligned reads and different mixing */
    while (length > 12)
    {
      a += k[0] + (((uint32_t)k[1])<<16);
      b += k[2] + (((uint32_t)k[3])<<16);
      c += k[4] + (((uint32_t)k[5])<<16);
      mix(a,b,c);
      length -= 12;
      k += 6;
    }

    /*----------------------------- handle the last (probably partial) block */
    k8 = (const uint8_t *)k;      /* odd trailing byte is read via k8 */
    switch(length)
    {
    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
    case 10: c+=k[4];
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 9 : c+=k8[8];                      /* fall through */
    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
    case 6 : b+=k[2];
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 5 : b+=k8[4];                      /* fall through */
    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
    case 2 : a+=k[0];
             break;
    case 1 : a+=k8[0];
             break;
    case 0 : return c;                     /* zero length requires no mixing */
    }

  } else {                        /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      a += ((uint32_t)k[1])<<8;
      a += ((uint32_t)k[2])<<16;
      a += ((uint32_t)k[3])<<24;
      b += k[4];
      b += ((uint32_t)k[5])<<8;
      b += ((uint32_t)k[6])<<16;
      b += ((uint32_t)k[7])<<24;
      c += k[8];
      c += ((uint32_t)k[9])<<8;
      c += ((uint32_t)k[10])<<16;
      c += ((uint32_t)k[11])<<24;
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=((uint32_t)k[11])<<24;      /* fall through */
    case 11: c+=((uint32_t)k[10])<<16;      /* fall through */
    case 10: c+=((uint32_t)k[9])<<8;        /* fall through */
    case 9 : c+=k[8];                       /* fall through */
    case 8 : b+=((uint32_t)k[7])<<24;       /* fall through */
    case 7 : b+=((uint32_t)k[6])<<16;       /* fall through */
    case 6 : b+=((uint32_t)k[5])<<8;        /* fall through */
    case 5 : b+=k[4];                       /* fall through */
    case 4 : a+=((uint32_t)k[3])<<24;       /* fall through */
    case 3 : a+=((uint32_t)k[2])<<16;       /* fall through */
    case 2 : a+=((uint32_t)k[1])<<8;        /* fall through */
    case 1 : a+=k[0];
             break;
    case 0 : return c;
    }
  }

  final(a,b,c);
  return c;
}

  测试
  　　1000万个53字节长的key，结果：

　　real 0m0.790s
user 0m0.788s
sys    0m0.000s

　　time33是：

　　real 0m1.041s
user 0m1.028s
sys    0m0.004s
　　

账号		自动登录	找回密码
密码			立即注册

wirelessnetview好用的无线分析工具

亿图图示专家(EDraw Max) V7.9 中文破解版

zabbix3.4.1安装部署+微信推送信息+大屏显

Red Hat OpenShift I: Containers & Kubern

2025 年，C++ 还能“硬核”多久？

RH199 RHCSA Rapid Track

Red Hat RHCE 8 (EX294) Cert Guide

[经验分享] 更快bobhash, 比time33快 (memcached也使用)

浏览过的版块

扫码加入运维网微信交流群