Redis源码研究--字典
计划每天花1小时学习Redis 源码。在博客上做个记录。--------6月18日-----------
redis的字典dict主要涉及几个数据结构,
dictEntry:具体的k-v链表结点
dictht:哈希表
dict:字典
具体关系为
1 typedef struct dict {
2 dictType *type;
3 void *privdata;
4 dictht ht[2];
5 int rehashidx; /* rehashing not in progress if rehashidx == -1 */
6 int iterators; /* number of iterators currently running */
7 } dict;
1 typedef struct dictht {
2 dictEntry **table;
3 unsigned long size;
4 unsigned long sizemask;
5 unsigned long used;
6 } dictht;
1 typedef struct dictEntry {
2 void *key;
3 union {
4 void *val;
5 uint64_t u64;
6 int64_t s64;
7 } v;
8 struct dictEntry *next;
9 } dictEntry;
一个字典有两个哈希表, 冲突后采用了链地址法,很好理解。
一些简单操作采用了宏
#define dictGetKey(he) ((he)->key)
#define dictGetVal(he) ((he)->v.val)
#define dictGetSignedIntegerVal(he) ((he)->v.s64)
#define dictGetUnsignedIntegerVal(he) ((he)->v.u64)
------------6月19日----------------------
字典具体用到了两种哈希算法,我只看了简单的那一种,没想到代码竟然可以那么少,算法名字为djb2,
1 /* And a case insensitive hash function (based on djb hash) */
2 unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
3 unsigned int hash = (unsigned int)dict_hash_function_seed;
4
5 while (len--)
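6 hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
7 return hash;
8 }
创建并初始化字典:
1 dict *dictCreate(dictType *type, void *privDataPtr){
2 dict *d = zmalloc(sizeof(*d));
3
4 _dictInit(d,type,privDataPtr);
5 return d;
6 }
7 int _dictInit(dict *d, dictType *type, void *privDataPtr){
8 _dictReset(&d->ht[0]);
9 _dictReset(&d->ht[1]);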
10
11 d->type = type;
12 d->privdata = privDataPtr;
13 d->rehashidx = -1;
14 d->iterators = 0;
15
16 return DICT_OK;
17 }
18
19 static void _dictReset(dictht *ht){
20 ht->table = NULL;
21 ht->size = 0;
22 ht->sizemask = 0;
23 ht->used = 0;
24 }
学了这么多年c语言了,malloc(sizeof(*d))我还是第一次看到。
说到sizeof,我还要提一句:sizeof一般是在编译期确定的;但c99加入了变长数组(VLA)这一概念,当操作数是变长数组时,sizeof要到运行时才能求值。csdn上的回答(认为sizeof一律在编译期确定)对c99并不完全正确。
对字典进行紧缩处理,让 哈希表中的数/哈希表长度接近1:
1 int dictResize(dict *d){
2 int minimal;
3
4 if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
5
6 minimal = d->ht[0].used;
7
8 if (minimal < DICT_HT_INITIAL_SIZE)
9 minimal = DICT_HT_INITIAL_SIZE;
10
11 return dictExpand(d, minimal);
12 }
13
14 #define dictIsRehashing(ht) ((ht)->rehashidx != -1)
15 #define DICT_HT_INITIAL_SIZE 4
当字典正在Rehash的时候不能进行Resize操作,初始时哈希表大小为4,哈希表大小一般都是2的幂次方。
如果minimal是5,经过dictExpand后,哈希表大小变为8.
1 static unsigned long _dictNextPower(unsigned long size){
2 unsigned long i = DICT_HT_INITIAL_SIZE;
3
4 if (size >= LONG_MAX) return LONG_MAX;
5 while(1) {
6 if (i >= size)
7 return i;
8 i *= 2;
9 }
10 }
11
12 int dictExpand(dict *d, unsigned long size){
13 dictht n; /* the new hash table */
14
15 unsigned long realsize = _dictNextPower(size);
16
17 /* the size is invalid if it is smaller than the number of
18 * elements already inside the hash table */
19 if (dictIsRehashing(d) || d->ht[0].used > size)
20 return DICT_ERR;
21
22 /* Allocate the new hash table and initialize all pointers to NULL */
23 n.size = realsize;
24 n.sizemask = realsize-1;
25 n.table = zcalloc(realsize*sizeof(dictEntry*));
26 n.used = 0;
27
28 /* Is this the first initialization? If so it's not really a rehashing
29 * we just set the first hash table so that it can accept keys. */
30 if (d->ht[0].table == NULL) {
31 d->ht[0] = n;
32 return DICT_OK;
33 }
34
35 /* Prepare a second hash table for incremental rehashing */
36 d->ht[1] = n;
37 d->rehashidx = 0;
38
39 return DICT_OK;
40 }
新建了一个哈希表n,size是扩展后的size,ht[0].table 为空说明这是第一次初始化,不是扩展,直接把n赋给ht[0]。
ht[0].table 不为空,说明这是一次扩展,把n赋给ht[1],ReHash标志rehashidx也被设为0.
上边这段不大好理解,先看后面的,一会返过来再研究dictExpand函数。
--------------------6月20日--------------------------
向字典中添加元素需要调用dictAdd函数:
1 /* Add an element to the target hash table */
2 int dictAdd(dict *d, void *key, void *val){
3 dictEntry *entry = dictAddRaw(d,key);
4
5 if (!entry) return DICT_ERR;
6 dictSetVal(d, entry, val);
7 return DICT_OK;
8 }
具体实现需要看dictAddRaw函数:
1 dictEntry *dictAddRaw(dict *d, void *key){
2 int index;
3 dictEntry *entry;
4 dictht *ht;
5
6 if (dictIsRehashing(d)) _dictRehashStep(d);
7
8 /* Get the index of the new element, or -1 if
9 * the element already exists. */
10 if ((index = _dictKeyIndex(d, key)) == -1)
11 return NULL;
12
13 /* Allocate the memory and store the new entry */
14 ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
15 entry = zmalloc(sizeof(*entry));
16 entry->next = ht->table[index];
17 ht->table[index] = entry;
18 ht->used++;
19
20 /* Set the hash entry fields. */
21 dictSetKey(d, entry, key);
22 return entry;
23 }
先判断是不是在进行Rehash,如果在Rehash,执行渐进式Rehash。
找到要插入的key的位置,如果相同的key已经存在了,返回NULL
如果在进行Rehash,ht指向ht[1]表(否则指向ht[0]表),然后利用链表头插法(这个我熟)将entry插入到table[index]的链表头,更新used。
添加key前需要查找key的位置:
1 /* Returns the index of a free slot that can be populated with
2* an hash entry for the given 'key'.
3* If the key already exists, -1 is returned.
4*
5* Note that if we are in the process of rehashing the hash table, the
6* index is always returned in the context of the second (new) hash table. */
7 static int _dictKeyIndex(dict *d, const void *key){
8 unsigned int h, idx, table;
9 dictEntry *he;
10
11 /* Expand the hash table if needed */
12 if (_dictExpandIfNeeded(d) == DICT_ERR)
13 return -1;
14 /* Compute the key hash value */
15 h = dictHashKey(d, key);
16 for (table = 0; table <= 1; table++) {
17 idx = h & d->ht[table].sizemask;
18 /* Search if this slot does not already contain the given key */
19 he = d->ht[table].table[idx];
20 while(he) {
21 if (dictCompareKeys(d, key, he->key))
22 return -1;
23 he = he->next;
24 }
25 if (!dictIsRehashing(d)) break;
26 }
27 return idx;
28 }
插入之前,程序会检查一下哈希表空间是否够,需不需要expand。通过某种哈希算法计算key对应的哈希值h,sizemask二进制格式大体是这样的011111111,哈希值跟它一与,相当于只保留了后面几位。算出来的idx就是要插入的索引号。然后需要比较在这个索引上的链表中有没有跟要插入的key一样的,如果重复了,返回-1.
最后判断下当前如果没有在进行Rehash,ht[1]表就不用管了。
-----------------------6月21日---------------------
1 /* Expand the hash table if needed */
2 static int _dictExpandIfNeeded(dict *d){
3 /* Incremental rehashing already in progress. Return. */
4 if (dictIsRehashing(d)) return DICT_OK;
5
6 /* If the hash table is empty expand it to the initial size. */
7 if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
8
9 /* If we reached the 1:1 ratio, and we are allowed to resize the hash
10 * table (global setting) or we should avoid it but the ratio between
11 * elements/buckets is over the "safe" threshold, we resize doubling
12 * the number of buckets. */
13 if (d->ht[0].used >= d->ht[0].size &&
14 (dict_can_resize ||
15 d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
16 {
17 return dictExpand(d, d->ht[0].used*2);
18 }
19 return DICT_OK;
20 }
函数名前面带下划线的都表示这是private的。程序第4行又是先判断是否正在进行Rehash,
为什么要说又呢
如果哈希表是空的,那么我们扩展到DICT_HT_INITIAL_SIZE(4)个。
第13行有点不理解,used什么时候会大于size啊????标记一下,以后再看。
dict_can_resize是个全局变量。dict_force_resize_ratio = 5.
/* Using dictEnableResize() / dictDisableResize() we make possible to
* enable/disable resizing of the hash table as needed. This is very important
* for Redis, as we use copy-on-write and don't want to move too much memory
* around when there is a child performing saving operations.
*
* Note that even when dict_can_resize is set to 0, not all resizes are
* prevented: an hash table is still allowed to grow if the ratio between
* the number of elements and the buckets > dict_force_resize_ratio. */
1 void dictEnableResize(void) {
2 dict_can_resize = 1;
3 }
4
5 void dictDisableResize(void) {
6 dict_can_resize = 0;
7 }
字典的 rehash 操作实际上就是执行以下任务:
[*]创建一个比 ht[0]->table 更大的 ht[1]->table ;
[*]将 ht[0]->table 中的所有键值对迁移到 ht[1]->table ;
[*]将原有 ht[0] 的数据清空,并将 ht[1] 替换为新的 ht[0] ;
经过以上步骤之后, 程序就在不改变原有键值对数据的基础上, 增大了哈希表的大小。
--------------6月22日---------------------------
先上Rehash的代码
1 int dictRehash(dict *d, int n) {
2 if (!dictIsRehashing(d)) return 0;
3
4 while(n--) {
5 dictEntry *de, *nextde;
6
7 /* Check if we already rehashed the whole table... */
8 if (d->ht[0].used == 0) {
9 zfree(d->ht[0].table);
10 d->ht[0] = d->ht[1];
11 _dictReset(&d->ht[1]);
12 d->rehashidx = -1;
13 return 0;
14 }
15
16 /* Note that rehashidx can't overflow as we are sure there are more
17 * elements because ht.used != 0 */
18 assert(d->ht[0].size > (unsigned)d->rehashidx);
19 while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
20 de = d->ht[0].table[d->rehashidx];
21 /* Move all the keys in this bucket from the old to the new hash HT */
22 while(de) {
23 unsigned int h;
24
25 nextde = de->next;
26 /* Get the index in the new hash table */
27 h = dictHashKey(d, de->key) & d->ht[1].sizemask;
28 de->next = d->ht[1].table[h];
29 d->ht[1].table[h] = de;
30 d->ht[0].used--;
31 d->ht[1].used++;
32 de = nextde;
33 }
34 d->ht[0].table[d->rehashidx] = NULL;
35 d->rehashidx++;
36 }
37 return 1;
38 }
n步Rehash,在ht[0]中找到第一个不为空的table[rehashidx],将这个位置的链表(可能只有一个元素)全部移到ht[1]中,并更新ht[0].used、ht[1].used。
执行过程中,ht[0]中的元素如果都已经转到了ht[1]中,即ht[0].used == 0,停止执行,释放ht[0].table指向的空间,ht[1]变为ht[0],将rehashidx置为-1。
字典还剩一小部分,大体意思我弄懂了,加上之前看的动态字符串sds、双向链表adlist,加上空格注释统计了下共2248行。
1 341 adlist.c
2 93 adlist.h
3 810 dict.c
4 173 dict.h
5 732 sds.c
6 99 sds.h
7 2248 total
主要参考了《Redis 设计与实现》 。谢谢90后作者了。
页:
[1]