dictEntry *dictFind(dict *d, const void *key)
{
dictEntry *he;
unsigned int h, idx, table;
if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
if (dictIsRehashing(d)) _dictRehashStep(d);//如果正在进行rehash,则进行一次rehash操作
h = dictHashKey(d, key);//计算key的哈希值
//先在ht[0]表上查找
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
while(he) {
if (dictCompareKeys(d, key, he->key))
return he;
he = he->next;
}
//在ht[0]上找不到时,如果现在正进行rehash,key有可能在ht[1]上,需要在ht[1]上查找
if (!dictIsRehashing(d)) return NULL;
}
return NULL;
}
//添加条目到字典中
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
dictEntry *entry = dictAddRaw(d,key);//插入key
if (!entry) return DICT_ERR;
dictSetVal(d, entry, val);//设置key所对应的value
return DICT_OK;
}
/* Low level add. This function adds the entry but instead of setting
* a value returns the dictEntry structure to the user, that will make
* sure to fill the value field as he wishes.
*
* This function is also directly exposed to the user API to be called
* mainly in order to store non-pointers inside the hash value, example:
*
* entry = dictAddRaw(dict,mykey);
* if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
*
* Return values:
*
* If key already exists NULL is returned.
* If key was added, the hash entry is returned to be manipulated by the caller.
*/
dictEntry *dictAddRaw(dict *d, void *key)
{
int index;
dictEntry *entry;
dictht *ht;
if (dictIsRehashing(d)) _dictRehashStep(d); //rehash
//如果key已经存在,则返回null
/* Get the index of the new element, or -1 if
* the element already exists. */
if ((index = _dictKeyIndex(d, key)) == -1)
return NULL;
//如果正在进行rehash,则就把新的元素插入到ht[1]中,否则插入到ht[0]
/* Allocate the memory and store the new entry */
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(sizeof(*entry));
entry->next = ht->table[index];
ht->table[index] = entry;
ht->used++;
/* Set the hash entry fields. */
dictSetKey(d, entry, key); //插入
return entry;
}
当dict没有进行rehash时,元素插入到ht[0]是比较容易的。但如果正在进行rehash,则要把元素插入到ht[1]中。为什么一定要把元素插入到ht[1]中,而不能是ht[0]?原因就在rehash的过程。rehash的过程是把条目由ht[0]移动到ht[1]的过程,当所有条目都移动完毕时,rehash的过程也就完成。要保证rehash过程能完成,需要注意几点:
a. ht[0]的元素不能一直在增,即使元素在增长也不能快于移动元素到ht[1]的速度。
b. 确定下一个要移动的条目(如按某种方法支确定下一个条目,能否遍历所有ht[0]上的条目)
c. 确定何时移动完所有条目
//先在ht[0]中查找,如找不到则在ht[1]中查找,有则删除。
/* Search and remove an element */
static int dictGenericDelete(dict *d, const void *key, int nofree)
{
unsigned int h, idx;
dictEntry *he, *prevHe;
int table;
if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
prevHe = NULL;
while(he) {
if (dictCompareKeys(d, key, he->key)) {
/* Unlink the element from the list */
if (prevHe)
prevHe->next = he->next;
else
d->ht[table].table[idx] = he->next;
if (!nofree) {
dictFreeKey(d, he);
dictFreeVal(d, he);
}
zfree(he);
d->ht[table].used--;
return DICT_OK;
}
prevHe = he;
he = he->next;
}
if (!dictIsRehashing(d)) break;
}
return DICT_ERR; /* not found */
}
/* This function performs just a step of rehashing, and only if there are
* no safe iterators bound to our hash table. When we have iterators in the
* middle of a rehashing we can't mess with the two hash tables otherwise
* some element can be missed or duplicated.
*
* This function is called by common lookup or update operations in the
* dictionary so that the hash table automatically migrates from H1 to H2
* while it is actively used. */
static void _dictRehashStep(dict *d) {
if (d->iterators == 0) dictRehash(d,1); //如果安全迭代器计数器为0,则允许进行rehash操作
}
而从释放迭代器的函数 dictReleaseIterator 可以看到并没有检查 fingerprint的操作,因此可以得出所谓的安全迭代器,实则是指:
a. 迭代过程中可以允许插入、删除条目
b. 迭代过程中不会进行rehash,如开始迭代前已经进行了rehash,则迭代开始后rehash会被暂停,直到迭代完成后rehash接着进行。
现在讨论为什么安全迭代器在遍历过程中不允许rehash,因为如果允许rehash,遍历过程将无法保证,有些元素可能会遍历多次,有些元素会没有遍历到。下面举一些情景:
a. 迭代器现在遍历到ht[0]某个元素x,此时x位于2号桶,由于rehash可以进行,刚好把ht[0]的1号桶的元素Y移动到ht[1]中,此后迭代器遍历完ht[0]后就会遍历到ht[1],会把Y再一次遍历。
b. 迭代器此时正遍历到ht[1]的4号桶,后面的桶都还没遍历,此时rehash过程进行且刚好把ht[0]的所有元素都移动到ht[1]上,rehash过程完成,ht[1]切换到ht[0]。由于迭代器中记录目前正在遍历ht[1],所以此后迭代器遍历ht[1](原来的ht[0])的4号桶后的元素时已经没有元素了,遍历过程结束,而实际上还有一些元素没有被遍历。