mirror of
https://gitee.com/Lamdonn/varch.git
synced 2025-12-06 16:56:42 +08:00
370 lines
14 KiB
Markdown
370 lines
14 KiB
Markdown
## 介绍
|
||
|
||
dict字典是逻辑上离散的容器,与set容器很相似:set容器是以`index - data`形式存在,而dict容器是以`key - value`形式存在;set的index是整型数,dict的key为字符串(这里和python的dict不太一样,python的key除了可以是字符串,还可以是整型或者元组等;varch中类似python的dict的容器叫做map映射);set的data和dict的value本质是同一个东西。
|
||
varch的dict容器,在底层实现上采用了**哈希表**,查找迅速,而且也支持随机访问,占用空间相对较小。可以通过迭代器遍历dict。
|
||
|
||
## 接口
|
||
|
||
### 创建和删除dict对象
|
||
```c
|
||
dict_t dict_create(int dsize);
|
||
void dict_delete(dict_t dict);
|
||
#define dict(type) // 为了更简便的使用,对dict_create套一层宏定义
|
||
#define _dict(dict) // 对dict_delete套一层宏定义,并在dict删除后置为空
|
||
```
|
||
其中**dict_t**为dict的结构体指针类型,创建方法会返回一个空的dict对象,创建失败则返回NULL,其中`dsize`为传入数据的大小。删除方法则是删除传入的dict对象。创建方法和删除方法应该成对使用,创建出来的dict在使用结束后应该删除掉。
|
||
```c
|
||
void test(void)
|
||
{
|
||
dict_t dict = dict(int); // 定义并创建一个int型的dict
|
||
_dict(dict); // 成对使用,用完即删除
|
||
}
|
||
```
|
||
|
||
### dict的插入和移除
|
||
```c
|
||
void* dict_insert(dict_t dict, const char *key, void *value);
|
||
int dict_erase(dict_t dict, const char *key);
|
||
```
|
||
该dict是通过hash值来定位的,可以很快地通过hash值定位到数据存储的位置。
|
||
插入的方法是添加指定key并将数据复制到这个键(在其中,value传入NULL时则只是开辟空间,不进行赋值),在插入key的过程中会进行查重,保证key的唯一性,插入成功后返回插入后的数据的地址,插入失败则是返回NULL。而移除则是移除指定键的数据,成功返回1,失败返回0。
|
||
|
||
### dict数据的读写
|
||
```c
|
||
void* dict_value(dict_t dict, const char *key);
|
||
void* dict_error(dict_t dict);
|
||
#define dict_at(dict, type, key)
|
||
```
|
||
`dict_value`方法就是根据键来获取数据的地址,成功时返回指定数据的地址,失败时返回`dict_error()`。而`dict_at`则是在`dict_value`的基础上加上了类型。`dict_value`具备读写保护机制:因为失败时返回的是`dict_error()`而不是NULL,所以在使用`dict_at`方法时即使`key`写错了,也只会修改`dict_error()`指向的内容,而不会导致崩溃。
|
||
dict的随机访问是通过计算出键的哈希值,根据哈希值定位在哈希表中的数据。
|
||
|
||
```c
|
||
void test(void)
|
||
{
|
||
dict_t dict = dict(int);
|
||
int value;
|
||
|
||
value = 100; dict_insert(dict, "hello", &value);
|
||
value = 1; dict_insert(dict, "ZhangSan", &value);
|
||
value = 2; dict_insert(dict, "LiSi", &value);
|
||
value = 3; dict_insert(dict, "WangWu", &value);
|
||
value = 4; dict_insert(dict, "SunLiu", &value);
|
||
value = 5; dict_insert(dict, "QianQi", &value);
|
||
|
||
printf("dict[hello] = %d\r\n", dict_at(dict, int, "hello"));
|
||
printf("dict[SunLiu] = %d\r\n", dict_at(dict, int, "SunLiu"));
|
||
|
||
_dict(dict);
|
||
}
|
||
```
|
||
结果:
|
||
```
|
||
dict[hello] = 100
|
||
dict[SunLiu] = 4
|
||
```
|
||
|
||
### dict的大小和数据大小
|
||
```c
|
||
int dict_size(dict_t dict);
|
||
int dict_dsize(dict_t dict);
|
||
```
|
||
dict的`size`很好理解,也就是像数组那样的大小,`dsize`也就是创建时候传入的数据的大小。
|
||
```c
|
||
void test(void)
|
||
{
|
||
dict_t dict = dict(int);
|
||
int value;
|
||
|
||
value = 100; dict_insert(dict, "hello", &value);
|
||
value = 1; dict_insert(dict, "ZhangSan", &value);
|
||
value = 2; dict_insert(dict, "LiSi", &value);
|
||
value = 3; dict_insert(dict, "WangWu", &value);
|
||
value = 4; dict_insert(dict, "SunLiu", &value);
|
||
value = 5; dict_insert(dict, "QianQi", &value);
|
||
|
||
printf("size = %d, value size = %d\r\n", dict_size(dict), dict_dsize(dict));
|
||
|
||
_dict(dict);
|
||
}
|
||
```
|
||
结果:
|
||
```
|
||
size = 6, value size = 4
|
||
```
|
||
|
||
### dict查找
|
||
```c
|
||
int dict_find(dict_t dict, const char *key);
|
||
```
|
||
这个方法其实套`dict_value`实现,只是find成功返回1失败返回0。
|
||
|
||
### dict迭代器
|
||
|
||
```c
|
||
void dict_it_init(dict_t dict);
|
||
void* dict_it_get(dict_t dict, char **key);
|
||
```
|
||
|
||
dict也支持内置的迭代器,dict的迭代器主要用于遍历。数组在遍历的时候,下标是从0开始逐一递增的;但是dict的key是离散型的,无法通过这种逐一递增的方式进行遍历,所以这里给定了两个迭代器函数用于遍历dict。
|
||
`dict_it_init`初始化迭代器。`dict_it_get`获取迭代,更新迭代位置,`*key`为输出的key(当前所在的key,也可以传入NULL不接收),返回迭代位置的数据。
|
||
通过`dict_size`来把控迭代次数。
|
||
|
||
```c
|
||
void test(void)
|
||
{
|
||
dict_t dict = dict(int);
|
||
int value;
|
||
char *key;
|
||
void *data;
|
||
int i;
|
||
|
||
value = 100; dict_insert(dict, "hello", &value);
|
||
value = 1; dict_insert(dict, "ZhangSan", &value);
|
||
value = 2; dict_insert(dict, "LiSi", &value);
|
||
value = 3; dict_insert(dict, "WangWu", &value);
|
||
value = 4; dict_insert(dict, "SunLiu", &value);
|
||
value = 5; dict_insert(dict, "QianQi", &value);
|
||
|
||
dict_it_init(dict);
|
||
i = dict_size(dict);
|
||
while (i--)
|
||
{
|
||
data = dict_it_get(dict, &key);
|
||
printf("dict[%s] = %d\r\n", key, *(int *)data);
|
||
}
|
||
|
||
_dict(dict);
|
||
}
|
||
```
|
||
|
||
结果:
|
||
```
|
||
dict[LiSi] = 2
|
||
dict[QianQi] = 5
|
||
dict[SunLiu] = 4
|
||
dict[WangWu] = 3
|
||
dict[ZhangSan] = 1
|
||
dict[hello] = 100
|
||
```
|
||
|
||
## 源码解析
|
||
|
||
### dict结构体
|
||
dict容器的所有结构体都是隐式的,也就是不能直接访问到结构体成员,这样的方式保证了模块的独立与安全,防止外部调用修改结构体的成员导致dict存储结构的破坏。所以dict容器只在头文件中留了唯一一个dict的类型声明,结构体的定义都在源文件中。只能使用dict容器提供的方法对dict对象进行操作。
|
||
dict类型声明
|
||
```c
|
||
typedef struct DICT *dict_t;
|
||
```
|
||
使用时候,只是用`dict_t`即可。
|
||
```c
|
||
/* dict type define */
|
||
typedef struct DICT
|
||
{
|
||
groove_t *base; /* base address for groove data */
|
||
void *error; /* error space */
|
||
int vsize; /* size of value */
|
||
unsigned int size; /* size of dict */
|
||
unsigned int capacity; /* capacity of dict */
|
||
unsigned int it; /* iterator index */
|
||
} DICT;
|
||
```
|
||
`DICT`结构体中包含了6个成员,`base`(哈希表的基地址),`error`(dict的错误区,当随机访问到不存在的key时候就会返回这个错误的地址,如此在使用`at`方法时候不会操作到无效的内存地址),`size`(dict的大小,也就是dict的长度),`vsize`(每个数据的大小),`capacity`(哈希表的容量),`it`(迭代器遍历的时候,记录当前访问的哈希表索引)。
|
||
dict容器最主要的问题就是解决哈希冲突以及哈希表容量调整的问题。
|
||
```c
|
||
/* dict node type define */
|
||
typedef struct
|
||
{
|
||
unsigned int hash; /* hash value */
|
||
char *key; /* key */
|
||
void *value; /* value */
|
||
} GROOVE, *groove_t;
|
||
```
|
||
在dict里面,数据通过数组进行存储,每个数组项只是存储每个槽(groove)的地址,每个槽里面存储3部分的内容:当前这个槽在当前hash表中的hash值(为了更快地查找hash冲突的键值对),以及实际存储的键和值。
|
||
```
|
||
+------+------------------------+------------------------+
|
||
| hash | key | value |
|
||
+------+------------------------+------------------------+
|
||
| ... | ... | ... |
|
||
+------+------------------------+------------------------+
|
||
| ... | ... | ... |
|
||
+------+------------------------+------------------------+
|
||
```
|
||
|
||
### dict创建及删除
|
||
|
||
```c
|
||
dict_t dict_create(int vsize)
|
||
{
|
||
dict_t dict;
|
||
if (vsize <= 0) return NULL;
|
||
dict = (dict_t)malloc(sizeof(DICT));
|
||
if (!dict) return NULL;
|
||
dict->error = malloc(vsize);
|
||
if (!dict->error) { free(dict); return NULL; }
|
||
dict->base = NULL;
|
||
dict->vsize = vsize;
|
||
dict->size = 0;
|
||
dict->capacity = 0;
|
||
dict->it = 0;
|
||
return dict;
|
||
}
|
||
```
|
||
dict的创建,只是创建了一个空的dict,对基本参数进行了初始化。而删除方法就是释放在对dict操作的过程中分配的内存空间。
|
||
|
||
### dict的插入
|
||
```c
|
||
void* dict_insert(dict_t dict, const char *key, void *value)
|
||
{
|
||
groove_t groove = NULL;
|
||
unsigned int hash = 0, index;
|
||
int len = 0;
|
||
|
||
/*
|
||
对传入的形参有效性的检查
|
||
*/
|
||
if (!dict) return NULL;
|
||
if (!key) return NULL;
|
||
|
||
/*
|
||
检查大小有没有超过哈希表容量的 3/4 了,超过 3/4 就对哈希表进行扩容
|
||
因为超过 3/4 说明哈希表已经很满了,查找的效率会大大降低,所以就要保持哈希表一些空闲的空间
|
||
这里扩容,是呈2的指数依次变化的,也就是 4、8、16、32 ... 这样子增长
|
||
*/
|
||
/* the current capacity affects the search rate and needs to be expanded */
|
||
if (dict->size >= ((dict->capacity >> 2) + (dict->capacity >> 1))) /* size exceeds 3/4 of capacity */
|
||
{
|
||
/*
|
||
在扩容的过程中,先是重新分配一块新容量的空间
|
||
然后把旧的哈希表里面的成员,一个个重新哈希插回到新的哈希表中
|
||
*/
|
||
/* allocate new hash table space */
|
||
if (!dict_resize(dict, dict->capacity < MIN_CAPACITY ? MIN_CAPACITY : dict->capacity << 1)) return NULL;
|
||
}
|
||
|
||
/*
|
||
计算插进来的key的哈希值(默认使用了bkdr哈希算法),让hash值对哈希表容量进行取模
|
||
然后在检查这个hash值有没有冲突了,冲突了的话,通过开放定址法,线性+1向后寻找空闲空间
|
||
在向后寻找的过程中,检查相应槽中的hash值,hash值为-1表示被erase方法移除了而实际槽没有被删除的标识,也是视为这个槽可以用
|
||
*/
|
||
/* find a free groove */
|
||
len = strlen(key);
|
||
hash = hash_bkdr((void *)key, len) % dict->capacity;
|
||
index = hash;
|
||
while (dict->base[index] && dict->base[index]->hash != -1)
|
||
{
|
||
index = (index + 1) % dict->capacity;
|
||
if (index == hash) return NULL;
|
||
}
|
||
|
||
/*
|
||
分配槽的空间
|
||
*/
|
||
/* space allocation */
|
||
groove = dict->base[index];
|
||
if (!groove) groove = (groove_t)malloc(sizeof(GROOVE));
|
||
if (!groove) return NULL;
|
||
groove->key = (char *)malloc(len + 1);
|
||
if (!groove->key) { if (!dict->base[index]) free(groove); return NULL; }
|
||
groove->value = malloc(dict->vsize);
|
||
if (!groove->value) { free(groove->key), groove->key = NULL; if (!dict->base[index]) free(groove); return NULL; }
|
||
|
||
/* assign */
|
||
groove->hash = hash; // 记录当前槽的哈希值
|
||
strcpy(groove->key, key);
|
||
if (value) memcpy(groove->value, value, dict->vsize);
|
||
|
||
/* insert */
|
||
dict->base[index] = groove;
|
||
dict->size++;
|
||
|
||
return groove->value;
|
||
}
|
||
```
|
||
实际存储例子:
|
||
```c
|
||
value = 100; dict_insert(dict, "hello", &value);
|
||
value = 1; dict_insert(dict, "ZhangSan", &value);
|
||
value = 2; dict_insert(dict, "LiSi", &value);
|
||
value = 3; dict_insert(dict, "WangWu", &value);
|
||
value = 4; dict_insert(dict, "SunLiu", &value);
|
||
value = 5; dict_insert(dict, "QianQi", &value);
|
||
value = 8; dict_insert(dict, "WangBa", &value);
|
||
value = 9; dict_insert(dict, "LiuJiu", &value);
|
||
```
|
||
哈希表
|
||
```
|
||
+------+------------------------+------------------------+
|
||
| hash | key | value |
|
||
+------+------------------------+------------------------+
|
||
| 0 | ZhangSan | 1 |
|
||
+------+------------------------+------------------------+
|
||
| 1 | QianQi | 5 |
|
||
+------+------------------------+------------------------+
|
||
| | | |
|
||
+------+------------------------+------------------------+
|
||
| | | |
|
||
+------+------------------------+------------------------+
|
||
| 4 | SunLiu | 4 |
|
||
+------+------------------------+------------------------+
|
||
| | | |
|
||
+------+------------------------+------------------------+
|
||
| 6 | WangBa | 8 |
|
||
+------+------------------------+------------------------+
|
||
| 7 | LiSi | 2 |
|
||
+------+------------------------+------------------------+
|
||
| | | |
|
||
+------+------------------------+------------------------+
|
||
| 9 | WangWu | 3 |
|
||
+------+------------------------+------------------------+
|
||
| | | |
|
||
+------+------------------------+------------------------+
|
||
| | | |
|
||
+------+------------------------+------------------------+
|
||
| | | |
|
||
+------+------------------------+------------------------+
|
||
| | | |
|
||
+------+------------------------+------------------------+
|
||
| 14 | hello | 100 |
|
||
+------+------------------------+------------------------+
|
||
| 14 | LiuJiu | 9 |
|
||
+------+------------------------+------------------------+
|
||
```
|
||
|
||
|
||
### dict的移除
|
||
```c
|
||
int dict_erase(dict_t dict, const char *key)
|
||
{
|
||
groove_t groove;
|
||
unsigned int index, next;
|
||
|
||
if (!dict) return 0;
|
||
if (!key) return 0;
|
||
|
||
/*
|
||
要移除就得先找到这个key所在哈希表的位置
|
||
find_index就是根据类似前面的插入的方法同逻辑,没找到就返回-1
|
||
*/
|
||
index = find_index(dict, key);
|
||
if (index == -1) return 0;
|
||
groove = dict->base[index];
|
||
|
||
if (groove->key) { free(groove->key); groove->key = NULL; } // 把key释放并置为NULL
|
||
if (groove->value) { free(groove->value); groove->value = NULL; } // 把value释放并置为NULL
|
||
groove->hash = -1; // 槽就不真正释放,而是把hash值标志为-1(-1哈希值在哈希表中用不上)
|
||
dict->size--;
|
||
|
||
/*
|
||
如同插入方法,插入要适配大小容量,移除也是
|
||
当大小小于容量 1/4 时候,就调整容量,缩小到一半,这样就占用 1/2 了
|
||
*/
|
||
if (dict->capacity > MIN_CAPACITY && dict->size <= (dict->capacity >> 2)) /* size less than 1/4 of capacity */
|
||
{
|
||
dict_resize(dict, dict->capacity >> 1);
|
||
}
|
||
|
||
return 1;
|
||
}
|
||
```
|
||
|