1 概述

2 数据结构相关

2.1 内存分配

config.h

#ifndef __CONFIG_H
#define __CONFIG_H

#ifdef __APPLE__
#include <AvailabilityMacros.h>
#endif

/* Use tcmalloc's malloc_size() when available.
* When tcmalloc is used, native OSX malloc_size() may never be used because
* this expects a different allocation scheme. Therefore, *exclusively* use
* either tcmalloc or OSX's malloc_size()! */

//如果系统中存在Google的TC_MALLOC库,redis_malloc_size函数就当做tc_malloc_size函数使用
//tc_malloc比原始的malloc性能好
//redis_malloc_size的功能是获得参数p所指向的内存块的大小
#if defined(USE_TCMALLOC)
#include <google/tcmalloc.h>
#if TC_VERSION_MAJOR >= 1 && TC_VERSION_MINOR >= 6
//HAVE_MALLOC_SIZE用来标记是否定义了redis_malloc_size函数
//可是为什么不直接检查redis_malloc_size是否存在,还要额外定义一个标记呢?
#define HAVE_MALLOC_SIZE 1
#define redis_malloc_size(p) tc_malloc_size(p)
#endif
//或者,如果系统是Mac系统,那么redis_malloc_size函数就当做原始的malloc_size函数使用
#elif defined(__APPLE__)
#include <malloc/malloc.h>
#define HAVE_MALLOC_SIZE 1
#define redis_malloc_size(p) malloc_size(p)
#endif

/* define redis_fstat to fstat or fstat64() */
#if defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
#define redis_fstat fstat64
#define redis_stat stat64
#else
#define redis_fstat fstat
#define redis_stat stat
#endif

/* test for proc filesystem */
//如果是linux系统,当前文件系统就是procfs
#ifdef __linux__
#define HAVE_PROCFS 1
#endif

/* test for task_info() */
//如果是unix系统,就可以使用task_info,macos是基于unix的
#if defined(__APPLE__)
#define HAVE_TASKINFO 1
#endif

/* test for backtrace() */
#if defined(__APPLE__) || defined(__linux__)
#define HAVE_BACKTRACE 1
#endif

/* test for polling API */
#ifdef __linux__
#define HAVE_EPOLL 1
#endif

#if (defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined (__NetBSD__)
#define HAVE_KQUEUE 1
#endif

/* define aof_fsync to fdatasync() in Linux and fsync() for all the rest */
#ifdef __linux__
#define aof_fsync fdatasync
#else
#define aof_fsync fsync
#endif

#endif

zmalloc.c

// zmalloc - total amount of allocated memory aware version of malloc()

//redis是基于内存的数据库,所以内存管理很重要。
//redis把C语言的内存分配函数封装成zmalloc、zfree等z开头的函数,来屏蔽各底层平台的差异。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include "config.h"
#include "zmalloc.h"

//如果定义了HAVE_MALLOC_SIZE,即定义了redis_malloc_size函数,PREFIX_SIZE就是0
//PREFIX_SIZE用于在分配到的的空间头部存储原本申请空间的大小
#ifdef HAVE_MALLOC_SIZE
#define PREFIX_SIZE (0)
#else
//如果没有定义HAVE_MALLOC_SIZE,且当前系统是Solaris,PREFIX_SIZE就是long long类型的长度
#if defined(__sun)
#define PREFIX_SIZE (sizeof(long long))
#else
//否则,PREFIX_SIZE就是size_t的长度
#define PREFIX_SIZE (sizeof(size_t))
#endif
#endif

/* Explicitly override malloc/free etc when using tcmalloc. */
//如果使用了tcmalloc库,就用tcmalloc库函数替换原始的malloc库函数
#if defined(USE_TCMALLOC)
#define malloc(size) tc_malloc(size)
#define calloc(count,size) tc_calloc(count,size)
#define realloc(ptr,size) tc_realloc(ptr,size)
#define free(ptr) tc_free(ptr)
#endif

//update_zmalloc_stat_alloc用于在分配内存的时候更新已分配大小
//__n是实际分配到的空间大小,__size是程序原本请求的空间大小
//__size应该是改了代码以后忘记删掉的参数
//使用do-while(0)封装成代码块,防止宏定义展开的时候出问题
#define update_zmalloc_stat_alloc(__n,__size) do { \
size_t _n = (__n); \
//64位系统中,sizeof(long)通常是8
//malloc分配的内存是8字节对齐的,如果请求分配的内存不是8的倍数,那么malloc就会多分配一点来凑成8的倍数
//如果_n值不是内存分配单元(sizeof(long))的整数倍,说明当前分配的内存大小有碎片,为了与malloc的实际结果匹配,需要补齐到8的整数倍
if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \
if (zmalloc_thread_safe) { \
//如果要考虑线程安全,先加锁再修改used_memory
pthread_mutex_lock(&used_memory_mutex); \
used_memory += _n; \
pthread_mutex_unlock(&used_memory_mutex); \
} else { \
//不考虑线程安全时,直接修改used_memory
used_memory += _n; \
} \
} while(0)

//update_zmalloc_stat_free用于释放已经分配的空间
//__n是待释放的空间大小
#define update_zmalloc_stat_free(__n) do { \
size_t _n = (__n); \
if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \
if (zmalloc_thread_safe) { \
pthread_mutex_lock(&used_memory_mutex); \
used_memory -= _n; \
pthread_mutex_unlock(&used_memory_mutex); \
} else { \
used_memory -= _n; \
} \
} while(0)

//分配得到的内存大小
static size_t used_memory = 0;
//是否要考虑线程安全,默认不考虑
static int zmalloc_thread_safe = 0;
//保证线程安全的锁
pthread_mutex_t used_memory_mutex = PTHREAD_MUTEX_INITIALIZER;

//oom的错误处理函数
static void zmalloc_oom(size_t size) {
//把错误信息输出到stderr流文件中
fprintf(stderr, "zmalloc: Out of memory trying to allocate %zu bytes\n",
size);
//刷新缓冲,把stderr中的数据发给错误输出设备
fflush(stderr);
//终止当前进程,但不清理任何对象
abort();
}

// size 是分配的内存大小
void *zmalloc(size_t size) {
// 实际多申请了PREFIX_SIZE大小的空间
void *ptr = malloc(size+PREFIX_SIZE);
// 如果分配失败,调用zmalloc_oom函数打印oom的错误信息,然后退出进程
if (!ptr) zmalloc_oom(size);
#ifdef HAVE_MALLOC_SIZE
//如果已经定义了redis_malloc_size函数,直接计算ptr的实际大小,然后更新used_memory
update_zmalloc_stat_alloc(redis_malloc_size(ptr),size);
return ptr;
#else
//否则,先在分配到的空间的第一个字长处保存住原本请求的空间大小size
//然后只能默认size+PREFIX_SIZE是已分配的大小(大概没有malloc_size算出来的靠谱),更新used_memory
//多申请的PREFIX_SIZE空间就是用来存储size的,所以当定义了redis_malloc_size函数时PREFIX_SIZE就是0,因为已经不需要存储size了
*((size_t*)ptr) = size;
update_zmalloc_stat_alloc(size+PREFIX_SIZE,size);
return (char*)ptr+PREFIX_SIZE;
#endif
}

//对calloc的封装,更新了used_memory
//调用calloc时,第一个参数固定为1,所以每次只会分配一个size+PREFIX_SIZE大小的空间
void *zcalloc(size_t size) {
void *ptr = calloc(1, size+PREFIX_SIZE);

if (!ptr) zmalloc_oom(size);
#ifdef HAVE_MALLOC_SIZE
update_zmalloc_stat_alloc(redis_malloc_size(ptr),size);
return ptr;
#else
*((size_t*)ptr) = size;
update_zmalloc_stat_alloc(size+PREFIX_SIZE,size);
return (char*)ptr+PREFIX_SIZE;
#endif
}

//对realloc的封装,重新分配内存,重置并更新了used_memory
void *zrealloc(void *ptr, size_t size) {
#ifndef HAVE_MALLOC_SIZE
void *realptr;
#endif
size_t oldsize;
void *newptr;

if (ptr == NULL) return zmalloc(size);
#ifdef HAVE_MALLOC_SIZE
oldsize = redis_malloc_size(ptr);
newptr = realloc(ptr,size);
if (!newptr) zmalloc_oom(size);

update_zmalloc_stat_free(oldsize);
update_zmalloc_stat_alloc(redis_malloc_size(newptr),size);
return newptr;
#else
realptr = (char*)ptr-PREFIX_SIZE;
oldsize = *((size_t*)realptr);
newptr = realloc(realptr,size+PREFIX_SIZE);
if (!newptr) zmalloc_oom(size);

*((size_t*)newptr) = size;
update_zmalloc_stat_free(oldsize);
update_zmalloc_stat_alloc(size,size);
return (char*)newptr+PREFIX_SIZE;
#endif
}

//对free的封装,重置了used_memory
void zfree(void *ptr) {
#ifndef HAVE_MALLOC_SIZE
void *realptr;
size_t oldsize;
#endif

if (ptr == NULL) return;
#ifdef HAVE_MALLOC_SIZE
update_zmalloc_stat_free(redis_malloc_size(ptr));
free(ptr);
#else
realptr = (char*)ptr-PREFIX_SIZE;
oldsize = *((size_t*)realptr);
update_zmalloc_stat_free(oldsize+PREFIX_SIZE);
free(realptr);
#endif
}

//对strdup的封装,复制一个字符串到新的内存空间
char *zstrdup(const char *s) {
size_t l = strlen(s)+1;
char *p = zmalloc(l);

memcpy(p,s,l);
return p;
}

//返回当前的used_memory
size_t zmalloc_used_memory(void) {
size_t um;

if (zmalloc_thread_safe) pthread_mutex_lock(&used_memory_mutex);
um = used_memory;
if (zmalloc_thread_safe) pthread_mutex_unlock(&used_memory_mutex);
return um;
}

//在vm.c中被调用,当系统支持多线程时,要保证线程安全
void zmalloc_enable_thread_safeness(void) {
zmalloc_thread_safe = 1;
}

/* Get the RSS information in an OS-specific way.
*
* WARNING: the function zmalloc_get_rss() is not designed to be fast
* and may not be called in the busy loops where Redis tries to release
* memory expiring or swapping out objects.
*
* For this kind of "fast RSS reporting" usages use instead the
* function RedisEstimateRSS() that is a much faster (and less precise)
* version of the funciton. */

//zmalloc_get_rss用于获取当前进程实际所驻留在内存中的空间大小
//rss全称是Resident Set Size,即驻留集。因为程序申请的内存空间不会全部常驻于内存,系统会把其中暂时不用的部分从内存中置换到swap区,所以rss表示的就是不包括swap区的实际驻留在内存中的空间大小
//在linux系统中,可以通过读取/proc/pid/stat文件获取,该文件的第24个字段是rss的信息,pid为当前进程的进程号。读取到的不是byte数,而是内存页数。通过系统调用sysconf(_SC_PAGESIZE)可以获得当前系统的内存页大小。
//Unix系统可以直接通过task_info直接获取rss,比linux系统简单的多。

//如果是linux的procfs文件系统,就读取/proc/pid/stat
#if defined(HAVE_PROCFS)
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

size_t zmalloc_get_rss(void) {
//获取内存页大小
int page = sysconf(_SC_PAGESIZE);
size_t rss;
char buf[4096];
char filename[256];
int fd, count;
char *p, *x;

//snprintf的作用是把stat文件的绝对路径复制到filename
snprintf(filename,256,"/proc/%d/stat",getpid());
if ((fd = open(filename,O_RDONLY)) == -1) return 0;
//为什么只读4096个字符呢?
if (read(fd,buf,4096) <= 0) {
close(fd);
return 0;
}
close(fd);

p = buf;
//第24个字段是rss的信息,所以找到第23个空格,后面就是rss
count = 23; /* RSS is the 24th field in /proc/<pid>/stat */
while(p && count--) {
p = strchr(p,' ');
if (p) p++;
}
if (!p) return 0;
x = strchr(p,' ');
if (!x) return 0;
*x = '\0';

//把字符串转换成10进制的数
rss = strtoll(p,NULL,10);
rss *= page;
return rss;
}
//如果是unix系统,就使用task_info获取rss
#elif defined(HAVE_TASKINFO)
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <mach/task.h>
#include <mach/mach_init.h>

size_t zmalloc_get_rss(void) {
task_t task = MACH_PORT_NULL;
struct task_basic_info t_info;
mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;

if (task_for_pid(current_task(), getpid(), &task) != KERN_SUCCESS)
return 0;
task_info(task, TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count);

return t_info.resident_size;
}
#else
size_t zmalloc_get_rss(void) {
/* If we can't get the RSS in an OS-specific way for this system just
* return the memory usage we estimated in zmalloc()..
*
* Fragmentation will appear to be always 1 (no fragmentation)
* of course... */
//获取不到rss,说明当前系统就不用考虑碎片
return zmalloc_used_memory();
}
#endif

/* Fragmentation = RSS / allocated-bytes */
//获得进程的RSS后,可以计算目前的内存碎片率,直接用rss除以used_memory。rss包含进程的所有内存使用,包括代码,共享库,堆栈等。但是由于通常情况下redis在内存中数据的量要远远大于这些数据所占用的内存,因此这个简单的计算还是比较准确的。
//之所以会产生碎片,是因为malloc并不是严格按照参数的值来分配内存。比如程序只请求一个byte的内存,malloc通常会基于内存对齐等方面的考虑而分配4个byte。malloc进行小内存分配是很浪费的,浪费的空间因为用不上就不会在rss中
float zmalloc_get_fragmentation_ratio(void) {
return (float)zmalloc_get_rss()/zmalloc_used_memory();
}

2.2 动态字符串

sds.h

// SDSLib, A C dynamic strings library
//比起 C 字符串, SDS 具有以下优点:常数复杂度获取字符串长度,杜绝缓冲区溢出,减少修改字符串长度时所需的内存重分配次数,二进制安全,兼容部分 C 字符串函数
#ifndef __SDS_H
#define __SDS_H

#include <sys/types.h>
#include <stdarg.h>

typedef char *sds;

struct sdshdr {
int len; //记录buf数组中已使用字节的数量,有效字符串的长度
int free; //记录buf数组中未使用字节的数量
char buf[]; //字节数组,用于保存字符串
//C99中,结构中的最后一个元素允许是未知大小的数组,这就叫做柔性数组成员,但结构中的柔性数组成员前面必须至少一个其他成员。柔性数组成员允许结构中包含一个大小可变的数组。sizeof返回的这种结构大小不包括柔性数组的内存,所以sizeof(struct sdshdr)==8。包含柔性数组成员的结构用malloc()函数进行内存的动态分配,并且分配的内存应该大于结构的大小,以适应柔性数组的预期大小。
};

sds sdscatvprintf(sds s, const char *fmt, va_list ap);
#ifdef __GNUC__
sds sdscatprintf(sds s, const char *fmt, ...)
//如果用的gcc编译器,需要提醒编译器检查可变参数的类型或者个数是否正确
__attribute__((format(printf, 2, 3)));
#else
sds sdscatprintf(sds s, const char *fmt, ...);
#endif

#endif

sds.c

// SDSLib, A C dynamic strings library

#define SDS_ABORT_ON_OOM

#include "sds.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "zmalloc.h"

//oom错误处理函数
static void sdsOomAbort(void) {
fprintf(stderr,"SDS: Out Of Memory (SDS_ABORT_ON_OOM defined)\n");
abort();
}

//根据初始化的字符串init和给定的字符串长度initlen,创建新的sdshdr
//const void *init表示可以修改指针本身的指向,但不能修改指针指向的内容
//void * const init指的才是不能修改指针本身
//返回值类型sds定义成了char指针的别名
sds sdsnewlen(const void *init, size_t initlen) {
struct sdshdr *sh;

//buf数组不被计算在sizeof里,所以initlen要单独加上,再多分配一个字节给'\0'
sh = zmalloc(sizeof(struct sdshdr)+initlen+1);
//已经明确define过SDS_ABORT_ON_OOM了还做判断,莫名其妙
#ifdef SDS_ABORT_ON_OOM
//内存分配失败就报oom的error
if (sh == NULL) sdsOomAbort();
#else
if (sh == NULL) return NULL;
#endif
sh->len = initlen;
sh->free = 0;

//根据initlen把init复制到sh的buf数组中
if (initlen) {
if (init) memcpy(sh->buf, init, initlen);
else memset(sh->buf,0,initlen);
}
sh->buf[initlen] = '\0';
//返回的是buf数组而不是结构体
return (char*)sh->buf;
}

//生成只包含'\0'的空的sdshdr
sds sdsempty(void) {
return sdsnewlen("",0);
}

//给定字符串init并调用sdsnewlen,来创建sdshdr
sds sdsnew(const char *init) {
size_t initlen = (init == NULL) ? 0 : strlen(init);
return sdsnewlen(init, initlen);
}

//返回sdshdr结构体中len字段的值
size_t sdslen(const sds s) {
//参数s是sdshdr结构体末尾的buf数组的指针,需要重建sdshdr结构体才能得到len字段的值
//给结构体分配的内存空间是连续的,因此只需要将s指针回退一段距离就是原始结构体的头地址,回退的长度是len和free两个字段的大小,也就是sizeof(struct sdshdr)
struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
return sh->len;
}

//给定buf数组创建新的sdshdr,相当于复制原始的sdshdr
sds sdsdup(const sds s) {
return sdsnewlen(s, sdslen(s));
}

//释放sdshdr对象的空间
void sdsfree(sds s) {
if (s == NULL) return;
//s指针回退得到指向sdshdr对象头部的指针,调用zfree释放空间
zfree(s-sizeof(struct sdshdr));
}

//返回sdshdr结构体中free字段的值
size_t sdsavail(sds s) {
struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
return sh->free;
}

//根据buf数组的内容调整len和free的值
void sdsupdatelen(sds s) {
//得到结构体对象的指针
struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
//计算真实长度
int reallen = strlen(s);
//根据真实长度调整free和len
sh->free += (sh->len-reallen);
sh->len = reallen;
}

//使buf数组有足够的额外空间容纳addlen个字节的数据
//静态函数,只能本文件内调用
static sds sdsMakeRoomFor(sds s, size_t addlen) {
struct sdshdr *sh, *newsh;
size_t free = sdsavail(s);
size_t len, newlen;

//若剩余空间已经足够,不做修改直接返回
if (free >= addlen) return s;
len = sdslen(s);
sh = (void*) (s-(sizeof(struct sdshdr)));
//实际分配的数组大小是申请的两倍,减少可能的重分配次数
newlen = (len+addlen)*2;
newsh = zrealloc(sh, sizeof(struct sdshdr)+newlen+1);
#ifdef SDS_ABORT_ON_OOM
//空间不足报oom的error
//空间不足为什么不试试只申请len+addlen的空间呢?
if (newsh == NULL) sdsOomAbort();
#else
if (newsh == NULL) return NULL;
#endif

//len是不变的,扩容只是增加free
newsh->free = newlen - len;
return newsh->buf;
}

/* Grow the sds to have the specified length. Bytes that were not part of
* the original length of the sds will be set to zero. */
//将buf数组扩容到指定长度,指定len字段的值,并用0填充新空间
sds sdsgrowzero(sds s, size_t len) {
struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
size_t totlen, curlen = sh->len;

//若目标长度比当前长度小,就不用了扩容了
if (len <= curlen) return s;
//实际增加的长度是(curlen+len-curlen)*2==len*2
s = sdsMakeRoomFor(s,len-curlen);
if (s == NULL) return NULL;

/* Make sure added region doesn't contain garbage */
sh = (void*)(s-(sizeof(struct sdshdr)));
//增加了2个len长度,实际只对一个len长度填充0
memset(s+curlen,0,(len-curlen+1)); /* also set trailing \0 byte */
totlen = sh->len+sh->free;
//指定len字段的值
sh->len = len;
//这个free值有什么意义?
sh->free = totlen-sh->len;
return s;
}

//将长度为len的字符串t追加到sdshdr的有效字符串末尾
sds sdscatlen(sds s, void *t, size_t len) {
struct sdshdr *sh;
size_t curlen = sdslen(s);

//先对buf数组扩容
s = sdsMakeRoomFor(s,len);
//返回NULL表示空间不足
if (s == NULL) return NULL;
sh = (void*) (s-(sizeof(struct sdshdr)));
//从有效字符串的末尾开始,将长度为len的字符串t复制到指针指向的位置
memcpy(s+curlen, t, len);
//有效字符串长度增加len
sh->len = curlen+len;
//剩余空间减少len
sh->free = sh->free-len;
s[curlen+len] = '\0';
return s;
}

//求个字符串长度而已,多此一举
sds sdscat(sds s, char *t) {
return sdscatlen(s, t, strlen(t));
}

//用长度为len的字符串t从头覆盖buf数组
sds sdscpylen(sds s, char *t, size_t len) {
struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
size_t totlen = sh->free+sh->len;

if (totlen < len) {
//若buf数组长度比要存的字符串短,先扩容,但是扩容的长度不是与buf数组总长度的差,而是与有效字符串长度的差
s = sdsMakeRoomFor(s,len-sh->len);
if (s == NULL) return NULL;
sh = (void*) (s-(sizeof(struct sdshdr)));
totlen = sh->free+sh->len;
}
//从buf数组头部开始覆盖
memcpy(s, t, len);
//标记新有效字符串的末尾
s[len] = '\0';
sh->len = len;
//后面存的是什么都无所谓了,反正都算free
sh->free = totlen-len;
return s;
}

//同样的多此一举
sds sdscpy(sds s, char *t) {
return sdscpylen(s, t, strlen(t));
}

//把ap里的所有参数格式化后拼接到buf数组s的后面
sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
va_list cpy;
char *buf, *t;
//缓冲区初始长度设为16
size_t buflen = 16;

while(1) {
buf = zmalloc(buflen);
#ifdef SDS_ABORT_ON_OOM
if (buf == NULL) sdsOomAbort();
#else
if (buf == NULL) return NULL;
#endif
//缓冲区的倒数第二位设为结束符
//为什么不是最后一位?
buf[buflen-2] = '\0';
//把ap指针复制到cpy,之后回到sdscatprintf函数里ap指针还要free掉,所以这里不能直接用
va_copy(cpy,ap);
//把可变参数表格式化并输出到缓冲区
vsnprintf(buf, buflen, fmt, cpy);
//如果缓冲区的结束符被覆盖了,说明缓冲区长度不够,直接free掉,加大长度重新zmalloc
if (buf[buflen-2] != '\0') {
zfree(buf);
buflen *= 2;
continue;
}
break;
}
//把缓冲区数据追加到s末尾
t = sdscat(s, buf);
zfree(buf);
return t;
}

//根据fmt格式化参数列表,结果追加到s末尾并返回。使用了可变参数,所以要在<sds.h>中提示编译器检查可变参数
//s只是作为一个容器而已,原本的内容不会被修改
//va_开头的是<stdarg.h>中定义的结构和函数
//va_list是用于存放参数列表的结构,实际上是char*的别名,通过移动指针取参数
//va_start函数根据fmt指针来初始化参数列表ap,其实就是让ap指向可变参数表里面的第一个参数。因为fmt是紧挨着可变参数表的前一个参数,所以就让ap指向fmt后面的第一个参数
//va_end函数负责清理参数列表,因为ap是字符指针,所以最后需要释放
/* Example:
* s = sdsnew("Sum is: ");
* s = sdscatprintf(s,"%d+%d = %d",a,b,a+b)
*/
sds sdscatprintf(sds s, const char *fmt, ...) {
va_list ap;
char *t;
va_start(ap, fmt);
t = sdscatvprintf(s,fmt,ap);
va_end(ap);
return t;
}

//从s数组左右两端分别移除所有在cset字符串中出现过的字符,也就是保证s两端的两个字符不在cset中
/* Example:
* s = sdsnew("AA...AA.a.aa.aHelloWorld :::");
* s = sdstrim(s,"Aa. :"); => "Hello World"
*/
sds sdstrim(sds s, const char *cset) {
struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
char *start, *end, *sp, *ep;
size_t len;

//头标记和尾标记设为s的两端
sp = start = s;
ep = end = s+sdslen(s)-1;
//两端分别逐位判断字符是否在cset中,一旦匹配失败就退出
while(sp <= end && strchr(cset, *sp)) sp++;
while(ep > start && strchr(cset, *ep)) ep--;
len = (sp > ep) ? 0 : ((ep-sp)+1);
//用sp到ep的子串从头覆盖buf数组,因为是子串所以不用判断溢出
if (sh->buf != sp) memmove(sh->buf, sp, len);
sh->buf[len] = '\0';
sh->free = sh->free+(sh->len-len);
sh->len = len;
return s;
}

//用s中start到end的子串覆盖原始的s
/* Example:
* s = sdsnew("Hello World");
* sdsrange(s,1,-1); => "ello World"
*/
sds sdsrange(sds s, int start, int end) {
struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
size_t newlen, len = sdslen(s);

if (len == 0) return s;
//先把负下标换成正数
if (start < 0) {
start = len+start;
//负过头了就归0
if (start < 0) start = 0;
}
if (end < 0) {
end = len+end;
if (end < 0) end = 0;
}
//子串的长度
newlen = (start > end) ? 0 : (end-start)+1;
if (newlen != 0) {
//若start超出buf的有效字符串长度,则子串不存在,长度设为0
if (start >= (signed)len) {
newlen = 0;
//若end越界超出buf的有效字符串长度,则退回到有效字符串末尾
} else if (end >= (signed)len) {
end = len-1;
newlen = (start > end) ? 0 : (end-start)+1;
}
} else {
start = 0;
}
//用子串覆盖buf数组
if (start && newlen) memmove(sh->buf, sh->buf+start, newlen);
//结束符为什么不是'\0'
sh->buf[newlen] = 0;
sh->free = sh->free+(sh->len-newlen);
sh->len = newlen;
return s;
}

//字符数组转小写
void sdstolower(sds s) {
int len = sdslen(s), j;

for (j = 0; j < len; j++) s[j] = tolower(s[j]);
}

//字符数组转大写
void sdstoupper(sds s) {
int len = sdslen(s), j;

for (j = 0; j < len; j++) s[j] = toupper(s[j]);
}

//字符串比较
int sdscmp(sds s1, sds s2) {
size_t l1, l2, minlen;
int cmp;

l1 = sdslen(s1);
l2 = sdslen(s2);
minlen = (l1 < l2) ? l1 : l2;
cmp = memcmp(s1,s2,minlen);
//相等应该直接返回0,为什么多此一举非要算出个0?
if (cmp == 0) return l1-l2;
return cmp;
}

/* Split 's' with separator in 'sep'. An array
* of sds strings is returned. *count will be set
* by reference to the number of tokens returned.
*
* On out of memory, zero length string, zero length
* separator, NULL is returned.
*
* Note that 'sep' is able to split a string using
* a multi-character separator. For example
* sdssplit("foo_-_bar","_-_"); will return two
* elements "foo" and "bar".
*
* This version of the function is binary-safe but
* requires length arguments. sdssplit() is just the
* same function but for zero-terminated strings.
*/
//使用分隔符(字符串)sep分割字符串s,返回一个sds数组,同时count存放分割后子串数量,因为是指针所以能修改数值
//len是s的长度,seplen是sep的长度
sds *sdssplitlen(char *s, int len, char *sep, int seplen, int *count) {
int elements = 0, slots = 5, start = 0, j;
//slots是预设的子串数量,因为要申请空间所以要先预设,不够再扩容
//sds本身是char*,所以tokens实际上是指针数组的指针
sds *tokens = zmalloc(sizeof(sds)*slots);
#ifdef SDS_ABORT_ON_OOM
//空间不足就返回NULL
if (tokens == NULL) sdsOomAbort();
#endif
if (seplen < 1 || len < 0 || tokens == NULL) return NULL;
//s为空,则返回空的tokens,count设为0
if (len == 0) {
*count = 0;
return tokens;
}
for (j = 0; j < (len-(seplen-1)); j++) {
/* make sure there is room for the next element and the final one */
//tokens数组要有至少存放两个sds的空位,不够就扩容成两倍
//因为后面在循环体内部要存入一个sds,又因为循环下标截止到len-(seplen-1),循环结束后还会存入最后一个sds,所以要预留两个空位
if (slots < elements+2) {
sds *newtokens;

slots *= 2;
newtokens = zrealloc(tokens,sizeof(sds)*slots);
//若空间不足扩容失败,报错并释放tokens指针
if (newtokens == NULL) {
#ifdef SDS_ABORT_ON_OOM
sdsOomAbort();
#else
goto cleanup;
#endif
}
tokens = newtokens;
}
/* search the separator */
//sep只有一个字符时直接比较,有多个字符时用memcmp比较
if ((seplen == 1 && *(s+j) == sep[0]) || (memcmp(s+j,sep,seplen) == 0)) {
//构造新的sds,空间不足就退出
tokens[elements] = sdsnewlen(s+start,j-start);
if (tokens[elements] == NULL) {
#ifdef SDS_ABORT_ON_OOM
sdsOomAbort();
#else
goto cleanup;
#endif
}
//下标加一
elements++;
start = j+seplen;
j = j+seplen-1; /* skip the separator */
}
}
/* Add the final element. We are sure there is room in the tokens array. */
//存入最后一个子串
tokens[elements] = sdsnewlen(s+start,len-start);
if (tokens[elements] == NULL) {
#ifdef SDS_ABORT_ON_OOM
sdsOomAbort();
#else
goto cleanup;
#endif
}
elements++;
*count = elements;
return tokens;

#ifndef SDS_ABORT_ON_OOM
//tokens是指针数组的指针,所以tokens指针和其内部的指针元素要分别释放
cleanup:
{
int i;
for (i = 0; i < elements; i++) sdsfree(tokens[i]);
zfree(tokens);
return NULL;
}
#endif
}

//和cleanup基本重复了,如果sdssplitlen里直接用count计数而不用额外的elements计数,就能合并了
void sdsfreesplitres(sds *tokens, int count) {
if (!tokens) return;
while(count--)
sdsfree(tokens[count]);
zfree(tokens);
}

//将长整型数据转成字符串
sds sdsfromlonglong(long long value) {
//buf和p的作用重复了,没必要
char buf[32], *p;
unsigned long long v;

v = (value < 0) ? -value : value;
//其实long long int最长就20位,没必要留这么多位置
//从后往前赋值,最后p就指向了字符串头部
p = buf+31; /* point to the last character */
do {
*p-- = '0'+(v%10);
v /= 10;
} while(v);
if (value < 0) *p-- = '-';
p++;
//指定p和p的长度创建sds
return sdsnewlen(p,32-(p-buf));
}

//将长度为len的字符串p以带引号的格式追加到s的末尾,也就是添加引用字符串
sds sdscatrepr(sds s, char *p, size_t len) {
//s末尾添加'"',作为引用字符串的开头
s = sdscatlen(s,"\"",1);
while(len--) {
switch(*p) {
//把non-printable characters转换成printable characters
case '\\':
case '"':
s = sdscatprintf(s,"\\%c",*p);
break;
case '\n': s = sdscatlen(s,"\\n",1); break;
case '\r': s = sdscatlen(s,"\\r",1); break;
case '\t': s = sdscatlen(s,"\\t",1); break;
case '\a': s = sdscatlen(s,"\\a",1); break;
case '\b': s = sdscatlen(s,"\\b",1); break;
default:
//isprint判断是否为printable character
if (isprint(*p))
s = sdscatprintf(s,"%c",*p);
else
//不可打印的用两位十六进制字符串表示
s = sdscatprintf(s,"\\x%02x",(unsigned char)*p);
break;
}
p++;
}
//最后添加'"',作为引用字符串的结束
return sdscatlen(s,"\"",1);
}

/* Helper function for sdssplitargs() that returns non zero if 'c'
* is a valid hex digit. */
//判断一个给定字符是否是十六进制数
int is_hex_digit(char c) {
return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
(c >= 'A' && c <= 'F');
}

/* Helper function for sdssplitargs() that converts an hex digit into an
* integer from 0 to 15 */
//把单个十六进制字符转换为相应的十进制数字
int hex_digit_to_int(char c) {
switch(c) {
case '0': return 0;
case '1': return 1;
case '2': return 2;
case '3': return 3;
case '4': return 4;
case '5': return 5;
case '6': return 6;
case '7': return 7;
case '8': return 8;
case '9': return 9;
case 'a': case 'A': return 10;
case 'b': case 'B': return 11;
case 'c': case 'C': return 12;
case 'd': case 'D': return 13;
case 'e': case 'E': return 14;
case 'f': case 'F': return 15;
default: return 0;
}
}

/* Split a line into arguments, where every argument can be in the
* following programming-language REPL-alike form:
*
* foo bar "newline are supported\n" and "\xff\x00otherstuff"
*
* The number of arguments is stored into *argc, and an array
* of sds is returned. The caller should sdsfree() all the returned
* strings and finally zfree() the array itself.
*
* Note that sdscatrepr() is able to convert back a string into
* a quoted string in the same format sdssplitargs() is able to parse.
*/
//命令行解析,argc存放解析后的参数个数
sds *sdssplitargs(char *line, int *argc) {
char *p = line;
char *current = NULL;
char **vector = NULL;

*argc = 0;
while(1) {
/* skip blanks */
while(*p && isspace(*p)) p++;
if (*p) {
/* get a token */
int inq=0; /* set to 1 if we are in "quotes" */
int done=0;

//后面已经把current重置成NULL了,这个判断多此一举
if (current == NULL) current = sdsempty();
while(!done) {
//双引号内的解析
//在双引号内部不关心分隔符,读到右双引号才算读完一个参数
if (inq) {
//读到一个以'\x'开头的表示两位十六进制整数的字符串
if (*p == '\\' && *(p+1) == 'x' &&
is_hex_digit(*(p+2)) &&
is_hex_digit(*(p+3)))
{
unsigned char byte;
//转换成十进制格式的字符串
byte = (hex_digit_to_int(*(p+2))*16)+
hex_digit_to_int(*(p+3));
//追加到current尾部
current = sdscatlen(current,(char*)&byte,1);
//跳过这个十六进制的数,接着读下个字符
p += 3;
} else if (*p == '\\' && *(p+1)) {
//处理'\'开头的特殊字符
char c;

p++;
switch(*p) {
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'b': c = '\b'; break;
case 'a': c = '\a'; break;
default: c = *p; break;
}
//特殊字符也追加到current尾部,因为都在一个双引号的引用内
current = sdscatlen(current,&c,1);
} else if (*p == '"') {
/* closing quote must be followed by a space */
//读到右双引号表示读取完毕,跳出循环,但是与下个参数的分隔符必须是空格,否则报错
if (*(p+1) && !isspace(*(p+1))) goto err;
done=1;
} else if (!*p) {
/* unterminated quotes */
//没读到右双引号就读完了,说明命令行参数写错了,报error
goto err;
} else {
//剩下的情况就是一般字符,直接追加到current尾部
current = sdscatlen(current,p,1);
}
//双引号外的解析
} else {
switch(*p) {
//读到分割符,done=1表示当前参数解析完毕
case ' ':
case '\n':
case '\r':
case '\t':
case '\0':
done=1;
break;
//读到双引号
case '"':
inq=1;
break;
default:
//如果是一般字符,就追加到current尾部,跳出循环开始读下个字符
current = sdscatlen(current,p,1);
break;
}
}
//没有break就接着读下个字符
if (*p) p++;
}
/* add the token to the vector */
//事先不知道能解析出多少个参数,只能读到一个就扩容一个空位
vector = zrealloc(vector,((*argc)+1)*sizeof(char*));
//读出的参数先是保存在current中,再存入vector中
vector[*argc] = current;
(*argc)++;
current = NULL;
} else {
//返回解析出的参数列表
return vector;
}
}

err:
//和上面的tokens一样,指针数组的指针要内外分别释放
while((*argc)--)
sdsfree(vector[*argc]);
zfree(vector);
if (current) sdsfree(current);
return NULL;
}

2.3 双端链表

adlist.h

// adlist.h - A generic doubly linked list implementation

#ifndef __ADLIST_H__
#define __ADLIST_H__

/* Node, List, and Iterator are the only data structures used currently. */

//链表节点
typedef struct listNode {
//指向前一个节点
struct listNode *prev;
//指向后一个节点
struct listNode *next;
//value是void类型,表示链表可以保存各种不同类型的值,相当于多态
void *value;
} listNode;

//用于访问链表的迭代器,双向链表自然支持双向迭代
//迭代器并不保存链表,只是标记链表的一个节点
typedef struct listIter {
//下个待访问的节点
listNode *next;
//迭代访问的方向,AL_START_HEAD表示向前,AL_START_TAIL表示向后,在后面的宏定义里
int direction;
} listIter;

//双向链表
typedef struct list {
//指向头结点
listNode *head;
//指向尾节点
listNode *tail;
//提供了三个函数指针, 供用户传入自定义函数
//dup用于复制节点所保存的值,free用于释放节点所保存的值(因为节点值是void指针),match用于匹配节点所保存的值
void *(*dup)(void *ptr);
void (*free)(void *ptr);
int (*match)(void *ptr, void *key);
//链表长度
unsigned int len;
} list;

/* Functions implemented as macros */
//把指针的操作封装成了宏的类型,方便了程序员的使用
//l表示list指针,n表示listNode指针,m表示函数
#define listLength(l) ((l)->len)
#define listFirst(l) ((l)->head)
#define listLast(l) ((l)->tail)
#define listPrevNode(n) ((n)->prev)
#define listNextNode(n) ((n)->next)
#define listNodeValue(n) ((n)->value)

#define listSetDupMethod(l,m) ((l)->dup = (m))
#define listSetFreeMethod(l,m) ((l)->free = (m))
#define listSetMatchMethod(l,m) ((l)->match = (m))

#define listGetDupMethod(l) ((l)->dup)
#define listGetFree(l) ((l)->free)
#define listGetMatchMethod(l) ((l)->match)

/* Directions for iterators */
//迭代链表的方向
#define AL_START_HEAD 0
#define AL_START_TAIL 1

#endif /* __ADLIST_H__ */

adlist.c

// adlist.c - A generic doubly linked list implementation

#include <stdlib.h>
#include "adlist.h"
#include "zmalloc.h"

/* Create a new list. The created list can be freed with
* AlFreeList(), but private value of every node need to be freed
* by the user before to call AlFreeList().
*
* On error, NULL is returned. Otherwise the pointer to the new list. */
//创建链表
//空参数实际上表示函数需要不确定个数的参数,比如main(),而void参数才是明确告诉编译器函数不需要参数
list *listCreate(void)
{
struct list *list;
//空间不足就退出
if ((list = zmalloc(sizeof(*list))) == NULL)
return NULL;
//初始化
list->head = list->tail = NULL;
list->len = 0;
list->dup = NULL;
list->free = NULL;
list->match = NULL;
//返回链表指针
return list;
}

/* Free the whole list.
*
* This function can't fail. */
//释放链表
//先释放listNode的value指针,再释放listNode指针,释放完所有节点后再释放list指针
void listRelease(list *list)
{
unsigned int len;
listNode *current, *next;

current = list->head;
len = list->len;
while(len--) {
next = current->next;
if (list->free) list->free(current->value);
zfree(current);
current = next;
}
zfree(list);
}

/* Add a new node to the list, to head, contaning the specified 'value'
* pointer as value.
*
* On error, NULL is returned and no operation is performed (i.e. the
* list remains unaltered).
* On success the 'list' pointer you pass to the function is returned. */
//给定一个值value和链表list,根据value构造新节点添加到list的表头
list *listAddNodeHead(list *list, void *value)
{
listNode *node;
//申请空节点
if ((node = zmalloc(sizeof(*node))) == NULL)
return NULL;
//给节点赋值
node->value = value;
//设置链表的头尾指针和新节点的前后指针
if (list->len == 0) {
list->head = list->tail = node;
node->prev = node->next = NULL;
} else {
node->prev = NULL;
node->next = list->head;
list->head->prev = node;
list->head = node;
}
//更新链表长度
list->len++;
return list;
}

/* Add a new node to the list, to tail, contaning the specified 'value'
* pointer as value.
*
* On error, NULL is returned and no operation is performed (i.e. the
* list remains unaltered).
* On success the 'list' pointer you pass to the function is returned. */
//给定一个值value和链表list,根据value构造新节点添加到list的尾部
list *listAddNodeTail(list *list, void *value)
{
listNode *node;

if ((node = zmalloc(sizeof(*node))) == NULL)
return NULL;
node->value = value;
if (list->len == 0) {
list->head = list->tail = node;
node->prev = node->next = NULL;
} else {
node->prev = list->tail;
node->next = NULL;
list->tail->next = node;
list->tail = node;
}
list->len++;
return list;
}

//将一个包含给定值value的新节点添加到给定节点old_node的之前或者之后
list *listInsertNode(list *list, listNode *old_node, void *value, int after) {
listNode *node;

if ((node = zmalloc(sizeof(*node))) == NULL)
return NULL;
node->value = value;
//after==0表示新节点添加到old_node前面,否则就是添加到后面
if (after) {
node->prev = old_node;
node->next = old_node->next;
if (list->tail == old_node) {
list->tail = node;
}
} else {
node->next = old_node;
node->prev = old_node->prev;
if (list->head == old_node) {
list->head = node;
}
}
//让新节点的前后节点指向该新节点
if (node->prev != NULL) {
node->prev->next = node;
}
if (node->next != NULL) {
node->next->prev = node;
}
list->len++;
return list;
}

/* Remove the specified node from the specified list.
* It's up to the caller to free the private value of the node.
*
* This function can't fail. */
//从链表中删除给定节点
void listDelNode(list *list, listNode *node)
{
if (node->prev)
node->prev->next = node->next;
else
list->head = node->next;
if (node->next)
node->next->prev = node->prev;
else
list->tail = node->prev;
if (list->free) list->free(node->value);
zfree(node);
list->len--;
}

/* Returns a list iterator 'iter'. After the initialization every
* call to listNext() will return the next element of the list.
*
* This function can't fail. */
//指定迭代方向,获取链表的迭代器
listIter *listGetIterator(list *list, int direction)
{
listIter *iter;

if ((iter = zmalloc(sizeof(*iter))) == NULL) return NULL;
if (direction == AL_START_HEAD)
//正向迭代从头结点开始
iter->next = list->head;
else
//反向迭代从尾结点开始
iter->next = list->tail;
iter->direction = direction;
return iter;
}

/* Release the iterator memory */
//释放迭代器,就是给zfree指定了参数类型,多此一举
void listReleaseIterator(listIter *iter) {
zfree(iter);
}

/* Create an iterator in the list private iterator structure */
//重置迭代器li,起点设为链表头结点,方向设为自前向后
void listRewind(list *list, listIter *li) {
li->next = list->head;
li->direction = AL_START_HEAD;
}

//重置迭代器li,起点设为链表尾结点,方向设为自后向前
void listRewindTail(list *list, listIter *li) {
li->next = list->tail;
li->direction = AL_START_TAIL;
}

/* Return the next element of an iterator.
* It's valid to remove the currently returned element using
* listDelNode(), but not to remove other elements.
*
* The function returns a pointer to the next element of the list,
* or NULL if there are no more elements, so the classical usage patter
* is:
*
* iter = listGetIterator(list,<direction>);
* while ((node = listNext(iter)) != NULL) {
* doSomethingWith(listNodeValue(node));
* }
*
* */
//返回迭代器指向的下一个节点,然后根据迭代方向更新迭代器指向的节点
listNode *listNext(listIter *iter)
{
listNode *current = iter->next;

if (current != NULL) {
if (iter->direction == AL_START_HEAD)
iter->next = current->next;
else
iter->next = current->prev;
}
return current;
}

/* Duplicate the whole list. On out of memory NULL is returned.
* On success a copy of the original list is returned.
*
* The 'Dup' method set with listSetDupMethod() function is used
* to copy the node value. Otherwise the same pointer value of
* the original node is used as value of the copied node.
*
* The original list both on success or error is never modified. */
//复制整个链表orig,返回新副本链表的指针
list *listDup(list *orig)
{
list *copy;
listIter *iter;
listNode *node;

if ((copy = listCreate()) == NULL)
return NULL;
//先复制orig的三个方法
copy->dup = orig->dup;
copy->free = orig->free;
copy->match = orig->match;
//获取orig的迭代器,逐个节点进行复制
iter = listGetIterator(orig, AL_START_HEAD);
while((node = listNext(iter)) != NULL) {
void *value;

if (copy->dup) {
value = copy->dup(node->value);
if (value == NULL) {
//复制出错,清理迭代器和copy并退出
listRelease(copy);
listReleaseIterator(iter);
return NULL;
}
} else
value = node->value;
//根据value构造新节点添加到copy末尾
if (listAddNodeTail(copy, value) == NULL) {
listRelease(copy);
listReleaseIterator(iter);
return NULL;
}
}
//迭代器用完就释放
listReleaseIterator(iter);
return copy;
}

/* Search the list for a node matching a given key.
* The match is performed using the 'match' method
* set with listSetMatchMethod(). If no 'match' method
* is set, the 'value' pointer of every node is directly
* compared with the 'key' pointer.
*
* On success the first matching node pointer is returned
* (search starts from head). If no matching node exists
* NULL is returned. */
//获取链表中节点值等于给定key的节点
listNode *listSearchKey(list *list, void *key)
{
listIter *iter;
listNode *node;
//利用迭代器遍历
iter = listGetIterator(list, AL_START_HEAD);
while((node = listNext(iter)) != NULL) {
if (list->match) {
if (list->match(node->value, key)) {
listReleaseIterator(iter);
return node;
}
} else {
if (key == node->value) {
listReleaseIterator(iter);
return node;
}
}
}
listReleaseIterator(iter);
return NULL;
}

/* Return the element at the specified zero-based index
* where 0 is the head, 1 is the element next to head
* and so on. Negative integers are used in order to count
* from the tail, -1 is the last element, -2 the penultimante
* and so on. If the index is out of range NULL is returned. */
//返回链表在给定索引上的节点
listNode *listIndex(list *list, int index) {
listNode *n;
//支持负索引
if (index < 0) {
index = (-index)-1;
//负索引是从尾节点倒着数
n = list->tail;
while(index-- && n) n = n->prev;
} else {
//正索引是从头结点正着数
n = list->head;
while(index-- && n) n = n->next;
}
return n;
}

2.4 字典

dict.h

// Hash Tables Implementation.

#ifndef __DICT_H
#define __DICT_H

//定义的状态码,DICT_OK表示对字典的操作成功,DICT_ERR表示操作失败
#define DICT_OK 0
#define DICT_ERR 1

/* Unused arguments generate annoying warnings... */
//没用到
#define DICT_NOTUSED(V) ((void) V)

//字典表项,一个key-value对
//因为采用拉链法处理哈希碰撞,所以需要一个指针所在链表的下一个节点
typedef struct dictEntry {
void *key;
void *val;
struct dictEntry *next;
} dictEntry;

//字典需要的一组函数
typedef struct dictType {
//计算哈希值的函数
unsigned int (*hashFunction)(const void *key);
//复制键的函数
void *(*keyDup)(void *privdata, const void *key);
//复制值的函数
void *(*valDup)(void *privdata, const void *obj);
//对比键的函数
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
//销毁键的函数
void (*keyDestructor)(void *privdata, void *key);
//销毁值的函数
void (*valDestructor)(void *privdata, void *obj);
} dictType;

/* This is our hash table structure. Every dictionary has two of this as we
* implement incremental rehashing, for the old to the new table. */
//字典使用的哈希表结构
typedef struct dictht {
//哈希表数组,数组中的每个元素都是一个指向某个dictEntry的指针
//也就是拉链法需要的链表的链表,第一层链表(桶链表)的每个元素是第二层链表的头节点,第二层的链表(节点链表)存储哈希值相同的dictEntry元素
dictEntry **table;
//table的大小,也就是桶的数量
unsigned long size;
//哈希表大小掩码,用于计算索引值。sizemask=size-1,给定dictEntry节点的key的哈希值计算出来后,与sizemask进行按位与操作,决定该节点应该被放在桶链表的哪个桶里
unsigned long sizemask;
//哈希表已有dictEntry节点的数量
unsigned long used;
} dictht;

//字典
typedef struct dict {
//为字典设置的针对不同数据类型的特定函数簇
dictType *type;
//私有数据,保存了需要传给type中特定函数的可选参数
void *privdata;
//一个字典使用了两个哈希表,用户使用的是0号哈希表,1号哈希表用于对0号哈希表进行rehash
//rehash的目的是提高哈希表的查找效率。因为要映射哈希值的缘故,桶链表的长度在创建以后就不能改了,随着节点的增多,哈希表的负载因子会越来越大(负载因子=总节点数/桶链表长度),表现为桶不够用了,使得节点链表过长,查询操作会在节点链表上浪费时间。所以rehash就是新建一个更长的桶链表,把节点疏散开。反之,如果节点数过少,rehash的过程就是新建一个更短的桶链表,不让节点分布太疏散。rehash完成后,就用1号哈希表替换0号哈希表,所以1号哈希表只是辅助rehash的,用户无需访问。
dictht ht[2];
//rehash标示,为-1表示不在rehash,不为0表示正在rehash的桶序号
int rehashidx; /* rehashing not in progress if rehashidx == -1 */
//当前正在运行的安全迭代器数量
//存在安全迭代器就不会进行rehash,也就不会有节点被偷偷迁移到1号哈希表,保证了迭代的准确性
int iterators; /* number of iterators currently running */
} dict;

/* If safe is set to 1 this is a safe iteartor, that means, you can call
* dictAdd, dictFind, and other functions against the dictionary even while
* iterating. Otherwise it is a non safe iterator, and only dictNext()
* should be called while iterating. */
//字典的迭代器
typedef struct dictIterator {
//被迭代的字典
dict *d;
//table是当前正在迭代的哈希表序号,取值为0或1
//index是迭代器当前所指向的桶索引位置
//safe标识此迭代器是否安全。safe=1表示安全,迭代时可以对节点增删改查,否则就是不安全的,只能迭代哈希表而不能修改
int table, index, safe;
//entry是当前迭代到的节点的指针
//nextEntry是当前迭代节点的下一个节点。因为在迭代时entry指针可能会被修改,所以要单独保存下个节点的地址,防止丢失链接
dictEntry *entry, *nextEntry;
} dictIterator;

/* This is the initial size of every hash table */
//哈希表的初始长度,即桶的数量
#define DICT_HT_INITIAL_SIZE 4

/* ------------------------------- Macros ------------------------------------*/
//宏定义函数,d表示字典dict,entry表示节点dictEntry

//释放节点的val指针
#define dictFreeEntryVal(d, entry) \
if ((d)->type->valDestructor) \
(d)->type->valDestructor((d)->privdata, (entry)->val)

//设置节点的val
#define dictSetHashVal(d, entry, _val_) do { \
if ((d)->type->valDup) \
//如果定义了复制值的函数,就调用该函数完成
entry->val = (d)->type->valDup((d)->privdata, _val_); \
else \
entry->val = (_val_); \
} while(0)

//释放节点的key指针
#define dictFreeEntryKey(d, entry) \
if ((d)->type->keyDestructor) \
(d)->type->keyDestructor((d)->privdata, (entry)->key)

//设置节点的key
#define dictSetHashKey(d, entry, _key_) do { \
if ((d)->type->keyDup) \
entry->key = (d)->type->keyDup((d)->privdata, _key_); \
else \
entry->key = (_key_); \
} while(0)

//比较两个key
#define dictCompareHashKeys(d, key1, key2) \
(((d)->type->keyCompare) ? \
(d)->type->keyCompare((d)->privdata, key1, key2) : \
(key1) == (key2))

//计算key的哈希值
#define dictHashKey(d, key) (d)->type->hashFunction(key)

//获取节点的key和val
#define dictGetEntryKey(he) ((he)->key)
#define dictGetEntryVal(he) ((he)->val)
//计算字典中两个哈希表的总大小(桶的数量)
#define dictSlots(d) ((d)->ht[0].size+(d)->ht[1].size)
//计算字典中两个哈希表的总节点数
#define dictSize(d) ((d)->ht[0].used+(d)->ht[1].used)
//是否正在进行rehash
#define dictIsRehashing(ht) ((ht)->rehashidx != -1)

/* Hash table types */
//作者的样例代码用到的,实际没啥用
extern dictType dictTypeHeapStringCopyKey;
extern dictType dictTypeHeapStrings;
extern dictType dictTypeHeapStringCopyKeyValue;

#endif /* __DICT_H */

dict.c

/* Hash Tables Implementation.*/

#include "fmacros.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <assert.h>
#include <limits.h>
#include <sys/time.h>
#include <ctype.h>

#include "dict.h"
#include "zmalloc.h"

/* Using dictEnableResize() / dictDisableResize() we make possible to
* enable/disable resizing of the hash table as needed. This is very important
* for Redis, as we use copy-on-write and don't want to move too much memory
* around when there is a child performing saving operations.
*
* Note that even when dict_can_resize is set to 0, not all resizes are
* prevented: an hash table is still allowed to grow if the ratio between
* the number of elements and the buckets > dict_force_resize_ratio. */
static int dict_can_resize = 1;
static unsigned int dict_force_resize_ratio = 5;

/* -------------------------- private prototypes ---------------------------- */

static int _dictExpandIfNeeded(dict *ht);
static unsigned long _dictNextPower(unsigned long size);
static int _dictKeyIndex(dict *ht, const void *key);
static int _dictInit(dict *ht, dictType *type, void *privDataPtr);

/* -------------------------- hash functions -------------------------------- */

/* Thomas Wang's 32 bit Mix Function */
//针对整型的哈希函数,把整型key转换成对应的哈希值,作为哈希表的键值
unsigned int dictIntHashFunction(unsigned int key)
{
key += ~(key << 15);
key ^= (key >> 10);
key += (key << 3);
key ^= (key >> 6);
key += ~(key << 11);
key ^= (key >> 16);
return key;
}

/* Identity hash function for integer keys */
//不使用哈希函数,直接把整型作为哈希表的key
unsigned int dictIdentityHashFunction(unsigned int key)
{
return key;
}

/* Generic hash function (a popular one from Bernstein).
* I tested a few and this was the best. */
//djb哈希算法,一种通用的哈希函数,计算字符串buf的哈希值
unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
//hash seed
unsigned int hash = 5381;

while (len--)
//字符串的ascii值与hash seed做运算
hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
return hash;
}

/* And a case insensitive version */
//大小写无关的djb哈希算法
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
unsigned int hash = 5381;

while (len--)
hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
return hash;
}

/* ----------------------------- API implementation ------------------------- */

/* Reset an hashtable already initialized with ht_init().
* NOTE: This function should only called by ht_destroy(). */
//初始化或重置哈希表
static void _dictReset(dictht *ht)
{
ht->table = NULL;
ht->size = 0;
ht->sizemask = 0;
ht->used = 0;
}

/* Create a new hash table */
//创建新的空字典
dict *dictCreate(dictType *type,
void *privDataPtr)
{
dict *d = zmalloc(sizeof(*d));
//申请到空间后做初始化
_dictInit(d,type,privDataPtr);
//返回字典指针
return d;
}

/* Initialize the hash table */
//字典的初始化函数
int _dictInit(dict *d, dictType *type,
void *privDataPtr)
{
//重置字典的两个哈希表
_dictReset(&d->ht[0]);
_dictReset(&d->ht[1]);
//初始化赋值
d->type = type;
d->privdata = privDataPtr;
d->rehashidx = -1;
d->iterators = 0;
//返回成功的状态码
return DICT_OK;
}

/* Resize the table to the minimal size that contains all the elements,
* but with the invariant of a USER/BUCKETS ratio near to <= 1 */
//调整字典d中哈希表的size,保证每个节点占一个单独的桶,但长度可能有冗余,因为哈希表长度必须是DICT_HT_INITIAL_SIZE乘以2的幂次
int dictResize(dict *d)
{
int minimal;
//如果不支持resize,或者字典正在rehash,返回错误码
if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
//桶的数量定为0号哈希表的总节点数,但不能少于初始化的DICT_HT_INITIAL_SIZE
minimal = d->ht[0].used;
if (minimal < DICT_HT_INITIAL_SIZE)
minimal = DICT_HT_INITIAL_SIZE;
//调用dictExpand对哈希表扩容
return dictExpand(d, minimal);
}

/* Expand or create the hashtable */
//哈希表扩容函数
int dictExpand(dict *d, unsigned long size)
{
dictht n; /* the new hashtable */
//计算实际的扩容长度,结果是DICT_HT_INITIAL_SIZE乘以2的幂次
unsigned long realsize = _dictNextPower(size);

/* the size is invalid if it is smaller than the number of
* elements already inside the hashtable */
//若字典正在rehash,需要等待rehash完成再扩容
//若节点数大于扩容的长度,说明出了问题,具体什么问题?
if (dictIsRehashing(d) || d->ht[0].used > size)
return DICT_ERR;

/* Allocate the new hashtable and initialize all pointers to NULL */
//初始化扩容的哈希表
n.size = realsize;
n.sizemask = realsize-1;
n.table = zcalloc(realsize*sizeof(dictEntry*));
n.used = 0;

/* Is this the first initialization? If so it's not really a rehashing
* we just set the first hash table so that it can accept keys. */
if (d->ht[0].table == NULL) {
//如果当前字典的0号哈希表是空,就把新哈希表赋给0号,可以直接使用
d->ht[0] = n;
return DICT_OK;
}

/* Prepare a second hash table for incremental rehashing */
//如果当前字典有正在使用的0号哈希表,就把新哈希表先赋给1号,之后rehash的时候再赋给0号
d->ht[1] = n;
d->rehashidx = 0;
return DICT_OK;
}

/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
* Note that a rehashing step consists in moving a bucket (that may have more
* thank one key as we use chaining) from the old to the new hash table. */
//字典的rehash函数,采用分n步渐进式的rehash,因为当前的字典可能非常庞大,如果一次性把0号表的节点全部迁移到1号表,可能会占用大量时间和资源,影响系统性能,所以redis实际上是把rehash操作平摊到 dictAddRaw 、dictGetRandomKey 、dictFind 、dictGenericDelete 这些函数里,每当这些函数被执行的时候, 就会顺便执行_dictRehashStep函数,_dictRehashStep再调用dictRehash来迁移部分节点。此外还有dictRehashMilliseconds函数,支持在给定的时间段内集中进行rehash。
//参数n表示本次要迁移的桶的数量
//返回1说明rehash还没完,继续下一步,返回0就表示rehash已经完成
int dictRehash(dict *d, int n) {
//查询字典的rehashidx标志,如果表示已经rehash完毕,返回0
if (!dictIsRehashing(d)) return 0;
//循环迁移n个桶中的节点
while(n--) {
//辅助指针,用于迭代当前桶中的节点链表
dictEntry *de, *nextde;

/* Check if we already rehashed the whole table... */
//0号表节点数为0说明已经全部迁移完成,此时用1号表替换0号表,然后重置1号表,修改rehashidx标志,返回0表示rehash完毕
if (d->ht[0].used == 0) {
zfree(d->ht[0].table);
d->ht[0] = d->ht[1];
_dictReset(&d->ht[1]);
d->rehashidx = -1;
return 0;
}

/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
//当前的桶迁移完毕后开始迁移下一个桶
//这里桶的索引不会越界,因为越界说明0号表已经全部清空了,这在前面已经判断过了
while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
//把当前桶中节点链表的头节点赋给de
de = d->ht[0].table[d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
//把当前桶中的所有节点迁移到1号表
while(de) {
unsigned int h;

nextde = de->next;
/* Get the index in the new hash table */
//用字典的type函数簇里的哈希函数计算当前节点key的哈希值,与1号表的掩码按位与,得到在1号表中的桶索引
h = dictHashKey(d, de->key) & d->ht[1].sizemask;
//节点总是被插入到1号表的节点链表的表头
de->next = d->ht[1].table[h];
d->ht[1].table[h] = de;
//更新两个表的节点数
d->ht[0].used--;
d->ht[1].used++;
de = nextde;
}
//因为迁移过程是用辅助指针做的,所以最后还要把0号表当前桶中的链表指针设为NULL,表示桶已经清空
d->ht[0].table[d->rehashidx] = NULL;
d->rehashidx++;
}
//是否rehash完毕要在函数头部检查,此时无脑返回1即可
return 1;
}

//获取当前的毫秒时间
long long timeInMilliseconds(void) {
struct timeval tv;

gettimeofday(&tv,NULL);
return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000);
}

/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */
//在给定的毫秒时间段内集中进行rehash
int dictRehashMilliseconds(dict *d, int ms) {
long long start = timeInMilliseconds();
int rehashes = 0;
//每次迁移100个桶
while(dictRehash(d,100)) {
rehashes += 100;
//时间到了就中止rehash
if (timeInMilliseconds()-start > ms) break;
}
//返回桶索引
return rehashes;
}

/* This function performs just a step of rehashing, and only if there are
* no safe iterators bound to our hash table. When we have iterators in the
* middle of a rehashing we can't mess with the two hash tables otherwise
* some element can be missed or duplicated.
*
* This function is called by common lookup or update operations in the
* dictionary so that the hash table automatically migrates from H1 to H2
* while it is actively used. */
//如果当前字典没有迭代器,执行一步rehash,迁移一个桶的节点
static void _dictRehashStep(dict *d) {
if (d->iterators == 0) dictRehash(d,1);
}

/* Add an element to the target hash table */
//向字典插入dictEntry节点
int dictAdd(dict *d, void *key, void *val)
{
int index;
dictEntry *entry;
dictht *ht;
//如果正在rehash,顺便执行一步
if (dictIsRehashing(d)) _dictRehashStep(d);

/* Get the index of the new element, or -1 if
* the element already exists. */
//根据节点的key计算应该放入的桶序号。哈希碰撞指的是索引冲突
//当key已经在字典中,插入失败,返回错误码
if ((index = _dictKeyIndex(d, key)) == -1)
return DICT_ERR;

/* Allocates the memory and stores key */
//如果正在rehash,就往1号表里插,否则直接往0号表插
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(sizeof(*entry));
//插入到节点链表的头部
entry->next = ht->table[index];
ht->table[index] = entry;
ht->used++;

/* Set the hash entry fields. */
//把key和val赋给新节点
dictSetHashKey(d, entry, key);
dictSetHashVal(d, entry, val);
return DICT_OK;
}

/* Add an element, discarding the old if the key already exists.
* Return 1 if the key was added from scratch, 0 if there was already an
* element with such key and dictReplace() just performed a value update
* operation. */
//向字典插入dictEntry节点,若key已经存在就更新val
int dictReplace(dict *d, void *key, void *val)
{
dictEntry *entry, auxentry;

/* Try to add the element. If the key
* does not exists dictAdd will suceed. */
//能插就插
if (dictAdd(d, key, val) == DICT_OK)
return 1;
/* It already exists, get the entry */
//插入失败说明key已经存在,获取对应的节点
entry = dictFind(d, key);
/* Free the old value and set the new one */
/* Set the new value and free the old one. Note that it is important
* to do that in this order, as the value may just be exactly the same
* as the previous one. In this context, think to reference counting,
* you want to increment (set), and then decrement (free), and not the
* reverse. */
//更新节点的val,释放旧的val指针
auxentry = *entry;
dictSetHashVal(d, entry, val);
dictFreeEntryVal(d, &auxentry);
return 0;
}

/* Search and remove an element */
//删除字典中给定key的结点,可控制是否调用释放方法
//节点指针一定会被释放,nofree参数用于指定是否要释放节点中key和val的指针
static int dictGenericDelete(dict *d, const void *key, int nofree)
{
unsigned int h, idx;
dictEntry *he, *prevHe;
int table;
//如果字典的0号表是NULL,返回错误码。空的表size一般不是0,没初始化的表才是NULL
if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
//如果正在rehash,顺便执行一步
if (dictIsRehashing(d)) _dictRehashStep(d);
//用字典的type函数簇里的哈希函数计算节点key的哈希值
h = dictHashKey(d, key);
//不知道节点在哪个表,所以都查一遍
for (table = 0; table <= 1; table++) {
//key的哈希值与表的掩码按位与,得到桶索引
idx = h & d->ht[table].sizemask;
//得到节点链表的头节点
he = d->ht[table].table[idx];
//保存前一个节点,删除节点后用于重连
prevHe = NULL;
while(he) {
//比较两个key,如果type函数簇中有自定义的比较函数,调用之
if (dictCompareHashKeys(d, key, he->key)) {
/* Unlink the element from the list */
//断开目标节点的链接
if (prevHe)
prevHe->next = he->next;
else
d->ht[table].table[idx] = he->next;
if (!nofree) {
dictFreeEntryKey(d, he);
dictFreeEntryVal(d, he);
}
//释放节点指针
zfree(he);
d->ht[table].used--;
return DICT_OK;
}
prevHe = he;
he = he->next;
}
//遍历完0号表,如果当前没有在rehash,就不用再查1号表了,因为肯定是空的
if (!dictIsRehashing(d)) break;
}
return DICT_ERR; /* not found */
}

//删除节点,同时释放节点的key和val指针
int dictDelete(dict *ht, const void *key) {
return dictGenericDelete(ht,key,0);
}

//删除节点,但不释放节点的key和val指针
int dictDeleteNoFree(dict *ht, const void *key) {
return dictGenericDelete(ht,key,1);
}

/* Destroy an entire dictionary */
//清空字典中指定的哈希表
int _dictClear(dict *d, dictht *ht)
{
unsigned long i;

/* Free all the elements */
//遍历指定表ht的桶
for (i = 0; i < ht->size && ht->used > 0; i++) {
dictEntry *he, *nextHe;

if ((he = ht->table[i]) == NULL) continue;
//逐个节点进行free
while(he) {
nextHe = he->next;
dictFreeEntryKey(d, he);
dictFreeEntryVal(d, he);
zfree(he);
ht->used--;
he = nextHe;
}
}
/* Free the table and the allocated cache structure */
//所有桶都清空后,释放哈希表的指针
zfree(ht->table);
/* Re-initialize the table */
//把ht重置成NULL
_dictReset(ht);
return DICT_OK; /* never fails */
}

/* Clear & Release the hash table */
//清空整个字典,也就是先分别清空两个哈希表,再释放字典指针
void dictRelease(dict *d)
{
_dictClear(d,&d->ht[0]);
_dictClear(d,&d->ht[1]);
zfree(d);
}

//获取字典中指定key的节点
dictEntry *dictFind(dict *d, const void *key)
{
dictEntry *he;
unsigned int h, idx, table;
//0号表是NULL,说明整个字典都是空的
if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
//如果正在rehash,顺便执行一步
if (dictIsRehashing(d)) _dictRehashStep(d);
//计算key的哈希值
h = dictHashKey(d, key);
//遍历两个哈希表
for (table = 0; table <= 1; table++) {
//计算桶索引
idx = h & d->ht[table].sizemask;
//获取节点链表的表头
he = d->ht[table].table[idx];
//遍历节点链表,找到就返回
while(he) {
if (dictCompareHashKeys(d, key, he->key))
return he;
he = he->next;
}
//如果当前没有在rehash,不用再查1号表
if (!dictIsRehashing(d)) return NULL;
}
return NULL;
}

//获取指定key节点的val指针
void *dictFetchValue(dict *d, const void *key) {
dictEntry *he;

he = dictFind(d,key);
return he ? dictGetEntryVal(he) : NULL;
}

//获取迭代器,默认迭代0号表(迭代完会自动切换到1号表),不安全,只设置字典而不设置节点指针
dictIterator *dictGetIterator(dict *d)
{
dictIterator *iter = zmalloc(sizeof(*iter));

iter->d = d;
iter->table = 0;
iter->index = -1;
iter->safe = 0;
iter->entry = NULL;
iter->nextEntry = NULL;
return iter;
}

//获取安全迭代器
dictIterator *dictGetSafeIterator(dict *d) {
dictIterator *i = dictGetIterator(d);

i->safe = 1;
return i;
}

//迭代器获取下一个节点
dictEntry *dictNext(dictIterator *iter)
{
while (1) {
if (iter->entry == NULL) {
//如果没设置节点指针,先获取迭代的表
dictht *ht = &iter->d->ht[iter->table];
if (iter->safe && iter->index == -1 && iter->table == 0)
//条件里有safe,所以字典的iterators属性其实是记录安全迭代器的数量
iter->d->iterators++;
//桶索引归零
iter->index++;
//如果恰好遍历完当前的表,桶索引会越界
if (iter->index >= (signed) ht->size) {
//如果遍历完的是0号表且正在rehash,说明1号表也有节点,把迭代的表切换到1号表
if (dictIsRehashing(iter->d) && iter->table == 0) {
iter->table++;
iter->index = 0;
ht = &iter->d->ht[1];
//如果遍历完的是1号表,或者没有在rehash,说明整个字典的全部节点都遍历完了
} else {
break;
}
}
//设置当前迭代的节点
iter->entry = ht->table[iter->index];
} else {
//entry不为NULL,表示应该返回nextEntry
iter->entry = iter->nextEntry;
}
if (iter->entry) {
/* We need to save the 'next' here, the iterator user
* may delete the entry we are returning. */
//用户怎么处理得到的entry节点都无所谓,反正已经事先设置了nextEntry节点,迭代器不会丢失指针
iter->nextEntry = iter->entry->next;
return iter->entry;
}
}
return NULL;
}

//释放迭代器
void dictReleaseIterator(dictIterator *iter)
{
//字典的iterators增加后,迭代器的index一定不是-1,所以(iter->index == -1 && iter->table == 0)说明没有调用过dictNext,字典的iterators也就没有增加过,所以不需要减一
if (iter->safe && !(iter->index == -1 && iter->table == 0))
iter->d->iterators--;
zfree(iter);
}

/* Return a random entry from the hash table. Useful to
* implement randomized algorithms */
//获取字典中一个随机的节点
dictEntry *dictGetRandomKey(dict *d)
{
dictEntry *he, *orighe;
unsigned int h;
int listlen, listele;
//如果两个哈希表的总节点数是0,就没有可返回的节点
if (dictSize(d) == 0) return NULL;
//如果正在rehash,顺便执行一步
if (dictIsRehashing(d)) _dictRehashStep(d);
//如果正在rehash,随机的范围就是两个表的桶,通过判断桶里的表头指针是不是NULL,可以保证随机到的桶不是空的
if (dictIsRehashing(d)) {
do {
h = random() % (d->ht[0].size+d->ht[1].size);
he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
d->ht[0].table[h];
} while(he == NULL);
//如果没有在rehash,随机的范围只有0号表
} else {
do {
h = random() & d->ht[0].sizemask;
he = d->ht[0].table[h];
} while(he == NULL);
}

/* Now we found a non empty bucket, but it is a linked
* list and we need to get a random element from the list.
* The only sane way to do so is counting the elements and
* select a random index. */
listlen = 0;
orighe = he;
//计算桶里的节点数
while(he) {
he = he->next;
listlen++;
}
//随机选一个节点
listele = random() % listlen;
he = orighe;
while(listele--) he = he->next;
return he;
}

/* ------------------------- private functions ------------------------------ */

/* Expand the hash table if needed */
//推断是否需要扩容
static int _dictExpandIfNeeded(dict *d)
{
/* Incremental rehashing already in progress. Return. */
//如果正在扩容,返回0,表示不需要
if (dictIsRehashing(d)) return DICT_OK;

/* If the hash table is empty expand it to the intial size. */
//如果0号表长度是0,先扩展到长度下限DICT_HT_INITIAL_SIZE
if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

/* If we reached the 1:1 ratio, and we are allowed to resize the hash
* table (global setting) or we should avoid it but the ratio between
* elements/buckets is over the "safe" threshold, we resize doubling
* the number of buckets. */
//如果字典支持resize,每当负载因子大于等于1,就触发扩容
//如果不支持resize,只有负载因子超过预设的dict_force_resize_ratio时,才触发强制扩容
//扩容后的长度是总节点数的两倍
//先扩容才能rehash
if (d->ht[0].used >= d->ht[0].size &&
(dict_can_resize ||
d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
{
//这里为什么还要判断d->ht[0].size > d->ht[0].used?
return dictExpand(d, ((d->ht[0].size > d->ht[0].used) ?
d->ht[0].size : d->ht[0].used)*2);
}
return DICT_OK;
}

/* Our hash table capability is a power of two */
//计算哈希表实际的扩容长度
//实际长度必须是DICT_HT_INITIAL_SIZE值乘以2的幂次,结果不小于给定的size,但不能大于long int的上限
static unsigned long _dictNextPower(unsigned long size)
{
unsigned long i = DICT_HT_INITIAL_SIZE;

if (size >= LONG_MAX) return LONG_MAX;
while(1) {
if (i >= size)
return i;
i *= 2;
}
}

/* Returns the index of a free slot that can be populated with
* an hash entry for the given 'key'.
* If the key already exists, -1 is returned.
*
* Note that if we are in the process of rehashing the hash table, the
* index is always returned in the context of the second (new) hash table. */
//计算给定的key在哈希表中的索引
//字典的key是唯一的,但索引是可以冲突的。哈希碰撞不是key重复,而是不同的key被放在了同一个桶里
static int _dictKeyIndex(dict *d, const void *key)
{
unsigned int h, idx, table;
dictEntry *he;

/* Expand the hashtable if needed */
//先判断要不要扩容,返回错误码说明需要扩容但是扩容失败
//但是为什么选在这个时候扩容?
if (_dictExpandIfNeeded(d) == DICT_ERR)
return -1;
/* Compute the key hash value */
//计算key的哈希值
h = dictHashKey(d, key);
//依次计算在两个表中的桶索引,如果正在rehash,返回的就是1号表中的索引,否则返回0号表中的索引
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
/* Search if this slot does not already contain the given key */
he = d->ht[table].table[idx];
while(he) {
if (dictCompareHashKeys(d, key, he->key))
return -1;
he = he->next;
}
if (!dictIsRehashing(d)) break;
}
return idx;
}

//清空整个字典,清空和销毁不一样,只释放哈希表指针,不释放字典指针
void dictEmpty(dict *d) {
_dictClear(d,&d->ht[0]);
_dictClear(d,&d->ht[1]);
d->rehashidx = -1;
d->iterators = 0;
}

//调试函数,输出哈希表的相关信息
#define DICT_STATS_VECTLEN 50
static void _dictPrintStatsHt(dictht *ht) {
unsigned long i, slots = 0, chainlen, maxchainlen = 0;
unsigned long totchainlen = 0;
unsigned long clvector[DICT_STATS_VECTLEN];

if (ht->used == 0) {
printf("No stats available for empty dictionaries\n");
return;
}

for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
for (i = 0; i < ht->size; i++) {
dictEntry *he;

if (ht->table[i] == NULL) {
clvector[0]++;
continue;
}
slots++;
/* For each hash entry on this slot... */
chainlen = 0;
he = ht->table[i];
while(he) {
chainlen++;
he = he->next;
}
clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
if (chainlen > maxchainlen) maxchainlen = chainlen;
totchainlen += chainlen;
}
printf("Hash table stats:\n");
printf(" table size: %ld\n", ht->size);
printf(" number of elements: %ld\n", ht->used);
printf(" different slots: %ld\n", slots);
printf(" max chain length: %ld\n", maxchainlen);
printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
printf(" Chain length distribution:\n");
for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
if (clvector[i] == 0) continue;
printf(" %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
}
}

//调试函数,输出字典的相关信息
void dictPrintStats(dict *d) {
_dictPrintStatsHt(&d->ht[0]);
if (dictIsRehashing(d)) {
printf("-- Rehashing into ht[1]:\n");
_dictPrintStatsHt(&d->ht[1]);
}
}

//控制是否允许resize
void dictEnableResize(void) {
dict_can_resize = 1;
}

void dictDisableResize(void) {
dict_can_resize = 0;
}

#if 0

/* The following are just example hash table types implementations.
* Not useful for Redis so they are commented out.
*/
//作者的样例代码
/* ----------------------- StringCopy Hash Table Type ------------------------*/

static unsigned int _dictStringCopyHTHashFunction(const void *key)
{
return dictGenHashFunction(key, strlen(key));
}

static void *_dictStringDup(void *privdata, const void *key)
{
int len = strlen(key);
char *copy = zmalloc(len+1);
DICT_NOTUSED(privdata);

memcpy(copy, key, len);
copy[len] = '\0';
return copy;
}

static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
const void *key2)
{
DICT_NOTUSED(privdata);

return strcmp(key1, key2) == 0;
}

static void _dictStringDestructor(void *privdata, void *key)
{
DICT_NOTUSED(privdata);

zfree(key);
}

dictType dictTypeHeapStringCopyKey = {
_dictStringCopyHTHashFunction, /* hash function */
_dictStringDup, /* key dup */
NULL, /* val dup */
_dictStringCopyHTKeyCompare, /* key compare */
_dictStringDestructor, /* key destructor */
NULL /* val destructor */
};

/* This is like StringCopy but does not auto-duplicate the key.
* It's used for intepreter's shared strings. */
dictType dictTypeHeapStrings = {
_dictStringCopyHTHashFunction, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
_dictStringCopyHTKeyCompare, /* key compare */
_dictStringDestructor, /* key destructor */
NULL /* val destructor */
};

/* This is like StringCopy but also automatically handle dynamic
* allocated C strings as values. */
dictType dictTypeHeapStringCopyKeyValue = {
_dictStringCopyHTHashFunction, /* hash function */
_dictStringDup, /* key dup */
_dictStringDup, /* val dup */
_dictStringCopyHTKeyCompare, /* key compare */
_dictStringDestructor, /* key destructor */
_dictStringDestructor, /* val destructor */
};
#endif

2.5 跳跃表

redis.h(跳跃表相关部分)

//跳跃表就是给有序list添加多级索引,能够以更大的步长遍历list,提高查找效率,属于空间换时间。

#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */

//跳跃表的节点
typedef struct zskiplistNode {
//节点真正存储的数据,类型是redis定义的对象,也是在redis.h中定义的
robj *obj;
//分值。在跳跃表中,节点按各自所保存的分值从小到大排列
//跳跃表中各个节点保存的对象必须是唯一的,但多个节点的分值可以是相同的,分值相同的节点将按照对象在字典序中的大小来进行排序
double score;
//后退指针,用于从表尾向表头遍历
struct zskiplistNode *backward;
//节点所在层
struct zskiplistLevel {
//前进指针,用于从表头向表尾遍历
//一个节点有多个不同跨度的前进指针,但只有一个后退指针,所以只有正向遍历是跳跃的,反向遍历只能退到前一个节点
struct zskiplistNode *forward;
//跨度(步长),记录了前进指针所指向的节点到当前节点的距离。因为是有跨度的遍历,所以遍历到某个节点后,将沿途所有经过的节点的跨度相加,就是该节点在跳跃表中的次序(rank)
unsigned int span;
} level[];
} zskiplistNode;

//跳跃表
typedef struct zskiplist {
//表头节点和表尾节点
struct zskiplistNode *header, *tail;
//表中节点的数量,表头节点不计算在内,因为头结点只存储层次不存储数据
unsigned long length;
//表中层数最大的节点的层数,表头节点的层数不计算在内
int level;
} zskiplist;

/* Sorted sets data type */
zskiplist *zslCreate(void);
void zslFree(zskiplist *zsl);
zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj);

t_zset.c(跳跃表相关部分)

//创建跳跃表的节点。设置存储对象和分值,level参数只用于申请空间,level数组并没有初始化
zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
zskiplistNode *zn = zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel));
zn->score = score;
zn->obj = obj;
return zn;
}

//创建跳跃表
zskiplist *zslCreate(void) {
int j;
zskiplist *zsl;

zsl = zmalloc(sizeof(*zsl));
//初始化最高层数是1,不考虑头结点
zsl->level = 1;
//初始节点数0
zsl->length = 0;
//头结点的层数设成上限,分值是0(最小),存储的对象是NULL。每层初始化前进指针和跨度
zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
zsl->header->level[j].forward = NULL;
zsl->header->level[j].span = 0;
}
zsl->header->backward = NULL;
zsl->tail = NULL;
return zsl;
}

//释放节点,这里不用考虑表的链接,因为只有两种情况会释放节点,一个是要释放整个表,链接自然就不用管了,另一个是先删除节点再释放节点,删除节点的函数中已经调整好链接了,这里就不用管了
void zslFreeNode(zskiplistNode *node) {
//节点存储的对象的引用计数减一,在object.c中定义的
decrRefCount(node->obj);
//释放节点指针
zfree(node);
}

//释放跳跃表
void zslFree(zskiplist *zsl) {
zskiplistNode *node = zsl->header->level[0].forward, *next;
//先释放头结点
zfree(zsl->header);
//沿着第0层遍历节点并释放,因为第0层的跨度是1,就等于按顺序遍历全部节点
while(node) {
next = node->level[0].forward;
zslFreeNode(node);
node = next;
}
//最后释放表的指针
zfree(zsl);
}

//返回一个介于1和ZSKIPLIST_MAXLEVEL之间的随机值,作为节点的层数
//根据幂次定律(power law),越大的层数产生的几率就越小
int zslRandomLevel(void) {
int level = 1;
//random返回一个long int范围内的随机整数,random()&0xFFFF的结果就是一个0到65535的随机整数
//ZSKIPLIST_P=0.25,每次level只有1/4的概率加一,所以结果就是越大的数越难得到
while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
level += 1;
return (level<ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL;
}

//给定分值和存储对象,创建新节点并插入跳跃表,返回该节点指针
zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj) {
//update数组存放寻找新节点位置的过程中经过的节点
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
//rank数组存放寻找新节点位置的过程中经过的节点的跨度累加和
unsigned int rank[ZSKIPLIST_MAXLEVEL];
int i, level;
//从头节点正向遍历
x = zsl->header;
//从高层到低层遍历,相当于先查高层索引再查低层索引,步子迈得越来越小,最后定位到新节点应该存放的位置
for (i = zsl->level-1; i >= 0; i--) {
/* store rank that is crossed to reach the insert position */
//遍历过程中记录的跨度是累加和,不是单个节点的跨度
//因为i是从大到小,所以最终的跨度和是存在rank[0]里,所以这里是先把上一层的rank值继承下来
rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];
//因为跳跃表从小到大排序,所以要找到当前层第一个分值大于新节点,或者分值相同但字典序大于新节点的节点,此时退出循环,x就是本层中新节点的左侧邻居节点。但新节点插入的位置必须精确到真正相邻的两个节点之间,而高层里相邻的节点跨度可能大于1,并不是真正相邻,所以要进入下一层继续精确新节点的左邻居,直到新节点的左右邻居跨度是1,新节点只能插入这个唯一的间隙中,才算真正确定了插入位置。
//只有在没找到本层的左邻居时,rank值才会增加,所以rank[i]并不包括update[i]的跨度,所以rank[i]代表的是找到左邻居的过程中在本层遍历的总距离。又因为在当前层一定是从上一层左邻居的位置开始向右遍历,所以最后累加得到的rank[0]就是从头节点到新节点真正左邻居的距离。
while (x->level[i].forward &&
(x->level[i].forward->score < score ||
(x->level[i].forward->score == score &&
compareStringObjects(x->level[i].forward->obj,obj) < 0))) {
//继承下来的rank值加上当前节点的rank值
rank[i] += x->level[i].span;
x = x->level[i].forward;
}
//记录新节点在第i层的左邻居
update[i] = x;
}
/* we assume the key is not already inside, since we allow duplicated
* scores, and the re-insertion of score and redis object should never
* happpen since the caller of zslInsert() should test in the hash table
* if the element is already inside or not. */
//新节点的最高层数是随机的
//为什么不把索引设成固定间隔的?随机的会不会性能不太好?
level = zslRandomLevel();
//如果新节点的最高层比整个表的最高层(不包括头节点)低,那么新节点最高层以上的层次就不用关心了
//反之,如果新节点最高层创了新高,就需要多做一些修改
if (level > zsl->level) {
for (i = zsl->level; i < level; i++) {
rank[i] = 0;
//因为是前所未有的新高层,所以左邻居只能是头节点
update[i] = zsl->header;
//头节点的跨度为什么是节点数?不应该是rank[0]吗?
update[i]->level[i].span = zsl->length;
}
//更新跳跃表的最高层数
zsl->level = level;
}
//创建新节点
x = zslCreateNode(level,score,obj);
//与每一层的左右邻居做链接
for (i = 0; i < level; i++) {
//插入以后,原本是新节点左邻居的右邻居就成了新节点的右邻居,用前进指针链接
x->level[i].forward = update[i]->level[i].forward;
//新节点x就成了他左邻居的右邻居,用前进指针链接
update[i]->level[i].forward = x;

/* update span covered by update[i] as x is inserted here */
//update[i]->level[i].span是第i层中左邻居到右邻居的跨度,rank[0]-rank[i]是第i层左邻居到第0层左邻居的距离,所以二者相减得到的是第0层左邻居到第i层右邻居的跨度。新节点插入后,新节点到第i层右邻居的跨度就等于原本第0层左邻居到第i层右邻居的跨度。
x->level[i].span = update[i]->level[i].span - (rank[0] - rank[i]);
//左邻居的跨度要改成左邻居到新节点的距离
update[i]->level[i].span = (rank[0] - rank[i]) + 1;
}

/* increment span for untouched levels */
//新节点最高层以上的层次找到的也都是左邻居,所以新节点插入后它们的跨度都加一
for (i = level; i < zsl->level; i++) {
update[i]->level[i].span++;
}
//后退指针指向左邻居,后退指针只和第0层有关,所以不用循环
x->backward = (update[0] == zsl->header) ? NULL : update[0];
//如果新节点有右邻居,就把右邻居的后退指针指向新节点
if (x->level[0].forward)
x->level[0].forward->backward = x;
else
//如果没有右邻居,自然就成了尾结点
zsl->tail = x;
//跳跃表节点数加一
zsl->length++;
return x;
}

/* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
//删除节点x
void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) {
int i;
//遍历update每一层的节点,一定是x左边的节点,但不一定是左邻居,因为x的最高层数可能低于i
for (i = 0; i < zsl->level; i++) {
//如果是左邻居,就要继承x的跨度,前进指针指向x的下一个节点
if (update[i]->level[i].forward == x) {
update[i]->level[i].span += x->level[i].span - 1;
update[i]->level[i].forward = x->level[i].forward;
//如果不是左邻居,只是跨度减一
} else {
update[i]->level[i].span -= 1;
}
}
//如果x有右邻居,就把右邻居的后退指针指向x的左邻居
if (x->level[0].forward) {
x->level[0].forward->backward = x->backward;
//如果没有右邻居,x就是尾结点,重新设置尾结点就行了
} else {
zsl->tail = x->backward;
}
//如果删掉的节点是最高的,就需要重新设置跳跃表的最高层数
while(zsl->level > 1 && zsl->header->level[zsl->level-1].forward == NULL)
zsl->level--;
//节点数减一
zsl->length--;
}

/* Delete an element with matching score/object from the skiplist. */
//删除跳跃表中包含给定对象和分值的节点
int zslDelete(zskiplist *zsl, double score, robj *obj) {
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
int i;

x = zsl->header;
//和插入节点的逻辑一样,根据分值从高层到底层找左邻居,记录左邻居是为了删除节点后调整指针链接
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward &&
(x->level[i].forward->score < score ||
(x->level[i].forward->score == score &&
compareStringObjects(x->level[i].forward->obj,obj) < 0)))
x = x->level[i].forward;
update[i] = x;
}
/* We may have multiple elements with the same score, what we need
* is to find the element with both the right score and object. */
x = x->level[0].forward;
//找到分值相同的节点之后,再找存储的对象是obj的节点
if (x && score == x->score && equalStringObjects(x->obj,obj)) {
//先删除再释放节点
zslDeleteNode(zsl, x, update);
zslFreeNode(x);
return 1;
} else {
return 0; /* not found */
}
return 0; /* not found */
}

/* Struct to hold a inclusive/exclusive range spec. */
//分值范围
typedef struct {
//最小值和最大值
double min, max;
//最小值和最大值是否包含在本范围里,表示区间开闭
//ex是exclusive的意思,所以0表示包含(闭区间),1表示不包含(开区间)
int minex, maxex; /* are min or max exclusive? */
} zrangespec;

/* Delete all the elements with score between min and max from the skiplist.
* Min and mx are inclusive, so a score >= min || score <= max is deleted.
* Note that this function takes the reference to the hash table view of the
* sorted set, in order to remove the elements from the hash table too. */
//给定一个分值范围,删除跳跃表中所有在这个范围之内的节点,同时也在字典中删除该节点
unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec range, dict *dict) {
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
unsigned long removed = 0;
int i;

x = zsl->header;
//找到分值范围的左邻居
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward && (range.minex ?
x->level[i].forward->score <= range.min :
x->level[i].forward->score < range.min))
x = x->level[i].forward;
update[i] = x;
}

/* Current node is the last with score < or <= min. */
//在第0层非跳跃地遍历节点
x = x->level[0].forward;

/* Delete nodes while in range. */
//在跳跃表和字典中删除节点,最后释放该节点
while (x && (range.maxex ? x->score < range.max : x->score <= range.max)) {
zskiplistNode *next = x->level[0].forward;
zslDeleteNode(zsl,x,update);
dictDelete(dict,x->obj);
zslFreeNode(x);
removed++;
x = next;
}
//返回删除的节点数
return removed;
}

/* Delete all the elements with rank between start and end from the skiplist.
* Start and end are inclusive. Note that start and end need to be 1-based */
//给定一个排序范围,删除跳跃表中所有在这个范围之内的节点
//大体逻辑同zslDeleteRangeByScore,最后也是返回删除的节点数
unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned int end, dict *dict) {
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
unsigned long traversed = 0, removed = 0;
int i;

x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward && (traversed + x->level[i].span) < start) {
traversed += x->level[i].span;
x = x->level[i].forward;
}
update[i] = x;
}

traversed++;
x = x->level[0].forward;
while (x && traversed <= end) {
zskiplistNode *next = x->level[0].forward;
zslDeleteNode(zsl,x,update);
dictDelete(dict,x->obj);
zslFreeNode(x);
removed++;
traversed++;
x = next;
}
return removed;
}

/* Find the first node having a score equal or greater than the specified one.
* Returns NULL if there is no match. */
//获取第一个分值不小于给定分值的节点
zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) {
zskiplistNode *x;
int i;

x = zsl->header;
//直接从高层到低层找,不用记录左邻居了。最后找到第0层,得到的就是真正的左邻居
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward && x->level[i].forward->score < score)
x = x->level[i].forward;
}
/* We may have multiple elements with the same score, what we need
* is to find the element with both the right score and object. */
//返回左邻居的下个节点
return x->level[0].forward;
}

/* Find the rank for an element by both score and key.
* Returns 0 when the element cannot be found, rank otherwise.
* Note that the rank is 1-based due to the span of zsl->header to the
* first element. */
//返回包含给定对象和分值的节点在跳跃表中的排序
//和zslInsert中一样,从高层到低层遍历,通过累加途径节点的跨度来得到节点在表中的rank值
unsigned long zslGetRank(zskiplist *zsl, double score, robj *o) {
zskiplistNode *x;
unsigned long rank = 0;
int i;

x = zsl->header;
//和
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward &&
(x->level[i].forward->score < score ||
(x->level[i].forward->score == score &&
compareStringObjects(x->level[i].forward->obj,o) <= 0))) {
rank += x->level[i].span;
x = x->level[i].forward;
}

/* x might be equal to zsl->header, so test if obj is non-NULL */
if (x->obj && equalStringObjects(x->obj,o)) {
return rank;
}
}
return 0;
}

/* Finds an element by its rank. The rank argument needs to be 1-based. */
//返回跳跃表在给定排序上的节点
//从高层到底层遍历,记录节点的rank值,每当下个节点的rank值高于给定的rank,就跳到下一层继续遍历,因为到了下一层右邻居会更左
zskiplistNode* zslGetElementByRank(zskiplist *zsl, unsigned long rank) {
zskiplistNode *x;
unsigned long traversed = 0;
int i;

x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward && (traversed + x->level[i].span) <= rank)
{
traversed += x->level[i].span;
x = x->level[i].forward;
}
if (traversed == rank) {
return x;
}
}
return NULL;
}

/* Populate the rangespec according to the objects min and max. */
//把spec的左右边界设成min和max
static int zslParseRange(robj *min, robj *max, zrangespec *spec) {
char *eptr;
//初始默认是闭区间
spec->minex = spec->maxex = 0;

/* Parse the min-max interval. If one of the values is prefixed
* by the "(" character, it's considered "open". For instance
* ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
* ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
//对象的ptr指针指向底层的数据结构
//如果对象的数据结构是整型,直接赋给spec,也是默认闭区间
if (min->encoding == REDIS_ENCODING_INT) {
spec->min = (long)min->ptr;
} else {
//如果不是整型,对象就是用字符串表示的数
//字符串以'('开头表示开区间
if (((char*)min->ptr)[0] == '(') {
//strtod将ptr指向的字符串转换成浮点数,如果发生错误就让eptr指针指向出错的字符
spec->min = strtod((char*)min->ptr+1,&eptr);
//如果转换出错了,返回错误码
if (eptr[0] != '\0' || isnan(spec->min)) return REDIS_ERR;
//开区间的exclusive是1
spec->minex = 1;
} else {
spec->min = strtod((char*)min->ptr,&eptr);
if (eptr[0] != '\0' || isnan(spec->min)) return REDIS_ERR;
}
}
//右边界设置同上
if (max->encoding == REDIS_ENCODING_INT) {
spec->max = (long)max->ptr;
} else {
if (((char*)max->ptr)[0] == '(') {
spec->max = strtod((char*)max->ptr+1,&eptr);
if (eptr[0] != '\0' || isnan(spec->max)) return REDIS_ERR;
spec->maxex = 1;
} else {
spec->max = strtod((char*)max->ptr,&eptr);
if (eptr[0] != '\0' || isnan(spec->max)) return REDIS_ERR;
}
}
//设置成功,返回成功码
return REDIS_OK;
}

3 内存编码结构相关

3.1 整数集合数据结构

intset.h

#ifndef __INTSET_H
#define __INTSET_H
#include <stdint.h>

typedef struct intset {
//编码方式,包括int16_t、int32_t和int64_t
uint32_t encoding;
//集合中元素数量
uint32_t length;
//存储元素的有序数组,要实现的集合是无序的,但底层的数组是有序的
//虽然contents属性声明为int8_t类型的数组,但实际上contents数组的真正类型取决于encoding属性的值
//由于contents所有元素的类型要一致,因此contents的类型是由其中最大的数决定的
int8_t contents[];
} intset;

#endif // __INTSET_H

intset.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "intset.h"
#include "zmalloc.h"

/* Note that these encodings are ordered, so:
* INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
//int的编码方式
//int16_t,2bytes
#define INTSET_ENC_INT16 (sizeof(int16_t))
//int32_t,4bytes
#define INTSET_ENC_INT32 (sizeof(int32_t))
//int64_t,8bytes
#define INTSET_ENC_INT64 (sizeof(int64_t))

/* Return the required encoding for the provided value. */
//根据给定数的大小,返回满足需求又最省空间的int类型
static uint8_t _intsetValueEncoding(int64_t v) {
if (v < INT32_MIN || v > INT32_MAX)
return INTSET_ENC_INT64;
else if (v < INT16_MIN || v > INT16_MAX)
return INTSET_ENC_INT32;
return INTSET_ENC_INT16;
}

/* Return the value at pos, given an encoding. */
//得到enc编码方式下的第pos个位置的值
//因为呈现给用户的intset是无序的,所以能访问底层数组contents的函数必须是声明为static的内部函数
static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
if (enc == INTSET_ENC_INT64)
return ((int64_t*)is->contents)[pos];
else if (enc == INTSET_ENC_INT32)
return ((int32_t*)is->contents)[pos];
return ((int16_t*)is->contents)[pos];
}

/* Return the value at pos, using the configured encoding. */
//得到is中第pos个位置的值
static int64_t _intsetGet(intset *is, int pos) {
return _intsetGetEncoded(is,pos,is->encoding);
}

/* Set the value at pos, using the configured encoding. */
//把集合is第pos个位置的值设为value
static void _intsetSet(intset *is, int pos, int64_t value) {
if (is->encoding == INTSET_ENC_INT64)
((int64_t*)is->contents)[pos] = value;
else if (is->encoding == INTSET_ENC_INT32)
((int32_t*)is->contents)[pos] = value;
else
((int16_t*)is->contents)[pos] = value;
}

/* Create an empty intset. */
//创建空的整数集合
intset *intsetNew(void) {
intset *is = zmalloc(sizeof(intset));
//默认使用int16_t,只支持升级,不支持降级
is->encoding = INTSET_ENC_INT16;
is->length = 0;
return is;
}

/* Resize the intset */
//因为结构体的内存空间是连续的,所以添加新元素之前要先扩容,删除元素之后要收缩。len是新的元素总数
static intset *intsetResize(intset *is, uint32_t len) {
//is->encoding就是表示一个整数需要的bit数
uint32_t size = len*is->encoding;
//需要的空间是结构体大小加所有元素需要的大小
is = zrealloc(is,sizeof(intset)+size);
return is;
}

/* Search for the position of "value". Return 1 when the value was found and
* sets "pos" to the position of the value within the intset. Return 0 when
* the value is not present in the intset and sets "pos" to the position
* where "value" can be inserted. */
//在集合的有序数组中查询给定的value,如果找到,把索引赋给pos,如果找不到,把value能插入的位置赋给pos
//返回1表示找到了,返回0表示没找到
//采用二分查找
static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
int min = 0, max = is->length-1, mid = -1;
int64_t cur = -1;

/* The value can never be found when the set is empty */
//集合是空的,直接返回0,pos=0表示value可以直接插在数组头部
if (is->length == 0) {
if (pos) *pos = 0;
return 0;
} else {
/* Check for the case where we know we cannot find the value,
* but do know the insert position. */
//因为数组是有序的,所以先和数组收尾比较,如果value小于左边界或大于右边界,就说明不在数组中,pos设置为数组的头或尾
if (value > _intsetGet(is,is->length-1)) {
if (pos) *pos = is->length;
return 0;
} else if (value < _intsetGet(is,0)) {
if (pos) *pos = 0;
return 0;
}
}

//二分查找,cur记录的是数组中mid索引的值
while(max >= min) {
mid = (min+max)/2;
cur = _intsetGet(is,mid);
if (value > cur) {
min = mid+1;
} else if (value < cur) {
max = mid-1;
} else {
break;
}
}

//退出上面循环的一种情况是value==cur,pos设为cur的索引mid
if (value == cur) {
if (pos) *pos = mid;
return 1;
//另一种情况是max<min,此时value的插入位置就是新的min(想一想就知道)
} else {
if (pos) *pos = min;
return 0;
}
}

/* Upgrades the intset to a larger encoding and inserts the given integer. */
//升级is的编码(int类型),并插入新的value
//因为调用该函数的条件是value超过了is支持的表示范围,所以编码要先升级,而且value一定是插在数组头或尾
static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
uint8_t curenc = is->encoding;
//选择刚好适合新元素value的int类型
uint8_t newenc = _intsetValueEncoding(value);
int length = is->length;
//负数插在数组头,正数插在数组尾
int prepend = value < 0 ? 1 : 0;

/* First set new encoding and resize */
//修改is的encoding,再为新元素扩容
is->encoding = newenc;
is = intsetResize(is,is->length+1);

/* Upgrade back-to-front so we don't overwrite values.
* Note that the "prepend" variable is used to make sure we have an empty
* space at either the beginning or the end of the intset. */
//在扩容以后的数组中
//如果value是负数,就从后到前把每个元素插到它后面的位置,最后空出数组头来存放value
//如果value是正数,就从后到前把每个元素插到它当前的位置,相当于啥也没干,最后空出数组尾来存放value
while(length--)
_intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));

/* Set the value at the beginning or the end. */
//负数插在数组头,正数插在数组尾
if (prepend)
_intsetSet(is,0,value);
else
_intsetSet(is,is->length,value);
//元素数量加一
is->length++;
return is;
}

//把集合中从from位置开始的所有元素移动到从to位置开始
//memmove比memcpy更安全。如果目标区域和源区域有重叠的话,memmove能够保证被覆盖之前将重叠区域的字节拷贝到目标区域中
//使用memmove要设定移动的长度,所以要分别考虑三种不同长度的int类型
static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
void *src, *dst;
//bytes是from之后的总元素数量
uint32_t bytes = is->length-from;
if (is->encoding == INTSET_ENC_INT64) {
src = (int64_t*)is->contents+from;
dst = (int64_t*)is->contents+to;
//元素数量乘以单个元素占用的空间就是要移动的内存长度
bytes *= sizeof(int64_t);
} else if (is->encoding == INTSET_ENC_INT32) {
src = (int32_t*)is->contents+from;
dst = (int32_t*)is->contents+to;
bytes *= sizeof(int32_t);
} else {
src = (int16_t*)is->contents+from;
dst = (int16_t*)is->contents+to;
bytes *= sizeof(int16_t);
}
memmove(dst,src,bytes);
}

/* Insert an integer in the intset */
//向集合is中插入value
//success只是个flag,大概只有测试的时候用得到
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
//查询适合value的int类型
uint8_t valenc = _intsetValueEncoding(value);
uint32_t pos;
//初始默认插入成功
if (success) *success = 1;

/* Upgrade encoding if necessary. If we need to upgrade, we know that
* this value should be either appended (if > 0) or prepended (if < 0),
* because it lies outside the range of existing values. */
//适合value的int长度比is支持的要长,就要先扩容再添加,而且超出规格说明value本来也不在is中
if (valenc > is->encoding) {
/* This always succeeds, so we don't need to curry *success. */
return intsetUpgradeAndAdd(is,value);
} else {
/* Abort if the value is already present in the set.
* This call will populate "pos" with the right position to insert
* the value when it cannot be found. */
//如果value已经在is中了,集合元素不能重复,success设为0表示插入失败
if (intsetSearch(is,value,&pos)) {
if (success) *success = 0;
return is;
}
//先给新元素扩容
is = intsetResize(is,is->length+1);
//之前调用intsetSearch时,已经把应该插入的位置赋给了pos
//插入前要把pos之后的全部元素后移一位
if (pos < is->length) intsetMoveTail(is,pos,pos+1);
}
//在pos处插入新元素
_intsetSet(is,pos,value);
is->length++;
return is;
}

/* Delete integer from intset */
//从is中移除元素value
//和intsetAdd的基本逻辑相同
intset *intsetRemove(intset *is, int64_t value, int *success) {
uint8_t valenc = _intsetValueEncoding(value);
uint32_t pos;
if (success) *success = 0;

if (valenc <= is->encoding && intsetSearch(is,value,&pos)) {
/* We know we can delete */
if (success) *success = 1;

/* Overwrite value with tail and update length */
//删除pos位置的元素,就是把pos+1之后的元素都前移一位
if (pos < (is->length-1)) intsetMoveTail(is,pos+1,pos);
//数组长度收缩一位
is = intsetResize(is,is->length-1);
is->length--;
}
return is;
}

/* Determine whether a value belongs to this set */
//查询value是否在集合is中
//intsetSearch函数里是直接查,但是实际情况中要先判断value是否超出了集合支持的int类型的表示范围
uint8_t intsetFind(intset *is, int64_t value) {
uint8_t valenc = _intsetValueEncoding(value);
return valenc <= is->encoding && intsetSearch(is,value,NULL);
}

/* Return random member */
//随机返回集合中一个元素
int64_t intsetRandom(intset *is) {
return _intsetGet(is,rand()%is->length);
}

/* Sets the value to the value at the given position. When this position is
* out of range the function returns 0, when in range it returns 1. */
//获取pos位置上的元素
//仅仅是在_intsetGet之前判断了pos索引是否越界,没必要单独写成一个函数
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
if (pos < is->length) {
*value = _intsetGet(is,pos);
return 1;
}
return 0;
}

/* Return intset length */
//返回集合的元素总数
uint32_t intsetLen(intset *is) {
return is->length;
}

3.2 压缩列表

ziplist.h

//标记ziplist的头部和尾部,在插入新元素时会用到
#define ZIPLIST_HEAD 0
#define ZIPLIST_TAIL 1

ziplist.c

/* Ziplist是为了节约内存而设计的特殊的双端队列,Ziplist能存储strings和integer值,整型值被存储为实际的整型值而不是字符串。Ziplist在头部和尾部的操作时间O(1),但是由于ziplist的操作都需要重新分配内存,所以实际的复杂度和ziplist使用的内存大小有关。
* ----------------------------------------------------------------------------
*
* ZIPLIST OVERALL LAYOUT:
* The general layout of the ziplist is as follows:
* <zlbytes><zltail><zllen><entry><entry><zlend>
*
* zlbytes,uint32_t,4字节,记录整个压缩列表占用的内存字节数:在对压缩列表进行内存重分配, 或者计算 zlend 的位置时使用
* zltail,uint32_t,4字节,记录压缩列表最后一个entry距离压缩列表的起始地址有多少字节,通过这个偏移量,程序无须遍历整个压缩列表就可以确定表尾节点的地址。
* zllen,uint16_t,2字节,记录了压缩列表包含的节点数量,当这个属性的值小于 UINT16_MAX(65535)时,这个属性的值就是压缩列表包含节点的数量,当这个值等于UINT16_MAX 时,节点的真实数量需要遍历整个压缩列表才能计算得出。
* zlend,uint8_t,1字节,特殊值 0xFF(十进制255),用于标记压缩列表的末端
*
* ZIPLIST ENTRIES:
*
* 每个压缩列表节点都由 previous_entry_length 、encoding 、content 三个部分组成。
*
* 节点的 previous_entry_length 属性以字节为单位,记录了压缩列表中前一个节点的长度,previous_entry_length的长度可以是1字节或者5字节。如果前一节点的长度小于254字节,那么previous_entry_length长度就是1字节,因为1字节可以表示0~253(0xFD)范围内的值。如果前一节点的长度大于等于254字节,那么previous_entry_length的长度就是5字节,其中第一个字节会被设置为 0xFE(254),而之后的四个字节则用于保存前一节点的长度。这是由于0xFF已经是ziplist的结束标字节了,所以entry的flag字节最大只能是0xFE,所以表示的数就以254为界限分成上述两种情况。
*
* 节点的 encoding 属性记录了节点的 content 属性所保存数据的类型以及长度。如果是1字节、2字节或者5字节长,且值的最高位对应着分别为00、01或10,表示节点的content保存着字节数组,数组的长度由编码除去最高两位之后的其他位记录。如果是1字节长,且值的最高位以11,表示节点的content保存着整数值,整数值的类型由编码除去最高两位之后的其他位记录,整数的长度无需记录。
*
* |00pppppp| - 1 byte
* String value with length less than or equal to 63 bytes (6 bits).
* |01pppppp|qqqqqqqq| - 2 bytes
* String value with length less than or equal to 16383 bytes (14 bits).
* |10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes
* String value with length greater than or equal to 16384 bytes.
* |1100____| - 1 byte
* Integer encoded as int16_t (2 bytes).
* |1101____| - 1 byte
* Integer encoded as int32_t (4 bytes).
* |1110____| - 1 byte
* Integer encoded as int64_t (8 bytes).
*
* 节点的 content 属性负责保存节点的值,节点值可以是一个字节数组或者整数,值的类型和长度由节点的 encoding 属性决定。
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include <limits.h>
#include "zmalloc.h"
#include "ziplist.h"

//把long long类型的value转换成字符串,字符串存储在s中,len是s的最大长度
//返回值是s的实际长度
int ll2string(char *s, size_t len, long long value);

//0xFF,也就是zlend的值
#define ZIP_END 255
//previous_entry_length两种情况的分界
#define ZIP_BIGLEN 254

/* Different encoding/length possibilities */
//entry可使用的编码
//1字节,00开头,后6位表示字节数组的长度
#define ZIP_STR_06B (0 << 6)
//2字节,01开头,后14位表示字节数组的长度
#define ZIP_STR_14B (1 << 6)
//5字节,10000000开头,后32位表示字节数组的长度
#define ZIP_STR_32B (2 << 6)
//整数的编码是1字节,前两位都是11,后六位表示int类型
//11000000,int16_t,后面的content是16位
#define ZIP_INT_16B (0xc0 | 0<<4)
//11010000,int32_t,后面的content是32位
#define ZIP_INT_32B (0xc0 | 1<<4)
//11100000,int64_t,后面的content是64位
#define ZIP_INT_64B (0xc0 | 2<<4)

/* Macro's to determine type */
//判断节点存的是不是字符串,特征是编码的第一字节小于0xc0(11000000)
#define ZIP_IS_STR(enc) (((enc) & 0xc0) < 0xc0)
//判断节点存的是不是整数,!ZIP_IS_STR(enc)说明是11开头,(enc) & 0x30) < 0x30说明不是1111开头,因为支持的三种整数编码没有1111开头的
#define ZIP_IS_INT(enc) (!ZIP_IS_STR(enc) && ((enc) & 0x30) < 0x30)

/* Utility macros */
//zl就是ziplist,本质其实就是一个字符串
//返回zlbytes字段的指针,在zl的1~4字节,(uint32_t*)表示取zl前4个字节
#define ZIPLIST_BYTES(zl) (*((uint32_t*)(zl)))
//返回zltail字段的指针,在zl的5~8字节
#define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t))))
//返回zllen字段的指针,在zl的9~10字节
#define ZIPLIST_LENGTH(zl) (*((uint16_t*)((zl)+sizeof(uint32_t)*2)))
//返回zl的header的长度,就是zlbytes、zltail、zllen三个字段的总长
#define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t))
//返回第一个节点的指针
#define ZIPLIST_ENTRY_HEAD(zl) ((zl)+ZIPLIST_HEADER_SIZE)
//返回最后一个节点的指针
#define ZIPLIST_ENTRY_TAIL(zl) ((zl)+ZIPLIST_TAIL_OFFSET(zl))
//返回zlend字段的指针
#define ZIPLIST_ENTRY_END(zl) ((zl)+ZIPLIST_BYTES(zl)-1)

/* We know a positive increment can only be 1 because entries can only be
* pushed one at a time. */
//增加ziplist的节点数,就是修改zllen字段的值
#define ZIPLIST_INCR_LENGTH(zl,incr) { \
if (ZIPLIST_LENGTH(zl) < UINT16_MAX) ZIPLIST_LENGTH(zl)+=incr; }

//ziplist的节点
//entry是ziplist字符串的一个子串
//zlentry结构体用于描述entry的各个属性的一种结构
typedef struct zlentry {
//prevrawlen是前一个entry子串content字段的长度(字节数)
//prevrawlensize是当前entry中表示prevrawlen所需的字节数
//这两个属性对应entry的previous_entry_length字段
unsigned int prevrawlensize, prevrawlen;

//len是当前entry子串content字段的长度(字节数)
//lensize是表示len所需的字节数
//这两个属性对应entry的、encoding字段
unsigned int lensize, len;

//当前entry子串中header的字节数,headersize=prevrawlensize+lensize
unsigned int headersize;
//当前节点值所使用的编码类型
unsigned char encoding;
//指向当前节点表示的entry子串的指针
unsigned char *p;
} zlentry;

/* Return the encoding pointer to by 'p'. */
//返回p指向的entry的编码类型
//该函数用于计算zlentry结构体中的encoding属性,因为encoding属性的char类型,是1个字节,所以函数返回的也应该是char类型。entry的编码类型确实只需要一个字节就能表示六种编码:ZIP_STR_06B是0x00,ZIP_STR_14B是0x40,ZIP_STR_32B是0x80,ZIP_INT_16B是0xc0,ZIP_INT_32B是0xd0,ZIP_INT_64B是0xe0
//因为编码类型要从encoding字段中解析,所以p不应该指向entry的头部,而是要向后偏移prevrawlensize个字节,指向entry的encoding字段
static unsigned int zipEntryEncoding(unsigned char *p) {
/* String encoding: 2 MSBs */
//如果是字符串类型,b就是0x00、0x40或0x80
unsigned char b = p[0] & 0xc0;
if (b < 0xc0) {
return b;
//如果是整数类型,p[0] & 0xf0的结果就是0xc0、0xd0或0xe0
} else {
/* Integer encoding: 4 MSBs */
return p[0] & 0xf0;
}
//不是字符串也不是整数,强制让assert为false,向stderr报错
assert(NULL);
return 0;
}

/* Return bytes needed to store integer encoded by 'encoding' */
//用于返回整数节点的len属性,也就是content字段存储的整数的字节数,三种int分别是2字节、4字节和8字节
//一个字符串的长度是不固定的,但是一个整数的长度是固定的,所以不需要计算,根据int类型就能知道整数占多少字节
static unsigned int zipIntSize(unsigned char encoding) {
switch(encoding) {
case ZIP_INT_16B: return sizeof(int16_t);
case ZIP_INT_32B: return sizeof(int32_t);
case ZIP_INT_64B: return sizeof(int64_t);
}
//传入的编码不支持,向stderr报错
assert(NULL);
return 0;
}

/* Decode the encoded length pointed by 'p'. If a pointer to 'lensize' is
* provided, it is set to the number of bytes required to encode the length. */
//用于解析entry的encoding字段,得到zlentry结构体的len和lensize属性
//p指向entry子串的encoding字段,len是content字段的字节数,lensize是表示len需要的字节数,
static unsigned int zipDecodeLength(unsigned char *p, unsigned int *lensize) {
//获取p的编码类型
unsigned char encoding = zipEntryEncoding(p);
unsigned int len = 0;
//如果是字符串
if (ZIP_IS_STR(encoding)) {
switch(encoding) {
//00开头的1字节编码
case ZIP_STR_06B:
//后6位是字符串长度,0x3f表示把p[0]的最高两位置0,len就是后6位表示的整数值
len = p[0] & 0x3f;
//6bit的整数需要1个字节表示
if (lensize) *lensize = 1;
break;
//01开头的2字节编码
case ZIP_STR_14B:
//后14位是字符串长度,0x3f表示把p[0]的最高两位置0,len就是后14位表示的整数值
len = ((p[0] & 0x3f) << 8) | p[1];
//14bit的整数需要2个字节表示
if (lensize) *lensize = 2;
break;
//10000000开头的5字节编码
case ZIP_STR_32B:
//后32位是字符串长度,用不到第一个字节,所以就不用考虑高位置0,len就是后4个字节表示的整数值
len = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
//4个字节的无符号整数,为什么需要5个字节表示呢?
if (lensize) *lensize = 5;
break;
//都不是就报错
default:
assert(NULL);
}
//如果是整数类型
} else {
//整数的长度只能是2字节、4字节或8字节
len = zipIntSize(encoding);
//2、4和8只需要1个字节表示
if (lensize) *lensize = 1;
}
return len;
}

/* Encode the length 'l' writing it in 'p'. If p is NULL it just returns
* the amount of bytes required to encode such a length. */
//rawlen是某个entry的content字段的长度(字节数),返回的是表示rawlen需要的字节数
//p指针指向entry的encoding字段,如果p!=NULL,就把算出来的encoding字段写到p地址上
//传进来的encoding是zlentry的属性,只有1个字节,要算出的是entry字符串中的encoding字段,可能是1字节、2字节或5字节
static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned int rawlen) {
//len是表示rawlen需要的字节数,默认是1
//buf存的是encoding字段的值
unsigned char len = 1, buf[5];
//如果是字符串
if (ZIP_IS_STR(encoding)) {
/* Although encoding is given it may not be set for strings,
* so we determine it here using the raw length. */
//如果rawlen不大于6bit整数的上限
if (rawlen <= 0x3f) {
//需要1个字节表示
if (!p) return len;
//ZIP_STR_06B后6位是0,相当于给rawlen加上00的前缀,变成encoding字段的值
buf[0] = ZIP_STR_06B | rawlen;
//如果rawlen不大于14bit整数的上限
} else if (rawlen <= 0x3fff) {
//需要2个字节表示
len += 1;
if (!p) return len;
//ZIP_STR_14B后14位是0,相当于先对rawlen最高两位置0,再给rawlen加上01的前缀,变成encoding字段的值
//为什么要置0呢?不大于0x3fff已经能确定最高两位是00了呀?
buf[0] = ZIP_STR_14B | ((rawlen >> 8) & 0x3f);
buf[1] = rawlen & 0xff;
//否则就是需要5个字节表示
} else {
len += 4;
if (!p) return len;
//buf就是ZIP_STR_32B前缀加上rawlen的后4个字节
buf[0] = ZIP_STR_32B;
buf[1] = (rawlen >> 24) & 0xff;
buf[2] = (rawlen >> 16) & 0xff;
buf[3] = (rawlen >> 8) & 0xff;
buf[4] = rawlen & 0xff;
}
//如果是整数编码
} else {
/* Implies integer encoding, so length is always 1. */
//默认的一个字节就足够
if (!p) return len;
//对于整数,zlentry的encoding属性和entry的encoding字段是相同的,因为只表示int类型不表示长度
//而对于字符串,即使都是一个字节,两个encoding的值也不相同,因为zlentry的encoding属性后6位是0,而entry的encoding字段后6位表示字符串长度
buf[0] = encoding;
}

/* Store this length at p */
//把buf的前len个字节写到p地址
memcpy(p,buf,len);
return len;
}

/* Decode the length of the previous element stored at "p". */
//返回前一个entry的content长度
//与上面的p不同,这里的p指向entry头部
static unsigned int zipPrevDecodeLength(unsigned char *p, unsigned int *lensize) {
unsigned int len = *p;
//第一个字节小于254,说明前一个entry的content小于254个字节
if (len < ZIP_BIGLEN) {
//需要1个字节就能表示
if (lensize) *lensize = 1;
//第一个字节等于254,说明前一个entry的content大于等于254
} else {
//需要多1个字节,因为第一个字节默认是0xfe的标志
if (lensize) *lensize = 1+sizeof(len);
//跳过第一个字节,读取长度值并赋给len
memcpy(&len,p+1,sizeof(len));
}
return len;
}

/* Encode the length of the previous entry and write it to "p". Return the
* number of bytes needed to encode this length if "p" is NULL. */
//返回表示len需要的字节数
//和zipPrevDecodeLength逻辑正好相反
static unsigned int zipPrevEncodeLength(unsigned char *p, unsigned int len) {
if (p == NULL) {
return (len < ZIP_BIGLEN) ? 1 : sizeof(len)+1;
} else {
if (len < ZIP_BIGLEN) {
p[0] = len;
return 1;
} else {
p[0] = ZIP_BIGLEN;
//跳过第一个字节,把长度值从第二个字节开始写入previous_entry_length字段
memcpy(p+1,&len,sizeof(len));
return 1+sizeof(len);
}
}
}

/* Encode the length of the previous entry and write it to "p". This only
* uses the larger encoding (required in __ziplistCascadeUpdate). */
//强制将上一个entry的content长度以0xfe开头5字节形式写入当前entry的previous_entry_length字段
static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) {
if (p == NULL) return;
p[0] = ZIP_BIGLEN;
memcpy(p+1,&len,sizeof(len));
}

/* Return the difference in number of bytes needed to store the new length
* "len" on the entry pointed to by "p". */
//计算表示len需要的字节数与上个entry的previous_entry_length字段的字节数之差
static int zipPrevLenByteDiff(unsigned char *p, unsigned int len) {
unsigned int prevlensize;
//prevlensize记录上个entry的previous_entry_length字段的字节数
zipPrevDecodeLength(p,&prevlensize);
return zipPrevEncodeLength(NULL,len)-prevlensize;
}

/* Check if string pointed to by 'entry' can be encoded as an integer.
* Stores the integer value in 'v' and its encoding in 'encoding'. */
//判断entrylen个字节的字符串类型的content能否转换为数值,如果可以返回1,否则返回0,并将拿到的数字赋值给*v, 将encoding字段赋值给 *encoding
static int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
long long value;
char *eptr;
char buf[32];

//最高支持32字节的有符号整数
if (entrylen >= 32 || entrylen == 0) return 0;
//'-'开头表示是负数
if (entry[0] == '-' || (entry[0] >= '0' && entry[0] <= '9')) {
int slen;

/* Perform a back-and-forth conversion to make sure that
* the string turned into an integer is not losing any info. */
//把字符串全部复制到buf数组
memcpy(buf,entry,entrylen);
//字符串的结束标记
buf[entrylen] = '\0';
//把buf转换成10进制的long long int类型整数,eptr返回可转换的子串的下一个字符
value = strtoll(buf,&eptr,10);
//可转换的子串下一个字符不是结束符,说明从eptr位置开始不能转换成整数,退出函数
if (eptr[0] != '\0') return 0;
//把value再转换成最长32字节的字符串
slen = ll2string(buf,32,value);
//转成数值再转回来,如果长度不相等,或者buf和entry不相等,说明出了问题
//为什么会出问题?
if (entrylen != (unsigned)slen || memcmp(buf,entry,slen)) return 0;

/* Great, the string can be encoded. Check what's the smallest
* of our encoding types that can hold this value. */
//根据value选择合适的编码
if (value >= INT16_MIN && value <= INT16_MAX) {
*encoding = ZIP_INT_16B;
} else if (value >= INT32_MIN && value <= INT32_MAX) {
*encoding = ZIP_INT_32B;
} else {
*encoding = ZIP_INT_64B;
}
*v = value;
//成功就返回1
return 1;
}
return 0;
}

/* Store integer 'value' at 'p', encoded as 'encoding' */
//指定编码类型,在p地址写入整数value,p指向entry的content字段
//参数的value是8字节的,根据encoding判断前多少个字节是有效的
static void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encoding) {
int16_t i16;
int32_t i32;
int64_t i64;
if (encoding == ZIP_INT_16B) {
i16 = value;
memcpy(p,&i16,sizeof(i16));
} else if (encoding == ZIP_INT_32B) {
i32 = value;
memcpy(p,&i32,sizeof(i32));
} else if (encoding == ZIP_INT_64B) {
i64 = value;
memcpy(p,&i64,sizeof(i64));
} else {
assert(NULL);
}
}

/* Read integer encoded as 'encoding' from 'p' */
//指定编码类型,从p地址取出存储的整数,p指向entry的content字段
static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) {
int16_t i16;
int32_t i32;
int64_t i64, ret = 0;
if (encoding == ZIP_INT_16B) {
memcpy(&i16,p,sizeof(i16));
ret = i16;
} else if (encoding == ZIP_INT_32B) {
memcpy(&i32,p,sizeof(i32));
ret = i32;
} else if (encoding == ZIP_INT_64B) {
memcpy(&i64,p,sizeof(i64));
ret = i64;
} else {
assert(NULL);
}
return ret;
}

/* Return a struct with all information about an entry. */
//解析指针p指向的entry字符串,生成zlentry结构
static zlentry zipEntry(unsigned char *p) {
zlentry e;
//p指向entry头部,解析previous_entry_length字段
//得到上个entry的content字段的字节数prevrawlen,以及表示prevrawlen所需的字节数prevrawlensize
e.prevrawlen = zipPrevDecodeLength(p,&e.prevrawlensize);
//p指向entry的encoding字段,解析encoding字段
//得到当前entry的content字段的字节数prevrawlen,以及表示len所需的字节数lensize
e.len = zipDecodeLength(p+e.prevrawlensize,&e.lensize);
//计算headersize,也就是header部分(content之前)的字节数
e.headersize = e.prevrawlensize+e.lensize;
//获取当前节点的编码类型
e.encoding = zipEntryEncoding(p+e.prevrawlensize);
//指针指向entry头部
e.p = p;
return e;
}

/* Return the total number of bytes used by the entry at "p". */
//返回p指向的entry的总字节数
static unsigned int zipRawEntryLength(unsigned char *p) {
//解析成zlentry结构
zlentry e = zipEntry(p);
//总字节数就是header字节数加content字节数
return e.headersize + e.len;
}

/* Create a new empty ziplist. */
//创建空的ziplist
unsigned char *ziplistNew(void) {
//ziplist的header固定10个字节,再加上末尾的zlend是1个字节
unsigned int bytes = ZIPLIST_HEADER_SIZE+1;
unsigned char *zl = zmalloc(bytes);
//zlbytes字段记录整个ziplist的总字节数,也就是bytes
ZIPLIST_BYTES(zl) = bytes;
//zltail字段记录表尾节点到表头的距离,因为新列表还没有节点,所以尾节点默认在header后面
ZIPLIST_TAIL_OFFSET(zl) = ZIPLIST_HEADER_SIZE;
//zllen字段记录entry的个数
ZIPLIST_LENGTH(zl) = 0;
//最后一个字节是0xff
zl[bytes-1] = ZIP_END;
return zl;
}

/* Resize the ziplist. */
//重新设置ziplist的大小,参数len是整个ziplist的字节数
static unsigned char *ziplistResize(unsigned char *zl, unsigned int len) {
zl = zrealloc(zl,len);
ZIPLIST_BYTES(zl) = len;
zl[len-1] = ZIP_END;
return zl;
}

/* When an entry is inserted, we need to set the prevlen field of the next
* entry to equal the length of the inserted entry. It can occur that this
* length cannot be encoded in 1 byte and the next entry needs to be grow
* a bit larger to hold the 5-byte encoded prevlen. This can be done for free,
* because this only happens when an entry is already being inserted (which
* causes a realloc and memmove). However, encoding the prevlen may require
* that this entry is grown as well. This effect may cascade throughout
* the ziplist when there are consecutive entries with a size close to
* ZIP_BIGLEN, so we need to check that the prevlen can be encoded in every
* consecutive entry.
*
* Note that this effect can also happen in reverse, where the bytes required
* to encode the prevlen field can shrink. This effect is deliberately ignored,
* because it can cause a "flapping" effect where a chain prevlen fields is
* first grown and then shrunk again after consecutive inserts. Rather, the
* field is allowed to stay larger than necessary, because a large prevlen
* field implies the ziplist is holding large entries anyway.
*
* The pointer "p" points to the first entry that does NOT need to be
* updated, i.e. consecutive fields MAY need an update. */
//连锁更新函数
//当在ziplist中插入新entry后,如果前一个entry长度大于254个字节,而新entry的previous_entry_length字段只有一个字节,就需要对新entry扩容,同时如果新entry长度大于254个字节,而后一个entry的previous_entry_length字段只有一个字节,就需要对后一个entry扩容。所以在插入新entry后,可能会对包括新entry在内的之后连续的几个entry进行扩容,这个过程叫做连锁更新。
static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
//curlen是当前ziplist的总字节数
unsigned int curlen = ZIPLIST_BYTES(zl), rawlen, rawlensize;
unsigned int offset, noffset, extra;
unsigned char *np;
zlentry cur, next;
//从p指针指向的entry开始检查,每次遍历到p指向的entry时,实际要修改的是下个entry的字段
while (p[0] != ZIP_END) {
//生成当前entry的zlentry结构
cur = zipEntry(p);
//rawlen是当前entry的总字节数
rawlen = cur.headersize + cur.len;
//rawlensize是表示rawlen需要的字节数
rawlensize = zipPrevEncodeLength(NULL,rawlen);

/* Abort if there is no next entry. */
//后面没有entry了,所以就没有待修改的entry了,跳出循环
if (p[rawlen] == ZIP_END) break;
//生成下个entry的zlentry结构
next = zipEntry(p+rawlen);

/* Abort when "prevlen" has not changed. */
//如果下个entry的prevrawlen属性等于当前entry的字节数,说明后面所有entry的previous_entry_length都不用改了,退出循环
if (next.prevrawlen == rawlen) break;

//如果下个entry的previous_entry_length的字节数不足以表示当前entry的长度,就要对ziplist扩容
if (next.prevrawlensize < rawlensize) {
/* The "prevlen" field of "next" needs more bytes to hold
* the raw length of "cur". */
//offset是ziplist中当前entry之前的字节数,也就是当前entry头部到ziplist头部的字节数
offset = p-zl;
//需要增加的字节数量
extra = rawlensize-next.prevrawlensize;
//把ziplist的字节数设为当前字节数加extra
zl = ziplistResize(zl,curlen+extra);
//ziplist扩容后,当前entry之前的部分没有变化,所以新的ziplist中offset的位置仍然是当前entry
p = zl+offset;

/* Current pointer and offset for next element. */
//下个entry的地址
np = p+rawlen;
//下个entry到ziplist头部的字节数
noffset = np-zl;

/* Update tail offset when next element is not the tail element. */
//zltail是最后一个entry到ziplist头部的字节数,如果下个entry不是最后一个,扩容多出来的几个字节应该算在zltail里
if ((zl+ZIPLIST_TAIL_OFFSET(zl)) != np)
ZIPLIST_TAIL_OFFSET(zl) += extra;

/* Move the tail to the back. */
//增加下个entry的previous_entry_length字段的长度,就是把ziplist中从下个entry的encoding开始的后面的部分全部后移extra个字节。换言之,把旧的encoding字段开始的后半段移动到新的encoding字段开始的地址上。
//np+rawlensize就是下个entry的encoding字段在新ziplist的起始位置,np+next.prevrawlensize就是下个entry的encoding字段在旧ziplist的起始位置,移动的后半段的字节数是curlen-noffset-next.prevrawlensize-1
memmove(np+rawlensize,
np+next.prevrawlensize,
curlen-noffset-next.prevrawlensize-1);
//把新的previous_entry_length字段写入下个entry的头部
zipPrevEncodeLength(np,rawlen);

/* Advance the cursor */
//继续检查下个entry
p += rawlen;
//更新ziplist总字节数
curlen += extra;
//如果下个entry的previous_entry_length的字节数足够表示当前entry的长度
} else {
//如果当前entry的长度只需要1字节表示,但是下个entry的previous_entry_length有5字节,说明容量多了。但此时不缩减ziplist的长度,而是把rawlensize以5字节的记录方式写入下个entry的previous_entry_length
//所以连锁更新的时候ziplist只扩容不收缩,为啥呢?
if (next.prevrawlensize > rawlensize) {
/* This would result in shrinking, which we want to avoid.
* So, set "rawlen" in the available bytes. */
zipPrevEncodeLengthForceLarge(p+rawlen,rawlen);
//如果恰好都是1字节,就直接把rawlen写入下个entry的previous_entry_length
} else {
zipPrevEncodeLength(p+rawlen,rawlen);
}

/* Stop here, as the raw length of "next" has not changed. */
//在这种情况下,下个entry只需要修改字段值,而不需要扩容,所以连锁更新到此为止
break;
}
}
return zl;
}

/* Delete "num" entries, starting at "p". Returns pointer to the ziplist. */
//在zl中,从p指向的entry开始,删除num个entry
static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
unsigned int i, totlen, deleted = 0;
int offset, nextdiff = 0;
zlentry first, tail;

//原始的p指针保存在first中
first = zipEntry(p);
//p指针移动到所有要删除的entry的末尾
for (i = 0; p[0] != ZIP_END && i < num; i++) {
p += zipRawEntryLength(p);
deleted++;
}

//要删除的总字节数就是现在的p地址和原始的p地址的位置差
totlen = p-first.p;
if (totlen > 0) {
//p没有到ziplist末尾,说明没有删除最后一个entry
if (p[0] != ZIP_END) {
/* Tricky: storing the prevlen in this entry might reduce or
* increase the number of bytes needed, compared to the current
* prevlen. Note that we can always store this length because
* it was previously stored by an entry that is being deleted. */
//原始的p地址和现在的p地址之间被删掉了,所以要重新连接的是原来的p指向的entry的上个entry,和现在的p指向的entry。所以要更新的就是现在的p指向的entry的previous_entry_length字段,也就是让现在的p指向的entry的previous_entry_length字段存储first.prevrawlen表示的数值
//如果当前entry的previous_entry_length字段不足以表示first.prevrawlen的值,nextdiff存储的就是额外需要的字节数
nextdiff = zipPrevLenByteDiff(p,first.prevrawlen);
//不需要扩容,额外的字节从p前面找,反正前面的entry是准备删掉的,直接覆盖写入就好,到时候少删掉nextdiff个字节就ok了
zipPrevEncodeLength(p-nextdiff,first.prevrawlen);

/* Update offset for tail */
//zltail要减去删除掉的字节数
ZIPLIST_TAIL_OFFSET(zl) -= totlen;

/* When the tail contains more than one entry, we need to take
* "nextdiff" in account as well. Otherwise, a change in the
* size of prevlen doesn't have an effect on the *tail* offset. */
tail = zipEntry(p);
//长度对不上,是因为上面多删了nextdiff个字节,所以在这里要加回来
if (p[tail.headersize+tail.len] != ZIP_END)
ZIPLIST_TAIL_OFFSET(zl) += nextdiff;

/* Move tail to the front of the ziplist */
//把p-nextdiff开始的内容复制到first.p开始的地址,就相当于把中间的部分删除了
memmove(first.p,p-nextdiff,ZIPLIST_BYTES(zl)-(p-zl)-1+nextdiff);
//p就是ziplist末尾,说明原始的p地址后面所有的entry都删掉了
} else {
/* The entire tail was deleted. No need to move memory. */
//zltail是最后一个entry到ziplist头部的字节数,first.p-zl是最初的p指向的entry到ziplist头部的字节数,但因为这个entry也被删掉了,所以还要移动到上个entry的头部,也就是回退first.prevrawlen个字节
ZIPLIST_TAIL_OFFSET(zl) = (first.p-zl)-first.prevrawlen;
}

/* Resize and update length */
//记录第一个删除的节点的头地址
offset = first.p-zl;
//减少ziplist的总字节数
zl = ziplistResize(zl, ZIPLIST_BYTES(zl)-totlen+nextdiff);
//修改zllen字段的值,zllen表示ziplist中entry的数量,所以减去deleted
ZIPLIST_INCR_LENGTH(zl,-deleted);
//调整完ziplist的长度后,定位第一个删除的节点的头地址
p = zl+offset;

/* When nextdiff != 0, the raw length of the next entry has changed, so
* we need to cascade the update throughout the ziplist */
//nextdiff不等于0,说明p指向的entry的previous_entry_length字段被延长了,也就是这个entry的总长度变了,所以需要从这个entry开始连锁更新,但是是从下个entry的previous_entry_length字段开始更新,p指向的entry不变
if (nextdiff != 0)
zl = __ziplistCascadeUpdate(zl,p);
}
return zl;
}

/* Insert item at "p". */
//在zl的位置p插入一个新entry,s是被插入的entry,slen是s的content字段的字节数
static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
//curlen是当前ziplist的总字节数
unsigned int curlen = ZIPLIST_BYTES(zl), reqlen, prevlen = 0;
unsigned int offset, nextdiff = 0;
unsigned char encoding = 0;
long long value;
zlentry entry, tail;

/* Find out prevlen for the entry that is inserted. */
//p是插入entry的位置,所以当前p指向的entry就是新entry的后一个entry,所以p指向的entry的previous_entry_length字段应该存在新entry里
//如果p没有指向ZIP_END,说明新entry的后面还有entry,也就是p指向的entry
if (p[0] != ZIP_END) {
//获取p指向的entry的prevrawlen属性
entry = zipEntry(p);
prevlen = entry.prevrawlen;
//如果p指向ZIP_END,说明新entry的后面没有entry了,所以新entry的prevrawlen属性就不能从后面的entry头部找,而是直接解析前一个entry,也就是zltail指向的entry
} else {
unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
if (ptail[0] != ZIP_END) {
prevlen = zipRawEntryLength(ptail);
}
}

/* See if the entry can be encoded */
//在ziplist中,整型值被存储为实际的整型值而不是字符串。所以插入新entry时要检查content字段是否可以当做整数存储,如果可以转成整数,就不用存成字符串了,这样可以节省空间
if (zipTryEncoding(s,slen,&value,&encoding)) {
/* 'encoding' is set to the appropriate integer encoding */
//如果能转成整数,reqlen就是该整数类型需要的字节数
reqlen = zipIntSize(encoding);
} else {
/* 'encoding' is untouched, however zipEncodeLength will use the
* string length to figure out how to encode it. */
//否则,reqlen就是新entry的content字段的字节数
reqlen = slen;
}
/* We need space for both the length of the previous entry and
* the length of the payload. */
//reqlen再加上新entry的previous_entry_length字段所需的字节数
reqlen += zipPrevEncodeLength(NULL,prevlen);
//reqlen再加上表示新entry需要的字节数,也就是新entry的encoding字段的字节数
reqlen += zipEncodeLength(NULL,encoding,slen);
//三个字段都算上了,现在的reqlen就是新entry的总字节数

/* When the insert position is not equal to the tail, we need to
* make sure that the next entry can hold this entry's length in
* its prevlen field. */
//如果新entry后面还有entry,需要考虑后面entry的previous_entry_length字段是否足够表示新entry的长度
//nextdiff就是后面的entry的previous_entry_length字段需要延长的字节数
nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;

/* Store offset because a realloc may change the address of zl. */
//因为是在p地址后面插入,所以要保存p地址到ziplist头部的距离
offset = p-zl;
//给ziplist扩容,增加的长度是新entry所需的字节数加上后面entry需要延长的字节数
zl = ziplistResize(zl,curlen+reqlen+nextdiff);
//在新的ziplist中重定位p的位置
p = zl+offset;

/* Apply memory move when necessary and update tail offset. */
if (p[0] != ZIP_END) {
/* Subtract one because of the ZIP_END bytes */
//p-nextdiff是后面的entry的头地址,把这之后的所有内容后移reqlen个字节,给新entry腾出位置
memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);

/* Encode this entry's raw length in the next entry. */
//更新后面entry的previous_entry_length,来表示新entry的总字节数reqlen
zipPrevEncodeLength(p+reqlen,reqlen);

/* Update offset for tail */
//zltail的值要增加
ZIPLIST_TAIL_OFFSET(zl) += reqlen;

/* When the tail contains more than one entry, we need to take
* "nextdiff" in account as well. Otherwise, a change in the
* size of prevlen doesn't have an effect on the *tail* offset. */
tail = zipEntry(p+reqlen);
//如果后面的entry头部被延长了,就需要给zltail补回nextdiff
if (p[reqlen+tail.headersize+tail.len] != ZIP_END)
ZIPLIST_TAIL_OFFSET(zl) += nextdiff;
} else {
/* This element will be the new tail. */
//如果新entry就是最后一个entry,zltail就是新entry头地址到ziplist头地址的距离
ZIPLIST_TAIL_OFFSET(zl) = p-zl;
}

/* When nextdiff != 0, the raw length of the next entry has changed, so
* we need to cascade the update throughout the ziplist */
//如果后面的entry被延长了,需要连锁更新
if (nextdiff != 0) {
offset = p-zl;
zl = __ziplistCascadeUpdate(zl,p+reqlen);
p = zl+offset;
}

/* Write the entry */
//填写新entry各个字段的值
p += zipPrevEncodeLength(p,prevlen);
p += zipEncodeLength(p,encoding,slen);
if (ZIP_IS_STR(encoding)) {
memcpy(p,s,slen);
} else {
zipSaveInteger(p,value,encoding);
}
//zllen表示entry总数,插入新entry后要加一
ZIPLIST_INCR_LENGTH(zl,1);
return zl;
}

//向zl中插入s,插入位置可以选择是头部或尾部
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where) {
unsigned char *p;
p = (where == ZIPLIST_HEAD) ? ZIPLIST_ENTRY_HEAD(zl) : ZIPLIST_ENTRY_END(zl);
return __ziplistInsert(zl,p,s,slen);
}

/* Returns an offset to use for iterating with ziplistNext. When the given
* index is negative, the list is traversed back to front. When the list
* doesn't contain an element at the provided index, NULL is returned. */
//返回zl中第index个entry的头地址
unsigned char *ziplistIndex(unsigned char *zl, int index) {
unsigned char *p;
zlentry entry;
//支持负索引
if (index < 0) {
index = (-index)-1;
//负索引是从最后一个entry向前数
p = ZIPLIST_ENTRY_TAIL(zl);
if (p[0] != ZIP_END) {
entry = zipEntry(p);
//每跳过一个entry,指针p就减去这个entry的长度,相当于退到前一个entry
while (entry.prevrawlen > 0 && index--) {
p -= entry.prevrawlen;
entry = zipEntry(p);
}
}
} else {
//从第一个entry开始,每跳过一个entry,指针p就加上这个entry的长度,最后p指向的就是第index个entry
p = ZIPLIST_ENTRY_HEAD(zl);
while (p[0] != ZIP_END && index--) {
p += zipRawEntryLength(p);
}
}
//如果索引越界,就返回NULL
return (p[0] == ZIP_END || index > 0) ? NULL : p;
}

/* Return pointer to next entry in ziplist.
*
* zl is the pointer to the ziplist
* p is the pointer to the current element
*
* The element after 'p' is returned, otherwise NULL if we are at the end. */
//返回p指向的entry的下一个entry的头地址
unsigned char *ziplistNext(unsigned char *zl, unsigned char *p) {
((void) zl);

/* "p" could be equal to ZIP_END, caused by ziplistDelete,
* and we should return NULL. Otherwise, we should return NULL
* when the *next* element is ZIP_END (there is no next entry). */
//如果p是ZIP_END,后面就没有entry了
if (p[0] == ZIP_END) {
return NULL;
} else {
//其实就是把p后移,后移长度是p指向的entry的长度
p = p+zipRawEntryLength(p);
return (p[0] == ZIP_END) ? NULL : p;
}
}

/* Return pointer to previous entry in ziplist. */
//返回p指向的entry的前一个entry的头地址
unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p) {
zlentry entry;

/* Iterating backwards from ZIP_END should return the tail. When "p" is
* equal to the first element of the list, we're already at the head,
* and should return NULL. */
//如果p是ZIP_END,上一个entry就是最后一个entry
if (p[0] == ZIP_END) {
p = ZIPLIST_ENTRY_TAIL(zl);
//如果最后一个entry也是ZIP_END,说明zl是空的
return (p[0] == ZIP_END) ? NULL : p;
//如果p指向的就是第一个entry,前面就没有entry了
} else if (p == ZIPLIST_ENTRY_HEAD(zl)) {
return NULL;
} else {
//否则就是把p前移,前移长度是p指向的entry的长度
//这时就体现出previous_entry_length字段的作用了,之所以要记录前一个entry的长度,就是为了从后向前遍历
entry = zipEntry(p);
assert(entry.prevrawlen > 0);
return p-entry.prevrawlen;
}
}

/* Get entry pointer to by 'p' and store in either 'e' or 'v' depending
* on the encoding of the entry. 'e' is always set to NULL to be able
* to find out whether the string pointer or the integer value was set.
* Return 0 if 'p' points to the end of the zipmap, 1 otherwise. */
//读取p指向的entry的content字段,如果是字符串就赋给sstr,同时字符串长度赋给slen,如果是整数就赋给sval
unsigned int ziplistGet(unsigned char *p, unsigned char **sstr, unsigned int *slen, long long *sval) {
zlentry entry;
//如果p指向的不是entry,就返回0,表示读取失败
if (p == NULL || p[0] == ZIP_END) return 0;
if (sstr) *sstr = NULL;

entry = zipEntry(p);
//如果content的类型的字符串
if (ZIP_IS_STR(entry.encoding)) {
//判断一下,防止传进来的sstr指针本身就是NULL
if (sstr) {
*slen = entry.len;
*sstr = p+entry.headersize;
}
} else {
if (sval) {
*sval = zipLoadInteger(p+entry.headersize,entry.encoding);
}
}
return 1;
}

/* Insert an entry at "p". */
//毫无意义的函数,和__ziplistInsert参数一样,只是对__ziplistInsert的调用
unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
return __ziplistInsert(zl,p,s,slen);
}

/* Delete a single entry from the ziplist, pointed to by *p.
* Also update *p in place, to be able to iterate over the
* ziplist, while deleting entries. */
//在zl中删除p指向的entry,再把下个entry的地址赋给p
unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p) {
unsigned int offset = *p-zl;
zl = __ziplistDelete(zl,*p,1);

/* Store pointer to current element in p, because ziplistDelete will
* do a realloc which might result in a different "zl"-pointer.
* When the delete direction is back to front, we might delete the last
* entry and end up with "p" pointing to ZIP_END, so check this. */
*p = zl+offset;
return zl;
}

/* Delete a range of entries from the ziplist. */
//从第index个entry开始,删除num个entry
unsigned char *ziplistDeleteRange(unsigned char *zl, unsigned int index, unsigned int num) {
unsigned char *p = ziplistIndex(zl,index);
//把p==NULL的判断放在__ziplistDelete里,就不必写成两个函数了
return (p == NULL) ? zl : __ziplistDelete(zl,p,num);
}

/* Compare entry pointer to by 'p' with 'entry'. Return 1 if equal. */
//把p指向的entry的content字段,和长度为slen的字符串sstr作比较,相同返回1,不相同返回0
unsigned int ziplistCompare(unsigned char *p, unsigned char *sstr, unsigned int slen) {
zlentry entry;
unsigned char sencoding;
long long zval, sval;
//如果p指向的不是entry,就退出
if (p[0] == ZIP_END) return 0;

entry = zipEntry(p);
//如果content的内容是字符串
if (ZIP_IS_STR(entry.encoding)) {
/* Raw compare */
//用memcmp比较字符串
if (entry.len == slen) {
return memcmp(p+entry.headersize,sstr,slen) == 0;
} else {
return 0;
}
//如果content的内容是整数
} else {
/* Try to compare encoded values */
//先把字符串表示的整数转换成int类型
if (zipTryEncoding(sstr,slen,&sval,&sencoding)) {
//encoding相不相等其实无所谓吧,encoding不相等的话值肯定不会相等的,存疑?
if (entry.encoding == sencoding) {
zval = zipLoadInteger(p+entry.headersize,entry.encoding);
return zval == sval;
}
}
}
return 0;
}

/* Return length of ziplist. */
//返回zllen字段的值,也就是ziplist中存储的entry的数量
unsigned int ziplistLen(unsigned char *zl) {
unsigned int len = 0;
//因为zllen的类型是uint16_t,所以如果zllen小于UINT16_MAX,说明表示的值是有效的
if (ZIPLIST_LENGTH(zl) < UINT16_MAX) {
len = ZIPLIST_LENGTH(zl);
//如果zllen等于UINT16_MAX,说明溢出了,需要遍历ziplist数entry的个数,数量存在len变量里
//len是uint32_t,所以ziplist支持的最大entry数量是uint32_t的上界
} else {
//p定位在第一个entry的头地址
unsigned char *p = zl+ZIPLIST_HEADER_SIZE;
while (*p != ZIP_END) {
p += zipRawEntryLength(p);
len++;
}

/* Re-store length if small enough */
if (len < UINT16_MAX) ZIPLIST_LENGTH(zl) = len;
}
return len;
}

/* Return size in bytes of ziplist. */
//返回zlbytes字段的值,也就是ziplist的总字节数
unsigned int ziplistSize(unsigned char *zl) {
return ZIPLIST_BYTES(zl);
}

//打印ziplist的结构,debug用
void ziplistRepr(unsigned char *zl) {
unsigned char *p;
int index = 0;
zlentry entry;

printf(
"{total bytes %d} "
"{length %u}\n"
"{tail offset %u}\n",
ZIPLIST_BYTES(zl),
ZIPLIST_LENGTH(zl),
ZIPLIST_TAIL_OFFSET(zl));
p = ZIPLIST_ENTRY_HEAD(zl);
while(*p != ZIP_END) {
entry = zipEntry(p);
printf(
"{"
"addr 0x%08lx, "
"index %2d, "
"offset %5ld, "
"rl: %5u, "
"hs %2u, "
"pl: %5u, "
"pls: %2u, "
"payload %5u"
"} ",
(long unsigned)p,
index,
(unsigned long) (p-zl),
entry.headersize+entry.len,
entry.headersize,
entry.prevrawlen,
entry.prevrawlensize,
entry.len);
p += entry.headersize;
if (ZIP_IS_STR(entry.encoding)) {
if (entry.len > 40) {
if (fwrite(p,40,1,stdout) == 0) perror("fwrite");
printf("...");
} else {
if (entry.len &&
fwrite(p,entry.len,1,stdout) == 0) perror("fwrite");
}
} else {
printf("%lld", (long long) zipLoadInteger(p,entry.encoding));
}
printf("\n");
p += entry.len;
index++;
}
printf("{end}\n\n");
}

3.3 压缩字典

zipmap.c

// String -> String Map data structure optimized for size.
//和ziplist类似,zipmap是为节省内存而设计的一种存储String->String数据的字符串形式的字典

/* Memory layout of a zipmap, for the map "foo" => "bar", "hello" => "world":
*
* <zmlen><len>"foo"<len><free>"bar"<len>"hello"<len><free>"world"
*
* zmlen字段存储的是zipmap中的key-value对的数量,只有1字节,当zipmap长度超过254时,zmlen的值就失效了,需要遍历zipmap来得到真实的key-value数量。
*
* 每个len字段存储的是它后面的数据的字节数,数据可能是key也可能是value,len字段长度是1字节或5字节。当第一个字节值小于254时,表示len字段只有1个字节,该字节的值就是数据的长度。当第一个字节值等于254时,表示len字段有5个字节,后面4个字节的值才是数据的长度。当第一个字节值等于255时,表示到了zipmap的末尾。
*
* 因为每个字符串前面都有len字段,所以执行查询操作时,不需要逐字节比较,直接取len长度的字符串用memcmp比较一次即可。
*
* free字段存储的是可用的空闲字节数,如果把某个value字符串替换成更短的字符串,后面就会留出多余的字节。空闲字节数只需要1个字节表示,如果空出来的字节数过多,zipmap会被realloc
*
* The most compact representation of the above two elements hash is actually:
*
* "\x02\x03foo\x03\x00bar\x05hello\x05\x00world\xff"
*/

#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "zmalloc.h"

//254是len字段的分界值
#define ZIPMAP_BIGLEN 254
//255是zipmap尾部的标记
#define ZIPMAP_END 255

/* The following defines the max value for the <free> field described in the
* comments above, that is, the max number of trailing bytes in a value. */
//zipmap允许的最大free字节数3(4是不可取的上限),所以free字段只需要1个字节表示
#define ZIPMAP_VALUE_MAX_FREE 4

/* The following macro returns the number of bytes needed to encode the length
* for the integer value _l, that is, 1 byte for lengths < ZIPMAP_BIGLEN and
* 5 bytes for all the other lengths. */
//返回表示长度_l的len字段所需的字节数,如果小于254就只需要1个字节,否则需要5个字节
#define ZIPMAP_LEN_BYTES(_l) (((_l) < ZIPMAP_BIGLEN) ? 1 : sizeof(unsigned int)+1)

/* Create a new empty zipmap. */
//创建空的zipmap
unsigned char *zipmapNew(void) {
unsigned char *zm = zmalloc(2);
//没存储key-value时,只有zmlen字段和表示结尾的len字段,zmlen=0表示存储了0个key-value对,len=255是zipmap末尾的标记
zm[0] = 0; /* Length */
zm[1] = ZIPMAP_END;
return zm;
}

/* Decode the encoded length pointed by 'p' */
//获取len字段表示的数值,p指向的是某个len字段
static unsigned int zipmapDecodeLength(unsigned char *p) {
//len是int类型,所以默认取1个字节
unsigned int len = *p;
//第一个字节的值小于254,说明len字段只有1个字节
if (len < ZIPMAP_BIGLEN) return len;
//否则len的值就是p后面的4个字节表示的值
memcpy(&len,p+1,sizeof(unsigned int));
return len;
}

/* Encode the length 'l' writing it in 'p'. If p is NULL it just returns
* the amount of bytes required to encode such a length. */
//返回表示长度len需要的字节数,并把len的数值写入p指向的len字段
static unsigned int zipmapEncodeLength(unsigned char *p, unsigned int len) {
//如果p没有指向len字段,就只获取需要的字节数,不必把len写入p
if (p == NULL) {
return ZIPMAP_LEN_BYTES(len);
} else {
//以ZIPMAP_BIGLEN为分界判断需要1个字节还是5个字节
if (len < ZIPMAP_BIGLEN) {
p[0] = len;
return 1;
} else {
p[0] = ZIPMAP_BIGLEN;
memcpy(p+1,&len,sizeof(len));
return 1+sizeof(len);
}
}
}

/* Search for a matching key, returning a pointer to the entry inside the
* zipmap. Returns NULL if the key is not found.
*
* If NULL is returned, and totlen is not NULL, it is set to the entire
* size of the zimap, so that the calling function will be able to
* reallocate the original zipmap to make room for more entries. */
//在zm中查找长度为klen的key并返回,同时把zm的总字节数存入参数totlen中
//因为这两种操作都需要进行遍历,所以索性写在一个函数里
static unsigned char *zipmapLookupRaw(unsigned char *zm, unsigned char *key, unsigned int klen, unsigned int *totlen) {
//跳过zmlen字段,p指向第一个len字段
unsigned char *p = zm+1, *k = NULL;
unsigned int l,llen;
//p指向的字节的值不是ZIPMAP_END,说明后面还有key-value对
while(*p != ZIPMAP_END) {
unsigned char free;

/* Match or skip the key */
//l是下个key的字节数,llen是len字段的字节数
l = zipmapDecodeLength(p);
llen = zipmapEncodeLength(NULL,l);
//如果参数的key和zipmap中的key相等
if (k == NULL && l == klen && !memcmp(p+llen,key,l)) {
/* Only return when the user doesn't care
* for the total length of the zipmap. */
//totlen != NULL说明需要返回zm的总字节数,即使找到了给定的key也要遍历完整个zipmap
if (totlen != NULL) {
k = p;
//totlen == NULL说明不需要返回zm的总字节数,找到了给定的key就结束
} else {
return p;
}
}
//跳过了<len><key>两个字段,开始解析<len><free><value>三个字段
//一个key-value对包含<len><key><len><free><value>这5个字段
p += llen+l;
/* Skip the value as well */
//因为查找的目标是key,所以value不用比较
//p指针移动的长度包含四部分,len字段,free字段,value字段,以及free字段标示的value尾部的空余字节
l = zipmapDecodeLength(p);
p += zipmapEncodeLength(NULL,l);
free = p[0];
p += l+1+free; /* +1 to skip the free byte */
}
//遍历到ZIPMAP_END后,把zm的总字节数存入totlen
if (totlen != NULL) *totlen = (unsigned int)(p-zm)+1;
return k;
}

//给定key的长度和value的长度,返回存储该key-value对需要的总字节数
//也就是<len><key><len><free><value>的总字节数
static unsigned long zipmapRequiredLength(unsigned int klen, unsigned int vlen) {
unsigned int l;
//两个len字段和free字段默认都是1个字节
l = klen+vlen+3;
//如果长度超过253个字节,len字段需要5个字节
if (klen >= ZIPMAP_BIGLEN) l += 4;
if (vlen >= ZIPMAP_BIGLEN) l += 4;
return l;
}

/* Return the total amount used by a key (encoded length + payload) */
//p指向len字段,返回<len><key>两个字段的字节数
static unsigned int zipmapRawKeyLength(unsigned char *p) {
//l是key的字节数
unsigned int l = zipmapDecodeLength(p);
//zipmapEncodeLength得到的是记录l需要的len字段的字节数
return zipmapEncodeLength(NULL,l) + l;
}

/* Return the total amount used by a value
* (encoded length + single byte free count + payload) */
//p指向len字段,返回<len><free><value>三个字段的字节数,但value尾部的空余字节不被计入
static unsigned int zipmapRawValueLength(unsigned char *p) {
unsigned int l = zipmapDecodeLength(p);
unsigned int used;
//used得到的是value字段的有效长度,而不是全部长度,因为排除了尾部的空余字节
used = zipmapEncodeLength(NULL,l);
used += p[used] + 1 + l;
return used;
}

/* If 'p' points to a key, this function returns the total amount of
* bytes used to store this entry (entry = key + associated value + trailing
* free space if any). */
//p指向len字段,返回一个key-value对,也就是<len><key><len><free><value>5个字段的字节数
//仿照ziplist,这样一个key-value对也叫做一个entry
static unsigned int zipmapRawEntryLength(unsigned char *p) {
unsigned int l = zipmapRawKeyLength(p);
return l + zipmapRawValueLength(p+l);
}

//重新调整zipmap的大小,参数len是zipmap的总字节数
static inline unsigned char *zipmapResize(unsigned char *zm, unsigned int len) {
zm = zrealloc(zm, len);
zm[len-1] = ZIPMAP_END;
return zm;
}

/* Set key to value, creating the key if it does not already exist.
* If 'update' is not NULL, *update is set to 1 if the key was
* already preset, otherwise to 0. */
//把长度为klen的vlen的key-value对插入到zm中。如果key已经存在,就变成更新对应的value,同时把参数update设为1
unsigned char *zipmapSet(unsigned char *zm, unsigned char *key, unsigned int klen, unsigned char *val, unsigned int vlen, int *update) {
unsigned int zmlen, offset;
unsigned int freelen, reqlen = zipmapRequiredLength(klen,vlen);
unsigned int empty, vempty;
unsigned char *p;

freelen = reqlen;
if (update) *update = 0;
//在zm中查找key,同时把zm的总字节数存在变量zmlen中
p = zipmapLookupRaw(zm,key,klen,&zmlen);
//如果key不存在,就是插入新的key-value
if (p == NULL) {
/* Key not found: enlarge */
//先给zm扩容,增加的reqlen是存储key-value需要的字节数
zm = zipmapResize(zm, zmlen+reqlen);
//默认在尾部插入,所以把p指向到倒数第二个字节,因为最后一个字节是0xff的结束符
p = zm+zmlen-1;
//新zipmap的总字节数
zmlen = zmlen+reqlen;

/* Increase zipmap length (this is an insert) */
//如果zmlen字段没有溢出,就加一表示zm的真实长度
if (zm[0] < ZIPMAP_BIGLEN) zm[0]++;
//如果key已经存在,就是更新value
} else {
/* Key found. Is there enough space for the new value? */
/* Compute the total length: */
if (update) *update = 1;
//得到当前key所在的整个entry的字节数
freelen = zipmapRawEntryLength(p);
//如果目标entry的长度不够,就要先扩容
if (freelen < reqlen) {
/* Store the offset of this key within the current zipmap, so
* it can be resized. Then, move the tail backwards so this
* pair fits at the current position. */
offset = p-zm;
//增加的长度是reqlen-freelen,也就是新entry比旧entry多的字节数
//因为<len><key><free>三个字段长度都不变,所以增加的长度是<len><value>两个字段的
zm = zipmapResize(zm, zmlen-freelen+reqlen);
//在新的zm中定位到当前entry
p = zm+offset;

/* The +1 in the number of bytes to be moved is caused by the
* end-of-zipmap byte. Note: the *original* zmlen is used. */
//更新的value不一定在zm尾部,所以要把后面的内容后移,防止数据覆盖
//p+freelen是旧zm中下个entry的地址,p+reqlen是新zm中下个entry应该在的地址
memmove(p+reqlen, p+freelen, zmlen-(offset+freelen+1));
//计算新zm的总字节数
zmlen = zmlen-freelen+reqlen;
freelen = reqlen;
}
}

/* We now have a suitable block where the key/value entry can
* be written. If there is too much free space, move the tail
* of the zipmap a few bytes to the front and shrink the zipmap,
* as we want zipmaps to be very space efficient. */
//需要扩容的情况一定是扩容到刚刚好,所以value尾部不会有空余字节。
//只有在更新value且新value长度比旧value小的情况下,才会有空余字节
empty = freelen-reqlen;
//空余字节大于等于ZIPMAP_VALUE_MAX_FREE,就需要收缩zm
if (empty >= ZIPMAP_VALUE_MAX_FREE) {
/* First, move the tail <empty> bytes to the front, then resize
* the zipmap to be <empty> bytes smaller. */
offset = p-zm;
memmove(p+reqlen, p+freelen, zmlen-(offset+freelen+1));
zmlen -= empty;
zm = zipmapResize(zm, zmlen);
p = zm+offset;
vempty = 0;
//否则就留着空余字节,把字节数记在free字段
} else {
vempty = empty;
}

/* Just write the key + value and we are done. */
/* Key: */
//写入第一个len字段,p移动到key字段
p += zipmapEncodeLength(p,klen);
//写入key字段
memcpy(p,key,klen);
//p移动到第二个len字段
p += klen;
/* Value: */
//写入第二个len字段,p移动到free字段
p += zipmapEncodeLength(p,vlen);
//写入free字段,p移动到value字段
*p++ = vempty;
//写入value
memcpy(p,val,vlen);
return zm;
}

/* Remove the specified key. If 'deleted' is not NULL the pointed integer is
* set to 0 if the key was not found, to 1 if it was found and deleted. */
//删除给定key的key-value对
unsigned char *zipmapDel(unsigned char *zm, unsigned char *key, unsigned int klen, int *deleted) {
unsigned int zmlen, freelen;
//查找key,并得到zm总字节数
unsigned char *p = zipmapLookupRaw(zm,key,klen,&zmlen);
//找到key了
if (p) {
//得到entry的字节数
freelen = zipmapRawEntryLength(p);
//把目标entry后面的内容复制到entry头部,相当于覆盖了待删除的entry
memmove(p, p+freelen, zmlen-((p-zm)+freelen+1));
//收缩zm的长度
zm = zipmapResize(zm, zmlen-freelen);

/* Decrease zipmap length */
//如果zmlen字段的值有效就维护起来
if (zm[0] < ZIPMAP_BIGLEN) zm[0]--;
//标记为删除成功
if (deleted) *deleted = 1;
//没找到key直接标记为删除失败
} else {
if (deleted) *deleted = 0;
}
return zm;
}

/* Call it before to iterate trought elements via zipmapNext() */
//用于开始迭代zipmap时,跳过zmlen字段,功能太简单,没必要单独写成函数
unsigned char *zipmapRewind(unsigned char *zm) {
return zm+1;
}

/* This function is used to iterate through all the zipmap elements.
* In the first call the first argument is the pointer to the zipmap + 1.
* In the next calls what zipmapNext returns is used as first argument.
* Example:
*
* unsigned char *i = zipmapRewind(my_zipmap);
* while((i = zipmapNext(i,&key,&klen,&value,&vlen)) != NULL) {
* printf("%d bytes key at $p\n", klen, key);
* printf("%d bytes value at $p\n", vlen, value);
* }
*/
//一步的遍历,只遍历zm指针指向的entry,然后zm指向下一个entry,遍历得到的entry信息保存在参数里的四个指针里
unsigned char *zipmapNext(unsigned char *zm, unsigned char **key, unsigned int *klen, unsigned char **value, unsigned int *vlen) {
if (zm[0] == ZIPMAP_END) return NULL;
if (key) {
*key = zm;
*klen = zipmapDecodeLength(zm);
*key += ZIPMAP_LEN_BYTES(*klen);
}
zm += zipmapRawKeyLength(zm);
if (value) {
*value = zm+1;
*vlen = zipmapDecodeLength(zm);
*value += ZIPMAP_LEN_BYTES(*vlen);
}
zm += zipmapRawValueLength(zm);
return zm;
}

/* Search a key and retrieve the pointer and len of the associated value.
* If the key is found the function returns 1, otherwise 0. */
//给定长度为klen的key,查询对应的value值并存入参数的value指针,同时把value的长度存入参数的vlen指针
int zipmapGet(unsigned char *zm, unsigned char *key, unsigned int klen, unsigned char **value, unsigned int *vlen) {
unsigned char *p;

if ((p = zipmapLookupRaw(zm,key,klen,NULL)) == NULL) return 0;
p += zipmapRawKeyLength(p);
*vlen = zipmapDecodeLength(p);
*value = p + ZIPMAP_LEN_BYTES(*vlen) + 1;
return 1;
}

/* Return 1 if the key exists, otherwise 0 is returned. */
//查询长度为klen的key是否在zm中
int zipmapExists(unsigned char *zm, unsigned char *key, unsigned int klen) {
return zipmapLookupRaw(zm,key,klen,NULL) != NULL;
}

/* Return the number of entries inside a zipmap */
//遍历zipmap,返回entry的数量
unsigned int zipmapLen(unsigned char *zm) {
unsigned int len = 0;
//如果zmlen字段的值有效,直接返回这个值,无需遍历
if (zm[0] < ZIPMAP_BIGLEN) {
len = zm[0];
//如果zmlen字段溢出了,就需要遍历整个zipmap
} else {
unsigned char *p = zipmapRewind(zm);
while((p = zipmapNext(p,NULL,NULL,NULL,NULL)) != NULL) len++;

/* Re-store length if small enough */
if (len < ZIPMAP_BIGLEN) zm[0] = len;
}
return len;
}

//打印zipmap的结构,debug用
void zipmapRepr(unsigned char *p) {
unsigned int l;

printf("{status %u}",*p++);
while(1) {
if (p[0] == ZIPMAP_END) {
printf("{end}");
break;
} else {
unsigned char e;

l = zipmapDecodeLength(p);
printf("{key %u}",l);
p += zipmapEncodeLength(NULL,l);
if (l != 0 && fwrite(p,l,1,stdout) == 0) perror("fwrite");
p += l;

l = zipmapDecodeLength(p);
printf("{value %u}",l);
p += zipmapEncodeLength(NULL,l);
e = *p++;
if (l != 0 && fwrite(p,l,1,stdout) == 0) perror("fwrite");
p += l+e;
if (e) {
printf("[");
while(e--) printf(".");
printf("]");
}
}
}
printf("\n");
}

4 数据类型相关

4.1 对象系统

redis.h(对象系统相关部分)

/* Error codes */
//状态码
#define REDIS_OK 0
#define REDIS_ERR -1

/* Virtual memory object->where field. */
//对象存储的位置
//内存
#define REDIS_VM_MEMORY 0 /* The object is on memory */
//磁盘
#define REDIS_VM_SWAPPED 1 /* The object is on disk */
//正在从内存换出到磁盘
#define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
//正在从磁盘换入内存
#define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */

/* Object types */
//对象类型
#define REDIS_STRING 0
#define REDIS_LIST 1
#define REDIS_SET 2
#define REDIS_ZSET 3
#define REDIS_HASH 4
#define REDIS_VMPOINTER 8

/* Objects encoding. Some kind of objects like Strings and Hashes can be
* internally represented in multiple ways. The 'encoding' field of the object
* is set to one of this fields for this object. */
//对象编码,其实就是所使用的底层数据结构
//简单动态字符串sds
#define REDIS_ENCODING_RAW 0 /* Raw representation */
//整数
#define REDIS_ENCODING_INT 1 /* Encoded as integer */
//字典
#define REDIS_ENCODING_HT 2 /* Encoded as hash table */
//压缩字典
#define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
//双端链表
#define REDIS_ENCODING_LINKEDLIST 4 /* Encoded as regular linked list */
//压缩列表
#define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
//整数集合
#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */
//跳跃表
#define REDIS_ENCODING_SKIPLIST 7 /* Encoded as skiplist */

/* The actual Redis Object */
//逻辑时钟的最大位数,类似现实中的表盘,划分了最大的刻度。计算出的实际LRU时间要对最大刻度LRU_CLOCK_MAX取模
#define REDIS_LRU_CLOCK_MAX ((1<<21)-1) /* Max value of obj->lru */
//LRU算法的精度,即一个LRU的单位是多长
#define REDIS_LRU_CLOCK_RESOLUTION 10 /* LRU clock resolution in seconds */

//顶层对象
typedef struct redisObject {
//对象类型,有6种对象,所以需要4bits表示
unsigned type:4;
//存储位置,有4种取值,所以需要2bits表示
unsigned storage:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
//编码,有8中取值,需要4bits表示
unsigned encoding:4;
//对象最后一次被访问的时间
unsigned lru:22; /* lru time (relative to server.lruclock) */
//引用计数,用于垃圾回收
int refcount;
//指向底层数据结构的指针
void *ptr;
/* VM fields are only allocated if VM is active, otherwise the
* object allocation function will just allocate
* sizeof(redisObjct) minus sizeof(redisObjectVM), so using
* Redis without VM active will not have any overhead. */
} robj;

/* The VM pointer structure - identifies an object in the swap file.
*
* This object is stored in place of the value
* object in the main key->value hash table representing a database.
* Note that the first fields (type, storage) are the same as the redisObject
* structure so that vmPointer strucuters can be accessed even when casted
* as redisObject structures.
*
* This is useful as we don't know if a value object is or not on disk, but we
* are always able to read obj->storage to check this. For vmPointer
* structures "type" is set to REDIS_VMPOINTER (even if without this field
* is still possible to check the kind of object from the value of 'storage').*/
//标记对象在磁盘交换区的存储位置
//当value存储在内存中时,Redis使用一个RedisObject与之关联;而当value存储在磁盘中时,Redis使用一个VMPointer与之关联。要知道value存储在磁盘中还是内存中,只需判断storage字段。
typedef struct vmPointer {
//type和storage两个字段与robj一样
unsigned type:4;
unsigned storage:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
unsigned notused:26;
//交换出去的对象类型
unsigned int vtype; /* type of the object stored in the swap file */
//记录对象在交换区中从哪页开始
off_t page; /* the page at witch the object is stored on disk */
//记录对象共包含几页
off_t usedpages; /* number of pages used on disk */
} vmpointer;

object.c

#include "redis.h"
#include <pthread.h>
#include <math.h>

//给定对象类型和底层数据结构的指针,创建新的robj
//默认是基于sds的字符串对象
robj *createObject(int type, void *ptr) {
robj *o = zmalloc(sizeof(*o));
o->type = type;
//先默认ptr是简单动态字符串
o->encoding = REDIS_ENCODING_RAW;
o->ptr = ptr;
//生成对象后引用计数设为1
o->refcount = 1;

/* Set the LRU to the current lruclock (minutes resolution).
* We do this regardless of the fact VM is active as LRU is also
* used for the maxmemory directive when Redis is used as cache.
*
* Note that this code may run in the context of an I/O thread
* and accessing server.lruclock in theory is an error
* (no locks). But in practice this is safe, and even if we read
* garbage Redis will not fail. */
//设置对象的最后访问时间
//server.lruclock来源于redis.h定义的redisServer结构体中
o->lru = server.lruclock;
/* The following is only needed if VM is active, but since the conditional
* is probably more costly than initializing the field it's better to
* have every field properly initialized anyway. */
//默认存储在内存中
o->storage = REDIS_VM_MEMORY;
return o;
}

//创建基于简单动态字符串的字符串对象
robj *createStringObject(char *ptr, size_t len) {
return createObject(REDIS_STRING,sdsnewlen(ptr,len));
}

//创建基于整数值的字符串对象
robj *createStringObjectFromLongLong(long long value) {
robj *o;
//Redis内部采用了shared integer的方式来省去分配内存的开销,即在系统启动时先分配一个从1~REDIS_SHARED_INTEGERS那么多个数值对象放在一个池子中。如果要创建的对象的整数值在这个共享池里,就不用创建新的对象,而是直接返回共享池里的对象,并增加该对象的引用计数
//只共享了整数类型的对象,大概是因为只有整数的相等能在O(1)时间内验证,共享复杂的对象会影响cpu性能
//要求当前线程是服务端的主线程,why?
if (value >= 0 && value < REDIS_SHARED_INTEGERS &&
pthread_equal(pthread_self(),server.mainthread)) {
incrRefCount(shared.integers[value]);
o = shared.integers[value];
} else {
//支持的最大整数值是long int的上限
if (value >= LONG_MIN && value <= LONG_MAX) {
o = createObject(REDIS_STRING, NULL);
o->encoding = REDIS_ENCODING_INT;
o->ptr = (void*)((long)value);
//如果超过了上限,就不能创建基于整数值的字符串对象,而是要把整数值转成sds,创建基于sds的字符串对象
} else {
o = createObject(REDIS_STRING,sdsfromlonglong(value));
}
}
return o;
}

//字符串对象的深拷贝,原对象与副本对象的ptr指针指向的不是同一个sds,因为在调用createStringObject时用sdsnewlen函数基于原对象的sds生成了一个新的sds对象
robj *dupStringObject(robj *o) {
redisAssert(o->encoding == REDIS_ENCODING_RAW);
return createStringObject(o->ptr,sdslen(o->ptr));
}

//创建基于双端链表(adlist)的列表对象
robj *createListObject(void) {
//创建底层的adlist
list *l = listCreate();
//指定类型是列表,创建robj
robj *o = createObject(REDIS_LIST,l);
//把adlist的free函数设为引用计数减一的函数
listSetFreeMethod(l,decrRefCount);
//底层数据结构设为adlist
o->encoding = REDIS_ENCODING_LINKEDLIST;
return o;
}

//创建基于压缩列表(ziplist)的列表对象
robj *createZiplistObject(void) {
unsigned char *zl = ziplistNew();
robj *o = createObject(REDIS_LIST,zl);
o->encoding = REDIS_ENCODING_ZIPLIST;
return o;
}

//创建基于字典(dict)的集合对象
robj *createSetObject(void) {
dict *d = dictCreate(&setDictType,NULL);
robj *o = createObject(REDIS_SET,d);
o->encoding = REDIS_ENCODING_HT;
return o;
}

//创建基于整数集合(intset)的集合对象
robj *createIntsetObject(void) {
intset *is = intsetNew();
robj *o = createObject(REDIS_SET,is);
o->encoding = REDIS_ENCODING_INTSET;
return o;
}

//创建基于压缩字典(zipmap)的哈希对象(后来的版本中取消了zipmap,使用ziplist实现哈希对象)
robj *createHashObject(void) {
/* All the Hashes start as zipmaps. Will be automatically converted
* into hash tables if there are enough elements or big elements
* inside. */
unsigned char *zm = zipmapNew();
robj *o = createObject(REDIS_HASH,zm);
o->encoding = REDIS_ENCODING_ZIPMAP;
return o;
}

//创建基于跳跃表(skiplist)和字典(dict)的有序集合对象
robj *createZsetObject(void) {
zset *zs = zmalloc(sizeof(*zs));
robj *o;
zs->dict = dictCreate(&zsetDictType,NULL);
zs->zsl = zslCreate();
o = createObject(REDIS_ZSET,zs);
o->encoding = REDIS_ENCODING_SKIPLIST;
return o;
}

//释放字符串对象
//为什么只释放sds类型的对象?难道是因为long int占的空间少所以没必要释放?
void freeStringObject(robj *o) {
if (o->encoding == REDIS_ENCODING_RAW) {
sdsfree(o->ptr);
}
}

//释放列表对象
void freeListObject(robj *o) {
switch (o->encoding) {
//adlist是列表指针套着节点指针的复杂结构,所以需要单独设计的free函数,用zfree只能释放列表指针,不能释放里面的节点指针
case REDIS_ENCODING_LINKEDLIST:
listRelease((list*) o->ptr);
break;
//ziplist就是个字符串,所以不需要为它单独定义一个free函数,直接用zfree
case REDIS_ENCODING_ZIPLIST:
zfree(o->ptr);
break;
default:
redisPanic("Unknown list encoding type");
}
}

//释放集合对象
void freeSetObject(robj *o) {
switch (o->encoding) {
case REDIS_ENCODING_HT:
dictRelease((dict*) o->ptr);
break;
//intset只有一个结构体指针,所以也可以用zfree释放
case REDIS_ENCODING_INTSET:
zfree(o->ptr);
break;
default:
redisPanic("Unknown set encoding type");
}
}

//释放有序集合对象
void freeZsetObject(robj *o) {
zset *zs = o->ptr;
//zset包含一个adlist和一个dict,所以要先分别调用各自的free函数,最后再释放最外层的zset指针
dictRelease(zs->dict);
zslFree(zs->zsl);
zfree(zs);
}

//释放哈希对象
void freeHashObject(robj *o) {
switch (o->encoding) {
case REDIS_ENCODING_HT:
dictRelease((dict*) o->ptr);
break;
//zipmap只是个字符串
case REDIS_ENCODING_ZIPMAP:
zfree(o->ptr);
break;
default:
redisPanic("Unknown hash encoding type");
break;
}
}

//对象的引用计数加一
void incrRefCount(robj *o) {
o->refcount++;
}

//对象的引用计数加一,如果会减到0意味着要释放对象
void decrRefCount(void *obj) {
robj *o = obj;

/* Object is a swapped out value, or in the process of being loaded. */
//如果对象存在磁盘上
if (server.vm_enabled &&
(o->storage == REDIS_VM_SWAPPED || o->storage == REDIS_VM_LOADING))
{
vmpointer *vp = obj;
//如果对象正在被加载到内存,先中止当前作业,因为当前作业可能会篡改对象,或者删除对象会使当前作业产生错误
if (o->storage == REDIS_VM_LOADING) vmCancelThreadedIOJob(o);
//被交换到磁盘上的对象引用只能是1,所以引用计数减一就是删除对象
//释放对象占用的所有页
vmMarkPagesFree(vp->page,vp->usedpages);
server.vm_stats_swapped_objects--;
//最后释放对象的指针
zfree(vp);
return;
}

//对象已经没有引用了,还要求减引用就说明出bug了
if (o->refcount <= 0) redisPanic("decrRefCount against refcount <= 0");
/* Object is in memory, or in the process of being swapped out.
*
* If the object is being swapped out, abort the operation on
* decrRefCount even if the refcount does not drop to 0: the object
* is referenced at least two times, as value of the key AND as
* job->val in the iojob. So if we don't invalidate the iojob, when it is
* done but the relevant key was removed in the meantime, the
* complete jobs handler will not find the key about the job and the
* assert will fail. */
//如果对象存在内存里,但正在把对象交换到磁盘上,也需要先中止作业
if (server.vm_enabled && o->storage == REDIS_VM_SWAPPING)
vmCancelThreadedIOJob(o);
//如果当前引用数是1,再减就会到0,所以直接删除对象,执行对象的free函数
if (--(o->refcount) == 0) {
switch(o->type) {
case REDIS_STRING: freeStringObject(o); break;
case REDIS_LIST: freeListObject(o); break;
case REDIS_SET: freeSetObject(o); break;
case REDIS_ZSET: freeZsetObject(o); break;
case REDIS_HASH: freeHashObject(o); break;
default: redisPanic("Unknown object type"); break;
}
o->ptr = NULL; /* defensive programming. We'll see NULL in traces. */
//最后释放robj的指针
zfree(o);
}
}

//服务端对客户端提交的对象进行类型检查
int checkType(redisClient *c, robj *o, int type) {
if (o->type != type) {
//如果类型错误,把错误信息返回给客户端
addReply(c,shared.wrongtypeerr);
return 1;
}
return 0;
}

/* Try to encode a string object in order to save space */
//尝试编码字符串对象以节省空间,其实就是尝试用整数值替换sds字符串
robj *tryObjectEncoding(robj *o) {
long value;
sds s = o->ptr;

//如果对象存储的已经是整数的表示了,就不需要修改
if (o->encoding != REDIS_ENCODING_RAW)
return o; /* Already encoded */

/* It's not safe to encode shared objects: shared objects can be shared
* everywhere in the "object space" of Redis. Encoded objects can only
* appear as "values" (and not, for instance, as keys) */
//如果当前对象有多个引用,在此修改对象是不安全的,直接退出
if (o->refcount > 1) return o;

/* Currently we try to encode only strings */
//只能编码字符串对象
redisAssert(o->type == REDIS_STRING);

/* Check if we can represent this string as a long integer */
//如果当前对象存储的sds字符串不能转成整数,直接退出
if (isStringRepresentableAsLong(s,&value) == REDIS_ERR) return o;

/* Ok, this object can be encoded...
*
* Can I use a shared object? Only if the object is inside a given
* range and if this is the main thread, since when VM is enabled we
* have the constraint that I/O thread should only handle non-shared
* objects, in order to avoid race conditions (we don't have per-object
* locking).
*
* Note that we also avoid using shared integers when maxmemory is used
* because very object needs to have a private LRU field for the LRU
* algorithm to work well. */
//如果能用共享池里的对象,就删除当前对象,返回共享池里的对象
if (server.maxmemory == 0 && value >= 0 && value < REDIS_SHARED_INTEGERS &&
pthread_equal(pthread_self(),server.mainthread)) {
decrRefCount(o);
incrRefCount(shared.integers[value]);
return shared.integers[value];
//如果没用共享池里的对象,就修改当前对象的ptr,释放sds数据,把ptr指向相应的整型数据
} else {
//把数据类型变成int
o->encoding = REDIS_ENCODING_INT;
sdsfree(o->ptr);
o->ptr = (void*) value;
return o;
}
}

/* Get a decoded version of an encoded object (returned as a new object).
* If the object is already raw-encoded just increment the ref count. */
//和tryObjectEncoding相反,是把基于整数的字符串对象转换成基于sds的字符串对象
robj *getDecodedObject(robj *o) {
robj *dec;

//如果已经是基于sds的字符串对象,引用加一并返回
if (o->encoding == REDIS_ENCODING_RAW) {
incrRefCount(o);
return o;
}
if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
char buf[32];

ll2string(buf,32,(long)o->ptr);
dec = createStringObject(buf,strlen(buf));
return dec;
} else {
redisPanic("Unknown encoding type");
}
}

/* Compare two string objects via strcmp() or alike.
* Note that the objects may be integer-encoded. In such a case we
* use ll2string() to get a string representation of the numbers on the stack
* and compare the strings, it's much faster than calling getDecodedObject().
*
* Important note: if objects are not integer encoded, but binary-safe strings,
* sdscmp() from sds.c will apply memcmp() so this function ca be considered
* binary safe. */
//比较两个字符串对象保存的数据是否相同,最后比较的是两个字符串
int compareStringObjects(robj *a, robj *b) {
redisAssert(a->type == REDIS_STRING && b->type == REDIS_STRING);
//只能比较最长128个字节的字符串
char bufa[128], bufb[128], *astr, *bstr;
int bothsds = 1;

//如果两个指针指向的是一个对象,就说明是相等的,返回0
if (a == b) return 0;
//如果是基于整数的字符串对象,要先转成sds字符串,最后再比较两个对象的sds字符串
if (a->encoding != REDIS_ENCODING_RAW) {
ll2string(bufa,sizeof(bufa),(long) a->ptr);
astr = bufa;
bothsds = 0;
} else {
astr = a->ptr;
}
if (b->encoding != REDIS_ENCODING_RAW) {
ll2string(bufb,sizeof(bufb),(long) b->ptr);
bstr = bufb;
bothsds = 0;
} else {
bstr = b->ptr;
}
//如果都是原生的sds字符串,就用sdscmp比较,如果二者之中有整数转成的字符串,就用strcmp比较
//其实根本没区别,因为二者要么是ll2string产生的字符串,要么是sdshdr结构体中的buf字符串,都是字符串,直接用strcmp就可以
return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr);
}

/* Equal string objects return 1 if the two objects are the same from the
* point of view of a string comparison, otherwise 0 is returned. Note that
* this function is faster then checking for (compareStringObject(a,b) == 0)
* because it can perform some more optimization. */
//比较两个字符串对象保存的数据是否相同
int equalStringObjects(robj *a, robj *b) {
//如果都存储的整数,直接比较整数
if (a->encoding != REDIS_ENCODING_RAW && b->encoding != REDIS_ENCODING_RAW){
return a->ptr == b->ptr;
//如果不全是整数,就都转成字符串比较
} else {
return compareStringObjects(a,b) == 0;
}
}

//获取字符串对象存储的数据的长度
size_t stringObjectLen(robj *o) {
redisAssert(o->type == REDIS_STRING);
//对于sds直接调用sds的len函数
if (o->encoding == REDIS_ENCODING_RAW) {
return sdslen(o->ptr);
//对于整数值,返回其转成的字符串的长度
} else {
char buf[32];
return ll2string(buf,32,(long)o->ptr);
}
}

//从字符串对象中获取double类型的数值,虽然字符串对象能以数值形式保存的只有整数,但浮点数可以以sds的形式存在对象中
int getDoubleFromObject(robj *o, double *target) {
double value;
char *eptr;

if (o == NULL) {
value = 0;
} else {
redisAssert(o->type == REDIS_STRING);
//如果是sds就转成浮点数
if (o->encoding == REDIS_ENCODING_RAW) {
//strtod把ptr指向的字符串转成浮点数,eptr指向的是遇到的第一个无法转成数值的字符
value = strtod(o->ptr, &eptr);
if (eptr[0] != '\0' || isnan(value)) return REDIS_ERR;
//如果对象存的是整数,就把整数赋给浮点数变量
} else if (o->encoding == REDIS_ENCODING_INT) {
value = (long)o->ptr;
} else {
redisPanic("Unknown string encoding");
}
}

*target = value;
return REDIS_OK;
}

//服务端从对象中获取double类型的数值,返回给客户端
//返回的值写入target指针,msg保存的是获取失败的情况下默认返回给客户端的错误信息
int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const char *msg) {
double value;
if (getDoubleFromObject(o, &value) != REDIS_OK) {
if (msg != NULL) {
addReplyError(c,(char*)msg);
} else {
addReplyError(c,"value is not a double");
}
return REDIS_ERR;
}

*target = value;
return REDIS_OK;
}

//从字符串对象中获取long long类型的数值,和getDoubleFromObject的逻辑相同
int getLongLongFromObject(robj *o, long long *target) {
long long value;
char *eptr;

if (o == NULL) {
value = 0;
} else {
redisAssert(o->type == REDIS_STRING);
if (o->encoding == REDIS_ENCODING_RAW) {
//strtoll把ptr指向的字符串转成10进制的long long类型的值,eptr指向的是遇到的第一个无法转成数值的字符
value = strtoll(o->ptr, &eptr, 10);
if (eptr[0] != '\0') return REDIS_ERR;
//errno 是记录系统的最后一次错误代码,ERANGE是<errno.h>里的宏定义,表示有某个变量溢出了
//为什么getDoubleFromObject里不用判断越界呢?
if (errno == ERANGE && (value == LLONG_MIN || value == LLONG_MAX))
return REDIS_ERR;
} else if (o->encoding == REDIS_ENCODING_INT) {
value = (long)o->ptr;
} else {
redisPanic("Unknown string encoding");
}
}

if (target) *target = value;
return REDIS_OK;
}

//服务端从对象中获取long long类型的数值,返回给客户端
//和getDoubleFromObjectOrReply逻辑相同
int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, const char *msg) {
long long value;
if (getLongLongFromObject(o, &value) != REDIS_OK) {
if (msg != NULL) {
addReplyError(c,(char*)msg);
} else {
addReplyError(c,"value is not an integer or out of range");
}
return REDIS_ERR;
}

*target = value;
return REDIS_OK;
}

//服务端从对象中获取long类型的数值,返回给客户端
int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg) {
long long value;
//为什么不用专门写一个getLongFromObject函数呢?
if (getLongLongFromObjectOrReply(c, o, &value, msg) != REDIS_OK) return REDIS_ERR;
if (value < LONG_MIN || value > LONG_MAX) {
if (msg != NULL) {
addReplyError(c,(char*)msg);
} else {
addReplyError(c,"value is out of range");
}
return REDIS_ERR;
}
//把long long类型的值赋给long类型变量不会有危险吗?
*target = value;
return REDIS_OK;
}

//获取编码类型的字符串表示
char *strEncoding(int encoding) {
switch(encoding) {
case REDIS_ENCODING_RAW: return "raw";
case REDIS_ENCODING_INT: return "int";
case REDIS_ENCODING_HT: return "hashtable";
case REDIS_ENCODING_ZIPMAP: return "zipmap";
case REDIS_ENCODING_LINKEDLIST: return "linkedlist";
case REDIS_ENCODING_ZIPLIST: return "ziplist";
case REDIS_ENCODING_INTSET: return "intset";
case REDIS_ENCODING_SKIPLIST: return "skiplist";
default: return "unknown";
}
}

/* Given an object returns the min number of seconds the object was never
* requested, using an approximated LRU algorithm. */
//计算给定对象的闲置时长,使用近似LRU算法

unsigned long estimateObjectIdleTime(robj *o) {
if (server.lruclock >= o->lru) {
return (server.lruclock - o->lru) * REDIS_LRU_CLOCK_RESOLUTION;
} else {
//当server.lruclock的值超出REDIS_LRU_CLOCK_MAX时,会从头开始计算,导致其小于对象的lru,所以算的时候要加上REDIS_LRU_CLOCK_MAX
//为什么认为server.lruclock只会多转一周呢?会不会加上REDIS_LRU_CLOCK_MAX还是小于o->lru?
return ((REDIS_LRU_CLOCK_MAX - o->lru) + server.lruclock) *
REDIS_LRU_CLOCK_RESOLUTION;
}
}

/* This is an helper function for the DEBUG command. We need to lookup keys
* without any modification of LRU or other parameters. */
//给定key查找字典中对应的val
robj *objectCommandLookup(redisClient *c, robj *key) {
dictEntry *de;

if ((de = dictFind(c->db->dict,key->ptr)) == NULL) return NULL;
return (robj*) dictGetEntryVal(de);
}

//给定key查找字典中对应的val,如果获取失败就返回reply给客户端
robj *objectCommandLookupOrReply(redisClient *c, robj *key, robj *reply) {
robj *o = objectCommandLookup(c,key);

if (!o) addReply(c, reply);
return o;
}

/* Object command allows to inspect the internals of an Redis Object.
* Usage: OBJECT <verb> ... arguments ... */
//解析并执行客户端的object命令
void objectCommand(redisClient *c) {
robj *o;

//OBJECT REFCOUNT <key> ,返回字典中给定key对应的value被引用的次数
if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) {
if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
== NULL) return;
addReplyLongLong(c,o->refcount);
//OBJECT ENCODING <key> ,返回给定key对应的value的编码类型的字符串表示
} else if (!strcasecmp(c->argv[1]->ptr,"encoding") && c->argc == 3) {
if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
== NULL) return;
addReplyBulkCString(c,strEncoding(o->encoding));
//OBJECT IDLETIME <key> ,返回给定key对应的value自储存以来的空闲时间
} else if (!strcasecmp(c->argv[1]->ptr,"idletime") && c->argc == 3) {
if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
== NULL) return;
addReplyLongLong(c,estimateObjectIdleTime(o));
} else {
addReplyError(c,"Syntax error. Try OBJECT (refcount|encoding|idletime)");
}
}

4.2 字符串

t_string.c

#include "redis.h"

/*-----------------------------------------------------------------------------
* String Commands
*----------------------------------------------------------------------------*/

//字符串长度不能超过512MB
static int checkStringLength(redisClient *c, long long size) {
if (size > 512*1024*1024) {
addReplyError(c,"string exceeds maximum allowed size (512MB)");
return REDIS_ERR;
}
return REDIS_OK;
}

//处理客户端发来的SET命令
void setGenericCommand(redisClient *c, int nx, robj *key, robj *val, robj *expire) {
int retval;
//harmness warning?
long seconds = 0; /* initialized to avoid an harmness warning */

//如果设置了过期时间
if (expire) {
//从字符串对象中解析出long类型的值,存储在seconds变量中
if (getLongFromObjectOrReply(c, expire, &seconds, NULL) != REDIS_OK)
return;
//设置的过期时间必须大于0
if (seconds <= 0) {
addReplyError(c,"invalid expire time in SETEX");
return;
}
}

//在db中查询key,同时检查是否过期,过期则删除key
lookupKeyWrite(c->db,key); /* Force expire of old key if needed */
//向db中插入键值对
retval = dbAdd(c->db,key,val);
//插入失败,说明key已经存在
if (retval == REDIS_ERR) {
//如果不是SETNX命令,就用新的键值对替换旧的
if (!nx) {
dbReplace(c->db,key,val);
incrRefCount(val);
//如果是SETNX命令,放弃插入
} else {
//插入失败,向客户端返回0(共享池中的对象)
addReply(c,shared.czero);
return;
}
//插入成功,value的引用加一
} else {
incrRefCount(val);
}
//遍历所有正在watch这个key客户端,打开他们的REDIS_DIRTY_CAS标识
touchWatchedKey(c->db,key);
server.dirty++;
//清除旧的过期时间的记录
//key的过期时间保存在过期字典里,由过期时间对象指向key对象,过期字典存储的redisDb对象里
//SET命令会清除过期时间,默认把对象持久化,所以SETEX和SET混用时要注意
removeExpire(c->db,key);
//设置key的过期时间
if (expire) setExpire(c->db,key,time(NULL)+seconds);
//插入成功,向客户端返回成功码
addReply(c, nx ? shared.cone : shared.ok);
}

//实现set命令
//SET KEY_NAME VALUE
void setCommand(redisClient *c) {
//先尝试把value字符串转换成数值以节省空间
c->argv[2] = tryObjectEncoding(c->argv[2]);
setGenericCommand(c,0,c->argv[1],c->argv[2],NULL);
}

//实现Setnx(SET if Not eXists)命令
//SETNX KEY_NAME VALUE
void setnxCommand(redisClient *c) {
c->argv[2] = tryObjectEncoding(c->argv[2]);
setGenericCommand(c,1,c->argv[1],c->argv[2],NULL);
}

//实现Setex命令,为指定的key设置值及其过期时间
//SETEX KEY_NAME TIMEOUT VALUE
void setexCommand(redisClient *c) {
c->argv[3] = tryObjectEncoding(c->argv[3]);
setGenericCommand(c,0,c->argv[1],c->argv[3],c->argv[2]);
}

//处理客户端发来的GET命令
int getGenericCommand(redisClient *c) {
robj *o;
//在db中查询key并向客户端返回结果,shared.nullbulk是共享池里表示-1的对象,如果没找到就向客户端返回-1
//没找到就返回REDIS_OK退出
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL)
return REDIS_OK;
//如果找到了key但其对应的value不是字符串对象,就向客户端报错
//虽然在set时会尝试把底层数据改成整型,但基于整型的字符串对象的类型还是字符串
if (o->type != REDIS_STRING) {
addReply(c,shared.wrongtypeerr);
return REDIS_ERR;
//否则就是查找成功,把查到的value返回给客户端
} else {
addReplyBulk(c,o);
return REDIS_OK;
}
}

//实现get命令
//GET KEY_NAME
void getCommand(redisClient *c) {
getGenericCommand(c);
}

//实现getset命令
//GETSET KEY_NAME VALUE
void getsetCommand(redisClient *c) {
//先get旧的value,返回给客户端
if (getGenericCommand(c) == REDIS_ERR) return;
//在db中用新的value替换旧的value
c->argv[2] = tryObjectEncoding(c->argv[2]);
dbReplace(c->db,c->argv[1],c->argv[2]);
incrRefCount(c->argv[2]);
//遍历所有正在watch这个key客户端,打开他们的REDIS_DIRTY_CAS标识
touchWatchedKey(c->db,c->argv[1]);
server.dirty++;
//getset命令也执行了set,所以要清除过期时间的记录
removeExpire(c->db,c->argv[1]);
}

//检查setbit和getbit命令中的offset参数是否合法
static int getBitOffsetFromArgument(redisClient *c, robj *o, size_t *offset) {
long long loffset;
char *err = "bit offset is not an integer or out of range";

//先把offset转成long long类型的数值
if (getLongLongFromObjectOrReply(c,o,&loffset,err) != REDIS_OK)
return REDIS_ERR;

/* Limit offset to 512MB in bytes */
//判断offset是否越界,因为offset单位是bit,字符串size单位是byte,所以offset要先右移三位
if ((loffset < 0) || ((unsigned long long)loffset >> 3) >= (512*1024*1024))
{
addReplyError(c,err);
return REDIS_ERR;
}

//没有问题,就把offset数值赋给参数
*offset = (size_t)loffset;
return REDIS_OK;
}

//实现setbit命令
//Setbit KEY_NAME OFFSET VALUE
void setbitCommand(redisClient *c) {
robj *o;
char *err = "bit is not an integer or out of range";
size_t bitoffset;
int byte, bit;
int byteval, bitval;
long on;

//先检查offset是否越界,并把表示的数值存储在bitoffset中
if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset) != REDIS_OK)
return;
//读取命令中的value对象,解析出整数值,存在on中
if (getLongFromObjectOrReply(c,c->argv[3],&on,err) != REDIS_OK)
return;

/* Bits can only be set or cleared... */
//因为是给一个bit赋值,所以value只能是0或1
if (on & ~1) {
addReplyError(c,err);
return;
}

//在db中查询key,同时检查是否过期,过期则删除key
o = lookupKeyWrite(c->db,c->argv[1]);
//如果key不存在,就创建基于sds的空字符串对象写入db
if (o == NULL) {
o = createObject(REDIS_STRING,sdsempty());
dbAdd(c->db,c->argv[1],o);
} else {
if (checkType(c,o,REDIS_STRING)) return;

/* Create a copy when the object is shared or encoded. */
//如果value不是原生的sds字符串,就要先转成sds。因为基于int的字符串只是节省内存的一种存储捷径,但修改某个bit时所针对的是原始sds字符串的某个bit
//为什么要判断o->refcount?
if (o->refcount != 1 || o->encoding != REDIS_ENCODING_RAW) {
robj *decoded = getDecodedObject(o);
o = createStringObject(decoded->ptr, sdslen(decoded->ptr));
//调用getDecodedObject时,可能返回转过类型且引用加一的对象,也可能返回新创建引用数为1的新对象,这两种情况下都需要引用减一,来删除本次操作增加的引用,新对象的值既然已经存到对象o里了,所以引用减到0直接删除该对象。
decrRefCount(decoded);
dbReplace(c->db,c->argv[1],o);
}
}

/* Grow sds value to the right length if necessary */
//计算offset对应的字节长度,如果比value字符串还长,就要先将value延长并补0
byte = bitoffset >> 3;
o->ptr = sdsgrowzero(o->ptr,byte+1);

/* Get current values */
//先取字节值,再从字节值里取目标位的值
byteval = ((char*)o->ptr)[byte];
bit = 7 - (bitoffset & 0x7);
bitval = byteval & (1 << bit);

/* Update byte with new bit value and return original value */
//在byte里更新bit的值,再把byte写入字符串中
byteval &= ~(1 << bit);
byteval |= ((on & 0x1) << bit);
((char*)o->ptr)[byte] = byteval;
touchWatchedKey(c->db,c->argv[1]);
server.dirty++;
addReply(c, bitval ? shared.cone : shared.czero);
}

//实现getbit命令
//GETBIT KEY_NAME OFFSET
void getbitCommand(redisClient *c) {
robj *o;
char llbuf[32];
size_t bitoffset;
size_t byte, bit;
size_t bitval = 0;

//判断offset合法性,并转成数值
if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset) != REDIS_OK)
return;

//取出key对应的value字符串
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
checkType(c,o,REDIS_STRING)) return;

//计算目标的字节和其中具体的位
byte = bitoffset >> 3;
bit = 7 - (bitoffset & 0x7);
//如果value不是原生sds,要先转成字符串,再从字符串里取目标位的值
if (o->encoding != REDIS_ENCODING_RAW) {
if (byte < (size_t)ll2string(llbuf,sizeof(llbuf),(long)o->ptr))
bitval = llbuf[byte] & (1 << bit);
} else {
if (byte < sdslen(o->ptr))
bitval = ((char*)o->ptr)[byte] & (1 << bit);
}

addReply(c, bitval ? shared.cone : shared.czero);
}

//实现setrange命令
//SETRANGE KEY_NAME OFFSET VALUE
void setrangeCommand(redisClient *c) {
robj *o;
long offset;
sds value = c->argv[3]->ptr;
//把offset转成数值
if (getLongFromObjectOrReply(c,c->argv[2],&offset,NULL) != REDIS_OK)
return;

if (offset < 0) {
addReplyError(c,"offset is out of range");
return;
}

//从db中取出key对应的value
o = lookupKeyWrite(c->db,c->argv[1]);
//如果key不存在,就变成了插入新的键值对
if (o == NULL) {
/* Return 0 when setting nothing on a non-existing string */
//但是如果要set的value是空字符串,就不继续插入,直接退出,因为set空的值就等于啥也没干
if (sdslen(value) == 0) {
addReply(c,shared.czero);
return;
}

/* Return when the resulting string exceeds allowed size */
//执行set之后的字符串长度不能超过512MB,offset是修改起始点前面的长度,sdslen(value)是修改起始点后面的长度
if (checkStringLength(c,offset+sdslen(value)) != REDIS_OK)
return;
//创建新的字符串对象并插入db
o = createObject(REDIS_STRING,sdsempty());
dbAdd(c->db,c->argv[1],o);
//如果key存在,就是修改对应的字符串
} else {
size_t olen;

/* Key exists, check type */
if (checkType(c,o,REDIS_STRING))
return;

/* Return existing string length when setting nothing */
//要写入的value不能是空字符串
olen = stringObjectLen(o);
if (sdslen(value) == 0) {
addReplyLongLong(c,olen);
return;
}

/* Return when the resulting string exceeds allowed size */
//修改后的字符串不能超过512MB
if (checkStringLength(c,offset+sdslen(value)) != REDIS_OK)
return;

/* Create a copy when the object is shared or encoded. */
//把非sds的对象转成基于sds的字符串后,再进行赋值
if (o->refcount != 1 || o->encoding != REDIS_ENCODING_RAW) {
robj *decoded = getDecodedObject(o);
o = createStringObject(decoded->ptr, sdslen(decoded->ptr));
decrRefCount(decoded);
dbReplace(c->db,c->argv[1],o);
}
}
//前面已经判断过sdslen(value),这里是不是没必要?
if (sdslen(value) > 0) {
o->ptr = sdsgrowzero(o->ptr,offset+sdslen(value));
memcpy((char*)o->ptr+offset,value,sdslen(value));
touchWatchedKey(c->db,c->argv[1]);
server.dirty++;
}
//返回给客户端的是被修改后的字符串长度
addReplyLongLong(c,sdslen(o->ptr));
}

//实现getrange命令
//GETRANGE KEY_NAME start end
void getrangeCommand(redisClient *c) {
robj *o;
long start, end;
char *str, llbuf[32];
size_t strlen;
//把start和end转成数值
if (getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != REDIS_OK)
return;
if (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != REDIS_OK)
return;
//如果key不存在,或value不是字符串,就退出
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptybulk)) == NULL ||
checkType(c,o,REDIS_STRING)) return;

//获取value字符串的长度
if (o->encoding == REDIS_ENCODING_INT) {
str = llbuf;
strlen = ll2string(llbuf,sizeof(llbuf),(long)o->ptr);
} else {
str = o->ptr;
strlen = sdslen(str);
}

/* Convert negative indexes */
//支持负索引,取值时先把负索引换算成正索引,还要把越界的索引拉回到边界
if (start < 0) start = strlen+start;
if (end < 0) end = strlen+end;
if (start < 0) start = 0;
if (end < 0) end = 0;
if ((unsigned)end >= strlen) end = strlen-1;

/* Precondition: end >= 0 && end < strlen, so the only condition where
* nothing can be returned is: start > end. */
//如果起点在终点之后,返回空的字符串
if (start > end) {
addReply(c,shared.emptybulk);
} else {
//取出字符串后返回给客户端
//取的方法是把ptr指针指到start位置,取出长度为end-start+1的字符串
addReplyBulkCBuffer(c,(char*)str+start,end-start+1);
}
}

//实现mget命令
//MGET KEY1 KEY2 .. KEYN
void mgetCommand(redisClient *c) {
int j;
//因为要向客户端返回多个值,所以先算出输出时显示的序号
addReplyMultiBulkLen(c,c->argc-1);
//查询所有的key并逐个返回,key不存在或value类型不是字符串的返回shared.nullbulk
for (j = 1; j < c->argc; j++) {
robj *o = lookupKeyRead(c->db,c->argv[j]);
if (o == NULL) {
addReply(c,shared.nullbulk);
} else {
if (o->type != REDIS_STRING) {
addReply(c,shared.nullbulk);
} else {
addReplyBulk(c,o);
}
}
}
}

//处理客户端发来的MSET和MSETNX命令
void msetGenericCommand(redisClient *c, int nx) {
int j, busykeys = 0;

//argc从0开始计数,所以argc的值就是所有key和value的数量,所以必须是偶数
if ((c->argc % 2) == 0) {
addReplyError(c,"wrong number of arguments for MSET");
return;
}
/* Handle the NX flag. The MSETNX semantic is to return zero and don't
* set nothing at all if at least one already key exists. */
//对于msetnx命令,只有当所有给定key都不存在时,才能进行set
if (nx) {
for (j = 1; j < c->argc; j += 2) {
if (lookupKeyWrite(c->db,c->argv[j]) != NULL) {
busykeys++;
}
}
}
//busykeys是已存在的key的数量,放弃set,失败返回0
if (busykeys) {
addReply(c, shared.czero);
return;
}
//set所有的键值对
for (j = 1; j < c->argc; j += 2) {
c->argv[j+1] = tryObjectEncoding(c->argv[j+1]);
//已经确定key不存在了,可以直接用dbAdd
dbReplace(c->db,c->argv[j],c->argv[j+1]);
incrRefCount(c->argv[j+1]);
//执行set类命令后要清除过期时间记录
removeExpire(c->db,c->argv[j]);
touchWatchedKey(c->db,c->argv[j]);
}
server.dirty += (c->argc-1)/2;
addReply(c, nx ? shared.cone : shared.ok);
}

//实现mset命令
//MSET key1 value1 key2 value2 .. keyN valueN
void msetCommand(redisClient *c) {
msetGenericCommand(c,0);
}

//实现msetnx命令
//MSETNX key1 value1 key2 value2 .. keyN valueN
void msetnxCommand(redisClient *c) {
msetGenericCommand(c,1);
}

//处理客户端发来的INCR和DECR类命令
void incrDecrCommand(redisClient *c, long long incr) {
long long value, oldvalue;
robj *o;
//获取value字符串
o = lookupKeyWrite(c->db,c->argv[1]);
//先确定value是字符串对象,再确定是否是数值或能否转成数值
if (o != NULL && checkType(c,o,REDIS_STRING)) return;
if (getLongLongFromObjectOrReply(c,o,&value,NULL) != REDIS_OK) return;

oldvalue = value;
//自增还是自减只需要设置incr参数
value += incr;
//如果自增以后反而变小,或者自减以后反而变大,说明执行加减操作后value溢出了
if ((incr < 0 && value > oldvalue) || (incr > 0 && value < oldvalue)) {
addReplyError(c,"increment or decrement would overflow");
return;
}
//创建新的基于整型的字符串对象,替换旧的value对象
o = createStringObjectFromLongLong(value);
dbReplace(c->db,c->argv[1],o);
touchWatchedKey(c->db,c->argv[1]);
server.dirty++;
//客户端的输出是冒号-新value值-回车换行
addReply(c,shared.colon);
addReply(c,o);
addReply(c,shared.crlf);
}

//实现incr命令
//INCR KEY_NAME
void incrCommand(redisClient *c) {
incrDecrCommand(c,1);
}

//实现decr命令
//DECR KEY_NAME
void decrCommand(redisClient *c) {
incrDecrCommand(c,-1);
}

//实现incrby命令
//INCRBY KEY_NAME INCR_AMOUNT
void incrbyCommand(redisClient *c) {
long long incr;
//把命令里的INCR_AMOUNT转成数值
if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != REDIS_OK) return;
incrDecrCommand(c,incr);
}

//实现decrby命令
//DECRBY KEY_NAME DECREMENT_AMOUNT
void decrbyCommand(redisClient *c) {
long long incr;
if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != REDIS_OK) return;
incrDecrCommand(c,-incr);
}

//实现append命令
//APPEND KEY_NAME NEW_VALUE
void appendCommand(redisClient *c) {
size_t totlen;
robj *o, *append;

o = lookupKeyWrite(c->db,c->argv[1]);
//如果key不存在,就相当于set命令
if (o == NULL) {
/* Create the key */
c->argv[2] = tryObjectEncoding(c->argv[2]);
dbAdd(c->db,c->argv[1],c->argv[2]);
incrRefCount(c->argv[2]);
totlen = stringObjectLen(c->argv[2]);
} else {
/* Key exists, check type */
if (checkType(c,o,REDIS_STRING))
return;

/* "append" is an argument, so always an sds */
append = c->argv[2];
totlen = stringObjectLen(o)+sdslen(append->ptr);
//判断追加后的字符串长度是否超过512MB
if (checkStringLength(c,totlen) != REDIS_OK)
return;

/* If the object is shared or encoded, we have to make a copy */
//只能追加到sds字符串后面,所以先转数据类型
if (o->refcount != 1 || o->encoding != REDIS_ENCODING_RAW) {
robj *decoded = getDecodedObject(o);
o = createStringObject(decoded->ptr, sdslen(decoded->ptr));
decrRefCount(decoded);
dbReplace(c->db,c->argv[1],o);
}

/* Append the value */
o->ptr = sdscatlen(o->ptr,append->ptr,sdslen(append->ptr));
totlen = sdslen(o->ptr);
}
touchWatchedKey(c->db,c->argv[1]);
server.dirty++;
//返回给客户端的是追加后的字符串长度
addReplyLongLong(c,totlen);
}

//实现strlen命令
//STRLEN KEY_NAME
void strlenCommand(redisClient *c) {
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
checkType(c,o,REDIS_STRING)) return;
addReplyLongLong(c,stringObjectLen(o));
}

4.3 列表

redis.h(列表相关)

/* Structure to hold list iteration abstraction. */
//列表的迭代器
typedef struct {
//列表对象
robj *subject;
//列表编码
unsigned char encoding;
//迭代方向
unsigned char direction; /* Iteration direction */
//下个ziplist节点的头地址
unsigned char *zi;
//下个linkedlist节点的地址
listNode *ln;
} listTypeIterator;

/* Structure for an entry while iterating over a list. */
//迭代过程中存储迭代器当前节点的信息
typedef struct {
listTypeIterator *li;
unsigned char *zi; /* Entry in ziplist */
listNode *ln; /* Entry in linked list */
} listTypeEntry;

t_list.c

#include "redis.h"

/*-----------------------------------------------------------------------------
* List API
*----------------------------------------------------------------------------*/

/* Check the argument length to see if it requires us to convert the ziplist
* to a real list. Only check raw-encoded objects because integer encoded
* objects are never too long. */
//在向列表插入元素前调用,如果当前列表subject是基于ziplist的,且要插入的value字符串的长度超过了系统设定的ziplist单个entry的最大长度,就要把当前列表转换成基于linkedlist的。
void listTypeTryConversion(robj *subject, robj *value) {
//不是基于ziplist的列表不需要考虑转换
if (subject->encoding != REDIS_ENCODING_ZIPLIST) return;
if (value->encoding == REDIS_ENCODING_RAW &&
sdslen(value->ptr) > server.list_max_ziplist_value)
listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
}

//向列表中插入元素,subject是列表对象,value是要插入的字符串对象,where是插入位置(0是表头,1是表尾)
void listTypePush(robj *subject, robj *value, int where) {
//先检查当前列表需不需要转成基于linkedlist的
listTypeTryConversion(subject,value);
//如果当前列表基于ziplist,且存储的entry数量超过了系统设定的ziplist可容纳的最大entry数,也要把当前列表转成基于linkedlist的
if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
ziplistLen(subject->ptr) >= server.list_max_ziplist_entries)
listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
//如果是基于ziplist的列表,调用ziplistPush插入
if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
int pos = (where == REDIS_HEAD) ? ZIPLIST_HEAD : ZIPLIST_TAIL;
//如果value是基于整型的字符串,要先转成基于sds的
value = getDecodedObject(value);
subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),pos);
//value已经存进列表了,本次对value的引用可以撤销了
decrRefCount(value);
//如果是基于linkedlist的列表,调用listAddNodeHead和listAddNodeTail插入
} else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
if (where == REDIS_HEAD) {
listAddNodeHead(subject->ptr,value);
} else {
listAddNodeTail(subject->ptr,value);
}
incrRefCount(value);
} else {
redisPanic("Unknown list encoding");
}
}

//弹出列表元素,where是弹出位置(0是表头,1是表尾)
robj *listTypePop(robj *subject, int where) {
robj *value = NULL;
if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
unsigned char *p;
unsigned char *vstr;
unsigned int vlen;
long long vlong;
int pos = (where == REDIS_HEAD) ? 0 : -1;
p = ziplistIndex(subject->ptr,pos);
//从ziplist中取出要弹出的元素,如果是字符串就存在vstr里,如果是整数就存在vlong里
if (ziplistGet(p,&vstr,&vlen,&vlong)) {
if (vstr) {
//返回值要求是robj对象,所以要基于弹出的元素新建robj
value = createStringObject((char*)vstr,vlen);
} else {
value = createStringObjectFromLongLong(vlong);
}
/* We only need to delete an element when it exists */
//pop实际是两步,先生成并返回副本,然后删除原本
subject->ptr = ziplistDelete(subject->ptr,&p);
}
} else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
list *list = subject->ptr;
listNode *ln;
if (where == REDIS_HEAD) {
ln = listFirst(list);
} else {
ln = listLast(list);
}
if (ln != NULL) {
value = listNodeValue(ln);
incrRefCount(value);
listDelNode(list,ln);
}
} else {
redisPanic("Unknown list encoding");
}
return value;
}

//返回列表长度
unsigned long listTypeLength(robj *subject) {
if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
return ziplistLen(subject->ptr);
} else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
return listLength((list*)subject->ptr);
} else {
redisPanic("Unknown list encoding");
}
}

/* Initialize an iterator at the specified index. */
//创建列表的迭代器,index是指定的起始点,direction是迭代方向
listTypeIterator *listTypeInitIterator(robj *subject, int index, unsigned char direction) {
listTypeIterator *li = zmalloc(sizeof(listTypeIterator));
li->subject = subject;
li->encoding = subject->encoding;
li->direction = direction;
//zi和ln分别存储ziplist和linkedlist的节点地址
//对于ziplist,节点地址就是entry子串的头地址,对于linkedlist,节点地址就是指向某个listNode的指针
if (li->encoding == REDIS_ENCODING_ZIPLIST) {
li->zi = ziplistIndex(subject->ptr,index);
} else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
li->ln = listIndex(subject->ptr,index);
} else {
redisPanic("Unknown list encoding");
}
return li;
}

/* Clean up the iterator. */
//释放迭代器
void listTypeReleaseIterator(listTypeIterator *li) {
zfree(li);
}

/* Stores pointer to current the entry in the provided entry structure
* and advances the position of the iterator. Returns 1 when the current
* entry is in fact an entry, 0 otherwise. */
//把迭代器li的当前节点取出,存储在entry中,再让li指向下个节点
//成功返回1,失败返回0
int listTypeNext(listTypeIterator *li, listTypeEntry *entry) {
/* Protect from converting when iterating */
//如果迭代器最初保存的列表编码与当前的列表编码不同,说明列表被convert过,退出程序保平安
redisAssert(li->subject->encoding == li->encoding);
//先把迭代器中的节点信息zi或ln转移到entry中,再更新迭代器中的zi或ln
entry->li = li;
if (li->encoding == REDIS_ENCODING_ZIPLIST) {
entry->zi = li->zi;
if (entry->zi != NULL) {
if (li->direction == REDIS_TAIL)
li->zi = ziplistNext(li->subject->ptr,li->zi);
else
li->zi = ziplistPrev(li->subject->ptr,li->zi);
return 1;
}
} else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
entry->ln = li->ln;
if (entry->ln != NULL) {
if (li->direction == REDIS_TAIL)
li->ln = li->ln->next;
else
li->ln = li->ln->prev;
return 1;
}
} else {
redisPanic("Unknown list encoding");
}
return 0;
}

/* Return entry or NULL at the current position of the iterator. */
//返回entry中存储的节点对象
robj *listTypeGet(listTypeEntry *entry) {
listTypeIterator *li = entry->li;
robj *value = NULL;
if (li->encoding == REDIS_ENCODING_ZIPLIST) {
unsigned char *vstr;
unsigned int vlen;
long long vlong;
redisAssert(entry->zi != NULL);
//对于ziplist是要新建robj
if (ziplistGet(entry->zi,&vstr,&vlen,&vlong)) {
if (vstr) {
value = createStringObject((char*)vstr,vlen);
} else {
value = createStringObjectFromLongLong(vlong);
}
}
} else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
redisAssert(entry->ln != NULL);
//对于linkedlist就直接返回节点指针,引用加一
value = listNodeValue(entry->ln);
incrRefCount(value);
} else {
redisPanic("Unknown list encoding");
}
return value;
}

//将新元素value插入到entry所表示的节点之前或之后
void listTypeInsert(listTypeEntry *entry, robj *value, int where) {
robj *subject = entry->li->subject;
if (entry->li->encoding == REDIS_ENCODING_ZIPLIST) {
value = getDecodedObject(value);
if (where == REDIS_TAIL) {
//获取当前节点后面的节点
unsigned char *next = ziplistNext(subject->ptr,entry->zi);

//如果后面没有节点了,说明value应该插在表尾,直接调用ziplistPush,否则调用ziplistInsert
//subject->ptr是ziplist字符串,value->ptr是要插入的字符串
if (next == NULL) {
subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),REDIS_TAIL);
} else {
subject->ptr = ziplistInsert(subject->ptr,next,value->ptr,sdslen(value->ptr));
}
} else {
subject->ptr = ziplistInsert(subject->ptr,entry->zi,value->ptr,sdslen(value->ptr));
}
decrRefCount(value);
} else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
if (where == REDIS_TAIL) {
listInsertNode(subject->ptr,entry->ln,value,AL_START_TAIL);
} else {
listInsertNode(subject->ptr,entry->ln,value,AL_START_HEAD);
}
incrRefCount(value);
} else {
redisPanic("Unknown list encoding");
}
}

/* Compare the given object with the entry at the current position. */
//对比entry中存储的节点对象和给定的对象o
int listTypeEqual(listTypeEntry *entry, robj *o) {
listTypeIterator *li = entry->li;
if (li->encoding == REDIS_ENCODING_ZIPLIST) {
//ziplistCompare只能比较字符串
//如果o是基于整型的字符串,为什么不能转一下再比较?下面的equalStringObjects就考虑了ll2string
redisAssert(o->encoding == REDIS_ENCODING_RAW);
return ziplistCompare(entry->zi,o->ptr,sdslen(o->ptr));
} else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
return equalStringObjects(o,listNodeValue(entry->ln));
} else {
redisPanic("Unknown list encoding");
}
}

/* Delete the element pointed to. */
//删除entry中记录的节点
void listTypeDelete(listTypeEntry *entry) {
listTypeIterator *li = entry->li;
if (li->encoding == REDIS_ENCODING_ZIPLIST) {
unsigned char *p = entry->zi;
//删除p指向的entry,再把下个entry的地址重新赋给p
li->subject->ptr = ziplistDelete(li->subject->ptr,&p);
//li指向的本来就是下个节点,但是删除了当前节点后,下个节点的地址就变了,所以要重新让迭代器指向下个节点
//所以结果就是entry指向的节点被删了,li指向的节点不变,但是节点地址变了
if (li->direction == REDIS_TAIL)
li->zi = p;
else
li->zi = ziplistPrev(li->subject->ptr,p);
} else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
listNode *next;
if (li->direction == REDIS_TAIL)
next = entry->ln->next;
else
next = entry->ln->prev;
listDelNode(li->subject->ptr,entry->ln);
li->ln = next;
} else {
redisPanic("Unknown list encoding");
}
}

//把基于ziplist的列表转成基于linkedlist的
void listTypeConvert(robj *subject, int enc) {
listTypeIterator *li;
listTypeEntry entry;
redisAssert(subject->type == REDIS_LIST);

if (enc == REDIS_ENCODING_LINKEDLIST) {
list *l = listCreate();
listSetFreeMethod(l,decrRefCount);

/* listTypeGet returns a robj with incremented refcount */
//创建一个原列表迭代器
li = listTypeInitIterator(subject,0,REDIS_TAIL);
//迭代每个节点并add到新列表
while (listTypeNext(li,&entry)) listAddNodeTail(l,listTypeGet(&entry));
listTypeReleaseIterator(li);

subject->encoding = REDIS_ENCODING_LINKEDLIST;
//free掉原来的ziplist
zfree(subject->ptr);
subject->ptr = l;
} else {
redisPanic("Unsupported list conversion");
}
}

/*-----------------------------------------------------------------------------
* List Commands
*----------------------------------------------------------------------------*/
//处理客户端发来的lpush和rpush请求
void pushGenericCommand(redisClient *c, int where) {
//在db中根据key获取列表对象
robj *lobj = lookupKeyWrite(c->db,c->argv[1]);
//这里为什么要把value转成整型呢?后面调用listTypePush时明明又会把value转回字符串
c->argv[2] = tryObjectEncoding(c->argv[2]);
//列表不存在
if (lobj == NULL) {
//如果有正在等待blocking pop的客户端,就把要push的新元素直接返回给那个等待pop的客户端,就不用继续push了
if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
addReply(c,shared.cone);
return;
}
//如果没有等待pop的客户端,就新建列表,把新元素push进去
lobj = createZiplistObject();
dbAdd(c->db,c->argv[1],lobj);
} else {
//如果key对应的value不是列表对象,向客户端报错
if (lobj->type != REDIS_LIST) {
addReply(c,shared.wrongtypeerr);
return;
}
if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
touchWatchedKey(c->db,c->argv[1]);
addReply(c,shared.cone);
return;
}
}
//执行push操作
listTypePush(lobj,c->argv[2],where);、
//返回给客户端的是push后列表的长度
addReplyLongLong(c,listTypeLength(lobj));
touchWatchedKey(c->db,c->argv[1]);
server.dirty++;
}

//实现lpush命令,2.4版本以前的lpush只接受单个value参数
//LPUSH KEY_NAME VALUE
void lpushCommand(redisClient *c) {
pushGenericCommand(c,REDIS_HEAD);
}

//实现rpush命令,2.4版本以前的rpush只接受单个value参数
//RPUSH KEY_NAME VALUE
void rpushCommand(redisClient *c) {
pushGenericCommand(c,REDIS_TAIL);
}

//处理客户端发来的lpushx、rpushx和linsert请求
void pushxGenericCommand(redisClient *c, robj *refval, robj *val, int where) {
robj *subject;
listTypeIterator *iter;
listTypeEntry entry;
int inserted = 0;
//如果目标列表不存在或不是列表对象,放弃push操作
if ((subject = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
checkType(c,subject,REDIS_LIST)) return;

//refval不为NULL说明是linsert命令,要把val插入到refval之前或之后
if (refval != NULL) {
/* Note: we expect refval to be string-encoded because it is *not* the
* last argument of the multi-bulk LINSERT. */
redisAssert(refval->encoding == REDIS_ENCODING_RAW);

/* We're not sure if this value can be inserted yet, but we cannot
* convert the list inside the iterator. We don't want to loop over
* the list twice (once to see if the value can be inserted and once
* to do the actual insert), so we assume this value can be inserted
* and convert the ziplist to a regular list if necessary. */
//先检查当前列表需不需要转成基于linkedlist的
listTypeTryConversion(subject,val);

/* Seek refval from head to tail */
//创建迭代器,在列表中定位refval,然后插入val
iter = listTypeInitIterator(subject,0,REDIS_TAIL);
while (listTypeNext(iter,&entry)) {
if (listTypeEqual(&entry,refval)) {
listTypeInsert(&entry,val,where);
inserted = 1;
break;
}
}
listTypeReleaseIterator(iter);

if (inserted) {
/* Check if the length exceeds the ziplist length threshold. */
//如果当前是ziplist,还要检查插入后是否超过了允许容纳的最大entry数量
if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
ziplistLen(subject->ptr) > server.list_max_ziplist_entries)
listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
touchWatchedKey(c->db,c->argv[1]);
server.dirty++;
} else {
/* Notify client of a failed insert */
//插入失败,给客户端返回-1
addReply(c,shared.cnegone);
return;
}
//如果refval是NULL,正常执行push操作
} else {
listTypePush(subject,val,where);
touchWatchedKey(c->db,c->argv[1]);
server.dirty++;
}
//返回给客户端的是执行push后列表的长度
addReplyLongLong(c,listTypeLength(subject));
}

//实现lpushx命令
//LPUSHX KEY_NAME VALUE
void lpushxCommand(redisClient *c) {
c->argv[2] = tryObjectEncoding(c->argv[2]);
pushxGenericCommand(c,NULL,c->argv[2],REDIS_HEAD);
}

//实现rpushx命令
//RPUSHX KEY_NAME VALUE
void rpushxCommand(redisClient *c) {
c->argv[2] = tryObjectEncoding(c->argv[2]);
pushxGenericCommand(c,NULL,c->argv[2],REDIS_TAIL);
}

//实现linsert命令
//LINSERT key BEFORE|AFTER pivot value
void linsertCommand(redisClient *c) {
c->argv[4] = tryObjectEncoding(c->argv[4]);
if (strcasecmp(c->argv[2]->ptr,"after") == 0) {
pushxGenericCommand(c,c->argv[3],c->argv[4],REDIS_TAIL);
} else if (strcasecmp(c->argv[2]->ptr,"before") == 0) {
pushxGenericCommand(c,c->argv[3],c->argv[4],REDIS_HEAD);
} else {
addReply(c,shared.syntaxerr);
}
}

//实现llen命令
//LLEN KEY_NAME
void llenCommand(redisClient *c) {
robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.czero);
if (o == NULL || checkType(c,o,REDIS_LIST)) return;
addReplyLongLong(c,listTypeLength(o));
}

//实现lindex命令
//LINDEX KEY_NAME INDEX_POSITION
void lindexCommand(redisClient *c) {
robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk);
if (o == NULL || checkType(c,o,REDIS_LIST)) return;
int index = atoi(c->argv[2]->ptr);
robj *value = NULL;

if (o->encoding == REDIS_ENCODING_ZIPLIST) {
unsigned char *p;
unsigned char *vstr;
unsigned int vlen;
long long vlong;
//获取第index个entry的头地址
p = ziplistIndex(o->ptr,index);
if (ziplistGet(p,&vstr,&vlen,&vlong)) {
if (vstr) {
value = createStringObject((char*)vstr,vlen);
} else {
value = createStringObjectFromLongLong(vlong);
}
addReplyBulk(c,value);
decrRefCount(value);
} else {
addReply(c,shared.nullbulk);
}
} else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
listNode *ln = listIndex(o->ptr,index);
if (ln != NULL) {
value = listNodeValue(ln);
addReplyBulk(c,value);
} else {
addReply(c,shared.nullbulk);
}
} else {
redisPanic("Unknown list encoding");
}
}

//实现lset命令
//LSET KEY_NAME INDEX VALUE
void lsetCommand(redisClient *c) {
robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr);
if (o == NULL || checkType(c,o,REDIS_LIST)) return;
int index = atoi(c->argv[2]->ptr);
robj *value = (c->argv[3] = tryObjectEncoding(c->argv[3]));

listTypeTryConversion(o,value);
if (o->encoding == REDIS_ENCODING_ZIPLIST) {
unsigned char *p, *zl = o->ptr;
p = ziplistIndex(zl,index);
//找不到第index个entry,就报索引越界的错
if (p == NULL) {
addReply(c,shared.outofrangeerr);
} else {
//set新value的逻辑是,先删掉旧的,再插入新的
o->ptr = ziplistDelete(o->ptr,&p);
value = getDecodedObject(value);
o->ptr = ziplistInsert(o->ptr,p,value->ptr,sdslen(value->ptr));
decrRefCount(value);
addReply(c,shared.ok);
touchWatchedKey(c->db,c->argv[1]);
server.dirty++;
}
} else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
listNode *ln = listIndex(o->ptr,index);
if (ln == NULL) {
addReply(c,shared.outofrangeerr);
} else {
//ziplist的entry没有引用,所以能直接删,但是linkedlist的listNode有引用,所以在重置listNode指针之前要把指向的value引用减一
//ziplist的插入是把字符串hardcode到ziplist中,不是对value的引用,但是linkedlist的插入是把listNode的指针指向value,是对value的引用
decrRefCount((robj*)listNodeValue(ln));
listNodeValue(ln) = value;
incrRefCount(value);
addReply(c,shared.ok);
touchWatchedKey(c->db,c->argv[1]);
server.dirty++;
}
} else {
redisPanic("Unknown list encoding");
}
}

//处理客户端发来的lpop和rpop请求
void popGenericCommand(redisClient *c, int where) {
robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk);
if (o == NULL || checkType(c,o,REDIS_LIST)) return;
//返回弹出的元素
robj *value = listTypePop(o,where);
if (value == NULL) {
addReply(c,shared.nullbulk);
} else {
addReplyBulk(c,value);
decrRefCount(value);
//如果弹出后列表为空,就顺便从db中删除列表
if (listTypeLength(o) == 0) dbDelete(c->db,c->argv[1]);
touchWatchedKey(c->db,c->argv[1]);
server.dirty++;
}
}

//实现lpop命令
//Lpop KEY_NAME
void lpopCommand(redisClient *c) {
popGenericCommand(c,REDIS_HEAD);
}

//实现rpop命令
//Rpop KEY_NAME
void rpopCommand(redisClient *c) {
popGenericCommand(c,REDIS_TAIL);
}

//实现lrange命令
//LRANGE KEY_NAME START END
void lrangeCommand(redisClient *c) {
robj *o;
int start = atoi(c->argv[2]->ptr);
int end = atoi(c->argv[3]->ptr);
int llen;
int rangelen;

if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
|| checkType(c,o,REDIS_LIST)) return;
//获取列表的长度
llen = listTypeLength(o);

/* convert negative indexes */
//把负索引转成正索引
if (start < 0) start = llen+start;
if (end < 0) end = llen+end;
//调整边界
if (start < 0) start = 0;

/* Invariant: start >= 0, so this test will be true when end < 0.
* The range is empty when start > end or start >= length. */
if (start > end || start >= llen) {
addReply(c,shared.emptymultibulk);
return;
}
//调整边界
if (end >= llen) end = llen-1;
rangelen = (end-start)+1;

/* Return the result in form of a multi-bulk reply */
addReplyMultiBulkLen(c,rangelen);
//先调用ziplistIndex把指针p放在起点,再循环调用ziplistGet取出元素
if (o->encoding == REDIS_ENCODING_ZIPLIST) {
unsigned char *p = ziplistIndex(o->ptr,start);
unsigned char *vstr;
unsigned int vlen;
long long vlong;

while(rangelen--) {
ziplistGet(p,&vstr,&vlen,&vlong);
if (vstr) {
addReplyBulkCBuffer(c,vstr,vlen);
} else {
addReplyBulkLongLong(c,vlong);
}
p = ziplistNext(o->ptr,p);
}
//对于linkedlist,直接通过节点的next就能遍历
} else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
listNode *ln;

/* If we are nearest to the end of the list, reach the element
* starting from tail and going backward, as it is faster. */
if (start > llen/2) start -= llen;
ln = listIndex(o->ptr,start);

while(rangelen--) {
addReplyBulk(c,ln->value);
ln = ln->next;
}
} else {
redisPanic("List encoding is not LINKEDLIST nor ZIPLIST!");
}
}

//实现ltrim命令
//LTRIM KEY_NAME START STOP
void ltrimCommand(redisClient *c) {
robj *o;
int start = atoi(c->argv[2]->ptr);
int end = atoi(c->argv[3]->ptr);
int llen;
//ltrim是从列表头部裁减掉的节点数,rtrim是从列表尾部裁减掉的节点数
int j, ltrim, rtrim;
list *list;
listNode *ln;

if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.ok)) == NULL ||
checkType(c,o,REDIS_LIST)) return;
llen = listTypeLength(o);

/* convert negative indexes */
if (start < 0) start = llen+start;
if (end < 0) end = llen+end;
if (start < 0) start = 0;

/* Invariant: start >= 0, so this test will be true when end < 0.
* The range is empty when start > end or start >= length. */
if (start > end || start >= llen) {
/* Out of range start or start > end result in empty list */
ltrim = llen;
rtrim = 0;
} else {
if (end >= llen) end = llen-1;
//因为列表索引是从0开始的,索引为start的节点其实是第start+1个节点,所以左侧要删除的节点数是就start
ltrim = start;
//llen-end是end右侧且包括end索引的节点数,因为end索引处的节点要保留,所以最后减一
rtrim = llen-end-1;
}

/* Remove list elements to perform the trim */
if (o->encoding == REDIS_ENCODING_ZIPLIST) {
//从列表头部开始删除ltrim个节点,左侧裁剪完成
o->ptr = ziplistDeleteRange(o->ptr,0,ltrim);
//从列表尾部删除时用负索引定位倒数第rtrim个节点,往后删除共rtrim个节点,右侧裁剪完成
o->ptr = ziplistDeleteRange(o->ptr,-rtrim,rtrim);
} else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
list = o->ptr;
for (j = 0; j < ltrim; j++) {
ln = listFirst(list);
listDelNode(list,ln);
}
for (j = 0; j < rtrim; j++) {
ln = listLast(list);
listDelNode(list,ln);
}
} else {
redisPanic("Unknown list encoding");
}
//如果裁剪后列表为空,就顺便从db中删除列表
if (listTypeLength(o) == 0) dbDelete(c->db,c->argv[1]);
touchWatchedKey(c->db,c->argv[1]);
server.dirty++;
addReply(c,shared.ok);
}

//实现lrem命令
//LREM KEY_NAME COUNT VALUE
void lremCommand(redisClient *c) {
robj *subject, *obj;
obj = c->argv[3] = tryObjectEncoding(c->argv[3]);
//toremove是要移除的节点数,也就是命令里count参数的绝对值
int toremove = atoi(c->argv[2]->ptr);
//removed是已经移除的节点数
int removed = 0;
listTypeEntry entry;

subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero);
if (subject == NULL || checkType(c,subject,REDIS_LIST)) return;

/* Make sure obj is raw when we're dealing with a ziplist */
//在ziplist中只能做字符串比较,所以要先把value转成sds字符串
if (subject->encoding == REDIS_ENCODING_ZIPLIST)
obj = getDecodedObject(obj);

listTypeIterator *li;
//toremove<0表示从后往前遍历列表,设置好迭代器的方向后,把toremove转成正数
if (toremove < 0) {
toremove = -toremove;
li = listTypeInitIterator(subject,-1,REDIS_HEAD);
} else {
li = listTypeInitIterator(subject,0,REDIS_TAIL);
}
//迭代节点并删除直到 removed == toremove
while (listTypeNext(li,&entry)) {
if (listTypeEqual(&entry,obj)) {
listTypeDelete(&entry);
server.dirty++;
removed++;
if (toremove && removed == toremove) break;
}
}
listTypeReleaseIterator(li);

/* Clean up raw encoded object */
//之前调用getDecodedObject增加了对sds字符串的引用,这里引用要减一
if (subject->encoding == REDIS_ENCODING_ZIPLIST)
decrRefCount(obj);
//如果删除节点后列表为空,就顺便从db中删除列表
if (listTypeLength(subject) == 0) dbDelete(c->db,c->argv[1]);
//返回给客户端的是实际删除的节点数
addReplyLongLong(c,removed);
if (removed) touchWatchedKey(c->db,c->argv[1]);
}

/* This is the semantic of this command:
* RPOPLPUSH srclist dstlist:
* IF LLEN(srclist) > 0
* element = RPOP srclist
* LPUSH dstlist element
* RETURN element
* ELSE
* RETURN nil
* END
* END
*
* The idea is to be able to get an element from a list in a reliable way
* since the element is not just returned but pushed against another list
* as well. This command was originally proposed by Ezra Zygmuntowicz.
*/
//处理客户端发来的rpoplpush命令,负责执行push的部分
void rpoplpushHandlePush(redisClient *origclient, redisClient *c, robj *dstkey, robj *dstobj, robj *value) {
robj *aux;
//如果有正在阻塞的客户端,就把value发送给该客户端执行push操作
if (!handleClientsWaitingListPush(origclient,dstkey,value)) {
/* Create the list if the key does not exist */
//如果要push的列表不存在,就先创建新列表
if (!dstobj) {
dstobj = createZiplistObject();
dbAdd(c->db,dstkey,dstobj);
} else {
touchWatchedKey(c->db,dstkey);
}
//把value添加到列表头部
listTypePush(dstobj,value,REDIS_HEAD);
/* If we are pushing as a result of LPUSH against a key
* watched by BRPOPLPUSH, we need to rewrite the command vector
* as an LPUSH.
*
* If this is called directly by RPOPLPUSH (either directly
* or via a BRPOPLPUSH where the popped list exists)
* we should replicate the RPOPLPUSH command itself. */
//修改客户端命令,写入AOF文件,没看懂???
if (c != origclient) {
aux = createStringObject("LPUSH",5);
rewriteClientCommandVector(origclient,3,aux,dstkey,value);
decrRefCount(aux);
} else {
/* Make sure to always use RPOPLPUSH in the replication / AOF,
* even if the original command was BRPOPLPUSH. */
aux = createStringObject("RPOPLPUSH",9);
rewriteClientCommandVector(origclient,3,aux,c->argv[1],c->argv[2]);
decrRefCount(aux);
}
server.dirty++;
}

/* Always send the pushed value to the client. */
addReplyBulk(c,value);
}

//实现rpoplpush命令
//RPOPLPUSH SOURCE_KEY_NAME DESTINATION_KEY_NAME
void rpoplpushCommand(redisClient *c) {
robj *sobj, *value;
//获取执行pop的列表
if ((sobj = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
checkType(c,sobj,REDIS_LIST)) return;
//因为要执行pop,所以列表不能为空
if (listTypeLength(sobj) == 0) {
addReply(c,shared.nullbulk);
} else {
//获取执行push的列表
robj *dobj = lookupKeyWrite(c->db,c->argv[2]);
robj *touchedkey = c->argv[1];

if (dobj && checkType(c,dobj,REDIS_LIST)) return;
//执行pop得到value
value = listTypePop(sobj,REDIS_TAIL);
/* We saved touched key, and protect it, since rpoplpushHandlePush
* may change the client command argument vector. */
incrRefCount(touchedkey);
//把value添加到执行push的列表中,当执行rpoplpush命令时,pop和push操作都是在同一个client下完成,所以两个client参数相同
rpoplpushHandlePush(c,c,c->argv[2],dobj,value);

/* listTypePop returns an object with its refcount incremented */
//push完毕,value就没用了,引用减一
decrRefCount(value);

/* Delete the source list when it is empty */
//对于执行pop的列表,如果pop以后列表为空,就删除该列表
if (listTypeLength(sobj) == 0) dbDelete(c->db,touchedkey);
touchWatchedKey(c->db,touchedkey);
decrRefCount(touchedkey);
server.dirty++;
}
}

/*-----------------------------------------------------------------------------
* Blocking POP operations
*----------------------------------------------------------------------------*/

/* Currently Redis blocking operations support is limited to list POP ops,
* so the current implementation is not fully generic, but it is also not
* completely specific so it will not require a rewrite to support new
* kind of blocking operations in the future.
*
* Still it's important to note that list blocking operations can be already
* used as a notification mechanism in order to implement other blocking
* operations at application level, so there must be a very strong evidence
* of usefulness and generality before new blocking operations are implemented.
*
* This is how the current blocking POP works, we use BLPOP as example:
* - If the user calls BLPOP and the key exists and contains a non empty list
* then LPOP is called instead. So BLPOP is semantically the same as LPOP
* if there is not to block.
* - If instead BLPOP is called and the key does not exists or the list is
* empty we need to block. In order to do so we remove the notification for
* new data to read in the client socket (so that we'll not serve new
* requests if the blocking request is not served). Also we put the client
* in a dictionary (db->blocking_keys) mapping keys to a list of clients
* blocking for this keys.
* - If a PUSH operation against a key with blocked clients waiting is
* performed, we serve the first in the list: basically instead to push
* the new element inside the list we return it to the (first / oldest)
* blocking client, unblock the client, and remove it form the list.
*
* The above comment and the source code should be enough in order to understand
* the implementation and modify / fix it later.
*/

/* Set a client in blocking mode for the specified key, with the specified
* timeout */
//因为只支持blocking pop的命令,所以阻塞客户端就是等待指定key对应的列表有可以pop的元素
//keys是存储所有等待pop的列表的数组,numkeys是keys数组的长度,timeout是超时时间,target是被push的列表的key
void blockForKeys(redisClient *c, robj **keys, int numkeys, time_t timeout, robj *target) {
dictEntry *de;
list *l;
int j;
//bpop是client的属性,记录阻塞信息
c->bpop.keys = zmalloc(sizeof(robj*)*numkeys);
c->bpop.count = numkeys;
c->bpop.timeout = timeout;
c->bpop.target = target;

if (target != NULL) {
incrRefCount(target);
}

for (j = 0; j < numkeys; j++) {
/* Add the key in the client structure, to map clients -> keys */
//把所有等待pop的列表添加到c->bpop,标记阻塞的客户端正在监听哪些列表
c->bpop.keys[j] = keys[j];
incrRefCount(keys[j]);

/* And in the other "side", to map keys -> clients */
//c->db->blocking_keys字典存储正在监听某个列表的所有客户端,key是列表的键,value就是存储客户端的列表
de = dictFind(c->db->blocking_keys,keys[j]);
//当前列表没有被其他客户端监听
if (de == NULL) {
int retval;

/* For every key we take a list of clients blocked for it */
//新建客户端列表,存入blocking_keys字典
l = listCreate();
retval = dictAdd(c->db->blocking_keys,keys[j],l);
incrRefCount(keys[j]);
redisAssert(retval == DICT_OK);
} else {
//否则就是直接往客户端列表里add
l = dictGetEntryVal(de);
}
listAddNodeTail(l,c);
}
/* Mark the client as a blocked client */
//修改客户端的阻塞标记
c->flags |= REDIS_BLOCKED;
server.bpop_blocked_clients++;
}

/* Unblock a client that's waiting in a blocking operation such as BLPOP */
//解除客户端的阻塞
void unblockClientWaitingData(redisClient *c) {
dictEntry *de;
list *l;
int j;

redisAssert(c->bpop.keys != NULL);
/* The client may wait for multiple keys, so unblock it for every key. */
//遍历当前客户端监视的所有列表,修改blocking_keys
for (j = 0; j < c->bpop.count; j++) {
/* Remove this client from the list of clients waiting for this key. */
//把客户端从blocking_keys中清除
de = dictFind(c->db->blocking_keys,c->bpop.keys[j]);
redisAssert(de != NULL);
l = dictGetEntryVal(de);
listDelNode(l,listSearchKey(l,c));
/* If the list is empty we need to remove it to avoid wasting memory */
//删除以后如果列表为空,就顺便把列表也删除
if (listLength(l) == 0)
dictDelete(c->db->blocking_keys,c->bpop.keys[j]);
decrRefCount(c->bpop.keys[j]);
}

/* Cleanup the client structure */
//解除客户端的阻塞就要把c->bpop.keys里的列表的key全部删除
zfree(c->bpop.keys);
c->bpop.keys = NULL;
//调用本函数前,c->bpop.target已经被handler复制下来了,所以现在就可以清除记录了
if (c->bpop.target) decrRefCount(c->bpop.target);
c->bpop.target = NULL;
//修改阻塞标记和非阻塞标记
c->flags &= ~REDIS_BLOCKED;
c->flags |= REDIS_UNBLOCKED;
server.bpop_blocked_clients--;
listAddNodeTail(server.unblocked_clients,c);
}

/* This should be called from any function PUSHing into lists.
* 'c' is the "pushing client", 'key' is the key it is pushing data against,
* 'ele' is the element pushed.
*
* If the function returns 0 there was no client waiting for a list push
* against this key.
*
* If the function returns 1 there was a client waiting for a list push
* against this key, the element was passed to this client thus it's not
* needed to actually add it to the list and the caller should return asap. */
//每当有新元素被push到列表中时,都要先检查当前是否有正在等待的blocking pop命令
//c是执行push命令的client,key是执行pop的列表的key,ele是被push的新元素
//返回0表示没有正在等待的blocking pop,回到原函数中要继续执行push。返回1表示有正在等待的blocking pop,直接把ele返回给执行pop的client,ele就不需要再push到列表中了
//过程比较乱,涉及到一个pop和两个push。列表A被等待blocking pop的客户端监听,当新元素被push到A中,这个push实际不执行,直接当做pop的结果返回,如果是rpoplpush命令,后面还有一个push操作,pop出的元素会被push到列表B中,这个push是一定会执行的
int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele) {
struct dictEntry *de;
redisClient *receiver;
int numclients;
list *clients;
listNode *ln;
robj *dstkey, *dstobj;
//c->db->blocking_keys中没有记录,说明当前列表没有被任何客户端所等待
de = dictFind(c->db->blocking_keys,key);
if (de == NULL) return 0;
//获取当前列表关联的所有阻塞客户端
clients = dictGetEntryVal(de);
numclients = listLength(clients);

/* Try to handle the push as long as there are clients waiting for a push.
* Note that "numclients" is used because the list of clients waiting for a
* push on "key" is deleted by unblockClient() when empty.
*
* This loop will have more than 1 iteration when there is a BRPOPLPUSH
* that cannot push the target list because it does not contain a list. If
* this happens, it simply tries the next client waiting for a push. */
while (numclients--) {
ln = listFirst(clients);
redisAssert(ln != NULL);
//receiver是等待执行pop的客户端
receiver = ln->value;
//如果pop出的元素要立即被push到另一个列表中,dstkey是被执行push的列表的key
dstkey = receiver->bpop.target;

/* Protect receiver->bpop.target, that will be freed by
* the next unblockClientWaitingData() call. */
if (dstkey) incrRefCount(dstkey);
/* This should remove the first element of the "clients" list. */
//执行push前先解除客户端的阻塞
unblockClientWaitingData(receiver);
//如果没指定push的列表,说明当前的命令就是单纯的blocking pop,因为此前pop已经执行,函数直接在此退出
if (dstkey == NULL) {
/* BRPOP/BLPOP */
addReplyMultiBulkLen(receiver,2);
addReplyBulk(receiver,key);
addReplyBulk(receiver,ele);
return 1; /* Serve just the first client as in B[RL]POP semantics */
} else {
/* BRPOPLPUSH, note that receiver->db is always equal to c->db. */
dstobj = lookupKeyWrite(receiver->db,dstkey);
if (!(dstobj && checkType(receiver,dstobj,REDIS_LIST))) {
rpoplpushHandlePush(c,receiver,dstkey,dstobj,ele);
decrRefCount(dstkey);
return 1;
}
decrRefCount(dstkey);
}
}

return 0;
}

//解析出对象object的过期时间,结果保存在timeout中,其实只是调用getLongFromObjectOrReply
int getTimeoutFromObjectOrReply(redisClient *c, robj *object, time_t *timeout) {
long tval;

if (getLongFromObjectOrReply(c,object,&tval,
"timeout is not an integer or out of range") != REDIS_OK)
return REDIS_ERR;

if (tval < 0) {
addReplyError(c,"timeout is negative");
return REDIS_ERR;
}

if (tval > 0) tval += time(NULL);
*timeout = tval;

return REDIS_OK;
}

/* Blocking RPOP/LPOP */
//处理客户端发来的blpop和brpop命令
void blockingPopGenericCommand(redisClient *c, int where) {
robj *o;
time_t timeout;
int j;

if (getTimeoutFromObjectOrReply(c,c->argv[c->argc-1],&timeout) != REDIS_OK)
return;

//按顺序遍历命令参数里的所有列表,找到第一个非空列表后,执行pop并返回
for (j = 1; j < c->argc-1; j++) {
//获取列表
o = lookupKeyWrite(c->db,c->argv[j]);
if (o != NULL) {
//不是列表对象就退出
if (o->type != REDIS_LIST) {
addReply(c,shared.wrongtypeerr);
return;
} else {
//如果列表不为空就可以执行pop
if (listTypeLength(o) != 0) {
/* If the list contains elements fall back to the usual
* non-blocking POP operation */
struct redisCommand *orig_cmd;
robj *argv[2], **orig_argv;
int orig_argc;

/* We need to alter the command arguments before to call
* popGenericCommand() as the command takes a single key. */
//因为要切换到执行pop命令的popGenericCommand函数,所以需要调整一下命令参数
orig_argv = c->argv;
orig_argc = c->argc;
orig_cmd = c->cmd;
argv[1] = c->argv[j];
c->argv = argv;
c->argc = 2;

/* Also the return value is different, we need to output
* the multi bulk reply header and the key name. The
* "real" command will add the last element (the value)
* for us. If this souds like an hack to you it's just
* because it is... */
addReplyMultiBulkLen(c,2);
addReplyBulk(c,argv[1]);

popGenericCommand(c,where);

/* Fix the client structure with the original stuff */
c->argv = orig_argv;
c->argc = orig_argc;
c->cmd = orig_cmd;

return;
}
}
}
}

/* If we are inside a MULTI/EXEC and the list is empty the only thing
* we can do is treating it as a timeout (even with timeout 0). */
//没看懂???
if (c->flags & REDIS_MULTI) {
addReply(c,shared.nullmultibulk);
return;
}

/* If the list is empty or the key does not exists we must block */
//如果执行到这里,说明命令里的所有列表都是空的,pop执行失败,需要把当前客户端设置为阻塞并且关联上命令里的所有列表
blockForKeys(c, c->argv + 1, c->argc - 2, timeout, NULL);
}

//实现blpop命令
//BLPOP LIST1 LIST2 .. LISTN TIMEOUT
void blpopCommand(redisClient *c) {
blockingPopGenericCommand(c,REDIS_HEAD);
}

//实现brpop命令
//BLPOP LIST1 LIST2 .. LISTN TIMEOUT
void brpopCommand(redisClient *c) {
blockingPopGenericCommand(c,REDIS_TAIL);
}

//实现brpoplpush命令
//BRPOPLPUSH LIST1 ANOTHER_LIST TIMEOUT
//rpoplpush里如果列表为空就退出,brpoplpush里如果列表为空就阻塞
void brpoplpushCommand(redisClient *c) {
time_t timeout;

if (getTimeoutFromObjectOrReply(c,c->argv[3],&timeout) != REDIS_OK)
return;

robj *key = lookupKeyWrite(c->db, c->argv[1]);
//如果要执行pop的列表为空,就调用blockForKeys
if (key == NULL) {
if (c->flags & REDIS_MULTI) {

/* Blocking against an empty list in a multi state
* returns immediately. */
addReply(c, shared.nullbulk);
} else {
/* The list is empty and the client blocks. */
blockForKeys(c, c->argv + 1, 1, timeout, c->argv[2]);
}
//如果要执行pop的列表不为空,就调用rpoplpushCommand
} else {
if (key->type != REDIS_LIST) {
addReply(c, shared.wrongtypeerr);
} else {

/* The list exists and has elements, so
* the regular rpoplpushCommand is executed. */
redisAssert(listTypeLength(key) > 0);
rpoplpushCommand(c);
}
}
}

4.4 哈希

t_hash.c



5 单机数据库相关

6 客户端和服务器端相关

7 多机数据库相关

8 测试类文件

testhelp.h

/* This is a really minimal testing framework for C.
*
* Example:
*
* test_cond("Check if 1 == 1", 1==1)
* test_cond("Check if 5 > 10", 5 > 10)
* test_report()
*/

#ifndef __TESTHELP_H
#define __TESTHELP_H

//测试失败次数
int __failed_tests = 0;
//测试总次数
int __test_num = 0;
#define test_cond(descr,_c) do { \
//测试次数加一,先输出描述信息
__test_num++; printf("%d - %s: ", __test_num, descr); \
//执行_c表示的表达式,输出成功或失败提示
if(_c) printf("PASSED\n"); else {printf("FAILED\n"); __failed_tests++;} \
} while(0);

//测试报告
#define test_report() do { \
//输出测试总次数,成功次数和失败次数
printf("%d tests, %d passed, %d failed\n", __test_num, \
__test_num-__failed_tests, __failed_tests); \
//如果有失败的样例就报warning
if (__failed_tests) { \
printf("=== WARNING === We have failed tests here...\n"); \
} \
} while(0);

#endif

9 工具类文件

10 封装类文件