从上图可以看出list的底层实现方式有三种:adlist、ziplist和quicklist。但是看它的迭代器源码其实只有一种:
/* Structure to hold list iteration abstraction. */
typedef struct {
robj *subject;
unsigned char encoding;
unsigned char direction; /* Iteration direction */
quicklistIter *iter;
} listTypeIterator;
/* Structure for an entry while iterating over a list. */
typedef struct {
listTypeIterator *li;
quicklistEntry entry; /* Entry in quicklist */
} listTypeEntry;
list有很多接口,基本上是对quicklist的一层封装,以常用的push和pop操作来看这些函数的实现:
/* The function pushes an element to the specified list object 'subject',
* at head or tail position as specified by 'where'.
*
* There is no need for the caller to increment the refcount of 'value' as
* the function takes care of it if needed. */
void listTypePush(robj *subject, robj *value, int where) {
if (subject->encoding == OBJ_ENCODING_QUICKLIST) {
int pos = (where == LIST_HEAD) ? QUICKLIST_HEAD : QUICKLIST_TAIL;
if (value->encoding == OBJ_ENCODING_INT) {
char buf[32];
ll2string(buf, 32, (long)value->ptr);
quicklistPush(subject->ptr, buf, strlen(buf), pos);
} else {
quicklistPush(subject->ptr, value->ptr, sdslen(value->ptr), pos);
}
} else {
serverPanic("Unknown list encoding");
}
}
quickpush之前的代码中就已经分析过
robj *listTypePop(robj *subject, int where) {
long long vlong;
robj *value = NULL;
int ql_where = where == LIST_HEAD ? QUICKLIST_HEAD : QUICKLIST_TAIL;
if (subject->encoding == OBJ_ENCODING_QUICKLIST) {
if (quicklistPopCustom(subject->ptr, ql_where, (unsigned char **)&value,
NULL, &vlong, listPopSaver)) {
if (!value)
value = createStringObjectFromLongLong(vlong);
}
} else {
serverPanic("Unknown list encoding");
}
return value;
}
void *listPopSaver(unsigned char *data, unsigned int sz) {
return createStringObject((char*)data,sz);
}
list还提供了很多命令供用户使用,分析下LPUSH和RPUSH的实现
/* LPUSH <key> <element> [<element> ...] */
void lpushCommand(client *c) {
pushGenericCommand(c,LIST_HEAD,0);
}
/* RPUSH <key> <element> [<element> ...] */
void rpushCommand(client *c) {
pushGenericCommand(c,LIST_TAIL,0);
}
/* Implements LPUSH/RPUSH/LPUSHX/RPUSHX.
* 'xx': push if key exists. */
void pushGenericCommand(client *c, int where, int xx) {
int j;
for (j = 2; j < c->argc; j++) {
if (sdslen(c->argv[j]->ptr) > LIST_MAX_ITEM_SIZE) {
addReplyError(c, "Element too large");
return;
}
}
robj *lobj = lookupKeyWrite(c->db, c->argv[1]);
if (checkType(c,lobj,OBJ_LIST)) return;
if (!lobj) {
if (xx) {
addReply(c, shared.czero);
return;
}
lobj = createQuicklistObject();
quicklistSetOptions(lobj->ptr, server.list_max_ziplist_size,
server.list_compress_depth);
dbAdd(c->db,c->argv[1],lobj);
}
for (j = 2; j < c->argc; j++) {
listTypePush(lobj,c->argv[j],where);
server.dirty++;
}
addReplyLongLong(c, listTypeLength(lobj));
char *event = (where == LIST_HEAD) ? "lpush" : "rpush";
signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
}
此外注意到我们list为用户还提供了三个带有阻塞模式的命令:BLPOP、BRPOP、BLPOPRPUSH
/* BLPOP <key> [<key> ...] <timeout> */
void blpopCommand(client *c) {
blockingPopGenericCommand(c,LIST_HEAD);
}
/* BLPOP <key> [<key> ...] <timeout> */
void brpopCommand(client *c) {
blockingPopGenericCommand(c,LIST_TAIL);
}
/* Blocking RPOP/LPOP */
void blockingPopGenericCommand(client *c, int where) {
robj *o;
mstime_t timeout;
int j;
//取出timeout参数
if (getTimeoutFromObjectOrReply(c,c->argv[c->argc-1],&timeout,UNIT_SECONDS)
!= C_OK) return;
for (j = 1; j < c->argc-1; j++) {
o = lookupKeyWrite(c->db,c->argv[j]);
if (o != NULL) {
if (checkType(c,o,OBJ_LIST)) {
return;
} else {
if (listTypeLength(o) != 0) { //如果list长度不为空,就按正常的pop执行
/* Non empty list, this is like a normal [LR]POP. */
robj *value = listTypePop(o,where);
serverAssert(value != NULL);
addReplyArrayLen(c,2);
addReplyBulk(c,c->argv[j]);
addReplyBulk(c,value);
decrRefCount(value);
listElementsRemoved(c,c->argv[j],where,o,1);
/* Replicate it as an [LR]POP instead of B[LR]POP. */
rewriteClientCommandVector(c,2,
(where == LIST_HEAD) ? shared.lpop : shared.rpop,
c->argv[j]);
return;
}
}
}
}
/* If we are not allowed to block the client, the only thing
* we can do is treating it as a timeout (even with timeout 0). */
if (c->flags & CLIENT_DENY_BLOCKING) {
addReplyNullArray(c);
return;
}
/* If the keys do not exist we must block */
struct listPos pos = {where};
blockForKeys(c,BLOCKED_LIST,c->argv + 1,c->argc - 2,timeout,NULL,&pos,NULL);
}
看下blockForKeys如何实现
/* This is how the current blocking lists/sorted sets/streams work, we use
* BLPOP as example, but the concept is the same for other list ops, sorted
* sets and XREAD.
* - If the user calls BLPOP and the key exists and contains a non empty list
* then LPOP is called instead. So BLPOP is semantically the same as LPOP
* if blocking is not required.
* - If instead BLPOP is called and the key does not exists or the list is
* empty we need to block. In order to do so we remove the notification for
* new data to read in the client socket (so that we'll not serve new
* requests if the blocking request is not served). Also we put the client
* in a dictionary (db->blocking_keys) mapping keys to a list of clients
* blocking for this keys.
* - If a PUSH operation against a key with blocked clients waiting is
* performed, we mark this key as "ready", and after the current command,
* MULTI/EXEC block, or script, is executed, we serve all the clients waiting
* for this list, from the one that blocked first, to the last, accordingly
* to the number of elements we have in the ready list.
*/
翻译:
以BLPOP为例,分析:
1.如果用户用blpop并且这个key存在而且非空,就和lpop一样
2.如果调用BLPOP并且key不存在或者list是空的,我们需要阻塞。为此,我们删除了要在客户端读取的新数据,这样我们就不会回复新的请求,如果阻塞的请求没有被回复。并且我们将这个客户放到mapping中记录下因为这个key被阻塞的client list。
/* Set a client in blocking mode for the specified key (list, zset or stream),
* with the specified timeout. The 'type' argument is BLOCKED_LIST,
* BLOCKED_ZSET or BLOCKED_STREAM depending on the kind of operation we are
* waiting for an empty key in order to awake the client. The client is blocked
* for all the 'numkeys' keys as in the 'keys' argument. When we block for
* stream keys, we also provide an array of streamID structures: clients will
* be unblocked only when items with an ID greater or equal to the specified
* one is appended to the stream. */
翻译:
将客户端设置为blocking模式因为指定的key,和特殊的timeout,这个tepe的参数是block_list\zset\stream依赖于我们等待这个空key发生什么操作,然后唤醒客户端。客户端因为numkeys的keys被阻塞住,当我们因为streamkey祖肃住,同样提供了一系列的streamID的数据结构:客户端将在ID大于指定项的时候解锁
void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeout, robj *target, struct listPos *listpos, streamID *ids) {
dictEntry *de;
list *l;
int j;
// 设定阻塞超时时间
c->bpop.timeout = timeout;
// 设置目标选型,target在执行RPOPLPUSH命令时使用
c->bpop.target = target;
if (listpos != NULL) c->bpop.listpos = *listpos;
if (target != NULL) incrRefCount(target);
for (j = 0; j < numkeys; j++) {
/* Allocate our bkinfo structure, associated to each key the client
* is blocked for. */
bkinfo *bki = zmalloc(sizeof(*bki));
if (btype == BLOCKED_STREAM)
bki->stream_id = ids[j];
/* If the key already exists in the dictionary ignore it. */
//如果键存在忽略,反之添加
if (dictAdd(c->bpop.keys,keys[j],bki) != DICT_OK) {
zfree(bki);
continue;
}
incrRefCount(keys[j]);
/* And in the other "side", to map keys -> clients */
de = dictFind(c->db->blocking_keys,keys[j]);
//如果mapping上没有
if (de == NULL) {
int retval;
/* For every key we take a list of clients blocked for it */
//每一个key都设置一个list保存被阻塞的client
l = listCreate();
retval = dictAdd(c->db->blocking_keys,keys[j],l);
incrRefCount(keys[j]);
serverAssertWithInfo(c,keys[j],retval == DICT_OK);
} else {
l = dictGetVal(de);
}
listAddNodeTail(l,c);
bki->listnode = listLast(l);
}
//阻塞该客户端
blockClient(c,btype);
}
/* This structure represents the blocked key information that we store
* in the client structure. Each client blocked on keys, has a
* client->bpop.keys hash table. The keys of the hash table are Redis
* keys pointers to 'robj' structures. The value is this structure.
* The structure has two goals: firstly we store the list node that this
* client uses to be listed in the database "blocked clients for this key"
* list, so we can later unblock in O(1) without a list scan.
* Secondly for certain blocking types, we have additional info. Right now
* the only use for additional info we have is when clients are blocked
* on streams, as we have to remember the ID it blocked for. */
翻译:
数据结构记录着阻塞key的信息,存在client的数据结构中。每一个被key阻塞的客户端,有一个client->bpop.keys的哈希表。哈希表的key是redis的key指针(指向robj的数据结构)。值是数据结构。这样设计有两个目标:首先我们存list节点,我们后续解除阻塞的时候可以只换0(1)的时间,而不需要扫描list。第二对于确定的阻塞类型,我们有额外信息。现在这个唯一
typedef struct bkinfo {
listNode *listnode; /* List node for db->blocking_keys[key] list. */
streamID stream_id; /* Stream ID if we blocked in a stream. */
} bkinfo;
罗列下涉及的数据结构:
typedef struct client {
redisDb *db; // 指向当前数据库
blockingState bpop; // 记录阻塞状态
// ...其他的参数省略
}
typedef struct redisDb {
dict *blocking_keys; // 记录所有造成阻塞的键,及其相应的客户端
// ...其他参数省略
} redisDb;
Redis采用了一个字典结构blocking_keys,其将所有造成阻塞的键,以及阻塞于该键的所有客户端的信息存放起来。执行完这些以后,就调用blockClient函数,真正的对该客户端进行阻塞。
如何接触阻塞
执行阻塞的时候,设置了超时参数,如果阻塞时长超过了该参数设定的时间,则自动对该客户端进行解阻塞
执行阻塞的时候,记录了所有造成客户端阻塞的键,那么如果有其他客户端执行命令,往造成阻塞的键里面添加了新值,这个时候Redis检查到该键中有值了,就会处理pop命令,也就是说,Redis采用先阻塞,后执行的策略来执行阻塞命令。
/* If the specified key has clients blocked waiting for list pushes, this
* function will put the key reference into the server.ready_keys list.
* Note that db->ready_keys is a hash table that allows us to avoid putting
* the same key again and again in the list in case of multiple pushes
* made by a script or in the context of MULTI/EXEC.
*
* The list will be finally processed by handleClientsBlockedOnKeys() */
翻译:
如果特定的key有客户端被list的push阻塞住,这个方法将key放入server.ready_key列表中。注意到db->ready_keys十一个哈希表,可以编码我们放同样的元素在list中。
void signalKeyAsReady(redisDb *db, robj *key, int type) {
readyList *rl;
/* Quick returns. */
int btype = getBlockedTypeByType(type);
if (btype == BLOCKED_NONE) {
/* The type can never block. */
return;
}
if (!server.blocked_clients_by_type[btype] &&
!server.blocked_clients_by_type[BLOCKED_MODULE]) {
/* No clients block on this type. Note: Blocked modules are represented
* by BLOCKED_MODULE, even if the intention is to wake up by normal
* types (list, zset, stream), so we need to check that there are no
* blocked modules before we do a quick return here. */
return;
}
/* No clients blocking for this key? No need to queue it. */
if (dictFind(db->blocking_keys,key) == NULL) return;
/* Key was already signaled? No need to queue it again. */
if (dictFind(db->ready_keys,key) != NULL) return;
/* Ok, we need to queue this key into server.ready_keys. */
rl = zmalloc(sizeof(*rl));
rl->key = key;
rl->db = db;
incrRefCount(key);
listAddNodeTail(server.ready_keys,rl);
/* We also add the key in the db->ready_keys dictionary in order
* to avoid adding it multiple times into a list with a simple O(1)
* check. */
incrRefCount(key);
serverAssert(dictAdd(db->ready_keys,key,NULL) == DICT_OK);
}
注意我们将readt_key在dictAdd(db->ready_keys,key,NULL),listAddNodeTail(server.ready_keys,rl)都放入了。
f放入dict是为了阻止多次添加和o(1)的寻找速度
typedef struct redisDb {
dict *ready_keys; // 存放push操作添加的造成阻塞的键,字典结构
// 省略了其他参数
} redisDb;
struct redisServer {
list *ready_keys; // 存在push操作添加的造成阻塞的键,链表结构
// 省略了不必要的参数
}
// ready_keys链表结构中存放的节点数据结构
typedef struct readyList {
redisDb *db; // key所在的数据库
robj *key; //造成阻塞的键
} readyList;
阻塞的操作,后续会单独开篇来描述。