复习下zpilist和skiplist的数据结构:
skiplist
hashtable
robj数据结构:
typedef struct redisObject {
unsigned type:4;
unsigned encoding:4;
unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or
* LFU data (least significant 8 bits frequency
* and most significant 16 bits access time). */
int refcount;
void *ptr;
} robj;
Zset迭代器
/* Sorted set iterators. */
union _iterzset {
struct {
unsigned char *zl;
unsigned char *eptr, *sptr;
} zl;
struct {
zset *zs;
zskiplistNode *node;
} sl;
} zset;
} iter;
ziplist编码的zset
| zlbytes | zltail | zllen | ele1 | score1 | .... | zlend
ziplist有很多的接口函数,其中挑出几个来分析
unsigned char *zzlInsertAt(unsigned char *zl, unsigned char *eptr, sds ele, double score) {
unsigned char *sptr;
char scorebuf[128];
int scorelen;
size_t offset;
scorelen = d2string(scorebuf,sizeof(scorebuf),score);
if (eptr == NULL) {
zl = ziplistPush(zl,(unsigned char*)ele,sdslen(ele),ZIPLIST_TAIL);
zl = ziplistPush(zl,(unsigned char*)scorebuf,scorelen,ZIPLIST_TAIL);
} else {
/* Keep offset relative to zl, as it might be re-allocated. */
offset = eptr-zl;
zl = ziplistInsert(zl,eptr,(unsigned char*)ele,sdslen(ele));
eptr = zl+offset;
/* Insert score after the element. */
serverAssert((sptr = ziplistNext(zl,eptr)) != NULL);
zl = ziplistInsert(zl,sptr,(unsigned char*)scorebuf,scorelen);
}
return zl;
}
zset的数据结构
typedef struct zset {
dict *dict;
zskiplist *zsl;
} zset;
这里给跳跃表加上了一个字典结构,我们知道跳跃表在插入、删除和查找操作上都可以做到O(logn)的时间复杂度,但是,zset还需要支持获取给定元素的分值、判断某元素是否存在于zset中等操作,这些如果在skiplist的基础上做就相对较复杂,效率不高,所以zset维护了一个字典结构,用来快速的获取给定元素的分值以及判断元素值是否存在于zset中等操作,这样可以提高zset的效率。
zskip的操作在之前的文章中有分析过了。
转换
void zsetConvert(robj *zobj, int encoding) {
zset *zs;
zskiplistNode *node, *next;
sds ele;
double score;
if (zobj->encoding == encoding) return;
if (zobj->encoding == OBJ_ENCODING_ZIPLIST) { //ziplist转zskiplist
unsigned char *zl = zobj->ptr;
unsigned char *eptr, *sptr;
unsigned char *vstr;
unsigned int vlen;
long long vlong;
if (encoding != OBJ_ENCODING_SKIPLIST)
serverPanic("Unknown target encoding");
zs = zmalloc(sizeof(*zs));
zs->dict = dictCreate(&zsetDictType,NULL);
zs->zsl = zslCreate();
eptr = ziplistIndex(zl,0); //第一个元素值
serverAssertWithInfo(NULL,zobj,eptr != NULL);
sptr = ziplistNext(zl,eptr); //第一个元素的score
serverAssertWithInfo(NULL,zobj,sptr != NULL);
while (eptr != NULL) {
score = zzlGetScore(sptr); //获取到分数
serverAssertWithInfo(NULL,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
if (vstr == NULL)
ele = sdsfromlonglong(vlong);
else
ele = sdsnewlen((char*)vstr,vlen);
node = zslInsert(zs->zsl,score,ele); //zskiplist中插入
serverAssert(dictAdd(zs->dict,ele,&node->score) == DICT_OK); //dict中插入
zzlNext(zl,&eptr,&sptr);
}
zfree(zobj->ptr);
zobj->ptr = zs;
zobj->encoding = OBJ_ENCODING_SKIPLIST;
} else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) { //还支持skiplist转换为ziplist
unsigned char *zl = ziplistNew();
if (encoding != OBJ_ENCODING_ZIPLIST)
serverPanic("Unknown target encoding");
/* Approach similar to zslFree(), since we want to free the skiplist at
* the same time as creating the ziplist. */
zs = zobj->ptr;
dictRelease(zs->dict); //释放字典
node = zs->zsl->header->level[0].forward; //找到首节点
zfree(zs->zsl->header);
zfree(zs->zsl);
while (node) {
zl = zzlInsertAt(zl,NULL,node->ele,node->score);
next = node->level[0].forward;
zslFreeNode(node);
node = next;
}
zfree(zs);
zobj->ptr = zl;
zobj->encoding = OBJ_ENCODING_ZIPLIST;
} else {
serverPanic("Unknown sorted set encoding");
}
}
我们注意到这里的转换支持双向转换,zskiplist到ziplist也是可以的。
/* Convert the sorted set object into a ziplist if it is not already a ziplist
* and if the number of elements and the maximum element size and total elements size
* are within the expected ranges. */
void zsetConvertToZiplistIfNeeded(robj *zobj, size_t maxelelen, size_t totelelen) {
if (zobj->encoding == OBJ_ENCODING_ZIPLIST) return;
zset *zset = zobj->ptr;
if (zset->zsl->length <= server.zset_max_ziplist_entries &&
maxelelen <= server.zset_max_ziplist_value &&
ziplistSafeToAdd(NULL, totelelen))
{
zsetConvert(zobj,OBJ_ENCODING_ZIPLIST);
}
}
#define ZADD_IN_NONE 0
void zaddCommand(client *c) {
zaddGenericCommand(c,ZADD_IN_NONE);
}
/* This generic command implements both ZADD and ZINCRBY. */
void zaddGenericCommand(client *c, int flags) {
static char *nanerr = "resulting score is not a number (NaN)";
robj *key = c->argv[1];
robj *zobj;
sds ele;
double score = 0, *scores = NULL;
int j, elements, ch = 0;
int scoreidx = 0;
/* The following vars are used in order to track what the command actually
* did during the execution, to reply to the client and to trigger the
* notification of keyspace change. */
int added = 0; /* Number of new elements added. */
int updated = 0; /* Number of elements with updated score. */
int processed = 0; /* Number of elements processed, may remain zero with
options like XX. */
/* Parse options. At the end 'scoreidx' is set to the argument position
* of the score of the first score-element pair. */
scoreidx = 2;
while(scoreidx < c->argc) {
char *opt = c->argv[scoreidx]->ptr;
if (!strcasecmp(opt,"nx")) flags |= ZADD_IN_NX;
else if (!strcasecmp(opt,"xx")) flags |= ZADD_IN_XX;
else if (!strcasecmp(opt,"ch")) ch = 1; /* Return num of elements added or updated. */
else if (!strcasecmp(opt,"incr")) flags |= ZADD_IN_INCR;
else if (!strcasecmp(opt,"gt")) flags |= ZADD_IN_GT;
else if (!strcasecmp(opt,"lt")) flags |= ZADD_IN_LT;
else break;
scoreidx++;
}
/* Turn options into simple to check vars. */
int incr = (flags & ZADD_IN_INCR) != 0;
int nx = (flags & ZADD_IN_NX) != 0;
int xx = (flags & ZADD_IN_XX) != 0;
int gt = (flags & ZADD_IN_GT) != 0;
int lt = (flags & ZADD_IN_LT) != 0;
/* After the options, we expect to have an even number of args, since
* we expect any number of score-element pairs. */
elements = c->argc-scoreidx;
if (elements % 2 || !elements) {
addReplyErrorObject(c,shared.syntaxerr);
return;
}
elements /= 2; /* Now this holds the number of score-element pairs. */
/* Check for incompatible options. */
if (nx && xx) {
addReplyError(c,
"XX and NX options at the same time are not compatible");
return;
}
if ((gt && nx) || (lt && nx) || (gt && lt)) {
addReplyError(c,
"GT, LT, and/or NX options at the same time are not compatible");
return;
}
/* Note that XX is compatible with either GT or LT */
if (incr && elements > 1) {
addReplyError(c,
"INCR option supports a single increment-element pair");
return;
}
/* Start parsing all the scores, we need to emit any syntax error
* before executing additions to the sorted set, as the command should
* either execute fully or nothing at all. */
scores = zmalloc(sizeof(double)*elements);
for (j = 0; j < elements; j++) {
if (getDoubleFromObjectOrReply(c,c->argv[scoreidx+j*2],&scores[j],NULL)
!= C_OK) goto cleanup;
}
/* Lookup the key and create the sorted set if does not exist. */
zobj = lookupKeyWrite(c->db,key);
if (checkType(c,zobj,OBJ_ZSET)) goto cleanup;
if (zobj == NULL) {
if (xx) goto reply_to_client; /* No key + XX option: nothing to do. */
if (server.zset_max_ziplist_entries == 0 ||
server.zset_max_ziplist_value < sdslen(c->argv[scoreidx+1]->ptr))
{
zobj = createZsetObject();
} else {
zobj = createZsetZiplistObject();
}
dbAdd(c->db,key,zobj);
}
for (j = 0; j < elements; j++) {
double newscore;
score = scores[j];
int retflags = 0;
ele = c->argv[scoreidx+1+j*2]->ptr;
int retval = zsetAdd(zobj, score, ele, flags, &retflags, &newscore);
if (retval == 0) {
addReplyError(c,nanerr);
goto cleanup;
}
if (retflags & ZADD_OUT_ADDED) added++;
if (retflags & ZADD_OUT_UPDATED) updated++;
if (!(retflags & ZADD_OUT_NOP)) processed++;
score = newscore;
}
server.dirty += (added+updated);
reply_to_client:
if (incr) { /* ZINCRBY or INCR option. */
if (processed)
addReplyDouble(c,score);
else
addReplyNull(c);
} else { /* ZADD. */
addReplyLongLong(c,ch ? added+updated : added);
}
cleanup:
zfree(scores);
if (added || updated) {
signalModifiedKey(c,c->db,key);
notifyKeyspaceEvent(NOTIFY_ZSET,
incr ? "zincr" : "zadd", key, c->db->id);
}
}
主要记住两个点,zskiplist还有一个hashtbale,还有ziplist可以转换为zskiplist。