intset简介:
之前介绍的redis的数据结构功能强大,但是内存消耗也多,redis也有一些小巧不占内存的数据结构。当一个集合只包含整数值元素,并且这个集合的元素数量不多时,就会用intset作为集合键的底层实现。
数据结构:
/* Compact set of integers.
 *
 * The two header fields are read and written through intrev32ifbe() by the
 * functions in this file. "contents" is a flexible array member holding the
 * raw element bytes; it is reinterpreted as int16_t, int32_t or int64_t
 * slots depending on the current value of "encoding". */
typedef struct intset {
    uint32_t encoding;   /* encoding currently used for the elements */
    uint32_t length;     /* number of elements stored in the set */
    int8_t contents[];   /* element storage, layout selected by "encoding" */
} intset;
内存示意图:
升级操作:
当将一个新元素添加到集合里面,并且新元素的类型比整数集合现有所有元素的类型都要长时,整数集合需要先升级为更长的编码(见下文的intsetUpgradeAndAdd),然后才能把新元素添加进去。
示例:
升级前:int16_t
升级后:int32_t
整数集合相关API:
生成:
/* Allocate and return a new, empty intset.
 *
 * The set starts out with the INTSET_ENC_INT16 encoding and a length of
 * zero. Only sizeof(intset) bytes are requested, so the header is allocated
 * without any element storage. */
intset *intsetNew(void) {
    intset *set = zmalloc(sizeof(intset));

    set->length = 0;
    set->encoding = intrev32ifbe(INTSET_ENC_INT16);
    return set;
}
intrev32ifbe()是一个宏定义,其定义和实现位于redis根目录下的endianconv.h和endianconv.c中,用来根据主机字节序做整数的大小端转换。
添加:
/* Insert "value" into the intset and return the set.
 *
 * The returned pointer must replace the caller's old one: the insertion
 * goes through intsetResize(), which reallocates the set.
 *
 * When "success" is not NULL it receives 1 if the value was added, or 0 if
 * the value was already present (in which case the set is left untouched). */
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
    uint8_t required = _intsetValueEncoding(value); /* encoding "value" needs */
    uint32_t slot;

    if (success) *success = 1;

    /* A value that needs a wider encoding than the set currently uses lies
     * outside the range of every stored element, so the upgrade path can
     * simply prepend or append it. That path always inserts, so *success
     * keeps the 1 assigned above. */
    if (required > intrev32ifbe(is->encoding)) {
        return intsetUpgradeAndAdd(is,value);
    }

    /* intsetSearch() reports a hit for a duplicate; on a miss it fills
     * "slot" with the index where the value has to be inserted. */
    if (intsetSearch(is,value,&slot)) {
        if (success) *success = 0;
        return is;
    }

    /* Grow by one element. When the slot is not at the very end, call
     * intsetMoveTail() first (defined elsewhere; presumably shifts the
     * elements from "slot" onward up by one position -- confirm). */
    is = intsetResize(is,intrev32ifbe(is->length)+1);
    if (slot < intrev32ifbe(is->length)) intsetMoveTail(is,slot,slot+1);

    _intsetSet(is,slot,value);
    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
    return is;
}
/* Select the encoding needed to store "v": INTSET_ENC_INT16 when it fits in
 * an int16_t, INTSET_ENC_INT32 when it fits in an int32_t, and
 * INTSET_ENC_INT64 for everything else. */
static uint8_t _intsetValueEncoding(int64_t v) {
    if (v >= INT16_MIN && v <= INT16_MAX) return INTSET_ENC_INT16;
    if (v >= INT32_MIN && v <= INT32_MAX) return INTSET_ENC_INT32;
    return INTSET_ENC_INT64;
}
升级后插入:
/* Switch the intset to the encoding required by "value", re-encode every
 * existing element, then store "value" itself.
 *
 * A negative value is written at index 0; a non-negative value is written
 * right after the last existing element. Returns the intset pointer obtained
 * from intsetResize(), which callers must use in place of the old one. */
static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
    uint8_t old_enc = intrev32ifbe(is->encoding);
    uint8_t wide_enc = _intsetValueEncoding(value);
    int remaining = intrev32ifbe(is->length);
    int shift = (value < 0);   /* 1 leaves slot 0 free for a prepended value */

    /* Store the new encoding before resizing: intsetResize() multiplies the
     * element count by the encoding to compute the allocation size. */
    is->encoding = intrev32ifbe(wide_enc);
    is = intsetResize(is,intrev32ifbe(is->length)+1);

    /* Re-encode from the last element down to the first so that a widened
     * element never overwrites one that is still in the old encoding. */
    while (remaining--) {
        _intsetSet(is,remaining+shift,_intsetGetEncoded(is,remaining,old_enc));
    }

    /* Drop the new value into the slot left free at the front or the back. */
    if (!shift) {
        _intsetSet(is,intrev32ifbe(is->length),value);
    } else {
        _intsetSet(is,0,value);
    }

    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
    return is;
}
查找:
/* Look up "value" in the intset.
 *
 * Returns 1 when the value is present and stores its index in "*pos".
 * Returns 0 when it is absent and stores in "*pos" the index at which it
 * can be inserted. "pos" may be NULL, in which case nothing is stored. */
static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
    int lo = 0, hi = intrev32ifbe(is->length)-1, mid = -1;
    int64_t cur = -1;

    /* Nothing can be found in an empty set; the insert position is 0. */
    if (intrev32ifbe(is->length) == 0) {
        if (pos) *pos = 0;
        return 0;
    }

    /* Shortcuts for values beyond either end of the set: the value cannot
     * be present, but the insert position is already known. */
    if (value > _intsetGet(is,hi)) {
        if (pos) *pos = intrev32ifbe(is->length);
        return 0;
    }
    if (value < _intsetGet(is,0)) {
        if (pos) *pos = 0;
        return 0;
    }

    /* Binary search over the elements between lo and hi. */
    while (lo <= hi) {
        /* The midpoint is computed on unsigned operands, as in the sum
         * lo+hi, before being stored back into a signed index. */
        mid = ((unsigned int)lo + (unsigned int)hi) >> 1;
        cur = _intsetGet(is,mid);
        if (value == cur) break;
        if (value > cur) {
            lo = mid+1;
        } else {
            hi = mid-1;
        }
    }

    if (value != cur) {
        /* Not found: "lo" is where the value would have to be inserted. */
        if (pos) *pos = lo;
        return 0;
    }

    if (pos) *pos = mid;
    return 1;
}
插入到具体的地方:
/* Store "value" at element index "pos", using the width selected by the
 * intset's current encoding.
 *
 * After the store, the matching memrevNNifbe() macro is applied to the slot
 * that was just written. Any encoding other than INTSET_ENC_INT64 or
 * INTSET_ENC_INT32 is handled as 16-bit. */
static void _intsetSet(intset *is, int pos, int64_t value) {
    uint32_t encoding = intrev32ifbe(is->encoding);

    if (encoding == INTSET_ENC_INT64) {
        int64_t *slot = ((int64_t*)is->contents)+pos;
        *slot = value;
        memrev64ifbe(slot);
    } else if (encoding == INTSET_ENC_INT32) {
        int32_t *slot = ((int32_t*)is->contents)+pos;
        *slot = value;
        memrev32ifbe(slot);
    } else {
        int16_t *slot = ((int16_t*)is->contents)+pos;
        *slot = value;
        memrev16ifbe(slot);
    }
}
memrev16ifbe()是一个宏定义,其定义和实现位于redis根目录下的endianconv.h和endianconv.c中,用来根据主机字节序做内存的大小端转换。
扩容:
/* Reallocate the intset so that it has room for "len" elements.
 *
 * The element area is sized as len multiplied by the current encoding
 * value. The product is computed in 64 bits, and the assert checks that
 * adding the header size cannot exceed SIZE_MAX. Returns the pointer
 * produced by zrealloc(); callers must use it in place of the old one. */
static intset *intsetResize(intset *is, uint32_t len) {
    uint64_t payload = (uint64_t)len*intrev32ifbe(is->encoding);

    assert(payload <= SIZE_MAX - sizeof(intset));
    return zrealloc(is,sizeof(intset)+payload);
}