命令 | 功能 |
---|---|
MULTI | 开始一个新的事务 |
DISCARD | 放弃执行事务 |
EXEC | 执行事务中的所有命令 |
WATCH | 监视一个或多个key,如果至少有一个key在EXEC之前被修改,则放弃执行事务 |
UNWATCH | 取消WATCH命令对所有键的监视 |
Redis对于事务的实现分为三个步骤:事务开始、事务队列和事务执行。下面就分别从这三个步骤入手,分析整个事务的实现过程。
事务开始
当我们发送MULTI
命令时,表示客户端需要执行一个事务。客户端定义了几个参数,来标记事务是否开始。
/* Client flags: bit masks that are tested against and OR-ed into c->flags. */
#define CLIENT_MULTI (1<<3) /* This client is in a MULTI context */
客户端通过flags |= CLIENT_MULTI
语句来标记事务开启与否,然后服务器在执行命令的时候只需要检查flags参数,就能知道事务是否开启。下面是MULTI
命令的源码实现:
/* MULTI command handler.
 *
 * Puts the client into transaction state by setting CLIENT_MULTI in its
 * flags and replies OK. A client that is already inside a MULTI context
 * gets an error reply instead and its flags are left untouched. */
void multiCommand(client *c) {
    int already_in_multi = (c->flags & CLIENT_MULTI) != 0;

    if (!already_in_multi) {
        c->flags |= CLIENT_MULTI;
        addReply(c, shared.ok);
    } else {
        addReplyError(c, "MULTI calls can not be nested");
    }
}
事务队列
既然事务中包含了一系列的操作,这些操作不能立即被执行,Redis必然会找个位置来存放这些命令。于是Redis定义了下面的结构体:
/* Client structure (abridged excerpt: only the field that holds the
 * transaction state is shown, other members are elided). */
typedef struct client {
    /* ... */
    multiState mstate; /* MULTI/EXEC state */
    /* ... */
} client;
/* Per-client MULTI/EXEC state: the queue of commands accumulated for the
 * transaction plus summary information about the queued commands. */
typedef struct multiState {
    multiCmd *commands;     /* Array of MULTI commands, grown by one entry
                               each time a command is queued */
    int count;              /* Total number of MULTI commands */
    int cmd_flags;          /* The accumulated command flags OR-ed together.
                               So if at least a command has a given flag, it
                               will be set in this field. */
    int cmd_inv_flags;      /* Same as cmd_flags, OR-ing the ~flags. so that it
                               is possible to know if all the commands have a
                               certain flag. */
    size_t argv_len_sums;   /* mem used by all commands arguments */
} multiState;
/* Command queue entry */
/* Client MULTI/EXEC state: one queued command, i.e. one element of the
 * multiState.commands array. */
typedef struct multiCmd {
    robj **argv;                /* Argument vector taken from the client when
                                   the command was queued */
    int argv_len;               /* Copied from the client's argv_len when the
                                   command was queued */
    int argc;                   /* Number of arguments */
    struct redisCommand *cmd;   /* The command to run at EXEC time */
} multiCmd;
其中,所有在事务期间的命令都存放在事务队列中,也就是commands
指针内。Redis在processCommand
执行命令的函数里面判断此时是否开启了一个事务,如开启,则将命令压入命令队列,等待事务来处理。
/* Redis command processing function (abridged excerpt: only the branch that
 * queues commands while in MULTI state is shown, so the braces opened here
 * are not closed in this snippet). */
int processCommand(client *c){
    /* While the client is in a MULTI context, every command except EXEC,
     * DISCARD, MULTI, WATCH, QUIT and RESET is appended to the transaction
     * queue instead of being executed, and the client receives the shared
     * "queued" reply. */
    if (c->flags & CLIENT_MULTI &&
        c->cmd->proc != execCommand &&
        c->cmd->proc != discardCommand &&
        c->cmd->proc != multiCommand &&
        c->cmd->proc != watchCommand &&
        c->cmd->proc != quitCommand &&
        c->cmd->proc != resetCommand)
    {
        queueMultiCommand(c);
        addReply(c,shared.queued);
事务命令入队的功能由queueMultiCommand
函数执行,其源码如下:
/* Append the client's current command to its MULTI command queue.
 *
 * The client's argument vector is moved into the new queue entry (not
 * duplicated), so the client's own argv fields are reset before returning. */
void queueMultiCommand(client *c) {
    /* A transaction that is already flagged as aborted will not be executed,
     * so queueing more commands would only waste memory. This matters when
     * the client pipelines its commands, or does not read the previous
     * replies and therefore did not notice that the MULTI was aborted. */
    if (c->flags & (CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC)) return;

    /* Grow the queue by one slot and fill in the new tail entry. */
    multiState *ms = &c->mstate;
    ms->commands = zrealloc(ms->commands, sizeof(multiCmd)*(ms->count+1));
    multiCmd *slot = &ms->commands[ms->count];
    slot->argv = c->argv;
    slot->argv_len = c->argv_len;
    slot->argc = c->argc;
    slot->cmd = c->cmd;
    ms->count++;

    /* Fold this command into the per-transaction summaries. */
    ms->cmd_flags |= c->cmd->flags;
    ms->cmd_inv_flags |= ~c->cmd->flags;
    ms->argv_len_sums += c->argv_len_sum + sizeof(robj*)*c->argc;

    /* The arguments now live in the queue entry, so the client must not
     * reference them anymore. */
    c->argc = 0;
    c->argv_len = 0;
    c->argv_len_sum = 0;
    c->argv = NULL;
}
事务执行
前面事务开始后的命令都存放在命令队列中,当客户端执行EXEC
命令时,服务器会将事务队列中存放的命令以『先进先出』的方式一一执行,然后回复给客户端。
/* EXEC command handler: runs the commands queued in c->mstate in queue order
 * and replies with an array containing one reply per queued command.
 *
 * The queued commands are not run when:
 *  - the client is not in a MULTI context (error reply), or
 *  - the client is flagged CLIENT_DIRTY_CAS (null array reply) or
 *    CLIENT_DIRTY_EXEC (EXECABORT error reply); in these two cases the
 *    transaction state is discarded as well. */
void execCommand(client *c) {
    int j;
    robj **orig_argv;
    int orig_argc, orig_argv_len;
    struct redisCommand *orig_cmd;
    /* Role of this instance when EXEC started; compared with the role after
     * the queued commands have run (see the propagation code below). */
    int was_master = server.masterhost == NULL;

    if (!(c->flags & CLIENT_MULTI)) {
        addReplyError(c,"EXEC without MULTI");
        return;
    }

    /* EXEC with expired watched key is disallowed*/
    if (isWatchedKeyExpired(c)) {
        c->flags |= (CLIENT_DIRTY_CAS);
    }

    /* Check if we need to abort the EXEC because:
     * 1) Some WATCHed key was touched.
     * 2) There was a previous error while queueing commands.
     * A failed EXEC in the first case returns a multi bulk nil object
     * (technically it is not an error but a special behavior), while
     * in the second an EXECABORT error is returned. */
    if (c->flags & (CLIENT_DIRTY_CAS | CLIENT_DIRTY_EXEC)) {
        if (c->flags & CLIENT_DIRTY_EXEC) {
            addReplyErrorObject(c, shared.execaborterr);
        } else {
            addReply(c, shared.nullarray[c->resp]);
        }

        discardTransaction(c);
        return;
    }

    /* Snapshot of the flags, used after the loop to decide whether
     * CLIENT_DENY_BLOCKING has to be cleared again. */
    uint64_t old_flags = c->flags;

    /* we do not want to allow blocking commands inside multi */
    c->flags |= CLIENT_DENY_BLOCKING;

    /* Exec all the queued commands */
    unwatchAllKeys(c); /* Unwatch ASAP otherwise we'll waste CPU cycles */

    server.in_exec = 1;

    /* Save the client's current command vector (the EXEC call itself); the
     * loop below temporarily replaces it with each queued command. */
    orig_argv = c->argv;
    orig_argv_len = c->argv_len;
    orig_argc = c->argc;
    orig_cmd = c->cmd;
    /* The reply is an array with one element per queued command. */
    addReplyArrayLen(c,c->mstate.count);
    for (j = 0; j < c->mstate.count; j++) {
        /* Install the j-th queued command as the client's current command. */
        c->argc = c->mstate.commands[j].argc;
        c->argv = c->mstate.commands[j].argv;
        c->argv_len = c->mstate.commands[j].argv_len;
        c->cmd = c->mstate.commands[j].cmd;

        /* ACL permissions are also checked at the time of execution in case
         * they were changed after the commands were queued. */
        int acl_errpos;
        int acl_retval = ACLCheckAllPerm(c,&acl_errpos);
        if (acl_retval != ACL_OK) {
            /* Denied: log the event and emit an error as this command's
             * element of the reply array; the loop then continues with the
             * next queued command. */
            char *reason;
            switch (acl_retval) {
            case ACL_DENIED_CMD:
                reason = "no permission to execute the command or subcommand";
                break;
            case ACL_DENIED_KEY:
                reason = "no permission to touch the specified keys";
                break;
            case ACL_DENIED_CHANNEL:
                reason = "no permission to access one of the channels used "
                         "as arguments";
                break;
            default:
                reason = "no permission";
                break;
            }
            addACLLogEntry(c,acl_retval,ACL_LOG_CTX_MULTI,acl_errpos,NULL,NULL);
            addReplyErrorFormat(c,
                "-NOPERM ACLs rules changed between the moment the "
                "transaction was accumulated and the EXEC call. "
                "This command is no longer allowed for the "
                "following reason: %s", reason);
        } else {
            /* The client whose id is CLIENT_ID_AOF gets CMD_CALL_NONE,
             * every other client gets CMD_CALL_FULL. */
            if (c->id == CLIENT_ID_AOF)
                call(c,CMD_CALL_NONE);
            else
                call(c,CMD_CALL_FULL);

            /* CLIENT_DENY_BLOCKING was set above, so the command must not
             * have left the client blocked. */
            serverAssert((c->flags & CLIENT_BLOCKED) == 0);
        }

        /* Commands may alter argc/argv, restore mstate. */
        /* NOTE(review): argv_len is loaded from the queue entry at the top
         * of the loop but is not written back here together with argc/argv
         * - confirm whether that is intentional. */
        c->mstate.commands[j].argc = c->argc;
        c->mstate.commands[j].argv = c->argv;
        c->mstate.commands[j].cmd = c->cmd;
    }

    // restore old DENY_BLOCKING value
    if (!(old_flags & CLIENT_DENY_BLOCKING))
        c->flags &= ~CLIENT_DENY_BLOCKING;

    /* Put the saved EXEC command vector back before the transaction state
     * is released. */
    c->argv = orig_argv;
    c->argv_len = orig_argv_len;
    c->argc = orig_argc;
    c->cmd = orig_cmd;
    discardTransaction(c);

    /* Make sure the EXEC command will be propagated as well if MULTI
     * was already propagated. */
    if (server.propagate_in_transaction) {
        int is_master = server.masterhost == NULL;
        server.dirty++;
        /* If inside the MULTI/EXEC block this instance was suddenly
         * switched from master to slave (using the SLAVEOF command), the
         * initial MULTI was propagated into the replication backlog, but the
         * rest was not. We need to make sure to at least terminate the
         * backlog with the final EXEC. */
        if (server.repl_backlog && was_master && !is_master) {
            char *execcmd = "*1\r\n$4\r\nEXEC\r\n";
            feedReplicationBuffer(execcmd,strlen(execcmd));
        }
        afterPropagateExec();
    }

    server.in_exec = 0;
}
事务取消
Redis提供了DISCARD
命令来取消当前客户端的事务状态,其主要操作是:
- 清空命令队列
- 初始化命令队列
- 取消标记flag
- 取消所有被监视的键
它的实现很简单,源码如下:
/* DISCARD command handler.
 *
 * Inside a MULTI context the pending transaction is thrown away and the
 * client gets an OK reply; outside of one the client gets an error reply. */
void discardCommand(client *c) {
    if (c->flags & CLIENT_MULTI) {
        discardTransaction(c);
        addReply(c, shared.ok);
        return;
    }
    addReplyError(c, "DISCARD without MULTI");
}
/* Reset the client's transaction state: the command queue is released and
 * re-initialized, the transaction flags are cleared and all watched keys
 * are unwatched. */
void discardTransaction(client *c) {
    /* Release the transaction queue, then set it up again from scratch. */
    freeClientMultiState(c);
    initClientMultiState(c);

    /* Clear every transaction-related flag. */
    c->flags = c->flags & ~(CLIENT_MULTI|CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC);

    /* Stop watching all the keys watched by this client. */
    unwatchAllKeys(c);
}
WATCH实现
事务功能中还提供了监视键的功能,当我们对某个键执行了监视之后,如果在EXEC执行之前该键被修改,则放弃执行该事务。
/* Client structure (abridged excerpt: only the WATCH-related field is shown,
 * other members are elided). */
typedef struct client {
    // ...
    list *watched_keys; /* All the keys watched by this client; the list
                           nodes hold watchedKey entries */
    // ...
} client;
/* Watched key entry: one element of a client's watched_keys list, pairing
 * a watched key with the database it was watched in. */
typedef struct watchedKey {
    robj *key;      /* The watched key */
    redisDb *db;    /* The database the key belongs to */
} watchedKey;
/* Database structure (abridged excerpt: only the WATCH-related field is
 * shown). */
typedef struct redisDb {
    // ...
    dict *watched_keys; /* All the watched keys of this DB together with the
                           clients watching them */
    // ...
} redisDb;
/* Layout of redisDb->watched_keys:
 *   | key1 | -- | client1 | -> | client2 | -> | client3 |
 *   | key2 | -- | client4 |
 *   | key3 | -- | client5 | -> | client6 |
 * Each dictionary key is a watched key; the value is a linked list holding
 * every client that watches that key.
 */
这么做的原因是,当客户端添加监视键的时候,能快速判断该键是否已经被监视;而且,当客户端取消所有被监视键的时候,可以快速找到该键所在的数据库,从而在redisDb->watched_keys
删除该被监视的键。下面来看看添加监视键和取消监视键的源码实现。
/* Start watching `key` (in the client's current DB, c->db) on behalf of
 * client `c`.
 *
 * The watch is recorded in two places: the client is appended to the list
 * stored under `key` in c->db->watched_keys, and a watchedKey entry is
 * appended to c->watched_keys. Watching a key the client already watches in
 * the same DB is a no-op. */
void watchForKey(client *c, robj *key) {
    listIter iter;
    listNode *node;

    /* Nothing to do if the client already watches this key in this DB. */
    listRewind(c->watched_keys, &iter);
    for (node = listNext(&iter); node != NULL; node = listNext(&iter)) {
        watchedKey *seen = listNodeValue(node);
        if (seen->db != c->db) continue;
        if (equalStringObjects(key, seen->key)) return;
    }

    /* Register the client under the key in the DB-level dictionary, creating
     * the per-key client list the first time the key is watched in this DB.
     * The dictionary keeps its own reference to the key. */
    list *watchers = dictFetchValue(c->db->watched_keys, key);
    if (watchers == NULL) {
        watchers = listCreate();
        dictAdd(c->db->watched_keys, key, watchers);
        incrRefCount(key);
    }
    listAddNodeTail(watchers, c);

    /* Record the key in the client's own list; the entry holds another
     * reference to the key. */
    watchedKey *entry = zmalloc(sizeof(*entry));
    entry->key = key;
    entry->db = c->db;
    incrRefCount(key);
    listAddNodeTail(c->watched_keys, entry);
}
/* Stop watching every key currently watched by this client. Clearing the
 * EXEC dirty flag is left to the caller. */
void unwatchAllKeys(client *c) {
    listIter iter;
    listNode *node;

    if (listLength(c->watched_keys) == 0) return;

    listRewind(c->watched_keys, &iter);
    while ((node = listNext(&iter)) != NULL) {
        watchedKey *entry = listNodeValue(node);

        /* Find the list of clients watching this key in its DB and take
         * this client out of it. */
        list *clients = dictFetchValue(entry->db->watched_keys, entry->key);
        serverAssertWithInfo(c,NULL,clients != NULL);
        listNode *self = listSearchKey(clients, c);
        listDelNode(clients, self);

        /* No client watches the key anymore: drop its dictionary entry. */
        if (listLength(clients) == 0)
            dictDelete(entry->db->watched_keys, entry->key);

        /* Unlink the entry from the client's own list and release it,
         * together with its reference to the key. */
        listDelNode(c->watched_keys, node);
        decrRefCount(entry->key);
        zfree(entry);
    }
}
以上源码就是对字典结构和链表结构的添加和删除操作,很好理解。那么服务器运行过程中,在哪里判断该键有没有被修改呢?我们找到了touchWatchedKey
函数。
/* "Touch" a key, so that if this key is being WATCHed by some client the
 * next EXEC will fail.
 *
 * db  - the database the touched key belongs to.
 * key - the touched key.
 *
 * Every client found in the list stored under `key` in db->watched_keys is
 * flagged CLIENT_DIRTY_CAS and has all of its watches removed. */
void touchWatchedKey(redisDb *db, robj *key) {
    list *clients;
    listIter li;
    listNode *ln;

    /* Nothing is watched in this DB, or nobody watches this key. */
    if (dictSize(db->watched_keys) == 0) return;
    clients = dictFetchValue(db->watched_keys, key);
    if (!clients) return;

    /* Mark all the clients watching this key as CLIENT_DIRTY_CAS */
    listRewind(clients,&li);
    while((ln = listNext(&li))) {
        client *c = listNodeValue(ln);

        c->flags |= CLIENT_DIRTY_CAS;
        /* As the client is marked as dirty, there is no point in getting here
         * again in case that key (or others) are modified again (or keep the
         * memory overhead till EXEC). */
        /* NOTE(review): unwatchAllKeys() removes c from this very `clients`
         * list (and deletes the dictionary entry once the list is empty)
         * while the list is being iterated here; this presumably relies on
         * the list iterator tolerating removal of the current node -
         * confirm against the list implementation. */
        unwatchAllKeys(c);
    }
}
当然,这只是对所有监视了被修改键的客户端进行标记,还是没有弄清楚在什么时候标记这些客户端。于是,继续追溯,发现这个函数通常被signalModifiedKey()
函数进行封装,这下又见到了我们的『老朋友』了,这个总是在键被修改的函数里调用的函数。