搜ijsio 发表于 2015-11-12 12:37:36

redis源码分析:主从模式中从服务器同步策略

  从服务器在与主服务器建立连接后向主服务器发送同步命令,要求进行同步。发送同步命令后,将相应的读操作设为readSyncBulkPayload。

void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
char tmpfile, *err;
int dfd, maxtries = 5;
int sockerr = 0;
socklen_t errlen = sizeof(sockerr);
REDIS_NOTUSED(el);
REDIS_NOTUSED(privdata);
REDIS_NOTUSED(mask);
/* If this event fired after the user turned the instance into a master
* with SLAVEOF NO ONE we must just return ASAP. */
if (server.repl_state == REDIS_REPL_NONE) {
close(fd);
return;
}
/* Check for errors in the socket. */
if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &sockerr, &errlen) == -1)
sockerr = errno;
if (sockerr) {
aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
redisLog(REDIS_WARNING,"Error condition on socket for SYNC: %s",
strerror(sockerr));
goto error;
}
/* If we were connecting, it's time to send a non blocking PING, we want to
* make sure the master is able to reply before going into the actual
* replication process where we have long timeouts in the order of
* seconds (in the meantime the slave would block). */
if (server.repl_state == REDIS_REPL_CONNECTING) {
redisLog(REDIS_NOTICE,"Non blocking connect for SYNC fired the event.");
/* Delete the writable event so that the readable event remains
* registered and we can wait for the PONG reply. */
aeDeleteFileEvent(server.el,fd,AE_WRITABLE);
server.repl_state = REDIS_REPL_RECEIVE_PONG;
/* Send the PING, don't check for errors at all, we have the timeout
* that will take care about this. */
syncWrite(fd,"PING\r\n",6,100);
return;
}
/* Receive the PONG command. */
if (server.repl_state == REDIS_REPL_RECEIVE_PONG) {
char buf;
/* Delete the readable event, we no longer need it now that there is
* the PING reply to read. */
aeDeleteFileEvent(server.el,fd,AE_READABLE);
/* Read the reply with explicit timeout. */
buf = '\0';
if (syncReadLine(fd,buf,sizeof(buf),
server.repl_syncio_timeout*1000) == -1)
{
redisLog(REDIS_WARNING,
"I/O error reading PING reply from master: %s",
strerror(errno));
goto error;
}
/* We don't care about the reply, it can be +PONG or an error since
* the server requires AUTH. As long as it replies correctly, it's
* fine from our point of view. */
if (buf != '-' && buf != '+') {
redisLog(REDIS_WARNING,"Unexpected reply to PING from master.");
goto error;
} else {
redisLog(REDIS_NOTICE,
"Master replied to PING, replication can continue...");
}
}
/* AUTH with the master if required. */
if(server.masterauth) {
err = sendSynchronousCommand(fd,"AUTH",server.masterauth,NULL);
if (err) {
redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",err);
sdsfree(err);
goto error;
}
}
/* Set the slave port, so that Master's INFO command can list the
* slave listening port correctly. */
{
sds port = sdsfromlonglong(server.port);
err = sendSynchronousCommand(fd,"REPLCONF","listening-port",port,
NULL);
sdsfree(port);
/* Ignore the error if any, not all the Redis versions support
* REPLCONF listening-port. */
if (err) {
redisLog(REDIS_NOTICE,"(non critical): Master does not understand REPLCONF listening-port: %s", err);
sdsfree(err);
}
}
/* Issue the SYNC command */
if (syncWrite(fd,"SYNC\r\n",6,server.repl_syncio_timeout*1000) == -1) {
redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
strerror(errno));
goto error;
}
/* Prepare a suitable temp file for bulk transfer */
while(maxtries--) {
snprintf(tmpfile,256,
"temp-%d.%ld.rdb",(int)server.unixtime,(long int)getpid());
dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644);
if (dfd != -1) break;
sleep(1);
}
if (dfd == -1) {
redisLog(REDIS_WARNING,&quot;Opening the temp file needed for MASTER <-> SLAVE synchronization: %s&quot;,strerror(errno));
goto error;
}
/* Setup the non blocking download of the bulk file. */
if (aeCreateFileEvent(server.el,fd, AE_READABLE,readSyncBulkPayload,NULL)
== AE_ERR)
{
redisLog(REDIS_WARNING,&quot;Can't create readable event for SYNC&quot;);
goto error;
}
server.repl_state = REDIS_REPL_TRANSFER;
server.repl_transfer_size = -1;
server.repl_transfer_read = 0;
server.repl_transfer_last_fsync_off = 0;
server.repl_transfer_fd = dfd;
server.repl_transfer_lastio = server.unixtime;
server.repl_transfer_tmpfile = zstrdup(tmpfile);
return;
error:
close(fd);
server.repl_transfer_s = -1;
server.repl_state = REDIS_REPL_CONNECT;
return;
}
  readSyncBulkPayload函数负责从连接中读取主服务器建立的同步文件,可分多次读完所有同步数据。在数据超过8MB后,每次读取操作都会进行写磁盘操作,如果在最后才进行写磁盘操作可能会造成极大的延迟。



/* Asynchronously read the SYNC payload we receive from a master */
#define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */
void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
char buf;
ssize_t nread, readlen;
off_t left;
REDIS_NOTUSED(el);
REDIS_NOTUSED(privdata);
REDIS_NOTUSED(mask);
/* If repl_transfer_size == -1 we still have to read the bulk length
* from the master reply. */
if (server.repl_transfer_size == -1) {
if (syncReadLine(fd,buf,1024,server.repl_syncio_timeout*1000) == -1) {
redisLog(REDIS_WARNING,
&quot;I/O error reading bulk count from MASTER: %s&quot;,
strerror(errno));
goto error;
}
if (buf == '-') {
redisLog(REDIS_WARNING,
&quot;MASTER aborted replication with an error: %s&quot;,
buf+1);
goto error;
} else if (buf == '\0') {
/* At this stage just a newline works as a PING in order to take
* the connection live. So we refresh our last interaction
* timestamp. */
server.repl_transfer_lastio = server.unixtime;
return;
} else if (buf != '$') {
redisLog(REDIS_WARNING,&quot;Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?&quot;);
goto error;
}
server.repl_transfer_size = strtol(buf+1,NULL,10);
redisLog(REDIS_NOTICE,
&quot;MASTER <-> SLAVE sync: receiving %ld bytes from master&quot;,
server.repl_transfer_size);
return;
}
/* Read bulk data */
left = server.repl_transfer_size - server.repl_transfer_read;
readlen = (left < (signed)sizeof(buf)) ? left : (signed)sizeof(buf);
nread = read(fd,buf,readlen);
if (nread <= 0) {
redisLog(REDIS_WARNING,&quot;I/O error trying to sync with MASTER: %s&quot;,
(nread == -1) ? strerror(errno) : &quot;connection lost&quot;);
replicationAbortSyncTransfer();
return;
}
server.repl_transfer_lastio = server.unixtime;
if (write(server.repl_transfer_fd,buf,nread) != nread) {
redisLog(REDIS_WARNING,&quot;Write error or short write writing to the DB dump file needed for MASTER <-> SLAVE synchronization: %s&quot;, strerror(errno));
goto error;
}
server.repl_transfer_read += nread;
/* Sync data on disk from time to time, otherwise at the end of the transfer
* we may suffer a big delay as the memory buffers are copied into the
* actual disk. */
if (server.repl_transfer_read >=
server.repl_transfer_last_fsync_off + REPL_MAX_WRITTEN_BEFORE_FSYNC)
{
off_t sync_size = server.repl_transfer_read -
server.repl_transfer_last_fsync_off;
rdb_fsync_range(server.repl_transfer_fd,
server.repl_transfer_last_fsync_off, sync_size);
server.repl_transfer_last_fsync_off += sync_size;
}
/* Check if the transfer is now complete */
if (server.repl_transfer_read == server.repl_transfer_size) {
if (rename(server.repl_transfer_tmpfile,server.rdb_filename) == -1) {
redisLog(REDIS_WARNING,&quot;Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s&quot;, strerror(errno));
replicationAbortSyncTransfer();
return;
}
redisLog(REDIS_NOTICE, &quot;MASTER <-> SLAVE sync: Loading DB in memory&quot;);
emptyDb();
/* Before loading the DB into memory we need to delete the readable
* handler, otherwise it will get called recursively since
* rdbLoad() will call the event loop to process events from time to
* time for non blocking loading. */
aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE);
if (rdbLoad(server.rdb_filename) != REDIS_OK) {
redisLog(REDIS_WARNING,&quot;Failed trying to load the MASTER synchronization DB from disk&quot;);
replicationAbortSyncTransfer();
return;
}
/* Final setup of the connected slave <- master link */
zfree(server.repl_transfer_tmpfile);
close(server.repl_transfer_fd);
server.master = createClient(server.repl_transfer_s);
server.master->flags |= REDIS_MASTER;
server.master->authenticated = 1;
server.repl_state = REDIS_REPL_CONNECTED;
redisLog(REDIS_NOTICE, &quot;MASTER <-> SLAVE sync: Finished with success&quot;);
/* Restart the AOF subsystem now that we finished the sync. This
* will trigger an AOF rewrite, and when done will start appending
* to the new file. */
if (server.aof_state != REDIS_AOF_OFF) {
int retry = 10;
stopAppendOnly();
while (retry-- && startAppendOnly() == REDIS_ERR) {
redisLog(REDIS_WARNING,&quot;Failed enabling the AOF after successful master synchrnization! Trying it again in one second.&quot;);
sleep(1);
}
if (!retry) {
redisLog(REDIS_WARNING,&quot;FATAL: this slave instance finished the synchronization with its master, but the AOF can't be turned on. Exiting now.&quot;);
exit(1);
}
}
}
return;
error:
replicationAbortSyncTransfer();
return;
}

  在从服务器数据未同步完的情况下,如果从服务器没有设置只读,是可以进行数据写入的。如果设置了只读,在同步中,只有主节点可以执行写操作,其他节点都不可以执行写操作。从服务器收到完整同步文件后清空内存再加载rdb文件到内存。
  

版权声明:本文为博主原创文章,未经博主允许不得转载。
页: [1]
查看完整版本: redis源码分析:主从模式中从服务器同步策略