preface

There are two kinds of Redis persistence: AOF and RDB. The previous article covered the logic of AOF: AOF appends changes incrementally, and the AOF file is periodically rewritten to reduce its size.

Main body

An RDB file is a snapshot of Redis. A snapshot is a full backup file that stores all the data in the database at the current point in time.

RDB can be triggered in two ways, actively and passively:

  • Active trigger: invoke the SAVE or BGSAVE command
    • SAVE: the main process performs the save logic itself, which blocks the main process if the data volume is large
    • BGSAVE: the main process forks a child process to perform the save, so the main process is not blocked
  • Passive trigger: configured in the configuration file, for example `save 900 1`. Multiple such lines can be configured. With `save 900 1`, a BGSAVE is started once at least 1 change has been made to the database and more than 900 seconds have passed since the last save

Because the snapshot can be written by a child process, the main process is not blocked and keeps executing Redis commands in the meantime, yet this does not make the data seen by the child process inconsistent.

When a child process is forked, it shares the same memory pages with the parent process, so at that moment the data in the DB is identical for both. When the parent process later changes DB data, the affected memory page is copied first (copy-on-write), so the child process still sees the data as it was when it was forked.

Bgsave command

Internally, BGSAVE runs the same save logic as SAVE, only inside a child process, so it is enough to look at BGSAVE.

/* BGSAVE [SCHEDULE]
 *
 * Starts a background RDB save of server.rdb_filename through
 * rdbSaveBackground(). The only accepted argument is "schedule"
 * (case-insensitive); anything else is answered with a syntax error. */
void bgsaveCommand(client *c) {
    int schedule = 0;

    /* Only "BGSAVE" and "BGSAVE SCHEDULE" are valid forms. */
    if (c->argc > 1) {
        if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"schedule")) {
            schedule = 1;
        } else {
            addReply(c,shared.syntaxerr);
            return;
        }
    }

    rdbSaveInfo rsi, *rsiptr;
    rsiptr = rdbPopulateSaveInfo(&rsi);

    if (server.rdb_child_pid != -1) {
        /* An RDB child already exists: refuse, even when SCHEDULE is given. */
        addReplyError(c,"Background save already in progress");
    } else if (hasActiveChildProcess()) {
        /* Some other child process is active. With SCHEDULE, only raise the
         * rdb_bgsave_scheduled flag instead of failing; without it, report
         * the error to the client. */
        if (schedule) {
            server.rdb_bgsave_scheduled = 1;
            addReplyStatus(c,"Background saving scheduled");
        } else {
            addReplyError(c,
            "Another child process is active (AOF?) : can't BGSAVE right now. "
            "Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenever "
            "possible.");
        }
    } else if (rdbSaveBackground(server.rdb_filename,rsiptr) == C_OK) {
        addReplyStatus(c,"Background saving started");
    } else {
        addReply(c,shared.err);
    }
}

BGSAVE supports the SCHEDULE argument. When SCHEDULE is given and another child process is currently active, rdb_bgsave_scheduled is set to 1 instead of returning an error, and the BGSAVE is started later, the next time this flag is checked:

/* Start a previously scheduled BGSAVE. This requires that no child process
 * is active and that either the last background save succeeded or more than
 * CONFIG_BGSAVE_RETRY_DELAY seconds have passed since the last attempt. The
 * flag is cleared only when the background save was actually started. */
if (!hasActiveChildProcess() && server.rdb_bgsave_scheduled) {
    int retry_allowed =
        server.lastbgsave_status == C_OK ||
        server.unixtime-server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY;

    if (retry_allowed) {
        rdbSaveInfo info;
        rdbSaveInfo *infoptr = rdbPopulateSaveInfo(&info);

        if (rdbSaveBackground(server.rdb_filename,infoptr) == C_OK) {
            server.rdb_bgsave_scheduled = 0;
        }
    }
}
Copy the code

This code is in the serverCron method, which checks rdb_bgsave_scheduled and, when it is set, calls rdbSaveBackground

/* Fork a child process that writes the dataset to 'filename' via rdbSave().
 *
 * Returns C_ERR if another child process is already active or if the fork
 * fails; returns C_OK in the parent once the child has been started. The
 * child exits with status 0 when rdbSave() returned C_OK and 1 otherwise. */
int rdbSaveBackground(char *filename, rdbSaveInfo *rsi) {
    pid_t childpid;

    if (hasActiveChildProcess()) return C_ERR;

    /* Remember the dirty counter at fork time so the number of changes made
     * while the save is running can be computed once it completes. */
    server.dirty_before_bgsave = server.dirty;
    server.lastbgsave_try = time(NULL);
    openChildInfoPipe();

    if ((childpid = redisFork()) == 0) {
        int retval;

        /* Child: set the process title and CPU affinity, then save. */
        redisSetProcTitle("redis-rdb-bgsave");
        redisSetCpuAffinity(server.bgsave_cpulist);
        retval = rdbSave(filename,rsi);
        if (retval == C_OK) {
            sendChildCOWInfo(CHILD_INFO_TYPE_RDB, "RDB");
        }
        exitFromChild((retval == C_OK) ? 0 : 1);
    } else {
        /* Parent */
        if (childpid == -1) {
            /* Capture errno before any other call can overwrite it. */
            int fork_errno = errno;

            closeChildInfoPipe();
            server.lastbgsave_status = C_ERR;
            serverLog(LL_WARNING,"Can't save in background: fork: %s",
                strerror(fork_errno));
            return C_ERR;
        }
        serverLog(LL_NOTICE,"Background saving started by pid %d",
            (int)childpid);
        server.rdb_save_time_start = time(NULL);
        server.rdb_child_pid = childpid;
        server.rdb_child_type = RDB_CHILD_TYPE_DISK;
        return C_OK;
    }
    /* Only reachable from the child branch, and only if exitFromChild()
     * returns (presumably it does not -- TODO confirm). */
    return C_OK;
}
Copy the code

rdbSaveBackground is very similar to the AOF rewrite code and follows a similar flow; its main job is to call rdbSave

/* Save the dataset to 'filename'.
 *
 * The data is first written to a temporary file named "temp-<pid>.rdb",
 * flushed and fsync'ed, and then renamed over 'filename'. Returns C_OK on
 * success. On any failure the error is logged, the temporary file is removed
 * (when it was created) and C_ERR is returned. */
int rdbSave(char *filename, rdbSaveInfo *rsi) {
    char tmpfile[256];
    char cwd[MAXPATHLEN]; /* Current working dir path for error messages. */
    FILE *fp = NULL;
    rio rdb;
    int error = 0;

    /* Open the temporary file. */
    snprintf(tmpfile,sizeof(tmpfile),"temp-%d.rdb", (int) getpid());
    fp = fopen(tmpfile,"w");
    if (!fp) {
        /* Save errno first: getcwd() may overwrite it. */
        int saved_errno = errno;
        char *cwdp = getcwd(cwd,MAXPATHLEN);
        serverLog(LL_WARNING,
            "Failed opening the RDB file %s (in server root dir %s) "
            "for saving: %s",
            filename,
            cwdp ? cwdp : "unknown",
            strerror(saved_errno));
        return C_ERR;
    }

    rioInitWithFile(&rdb,fp);
    startSaving(RDBFLAGS_NONE);

    if (server.rdb_save_incremental_fsync)
        rioSetAutoSync(&rdb,REDIS_AUTOSYNC_BYTES);

    /* Write the actual snapshot. */
    if (rdbSaveRio(&rdb,&error,RDBFLAGS_NONE,rsi) == C_ERR) {
        errno = error;
        goto werr;
    }

    /* Make sure the data reaches the disk before the rename. */
    if (fflush(fp) == EOF) goto werr;
    if (fsync(fileno(fp)) == -1) goto werr;
    if (fclose(fp) == EOF) {
        /* The stream is gone even when fclose() fails: it must not be
         * closed a second time in the error path. */
        fp = NULL;
        goto werr;
    }
    fp = NULL;

    /* Move the temporary file to its final name. */
    if (rename(tmpfile,filename) == -1) {
        /* Save errno first: getcwd() may overwrite it. */
        int saved_errno = errno;
        char *cwdp = getcwd(cwd,MAXPATHLEN);
        serverLog(LL_WARNING,
            "Error moving temp DB file %s on the final "
            "destination %s (in server root dir %s): %s",
            tmpfile,
            filename,
            cwdp ? cwdp : "unknown",
            strerror(saved_errno));
        unlink(tmpfile);
        stopSaving(0);
        return C_ERR;
    }

    serverLog(LL_NOTICE,"DB saved on disk");
    server.dirty = 0;
    server.lastsave = time(NULL);
    server.lastbgsave_status = C_OK;
    stopSaving(1);
    return C_OK;

werr:
    serverLog(LL_WARNING,"Write error saving DB on disk: %s", strerror(errno));
    if (fp) fclose(fp);
    unlink(tmpfile);
    stopSaving(0);
    return C_ERR;
}

Copy the code

Open the temporary file and save it by calling the rdbSaveRio method

/* Write the whole dataset in RDB format to the rio stream 'rdb'.
 *
 * Layout written, in order: the "REDIS<version>" magic, the info aux fields,
 * the modules "before RDB" aux data, every non-empty database, the Lua
 * scripts (only when 'rsi' is non-NULL), the modules "after RDB" aux data,
 * the EOF opcode and finally the 8-byte checksum.
 *
 * Returns C_OK on success. On failure returns C_ERR and, when 'error' is not
 * NULL, stores the current errno into '*error'. */
int rdbSaveRio(rio *rdb, int *error, int rdbflags, rdbSaveInfo *rsi) {
    dictIterator *di = NULL;
    dictEntry *de;
    char magic[10];
    int j;
    uint64_t cksum;
    size_t processed = 0;

    if (server.rdb_checksum)
        rdb->update_cksum = rioGenericUpdateChecksum;

    /* Write the magic string: "REDIS" followed by the 4-digit version. */
    snprintf(magic,sizeof(magic),"REDIS%04d",RDB_VERSION);
    if (rdbWriteRaw(rdb,magic,9) == -1) goto werr;
    if (rdbSaveInfoAuxFields(rdb,rdbflags,rsi) == -1) goto werr;
    if (rdbSaveModulesAux(rdb, REDISMODULE_AUX_BEFORE_RDB) == -1) goto werr;

    for (j = 0; j < server.dbnum; j++) {
        redisDb *db = server.db+j;
        dict *d = db->dict;
        if (dictSize(d) == 0) continue; /* Empty databases are skipped. */
        di = dictGetSafeIterator(d);

        /* Write the SELECT DB opcode followed by the database index. */
        if (rdbSaveType(rdb,RDB_OPCODE_SELECTDB) == -1) goto werr;
        if (rdbSaveLen(rdb,j) == -1) goto werr;

        /* Write the RESIZE DB opcode followed by the size of the main dict
         * and the size of the expires dict. */
        uint64_t db_size, expires_size;
        db_size = dictSize(db->dict);
        expires_size = dictSize(db->expires);
        if (rdbSaveType(rdb,RDB_OPCODE_RESIZEDB) == -1) goto werr;
        if (rdbSaveLen(rdb,db_size) == -1) goto werr;
        if (rdbSaveLen(rdb,expires_size) == -1) goto werr;

        /* Iterate over the dict and write every entry. */
        while ((de = dictNext(di)) != NULL) {
            sds keystr = dictGetKey(de);
            robj key, *o = dictGetVal(de);
            long long expire;

            /* Look up the key's expire and save it together with the
             * key-value pair. */
            initStaticStringObject(key,keystr);
            expire = getExpire(db,&key);
            if (rdbSaveKeyValuePair(rdb,&key,o,expire) == -1) goto werr;

            /* When writing an AOF preamble, read the diff from the parent
             * each time more than AOF_READ_DIFF_INTERVAL_BYTES additional
             * bytes have been processed. */
            if (rdbflags & RDBFLAGS_AOF_PREAMBLE &&
                rdb->processed_bytes > processed+AOF_READ_DIFF_INTERVAL_BYTES)
            {
                processed = rdb->processed_bytes;
                aofReadDiffFromParent();
            }
        }
        dictReleaseIterator(di);
        di = NULL; /* So that the error path does not release it again. */
    }

    /* If we are storing the replication information on disk, persist
     * the script cache as well: on successful PSYNC after a restart, we need
     * to be able to process any EVALSHA inside the replication backlog the
     * master will send us. */
    if (rsi && dictSize(server.lua_scripts)) {
        di = dictGetIterator(server.lua_scripts);
        while ((de = dictNext(di)) != NULL) {
            robj *body = dictGetVal(de);
            if (rdbSaveAuxField(rdb,"lua",3,body->ptr,sdslen(body->ptr)) == -1)
                goto werr;
        }
        dictReleaseIterator(di);
        di = NULL; /* So that the error path does not release it again. */
    }

    if (rdbSaveModulesAux(rdb, REDISMODULE_AUX_AFTER_RDB) == -1) goto werr;

    /* Write the EOF opcode. */
    if (rdbSaveType(rdb,RDB_OPCODE_EOF) == -1) goto werr;

    /* CRC64 checksum. It will be zero if checksum computation is disabled,
     * the loading code skips the check in this case. */
    cksum = rdb->cksum;
    memrev64ifbe(&cksum);
    if (rioWrite(rdb,&cksum,8) == 0) goto werr;
    return C_OK;

werr:
    if (error) *error = errno;
    if (di) dictReleaseIterator(di);
    return C_ERR;
}
Copy the code

Here is what rdbSaveRio does:

  • First it writes the file header, such as the magic number
  • It loops over the databases, first writing the SELECT opcode and the database index
  • It writes the number of keys and the number of keys with an expiration, then traverses the dict and writes every entry to the file
  • It writes the Lua scripts and, when finished, the EOF marker and the checksum

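A plain RDB save does not require the child process to communicate with the parent process. However, the AOF rewrite can be configured to write its file in the same format as RDB (the RDBFLAGS_AOF_PREAMBLE flag), and in that case the child needs to read the data sent by the parent process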

The RDB stores the following information: LRU or LFU information, expiration information, Type information, and key-Val information

Passive implementation

/* Check the configured save points (e.g. "save 900 1"). A background save is
 * started for the first save point for which all of the following hold:
 *  - at least sp->changes changes are pending (server.dirty),
 *  - more than sp->seconds seconds have passed since the last save,
 *  - the last background save succeeded, or more than
 *    CONFIG_BGSAVE_RETRY_DELAY seconds have passed since the last attempt. */
for (j = 0; j < server.saveparamslen; j++) {
    struct saveparam *sp = server.saveparams+j;

    if (server.dirty >= sp->changes &&
        server.unixtime-server.lastsave > sp->seconds &&
        (server.unixtime-server.lastbgsave_try >
         CONFIG_BGSAVE_RETRY_DELAY ||
         server.lastbgsave_status == C_OK))
    {
        serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...",
            sp->changes, (int)sp->seconds);
        rdbSaveInfo rsi, *rsiptr;
        rsiptr = rdbPopulateSaveInfo(&rsi);
        rdbSaveBackground(server.rdb_filename,rsiptr);
        /* One matching save point is enough: stop scanning. */
        break;
    }
}

serverCron periodically checks the configured save points and starts a BGSAVE when one of them matches

void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) {
    Change dirty to the difference between RBD task execution and RBD task completion
    if(! bysignal && exitcode ==0) {
        serverLog(LL_NOTICE,
            "Background saving terminated with success");
        server.dirty = server.dirty - server.dirty_before_bgsave;
        server.lastsave = time(NULL);
        server.lastbgsave_status = C_OK;
        Lastbgsave_status: retry next time
    } else if(! bysignal && exitcode ! =0) {
        serverLog(LL_WARNING, "Background saving error");
        server.lastbgsave_status = C_ERR;
        // Exit via signal
    } else {
        mstime_t latency;

        serverLog(LL_WARNING,
            "Background saving terminated by signal %d", bysignal);
        latencyStartMonitor(latency);
        // Delete temporary files
        rdbRemoveTempFile(server.rdb_child_pid);
        latencyEndMonitor(latency);
        latencyAddSampleIfNeeded("rdb-unlink-temp-file",latency);
        if(bysignal ! = SIGUSR1) server.lastbgsave_status = C_ERR; } server.rdb_child_pid =- 1;
    server.rdb_child_type = RDB_CHILD_TYPE_NONE;
    server.rdb_save_time_last = time(NULL)-server.rdb_save_time_start;
    server.rdb_save_time_start = - 1; updateSlavesWaitingBgsave((! bysignal && exitcode ==0)? C_OK : C_ERR, RDB_CHILD_TYPE_DISK); }Copy the code

Like AOF, RDB also has to do some finishing work once the child process ends; this function is that handler, and the comments show what happens in each case.

conclusion

As you can see, the RDB execution flow is relatively simple. This article does not cover the RDB file format or the related compression algorithm, since these are not important to me for now.