Java JVM multithreading MySQL Redis Kafka Docker RocketMQ Nginx MQ queue data structure concurrent programming concurrent pressure kill architecture, etc

Structure of SDS

The definition of SDS

sds.h

// define a char pointer typedef char * SDS; /* Note: sdshdr5 is never used, we just access the flags byte directly. * However is here to document the layout of type 5 SDS strings. */ struct __attribute__ ((__packed__)) sdshdr5 { unsigned char flags; /* 3 lsb of type, and 5 msb of string length */ char buf[]; }; //__attribute__ ((__packed__)) means that the structure is manually aligned. Struct __attribute__ ((__packed__)) sdshdr8 {// uint8_t len; /* used */ / uint8_t alloc; /* used */ / /* excluding the header and null terminator */ / only 3 valid bits, because the type is 0 to 4, all the 8 bits of flags are not used in unsigned char flags; /* 3 LSB of type, 5 unused bits */ / }; Struct __attribute__ ((__packed__)) sdshdr16 {uint16_t len; /* used */ uint16_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[]; }; Struct __attribute__ ((__packed__)) sdshdr32 {uint32_t len; /* used */ uint32_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[]; }; Struct __attribute__ ((__packed__)) sdshdr64 {uint64_t len; /* used */ uint64_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[]; };Copy the code

It can be seen that the structure of SDS is basically the same under different lengths, and the only different sdSHDR5 has not been used.

Initialization of SDS

/* Create a new sds string with the content specified by the 'init' pointer * and 'initlen'. * If NULL is used for 'init' the string is initialized with zero bytes. * If SDS_NOINIT is used, the buffer is left uninitialized; * * The string is always null-termined (all the sds strings are, always) so * even if you create an sds string with: * * mystring = sdsnewlen("abc",3); * * You can print the string with printf() as there is an implicit \0 at the * end of the string. However the string is binary safe and can contain * \0 characters in the middle, As the length is stored in the SDS header. */ / As the length is stored in the SDS header, the whole length is specified at the beginning of the SDS header. SDS sdsnewlen(const void *init, size_t initlen) {// This pointer points to the beginning of the entire SDS void *sh; // SDS is actually a pointer, but s points to the beginning of the struct buf. SDS char type = sdsReqType(initlen); // SDS char type = sdsReqType(initlen); /* Empty strings are usually created in order to append. Use type 8 * since type 5 is not good at this If (type == sds_type_5&& initlen == 0) type = SDS_TYPE_8; if (type == sds_type_5&& initlen == 0) type = SDS_TYPE_8; Int hdrlen = sdsHdrSize(type); // flag pointer to unsigned char *fp to indicate which type SDS is; Sh = s_malloc(hdrlen+initlen+1); If (sh == NULL) return NULL; If (init==SDS_NOINIT) init= NULL; // Clear 0 else if (! Memset (sh, 0, hdrlen+initlen+1); memset(sh, 0, hdrlen+ 1); SDS head s = (char*)sh+hdrlen; Len,alloc,flag,buf, so now s points to buf, fp points to the address of flag. fp = ((unsigned char*)s)-1; / / below is based on the switch (type) {case SDS_TYPE_5: {* fp = type | (initlen < < SDS_TYPE_BITS); break; Struct SDSHDR SDS_HDR_VAR(struct SDSHDR SDS_HDR_VAR(8,s)); Sh ->len = initlen; sh->alloc = initlen; // the address corresponding to fp is assigned the corresponding type. The value of type is 1-4 *fp = type; break; } case SDS_TYPE_16: { SDS_HDR_VAR(16,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } case SDS_TYPE_32: { SDS_HDR_VAR(32,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } case SDS_TYPE_64: { SDS_HDR_VAR(64,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; }} // If neither is null, init the corresponding string is assigned to s if (initlen && init) // Buf is assigned to memcpy(s, init, initlen); // assign a terminator s[initlen] = '\0'; return s; } // The following method, Static inline char sdsReqType(size_t string_size) {if (string_size < 1<<5) return SDS_TYPE_5; if (string_size < 1<<8) return SDS_TYPE_8; if (string_size < 1<<16) return SDS_TYPE_16; #if (LONG_MAX == LLONG_MAX) if (string_size < 1ll<<32) return SDS_TYPE_32; return SDS_TYPE_64; #else return SDS_TYPE_32; Static inline int sdsHdrSize(char type) {//SDS_TYPE_MASK removes unnecessary bits SDS_TYPE_MASK For 00000111 switch(type&SDS_TYPE_MASK) {case SDS_TYPE_5: return sizeof(struct sdshdr5); case SDS_TYPE_8: return sizeof(struct sdshdr8); case SDS_TYPE_16: return sizeof(struct sdshdr16); case SDS_TYPE_32: return sizeof(struct sdshdr32); case SDS_TYPE_64: return sizeof(struct sdshdr64); } return 0; } #define SDS_TYPE_5 0 #define SDS_TYPE_8 1 #define SDS_TYPE_16 2 #define SDS_TYPE_32 3 #define SDS_TYPE_64 4 #define SDS_TYPE_MASK 7 #define SDS_TYPE_BITS 3 Struct #define SDS_HDR_VAR(T,s) struct sdshdr## sh = (void*)((s)-(sizeof(struct sdshdr##T))); Struct #define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T)))) #define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)Copy the code

SDS diagram

SDS scale

/* Enlarge the free space at the end of the sds string so that the caller * is sure that after calling this function can  overwrite up to addlen * bytes after the end of the string, plus one more byte for nul term. * * Note: this does not change the *length* of the sds string as returned * by sdslen(), But only the free buffer space we have. */ Addlen-leftlen SDS sdsMakeRoomFor(SDS s, size_t addlen) {void *sh, *newsh; Size_t Avail = sdsavail(s); size_t len, newlen; Oldtype = s[-1] &sds_type_mask; oldType = s[-1] &sds_type_mask; int hdrlen; /* Return ASAP if there is enough space left. */ / If (Avail >= addlen) Return s; Len = sdslen(s); //sh returns to the starting position of the SDS. sh = (char*)s-sdsHdrSize(oldtype); Newlen = (len+addlen); If (newlen < SDS_MAX_PREALLOC) // but the actual length will be allocated twice, if the space is less than the maximum threshold newlen *= 2; else newlen += SDS_MAX_PREALLOC; // Get the type of the new length type = sdsReqType(newlen); /* Don't use type 5: the user is appending to the string and type 5 is * not able to remember empty space, so sdsMakeRoomFor() must be called * at every appending operation. */ if (type == SDS_TYPE_5) type = SDS_TYPE_8; hdrlen = sdsHdrSize(type); If (oldType ==type) {// if (oldtype==type) {// if (oldtype==type) {// if (oldtype==type) {// if (oldtype==type) {// if (oldtype==type) {// if (oldtype==type) {// if (oldtype==type) {// if (oldtype==type) {// Newsh = s_realloc(sh, hdrlen+newlen+1); if (newsh == NULL) return NULL; S = (char*)newsh+hdrlen; } else {/* Since the header size changes, need to move the string forward, * and can't use realloc */ Address content is not reusable, so find new space. newsh = s_malloc(hdrlen+newlen+1); if (newsh == NULL) return NULL; Memcpy ((char*)newsh+hdrlen, s, //newsh+hdrlen = SDS buf //newsh+hdrlen = SDS buf len+1); S_free (sh); S = (char*)newsh+hdrlen; S [-1] = type; Sdssetlen (s, len); } // allocate the value of alloc sdssetalloc(s, newlen); // return new SDS return s; Static inline void sdssetLen (SDS s, size_t newlen) {unsigned char flags = s[-1]; switch(flags&SDS_TYPE_MASK) { case SDS_TYPE_5: { unsigned char *fp = ((unsigned char*)s)-1; *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS); } break; case SDS_TYPE_8: SDS_HDR(8,s)->len = newlen; break; case SDS_TYPE_16: SDS_HDR(16,s)->len = newlen; break; case SDS_TYPE_32: SDS_HDR(32,s)->len = newlen; break; case SDS_TYPE_64: SDS_HDR(64,s)->len = newlen; break; }} // Get the current SDS, available length. static inline size_t sdsavail(const sds s) { unsigned char flags = s[-1]; switch(flags&SDS_TYPE_MASK) { case SDS_TYPE_5: { return 0; } case SDS_TYPE_8: { SDS_HDR_VAR(8,s); return sh->alloc - sh->len; } case SDS_TYPE_16: { SDS_HDR_VAR(16,s); return sh->alloc - sh->len; } case SDS_TYPE_32: { SDS_HDR_VAR(32,s); return sh->alloc - sh->len; } case SDS_TYPE_64: { SDS_HDR_VAR(64,s); return sh->alloc - sh->len; } } return 0; } /* sdsalloc() = sdsavail() + sdslen() * flags = s[-1]; switch(flags&SDS_TYPE_MASK) { case SDS_TYPE_5: return SDS_TYPE_5_LEN(flags); case SDS_TYPE_8: return SDS_HDR(8,s)->alloc; case SDS_TYPE_16: return SDS_HDR(16,s)->alloc; case SDS_TYPE_32: return SDS_HDR(32,s)->alloc; case SDS_TYPE_64: return SDS_HDR(64,s)->alloc; } return 0; Static inline void sdssetalloc(SDS s, size_t newlen) {unsigned char flags = s[-1]; switch(flags&SDS_TYPE_MASK) { case SDS_TYPE_5: /* Nothing to do, this type has no total allocation info. */ break; case SDS_TYPE_8: SDS_HDR(8,s)->alloc = newlen; break; case SDS_TYPE_16: SDS_HDR(16,s)->alloc = newlen; break; case SDS_TYPE_32: SDS_HDR(32,s)->alloc = newlen; break; case SDS_TYPE_64: SDS_HDR(64,s)->alloc = newlen; break; }}Copy the code

The above mainly describes how SDS expands capacity. It can be seen that the biggest feature of SDS is that it can pre-allocate memory and is very efficient in expanding capacity. You don’t have to copy and copy

Why use memory misalignment (very important, this is the only place where SDS is covered throughout the web)

Before looking at the following first to have a basic concept of memory alignment, why to memory alignment, here is related to the working principle of the CPU, can baidu CPU working principle, mainly with memory address and register mapping relationship, but there are two theorems can be understood here.

  1. A 32-bit CPU can read 32 bits of data from memory per cycle.

  2. For this reason and the cause of the register, the CPU time to read the address of the beginning is in multiples of four, for example, we are going to read the address 2 the data length is 4 above, you will need to two cycles, the CPU must first read data from 0 to 3 address above, and then from 3 to 7 above the address of the data, here we can see the memory alignment. So the question is why redis author, who uses memory and CPU extremely, should use unaligned SDS, because the structure of SDS is destined to be unaligned. See below how our structure represents the situation in memory in the aligned state.

! [image](https://upload-images.jianshu.io/upload_images/24613101-5a9a800d08d03e35.png? imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)

image

It is impossible to access the flag from the position of the sdSHDR8 pointer without knowing the type of the sdSHDR8 pointer. In theory, this will not sacrifice too much memory. Performance is also guaranteed, but this is only the structure below the 32-bit system, if the 64-bit system, it may be a different structure. Ok, so somebody said can we put the pointer at the beginning of flag. The answer is no, 1, so we won’t be perfectly compatible with string, 2, so we’ll introduce all kinds of type judgment adjustments, so Redis ends up using memory misalignment.

conclusion

  1. It can be seen that SDS is binary safe compared with ordinary C language string, because string has no length identifier. When there are multiple end symbols in an address, string cannot access the content after the first /0. The structure of SDS makes up for this shortcoming.

  2. SDS is also incomparable to String for dynamic expansion. For example, the structure of SDS allows SDS to pre-allocate memory, even on the basis of the original length type unchanged, it can use the original memory. To achieve dynamic capacity expansion, the above code is also very clear. If a string is very long, if you want to do append on the original string, you need to copy the content to the new address, which is a very expensive thing, and SDS can solve this problem. Especially when converting network IO data into specific command operations, append operations on strings are often required. So SDS structure is very suitable for Redis.

  3. And SDS can also be used directly as a string, clever pointer use also makes SDS perfectly compatible with string. As for why WE need to talk about SDS, the most basic data in Redis besides dictionary is SDS, so before we talk about how REDis transforms network messages into specific commands, we need to be familiar with the structure of SDS.