We start with a lot of restrictions on the database. Now, he will:

  • Two operations are supported: insert a row of data and print all rows.
  • It is saved in memory (not persisted to disk).
  • Support for a simple, hard-coded table.

Our hard-coded table holds users and is shown in the following table:

column type
id integer
username varchar(30)
email varchar(255)
This is a simple architecture, but it allows us to support multiple data types and multiple data type sizes.

The insert statement looks like this:

insert  1 cstack [email protected]
Copy the code

This means that we need to modify our prepare_statement function to parse the parameters.

 if (strncmp(input_buffer->buffer, "insert".6) = =0) {
     statement->type = STATEMENT_INSERT;
    int args_assigned = sscanf(
        input_buffer->buffer, "insert %d %s %s", &(statement->row_to_insert.id),
        statement->row_to_insert.username, statement->row_to_insert.email);
    if (args_assigned < 3) {
      return PREPARE_SYNTAX_ERROR;
    }
     return PREPARE_SUCCESS;
   }
   if (strcmp(input_buffer->buffer, "select") = =0) {
Copy the code

We store these parsed parameters in a new Row data structure in the statement object:

#define COLUMN_USERNAME_SIZE 32
#define COLUMN_EMAIL_SIZE 255
typedef struct {
  uint32_t id;
  char username[COLUMN_USERNAME_SIZE];
  char email[COLUMN_EMAIL_SIZE];
} Row;

 typedef struct {
  StatementType type;
  Row row_to_insert; 
 } Statement;
Copy the code

Now we need to copy this data into the same data structure to represent a table. SQLite uses b-tree to query, insert, and delete. We will do the same as SQLITE, like b-Tree, which will group rows into Pages, but instead of arranging the pages as a tree, it will arrange them as an array.

Here’s my plan:

  • Store rows in memory.
  • Store as many rows per page as possible
  • The rows of each page are serialized into a compact representation
  • pagesGive it euromemory only when needed
  • Holds a fixed-size array of Pointers to the page

First, we’ll define a compact representation of the row:

+#define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute)
+
+const uint32_t ID_SIZE = size_of_attribute(Row, id);
+const uint32_t USERNAME_SIZE = size_of_attribute(Row, username);
+const uint32_t EMAIL_SIZE = size_of_attribute(Row, email);
+const uint32_t ID_OFFSET = 0;
+const uint32_t USERNAME_OFFSET = ID_OFFSET + ID_SIZE;
+const uint32_t EMAIL_OFFSET = USERNAME_OFFSET + USERNAME_SIZE;
+const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;
Copy the code

This means that the serialization layout looks like this: | column | size (bytes) | offset| | —- | —- | —-| —-| | id | 4 | 0| | username | 32 | 4 | | email | 255 | 36 | | total | 291 | 36 |

void serialize_row(Row* source, void* destination) {
  memcpy(destination + ID_OFFSET, &(source->id), ID_SIZE);
  memcpy(destination + USERNAME_OFFSET, &(source->username), USERNAME_SIZE);
  memcpy(destination + EMAIL_OFFSET, &(source->email), EMAIL_SIZE);
}

void deserialize_row(void* source, Row* destination) {
  memcpy(&(destination->id), source + ID_OFFSET, ID_SIZE);
  memcpy(&(destination->username), source + USERNAME_OFFSET, USERNAME_SIZE);
  memcpy(&(destination->email), source + EMAIL_OFFSET, EMAIL_SIZE);
}
Copy the code

Next comes a “Table” structure that points to a page of rows and tracks the number of rows

const uint32_t PAGE_SIZE = 4096;
#define TABLE_MAX_PAGES 100
const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES;

typedef struct {
  uint32_t num_rows;
  void* pages[TABLE_MAX_PAGES];
} Table;
Copy the code

I set the page size to 4KB because it is the same as the page size used in virtual memory systems for most computer architectures. This means that every page we have in the database has a page in the operating system. The operating system moves pages in and out of memory as a unit, rather than separating them. When we allocate memory, I limit arbitrary Settings to 100 pages. When we convert to a tree mechanism, the maximum size of our database is limited only by the maximum file size, although we still limit how many pages can be kept in memory at a time.

Lines should not go beyond page boundaries, and since pages may not exist next to each other in memory, this assumption makes it easier to read/write lines. Speaking of which, here’s how we can determine the read/write location in memory for a particular row:

void* row_slot(Table* table, uint32_t row_num) {
  uint32_t page_num = row_num / ROWS_PER_PAGE;
  void* page = table->pages[page_num];
  if (page == NULL) {
    page = table->pages[page_num] = malloc(PAGE_SIZE);
  }
  uint32_t row_offset = row_num % ROWS_PER_PAGE;
  uint32_t byte_offset = row_offset * ROW_SIZE;
  return page + byte_offset;
}
Copy the code

Now we will use the execute_statement function to read/write data from our table structure.

ExecuteResult execute_insert(Statement* statement, Table* table) {
  if (table->num_rows >= TABLE_MAX_ROWS) {
    return EXECUTE_TABLE_FULL;
  }

  Row* row_to_insert = &(statement->row_to_insert);

  serialize_row(row_to_insert, row_slot(table, table->num_rows));
  table->num_rows += 1;

  return EXECUTE_SUCCESS;
}

ExecuteResult execute_select(Statement* statement, Table* table) {
  Row row;
  for (uint32_t i = 0; i < table->num_rows; i++) {
    deserialize_row(row_slot(table, i), &row);
    print_row(&row);
  }
  return EXECUTE_SUCCESS;
}

ExecuteResult execute_statement(Statement* statement, Table* table) {
   switch (statement->type) {
     case (STATEMENT_INSERT):
      return execute_insert(statement, table);
     case (STATEMENT_SELECT):
      returnexecute_select(statement, table); }}Copy the code

Finally, we’ll initialize the table, create a function that frees memory separately, and handle some errors.

 Table* new_table(a) {
  Table* table = malloc(sizeof(Table));
  table->num_rows = 0;
  for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
     table->pages[i] = NULL;
  }
  return table;
}

void free_table(Table* table) {
    for (int i = 0; table->pages[i]; i++) {
	free(table->pages[i]);
    }
    free(table);
}
Copy the code
 int main(int argc, char* argv[]) {
  Table* table = new_table();
   InputBuffer* input_buffer = new_input_buffer();
   while (true) { print_prompt(); @ @- 105..13 +203.22@ @int main(int argc, char* argv[]) {
     switch (prepare_statement(input_buffer, &statement)) {
       case (PREPARE_SUCCESS):
         break;
      case (PREPARE_SYNTAX_ERROR):
        printf("Syntax error. Could not parse statement.\n");
        continue;
       case (PREPARE_UNRECOGNIZED_STATEMENT):
         printf("Unrecognized keyword at start of '%s'.\n",
                input_buffer->buffer);
         continue;
     }

    switch (execute_statement(&statement, table)) {
      case (EXECUTE_SUCCESS):
        printf("Executed.\n");
        break;
      case (EXECUTE_TABLE_FULL):
        printf("Error: Table full.\n");
        break; }}}Copy the code

These changes we can actually save to our database.

~ ./db
db > insert 1 cstack [email protected]
Executed.
db > insert 2 bob [email protected]
Executed.
db > select
(1, cstack, [email protected])
(2, bob, [email protected])
Executed.
db > insert foo bar 1
Syntax error. Could not parse statement.
db > .exit
~
Copy the code

Now we’ll have plenty of time to write some tests for two reasons:

  • We plan to significantly change the data structure of the stored tables, and the test will capture regression.
  • There are a few edge cases that we haven’t tested manually (e.g. filling out forms)

We’ll discuss these issues in the next chapter, and here’s the code in full:

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdint.h>

 typedef struct {
   char* buffer;
 typedef struct {
 } InputBuffer;

typedef enum { EXECUTE_SUCCESS, EXECUTE_TABLE_FULL } ExecuteResult;
typedef enum {
  META_COMMAND_SUCCESS,
  META_COMMAND_UNRECOGNIZED_COMMAND
} MetaCommandResult;

typedef enum {
  PREPARE_SUCCESS,
  PREPARE_SYNTAX_ERROR,
  PREPARE_UNRECOGNIZED_STATEMENT
 } PrepareResult;

typedef enum { STATEMENT_INSERT, STATEMENT_SELECT } StatementType;

#define COLUMN_USERNAME_SIZE 32
#define COLUMN_EMAIL_SIZE 255
typedef struct {
  uint32_t id;
  char username[COLUMN_USERNAME_SIZE];
  char email[COLUMN_EMAIL_SIZE];
} Row;

typedef struct {
  StatementType type;
  Row row_to_insert;
} Statement;
#define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute)

const uint32_t ID_SIZE = size_of_attribute(Row, id);
const uint32_t USERNAME_SIZE = size_of_attribute(Row, username);
const uint32_t EMAIL_SIZE = size_of_attribute(Row, email);
const uint32_t ID_OFFSET = 0;
const uint32_t USERNAME_OFFSET = ID_OFFSET + ID_SIZE;
const uint32_t EMAIL_OFFSET = USERNAME_OFFSET + USERNAME_SIZE;
const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;

const uint32_t PAGE_SIZE = 4096;
#define TABLE_MAX_PAGES 100
const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES;

typedef struct {
  uint32_t num_rows;
  void* pages[TABLE_MAX_PAGES];
} Table;
void print_row(Row* row) {
  printf("(%d, %s, %s)\n", row->id, row->username, row->email);
}

void serialize_row(Row* source, void* destination) {
  memcpy(destination + ID_OFFSET, &(source->id), ID_SIZE);
  memcpy(destination + USERNAME_OFFSET, &(source->username), USERNAME_SIZE);
  memcpy(destination + EMAIL_OFFSET, &(source->email), EMAIL_SIZE);
}
void deserialize_row(void *source, Row* destination) {
  memcpy(&(destination->id), source + ID_OFFSET, ID_SIZE);
  memcpy(&(destination->username), source + USERNAME_OFFSET, USERNAME_SIZE);
  memcpy(&(destination->email), source + EMAIL_OFFSET, EMAIL_SIZE);
}

void* row_slot(Table* table, uint32_t row_num) {
  uint32_t page_num = row_num / ROWS_PER_PAGE;
  void *page = table->pages[page_num];
  if (page == NULL) {
     page = table->pages[page_num] = malloc(PAGE_SIZE);
  }
  uint32_t row_offset = row_num % ROWS_PER_PAGE;
  uint32_t byte_offset = row_offset * ROW_SIZE;
  return page + byte_offset;
}

Table* new_table(a) {
  Table* table = malloc(sizeof(Table));
  table->num_rows = 0;
  for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
     table->pages[i] = NULL;
  }
  return table;
}

void free_table(Table* table) {
  for (int i = 0; table->pages[i]; i++) {
     free(table->pages[i]);
  }
  free(table);
}

 InputBuffer* new_input_buffer(a) {
   InputBuffer* input_buffer = malloc(sizeof(InputBuffer));
   input_buffer->buffer = NULL;
   void close_input_buffer(InputBuffer* input_buffer) {
     free(input_buffer);
 }

MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table *table) {
  if (strcmp(input_buffer->buffer, ".exit") = =0) {
     close_input_buffer(input_buffer);
    free_table(table);
    exit(EXIT_SUCCESS);
 } else {
    returnMETA_COMMAND_UNRECOGNIZED_COMMAND; }}PrepareResult prepare_statement(InputBuffer* input_buffer, Statement* statement) {
  if (strncmp(input_buffer->buffer, "insert".6) = =0) {
    statement->type = STATEMENT_INSERT;
    int args_assigned = sscanf(
	input_buffer->buffer, "insert %d %s %s", &(statement->row_to_insert.id),
	statement->row_to_insert.username, statement->row_to_insert.email
	);
    if (args_assigned < 3) {
	return PREPARE_SYNTAX_ERROR;
    }
    return PREPARE_SUCCESS;
  }
  if (strcmp(input_buffer->buffer, "select") = =0) {
    statement->type = STATEMENT_SELECT;
    return PREPARE_SUCCESS;
  }

  return PREPARE_UNRECOGNIZED_STATEMENT;
}

ExecuteResult execute_insert(Statement* statement, Table* table) {
  if (table->num_rows >= TABLE_MAX_ROWS) {
     return EXECUTE_TABLE_FULL;
  }

  Row* row_to_insert = &(statement->row_to_insert);

  serialize_row(row_to_insert, row_slot(table, table->num_rows));
  table->num_rows += 1;

  return EXECUTE_SUCCESS;
}

ExecuteResult execute_select(Statement* statement, Table* table) {
  Row row;
  for (uint32_t i = 0; i < table->num_rows; i++) {
     deserialize_row(row_slot(table, i), &row);
     print_row(&row);
  }
  return EXECUTE_SUCCESS;
}

ExecuteResult execute_statement(Statement* statement, Table *table) {
  switch (statement->type) {
    case (STATEMENT_INSERT):
      	return execute_insert(statement, table);
    case (STATEMENT_SELECT):
	returnexecute_select(statement, table); }}int main(int argc, char* argv[]) {
  Table* table = new_table();
   InputBuffer* input_buffer = new_input_buffer();
   while (true) {
     print_prompt();
     read_input(input_buffer);
    if (input_buffer->buffer[0] = ='. ') {
      switch (do_meta_command(input_buffer, table)) {
        case (META_COMMAND_SUCCESS):
          continue;
        case (META_COMMAND_UNRECOGNIZED_COMMAND):
          printf("Unrecognized command '%s'\n", input_buffer->buffer);
          continue;
      }
    }

    Statement statement;
    switch (prepare_statement(input_buffer, &statement)) {
      case (PREPARE_SUCCESS):
        break;
      case (PREPARE_SYNTAX_ERROR):
	printf("Syntax error. Could not parse statement.\n");
	continue;
      case (PREPARE_UNRECOGNIZED_STATEMENT):
        printf("Unrecognized keyword at start of '%s'.\n",
               input_buffer->buffer);
        continue;
    }

    switch (execute_statement(&statement, table)) {
	case (EXECUTE_SUCCESS):
	    printf("Executed.\n");
	    break;
	case (EXECUTE_TABLE_FULL):
	    printf("Error: Table full.\n");
	    break; }}}Copy the code

Kiss!!!!! Scan the code or search my public account, there will be more wonderful content waiting for you