This analysis is based on lua version 5.3.1. Lua’s source code is relatively concise and looks laborious. Figuring out how a Lua script is compiled into bytecode and then executed in a Lua virtual machine is a complicated process. First, you need to understand how data is defined in Lua.

1. LUA type abstraction

The type in Lua is called Value. Value is defined as follows:

/*
** Union of all Lua values
*/
typedef union Value {
  struct GCObject *gc;    /* collectable objects */
  void *p;         /* light userdata */
  lua_CFunction f; /* light C functions */
  lua_Integer i;   /* integer numbers */
  lua_Number n;    /* float numbers */
} Value;

Copy the code

Value is a Union type. A Value, either a GCObject, or a light userdata/ light C functions/ INTEGER numbers/float numbers.

Then encapsulate the Union layer and add a type to identify the type of the value. The new type is TValue.

/*
** Tagged Values. This is the basic representation of values in Lua:
** an actual value plus a tag with its type.
*/

#define TValuefields	Value value_; lu_byte tt_

typedef struct TValue {
  TValuefields;
} TValue;

Copy the code

TVaulefileds is a macro. Expand the macro:

typedef struct TValue{
   Value value_;
   lua_byte _tt; // This is the type of the TValue
};
Copy the code

To illustrate, here’s the picture:

for

 void *p;         /* light userdata */
  lua_CFunction f; /* light C functions */
  lua_Integer i;   /* integer numbers */
  lua_Number n;    /* float numbers */
Copy the code

These types are easier to understand from annotations, but for GCObject, this is an abstract type of Lua, and objects representing this type can be garbage collected.

2. Lua garbage collector object type

As you can see from the figure above, there is an object type of TValue that can be garbage collected -GCObject.

2.1 GCObject Object definition

Lua itself is a dynamic language, and all objects are actually allocated on the heap, managed through a GCList linked list. So Lua uses the GCObject type for abstraction when it creates objects, which means that everything it creates is actually a GCObject object. As the name suggests, GCObject is an Object that can be garbage collected. The simple understanding is that in Lua VM, they are all objects managed by the garbage collection linked list.

Let’s look at the definition of the GCObject object:

/*
** Common Header for all collectable objects (in macro form, to be
** included in other objects)
*/

#define CommonHeader	GCObject *next; lu_byte tt; lu_byte marked


/*
** Common type has only the common header
*/
struct GCObject {
  CommonHeader;
};

Copy the code

You can see that each GCObject has a CommnHeader with three members:

  • Next pointer to prove that all variables in Lua are connected using a linked list;
  • Tt The current Object type
  • Marked indicates the collection status of the user, which will be introduced in the subsequent reanalysis of LUA garbage collection.

So what garbage collector types are supported within LUA? From the code analysis, we can see that there are five internal types, as follows:

  1. TString
  2. Udata
  3. Proto
  4. Closure
  5. Table

The following are definitions of several data types.

TString definition:

/*
** Header for string value; string bytes follow the end of this structure
** (aligned according to 'UTString'; see next).
*/
typedef struct TString {
  CommonHeader;
  lu_byte extra;  /* reserved words for short strings; "has hash" for longs */
  lu_byte shrlen;  /* length for short strings */
  unsigned int hash;
  union {
    size_t lnglen;  /* length for long strings */
    struct TString *hnext;  /* linked list for hash table */
  } u;
} TString;

Copy the code

Udata definition:

/*
** Header for userdata; memory area follows the end of this structure
** (aligned according to 'UUdata'; see next).
*/
typedef struct Udata {
  CommonHeader;
  lu_byte ttuv_;  /* user value's tag */
  struct Table *metatable;
  size_t len;  /* number of bytes */
  union Value user_;  /* user value */
} Udata;

Copy the code

Proto definition:

/*
** Function Prototypes
*/
typedef struct Proto {
  CommonHeader;
  lu_byte numparams;  /* number of fixed parameters */
  lu_byte is_vararg;
  lu_byte maxstacksize;  /* number of registers needed by this function */
  int sizeupvalues;  /* size of 'upvalues' */
  int sizek;  /* size of 'k' */
  int sizecode;
  int sizelineinfo;
  int sizep;  /* size of 'p' */
  int sizelocvars;
  int linedefined;
  int lastlinedefined;
  TValue *k;  /* constants used by the function */
  Instruction *code;  /* opcodes */
  struct Proto支那p;  /* functions defined inside the function */
  int *lineinfo;  /* map from opcodes to source lines (debug information) */
  LocVar *locvars;  /* information about local variables (debug information) */
  Upvaldesc *upvalues;  /* upvalue information */
  struct LClosure *cache;  /* last-created closure with this prototype */
  TString  *source;  /* used for debug information */
  GCObject *gclist;
} Proto;
Copy the code

Closures fall into two categories, Lua closures and C closures, defined as follows:

/*
** Closures
*/

#define ClosureHeader \
	CommonHeader; lu_byte nupvalues; GCObject *gclist

typedef struct CClosure {
  ClosureHeader;
  lua_CFunction f;
  TValue upvalue[1];  /* list of upvalues */
} CClosure;

typedef struct LClosure {
  ClosureHeader;
  struct Proto *p; // Prototype pointer to lua functions
  UpVal *upvals[1];  /* list of upvalues */
} LClosure;
Copy the code

The Table definition:

typedef struct Table {
  CommonHeader;
  lu_byte flags;  /* 1<<p means tagmethod(p) is not present */
  lu_byte lsizenode;  /* log2 of size of 'node' array */
  unsigned int sizearray;  /* size of 'array' array */
  TValue *array;  /* array part */
  Node *node;
  Node *lastfree;  /* any free position is before this position */
  struct Table *metatable;
  GCObject *gclist;
} Table;
Copy the code

From an object-oriented perspective, you can think of these five internal base types as inheriting from GCObject.

2.2 GCObject object creation process in Lua

2.2.1 Creating a GCObject object

Create an internal object using the same interface function luaC_newobj (lua_State *L, int tt, size_t sz).

  • Lua_Stata *L is the current Lua context
  • Tt Specifies the type of the object to be created
  • Sz Specifies the size of the object to create

Return value description:

  • GCObject creates a pointer to the abstract object

This function is a bit like an object-oriented factory method. If you pass in a desired object type, you will return a pointer to the created parent class, but this is really just a memory allocation operation. Each object has its own creation function, and each creation function calls luaC_newobj to allocate memory. LuaC_newobj: luaC_newobj

/*
** create a new collectable object (with given type and size) and link
** it to 'allgc' list.
*/
GCObject *luaC_newobj (lua_State *L, int tt, size_t sz) {
  global_State *g = G(L);
  GCObject *o = cast(GCObject *, luaM_newobject(L, novariant(tt), sz));
  o->marked = luaC_white(g); // mark this object as white
  o->tt = tt;  / / type
  o->next = g->allgc; // The newly created object is placed first in gclist
  g->allgc = o;
  return o;
}

#define luaM_newobject(L,tag,s)	luaM_realloc_(L, NULL, tag, (s))

/*
** generic allocation routine.
*/
void *luaM_realloc_ (lua_State *L, void *block, size_t osize, size_t nsize) {
  void *newblock;
  global_State *g = G(L);
  size_t realosize = (block) ? osize : 0; // Block is NULL
  lua_assert((realosize == 0) == (block == NULL));
#if defined(HARDMEMTESTS)
  if (nsize > realosize && g->gcrunning)
    luaC_fullgc(L, 1);  /* force a GC whenever possible */
#endif
  newblock = (*g->frealloc)(g->ud, block, osize, nsize);  // call l_alloc
  if (newblock == NULL && nsize > 0) {
    lua_assert(nsize > realosize);  /* cannot fail when shrinking a block */
    if (g->version) {  /* is state fully built? * /
      luaC_fullgc(L, 1);  /* try to free some memory... * /
      newblock = (*g->frealloc)(g->ud, block, osize, nsize);  /* try again */
    }
    if (newblock == NULL)
      luaD_throw(L, LUA_ERRMEM);
  }
  lua_assert((nsize == 0) == (newblock == NULL));
  g->GCdebt = (g->GCdebt + nsize) - realosize;
  return newblock;
}

Copy the code

The above function incorporates a lot of GC processing logic. This paper does not introduce the principle of garbage collection in detail. Frealloc (g->ud, block, osize, nsize); This function. And this function is actually realloc, requisition heap memory.

2.2.2 Conversion from abstract objects to concrete objects

Internal objects are created of type GCObjcet through the unified interface luaC_newobj function. LUA encapsulates a set of macro functions for this abstract structure to be converted into a concrete object structure:

/* macros to convert a GCObject into a specific value */
#define gco2ts(o)  \
	check_exp(novariant((o)->tt) == LUA_TSTRING, &((cast_u(o))->ts))
#define gco2u(o)  check_exp((o)->tt == LUA_TUSERDATA, &((cast_u(o))->u))
#define gco2lcl(o)  check_exp((o)->tt == LUA_TLCL, &((cast_u(o))->cl.l))
#define gco2ccl(o)  check_exp((o)->tt == LUA_TCCL, &((cast_u(o))->cl.c))
#define gco2cl(o)  \
	check_exp(novariant((o)->tt) == LUA_TFUNCTION, &((cast_u(o))->cl))
#define gco2t(o)  check_exp((o)->tt == LUA_TTABLE, &((cast_u(o))->h))
#define gco2p(o)  check_exp((o)->tt == LUA_TPROTO, &((cast_u(o))->p))
#define gco2th(o)  check_exp((o)->tt == LUA_TTHREAD, &((cast_u(o))->th))

Copy the code

3 summary

To sum up: