preface

React components are written in JSX, which cannot be executed or rendered to DOM nodes directly. Babel first compiles the JSX into executable code: calls to React.createElement(). For example, the JSX below is compiled by Babel into the code that follows it:

// JSX source (before compilation)
const App = <div><h1>{userName}</h1></div>
ReactDOM.render(App, document.getElementById('root'));

// Output produced by Babel for the JSX above: every element becomes a
// React.createElement(tag, attrs, ...children) call.
var App = React.createElement("div", null, 
    React.createElement("h1", null, userName)
);
ReactDOM.render(App, document.getElementById('root'));
Copy the code

As you can see, App is a JSX element that, once compiled, becomes a call to React.createElement(). The signature of React.createElement looks roughly like this:

React.createElement(tag, attrs, ...children);

The first argument is the tag name, the second is the attributes object, and the remaining arguments are zero or more children. For a self-closing (unary) tag, only the first two arguments are generated, as follows:

// JSX
const App = <input type="button" value="click me" />;

// Compiled output
var App = React.createElement('input', {type: 'button', value: 'click me'});

Now that we’ve seen some of the changes from JSX to object code, can we simulate the process?

Next we simulate the compilation process through two steps:

  1. JSX is parsed into AST tree data structure.

  2. Generate object code from this tree structure.

Compile JSX syntax to generate ast Element node tree structure

The compiled AST tree structure:

First let’s take a look at the JSX syntax to compile and the ast Element data structure to generate. This will help us to implement the compilation process:

// JSX
const App = (`
  <div className="container">
    <p style={style}>{greet('scott')}</p>
    <div>
      <p>this is jsx-like code</p>
      <i className="icon"/>
      <p>parsing it now</p>
      <img className="icon"/>
    </div>
    <input type="button" value="i am a button"/>
    <em/>
  </div>
`);

let root = parseJSX(App);
console.log('root ast', JSON.stringify(root, null, 2));

// AST tree structure (console output)
root ast {
  "tag": "div",
  "attrs": {
    "className": {
      "type": "staticAttr",
      "value": "container"
    }
  },
  "children": [
    {
      "tag": "p",
      "attrs": {
        "style": {
          "type": "dynamicAttr",
          "value": "style"
        }
      },
      "children": [
        {
          "type": "exprText",
          "value": "greet('scott')"
        }
      ]
    },
    {
      "tag": "div",
      "attrs": {},
      "children": [
        {
          "tag": "p",
          "attrs": {},
          "children": [
            {
              "type": "staticText",
              "value": "this is jsx-like code"
            }
          ]
        },
        {
          "tag": "i",
          "attrs": {
            "className": {
              "type": "staticAttr",
              "value": "icon"
            }
          },
          "children": []
        },
        {
          "tag": "p",
          "attrs": {},
          "children": [
            {
              "type": "staticText",
              "value": "parsing it now"
            }
          ]
        },
        {
          "tag": "img",
          "attrs": {
            "className": {
              "type": "staticAttr",
              "value": "icon"
            }
          },
          "children": []
        }
      ]
    },
    {
      "tag": "input",
      "attrs": {
        "type": {
          "type": "staticAttr",
          "value": "button"
        },
        "value": {
          "type": "staticAttr",
          "value": "i am a button"
        }
      },
      "children": []
    },
    {
      "tag": "em",
      "attrs": {},
      "children": []
    }
  ]
}

Each element node is made up of tag, attrs, and children attributes.

parseJSX

Closures are used to implement the local scope of variables. Since many variables will be used and they are only used for the current function, closures are used to avoid creating them multiple times and to avoid contaminating global variables.

/**
 * Parses a JSX-like template string into a tree of element nodes.
 *
 * Each element node has the shape `{ tag, attrs, children }`:
 *   - `attrs` maps an attribute name to `{ type: 'staticAttr' | 'dynamicAttr', value }`;
 *   - `children` holds element nodes and text nodes
 *     `{ type: 'staticText' | 'exprText', value }`.
 *
 * Limitations kept from the tutorial implementation: attributes must have the
 * form `key="text"` or `key={expr}` (boolean attributes without a value are
 * not supported), and `{}` expressions inside text may not contain nested
 * braces or the characters '<' / newline.
 *
 * @param {string} jsx - The template to parse; it must contain one root element.
 * @returns {{tag: string, attrs: Object, children: Array}} The root element node.
 * @throws {TypeError} If `jsx` is not a string.
 * @throws {Error} If the input ends before the current construct is terminated,
 *     or a closing tag appears with no open element.
 */
const parseJSX = function() {
    const TAG_LEFT = '<';
    const TAG_RIGHT = '>';
    const CLOSE_SLASH = '/';
    const WHITE_SPACE = ' ';
    const ATTR_EQUAL = '=';
    const DOUBLE_QUOTE = '"';
    const LEFT_CURLY = '{';
    const RIGHT_CURLY = '}';

    let index = -1;     // position of the last consumed character
    let stack = [];     // elements whose closing tag has not been seen yet
    let source = '';    // JSX text being parsed
    let parent = null;  // element that receives the next child

    // True for the whitespace characters that may separate tag names and attributes.
    function isSpace(ch) {
        return ch === WHITE_SPACE || ch === '\n' || ch === '\t' || ch === '\r';
    }

    // Moves `index` to the next occurrence of `target` after the current position.
    function seekCharacter(target) {
        const found = source.indexOf(target, index + 1);
        if (found === -1) {
            throw new Error(`parseJSX: expected '${target}' after position ${index}`);
        }
        index = found;
    }

    /**
     * Collects the characters between the current position and `target`
     * without moving `index`.
     *
     * Returns `{ idx, str }` where `str` is the collected content and `idx` is
     * the position just before the terminating character. When looking for an
     * attribute name (`ATTR_EQUAL`) and the tag ends first, `idx` is -1.
     */
    function explore(target) {
        // Only tag names and attribute names are terminated by the end of the tag;
        // text and attribute values may legitimately contain '>' and '/'.
        const stopsAtTagEnd = target === WHITE_SPACE || target === ATTR_EQUAL;
        let rangeContext = '';
        let depth = 0; // nesting level of '{' inside a dynamic attribute value

        for (let i = index + 1; i < source.length; i++) {
            const ch = source.charAt(i);

            if (stopsAtTagEnd && ch === TAG_RIGHT) {
                return { idx: target === WHITE_SPACE ? i - 1 : -1, str: rangeContext };
            }

            if (target === RIGHT_CURLY) {
                if (ch === LEFT_CURLY) {
                    depth++;
                } else if (ch === RIGHT_CURLY && depth > 0) {
                    // Closes a nested brace, not the attribute value itself.
                    depth--;
                    rangeContext += ch;
                    continue;
                }
            }

            const isHit = target === WHITE_SPACE ? isSpace(ch) : ch === target;
            if (isHit) {
                return { idx: i - 1, str: rangeContext };
            }

            // The slash of a self-closing tag ('<em/>') is not part of the name.
            if (stopsAtTagEnd && ch === CLOSE_SLASH) {
                continue;
            }
            rangeContext += ch;
        }

        throw new Error(`parseJSX: unexpected end of input while looking for '${target}'`);
    }

    // Advances `index` past any whitespace that follows the current position.
    function skipSpace() {
        while (isSpace(source.charAt(index + 1))) {
            index++;
        }
    }

    // Splits a trimmed text run into static-text and `{expression}` nodes.
    function splitText(text) {
        const exprPattern = /\{.+?\}/g;
        const nodes = [];
        let cursor = 0;
        let match = exprPattern.exec(text);

        while (match !== null) {
            const before = text.substring(cursor, match.index).trim();
            if (before) {
                nodes.push({ type: 'staticText', value: before });
            }
            nodes.push({
                type: 'exprText',
                value: match[0].substring(1, match[0].length - 1),
            });
            cursor = match.index + match[0].length;
            match = exprPattern.exec(text);
        }

        if (cursor < text.length) {
            nodes.push({ type: 'staticText', value: text.substring(cursor) });
        }
        return nodes;
    }

    // Parses the next text run, closing tag or opening tag, then recurses.
    function parseTag() {
        if (stack.length > 0) {
            // Inside an element: anything before the next '<' is text content.
            const rangeResult = explore(TAG_LEFT);
            const resultStr = rangeResult.str.trim();
            if (resultStr.length > 0) {
                parent.children.push(...splitText(resultStr));
                index = rangeResult.idx;
                parseTag();
                return parent;
            }
        }

        seekCharacter(TAG_LEFT);

        // Closing tag: '</div>'
        if (source.charAt(index + 1) === CLOSE_SLASH) {
            seekCharacter(CLOSE_SLASH);
            const endResult = explore(TAG_RIGHT);
            if (stack.length === 0) {
                throw new Error(`parseJSX: unexpected closing tag '${endResult.str.trim()}'`);
            }
            if (stack.length === 1) {
                // The root element is complete.
                parent = null;
                return stack.pop();
            }
            const completeTag = stack.pop();
            parent = stack[stack.length - 1];
            parent.children.push(completeTag);
            // Step onto the '>' so that text following the closing tag is still seen.
            index = endResult.idx + 1;
            parseTag();
            return completeTag;
        }

        // Opening tag: read the tag name.
        const tagResult = explore(WHITE_SPACE);
        const element = {
            tag: tagResult.str,
            attrs: {},
            children: [],
        };
        index = tagResult.idx;

        // Read attributes until the end of the tag is reached.
        while (true) {
            skipSpace();
            const attrKeyResult = explore(ATTR_EQUAL);
            if (attrKeyResult.idx === -1) break;

            index = attrKeyResult.idx + 1; // position of '='
            skipSpace();

            let attrValResult;
            if (source.charAt(index + 1) === LEFT_CURLY) {
                // Dynamic attribute: key={expression}
                seekCharacter(LEFT_CURLY);
                const found = explore(RIGHT_CURLY);
                attrValResult = {
                    idx: found.idx,
                    info: { type: 'dynamicAttr', value: found.str },
                };
            } else {
                // Static attribute: key="text"
                seekCharacter(DOUBLE_QUOTE);
                const found = explore(DOUBLE_QUOTE);
                attrValResult = {
                    idx: found.idx,
                    info: { type: 'staticAttr', value: found.str },
                };
            }

            index = attrValResult.idx + 1; // closing quote / brace
            element.attrs[attrKeyResult.str.trim()] = attrValResult.info;
        }

        seekCharacter(TAG_RIGHT);

        if (source.charAt(index - 1) === CLOSE_SLASH) {
            // Self-closing tag: already complete, attach it and keep parsing siblings.
            if (parent) {
                parent.children.push(element);
                parseTag();
            }
        } else {
            // Element with a closing tag: its children are parsed next.
            stack.push(element);
            parent = element;
            parseTag();
        }
        return element;
    }

    return function(jsx) {
        if (typeof jsx !== 'string') {
            throw new TypeError('parseJSX: expected a string');
        }
        // Reset the shared state so the parser can be reused, also after an error.
        source = jsx;
        index = -1;
        stack = [];
        parent = null;
        return parseTag();
    };
}();

This method is quite long, with nearly 200 lines of code to implement. Let’s break it down one by one according to the scenario:

  • Variable declarations

First we declare some constants. We can see that these constants are associated with the element node and are used when we match tags, attributes, and variables. We then declare the variable information used in the parsing process, focusing first on index, which is used to record the position of the current string being parsed.

// Excerpt: constants and parser state shared by the helpers inside the closure.
const parseJSX = function() {
    const TAG_LEFT = '<';
    const TAG_RIGHT = '>';
    const CLOSE_SLASH = '/';
    const WHITE_SPACE = ' ';
    const ATTR_EQUAL = '=';
    const DOUBLE_QUOTE = '"';
    const LEFT_CURLY = '{';
    const RIGHT_CURLY = '}';

    let index = -1;     // current parse position
    let stack = [];     // stack holding the parent nodes parsed so far
    let source = '';    // JSX code to parse
    let parent = null;  // parent node of the current element
    
    // ...
}();
Copy the code
  • Get the compilation template

The source variable is used to hold the JSX syntax we are compiling. Since we are using parseJSX as a function that is returned through a closure, we save the JSX code when we call this return function:

const parseJSX = function() {
    // ... (constants, the index / stack / source / parent state and the helpers)

    /**
     * Entry point returned by the closure.
     *
     * @param {string} jsx - JSX source to parse.
     * @returns {*} Whatever parseTag() returns for the root tag.
     */
    return function(jsx) {
        // The state lives in the closure and outlives a call, so reset it
        // here; otherwise a second call would continue from the old position.
        index = -1;
        stack = [];
        parent = null;
        source = jsx;
        return parseTag();
    };
}();
Copy the code
  • Parse the opening character ‘<‘ of the opening tag

Once we have the source, we call the parseTag method to compile it. We use the opening tag <div className="container"> from the example above as a reference while walking through the compilation.
function parseTag() {
    // ...
    seekCharacter(TAG_LEFT); // move to the opening '<' of the next tag
    // ...
}

/**
 * Advances the shared parse position `index` to the next occurrence of
 * `target` in `source`.
 *
 * @param {string} target - Single character to look for.
 * @throws {Error} If `target` does not occur after the current position
 *     (previously this case looped forever).
 */
function seekCharacter(target) {
    while (++index < source.length) {
        if (source.charAt(index) === target) return;
    }
    throw new Error(`seekCharacter: '${target}' not found`);
}

The seekCharacter method is simple: it advances the parse position index to the position of the specified target character. Here we pass in TAG_LEFT, so index ends up at the position of '<'.

  • Resolving tag names

Once we have the TAG_LEFT position, we can use the space between the tagName and the first attribute as the right target to match the tagName in the interval:

function parseTag() {
    // ...
    let tagResult = explore(WHITE_SPACE); // read the tag name, e.g. 'div'
    let element = {
        tag: tagResult.str, // tag name
        attrs: {},
        children: []
    };
    index = tagResult.idx; // update the parse position
    // ...
}

/**
 * Collects the characters between the current parse position and `target`
 * without moving `index`.
 *
 * @param {string} target - Character that ends the range.
 * @returns {{idx: number, str: string}} `str` is the collected content
 *     (slashes are left out); `idx` is the position just before the character
 *     that ended the range, or -1 when '>' was reached while looking for a
 *     target other than WHITE_SPACE or TAG_RIGHT.
 * @throws {Error} If the input ends first (previously this looped forever).
 */
function explore(target) {
    let i = index;
    let found = false;
    let rangeContext = ''; // content collected so far

    while (!found) {
        if (i + 1 >= source.length) {
            throw new Error(`explore: '${target}' not found`);
        }
        let ch = source.charAt(++i);

        if (ch === TAG_RIGHT) {
            if (target === WHITE_SPACE) {
                // Tag without attributes, e.g. '<div>': the name ends at '>'
                return { idx: i - 1, str: rangeContext };
            } else if (target !== TAG_RIGHT) {
                // The tag ended before the target was found
                return { idx: -1, str: rangeContext };
            }
        }

        if (ch === target) {
            found = true;
        } else if (ch !== CLOSE_SLASH) {
            // Slashes are not added to the collected content
            rangeContext += ch;
        }
    }

    return {
        idx: i - 1,        // position just before the target
        str: rangeContext  // content of the matched range
    };
}

We first call explore with WHITE_SPACE (a space) as the target in order to match the tag name. There are two possible outcomes:

  1. <div className=...>: the WHITE_SPACE between '<div' and 'className' is matched; we take the tag name and return it;
  2. <div>: no WHITE_SPACE is matched; instead the tag terminator TAG_RIGHT is reached, at which point we stop and return the tag name parsed so far.

After getting the tag name, create the element node and update the parse index position. After getting the tag name, parse the attributes on the tag name.

  • Parsing tag attributes
function parseTag() {
    // ...
    while (true) {
        skipSpace(); // skip the spaces between tag name and attribute, or between attributes
        let attrKeyResult = explore(ATTR_EQUAL); // read the attribute key
        if (attrKeyResult.idx === -1) break;     // '>' reached: no more attributes

        index = attrKeyResult.idx + 1; // move to the position of '='
        let attrValResult = {};

        if (source.charAt(index + 1) === LEFT_CURLY) {
            // Dynamic attribute: key={expression}
            seekCharacter(LEFT_CURLY);
            attrValResult = explore(RIGHT_CURLY); // content between '{' and '}'
            attrValResult = {
                idx: attrValResult.idx,
                info: {
                    type: 'dynamicAttr',
                    value: attrValResult.str // expression of the dynamic attribute
                }
            };
        } else {
            // Static attribute: key="text"
            // Seek the opening quote instead of writing index++, so anything
            // between '=' and the quote is skipped as well.
            seekCharacter(DOUBLE_QUOTE);
            attrValResult = explore(DOUBLE_QUOTE);
            attrValResult = {
                idx: attrValResult.idx,
                info: {
                    type: 'staticAttr',
                    value: attrValResult.str
                }
            };
        }

        index = attrValResult.idx + 1; // position of the closing quote / brace
        element.attrs[attrKeyResult.str] = attrValResult.info; // add the attribute
    }
    // ...
}

// Advances `index` over the run of spaces that follows the current position.
function skipSpace() {
    while (true) {
        let ch = source.charAt(index + 1);
        if (ch !== WHITE_SPACE) {
            break;
        } else {
            index++;
        }
    }
}

First skip the tag name and attribute or the space between attribute and attribute, and then call explore to match ATTR_EQUAL (that is, =);

If the element has no (further) attributes, the tag terminator '>' is reached first, so `if (attrKeyResult.idx === -1) break;` exits the attribute-parsing loop.

If there is a matching attribute on the element, the attribute key is obtained. The attribute value is then parsed to determine whether it is a dynamic or static attribute based on whether it starts with a ‘{‘. Finally, the parsed attribute information is added to element.attrs.

  • Recursively call parseTag to parse down
function parseTag() {
    // ...
    seekCharacter(TAG_RIGHT); // move to the '>' that ends the tag

    if (source.charAt(index - 1) === CLOSE_SLASH) {
        // Unary (self-closing) tag: the element is complete, so append it to
        // the parent and go on with the next tag.
        if (parent) {
            parent.children.push(element);
            parseTag();
        }
    } else {
        // Element with a closing tag: push it on the stack and continue with
        // its children or its closing tag.
        stack.push(element);
        parent = element;
        parseTag();
    }

    return element;
}

After property parsing is complete, run seekCharacter to move index to the > position. If it is a unary tag, parseTag is called to parse the next tag after the current element is parsed. If it’s not a unary tag, save the current element node to the stack and call parseTag to parse the element’s children.

  • Parse text nodes in elements
function parseTag() {
    if (stack.length > 0) {
        // Content between the current position and the next '<'
        let rangeResult = explore(TAG_LEFT);
        let resultStr = rangeResult.str.replace(/^\n|\n$/g, '').trim();

        if (resultStr.length > 0) {
            // There is text before the next '<': split it into static text
            // and {expression} parts.
            let exprPositions = [];
            // replace() is used only for its callback, which receives every
            // match ('{greet(...)}') and the index of its '{'. The g flag is
            // required so that all expressions are visited, not just the first.
            resultStr.replace(/\{.+?\}/g, function(match, startIndex) {
                let endIndex = startIndex + match.length - 1; // index of '}'
                exprPositions.push({ startIndex, endIndex });
            });

            let strAry = [];
            let currIndex = 0;
            while (currIndex < resultStr.length) {
                // No expression left: the remainder is static text
                if (exprPositions.length < 1) {
                    strAry.push({
                        type: 'staticText',
                        value: resultStr.substring(currIndex),
                    });
                    break;
                }

                let expr = exprPositions.shift();

                // Static text in front of the expression
                const text = resultStr.substring(currIndex, expr.startIndex).trim();
                if (text) {
                    strAry.push({
                        type: 'staticText',
                        value: text,
                    });
                }

                // The expression itself, without the surrounding braces
                strAry.push({
                    type: 'exprText',
                    value: resultStr.substring(expr.startIndex + 1, expr.endIndex),
                });
                currIndex = expr.endIndex + 1;
            }

            parent.children.push(...strAry);
            index = rangeResult.idx; // continue right before the next '<'
            parseTag();
            return parent;
        }
    }
    // ...
}

When parseTag is entered again, it matches the content between the current parse position index and the next '<', ignoring surrounding whitespace. If there is any content, the element contains text nodes. {} is then used to distinguish expression text from static text. Finally, the text nodes are added to parent.children.

  • Closed label parsing
function parseTag() {
    // ...
    if (source.charAt(index + 1) === CLOSE_SLASH) {
        // Closing tag, e.g. '</div>'
        seekCharacter(CLOSE_SLASH);
        let endResult = explore(TAG_RIGHT); // read up to the '>' of the closing tag

        if (stack.length === 1) {
            // Only the root is open: parsing is finished
            parent = null;
            return stack.pop();
        }

        let completeTag = stack.pop();
        parent = stack[stack.length - 1]; // the enclosing element becomes the parent again
        parent.children.push(completeTag);

        // endResult.idx is the position just before '>'. Step onto the '>'
        // itself; otherwise the next explore(TAG_LEFT) stops at that '>' and
        // any text following the closing tag is dropped.
        index = endResult.idx + 1;
        parseTag();
        return completeTag; // the current tag is fully parsed
    }
    // ...
}

Convert the AST element tree structure into executable code

let code = transform(root);
console.log(code);

// Generated code (console output)
React.createElement( 'div', {className: "container"},
  React.createElement( 'p', {style: style}, greet('scott') ),
  React.createElement( 'div', null,
    React.createElement( 'p', null, "this is jsx-like code" ),
    React.createElement( 'i', {className: "icon"} ),
    React.createElement( 'p', null, "parsing it now" ),
    React.createElement( 'img', {className: "icon"} )
  ),
  React.createElement( 'input', {type: "button"value: "i am a button"} ),
  React.createElement( 'em', null )
)

transform

/**
 * Generates the source text of the nested `React.createElement(...)` calls
 * for an element tree of `{ tag, attrs, children }` nodes.
 *
 * Static attribute values and static text are emitted as string literals;
 * dynamic attribute values and expression text are emitted verbatim.
 *
 * @param {{tag: string, attrs: Object, children: Array}} element - Root node.
 * @returns {string} A single JavaScript expression.
 */
const transform = function() {
    const IDENTIFIER = /^[A-Za-z_$][\w$]*$/;

    // Renders the attribute map as an object literal, or 'null' when it is empty.
    function processAttrs(attrs) {
        const pairs = Object.keys(attrs).map((key) => {
            const type = attrs[key].type;
            const value = attrs[key].value;
            // Names such as 'data-id' are not valid bare property names.
            const name = IDENTIFIER.test(key) ? key : JSON.stringify(key);
            // JSON.stringify quotes the text and escapes embedded quotes.
            const rendered = type === 'dynamicAttr' ? value : JSON.stringify(value);
            return `${name}: ${rendered}`;
        });
        if (pairs.length === 0) return 'null';
        // Entries must be comma separated, otherwise the literal is invalid.
        return '{' + pairs.join(', ') + '}';
    }

    // Renders one element node and, recursively, all of its children.
    function processElem(elem) {
        const args = [`'${elem.tag}'`, processAttrs(elem.attrs)];
        elem.children.forEach((child) => {
            if (child.tag) {
                args.push(processElem(child));
                return;
            }
            // Text node: expressions verbatim, static text as a string literal
            args.push(child.type === 'exprText' ? child.value : JSON.stringify(child.value));
        });
        return `React.createElement(${args.join(', ')})`;
    }

    return function(element) {
        return processElem(element);
    };
}();

React: Babel compiles JSX generated code