Chapter 2-02: Front-end regular expression magic (middle) — capture groups, backreferences

Source: Secrets of the JavaScript Ninja

The Front-end Road series continues with regular expressions!

[toc]

1. Predefined character classes

Regular expression syntax provides shorthand terms for commonly used classes of characters. They are listed below:

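| Predefined term | Matches |
| --- | --- |
| `.` | Any character except line terminators (`\n`, `\r`, `\u2028`, `\u2029`) |
| `\s` | Any whitespace character |
| `\S` | Any non-whitespace character |
| `\w` | Any word character (letter, digit, or underscore) |
| `\W` | Any non-word character |
| `\d` | Any digit |
| `\D` | Any non-digit character |
| `\b` | A word boundary |
| `\B` | A position that is not a word boundary |
| `\t` | Horizontal tab |
| `\v` | Vertical tab |
| `\f` | Form feed |
| `\r` | Carriage return |
| `\n` | Newline (line feed) |
| `\cA` – `\cZ` | Control characters; for example, `\cM` matches Ctrl+M |
| `\u0000` – `\uFFFF` | A character given by its four-digit hexadecimal Unicode code |
| `\x00` – `\xFF` | A character given by its two-digit hexadecimal ASCII code |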

Now, let’s write a small Demo according to the table above

1-1: means “.”

Matches any single character except line terminators such as \n and \r (a space is matched), e.g.:

// `.` matches any single character except line terminators
// (\n, \r, \u2028, \u2029). A space is an ordinary character and is matched.
//
// The regex has no `g` flag on purpose: with `g`, test() resumes searching
// from reg.lastIndex, so reusing one regex on several strings produces
// order-dependent results (a space would wrongly appear not to match).
const reg = /./;
const str = 'x';
const str2 = 123; // test() converts non-string arguments to a string: '123'
const str3 = ' ';
const str4 = '\n';
const str5 = '\r';
const res = reg.test(str);   // true
const res2 = reg.test(str2); // true
const res3 = reg.test(str3); // true  (a space is matched by `.`)
const res4 = reg.test(str4); // false (line feed is a line terminator)
const res5 = reg.test(str5); // false (carriage return is a line terminator)
Copy the code

1-2: means “\s”

Matches any whitespace character (space, tab, newline, carriage return, and so on), e.g.:

// `\s` matches any single whitespace character: space, tab, line feed (\n),
// carriage return (\r), form feed, vertical tab and other Unicode spaces.
// The regex has no `g` flag on purpose; see the pitfall note at the end.
const reg = /\s/;
const str = 'x';
const str2 = ' ';
const str3 = '\n';
const str4 = '\r';
const str5 = '\r\n';
const res = reg.test(str);    // false
const res2 = reg.test(str2);  // true
const res3 = reg.test(str3);  // true
const res4 = reg.test(str4);  // true
const res5 = reg.test(str5);  // true

// Pitfall: written as /\s/g, the same calls report `false` for '\n'.
// With the `g` flag, test() is stateful: after matching ' ' it leaves
// reg.lastIndex at 1, so the next call starts searching '\n' at index 1
// (past its only character), fails, and resets lastIndex to 0.
// Both \n (line feed) and \r (carriage return) are whitespace characters.
Copy the code

1-3: means “\S”

Matches non-whitespace characters (Space) eg:

// `\S` matches any single character that is not whitespace.
// No `g` flag: a global regex makes test() resume from reg.lastIndex, so
// results would depend on the order of the calls.
const reg = /\S/;
const str = 'x';
const str2 = ' ';
const res = reg.test(str);    // true
const res2 = reg.test(str2);  // false
Copy the code

1-4: means “\w”

Matches characters that can form words (word): letters, digits, and underscores, e.g.:

// `\w` matches any single word character: a letter, a digit or an underscore.
// test() returns true as soon as the string contains at least one of them.
//
// No `g` flag: with `g`, test() resumes from reg.lastIndex, and the last call
// would wrongly report `false` for 123 because the search would start past
// the end of the string.
const reg = /\w/;
const str = 'age';
const str2 = 'a1ge';
const str3 = 'a ge';
const str4 = 'a&ge';
const str5 = 123; // converted to the string '123' by test()
const res = reg.test(str);    // true
const res2 = reg.test(str2);  // true
const res3 = reg.test(str3);  // true
const res4 = reg.test(str4);  // true
const res5 = reg.test(str5);  // true (digits are word characters)
const arr = [res, res2, res3, res4, res5];
for (const result of arr) {
  console.log(result);
}
Copy the code

1-5: means “\W”

Matches characters that cannot form words (anything other than a letter, digit, or underscore), e.g.:

// `\W` matches any single character that is NOT a word character, i.e.
// anything other than a letter, a digit or an underscore.
// test() returns true as soon as the string contains at least one of them.
// The regex has no `g` flag on purpose; see the pitfall note at the end.
const reg = /\W/;
const str = 'age';
const str2 = 'a1ge';
const str3 = 'a ge';
const str4 = 'a&ge';
const str5 = 123; // converted to the string '123' by test()
const res = reg.test(str);    // false
const res2 = reg.test(str2);  // false
const res3 = reg.test(str3);  // true  (the space is a non-word character)
const res4 = reg.test(str4);  // true  ('&' is a non-word character)
const res5 = reg.test(str5);  // false

const arr = [res, res2, res3, res4, res5];
for (const result of arr) {
  console.log(result);
}

// Pitfall: written as /\W/g, the same calls report `false` for 'a&ge'.
// With the `g` flag, test() is stateful: matching the space in 'a ge' leaves
// reg.lastIndex at 2, so the search in 'a&ge' starts after the '&' and fails.
Copy the code

1-6: means “\d”

Match digit (Digit) eg:

// `\d` matches any single digit.
// No `g` flag: a global regex makes test() resume from reg.lastIndex, so
// results would depend on the order of the calls.
const reg = /\d/;
const str = 123; // converted to the string '123' by test()
const str2 = 'xxx';
const res = reg.test(str);    // true
const res2 = reg.test(str2);  // false
Copy the code

1-7: means “\D”

Match non-digit (Digit) eg:

// `\D` matches any single character that is not a digit.
// No `g` flag: a global regex makes test() resume from reg.lastIndex, so
// results would depend on the order of the calls.
const reg = /\D/;
const str = 123; // converted to the string '123' by test()
const str2 = 'xxx';
const res = reg.test(str);    // false
const res2 = reg.test(str2);  // true
Copy the code

1-7: means “\b”

Matches a word boundary: a position between a word character (letter, digit, or underscore) and a non-word character or the start/end of the string, e.g.:

// `\b` matches a position (not a character): the boundary between a word
// character (letter, digit, underscore) and a non-word character or the
// start/end of the string. A string has a word boundary only if it contains
// at least one word character.
// No `g` flag: test() on a global regex is stateful via reg.lastIndex.
const reg = /\b/;
const str = 123; // converted to the string '123' by test()
const str2 = '@';
const str3 = '_';
const str4 = '&';
const str5 = 'xxx';
const res = reg.test(str);    // true
const res2 = reg.test(str2);  // false ('@' is not a word character)
const res3 = reg.test(str3);  // true  ('_' is a word character)
const res4 = reg.test(str4);  // false ('&' is not a word character)
const res5 = reg.test(str5);  // true
Copy the code

1-8: means “\B”

Match non-word boundaries eg:

// `\B` matches any position that is NOT a word boundary: between two word
// characters, between two non-word characters, or between a non-word
// character and the start/end of the string.
// No `g` flag: test() on a global regex is stateful via reg.lastIndex.
const reg = /\B/;
const str = 123; // converted to the string '123' by test()
const str2 = '@';
const str3 = '_';
const str4 = '&';
const str5 = 'xxx';
const res = reg.test(str);    // true  (between '1' and '2')
const res2 = reg.test(str2);  // true  (start of string next to '@')
const res3 = reg.test(str3);  // false (both positions around '_' are boundaries)
const res4 = reg.test(str4);  // true
const res5 = reg.test(str5);  // true  (between two 'x' characters)
Copy the code

2. Grouping

If you want to apply an operator to a sequence of characters rather than to a single character, wrap the sequence in parentheses (). This is the concept of grouping.

For example, (ab)+ matches one or more consecutive occurrences of the string ab. Grouping with parentheses also creates what is called a capture.

Let’s take a look at some examples.

// Both demos search the same input string.
const str = 'bababababbababababaaabbb';

// demo-1: the group is quantified with `+`, so every match is the longest
// possible run of consecutive "ab" pairs.
let reg = /(ab)+/g;
let res = str.match(reg);
// ["abababab", "abababab", "ab"]

// demo-2: without the quantifier, every single "ab" is a separate match.
reg = /(ab)/g;
res = str.match(reg);
// ["ab", "ab", "ab", "ab", "ab", "ab", "ab", "ab", "ab"]
Copy the code

The + quantifier is greedy: (ab)+ matches the longest possible run of consecutive ab repetitions (ab × n).

3. The OR operator

The | character expresses an OR relationship. For example, a|b matches either the character a or the character b, e.g.:

// demo: `a|b` matches either an "a" or a "b"; the `g` flag makes match()
// return every occurrence instead of only the first one.
const reg = /a|b/g;
const findAll = (text) => text.match(reg);

const str = 'abcd';
const str2 = 'aabbccdd';
const res = findAll(str);   // ["a", "b"]
const res2 = findAll(str2); // ["a", "a", "b", "b"]
Copy the code

4. Backreference

A backreference refers, from inside a regular expression, to the text that an earlier capture group has already matched; that text then becomes what must be matched at the backreference's position. It is written as a backslash followed by the number of the capture group being referenced (for example, \1). e.g.:

// Group 1 captures the tag name of the opening tag; `\1` in the closing tag
// must then match exactly the same text, so the tags are forced to pair up.
const reg = /<(\w+)>(.+)<\/\1>/;

const [str, str2] = ['<p>1</p>', '<strong>2</strong>'];
const [res, res2] = [str, str2].map((markup) => markup.match(reg));
// res  → ["<p>1</p>", "p", "1"]
// res2 → ["<strong>2</strong>", "strong", "2"]
Copy the code

Still confused? 🤔 When a definition is hard to grasp, it often helps to look at it from a different angle. Put another way: the content captured by a group can be referenced either from program code outside the regular expression or from inside the regular expression itself, and referring back to captured content in this way is called backreferencing. This raises another question: what is a capture group?

4-1 capturing group

What is a capture group?

A capture group saves the content matched by a sub-expression of the regular expression into a numbered or explicitly named group in memory, so that it is easy to reference later. This reference can be made either inside or outside the regular expression.

There are two forms of capture group: the ordinary (numbered) capture group and the named capture group. When people say "capture group" without qualification, they usually mean the ordinary one. The syntax is as follows:

Ordinary capture group :(Expression)

Named capture group: (?<name>Expression)

Demo-1 Common capture group:

// Ordinary (numbered) capture groups: each pair of parentheses is numbered
// from 1 in the order of its opening parenthesis; index 0 is the whole match.
//
// No `g` flag: with `g`, match() returns only the list of whole matches and
// the individual capture groups would not be available in the result.
const str = '2018-08-30';
const generalReg = /(\d{4})-(\d{2})-(\d\d)/;
const res = str.match(generalReg);
// res[0] → "2018-08-30" (whole match)
// res[1] → "2018"
// res[2] → "08"
// res[3] → "30"
Copy the code

The capture groups in the expression above are numbered as follows:

| Number | Capture group | Matched content |
| --- | --- | --- |
| 0 | `(\d{4})-(\d{2})-(\d\d)` | 2018-08-30 |
| 1 | `(\d{4})` | 2018 |
| 2 | `(\d{2})` | 08 |
| 3 | `(\d\d)` | 30 |

Demo-2 Naming a capture group:

// Named capture groups: `(?<name>...)` captures like an ordinary group and
// additionally exposes the captured text under `groups.name` in the result.
// The groups are still numbered, so res[1]..res[3] work as well.
// Here the group called `date` captures the two-digit month component.
//
// No `g` flag: with `g`, match() returns only the list of whole matches and
// neither the numbered nor the named groups would be available.
const str = '2018-08-30';
const generalReg = /(?<year>\d{4})-(?<date>\d{2})-(?<day>\d\d)/;
const res = str.match(generalReg);
// res[0]          → "2018-08-30" (whole match)
// res.groups.year → "2018"
// res.groups.date → "08"
// res.groups.day  → "30"
Copy the code

The capture groups in the expression above are numbered and named as follows:

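| Number | Name | Capture group | Matched content |
| --- | --- | --- | --- |
| 0 | — | `(?<year>\d{4})-(?<date>\d{2})-(?<day>\d\d)` | 2018-08-30 |
| 1 | year | `(?<year>\d{4})` | 2018 |
| 2 | date | `(?<date>\d{2})` | 08 |
| 3 | day | `(?<day>\d\d)` | 30 |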

4-2 Backreference

Referring, inside the regular expression, to the content already captured by an earlier capture group is called a backreference. Let's combine what we have covered so far in another demo:

// `\1` is a backreference to capture group 1: it matches the exact text that
// group 1 captured, so /([ab])\1/ matches "aa" or "bb" but not "ab".
// String.prototype.match() resets lastIndex on a global regex before it
// searches, so calling it after test() on the same regex is safe here.
const str = 'abcdebbcde';
const reg = /([ab])\1/g;
const res = reg.test(str);    // true
const res0 = str.match(reg);  // ['bb']

// There is no group 2 in this pattern, so `\2` is not a backreference.
// Without the `u` flag JavaScript reads it as a legacy octal escape (the
// character U+0002), which does not occur in the string.
const reg2 = /([ab])\2/g;
const res2 = reg2.test(str);        // false
const res2Match = str.match(reg2);  // null

// Here group 2 exists, so `\2` repeats whatever group 2 captured ("a").
const strs = 'abcdbaabbccde';
const reg3 = /([ab])([a])\2/g;
const res3 = reg3.test(strs);   // true
const res4 = strs.match(reg3);  // ["baa"]
Copy the code

Let’s do another Demo

// Finds every element whose opening and closing tag names agree.
// Group 1 captures the tag name and `\1` requires the same name in the
// closing tag. `.+?` is lazy, so a match stops at the first closing tag with
// that name instead of running on to a later one.
// The outer <div> is not matched because the input has no closing </div>.
const str = '<div><p>1</p><strong>2</strong><div>';
const reg = /<(\w+)>(.+?)<\/\1>/g;
const res = str.match(reg);   // ["<p>1</p>", "<strong>2</strong>"]
Copy the code

Here is another understanding of backreferencing

Backreferences come up in many real-world scenarios and are an important concept; I hope these examples give you a solid feel for them.

It’s been raining in Shenzhen in recent days. Remember to bring an umbrella when you go home from work

GitHub address: (Welcome star, welcome recommendation:)

Front-end regular expression magic (Middle)