正则表达式（Regular Expression，简称 RegExp）是一种强大的文本处理工具，用于匹配、查找、替换和验证字符串。本教程将带你从零开始掌握正则表达式的基础语法。

1. 正则表达式的基本概念

什么是正则表达式？

正则表达式是用预定义的特殊字符和普通字符组成的字符串模式，用于描述一类字符串的特征。

在 JavaScript 中使用正则表达式

// Literal syntax (most common). `pattern` and `flags` are placeholders here,
// not a runnable regex: substitute a real pattern and valid flag letters.
const regex1 = /pattern/flags;

// Constructor syntax: pattern and flags are passed as strings
const regex2 = new RegExp('pattern', 'flags');

// Examples: the same regex written both ways. In the string form every
// backslash is doubled because the string literal consumes one level of escaping.
const emailRegex = /\w+@\w+\.\w+/;
const emailRegex2 = new RegExp('\\w+@\\w+\\.\\w+');

2. 基础元字符

2.1 字符匹配

元字符	说明	示例
`.`	匹配除换行符外的任意单个字符	`/a.c/` 匹配 “abc”, “aac”, “a@c”
`\d`	匹配数字，等价于 `[0-9]`	`/\d/` 匹配 “5”, “9”
`\D`	匹配非数字，等价于 `[^0-9]`	`/\D/` 匹配 “a”, “-”
`\w`	匹配单词字符（字母、数字、下划线），等价于 `[a-zA-Z0-9_]`	`/\w/` 匹配 “a”, “5”, “_”
`\W`	匹配非单词字符	`/\W/` 匹配 “@”, “ ”
`\s`	匹配空白字符（空格、制表符、换行符等）	`/\s/` 匹配 “ ”, “\t”
`\S`	匹配非空白字符	`/\S/` 匹配 “a”, “1”
`\n`	匹配换行符
`\t`	匹配制表符

// Examples: test() returns true as soon as any part of the string matches
console.log(/\d/.test("abc123"));     // true
console.log(/\w/.test("hello_world")); // true
console.log(/\s/.test("hello world")); // true
console.log(/a.c/.test("abc"));        // true
console.log(/a.c/.test("axc"));        // true
console.log(/a.c/.test("ac"));         // false (no character between "a" and "c")

2.2 字符类

// Square brackets [] define a character class
const regex = /[aeiou]/;  // matches any single vowel

// Examples
console.log(/[aeiou]/.test("apple"));   // true
console.log(/[0-9]/.test("123"));       // true
console.log(/[a-z]/.test("ABC"));       // false (uppercase letters are not in a-z)
console.log(/[A-Za-z]/.test("Abc"));    // true

// Negated character class (^ right after the opening bracket means "not")
console.log(/[^0-9]/.test("abc"));      // true (contains a non-digit)
console.log(/[^aeiou]/.test("hello"));  // true ('h', 'l', 'l' are not vowels)

2.3 预定义字符类简写

// Equivalences between shorthand classes and explicit character classes.
// This is notation only, not runnable code: comparing two regex literals with
// === is always false, because each literal creates a distinct RegExp object.
//   \d  is equivalent to  [0-9]
//   \D  is equivalent to  [^0-9]
//   \w  is equivalent to  [a-zA-Z0-9_]
//   \W  is equivalent to  [^a-zA-Z0-9_]
//   \s  is roughly        [ \t\n\r\f\v]   (it also matches Unicode whitespace
//                                          such as \u00a0, \u2028, \u2029, \ufeff)
//   \S  is roughly        [^ \t\n\r\f\v]  (the exact complement of \s)

3. 量词（Quantifiers）

量词用于指定前面的字符或子表达式出现的次数。

量词	说明	示例
`*`	匹配前一个字符 0 次或多次	`/bo*/` 匹配 “boooo” 或 “b”
`+`	匹配前一个字符 1 次或多次	`/bo+/` 匹配 “boooo”，不匹配 “b”
`?`	匹配前一个字符 0 次或 1 次	`/bo?/` 匹配 “bo” 或 “b”
`{n}`	匹配前一个字符恰好 n 次	`/a{3}/` 匹配 “aaa”
`{n,}`	匹配前一个字符至少 n 次	`/a{2,}/` 匹配 “aa”, “aaa”, …
`{n,m}`	匹配前一个字符至少 n 次，至多 m 次	`/a{2,4}/` 匹配 “aa”, “aaa”, “aaaa”

// Examples
console.log(/bo*/.test("boooo"));    // true
console.log(/bo+/.test("b"));        // false
console.log(/bo+/.test("bo"));       // true
console.log(/bo?/.test("b"));        // true
console.log(/a{3}/.test("aaa"));     // true
console.log(/a{2,}/.test("a"));      // false
// Without anchors, test() succeeds as soon as any substring matches, so the
// first four "a"s of "aaaaa" satisfy a{2,4}. Anchor with ^ and $ to enforce
// the upper bound on the whole string.
console.log(/a{2,4}/.test("aaaaa"));   // true (matches "aaaa" inside the string)
console.log(/^a{2,4}$/.test("aaaaa")); // false (5 a's exceed the upper bound of 4)

贪婪匹配与非贪婪匹配

// Greedy (default): the quantifier matches as much as possible
const greedy = /a+/;
console.log("aaa".match(greedy));    // ["aaa"]

// Lazy (quantifier followed by ?): matches as little as possible
const lazy = /a+?/;
console.log("aaa".match(lazy));     // ["a"]

4. 边界匹配

元字符	说明	示例
`^`	匹配字符串的开始位置	`/^hello/` 匹配 “hello world”
`$`	匹配字符串的结束位置	`/world$/` 匹配 “hello world”
`\b`	匹配单词边界	`/\bword\b/` 匹配独立的 “word”
`\B`	匹配非单词边界

// Examples
console.log(/^hello/.test("hello world"));  // true
console.log(/world$/.test("hello world"));  // true
console.log(/^world/.test("hello world"));  // false

console.log(/\bcat\b/.test("the cat sat")); // true
console.log(/\bcat\b/.test("category"));    // false ("cat" is not a standalone word)

// Use ^ and $ together to require that the whole string matches
console.log(/^\d+$/.test("123"));   // true (digits only)
console.log(/^\d+$/.test("123a"));  // false (contains a non-digit)

5. 分组和捕获

5.1 分组

// Parentheses () group a sub-pattern so a quantifier applies to the whole group
const regex = /(ab)+/;
console.log(regex.test("ababab"));  // true
console.log(regex.test("ab"));      // true
// The pattern is unanchored, so the "ab" in the middle of "aabb" is enough.
console.log(regex.test("aabb"));    // true
// Anchor the pattern to require that the whole string is repetitions of "ab".
console.log(/^(ab)+$/.test("aabb")); // false

5.2 捕获组

// Captured text is available on the array returned by match() or exec():
// index 0 is the whole match, index n is the n-th parenthesized group
const dateRegex = /(\d{4})-(\d{2})-(\d{2})/;
const result = "2023-10-25".match(dateRegex);
console.log(result[0]);  // "2023-10-25" (whole match)
console.log(result[1]);  // "2023" (group 1)
console.log(result[2]);  // "10" (group 2)
console.log(result[3]);  // "25" (group 3)

5.3 反向引用

// A backslash followed by a group number (\1, \2, ...) refers back to the text
// captured by that group; here \1 must repeat the tag name captured by (\w+)
const htmlTagRegex = /<(\w+)>(.*?)<\/\1>/;
console.log(htmlTagRegex.test("<div>content</div>"));  // true
console.log(htmlTagRegex.test("<div>content</span>")); // false (opening and closing tags differ)

5.4 非捕获组

// (?:...) groups without capturing
const nonCapturing = /(?:ab)+/;
const capturing = /(ab)+/;

const testStr = "ababab";
console.log(testStr.match(nonCapturing));  // ["ababab"] (a single element)
console.log(testStr.match(capturing));     // ["ababab", "ab"] (second element is the capture group)

6. 选择（或）操作

// The vertical bar | means "or"
const colorRegex = /red|green|blue/;
console.log(colorRegex.test("red car"));    // true
console.log(colorRegex.test("green tree")); // true
console.log(colorRegex.test("yellow sun")); // false

// Combined with grouping: the alternation is limited to the parenthesized part
const timeRegex = /(\d{1,2}:\d{2}) (AM|PM)/;
console.log(timeRegex.test("10:30 AM"));   // true

7. 断言（Assertions）

7.1 正向先行断言（Lookahead）

// (?=...) positive lookahead: the match must be followed by the given pattern,
// but the looked-ahead text is not part of the match
const positiveLookahead = /Windows(?=\s\d)/;
console.log(positiveLookahead.test("Windows 10"));  // true
console.log(positiveLookahead.test("Windows XP"));  // false

// Extract the price numbers (digits followed by "元")
const priceRegex = /\d+(?=元)/g;
const text = "苹果5元，香蕉3元，橙子10元";
console.log(text.match(priceRegex));  // ["5", "3", "10"]

7.2 负向先行断言（Negative Lookahead）

// (?!...) negative lookahead: the match must NOT be followed by the given pattern
const negativeLookahead = /Windows(?!\s\d)/;
console.log(negativeLookahead.test("Windows XP"));  // true
console.log(negativeLookahead.test("Windows 10"));  // false

// Match words that do not end in a digit.
// The lookahead has to sit at the start of the word: \w also matches digits,
// so /\b\w+(?!\d)\b/ would consume the trailing digit and still match "world2".
const notFollowedByNumber = /\b(?!\w*\d\b)\w+\b/g;
const sentence = "hello world2 test3 example";
console.log(sentence.match(notFollowedByNumber));  // ["hello", "example"]

7.3 正向后行断言（Lookbehind）

// (?<=...) positive lookbehind: the match must be preceded by the given pattern,
// but the preceding text is not part of the match
const positiveLookbehind = /(?<=\$)\d+/;
console.log(positiveLookbehind.test("Price: $100"));  // true
console.log(positiveLookbehind.test("Price: 100"));   // false

// Extract currency amounts (digits preceded by "$" or "¥", optional two decimals)
const currencyRegex = /(?<=\$|¥)\d+(\.\d{2})?/g;
const text = "Total: $99.99, Tax: ¥15.50";
console.log(text.match(currencyRegex));  // ["99.99", "15.50"]

7.4 负向后行断言

// (?<!...) negative lookbehind: the match must NOT be preceded by the given pattern.
// \d is excluded in the lookbehind as well; otherwise the engine would simply
// skip the first digit and match the rest of the number ("00" in "$100").
const negativeLookbehind = /(?<![$\d])\d+/;
console.log(negativeLookbehind.test("Price: $100"));  // false
console.log(negativeLookbehind.test("Price: 100"));   // true

8. 标志（Flags）

标志	说明	示例
`g`	全局匹配	`/\d/g` 匹配所有数字
`i`	忽略大小写	`/abc/i` 匹配 “abc”, “ABC”
`m`	多行模式	`/^abc/m` 在多行文本中匹配行首
`s`	dotAll 模式（. 匹配换行符）	`/a.c/s` 匹配 “a\nc”
`u`	Unicode 模式	`/\u{61}/u` 匹配 “a”
`y`	粘性匹配	从 lastIndex 位置开始匹配

// Examples
const text = "Apple banana apple CHERRY";

// i flag: ignore case (combined with g here to collect every occurrence)
console.log(text.match(/apple/gi));  // ["Apple", "apple"]

// g flag: global matching
console.log("112233".match(/\d/g)); // ["1", "1", "2", "2", "3", "3"]

// m flag: multiline mode, ^ and $ also match at line breaks
const multilineText = "first line\nsecond line\nthird line";
console.log(multilineText.match(/^second/m));  // ["second"]

// s flag: dotAll, the dot also matches newline characters
console.log(/a.c/s.test("a\nc"));  // true (false without the s flag)

9. 常用正则表达式示例

9.1 邮箱验证

// Simplified e-mail shape check: local part, "@", domain, a dot, and a final
// label of at least two letters; anchored so the whole string must match
const emailRegex = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;
console.log(emailRegex.test("user@example.com"));    // true
console.log(emailRegex.test("invalid.email"));       // false

9.2 手机号验证（中国大陆）

// Exactly 11 digits: a leading 1, a second digit in 3-9, then nine more digits
const phoneRegex = /^1[3-9]\d{9}$/;
console.log(phoneRegex.test("13800138000"));  // true
console.log(phoneRegex.test("12345678901"));  // false

9.3 身份证号验证（18位）

// Format check only: 6 digits (first is non-zero), a year starting with 19 or 20,
// a month 01-12, a day 01-31, 3 digits, and a final digit or X/x.
// The pattern does not verify the final check character or calendar validity.
const idCardRegex = /^[1-9]\d{5}(19|20)\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}[\dXx]$/;
console.log(idCardRegex.test("11010519491231002X"));  // true

9.4 URL 验证

// Simplified URL shape check: optional http(s) scheme, host labels, a final
// label of 2-6 characters, and an optional path.
// The path group is written as (...+)? rather than (...*)*: the nested
// quantifier accepts the same strings but backtracks exponentially on inputs
// that almost match (ReDoS), e.g. a long path followed by an illegal character.
const urlRegex = /^(https?:\/\/)?([\da-z.-]+)\.([a-z.]{2,6})([\/\w .-]+)?\/?$/;
console.log(urlRegex.test("https://www.example.com"));  // true
console.log(urlRegex.test("http://example"));           // false

9.5 密码强度验证（至少8位，包含大小写字母和数字）

// Three lookaheads anchored at the start each require one character kind
// somewhere in the string (lowercase, uppercase, digit); .{8,} enforces length
const passwordRegex = /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d).{8,}$/;
console.log(passwordRegex.test("Abc12345"));  // true
console.log(passwordRegex.test("abc123"));    // false (too short and no uppercase letter)

10. 正则表达式方法

10.1 RegExp 方法

const regex = /(\d+)-(\d+)-(\d+)/;
const str = "Today is 2023-10-25";

// test() - reports whether the pattern matches
console.log(regex.test(str));  // true

// exec() - runs the match and returns a result array (or null)
const result = regex.exec(str);
console.log(result[0]);  // "2023-10-25"
console.log(result[1]);  // "2023"
console.log(result.index); // 9 (start index of the match)

// With the g flag, call exec() in a loop: each call resumes from the regex's
// lastIndex and returns null once no further match exists
const globalRegex = /\d+/g;
let match;
while ((match = globalRegex.exec(str)) !== null) {
    console.log(`Found ${match[0]} at position ${match.index}`);
}

10.2 String 方法

const str = "The rain in Spain falls mainly in the plain";
const searchStr = "ain";

// search() - returns the index of the first match
// (a string argument is converted to a RegExp)
console.log(str.search(searchStr));  // 5

// match() - returns an array of matches; \b...\b restricts the result to
// whole words of exactly four characters, and "rain" is the only one
console.log(str.match(/\b\w{4}\b/g));  // ["rain"]

// matchAll() - returns an iterator over every match (requires the g flag)
const allMatches = [...str.matchAll(/ain/g)];
console.log(allMatches.length);  // 4 (in "rain", "Spain", "mainly", "plain")

// replace() - replaces the matched text
console.log(str.replace(/ain/g, "XYZ"));  // "The rXYZ in SpXYZ falls mXYZly in the plXYZ"

// split() - splits the string on a regex; the trailing "" comes from the
// separator at the very end of the input
console.log("a1b2c3".split(/\d/));  // ["a", "b", "c", ""]

11. 实际应用示例

11.1 提取 HTML 标签内容

const html = '<div class="header">Title</div><p>Paragraph</p>';
// Group 1 captures the tag name, [^>]* skips any attributes, group 2 lazily
// captures the content, and \1 requires a closing tag with the same name
const tagContentRegex = /<(\w+)[^>]*>(.*?)<\/\1>/g;

let match;
while ((match = tagContentRegex.exec(html)) !== null) {
    console.log(`Tag: ${match[1]}, Content: ${match[2]}`);
}
// Output:
// Tag: div, Content: Title
// Tag: p, Content: Paragraph

11.2 格式化电话号码

/**
 * Formats an 11-digit phone number as "XXX-XXXX-XXXX".
 *
 * Every non-digit character is stripped first, so already-formatted input is
 * accepted. If the remaining digits are not exactly 11 long, the input is
 * returned unchanged.
 *
 * @param {string} phone - Phone number, with or without separators.
 * @returns {string} The formatted number, or the original `phone` value.
 */
function formatPhoneNumber(phone) {
    const digitsOnly = phone.replace(/\D/g, '');
    const parts = /^(\d{3})(\d{4})(\d{4})$/.exec(digitsOnly);
    if (parts === null) {
        return phone;
    }
    const [, head, middle, tail] = parts;
    return `${head}-${middle}-${tail}`;
}

console.log(formatPhoneNumber("13800138000"));  // "138-0013-8000"
console.log(formatPhoneNumber("138-0013-8000")); // "138-0013-8000"

11.3 解析查询字符串

/**
 * Parses a URL query string into a plain object of decoded key/value pairs.
 *
 * A single leading "?" is ignored, so `location.search` can be passed as is.
 * "+" is decoded as a space (form-encoding convention) before percent-decoding.
 * Pairs without "=" are skipped; when a key repeats, the last value wins.
 *
 * @param {string} queryString - e.g. "name=John&age=30" or "?name=John&age=30".
 * @returns {Object<string, string>} Decoded parameters.
 * @throws {URIError} If a key or value contains a malformed percent escape.
 */
function parseQueryString(queryString) {
    const params = {};
    // Without this, the "?" would become part of the first key ("?name").
    const trimmed = queryString.startsWith('?') ? queryString.slice(1) : queryString;
    // decodeURIComponent leaves "+" untouched, so turn it into a space first.
    const decode = (part) => decodeURIComponent(part.replace(/\+/g, ' '));

    for (const [, rawKey, rawValue] of trimmed.matchAll(/([^&=]+)=([^&]*)/g)) {
        params[decode(rawKey)] = decode(rawValue);
    }

    return params;
}

const query = "name=John&age=30&city=New%20York";
console.log(parseQueryString(query));
// { name: "John", age: "30", city: "New York" }