Skip to content

Commit e12ae1c

Browse files
authored
perf(es/parser): Use arrayvec and unsafe push to optimize escaped string parsing (#10369)
1 parent 68f7667 commit e12ae1c

File tree

4 files changed

+40
-13
lines changed

4 files changed

+40
-13
lines changed

.changeset/violet-days-care.md

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
swc_core: minor
3+
swc_ecma_parser: minor
4+
---
5+
6+
perf(es/parser): use `arrayvec` and unsafe `push` to optimize escaped string parsing

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/swc_ecma_parser/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ typescript = []
2525
verify = ["swc_ecma_visit"]
2626

2727
[dependencies]
28+
arrayvec = { workspace = true }
2829
bitflags = { workspace = true }
2930
either = { workspace = true }
3031
num-bigint = { workspace = true }

crates/swc_ecma_parser/src/lexer/mod.rs

+32-13
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
33
use std::{cell::RefCell, char, iter::FusedIterator, mem::transmute, rc::Rc};
44

5+
use arrayvec::ArrayVec;
56
use either::Either::{Left, Right};
6-
use smallvec::{smallvec, SmallVec};
77
use swc_atoms::{Atom, AtomStoreCell};
88
use swc_common::{
99
comments::Comments,
@@ -52,7 +52,7 @@ impl From<u32> for Char {
5252
}
5353
}
5454

55-
pub(crate) struct CharIter(SmallVec<[char; 7]>);
55+
pub(crate) struct CharIter(ArrayVec<char, 12>);
5656

5757
/// Ported from https://github.com/web-infra-dev/oxc/blob/99a4816ce7b6132b2667257984f9d92ae3768f03/crates/oxc_parser/src/lexer/mod.rs#L1349-L1374
5858
impl IntoIterator for Char {
@@ -67,9 +67,16 @@ impl IntoIterator for Char {
6767
// }
6868

6969
CharIter(match char::from_u32(self.0) {
70-
Some(c) => smallvec![c],
70+
Some(c) => {
71+
let mut buf = ArrayVec::new();
72+
// SAFETY: `buf` was just created empty and `CharIter` holds an `ArrayVec<char, 12>`, so a single push cannot exceed the capacity
73+
unsafe {
74+
buf.push_unchecked(c);
75+
}
76+
buf
77+
}
7178
None => {
72-
let mut buf = smallvec![];
79+
let mut buf = ArrayVec::new();
7380

7481
let high = self.0 & 0xffff0000 >> 16;
7582

@@ -78,19 +85,31 @@ impl IntoIterator for Char {
7885
// The second code unit of a surrogate pair is always in the range from 0xDC00
7986
// to 0xDFFF, and is called a low surrogate or a trail surrogate.
8087
if !(0xdc00..=0xdfff).contains(&low) {
81-
buf.push('\\');
82-
buf.push('u');
83-
buf.extend(format!("{high:x}").chars());
84-
buf.push('\\');
85-
buf.push('u');
86-
buf.extend(format!("{low:x}").chars());
88+
// Safety: we can make sure that `buf` has enough capacity
89+
unsafe {
90+
buf.push_unchecked('\\');
91+
buf.push_unchecked('u');
92+
for c in format!("{high:x}").chars() {
93+
buf.push_unchecked(c);
94+
}
95+
buf.push_unchecked('\\');
96+
buf.push_unchecked('u');
97+
for c in format!("{low:x}").chars() {
98+
buf.push_unchecked(c);
99+
}
100+
}
87101
} else {
88102
// `https://tc39.es/ecma262/#sec-utf16decodesurrogatepair`
89103
let astral_code_point = (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;
90104

91-
buf.push('\\');
92-
buf.push('u');
93-
buf.extend(format!("{astral_code_point:x}").chars());
105+
// SAFETY: `buf` is empty here; `\u` plus the hex digits of one `u32` is at most 2 + 8 = 10 chars, within the capacity of 12
106+
unsafe {
107+
buf.push_unchecked('\\');
108+
buf.push_unchecked('u');
109+
for c in format!("{astral_code_point:x}").chars() {
110+
buf.push_unchecked(c);
111+
}
112+
}
94113
}
95114

96115
buf

0 commit comments

Comments
 (0)