Skip to content

Commit 1c3eaf6

Browse files
authored
perf(common): Make character analysis lazy (#9696)
**Related issue:** - #9601
1 parent 5a6f0e6 commit 1c3eaf6

File tree

7 files changed

+184
-34
lines changed

7 files changed

+184
-34
lines changed

Diff for: .changeset/healthy-donuts-rule.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
swc_common: major
3+
---
4+
5+
perf(common): Make `new_source_file` lazy

Diff for: crates/swc_common/src/cache.rs

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
use std::ops::Deref;
2+
3+
use once_cell::sync::OnceCell;
4+
5+
/// Wrapper for [OnceCell] with support for [rkyv].
6+
#[derive(Clone, Debug)]
7+
pub struct CacheCell<T>(OnceCell<T>);
8+
9+
impl<T> Deref for CacheCell<T> {
10+
type Target = OnceCell<T>;
11+
12+
fn deref(&self) -> &Self::Target {
13+
&self.0
14+
}
15+
}
16+
17+
impl<T> CacheCell<T> {
18+
pub fn new() -> Self {
19+
Self(OnceCell::new())
20+
}
21+
}
22+
23+
impl<T> From<T> for CacheCell<T> {
24+
fn from(value: T) -> Self {
25+
Self(OnceCell::from(value))
26+
}
27+
}
28+
29+
impl<T> Default for CacheCell<T> {
30+
fn default() -> Self {
31+
Self::new()
32+
}
33+
}
34+
35+
#[cfg(feature = "rkyv-impl")]
mod rkyv_impl {
    //! `rkyv` support for `CacheCell`.
    //!
    //! A cell is archived as `Archived<Option<T>>`: `Some` when the cell
    //! holds a value at serialization time, `None` when it is still empty.

    use std::{hint::unreachable_unchecked, ptr};

    use rkyv::{
        option::ArchivedOption, out_field, Archive, Archived, Deserialize, Fallible, Resolver,
        Serialize,
    };

    use super::*;

    /// Discriminant written at the start of the archived value.
    ///
    /// NOTE(review): this is presumably meant to mirror the in-memory layout
    /// of rkyv's `ArchivedOption` (`repr(u8)`, `None` first) — confirm against
    /// the rkyv version in use whenever it is upgraded.
    // NOTE(review): `dead_code` is allowed as in the original; presumably
    // because these helpers are only ever written through raw pointers.
    #[allow(dead_code)]
    #[repr(u8)]
    enum ArchivedOptionTag {
        None,
        Some,
    }

    /// Layout helper for the empty case: only the tag is written.
    #[repr(C)]
    struct ArchivedOptionVariantNone(ArchivedOptionTag);

    /// Layout helper for the filled case: the tag followed by the payload.
    #[repr(C)]
    struct ArchivedOptionVariantSome<T>(ArchivedOptionTag, T);

    impl<T> Archive for CacheCell<T>
    where
        T: Archive,
    {
        type Archived = Archived<Option<T>>;
        type Resolver = Resolver<Option<T>>;

        /// Writes the archived form of this cell to `out`.
        ///
        /// # Safety
        ///
        /// The requirements of `Archive::resolve` apply: `resolver` must come
        /// from serializing this very cell and `out` must be valid for writes
        /// of `Self::Archived`.
        unsafe fn resolve(&self, pos: usize, resolver: Self::Resolver, out: *mut Self::Archived) {
            match resolver {
                None => {
                    // Empty cell: only the discriminant has to be written.
                    let out = out.cast::<ArchivedOptionVariantNone>();
                    ptr::addr_of_mut!((*out).0).write(ArchivedOptionTag::None);
                }
                Some(resolver) => {
                    let out = out.cast::<ArchivedOptionVariantSome<T::Archived>>();
                    ptr::addr_of_mut!((*out).0).write(ArchivedOptionTag::Some);

                    let value = match self.0.get() {
                        Some(value) => value,
                        // SAFETY: `serialize` only produces a `Some` resolver
                        // when the cell held a value, and the caller promises
                        // the resolver belongs to this cell.
                        None => unreachable_unchecked(),
                    };

                    // Resolve the payload in place, at its offset inside the
                    // `Some` variant.
                    let (fp, fo) = out_field!(out.1);
                    value.resolve(pos + fp, resolver, fo);
                }
            }
        }
    }

    impl<T: Serialize<S>, S: Fallible + ?Sized> Serialize<S> for CacheCell<T> {
        /// Serializes the cached value if there is one; an empty cell yields
        /// a `None` resolver and writes nothing.
        #[inline]
        fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> {
            match self.0.get() {
                Some(value) => value.serialize(serializer).map(Some),
                None => Ok(None),
            }
        }
    }

    impl<T: Archive, D: Fallible + ?Sized> Deserialize<CacheCell<T>, D> for ArchivedOption<T::Archived>
    where
        T::Archived: Deserialize<T, D>,
    {
        /// Rebuilds a cell: pre-filled for an archived `Some`, empty for `None`.
        #[inline]
        fn deserialize(&self, deserializer: &mut D) -> Result<CacheCell<T>, D::Error> {
            let cell = match self {
                ArchivedOption::Some(archived) => {
                    let value = archived.deserialize(deserializer)?;
                    CacheCell::from(value)
                }
                ArchivedOption::None => CacheCell::new(),
            };
            Ok(cell)
        }
    }
}

Diff for: crates/swc_common/src/lib.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,13 @@ pub use self::{
5151
source_map::{FileLines, FileLoader, FilePathMapping, SourceMap, SpanSnippetError},
5252
syntax_pos::LineCol,
5353
};
54-
#[doc(hidden)]
55-
pub mod private;
5654

5755
/// A trait for ast nodes.
5856
pub trait AstNode: Debug + PartialEq + Clone + Spanned {
5957
const TYPE: &'static str;
6058
}
6159

60+
pub mod cache;
6261
pub mod collections;
6362
pub mod comments;
6463
mod eq;
@@ -68,6 +67,8 @@ pub mod iter;
6867
pub mod pass;
6968
pub mod plugin;
7069
mod pos;
70+
#[doc(hidden)]
71+
pub mod private;
7172
mod rustc_data_structures;
7273
pub mod serializer;
7374
pub mod source_map;

Diff for: crates/swc_common/src/source_map.rs

+15-11
Original file line numberDiff line numberDiff line change
@@ -316,10 +316,11 @@ impl SourceMap {
316316
let line_info = self.lookup_line_with(fm, pos);
317317
match line_info {
318318
Ok(SourceFileAndLine { sf: f, line: a }) => {
319+
let analysis = f.analyze();
319320
let chpos = self.bytepos_to_file_charpos_with(&f, pos);
320321

321322
let line = a + 1; // Line numbers start at 1
322-
let linebpos = f.lines[a];
323+
let linebpos = f.analyze().lines[a];
323324
assert!(
324325
pos >= linebpos,
325326
"{}: bpos = {:?}; linebpos = {:?};",
@@ -332,16 +333,17 @@ impl SourceMap {
332333
let col = chpos - linechpos;
333334

334335
let col_display = {
335-
let start_width_idx = f
336+
let start_width_idx = analysis
336337
.non_narrow_chars
337338
.binary_search_by_key(&linebpos, |x| x.pos())
338339
.unwrap_or_else(|x| x);
339-
let end_width_idx = f
340+
let end_width_idx = analysis
340341
.non_narrow_chars
341342
.binary_search_by_key(&pos, |x| x.pos())
342343
.unwrap_or_else(|x| x);
343344
let special_chars = end_width_idx - start_width_idx;
344-
let non_narrow: usize = f.non_narrow_chars[start_width_idx..end_width_idx]
345+
let non_narrow: usize = analysis.non_narrow_chars
346+
[start_width_idx..end_width_idx]
345347
.iter()
346348
.map(|x| x.width())
347349
.sum();
@@ -367,14 +369,15 @@ impl SourceMap {
367369
})
368370
}
369371
Err(f) => {
372+
let analysis = f.analyze();
370373
let chpos = self.bytepos_to_file_charpos(pos)?;
371374

372375
let col_display = {
373-
let end_width_idx = f
376+
let end_width_idx = analysis
374377
.non_narrow_chars
375378
.binary_search_by_key(&pos, |x| x.pos())
376379
.unwrap_or_else(|x| x);
377-
let non_narrow: usize = f.non_narrow_chars[0..end_width_idx]
380+
let non_narrow: usize = analysis.non_narrow_chars[0..end_width_idx]
378381
.iter()
379382
.map(|x| x.width())
380383
.sum();
@@ -1028,11 +1031,11 @@ impl SourceMap {
10281031
) -> u32 {
10291032
let mut total_extra_bytes = state.total_extra_bytes;
10301033
let mut index = state.mbc_index;
1031-
1034+
let analysis = file.analyze();
10321035
if bpos >= state.pos {
1033-
let range = index..file.multibyte_chars.len();
1036+
let range = index..analysis.multibyte_chars.len();
10341037
for i in range {
1035-
let mbc = &file.multibyte_chars[i];
1038+
let mbc = &analysis.multibyte_chars[i];
10361039
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
10371040
if mbc.pos >= bpos {
10381041
break;
@@ -1052,7 +1055,7 @@ impl SourceMap {
10521055
} else {
10531056
let range = 0..index;
10541057
for i in range.rev() {
1055-
let mbc = &file.multibyte_chars[i];
1058+
let mbc = &analysis.multibyte_chars[i];
10561059
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
10571060
if mbc.pos < bpos {
10581061
break;
@@ -1322,7 +1325,8 @@ impl SourceMap {
13221325
None => continue,
13231326
};
13241327

1325-
let linebpos = f.lines[line as usize];
1328+
let analysis = f.analyze();
1329+
let linebpos = analysis.lines[line as usize];
13261330
debug_assert!(
13271331
pos >= linebpos,
13281332
"{}: bpos = {:?}; linebpos = {:?};",

Diff for: crates/swc_common/src/syntax_pos.rs

+44-19
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use url::Url;
1616

1717
use self::hygiene::MarkData;
1818
pub use self::hygiene::{Mark, SyntaxContext};
19-
use crate::{rustc_data_structures::stable_hasher::StableHasher, sync::Lrc};
19+
use crate::{cache::CacheCell, rustc_data_structures::stable_hasher::StableHasher, sync::Lrc};
2020

2121
mod analyze_source_file;
2222
pub mod hygiene;
@@ -827,14 +827,26 @@ pub struct SourceFile {
827827
pub start_pos: BytePos,
828828
/// The end position of this source in the `SourceMap`
829829
pub end_pos: BytePos,
830+
/// A hash of the filename, used for speeding up the incr. comp. hashing.
831+
pub name_hash: u128,
832+
833+
lazy: CacheCell<SourceFileAnalysis>,
834+
}
835+
836+
#[cfg_attr(
837+
any(feature = "rkyv-impl"),
838+
derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
839+
)]
840+
#[cfg_attr(feature = "rkyv-impl", archive(check_bytes))]
841+
#[cfg_attr(feature = "rkyv-impl", archive_attr(repr(C)))]
842+
#[derive(Clone)]
843+
pub struct SourceFileAnalysis {
830844
/// Locations of lines beginnings in the source code
831845
pub lines: Vec<BytePos>,
832846
/// Locations of multi-byte characters in the source code
833847
pub multibyte_chars: Vec<MultiByteChar>,
834848
/// Width of characters that are not narrow in the source code
835849
pub non_narrow_chars: Vec<NonNarrowChar>,
836-
/// A hash of the filename, used for speeding up the incr. comp. hashing.
837-
pub name_hash: u128,
838850
}
839851

840852
impl fmt::Debug for SourceFile {
@@ -888,9 +900,6 @@ impl SourceFile {
888900
};
889901
let end_pos = start_pos.to_usize() + src.len();
890902

891-
let (lines, multibyte_chars, non_narrow_chars) =
892-
analyze_source_file::analyze_source_file(&src[..], start_pos);
893-
894903
SourceFile {
895904
name,
896905
name_was_remapped,
@@ -900,17 +909,16 @@ impl SourceFile {
900909
src_hash,
901910
start_pos,
902911
end_pos: SmallPos::from_usize(end_pos),
903-
lines,
904-
multibyte_chars,
905-
non_narrow_chars,
906912
name_hash,
913+
lazy: CacheCell::new(),
907914
}
908915
}
909916

910917
/// Return the BytePos of the beginning of the current line.
911918
pub fn line_begin_pos(&self, pos: BytePos) -> BytePos {
912919
let line_index = self.lookup_line(pos).unwrap();
913-
self.lines[line_index]
920+
let analysis = self.analyze();
921+
analysis.lines[line_index]
914922
}
915923

916924
/// Get a line from the list of pre-computed line-beginnings.
@@ -928,7 +936,8 @@ impl SourceFile {
928936
}
929937

930938
let begin = {
931-
let line = self.lines.get(line_number)?;
939+
let analysis = self.analyze();
940+
let line = analysis.lines.get(line_number)?;
932941
let begin: BytePos = *line - self.start_pos;
933942
begin.to_usize()
934943
};
@@ -945,20 +954,22 @@ impl SourceFile {
945954
}
946955

947956
pub fn count_lines(&self) -> usize {
948-
self.lines.len()
957+
let analysis = self.analyze();
958+
analysis.lines.len()
949959
}
950960

951961
/// Find the line containing the given position. The return value is the
952962
/// index into the `lines` array of this SourceFile, not the 1-based line
953963
/// number. If the `source_file` is empty or the position is located before
954964
/// the first line, `None` is returned.
955965
pub fn lookup_line(&self, pos: BytePos) -> Option<usize> {
956-
if self.lines.is_empty() {
966+
let analysis = self.analyze();
967+
if analysis.lines.is_empty() {
957968
return None;
958969
}
959970

960-
let line_index = lookup_line(&self.lines[..], pos);
961-
assert!(line_index < self.lines.len() as isize);
971+
let line_index = lookup_line(&analysis.lines, pos);
972+
assert!(line_index < analysis.lines.len() as isize);
962973
if line_index >= 0 {
963974
Some(line_index as usize)
964975
} else {
@@ -971,18 +982,32 @@ impl SourceFile {
971982
return (self.start_pos, self.end_pos);
972983
}
973984

974-
assert!(line_index < self.lines.len());
975-
if line_index == (self.lines.len() - 1) {
976-
(self.lines[line_index], self.end_pos)
985+
let analysis = self.analyze();
986+
987+
assert!(line_index < analysis.lines.len());
988+
if line_index == (analysis.lines.len() - 1) {
989+
(analysis.lines[line_index], self.end_pos)
977990
} else {
978-
(self.lines[line_index], self.lines[line_index + 1])
991+
(analysis.lines[line_index], analysis.lines[line_index + 1])
979992
}
980993
}
981994

982995
#[inline]
983996
pub fn contains(&self, byte_pos: BytePos) -> bool {
984997
byte_pos >= self.start_pos && byte_pos <= self.end_pos
985998
}
999+
1000+
pub fn analyze(&self) -> &SourceFileAnalysis {
1001+
self.lazy.get_or_init(|| {
1002+
let (lines, multibyte_chars, non_narrow_chars) =
1003+
analyze_source_file::analyze_source_file(&self.src[..], self.start_pos);
1004+
SourceFileAnalysis {
1005+
lines,
1006+
multibyte_chars,
1007+
non_narrow_chars,
1008+
}
1009+
})
1010+
}
9861011
}
9871012

9881013
/// Remove utf-8 BOM if any.

Diff for: crates/swc_error_reporters/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ impl SourceCode for MietteSourceCode<'_> {
131131
}
132132

133133
let loc = self.0.lookup_char_pos(span.lo());
134-
let line_count = loc.file.lines.len();
134+
let line_count = loc.file.analyze().lines.len();
135135

136136
let name = if self.1.skip_filename {
137137
None

Diff for: crates/swc_estree_compat/src/swcify/ctx.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ pub struct Context {
1212

1313
impl Context {
1414
fn locate_line_col(&self, loc: LineCol) -> BytePos {
15-
if let Some(&line_start) = self.fm.lines.get(loc.line) {
15+
if let Some(&line_start) = self.fm.analyze().lines.get(loc.line) {
1616
line_start + BytePos(loc.column as _)
1717
} else {
1818
BytePos(0)

0 commit comments

Comments
 (0)