Skip to content

Commit

Permalink
ref: Sort tokens in SourceMap (#91)
Browse files Browse the repository at this point in the history
The fact that the tokens in a sourcemap can be arbitrarily ordered causes a substantial amount of complication (we have to maintain an additional sorted index) for no benefit that I've ever seen. Therefore, we now sort tokens upon creation and remove all the Index rigmarole. This is a breaking change because it removes some types and functions.
  • Loading branch information
loewenheim authored Aug 2, 2024
1 parent 3de0fa3 commit de24044
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 98 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
# Changelog

## Unreleased

### Various fixes & improvements

- ref: Tokens within a sourcemap are now always sorted by their position in the
minified file (#91) by @loewenheim.
Consequently:
- the type `IndexIter` and the functions `get_index_size`, `index_iter`,
and `idx_from_token` have been deleted;
- the function `sourcemap_from_token` has been turned into the method
`sourcemap` on `Token`;
- the `idx` parameter of `SourceMap::get_token` now has the type `usize`.


## 8.0.1

### Various fixes & improvements
Expand Down
2 changes: 0 additions & 2 deletions src/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,6 @@ fn serialize_mappings(sm: &SourceMap) -> String {
let mut prev_src_id = 0;

for (idx, token) in sm.tokens().enumerate() {
let idx = idx as u32;

if token.get_dst_line() != prev_dst_line {
prev_dst_col = 0;
while token.get_dst_line() != prev_dst_line {
Expand Down
7 changes: 4 additions & 3 deletions src/hermes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,10 @@ impl SourceMapHermes {

// Find the closest mapping, just like here:
// https://github.com/facebook/metro/blob/63b523eb20e7bdf62018aeaf195bb5a3a1a67f36/packages/metro-symbolicate/src/SourceMetadataMapConsumer.js#L204-L231
let mapping = greatest_lower_bound(&function_map.mappings, &token.get_src(), |o| {
(o.line, o.column)
})?;
let (_mapping_idx, mapping) =
greatest_lower_bound(&function_map.mappings, &token.get_src(), |o| {
(o.line, o.column)
})?;
function_map
.names
.get(mapping.name_index as usize)
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ pub use crate::errors::{Error, Result};
pub use crate::hermes::SourceMapHermes;
pub use crate::sourceview::SourceView;
pub use crate::types::{
DecodedMap, IndexIter, NameIter, RawToken, RewriteOptions, SourceContentsIter, SourceIter,
SourceMap, SourceMapIndex, SourceMapSection, SourceMapSectionIter, Token, TokenIter,
DecodedMap, NameIter, RawToken, RewriteOptions, SourceContentsIter, SourceIter, SourceMap,
SourceMapIndex, SourceMapSection, SourceMapSectionIter, Token, TokenIter,
};
pub use crate::utils::make_relative_path;

Expand Down
14 changes: 4 additions & 10 deletions src/sourceview.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use if_chain::if_chain;
use crate::detector::{locate_sourcemap_reference_slice, SourceMapRef};
use crate::errors::Result;
use crate::js_identifiers::{get_javascript_token, is_valid_javascript_identifier};
use crate::types::{idx_from_token, sourcemap_from_token, Token};
use crate::types::Token;

/// An iterator that iterates over tokens in reverse.
pub struct RevTokenIter<'view, 'map> {
Expand All @@ -24,17 +24,11 @@ impl<'view, 'map> Iterator for RevTokenIter<'view, 'map> {
type Item = (Token<'map>, Option<&'view str>);

fn next(&mut self) -> Option<(Token<'map>, Option<&'view str>)> {
let token = match self.token.take() {
None => {
return None;
}
Some(token) => token,
};
let token = self.token.take()?;
let idx = token.idx;

let idx = idx_from_token(&token);
if idx > 0 {
let sm = sourcemap_from_token(&token);
self.token = sm.get_token(idx - 1);
self.token = token.sm.get_token(idx - 1);
}

// if we are going to the same line as we did last iteration, we don't have to scan
Expand Down
97 changes: 29 additions & 68 deletions src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,16 @@ pub struct RawToken {
#[derive(Copy, Clone)]
pub struct Token<'a> {
raw: &'a RawToken,
i: &'a SourceMap,
pub(crate) sm: &'a SourceMap,
pub(crate) idx: usize,
offset: u32,
idx: u32,
}

impl<'a> Token<'a> {
    /// Returns the [`SourceMap`] this token belongs to.
    ///
    /// Note that the returned reference is tied to the map's lifetime `'a`,
    /// not to `&self`, so it may outlive this `Token` value.
    /// This method replaces the removed free function `sourcemap_from_token`.
    pub fn sourcemap(&self) -> &'a SourceMap {
        self.sm
    }
}

impl<'a> PartialEq for Token<'a> {
Expand Down Expand Up @@ -241,7 +248,7 @@ impl<'a> Token<'a> {
if self.raw.src_id == !0 {
None
} else {
self.i.get_source(self.raw.src_id)
self.sm.get_source(self.raw.src_id)
}
}

Expand All @@ -255,7 +262,7 @@ impl<'a> Token<'a> {
if self.raw.name_id == !0 {
None
} else {
self.i.get_name(self.raw.name_id)
self.sm.get_name(self.raw.name_id)
}
}

Expand Down Expand Up @@ -287,7 +294,7 @@ impl<'a> Token<'a> {

/// Returns the referenced source view.
pub fn get_source_view(&self) -> Option<&SourceView> {
self.i.get_source_view(self.get_src_id())
self.sm.get_source_view(self.get_src_id())
}

/// If true, this token is a range token.
Expand All @@ -298,18 +305,10 @@ impl<'a> Token<'a> {
}
}

pub fn idx_from_token(token: &Token<'_>) -> u32 {
token.idx
}

pub fn sourcemap_from_token<'a>(token: &Token<'a>) -> &'a SourceMap {
token.i
}

/// Iterates over all tokens in a sourcemap
pub struct TokenIter<'a> {
i: &'a SourceMap,
next_idx: u32,
next_idx: usize,
}

impl<'a> TokenIter<'a> {
Expand Down Expand Up @@ -390,23 +389,6 @@ impl<'a> Iterator for NameIter<'a> {
}
}

/// Iterates over all index items in a sourcemap
pub struct IndexIter<'a> {
i: &'a SourceMap,
next_idx: usize,
}

impl<'a> Iterator for IndexIter<'a> {
type Item = (u32, u32, u32);

fn next(&mut self) -> Option<(u32, u32, u32)> {
self.i.index.get(self.next_idx).map(|idx| {
self.next_idx += 1;
*idx
})
}
}

impl<'a> fmt::Debug for Token<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "<Token {self:#}>")
Expand Down Expand Up @@ -481,7 +463,6 @@ pub struct SourceMapIndex {
pub struct SourceMap {
pub(crate) file: Option<Arc<str>>,
pub(crate) tokens: Vec<RawToken>,
pub(crate) index: Vec<(u32, u32, u32)>,
pub(crate) names: Vec<Arc<str>>,
pub(crate) source_root: Option<Arc<str>>,
pub(crate) sources: Vec<Arc<str>>,
Expand Down Expand Up @@ -596,21 +577,15 @@ impl SourceMap {
/// - `sources_content` optional source contents
pub fn new(
file: Option<Arc<str>>,
tokens: Vec<RawToken>,
mut tokens: Vec<RawToken>,
names: Vec<Arc<str>>,
sources: Vec<Arc<str>>,
sources_content: Option<Vec<Option<Arc<str>>>>,
) -> SourceMap {
let mut index: Vec<_> = tokens
.iter()
.enumerate()
.map(|(idx, token)| (token.dst_line, token.dst_col, idx as u32))
.collect();
index.sort_unstable();
tokens.sort_unstable_by_key(|t| (t.dst_line, t.dst_col));
SourceMap {
file,
tokens,
index,
names,
source_root: None,
sources,
Expand Down Expand Up @@ -681,10 +656,10 @@ impl SourceMap {
}

/// Looks up a token by its index.
pub fn get_token(&self, idx: u32) -> Option<Token<'_>> {
self.tokens.get(idx as usize).map(|raw| Token {
pub fn get_token(&self, idx: usize) -> Option<Token<'_>> {
self.tokens.get(idx).map(|raw| Token {
raw,
i: self,
sm: self,
idx,
offset: 0,
})
Expand All @@ -705,9 +680,15 @@ impl SourceMap {

/// Looks up the closest token to a given 0-indexed line and column.
pub fn lookup_token(&self, line: u32, col: u32) -> Option<Token<'_>> {
let ii = greatest_lower_bound(&self.index, &(line, col), |ii| (ii.0, ii.1))?;
let (idx, raw) =
greatest_lower_bound(&self.tokens, &(line, col), |t| (t.dst_line, t.dst_col))?;

let mut token = self.get_token(ii.2)?;
let mut token = Token {
raw,
sm: self,
idx,
offset: 0,
};

if token.is_range() {
token.offset = col - token.get_dst_col();
Expand Down Expand Up @@ -827,19 +808,6 @@ impl SourceMap {
self.names.clear();
}

/// Returns the number of items in the index
pub fn get_index_size(&self) -> usize {
self.index.len()
}

/// Returns an iterator over the items in the index
pub fn index_iter(&self) -> IndexIter<'_> {
IndexIter {
i: self,
next_idx: 0,
}
}

/// This rewrites the sourcemap according to the provided rewrite
/// options.
///
Expand Down Expand Up @@ -998,7 +966,6 @@ impl SourceMap {
// the `self` tokens and `src_line/col` for the `adjustment` tokens.
let self_tokens = std::mem::take(&mut self.tokens);
let original_ranges = create_ranges(self_tokens, |t| (t.dst_line, t.dst_col));
self.index.clear();
let adjustment_ranges =
create_ranges(adjustment.tokens.clone(), |t| (t.src_line, t.src_col));

Expand Down Expand Up @@ -1059,14 +1026,8 @@ impl SourceMap {
}
}

// Regenerate the index
self.index.extend(
self.tokens
.iter()
.enumerate()
.map(|(idx, token)| (token.dst_line, token.dst_col, idx as u32)),
);
self.index.sort_unstable();
self.tokens
.sort_unstable_by_key(|t| (t.dst_line, t.dst_col));
}
}

Expand Down Expand Up @@ -1190,7 +1151,7 @@ impl SourceMapIndex {
/// If a sourcemap is encountered that is not embedded but just
/// externally referenced it is silently skipped.
pub fn lookup_token(&self, line: u32, col: u32) -> Option<Token<'_>> {
let section =
let (_section_idx, section) =
greatest_lower_bound(&self.sections, &(line, col), SourceMapSection::get_offset)?;
let map = section.get_sourcemap()?;
let (off_line, off_col) = section.get_offset();
Expand Down
26 changes: 13 additions & 13 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,13 @@ pub fn greatest_lower_bound<'a, T, K: Ord, F: Fn(&'a T) -> K>(
slice: &'a [T],
key: &K,
map: F,
) -> Option<&'a T> {
) -> Option<(usize, &'a T)> {
let mut idx = match slice.binary_search_by_key(key, &map) {
Ok(index) => index,
Err(index) => {
// If there is no match, then we know for certain that the index is where we should
// insert a new token, and that the token directly before is the greatest lower bound.
return slice.get(index.checked_sub(1)?);
return slice.get(index.checked_sub(1)?).map(|res| (index, res));
}
};

Expand All @@ -138,7 +138,7 @@ pub fn greatest_lower_bound<'a, T, K: Ord, F: Fn(&'a T) -> K>(
break;
}
}
slice.get(idx)
slice.get(idx).map(|res| (idx, res))
}

#[test]
Expand Down Expand Up @@ -201,27 +201,27 @@ fn test_greatest_lower_bound() {
let cmp = |&(i, _id)| i;

let haystack = vec![(1, 1)];
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 1)));
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 1));
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);

let haystack = vec![(1, 1), (1, 2)];
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 2)));
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 2));
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);

let haystack = vec![(1, 1), (1, 2), (1, 3)];
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 3)));
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 3));
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);

let haystack = vec![(1, 1), (1, 2), (1, 3), (1, 4)];
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 4)));
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 4));
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);

let haystack = vec![(1, 1), (1, 2), (1, 3), (1, 4), (1, 5)];
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 5)));
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 5));
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);
}

0 comments on commit de24044

Please sign in to comment.