From 6d2f9db42b02009b87c6971267dfbf19d75e2525 Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Mon, 24 Oct 2022 23:19:07 -0400 Subject: [PATCH 1/7] Fix lookup_token on Index maps The old code improperly found the section to call recurse into. Sourcemap sections behave very similarly to the mappings, and we need to find the section whose offset is `<=` the desired position. Additionally, the `col` is only subtracted if it is on the offset's line. This matches [Mozilla's behavior](https://github.com/mozilla/source-map/blob/0.6.1/lib/source-map-consumer.js#L1000-L1003). And as a final bonus, we can binary search it to speed up searches on larger maps. --- src/types.rs | 28 ++++++++++++++++++++-------- tests/test_index.rs | 20 +++++++++++++++++++- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/src/types.rs b/src/types.rs index 504592ba..5be7322c 100644 --- a/src/types.rs +++ b/src/types.rs @@ -619,7 +619,7 @@ impl SourceMap { while low < high { let mid = (low + high) / 2; - let ii = &self.index[mid as usize]; + let ii = &self.index[mid]; if (line, col) < (ii.0, ii.1) { high = mid; } else { @@ -935,15 +935,27 @@ impl SourceMapIndex { /// If a sourcemap is encountered that is not embedded but just /// externally referenced it is silently skipped. pub fn lookup_token(&self, line: u32, col: u32) -> Option> { - for section in self.sections() { - let (off_line, off_col) = section.get_offset(); - if off_line < line || off_col < col { - continue; + let mut low = 0; + let mut high = self.sections.len(); + + while low < high { + let mid = (low + high) / 2; + let section = &self.sections[mid]; + if (line, col) < section.get_offset() { + high = mid; + } else { + low = mid + 1; } + } + + if low > 0 && low <= self.sections.len() { + let section = &self.sections[low - 1]; + let (off_line, off_col) = section.get_offset(); if let Some(map) = section.get_sourcemap() { - if let Some(tok) = map.lookup_token(line - off_line, col - off_col) { - return Some(tok); - } + return map.lookup_token( + line - off_line, + if line == off_line { col - off_col } else { col }, + ); } } None diff --git a/tests/test_index.rs b/tests/test_index.rs index df5965d1..849feaf1 100644 --- a/tests/test_index.rs +++ b/tests/test_index.rs @@ -23,7 +23,7 @@ fn test_basic_indexed_sourcemap() { { "offset": { "line": 1, - "column": 0 + "column": 1 }, "map": { "version":3, @@ -95,6 +95,24 @@ fn test_basic_indexed_sourcemap() { .unwrap_or_else(|| panic!("no source for {}", filename)); assert_eq!(&view.lines().collect::>(), ref_contents); } + + assert_eq!( + ism.lookup_token(0, 0).unwrap().to_tuple(), + ("file1.js", 0, 0, None) + ); + assert_eq!(ism.lookup_token(1, 0), None); + assert_eq!( + ism.lookup_token(1, 1).unwrap().to_tuple(), + ("file2.js", 0, 0, None) + ); + assert_eq!( + ism.lookup_token(1, 8).unwrap().to_tuple(), + ("file2.js", 0, 0, None) + ); + assert_eq!( + ism.lookup_token(1, 9).unwrap().to_tuple(), + ("file2.js", 0, 9, Some("multiply")) + ); } #[test] From 7f6b325cc2f2a326446a87ef95bfaaae3001c064 Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Mon, 24 Oct 2022 23:45:11 -0400 Subject: [PATCH 2/7] Fix accidental lookup of last token Token lookup uses "greatest lower bound" lookups, which means we need to find the token on the line which is `<=` than our desired column. But, the old code accidentally considered the last token to span infinite columns and infinite lines, meaning a lookup of `(10000000, 0)` would return the last token. --- src/types.rs | 8 +++++--- tests/test_regular.rs | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 tests/test_regular.rs diff --git a/src/types.rs b/src/types.rs index 5be7322c..8379ad7d 100644 --- a/src/types.rs +++ b/src/types.rs @@ -628,10 +628,12 @@ impl SourceMap { } if low > 0 && low <= self.index.len() { - self.get_token(self.index[low as usize - 1].2) - } else { - None + let ii = &self.index[low - 1]; + if line == ii.0 { + return self.get_token(ii.2); + } } + None } /// Given a location, name and minified source file resolve a minified diff --git a/tests/test_regular.rs b/tests/test_regular.rs new file mode 100644 index 00000000..c0bbe37a --- /dev/null +++ b/tests/test_regular.rs @@ -0,0 +1,34 @@ +use sourcemap::SourceMap; + +#[test] +fn test_basic_sourcemap() { + let input: &[_] = b"{ + \"version\":3, + \"sources\":[\"coolstuff.js\"], + \"names\":[\"x\",\"alert\"], + \"mappings\":\"AAAA,GAAIA,GAAI,EACR,IAAIA,GAAK,EAAG,CACVC,MAAM\" + }"; + let sm = SourceMap::from_reader(input).unwrap(); + + assert_eq!( + sm.lookup_token(0, 0).unwrap().to_tuple(), + ("coolstuff.js", 0, 0, None) + ); + assert_eq!( + sm.lookup_token(0, 3).unwrap().to_tuple(), + ("coolstuff.js", 0, 4, Some("x")) + ); + assert_eq!( + sm.lookup_token(0, 24).unwrap().to_tuple(), + ("coolstuff.js", 2, 8, None) + ); + + // Lines continue out to infinity + assert_eq!( + sm.lookup_token(0, 1000).unwrap().to_tuple(), + ("coolstuff.js", 2, 8, None) + ); + + // Token must be on the same line to be found. + assert_eq!(sm.lookup_token(1000, 0), None); +} From d000c40f5f0c6fa3dcf553bc95381b71c904d90b Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Tue, 25 Oct 2022 16:18:21 -0400 Subject: [PATCH 3/7] Extract greatest_lower_bound binary search to helper --- src/types.rs | 88 ++++++++++++++++++++++++---------------------------- 1 file changed, 40 insertions(+), 48 deletions(-) diff --git a/src/types.rs b/src/types.rs index 8379ad7d..fbd71732 100644 --- a/src/types.rs +++ b/src/types.rs @@ -295,14 +295,11 @@ pub struct TokenIter<'a> { impl<'a> TokenIter<'a> { pub fn seek(&mut self, line: u32, col: u32) -> bool { - let token = self.i.lookup_token(line, col); - match token { - Some(token) => { - self.next_idx = token.idx + 1; - true - } - None => false, - } + let ii = greatest_lower_bound(&self.i.index, |ii| (line, col).cmp(&(ii.0, ii.1))); + ii.map(|ii| { + self.next_idx = ii.2 + 1; + }) + .is_some() } } @@ -614,26 +611,14 @@ impl SourceMap { /// Looks up the closest token to a given 0-indexed line and column. pub fn lookup_token(&self, line: u32, col: u32) -> Option> { - let mut low = 0; - let mut high = self.index.len(); - - while low < high { - let mid = (low + high) / 2; - let ii = &self.index[mid]; - if (line, col) < (ii.0, ii.1) { - high = mid; - } else { - low = mid + 1; - } - } - - if low > 0 && low <= self.index.len() { - let ii = &self.index[low - 1]; + let ii = greatest_lower_bound(&self.index, |ii| (line, col).cmp(&(ii.0, ii.1))); + ii.and_then(|ii| { if line == ii.0 { - return self.get_token(ii.2); + self.get_token(ii.2) + } else { + None } - } - None + }) } /// Given a location, name and minified source file resolve a minified @@ -937,30 +922,16 @@ impl SourceMapIndex { /// If a sourcemap is encountered that is not embedded but just /// externally referenced it is silently skipped. pub fn lookup_token(&self, line: u32, col: u32) -> Option> { - let mut low = 0; - let mut high = self.sections.len(); - - while low < high { - let mid = (low + high) / 2; - let section = &self.sections[mid]; - if (line, col) < section.get_offset() { - high = mid; - } else { - low = mid + 1; - } - } - - if low > 0 && low <= self.sections.len() { - let section = &self.sections[low - 1]; - let (off_line, off_col) = section.get_offset(); - if let Some(map) = section.get_sourcemap() { - return map.lookup_token( + let section = greatest_lower_bound(&self.sections, |s| (line, col).cmp(&s.get_offset())); + section.and_then(|s| { + s.get_sourcemap().and_then(|m| { + let (off_line, off_col) = s.get_offset(); + m.lookup_token( line - off_line, if line == off_line { col - off_col } else { col }, - ); - } - } - None + ) + }) + }) } /// Flattens an indexed sourcemap into a regular one. This requires @@ -1087,3 +1058,24 @@ impl SourceMapSection { self.map = sm.map(Box::new); } } + +fn greatest_lower_bound Ordering>(slice: &[T], cmp: C) -> Option<&T> { + let mut low = 0; + let mut high = slice.len(); + + while low < high { + let mid = (low + high) / 2; + let ii = &slice[mid]; + if cmp(ii) == Ordering::Less { + high = mid; + } else { + low = mid + 1; + } + } + + if low > 0 && low <= slice.len() { + Some(&slice[low - 1]) + } else { + None + } +} From 631d1dc300e0d7bd9c0fc833ab523e79b718d632 Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Mon, 31 Oct 2022 13:34:47 -0400 Subject: [PATCH 4/7] Revert change to SourceMap::lookupToken --- src/types.rs | 21 +++++++++------------ tests/test_index.rs | 5 ++++- tests/test_regular.rs | 7 +++++-- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/types.rs b/src/types.rs index fbd71732..ccf9f618 100644 --- a/src/types.rs +++ b/src/types.rs @@ -295,11 +295,14 @@ pub struct TokenIter<'a> { impl<'a> TokenIter<'a> { pub fn seek(&mut self, line: u32, col: u32) -> bool { - let ii = greatest_lower_bound(&self.i.index, |ii| (line, col).cmp(&(ii.0, ii.1))); - ii.map(|ii| { - self.next_idx = ii.2 + 1; - }) - .is_some() + let token = self.i.lookup_token(line, col); + match token { + Some(token) => { + self.next_idx = token.idx + 1; + true + } + None => false, + } } } @@ -612,13 +615,7 @@ impl SourceMap { /// Looks up the closest token to a given 0-indexed line and column. pub fn lookup_token(&self, line: u32, col: u32) -> Option> { let ii = greatest_lower_bound(&self.index, |ii| (line, col).cmp(&(ii.0, ii.1))); - ii.and_then(|ii| { - if line == ii.0 { - self.get_token(ii.2) - } else { - None - } - }) + self.get_token(ii.2) } /// Given a location, name and minified source file resolve a minified diff --git a/tests/test_index.rs b/tests/test_index.rs index 849feaf1..e373b7bf 100644 --- a/tests/test_index.rs +++ b/tests/test_index.rs @@ -100,7 +100,10 @@ fn test_basic_indexed_sourcemap() { ism.lookup_token(0, 0).unwrap().to_tuple(), ("file1.js", 0, 0, None) ); - assert_eq!(ism.lookup_token(1, 0), None); + assert_eq!( + ism.lookup_token(1, 0).unwrap().to_tuple(), + ("file1.js", 2, 12, Some("b")) + ); assert_eq!( ism.lookup_token(1, 1).unwrap().to_tuple(), ("file2.js", 0, 0, None) diff --git a/tests/test_regular.rs b/tests/test_regular.rs index c0bbe37a..019896f1 100644 --- a/tests/test_regular.rs +++ b/tests/test_regular.rs @@ -29,6 +29,9 @@ fn test_basic_sourcemap() { ("coolstuff.js", 2, 8, None) ); - // Token must be on the same line to be found. - assert_eq!(sm.lookup_token(1000, 0), None); + // Token can return prior lines. + assert_eq!( + sm.lookup_token(1000, 0).unwrap().to_tuple(), + ("coolstuff.js", 2, 8, None) + ); } From 98f78e24c8d261357550c186cde56e20344effba Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Mon, 31 Oct 2022 13:35:25 -0400 Subject: [PATCH 5/7] Use binary_search_by_key --- src/types.rs | 50 ++++++++++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/src/types.rs b/src/types.rs index ccf9f618..83bf40d8 100644 --- a/src/types.rs +++ b/src/types.rs @@ -614,7 +614,7 @@ impl SourceMap { /// Looks up the closest token to a given 0-indexed line and column. pub fn lookup_token(&self, line: u32, col: u32) -> Option> { - let ii = greatest_lower_bound(&self.index, |ii| (line, col).cmp(&(ii.0, ii.1))); + let ii = greatest_lower_bound(&self.index, &(line, col), |ii| (ii.0, ii.1))?; self.get_token(ii.2) } @@ -919,16 +919,14 @@ impl SourceMapIndex { /// If a sourcemap is encountered that is not embedded but just /// externally referenced it is silently skipped. pub fn lookup_token(&self, line: u32, col: u32) -> Option> { - let section = greatest_lower_bound(&self.sections, |s| (line, col).cmp(&s.get_offset())); - section.and_then(|s| { - s.get_sourcemap().and_then(|m| { - let (off_line, off_col) = s.get_offset(); - m.lookup_token( - line - off_line, - if line == off_line { col - off_col } else { col }, - ) - }) - }) + let section = + greatest_lower_bound(&self.sections, &(line, col), SourceMapSection::get_offset)?; + let map = section.get_sourcemap()?; + let (off_line, off_col) = section.get_offset(); + map.lookup_token( + line - off_line, + if line == off_line { col - off_col } else { col }, + ) } /// Flattens an indexed sourcemap into a regular one. This requires @@ -1056,23 +1054,19 @@ impl SourceMapSection { } } -fn greatest_lower_bound Ordering>(slice: &[T], cmp: C) -> Option<&T> { - let mut low = 0; - let mut high = slice.len(); - - while low < high { - let mid = (low + high) / 2; - let ii = &slice[mid]; - if cmp(ii) == Ordering::Less { - high = mid; - } else { - low = mid + 1; +fn greatest_lower_bound<'a, T, K: Ord, F: Fn(&'a T) -> K>( + slice: &'a [T], + key: &K, + map: F, +) -> Option<&'a T> { + match slice.binary_search_by_key(key, map) { + Ok(index) => Some(&slice[index]), + Err(index) => { + if index > 0 { + Some(&slice[index - 1]) + } else { + None + } } } - - if low > 0 && low <= slice.len() { - Some(&slice[low - 1]) - } else { - None - } } From a8dbb2ec6161024450d847defc58c9f0defd00af Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Mon, 31 Oct 2022 14:53:49 -0400 Subject: [PATCH 6/7] Reuse greatest_lower_bound in get_scope_for_token --- src/hermes.rs | 22 +++++----------------- src/types.rs | 19 +------------------ src/utils.rs | 17 +++++++++++++++++ 3 files changed, 23 insertions(+), 35 deletions(-) diff --git a/src/hermes.rs b/src/hermes.rs index 652d1948..6d2188d1 100644 --- a/src/hermes.rs +++ b/src/hermes.rs @@ -3,9 +3,9 @@ use crate::encoder::{encode, Encodable}; use crate::errors::{Error, Result}; use crate::jsontypes::{FacebookScopeMapping, FacebookSources, RawSourceMap}; use crate::types::{DecodedMap, RewriteOptions, SourceMap}; +use crate::utils::greatest_lower_bound; use crate::vlq::parse_vlq_segment_into; use crate::Token; -use std::cmp::Ordering; use std::io::{Read, Write}; use std::ops::{Deref, DerefMut}; @@ -104,24 +104,12 @@ impl SourceMapHermes { // Find the closest mapping, just like here: // https://github.com/facebook/metro/blob/63b523eb20e7bdf62018aeaf195bb5a3a1a67f36/packages/metro-symbolicate/src/SourceMetadataMapConsumer.js#L204-L231 - let mapping = - function_map - .mappings - .binary_search_by(|o| match o.line.cmp(&token.get_src_line()) { - Ordering::Equal => o.column.cmp(&token.get_src_col()), - x => x, - }); - let name_index = function_map - .mappings - .get(match mapping { - Ok(a) => a, - Err(a) => a.saturating_sub(1), - })? - .name_index; - + let mapping = greatest_lower_bound(&function_map.mappings, &token.get_src(), |o| { + (o.line, o.column) + })?; function_map .names - .get(name_index as usize) + .get(mapping.name_index as usize) .map(|n| n.as_str()) } diff --git a/src/types.rs b/src/types.rs index 83bf40d8..76bce2bc 100644 --- a/src/types.rs +++ b/src/types.rs @@ -10,7 +10,7 @@ use crate::encoder::encode; use crate::errors::{Error, Result}; use crate::hermes::SourceMapHermes; use crate::sourceview::SourceView; -use crate::utils::find_common_prefix; +use crate::utils::{find_common_prefix, greatest_lower_bound}; /// Controls the `SourceMap::rewrite` behavior /// @@ -1053,20 +1053,3 @@ impl SourceMapSection { self.map = sm.map(Box::new); } } - -fn greatest_lower_bound<'a, T, K: Ord, F: Fn(&'a T) -> K>( - slice: &'a [T], - key: &K, - map: F, -) -> Option<&'a T> { - match slice.binary_search_by_key(key, map) { - Ok(index) => Some(&slice[index]), - Err(index) => { - if index > 0 { - Some(&slice[index - 1]) - } else { - None - } - } - } -} diff --git a/src/utils.rs b/src/utils.rs index bf2b7bda..893163da 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -144,6 +144,23 @@ pub fn make_relative_path(base: &str, target: &str) -> String { } } +pub fn greatest_lower_bound<'a, T, K: Ord, F: Fn(&'a T) -> K>( + slice: &'a [T], + key: &K, + map: F, +) -> Option<&'a T> { + match slice.binary_search_by_key(key, map) { + Ok(index) => Some(&slice[index]), + Err(index) => { + if index > 0 { + Some(&slice[index - 1]) + } else { + None + } + } + } +} + #[test] fn test_is_abs_path() { assert!(is_abs_path("C:\\foo.txt")); From 9670f5d864e700467cadc08078e9e372e41cd449 Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Tue, 8 Nov 2022 15:01:14 -0500 Subject: [PATCH 7/7] Fix clippy warning --- src/detector.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/detector.rs b/src/detector.rs index b85c8f8e..e8100394 100644 --- a/src/detector.rs +++ b/src/detector.rs @@ -51,7 +51,7 @@ impl SourceMapRef { if url.starts_with("data:") { return None; } - resolve_url(url, &Url::from_file_path(&minified_path).ok()?) + resolve_url(url, &Url::from_file_path(minified_path).ok()?) .and_then(|x| x.to_file_path().ok()) }