From cbdaff060a768bf07eecd7caee3c33ea7d16c7f4 Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Thu, 23 Feb 2023 22:15:59 -0500 Subject: [PATCH 1/2] Ensure greatest_lower_bound returns lowest match index When we switched to Rust's `binary_search_by_key`, we accidentally changed the behavior when a token matches exactly. Before, we were guaranteed to always return the lowest index because we didn't stop iterating when we found a match. Now, we exit as soon as we find a match, which can be any matching based on the size of the slice. Fixes https://github.com/swc-project/swc/pull/6973 --- src/utils.rs | 52 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/src/utils.rs b/src/utils.rs index 8bd74327..5ef99adc 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -119,10 +119,26 @@ pub fn greatest_lower_bound<'a, T, K: Ord, F: Fn(&'a T) -> K>( key: &K, map: F, ) -> Option<&'a T> { - match slice.binary_search_by_key(key, map) { - Ok(index) => slice.get(index), - Err(index) => slice.get(index.checked_sub(1)?), + let mut idx = match slice.binary_search_by_key(key, &map) { + Ok(index) => index, + Err(index) => { + // If there is no match, then we no for certain that the index is where we should + // insert a new token, and that the token directly before is the greatest lower bound. + return slice.get(index.checked_sub(1)?); + } + }; + + // If we get an exact match, then we need to continue looking at previous tokens to see if + // they also match. We use a linear search because the number of exact matches is generally + // very small, and almost certainly smaller than the number of tokens before the index. + for i in (0..idx).rev() { + if map(&slice[i]) == *key { + idx = i; + } else { + break; + } } + slice.get(idx) } #[test] @@ -179,3 +195,33 @@ fn test_make_relative_path() { assert_eq!(&make_relative_path("foo.txt", "foo.js"), "foo.js"); assert_eq!(&make_relative_path("blah/foo.txt", "foo.js"), "../foo.js"); } + +#[test] +fn test_greatest_lower_bound() { + let cmp = |&(i, _id)| i; + + let haystack = vec![(1, 1)]; + assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1))); + assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 1))); + assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None); + + let haystack = vec![(1, 1), (1, 2)]; + assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1))); + assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 2))); + assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None); + + let haystack = vec![(1, 1), (1, 2), (1, 3)]; + assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1))); + assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 3))); + assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None); + + let haystack = vec![(1, 1), (1, 2), (1, 3), (1, 4)]; + assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1))); + assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 4))); + assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None); + + let haystack = vec![(1, 1), (1, 2), (1, 3), (1, 4), (1, 5)]; + assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1))); + assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 5))); + assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None); +} From 5b883a35dbb4247921ff31329b839179d475b09f Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Fri, 24 Feb 2023 03:33:34 -0500 Subject: [PATCH 2/2] Update src/utils.rs Co-authored-by: Arpad Borsos --- src/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.rs b/src/utils.rs index 5ef99adc..32c7d9c8 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -122,7 +122,7 @@ pub fn greatest_lower_bound<'a, T, K: Ord, F: Fn(&'a T) -> K>( let mut idx = match slice.binary_search_by_key(key, &map) { Ok(index) => index, Err(index) => { - // If there is no match, then we no for certain that the index is where we should + // If there is no match, then we know for certain that the index is where we should // insert a new token, and that the token directly before is the greatest lower bound. return slice.get(index.checked_sub(1)?); }