diff --git a/Cargo.lock b/Cargo.lock index eb9602407af0..e339ec660621 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -525,6 +525,7 @@ dependencies = [ "crossbeam", "dunce", "glob-match", + "globset", "globwalk", "ignore", "log", diff --git a/crates/oxide/Cargo.toml b/crates/oxide/Cargo.toml index 763b0c291f69..e873fcc00452 100644 --- a/crates/oxide/Cargo.toml +++ b/crates/oxide/Cargo.toml @@ -14,9 +14,10 @@ tracing = { version = "0.1.40", features = [] } tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } walkdir = "2.5.0" ignore = "0.4.23" -glob-match = "0.2.1" dunce = "1.0.5" bexpand = "1.2.0" +globset = "0.4.15" +glob-match = "0.2.1" [dev-dependencies] tempfile = "3.13.0" diff --git a/crates/oxide/src/glob.rs b/crates/oxide/src/glob.rs index d74b60b4d824..c391d771c710 100644 --- a/crates/oxide/src/glob.rs +++ b/crates/oxide/src/glob.rs @@ -1,24 +1,70 @@ +use fxhash::{FxHashMap, FxHashSet}; use glob_match::glob_match; use std::iter; use std::path::{Path, PathBuf}; +use tracing::event; use crate::GlobEntry; pub fn fast_glob( patterns: &Vec, ) -> Result, std::io::Error> { - Ok(get_fast_patterns(patterns) + Ok(optimize_patterns(patterns) .into_iter() - .flat_map(|(base_path, patterns)| { - globwalk::GlobWalkerBuilder::from_patterns(base_path, &patterns) - .follow_links(true) - .build() - .unwrap() - .filter_map(Result::ok) - .map(|file| file.path().to_path_buf()) + .flat_map(|glob_entry| { + globwalk::GlobWalkerBuilder::from_patterns( + glob_entry.base, + &[glob_entry.pattern.as_str()][..], + ) + .follow_links(true) + .build() + .unwrap() + .filter_map(Result::ok) + .map(|file| file.path().to_path_buf()) })) } +pub fn hoist_static_glob_parts(entries: &Vec) -> Vec { + let mut result = vec![]; + + for entry in entries { + let (static_part, dynamic_part) = split_pattern(&entry.pattern); + + let base: PathBuf = entry.base.clone().into(); + let base = match static_part { + Some(static_part) => base.join(static_part), + None => base, + }; + + let base = 
match dunce::canonicalize(&base) { + Ok(base) => base, + Err(err) => { + event!(tracing::Level::ERROR, "Failed to resolve glob: {:?}", err); + // If we can't resolve the new base on disk, let's just skip this entry. + continue; + } + }; + + let pattern = match dynamic_part { + Some(dynamic_part) => dynamic_part, + None => { + if base.is_dir() { + "**/*".to_owned() + } else { + "".to_owned() + } + } + }; + + result.push(GlobEntry { + base: base.to_string_lossy().to_string(), + pattern, + }); + } + + result +} + /// This function attempts to optimize the glob patterns to improve performance. The problem is /// that if you run the following command: /// ```sh @@ -42,98 +88,88 @@ pub fn fast_glob( /// tailwind --pwd ./project/pages --content "**/*.js" /// tailwind --pwd ./project/components --content "**/*.js" /// ``` -pub fn get_fast_patterns(patterns: &Vec) -> Vec<(PathBuf, Vec)> { - let mut optimized_patterns: Vec<(PathBuf, Vec)> = vec![]; +pub fn optimize_patterns(entries: &Vec) -> Vec { + let entries = hoist_static_glob_parts(entries); - for pattern in patterns { - let base_path = PathBuf::from(&pattern.base); - let pattern = &pattern.pattern; + // Track all base paths and their patterns. Later we will turn them back into `GlobEntry`s. + let mut pattern_map: FxHashMap> = FxHashMap::default(); - let is_negated = pattern.starts_with('!'); - let mut pattern = pattern.clone(); - if is_negated { - pattern.remove(0); - } + for glob_entry in entries { + let entry = pattern_map.entry(glob_entry.base).or_default(); + entry.insert(glob_entry.pattern.clone()); + } - let mut folders = pattern.split('/').collect::>(); - - if folders.len() <= 1 { - // No paths we can simplify, so let's use it as-is. - optimized_patterns.push((base_path, vec![pattern])); - } else { - // We do have folders because `/` exists. Let's try to simplify the globs! - // Safety: We know that the length is greater than 1, so we can safely unwrap. 
- let file_pattern = folders.pop().unwrap(); - let all_folders = folders.clone(); - let mut temp_paths = vec![base_path]; - - let mut bail = false; - - for (i, folder) in folders.into_iter().enumerate() { - // There is a wildcard in the folder, so we have to bail now... 😢 But this also - // means that we can skip looking at the rest of the folders, so there is at least - // this small optimization we can apply! - if folder.contains('*') { - // Get all the remaining folders, attach the existing file_pattern so that this - // can now be the final pattern we use. - let mut remaining_folders = all_folders[i..].to_vec(); - remaining_folders.push(file_pattern); - - let pattern = remaining_folders.join("/"); - for path in &temp_paths { - optimized_patterns.push((path.to_path_buf(), vec![pattern.to_string()])); - } - - bail = true; - break; - } + // TODO: Optimization, if any of the patterns result in `**/*`, then we can do two things: + // 1. All base paths in the pattern_map, that start with the current base path, can be removed. + // 2. All patterns that are not `**/*` can be removed from the current base path. - // The folder is very likely using an expandable pattern which we can expand! - if folder.contains('{') && folder.contains('}') { - let branches = expand_braces(folder); - - let existing_paths = temp_paths; - temp_paths = branches - .iter() - .flat_map(|branch| { - existing_paths - .clone() - .into_iter() - .map(|path| path.join(branch)) - .collect::>() - }) - .collect::>(); - } - // The folder should just be a simple folder name without any glob magic. We should - // be able to safely add it to the existing paths. 
- else { - temp_paths = temp_paths - .into_iter() - .map(|path| path.join(folder)) - .collect(); - } + let mut glob_entries = pattern_map + .into_iter() + .map(|(base, patterns)| { + let size = patterns.len(); + let mut patterns = patterns.into_iter().collect::>(); + patterns.sort(); + let combined_patterns = patterns.join(","); + + // TODO: Right now this will generate something like `{**/*.html,**/*.js}`, but maybe + // we want to generate this instead: `**/*.{html,js}`. + + GlobEntry { + base, + pattern: match size { + 1 => combined_patterns, + _ => format!("{{{}}}", combined_patterns), + }, } + }) + .collect::>(); - // As long as we didn't bail, we can now add the current expanded patterns to the - // optimized patterns. - if !bail { - for path in &temp_paths { - optimized_patterns.push((path.to_path_buf(), vec![file_pattern.to_string()])); - } - } + // Sort the entries by base path to ensure we have stable results. + glob_entries.sort_by(|a, z| a.base.cmp(&z.base)); + + glob_entries +} + +// Split a glob pattern into a `static` and `dynamic` part. +// +// Assumption: we assume that all globs are expanded, which means that the only dynamic parts are +// using `*`. +// +// E.g.: +// Original input: `../project-b/**/*.{html,js}` +// Expanded input: `../project-b/**/*.html` & `../project-b/**/*.js` +// Split on first input: ("../project-b", "**/*.html") +// Split on second input: ("../project-b", "**/*.js") +fn split_pattern(pattern: &str) -> (Option, Option) { + // No dynamic parts, so we can just return the input as-is. + if !pattern.contains('*') { + return (Some(pattern.to_owned()), None); + } + + let mut last_slash_position = None; + + for (i, c) in pattern.char_indices() { + if c == '/' { + last_slash_position = Some(i); } - // Ensure that we re-add all the `!` signs to the patterns. - if is_negated { - for (_, patterns) in &mut optimized_patterns { - for pattern in patterns { - pattern.insert(0, '!'); - } - } + if c == '*' || c == '!' 
{ + break; } } - optimized_patterns + // Very first character is a `*`, therefore there is no static part, only a dynamic part. + let Some(last_slash_position) = last_slash_position else { + return (None, Some(pattern.to_owned())); + }; + + let static_part = pattern[..last_slash_position].to_owned(); + let dynamic_part = pattern[last_slash_position + 1..].to_owned(); + + let static_part = (!static_part.is_empty()).then_some(static_part); + let dynamic_part = (!dynamic_part.is_empty()).then_some(dynamic_part); + + (static_part, dynamic_part) } pub fn path_matches_globs(path: &Path, globs: &[GlobEntry]) -> bool { @@ -144,167 +180,185 @@ pub fn path_matches_globs(path: &Path, globs: &[GlobEntry]) -> bool { .any(|g| glob_match(&format!("{}/{}", g.base, g.pattern), &path)) } -/// Given this input: a-{b,c}-d-{e,f} -/// We will get: -/// [ -/// a-b-d-e -/// a-b-d-f -/// a-c-d-e -/// a-c-d-f -/// ] -/// TODO: There is probably a way nicer way of doing this, but this works for now. -fn expand_braces(input: &str) -> Vec { - let mut result: Vec = vec![]; - - let mut in_braces = false; - let mut last_char: char = '\0'; - - let mut current = String::new(); - - // Given the input: a-{b,c}-d-{e,f}-g - // The template will look like this: ["a-", "-d-", "g"]. - let mut template: Vec = vec![]; - - // The branches will look like this: [["b", "c"], ["e", "f"]]. - let mut branches: Vec> = vec![]; - - for (i, c) in input.char_indices() { - let is_escaped = i > 0 && last_char == '\\'; - last_char = c; - - match c { - '{' if !is_escaped => { - // Ensure that when a new set of braces is opened, that we at least have 1 - // template. 
- if template.is_empty() { - template.push(String::new()); - } - - in_braces = true; - branches.push(vec![]); - template.push(String::new()); - } - '}' if !is_escaped => { - in_braces = false; - if let Some(last) = branches.last_mut() { - last.push(current.clone()); - } - current.clear(); - } - ',' if !is_escaped && in_braces => { - if let Some(last) = branches.last_mut() { - last.push(current.clone()); - } - current.clear(); +#[cfg(test)] +mod tests { + use super::optimize_patterns; + use crate::GlobEntry; + use bexpand::Expression; + use std::process::Command; + use std::{fs, path}; + use tempfile::tempdir; + + fn create_folders(folders: &[&str]) -> String { + // Create a temporary working directory + let dir = tempdir().unwrap().into_path(); + + // Initialize this directory as a git repository + let _ = Command::new("git").arg("init").current_dir(&dir).output(); + + // Create the necessary files + for path in folders { + // Ensure we use the right path separator for the current platform + let path = dir.join(path.replace('/', path::MAIN_SEPARATOR.to_string().as_str())); + let parent = path.parent().unwrap(); + if !parent.exists() { + fs::create_dir_all(parent).unwrap(); } - _ if in_braces => current.push(c), - _ => { - if template.is_empty() { - template.push(String::new()); - } - if let Some(last) = template.last_mut() { - last.push(c); - } - } - }; - } + fs::write(path, "").unwrap(); + } - // Ensure we have a string that we can start adding information too. - if !template.is_empty() && !branches.is_empty() { - result.push("".to_string()); - } + let base = format!("{}", dir.display()); - // Let's try to generate everything! - for (i, template) in template.into_iter().enumerate() { - // Append current template string to all existing results. - result = result.into_iter().map(|x| x + &template).collect(); - - // Get the results, and copy it for every single branch. 
- if let Some(branches) = branches.get(i) { - result = branches - .iter() - .flat_map(|branch| { - result - .clone() - .into_iter() - .map(|x| x + branch) - .collect::>() - }) - .collect::>(); - } + base } - result -} - -#[cfg(test)] -mod tests { - use super::get_fast_patterns; - use crate::GlobEntry; - use std::path::PathBuf; + fn test(base: &str, sources: &[GlobEntry]) -> Vec { + // Resolve all content paths for the (temporary) current working directory + let sources: Vec = sources + .iter() + .map(|x| GlobEntry { + base: format!("{}{}", base, x.base), + pattern: x.pattern.clone(), + }) + .collect(); + + // Expand glob patterns into multiple `GlobEntry`s. + let sources = sources + .iter() + .flat_map(|source| { + let expression: Result = source.pattern[..].try_into(); + let Ok(expression) = expression else { + return vec![source.clone()]; + }; + + expression + .into_iter() + .filter_map(Result::ok) + .map(move |pattern| GlobEntry { + base: source.base.clone(), + pattern: pattern.into(), + }) + .collect::>() + }) + .collect::>(); + + let optimized_sources = optimize_patterns(&sources); + + let parent_dir = format!("{}", fs::canonicalize(base).unwrap().display()); + + // Remove the temporary directory from the base + optimized_sources + .into_iter() + .map(|source| GlobEntry { + // Normalize paths to use unix style separators + base: source.base.replace(&parent_dir, "").replace('\\', "/"), + pattern: source.pattern, + }) + .collect() + } #[test] fn it_should_keep_globs_that_start_with_file_wildcards_as_is() { - let actual = get_fast_patterns(&vec![GlobEntry { + let base = create_folders(&["projects"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "*.html".to_string(), + }], + ); + + let expected = vec![GlobEntry { base: "/projects".to_string(), pattern: "*.html".to_string(), - }]); - let expected = vec![(PathBuf::from("/projects"), vec!["*.html".to_string()])]; + }]; - assert_eq!(actual, expected,); + 
assert_eq!(actual, expected); } #[test] fn it_should_keep_globs_that_start_with_folder_wildcards_as_is() { - let actual = get_fast_patterns(&vec![GlobEntry { + let base = create_folders(&["projects"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "**/*.html".to_string(), + }], + ); + + let expected = vec![GlobEntry { base: "/projects".to_string(), pattern: "**/*.html".to_string(), - }]); - - let expected = vec![(PathBuf::from("/projects"), vec!["**/*.html".to_string()])]; + }]; assert_eq!(actual, expected,); } #[test] fn it_should_move_the_starting_folder_to_the_path() { - let actual = get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: "example/*.html".to_string(), - }]); - let expected = vec![( - PathBuf::from("/projects/example"), - vec!["*.html".to_string()], - )]; + let base = create_folders(&["projects/example"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "example/*.html".to_string(), + }], + ); + + let expected = vec![GlobEntry { + base: "/projects/example".to_string(), + pattern: "*.html".to_string(), + }]; assert_eq!(actual, expected,); } #[test] fn it_should_move_the_starting_folders_to_the_path() { - let actual = get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: "example/other/*.html".to_string(), - }]); - let expected = vec![( - PathBuf::from("/projects/example/other"), - vec!["*.html".to_string()], - )]; + let base = create_folders(&["projects/example/other"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "example/other/*.html".to_string(), + }], + ); + + let expected = vec![GlobEntry { + base: "/projects/example/other".to_string(), + pattern: "*.html".to_string(), + }]; assert_eq!(actual, expected,); } #[test] fn it_should_branch_expandable_folders() { - let actual = get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: 
"{foo,bar}/*.html".to_string(), - }]); + let base = create_folders(&["projects/foo", "projects/bar"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "{foo,bar}/*.html".to_string(), + }], + ); let expected = vec![ - (PathBuf::from("/projects/foo"), vec!["*.html".to_string()]), - (PathBuf::from("/projects/bar"), vec!["*.html".to_string()]), + GlobEntry { + base: "/projects/bar".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/foo".to_string(), + pattern: "*.html".to_string(), + }, ]; assert_eq!(actual, expected,); @@ -312,27 +366,38 @@ mod tests { #[test] fn it_should_expand_multiple_expansions_in_the_same_folder() { - let actual = get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: "a-{b,c}-d-{e,f}-g/*.html".to_string(), - }]); + let base = create_folders(&[ + "projects/a-b-d-e-g", + "projects/a-b-d-f-g", + "projects/a-c-d-e-g", + "projects/a-c-d-f-g", + ]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "a-{b,c}-d-{e,f}-g/*.html".to_string(), + }], + ); + let expected = vec![ - ( - PathBuf::from("/projects/a-b-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-b-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-d-f-g"), - vec!["*.html".to_string()], - ), + GlobEntry { + base: "/projects/a-b-d-e-g".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-b-d-f-g".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-d-e-g".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-d-f-g".to_string(), + pattern: "*.html".to_string(), + }, ]; assert_eq!(actual, expected,); @@ -340,75 +405,98 @@ mod tests { #[test] fn multiple_expansions_per_folder_starting_at_the_root() { - let actual = 
get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: "{a,b}-c-{d,e}-f/{b,c}-d-{e,f}-g/*.html".to_string(), - }]); + let base = create_folders(&[ + "projects/a-c-d-f/b-d-e-g", + "projects/a-c-d-f/b-d-f-g", + "projects/a-c-d-f/c-d-e-g", + "projects/a-c-d-f/c-d-f-g", + "projects/a-c-e-f/b-d-e-g", + "projects/a-c-e-f/b-d-f-g", + "projects/a-c-e-f/c-d-e-g", + "projects/a-c-e-f/c-d-f-g", + "projects/b-c-d-f/b-d-e-g", + "projects/b-c-d-f/b-d-f-g", + "projects/b-c-d-f/c-d-e-g", + "projects/b-c-d-f/c-d-f-g", + "projects/b-c-e-f/b-d-e-g", + "projects/b-c-e-f/b-d-f-g", + "projects/b-c-e-f/c-d-e-g", + "projects/b-c-e-f/c-d-f-g", + ]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "{a,b}-c-{d,e}-f/{b,c}-d-{e,f}-g/*.html".to_string(), + }], + ); + let expected = vec![ - ( - PathBuf::from("/projects/a-c-d-f/b-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-d-f/b-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-e-f/b-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-e-f/b-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-d-f/c-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-d-f/c-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-e-f/c-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-e-f/c-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-d-f/b-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-d-f/b-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-e-f/b-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-e-f/b-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-d-f/c-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-d-f/c-d-f-g"), - 
vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-e-f/c-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-e-f/c-d-f-g"), - vec!["*.html".to_string()], - ), + GlobEntry { + base: "/projects/a-c-d-f/b-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-d-f/b-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-d-f/c-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-d-f/c-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-e-f/b-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-e-f/b-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-e-f/c-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-e-f/c-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-d-f/b-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-d-f/b-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-d-f/c-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-d-f/c-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-e-f/b-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-e-f/b-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-e-f/c-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-e-f/c-d-f-g".into(), + pattern: "*.html".to_string(), + }, ]; assert_eq!(actual, expected,); @@ -416,20 +504,25 @@ mod tests { #[test] fn it_should_stop_expanding_once_we_hit_a_wildcard() { - let actual = get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: 
"{foo,bar}/example/**/{baz,qux}/*.html".to_string(), - }]); + let base = create_folders(&["projects/bar/example", "projects/foo/example"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "{foo,bar}/example/**/{baz,qux}/*.html".to_string(), + }], + ); let expected = vec![ - ( - PathBuf::from("/projects/foo/example"), - vec!["**/{baz,qux}/*.html".to_string()], - ), - ( - PathBuf::from("/projects/bar/example"), - vec!["**/{baz,qux}/*.html".to_string()], - ), + GlobEntry { + base: "/projects/bar/example".to_string(), + pattern: "{**/baz/*.html,**/qux/*.html}".to_string(), + }, + GlobEntry { + base: "/projects/foo/example".to_string(), + pattern: "{**/baz/*.html,**/qux/*.html}".to_string(), + }, ]; assert_eq!(actual, expected,); @@ -437,41 +530,60 @@ mod tests { #[test] fn it_should_keep_the_negation_symbol_for_all_new_patterns() { - let actual = get_fast_patterns(&vec![GlobEntry { + let base = create_folders(&["projects"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "!{foo,bar}/*.html".to_string(), + }], + ); + + let expected = vec![GlobEntry { base: "/projects".to_string(), - pattern: "!{foo,bar}/*.html".to_string(), - }]); - let expected = vec![ - (PathBuf::from("/projects/foo"), vec!["!*.html".to_string()]), - (PathBuf::from("/projects/bar"), vec!["!*.html".to_string()]), - ]; + // TODO: This is wrong, because `!` should be in front. But right now we don't support + // `@source "!../foo/bar";` anyway. 
+ pattern: "{!bar/*.html,!foo/*.html}".to_string(), + }]; assert_eq!(actual, expected,); } #[test] fn it_should_expand_a_complex_example() { - let actual = get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: "a/{b,c}/d/{e,f}/g/*.html".to_string(), - }]); + let base = create_folders(&[ + "projects/a/b/d/e/g", + "projects/a/b/d/f/g", + "projects/a/c/d/e/g", + "projects/a/c/d/f/g", + ]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "a/{b,c}/d/{e,f}/g/*.html".to_string(), + }], + ); + let expected = vec![ - ( - PathBuf::from("/projects/a/b/d/e/g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a/c/d/e/g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a/b/d/f/g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a/c/d/f/g"), - vec!["*.html".to_string()], - ), + GlobEntry { + base: "/projects/a/b/d/e/g".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a/b/d/f/g".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a/c/d/e/g".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a/c/d/f/g".to_string(), + pattern: "*.html".to_string(), + }, ]; assert_eq!(actual, expected,); diff --git a/crates/oxide/src/lib.rs b/crates/oxide/src/lib.rs index 24eecca8bc0c..0c3cf6e360c1 100644 --- a/crates/oxide/src/lib.rs +++ b/crates/oxide/src/lib.rs @@ -1,10 +1,12 @@ +use crate::glob::hoist_static_glob_parts; use crate::parser::Extractor; +use crate::scanner::allowed_paths::resolve_paths; use crate::scanner::detect_sources::DetectSources; use bexpand::Expression; use bstr::ByteSlice; use fxhash::{FxHashMap, FxHashSet}; -use glob::fast_glob; -use glob::get_fast_patterns; +use glob::optimize_patterns; +use globset::Glob; use rayon::prelude::*; use std::fs; use std::path::PathBuf; @@ -55,7 +57,7 @@ pub struct ScanResult { pub globs: Vec, } -#[derive(Debug, Clone)] 
+#[derive(Debug, Clone, PartialEq)] pub struct GlobEntry { pub base: String, pub pattern: String, @@ -255,9 +257,6 @@ impl Scanner { false }); - // Turn `Vec<&GlobEntry>` in `Vec` - let glob_sources: Vec<_> = glob_sources.into_iter().cloned().collect(); - for path in auto_sources .iter() .map(|source| PathBuf::from(&source.base).join(source.pattern.trim_end_matches("**/*"))) @@ -269,46 +268,60 @@ impl Scanner { self.globs.extend(globs); } - let resolved_files: Vec<_> = match fast_glob(&glob_sources) { - Ok(matches) => matches - .filter_map(|x| dunce::canonicalize(&x).ok()) - .collect(), - Err(err) => { - event!(tracing::Level::ERROR, "Failed to resolve glob: {:?}", err); - vec![] + // Turn `Vec<&GlobEntry>` into `Vec<GlobEntry>` + let glob_sources: Vec<_> = glob_sources.into_iter().cloned().collect(); + let hoisted = hoist_static_glob_parts(&glob_sources); + + for source in &hoisted { + // If the pattern is empty, then the base points to a specific file or folder already + // if it doesn't contain any dynamic parts. In that case we can use the base as the + // pattern. + // + // Otherwise we need to combine the base and the pattern, otherwise a pattern that + // looks like `*.html`, will never match a path that looks like + // `/my-project/project-a/index.html`, because it contains `/`. + // + // We can't prepend `**/`, because then `/my-project/project-a/nested/index.html` would + // match as well. + // + // Instead we combine the base and the pattern as a single glob pattern. + let mut full_pattern = source.base.clone(); + if !source.pattern.is_empty() { + full_pattern.push('/'); + full_pattern.push_str(&source.pattern); } - }; + let Ok(glob) = Glob::new(&full_pattern) else { + continue; + }; - self.files.extend(resolved_files); - self.globs.extend(glob_sources); + let glob = glob.compile_matcher(); - // Re-optimize the globs to reduce the number of patterns we have to scan. 
- self.globs = get_fast_patterns(&self.globs) - .into_iter() - .filter_map(|(root, globs)| { - let root = match dunce::canonicalize(root) { - Ok(root) => root, - Err(error) => { - event!( - tracing::Level::ERROR, - "Failed to canonicalize base path {:?}", - error - ); - return None; - } + let base = PathBuf::from(&source.base); + for entry in resolve_paths(&base) { + let Some(file_type) = entry.file_type() else { + continue; }; - Some((root, globs)) - }) - .flat_map(|(root, globs)| { - let base = root.display().to_string(); + if !file_type.is_file() { + continue; + } - globs.into_iter().map(move |glob| GlobEntry { - base: base.clone(), - pattern: glob, - }) - }) - .collect::>(); + let file_path = entry.into_path(); + + let Some(file_path_str) = file_path.to_str() else { + continue; + }; + + if glob.is_match(file_path_str) { + self.files.push(file_path); + } + } + } + + self.globs.extend(hoisted); + + // Re-optimize the globs to reduce the number of patterns we have to scan. + self.globs = optimize_patterns(&self.globs); } } diff --git a/crates/oxide/src/scanner/allowed_paths.rs b/crates/oxide/src/scanner/allowed_paths.rs index 3015e9dd0465..a761cd34b03c 100644 --- a/crates/oxide/src/scanner/allowed_paths.rs +++ b/crates/oxide/src/scanner/allowed_paths.rs @@ -30,7 +30,7 @@ pub fn resolve_allowed_paths(root: &Path) -> impl Iterator { WalkBuilder::new(root) .hidden(false) .require_git(false) - .filter_entry(|entry| match entry.file_type() { + .filter_entry(move |entry| match entry.file_type() { Some(file_type) if file_type.is_dir() => match entry.file_name().to_str() { Some(dir) => !IGNORED_CONTENT_DIRS.contains(&dir), None => false, @@ -44,6 +44,15 @@ pub fn resolve_allowed_paths(root: &Path) -> impl Iterator { .filter_map(Result::ok) } +#[tracing::instrument(skip(root))] +pub fn resolve_paths(root: &Path) -> impl Iterator { + WalkBuilder::new(root) + .hidden(false) + .require_git(false) + .build() + .filter_map(Result::ok) +} + pub fn is_allowed_content_path(path: 
&Path) -> bool { // Skip known ignored files if path diff --git a/crates/oxide/tests/scanner.rs b/crates/oxide/tests/scanner.rs index fe15be414fb2..14ab350c456b 100644 --- a/crates/oxide/tests/scanner.rs +++ b/crates/oxide/tests/scanner.rs @@ -66,17 +66,16 @@ mod scanner { )); } + let parent_dir = format!( + "{}{}", + fs::canonicalize(&base).unwrap().display(), + path::MAIN_SEPARATOR + ); + paths = paths .into_iter() .map(|x| { - let parent_dir = format!( - "{}{}", - fs::canonicalize(&base).unwrap().display(), - path::MAIN_SEPARATOR - ); - x.replace(&parent_dir, "") - // Normalize paths to use unix style separators - .replace('\\', "/") + x.replace(&parent_dir, "").replace('\\', "/") // Normalize paths to use unix style separators }) .collect(); diff --git a/integrations/cli/index.test.ts b/integrations/cli/index.test.ts index b36ca4c07688..b1b17eb1af40 100644 --- a/integrations/cli/index.test.ts +++ b/integrations/cli/index.test.ts @@ -280,9 +280,7 @@ test( /* Run auto-content detection in ../../project-b */ @import 'tailwindcss/utilities' source('../../project-b'); - /* Additive: */ - /* {my-lib-1,my-lib-2}: expand */ - /* *.html: only look for .html */ + /* Explicitly using node_modules in the @source allows git ignored folders */ @source '../node_modules/{my-lib-1,my-lib-2}/src/**/*.html'; /* We typically ignore these extensions, but now include them explicitly */ @@ -290,6 +288,13 @@ test( /* Project C should apply auto source detection */ @source '../../project-c'; + + /* Project D should apply auto source detection rules, such as ignoring node_modules */ + @source '../../project-d/**/*.{html,js}'; + @source '../../project-d/**/*.bin'; + + /* Same as above, but my-lib-2 _should_ be includes */ + @source '../../project-d/node_modules/my-lib-2/*.{html,js}'; `, // Project A is the current folder, but we explicitly configured @@ -362,6 +367,36 @@ test( class="content-['SHOULD-NOT-EXIST-IN-OUTPUT'] content-['project-c/node_modules/my-lib-1/src/index.html']" > `, + 
+ // Project D should apply auto source detection rules, such as ignoring + // node_modules. + 'project-d/node_modules/my-lib-1/src/index.html': html` +
+ `, + + // Project D has an explicit glob containing node_modules, thus should include the html file + 'project-d/node_modules/my-lib-2/src/index.html': html` +
+ `, + + // Project D should look for files with the extensions html and js. + 'project-d/src/index.html': html` +
+ `, + + // Project D should have a binary file even though we ignore binary files + // by default, but it's explicitly listed. + 'project-d/my-binary-file.bin': html` +
+ `, }, }, async ({ fs, exec, spawn, root }) => { @@ -392,6 +427,18 @@ test( --tw-content: 'project-c/src/index.html'; content: var(--tw-content); } + .content-\\[\\'project-d\\/my-binary-file\\.bin\\'\\] { + --tw-content: 'project-d/my-binary-file.bin'; + content: var(--tw-content); + } + .content-\\[\\'project-d\\/node_modules\\/my-lib-2\\/src\\/index\\.html\\'\\] { + --tw-content: 'project-d/node modules/my-lib-2/src/index.html'; + content: var(--tw-content); + } + .content-\\[\\'project-d\\/src\\/index\\.html\\'\\] { + --tw-content: 'project-d/src/index.html'; + content: var(--tw-content); + } @supports (-moz-orient: inline) { @layer base { *, ::before, ::after, ::backdrop { diff --git a/integrations/utils.ts b/integrations/utils.ts index 9e11c5a7358a..9389d638b381 100644 --- a/integrations/utils.ts +++ b/integrations/utils.ts @@ -112,8 +112,14 @@ export function test( (error, stdout, stderr) => { if (error) { if (execOptions.ignoreStdErr !== true) console.error(stderr) + if (only || debug) { + console.error(stdout) + } reject(error) } else { + if (only || debug) { + console.log(stdout.toString() + '\n\n' + stderr.toString()) + } resolve(stdout.toString() + '\n\n' + stderr.toString()) } }, @@ -187,14 +193,14 @@ export function test( child.stdout.on('data', (result) => { let content = result.toString() - if (debug) console.log(content) + if (debug || only) console.log(content) combined.push(['stdout', content]) stdoutMessages.push(content) notifyNext(stdoutActors, stdoutMessages) }) child.stderr.on('data', (result) => { let content = result.toString() - if (debug) console.error(content) + if (debug || only) console.error(content) combined.push(['stderr', content]) stderrMessages.push(content) notifyNext(stderrActors, stderrMessages)