diff --git a/Cargo.lock b/Cargo.lock index aaf0b476e4d9..dc3dce005d27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,9 +83,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" dependencies = [ "crossbeam-utils", ] @@ -200,9 +200,9 @@ checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "globset" -version = "0.4.16" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54a1028dfc5f5df5da8a56a73e6c153c9a9708ec57232470703592a3f18e49f5" +checksum = "eab69130804d941f8075cfd713bf8848a2c3b3f201a9457a11e6f87e1ab62305" dependencies = [ "aho-corasick", "bstr", @@ -218,18 +218,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" dependencies = [ "bitflags", - "ignore 0.4.23 (registry+https://github.com/rust-lang/crates.io-index)", + "ignore 0.4.23", "walkdir", ] [[package]] name = "ignore" version = "0.4.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d89fd380afde86567dfba715db065673989d6253f42b88179abd3eae47bda4b" dependencies = [ - "bstr", - "crossbeam-channel", "crossbeam-deque", - "dunce", "globset", "log", "memchr", @@ -241,11 +240,12 @@ dependencies = [ [[package]] name = "ignore" -version = "0.4.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d89fd380afde86567dfba715db065673989d6253f42b88179abd3eae47bda4b" +version = "0.4.24" dependencies = [ + "bstr", + "crossbeam-channel", "crossbeam-deque", + "dunce", "globset", "log", "memchr", @@ -599,7 +599,7 @@ dependencies = [ "dunce", "fast-glob", "globwalk", - "ignore 0.4.23", + "ignore 0.4.24", "log", 
"pretty_assertions", "rayon", diff --git a/crates/ignore/Cargo.toml b/crates/ignore/Cargo.toml index b8ae1b1bf721..bd0352d15fe3 100644 --- a/crates/ignore/Cargo.toml +++ b/crates/ignore/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ignore" -version = "0.4.23" #:version +version = "0.4.24" #:version authors = ["Andrew Gallant "] description = """ A fast library for efficiently matching ignore files such as `.gitignore` @@ -12,7 +12,7 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore" readme = "README.md" keywords = ["glob", "ignore", "gitignore", "pattern", "file"] license = "Unlicense OR MIT" -edition = "2021" +edition = "2024" [lib] name = "ignore" @@ -20,7 +20,7 @@ bench = false [dependencies] crossbeam-deque = "0.8.3" -globset = "0.4.16" +globset = "0.4.17" log = "0.4.20" memchr = "2.6.3" same-file = "1.0.6" @@ -37,7 +37,7 @@ version = "0.1.2" [dev-dependencies] bstr = { version = "1.6.2", default-features = false, features = ["std"] } -crossbeam-channel = "0.5.8" +crossbeam-channel = "0.5.15" [features] # DEPRECATED. It is a no-op. SIMD is done automatically through runtime diff --git a/crates/ignore/README.md b/crates/ignore/README.md index a4c34e505cf3..72258e6b5824 100644 --- a/crates/ignore/README.md +++ b/crates/ignore/README.md @@ -1,5 +1,5 @@ -# ignore - +ignore +====== The ignore crate provides a fast recursive directory iterator that respects various filters such as globs, file types and `.gitignore` files. This crate also provides lower level direct access to gitignore and file type matchers. 
@@ -29,6 +29,7 @@ recursively traverse the current directory while automatically filtering out files and directories according to ignore globs found in files like `.ignore` and `.gitignore`: + ```rust,no_run use ignore::Walk; diff --git a/crates/ignore/examples/walk.rs b/crates/ignore/examples/walk.rs index 5bbd10f2bc2d..9c627dc3e55b 100644 --- a/crates/ignore/examples/walk.rs +++ b/crates/ignore/examples/walk.rs @@ -18,8 +18,10 @@ fn main() { let stdout_thread = std::thread::spawn(move || { let mut stdout = std::io::BufWriter::new(std::io::stdout()); for dent in rx { - stdout.write(&*Vec::from_path_lossy(dent.path())).unwrap(); - stdout.write(b"\n").unwrap(); + stdout + .write_all(&Vec::from_path_lossy(dent.path())) + .unwrap(); + stdout.write_all(b"\n").unwrap(); } }); diff --git a/crates/ignore/src/default_types.rs b/crates/ignore/src/default_types.rs index 2cf8ad80794b..4e060b76ae83 100644 --- a/crates/ignore/src/default_types.rs +++ b/crates/ignore/src/default_types.rs @@ -27,9 +27,10 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ (&["bat", "batch"], &["*.bat"]), (&["bazel"], &[ "*.bazel", "*.bzl", "*.BUILD", "*.bazelrc", "BUILD", "MODULE.bazel", - "WORKSPACE", "WORKSPACE.bazel", + "WORKSPACE", "WORKSPACE.bazel", "WORKSPACE.bzlmod", ]), (&["bitbake"], &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]), + (&["boxlang"], &["*.bx", "*.bxm", "*.bxs"]), (&["brotli"], &["*.br"]), (&["buildstream"], &["*.bst"]), (&["bzip2"], &["*.bz2", "*.tbz2"]), @@ -39,6 +40,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ (&["carp"], &["*.carp"]), (&["cbor"], &["*.cbor"]), (&["ceylon"], &["*.ceylon"]), + (&["cfml"], &["*.cfc", "*.cfm"]), (&["clojure"], &["*.clj", "*.cljc", "*.cljs", "*.cljx"]), (&["cmake"], &["*.cmake", "CMakeLists.txt"]), (&["cmd"], &["*.bat", "*.cmd"]), @@ -62,7 +64,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ (&["cython"], &["*.pyx", "*.pxi", "*.pxd"]), (&["d"], &["*.d"]), (&["dart"], &["*.dart"]), - 
(&["devicetree"], &["*.dts", "*.dtsi"]), + (&["devicetree"], &["*.dts", "*.dtsi", "*.dtso"]), (&["dhall"], &["*.dhall"]), (&["diff"], &["*.patch", "*.diff"]), (&["dita"], &["*.dita", "*.ditamap", "*.ditaval"]), @@ -88,6 +90,8 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ (&["fsharp"], &["*.fs", "*.fsx", "*.fsi"]), (&["fut"], &["*.fut"]), (&["gap"], &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]), + (&["gdscript"], &["*.gd"]), + (&["gleam"], &["*.gleam"]), (&["gn"], &["*.gn", "*.gni"]), (&["go"], &["*.go"]), (&["gprbuild"], &["*.gpr"]), @@ -117,6 +121,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ (&["julia"], &["*.jl"]), (&["jupyter"], &["*.ipynb", "*.jpynb"]), (&["k"], &["*.k"]), + (&["kconfig"], &["Kconfig", "Kconfig.*"]), (&["kotlin"], &["*.kt", "*.kts"]), (&["lean"], &["*.lean"]), (&["less"], &["*.less"]), @@ -149,6 +154,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ ]), (&["lilypond"], &["*.ly", "*.ily"]), (&["lisp"], &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]), + (&["llvm"], &["*.ll"]), (&["lock"], &["*.lock", "package-lock.json"]), (&["log"], &["*.log"]), (&["lua"], &["*.lua"]), @@ -159,6 +165,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ "[Gg][Nn][Uu]makefile", "[Mm]akefile", "[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am", "[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in", + "Makefile.*", "*.mk", "*.mak" ]), (&["mako"], &["*.mako", "*.mao"]), @@ -181,7 +188,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ (&["motoko"], &["*.mo"]), (&["msbuild"], &[ "*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets", - "*.sln", + "*.sln", "*.slnf" ]), (&["nim"], &["*.nim", "*.nimf", "*.nimble", "*.nims"]), (&["nix"], &["*.nix"]), @@ -210,7 +217,9 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ (&["py", "python"], &["*.py", "*.pyi"]), (&["qmake"], &["*.pro", "*.pri", "*.prf"]), (&["qml"], &["*.qml"]), - (&["r"], &["*.R", "*.r", "*.Rmd", "*.Rnw"]), + (&["qrc"], 
&["*.qrc"]), + (&["qui"], &["*.ui"]), + (&["r"], &["*.R", "*.r", "*.Rmd", "*.rmd", "*.Rnw", "*.rnw"]), (&["racket"], &["*.rkt"]), (&["raku"], &[ "*.raku", "*.rakumod", "*.rakudoc", "*.rakutest", @@ -227,14 +236,16 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ // Idiomatic files "config.ru", "Gemfile", ".irbrc", "Rakefile", // Extensions - "*.gemspec", "*.rb", "*.rbw" + "*.gemspec", "*.rb", "*.rbw", "*.rake" ]), (&["rust"], &["*.rs"]), (&["sass"], &["*.sass", "*.scss"]), (&["scala"], &["*.scala", "*.sbt"]), + (&["scdoc"], &["*.scd", "*.scdoc"]), + (&["seed7"], &["*.sd7", "*.s7i"]), (&["sh"], &[ // Portable/misc. init files - ".login", ".logout", ".profile", "profile", + ".env", ".login", ".logout", ".profile", "profile", // bash-specific init files ".bash_login", "bash_login", ".bash_logout", "bash_logout", @@ -253,7 +264,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ ".zprofile", "zprofile", ".zshrc", "zshrc", // Extensions - "*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh", + "*.bash", "*.csh", "*.env", "*.ksh", "*.sh", "*.tcsh", "*.zsh", ]), (&["slim"], &["*.skim", "*.slim", "*.slime"]), (&["smarty"], &["*.tpl"]), @@ -265,7 +276,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ (&["sql"], &["*.sql", "*.psql"]), (&["stylus"], &["*.styl"]), (&["sv"], &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]), - (&["svelte"], &["*.svelte"]), + (&["svelte"], &["*.svelte", "*.svelte.ts"]), (&["svg"], &["*.svg"]), (&["swift"], &["*.swift"]), (&["swig"], &["*.def", "*.i"]), @@ -280,9 +291,8 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ (&["texinfo"], &["*.texi"]), (&["textile"], &["*.textile"]), (&["tf"], &[ - "*.tf", "*.auto.tfvars", "terraform.tfvars", "*.tf.json", - "*.auto.tfvars.json", "terraform.tfvars.json", "*.terraformrc", - "terraform.rc", "*.tfrc", "*.terraform.lock.hcl", + "*.tf", "*.tf.json", "*.tfvars", "*.tfvars.json", + "*.terraformrc", "terraform.rc", "*.tfrc", "*.terraform.lock.hcl", ]), (&["thrift"], 
&["*.thrift"]), (&["toml"], &["*.toml", "Cargo.lock"]), @@ -290,6 +300,7 @@ pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ (&["twig"], &["*.twig"]), (&["txt"], &["*.txt"]), (&["typoscript"], &["*.typoscript", "*.ts"]), + (&["typst"], &["*.typ"]), (&["usd"], &["*.usd", "*.usda", "*.usdc"]), (&["v"], &["*.v", "*.vsh"]), (&["vala"], &["*.vala"]), diff --git a/crates/ignore/src/dir.rs b/crates/ignore/src/dir.rs index 9bbf1442b382..11b58f8ca525 100644 --- a/crates/ignore/src/dir.rs +++ b/crates/ignore/src/dir.rs @@ -118,6 +118,18 @@ struct IgnoreInner { /// The absolute base path of this matcher. Populated only if parent /// directories are added. absolute_base: Option>, + /// The directory that gitignores should be interpreted relative to. + /// + /// Usually this is the directory containing the gitignore file. But in + /// some cases, like for global gitignores or for gitignores specified + /// explicitly, this should generally be set to the current working + /// directory. This is only used for global gitignores or "explicit" + /// gitignores. + /// + /// When `None`, this means the CWD could not be determined or is unknown. + /// In this case, global gitignore files are ignored because they otherwise + /// cannot be matched correctly. + global_gitignores_relative_to: Option, /// Explicit global ignore matchers specified by the caller. 
explicit_ignores: Arc>, /// Ignore files used in addition to `.ignore` @@ -209,7 +221,7 @@ impl Ignore { igtmp.is_absolute_parent = true; igtmp.absolute_base = Some(absolute_base.clone()); igtmp.has_git = if self.0.opts.require_git && self.0.opts.git_ignore { - parent.join(".git").exists() + parent.join(".git").exists() || parent.join(".jj").exists() } else { false }; @@ -241,7 +253,7 @@ impl Ignore { } else { None }; - let has_git = git_type.map(|_| true).unwrap_or(false); + let has_git = git_type.is_some() || dir.join(".jj").exists(); let mut errs = PartialErrorBuilder::default(); let custom_ig_matcher = if self.0.custom_ignore_filenames.is_empty() { @@ -280,6 +292,7 @@ impl Ignore { errs.maybe_push(err); m }; + let gi_exclude_matcher = if !self.0.opts.git_exclude { Gitignore::empty() } else { @@ -308,6 +321,7 @@ impl Ignore { parent: Some(self.clone()), is_absolute_parent: false, absolute_base: self.0.absolute_base.clone(), + global_gitignores_relative_to: self.0.global_gitignores_relative_to.clone(), explicit_ignores: self.0.explicit_ignores.clone(), custom_ignore_filenames: self.0.custom_ignore_filenames.clone(), custom_ignore_matcher: custom_ig_matcher, @@ -429,36 +443,38 @@ impl Ignore { saw_git = saw_git || ig.0.has_git; } if self.0.opts.parents { - // CHANGED: We removed a code path that rewrote the `path` to be relative to - // `self.absolute_base()` because it assumed that the every path is inside the base - // which is not the case for us as we use `WalkBuilder#add` to add roots outside of the - // base. 
- for ig in self.parents().skip_while(|ig| !ig.0.is_absolute_parent) { - if m_custom_ignore.is_none() { - m_custom_ignore = - ig.0.custom_ignore_matcher - .matched(&path, is_dir) - .map(IgnoreMatch::gitignore); - } - if m_ignore.is_none() { - m_ignore = - ig.0.ignore_matcher - .matched(&path, is_dir) - .map(IgnoreMatch::gitignore); - } - if any_git && !saw_git && m_gi.is_none() { - m_gi = - ig.0.git_ignore_matcher - .matched(&path, is_dir) - .map(IgnoreMatch::gitignore); + if let Some(_) = self.absolute_base() { + // CHANGED: We removed a code path that rewrote the `path` to be relative to + // `self.absolute_base()` because it assumed that the every path is inside the base + // which is not the case for us as we use `WalkBuilder#add` to add roots outside of the + // base. + for ig in self.parents().skip_while(|ig| !ig.0.is_absolute_parent) { + if m_custom_ignore.is_none() { + m_custom_ignore = + ig.0.custom_ignore_matcher + .matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + if m_ignore.is_none() { + m_ignore = + ig.0.ignore_matcher + .matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + if any_git && !saw_git && m_gi.is_none() { + m_gi = + ig.0.git_ignore_matcher + .matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + if any_git && !saw_git && m_gi_exclude.is_none() { + m_gi_exclude = + ig.0.git_exclude_matcher + .matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + saw_git = saw_git || ig.0.has_git; } - if any_git && !saw_git && m_gi_exclude.is_none() { - m_gi_exclude = - ig.0.git_exclude_matcher - .matched(&path, is_dir) - .map(IgnoreMatch::gitignore); - } - saw_git = saw_git || ig.0.has_git; } } for gi in self.0.explicit_ignores.iter().rev() { @@ -520,6 +536,12 @@ impl Ignore { pub(crate) fn parents(&self) -> Parents<'_> { Parents(Some(self)) } + + /// Returns the first absolute path of the first absolute parent, if + /// one exists. 
+ fn absolute_base(&self) -> Option<&Path> { + self.0.absolute_base.as_ref().map(|p| &***p) + } } /// An iterator over all parents of an ignore matcher, including itself. @@ -554,6 +576,16 @@ pub(crate) struct IgnoreBuilder { explicit_ignores: Vec, /// Ignore files in addition to .ignore. custom_ignore_filenames: Vec, + /// The directory that gitignores should be interpreted relative to. + /// + /// Usually this is the directory containing the gitignore file. But in + /// some cases, like for global gitignores or for gitignores specified + /// explicitly, this should generally be set to the current working + /// directory. This is only used for global gitignores or "explicit" + /// gitignores. + /// + /// When `None`, global gitignores are ignored. + global_gitignores_relative_to: Option, /// Ignore config. opts: IgnoreOptions, } @@ -561,8 +593,9 @@ pub(crate) struct IgnoreBuilder { impl IgnoreBuilder { /// Create a new builder for an `Ignore` matcher. /// - /// All relative file paths are resolved with respect to the current - /// working directory. + /// It is likely a bug to use this without also calling `current_dir()` + /// outside of tests. This isn't made mandatory because this is an internal + /// abstraction and it's annoying to update tests. pub(crate) fn new() -> IgnoreBuilder { IgnoreBuilder { dir: Path::new("").to_path_buf(), @@ -570,6 +603,7 @@ impl IgnoreBuilder { types: Arc::new(Types::empty()), explicit_ignores: vec![], custom_ignore_filenames: vec![], + global_gitignores_relative_to: None, opts: IgnoreOptions { hidden: true, ignore: true, @@ -588,10 +622,20 @@ impl IgnoreBuilder { /// The matcher returned won't match anything until ignore rules from /// directories are added to it. pub(crate) fn build(&self) -> Ignore { + self.build_with_cwd(None) + } + + /// Builds a new `Ignore` matcher using the given CWD directory. + /// + /// The matcher returned won't match anything until ignore rules from + /// directories are added to it. 
+ pub(crate) fn build_with_cwd(&self, cwd: Option) -> Ignore { + let global_gitignores_relative_to = + cwd.or_else(|| self.global_gitignores_relative_to.clone()); let git_global_matcher = if !self.opts.git_global { Gitignore::empty() - } else { - let mut builder = GitignoreBuilder::new(""); + } else if let Some(ref cwd) = global_gitignores_relative_to { + let mut builder = GitignoreBuilder::new(cwd); builder .case_insensitive(self.opts.ignore_case_insensitive) .unwrap(); @@ -600,6 +644,9 @@ impl IgnoreBuilder { log::debug!("{}", err); } gi + } else { + log::debug!("ignoring global gitignore file because CWD is not known"); + Gitignore::empty() }; Ignore(Arc::new(IgnoreInner { @@ -610,6 +657,7 @@ impl IgnoreBuilder { parent: None, is_absolute_parent: true, absolute_base: None, + global_gitignores_relative_to, explicit_ignores: Arc::new(self.explicit_ignores.clone()), custom_ignore_filenames: Arc::new(self.custom_ignore_filenames.clone()), custom_ignore_matcher: Gitignore::empty(), @@ -622,6 +670,12 @@ impl IgnoreBuilder { })) } + /// Set the current directory used for matching global gitignores. + pub(crate) fn current_dir(&mut self, cwd: impl Into) -> &mut IgnoreBuilder { + self.global_gitignores_relative_to = Some(cwd.into()); + self + } + /// Add an override matcher. /// /// By default, no override matcher is used. 
@@ -843,11 +897,17 @@ fn resolve_git_commondir(dir: &Path, git_type: Option) -> Result + ?Sized>(prefix: &'a P, path: &'a Path) -> &'a Path { + strip_prefix(prefix, path).map_or(path, |p| p) +} + #[cfg(test)] mod tests { use std::{io::Write, path::Path}; - use crate::{dir::IgnoreBuilder, gitignore::Gitignore, tests::TempDir, Error}; + use crate::{Error, dir::IgnoreBuilder, gitignore::Gitignore, tests::TempDir}; fn wfile>(path: P, contents: &str) { let mut file = std::fs::File::create(path).unwrap(); @@ -913,6 +973,19 @@ mod tests { assert!(ig.matched("baz", false).is_none()); } + #[test] + fn gitignore_with_jj() { + let td = tmpdir(); + mkdirp(td.path().join(".jj")); + wfile(td.path().join(".gitignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + #[test] fn gitignore_no_git() { let td = tmpdir(); diff --git a/crates/ignore/src/gitignore.rs b/crates/ignore/src/gitignore.rs index 7da86153cf79..f822d8390d20 100644 --- a/crates/ignore/src/gitignore.rs +++ b/crates/ignore/src/gitignore.rs @@ -20,8 +20,8 @@ use { }; use crate::{ - pathutil::{is_file_name, strip_prefix}, Error, Match, PartialErrorBuilder, + pathutil::{is_file_name, strip_prefix}, }; /// Glob represents a single glob in a gitignore file. @@ -128,7 +128,10 @@ impl Gitignore { /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not /// set or is empty, then `$HOME/.config/git/ignore` is used instead. pub fn global() -> (Gitignore, Option) { - GitignoreBuilder::new("").build_global() + match std::env::current_dir() { + Ok(cwd) => GitignoreBuilder::new(cwd).build_global(), + Err(err) => (Gitignore::empty(), Some(err.into())), + } } /// Creates a new empty gitignore matcher that never matches anything. 
@@ -303,6 +306,7 @@ pub struct GitignoreBuilder { root: PathBuf, globs: Vec, case_insensitive: bool, + allow_unclosed_class: bool, // CHANGED: Add a flag to have Gitignore rules that apply only to files. only_on_files: bool, } @@ -321,6 +325,7 @@ impl GitignoreBuilder { root: strip_prefix("./", root).unwrap_or(root).to_path_buf(), globs: vec![], case_insensitive: false, + allow_unclosed_class: true, // CHANGED: Add a flag to have Gitignore rules that apply only to files. only_on_files: false, } @@ -403,6 +408,15 @@ impl GitignoreBuilder { break; } }; + + // Match Git's handling of .gitignore files that begin with the Unicode BOM + const UTF8_BOM: &str = "\u{feff}"; + let line = if i == 0 { + line.trim_start_matches(UTF8_BOM) + } else { + &line + }; + if let Err(err) = self.add_line(Some(path.to_path_buf()), &line) { errs.push(err.tagged(path, lineno)); } @@ -506,6 +520,7 @@ impl GitignoreBuilder { .literal_separator(true) .case_insensitive(self.case_insensitive) .backslash_escape(true) + .allow_unclosed_class(self.allow_unclosed_class) .build() .map_err(|err| Error::Glob { glob: Some(glob.original.clone()), @@ -529,6 +544,23 @@ impl GitignoreBuilder { Ok(self) } + /// Toggle whether unclosed character classes are allowed. When allowed, + /// a `[` without a matching `]` is treated literally instead of resulting + /// in a parse error. + /// + /// For example, if this is set then the glob `[abc` will be treated as the + /// literal string `[abc` instead of returning an error. + /// + /// By default, this is true in order to match established `gitignore` + /// semantics. Generally speaking, enabling this leads to worse failure + /// modes since the glob parser becomes more permissive. You might want to + /// enable this when compatibility (e.g., with POSIX glob implementations) + /// is more important than good error messages. 
+ pub fn allow_unclosed_class(&mut self, yes: bool) -> &mut GitignoreBuilder { + self.allow_unclosed_class = yes; + self + } + /// CHANGED: Add a flag to have Gitignore rules that apply only to files. /// /// If this is set, then the globs will only be matched against file paths. diff --git a/crates/ignore/src/lib.rs b/crates/ignore/src/lib.rs index cd0af7ad1c47..609004c4ea33 100644 --- a/crates/ignore/src/lib.rs +++ b/crates/ignore/src/lib.rs @@ -495,11 +495,7 @@ impl Match { /// Return the match if it is not none. Otherwise, return other. pub fn or(self, other: Self) -> Self { - if self.is_none() { - other - } else { - self - } + if self.is_none() { other } else { self } } } @@ -544,7 +540,7 @@ mod tests { let tmpdir = env::temp_dir(); for _ in 0..TRIES { - let count = COUNTER.fetch_add(1, Ordering::SeqCst); + let count = COUNTER.fetch_add(1, Ordering::Relaxed); let path = tmpdir.join("rust-ignore").join(count.to_string()); if path.is_dir() { continue; diff --git a/crates/ignore/src/overrides.rs b/crates/ignore/src/overrides.rs index 693c7dd0a79b..afb9f16ce2c6 100644 --- a/crates/ignore/src/overrides.rs +++ b/crates/ignore/src/overrides.rs @@ -1,5 +1,6 @@ /*! The overrides module provides a way to specify a set of override globs. + This provides functionality similar to `--include` or `--exclude` in command line tools. */ @@ -7,8 +8,8 @@ line tools. use std::path::Path; use crate::{ - gitignore::{self, Gitignore, GitignoreBuilder}, Error, Match, + gitignore::{self, Gitignore, GitignoreBuilder}, }; /// Glob represents a single glob in an override matcher. @@ -116,9 +117,9 @@ impl OverrideBuilder { /// /// Matching is done relative to the directory path provided. pub fn new>(path: P) -> OverrideBuilder { - OverrideBuilder { - builder: GitignoreBuilder::new(path), - } + let mut builder = GitignoreBuilder::new(path); + builder.allow_unclosed_class(false); + OverrideBuilder { builder } } /// Builds a new override matcher from the globs added so far. 
@@ -141,7 +142,8 @@ impl OverrideBuilder { /// Toggle whether the globs should be matched case insensitively or not. /// - /// When this option is changed, only globs added after the change will be affected. + /// When this option is changed, only globs added after the change will be + /// affected. /// /// This is disabled by default. pub fn case_insensitive(&mut self, yes: bool) -> Result<&mut OverrideBuilder, Error> { @@ -150,6 +152,28 @@ impl OverrideBuilder { self.builder.case_insensitive(yes)?; Ok(self) } + + /// Toggle whether unclosed character classes are allowed. When allowed, + /// a `[` without a matching `]` is treated literally instead of resulting + /// in a parse error. + /// + /// For example, if this is set then the glob `[abc` will be treated as the + /// literal string `[abc` instead of returning an error. + /// + /// By default, this is false. Generally speaking, enabling this leads to + /// worse failure modes since the glob parser becomes more permissive. You + /// might want to enable this when compatibility (e.g., with POSIX glob + /// implementations) is more important than good error messages. + /// + /// This default is different from the default for [`Gitignore`]. Namely, + /// [`Gitignore`] is intended to match git's behavior as-is. But this + /// abstraction for "override" globs does not necessarily conform to any + /// other known specification and instead prioritizes better error + /// messages. 
+ pub fn allow_unclosed_class(&mut self, yes: bool) -> &mut OverrideBuilder { + self.builder.allow_unclosed_class(yes); + self + } } #[cfg(test)] diff --git a/crates/ignore/src/types.rs b/crates/ignore/src/types.rs index 814ee4a0a5f7..aa23999c06fe 100644 --- a/crates/ignore/src/types.rs +++ b/crates/ignore/src/types.rs @@ -91,7 +91,7 @@ use { regex_automata::util::pool::Pool, }; -use crate::{default_types::DEFAULT_TYPES, pathutil::file_name, Error, Match}; +use crate::{Error, Match, default_types::DEFAULT_TYPES, pathutil::file_name}; /// Glob represents a single glob in a set of file type definitions. /// diff --git a/crates/ignore/src/walk.rs b/crates/ignore/src/walk.rs index 9c1f7413d918..ebf20947a692 100644 --- a/crates/ignore/src/walk.rs +++ b/crates/ignore/src/walk.rs @@ -5,7 +5,7 @@ use std::{ io, path::{Path, PathBuf}, sync::atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering}, - sync::Arc, + sync::{Arc, OnceLock}, }; use { @@ -15,11 +15,11 @@ use { }; use crate::{ + Error, PartialErrorBuilder, dir::{Ignore, IgnoreBuilder}, gitignore::{Gitignore, GitignoreBuilder}, overrides::Override, types::Types, - Error, PartialErrorBuilder, }; /// A directory entry with a possible error attached. @@ -484,6 +484,7 @@ pub struct WalkBuilder { paths: Vec, ig_builder: IgnoreBuilder, max_depth: Option, + min_depth: Option, max_filesize: Option, follow_links: bool, same_file_system: bool, @@ -491,6 +492,17 @@ pub struct WalkBuilder { threads: usize, skip: Option>, filter: Option, + /// The directory that gitignores should be interpreted relative to. + /// + /// Usually this is the directory containing the gitignore file. But in + /// some cases, like for global gitignores or for gitignores specified + /// explicitly, this should generally be set to the current working + /// directory. This is only used for global gitignores or "explicit" + /// gitignores. + /// + /// When `None`, the CWD is fetched from `std::env::current_dir()`. 
If + /// that fails, then global gitignores are ignored (an error is logged). + global_gitignores_relative_to: OnceLock>>, } #[derive(Clone)] @@ -508,10 +520,18 @@ impl std::fmt::Debug for WalkBuilder { .field("paths", &self.paths) .field("ig_builder", &self.ig_builder) .field("max_depth", &self.max_depth) + .field("min_depth", &self.min_depth) .field("max_filesize", &self.max_filesize) .field("follow_links", &self.follow_links) + .field("same_file_system", &self.same_file_system) + .field("sorter", &"<...>") .field("threads", &self.threads) .field("skip", &self.skip) + .field("filter", &"<...>") + .field( + "global_gitignores_relative_to", + &self.global_gitignores_relative_to, + ) .finish() } } @@ -528,6 +548,7 @@ impl WalkBuilder { paths: vec![path.as_ref().to_path_buf()], ig_builder: IgnoreBuilder::new(), max_depth: None, + min_depth: None, max_filesize: None, follow_links: false, same_file_system: false, @@ -535,6 +556,7 @@ impl WalkBuilder { threads: 0, skip: None, filter: None, + global_gitignores_relative_to: OnceLock::new(), } } @@ -542,6 +564,7 @@ impl WalkBuilder { pub fn build(&self) -> Walk { let follow_links = self.follow_links; let max_depth = self.max_depth; + let min_depth = self.min_depth; let sorter = self.sorter.clone(); let its = self .paths @@ -556,6 +579,9 @@ impl WalkBuilder { if let Some(max_depth) = max_depth { wd = wd.max_depth(max_depth); } + if let Some(min_depth) = min_depth { + wd = wd.min_depth(min_depth); + } if let Some(ref sorter) = sorter { match sorter.clone() { Sorter::ByName(cmp) => { @@ -571,7 +597,10 @@ impl WalkBuilder { }) .collect::>() .into_iter(); - let ig_root = self.ig_builder.build(); + let ig_root = self + .get_or_set_current_dir() + .map(|cwd| self.ig_builder.build_with_cwd(Some(cwd.to_path_buf()))) + .unwrap_or_else(|| self.ig_builder.build()); Walk { its, it: None, @@ -589,10 +618,15 @@ impl WalkBuilder { /// Instead, the returned value must be run with a closure. 
e.g., /// `builder.build_parallel().run(|| |path| { println!("{path:?}"); WalkState::Continue })`. pub fn build_parallel(&self) -> WalkParallel { + let ig_root = self + .get_or_set_current_dir() + .map(|cwd| self.ig_builder.build_with_cwd(Some(cwd.to_path_buf()))) + .unwrap_or_else(|| self.ig_builder.build()); WalkParallel { paths: self.paths.clone().into_iter(), - ig_root: self.ig_builder.build(), + ig_root, max_depth: self.max_depth, + min_depth: self.min_depth, max_filesize: self.max_filesize, follow_links: self.follow_links, same_file_system: self.same_file_system, @@ -617,6 +651,20 @@ impl WalkBuilder { /// The default, `None`, imposes no depth restriction. pub fn max_depth(&mut self, depth: Option) -> &mut WalkBuilder { self.max_depth = depth; + if self.min_depth.is_some() && self.max_depth.is_some() && self.max_depth < self.min_depth { + self.max_depth = self.min_depth; + } + self + } + + /// The minimum depth to recurse. + /// + /// The default, `None`, imposes no minimum depth restriction. + pub fn min_depth(&mut self, depth: Option) -> &mut WalkBuilder { + self.min_depth = depth; + if self.max_depth.is_some() && self.min_depth.is_some() && self.min_depth > self.max_depth { + self.min_depth = self.max_depth; + } self } @@ -647,11 +695,26 @@ impl WalkBuilder { /// /// This has lower precedence than all other sources of ignore rules. /// + /// # Errors + /// /// If there was a problem adding the ignore file, then an error is /// returned. Note that the error may indicate *partial* failure. For /// example, if an ignore file contains an invalid glob, all other globs /// are still applied. + /// + /// An error will also occur if this walker could not get the current + /// working directory (and `WalkBuilder::current_dir` isn't set). 
pub fn add_ignore>(&mut self, path: P) -> Option { + // CHANGED: Dropped this code + // let path = path.as_ref(); + // let Some(cwd) = self.get_or_set_current_dir() else { + // let err = std::io::Error::other(format!( + // "CWD is not known, ignoring global gitignore {}", + // path.display() + // )); + // return Some(err.into()); + // }; + // let mut builder = GitignoreBuilder::new(cwd); let mut builder = GitignoreBuilder::new(""); let mut errs = PartialErrorBuilder::default(); errs.maybe_push(builder.add(path)); @@ -799,6 +862,10 @@ impl WalkBuilder { /// /// When disabled, git-related ignore rules are applied even when searching /// outside a git repository. + /// + /// In particular, if this is `false` then `.gitignore` files will be read + /// from parent directories above the git root directory containing `.git`, + /// which is different from the git behavior. pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder { self.ig_builder.require_git(yes); self @@ -895,6 +962,10 @@ impl WalkBuilder { /// /// Note that the errors for reading entries that may not satisfy the /// predicate will still be yielded. + /// + /// Note also that only one filter predicate can be applied to a + /// `WalkBuilder`. Calling this subsequent times overrides previous filter + /// predicates. pub fn filter_entry

(&mut self, filter: P) -> &mut WalkBuilder where P: Fn(&DirEntry) -> bool + Send + Sync + 'static, @@ -902,6 +973,49 @@ impl WalkBuilder { self.filter = Some(Filter(Arc::new(filter))); self } + + /// Set the current working directory used for matching global gitignores. + /// + /// If this is not set, then this walker will attempt to discover the + /// correct path from the environment's current working directory. If + /// that fails, then global gitignore files will be ignored. + /// + /// Global gitignore files come from things like a user's git configuration + /// or from gitignore files added via [`WalkBuilder::add_ignore`]. + pub fn current_dir(&mut self, cwd: impl Into) -> &mut WalkBuilder { + let cwd = cwd.into(); + self.ig_builder.current_dir(cwd.clone()); + if let Err(cwd) = self.global_gitignores_relative_to.set(Ok(cwd)) { + // OK because `Err` from `set` implies a value exists. + *self.global_gitignores_relative_to.get_mut().unwrap() = cwd; + } + self + } + + /// Gets the currently configured CWD on this walk builder. + /// + /// This is "lazy." That is, we only ask for the CWD from the environment + /// if `WalkBuilder::current_dir` hasn't been called yet. And we ensure + /// that we only do it once. 
+ fn get_or_set_current_dir(&self) -> Option<&Path> { + let result = self.global_gitignores_relative_to.get_or_init(|| { + let result = std::env::current_dir().map_err(Arc::new); + match result { + Ok(ref path) => { + log::trace!("automatically discovered CWD: {}", path.display()); + } + Err(ref err) => { + log::debug!( + "failed to find CWD \ + (global gitignores will be ignored): \ + {err}" + ); + } + } + result + }); + result.as_ref().ok().map(|path| &**path) + } } /// Walk is a recursive directory iterator over file paths in one or more @@ -1191,6 +1305,7 @@ pub struct WalkParallel { ig_root: Ignore, max_filesize: Option, max_depth: Option, + min_depth: Option, follow_links: bool, same_file_system: bool, threads: usize, @@ -1288,6 +1403,7 @@ impl WalkParallel { quit_now: quit_now.clone(), active_workers: active_workers.clone(), max_depth: self.max_depth, + min_depth: self.min_depth, max_filesize: self.max_filesize, follow_links: self.follow_links, skip: self.skip.clone(), @@ -1303,7 +1419,9 @@ impl WalkParallel { fn threads(&self) -> usize { if self.threads == 0 { - 2 + std::thread::available_parallelism() + .map_or(1, |n| n.get()) + .min(12) } else { self.threads } @@ -1418,8 +1536,11 @@ impl Stack { stealers: stealers.clone(), }) .collect(); - // Distribute the initial messages. + // Distribute the initial messages, reverse the order to cancel out + // the other reversal caused by the inherent LIFO processing of the + // per-thread stacks which are filled here. init.into_iter() + .rev() .zip(stacks.iter().cycle()) .for_each(|(m, s)| s.push(m)); stacks @@ -1474,6 +1595,8 @@ struct Worker<'s> { /// The maximum depth of directories to descend. A value of `0` means no /// descension at all. max_depth: Option, + /// The minimum depth of directories to descend. + min_depth: Option, /// The maximum size a searched file can be (in bytes). If a file exceeds /// this size it will be skipped. 
max_filesize: Option, @@ -1502,10 +1625,19 @@ impl<'s> Worker<'s> { } fn run_one(&mut self, mut work: Work) -> WalkState { + let should_visit = self + .min_depth + .map(|min_depth| work.dent.depth() >= min_depth) + .unwrap_or(true); + // If the work is not a directory, then we can just execute the // caller's callback immediately and move on. if work.is_symlink() || !work.is_dir() { - return self.visitor.visit(Ok(work.dent)); + return if should_visit { + self.visitor.visit(Ok(work.dent)) + } else { + WalkState::Continue + }; } if let Some(err) = work.add_parents() { let state = self.visitor.visit(Err(err)); @@ -1538,9 +1670,11 @@ impl<'s> Worker<'s> { // entry before passing the error value. let readdir = work.read_dir(); let depth = work.dent.depth(); - let state = self.visitor.visit(Ok(work.dent)); - if !state.is_continue() { - return state; + if should_visit { + let state = self.visitor.visit(Ok(work.dent)); + if !state.is_continue() { + return state; + } } if !descend { return WalkState::Skip; @@ -1885,7 +2019,7 @@ fn device_num>(path: P) -> io::Result { #[cfg(windows)] fn device_num>(path: P) -> io::Result { - use winapi_util::{file, Handle}; + use winapi_util::{Handle, file}; let h = Handle::from_path_any(path)?; file::information(h).map(|info| info.volume_serial_number()) @@ -2144,6 +2278,51 @@ mod tests { ); } + #[test] + fn min_depth() { + let td = tmpdir(); + mkdirp(td.path().join("a/b/c")); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("a/b/foo"), ""); + wfile(td.path().join("a/b/c/foo"), ""); + + let builder = WalkBuilder::new(td.path()); + assert_paths( + td.path(), + &builder, + &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"], + ); + let mut builder = WalkBuilder::new(td.path()); + assert_paths( + td.path(), + &builder.min_depth(Some(0)), + &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"], + ); + assert_paths( + td.path(), + &builder.min_depth(Some(1)), + &["a", "a/b", 
"a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"], + ); + assert_paths( + td.path(), + builder.min_depth(Some(2)), + &["a/b", "a/b/c", "a/b/c/foo", "a/b/foo", "a/foo"], + ); + assert_paths( + td.path(), + builder.min_depth(Some(3)), + &["a/b/c", "a/b/c/foo", "a/b/foo"], + ); + assert_paths(td.path(), builder.min_depth(Some(10)), &[]); + + assert_paths( + td.path(), + builder.min_depth(Some(2)).max_depth(Some(2)), + &["a/b", "a/foo"], + ); + } + #[test] fn max_filesize() { let td = tmpdir(); diff --git a/crates/ignore/tests/gitignore_skip_bom.gitignore b/crates/ignore/tests/gitignore_skip_bom.gitignore new file mode 100644 index 000000000000..a78c13a9b992 --- /dev/null +++ b/crates/ignore/tests/gitignore_skip_bom.gitignore @@ -0,0 +1,2 @@ +ignore/this/path +# This file begins with a BOM (U+FEFF) diff --git a/crates/ignore/tests/gitignore_skip_bom.rs b/crates/ignore/tests/gitignore_skip_bom.rs new file mode 100644 index 000000000000..14d533d63642 --- /dev/null +++ b/crates/ignore/tests/gitignore_skip_bom.rs @@ -0,0 +1,17 @@ +use ignore::gitignore::GitignoreBuilder; + +const IGNORE_FILE: &'static str = "tests/gitignore_skip_bom.gitignore"; + +/// Skip a Byte-Order Mark (BOM) at the beginning of the file, matching Git's +/// behavior. +/// +/// Ref: +#[test] +fn gitignore_skip_bom() { + let mut builder = GitignoreBuilder::new("ROOT"); + let error = builder.add(IGNORE_FILE); + assert!(error.is_none(), "failed to open gitignore file"); + let g = builder.build().unwrap(); + + assert!(g.matched("ignore/this/path", false).is_ignore()); +}