diff --git a/CHANGELOG.md b/CHANGELOG.md index d9e29a7b6..0cc1c1312 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ # Changelog +- **Added** A task's `env` and `untrackedEnv` glob patterns now support `!` negation: a `!`-prefixed pattern excludes matching variables (e.g. `["VITE_*", "!VITE_SECRET"]` tracks every `VITE_*` except `VITE_SECRET`) ([#425](https://github.com/voidzero-dev/vite-task/pull/425)) - **Fixed** `package.json` and `pnpm-workspace.yaml` files with a UTF-8 BOM no longer fail to parse ([#424](https://github.com/voidzero-dev/vite-task/pull/424)) - **Changed** `vp run --filter ` now exits 0 with a warning when the filter matches no packages, matching pnpm. Use `--fail-if-no-match` to restore the previous strict behavior ([#393](https://github.com/voidzero-dev/vite-task/pull/393)) - **Added** task command shorthands for defining tasks as command strings or command string arrays ([#391](https://github.com/voidzero-dev/vite-task/pull/391)) diff --git a/Cargo.lock b/Cargo.lock index eb132b9de..3da36c591 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1529,6 +1529,19 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -4065,8 +4078,8 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" name = "vite_glob" version = "0.0.0" dependencies = [ + "globset", "thiserror 2.0.18", - "vite_path", "vite_str", "wax", ] diff --git a/Cargo.toml b/Cargo.toml index a9934d3a5..a73393501 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,6 +80,7 @@ fspy_shared = { path = 
"crates/fspy_shared" } fspy_shared_unix = { path = "crates/fspy_shared_unix" } futures = "0.3.31" futures-util = "0.3.31" +globset = "0.4.18" jsonc-parser = { version = "0.32.0", features = ["serde"] } libc = "0.2.185" libtest-mimic = "0.8.2" diff --git a/crates/vite_glob/Cargo.toml b/crates/vite_glob/Cargo.toml index 7bfa46ccb..c5d31c807 100644 --- a/crates/vite_glob/Cargo.toml +++ b/crates/vite_glob/Cargo.toml @@ -8,8 +8,8 @@ publish = false rust-version.workspace = true [dependencies] +globset = { workspace = true } thiserror = { workspace = true } -vite_path = { workspace = true } wax = { workspace = true } [dev-dependencies] diff --git a/crates/vite_glob/README.md b/crates/vite_glob/README.md new file mode 100644 index 000000000..5c91b6d93 --- /dev/null +++ b/crates/vite_glob/README.md @@ -0,0 +1,22 @@ +# vite_glob + +Centralizes glob-matching semantics so every crate in the workspace matches +patterns the same way, instead of each call site reaching for an ad-hoc glob +engine with subtly different rules (separators, case sensitivity, negation). + +Two use cases, each with its own module, matcher, and error type: + +- **`env`** — environment-variable **name** matching. Names are flat strings, + not paths, so this is backed by `globset` with path-separator handling + disabled: `*`/`?`/`[...]`/`{a,b}` are plain-string wildcards, and matching is + case-sensitive on Unix and case-insensitive on Windows (mirroring env lookup). + Use `EnvGlob` for one literal pattern, or `EnvGlobSet` for a set with + negation: a `!`-prefixed pattern excludes (e.g. `["VITE_*", "!VITE_SECRET"]`). +- **`path`** — filesystem **path** matching with gitignore semantics, backed by + `wax`. `!`-prefixed patterns negate; first-match-wins, or last-match-wins once + any negation is present. Use `PathGlobSet`. 
+ +Keeping both behind one crate means a change to how, say, env names are matched +happens in exactly one place and applies everywhere — the runner's cache +fingerprinting, the IPC server's `getEnvs`, workspace package discovery, and so +on. diff --git a/crates/vite_glob/src/env.rs b/crates/vite_glob/src/env.rs new file mode 100644 index 000000000..65e163311 --- /dev/null +++ b/crates/vite_glob/src/env.rs @@ -0,0 +1,219 @@ +//! Glob matching for environment-variable **names** (flat strings, never paths). +//! +//! Backed by `globset` with path-separator handling disabled, so `*`, `?`, +//! `[...]`, and `{a,b}` behave as plain-string wildcards. Matching is +//! case-sensitive on Unix and case-insensitive on Windows, mirroring how +//! environment variables are looked up on each platform. +//! +//! [`EnvGlobSet`] supports negation: a `!`-prefixed pattern *excludes* names, +//! and a name matches the set when it matches an include pattern and no exclude +//! pattern. [`EnvGlob`] matches a single pattern literally — `!` is an ordinary +//! character there (no negation), since a lone exclude has nothing to subtract +//! from. + +use globset::{Glob, GlobBuilder, GlobMatcher, GlobSet, GlobSetBuilder}; + +/// Error compiling an environment-variable name pattern. +#[derive(Debug, thiserror::Error)] +#[error(transparent)] +pub struct EnvGlobError(#[from] globset::Error); + +/// Compiles `pattern` into a `globset::Glob` configured for env-name matching: +/// separators are not special, and case follows the platform's env semantics. +fn build(pattern: &str) -> Result { + GlobBuilder::new(pattern) + // Env names contain no path separators, so disabling separator handling + // makes `*`/`?` match any character — a pure string match. + .literal_separator(false) + // Env lookups are case-insensitive on Windows, case-sensitive elsewhere. + .case_insensitive(cfg!(windows)) + .build() +} + +/// Matches a single environment-variable name against one glob pattern. 
+#[derive(Debug, Clone)] +pub struct EnvGlob { + matcher: GlobMatcher, +} + +impl EnvGlob { + /// Compiles `pattern` into an env-name matcher. + /// + /// # Errors + /// Returns an error if `pattern` is not a valid glob. + pub fn new(pattern: &str) -> Result { + Ok(Self { matcher: build(pattern)?.compile_matcher() }) + } + + /// Returns whether `name` matches the pattern. + #[must_use] + pub fn is_match(&self, name: &str) -> bool { + self.matcher.is_match(name) + } +} + +/// Matches an environment-variable name against a **set** of glob patterns, +/// with negation. +/// +/// Patterns are split into includes and excludes: a `!`-prefixed pattern is an +/// **exclude**, any other pattern is an **include**. +/// +/// A name matches when it matches some include pattern and no exclude pattern. +/// A set with no include patterns matches nothing (an exclude has nothing to +/// subtract from), so an empty set — or a set of only excludes — never matches. +#[derive(Debug, Clone)] +pub struct EnvGlobSet { + include: GlobSet, + exclude: GlobSet, +} + +impl EnvGlobSet { + /// Compiles `patterns` into a combined env-name matcher. + /// + /// # Errors + /// Returns an error if any pattern is not a valid glob. + pub fn new(patterns: I) -> Result + where + I: IntoIterator, + S: AsRef, + { + let mut include = GlobSetBuilder::new(); + let mut exclude = GlobSetBuilder::new(); + for pattern in patterns { + let pattern = pattern.as_ref(); + if let Some(rest) = pattern.strip_prefix('!') { + exclude.add(build(rest)?); + } else { + include.add(build(pattern)?); + } + } + Ok(Self { include: include.build()?, exclude: exclude.build()? }) + } + + /// Returns whether `name` matches an include pattern and no exclude pattern. 
+ #[must_use] + pub fn is_match(&self, name: &str) -> bool { + self.include.is_match(name) && !self.exclude.is_match(name) + } +} + +#[cfg(test)] +mod tests { + use super::{EnvGlob, EnvGlobSet}; + + #[test] + fn matches_star_prefix_and_suffix() { + let g = EnvGlob::new("VITE_*").unwrap(); + assert!(g.is_match("VITE_FOO")); + assert!(g.is_match("VITE_")); // `*` matches the empty string + assert!(!g.is_match("MYVITE_FOO")); + + let g = EnvGlob::new("*_KEY").unwrap(); + assert!(g.is_match("MY_KEY")); + assert!(!g.is_match("MY_KEYS")); + + let g = EnvGlob::new("*_CREDENTIAL*").unwrap(); + assert!(g.is_match("AWS_CREDENTIALS")); + assert!(g.is_match("X_CREDENTIAL_Y")); + } + + #[test] + fn question_mark_matches_exactly_one_char() { + let g = EnvGlob::new("APP?_*").unwrap(); + assert!(g.is_match("APP1_TOKEN")); + assert!(g.is_match("APP2_NAME")); + // `?` requires exactly one character, so `APP_X` (nothing before `_`) does not match. + assert!(!g.is_match("APP_X")); + } + + #[test] + fn brace_alternation_is_supported() { + let g = EnvGlob::new("{VITE,NEXT}_*").unwrap(); + assert!(g.is_match("VITE_FOO")); + assert!(g.is_match("NEXT_BAR")); + assert!(!g.is_match("NUXT_BAR")); + } + + #[test] + fn dot_and_separators_are_literal_not_path_special() { + // Env names are flat strings: `*` spans `.` and `/` (no path semantics), + // and a literal `.` in the pattern matches a literal `.`. + assert!(EnvGlob::new("A*").unwrap().is_match("A.B")); + assert!(EnvGlob::new("A*").unwrap().is_match("A/B")); + assert!(EnvGlob::new("*.local").unwrap().is_match("APP.local")); + assert!(!EnvGlob::new("*.local").unwrap().is_match("APPXlocal")); + } + + #[test] + fn single_glob_bang_is_a_literal_character() { + // A single `EnvGlob` has no negation: `!FOO` matches the literal name + // `!FOO`, not `FOO`. 
+ let g = EnvGlob::new("!FOO").unwrap(); + assert!(g.is_match("!FOO")); + assert!(!g.is_match("FOO")); + } + + #[test] + fn non_match_default_is_false() { + assert!(!EnvGlob::new("VITE_*").unwrap().is_match("PATH")); + } + + #[test] + fn set_matches_any_pattern() { + let set = EnvGlobSet::new(["VITE_*", "*_KEY", "APP?_*"]).unwrap(); + assert!(set.is_match("VITE_FOO")); + assert!(set.is_match("MY_KEY")); + assert!(set.is_match("APP1_TOKEN")); + assert!(!set.is_match("PATH")); + assert!(!set.is_match("APP_X")); + } + + #[test] + fn empty_set_matches_nothing() { + let set = EnvGlobSet::new(std::iter::empty::<&str>()).unwrap(); + assert!(!set.is_match("VITE_FOO")); + } + + #[test] + fn set_negation_excludes_matching_names() { + // `!VITE_SECRET` excludes that name from the `VITE_*` include set. + let set = EnvGlobSet::new(["VITE_*", "!VITE_SECRET"]).unwrap(); + assert!(set.is_match("VITE_FOO")); + assert!(set.is_match("VITE_BAR")); + assert!(!set.is_match("VITE_SECRET")); + assert!(!set.is_match("PATH")); + + // An exclude glob can itself be a wildcard. + let set = EnvGlobSet::new(["*", "!*_SECRET"]).unwrap(); + assert!(set.is_match("VITE_FOO")); + assert!(!set.is_match("API_SECRET")); + } + + #[test] + fn set_only_excludes_matches_nothing() { + // With no include patterns there is nothing to subtract from. 
+ let set = EnvGlobSet::new(["!FOO"]).unwrap(); + assert!(!set.is_match("FOO")); + assert!(!set.is_match("BAR")); + } + + #[test] + #[cfg(not(windows))] + fn unix_matching_is_case_sensitive() { + let g = EnvGlob::new("VITE_*").unwrap(); + assert!(g.is_match("VITE_FOO")); + assert!(!g.is_match("vite_foo")); + let set = EnvGlobSet::new(["VITE_*"]).unwrap(); + assert!(!set.is_match("vite_foo")); + } + + #[test] + #[cfg(windows)] + fn windows_matching_is_case_insensitive() { + let g = EnvGlob::new("VITE_*").unwrap(); + assert!(g.is_match("VITE_FOO")); + assert!(g.is_match("vite_foo")); + let set = EnvGlobSet::new(["VITE_*"]).unwrap(); + assert!(set.is_match("vite_foo")); + } +} diff --git a/crates/vite_glob/src/error.rs b/crates/vite_glob/src/error.rs deleted file mode 100644 index 5d1b75f8e..000000000 --- a/crates/vite_glob/src/error.rs +++ /dev/null @@ -1,9 +0,0 @@ -#[derive(Debug, thiserror::Error)] -pub enum Error { - #[error(transparent)] - WaxBuild(#[from] wax::BuildError), - #[error(transparent)] - Walk(#[from] wax::walk::WalkError), - #[error(transparent)] - InvalidPathData(#[from] vite_path::relative::InvalidPathDataError), -} diff --git a/crates/vite_glob/src/lib.rs b/crates/vite_glob/src/lib.rs index 467538128..7f04aaef1 100644 --- a/crates/vite_glob/src/lib.rs +++ b/crates/vite_glob/src/lib.rs @@ -1,366 +1,12 @@ -mod error; - -#[expect(clippy::disallowed_types, reason = "wax::Glob::is_match requires std::path::Path")] -use std::path::Path; - -pub use error::Error; -use wax::{Glob, Program}; - -/// If there are no negated patterns, it will follow the first match wins semantics. -/// Otherwise, it will follow the last match wins semantics. -#[derive(Debug)] -pub struct GlobPatternSet<'a> { - /// (`glob_pattern`, `match_or_not`) - patterns: Vec<(Glob<'a>, bool)>, - has_negated: bool, -} - -impl<'a> GlobPatternSet<'a> { - /// # Errors - /// Returns an error if any glob pattern is invalid. 
- pub fn new(match_patterns: I) -> Result - where - I: IntoIterator, - S: AsRef + 'a + ?Sized, - { - let mut patterns = Vec::new(); - let mut has_negated = false; - for pattern in match_patterns { - let pattern_str = pattern.as_ref(); - if let Some(negated) = pattern_str.strip_prefix('!') { - // negated pattern, ignore the path - patterns.push((Glob::new(negated)?, false)); - // set to true to follow last match wins semantics - has_negated = true; - } else { - // positive pattern, match the path - patterns.push((Glob::new(pattern_str)?, true)); - } - } - Ok(Self { patterns, has_negated }) - } - - #[expect(clippy::disallowed_types, reason = "wax::Glob::is_match requires std::path::Path")] - pub fn is_match(&self, path: impl AsRef) -> bool { - let mut should_match = false; // Default: don't match - for (glob, match_or_not) in &self.patterns { - if glob.is_match(path.as_ref()) { - should_match = *match_or_not; - if !self.has_negated { - // first match wins semantics - break; - } - } - } - should_match - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_match_ignores_node_modules() -> Result<(), Error> { - let patterns = vec![ - // ignore all paths - "**/*", - // keep node_modules directories themselves - "!**/node_modules", - "!node_modules", - // keep lock files and package.json - "!**/package.json", - "!**/package-lock.json", - "!**/yarn.lock", - "!**/pnpm-lock.yaml", - ]; - let ignores = GlobPatternSet::new(&patterns)?; - - // Should ignore paths inside node_modules - assert!(ignores.is_match("node_modules/react/index.js")); - assert!(ignores.is_match("apps/web/node_modules/react/index.js")); - assert!(ignores.is_match("packages/cli/node_modules/@types/node/index.d.ts")); - - // Should ignore paths outside node_modules - assert!(ignores.is_match("src/index.js")); - assert!(ignores.is_match("tsbuildinfo.json")); - - // Should NOT ignore node_modules directories themselves (due to negation) - assert!(!ignores.is_match("node_modules")); - 
assert!(!ignores.is_match("apps/web/node_modules")); - assert!(!ignores.is_match("packages/cli/node_modules")); - - // Should NOT ignore lock files and package.json - assert!(!ignores.is_match("package.json")); - assert!(!ignores.is_match("apps/web/package.json")); - assert!(!ignores.is_match("package-lock.json")); - assert!(!ignores.is_match("apps/web/yarn.lock")); - assert!(!ignores.is_match("pnpm-lock.yaml")); - assert!(!ignores.is_match("node_modules/react/package.json")); - - Ok(()) - } - - #[test] - fn test_match_ignores_with_file_patterns() -> Result<(), Error> { - let patterns = vec!["*.log", "**/*.tmp", "!important.log"]; - let ignores = GlobPatternSet::new(&patterns)?; - - // Should ignore matching files - assert!(ignores.is_match("debug.log")); - assert!(ignores.is_match("error.log")); - assert!(ignores.is_match("temp/file.tmp")); - assert!(ignores.is_match("deep/nested/path/cache.tmp")); - #[expect(clippy::disallowed_types, reason = "wax::Glob::is_match requires std::path::Path")] - { - assert!(ignores.is_match(String::from("deep/nested/path/cache.tmp"))); - assert!(ignores.is_match(Path::new("deep/nested/path/cache.tmp"))); - } - - // Should NOT ignore negated patterns - assert!(!ignores.is_match("important.log")); - - // Should NOT ignore non-matching files - assert!(!ignores.is_match("file.txt")); - assert!(!ignores.is_match("logs/file.txt")); - - Ok(()) - } - - #[test] - fn test_match_ignores_directory_patterns() -> Result<(), Error> { - let patterns = vec!["dist/**", "build/**", "!dist/public/**"]; - let ignores = GlobPatternSet::new(&patterns)?; - - // Should ignore paths in dist and build - assert!(ignores.is_match("dist/bundle.js")); - assert!(ignores.is_match("dist/assets/style.css")); - assert!(ignores.is_match("build/output.js")); - assert!(ignores.is_match("build/assets/image.png")); - - // Should NOT ignore negated paths - assert!(!ignores.is_match("dist/public/index.html")); - assert!(!ignores.is_match("dist/public/assets/logo.png")); - - 
// Should NOT ignore paths outside target directories - assert!(!ignores.is_match("src/index.js")); - assert!(!ignores.is_match("public/index.html")); - - Ok(()) - } - - #[test] - fn test_match_ignores_complex_patterns() -> Result<(), Error> { - let patterns = vec![ - "**/*.test.js", - "**/*.spec.ts", - "**/test/**", - "**/tests/**", - "!**/integration/tests/**", - ]; - let ignores = GlobPatternSet::new(&patterns)?; - - // Should ignore test files - assert!(ignores.is_match("src/utils.test.js")); - assert!(ignores.is_match("components/Button.spec.ts")); - assert!(ignores.is_match("lib/test/helper.js")); - assert!(ignores.is_match("src/tests/unit/math.js")); - - // Should NOT ignore negated patterns - assert!(!ignores.is_match("integration/tests/e2e.js")); - assert!(!ignores.is_match("integration/tests/api/user.js")); - - // Should NOT ignore non-test files - assert!(!ignores.is_match("src/index.js")); - assert!(!ignores.is_match("lib/utils.js")); - - Ok(()) - } - - #[test] - fn test_match_ignores_empty_patterns() -> Result<(), Error> { - let patterns: Vec<&str> = vec![]; - let ignores = GlobPatternSet::new(&patterns)?; - - // Should not ignore anything with empty patterns - assert!(!ignores.is_match("node_modules/package.json")); - assert!(!ignores.is_match("src/index.js")); - assert!(!ignores.is_match("dist/bundle.js")); - - Ok(()) - } - - #[test] - fn test_match_ignores_with_wildcards() -> Result<(), Error> { - let patterns = vec!["*.{js,ts,jsx,tsx}", "!index.js", "!main.ts"]; - let ignores = GlobPatternSet::new(&patterns)?; - - // Should ignore matching extensions - assert!(ignores.is_match("utils.js")); - assert!(ignores.is_match("component.tsx")); - assert!(ignores.is_match("service.ts")); - assert!(ignores.is_match("App.jsx")); - - // Should NOT ignore negated files - assert!(!ignores.is_match("index.js")); - assert!(!ignores.is_match("main.ts")); - - // Should NOT ignore other extensions - assert!(!ignores.is_match("styles.css")); - 
assert!(!ignores.is_match("data.json")); - - Ok(()) - } - - #[test] - fn test_match_ignores_dotfiles() -> Result<(), Error> { - let patterns = vec![".*", "!.gitignore", "!.env.example"]; - let ignores = GlobPatternSet::new(&patterns)?; - - // Should ignore dotfiles - assert!(ignores.is_match(".env")); - assert!(ignores.is_match(".DS_Store")); - assert!(ignores.is_match(".vscode")); - - // Should NOT ignore negated dotfiles - assert!(!ignores.is_match(".gitignore")); - assert!(!ignores.is_match(".env.example")); - - // Should NOT ignore regular files - assert!(!ignores.is_match("README.md")); - assert!(!ignores.is_match("src/index.js")); - - Ok(()) - } - - #[test] - fn test_match_ignores_root_patterns() -> Result<(), Error> { - // Note: wax doesn't support leading / for root patterns like gitignore - // Using glob patterns that work with wax - let patterns = vec![ - "**/dist", // Match dist at any level - "!dist/public", - "**/node_modules", - ]; - let ignores = GlobPatternSet::new(&patterns)?; - // Patterns match at any level - assert!(ignores.is_match("dist")); - assert!(ignores.is_match("src/dist")); // Also matches nested - - // Negation works - assert!(!ignores.is_match("dist/public")); - - // Node_modules patterns - assert!(ignores.is_match("node_modules")); - assert!(ignores.is_match("src/node_modules")); - assert!(ignores.is_match("packages/app/node_modules")); - - Ok(()) - } - - #[test] - fn test_match_ignores_directory_only_patterns() -> Result<(), Error> { - let patterns = vec![ - "build/**", // Match everything under build - "!build/keep/**", // But not under build/keep - ]; - let ignores = GlobPatternSet::new(&patterns)?; - // Directory patterns - assert!(ignores.is_match("build/output.js")); - assert!(ignores.is_match("build/assets/style.css")); - - // Negated directory - assert!(!ignores.is_match("build/keep/important.txt")); - - Ok(()) - } - - #[test] - fn test_match_ignores_mixed_patterns() -> Result<(), Error> { - let patterns = vec![ - "**/*.log", 
// Match .log files at any depth - "**/temp/**", - "node_modules/**", - "!**/temp/keep/**", - "!debug.log", - ]; - let ignores = GlobPatternSet::new(&patterns)?; - - // Test various patterns together - assert!(ignores.is_match("error.log")); - assert!(ignores.is_match("src/app.log")); - assert!(!ignores.is_match("debug.log")); // Negated - - assert!(ignores.is_match("temp/file.txt")); - assert!(ignores.is_match("src/temp/cache.dat")); - assert!(!ignores.is_match("temp/keep/important.txt")); // Negated - - assert!(ignores.is_match("node_modules/react/index.js")); - assert!(ignores.is_match("node_modules/@types/node/index.d.ts")); - - assert!(!ignores.is_match("src/index.js")); - assert!(!ignores.is_match("package.json")); - - Ok(()) - } - - #[expect( - clippy::disallowed_types, - reason = "tests that is_match accepts various argument types" - )] - #[test] - fn test_generic_api_with_different_types() -> Result<(), Error> { - use vite_str::Str; - - // Test with Vec<&str> - let patterns_str = vec!["*.log", "!important.log"]; - let ignores_str = GlobPatternSet::new(&patterns_str)?; - assert!(ignores_str.is_match("debug.log")); - assert!(!ignores_str.is_match("important.log")); - - // Test with Vec<String> - let patterns_string = vec![String::from("*.tmp"), String::from("!keep.tmp")]; - let ignores_string = GlobPatternSet::new(&patterns_string)?; - assert!(ignores_string.is_match("temp.tmp")); - assert!(!ignores_string.is_match("keep.tmp")); - - // Test with Vec<Str> - let patterns_vite_str = vec![Str::from("*.rs"), Str::from("!main.rs")]; - let ignores_vite_str = GlobPatternSet::new(&patterns_vite_str)?; - assert!(ignores_vite_str.is_match("lib.rs")); - assert!(!ignores_vite_str.is_match("main.rs")); - - // Test with array - let patterns_array = ["build/**", "!build/dist/**"]; - let ignores_array = GlobPatternSet::new(&patterns_array)?; - assert!(ignores_array.is_match("build/src/main.js")); - assert!(!ignores_array.is_match("build/dist/bundle.js")); - - // Test with iterator - let 
patterns_iter = ["*.md", "!README.md"].iter(); - let ignores_iter = GlobPatternSet::new(patterns_iter)?; - assert!(ignores_iter.is_match("CHANGELOG.md")); - assert!(!ignores_iter.is_match("README.md")); - - Ok(()) - } - - #[test] - fn test_match_ignores_last_matching_pattern() -> Result<(), Error> { - // Test that the last matching pattern wins (gitignore semantics) - let patterns = vec![ - "logs/**", // First: ignore everything in logs/ - "!logs/important.log", // Second: don't ignore important.log - "logs/important.log", // Third: ignore important.log again (this wins) - ]; - let ignores = GlobPatternSet::new(&patterns)?; - - assert!(ignores.is_match("logs/error.log")); - assert!(ignores.is_match("logs/src/app.log")); - assert!(ignores.is_match("logs/debug.log")); - // The last pattern "logs/important.log" (positive) wins over "!logs/important.log" (negative) - assert!(ignores.is_match("logs/important.log")); // Should be ignored! - - Ok(()) - } -} +//! Glob matching, split by use case: +//! +//! - [`mod@env`] — environment-variable **name** matching (flat strings), +//! backed by `globset` with path semantics disabled. +//! - [`mod@path`] — filesystem **path** matching with gitignore semantics, +//! backed by `wax`. +//! +//! Each module owns its own error type ([`env::EnvGlobError`] / +//! [`path::PathGlobError`]). + +pub mod env; +pub mod path; diff --git a/crates/vite_glob/src/path.rs b/crates/vite_glob/src/path.rs new file mode 100644 index 000000000..330e614fd --- /dev/null +++ b/crates/vite_glob/src/path.rs @@ -0,0 +1,375 @@ +//! Glob matching for filesystem **paths** with gitignore semantics. +//! +//! Backed by `wax`. `!`-prefixed patterns negate. With no negation present, +//! first-match-wins; with any negation, last-match-wins (gitignore). + +#[expect(clippy::disallowed_types, reason = "wax::Glob::is_match requires std::path::Path")] +use std::path::Path; + +use wax::{Glob, Program}; + +/// Error compiling a path glob pattern. 
+#[derive(Debug, thiserror::Error)] +#[error(transparent)] +pub struct PathGlobError(#[from] wax::BuildError); + +/// Matches filesystem paths against an ordered set of glob patterns. +/// +/// If there are no negated patterns, it will follow the first match wins semantics. +/// Otherwise, it will follow the last match wins semantics. +#[derive(Debug)] +pub struct PathGlobSet<'a> { + /// (`glob_pattern`, `match_or_not`) + patterns: Vec<(Glob<'a>, bool)>, + has_negated: bool, +} + +impl<'a> PathGlobSet<'a> { + /// # Errors + /// Returns an error if any glob pattern is invalid. + pub fn new<I, S>(match_patterns: I) -> Result<Self, PathGlobError> + where + I: IntoIterator<Item = &'a S>, + S: AsRef<str> + 'a + ?Sized, + { + let mut patterns = Vec::new(); + let mut has_negated = false; + for pattern in match_patterns { + let pattern_str = pattern.as_ref(); + if let Some(negated) = pattern_str.strip_prefix('!') { + // negated pattern, ignore the path + patterns.push((Glob::new(negated)?, false)); + // set to true to follow last match wins semantics + has_negated = true; + } else { + // positive pattern, match the path + patterns.push((Glob::new(pattern_str)?, true)); + } + } + Ok(Self { patterns, has_negated }) + } + + #[expect(clippy::disallowed_types, reason = "wax::Glob::is_match requires std::path::Path")] + pub fn is_match(&self, path: impl AsRef<Path>) -> bool { + let mut should_match = false; // Default: don't match + for (glob, match_or_not) in &self.patterns { + if glob.is_match(path.as_ref()) { + should_match = *match_or_not; + if !self.has_negated { + // first match wins semantics + break; + } + } + } + should_match + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_match_ignores_node_modules() -> Result<(), PathGlobError> { + let patterns = vec![ + // ignore all paths + "**/*", + // keep node_modules directories themselves + "!**/node_modules", + "!node_modules", + // keep lock files and package.json + "!**/package.json", + "!**/package-lock.json", + "!**/yarn.lock", + 
"!**/pnpm-lock.yaml", + ]; + let ignores = PathGlobSet::new(&patterns)?; + + // Should ignore paths inside node_modules + assert!(ignores.is_match("node_modules/react/index.js")); + assert!(ignores.is_match("apps/web/node_modules/react/index.js")); + assert!(ignores.is_match("packages/cli/node_modules/@types/node/index.d.ts")); + + // Should ignore paths outside node_modules + assert!(ignores.is_match("src/index.js")); + assert!(ignores.is_match("tsbuildinfo.json")); + + // Should NOT ignore node_modules directories themselves (due to negation) + assert!(!ignores.is_match("node_modules")); + assert!(!ignores.is_match("apps/web/node_modules")); + assert!(!ignores.is_match("packages/cli/node_modules")); + + // Should NOT ignore lock files and package.json + assert!(!ignores.is_match("package.json")); + assert!(!ignores.is_match("apps/web/package.json")); + assert!(!ignores.is_match("package-lock.json")); + assert!(!ignores.is_match("apps/web/yarn.lock")); + assert!(!ignores.is_match("pnpm-lock.yaml")); + assert!(!ignores.is_match("node_modules/react/package.json")); + + Ok(()) + } + + #[test] + fn test_match_ignores_with_file_patterns() -> Result<(), PathGlobError> { + let patterns = vec!["*.log", "**/*.tmp", "!important.log"]; + let ignores = PathGlobSet::new(&patterns)?; + + // Should ignore matching files + assert!(ignores.is_match("debug.log")); + assert!(ignores.is_match("error.log")); + assert!(ignores.is_match("temp/file.tmp")); + assert!(ignores.is_match("deep/nested/path/cache.tmp")); + #[expect(clippy::disallowed_types, reason = "wax::Glob::is_match requires std::path::Path")] + { + assert!(ignores.is_match(String::from("deep/nested/path/cache.tmp"))); + assert!(ignores.is_match(Path::new("deep/nested/path/cache.tmp"))); + } + + // Should NOT ignore negated patterns + assert!(!ignores.is_match("important.log")); + + // Should NOT ignore non-matching files + assert!(!ignores.is_match("file.txt")); + assert!(!ignores.is_match("logs/file.txt")); + + Ok(()) + } 
+ + #[test] + fn test_match_ignores_directory_patterns() -> Result<(), PathGlobError> { + let patterns = vec!["dist/**", "build/**", "!dist/public/**"]; + let ignores = PathGlobSet::new(&patterns)?; + + // Should ignore paths in dist and build + assert!(ignores.is_match("dist/bundle.js")); + assert!(ignores.is_match("dist/assets/style.css")); + assert!(ignores.is_match("build/output.js")); + assert!(ignores.is_match("build/assets/image.png")); + + // Should NOT ignore negated paths + assert!(!ignores.is_match("dist/public/index.html")); + assert!(!ignores.is_match("dist/public/assets/logo.png")); + + // Should NOT ignore paths outside target directories + assert!(!ignores.is_match("src/index.js")); + assert!(!ignores.is_match("public/index.html")); + + Ok(()) + } + + #[test] + fn test_match_ignores_complex_patterns() -> Result<(), PathGlobError> { + let patterns = vec![ + "**/*.test.js", + "**/*.spec.ts", + "**/test/**", + "**/tests/**", + "!**/integration/tests/**", + ]; + let ignores = PathGlobSet::new(&patterns)?; + + // Should ignore test files + assert!(ignores.is_match("src/utils.test.js")); + assert!(ignores.is_match("components/Button.spec.ts")); + assert!(ignores.is_match("lib/test/helper.js")); + assert!(ignores.is_match("src/tests/unit/math.js")); + + // Should NOT ignore negated patterns + assert!(!ignores.is_match("integration/tests/e2e.js")); + assert!(!ignores.is_match("integration/tests/api/user.js")); + + // Should NOT ignore non-test files + assert!(!ignores.is_match("src/index.js")); + assert!(!ignores.is_match("lib/utils.js")); + + Ok(()) + } + + #[test] + fn test_match_ignores_empty_patterns() -> Result<(), PathGlobError> { + let patterns: Vec<&str> = vec![]; + let ignores = PathGlobSet::new(&patterns)?; + + // Should not ignore anything with empty patterns + assert!(!ignores.is_match("node_modules/package.json")); + assert!(!ignores.is_match("src/index.js")); + assert!(!ignores.is_match("dist/bundle.js")); + + Ok(()) + } + + #[test] + fn 
test_match_ignores_with_wildcards() -> Result<(), PathGlobError> { + let patterns = vec!["*.{js,ts,jsx,tsx}", "!index.js", "!main.ts"]; + let ignores = PathGlobSet::new(&patterns)?; + + // Should ignore matching extensions + assert!(ignores.is_match("utils.js")); + assert!(ignores.is_match("component.tsx")); + assert!(ignores.is_match("service.ts")); + assert!(ignores.is_match("App.jsx")); + + // Should NOT ignore negated files + assert!(!ignores.is_match("index.js")); + assert!(!ignores.is_match("main.ts")); + + // Should NOT ignore other extensions + assert!(!ignores.is_match("styles.css")); + assert!(!ignores.is_match("data.json")); + + Ok(()) + } + + #[test] + fn test_match_ignores_dotfiles() -> Result<(), PathGlobError> { + let patterns = vec![".*", "!.gitignore", "!.env.example"]; + let ignores = PathGlobSet::new(&patterns)?; + + // Should ignore dotfiles + assert!(ignores.is_match(".env")); + assert!(ignores.is_match(".DS_Store")); + assert!(ignores.is_match(".vscode")); + + // Should NOT ignore negated dotfiles + assert!(!ignores.is_match(".gitignore")); + assert!(!ignores.is_match(".env.example")); + + // Should NOT ignore regular files + assert!(!ignores.is_match("README.md")); + assert!(!ignores.is_match("src/index.js")); + + Ok(()) + } + + #[test] + fn test_match_ignores_root_patterns() -> Result<(), PathGlobError> { + // Note: wax doesn't support leading / for root patterns like gitignore + // Using glob patterns that work with wax + let patterns = vec![ + "**/dist", // Match dist at any level + "!dist/public", + "**/node_modules", + ]; + let ignores = PathGlobSet::new(&patterns)?; + // Patterns match at any level + assert!(ignores.is_match("dist")); + assert!(ignores.is_match("src/dist")); // Also matches nested + + // Negation works + assert!(!ignores.is_match("dist/public")); + + // Node_modules patterns + assert!(ignores.is_match("node_modules")); + assert!(ignores.is_match("src/node_modules")); + 
+ assert!(ignores.is_match("packages/app/node_modules")); + + Ok(()) + } + + #[test] + fn test_match_ignores_directory_only_patterns() -> Result<(), PathGlobError> { + let patterns = vec![ + "build/**", // Match everything under build + "!build/keep/**", // But not under build/keep + ]; + let ignores = PathGlobSet::new(&patterns)?; + // Directory patterns + assert!(ignores.is_match("build/output.js")); + assert!(ignores.is_match("build/assets/style.css")); + + // Negated directory + assert!(!ignores.is_match("build/keep/important.txt")); + + Ok(()) + } + + #[test] + fn test_match_ignores_mixed_patterns() -> Result<(), PathGlobError> { + let patterns = vec![ + "**/*.log", // Match .log files at any depth + "**/temp/**", + "node_modules/**", + "!**/temp/keep/**", + "!debug.log", + ]; + let ignores = PathGlobSet::new(&patterns)?; + + // Test various patterns together + assert!(ignores.is_match("error.log")); + assert!(ignores.is_match("src/app.log")); + assert!(!ignores.is_match("debug.log")); // Negated + + assert!(ignores.is_match("temp/file.txt")); + assert!(ignores.is_match("src/temp/cache.dat")); + assert!(!ignores.is_match("temp/keep/important.txt")); // Negated + + assert!(ignores.is_match("node_modules/react/index.js")); + assert!(ignores.is_match("node_modules/@types/node/index.d.ts")); + + assert!(!ignores.is_match("src/index.js")); + assert!(!ignores.is_match("package.json")); + + Ok(()) + } + + #[expect( + clippy::disallowed_types, + reason = "tests that is_match accepts various argument types" + )] + #[test] + fn test_generic_api_with_different_types() -> Result<(), PathGlobError> { + use vite_str::Str; + + // Test with Vec<&str> + let patterns_str = vec!["*.log", "!important.log"]; + let ignores_str = PathGlobSet::new(&patterns_str)?; + assert!(ignores_str.is_match("debug.log")); + assert!(!ignores_str.is_match("important.log")); + + // Test with Vec<String> + let patterns_string = vec![String::from("*.tmp"), String::from("!keep.tmp")]; + let ignores_string = 
PathGlobSet::new(&patterns_string)?; + assert!(ignores_string.is_match("temp.tmp")); + assert!(!ignores_string.is_match("keep.tmp")); + + // Test with Vec<Str> + let patterns_vite_str = vec![Str::from("*.rs"), Str::from("!main.rs")]; + let ignores_vite_str = PathGlobSet::new(&patterns_vite_str)?; + assert!(ignores_vite_str.is_match("lib.rs")); + assert!(!ignores_vite_str.is_match("main.rs")); + + // Test with array + let patterns_array = ["build/**", "!build/dist/**"]; + let ignores_array = PathGlobSet::new(&patterns_array)?; + assert!(ignores_array.is_match("build/src/main.js")); + assert!(!ignores_array.is_match("build/dist/bundle.js")); + + // Test with iterator + let patterns_iter = ["*.md", "!README.md"].iter(); + let ignores_iter = PathGlobSet::new(patterns_iter)?; + assert!(ignores_iter.is_match("CHANGELOG.md")); + assert!(!ignores_iter.is_match("README.md")); + + Ok(()) + } + + #[test] + fn test_match_ignores_last_matching_pattern() -> Result<(), PathGlobError> { + // Test that the last matching pattern wins (gitignore semantics) + let patterns = vec![ + "logs/**", // First: ignore everything in logs/ + "!logs/important.log", // Second: don't ignore important.log + "logs/important.log", // Third: ignore important.log again (this wins) + ]; + let ignores = PathGlobSet::new(&patterns)?; + + assert!(ignores.is_match("logs/error.log")); + assert!(ignores.is_match("logs/src/app.log")); + assert!(ignores.is_match("logs/debug.log")); + // The last pattern "logs/important.log" (positive) wins over "!logs/important.log" (negative) + assert!(ignores.is_match("logs/important.log")); // Should be ignored! 
+ + Ok(()) + } +} diff --git a/crates/vite_task_bin/tests/e2e_snapshots/fixtures/env_negation/package.json b/crates/vite_task_bin/tests/e2e_snapshots/fixtures/env_negation/package.json new file mode 100644 index 000000000..0967ef424 --- /dev/null +++ b/crates/vite_task_bin/tests/e2e_snapshots/fixtures/env_negation/package.json @@ -0,0 +1 @@ +{} diff --git a/crates/vite_task_bin/tests/e2e_snapshots/fixtures/env_negation/snapshots.toml b/crates/vite_task_bin/tests/e2e_snapshots/fixtures/env_negation/snapshots.toml new file mode 100644 index 000000000..a24b8ff31 --- /dev/null +++ b/crates/vite_task_bin/tests/e2e_snapshots/fixtures/env_negation/snapshots.toml @@ -0,0 +1,19 @@ +[[e2e]] +name = "env_negation_excludes_matching_var" +comment = """A `!`-prefixed env pattern excludes matching variables. With `env: ["PROBE_*", "!PROBE_SECRET"]`, the task receives PROBE_PUBLIC but not PROBE_SECRET — the negation filters it out — so print-env reports it undefined.""" +steps = [ + { argv = [ + "vt", + "run", + "print", + ], envs = [ + [ + "PROBE_PUBLIC", + "public-value", + ], + [ + "PROBE_SECRET", + "secret-value", + ], + ], comment = "PROBE_SECRET is filtered out by !PROBE_SECRET" }, +] diff --git a/crates/vite_task_bin/tests/e2e_snapshots/fixtures/env_negation/snapshots/env_negation_excludes_matching_var.md b/crates/vite_task_bin/tests/e2e_snapshots/fixtures/env_negation/snapshots/env_negation_excludes_matching_var.md new file mode 100644 index 000000000..739deb99f --- /dev/null +++ b/crates/vite_task_bin/tests/e2e_snapshots/fixtures/env_negation/snapshots/env_negation_excludes_matching_var.md @@ -0,0 +1,18 @@ +# env_negation_excludes_matching_var + +A `!`-prefixed env pattern excludes matching variables. With `env: ["PROBE_*", "!PROBE_SECRET"]`, the task receives PROBE_PUBLIC but not PROBE_SECRET — the negation filters it out — so print-env reports it undefined. 
+ +## `PROBE_PUBLIC=public-value PROBE_SECRET=secret-value vt run print` + +PROBE_SECRET is filtered out by !PROBE_SECRET + +``` +$ vtt print-env PROBE_PUBLIC +public-value + +$ vtt print-env PROBE_SECRET +(undefined) + +--- +vt run: 0/2 cache hit (0%). (Run `vt run --last-details` for full details) +``` diff --git a/crates/vite_task_bin/tests/e2e_snapshots/fixtures/env_negation/vite-task.json b/crates/vite_task_bin/tests/e2e_snapshots/fixtures/env_negation/vite-task.json new file mode 100644 index 000000000..ffb3041ba --- /dev/null +++ b/crates/vite_task_bin/tests/e2e_snapshots/fixtures/env_negation/vite-task.json @@ -0,0 +1,10 @@ +{ + "cache": true, + "tasks": { + "print": { + "command": "vtt print-env PROBE_PUBLIC && vtt print-env PROBE_SECRET", + "env": ["PROBE_*", "!PROBE_SECRET"], + "cache": true + } + } +} diff --git a/crates/vite_task_plan/src/envs.rs b/crates/vite_task_plan/src/envs.rs index 95e410af0..dc110ead8 100644 --- a/crates/vite_task_plan/src/envs.rs +++ b/crates/vite_task_plan/src/envs.rs @@ -3,7 +3,7 @@ use std::{collections::BTreeMap, ffi::OsStr, fmt::Write as _, mem::MaybeUninit, use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; use sha2::{Digest as _, Sha256}; -use vite_glob::GlobPatternSet; +use vite_glob::env::EnvGlobSet; use vite_str::Str; use vite_task_graph::config::EnvConfig; use wincode::{ @@ -64,7 +64,7 @@ pub enum ResolveEnvError { GlobError { #[source] #[from] - glob_error: vite_glob::Error, + glob_error: vite_glob::env::EnvGlobError, }, #[error("Env value is not valid unicode: {key} = {value:?}")] @@ -107,10 +107,8 @@ impl EnvFingerprints { // Resolve fingerprinted envs let mut fingerprinted_envs = BTreeMap::>::new(); if !env_config.fingerprinted_envs.is_empty() { - let fingerprinted_env_patterns = GlobPatternSet::new( - env_config.fingerprinted_envs.iter().filter(|s| !s.starts_with('!')), - )?; - let sensitive_patterns = GlobPatternSet::new(SENSITIVE_PATTERNS)?; + let fingerprinted_env_patterns = 
EnvGlobSet::new(env_config.fingerprinted_envs.iter())?; + let sensitive_patterns = EnvGlobSet::new(SENSITIVE_PATTERNS.iter())?; for (name, value) in all_envs.iter() { let Some(name) = name.to_str() else { continue; @@ -161,20 +159,8 @@ impl EnvFingerprints { fn resolve_envs_with_patterns<'a>( env_vars: impl Iterator, &'a Arc)>, patterns: &[&str], -) -> Result, Arc>, vite_glob::Error> { - let patterns = GlobPatternSet::new(patterns.iter().filter(|pattern| { - if pattern.starts_with('!') { - // FIXME: use better way to print warning log - // Or parse and validate TaskConfig in command parsing phase - tracing::warn!( - "env pattern starts with '!' is not supported, will be ignored: {}", - pattern - ); - false - } else { - true - } - }))?; +) -> Result, Arc>, vite_glob::env::EnvGlobError> { + let patterns = EnvGlobSet::new(patterns.iter())?; let envs: FxHashMap, Arc> = env_vars .filter_map(|(name, value)| { let name_str = name.as_ref().to_str()?; diff --git a/crates/vite_workspace/src/error.rs b/crates/vite_workspace/src/error.rs index 36aee086f..bd0b6f766 100644 --- a/crates/vite_workspace/src/error.rs +++ b/crates/vite_workspace/src/error.rs @@ -56,7 +56,7 @@ pub enum Error { WaxWalk(#[from] wax::walk::WalkError), #[error(transparent)] - Glob(#[from] vite_glob::Error), + Glob(#[from] vite_glob::path::PathGlobError), } impl From> for Error { diff --git a/crates/vite_workspace/src/lib.rs b/crates/vite_workspace/src/lib.rs index a5f170a9c..3b22d8bc9 100644 --- a/crates/vite_workspace/src/lib.rs +++ b/crates/vite_workspace/src/lib.rs @@ -10,7 +10,7 @@ use petgraph::graph::{DefaultIx, DiGraph, EdgeIndex, IndexType, NodeIndex}; use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet}; use serde::Deserialize; use vec1::smallvec_v1::SmallVec1; -use vite_glob::GlobPatternSet; +use vite_glob::path::PathGlobSet; use vite_path::{AbsolutePath, AbsolutePathBuf, RelativePathBuf}; use vite_str::Str; use wax::{Glob, walk::Entry as _}; @@ -107,7 +107,7 @@ impl WorkspaceMemberGlobs 
{ } all.push(pattern); } - let glob_patterns = if has_negated { Some(GlobPatternSet::new(&all)?) } else { None }; + let glob_patterns = if has_negated { Some(PathGlobSet::new(&all)?) } else { None }; // TODO: parallelize this for inclusion in inclusions {