diff --git a/Cargo.lock b/Cargo.lock index 3fabe57..439bcde 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,6 +105,18 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "async-compression" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -287,6 +299,23 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "compression-codecs" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf" +dependencies = [ + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789" + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -360,6 +389,7 @@ dependencies = [ "quick-xml", "regex", "reqwest", + "semver", "serde", "serde_derive", "serde_json", @@ -1626,6 +1656,7 @@ version = "0.12.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" dependencies = [ + "async-compression", "base64", "bytes", "cookie", @@ -1652,6 +1683,7 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-native-tls", + "tokio-util", "tower", "tower-http", "tower-service", @@ -1761,6 +1793,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" diff --git a/Cargo.toml b/Cargo.toml index 044fecd..9c07d83 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ dirs = "5.0.1" reqwest = { version = "0.12.23", default-features = false, features = [ "blocking", "cookies", + "gzip", "json", "multipart", "native-tls", @@ -32,6 +33,7 @@ reqwest = { version = "0.12.23", default-features = false, features = [ toml = "0.8.8" log = "0.4" env_logger = "0.11" +semver = "1" serde = { version = "1.0.195", features = ["derive"] } serde_json = "1.0.111" serde_derive = "1.0.195" diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index f23293f..0c6427d 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -1,6 +1,6 @@ --- name: corgea -description: Scans code for security vulnerabilities using Corgea's AI-powered BLAST scanner and third-party tools, manages findings, and displays AI-generated fixes. Use when the user needs to scan for security issues, upload scan reports, list or inspect vulnerabilities, view fixes, or integrate security scanning into CI/CD. +description: Scans code for security vulnerabilities using Corgea's AI-powered BLAST scanner and third-party tools, gates `pip` and `npm` package installs against vulnerable and suspiciously-recent dependencies (including transitive), manages findings, and displays AI-generated fixes. Use when the user needs to install pip/npm packages safely, scan for security issues, upload scan reports, list or inspect vulnerabilities, view fixes, or integrate security scanning into CI/CD. allowed-tools: Shell, Read, Grep, Glob, StrReplace --- @@ -131,6 +131,77 @@ Agent environments default to compact TSV; force output with `--format human|age Notes: `deps scan --out-format table|json|sarif` is the report/export selector; do not combine it with `deps scan --format`. 
+### Install Wrappers — `corgea pip|npm <command>` + +Run a package manager through Corgea's install gate. Install commands with +named targets are resolved against the public registry first, then gated +twice: a version published within `--threshold` (default `2d`) blocks +(exit 1), and each resolved version is checked against Corgea's vuln-api — +known-vulnerable or malicious versions block. CVE checks are public and need +no token; vuln-api lookup outages warn and continue (fail-open). Everything +else passes through with the package manager's own exit code. Git/URL/path +specs (including `pip install .`, PEP 508 `name @ url` direct references, and +npm GitHub shorthand `user/repo`) are noted, never blocked. The install verb +is found behind global flags (`npm --loglevel silent install x` is still +gated). Bare `npm install` (zero specs, project `package.json` found like npm +finds it — nearest ancestor) is gated too: the full lockfile-resolved tree is +verdicted, so a vulnerable lockfile blocks. `npm ci` (and aliases) is gated +from the project lockfile directly. + +The vuln check covers the **full would-install set**, not just the named +targets: `pip` and `npm` resolve the complete tree (named + transitive) via a +safe dry-run (`pip install --dry-run …`; an isolated +`npm install --package-lock-only` in a temp dir, never touching your +lockfile); every resolved package is verdicted, so a flagged **transitive** +dependency blocks the install too, labeled by provenance (`(transitive)`, +`(from requirements)`, `(already in package.json)`, `(locked)`). Whenever a +dry-run fails or an npm flag redirects the project root (`--prefix`, `-g`), +the gate falls back to named-only and prints +`warning: transitive dependencies not checked (…); only named packages were verified.` +— for pip, entries of `-r requirements.txt` files are still parsed and +verified in that fallback. Verdict requests run in a bounded pool +(8 parallel). 
+ +Wrapper flags (`--force`, `--no-fail`, `-t`) are read between the manager +name and the install verb (`corgea npm --force install x`); flags after the +verb belong to the package manager and are forwarded untouched. + +Blocked findings steer to the fix: each advisory line shows +`fixed in <version>` (or `no fixed version known`). When every advisory on a +package has a fix, the gate prints `→ safe version: <name>@<version>` — the +highest fix covering every advisory. Install that version instead. + +```bash +corgea pip install requests==2.31.0 # resolves, checks recency + vuln verdict, then runs pip +corgea npm install axios@^1.0.0 # same gate for npm ranges +corgea pip --no-fail install newpkg # demote a recency block to a warning (vuln blocks still apply) +corgea pip --force install badpkg # print findings but install anyway (overrides every block) +corgea pip list # non-install subcommands pass straight through +``` + +| Flag | Short | Description | +|------|-------|-------------| +| `--threshold` | `-t` | Recency threshold (`2d`, `12h`). Younger resolved versions block. | +| `--no-fail` | | Demote a recency block to a warning. Does NOT bypass vulnerable blocks. | +| `--force` | | Proceed despite all findings (vulnerable, recent). Findings still print. | + +Overrides for testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`, +`CORGEA_VULN_API_URL`. + +#### Limitations + +The gate is a wrapper, not an enforcement boundary. By design it cannot catch: + +- **Direct invocation** — running the package manager itself (`pip`, `npm`, + `python -m pip`) skips the gate entirely. +- **Custom indexes/registries** — `--index-url`, `--registry`, and `.npmrc`/ + `pip.conf` overrides change where packages resolve from. The gate still + verdicts each `name@version`, but it cannot vouch that a substituted + registry serves the same artifact those advisories describe. 
+- **Named-only fallback** — when a dry-run fails (old pip, broken resolution) + or `--prefix`/`-g` redirects npm's root, transitive dependencies install + unchecked behind the printed warning. + ## Common Workflows ### Scan full project diff --git a/src/config.rs b/src/config.rs index f8d7db0..1196c25 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2,6 +2,8 @@ use serde::{Deserialize, Serialize}; use std::path::PathBuf; use std::{env, fs, io}; +pub const DEFAULT_VULN_API_URL: &str = "https://cve-worker-staging.corgea.workers.dev"; + #[derive(Serialize, Deserialize, Clone)] pub struct Config { pub(crate) url: String, @@ -119,3 +121,39 @@ impl Config { self.default_agent.clone() } } + +/// Base URL for the vuln-api service: `CORGEA_VULN_API_URL` env var, +/// then the public default. Pure env/constant — no config file field. +pub fn vuln_api_url() -> String { + resolve_vuln_api_url(crate::utils::generic::get_env_var_if_exists( + "CORGEA_VULN_API_URL", + )) +} + +/// Pure resolution rule, split out so tests never mutate process-global +/// env (`set_var` races concurrent `getenv` under the parallel harness). +fn resolve_vuln_api_url(override_url: Option) -> String { + override_url + .unwrap_or_else(|| DEFAULT_VULN_API_URL.to_string()) + .trim() + .trim_end_matches('/') + .to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn vuln_api_url_resolution_order() { + // Default when the env var is unset (`get_env_var_if_exists` + // already maps empty/whitespace-only values to None). + assert_eq!(resolve_vuln_api_url(None), DEFAULT_VULN_API_URL); + + // Override wins; whitespace and trailing slash trimmed. 
+ assert_eq!( + resolve_vuln_api_url(Some(" https://env.example.com/ ".to_string())), + "https://env.example.com" + ); + } +} diff --git a/src/deps/ecosystems/npm.rs b/src/deps/ecosystems/npm.rs index edbc7cf..1fd8d91 100644 --- a/src/deps/ecosystems/npm.rs +++ b/src/deps/ecosystems/npm.rs @@ -312,7 +312,11 @@ fn parse_npm_lock(path: &Path) -> Result, DepsError Ok(out) } -fn package_name_from_lock_key(key: &str) -> &str { +/// Package name from a lockfile `packages` key: the path after the last +/// `node_modules/` (or the whole key), truncated to one component — two for +/// scoped names. Also shared with the install gate's lockfile parse +/// (`precheck::tree`). +pub(crate) fn package_name_from_lock_key(key: &str) -> &str { let package_path = key .rsplit_once("node_modules/") .map(|(_, name)| name) diff --git a/src/lib.rs b/src/lib.rs index 2f8423e..498e83d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,6 @@ pub mod deps; +pub mod precheck; +pub mod verify_deps; // Also declared in the binary crate (src/main.rs); re-declared here so library modules // (e.g. vuln_api) can use `crate::log::debug`. src/log.rs is a thin `::log` facade that // compiles cleanly in both crates. diff --git a/src/main.rs b/src/main.rs index 549ca1c..2669a74 100644 --- a/src/main.rs +++ b/src/main.rs @@ -211,6 +211,57 @@ enum Commands { #[command(subcommand)] command: corgea::deps::run::DepsSubcommand, }, + /// Wrap `npm` commands: gate install targets on recency + vuln verdicts, then run npm. + Npm(InstallWrapArgs), + /// Wrap `pip` commands: gate install targets on recency + vuln verdicts, then run pip. + Pip(InstallWrapArgs), +} + +/// Shared flags for the install-wrapper subcommands (`corgea npm|pip`). +#[derive(clap::Args, Debug, Clone)] +struct InstallWrapArgs { + #[arg( + long, + short = 't', + default_value = "2d", + value_parser = corgea::verify_deps::parse_threshold, + help = "Recency threshold. Resolved versions younger than this are blocked. e.g. '2d', '12h'." 
+ )] + threshold: std::time::Duration, + + #[arg( + long, + help = "Demote a recency block to a printed warning. The install still runs." + )] + no_fail: bool, + + #[arg( + long, + help = "Proceed with the install despite vulnerable or recent findings. Findings are still printed." + )] + force: bool, + + /// Arguments forwarded to the package manager (subcommand and package specs). + #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + cmd: Vec, +} + +fn install_wrap_options(args: &InstallWrapArgs) -> corgea::precheck::PrecheckOptions { + corgea::precheck::PrecheckOptions { + threshold: args.threshold, + no_fail: args.no_fail, + force: args.force, + verdict: Some(corgea::precheck::VerdictConfig { + base_url: config::vuln_api_url(), + }), + npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), + pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), + } +} + +fn run_install_wrap_command(manager: corgea::precheck::PackageManager, args: &InstallWrapArgs) { + let code = corgea::precheck::run_install(manager, &args.cmd, install_wrap_options(args)); + std::process::exit(code); } #[derive(Subcommand, Debug)] @@ -584,7 +635,19 @@ fn main() { // Offline: no token / network. Exit code propagates fail-on policy. std::process::exit(i32::from(corgea::deps::run::run(command.clone()))); } + // Install wrappers: no auth gate. Public CVE checks run without a + // token and fail open on lookup outages. 
+ Some(Commands::Npm(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Npm, args) + } + Some(Commands::Pip(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Pip, args) + } None => { + if let Some(message) = corgea::precheck::pip3_alias_message(&cli.args) { + eprintln!("{message}"); + std::process::exit(1); + } utils::terminal::show_welcome_message(); let _ = Cli::command().print_help(); println!(); diff --git a/src/precheck/exec.rs b/src/precheck/exec.rs new file mode 100644 index 0000000..2e86cda --- /dev/null +++ b/src/precheck/exec.rs @@ -0,0 +1,65 @@ +//! Resolve and exec the real package manager, forwarding args and exit codes. + +use std::ffi::OsString; +use std::process::Command; + +use super::PackageManager; + +pub(super) fn exec_install_with_args( + manager: PackageManager, + subcommand: &str, + rest: &[String], +) -> i32 { + let mut full = Vec::with_capacity(rest.len() + 1); + full.push(subcommand.to_string()); + full.extend(rest.iter().cloned()); + exec_command(manager.binary_name(), &full) +} + +/// Resolve `binary` on PATH. On Windows this finds `.cmd` shims. pip is the +/// one manager with a conventional alias, so a missing `pip` retries `pip3`. +/// The error names the binary and any fallback tried. 
+pub(super) fn resolve_binary(binary: &str) -> Result { + if let Ok(p) = which::which(binary) { + return Ok(p); + } + if binary == "pip" { + if let Ok(p) = which::which("pip3") { + return Ok(p); + } + return Err("error: 'pip' not found on PATH (also tried 'pip3')".to_string()); + } + Err(format!("error: '{binary}' not found on PATH")) +} + +pub(super) fn exec_command(binary: &str, args: &[String]) -> i32 { + let resolved = match resolve_binary(binary) { + Ok(p) => p, + Err(msg) => { + eprintln!("{msg}"); + return 127; + } + }; + + let os_args: Vec = args.iter().map(OsString::from).collect(); + + let mut command = Command::new(&resolved); + command.args(&os_args); + match command.status() { + Ok(status) => status.code().unwrap_or_else(|| { + #[cfg(unix)] + { + use std::os::unix::process::ExitStatusExt; + if let Some(sig) = status.signal() { + return 128 + sig; + } + } + 1 + }), + Err(e) => { + // Name the resolved path: it may be the pip3 fallback, not `binary`. + eprintln!("failed to exec {}: {}", resolved.display(), e); + 1 + } + } +} diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs new file mode 100644 index 0000000..15dad1f --- /dev/null +++ b/src/precheck/mod.rs @@ -0,0 +1,1118 @@ +//! Install wrappers: `corgea npm`, `corgea pip`. +//! +//! Wraps an install command from a supported package manager, resolves what +//! the package manager *would* install against the public registry, and +//! either blocks the install or runs it transparently. +//! +//! Two independent blocks: +//! * recency — the resolved version was published within `--threshold` +//! (default `2d`); `--no-fail` demotes this to a warning; +//! * vuln verdict — the vuln-api knows a resolved version (named or +//! transitive) is vulnerable or malicious; only `--force` overrides this. +//! +//! Verdict lookups are public and fail open: a vuln-api outage warns and the +//! install continues. 
+ +mod exec; +mod parse; +mod render; +mod tree; +mod verdict; + +#[cfg(test)] +mod test_support; + +use std::time::Duration; + +use chrono::Utc; + +/// Supported package managers. Each one shares enough behaviour with +/// the others that we only need a small per-manager dispatch. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PackageManager { + Npm, + Pip, +} + +impl PackageManager { + pub fn binary_name(self) -> &'static str { + match self { + PackageManager::Npm => "npm", + PackageManager::Pip => "pip", + } + } + + /// Subcommands that this manager treats as "install something new" + /// — the only ones we need to verify before running. + pub fn is_install_subcommand(self, sub: &str) -> bool { + match self { + // npm's install command accepts a wide alias set (and tolerates + // common typos). Mirror npm's own `lib/utils/cmd-list.js` exactly + // so none falls through to the ungated passthrough. `npm ci` and + // its aliases are gated separately, *before* this check (see + // `run_install`), so they are intentionally absent here. + PackageManager::Npm => matches!( + sub, + "install" + | "i" + | "in" + | "ins" + | "inst" + | "insta" + | "instal" + | "isnt" + | "isnta" + | "isntal" + | "isntall" + | "add" + ), + PackageManager::Pip => matches!(sub, "install"), + } + } + + /// vuln-api ecosystem for this manager's registry. + pub fn ecosystem(self) -> crate::vuln_api::Ecosystem { + match self { + PackageManager::Npm => crate::vuln_api::Ecosystem::Npm, + PackageManager::Pip => crate::vuln_api::Ecosystem::Pypi, + } + } + + /// Canonical package name for dedup/matching across spec spellings — + /// the ecosystem's rule (`vuln_api::Ecosystem::normalize_name`). + /// + /// Invariant: request-time normalization is owned by the vuln-api + /// client (`vuln_api::check_package_version`); comparison sites + /// (`verdict::apply_verdicts` / tree dedup) normalize here. Parsers + /// and resolvers carry raw names. 
+ pub fn normalize_name(self, name: &str) -> String { + self.ecosystem().normalize_name(name) + } +} + +/// Connection details for the vuln-api verdict pass. Lookups are public +/// (no auth) and fail open: known vulnerable/malicious verdicts block, +/// while lookup errors warn and continue. +#[derive(Debug, Clone)] +pub struct VerdictConfig { + pub base_url: String, +} + +/// Threat verdict for one resolved target. +#[derive(Debug, Clone)] +pub enum VerdictStatus { + /// vuln-api answered: no known advisories for this exact version. + Clean, + /// vuln-api answered: known vulnerable or malicious — blocks. + Vulnerable(Vec), + /// The verdict could not be obtained (network/5xx/integrity). + /// Public mode fails open: warns, never blocks. + Unverifiable(String), + /// Verdict never attempted (no `VerdictConfig`). + NotChecked, +} + +impl VerdictStatus { + /// Whether this verdict blocks the install. The single definition of + /// "blocking finding", shared by `verdict::block_reason` and the + /// refusal-blame predicate. + fn blocks(&self) -> bool { + matches!(self, VerdictStatus::Vulnerable(_)) + } +} + +#[derive(Debug, Clone)] +pub struct PrecheckOptions { + pub threshold: Duration, + /// If true, demote a recent finding from "block" to "warn-and-run". + pub no_fail: bool, + /// If true, never block: print findings (recent, vulnerable, + /// unverifiable) and run the install anyway. + pub force: bool, + /// `Some` ⇒ run the vuln-api verdict pass against this endpoint. + /// `None` is retained for tests and direct library callers that want + /// recency-only behavior. + pub verdict: Option, + /// Optional registry overrides, used by tests. + pub npm_registry: Option, + pub pypi_registry: Option, +} + +/// Each item the user (or a `-r` requirements file) asked us to install. +#[derive(Debug, Clone)] +pub struct InstallTarget { + pub name: String, + /// Display form, e.g. `axios@^1.0.0` or `requests==2.31.0`. 
+ pub display: String, + /// What we'll feed into the resolver. + pub kind: TargetKind, +} + +#[derive(Debug, Clone)] +pub enum TargetKind { + Npm(crate::verify_deps::registry::NpmSpec), + Pypi(crate::verify_deps::registry::PypiSpec), + /// Something we can't verify (URL/git/file/path) — we surface this + /// as a warning but never block on it. + Unverifiable { + reason: String, + }, +} + +/// Outcome of resolving + verifying a single target. +#[derive(Debug, Clone)] +pub enum TargetOutcome { + /// Resolved cleanly. The blocking recency condition is derived from + /// `age` against the report's threshold (`PrecheckReport::is_recent`). + Resolved { + target: InstallTarget, + resolved: crate::verify_deps::registry::ResolvedPackage, + age: Duration, + verdict: VerdictStatus, + }, + /// We deliberately couldn't verify this target (URL / git / etc.). + Skipped { + target: InstallTarget, + reason: String, + }, + /// Resolution failed (network, unknown package, bad spec). + Error { + target: InstallTarget, + error: String, + }, +} + +/// Why a tree-pass finding is in the would-install set. Drives the +/// provenance label so a package the user asked for (or already depends on) +/// is never mislabeled "(transitive)". +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TreeOrigin { + /// Pulled in as a dependency of something else. + Transitive, + /// Explicitly requested (pip report `"requested"` — CLI arg or + /// requirements file; leftovers here come from `-r` files since named + /// CLI targets match a named outcome instead). + Requested, + /// Already a direct dependency in the project's `package.json`. + PreExisting, + /// Pinned by the project's lockfile (`npm ci`). 
+ Locked, +} + +impl TreeOrigin { + fn label(self) -> &'static str { + match self { + TreeOrigin::Transitive => "(transitive)", + TreeOrigin::Requested => "(from requirements)", + TreeOrigin::PreExisting => "(already in package.json)", + TreeOrigin::Locked => "(locked)", + } + } +} + +/// Verdict for one package the tree pass resolved beyond the named targets. +#[derive(Debug)] +pub struct TreeOutcome { + pub name: String, + pub version: String, + pub origin: TreeOrigin, + pub verdict: VerdictStatus, +} + +/// Result of the tree pass. `PrecheckReport.tree` is `None` when the pass +/// never ran (verdicts disabled, or nothing to resolve). +#[derive(Debug)] +pub enum TreeReport { + /// The full would-install set was resolved and verdicted. + Full { + /// Distinct packages the dry-run resolved (named + transitive). + resolved_count: usize, + /// Verdicts for resolved packages beyond the named targets. + transitive: Vec, + }, + /// Resolution unavailable or failed — only named targets were verified. + NamedOnly { reason: String }, +} + +#[derive(Debug)] +pub struct PrecheckReport { + pub manager: PackageManager, + pub subcommand: String, + pub original_args: Vec, + pub outcomes: Vec, + pub threshold: Duration, + /// `None` ⇒ no tree pass ran. + pub tree: Option, + /// True when the command named nothing — no CLI targets and no + /// requirements files — so everything the tree pass resolved predates + /// this command (bare `npm install`). Distinct from + /// `outcomes.is_empty()`: a requirements-only install also has no named + /// outcomes, but its resolved set IS added by the command. + pub bare_install: bool, +} + +impl PrecheckReport { + fn count(&self, pred: impl Fn(&TargetOutcome) -> bool) -> usize { + self.outcomes.iter().filter(|o| pred(o)).count() + } + /// True when this age is within the recency threshold (the blocking + /// condition). The single definition of "recent". 
+ fn is_recent(&self, age: Duration) -> bool { + age < self.threshold + } + pub fn ok_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Resolved { age, .. } if !self.is_recent(*age))) + } + pub fn recent_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Resolved { age, .. } if self.is_recent(*age))) + } + /// Every verdict in the report: named (resolved) outcomes, then + /// transitive tree findings. + fn verdicts(&self) -> impl Iterator { + self.named_verdicts().chain(self.tree_verdicts()) + } + /// Verdicts on the named targets this command adds. + fn named_verdicts(&self) -> impl Iterator { + self.outcomes.iter().filter_map(|o| match o { + TargetOutcome::Resolved { verdict, .. } => Some(verdict), + _ => None, + }) + } + /// Verdicts beyond the named targets (the resolved tree). + fn tree_verdicts(&self) -> impl Iterator { + match &self.tree { + Some(TreeReport::Full { transitive, .. }) => transitive.as_slice(), + Some(TreeReport::NamedOnly { .. }) | None => &[], + } + .iter() + .map(|o| &o.verdict) + } + pub fn vulnerable_count(&self) -> usize { + self.verdicts() + .filter(|v| matches!(v, VerdictStatus::Vulnerable(_))) + .count() + } + pub fn unverifiable_count(&self) -> usize { + self.verdicts() + .filter(|v| matches!(v, VerdictStatus::Unverifiable(_))) + .count() + } + /// Vulnerable findings beyond the named targets (the resolved tree). + pub fn tree_vulnerable_count(&self) -> usize { + self.tree_verdicts() + .filter(|v| matches!(v, VerdictStatus::Vulnerable(_))) + .count() + } + /// Unverifiable findings beyond the named targets (the resolved tree). + pub fn tree_unverifiable_count(&self) -> usize { + self.tree_verdicts() + .filter(|v| matches!(v, VerdictStatus::Unverifiable(_))) + .count() + } + pub fn skipped_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Skipped { .. })) + } + pub fn error_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Error { .. 
})) + } +} + +/// Canonical entry for ecosystem commands (`corgea npm install …`). +/// +/// `cmd` is everything after the ecosystem name, e.g. +/// `["install", "axios@^1.0.0", "--save-dev"]`. An empty `cmd` execs the +/// package manager with no arguments. +pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOptions) -> i32 { + if cmd.is_empty() { + return exec::exec_command(manager.binary_name(), &[]); + } + + // The install verb may follow global flags (`npm --silent install x`); + // route on the first non-flag token so flags-before-verb can't slip + // past the gate ungated. + let Some(verb_idx) = find_subcommand(manager, cmd) else { + return exec::exec_command(manager.binary_name(), cmd); + }; + let subcommand = &cmd[verb_idx]; + let rest_vec: Vec = cmd[..verb_idx] + .iter() + .chain(&cmd[verb_idx + 1..]) + .cloned() + .collect(); + let rest = rest_vec.as_slice(); + + if manager == PackageManager::Pip && subcommand == "add" { + eprintln!("{}", unsupported_pip_add_message(rest)); + return 1; + } + + // `npm ci` installs the lockfile exactly as written — gate it from the + // project lockfile directly. + if manager == PackageManager::Npm + && matches!( + subcommand.as_str(), + "ci" | "ic" | "clean-install" | "install-clean" | "isntall-clean" + ) + { + return run_npm_ci(subcommand, rest, opts); + } + + if !manager.is_install_subcommand(subcommand) { + // Non-install subcommand: transparent passthrough, args untouched. + return exec::exec_command(manager.binary_name(), cmd); + } + + let parsed = match parse::parse_install_args(manager, rest) { + Ok(p) => p, + Err(e) => { + eprintln!("failed to parse install args: {}", e); + return 2; + } + }; + + warn_registry_override(manager, rest, None); + + run_parsed_install( + manager, + subcommand, + rest, + parsed, + || exec::exec_install_with_args(manager, subcommand, rest), + opts, + ) +} + +/// Index of the first non-flag token in `cmd` — the subcommand verb. 
+/// Skips flag values with the same `takes_value` table as the arg parsers, +/// so `npm --loglevel silent install x` routes on `install`, not `silent`. +/// `None` ⇒ no subcommand at all (flags only, e.g. `npm --version`). +fn find_subcommand(manager: PackageManager, cmd: &[String]) -> Option { + let mut i = 0; + while i < cmd.len() { + let a = &cmd[i]; + if a == "--" { + return (i + 1 < cmd.len()).then_some(i + 1); + } + if !a.starts_with('-') { + return Some(i); + } + i += if !a.contains('=') && parse::takes_value(manager, a) { + 2 + } else { + 1 + }; + } + None +} + +/// `corgea ` — the suggested-command string used by the +/// "Did you mean …" messages. +fn corgea_cmd(words: &[&str], rest: &[String]) -> String { + let mut parts = vec!["corgea".to_string()]; + parts.extend(words.iter().map(|w| w.to_string())); + parts.extend(rest.iter().cloned()); + parts.join(" ") +} + +pub fn pip3_alias_message(args: &[String]) -> Option { + let rest = args.strip_prefix(&["pip3".to_string()])?; + Some(format!( + "error: unknown package manager `pip3`.\nDid you mean `{}`?", + corgea_cmd(&["pip"], rest) + )) +} + +fn unsupported_pip_add_message(rest: &[String]) -> String { + format!( + "error: pip does not support `add`.\nDid you mean `{}`?", + corgea_cmd(&["pip", "install"], rest) + ) +} + +/// Warn when a custom registry/index is selected — via CLI flag or, for npm, +/// the project `.npmrc`. The gate resolves and verdicts against the default +/// (env/public) registry, so it cannot vouch that the artifact the manager +/// pulls from the override matches the advisory universe. Resolving against +/// the override (and multi-index cases like `--extra-index-url`) is a +/// documented limitation — registry allow-listing is future work, separate +/// PRD. +/// +/// pip config-file (`pip.conf`) and `PIP_INDEX_URL`-style env detection is +/// future work: only pip CLI index flags are inspected here. 
+fn warn_registry_override( + manager: PackageManager, + rest: &[String], + npm_root: Option<&std::path::Path>, +) { + let flags: &[&str] = match manager { + PackageManager::Npm => &["--registry"], + PackageManager::Pip => &["-i", "--index-url", "--extra-index-url"], + }; + if let Some(flag) = rest.iter().find(|a| { + flags + .iter() + .any(|f| a.as_str() == *f || a.starts_with(&format!("{f}="))) + }) { + eprintln!( + "warning: '{flag}' points {} at a custom registry/index; the gate resolves and verdicts against the default registry and cannot vouch the installed artifact matches.", + manager.binary_name() + ); + } + + // A project `.npmrc` `registry=` / `@scope:registry=` line redirects + // resolution just like the CLI flag, but silently — the tree pass copies + // the `.npmrc` into its temp dir so resolution honours it, so the verdict + // would still be against the default advisory universe with no flag in + // `rest` to catch. Warn on it so the redirect isn't silent. + if manager == PackageManager::Npm { + if let Some(path) = npmrc_registry_override_path(npm_root) { + eprintln!( + "warning: '{}' sets a custom registry; the gate resolves and verdicts against the default registry and cannot vouch the installed artifact matches.", + path.display() + ); + } + } +} + +/// The first `.npmrc` (CWD, then the npm project root) holding a `registry=` +/// or `@:registry=` line, if any. Best-effort: an absent or unreadable +/// `.npmrc` yields `None` — it can't redirect resolution if it can't be read. +/// +/// `npm_root` lets a caller that already resolved the project root pass it in +/// so `tree::npm_project_root()` isn't walked twice (e.g. `run_npm_ci`); `None` +/// resolves it here. +fn npmrc_registry_override_path(npm_root: Option<&std::path::Path>) -> Option { + let cwd = std::env::current_dir().ok(); + // CWD first, then the project root npm would actually operate on; skip the + // root when it equals the CWD so the same file isn't checked twice. 
+ let mut candidates: Vec = cwd.iter().map(|d| d.join(".npmrc")).collect(); + let root = npm_root + .map(std::path::Path::to_path_buf) + .or_else(tree::npm_project_root); + if let Some(root) = root { + if cwd.as_deref() != Some(root.as_path()) { + candidates.push(root.join(".npmrc")); + } + } + candidates.into_iter().find(|path| { + std::fs::read_to_string(path) + .map(|c| npmrc_has_registry_override(&c)) + .unwrap_or(false) + }) +} + +/// Does this `.npmrc` content select a custom registry? True when an +/// uncommented line's key is `registry` or ends with `:registry` (the +/// `@:registry=...` form). `.npmrc` is INI-ish `key=value`; lines +/// starting with `;` or `#` are comments. +fn npmrc_has_registry_override(contents: &str) -> bool { + contents.lines().any(|line| { + let line = line.trim(); + if line.is_empty() || line.starts_with(';') || line.starts_with('#') { + return false; + } + let Some((key, _)) = line.split_once('=') else { + return false; + }; + let key = key.trim(); + key == "registry" || key.ends_with(":registry") + }) +} + +/// Shared tail of every gated path: render the report, refuse (exit 1) when +/// the block predicate fires, otherwise run the install. +fn report_and_exec( + report: &PrecheckReport, + opts: &PrecheckOptions, + exec: impl FnOnce() -> i32, +) -> i32 { + render::print_text(report); + render::warn_public_lookup_failures(report, opts); + if let Some(reason) = verdict::block_reason(report, opts) { + render::print_refusal(reason); + return 1; + } + exec() +} + +/// Refuse an install the gate cannot verify *before* it can build a +/// `PrecheckReport` — so the decision can't run through `block_reason`. Prints a +/// uniform `cannot verify … (pass --force …)` line and exits 1; `--force` is the +/// single escape. These pre-report refusals are the deliberate, enumerated +/// exceptions to the "all blocking goes through `block_reason`" rule. 
Callers: +/// the bare-`npm install` and `npm ci` root-redirect guards (a redirected +/// project's tree can't be resolved from a copy of the CWD) and the `npm ci` +/// unparsable-lockfile guard (no lockfile to verdict). +fn refuse_unverifiable(detail: &str) -> i32 { + eprintln!("error: cannot verify {detail} (pass --force to proceed unchecked)"); + 1 +} + +/// Collapse a tree-resolution thread's join into the resolver's own `Result`. +/// A panic in the spawned thread becomes a resolution `Err` (which the caller +/// routes to the named-only fallback with a loud warning) instead of +/// re-panicking on the main thread. The gate's verdict path fails open, so an +/// unexpected resolver bug must degrade coverage, never abort the user's +/// install. (We join the handle, so `thread::scope` treats the panic as handled +/// and does not re-propagate it.) +fn tree_resolution_from_join( + join: std::thread::Result, String>>, +) -> Result, String> { + join.unwrap_or_else(|_| Err("tree resolution panicked".to_string())) +} + +/// Post-parse verification shared by the npm and pip install paths. +fn run_parsed_install( + manager: PackageManager, + subcommand_label: &str, + rest: &[String], + parsed: parse::ParsedInstall, + exec: impl FnOnce() -> i32, + opts: PrecheckOptions, +) -> i32 { + // With a verdict config, the tree pass resolves the full would-install + // set; `tree::covers_input` owns what each manager's resolver can chew on. + let tree_eligible = opts.verdict.is_some() && tree::covers_input(manager, &parsed); + let bare_install = parsed.targets.is_empty() && parsed.requirements_files.is_empty(); + + // A BARE `npm install --prefix ` installs another project's whole + // tree, but the gate can't safely resolve that redirected root from a copy + // of the CWD. Nothing named verifies it either, so it would install wholly + // unchecked — fail closed unless `--force`. 
(A NAMED install still verifies + // its targets and degrades the tree pass to a loud named-only warning.) + if manager == PackageManager::Npm && bare_install && opts.verdict.is_some() && !opts.force { + if let Some(flag) = tree::npm_root_redirect_flag(rest) { + return refuse_unverifiable(&format!( + "a bare 'npm install' that redirects the project root ('{flag}'): the would-install tree is unknown" + )); + } + } + + if parsed.targets.is_empty() && !tree_eligible { + // A `-r requirements.txt` install with verdicts disabled is only + // noted; a truly bare install has nothing to note at all. + // + // One bare-npm case lands here not because there's nothing to gate but + // because the project root couldn't be resolved at all: an unreadable + // CWD makes `npm_project_root()` (via `find_up`) return None, so + // `covers_input` is false. Say so loudly instead of skipping the gate + // silently. (npm will most likely fail on the same unreadable CWD; the + // warning explains why nothing was verified.) + if manager == PackageManager::Npm + && opts.verdict.is_some() + && std::env::current_dir().is_err() + { + eprintln!( + "warning: cannot determine the npm project (current directory is unreadable); proceeding without tree verification." + ); + } + render::requirements_note(&parsed); + return exec(); + } + + // The named-target registry lookups and the tree dry-run are independent + // network/subprocess work — overlap them; verdicts need both. 
+ let now = Utc::now(); + let (mut outcomes, tree_resolution) = std::thread::scope(|s| { + let tree = tree_eligible.then(|| s.spawn(|| tree::resolve_tree(manager, rest, &parsed))); + let outcomes = verdict::verify_all(&parsed.targets, &opts, &now, parsed.allow_prerelease); + ( + outcomes, + tree.map(|handle| tree_resolution_from_join(handle.join())), + ) + }); + + let tree = if let Some(resolution) = tree_resolution { + Some(run_tree_pass( + manager, + resolution, + &mut outcomes, + &parsed, + &opts, + &now, + )) + } else { + run_verdict_pass(manager, &mut outcomes, &opts); + None + }; + + // The mandatory loud warning when the tree pass fell back to named-only. + if let Some(TreeReport::NamedOnly { reason }) = &tree { + eprintln!( + "warning: transitive dependencies not checked ({reason}); only named packages were verified." + ); + } + // The note is recency-specific, and recency never covers requirements-file + // packages: even under a Full tree pass they are verdicted but become + // `TreeOutcome`s with no `age` (recency only blocks named CLI targets), so + // the caveat applies in every path. `requirements_note` self-guards when + // there are no `-r` files. + render::requirements_note(&parsed); + + let report = PrecheckReport { + manager, + subcommand: subcommand_label.to_string(), + original_args: rest.to_vec(), + outcomes, + threshold: opts.threshold, + tree, + bare_install, + }; + + report_and_exec(&report, &opts, exec) +} + +/// `npm ci` (and aliases): installs the project lockfile exactly as +/// written, so the gate verdicts the lockfile-pinned set directly — no +/// dry-run needed. Recency isn't checked — locked versions aren't newly +/// chosen by this command; the verdict pass is the gate. Without a project +/// or lockfile npm errors on its own; the gate just execs. 
+fn run_npm_ci(subcommand: &str, rest: &[String], opts: PrecheckOptions) -> i32 { + let exec = || exec::exec_install_with_args(PackageManager::Npm, subcommand, rest); + + let Some(cfg) = &opts.verdict else { + return exec(); + }; + // Resolve the project root once and reuse it for both the registry-override + // warning (its `.npmrc` lookup) and the lockfile read below. + let root = tree::npm_project_root(); + // `npm ci --registry ` (or a project `.npmrc` `registry=` line) pulls + // tarballs from an override while the gate verdicts the lockfile against + // the default registry — same false-assurance gap as the named-install + // path, so warn here too. + warn_registry_override(PackageManager::Npm, rest, root.as_deref()); + // A root-redirect flag (`--prefix ../other`, `-C ../other`) makes npm ci + // install a DIFFERENT project's lockfile than the CWD one we'd verdict, so + // verifying the CWD lockfile would pass on the wrong project. Fail closed + // unless `--force`. + if !opts.force { + if let Some(flag) = tree::npm_root_redirect_flag(rest) { + return refuse_unverifiable(&format!( + "'npm {subcommand}' with '{flag}': it installs a redirected project's lockfile, not this one" + )); + } + } + let Some(root) = root else { + return exec(); + }; + let Some(lock_path) = tree::npm_lockfile_in(&root) else { + return exec(); + }; + + let lock = std::fs::read_to_string(&lock_path) + .map_err(|e| format!("read {}: {e}", lock_path.display())) + .and_then(|content| tree::parse_npm_lockfile(&content)); + let jobs = match lock { + Ok(jobs) => jobs, + Err(e) if opts.force => { + eprintln!("warning: cannot verify 'npm {subcommand}' ({e}); proceeding under --force"); + return exec(); + } + Err(e) => { + // A pre-report refusal: an unparsable lockfile leaves no report to + // feed `block_reason`, so the gate refuses directly through the + // shared `refuse_unverifiable` helper (--force above is the only + // escape). 
That helper enumerates the full set of these deliberate + // exceptions to the single-block-predicate rule. + return refuse_unverifiable(&format!("'npm {subcommand}': {e}")); + } + }; + + // npm lockfiles repeat the same name@version across nested node_modules + // paths (v2/v3) and diamond deps (v1 tree); collapse to one verdict job + // each so the vuln-api is hit — and each package counted — exactly once. + let jobs = dedup_packages(PackageManager::Npm, jobs); + let resolved_count = jobs.len(); + let results = verdict::verdict_pool(jobs, cfg, PackageManager::Npm); + let transitive = results + .into_iter() + .map(|(pkg, verdict)| TreeOutcome { + name: pkg.name, + version: pkg.version, + origin: TreeOrigin::Locked, + verdict, + }) + .collect(); + let report = PrecheckReport { + manager: PackageManager::Npm, + subcommand: subcommand.to_string(), + original_args: rest.to_vec(), + outcomes: Vec::new(), + threshold: opts.threshold, + tree: Some(TreeReport::Full { + resolved_count, + transitive, + }), + bare_install: true, + }; + + report_and_exec(&report, &opts, exec) +} + +/// Collapse repeated packages to one verdict job each, keyed on +/// `(normalize_name(name), version)`, preserving first-seen order. npm +/// lockfiles repeat the same name@version across nested `node_modules` paths +/// (v2/v3) and diamond deps (v1 `dependencies` tree), so verdicting the raw +/// parse would hit the vuln-api — and count the package — once per copy. +fn dedup_packages(manager: PackageManager, jobs: Vec) -> Vec { + let mut seen = std::collections::HashSet::new(); + let mut out = Vec::with_capacity(jobs.len()); + for p in jobs { + if seen.insert((manager.normalize_name(&p.name), p.version.clone())) { + out.push(p); + } + } + out +} + +/// One verdict job (`requested: true`) per named resolved target, in +/// outcome order. +fn resolved_jobs(outcomes: &[TargetOutcome]) -> impl Iterator + '_ { + outcomes.iter().filter_map(|o| match o { + TargetOutcome::Resolved { resolved, .. 
} => Some(tree::TreePackage { + name: resolved.name.clone(), + version: resolved.version.clone(), + requested: true, + }), + _ => None, + }) +} + +/// Verdict the resolved would-install set (`tree::resolve_tree`'s result). +/// On any resolution failure, fall back to the named-only verdict pass; the +/// caller renders the loud warning from the returned `NamedOnly` reason. +/// Only called when `opts.verdict.is_some()`. +fn run_tree_pass( + manager: PackageManager, + resolution: Result, String>, + outcomes: &mut Vec, + parsed: &parse::ParsedInstall, + opts: &PrecheckOptions, + now: &chrono::DateTime, +) -> TreeReport { + let set = match resolution { + Ok(set) => set, + Err(reason) => { + outcomes.extend(requirements_fallback_outcomes(manager, parsed, opts, now)); + run_verdict_pass(manager, outcomes, opts); + return TreeReport::NamedOnly { reason }; + } + }; + + // Dedup the dry-run set (npm lockfiles repeat the same name@version at + // multiple nested paths), then union in the named-resolved targets — a + // named target already installed is absent from the dry-run delta but + // must still be verdicted. + let norm = |n: &str| manager.normalize_name(n); + let mut jobs = dedup_packages(manager, set); + let resolved_count = jobs.len(); + let mut seen: std::collections::HashSet<(String, String)> = jobs + .iter() + .map(|p| (norm(&p.name), p.version.clone())) + .collect(); + // Names the pip dry-run already covers as `requested` (the user named + // them). When pip backtracked one to a different version than the CLI's + // `pypi_resolve` picked, the dry-run's installed version is authoritative; + // `apply_verdicts` collapses it onto the named outcome. Unioning the CLI + // version in too would queue a redundant job that re-matches and could + // clobber that authoritative verdict, so skip it. npm jobs are never + // `requested`, so this set is empty and the npm union is unchanged. 
+ let requested_names: std::collections::HashSet = jobs + .iter() + .filter(|p| p.requested) + .map(|p| norm(&p.name)) + .collect(); + for p in resolved_jobs(outcomes) { + if requested_names.contains(&norm(&p.name)) { + continue; + } + if seen.insert((norm(&p.name), p.version.clone())) { + jobs.push(p); + } + } + + // npm leftovers that are direct deps of the project manifest are + // pre-existing, not transitive. pip carries `requested` instead. + let direct_deps = if manager == PackageManager::Npm { + tree::project_direct_deps() + } else { + Default::default() + }; + + let cfg = opts + .verdict + .as_ref() + .expect("tree pass requires verdict config"); + let results = verdict::verdict_pool(jobs, cfg, manager); + let transitive = verdict::apply_verdicts(manager, results, outcomes, &direct_deps); + TreeReport::Full { + resolved_count, + transitive, + } +} + +fn requirements_fallback_outcomes( + manager: PackageManager, + parsed: &parse::ParsedInstall, + opts: &PrecheckOptions, + now: &chrono::DateTime, +) -> Vec { + if manager != PackageManager::Pip || parsed.requirements_files.is_empty() { + return Vec::new(); + } + + let mut targets = Vec::new(); + let mut outcomes = Vec::new(); + for file in &parsed.requirements_files { + match parse::parse_requirement_file_targets(file) { + Ok(mut file_targets) => targets.append(&mut file_targets), + Err(error) => outcomes.push(TargetOutcome::Error { + target: InstallTarget { + name: file.display().to_string(), + display: file.display().to_string(), + kind: TargetKind::Unverifiable { + reason: "requirements file could not be read".to_string(), + }, + }, + error, + }), + } + } + + outcomes.extend(verdict::verify_all( + &targets, + opts, + now, + parsed.allow_prerelease, + )); + outcomes +} + +/// Vuln-api verdict pass over resolved targets, run through the bounded +/// worker pool. No-op without a `VerdictConfig` (recency-only callers). 
+/// Any client/call failure becomes `Unverifiable`, which warns but never +/// blocks: public lookups fail open. +fn run_verdict_pass( + manager: PackageManager, + outcomes: &mut [TargetOutcome], + opts: &PrecheckOptions, +) { + let Some(cfg) = &opts.verdict else { return }; + + // One job per resolved target, in outcome order; the pool preserves + // order, so verdicts zip straight back onto the resolved outcomes. + let jobs: Vec = resolved_jobs(outcomes).collect(); + + let mut results = verdict::verdict_pool(jobs, cfg, manager).into_iter(); + for o in outcomes.iter_mut() { + if let TargetOutcome::Resolved { verdict, .. } = o { + *verdict = match results.next() { + Some((_, v)) => v, + // Pool invariant broken — fail safe instead of panicking: + // Unverifiable warns instead of silently reading as clean. + None => VerdictStatus::Unverifiable( + "internal error: verdict pool returned fewer results than outcomes".to_string(), + ), + }; + } + } +} + +#[cfg(test)] +mod tests { + use super::test_support::*; + use super::*; + + #[test] + fn install_subcommand_recognition() { + // The full npm install alias set (including common typos) must gate; + // none may fall through to the ungated passthrough. + // The full npm install alias set per `lib/utils/cmd-list.js`. + for alias in [ + "install", "i", "in", "ins", "inst", "insta", "instal", "isnt", "isnta", "isntal", + "isntall", "add", + ] { + assert!( + PackageManager::Npm.is_install_subcommand(alias), + "npm `{alias}` must route through the gate" + ); + } + assert!(!PackageManager::Npm.is_install_subcommand("update")); + // `installation` is not a real npm alias, and `innit` maps to npm + // `init` (not `install`) — neither must be treated as an install. + assert!(!PackageManager::Npm.is_install_subcommand("installation")); + assert!(!PackageManager::Npm.is_install_subcommand("innit")); + // `npm ci` aliases are gated by a separate dispatch that runs before + // this check, so they must NOT be recognized here. 
+ for ci_alias in [ + "ci", + "ic", + "clean-install", + "install-clean", + "isntall-clean", + ] { + assert!( + !PackageManager::Npm.is_install_subcommand(ci_alias), + "npm `{ci_alias}` is handled by run_npm_ci, not this check" + ); + } + + assert!(PackageManager::Pip.is_install_subcommand("install")); + assert!(!PackageManager::Pip.is_install_subcommand("freeze")); + } + + /// Run `run_parsed_install` for `pip install ` with an exec + /// closure that records whether it ran (returning 42 instead of + /// spawning anything). + fn gate_pip_install(args: &[&str], opts: PrecheckOptions) -> (i32, bool) { + let rest: Vec = args.iter().map(|s| s.to_string()).collect(); + let parsed = parse::parse_install_args(PackageManager::Pip, &rest).expect("parse"); + let mut exec_ran = false; + let code = run_parsed_install( + PackageManager::Pip, + "install", + &rest, + parsed, + || { + exec_ran = true; + 42 + }, + opts, + ); + (code, exec_ran) + } + + #[test] + fn unverifiable_target_skips_and_proceeds() { + // git+ spec → Skipped outcome, no registry hit, install proceeds. + let opts = stub_opts(); + let (code, exec_ran) = gate_pip_install(&["git+https://github.com/psf/requests.git"], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + #[test] + fn bare_install_passes_through_without_verification() { + // Bare `pip install` (no targets) → straight exec, no registry hit. + let opts = stub_opts(); + let (code, exec_ran) = gate_pip_install(&[], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + #[test] + fn requirements_files_note_then_exec() { + // `-r reqs.txt` alone, verdicts disabled → printed note, no + // verification, exec runs. 
+ let opts = stub_opts(); + let (code, exec_ran) = gate_pip_install(&["-r", "reqs.txt"], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + #[test] + fn ecosystem_mapping() { + use crate::vuln_api::Ecosystem; + assert_eq!(PackageManager::Pip.ecosystem(), Ecosystem::Pypi); + assert_eq!(PackageManager::Npm.ecosystem(), Ecosystem::Npm); + } + + #[test] + fn diamond_lockfile_parse_then_dedup_counts_each_package_once() { + // A v1 `dependencies` tree where the same package@version is nested + // under two parents (a DIAMOND): `parent-a` and `parent-b` both pull + // `shared@2.0.0`. parse_npm_lockfile returns it once per parent — the + // dedup the npm-ci path applies is what collapses it. Without dedup + // the shared package would be verdicted (and counted) twice. + const DIAMOND: &str = r#"{ + "name": "proj", "lockfileVersion": 1, + "dependencies": { + "parent-a": {"version": "1.0.0", "dependencies": { + "shared": {"version": "2.0.0"} + }}, + "parent-b": {"version": "1.0.0", "dependencies": { + "shared": {"version": "2.0.0"} + }} + } + }"#; + + // parse_npm_lockfile returns duplicates by design (one row per tree + // position): `shared@2.0.0` appears twice. + let parsed = tree::parse_npm_lockfile(DIAMOND).expect("parse v1 diamond lock"); + let shared_in_parse = parsed + .iter() + .filter(|p| p.name == "shared" && p.version == "2.0.0") + .count(); + assert_eq!(shared_in_parse, 2, "parse keeps one row per tree position"); + + // Dedup (the run_npm_ci path) collapses it to a single verdict job, so + // `resolved_count` and the verdict list count it once. + let jobs = dedup_packages(PackageManager::Npm, parsed); + assert_eq!( + jobs.iter() + .filter(|p| p.name == "shared" && p.version == "2.0.0") + .count(), + 1, + "dedup yields the diamond package exactly once" + ); + assert_eq!(jobs.len(), 3, "parent-a, parent-b, shared — no duplicates"); + } + + #[test] + fn npmrc_registry_override_detection() { + // A bare `registry=` line is an override. 
+ assert!(npmrc_has_registry_override( + "registry=https://evil.example/\n" + )); + // The scoped form `@:registry=` is too. + assert!(npmrc_has_registry_override( + "@acme:registry=https://evil.example/\n" + )); + // Surrounding config lines and whitespace don't hide it. + assert!(npmrc_has_registry_override( + "save-exact=true\n registry = https://evil.example/\nfund=false\n" + )); + // Commented-out lines (; and #) don't count. + assert!(!npmrc_has_registry_override( + "; registry=https://evil.example/\n# @acme:registry=https://evil.example/\n" + )); + // No registry directive at all. + assert!(!npmrc_has_registry_override( + "save-exact=true\nfund=false\n" + )); + // A key that merely contains "registry" but isn't `registry` / + // `:registry` (e.g. npm's auth keys) must not trip the warning. + assert!(!npmrc_has_registry_override( + "//evil.example/:_authToken=abc\nregistry-other=x\n" + )); + assert!(!npmrc_has_registry_override("")); + } + + #[test] + fn normalize_name_per_manager() { + // pypi: PEP 503 — lowercase, separator runs collapse to one `-`. + assert_eq!( + PackageManager::Pip.normalize_name("Flask_Cors"), + "flask-cors" + ); + assert_eq!(PackageManager::Pip.normalize_name("a__b"), "a-b"); + // npm names are case-sensitive and pass through verbatim. + assert_eq!(PackageManager::Npm.normalize_name("Left_Pad"), "Left_Pad"); + } + + #[test] + fn tree_resolution_panic_becomes_err_not_abort() { + // A panicking tree-resolution thread must degrade to a resolution Err + // (→ named-only fallback), never re-panic on the caller. + let panicked = std::thread::spawn(|| -> Result, String> { + panic!("simulated resolver bug"); + }); + assert_eq!( + tree_resolution_from_join(panicked.join()), + Err("tree resolution panicked".to_string()) + ); + // A normal result passes straight through. 
+ let ok = std::thread::spawn(|| Ok(Vec::new())); + assert_eq!(tree_resolution_from_join(ok.join()), Ok(Vec::new())); + } +} diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs new file mode 100644 index 0000000..2134791 --- /dev/null +++ b/src/precheck/parse.rs @@ -0,0 +1,1229 @@ +//! Parse install-command argument lists into structured `InstallTarget`s. +//! +//! The goal is to be liberal with valid inputs (real install commands +//! mix flags, package specs, and pass-through args freely) and clear +//! about anything we can't verify (URLs / git / filesystem refs). + +use std::path::{Path, PathBuf}; + +use crate::verify_deps::registry::{NpmSpec, PypiSpec}; + +use super::{InstallTarget, PackageManager, TargetKind}; + +#[derive(Debug, Default)] +pub struct ParsedInstall { + pub targets: Vec, + /// `pip install -r foo.txt` — requirements files are only noted + /// (not verified) by the baseline gate. + pub requirements_files: Vec, + /// `pip install --pre` — allow prerelease versions when resolving the + /// version that would install, so the gate verdicts what pip installs + /// rather than the latest stable. + pub allow_prerelease: bool, +} + +fn build_parsed_install( + positionals: PositionalSplit, + parse_spec: impl Fn(&str) -> InstallTarget, +) -> ParsedInstall { + ParsedInstall { + targets: positionals + .specs + .iter() + .map(|raw| parse_spec(raw)) + .collect(), + requirements_files: positionals.requirements_files, + allow_prerelease: false, + } +} + +/// The default npm dist-tag from `--tag ` / `--tag=value`, which +/// changes what a *bare* spec (`pkg`, no `@version`) installs. Stops at `--` +/// (everything after is positional). The gate must resolve that tag rather +/// than `latest`, or a fresh/vulnerable `beta`/`canary` release bypasses +/// both blocks whenever `latest` is old/clean. +fn npm_default_tag(args: &[String]) -> Option { + // npm config is last-wins: `--tag beta --tag canary` installs canary. 
+ // Returning the first match would gate the wrong dist-tag. + let mut tag = None; + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + break; + } + if a == "--tag" { + tag = args.get(i + 1).cloned(); + i += 2; + continue; + } + if let Some(v) = a.strip_prefix("--tag=") { + tag = Some(v.to_string()); + } + i += 1; + } + tag +} + +/// Whether the forwarded pip args request prereleases (`--pre`). Stops at +/// `--` (positional thereafter). +fn pip_allows_prerelease(args: &[String]) -> bool { + args.iter() + .take_while(|a| a.as_str() != "--") + .any(|a| a == "--pre") +} + +pub fn parse_install_args( + manager: PackageManager, + args: &[String], +) -> Result { + match manager { + PackageManager::Pip => { + let mut parsed = build_parsed_install(extract_pip_positionals(args)?, parse_pypi_spec); + parsed.allow_prerelease = pip_allows_prerelease(args); + Ok(parsed) + } + PackageManager::Npm => { + let default_tag = npm_default_tag(args); + Ok(build_parsed_install( + extract_node_positionals(manager, args), + |raw| parse_npm_spec(raw, default_tag.as_deref()), + )) + } + } +} + +/// Best-effort extraction of registry-installable entries from pip +/// requirements files. This is a fallback for when pip's full dry-run cannot +/// resolve the tree. It deliberately skips file-level options and constraints, +/// while preserving URL/VCS/editable entries as unverifiable targets. +pub(super) fn parse_requirement_file_targets(path: &Path) -> Result, String> { + let mut seen = std::collections::HashSet::new(); + parse_requirement_file_targets_inner(path, &mut seen) +} + +fn parse_requirement_file_targets_inner( + path: &Path, + seen: &mut std::collections::HashSet, +) -> Result, String> { + let path_for_io = if path.is_absolute() { + path.to_path_buf() + } else { + std::env::current_dir() + .map_err(|e| format!("read {}: {e}", path.display()))? 
+ .join(path) + }; + let seen_key = std::fs::canonicalize(&path_for_io).unwrap_or_else(|_| path_for_io.clone()); + if !seen.insert(seen_key) { + return Ok(Vec::new()); + } + + let content = std::fs::read_to_string(&path_for_io) + .map_err(|e| format!("read {}: {e}", path.display()))?; + let base = path_for_io.parent().unwrap_or_else(|| Path::new(".")); + let mut targets = Vec::new(); + + for line in requirement_logical_lines(&content) { + match requirement_line_entry(&line) { + Some(RequirementLineEntry::Target(spec)) => targets.push(parse_pypi_spec(&spec)), + Some(RequirementLineEntry::Include(include)) => { + targets.extend(parse_requirement_file_targets_inner( + &base.join(include), + seen, + )?); + } + None => {} + } + } + + Ok(targets) +} + +/// First format-control directive (`--no-binary` / `--only-binary`) found in +/// any of `files`, following nested `-r`/`--requirement` AND `-c`/`--constraint` +/// includes. pip applies file-level format-control AFTER command-line options +/// (the file parser mutates the shared FormatControl object post-CLI-parse), so +/// a `--no-binary :all:` line inside a requirements file overrides the tree +/// pass's trailing `--only-binary :all:` guard and would build sdists — +/// executing package code — during the dry-run. pip reads and applies +/// format-control from nested constraint (`-c`) files too, not just requirement +/// (`-r`) includes, so both kinds of include must be followed. The tree pass +/// must refuse to dry-run such files. Returns `(file, directive)` of the first +/// hit. 
+pub(super) fn requirements_format_control_directive( + files: &[PathBuf], +) -> Option<(PathBuf, String)> { + let mut seen = std::collections::HashSet::new(); + files + .iter() + .find_map(|file| format_control_scan(file, &mut seen)) +} + +fn format_control_scan( + path: &Path, + seen: &mut std::collections::HashSet, +) -> Option<(PathBuf, String)> { + let path_for_io = if path.is_absolute() { + path.to_path_buf() + } else { + std::env::current_dir().ok()?.join(path) + }; + let seen_key = std::fs::canonicalize(&path_for_io).unwrap_or_else(|_| path_for_io.clone()); + if !seen.insert(seen_key) { + return None; + } + + // Best-effort: an unreadable/missing file can't carry a directive we'd + // miss — pip runs as the same uid, so it can't read it either and the + // dry-run fails loudly on its own. + let content = std::fs::read_to_string(&path_for_io).ok()?; + let base = path_for_io.parent().unwrap_or_else(|| Path::new(".")); + + for line in requirement_logical_lines(&content) { + let line = strip_requirement_comment(&line); + let first = line.split_whitespace().next().unwrap_or_default(); + if first == "--no-binary" + || first == "--only-binary" + || first.starts_with("--no-binary=") + || first.starts_with("--only-binary=") + { + return Some((path.to_path_buf(), first.to_string())); + } + // pip applies file-level format-control from both `-r` requirement + // includes and `-c` constraint includes, so follow both. 
+ for (short, long) in [("-r", "--requirement"), ("-c", "--constraint")] { + if let Some(include) = requirement_flag_value(line, short, long) { + if let Some(hit) = format_control_scan(&base.join(include), seen) { + return Some(hit); + } + } + } + } + None +} + +enum RequirementLineEntry { + Target(String), + Include(PathBuf), +} + +fn requirement_logical_lines(content: &str) -> Vec { + let mut lines = Vec::new(); + let mut current = String::new(); + + for raw in content.lines() { + let trimmed = raw.trim_end(); + let (part, continued) = match trimmed.strip_suffix('\\') { + Some(part) => (part.trim_end(), true), + None => (trimmed, false), + }; + if !current.is_empty() { + current.push(' '); + } + current.push_str(part.trim()); + if !continued { + lines.push(std::mem::take(&mut current)); + } + } + + if !current.trim().is_empty() { + lines.push(current); + } + lines +} + +fn requirement_line_entry(line: &str) -> Option { + let line = strip_requirement_comment(line); + if line.is_empty() { + return None; + } + + if let Some(path) = requirement_flag_value(line, "-r", "--requirement") { + return Some(RequirementLineEntry::Include(PathBuf::from(path))); + } + if requirement_flag_value(line, "-c", "--constraint").is_some() { + return None; + } + if let Some(path) = requirement_flag_value(line, "-e", "--editable") { + return Some(RequirementLineEntry::Target(format!("-e {path}"))); + } + + if line.starts_with('-') { + return None; + } + + let spec = strip_inline_requirement_options(line); + (!spec.is_empty()).then(|| RequirementLineEntry::Target(spec.to_string())) +} + +fn strip_requirement_comment(line: &str) -> &str { + let trimmed = line.trim(); + if trimmed.starts_with('#') { + return ""; + } + [" #", "\t#"] + .iter() + .filter_map(|marker| trimmed.find(marker)) + .min() + .map_or(trimmed, |idx| trimmed[..idx].trim()) +} + +fn requirement_flag_value<'a>(line: &'a str, short: &str, long: &str) -> Option<&'a str> { + let mut parts = line.split_whitespace(); + let first 
= parts.next()?; + if first == short || first == long { + return parts.next(); + } + if let Some(value) = first.strip_prefix(&format!("{long}=")) { + return Some(value); + } + first + .strip_prefix(short) + .filter(|value| !value.is_empty() && !value.starts_with('-')) +} + +fn strip_inline_requirement_options(line: &str) -> &str { + [ + " --hash", + " --config-setting", + " --global-option", + " --install-option", + ] + .iter() + .filter_map(|marker| line.find(marker)) + .min() + .map_or(line.trim(), |idx| line[..idx].trim()) +} + +#[derive(Debug, Default)] +struct PositionalSplit { + specs: Vec, + requirements_files: Vec, +} + +/// Known install flags that take a separate value argument, per manager. +/// The fallback heuristic in [`skip_unknown_flag`] only skips URL/path-like +/// values, so a bare-word value (`-w my-workspace`) would otherwise parse — +/// and get verified or blocked — as a package spec. Not exhaustive; the +/// heuristic still backstops anything unlisted. +pub(super) fn takes_value(manager: PackageManager, flag: &str) -> bool { + match manager { + PackageManager::Npm => matches!( + flag, + "-w" | "--workspace" + | "--prefix" + | "--registry" + | "--tag" + | "--omit" + | "--include" + | "--loglevel" + | "--install-strategy" + | "--before" + | "--cpu" + | "--os" + | "--libc" + | "--otp" + | "--location" + | "--cache" + | "--script-shell" + | "--userconfig" + | "--globalconfig" + | "--depth" + ), + PackageManager::Pip => matches!( + flag, + "-i" | "--index-url" + | "--extra-index-url" + | "-f" + | "--find-links" + | "--platform" + | "--python-version" + | "--implementation" + | "--abi" + | "-t" + | "--target" + | "--prefix" + | "--root" + | "--src" + | "--upgrade-strategy" + | "--no-binary" + | "--only-binary" + | "--progress-bar" + | "--proxy" + | "--retries" + | "--timeout" + | "--exists-action" + | "--trusted-host" + | "--cert" + | "--client-cert" + | "--cache-dir" + | "--log" + | "--python" + | "--keyring-provider" + | "--report" + | 
"--use-feature" + | "--use-deprecated" + | "--config-settings" + | "-C" + | "--global-option" + | "--hash" + ), + } +} + +/// Strip flags from an npm install argument list, returning only the +/// positional package specs. +/// +/// We treat anything starting with `-` as a flag. Boolean flags (`-D`, +/// `--save-dev`, `--no-save`, ...) are dropped on their own. Flags +/// that take a value can be written as either `--flag=value` or +/// `--flag value`; known value-taking flags ([`takes_value`]) skip the +/// next token outright, anything else skips it only if it looks like a +/// value (a URL / path), never like a package spec. +fn extract_node_positionals(manager: PackageManager, args: &[String]) -> PositionalSplit { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + // After `--`, everything is positional. + for rest in &args[i + 1..] { + out.specs.push(rest.clone()); + } + break; + } + if a.starts_with('-') { + if !a.contains('=') && takes_value(manager, a) { + i += 2; + continue; + } + i = skip_unknown_flag(args, i); + continue; + } + out.specs.push(a.clone()); + i += 1; + } + out +} + +/// Advance past an unknown flag at `i`. `--flag=value` is self-contained; +/// otherwise peek at the next arg and skip it too if it doesn't look like +/// a package spec (contains `://` or is path-like) — see the heuristic +/// rationale on [`extract_node_positionals`]. +fn skip_unknown_flag(args: &[String], i: usize) -> usize { + if args[i].contains('=') { + return i + 1; + } + let next_is_value = args + .get(i + 1) + .map(|n| { + !n.starts_with('-') + && (n.contains("://") + || n.starts_with('/') + || n.starts_with("./") + || n.starts_with('~')) + }) + .unwrap_or(false); + i + if next_is_value { 2 } else { 1 } +} + +/// pip's argument grammar is more structured than npm's: there are +/// known flags that take a value (`-r FILE`, `-c FILE`, `-e PATH`, +/// `--index-url URL`, `--target DIR`, ...). 
We special-case `-r/-c/-e` +/// because they affect behaviour, and treat the rest with the same +/// liberal heuristic as npm. +fn extract_pip_positionals(args: &[String]) -> Result { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + for rest in &args[i + 1..] { + out.specs.push(rest.clone()); + } + break; + } + match a.as_str() { + "-r" | "--requirement" => { + let path = args + .get(i + 1) + .ok_or_else(|| "`-r` / `--requirement` requires a file path".to_string())?; + out.requirements_files.push(PathBuf::from(path)); + i += 2; + continue; + } + "-c" | "--constraint" => { + // Constraints don't add packages, but skip the path. + i += 2; + continue; + } + "-e" | "--editable" => { + // Editable installs are explicit unverifiable targets. + let path = args.get(i + 1).cloned().unwrap_or_default(); + out.specs.push(format!("-e {}", path)); + i += if args.get(i + 1).is_some() { 2 } else { 1 }; + continue; + } + _ => {} + } + // Attached short-option forms (pip's optparse): `-rreqs.txt`, + // `-cfile`, `-e./path`. Missing these would silently skip the + // whole gate (`-rreqs.txt` would read as a boolean flag and the + // install would look bare). + if let Some(path) = attached_short_value(a, "-r") { + out.requirements_files.push(PathBuf::from(path)); + i += 1; + continue; + } + if attached_short_value(a, "-c").is_some() { + i += 1; + continue; + } + if let Some(path) = attached_short_value(a, "-e") { + out.specs.push(format!("-e {}", path)); + i += 1; + continue; + } + // Long-form `--requirement=foo.txt`. 
+ if let Some(rest) = a.strip_prefix("--requirement=") { + out.requirements_files.push(PathBuf::from(rest)); + i += 1; + continue; + } + if a.strip_prefix("--constraint=").is_some() { + i += 1; + continue; + } + if let Some(rest) = a.strip_prefix("--editable=") { + out.specs.push(format!("-e {}", rest)); + i += 1; + continue; + } + if a.starts_with('-') { + if !a.contains('=') && takes_value(PackageManager::Pip, a) { + i += 2; + continue; + } + i = skip_unknown_flag(args, i); + continue; + } + out.specs.push(a.clone()); + i += 1; + } + Ok(out) +} + +/// `-rreqs.txt` → `reqs.txt`: the value attached directly to a short +/// option. `None` for the bare flag itself (handled by the exact-match +/// arms) and for long `--` forms. +fn attached_short_value<'a>(arg: &'a str, flag: &str) -> Option<&'a str> { + arg.strip_prefix(flag).filter(|rest| !rest.is_empty()) +} + +/// Parse a single npm-style positional, e.g. `axios`, `axios@1.0.0`, +/// `axios@^1.0.0`, `axios@latest`, `@types/node@20.10.5`, +/// `git+https://...`, `file:./local`, `./local`, `npm:other@1.0.0`. +/// +/// `default_tag` is the `--tag ` from the command, applied only to a +/// *bare* spec (no `@version`): `npm install --tag beta pkg` installs the +/// `beta` dist-tag, so the gate must resolve that, not `latest`. An explicit +/// `pkg@latest` / `pkg@1.0.0` overrides the default tag. 
+fn parse_npm_spec(raw: &str, default_tag: Option<&str>) -> InstallTarget { + let display = raw.to_string(); + let trimmed = raw.trim(); + + let unverifiable_prefixes = [ + "git+", + "git:", + "git@", + "github:", + "gist:", + "bitbucket:", + "gitlab:", + "ssh://", + "http://", + "https://", + "file:", + "./", + "../", + "/", + "~/", + "npm:", + "workspace:", + ]; + if let Some(p) = unverifiable_prefixes + .iter() + .find(|p| trimmed.starts_with(*p)) + { + let reason = match *p { + "npm:" => "npm: aliased dependency — registry verification skipped", + "workspace:" => "workspace: dependency — registry verification skipped", + _ => "spec is a URL/git/filesystem reference — registry verification skipped", + }; + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: reason.to_string(), + }, + }; + } + + // Bare `.` / `..` install the current/parent directory; `user/repo` + // (one `/`, not an `@scope/` name) is npm's GitHub shorthand. Neither + // is a registry package — resolving them would 404 and block a command + // plain npm accepts. + if trimmed == "." || trimmed == ".." { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a filesystem path — registry verification skipped".to_string(), + }, + }; + } + if !trimmed.starts_with('@') && trimmed.contains('/') { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a GitHub shorthand or path — registry verification skipped" + .to_string(), + }, + }; + } + + // Find the version separator. Scoped names start with `@` and the + // version separator is the *next* `@` (if any). Unscoped names + // use the first `@`. 
+ let (name_part, spec_part): (&str, &str) = if let Some(rest) = trimmed.strip_prefix('@') { + match rest.find('@') { + Some(at_in_rest) => { + let split = 1 + at_in_rest; + (&trimmed[..split], &trimmed[split + 1..]) + } + None => (trimmed, ""), + } + } else { + match trimmed.find('@') { + Some(at) => (&trimmed[..at], &trimmed[at + 1..]), + None => (trimmed, ""), + } + }; + + let name = name_part.trim().to_string(); + let spec_str = spec_part.trim(); + + let kind = if spec_str.is_empty() { + // A bare spec picks up the command's `--tag`, if any; otherwise latest. + match default_tag { + Some(tag) => TargetKind::Npm(NpmSpec::Tag(tag.to_string())), + None => TargetKind::Npm(NpmSpec::Latest), + } + } else if spec_str.eq_ignore_ascii_case("latest") { + TargetKind::Npm(NpmSpec::Latest) + } else if semver::Version::parse(spec_str).is_ok() { + TargetKind::Npm(NpmSpec::Exact(spec_str.to_string())) + } else if let Some(rest) = spec_str + .strip_prefix('v') + .filter(|rest| semver::Version::parse(rest).is_ok()) + { + // npm coerces a leading `v` (`pkg@v1.2.3` installs 1.2.3); without + // this it would read as a dist-tag and error. + TargetKind::Npm(NpmSpec::Exact(rest.to_string())) + } else if looks_like_npm_range(spec_str) { + TargetKind::Npm(NpmSpec::Range(spec_str.to_string())) + } else if is_npm_dist_tag(spec_str) { + TargetKind::Npm(NpmSpec::Tag(spec_str.to_string())) + } else { + TargetKind::Unverifiable { + reason: format!( + "could not classify version spec '{}' (not a valid semver, range, or dist-tag)", + spec_str + ), + } + }; + + InstallTarget { + name, + display, + kind, + } +} + +/// Loose check: does this spec look like an npm version range? +/// We accept anything that *starts* with a range metacharacter +/// (`^`, `~`, `>`, `<`, `=`, `*`) or with a digit (so `1.x`, `1.2.x`, +/// and bare ranges still resolve). Validation against the registry's +/// version list happens later inside the resolver. 
+fn looks_like_npm_range(s: &str) -> bool { + matches!( + s.chars().next(), + Some('^') | Some('~') | Some('>') | Some('<') | Some('=') | Some('*') + ) || s + .chars() + .next() + .map(|c| c.is_ascii_digit()) + .unwrap_or(false) +} + +/// A dist-tag is a non-empty alphanumeric string (e.g. `latest`, +/// `next`, `beta`, `alpha-1`). We reject anything that contains +/// version-spec metacharacters. +fn is_npm_dist_tag(s: &str) -> bool { + !s.is_empty() + && s.chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.') + && s.chars() + .next() + .map(|c| c.is_ascii_alphabetic()) + .unwrap_or(false) +} + +/// Parse a single pip-style positional, e.g. `requests`, `requests==2.31.0`, +/// `requests>=2.0`, `requests[security]`, `git+https://...`, `./local`. +fn parse_pypi_spec(raw: &str) -> InstallTarget { + let display = raw.to_string(); + let trimmed = raw.trim(); + + let unverifiable_prefixes = [ + "git+", "hg+", "svn+", "bzr+", "http://", "https://", "file:", "./", "../", "/", "~/", + "-e ", "-e=", + ]; + if unverifiable_prefixes.iter().any(|p| trimmed.starts_with(p)) { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a VCS / URL / editable / filesystem reference — registry verification skipped".to_string(), + }, + }; + } + + // Strip the PEP 508 environment marker first — its comparison operators + // (`; python_version >= "3.7"`) must not be mistaken for version + // operators, which would split the name inside the marker. + let req_part = trimmed.split(';').next().unwrap_or(trimmed).trim(); + + // PEP 508 direct reference: `name @ https://…` — unverifiable like a + // bare URL (never a registry lookup, never a block). 
+ if let Some((_, after_at)) = req_part.split_once('@') { + if after_at.contains("://") { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a PEP 508 direct reference (name @ url) — registry verification skipped".to_string(), + }, + }; + } + } + + // Bare `.` / `..` and anything with a path separator install from the + // filesystem (`pip install .`), not the registry. + if req_part == "." || req_part == ".." || req_part.contains('/') || req_part.contains('\\') { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a filesystem path — registry verification skipped".to_string(), + }, + }; + } + + // Split at the leftmost specifier operator (`==`, `>=`, `<=`, `!=`, + // `~=`, `>`, `<`; PEP 440 also allows `===`). Only the index matters — + // the operator itself stays with the spec part. + let separators = ["===", "==", ">=", "<=", "!=", "~=", ">", "<"]; + let split_at = separators.iter().filter_map(|sep| req_part.find(sep)).min(); + + let (name_part, spec_part): (&str, &str) = match split_at { + Some(idx) => (&req_part[..idx], &req_part[idx..]), + None => (req_part, ""), + }; + + // Strip extras: `requests[security]` -> `requests`. + let name_no_extras = name_part + .split_once('[') + .map_or(name_part, |(n, _)| n) + .trim(); + + let spec_str = spec_part.trim(); + + let kind = if spec_str.is_empty() { + TargetKind::Pypi(PypiSpec::Latest) + } else if let Some(rest) = spec_str.strip_prefix("===") { + TargetKind::Pypi(PypiSpec::Exact(rest.trim().to_string())) + } else if let Some(rest) = spec_str.strip_prefix("==") { + let v = rest.trim(); + if v.is_empty() { + TargetKind::Unverifiable { + reason: "empty `==` specifier".to_string(), + } + } else if v.contains('*') { + // Wildcard pin (`==1.4.*`) — a range, not a literal version; + // the resolver desugars it. 
+ TargetKind::Pypi(PypiSpec::Specifier(spec_str.to_string())) + } else { + TargetKind::Pypi(PypiSpec::Exact(v.to_string())) + } + } else { + TargetKind::Pypi(PypiSpec::Specifier(spec_str.to_string())) + }; + + InstallTarget { + name: name_no_extras.to_string(), + display, + kind, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + #[test] + fn extracts_npm_positionals_skipping_flags() { + let args = vec![ + "axios".to_string(), + "--save-dev".to_string(), + "@types/node@latest".to_string(), + "-D".to_string(), + "--registry".to_string(), + "https://example.com/registry".to_string(), + "lodash@^4.0.0".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!( + p.specs, + vec![ + "axios".to_string(), + "@types/node@latest".to_string(), + "lodash@^4.0.0".to_string(), + ] + ); + } + + #[test] + fn npm_workspace_flag_value_is_not_a_spec() { + // npm's `-w ` / `--workspace ` take a bare-word value; + // it must never be verified (or blocked) as a package spec. + for flag in ["-w", "--workspace"] { + let args = vec![ + flag.to_string(), + "my-workspace".to_string(), + "lodash".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!(p.specs, vec!["lodash".to_string()], "flag {flag}"); + } + // `--workspace=name` is self-contained. 
+ let args = vec!["--workspace=my-workspace".to_string(), "lodash".to_string()]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!(p.specs, vec!["lodash".to_string()]); + } + + #[test] + fn extracts_npm_positionals_after_double_dash() { + let args = vec![ + "--save-dev".to_string(), + "--".to_string(), + "axios".to_string(), + "--this-is-positional-now".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!( + p.specs, + vec!["axios".to_string(), "--this-is-positional-now".to_string()] + ); + } + + #[test] + fn npm_tag_flag_changes_bare_spec_resolution() { + // `--tag beta` (before or after the verb's rest) makes a bare spec + // resolve the beta dist-tag, not latest. An explicit version wins. + for args in [ + vec!["--tag".to_string(), "beta".to_string(), "pkg".to_string()], + vec!["pkg".to_string(), "--tag=beta".to_string()], + ] { + let p = parse_install_args(PackageManager::Npm, &args).unwrap(); + assert_eq!(p.targets.len(), 1, "args {args:?}"); + assert!( + matches!(&p.targets[0].kind, TargetKind::Npm(NpmSpec::Tag(t)) if t == "beta"), + "bare spec must pick up --tag: {:?}", + p.targets[0].kind + ); + } + + // Explicit pin ignores --tag. + let args = vec![ + "--tag".to_string(), + "beta".to_string(), + "pkg@1.0.0".to_string(), + ]; + let p = parse_install_args(PackageManager::Npm, &args).unwrap(); + assert!( + matches!(&p.targets[0].kind, TargetKind::Npm(NpmSpec::Exact(v)) if v == "1.0.0"), + "explicit version must override --tag: {:?}", + p.targets[0].kind + ); + + // No --tag → bare spec stays latest. + let args = vec!["pkg".to_string()]; + let p = parse_install_args(PackageManager::Npm, &args).unwrap(); + assert!(matches!( + &p.targets[0].kind, + TargetKind::Npm(NpmSpec::Latest) + )); + } + + #[test] + fn npm_tag_flag_is_last_wins_like_npm_config() { + // npm's config parser is last-wins: `--tag beta --tag canary` + // installs canary. Gating beta would verdict the wrong release. 
+ let args = vec![ + "--tag".to_string(), + "beta".to_string(), + "pkg".to_string(), + "--tag=canary".to_string(), + ]; + let p = parse_install_args(PackageManager::Npm, &args).unwrap(); + assert!( + matches!(&p.targets[0].kind, TargetKind::Npm(NpmSpec::Tag(t)) if t == "canary"), + "last --tag must win: {:?}", + p.targets[0].kind + ); + } + + #[test] + fn pip_pre_flag_sets_allow_prerelease() { + let with = parse_install_args( + PackageManager::Pip, + &["--pre".to_string(), "flask".to_string()], + ) + .unwrap(); + assert!(with.allow_prerelease, "--pre must set allow_prerelease"); + + let without = parse_install_args(PackageManager::Pip, &["flask".to_string()]).unwrap(); + assert!(!without.allow_prerelease); + } + + #[test] + fn parse_npm_spec_classifies() { + let cases = vec![ + ("axios", NpmSpec::Latest), + ("axios@", NpmSpec::Latest), + ("axios@latest", NpmSpec::Latest), + ("axios@1.0.0", NpmSpec::Exact("1.0.0".to_string())), + ("axios@^1.0.0", NpmSpec::Range("^1.0.0".to_string())), + ("axios@~1.0.0", NpmSpec::Range("~1.0.0".to_string())), + ( + "axios@>=1.0.0 <2.0.0", + NpmSpec::Range(">=1.0.0 <2.0.0".to_string()), + ), + ("axios@next", NpmSpec::Tag("next".to_string())), + ("axios@beta", NpmSpec::Tag("beta".to_string())), + ("@types/node", NpmSpec::Latest), + ("@types/node@20.10.5", NpmSpec::Exact("20.10.5".to_string())), + ("@types/node@^20.0.0", NpmSpec::Range("^20.0.0".to_string())), + ("@types/node@latest", NpmSpec::Latest), + ]; + for (input, expected) in cases { + let target = parse_npm_spec(input, None); + match (&target.kind, &expected) { + (TargetKind::Npm(actual), expected) => { + assert_eq!(actual, expected, "for input '{}'", input); + } + _ => panic!("unexpected kind for '{}'", input), + } + } + } + + #[test] + fn parse_npm_spec_extracts_scoped_names() { + assert_eq!(parse_npm_spec("@types/node", None).name, "@types/node"); + assert_eq!( + parse_npm_spec("@types/node@20.10.5", None).name, + "@types/node" + ); + assert_eq!(parse_npm_spec("axios@1.2.3", 
None).name, "axios"); + assert_eq!(parse_npm_spec("axios", None).name, "axios"); + } + + #[test] + fn parse_npm_spec_skips_unverifiable() { + let unverifiable = vec![ + "git+https://github.com/x/y.git", + "git@github.com:x/y.git", + "github:expressjs/express", + "https://example.com/pkg.tgz", + "file:./local-pkg", + "./local-pkg", + "../sibling", + "/abs/path", + "npm:alias-of-other@1.0.0", + "workspace:*", + // GitHub shorthand and bare paths — registry lookups would 404. + "expressjs/express", + "user/repo#semver:^1.0.0", + ".", + "..", + ]; + for u in unverifiable { + let t = parse_npm_spec(u, None); + assert!( + matches!(t.kind, TargetKind::Unverifiable { .. }), + "for '{}'", + u + ); + } + // Scoped names keep their one `/` and stay verifiable. + assert!(matches!( + parse_npm_spec("@types/node", None).kind, + TargetKind::Npm(NpmSpec::Latest) + )); + } + + #[test] + fn parse_npm_spec_coerces_leading_v() { + // npm installs `pkg@v1.2.3` as 1.2.3; a dist-tag reading would error. + let t = parse_npm_spec("axios@v1.2.3", None); + assert!( + matches!(t.kind, TargetKind::Npm(NpmSpec::Exact(ref v)) if v == "1.2.3"), + "got {:?}", + t.kind + ); + // …but a real tag that merely starts with `v` stays a tag. 
+ let t = parse_npm_spec("node@v8-canary", None); + assert!( + matches!(t.kind, TargetKind::Npm(NpmSpec::Tag(ref s)) if s == "v8-canary"), + "got {:?}", + t.kind + ); + } + + #[test] + fn parse_pypi_spec_classifies() { + let cases = vec![ + ("requests", PypiSpec::Latest), + ("requests==2.31.0", PypiSpec::Exact("2.31.0".to_string())), + ("requests>=2.0", PypiSpec::Specifier(">=2.0".to_string())), + ("requests~=2.0", PypiSpec::Specifier("~=2.0".to_string())), + ("requests<3,>=2", PypiSpec::Specifier("<3,>=2".to_string())), + ("requests[security]", PypiSpec::Latest), + ( + "requests[security]==2.31.0", + PypiSpec::Exact("2.31.0".to_string()), + ), + ]; + for (input, expected) in cases { + let t = parse_pypi_spec(input); + match (&t.kind, &expected) { + (TargetKind::Pypi(actual), expected) => { + assert_eq!(actual, expected, "for '{}'", input); + } + _ => panic!("unexpected kind for '{}'", input), + } + } + } + + #[test] + fn parse_pypi_spec_strips_extras_and_markers() { + assert_eq!( + parse_pypi_spec("requests[security]==2.31.0").name, + "requests" + ); + let t = parse_pypi_spec("requests==2.31.0; python_version >= \"3.7\""); + assert_eq!(t.name, "requests"); + assert!( + matches!(t.kind, TargetKind::Pypi(PypiSpec::Exact(ref v)) if v == "2.31.0"), + "env marker must not leak into the spec: {:?}", + t.kind + ); + + // A marker-only spec must not split inside the marker: the name is + // `pkg` and the (versionless) spec resolves latest. + let marker_only = parse_pypi_spec("pkg; python_version >= \"3.7\""); + assert_eq!(marker_only.name, "pkg"); + assert!( + matches!(marker_only.kind, TargetKind::Pypi(PypiSpec::Latest)), + "got {:?}", + marker_only.kind + ); + } + + #[test] + fn parse_pypi_spec_wildcard_pin_is_a_specifier() { + // `==1.4.*` is a range; matching it as a literal release key would + // always miss and block. 
+ let t = parse_pypi_spec("django==4.2.*"); + assert_eq!(t.name, "django"); + assert!( + matches!(t.kind, TargetKind::Pypi(PypiSpec::Specifier(ref s)) if s == "==4.2.*"), + "got {:?}", + t.kind + ); + } + + #[test] + fn parse_pypi_spec_direct_reference_and_paths_are_unverifiable() { + // PEP 508 direct reference, bare dot, and separator-bearing paths + // must never be looked up (and thus never blocked) as registry names. + for spec in [ + "requests @ https://files.pythonhosted.org/requests-2.31.0.whl", + "pkg @ https://example.com/x.whl ; python_version >= \"3.7\"", + ".", + "..", + "sub/dir", + ] { + let t = parse_pypi_spec(spec); + assert!( + matches!(t.kind, TargetKind::Unverifiable { .. }), + "for '{}': {:?}", + spec, + t.kind + ); + } + } + + #[test] + fn parse_pypi_spec_skips_unverifiable() { + let unverifiable = vec![ + "git+https://github.com/x/y.git", + "https://example.com/pkg.tar.gz", + "./local-pkg", + "/abs/path", + "-e ./local", + ]; + for u in unverifiable { + let t = parse_pypi_spec(u); + assert!( + matches!(t.kind, TargetKind::Unverifiable { .. 
}), + "for '{}'", + u + ); + } + } + + #[test] + fn pip_args_extract_requirements_files() { + let args = vec![ + "-r".to_string(), + "reqs.txt".to_string(), + "requests==2.31.0".to_string(), + "--requirement=other.txt".to_string(), + "--constraint".to_string(), + "constraints.txt".to_string(), + "--constraint=other-constraints.txt".to_string(), + "-e".to_string(), + "./local".to_string(), + ]; + let p = extract_pip_positionals(&args).unwrap(); + assert_eq!( + p.requirements_files, + vec![PathBuf::from("reqs.txt"), PathBuf::from("other.txt")] + ); + assert!(p.specs.contains(&"requests==2.31.0".to_string())); + assert!(p.specs.iter().any(|s| s.starts_with("-e "))); + assert!(!p.specs.contains(&"constraints.txt".to_string())); + assert!(!p.specs.contains(&"other-constraints.txt".to_string())); + assert!(!p + .requirements_files + .contains(&PathBuf::from("constraints.txt"))); + assert!(!p + .requirements_files + .contains(&PathBuf::from("other-constraints.txt"))); + } + + #[test] + fn pip_attached_short_options_are_recognized() { + // pip accepts `-rreqs.txt` (value attached); reading it as a boolean + // flag would make the install look bare and skip the gate entirely. + let args = vec![ + "-rreqs.txt".to_string(), + "-cconstraints.txt".to_string(), + "-e./local".to_string(), + ]; + let p = extract_pip_positionals(&args).unwrap(); + assert_eq!(p.requirements_files, vec![PathBuf::from("reqs.txt")]); + assert!(p.specs.contains(&"-e ./local".to_string())); + assert!(!p.specs.contains(&"-cconstraints.txt".to_string())); + } + + #[test] + fn pip_value_flag_values_are_not_specs() { + // A bare-word value of a known value-taking flag must not be + // verified (or blocked) as a package. 
+ let args = vec![ + "--platform".to_string(), + "win_amd64".to_string(), + "--no-binary".to_string(), + ":all:".to_string(), + "--target".to_string(), + "build".to_string(), + "requests".to_string(), + ]; + let p = extract_pip_positionals(&args).unwrap(); + assert_eq!(p.specs, vec!["requests".to_string()]); + } + + #[test] + fn requirements_format_control_scan_follows_includes() { + // SECURITY: pip applies file-level format-control AFTER CLI flags, + // so a --no-binary line (even in a nested -r include) defeats the + // tree pass's trailing --only-binary :all: guard. The scan must + // find it transitively; option lines that don't touch + // format-control must not trip it. + let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("inner.txt"), + "# comment\n--no-binary :all:\n", + ) + .expect("write inner"); + std::fs::write(dir.path().join("outer.txt"), "flask==1.0\n-r inner.txt\n") + .expect("write outer"); + let (file, directive) = + requirements_format_control_directive(&[dir.path().join("outer.txt")]) + .expect("directive must be found through the include"); + assert!(file.ends_with("outer.txt") || file.ends_with("inner.txt")); + assert_eq!(directive, "--no-binary"); + + // SECURITY: pip reads nested `-c` constraint includes and applies + // their format-control too, so a --no-binary hidden behind a `-c` + // include must also be found transitively. 
+ std::fs::write( + dir.path().join("constraint_inner.txt"), + "# comment\n--no-binary :all:\n", + ) + .expect("write constraint inner"); + std::fs::write( + dir.path().join("constraint_outer.txt"), + "flask==1.0\n-c constraint_inner.txt\n", + ) + .expect("write constraint outer"); + let (c_file, c_directive) = + requirements_format_control_directive(&[dir.path().join("constraint_outer.txt")]) + .expect("directive must be found through the -c constraint include"); + assert!( + c_file.ends_with("constraint_outer.txt") || c_file.ends_with("constraint_inner.txt") + ); + assert_eq!(c_directive, "--no-binary"); + + // Attached `=` form counts too. + std::fs::write(dir.path().join("eq.txt"), "--only-binary=:none:\n").expect("write eq"); + assert!(requirements_format_control_directive(&[dir.path().join("eq.txt")]).is_some()); + + // Non-format-control options don't trip the scan. + std::fs::write( + dir.path().join("clean.txt"), + "flask==1.0\n--prefer-binary\n--hash=sha256:abc\n", + ) + .expect("write clean"); + assert!(requirements_format_control_directive(&[dir.path().join("clean.txt")]).is_none()); + + // A missing file is pip's error to report, not the scan's — it + // can't hide a directive pip could read (same uid). + assert!(requirements_format_control_directive(&[dir.path().join("absent.txt")]).is_none()); + } +} diff --git a/src/precheck/render.rs b/src/precheck/render.rs new file mode 100644 index 0000000..4a942c2 --- /dev/null +++ b/src/precheck/render.rs @@ -0,0 +1,497 @@ +//! Report rendering: text output, refusal line, fix/steer helpers. + +use crate::verify_deps; + +use super::{ + parse, PrecheckOptions, PrecheckReport, TargetOutcome, TreeOrigin, TreeReport, VerdictStatus, +}; + +/// The refusal line on stderr. Messaging only; the block decision and the +/// choice of escape hatch live in `verdict::block_reason`. 
+pub(super) fn print_refusal(reason: super::verdict::BlockReason) { + use super::verdict::BlockReason; + match reason { + BlockReason::ExistingTree => eprintln!( + "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force." + ), + BlockReason::Findings => { + eprintln!("Refusing to run install. Pass --force to proceed despite findings.") + } + BlockReason::RecencyOnly => { + eprintln!("Refusing to run install. Pass --no-fail to proceed anyway.") + } + } +} + +/// Print the "requirements files are not recency-checked" note when the +/// install carried any `-r` files. No-op otherwise. +pub(super) fn requirements_note(parsed: &parse::ParsedInstall) { + if parsed.requirements_files.is_empty() { + return; + } + let files: Vec = parsed + .requirements_files + .iter() + .map(|p| p.display().to_string()) + .collect(); + eprintln!( + "note: requirements files ({}) are not recency-checked by the baseline gate", + files.join(", ") + ); +} + +pub(super) fn warn_public_lookup_failures(report: &PrecheckReport, opts: &PrecheckOptions) { + if opts.verdict.is_some() && report.unverifiable_count() > 0 { + eprintln!("warning: CVE check unavailable; continuing because public mode is fail-open."); + } +} + +/// Suffix for a vulnerable match line: the advisory's fix, if known. +fn fix_note(m: &crate::vuln_api::VulnMatch) -> String { + match &m.fixed_version { + Some(v) => format!(" — fixed in {v}"), + None => " — no fixed version known".to_string(), + } +} + +/// Highest of `fixes` after sort/dedup: a single distinct value is returned +/// as-is (no parsing — preserves odd-but-unambiguous forms); several distinct +/// values compare by lenient semver. With `all_must_parse`, one unparsable +/// candidate among several poisons the answer (`None`); otherwise unparsable +/// candidates are skipped. 
+fn highest_fix(mut fixes: Vec<&str>, all_must_parse: bool) -> Option { + fixes.sort_unstable(); + fixes.dedup(); + match fixes.as_slice() { + [] => None, + [only] => Some((*only).to_string()), + many => { + let mut parsed = Vec::with_capacity(many.len()); + for raw in many { + match semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) { + Ok(v) => parsed.push((v, *raw)), + Err(_) if all_must_parse => return None, + Err(_) => {} + } + } + parsed + .into_iter() + .max_by(|(a, _), (b, _)| a.cmp(b)) + .map(|(_, raw)| raw.to_string()) + } + } +} + +/// The one version certified to clear every match. Requires every match to +/// carry a `fixed_version`; any match without one — or an unparsable +/// candidate among several — means no version can be certified, so `None`. +fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option { + let fixes: Vec<&str> = matches + .iter() + .map(|m| m.fixed_version.as_deref()) + .collect::>()?; + highest_fix(fixes, true) +} + +/// Highest `fixed_version` the advisories advertise, by lenient semver. +/// Unlike `safe_version` this is *not* a certification: matches without a +/// fix are ignored, so the result may still be vulnerable to them. `None` +/// only when no match advertises a fix (or no candidate parses). +fn advertised_fix(matches: &[crate::vuln_api::VulnMatch]) -> Option { + let fixes: Vec<&str> = matches + .iter() + .filter_map(|m| m.fixed_version.as_deref()) + .collect(); + highest_fix(fixes, false) +} + +/// Per-match advisory lines plus the safe-version steer, shared by the +/// named-target and transitive vulnerable render arms. Built for agent +/// self-correction: each advisory carries `fixed in `, and the +/// steer names the exact spec to install instead. 
+fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) { + for m in matches { + println!( + " {} ({}){}", + m.advisory_id, + m.severity_level, + fix_note(m) + ); + } + if let Some(safe) = safe_version(matches) { + println!(" → safe version: {name}@{safe}"); + } +} + +/// One summary-line segment, e.g. `"2 vulnerable (2 from resolved tree)"`. +/// The parenthetical separates findings the resolved tree carried in from +/// findings on the targets this command names; omitted when the tree +/// contributed none. +fn summary_segment(total: usize, from_tree: usize, label: &str) -> String { + if from_tree > 0 { + format!("{total} {label} ({from_tree} from resolved tree)") + } else { + format!("{total} {label}") + } +} + +/// More than this many unverifiable findings with the same error-prefix +/// render as one collapsed line instead of one line per package. +const UNVERIFIABLE_COLLAPSE_THRESHOLD: usize = 3; + +/// Group key for collapsing repeated unverifiable errors: the text before +/// the first `(` — strips per-package detail (URLs, status codes) so one +/// outage groups under one key. +fn error_prefix(error: &str) -> &str { + match error.find('(') { + Some(i) => error[..i].trim_end(), + None => error, + } +} + +/// Unverifiable error strings across transitive tree findings and named +/// outcomes, in render order. +fn unverifiable_errors(report: &PrecheckReport) -> Vec<&str> { + let mut errors = Vec::new(); + if let Some(TreeReport::Full { transitive, .. }) = &report.tree { + for t in transitive { + if let VerdictStatus::Unverifiable(e) = &t.verdict { + errors.push(e.as_str()); + } + } + } + for o in &report.outcomes { + if let TargetOutcome::Resolved { + verdict: VerdictStatus::Unverifiable(e), + .. 
+ } = o + { + errors.push(e.as_str()); + } + } + errors +} + +/// `(prefix, count, first error)` groups of unverifiable findings large +/// enough to collapse (> `UNVERIFIABLE_COLLAPSE_THRESHOLD` per prefix) — +/// the vuln-api outage case, where every package fails the same way. +/// Display-only: counts and exit codes never change. +fn collapsed_unverifiable_groups(report: &PrecheckReport) -> Vec<(&str, usize, &str)> { + let mut groups: Vec<(&str, usize, &str)> = Vec::new(); + for e in unverifiable_errors(report) { + let prefix = error_prefix(e); + match groups.iter_mut().find(|(p, _, _)| *p == prefix) { + Some((_, count, _)) => *count += 1, + None => groups.push((prefix, 1, e)), + } + } + groups.retain(|(_, count, _)| *count > UNVERIFIABLE_COLLAPSE_THRESHOLD); + groups +} + +pub(super) fn print_text(report: &PrecheckReport) { + // Build the echoed command from non-empty parts: a bare gated install + // (e.g. `npm install` with zero specs) has no args to append. + let mut command = format!("{} {}", report.manager.binary_name(), report.subcommand); + if !report.original_args.is_empty() { + command.push(' '); + command.push_str(&report.original_args.join(" ")); + } + + let collapsed = collapsed_unverifiable_groups(report); + let is_collapsed = |error: &str| { + collapsed + .iter() + .any(|(prefix, _, _)| *prefix == error_prefix(error)) + }; + + println!( + "Pre-checking `{}` (threshold {})", + command, + verify_deps::format_duration(report.threshold) + ); + println!( + " {} ok, {} recent, {}, {}, {} skipped, {} errors", + report.ok_count(), + report.recent_count(), + summary_segment( + report.vulnerable_count(), + report.tree_vulnerable_count(), + "vulnerable" + ), + summary_segment( + report.unverifiable_count(), + report.tree_unverifiable_count(), + "unverifiable" + ), + report.skipped_count(), + report.error_count(), + ); + + match &report.tree { + Some(TreeReport::Full { + resolved_count, + transitive, + .. 
+ }) => { + println!( + " tree: {} packages resolved, {} transitive checked", + resolved_count, + transitive.len() + ); + for t in transitive { + match &t.verdict { + VerdictStatus::Vulnerable(matches) => { + println!( + " ✗ {}@{} {} known vulnerable:", + t.name, + t.version, + t.origin.label() + ); + print_vulnerable_matches(&t.name, matches); + // A vulnerable dep the project already declares can be + // bumped directly — point at the fix as a command. + // When `safe_version` is `Some` it equals + // `advertised_fix` and clears every advisory; otherwise + // some advisory has no fix, so the "(advertised fix)" + // hedge marks the bump as partial. + if t.origin == TreeOrigin::PreExisting { + if let Some(fix) = advertised_fix(matches) { + let hedge = if safe_version(matches).is_some() { + "" + } else { + " (advertised fix)" + }; + println!( + " fix with: corgea {} install {}@{}{}", + report.manager.binary_name(), + t.name, + fix, + hedge + ); + } + } + } + VerdictStatus::Unverifiable(error) => { + if !is_collapsed(error) { + println!( + " ⚠ {}@{} {} could not be verified: {}", + t.name, + t.version, + t.origin.label(), + error + ); + } + } + // Clean / not-checked tree entries stay quiet in text mode. + VerdictStatus::Clean | VerdictStatus::NotChecked => {} + } + } + } + Some(TreeReport::NamedOnly { reason }) => { + println!(" tree: transitive dependencies NOT checked ({reason})"); + } + None => {} + } + + // One line per collapsed outage group instead of one per package. 
+ for (_, count, first_error) in &collapsed { + println!( + " ⚠ {count} packages could not be verified (vuln-api unreachable: {first_error})" + ); + } + + for o in &report.outcomes { + match o { + TargetOutcome::Resolved { + target, + resolved, + age, + verdict, + } => match verdict { + VerdictStatus::Vulnerable(matches) => { + println!( + " ✗ {} → {}@{} known vulnerable:", + target.display, resolved.name, resolved.version, + ); + print_vulnerable_matches(&resolved.name, matches); + } + VerdictStatus::Unverifiable(error) => { + if !is_collapsed(error) { + println!( + " ⚠ {} → {}@{} could not be verified: {}", + target.display, resolved.name, resolved.version, error, + ); + } + } + VerdictStatus::Clean | VerdictStatus::NotChecked => { + if report.is_recent(*age) { + println!( + " ⚠ {} → {}@{} published {} ago at {} (within threshold)", + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + resolved.published_at.format("%Y-%m-%d %H:%M:%S UTC"), + ); + } else { + println!( + " ✓ {} → {}@{} published {} ago", + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + ); + } + } + }, + TargetOutcome::Skipped { target, reason } => { + println!(" ? {}: {}", target.display, reason); + } + TargetOutcome::Error { target, error } => { + // Be explicit that an unresolvable target was NOT vetted: + // without this line a resolution failure followed by a + // proceeding install reads like a pass. + println!( + " ✗ {}: {} (not verified — this target is ungated)", + target.display, error + ); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::super::test_support::*; + use super::super::TreeOutcome; + use super::*; + + #[test] + fn safe_version_single_fix() { + assert_eq!( + safe_version(&[vm("A-1", Some("2.0.0"))]), + Some("2.0.0".to_string()) + ); + } + + #[test] + fn safe_version_duplicate_fixes_collapse_without_parsing() { + // "1.0rc1" is unparsable, but a single distinct value needs no parse. 
+ assert_eq!( + safe_version(&[vm("A-1", Some("1.0rc1")), vm("A-2", Some("1.0rc1"))]), + Some("1.0rc1".to_string()) + ); + } + + #[test] + fn safe_version_picks_highest_of_distinct_fixes() { + // Semver order, not lexical ("1.2.0" > "1.10.0" lexically). + assert_eq!( + safe_version(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]), + Some("1.10.0".to_string()) + ); + } + + #[test] + fn safe_version_two_component_versions_normalize() { + assert_eq!( + safe_version(&[vm("A-1", Some("4.0")), vm("A-2", Some("3.2.5"))]), + Some("4.0".to_string()) + ); + } + + #[test] + fn safe_version_mixed_fix_and_none_is_none() { + assert_eq!( + safe_version(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]), + None + ); + } + + #[test] + fn safe_version_unparsable_among_distinct_is_none() { + assert_eq!( + safe_version(&[vm("A-1", Some("2!1.0")), vm("A-2", Some("1.0.0"))]), + None + ); + } + + #[test] + fn safe_version_empty_matches_is_none() { + assert_eq!(safe_version(&[]), None); + } + + #[test] + fn error_prefix_strips_parenthesized_detail() { + // The reqwest network-failure shape: per-package URL in parens. + assert_eq!( + error_prefix("Failed to send vuln-api request: error sending request for url (http://x/v1/packages/pypi/a/versions/1.0.0/check)"), + "Failed to send vuln-api request: error sending request for url" + ); + assert_eq!( + error_prefix("vuln-api unavailable (HTTP 503)"), + "vuln-api unavailable" + ); + assert_eq!(error_prefix("no parens here"), "no parens here"); + } + + /// Four unverifiable findings sharing a prefix collapse into one group + /// (named + transitive both count); three do not. 
+ #[test] + fn collapsed_groups_require_more_than_threshold() { + let unverifiable = |name: &str| { + let mut o = resolved_outcome(name, "1.0.0", false); + set_verdict( + &mut o, + VerdictStatus::Unverifiable(format!("vuln-api unavailable (HTTP 503: {name})")), + ); + o + }; + + let mut report = report_with(vec![ + unverifiable("a"), + unverifiable("b"), + unverifiable("c"), + ]); + assert!(collapsed_unverifiable_groups(&report).is_empty()); + + report.tree = Some(TreeReport::Full { + resolved_count: 4, + transitive: vec![TreeOutcome { + name: "d".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Unverifiable( + "vuln-api unavailable (HTTP 503: d)".to_string(), + ), + origin: TreeOrigin::Transitive, + }], + }); + let groups = collapsed_unverifiable_groups(&report); + assert_eq!(groups.len(), 1); + let (prefix, count, first) = groups[0]; + assert_eq!(prefix, "vuln-api unavailable"); + assert_eq!(count, 4); + // Render order is transitive-first, so the tree finding leads. + assert_eq!(first, "vuln-api unavailable (HTTP 503: d)"); + } + + #[test] + fn advertised_fix_ignores_matches_without_fix() { + // safe_version returns None here; the advertised fix still surfaces. + assert_eq!( + advertised_fix(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]), + Some("2.0.0".to_string()) + ); + assert_eq!(advertised_fix(&[vm("A-1", None)]), None); + assert_eq!(advertised_fix(&[]), None); + } + + #[test] + fn advertised_fix_picks_highest_by_semver() { + assert_eq!( + advertised_fix(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]), + Some("1.10.0".to_string()) + ); + } +} diff --git a/src/precheck/test_support.rs b/src/precheck/test_support.rs new file mode 100644 index 0000000..448406d --- /dev/null +++ b/src/precheck/test_support.rs @@ -0,0 +1,99 @@ +//! Shared builders for precheck unit tests (mod.rs, render.rs, verdict.rs). +//! Test-only: declared `#[cfg(test)]` from mod.rs. 
+ +use std::time::Duration; + +use chrono::Utc; + +use super::{ + InstallTarget, PackageManager, PrecheckOptions, PrecheckReport, TargetKind, TargetOutcome, + VerdictConfig, VerdictStatus, +}; + +/// Baseline options: pypi registry at a dead address (a port that +/// refuses connections - these tests never dial it), no verdict config. +/// Override fields per test via struct update. +pub(crate) fn stub_opts() -> PrecheckOptions { + PrecheckOptions { + threshold: Duration::from_secs(2 * 86400), + no_fail: false, + force: false, + verdict: None, + npm_registry: None, + pypi_registry: Some("http://127.0.0.1:9".to_string()), + } +} + +/// `stub_opts()` plus a verdict config pointing at `base_url`. +pub(crate) fn verdict_opts(base_url: &str) -> PrecheckOptions { + PrecheckOptions { + verdict: Some(VerdictConfig { + base_url: base_url.to_string(), + }), + ..stub_opts() + } +} + +pub(crate) fn public_opts(no_fail: bool, force: bool) -> PrecheckOptions { + PrecheckOptions { + no_fail, + force, + ..verdict_opts("http://127.0.0.1:9") + } +} + +pub(crate) fn resolved_outcome(name: &str, version: &str, recent: bool) -> TargetOutcome { + // Recency derives from age vs `report_with`'s 2-day threshold: + // one hour => recent, a year => not. 
+ let age = if recent { + Duration::from_secs(3600) + } else { + Duration::from_secs(365 * 86400) + }; + TargetOutcome::Resolved { + target: InstallTarget { + name: name.to_string(), + display: format!("{name}=={version}"), + kind: TargetKind::Unverifiable { + reason: "test".to_string(), + }, + }, + resolved: crate::verify_deps::registry::ResolvedPackage { + name: name.to_string(), + version: version.to_string(), + published_at: Utc::now() - chrono::Duration::from_std(age).unwrap(), + }, + age, + verdict: VerdictStatus::NotChecked, + } +} + +pub(crate) fn report_with(outcomes: Vec) -> PrecheckReport { + PrecheckReport { + manager: PackageManager::Pip, + subcommand: "install".to_string(), + original_args: vec![], + outcomes, + threshold: Duration::from_secs(2 * 86400), + tree: None, + // Most tests model an install that named something; bare-install + // cases set this explicitly. + bare_install: false, + } +} + +pub(crate) fn set_verdict(outcome: &mut TargetOutcome, v: VerdictStatus) { + if let TargetOutcome::Resolved { verdict, .. } = outcome { + *verdict = v; + } +} + +pub(crate) fn vm(advisory: &str, fixed: Option<&str>) -> crate::vuln_api::VulnMatch { + crate::vuln_api::VulnMatch { + advisory_id: advisory.to_string(), + severity_level: "high".to_string(), + tier: Some(1), + vulnerable_version_range: None, + fixed_version: fixed.map(str::to_string), + } +} diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs new file mode 100644 index 0000000..feb3620 --- /dev/null +++ b/src/precheck/tree.rs @@ -0,0 +1,827 @@ +//! Full would-install-set resolution (the "tree pass"). +//! +//! Safety invariant: resolution must never execute package code. +//! pip: `--only-binary :all:` (appended last, so it wins over CLI +//! format-control flags) prevents sdist builds (pypa/pip#13091) — BUT pip +//! applies format-control directives found *inside* `-r` files after CLI +//! parsing, so requirements files are pre-scanned and any `--no-binary` / +//! 
`--only-binary` line refuses the dry-run (named-only fallback) instead. +//! npm: `--ignore-scripts` guards npm/cli#2787. + +use std::io::Read; +use std::process::{Command, Stdio}; +use std::thread; +use std::time::{Duration, Instant}; + +use super::PackageManager; + +/// Upper bound on a single tree resolution (pip dry-run / npm lockfile gen). +/// Generous on purpose: a large dependency tree legitimately takes tens of +/// seconds to resolve against a registry. The point is only to keep a hung +/// or stalled pip/npm from hanging the install the gate sits in front of — +/// on overrun the resolver errs and the caller degrades to named-only. +const TREE_RESOLVE_TIMEOUT: Duration = Duration::from_secs(120); + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TreePackage { + pub name: String, + pub version: String, + /// pip report `"requested"`: the user named this package (CLI arg or + /// requirements file). Always false for npm — its lockfile has no + /// equivalent flag. + pub requested: bool, +} + +/// Whether this manager's resolver has anything to resolve for the parsed +/// install. pip's dry-run also reads `-r` requirements files, so those make +/// an install eligible even with no named targets. npm's lockfile resolution +/// reads `package.json`, so a bare `npm install` is eligible whenever the +/// project (found like npm finds it — nearest ancestor manifest) has one. +pub fn covers_input(manager: PackageManager, parsed: &super::parse::ParsedInstall) -> bool { + !parsed.targets.is_empty() + || (manager == PackageManager::Pip && !parsed.requirements_files.is_empty()) + || (manager == PackageManager::Npm && npm_project_root().is_some()) +} + +/// Nearest ancestor file named `name`, starting at the CWD. 
+pub(super) fn find_up(name: &str) -> Option { + let cwd = std::env::current_dir().ok()?; + cwd.ancestors() + .map(|dir| dir.join(name)) + .find(|p| p.is_file()) +} + +/// The project directory npm itself would operate on: the nearest ancestor +/// holding `package.json`. A bare `npm install` from a subdirectory +/// installs THAT project's tree, so the gate must look there too. +pub(super) fn npm_project_root() -> Option { + Some(find_up("package.json")?.parent()?.to_path_buf()) +} + +/// The lockfile npm would read from `dir`, preferring `npm-shrinkwrap.json` +/// over `package-lock.json` (npm gives the shrinkwrap precedence). `None` +/// when `dir` holds neither. Shared by `npm ci` (reads the project lockfile) +/// and the tree pass (reads the one npm just generated). +pub(super) fn npm_lockfile_in(dir: &std::path::Path) -> Option { + ["npm-shrinkwrap.json", "package-lock.json"] + .iter() + .map(|n| dir.join(n)) + .find(|p| p.is_file()) +} + +/// The npm flag that redirects the project root (`--prefix`, `-C`, `-g`, +/// `--global`), if present. The gate can't safely resolve or verify the +/// redirected project from a throwaway copy of the CWD, so the callers fail +/// closed (bare install / `npm ci`) or degrade to named-only. +/// +/// `--location` is *not* an unconditional redirect: per npm v10/v11 only +/// `--location=global` (equivalent to `-g`) changes the install root. +/// `--location=project` / `--location=user` merely select which config layer +/// is read/written and leave what installs untouched, so only the `global` +/// value counts — in both `--location=global` and `--location global` forms. 
+pub(super) fn npm_root_redirect_flag(args: &[String]) -> Option { + const ROOT_REDIRECT_FLAGS: [&str; 4] = ["--prefix", "-C", "--global", "-g"]; + for (i, a) in args.iter().enumerate() { + if ROOT_REDIRECT_FLAGS + .iter() + .any(|f| a.as_str() == *f || a.starts_with(&format!("{f}="))) + { + return Some(a.clone()); + } + if a == "--location=global" { + return Some(a.clone()); + } + if a == "--location" && args.get(i + 1).is_some_and(|v| v == "global") { + return Some("--location global".to_string()); + } + } + None +} + +/// `Err(reason)`: the dry-run failed — the caller falls back to named-only +/// and its warning carries `reason`. +pub fn resolve_tree( + manager: PackageManager, + install_args: &[String], + parsed: &super::parse::ParsedInstall, +) -> Result, String> { + match manager { + PackageManager::Pip => resolve_pip_tree(manager.binary_name(), install_args, parsed), + PackageManager::Npm => resolve_npm_tree(manager.binary_name(), install_args), + } +} + +/// Last stderr line of a failed subprocess, for one-line error messages. +fn stderr_tail(output: &std::process::Output) -> String { + String::from_utf8_lossy(&output.stderr) + .trim() + .lines() + .last() + .unwrap_or("unknown error") + .to_string() +} + +/// Grace past the overall deadline allowed for the reader threads to drain +/// after the direct child has exited. In the common case they EOF the instant +/// the child closes its pipe ends, so this is never spent; it only bounds the +/// pathological case where a grandchild inherited the pipe write-end and is +/// still holding it open (so `read_to_end` never sees EOF). +const READER_DRAIN_GRACE: Duration = Duration::from_secs(3); + +/// Run `cmd` to completion, capturing stdout/stderr, but kill it if it +/// outruns `timeout`. The resolvers shell out to pip/npm, which can stall on +/// a slow or unreachable registry; the gate runs before the real install, so +/// an unbounded wait would hang the user's command. 
On overrun the child is +/// killed and an `Err` is returned, which the callers route to the named-only +/// fallback. +/// +/// stdout/stderr are drained on threads rather than read after exit because +/// pip's `--report -` JSON can exceed the OS pipe buffer; an unread full pipe +/// would block the child before it exits, so a bare wait-with-timeout on the +/// handle would deadlock instead of timing out. +/// +/// The reader joins are themselves bounded: a grandchild that inherited the +/// pipe write-end keeps `read_to_end` from ever seeing EOF even after the +/// direct child is reaped, so an unbounded `join()` here would hang past the +/// deadline this function exists to enforce. If the readers don't drain within +/// `READER_DRAIN_GRACE` of the deadline they are abandoned (the leaked thread +/// and FD die with this short-lived CLI moments later) and an `Err` is +/// returned, which the callers route to the named-only fallback. +fn run_bounded(mut cmd: Command, timeout: Duration) -> Result { + let mut child = cmd + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .map_err(|e| format!("spawn: {e}"))?; + + let mut out_pipe = child.stdout.take().ok_or("no stdout pipe")?; + let mut err_pipe = child.stderr.take().ok_or("no stderr pipe")?; + let out_reader = thread::spawn(move || { + let mut buf = Vec::new(); + let _ = out_pipe.read_to_end(&mut buf); + buf + }); + let err_reader = thread::spawn(move || { + let mut buf = Vec::new(); + let _ = err_pipe.read_to_end(&mut buf); + buf + }); + + let deadline = Instant::now() + timeout; + let status = loop { + match child.try_wait().map_err(|e| format!("wait: {e}"))? 
{ + Some(status) => break status, + None if Instant::now() >= deadline => { + let _ = child.kill(); + let _ = child.wait(); + return Err(format!( + "resolution exceeded {}s timeout", + timeout.as_secs() + )); + } + None => thread::sleep(Duration::from_millis(50)), + } + }; + + // The child is reaped, but a grandchild may still hold the pipe write-end, + // so the readers can outlive it. Bound the wait: poll `is_finished()` + // (never `join()` before it is finished, which would block unbounded) + // against a short grace past the deadline. + let drain_deadline = deadline + READER_DRAIN_GRACE; + while !(out_reader.is_finished() && err_reader.is_finished()) { + if Instant::now() >= drain_deadline { + return Err( + "output readers did not drain (resolver may have left a background process)" + .to_string(), + ); + } + thread::sleep(Duration::from_millis(50)); + } + + // Both readers are finished, so these joins return immediately. + let stdout = out_reader.join().map_err(|_| "stdout reader panicked")?; + let stderr = err_reader.join().map_err(|_| "stderr reader panicked")?; + Ok(std::process::Output { + status, + stdout, + stderr, + }) +} + +/// Drop any user-supplied `--report ` / `--report=` from the pip +/// dry-run args. pip's `--report` is last-wins, so a user value would redirect +/// the JSON the gate parses off stdout into a file, leaving stdout empty and +/// silently degrading the tree pass to named-only. The user's real install +/// (exec'd separately with the original args) still honors their `--report`. +fn strip_pip_report_flag(install_args: &[String]) -> Vec<&String> { + let mut out = Vec::with_capacity(install_args.len()); + let mut skip_value = false; + for arg in install_args { + if skip_value { + // The value token following a bare `--report`. 
+ skip_value = false; + continue; + } + if arg == "--report" { + skip_value = true; + continue; + } + if arg.starts_with("--report=") { + continue; + } + out.push(arg); + } + out +} + +fn resolve_pip_tree( + binary: &str, + install_args: &[String], + parsed: &super::parse::ParsedInstall, +) -> Result, String> { + // pip applies format-control directives found INSIDE a requirements + // file AFTER command-line parsing (acknowledged pip behavior — the file + // parser mutates the shared FormatControl object), so a `--no-binary + // :all:` line in a `-r` file would override the trailing CLI guard + // below and build sdists during the dry-run. Refuse to dry-run such + // files; the caller degrades to the named-only fallback, whose + // requirements parser skips option lines entirely. + if let Some((file, directive)) = + super::parse::requirements_format_control_directive(&parsed.requirements_files) + { + return Err(format!( + "{} sets {} (file-level format-control overrides the sdist guard; not dry-running)", + file.display(), + directive + )); + } + // Same binary resolution as the exec path (pip → pip3 fallback) — the + // tree pass must not silently degrade on pip3-only systems. + let resolved = super::exec::resolve_binary(binary)?; + // The non-execution guard `--only-binary :all:` is appended AFTER the + // user's args: pip's format-control flags are last-wins per package, so a + // user `--no-binary :all:` / `--only-binary :none:` placed in install_args + // must not re-enable sdist builds (which would run package code during the + // report step, violating this file's safety invariant). + // Strip the user's `--report` (if any) so it can't redirect the dry-run + // JSON off stdout — the gate needs it there to parse the would-install set. 
+ let mut cmd = Command::new(resolved); + cmd.arg("install") + .args(["--dry-run", "--quiet", "--report", "-"]) + .args(strip_pip_report_flag(install_args)) + .args(["--only-binary", ":all:"]); + let output = + run_bounded(cmd, TREE_RESOLVE_TIMEOUT).map_err(|e| format!("run pip dry-run: {e}"))?; + if !output.status.success() { + return Err(format!("pip dry-run failed: {}", stderr_tail(&output))); + } + parse_pip_report(&String::from_utf8_lossy(&output.stdout)) +} + +fn parse_pip_report(json: &str) -> Result, String> { + let report: serde_json::Value = + serde_json::from_str(json).map_err(|e| format!("parse pip report: {e}"))?; + let install = report + .get("install") + .and_then(|v| v.as_array()) + .ok_or("pip report has no install[] array")?; + install + .iter() + .map(|item| { + let metadata = item.get("metadata").ok_or("report item missing metadata")?; + let field = |k: &str| { + metadata + .get(k) + .and_then(|v| v.as_str()) + .map(str::to_string) + .ok_or_else(|| format!("report item missing metadata.{k}")) + }; + Ok(TreePackage { + name: field("name")?, + version: field("version")?, + requested: item + .get("requested") + .and_then(|v| v.as_bool()) + .unwrap_or(false), + }) + }) + .collect() +} + +/// Direct dependency names declared by the project's `package.json` (the +/// manifest `resolve_npm_tree` copies — nearest ancestor, like npm). +/// Empty when the manifest is absent or unparsable — origin labeling then +/// degrades to `(transitive)`. 
+pub fn project_direct_deps() -> std::collections::HashSet { + npm_project_root() + .and_then(|root| std::fs::read_to_string(root.join("package.json")).ok()) + .map(|s| direct_deps_from_manifest(&s)) + .unwrap_or_default() +} + +fn direct_deps_from_manifest(json: &str) -> std::collections::HashSet { + let Ok(manifest) = serde_json::from_str::(json) else { + return Default::default(); + }; + let groups = [ + "dependencies", + "devDependencies", + "optionalDependencies", + "peerDependencies", + ]; + groups + .iter() + .filter_map(|g| manifest.get(g)?.as_object()) + .flat_map(|deps| deps.keys().cloned()) + .collect() +} + +/// Resolve npm's full would-install set by generating a lockfile in a +/// throwaway dir so the user's own lockfile is never touched. npm's +/// `--dry-run --json` only emits counts (npm/cli#6558), so we read the +/// generated `package-lock.json` instead. +/// +/// `--ignore-scripts` because npm has run lifecycle scripts under +/// `--package-lock-only` before (npm/cli#2787). +fn resolve_npm_tree(binary: &str, install_args: &[String]) -> Result, String> { + // Flags that redirect npm's project root would defeat the throwaway-dir + // isolation below (`--prefix` overrides `current_dir`, so the dry run + // would write the USER'S package-lock.json) — degrade to named-only. + if let Some(flag) = npm_root_redirect_flag(install_args) { + return Err(format!( + "'{flag}' redirects npm's project root; lockfile resolution skipped" + )); + } + + let resolved = super::exec::resolve_binary(binary)?; + let work = tempfile::tempdir().map_err(|e| format!("create temp dir: {e}"))?; + // Copy the manifests from the project npm would operate on (nearest + // ancestor package.json), not just the CWD. The `.npmrc` copy is + // config-only (registry/auth/save prefs) so resolution matches a real + // install; CLI flags below still win over it (`--ignore-scripts` can't + // be undone by an `ignore-scripts=false` line). 
A `package-lock=false` + // `.npmrc` makes the resolution emit no lockfile → named-only fallback + // by design, not a hole: nothing executes either way. + let root = npm_project_root(); + for manifest in [ + "package.json", + "package-lock.json", + "npm-shrinkwrap.json", + ".npmrc", + ] { + let src = match &root { + Some(root) => root.join(manifest), + None => std::path::PathBuf::from(manifest), + }; + if src.exists() { + std::fs::copy(&src, work.path().join(manifest)) + .map_err(|e| format!("copy {manifest}: {e}"))?; + } + } + let mut cmd = Command::new(&resolved); + cmd.arg("install") + .args(install_args) + .args([ + "--package-lock-only", + "--ignore-scripts", + "--no-audit", + "--no-fund", + ]) + .current_dir(work.path()); + let output = run_bounded(cmd, TREE_RESOLVE_TIMEOUT) + .map_err(|e| format!("run npm lockfile resolution: {e}"))?; + if !output.status.success() { + return Err(format!( + "npm lockfile resolution failed: {}", + stderr_tail(&output) + )); + } + // Read whichever lockfile npm actually produced/used (shrinkwrap wins). + let lock_path = npm_lockfile_in(work.path()).ok_or("npm produced no lockfile to verify")?; + let lock = std::fs::read_to_string(&lock_path) + .map_err(|e| format!("read generated {}: {e}", lock_path.display()))?; + parse_npm_lockfile(&lock) +} + +pub(super) fn parse_npm_lockfile(json: &str) -> Result, String> { + let lock: serde_json::Value = + serde_json::from_str(json).map_err(|e| format!("parse package-lock.json: {e}"))?; + // lockfileVersion 2/3 carries the `packages` map; v1 only has the + // `dependencies` tree, which npm still understands — support both so a + // v1 project isn't forced to bypass the gate with `--force`. + if let Some(packages) = lock.get("packages").and_then(|v| v.as_object()) { + Ok(packages + .iter() + // Only `node_modules/...` entries are registry-installed deps. 
+ // Skip the root project (""), symlinked workspaces (`link: true`), + // and workspace SOURCE stanzas (`packages/foo`, `apps/bar`) — those + // are local packages with no registry identity, so sending them to + // the public vuln-api would falsely block a monorepo install when a + // public package shares the name@version. + .filter(|(path, entry)| { + path.contains("node_modules/") + && entry.get("link").and_then(|v| v.as_bool()) != Some(true) + }) + .filter_map(|(path, entry)| { + let name = entry + .get("name") + .and_then(|v| v.as_str()) + .map(str::to_string) + .or_else(|| name_from_lock_path(path))?; + let version = entry.get("version").and_then(|v| v.as_str())?; + Some(TreePackage { + name, + version: version.to_string(), + requested: false, + }) + }) + .collect()) + } else if let Some(deps) = lock.get("dependencies").and_then(|v| v.as_object()) { + let mut out = Vec::new(); + collect_v1_dependencies(deps, &mut out, 0)?; + Ok(out) + } else { + Err("package-lock.json has neither a packages map nor a dependencies tree".to_string()) + } +} + +/// npm-written v1 trees are finite (no cycles by construction), but +/// `npm ci` feeds this parser an attacker-supplied file — cap the depth so +/// a crafted deep nest can't overflow the stack. In practice serde_json's +/// own 128-level recursion limit rejects such files at parse time (each v1 +/// level is two JSON levels); this cap is defense-in-depth should that +/// limit ever change. Real trees are a handful of levels deep. +const V1_MAX_DEPTH: usize = 64; + +/// Recursively collect `name@version` from a lockfileVersion 1 +/// `dependencies` tree. Nested `dependencies` are deduped by the caller's +/// pool; local/link entries (`"link": true`) carry no registry identity and +/// are skipped. Fails loudly past `V1_MAX_DEPTH` (callers refuse or fall +/// back — never silently truncate the verdict set). 
+fn collect_v1_dependencies( + deps: &serde_json::Map, + out: &mut Vec, + depth: usize, +) -> Result<(), String> { + if depth > V1_MAX_DEPTH { + return Err(format!( + "package-lock.json dependencies nest deeper than {V1_MAX_DEPTH} levels; refusing to parse" + )); + } + for (name, entry) in deps { + if entry.get("link").and_then(|v| v.as_bool()) == Some(true) { + continue; + } + if let Some(version) = entry.get("version").and_then(|v| v.as_str()) { + out.push(TreePackage { + name: name.clone(), + version: version.to_string(), + requested: false, + }); + } + if let Some(nested) = entry.get("dependencies").and_then(|v| v.as_object()) { + collect_v1_dependencies(nested, out, depth + 1)?; + } + } + Ok(()) +} + +/// Derive a package name from a lockfile path key like +/// `node_modules/a/node_modules/@scope/pkg` → `@scope/pkg`. `None` for keys +/// outside `node_modules/` (workspace stanzas carry an explicit `name`). +fn name_from_lock_path(path: &str) -> Option { + if !path.contains("node_modules/") { + return None; + } + let name = crate::deps::ecosystems::npm::package_name_from_lock_key(path); + (!name.is_empty()).then(|| name.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn run_bounded_kills_a_child_that_outruns_the_timeout() { + let mut cmd = Command::new("sh"); + cmd.args(["-c", "sleep 30"]); + let start = Instant::now(); + let err = run_bounded(cmd, Duration::from_millis(200)).expect_err("should time out"); + assert!(err.contains("timeout"), "unexpected error: {err}"); + // Killed promptly, not after the full sleep — proves the gate can't hang. 
+ assert!( + start.elapsed() < Duration::from_secs(5), + "child not killed promptly: {:?}", + start.elapsed() + ); + } + + #[test] + fn run_bounded_captures_output_of_a_fast_child() { + let mut cmd = Command::new("sh"); + cmd.args(["-c", "printf hello"]); + let out = run_bounded(cmd, Duration::from_secs(10)).expect("fast child ok"); + assert!(out.status.success()); + assert_eq!(String::from_utf8_lossy(&out.stdout), "hello"); + } + + #[test] + fn run_bounded_does_not_hang_when_a_grandchild_holds_the_pipe() { + // The direct `sh` exits 0 immediately, but backgrounds a `sleep 30` + // that inherits the stdout/stderr pipe write-end. `read_to_end` never + // sees EOF while that grandchild lives, so an unbounded join on the + // success path would block for the full 30s. The bounded drain must + // return (Ok with partial output OR Err) well before then — proving + // the gate can't hang past its deadline even on the exit path. + let mut cmd = Command::new("sh"); + cmd.args(["-c", "sleep 30 & exit 0"]); + let start = Instant::now(); + // Small timeout: the child exits at once, so the drain bound is + // `timeout + READER_DRAIN_GRACE` (~3.2s here) — far under the 30s the + // grandchild stays alive, and under the 10s assertion below. + let _ = run_bounded(cmd, Duration::from_millis(200)); + assert!( + start.elapsed() < Duration::from_secs(10), + "run_bounded blocked on a grandchild-held pipe: {:?}", + start.elapsed() + ); + } + + #[test] + fn strip_pip_report_flag_drops_user_report_but_keeps_rest() { + let argv = |xs: &[&str]| xs.iter().map(|s| s.to_string()).collect::>(); + fn got(args: &[String]) -> Vec<&str> { + strip_pip_report_flag(args) + .iter() + .map(|s| s.as_str()) + .collect() + } + // `--report ` (value token also dropped) and `--report=`. 
+ let a = argv(&[ + "requests", + "--report", + "out.json", + "--quiet", + "--report=x.json", + "flask", + ]); + assert_eq!(got(&a), vec!["requests", "--quiet", "flask"]); + // A bare trailing `--report` (no value) is dropped without panicking. + let b = argv(&["pkg", "--report"]); + assert_eq!(got(&b), vec!["pkg"]); + // No `--report` → args pass through untouched (incl. our own `-`). + let c = argv(&["a", "-r", "reqs.txt"]); + assert_eq!(got(&c), vec!["a", "-r", "reqs.txt"]); + } + + const OK_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, + {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; + + #[test] + fn parse_pip_report_ok() { + let pkgs = parse_pip_report(OK_REPORT).expect("parse ok report"); + assert_eq!( + pkgs, + vec![ + TreePackage { + name: "oldpkg".to_string(), + version: "1.0.0".to_string(), + requested: true, + }, + TreePackage { + name: "evildep".to_string(), + version: "0.4.2".to_string(), + requested: false, + }, + ] + ); + } + + #[test] + fn parse_pip_report_missing_requested_defaults_false() { + let json = r#"{"install":[{"metadata":{"name":"x","version":"1.0.0"}}]}"#; + let pkgs = parse_pip_report(json).expect("parse report without requested"); + assert!(!pkgs[0].requested); + } + + #[test] + fn parse_pip_report_missing_install() { + let err = parse_pip_report(r#"{"version":"1"}"#).expect_err("no install[]"); + assert!(err.contains("no install[]"), "got: {err}"); + } + + #[test] + fn parse_pip_report_missing_version() { + let json = r#"{"install":[{"metadata":{"name":"x"}}]}"#; + let err = parse_pip_report(json).expect_err("missing version"); + assert!(err.contains("metadata.version"), "got: {err}"); + } + + #[test] + fn parse_pip_report_non_json() { + let err = parse_pip_report("not json").expect_err("non-json"); + assert!(err.contains("parse pip report"), "got: {err}"); + } + + // lockfile-v3 with: root entry (skipped), a plain dep, a 
nested dep, + // a scoped dep, and a workspace `link: true` entry (skipped). + const NPM_LOCK: &str = r#"{ + "name": "proj", "lockfileVersion": 3, + "packages": { + "": {"name": "proj", "version": "1.0.0"}, + "node_modules/oldpkg": {"version": "1.0.0"}, + "node_modules/evildep": {"version": "0.4.2"}, + "node_modules/a/node_modules/b": {"version": "2.3.4"}, + "node_modules/@scope/pkg": {"version": "9.0.1"}, + "node_modules/localdep": {"resolved": "../local", "link": true}, + "packages/localdep": {"name": "localdep", "version": "0.0.1"} + } + }"#; + + fn pkg(name: &str, version: &str) -> TreePackage { + TreePackage { + name: name.to_string(), + version: version.to_string(), + requested: false, + } + } + + #[test] + fn parse_npm_lockfile_ok() { + let mut pkgs = parse_npm_lockfile(NPM_LOCK).expect("parse npm lock"); + pkgs.sort_by(|a, b| a.name.cmp(&b.name)); + // The workspace SOURCE stanza `packages/localdep` is a local package, + // not a registry dep — it must NOT be verdicted, only the four + // node_modules/ entries are. + assert_eq!( + pkgs, + vec![ + pkg("@scope/pkg", "9.0.1"), + pkg("b", "2.3.4"), + pkg("evildep", "0.4.2"), + pkg("oldpkg", "1.0.0"), + ] + ); + } + + #[test] + fn parse_npm_lockfile_v1_dependencies_tree() { + // lockfileVersion 1 has no `packages` map — npm still understands it, + // so the gate must too (recursing into nested `dependencies`), and + // skip `link` entries. 
+ const V1: &str = r#"{ + "name": "proj", "lockfileVersion": 1, + "dependencies": { + "oldpkg": {"version": "1.0.0"}, + "evildep": {"version": "0.4.2", "dependencies": { + "deepdep": {"version": "3.2.1"} + }}, + "locallink": {"version": "file:../local", "link": true} + } + }"#; + let mut pkgs = parse_npm_lockfile(V1).expect("parse v1 lock"); + pkgs.sort_by(|a, b| a.name.cmp(&b.name)); + assert_eq!( + pkgs, + vec![ + pkg("deepdep", "3.2.1"), + pkg("evildep", "0.4.2"), + pkg("oldpkg", "1.0.0"), + ] + ); + } + + #[test] + fn parse_npm_lockfile_neither_schema_is_error() { + let err = parse_npm_lockfile(r#"{"lockfileVersion":1}"#).expect_err("no deps"); + assert!(err.contains("neither a packages map"), "got: {err}"); + } + + #[test] + fn parse_npm_lockfile_v1_depth_bomb_errors_instead_of_overflowing() { + // `npm ci` parses attacker-supplied lockfiles; a crafted deep nest + // must hit the depth cap (loud error → refuse/fallback), not + // overflow the stack. + let mut inner = r#"{"version":"1.0.0"}"#.to_string(); + for _ in 0..(V1_MAX_DEPTH + 2) { + inner = format!(r#"{{"version":"1.0.0","dependencies":{{"d":{inner}}}}}"#); + } + let lock = format!(r#"{{"lockfileVersion":1,"dependencies":{{"a":{inner}}}}}"#); + let err = parse_npm_lockfile(&lock).expect_err("depth bomb must error"); + // serde_json's recursion limit fires first today; the explicit + // V1_MAX_DEPTH cap is the backstop. Either way: loud error. 
+ assert!( + err.contains("deeper than") || err.contains("recursion limit"), + "got: {err}" + ); + } + + #[test] + fn name_from_lock_path_handles_nested_and_scoped() { + assert_eq!( + name_from_lock_path("node_modules/oldpkg").as_deref(), + Some("oldpkg") + ); + assert_eq!( + name_from_lock_path("node_modules/a/node_modules/b").as_deref(), + Some("b") + ); + assert_eq!( + name_from_lock_path("node_modules/a/node_modules/@scope/pkg").as_deref(), + Some("@scope/pkg") + ); + assert_eq!(name_from_lock_path("packages/foo"), None); + } + + #[test] + fn direct_deps_from_manifest_unions_all_groups() { + let manifest = r#"{ + "name": "proj", + "dependencies": {"a": "^1.0.0", "@scope/b": "2.x"}, + "devDependencies": {"c": "*"}, + "optionalDependencies": {"d": "1.2.3"}, + "peerDependencies": {"e": ">=1"} + }"#; + let deps = direct_deps_from_manifest(manifest); + for name in ["a", "@scope/b", "c", "d", "e"] { + assert!(deps.contains(name), "missing {name}"); + } + assert_eq!(deps.len(), 5); + } + + #[test] + fn direct_deps_from_manifest_degrades_to_empty() { + assert!(direct_deps_from_manifest("not json").is_empty()); + assert!(direct_deps_from_manifest(r#"{"name":"proj"}"#).is_empty()); + assert!(direct_deps_from_manifest(r#"{"dependencies":[]}"#).is_empty()); + } + + fn args(parts: &[&str]) -> Vec { + parts.iter().map(|s| s.to_string()).collect() + } + + #[test] + fn npm_root_redirect_flag_unconditional_redirects() { + // These always redirect npm's install root, regardless of value. 
+ assert_eq!( + npm_root_redirect_flag(&args(&["install", "--prefix", "/x", "lodash"])).as_deref(), + Some("--prefix") + ); + assert_eq!( + npm_root_redirect_flag(&args(&["install", "--prefix=/x", "lodash"])).as_deref(), + Some("--prefix=/x") + ); + assert_eq!( + npm_root_redirect_flag(&args(&["install", "-C", "/x", "lodash"])).as_deref(), + Some("-C") + ); + assert_eq!( + npm_root_redirect_flag(&args(&["install", "-g", "lodash"])).as_deref(), + Some("-g") + ); + assert_eq!( + npm_root_redirect_flag(&args(&["install", "--global", "lodash"])).as_deref(), + Some("--global") + ); + } + + #[test] + fn npm_root_redirect_flag_only_location_global_redirects() { + // Only `--location=global` (≡ `-g`) redirects the install root. + assert_eq!( + npm_root_redirect_flag(&args(&["install", "--location=global", "lodash"])).as_deref(), + Some("--location=global") + ); + assert_eq!( + npm_root_redirect_flag(&args(&["install", "--location", "global", "lodash"])) + .as_deref(), + Some("--location global") + ); + } + + #[test] + fn npm_root_redirect_flag_non_global_location_is_not_a_redirect() { + // project/user only pick a config layer — they do NOT change what + // installs, so the gate must resolve the tree, not refuse it. + assert!( + npm_root_redirect_flag(&args(&["install", "--location", "project", "lodash"])) + .is_none() + ); + assert!( + npm_root_redirect_flag(&args(&["install", "--location=project", "lodash"])).is_none() + ); + assert!( + npm_root_redirect_flag(&args(&["install", "--location", "user", "lodash"])).is_none() + ); + assert!(npm_root_redirect_flag(&args(&["install", "--location=user", "lodash"])).is_none()); + // A bare trailing `--location` (no value) is not a redirect either. 
+ assert!(npm_root_redirect_flag(&args(&["install", "lodash", "--location"])).is_none()); + } + + #[test] + fn npm_root_redirect_flag_plain_install_has_no_redirect() { + assert!(npm_root_redirect_flag(&args(&["install", "lodash"])).is_none()); + } +} diff --git a/src/precheck/verdict.rs b/src/precheck/verdict.rs new file mode 100644 index 0000000..ad2b1c1 --- /dev/null +++ b/src/precheck/verdict.rs @@ -0,0 +1,743 @@ +//! Verdict pass: bounded vuln-api worker pool, result matching, and the +//! single block predicate (`block_reason`). + +use std::time::Duration; + +use super::{ + tree, InstallTarget, PackageManager, PrecheckOptions, PrecheckReport, TargetKind, + TargetOutcome, TreeOrigin, TreeOutcome, TreeReport, VerdictConfig, VerdictStatus, +}; + +/// Above this many verdict jobs, print a stderr progress line so a big tree +/// pass doesn't look hung. +const VERDICT_PROGRESS_THRESHOLD: usize = 8; + +/// Max parallel vuln-api / registry requests. +const VERDICT_CONCURRENCY: usize = 8; + +/// Bounded worker pool over the verdict jobs. On client/request failure every +/// job comes back `Unverifiable`, which warns but never blocks: public +/// lookups fail open. Order is preserved: result `i` belongs to job `i`. 
+pub(super) fn verdict_pool( + jobs: Vec, + cfg: &VerdictConfig, + manager: PackageManager, +) -> Vec<(tree::TreePackage, VerdictStatus)> { + let client = match crate::vuln_api::http_client() { + Ok(c) => c, + Err(e) => { + return jobs + .into_iter() + .map(|j| (j, VerdictStatus::Unverifiable(e.clone()))) + .collect(); + } + }; + + if jobs.len() > VERDICT_PROGRESS_THRESHOLD { + eprintln!("checking {} packages against Corgea vuln-api…", jobs.len()); + } + + let ecosystem = manager.ecosystem(); + let verdicts = + pooled_map( + &jobs, + VERDICT_CONCURRENCY, + |job| match crate::vuln_api::check_package_version( + &client, + &cfg.base_url, + ecosystem, + &job.name, + &job.version, + ) { + Ok(resp) if resp.is_vulnerable => VerdictStatus::Vulnerable(resp.matches), + Ok(_) => VerdictStatus::Clean, + Err(e) => VerdictStatus::Unverifiable(e.to_string()), + }, + ); + jobs.into_iter().zip(verdicts).collect() +} + +/// Order-preserving bounded worker pool: `results[i]` is `f(&items[i])`. +/// Each call is an independent blocking HTTP request on the gate's critical +/// path, so they must not run serially. Plain work-stealing over an index, +/// no new crates; single-item lists skip the thread machinery. 
+fn pooled_map( + items: &[T], + concurrency: usize, + f: impl Fn(&T) -> R + Sync, +) -> Vec { + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Mutex; + + if items.len() <= 1 { + return items.iter().map(&f).collect(); + } + let next = AtomicUsize::new(0); + let results: Mutex>> = Mutex::new(items.iter().map(|_| None).collect()); + let workers = concurrency.clamp(1, items.len()); + std::thread::scope(|s| { + for _ in 0..workers { + s.spawn(|| loop { + let i = next.fetch_add(1, Ordering::Relaxed); + let Some(item) = items.get(i) else { break }; + let result = f(item); + results.lock().unwrap()[i] = Some(result); + }); + } + }); + results + .into_inner() + .unwrap() + .into_iter() + .map(|r| r.expect("pooled_map worker filled every slot")) + .collect() +} + +/// Assign pooled verdicts onto matching named outcomes (by normalized +/// name + version) and return the unmatched leftovers — the tree findings. +/// Each leftover carries its provenance: pip's `requested` flag, membership +/// in the project manifest's direct deps (`direct_deps`), or transitive. +/// +/// pip backtracking reconciliation: pip's resolver may install a named +/// target at a different version than the CLI's independent `pypi_resolve` +/// picked (e.g. `pip install flask werkzeug` where werkzeug constrains flask +/// below latest). The named outcome then holds the CLI version while the +/// dry-run carries pip's `requested:true` entry at the installed version, so +/// the exact `(name, version)` match misses. Such a leftover is the SAME +/// package the user named, not a transitive finding — collapse it onto the +/// named outcome, verdicted at the installed version (which is what actually +/// installs), instead of emitting a duplicate `(from requirements)` finding. 
+/// This can never mis-collapse an npm multi-version package: npm's lockfile +/// never sets `requested` (`TreePackage::requested` is always false for npm), +/// so the requested branch below is unreachable for npm and its same-name / +/// different-version copies stay distinct findings. +pub(super) fn apply_verdicts( + manager: PackageManager, + results: Vec<(tree::TreePackage, VerdictStatus)>, + outcomes: &mut [TargetOutcome], + direct_deps: &std::collections::HashSet, +) -> Vec { + let norm = |n: &str| manager.normalize_name(n); + // Index named outcomes by (normalized name, version) for exact matches, + // and by name alone for the pip-backtracking reconciliation below. Both + // keep result matching linear on big trees. + let mut named: std::collections::HashMap<(String, String), Vec> = + std::collections::HashMap::new(); + let mut named_by_name: std::collections::HashMap> = + std::collections::HashMap::new(); + for (i, o) in outcomes.iter().enumerate() { + if let TargetOutcome::Resolved { resolved, .. } = o { + named + .entry((norm(&resolved.name), resolved.version.clone())) + .or_default() + .push(i); + named_by_name + .entry(norm(&resolved.name)) + .or_default() + .push(i); + } + } + + let mut transitive = Vec::new(); + for (pkg, verdict) in results { + if let Some(indices) = named.get(&(norm(&pkg.name), pkg.version.clone())) { + for &i in indices { + if let TargetOutcome::Resolved { verdict: v, .. } = &mut outcomes[i] { + *v = verdict.clone(); + } + } + } else if let Some(indices) = pkg + .requested + .then(|| named_by_name.get(&norm(&pkg.name))) + .flatten() + { + // pip backtracked this named target to a different version. Adopt + // the installed version on the named row (honesty: the named row + // must show what installs) and verdict it there. 
`age` / + // `resolved.published_at` still reflect the CLI-resolved version — + // a one-minor-version drift, left as-is rather than re-fetching on + // the gate's critical path; recency stays driven by that age. + for &i in indices { + if let TargetOutcome::Resolved { + resolved, + verdict: v, + .. + } = &mut outcomes[i] + { + resolved.version = pkg.version.clone(); + *v = verdict.clone(); + } + } + } else { + let origin = if pkg.requested { + TreeOrigin::Requested + } else if direct_deps.contains(&pkg.name) { + TreeOrigin::PreExisting + } else { + TreeOrigin::Transitive + }; + transitive.push(TreeOutcome { + name: pkg.name, + version: pkg.version, + origin, + verdict, + }); + } + } + transitive +} + +/// Why the gate refuses to run the install. The single owner of both the +/// block decision and the escape hatch the refusal advertises — +/// `render::print_refusal` only maps variants to text. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum BlockReason { + /// Every blocking finding predates this command (existing tree only). + /// `--force` is the escape. + ExistingTree, + /// Vulnerable findings. `--force` is the escape. + Findings, + /// Only the recency threshold fired. `--no-fail` is the escape. + RecencyOnly, +} + +pub(super) fn block_reason(report: &PrecheckReport, opts: &PrecheckOptions) -> Option { + if opts.force { + return None; + } + if report.verdicts().any(|v| v.blocks()) { + return Some(if blames_existing_tree(report) { + BlockReason::ExistingTree + } else { + BlockReason::Findings + }); + } + if !opts.no_fail && report.recent_count() > 0 { + return Some(BlockReason::RecencyOnly); + } + None +} + +/// True when the block is entirely the existing tree's doing: vulnerable +/// findings exist, no named target blocks, and every *blocking* tree +/// finding (`VerdictStatus::blocks`, same predicate `block_reason` refuses +/// on) genuinely predates this command. 
A `Requested` finding (pip `-r`) +/// is added by this command and renders as `(from requirements)`; a +/// `Transitive` finding on any install that names targets or requirements +/// files is being pulled in by them right now. Only a truly bare install +/// (`report.bare_install`) or manifest-declared `PreExisting` findings may +/// blame the existing tree. +fn blames_existing_tree(report: &PrecheckReport) -> bool { + let named_blocks = report.named_verdicts().any(|v| v.blocks()); + if report.vulnerable_count() == 0 || named_blocks { + return false; + } + let Some(TreeReport::Full { transitive, .. }) = &report.tree else { + return false; + }; + transitive + .iter() + .filter(|t| t.verdict.blocks()) + .all(|t| match t.origin { + // A locked pin predates the `npm ci` that installs it. + TreeOrigin::PreExisting | TreeOrigin::Locked => true, + TreeOrigin::Requested => false, + TreeOrigin::Transitive => report.bare_install, + }) +} + +/// Resolve every named target against its registry through the bounded +/// worker pool. Order is preserved: outcome `i` belongs to `targets[i]`. 
+pub(super) fn verify_all( + targets: &[InstallTarget], + opts: &PrecheckOptions, + now: &chrono::DateTime, + allow_prerelease: bool, +) -> Vec { + pooled_map(targets, VERDICT_CONCURRENCY, |t| { + verify_one(t, opts, now, allow_prerelease) + }) +} + +fn verify_one( + target: &InstallTarget, + opts: &PrecheckOptions, + now: &chrono::DateTime, + allow_prerelease: bool, +) -> TargetOutcome { + use crate::verify_deps::registry; + + let resolved = match &target.kind { + TargetKind::Unverifiable { reason } => { + return TargetOutcome::Skipped { + target: target.clone(), + reason: reason.clone(), + }; + } + TargetKind::Npm(spec) => { + registry::npm_resolve(&target.name, spec, opts.npm_registry.as_deref()) + } + TargetKind::Pypi(spec) => registry::pypi_resolve( + &target.name, + spec, + opts.pypi_registry.as_deref(), + allow_prerelease, + ), + }; + + match resolved { + Ok(resolved) => { + // Future publish dates clamp to zero — maximally recent. + let age = now + .signed_duration_since(resolved.published_at) + .to_std() + .unwrap_or_else(|_| Duration::from_secs(0)); + TargetOutcome::Resolved { + target: target.clone(), + resolved, + age, + verdict: VerdictStatus::NotChecked, + } + } + Err(e) => TargetOutcome::Error { + target: target.clone(), + error: e, + }, + } +} + +#[cfg(test)] +mod tests { + use super::super::test_support::*; + use super::super::{ + run_verdict_pass, InstallTarget, PackageManager, TargetKind, TargetOutcome, TreeOrigin, + TreeOutcome, TreeReport, VerdictStatus, + }; + use super::*; + + fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { + block_reason(report, opts).is_some() + } + + /// Predicate matrix: force ⇒ never block; vulnerable always blocks + /// (`--no-fail` must not waive it); unverifiable findings and resolution + /// errors never block (public mode fails open); recency blocks unless + /// `--no-fail` demotes it. 
+ #[test] + fn block_predicate_matrix() { + let clean = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Clean); + report_with(vec![o]) + }; + let recent = report_with(vec![resolved_outcome("pkg", "1.0.0", true)]); + let vulnerable = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Vulnerable(vec![])); + report_with(vec![o]) + }; + let unverifiable = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Unverifiable("503".to_string())); + report_with(vec![o]) + }; + let resolution_error = report_with(vec![TargetOutcome::Error { + target: InstallTarget { + name: "pkg".to_string(), + display: "pkg==1.0.0".to_string(), + kind: TargetKind::Unverifiable { + reason: "test".to_string(), + }, + }, + error: "registry unavailable".to_string(), + }]); + + assert!(!should_block_install(&clean, &public_opts(false, false))); + assert!(should_block_install(&recent, &public_opts(false, false))); + assert!(!should_block_install(&recent, &public_opts(true, false))); + assert!(should_block_install( + &vulnerable, + &public_opts(false, false) + )); + assert!( + should_block_install(&vulnerable, &public_opts(true, false)), + "--no-fail must not waive a vulnerable block" + ); + assert!( + !should_block_install(&unverifiable, &public_opts(false, false)), + "public mode must fail open on lookup errors" + ); + assert!( + !should_block_install(&resolution_error, &public_opts(false, false)), + "public mode must fail open when no verdict can be obtained" + ); + for report in [ + &clean, + &recent, + &vulnerable, + &unverifiable, + &resolution_error, + ] { + assert!( + !should_block_install(report, &public_opts(false, true)), + "--force must never block" + ); + } + } + + /// A clean named outcome plus a vulnerable transitive tree finding must + /// roll into the block counts: `vulnerable_count() == 1`, + /// `should_block_install` true without `--force`, false with 
it. + #[test] + fn tree_findings_extend_block_counts() { + let mut named = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut named, VerdictStatus::Clean); + let mut report = report_with(vec![named]); + report.tree = Some(TreeReport::Full { + resolved_count: 2, + transitive: vec![TreeOutcome { + name: "evildep".to_string(), + version: "0.4.2".to_string(), + origin: TreeOrigin::Transitive, + verdict: VerdictStatus::Vulnerable(vec![]), + }], + }); + + assert_eq!(report.vulnerable_count(), 1); + let opts = |force: bool| PrecheckOptions { + force, + ..stub_opts() + }; + assert!(should_block_install(&report, &opts(false))); + assert!(!should_block_install(&report, &opts(true))); + } + + /// The existing-tree refusal fires only when every vulnerable finding + /// predates the command: a `Requested` finding (pip `-r`) is added by + /// this command, and a `Transitive` finding is being pulled in right + /// now unless the install is truly bare. `bare_install` is the explicit + /// discriminator — a requirements-only install also has no named + /// outcomes, but its resolved set is the command's doing. + #[test] + fn refusal_blame_respects_finding_origin() { + let tree_vulnerable = |origin| TreeOutcome { + name: "dep".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Vulnerable(vec![vm("A-1", None)]), + origin, + }; + // (origin, named outcomes present, bare_install, expected). + // (origin, named=false, bare=false) is the requirements-only shape. 
+ let cases = [ + (TreeOrigin::PreExisting, false, true, true), + (TreeOrigin::PreExisting, false, false, true), + (TreeOrigin::PreExisting, true, false, true), + (TreeOrigin::Locked, false, true, true), + (TreeOrigin::Transitive, false, true, true), + (TreeOrigin::Transitive, false, false, false), + (TreeOrigin::Transitive, true, false, false), + (TreeOrigin::Requested, false, true, false), + (TreeOrigin::Requested, false, false, false), + (TreeOrigin::Requested, true, false, false), + ]; + for (origin, with_named, bare_install, blames_tree) in cases { + let outcomes = if with_named { + vec![resolved_outcome("cleanpkg", "1.0.0", false)] + } else { + vec![] + }; + let mut report = report_with(outcomes); + report.bare_install = bare_install; + report.tree = Some(TreeReport::Full { + resolved_count: 1, + transitive: vec![tree_vulnerable(origin)], + }); + assert_eq!( + blames_existing_tree(&report), + blames_tree, + "origin {origin:?}, with_named {with_named}, bare {bare_install}" + ); + } + } + + /// A vulnerable NAMED target must never blame the existing tree, even + /// when a pre-existing tree finding is also vulnerable. + #[test] + fn refusal_blame_requires_clean_named_targets() { + let mut named = resolved_outcome("badpkg", "1.0.0", false); + set_verdict(&mut named, VerdictStatus::Vulnerable(vec![vm("A-1", None)])); + let mut report = report_with(vec![named]); + report.tree = Some(TreeReport::Full { + resolved_count: 2, + transitive: vec![TreeOutcome { + name: "stickydep".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Vulnerable(vec![vm("A-2", None)]), + origin: TreeOrigin::PreExisting, + }], + }); + assert!(!blames_existing_tree(&report)); + } + + /// Verdict pass against an in-process stub: vulnerable body → Vulnerable + /// with matches; 503 override → Unverifiable; no VerdictConfig → outcomes + /// keep NotChecked. 
+ #[test] + fn verdict_pass_maps_stub_responses() { + use std::collections::HashMap; + + let key = |name: &str| crate::vuln_api_stub::key("pypi", name, "1.0.0"); + let mut checks = HashMap::new(); + checks.insert( + key("evil"), + crate::vuln_api_stub::vulnerable_body("pypi", "evil", "1.0.0", "MAL-2024-0001", None), + ); + checks.insert(key("flaky"), "{}".to_string()); + let mut statuses = HashMap::new(); + statuses.insert(key("flaky"), 503u16); + let stub = crate::vuln_api_stub::spawn_with_statuses(checks, statuses); + + let opts = verdict_opts(&stub.base_url); + + let mut outcomes = vec![ + resolved_outcome("evil", "1.0.0", false), + resolved_outcome("flaky", "1.0.0", false), + resolved_outcome("goodpkg", "1.0.0", false), // unknown → stub default clean + ]; + run_verdict_pass(PackageManager::Pip, &mut outcomes, &opts); + + let verdicts: Vec<_> = outcomes + .iter() + .map(|o| match o { + TargetOutcome::Resolved { verdict, .. } => verdict.clone(), + _ => unreachable!(), + }) + .collect(); + assert!( + matches!(&verdicts[0], VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") + ); + assert!(matches!(&verdicts[1], VerdictStatus::Unverifiable(_))); + assert!(matches!(&verdicts[2], VerdictStatus::Clean)); + + // Without a VerdictConfig the pass is a no-op. + let mut untouched = vec![resolved_outcome("evil", "1.0.0", false)]; + let no_verdict = stub_opts(); + run_verdict_pass(PackageManager::Pip, &mut untouched, &no_verdict); + assert!(matches!( + &untouched[0], + TargetOutcome::Resolved { + verdict: VerdictStatus::NotChecked, + .. + } + )); + } + + /// The pool must verdict every job exactly once and return the flagged + /// job `Vulnerable` with the rest `Clean`. 
+ #[test] + fn verdict_pool_returns_all_results() { + use std::collections::HashMap; + + let mut checks = HashMap::new(); + checks.insert( + crate::vuln_api_stub::key("pypi", "evil", "1.0.0"), + crate::vuln_api_stub::vulnerable_body("pypi", "evil", "1.0.0", "MAL-2024-0001", None), + ); + let stub = crate::vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + + let cfg = VerdictConfig { + base_url: stub.base_url.clone(), + }; + + let jobs: Vec = ["a", "b", "evil", "c", "d", "e"] + .iter() + .map(|n| tree::TreePackage { + name: n.to_string(), + version: "1.0.0".to_string(), + requested: false, + }) + .collect(); + + let results = verdict_pool(jobs, &cfg, PackageManager::Pip); + assert_eq!(results.len(), 6, "all jobs verdicted"); + let flagged = results + .iter() + .filter(|(_, v)| matches!(v, VerdictStatus::Vulnerable(_))) + .count(); + let clean = results + .iter() + .filter(|(_, v)| matches!(v, VerdictStatus::Clean)) + .count(); + assert_eq!(flagged, 1, "only evil flagged"); + assert_eq!(clean, 5, "rest clean"); + let evil = results + .iter() + .find(|(p, _)| p.name == "evil") + .expect("evil present"); + assert!( + matches!(&evil.1, VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") + ); + } + + /// `pooled_map` maps every item and preserves order at any concurrency + /// (1 = serial, 8 > item count = all workers spawn but some drain empty). + #[test] + fn pooled_map_preserves_order_at_any_concurrency() { + let items: Vec = (0..6).collect(); + for concurrency in [1usize, 8] { + assert_eq!( + pooled_map(&items, concurrency, |i| i * 2), + vec![0, 2, 4, 6, 8, 10], + "concurrency {concurrency}" + ); + } + } + + /// Leftover origin assignment: pip `requested` ⇒ Requested; manifest + /// direct dep ⇒ PreExisting; otherwise Transitive. Requested wins over + /// a direct-dep hit. 
+ #[test] + fn apply_verdicts_assigns_origins() { + let pkg = |name: &str, requested: bool| tree::TreePackage { + name: name.to_string(), + version: "1.0.0".to_string(), + requested, + }; + let results = vec![ + (pkg("reqdep", true), VerdictStatus::Clean), + (pkg("predep", false), VerdictStatus::Clean), + (pkg("deepdep", false), VerdictStatus::Clean), + ]; + let direct_deps = std::collections::HashSet::from(["predep".to_string()]); + let mut outcomes = []; + let mut tree = apply_verdicts(PackageManager::Npm, results, &mut outcomes, &direct_deps); + tree.sort_by(|a, b| a.name.cmp(&b.name)); + let origins: Vec<(&str, TreeOrigin)> = + tree.iter().map(|t| (t.name.as_str(), t.origin)).collect(); + assert_eq!( + origins, + vec![ + ("deepdep", TreeOrigin::Transitive), + ("predep", TreeOrigin::PreExisting), + ("reqdep", TreeOrigin::Requested), + ] + ); + } + + fn pkg(name: &str, version: &str, requested: bool) -> tree::TreePackage { + tree::TreePackage { + name: name.to_string(), + version: version.to_string(), + requested, + } + } + + fn named_version(outcome: &TargetOutcome) -> &str { + match outcome { + TargetOutcome::Resolved { resolved, .. } => resolved.version.as_str(), + _ => unreachable!("expected a resolved outcome"), + } + } + + fn named_verdict(outcome: &TargetOutcome) -> &VerdictStatus { + match outcome { + TargetOutcome::Resolved { verdict, .. } => verdict, + _ => unreachable!("expected a resolved outcome"), + } + } + + /// pip backtracking: the CLI's `pypi_resolve` picked flask 3.0.3 but + /// werkzeug constrains it, so pip's dry-run installs flask 3.0.2 with + /// `requested:true`. The exact `(name, version)` match misses, but the + /// leftover is the SAME named package — it must collapse onto the named + /// outcome (verdicted at the installed 3.0.2, the named row showing what + /// installs), with NO duplicate `(from requirements)` finding. 
+ #[test] + fn apply_verdicts_collapses_pip_backtracked_named_target() { + let mut outcomes = vec![resolved_outcome("flask", "3.0.3", false)]; + let results = vec![( + pkg("flask", "3.0.2", true), + VerdictStatus::Vulnerable(vec![vm("CVE-1", None)]), + )]; + let direct_deps = std::collections::HashSet::new(); + + let transitive = apply_verdicts(PackageManager::Pip, results, &mut outcomes, &direct_deps); + + assert!( + transitive.is_empty(), + "no transitive (from requirements) duplicate for a named package" + ); + assert_eq!( + named_version(&outcomes[0]), + "3.0.2", + "named row shows the version pip installs, not the CLI-resolved one" + ); + assert!( + matches!(named_verdict(&outcomes[0]), VerdictStatus::Vulnerable(_)), + "named outcome verdicted at the installed version" + ); + + // The package appears exactly once: one named outcome, zero tree + // findings — counts reflect a single package. + let report = report_with(outcomes); + let report = PrecheckReport { + tree: Some(TreeReport::Full { + resolved_count: 1, + transitive, + }), + ..report + }; + assert_eq!(report.vulnerable_count(), 1, "flask counted once"); + } + + /// An exact `(name, version)` match still collapses onto the named + /// outcome unchanged — the version is untouched and the verdict lands. + #[test] + fn apply_verdicts_exact_named_match_unchanged() { + let mut outcomes = vec![resolved_outcome("flask", "3.0.3", false)]; + let results = vec![( + pkg("flask", "3.0.3", true), + VerdictStatus::Vulnerable(vec![vm("CVE-1", None)]), + )]; + let direct_deps = std::collections::HashSet::new(); + + let transitive = apply_verdicts(PackageManager::Pip, results, &mut outcomes, &direct_deps); + + assert!(transitive.is_empty()); + assert_eq!(named_version(&outcomes[0]), "3.0.3"); + assert!(matches!( + named_verdict(&outcomes[0]), + VerdictStatus::Vulnerable(_) + )); + } + + /// npm multi-version must NOT collapse: the same name legitimately + /// appears at two versions in one tree (nested node_modules). 
Neither is + /// a named-and-requested pip target (npm never sets `requested`), so both + /// stay distinct tree findings rather than folding into one. + #[test] + fn apply_verdicts_keeps_npm_multi_version_distinct() { + // A named outcome shares the name to prove name-only matching is NOT + // used for npm: the by-name index exists, but the requested guard + // keeps both copies out of it. + let mut outcomes = vec![resolved_outcome("lodash", "4.17.21", false)]; + let results = vec![ + (pkg("lodash", "4.17.20", false), VerdictStatus::Clean), + (pkg("lodash", "3.10.1", false), VerdictStatus::Clean), + ]; + let direct_deps = std::collections::HashSet::new(); + + let mut transitive = + apply_verdicts(PackageManager::Npm, results, &mut outcomes, &direct_deps); + transitive.sort_by(|a, b| a.version.cmp(&b.version)); + + let versions: Vec<&str> = transitive.iter().map(|t| t.version.as_str()).collect(); + assert_eq!( + versions, + vec!["3.10.1", "4.17.20"], + "both npm copies stay distinct findings" + ); + // The named lodash@4.17.21 keeps its own version untouched. + assert_eq!(named_version(&outcomes[0]), "4.17.21"); + } +} diff --git a/src/verify_deps/mod.rs b/src/verify_deps/mod.rs new file mode 100644 index 0000000..b813529 --- /dev/null +++ b/src/verify_deps/mod.rs @@ -0,0 +1,137 @@ +//! Slim slice of #89's verify_deps: registry resolution + threshold helpers. + +pub mod registry; + +use std::time::Duration; + +/// Parse a human-friendly duration like `2d`, `48h`, `30m`, `45s`, or +/// a bare integer (interpreted as days). Returns the parsed duration. 
+pub fn parse_threshold(input: &str) -> Result { + let s = input.trim(); + if s.is_empty() { + return Err("threshold cannot be empty".to_string()); + } + + let (num_str, unit) = match s.chars().last() { + Some(c) if c.is_ascii_alphabetic() => { + (&s[..s.len() - c.len_utf8()], c.to_ascii_lowercase()) + } + _ => (s, 'd'), + }; + + let value: f64 = num_str + .trim() + .parse() + .map_err(|_| format!("invalid threshold number: '{}'", num_str))?; + + if value < 0.0 || !value.is_finite() { + return Err(format!( + "threshold must be a non-negative finite number: '{}'", + input + )); + } + + let secs = match unit { + 's' => value, + 'm' => value * 60.0, + 'h' => value * 3600.0, + 'd' => value * 86400.0, + 'w' => value * 7.0 * 86400.0, + other => { + return Err(format!( + "unknown threshold unit '{}'. Use s, m, h, d, or w.", + other + )) + } + }; + + let d = Duration::try_from_secs_f64(secs).map_err(|_| "threshold too large".to_string())?; + // Establish the invariant every consumer relies on: the threshold + // must also fit in a `chrono::Duration` (see precheck's from_std). + chrono::Duration::from_std(d).map_err(|_| "threshold too large".to_string())?; + Ok(d) +} + +/// Format a Duration as a short human-readable string (e.g. `1d 4h`). 
+pub fn format_duration(d: Duration) -> String { + let total_secs = d.as_secs(); + if total_secs < 60 { + return format!("{}s", total_secs); + } + let mins = total_secs / 60; + if mins < 60 { + return format!("{}m", mins); + } + let hours = total_secs / 3600; + let rem_mins = (total_secs % 3600) / 60; + if hours < 24 { + if rem_mins == 0 { + return format!("{}h", hours); + } + return format!("{}h {}m", hours, rem_mins); + } + let days = total_secs / 86400; + let rem_hours = (total_secs % 86400) / 3600; + if rem_hours == 0 { + format!("{}d", days) + } else { + format!("{}d {}h", days, rem_hours) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_threshold_units() { + assert_eq!( + parse_threshold("2d").unwrap(), + Duration::from_secs(2 * 86400) + ); + assert_eq!( + parse_threshold("48h").unwrap(), + Duration::from_secs(48 * 3600) + ); + assert_eq!( + parse_threshold("30m").unwrap(), + Duration::from_secs(30 * 60) + ); + assert_eq!(parse_threshold("90s").unwrap(), Duration::from_secs(90)); + assert_eq!( + parse_threshold("1w").unwrap(), + Duration::from_secs(7 * 86400) + ); + assert_eq!( + parse_threshold("3").unwrap(), + Duration::from_secs(3 * 86400) + ); + assert_eq!(parse_threshold("0.5d").unwrap(), Duration::from_secs(43200)); + } + + #[test] + fn parse_threshold_rejects_garbage() { + assert!(parse_threshold("").is_err()); + assert!(parse_threshold("abc").is_err()); + assert!(parse_threshold("-1d").is_err()); + assert!(parse_threshold("1y").is_err()); + } + + #[test] + fn parse_threshold_rejects_absurdly_large_values() { + // Too large for chrono::Duration (precheck converts via from_std). + assert!(parse_threshold("999999999999d").is_err()); + // Too large even for std::time::Duration. 
+ assert!(parse_threshold("1e308d").is_err()); + } + + #[test] + fn format_duration_short() { + assert_eq!(format_duration(Duration::from_secs(5)), "5s"); + assert_eq!(format_duration(Duration::from_secs(120)), "2m"); + assert_eq!(format_duration(Duration::from_secs(3600)), "1h"); + assert_eq!(format_duration(Duration::from_secs(3700)), "1h 1m"); + assert_eq!(format_duration(Duration::from_secs(86400)), "1d"); + assert_eq!(format_duration(Duration::from_secs(90000)), "1d 1h"); + } +} diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs new file mode 100644 index 0000000..8168fc5 --- /dev/null +++ b/src/verify_deps/registry.rs @@ -0,0 +1,997 @@ +//! Registry lookups for npm and PyPI publish times. +//! +//! These talk to public registries (no auth) and are kept independent +//! of the rest of the CLI's HTTP client because: +//! * we must not send the user's Corgea auth header to a third-party, +//! * the timeouts and retry policy are different. +//! +//! Both resolvers turn a version spec into the concrete version that +//! would be installed, plus its publish time as a UTC timestamp. + +use chrono::{DateTime, Utc}; +use serde::Deserialize; +use std::sync::OnceLock; +use std::time::Duration; + +const DEFAULT_NPM_REGISTRY: &str = "https://registry.npmjs.org"; +const DEFAULT_PYPI_REGISTRY: &str = "https://pypi.org"; + +// Matches `vuln_api::REQUEST_TIMEOUT` so a gate run degrades uniformly: +// both legs of a verdict pass give up at the same horizon. 
+const REQUEST_TIMEOUT: Duration = Duration::from_secs(30); + +use crate::vuln_api::{encode_npm_name, user_agent}; + +fn http_client() -> &'static reqwest::blocking::Client { + static CLIENT: OnceLock = OnceLock::new(); + CLIENT.get_or_init(|| { + reqwest::blocking::Client::builder() + .timeout(REQUEST_TIMEOUT) + .user_agent(user_agent("deps")) + .build() + .expect("registry http client") + }) +} + +/// Shared fetch/parse boilerplate for registry metadata GETs: 404 → "not +/// found", other non-success → status error, then parse the JSON body. +/// `label` names the registry in error messages ("npm registry" / "PyPI"). +fn fetch_registry_json( + url: &str, + label: &str, + name: &str, + base: &str, +) -> Result { + let resp = http_client() + .get(url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("{} request failed: {}", label, e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!( + "package '{}' not found on {} ({})", + name, label, base + )); + } + if !status.is_success() { + return Err(format!( + "{} returned status {} for '{}'", + label, status, name + )); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read {} response: {}", label, e))?; + serde_json::from_str(&body) + .map_err(|e| format!("failed to parse {} response for '{}': {}", label, name, e)) +} + +#[derive(Debug, Deserialize)] +struct PypiUrl { + upload_time_iso_8601: Option, + upload_time: Option, + /// PEP 592. PyPI's JSON API emits a bool; some mirrors emit the + /// yank reason string instead. Either form means yanked. + #[serde(default)] + yanked: Option, +} + +impl PypiUrl { + fn is_yanked(&self) -> bool { + match &self.yanked { + Some(serde_json::Value::Bool(b)) => *b, + Some(serde_json::Value::String(_)) => true, + _ => false, + } + } +} + +/// Parse an ISO-8601 timestamp from npm or PyPI. 
PyPI sometimes emits +/// a naive timestamp like `2023-05-22T18:30:00` (no offset) which +/// chrono's RFC3339 parser rejects, so we accept both shapes. +fn parse_iso8601(raw: &str) -> Result, String> { + if let Ok(dt) = DateTime::parse_from_rfc3339(raw) { + return Ok(dt.with_timezone(&Utc)); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S") { + return Ok(DateTime::::from_naive_utc_and_offset(naive, Utc)); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S%.f") { + return Ok(DateTime::::from_naive_utc_and_offset(naive, Utc)); + } + Err(format!("unrecognised timestamp format: {}", raw)) +} + +/// What the user typed after `pkg@` in an install command. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum NpmSpec { + /// `axios`, `axios@`, or no spec — resolve to the `latest` dist-tag. + Latest, + /// `axios@latest`, `axios@next`, etc. + Tag(String), + /// `axios@1.2.3` — already resolved. + Exact(String), + /// `axios@^1.0.0`, `axios@~1.2.0`, `axios@>=1.0.0 <2.0.0`, etc. + Range(String), +} + +#[derive(Debug, Clone)] +pub struct ResolvedPackage { + pub name: String, + pub version: String, + pub published_at: DateTime, +} + +#[derive(Debug, Deserialize)] +struct NpmFullMetadata { + #[serde(default, rename = "dist-tags")] + dist_tags: std::collections::BTreeMap, + /// Only the keys (published version strings) are used; `IgnoredAny` + /// avoids allocating multi-MB JSON trees for big packuments. + #[serde(default)] + versions: std::collections::BTreeMap, + #[serde(default)] + time: std::collections::BTreeMap, +} + +/// Resolve an `NpmSpec` against the npm registry and return the +/// concrete version + publish time. Used by install wrappers when the +/// install command says e.g. `axios@^1.0.0` and we need to know what +/// would actually be installed before the install runs. 
+pub fn npm_resolve( + name: &str, + spec: &NpmSpec, + registry: Option<&str>, +) -> Result { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry + .unwrap_or(DEFAULT_NPM_REGISTRY) + .trim_end_matches('/'); + let url = format!("{}/{}", base, encode_npm_name(name)); + let meta: NpmFullMetadata = fetch_registry_json(&url, "npm registry", name, base)?; + + let resolved_version = match spec { + NpmSpec::Latest => meta.dist_tags.get("latest").cloned().ok_or_else(|| { + format!( + "package '{}' has no 'latest' dist-tag on the npm registry", + name + ) + })?, + NpmSpec::Tag(tag) => meta.dist_tags.get(tag).cloned().ok_or_else(|| { + format!( + "package '{}' has no dist-tag named '{}' (available: {})", + name, + tag, + meta.dist_tags + .keys() + .cloned() + .collect::>() + .join(", "), + ) + })?, + NpmSpec::Exact(v) => { + if !meta.versions.contains_key(v) { + return Err(format!( + "version '{}' for package '{}' was not found on the npm registry", + v, name + )); + } + v.clone() + } + NpmSpec::Range(range) => { + npm_pick_highest_matching(&meta.versions, range).ok_or_else(|| { + format!( + "no published version of '{}' satisfies range '{}'", + name, range + ) + })? + } + }; + + let raw_time = meta.time.get(&resolved_version).ok_or_else(|| { + format!( + "publish time missing for {}@{} on the npm registry", + name, resolved_version + ) + })?; + + let published_at = parse_iso8601(raw_time).map_err(|e| { + format!( + "could not parse publish time '{}' for {}@{}: {}", + raw_time, name, resolved_version, e + ) + })?; + + Ok(ResolvedPackage { + name: name.to_string(), + version: resolved_version, + published_at, + }) +} + +/// Translate an npm-style version range to `semver::VersionReq` +/// alternatives (one per `||` branch — any-match). 
Handles npm grammar +/// the Rust crate doesn't: whitespace AND separators, hyphen ranges +/// (`1.0.0 - 2.0.0`), `||` unions, and bare partials (`1.0`, which npm +/// reads as `1.0.x` but Cargo would read as `^1.0`). +fn parse_npm_range(range: &str) -> Option> { + range + .split("||") + .map(|alt| parse_npm_range_alternative(alt.trim())) + .collect() +} + +fn parse_npm_range_alternative(alt: &str) -> Option { + if let Some((lo, hi)) = alt.split_once(" - ") { + return hyphen_range(lo.trim(), hi.trim()); + } + if let Some(tilde) = bare_partial_to_tilde(alt) { + return semver::VersionReq::parse(&tilde).ok(); + } + if let Ok(req) = semver::VersionReq::parse(alt) { + return Some(req); + } + let normalised = alt.split_whitespace().collect::>().join(","); + semver::VersionReq::parse(&normalised).ok() +} + +/// node-semver hyphen range `A - B`. A partial low bound fills with zeros +/// (`1.2` → `>=1.2.0`); a partial high bound excludes the next component +/// (`- 2.3` → `<2.4.0`, `- 2` → `<3.0.0`), matching npm. +fn hyphen_range(lo: &str, hi: &str) -> Option { + let lo_v = pad_partial(lo)?; + let hi_segments = hi.split('.').count(); + let hi_v = pad_partial(hi)?; + let expr = match hi_segments { + 1 => format!(">={lo_v}, <{}", semver::Version::new(hi_v.major + 1, 0, 0)), + 2 => format!( + ">={lo_v}, <{}", + semver::Version::new(hi_v.major, hi_v.minor + 1, 0) + ), + _ => format!(">={lo_v}, <={hi_v}"), + }; + semver::VersionReq::parse(&expr).ok() +} + +/// `1.2` → `1.2.0` (accepts an optional leading `v`, like npm). +fn pad_partial(v: &str) -> Option { + let v = v.trim(); + let v = v.strip_prefix('v').unwrap_or(v); + let mut segments: Vec<&str> = v.split('.').collect(); + while segments.len() < 3 { + segments.push("0"); + } + semver::Version::parse(&segments.join(".")).ok() +} + +/// npm desugars a bare two-component version (`1.0`) to the x-range +/// `1.0.x`; Cargo's `VersionReq` would read it as caret (`^1.0`, matching +/// 1.9). 
Translate to tilde, which has npm's intended bounds. +fn bare_partial_to_tilde(alt: &str) -> Option { + let segments: Vec<&str> = alt.split('.').collect(); + (segments.len() == 2 + && segments + .iter() + .all(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))) + .then(|| format!("~{alt}")) +} + +/// Pick the highest published version that satisfies `range`. Pre-releases +/// are excluded unless the range itself references one (matches npm). +fn npm_pick_highest_matching( + versions: &std::collections::BTreeMap, + range: &str, +) -> Option { + let reqs = parse_npm_range(range)?; + let range_has_prerelease = range.contains('-') && !range.contains(" - "); + versions + .keys() + .filter_map(|raw| semver::Version::parse(raw).ok().map(|v| (v, raw))) + .filter(|(v, _)| { + (v.pre.is_empty() || range_has_prerelease) && reqs.iter().any(|req| req.matches(v)) + }) + .max_by(|(a, _), (b, _)| a.cmp(b)) + .map(|(_, raw)| raw.clone()) +} + +/// PyPI version specifier used by install wrappers. We parse a +/// limited subset of PEP 440 specifiers — enough for the common +/// install-command cases (`pkg`, `pkg==X`, `pkg>=X`, `pkg=2.0`, `<3,>=2`, `~=1.4`). + Specifier(String), +} + +#[derive(Debug, Default, Deserialize)] +struct PypiInfo { + #[serde(default)] + name: Option, +} + +#[derive(Debug, Deserialize)] +struct PypiInfoResponse { + /// `info.name` is PyPI's canonical spelling — the registry answers any + /// PEP 503-equivalent request spelling but echoes the stored name. + #[serde(default)] + info: PypiInfo, + releases: std::collections::BTreeMap>, +} + +/// The name a resolved pypi package should carry forward: the registry's +/// canonical spelling when the response provides one, else the requested +/// spelling. Vuln advisories are keyed by lowercase(canonical) — checking +/// a user-typed variant (`Flask_Cors`) would miss the `flask-cors` row. 
+/// The canonical name is accepted only when PEP 503-equivalent to the +/// request, so a hostile mirror can't redirect the verdict to a different +/// package's (clean) identity. +fn canonical_pypi_name(requested: &str, info_name: Option<&str>) -> String { + use crate::deps::ecosystems::pypi::normalize_pypi_name; + match info_name { + Some(n) if !n.is_empty() && normalize_pypi_name(n) == normalize_pypi_name(requested) => { + n.to_string() + } + _ => requested.to_string(), + } +} + +/// Resolve a `PypiSpec` against PyPI and return the concrete version plus +/// publish time. The latest non-prerelease, non-yanked release is preferred. +/// +/// The `allow_prerelease` flag mirrors pip's `--pre`: when set, prerelease +/// versions become eligible for `Latest`/specifier resolution so the gate +/// verdicts the version pip would actually install, not the latest stable. +pub fn pypi_resolve( + name: &str, + spec: &PypiSpec, + registry: Option<&str>, + allow_prerelease: bool, +) -> Result { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry + .unwrap_or(DEFAULT_PYPI_REGISTRY) + .trim_end_matches('/'); + let url = format!("{}/pypi/{}/json", base, urlencoding::encode(name)); + let meta: PypiInfoResponse = fetch_registry_json(&url, "PyPI", name, base)?; + + let candidates = collect_pypi_candidates(&meta); + // A yanked release resolves only via an exact pin (PEP 592), matching + // pip — otherwise we'd gate a version pip would never choose. + let installable: Vec = + candidates.iter().filter(|c| !c.yanked).cloned().collect(); + let chosen = match spec { + PypiSpec::Latest => { + pick_latest_stable(&installable, allow_prerelease).map(|c| c.version.clone()) + } + // PEP 440 equality, not string equality: `==2.31` must match the + // release key `2.31.0` (and resolve to the key, so the publish-time + // lookup below finds it). 
+ PypiSpec::Exact(v) => { + let want = PypiVersion::parse(v); + candidates + .iter() + .find(|c| { + &c.version == v + || matches!( + (&want, PypiVersion::parse(&c.version)), + (Some(w), Some(cv)) if *w == cv + ) + }) + .map(|c| c.version.clone()) + } + PypiSpec::Specifier(spec_str) => { + pypi_resolve_specifier(&installable, spec_str, allow_prerelease) + .map_err(|e| format!("{} for '{}'", e, name))? + } + }; + + let chosen = chosen.ok_or_else(|| match spec { + PypiSpec::Exact(v) => { + format!( + "version '{}' for package '{}' was not found on PyPI", + v, name + ) + } + _ => format!("no installable version found for '{}' on PyPI", name), + })?; + + let published_at = candidates + .iter() + .find(|c| c.version == chosen) + .map(|c| c.uploaded) + .ok_or_else(|| { + format!( + "no upload timestamp for '{}' version '{}' on PyPI", + name, chosen + ) + })?; + + Ok(ResolvedPackage { + // Carry the registry's canonical spelling forward so the vuln-api + // check hits the advisory row keyed by it (see canonical_pypi_name). + name: canonical_pypi_name(name, meta.info.name.as_deref()), + version: chosen, + published_at, + }) +} + +/// One published release a `PypiSpec` can resolve to. +#[derive(Debug, Clone)] +struct PypiCandidate { + version: String, + uploaded: DateTime, + /// Every artifact of this release is yanked (PEP 592) — pip skips + /// it for anything but an exact pin, so non-exact resolution must too. + yanked: bool, +} + +/// Returns a candidate for every release that has at least one uploaded, +/// timestamped artifact. Empty or timestampless release entries (which +/// PyPI sometimes keeps around for deleted / private versions) are +/// filtered out so we never pick them. 
+fn collect_pypi_candidates(meta: &PypiInfoResponse) -> Vec { + let mut out = Vec::new(); + for (ver, files) in &meta.releases { + if files.is_empty() { + continue; + } + let earliest = files + .iter() + .filter_map(|f| { + f.upload_time_iso_8601 + .as_deref() + .or(f.upload_time.as_deref()) + }) + .filter_map(|raw| parse_iso8601(raw).ok()) + .min(); + if let Some(dt) = earliest { + out.push(PypiCandidate { + version: ver.clone(), + uploaded: dt, + yanked: files.iter().all(PypiUrl::is_yanked), + }); + } + } + out +} + +/// PEP 440-ish ordering key: the semver-parsed release plus its `.postN` +/// number. Post-releases order after their base (`1.0.post1` > `1.0`) and +/// pip installs them by default — dropping them from candidates would +/// verdict a different version than the install. Prereleases (`1.0rc1`, +/// `1.0a2`, `1.0.dev3`) parse with a rank-encoded semver prerelease so they +/// order dev < a < b < rc and all below the plain release; they are filtered +/// out at resolution time unless `--pre` is set. Epochs (`1!2.0`) and local +/// versions (`1.0+abc`) remain unsupported and are skipped. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +struct PypiVersion { + base: semver::Version, + /// `.postN` number; `None` for a plain release. Ordering: derive(Ord) + /// compares `base` first, then `post` (`None` < `Some(_)`), which is + /// exactly PEP 440's post-release ordering. 
+ post: Option, +} + +impl PypiVersion { + fn parse(raw: &str) -> Option { + let (rest, post) = match raw.find(".post") { + Some(idx) => { + let n: u64 = raw[idx + ".post".len()..].parse().ok()?; + (&raw[..idx], Some(n)) + } + None => (raw, None), + }; + let (release, pre) = split_pep440_prerelease(rest)?; + let semver_src = match &pre { + Some(p) => format!("{}-{}", normalize_for_semver(release), p), + None => normalize_for_semver(release), + }; + let base = semver::Version::parse(&semver_src).ok()?; + Some(PypiVersion { base, post }) + } + + fn is_prerelease(&self) -> bool { + !self.base.pre.is_empty() + } +} + +/// Split a PEP 440 release string into its numeric release and an optional +/// semver-encoded prerelease identifier. The rank prefix (`0dev` < `1a` < +/// `2b` < `3rc`) makes the derived `Ord` on the semver prerelease match PEP +/// 440 ordering, and any prerelease sorts below the plain release. +/// +/// Returns `(release, None)` for a plain release, `(release, Some(pre))` for +/// a recognized prerelease, and `None` for an alpha-bearing form we don't +/// recognize (epochs, local versions, combined pre+dev) so the candidate is +/// skipped rather than mis-ordered — matching prior conservative behavior. +fn split_pep440_prerelease(v: &str) -> Option<(&str, Option)> { + let Some(idx) = v.find(|c: char| c.is_ascii_alphabetic()) else { + return Some((v, None)); + }; + let release = v[..idx].trim_end_matches(['.', '-', '_']); + let suffix = &v[idx..]; + let (rank, label, rest) = if let Some(r) = suffix.strip_prefix("rc") { + (3, "rc", r) + } else if let Some(r) = suffix.strip_prefix("dev") { + (0, "dev", r) + } else if let Some(r) = suffix.strip_prefix('a') { + (1, "a", r) + } else if let Some(r) = suffix.strip_prefix('b') { + (2, "b", r) + } else if let Some(r) = suffix.strip_prefix('c') { + // PEP 440 spells release-candidate `c` and `rc` interchangeably. 
+ (3, "rc", r) + } else { + return None; + }; + let num_str = rest.trim_start_matches(['.', '-', '_']); + // Reject anything we didn't fully consume (combined `a1.dev2`, local + // `+abc`, etc.) — dropping it is safer than guessing its order. + if !num_str.chars().all(|c| c.is_ascii_digit()) { + return None; + } + let num: u64 = num_str.parse().unwrap_or(0); + Some((release, Some(format!("{rank}{label}.{num}")))) +} + +/// Pick the latest version using PEP 440-ish parsing as a best-effort +/// ordering. Prereleases are excluded unless `allow_prerelease` (pip's +/// `--pre`) is set. No upload-time fallback when nothing parses: guessing +/// by upload time could pick a prerelease without `--pre`, and a +/// resolution error (→ visible, ungated) beats a silent wrong pick — +/// consistent with `pypi_resolve_specifier`, which errors rather than +/// guesses. +fn pick_latest_stable( + candidates: &[PypiCandidate], + allow_prerelease: bool, +) -> Option<&PypiCandidate> { + candidates + .iter() + .filter_map(|c| { + PypiVersion::parse(&c.version) + .filter(|v| allow_prerelease || !v.is_prerelease()) + .map(|v| (v, c)) + }) + .max_by(|(a, _), (b, _)| a.cmp(b)) + .map(|(_, c)| c) +} + +/// Best-effort PEP 440 → semver: PyPI versions are usually `X.Y.Z` or +/// `X.Y` or `X.Y.Z.postN` — the dotted-number form usually parses +/// straight as semver if we pad to 3 components. Anything more exotic +/// (`1.0a1`, `2!1.0`, etc.) is left alone and rejected by semver. +/// +/// Also used outside the registry (`precheck::safe_version`) as a lenient +/// cross-ecosystem pad for ordering fixed versions; keep it ecosystem-agnostic. 
+pub(crate) fn normalize_for_semver(v: &str) -> String { + if v.contains('!') + || v.contains('a') + || v.contains('b') + || v.contains("rc") + || v.contains(".dev") + { + return v.to_string(); + } + let parts: Vec<&str> = v.split('.').collect(); + match parts.len() { + 1 => format!("{}.0.0", parts[0]), + 2 => format!("{}.{}.0", parts[0], parts[1]), + _ => v.to_string(), + } +} + +/// Apply a PEP 440-style specifier expression to the candidate list +/// and return the highest match (`Ok(None)` when nothing satisfies it). +/// Supported operators: `==` (incl. wildcards `==1.4.*`), `>=`, `>`, +/// `<=`, `<`, `~=`, `!=`. An expression we can't parse (unknown operator, +/// exotic version) is `Err` — resolving anything else would gate a +/// different version than the package manager installs. +fn pypi_resolve_specifier( + candidates: &[PypiCandidate], + spec: &str, + allow_prerelease: bool, +) -> Result, String> { + let parts: Vec<&str> = spec.split(',').map(|s| s.trim()).collect(); + let mut requirements: Vec<(&'static str, semver::Version)> = Vec::new(); + + // Longest prefixes first so `>=` never matches as `>`. + const OPERATORS: &[(&str, &str)] = &[ + ("===", "=="), + ("==", "=="), + (">=", ">="), + ("<=", "<="), + ("!=", "!="), + ("~=", "~="), + (">", ">"), + ("<", "<"), + ]; + for p in &parts { + let unsupported = || format!("unsupported version specifier '{}'", spec); + let (op, val) = OPERATORS + .iter() + .find_map(|(prefix, op)| p.strip_prefix(prefix).map(|v| (*op, v.trim()))) + .ok_or_else(unsupported)?; + // Wildcard pin `==X.Y.*` — desugar to the half-open range it means. 
+ if op == "==" { + if let Some(prefix) = val.strip_suffix(".*") { + let (lo, hi) = wildcard_bounds(prefix).ok_or_else(unsupported)?; + requirements.push((">=", lo)); + requirements.push(("<", hi)); + continue; + } + } + if val.contains('*') { + return Err(unsupported()); + } + let v = semver::Version::parse(&normalize_for_semver(val)).map_err(|_| unsupported())?; + // PEP 440 `~=X.Y` bumps the LAST release component of the written + // spec: `~=1.4` means `<2.0`, `~=1.4.5` means `<1.5.0`. Desugar + // here — the padded `v` has lost the component count. + if op == "~=" { + let hi = match val.split('.').count() { + 2 => semver::Version::new(v.major + 1, 0, 0), + 3 => semver::Version::new(v.major, v.minor + 1, 0), + _ => return Err(unsupported()), + }; + requirements.push((">=", v)); + requirements.push(("<", hi)); + continue; + } + requirements.push((op, v)); + } + + // PEP 440 comparison against a candidate that may be a post-release: + // `>=V` includes V's posts, `>V`/`<=V` exclude them, `==V` matches + // only the plain release. + let satisfies = |c: &PypiVersion| { + requirements.iter().all(|(op, want)| match *op { + "==" => c.base == *want && c.post.is_none(), + ">=" => c.base >= *want, + "<=" => c.base < *want || (c.base == *want && c.post.is_none()), + "!=" => !(c.base == *want && c.post.is_none()), + ">" => c.base > *want, + "<" => c.base < *want, + _ => false, + }) + }; + Ok(candidates + .iter() + .filter_map(|c| PypiVersion::parse(&c.version).map(|v| (v, &c.version))) + .filter(|(v, _)| (allow_prerelease || !v.is_prerelease()) && satisfies(v)) + .max_by(|(a, _), (b, _)| a.cmp(b)) + .map(|(_, raw)| raw.clone())) +} + +/// `==X.*` / `==X.Y.*` / `==X.Y.Z.*` bounds: everything the written prefix +/// covers, half-open at the bumped last component. 
+fn wildcard_bounds(prefix: &str) -> Option<(semver::Version, semver::Version)> { + let lo = semver::Version::parse(&normalize_for_semver(prefix)).ok()?; + let hi = match prefix.split('.').count() { + 1 => semver::Version::new(lo.major + 1, 0, 0), + 2 => semver::Version::new(lo.major, lo.minor + 1, 0), + 3 => semver::Version::new(lo.major, lo.minor, lo.patch + 1), + _ => return None, + }; + Some((lo, hi)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn candidates(versions: &[&str]) -> Vec { + versions + .iter() + .map(|v| PypiCandidate { + version: v.to_string(), + uploaded: Utc::now(), + yanked: false, + }) + .collect() + } + + #[test] + fn specifier_resolves_highest_match() { + let c = candidates(&["1.0.0", "2.5.0", "3.0.0"]); + assert_eq!( + pypi_resolve_specifier(&c, ">=1.0,<3", false).expect("parse"), + Some("2.5.0".to_string()) + ); + } + + #[test] + fn prerelease_eligible_only_with_allow_prerelease() { + // `2.0.0rc1` is a prerelease. pip's default skips it; `--pre` selects + // it as the newest, so the gate must verdict it instead of 1.0.0. + let c = candidates(&["1.0.0", "2.0.0rc1"]); + assert_eq!( + pick_latest_stable(&c, false).map(|c| c.version.as_str()), + Some("1.0.0"), + "default resolution excludes the prerelease" + ); + assert_eq!( + pick_latest_stable(&c, true).map(|c| c.version.as_str()), + Some("2.0.0rc1"), + "--pre makes the prerelease eligible" + ); + // Same for specifier resolution. 
+ assert_eq!( + pypi_resolve_specifier(&c, ">=1.0", false).expect("parse"), + Some("1.0.0".to_string()) + ); + assert_eq!( + pypi_resolve_specifier(&c, ">=1.0", true).expect("parse"), + Some("2.0.0rc1".to_string()) + ); + } + + #[test] + fn specifier_with_no_match_is_ok_none() { + let c = candidates(&["1.0.0"]); + assert_eq!( + pypi_resolve_specifier(&c, ">=9.0", false).expect("parse"), + None + ); + } + + #[test] + fn canonical_pypi_name_accepts_equivalent_rejects_other() { + // Registry canonical spelling wins when PEP 503-equivalent… + assert_eq!( + canonical_pypi_name("Flask_Cors", Some("Flask-Cors")), + "Flask-Cors" + ); + assert_eq!( + canonical_pypi_name("zope-interface", Some("zope.interface")), + "zope.interface" + ); + // …but a non-equivalent name (hostile mirror) keeps the request. + assert_eq!( + canonical_pypi_name("flask-cors", Some("requests")), + "flask-cors" + ); + assert_eq!(canonical_pypi_name("flask-cors", None), "flask-cors"); + assert_eq!(canonical_pypi_name("flask-cors", Some("")), "flask-cors"); + } + + #[test] + fn latest_with_no_parseable_version_is_none_not_a_guess() { + // When nothing parses as PEP 440, guessing by upload time could + // pick a prerelease without --pre. None → a visible resolution + // error, consistent with unparseable specifiers. + let c = candidates(&["2!1.0", "weird-version"]); + assert!(pick_latest_stable(&c, false).is_none()); + assert!(pick_latest_stable(&c, true).is_none()); + } + + #[test] + fn unparseable_specifier_errors_instead_of_falling_back() { + // Resolving "latest stable" for an expression we can't represent + // would gate the wrong version. 
+ let c = candidates(&["1.0.0", "2.0.0"]); + for spec in ["@weird", ">= not-a-version", "!=1.*"] { + let err = pypi_resolve_specifier(&c, spec, false).expect_err(spec); + assert!( + err.contains("unsupported version specifier"), + "{spec}: {err}" + ); + } + } + + #[test] + fn wildcard_pin_resolves_as_a_range() { + // pip: `==4.2.*` matches the 4.2 series, highest first. + let c = candidates(&["4.1.0", "4.2.0", "4.2.9", "4.3.0"]); + assert_eq!( + pypi_resolve_specifier(&c, "==4.2.*", false).expect("parse"), + Some("4.2.9".to_string()) + ); + let c = candidates(&["0.9.0", "1.0.0", "1.9.0", "2.0.0"]); + assert_eq!( + pypi_resolve_specifier(&c, "==1.*", false).expect("parse"), + Some("1.9.0".to_string()) + ); + } + + #[test] + fn compatible_release_bumps_the_written_component() { + // PEP 440: `~=4.0` means `>=4.0, <5.0` (NOT `<4.1`) — pip installs + // 4.2.x, so the gate must verdict the same series. + let c = candidates(&["4.0.0", "4.0.5", "4.2.9", "5.0.0"]); + assert_eq!( + pypi_resolve_specifier(&c, "~=4.0", false).expect("parse"), + Some("4.2.9".to_string()) + ); + // `~=1.4.5` means `>=1.4.5, <1.5.0`. + let c = candidates(&["1.4.4", "1.4.6", "1.5.0"]); + assert_eq!( + pypi_resolve_specifier(&c, "~=1.4.5", false).expect("parse"), + Some("1.4.6".to_string()) + ); + } + + #[test] + fn post_releases_resolve_and_outrank_their_base() { + // pip installs post-releases by default; dropping them would + // verdict a different version than the install. + let c = candidates(&["1.0", "1.0.post1", "0.9.0"]); + assert_eq!( + pypi_resolve_specifier(&c, ">=1.0", false).expect("parse"), + Some("1.0.post1".to_string()) + ); + assert_eq!( + pick_latest_stable(&c, false).map(|c| c.version.as_str()), + Some("1.0.post1") + ); + // …but a plain `==1.0` pin means the base release, not its posts. + assert_eq!( + pypi_resolve_specifier(&c, "==1.0", false).expect("parse"), + Some("1.0".to_string()) + ); + // PEP 440: `>V` excludes V's own post-releases. 
+ assert_eq!( + pypi_resolve_specifier(&c, ">1.0", false).expect("parse"), + None + ); + } + + #[test] + fn yanked_only_releases_are_flagged() { + // 2.0.0 has every file yanked (one bool, one mirror-style reason + // string); 1.0.0 has a non-yanked file. Timestamps alone must not + // decide yanked status — yanked files keep theirs. + let meta: PypiInfoResponse = serde_json::from_str( + r#"{"releases":{ + "1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z","yanked":false}], + "2.0.0":[{"upload_time_iso_8601":"2021-01-01T00:00:00Z","yanked":true}, + {"upload_time_iso_8601":"2021-01-01T00:00:00Z","yanked":"broken build"}] + }}"#, + ) + .expect("parse pypi json"); + let candidates = collect_pypi_candidates(&meta); + let yanked_of = |v: &str| candidates.iter().find(|c| c.version == v).unwrap().yanked; + assert!(!yanked_of("1.0.0")); + assert!(yanked_of("2.0.0")); + + // Latest/specifier resolution must skip the yanked release… + let installable: Vec = + candidates.iter().filter(|c| !c.yanked).cloned().collect(); + assert_eq!( + pick_latest_stable(&installable, false).map(|c| c.version.as_str()), + Some("1.0.0") + ); + assert_eq!( + pypi_resolve_specifier(&installable, ">=1.0", false).expect("parse"), + Some("1.0.0".to_string()) + ); + // …while an exact pin still finds it (pip installs it with a warning). 
+ assert!(candidates.iter().any(|c| c.version == "2.0.0")); + } + + #[test] + fn release_with_partially_yanked_files_stays_installable() { + let meta: PypiInfoResponse = serde_json::from_str( + r#"{"releases":{"1.5.0":[ + {"upload_time_iso_8601":"2020-06-01T00:00:00Z","yanked":true}, + {"upload_time_iso_8601":"2020-06-01T00:00:00Z","yanked":false} + ]}}"#, + ) + .expect("parse pypi json"); + let candidates = collect_pypi_candidates(&meta); + assert!(!candidates[0].yanked); + } + + #[test] + fn parses_iso8601_variants() { + assert!(parse_iso8601("2024-01-02T03:04:05Z").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05.123Z").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05+00:00").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05").is_ok()); + assert!(parse_iso8601("not a date").is_err()); + } + + /// Network-touching integration tests. Skipped by default (#[ignore]) + /// so unit-test runs stay hermetic. Run with: + /// cargo test -- --ignored verify_deps::registry::tests::live + #[test] + #[ignore] + fn live_npm_resolve_latest() { + let r = npm_resolve("left-pad", &NpmSpec::Latest, None).expect("npm resolve latest"); + assert_eq!(r.name, "left-pad"); + assert_eq!(r.version, "1.3.0"); + assert_eq!(r.published_at.format("%Y-%m-%d").to_string(), "2018-04-09"); + } + + #[test] + #[ignore] + fn live_npm_resolve_exact() { + let r = npm_resolve("left-pad", &NpmSpec::Exact("1.3.0".to_string()), None) + .expect("npm resolve exact"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_range() { + let r = npm_resolve("left-pad", &NpmSpec::Range("^1.0.0".to_string()), None) + .expect("npm resolve range"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_npm_style_range() { + // npm uses spaces, the Rust crate uses commas — we should + // accept both. 
+ let r = npm_resolve( + "left-pad", + &NpmSpec::Range(">=1.0.0 <2.0.0".to_string()), + None, + ) + .expect("npm resolve space-range"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_unknown_tag() { + let err = npm_resolve( + "left-pad", + &NpmSpec::Tag("does-not-exist".to_string()), + None, + ) + .err() + .unwrap(); + assert!(err.contains("dist-tag"), "got: {}", err); + } + + #[test] + #[ignore] + fn live_pypi_resolve_latest() { + let r = pypi_resolve("flask", &PypiSpec::Latest, None, false).expect("pypi resolve latest"); + assert_eq!(r.name, "flask"); + assert!(!r.version.is_empty()); + } + + #[test] + #[ignore] + fn live_pypi_resolve_exact() { + let r = pypi_resolve( + "requests", + &PypiSpec::Exact("2.31.0".to_string()), + None, + false, + ) + .expect("pypi resolve exact"); + assert_eq!(r.version, "2.31.0"); + assert_eq!(r.published_at.format("%Y-%m-%d").to_string(), "2023-05-22"); + } + + #[test] + #[ignore] + fn live_pypi_resolve_specifier() { + let r = pypi_resolve( + "requests", + &PypiSpec::Specifier(">=2.30,<2.32".to_string()), + None, + false, + ) + .expect("pypi resolve specifier"); + // `requests==2.31.0` is the only release in [2.30, 2.32). + assert_eq!(r.version, "2.31.0"); + } +} diff --git a/tests/cli_bare_install.rs b/tests/cli_bare_install.rs new file mode 100644 index 0000000..755ee50 --- /dev/null +++ b/tests/cli_bare_install.rs @@ -0,0 +1,213 @@ +//! Hermetic e2e tests for zero-spec ("bare") installs. +//! +//! With a `package.json`, bare `npm install` is gated like any other +//! install: the tree pass resolves the full lockfile set and verdicts +//! every package, so a vulnerable lockfile blocks (exit 1, `--force` +//! escape). +//! +//! Harness mirrors `cli_tree.rs`: tree-aware fake npm on a private PATH + +//! local registry stub + in-crate vuln-api stub. `oldpkg` is published in +//! 2020 so recency never blocks here. 
+ +#![cfg(unix)] + +mod common; + +use common::{key, vulnerable_body, GateHarness, NPM_LOCK, RESOLUTION_FAILS}; +use std::collections::HashMap; + +const PACKAGE_JSON: &str = r#"{"name":"proj","version":"1.0.0","dependencies":{"oldpkg":"1.0.0"}}"#; + +#[test] +fn bare_npm_install_vulnerable_lockfile_blocks() { + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None), + ); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(checks) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "vulnerable lockfile must block"); + assert_eq!( + h.recorded_argv(), + None, + "npm must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("evildep"), "stdout: {stdout}"); + assert!(stdout.contains("MAL-2024-0002"), "stdout: {stdout}"); + assert!(stdout.contains("(transitive)"), "stdout: {stdout}"); + // A bare install names no targets, so everything resolved is the + // existing tree's — the refusal must say so. 
+ let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("your existing dependency tree has known-vulnerable packages"), + "bare install blames the existing tree: {stderr}" + ); +} + +#[test] +fn bare_npm_install_clean_lockfile_proceeds() { + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean tree must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("tree: 2 packages resolved"), + "stdout: {stdout}" + ); +} + +#[test] +fn bare_npm_install_force_overrides_block() { + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None), + ); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(checks) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h + .cmd + .args(["npm", "--force", "install"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "--force must run the install"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + assert!( + String::from_utf8_lossy(&out.stdout).contains("evildep"), + "findings still printed under --force" + ); +} + +#[test] +fn bare_npm_resolution_failure_falls_back_with_warning() { + // Fake npm exits 1 on `--package-lock-only`. Nothing named remains to + // verify, so the install proceeds behind the loud fallback warning. 
+ let mut h = GateHarness::new() + .fake_tree_pm("npm", RESOLUTION_FAILS, 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "fallback must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("transitive dependencies not checked"), + "stderr must carry the fallback warning: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn bare_npm_without_package_json_passes_through() { + // No package.json in cwd → nothing to resolve → straight exec, no gate. + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 3) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .in_project_dir() + .build(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(3), "npm's own exit code propagates"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(!stdout.contains("Pre-checking"), "stdout: {stdout}"); +} + +#[test] +fn bare_npm_install_root_redirect_refuses_without_force() { + // A bare `npm install --prefix ` installs another project's whole + // tree; the gate can't resolve that from the CWD and nothing named + // verifies it — fail closed unless --force. 
+ let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h + .cmd + .args(["npm", "install", "--prefix", "/tmp/other-project"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "bare root-redirect must refuse"); + assert_eq!(h.recorded_argv(), None, "npm must not run"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("redirects the project root"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + // --force bypasses. + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h + .cmd + .args([ + "npm", + "--force", + "install", + "--prefix", + "/tmp/other-project", + ]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "--force proceeds"); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install --prefix /tmp/other-project") + ); +} + +#[test] +fn bare_npm_install_from_subdirectory_is_gated() { + // npm walks ancestors to find the project; the gate must too, or a + // bare install from /src would install the whole (vulnerable) + // tree silently unchecked. 
+ let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None), + ); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(checks) + .with_project_file("package.json", PACKAGE_JSON) + .in_subdir("src") + .build(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "vulnerable lockfile must block from a subdirectory too" + ); + assert_eq!( + h.recorded_argv(), + None, + "npm must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("evildep"), "stdout: {stdout}"); +} diff --git a/tests/cli_exec_fallback.rs b/tests/cli_exec_fallback.rs new file mode 100644 index 0000000..87dcdbd --- /dev/null +++ b/tests/cli_exec_fallback.rs @@ -0,0 +1,111 @@ +//! Hermetic e2e tests for package-manager binary resolution: the pip→pip3 +//! fallback and the missing-binary error (exit 127). +//! +//! Same harness shape as `cli_install.rs`: the real `corgea` binary, a local +//! TcpListener stub standing in for PyPI, and a controlled `PATH` dir that +//! either holds a fake `pip3` (recording its argv to a marker file) or +//! nothing at all. Unix-only — the fake manager is a shell script. 
+ +#![cfg(unix)] + +mod common; + +use common::GateHarness; + +#[test] +fn pip_install_falls_back_to_pip3_when_pip_missing() { + let mut h = GateHarness::new() + .fake_recorder("pip3", 0) + .oldpkg_registry() + .build(); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install oldpkg==1.0.0"), + "the install must run via pip3 with forwarded args" + ); +} + +#[test] +fn pip_passthrough_falls_back_to_pip3() { + let mut h = GateHarness::new() + .fake_recorder("pip3", 0) + .oldpkg_registry() + .build(); + let out = h.cmd.args(["pip", "list"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("list")); +} + +#[test] +fn pip_missing_both_pip_and_pip3_exits_127_with_message() { + let mut h = GateHarness::new().oldpkg_registry().build(); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(127)); + assert_eq!(h.recorded_argv(), None, "nothing must have run"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: 'pip' not found on PATH (also tried 'pip3')"), + "stderr: {stderr}" + ); +} + +#[test] +fn pip3_top_level_command_prints_pip_wrapper_suggestion() { + let mut h = GateHarness::new() + .fake_recorder("pip3", 0) + .oldpkg_registry() + .build(); + let out = h + .cmd + .args(["pip3", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip3 must not run"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: unknown package manager `pip3`."), + "stderr: {stderr}" + ); + assert!( + 
stderr.contains("Did you mean `corgea pip install oldpkg==1.0.0`?"), + "stderr: {stderr}" + ); + assert!( + String::from_utf8_lossy(&out.stdout).is_empty(), + "stdout: {}", + String::from_utf8_lossy(&out.stdout) + ); +} + +#[test] +fn npm_missing_binary_error_names_binary_without_fallback() { + let mut h = GateHarness::new().oldpkg_registry().build(); + let out = h.cmd.args(["npm", "list"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(127)); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: 'npm' not found on PATH"), + "stderr: {stderr}" + ); + assert!( + !stderr.contains("also tried"), + "npm has no fallback alias; stderr: {stderr}" + ); +} diff --git a/tests/cli_install.rs b/tests/cli_install.rs new file mode 100644 index 0000000..ded87c4 --- /dev/null +++ b/tests/cli_install.rs @@ -0,0 +1,404 @@ +//! Hermetic end-to-end tests for the install wrappers (`corgea pip|npm …`). +//! +//! Each test spawns the real binary (`CARGO_BIN_EXE_corgea`) against: +//! * a local TcpListener stub standing in for PyPI / the npm registry +//! (wired up via `CORGEA_PYPI_REGISTRY` / `CORGEA_NPM_REGISTRY`), and +//! * a fake package manager on `PATH` — a shell script that records its +//! argv to a marker file, proving whether the install actually ran. +//! +//! No live network. The fake package managers are Unix shell scripts, so +//! the whole file is Unix-only (matching the repo's Linux/macOS CI). + +#![cfg(unix)] + +mod common; + +use common::{ + npm_packument, pip_harness, pypi_release_json, spawn_http_stub, GateHarness, NOT_FOUND_JSON, + OLD_TS, RESOLUTION_FAILS, +}; +use std::collections::HashMap; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; + +/// Spawn a registry stub serving both the PyPI and npm routes the +/// resolver hits. Returns the base URL and a counter of accepted +/// connections (used to prove "no registry hit" for passthroughs). 
+/// +/// Routes: +/// * `/pypi/oldpkg/json` — one release, published 2020-01-01 +/// * `/pypi/freshpkg/json` — one release, published one hour ago +/// * `/oldpkg` — npm metadata, published 2020-01-01 +/// * `/freshpkg` — npm metadata, published one hour ago +/// * anything else — 404 +fn spawn_registry_stub() -> (String, Arc) { + let hits = Arc::new(AtomicUsize::new(0)); + let hits_in_stub = Arc::clone(&hits); + let base_url = spawn_http_stub(move |path| { + hits_in_stub.fetch_add(1, Ordering::SeqCst); + let fresh_ts = (chrono::Utc::now() - chrono::Duration::hours(1)) + .format("%Y-%m-%dT%H:%M:%SZ") + .to_string(); + match path { + "/pypi/oldpkg/json" => ("200 OK", pypi_release_json("oldpkg", "1.0.0", OLD_TS)), + "/pypi/freshpkg/json" => ("200 OK", pypi_release_json("freshpkg", "9.9.9", &fresh_ts)), + "/oldpkg" => ("200 OK", npm_packument("1.0.0", OLD_TS)), + "/freshpkg" => ("200 OK", npm_packument("9.9.9", &fresh_ts)), + _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), + } + }); + (base_url, hits) +} + +fn wrapper(binary: &str, registry_env: &str, pm_exit_code: i32) -> GateHarness { + wrapper_with_hits(binary, registry_env, pm_exit_code).0 +} + +fn wrapper_with_hits( + binary: &str, + registry_env: &str, + pm_exit_code: i32, +) -> (GateHarness, Arc) { + let (base_url, registry_hits) = spawn_registry_stub(); + // RESOLUTION_FAILS: the tree dry-run exits non-zero without touching + // the argv marker, so `recorded_argv()` reflects only the real install. 
+ let h = GateHarness::new() + .fake_tree_pm(binary, RESOLUTION_FAILS, pm_exit_code) + .registry_env(registry_env, &base_url) + .build(); + (h, registry_hits) +} + +#[test] +fn pip_fresh_pin_blocks_without_running_install() { + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "freshpkg==9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip must not run when blocked"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("within threshold"), "stdout: {stdout}"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("Refusing to run install"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn pip_old_pin_runs_install_with_forwarded_args() { + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("published"), "stdout: {stdout}"); +} + +#[test] +fn pip_no_fail_demotes_block_and_installs() { + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "--no-fail", "install", "freshpkg==9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install freshpkg==9.9.9"), + "--no-fail must still run the install" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("within threshold"), "stdout: {stdout}"); +} + +#[test] +fn 
pip_non_install_subcommand_passes_through_without_registry_hit() { + let (mut h, registry_hits) = wrapper_with_hits("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "list"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("list")); + assert_eq!( + registry_hits.load(Ordering::SeqCst), + 0, + "passthrough must not touch the registry" + ); +} + +#[test] +fn pip_add_blocks_with_install_suggestion_without_running_pip() { + let (mut h, registry_hits) = wrapper_with_hits("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "add", "oldpkg"]) + .output() + .expect("failed to run corgea"); + + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip must not run"); + assert_eq!( + registry_hits.load(Ordering::SeqCst), + 0, + "invalid pip command must not touch the registry" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: pip does not support `add`."), + "stderr: {stderr}" + ); + assert!( + stderr.contains("Did you mean `corgea pip install oldpkg`?"), + "stderr: {stderr}" + ); +} + +#[test] +fn pip_resolution_error_prints_error_but_install_proceeds() { + // `nosuchpkg` hits the stub's 404 route → an error outcome, which + // warns but does not block: public mode fails open when no verdict + // can be obtained — the install must still run. 
+ let (mut h, registry_hits) = wrapper_with_hits("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "nosuchpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!( + registry_hits.load(Ordering::SeqCst) >= 1, + "the 404 route must have been hit" + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install nosuchpkg==1.0.0"), + "a resolution error must not block the install" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("not found"), "stdout: {stdout}"); + assert!(stdout.contains("1 errors"), "stdout: {stdout}"); +} + +#[test] +fn pip_mixed_fresh_and_old_pins_block_without_running_install() { + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "freshpkg==9.9.9", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!( + h.recorded_argv(), + None, + "one recent target must block the whole install" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("within threshold"), "stdout: {stdout}"); + assert!(stdout.contains("1 ok, 1 recent"), "stdout: {stdout}"); +} + +#[test] +fn npm_fresh_pin_blocks_without_running_install() { + let mut h = wrapper("npm", "CORGEA_NPM_REGISTRY", 0); + let out = h + .cmd + .args(["npm", "install", "freshpkg@9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "npm must not run when blocked"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("Refusing to run install"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn npm_old_pin_runs_install_with_forwarded_args() { + let mut h = wrapper("npm", "CORGEA_NPM_REGISTRY", 0); + let out = h 
+ .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); +} + +#[test] +fn npm_install_verb_behind_global_flags_is_still_gated() { + // SKILL.md promises `npm --loglevel silent install x` is still gated: + // the verb is found behind global flags, and the flag's value is not + // mistaken for the verb. + let mut h = wrapper("npm", "CORGEA_NPM_REGISTRY", 0); + let out = h + .cmd + .args(["npm", "--loglevel", "silent", "install", "freshpkg@9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1), "gate must fire behind flags"); + assert_eq!(h.recorded_argv(), None, "npm must not run when blocked"); +} + +#[test] +fn npm_install_aliases_are_gated_not_passed_through() { + // npm accepts many install aliases (and tolerates the typo `isntall`). + // Each must route through the GATE, not the ungated passthrough: a + // fresh-pinned package blocks (exit 1, npm never runs). If the alias + // slipped past the gate, npm would run the fresh package instead. 
+ for alias in ["isntall", "in", "ins"] { + let mut h = wrapper("npm", "CORGEA_NPM_REGISTRY", 0); + let out = h + .cmd + .args(["npm", alias, "freshpkg@9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "alias `{alias}` must be gated; stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!( + h.recorded_argv(), + None, + "alias `{alias}`: npm must not run when blocked" + ); + assert!( + String::from_utf8_lossy(&out.stderr).contains("Refusing to run install"), + "alias `{alias}` stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + } +} + +#[test] +fn wrapper_forwards_package_manager_exit_code() { + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 7); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(7), + "the package manager's exit code must be forwarded" + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); +} + +// SKILL.md promises "Git/URL/path specs … are noted, never blocked". The +// three tests below pin that end-to-end. 
+ +#[test] +fn pip_git_url_spec_skips_verification_and_execs() { + let mut h = pip_harness(HashMap::new(), HashMap::new(), 0); + let out = h + .cmd + .args(["pip", "install", "git+https://github.com/x/y.git"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install git+https://github.com/x/y.git"), + "pip must receive the raw spec" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("registry verification skipped"), + "stdout: {stdout}" + ); +} + +#[test] +fn pip_filesystem_path_spec_skips_verification_and_execs() { + let mut h = pip_harness(HashMap::new(), HashMap::new(), 0); + let out = h + .cmd + .args(["pip", "install", "."]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install .")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("registry verification skipped"), + "stdout: {stdout}" + ); +} + +#[test] +fn npm_github_shorthand_skips_verification_and_execs() { + let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .build(); + let out = h + .cmd + .args(["npm", "install", "user/repo"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install user/repo")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("registry verification skipped"), + "stdout: {stdout}" + ); +} diff --git a/tests/cli_npm_ci.rs 
b/tests/cli_npm_ci.rs new file mode 100644 index 0000000..19abb9a --- /dev/null +++ b/tests/cli_npm_ci.rs @@ -0,0 +1,265 @@ +//! Hermetic e2e tests for the `corgea npm ci` gate and install-verb routing. +//! +//! `npm ci` installs the project lockfile exactly as written, so the gate +//! verdicts the lockfile-pinned set directly — no dry-run subprocess. Verb +//! routing must also find the install verb behind global flags +//! (`npm --silent install …`), or those spellings would exec ungated. +//! +//! Harness mirrors `cli_bare_install.rs`: fake npm argv recorder on a +//! private PATH + local registry stub + in-crate vuln-api stub. + +#![cfg(unix)] + +mod common; + +use common::{key, vulnerable_body, GateHarness, NPM_LOCK}; +use std::collections::HashMap; + +const PACKAGE_JSON: &str = r#"{"name":"proj","version":"1.0.0","dependencies":{"oldpkg":"1.0.0"}}"#; + +fn vulnerable_evildep_checks() -> HashMap { + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None), + ); + checks +} + +#[test] +fn npm_ci_vulnerable_lockfile_blocks() { + let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(vulnerable_evildep_checks()) + .with_project_file("package.json", PACKAGE_JSON) + .with_project_file("package-lock.json", NPM_LOCK) + .build(); + let out = h.cmd.args(["npm", "ci"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "vulnerable lockfile must block"); + assert_eq!( + h.recorded_argv(), + None, + "npm must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in ["evildep", "MAL-2024-0002", "(locked)"] { + assert!(stdout.contains(needle), "stdout: {stdout}"); + } +} + +#[test] +fn npm_ci_clean_lockfile_proceeds() { + let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + 
.with_project_file("package-lock.json", NPM_LOCK) + .build(); + let out = h + .cmd + .args(["npm", "ci", "--ignore-scripts"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean lockfile must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("ci --ignore-scripts")); +} + +#[test] +fn npm_ci_unparsable_lockfile_refuses_without_force() { + let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .with_project_file("package-lock.json", "not json") + .build(); + let out = h.cmd.args(["npm", "ci"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "unverifiable lockfile refuses"); + assert_eq!(h.recorded_argv(), None, "npm must not run"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("cannot verify 'npm ci'") && stderr.contains("--force"), + "stderr: {stderr}" + ); +} + +#[test] +fn npm_ci_unparsable_lockfile_force_proceeds() { + let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .with_project_file("package-lock.json", "not json") + .build(); + let out = h + .cmd + .args(["npm", "--force", "ci"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "--force proceeds unchecked"); + assert_eq!(h.recorded_argv().as_deref(), Some("ci")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("proceeding under --force"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn npm_ci_root_redirect_refuses_without_force() { + // `npm ci --prefix ../other` installs a different project's lockfile than + // the CWD one we'd verdict — fail closed rather than pass on the wrong + // project. 
+ let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .with_project_file("package-lock.json", NPM_LOCK) + .build(); + let out = h + .cmd + .args(["npm", "ci", "--prefix", "/tmp/other-project"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "root-redirect ci must refuse"); + assert_eq!(h.recorded_argv(), None, "npm must not run"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("--prefix") && stderr.contains("redirected project"), + "stderr: {stderr}" + ); + + // --force bypasses. + let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .with_project_file("package-lock.json", NPM_LOCK) + .build(); + let out = h + .cmd + .args(["npm", "--force", "ci", "--prefix", "/tmp/other-project"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "--force proceeds"); + assert_eq!( + h.recorded_argv().as_deref(), + Some("ci --prefix /tmp/other-project") + ); +} + +#[test] +fn npm_ci_registry_flag_warns_then_proceeds() { + // `npm ci --registry ` pulls tarballs from the override while the + // gate verdicts the lockfile against the default registry — warn, but + // still proceed on a clean lockfile. 
+ let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .with_project_file("package-lock.json", NPM_LOCK) + .build(); + let out = h + .cmd + .args(["npm", "ci", "--registry", "https://evil.example/"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean lockfile must proceed"); + assert_eq!( + h.recorded_argv().as_deref(), + Some("ci --registry https://evil.example/") + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("--registry") && stderr.contains("custom registry"), + "npm ci --registry must warn: {stderr}" + ); +} + +#[test] +fn npm_ci_npmrc_registry_warns_then_proceeds() { + // A project `.npmrc` `registry=` line silently redirects resolution; the + // gate copies it into its temp dir so resolution honours the override, yet + // verdicts against the default registry — warn on it. + let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .with_project_file("package-lock.json", NPM_LOCK) + .with_project_file(".npmrc", "registry=https://evil.example/\n") + .build(); + let out = h.cmd.args(["npm", "ci"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean lockfile must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("ci")); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains(".npmrc") && stderr.contains("custom registry"), + "project .npmrc registry override must warn: {stderr}" + ); +} + +#[test] +fn npm_ci_without_lockfile_execs() { + // npm ci errors on its own without a lockfile; nothing to gate. 
+ let mut h = GateHarness::new() + .fake_recorder("npm", 9) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h.cmd.args(["npm", "ci"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(9), "npm's own exit code propagates"); + assert_eq!(h.recorded_argv().as_deref(), Some("ci")); +} + +#[test] +fn global_flags_before_the_verb_still_gate() { + // `npm --loglevel silent install ` must route to the + // gate, not the ungated passthrough. + let mut checks = HashMap::new(); + checks.insert( + key("npm", "oldpkg", "1.0.0"), + vulnerable_body("npm", "oldpkg", "1.0.0", "MAL-2024-0001", None), + ); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(checks) + .in_project_dir() + .build(); + let out = h + .cmd + .args(["npm", "--loglevel", "silent", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "flags before the verb must not skip the gate: {}", + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!( + h.recorded_argv(), + None, + "npm must not run on a vulnerable verdict" + ); +} + +#[test] +fn global_flags_before_the_verb_forward_on_clean() { + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .in_project_dir() + .build(); + let out = h + .cmd + .args(["npm", "--loglevel", "silent", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean pin proceeds"); + // The verb leads the reconstructed argv; the global flags still arrive. + assert_eq!( + h.recorded_argv().as_deref(), + Some("install --loglevel silent oldpkg@1.0.0") + ); +} diff --git a/tests/cli_provenance.rs b/tests/cli_provenance.rs new file mode 100644 index 0000000..b4485ae --- /dev/null +++ b/tests/cli_provenance.rs @@ -0,0 +1,180 @@ +//! 
Hermetic e2e tests for provenance labels on tree-pass findings: +//! `(from requirements)` for pip-requested packages, `(already in +//! package.json)` for npm direct deps the project already declares (plus the +//! `fix with:` advertised-fix hint), `(transitive)` otherwise. +//! +//! Same harness pattern as `cli_tree.rs`: fake package manager on a private +//! PATH (answers the tree-resolution invocation with a canned payload), +//! a local registry stub, and the in-crate vuln-api stub. `oldpkg` is +//! published in 2020 so recency never blocks — every block is the verdict's. + +#![cfg(unix)] + +mod common; + +use common::{key, tree_harness, GateHarness, NPM_LOCK}; +use std::collections::HashMap; + +/// Vulnerable verdict body; `fixed: None` renders `"fixed_version":null`. +fn vulnerable_body(ecosystem: &str, name: &str, version: &str, fixed: Option<&str>) -> String { + common::vulnerable_body(ecosystem, name, version, "MAL-2024-0002", fixed) +} + +/// Pip report: only `reqpkg`, requested (as if it came from a `-r` file). +const PIP_REQ_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"reqpkg","version":"6.0.0"},"requested":true}]}"#; + +/// Project manifest that already declares `evildep` as a direct dep. +const PROJECT_MANIFEST: &str = + r#"{"name":"proj","version":"1.0.0","dependencies":{"evildep":"^0.4.0"}}"#; + +/// npm tree harness whose project dir holds a `package.json` that already +/// declares `evildep` as a direct dep. +fn npm_project_harness( + checks: HashMap, + payload: &str, +) -> GateHarness { + tree_harness("npm", checks, HashMap::new(), payload) + .with_project_file("package.json", PROJECT_MANIFEST) +} + +#[test] +fn pip_requirements_finding_labeled_from_requirements() { + // The flagged package comes from a `-r` file (pip marks it `requested`), + // so it must not be mislabeled "(transitive)". 
+ let mut checks = HashMap::new(); + checks.insert( + key("pypi", "reqpkg", "6.0.0"), + vulnerable_body("pypi", "reqpkg", "6.0.0", None), + ); + let mut h = tree_harness("pip", checks, HashMap::new(), PIP_REQ_REPORT); + let out = h + .cmd + .args(["pip", "install", "-r", "reqs.txt"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "requested vuln must block"); + assert_eq!(h.recorded_argv(), None, "pip must not run"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("reqpkg@6.0.0 (from requirements)"), + "stdout: {stdout}" + ); + assert!(!stdout.contains("(transitive)"), "stdout: {stdout}"); +} + +#[test] +fn npm_preexisting_direct_dep_labeled_with_fix_hint() { + // `evildep` is already a direct dep in the project's package.json; the + // finding gets the pre-existing label plus the fix-command hint. The + // fix 1.2.2 covers every advisory (`safe_version` is Some), so the hint + // drops the "(advertised fix)" hedge. + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", Some("1.2.2")), + ); + let mut h = npm_project_harness(checks, NPM_LOCK); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "pre-existing vuln must block"); + assert_eq!(h.recorded_argv(), None, "npm must not run"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("evildep@0.4.2 (already in package.json)"), + "stdout: {stdout}" + ); + assert!( + stdout.contains("fix with: corgea npm install evildep@1.2.2\n"), + "verified fix hint must print without the advertised-fix hedge: {stdout}" + ); +} + +#[test] +fn npm_preexisting_fix_hint_keeps_hedge_when_fix_is_partial() { + // One advisory advertises fix 1.2.2, the other has no fix: bumping is + // still the best move but doesn't clear everything, so the steer line + // stays quiet and the 
fix-command hint keeps its "(advertised fix)" + // hedge. + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + r#"{"ecosystem":"npm","package_name":"evildep","version":"0.4.2","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":"1.2.2"}, + {"advisory_id":"MAL-2024-0003","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string(), + ); + let mut h = npm_project_harness(checks, NPM_LOCK); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "pre-existing vuln must block"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("fix with: corgea npm install evildep@1.2.2 (advertised fix)"), + "partial fix hint must keep the hedge: {stdout}" + ); + assert!( + !stdout.contains("→ safe version"), + "a partial fix must not print the steer: {stdout}" + ); +} + +#[test] +fn npm_preexisting_without_fix_has_no_hint() { + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", None), + ); + let mut h = npm_project_harness(checks, NPM_LOCK); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("evildep@0.4.2 (already in package.json)"), + "stdout: {stdout}" + ); + assert!( + !stdout.contains("fix with:"), + "no advertised fix → no hint; stdout: {stdout}" + ); +} + +#[test] +fn named_install_with_transitive_vulnerable_keeps_generic_refusal() { + // A named install pulling in a vulnerable transitive is the command's + // doing — the refusal must NOT blame the existing tree. 
+ let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", None), + ); + let mut h = tree_harness("npm", checks, HashMap::new(), NPM_LOCK); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("Refusing to run install. Pass --force to proceed despite findings."), + "stderr: {stderr}" + ); + assert!( + !stderr.contains("your existing dependency tree"), + "a command-added transitive must not blame the existing tree: {stderr}" + ); +} diff --git a/tests/cli_remediation.rs b/tests/cli_remediation.rs new file mode 100644 index 0000000..f8ce5e9 --- /dev/null +++ b/tests/cli_remediation.rs @@ -0,0 +1,74 @@ +//! Hermetic e2e tests for remediation steering: a blocked install names the +//! safe version from the verdict's `fixed_version` data — the highest fix +//! covering every advisory. When any advisory has no known fix, no steer +//! prints. +//! +//! Uses the shared `common::pip_harness` (pypi stub published 2020 so recency +//! never blocks, a fake pip recording its argv, and the in-crate vuln-api +//! stub) — every block here is the verdict's doing. 
+ +#![cfg(unix)] + +mod common; + +use common::{key, pip_harness, vulnerable_body}; +use std::collections::HashMap; + +fn fixed_body() -> String { + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")) +} + +fn no_fix_body() -> String { + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0002", None) +} + +#[test] +fn fixed_match_blocks_and_names_safe_version() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); + let mut h = pip_harness(checks, HashMap::new(), 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!( + h.recorded_argv(), + None, + "pip must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("fixed in 2.0.0"), "stdout: {stdout}"); + assert!( + stdout.contains("safe version: oldpkg@2.0.0"), + "stdout: {stdout}" + ); +} + +#[test] +fn no_fix_match_reports_no_fixed_version_known() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), no_fix_body()); + let mut h = pip_harness(checks, HashMap::new(), 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!( + h.recorded_argv(), + None, + "pip must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("no fixed version known"), + "stdout: {stdout}" + ); + assert!( + !stdout.contains("safe version:"), + "no steer line when the fix is unknown: {stdout}" + ); +} diff --git a/tests/cli_tree.rs b/tests/cli_tree.rs new file mode 100644 index 0000000..654682c --- /dev/null +++ b/tests/cli_tree.rs @@ -0,0 +1,380 @@ +//! Hermetic e2e tests for the full-tree resolution pass +//! (`corgea pip|npm install …` with a `CORGEA_VULN_API_URL` stub). +//! +//! 
Composes the `cli_verdict.rs` harness pattern (fake package manager on a +//! private PATH + local registry stub + in-crate vuln-api stub) with a +//! tree-aware fake manager: a dry-run invocation answers with a canned +//! payload, every other invocation records its argv to a marker and exits. +//! `oldpkg` is published in 2020 so recency never blocks here — every block +//! is the verdict's doing. + +#![cfg(unix)] + +mod common; + +use common::{ + key, tree_harness, vulnerable_body, GateHarness, NPM_LOCK, RESOLUTION_FAILS, TREE_REPORT, +}; +use std::collections::HashMap; +use tempfile::TempDir; + +#[test] +fn pip_only_binary_guard_wins_over_user_no_binary() { + // SECURITY: the non-execution guard `--only-binary :all:` must land AFTER + // the user's args (pip format-control is last-wins), so a user + // `--no-binary :all:` can't re-enable sdist builds during the report step. + // The fake pip records its dry-run argv to the marker on the --dry-run + // branch and no-ops the real install, so `recorded_argv()` is the dry-run. 
+ let mut h = GateHarness::new() + .script_with_paths("pip", |_, marker| { + format!( + "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) printf '%s' \"$*\" > '{}'; printf '{{\"install\":[{{\"metadata\":{{\"name\":\"oldpkg\",\"version\":\"1.0.0\"}},\"requested\":true}}]}}'; exit 0;; esac\nexit 0\n", + marker.display() + ) + }) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .build(); + let out = h + .cmd + .args(["pip", "install", "--no-binary", ":all:", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean tree proceeds"); + let argv = h.recorded_argv().expect("dry-run argv recorded"); + assert!( + argv.contains("--no-binary :all:"), + "user flag must be forwarded: {argv}" + ); + assert!( + argv.trim_end().ends_with("--only-binary :all:"), + "the guard must be appended LAST so it wins: {argv}" + ); +} + +#[test] +fn pip_requirements_format_control_refuses_dry_run() { + // SECURITY: pip applies `--no-binary` directives found INSIDE a -r file + // AFTER CLI parsing, overriding the trailing `--only-binary :all:` + // guard — the dry-run would select and build sdists, executing package + // code. The tree pass must refuse to dry-run such files and degrade to + // the named-only fallback (whose parser skips option lines), still + // verdicting the file's registry entries. + let cwd = TempDir::new().expect("temp cwd"); + std::fs::write( + cwd.path().join("requirements.txt"), + "--no-binary :all:\noldpkg==1.0.0\n", + ) + .expect("write requirements.txt"); + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "oldpkg", "1.0.0"), + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0004", None), + ); + // The fake pip records argv ONLY on its dry-run branch: a recorded + // marker would mean the dry-run executed against the hostile file. 
+ let mut h = GateHarness::new() + .script_with_paths("pip", |_, marker| { + format!( + "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) printf '%s' \"$*\" > '{}';; esac\nexit 0\n", + marker.display() + ) + }) + .oldpkg_registry() + .vuln_checks(checks) + .build(); + let out = h + .cmd + .current_dir(cwd.path()) + .args(["pip", "install", "-r", "requirements.txt"]) + .output() + .expect("run corgea"); + + assert_eq!( + out.status.code(), + Some(1), + "the file's vulnerable entry must still block via the fallback" + ); + assert_eq!( + h.recorded_argv(), + None, + "the dry-run must never execute against a format-control requirements file" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("--no-binary") && stderr.contains("not dry-running"), + "stderr must name the refusing directive: {stderr}" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("MAL-2024-0004"), "stdout: {stdout}"); +} + +fn vulnerable_evildep_body(ecosystem: &str) -> String { + vulnerable_body(ecosystem, "evildep", "0.4.2", "MAL-2024-0002", None) +} + +#[test] +fn transitive_vulnerable_blocks_install() { + // Only the transitive `evildep` is flagged; the named `oldpkg` is clean. 
+ let cases = [ + ( + "pip", + "pypi", + TREE_REPORT, + &["pip", "install", "oldpkg==1.0.0"][..], + ), + ( + "npm", + "npm", + NPM_LOCK, + &["npm", "install", "oldpkg@1.0.0"][..], + ), + ]; + for (binary, eco, payload, args) in cases { + let mut checks = HashMap::new(); + checks.insert(key(eco, "evildep", "0.4.2"), vulnerable_evildep_body(eco)); + let mut h = tree_harness(binary, checks, HashMap::new(), payload); + let out = h.cmd.args(args).output().expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "{binary}: transitive vuln must block" + ); + assert_eq!( + h.recorded_argv(), + None, + "{binary} must not run on a transitive vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in ["evildep", "MAL-2024-0002", "(transitive)"] { + assert!(stdout.contains(needle), "{binary} stdout: {stdout}"); + } + } +} + +#[test] +fn tree_pass_runs_via_pip3_when_pip_is_absent() { + // Only `pip3` exists on PATH (common Linux/macOS). The tree pass must + // use the same pip → pip3 fallback as the exec path instead of silently + // degrading to named-only — the transitive `evildep` must still block. 
+ let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_evildep_body("pypi"), + ); + let mut h = tree_harness("pip3", checks, HashMap::new(), TREE_REPORT); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert_eq!(h.recorded_argv(), None); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + !stderr.contains("transitive dependencies not checked"), + "tree pass must not degrade with only pip3 on PATH: {stderr}" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("evildep"), "stdout: {stdout}"); +} + +#[test] +fn resolution_failure_falls_back_with_loud_warning() { + // The fake manager fails its tree invocation (pip: exits 2 on `--dry-run`, + // simulating an old pip with no `--report`; npm: exits 1 on + // `--package-lock-only`). Stub is all-clean, so the named-only fallback + // proceeds. + let cases = [ + ( + "pip", + &["pip", "install", "oldpkg==1.0.0"][..], + "install oldpkg==1.0.0", + ), + ( + "npm", + &["npm", "install", "oldpkg@1.0.0"][..], + "install oldpkg@1.0.0", + ), + ]; + for (binary, args, forwarded_argv) in cases { + let mut h = tree_harness(binary, HashMap::new(), HashMap::new(), RESOLUTION_FAILS); + let out = h.cmd.args(args).output().expect("run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "{binary}: clean named-only must proceed" + ); + assert_eq!(h.recorded_argv().as_deref(), Some(forwarded_argv)); + assert!( + String::from_utf8_lossy(&out.stderr).contains("transitive dependencies not checked"), + "{binary} stderr must carry the fallback warning: {}", + String::from_utf8_lossy(&out.stderr) + ); + } +} + +#[test] +fn pip_requirements_fallback_checks_file_entries_when_tree_fails() { + // A VCS requirement can make pip's dry-run fail before it emits a report. 
+ // The degraded path must still verify registry requirements from the file + // and surface the VCS row as skipped instead of producing an empty check. + let cwd = TempDir::new().expect("temp cwd"); + std::fs::write( + cwd.path().join("requirements.txt"), + "oldpkg==1.0.0\nidna @ git+https://github.com/jazzband/idna.git@main\n", + ) + .expect("write requirements.txt"); + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "oldpkg", "1.0.0"), + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0003", None), + ); + let mut h = tree_harness("pip", checks, HashMap::new(), RESOLUTION_FAILS); + let out = h + .cmd + .current_dir(cwd.path()) + .args(["pip", "install", "-r", "requirements.txt"]) + .output() + .expect("run corgea"); + + assert_eq!(out.status.code(), Some(1), "requirements vuln must block"); + assert_eq!( + h.recorded_argv(), + None, + "pip must not run on a vulnerable requirements entry" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in [ + "oldpkg==1.0.0", + "MAL-2024-0003", + "idna @ git+https://github.com/jazzband/idna.git@main", + "PEP 508 direct reference", + ] { + assert!(stdout.contains(needle), "stdout: {stdout}"); + } + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("transitive dependencies not checked"), + "stderr must carry the fallback warning: {stderr}" + ); +} + +#[test] +fn pip_clean_tree_proceeds() { + // Stub default-clean (no overrides), so every resolved package is clean. 
+ let mut h = tree_harness("pip", HashMap::new(), HashMap::new(), TREE_REPORT); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean tree must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("tree: 2 packages resolved"), + "stdout: {stdout}" + ); +} + +#[test] +fn pip_full_tree_still_notes_requirements_skip_recency() { + // A successful (Full) tree pass verdicts the `-r` packages but never + // recency-checks them, so the "not recency-checked" note must still print — + // it was previously suppressed whenever the tree pass was Full. + let cwd = TempDir::new().expect("temp cwd"); + std::fs::write(cwd.path().join("requirements.txt"), "oldpkg==1.0.0\n") + .expect("write requirements.txt"); + let mut h = tree_harness("pip", HashMap::new(), HashMap::new(), TREE_REPORT); + let out = h + .cmd + .current_dir(cwd.path()) + .args(["pip", "install", "-r", "requirements.txt"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean tree must proceed"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("requirements.txt") && stderr.contains("not recency-checked"), + "Full tree pass with -r must still note recency is skipped: {stderr}" + ); +} + +#[test] +fn npm_root_redirect_flag_degrades_to_named_only() { + // `--prefix` overrides npm's project root regardless of cwd, so the + // throwaway-dir resolution would write the REAL lockfile at that path. + // The tree pass must refuse and fall back to named-only instead. 
+ let elsewhere = TempDir::new().expect("redirect target"); + let lock_path = elsewhere.path().join("package-lock.json"); + + let mut h = tree_harness("npm", HashMap::new(), HashMap::new(), NPM_LOCK); + let out = h + .cmd + .args([ + "npm", + "install", + "--prefix", + elsewhere.path().to_str().unwrap(), + "oldpkg@1.0.0", + ]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean named target proceeds"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("transitive dependencies not checked") && stderr.contains("--prefix"), + "must degrade loudly naming the flag: {stderr}" + ); + assert!( + !lock_path.exists(), + "the dry run must never write through --prefix" + ); + // The real install still gets the user's full argv. + assert_eq!( + h.recorded_argv(), + Some(format!( + "install --prefix {} oldpkg@1.0.0", + elsewhere.path().display() + )) + ); +} + +#[test] +fn npm_does_not_touch_project_lockfile() { + // Run from a project dir holding sentinel manifests; the resolver works in + // a throwaway copy, so after a gated run both files are byte-identical. 
+ let project = TempDir::new().expect("project dir"); + let pkg_json = project.path().join("package.json"); + let lock_json = project.path().join("package-lock.json"); + let pkg_sentinel = r#"{"name":"sentinel","version":"0.0.0"}"#; + let lock_sentinel = r#"{"name":"sentinel","lockfileVersion":3,"packages":{}}"#; + std::fs::write(&pkg_json, pkg_sentinel).expect("write package.json"); + std::fs::write(&lock_json, lock_sentinel).expect("write package-lock.json"); + + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_evildep_body("npm"), + ); + let mut h = tree_harness("npm", checks, HashMap::new(), NPM_LOCK); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + + assert_eq!( + std::fs::read_to_string(&pkg_json).unwrap(), + pkg_sentinel, + "package.json must be untouched" + ); + assert_eq!( + std::fs::read_to_string(&lock_json).unwrap(), + lock_sentinel, + "package-lock.json must be untouched" + ); +} diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs new file mode 100644 index 0000000..5fbf988 --- /dev/null +++ b/tests/cli_verdict.rs @@ -0,0 +1,186 @@ +//! Hermetic e2e tests for the install-gate vuln-api verdict +//! (`corgea pip install …` with a public `CORGEA_VULN_API_URL` stub). +//! +//! Composes the `cli_install.rs` harness pattern (fake package manager on a +//! private PATH + local pypi registry stub) with the in-crate vuln-api stub — +//! the shared `common::pip_harness`. Every package is published in 2020, so +//! recency never blocks here — every block in this file is the verdict's +//! doing. Lookups are public: outages warn and fail open. 
+ +#![cfg(unix)] + +mod common; + +use common::{key, pip_harness, vulnerable_body}; +use std::collections::HashMap; + +#[test] +fn vulnerable_pin_blocks_without_running_install() { + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "oldpkg", "1.0.0"), + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")), + ); + let mut h = pip_harness(checks, HashMap::new(), 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!( + h.recorded_argv(), + None, + "pip must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("MAL-2024-0001"), "stdout: {stdout}"); + assert!(stdout.contains("critical"), "stdout: {stdout}"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("--force"), + "block message must name --force" + ); +} + +#[test] +fn alternate_pypi_spelling_hits_canonical_verdict() { + // Advisories are keyed by lowercase(canonical) — the server does NOT + // apply PEP 503. `pip install Flask_Cors` must still block on the + // `flask-cors` row: resolution adopts the registry's canonical + // spelling (`info.name`, like real PyPI, which answers any PEP 503- + // equivalent request) and the verdict checks that. + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "flask-cors", "1.0.0"), + vulnerable_body("pypi", "flask-cors", "1.0.0", "GHSA-TEST-0001", None), + ); + // Model real PyPI: serve the alternate request spelling, echo the + // canonical name in info.name. 
+ let registry = common::spawn_http_stub(|path| match path { + "/pypi/Flask_Cors/json" | "/pypi/flask-cors/json" => ( + "200 OK", + common::pypi_release_json("Flask-Cors", "1.0.0", common::OLD_TS), + ), + _ => ("404 Not Found", common::NOT_FOUND_JSON.to_string()), + }); + // Like `pip_harness`: the tree dry-run exits 2 (old pip, no --report), + // so the block is the named verdict's doing and a recorded argv would + // mean the real install ran. + let mut h = common::GateHarness::new() + .fake_tree_pm("pip", common::RESOLUTION_FAILS, 0) + .registry_env("CORGEA_PYPI_REGISTRY", &registry) + .vuln_checks(checks) + .build(); + let out = h + .cmd + .args(["pip", "install", "Flask_Cors==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "alternate spelling must not bypass the gate" + ); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("GHSA-TEST-0001"), "stdout: {stdout}"); +} + +#[test] +fn force_overrides_vulnerable_block_and_propagates_exit_code() { + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "oldpkg", "1.0.0"), + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")), + ); + let mut h = pip_harness(checks, HashMap::new(), 7); + let out = h + .cmd + .args(["pip", "--force", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(7), + "manager exit code must propagate under --force" + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("MAL-2024-0001"), + "findings must still print under --force: {stdout}" + ); +} + +#[test] +fn no_fail_does_not_waive_vulnerable_block() { + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "oldpkg", "1.0.0"), + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")), + ); + let mut h = 
pip_harness(checks, HashMap::new(), 0); + let out = h + .cmd + .args(["pip", "--no-fail", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "--no-fail demotes recency only, never a vulnerable verdict" + ); + assert_eq!(h.recorded_argv(), None); +} + +#[test] +fn verdict_503_warns_and_fails_open() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), "{}".to_string()); + let mut statuses = HashMap::new(); + statuses.insert(key("pypi", "oldpkg", "1.0.0"), 503u16); + let mut h = pip_harness(checks, statuses, 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "a 503 verdict must fail open in public mode; stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("could not be verified"), "stdout: {stdout}"); + assert!( + String::from_utf8_lossy(&out.stderr) + .contains("CVE check unavailable; continuing because public mode is fail-open"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn vuln_api_outage_warns_but_installs() { + let mut h = pip_harness(HashMap::new(), HashMap::new(), 0); + // Point the gate at a dead vuln-api: connection refused on every check. 
+ h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "public lookup outage must fail open" + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("CVE check unavailable; continuing because public mode is fail-open"), + "stderr: {stderr}" + ); +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs index f2a1a8e..46c5ace 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -69,6 +69,20 @@ pub fn npm_packument(version: &str, ts: &str) -> String { ) } +/// Pip `--report -` payload: `oldpkg` (named/requested) + `evildep` +/// (transitive). +#[allow(dead_code)] +pub const TREE_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, + {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; + +/// npm lockfile-v3 fixture: named `oldpkg` 1.0.0 + transitive `evildep` 0.4.2. +#[allow(dead_code)] +pub const NPM_LOCK: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ + "":{"name":"proj","version":"1.0.0"}, + "node_modules/oldpkg":{"version":"1.0.0"}, + "node_modules/evildep":{"version":"0.4.2"}}}"#; + /// Spawn a one-response-per-connection HTTP stub on an ephemeral 127.0.0.1 /// port; `route` maps a request path to `(status line, body)`. Returns the /// base URL. @@ -138,6 +152,18 @@ pub fn write_script(dir: &std::path::Path, binary: &str, script: &str) { .expect("chmod fake script"); } +/// Shell loop that emits the file at `path` line by line via builtins — +/// works under the locked-down test PATH (no `cat`); the `|| [ -n "$line" ]` +/// guard keeps a final line with no trailing newline. 
+#[cfg(unix)] +#[allow(dead_code)] +pub fn emit(path: &std::path::Path) -> String { + format!( + "while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{}'", + path.display() + ) +} + /// Write an executable fake package manager named `binary` into `dir`. It /// records its argv to `marker` and exits `exit_code` — proving both "the /// install ran (with these args)" and exit-code forwarding. @@ -157,6 +183,46 @@ pub fn write_fake_recorder( write_script(dir, binary, &script); } +/// Sentinel payload that makes a tree-aware fake manager exit non-zero on +/// its tree (resolution) invocation, forcing the named-only fallback. +#[allow(dead_code)] +pub const RESOLUTION_FAILS: &str = "RESOLUTION_FAILS"; + +/// Write an executable tree-aware fake package manager into `dir`. An +/// invocation carrying the manager's tree flag emits `payload` (stdout for +/// pip's `--dry-run --report -`, `./package-lock.json` for npm's +/// `--package-lock-only`, whose cwd is the resolver's throwaway temp dir) +/// and exits 0 — the tree pass; if `payload` is `RESOLUTION_FAILS` it exits +/// non-zero instead, emitting nothing. Any other invocation records its +/// argv to `marker` and exits `exit_code`. 
+#[cfg(unix)] +#[allow(dead_code)] +pub fn write_fake_tree_pm( + dir: &std::path::Path, + binary: &str, + marker: &std::path::Path, + payload: &str, + exit_code: i32, +) { + let (tree_flag, redirect, fail_exit) = match binary { + "pip" | "pip3" => ("--dry-run", "", 2), + "npm" => ("--package-lock-only", " > package-lock.json", 1), + other => panic!("unsupported fake manager {other}"), + }; + let tree_branch = if payload == RESOLUTION_FAILS { + format!("exit {fail_exit}") + } else { + let payload_path = dir.join(format!("{binary}-tree-payload.json")); + std::fs::write(&payload_path, payload).expect("write fake pm payload"); + format!("{}{redirect}; exit 0", emit(&payload_path)) + }; + let script = format!( + "#!/bin/sh\ncase \" $* \" in *\" {tree_flag} \"*) {tree_branch};; esac\nprintf '%s' \"$*\" > '{marker}'\nexit {exit_code}\n", + marker = marker.display(), + ); + write_script(dir, binary, &script); +} + /// One configurable harness behind every gate test: isolated `corgea`, a /// private PATH of fake package managers, optional registry stubs, the /// vuln-api stub, and an optional throwaway project cwd. @@ -195,6 +261,13 @@ impl GateHarness { } } + /// Tree-aware fake manager: emits `payload` on its tree flag, records + /// argv and exits `exit_code` otherwise. + pub fn fake_tree_pm(self, binary: &str, payload: &str, exit_code: i32) -> Self { + write_fake_tree_pm(self._bin.path(), binary, &self.marker, payload, exit_code); + self + } + /// Plain argv recorder. Call repeatedly for multiple binaries; call /// never for an empty PATH. pub fn fake_recorder(self, binary: &str, exit_code: i32) -> Self { @@ -267,6 +340,18 @@ impl GateHarness { self } + /// Re-point the corgea invocation at a (created) subdirectory of the + /// project dir — for tests proving ancestor-walk behavior. 
+ pub fn in_subdir(mut self, name: &str) -> Self { + if self.project.is_none() { + self = self.in_project_dir(); + } + let dir = self.project.as_ref().unwrap().path().join(name); + std::fs::create_dir_all(&dir).expect("create subdir"); + self.cmd.current_dir(&dir); + self + } + pub fn build(mut self) -> Self { if !self.vuln_api { return self; @@ -284,3 +369,43 @@ impl GateHarness { std::fs::read_to_string(&self.marker).ok() } } + +/// `corgea` wired to the wildcard pypi registry stub (every package +/// published 2020 → recency never blocks), a report-less fake pip +/// (recording its argv to a marker), and a vuln-api stub. Every block in a +/// `pip_harness` test is the verdict's doing. +#[cfg(unix)] +#[allow(dead_code)] +pub fn pip_harness( + checks: HashMap, + statuses: HashMap, + pip_exit_code: i32, +) -> GateHarness { + // RESOLUTION_FAILS models an old pip with no `--report`: the tree + // dry-run exits 2, so these tests exercise the named-only fallback. + GateHarness::new() + .fake_tree_pm("pip", RESOLUTION_FAILS, pip_exit_code) + .wildcard_pypi_registry() + .vuln_checks(checks) + .vuln_statuses(statuses) + .build() +} + +/// `corgea` wired to the oldpkg registry stub, a tree-aware fake `binary` +/// (`"pip"`, `"pip3"`, or `"npm"`) answering the tree pass with `payload`, +/// and a vuln-api stub. +#[cfg(unix)] +#[allow(dead_code)] +pub fn tree_harness( + binary: &str, + checks: HashMap, + statuses: HashMap, + payload: &str, +) -> GateHarness { + GateHarness::new() + .fake_tree_pm(binary, payload, 0) + .oldpkg_registry() + .vuln_checks(checks) + .vuln_statuses(statuses) + .build() +}