diff --git a/Cargo.lock b/Cargo.lock index 58bd8d2..07513be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,6 +105,18 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "async-compression" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -287,6 +299,23 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "compression-codecs" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf" +dependencies = [ + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789" + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -360,6 +389,7 @@ dependencies = [ "quick-xml", "regex", "reqwest", + "semver", "serde", "serde_derive", "serde_json", @@ -1626,6 +1656,7 @@ version = "0.12.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" dependencies = [ + "async-compression", "base64", "bytes", "cookie", @@ -1652,6 +1683,7 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-native-tls", + "tokio-util", "tower", "tower-http", "tower-service", @@ -1761,6 +1793,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" diff --git a/Cargo.toml b/Cargo.toml index 5228045..f4e4818 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ dirs = "5.0.1" reqwest = { version = "0.12.23", default-features = false, features = [ "blocking", "cookies", + "gzip", "json", "multipart", "native-tls", @@ -31,6 +32,7 @@ reqwest = { version = "0.12.23", default-features = false, features = [ toml = "0.8.8" log = "0.4" env_logger = "0.11" +semver = "1" serde = { version = "1.0.195", features = ["derive"] } serde_json = "1.0.111" serde_derive = "1.0.195" diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index f23293f..0f95380 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -131,6 +131,85 @@ Agent environments default to compact TSV; force output with `--format human|age Notes: `deps scan --out-format table|json|sarif` is the report/export selector; do not combine it with `deps scan --format`. +### Install Wrappers — `corgea pip|npm ` + +Run a package manager through Corgea's install gate. Install commands with +named targets are resolved against the public registry first, then gated +twice: a version published within `--threshold` (default `2d`) blocks +(exit 1), and each resolved version is checked against Corgea's vuln-api — +known-vulnerable or malicious versions block. CVE checks are public and need +no token; vuln-api lookup outages warn and continue (fail-open). Everything +else passes through with the package manager's own exit code. Git/URL/path +specs (including `pip install .`, PEP 508 `name @ url` direct references, and +npm GitHub shorthand `user/repo`) are noted, never blocked. The install verb +is found behind global flags (`npm --loglevel silent install x` is still +gated). Bare installs (no named targets) and `-r requirements.txt` files are +noted, not gated. `npm ci` passes through ungated. 
+ +Wrapper flags (`--force`, `--no-fail`, `-t`) are read between the manager +name and the install verb (`corgea npm --force install x`); flags after the +verb belong to the package manager and are forwarded untouched. + +Blocked findings steer to the fix: each advisory line shows +`fixed in <version>` (or `no fixed version known`). When every advisory on a +package has a fix, the gate prints `→ safe version: <name>@<version>` — the +highest fix covering every advisory. Install that version instead. + +```bash +corgea pip install requests==2.31.0 # resolves, checks recency + vuln verdict, then runs pip +corgea npm install axios@^1.0.0 # same gate for npm ranges +corgea pip --no-fail install newpkg # demote a recency block to a warning (vuln blocks still apply) +corgea pip --force install badpkg # print findings but install anyway (overrides every block) +corgea pip list # non-install subcommands pass straight through +``` + +| Flag | Short | Description | +|------|-------|-------------| +| `--threshold` | `-t` | Recency threshold (`2d`, `12h`). Younger resolved versions block. | +| `--no-fail` | | Demote a recency block to a warning. Does NOT bypass vulnerable blocks. | +| `--force` | | Proceed despite all findings (vulnerable, recent). Findings still print. | + +Overrides for testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`, +`CORGEA_VULN_API_URL`. + +#### Limitations + +The gate is a wrapper, not an enforcement boundary. By design it cannot catch: + +- **Direct invocation** — running the package manager itself (`pip`, `npm`, + `python -m pip`) skips the gate entirely. +- **Custom indexes/registries** — `--index-url`, `--registry`, and `.npmrc`/ + `pip.conf` overrides change where packages resolve from. The gate still + verdicts each `name@version`, but it cannot vouch that a substituted + registry serves the same artifact those advisories describe. +- **Transitive dependencies** — only the named install targets are verified; + the rest of the resolved tree installs unchecked. 
+- **Bare installs and lockfiles** — `npm install` with no targets, `npm ci`, + and `-r requirements.txt` files run unchecked after a note. + +Hard enforcement needs org-level controls — lockfile review, registry +allow-listing — alongside the wrapper. + +#### Testing the gate + +The staging vuln-api (`https://cve-worker-staging.corgea.workers.dev`) is the +current default endpoint and serves deterministic verdicts for dogfooding. +Known-vulnerable targets: + +| Ecosystem | Target | Verdict | +|-----------|--------|---------| +| npm | `axios@0.21.0` | vulnerable — fixed in 0.21.2 | +| npm | `minimist@0.0.8` | vulnerable — fixed in 1.2.2 | +| npm | `node-fetch@2.6.0` | vulnerable — fixed in 2.6.7 | +| PyPI | `mezzanine==6.0.0` | vulnerable — no fixed version known | + +Verify the gate end-to-end: + +```bash +corgea npm install axios@0.21.0 # exit 1, names CVE-2021-3749, steers to 0.21.2 +corgea pip install mezzanine==6.0.0 # exit 1, no fixed version known +``` + ## Common Workflows ### Scan full project diff --git a/src/config.rs b/src/config.rs index 257a483..2c9287c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2,6 +2,8 @@ use serde::{Deserialize, Serialize}; use std::path::PathBuf; use std::{env, fs, io}; +pub const DEFAULT_VULN_API_URL: &str = "https://cve-worker-staging.corgea.workers.dev"; + #[derive(Serialize, Deserialize, Clone)] pub struct Config { pub(crate) url: String, @@ -101,3 +103,38 @@ impl Config { self.debug } } + +/// Base URL for the vuln-api service: `CORGEA_VULN_API_URL` env var, +/// then the public default. Pure env/constant — no config file field. 
+pub fn vuln_api_url() -> String { + crate::utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") + .unwrap_or_else(|| DEFAULT_VULN_API_URL.to_string()) + .trim() + .trim_end_matches('/') + .to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + /// All `vuln_api_url` cases in one test fn: the env-var cases + /// mutate process-global state, so they must not run concurrently + /// with each other under the parallel test harness. + #[test] + fn vuln_api_url_resolution_order() { + env::remove_var("CORGEA_VULN_API_URL"); + + // Default when the env var is unset. + assert_eq!(vuln_api_url(), DEFAULT_VULN_API_URL); + + // Env var wins; whitespace and trailing slash trimmed. + env::set_var("CORGEA_VULN_API_URL", " https://env.example.com/ "); + assert_eq!(vuln_api_url(), "https://env.example.com"); + + // Empty / whitespace-only env var is treated as unset. + env::set_var("CORGEA_VULN_API_URL", " "); + assert_eq!(vuln_api_url(), DEFAULT_VULN_API_URL); + env::remove_var("CORGEA_VULN_API_URL"); + } +} diff --git a/src/lib.rs b/src/lib.rs index 2f8423e..498e83d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,6 @@ pub mod deps; +pub mod precheck; +pub mod verify_deps; // Also declared in the binary crate (src/main.rs); re-declared here so library modules // (e.g. vuln_api) can use `crate::log::debug`. src/log.rs is a thin `::log` facade that // compiles cleanly in both crates. diff --git a/src/main.rs b/src/main.rs index 442c5a1..ea4ab1c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -199,6 +199,57 @@ enum Commands { #[command(subcommand)] command: corgea::deps::run::DepsSubcommand, }, + /// Wrap `npm` commands: gate install targets on recency + vuln verdicts, then run npm. + Npm(InstallWrapArgs), + /// Wrap `pip` commands: gate install targets on recency + vuln verdicts, then run pip. + Pip(InstallWrapArgs), +} + +/// Shared flags for the install-wrapper subcommands (`corgea npm|pip`). 
+#[derive(clap::Args, Debug, Clone)] +struct InstallWrapArgs { + #[arg( + long, + short = 't', + default_value = "2d", + value_parser = corgea::verify_deps::parse_threshold, + help = "Recency threshold. Resolved versions younger than this are blocked. e.g. '2d', '12h'." + )] + threshold: std::time::Duration, + + #[arg( + long, + help = "Demote a recency block to a printed warning. The install still runs." + )] + no_fail: bool, + + #[arg( + long, + help = "Proceed with the install despite vulnerable or recent findings. Findings are still printed." + )] + force: bool, + + /// Arguments forwarded to the package manager (subcommand and package specs). + #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + cmd: Vec, +} + +fn install_wrap_options(args: &InstallWrapArgs) -> corgea::precheck::PrecheckOptions { + corgea::precheck::PrecheckOptions { + threshold: args.threshold, + no_fail: args.no_fail, + force: args.force, + verdict: Some(corgea::precheck::VerdictConfig { + base_url: config::vuln_api_url(), + }), + npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), + pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), + } +} + +fn run_install_wrap_command(manager: corgea::precheck::PackageManager, args: &InstallWrapArgs) { + let code = corgea::precheck::run_install(manager, &args.cmd, install_wrap_options(args)); + std::process::exit(code); } #[derive(Subcommand, Debug, Clone, PartialEq)] @@ -504,7 +555,19 @@ fn main() { // Offline: no token / network. Exit code propagates fail-on policy. std::process::exit(i32::from(corgea::deps::run::run(command.clone()))); } + // Install wrappers: no auth gate. Public CVE checks run without a + // token and fail open on lookup outages. 
+ Some(Commands::Npm(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Npm, args) + } + Some(Commands::Pip(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Pip, args) + } None => { + if let Some(message) = corgea::precheck::pip3_alias_message(&cli.args) { + eprintln!("{message}"); + std::process::exit(1); + } utils::terminal::show_welcome_message(); let _ = Cli::command().print_help(); println!(); diff --git a/src/precheck/exec.rs b/src/precheck/exec.rs new file mode 100644 index 0000000..2e86cda --- /dev/null +++ b/src/precheck/exec.rs @@ -0,0 +1,65 @@ +//! Resolve and exec the real package manager, forwarding args and exit codes. + +use std::ffi::OsString; +use std::process::Command; + +use super::PackageManager; + +pub(super) fn exec_install_with_args( + manager: PackageManager, + subcommand: &str, + rest: &[String], +) -> i32 { + let mut full = Vec::with_capacity(rest.len() + 1); + full.push(subcommand.to_string()); + full.extend(rest.iter().cloned()); + exec_command(manager.binary_name(), &full) +} + +/// Resolve `binary` on PATH. On Windows this finds `.cmd` shims. pip is the +/// one manager with a conventional alias, so a missing `pip` retries `pip3`. +/// The error names the binary and any fallback tried. 
+pub(super) fn resolve_binary(binary: &str) -> Result { + if let Ok(p) = which::which(binary) { + return Ok(p); + } + if binary == "pip" { + if let Ok(p) = which::which("pip3") { + return Ok(p); + } + return Err("error: 'pip' not found on PATH (also tried 'pip3')".to_string()); + } + Err(format!("error: '{binary}' not found on PATH")) +} + +pub(super) fn exec_command(binary: &str, args: &[String]) -> i32 { + let resolved = match resolve_binary(binary) { + Ok(p) => p, + Err(msg) => { + eprintln!("{msg}"); + return 127; + } + }; + + let os_args: Vec = args.iter().map(OsString::from).collect(); + + let mut command = Command::new(&resolved); + command.args(&os_args); + match command.status() { + Ok(status) => status.code().unwrap_or_else(|| { + #[cfg(unix)] + { + use std::os::unix::process::ExitStatusExt; + if let Some(sig) = status.signal() { + return 128 + sig; + } + } + 1 + }), + Err(e) => { + // Name the resolved path: it may be the pip3 fallback, not `binary`. + eprintln!("failed to exec {}: {}", resolved.display(), e); + 1 + } + } +} diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs new file mode 100644 index 0000000..9b148de --- /dev/null +++ b/src/precheck/mod.rs @@ -0,0 +1,436 @@ +//! Install wrappers: `corgea npm`, `corgea pip`. +//! +//! Wraps an install command from a supported package manager, resolves the +//! named install targets against the public registry, and either blocks the +//! install or runs it transparently. +//! +//! Two independent blocks: +//! * recency — the resolved version was published within `--threshold` +//! (default `2d`); `--no-fail` demotes this to a warning; +//! * vuln verdict — the vuln-api knows the resolved version is vulnerable +//! or malicious; only `--force` overrides this. +//! +//! Verdict lookups are public and fail open: a vuln-api outage warns and the +//! install continues. 
+ +mod exec; +mod parse; +mod render; +mod verdict; + +#[cfg(test)] +mod test_support; + +use std::time::Duration; + +use chrono::Utc; + +/// Supported package managers. Each one shares enough behaviour with +/// the others that we only need a small per-manager dispatch. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PackageManager { + Npm, + Pip, +} + +impl PackageManager { + pub fn binary_name(self) -> &'static str { + match self { + PackageManager::Npm => "npm", + PackageManager::Pip => "pip", + } + } + + /// Subcommands that this manager treats as "install something new" + /// — the only ones we need to verify before running. + pub fn is_install_subcommand(self, sub: &str) -> bool { + match self { + PackageManager::Npm => matches!(sub, "install" | "i" | "add"), + PackageManager::Pip => matches!(sub, "install"), + } + } + + /// vuln-api ecosystem for this manager's registry. + pub fn ecosystem(self) -> crate::vuln_api::Ecosystem { + match self { + PackageManager::Npm => crate::vuln_api::Ecosystem::Npm, + PackageManager::Pip => crate::vuln_api::Ecosystem::Pypi, + } + } +} + +/// Connection details for the vuln-api verdict pass. Lookups are public +/// (no auth) and fail open: known vulnerable/malicious verdicts block, +/// while lookup errors warn and continue. +#[derive(Debug, Clone)] +pub struct VerdictConfig { + pub base_url: String, +} + +/// Threat verdict for one resolved target. +#[derive(Debug, Clone)] +pub enum VerdictStatus { + /// vuln-api answered: no known advisories for this exact version. + Clean, + /// vuln-api answered: known vulnerable or malicious — blocks. + Vulnerable(Vec), + /// The verdict could not be obtained (network/5xx/integrity). + /// Public mode fails open: warns, never blocks. + Unverifiable(String), + /// Verdict never attempted (no `VerdictConfig`). + NotChecked, +} + +impl VerdictStatus { + /// Whether this verdict blocks the install. The single definition of + /// "blocking finding", used by `verdict::block_reason`. 
+ fn blocks(&self) -> bool { + matches!(self, VerdictStatus::Vulnerable(_)) + } +} + +#[derive(Debug, Clone)] +pub struct PrecheckOptions { + pub threshold: Duration, + /// If true, demote a recent finding from "block" to "warn-and-run". + pub no_fail: bool, + /// If true, never block: print findings (recent, vulnerable, + /// unverifiable) and run the install anyway. + pub force: bool, + /// `Some` ⇒ run the vuln-api verdict pass against this endpoint. + /// `None` is retained for tests and direct library callers that want + /// recency-only behavior. + pub verdict: Option, + /// Optional registry overrides, used by tests. + pub npm_registry: Option, + pub pypi_registry: Option, +} + +/// Each item the user asked us to install. +#[derive(Debug, Clone)] +pub struct InstallTarget { + pub name: String, + /// Display form, e.g. `axios@^1.0.0` or `requests==2.31.0`. + pub display: String, + /// What we'll feed into the resolver. + pub kind: TargetKind, +} + +#[derive(Debug, Clone)] +pub enum TargetKind { + Npm(crate::verify_deps::registry::NpmSpec), + Pypi(crate::verify_deps::registry::PypiSpec), + /// Something we can't verify (URL/git/file/path) — we surface this + /// as a warning but never block on it. + Unverifiable { + reason: String, + }, +} + +/// Outcome of resolving + verifying a single target. +#[derive(Debug, Clone)] +pub enum TargetOutcome { + /// Resolved cleanly. The blocking recency condition is derived from + /// `age` against the report's threshold (`PrecheckReport::is_recent`). + Resolved { + target: InstallTarget, + resolved: crate::verify_deps::registry::ResolvedPackage, + age: Duration, + verdict: VerdictStatus, + }, + /// We deliberately couldn't verify this target (URL / git / etc.). + Skipped { + target: InstallTarget, + reason: String, + }, + /// Resolution failed (network, unknown package, bad spec). 
+ Error { + target: InstallTarget, + error: String, + }, +} + +#[derive(Debug)] +pub struct PrecheckReport { + pub manager: PackageManager, + pub subcommand: String, + pub original_args: Vec, + pub outcomes: Vec, + pub threshold: Duration, +} + +impl PrecheckReport { + fn count(&self, pred: impl Fn(&TargetOutcome) -> bool) -> usize { + self.outcomes.iter().filter(|o| pred(o)).count() + } + /// True when this age is within the recency threshold (the blocking + /// condition). The single definition of "recent". + fn is_recent(&self, age: Duration) -> bool { + age < self.threshold + } + pub fn ok_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Resolved { age, .. } if !self.is_recent(*age))) + } + pub fn recent_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Resolved { age, .. } if self.is_recent(*age))) + } + /// Verdicts on the resolved named targets. + fn verdicts(&self) -> impl Iterator { + self.outcomes.iter().filter_map(|o| match o { + TargetOutcome::Resolved { verdict, .. } => Some(verdict), + _ => None, + }) + } + pub fn vulnerable_count(&self) -> usize { + self.verdicts() + .filter(|v| matches!(v, VerdictStatus::Vulnerable(_))) + .count() + } + pub fn unverifiable_count(&self) -> usize { + self.verdicts() + .filter(|v| matches!(v, VerdictStatus::Unverifiable(_))) + .count() + } + pub fn skipped_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Skipped { .. })) + } + pub fn error_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Error { .. })) + } +} + +/// Canonical entry for ecosystem commands (`corgea npm install …`). +/// +/// `cmd` is everything after the ecosystem name, e.g. +/// `["install", "axios@^1.0.0", "--save-dev"]`. An empty `cmd` execs the +/// package manager with no arguments. 
+pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOptions) -> i32 { + if cmd.is_empty() { + return exec::exec_command(manager.binary_name(), &[]); + } + + // The install verb may follow global flags (`npm --silent install x`); + // route on the first non-flag token so flags-before-verb can't slip + // past the gate ungated. + let Some(verb_idx) = find_subcommand(manager, cmd) else { + return exec::exec_command(manager.binary_name(), cmd); + }; + let subcommand = &cmd[verb_idx]; + let rest_vec: Vec = cmd[..verb_idx] + .iter() + .chain(&cmd[verb_idx + 1..]) + .cloned() + .collect(); + let rest = rest_vec.as_slice(); + + if manager == PackageManager::Pip && subcommand == "add" { + eprintln!("{}", unsupported_pip_add_message(rest)); + return 1; + } + + if !manager.is_install_subcommand(subcommand) { + // Non-install subcommand: transparent passthrough, args untouched. + return exec::exec_command(manager.binary_name(), cmd); + } + + let parsed = match parse::parse_install_args(manager, rest) { + Ok(p) => p, + Err(e) => { + eprintln!("failed to parse install args: {}", e); + return 2; + } + }; + + warn_registry_override(manager, rest); + + run_parsed_install( + manager, + subcommand, + rest, + parsed, + || exec::exec_install_with_args(manager, subcommand, rest), + opts, + ) +} + +/// Index of the first non-flag token in `cmd` — the subcommand verb. +/// Skips flag values with the same `takes_value` table as the arg parsers, +/// so `npm --loglevel silent install x` routes on `install`, not `silent`. +/// `None` ⇒ no subcommand at all (flags only, e.g. `npm --version`). 
+fn find_subcommand(manager: PackageManager, cmd: &[String]) -> Option { + let mut i = 0; + while i < cmd.len() { + let a = &cmd[i]; + if a == "--" { + return (i + 1 < cmd.len()).then_some(i + 1); + } + if !a.starts_with('-') { + return Some(i); + } + i += if !a.contains('=') && parse::takes_value(manager, a) { + 2 + } else { + 1 + }; + } + None +} + +/// `corgea ` — the suggested-command string used by the +/// "Did you mean …" messages. +fn corgea_cmd(words: &[&str], rest: &[String]) -> String { + let mut parts = vec!["corgea".to_string()]; + parts.extend(words.iter().map(|w| w.to_string())); + parts.extend(rest.iter().cloned()); + parts.join(" ") +} + +pub fn pip3_alias_message(args: &[String]) -> Option { + let rest = args.strip_prefix(&["pip3".to_string()])?; + Some(format!( + "error: unknown package manager `pip3`.\nDid you mean `{}`?", + corgea_cmd(&["pip"], rest) + )) +} + +fn unsupported_pip_add_message(rest: &[String]) -> String { + format!( + "error: pip does not support `add`.\nDid you mean `{}`?", + corgea_cmd(&["pip", "install"], rest) + ) +} + +/// Warn when a custom registry/index flag is forwarded: the gate resolves +/// and verdicts against the default (env/public) registry, so it cannot +/// vouch that the artifact the manager pulls from the override matches the +/// advisory universe. Resolving against the override (and multi-index cases +/// like `--extra-index-url`) is a documented limitation — registry +/// allow-listing is future work, separate PRD. 
+fn warn_registry_override(manager: PackageManager, rest: &[String]) { + let flags: &[&str] = match manager { + PackageManager::Npm => &["--registry"], + PackageManager::Pip => &["-i", "--index-url", "--extra-index-url"], + }; + if let Some(flag) = rest.iter().find(|a| { + flags + .iter() + .any(|f| a.as_str() == *f || a.starts_with(&format!("{f}="))) + }) { + eprintln!( + "warning: '{flag}' points {} at a custom registry/index; the gate resolves and verdicts against the default registry and cannot vouch the installed artifact matches.", + manager.binary_name() + ); + } +} + +/// Post-parse verification: resolve named targets, verdict them, render the +/// report, refuse (exit 1) when the block predicate fires, otherwise run +/// the install. +fn run_parsed_install( + manager: PackageManager, + subcommand_label: &str, + rest: &[String], + parsed: parse::ParsedInstall, + exec: impl FnOnce() -> i32, + opts: PrecheckOptions, +) -> i32 { + if parsed.targets.is_empty() { + // Nothing named: bare installs and requirements-only installs are + // noted, never gated, by this phase. 
+ render::requirements_note(&parsed); + return exec(); + } + + let now = Utc::now(); + let mut outcomes = verdict::verify_all(&parsed.targets, &opts, &now, parsed.allow_prerelease); + verdict::run_verdict_pass(manager, &mut outcomes, &opts); + render::requirements_note(&parsed); + + let report = PrecheckReport { + manager, + subcommand: subcommand_label.to_string(), + original_args: rest.to_vec(), + outcomes, + threshold: opts.threshold, + }; + + render::print_text(&report); + render::warn_public_lookup_failures(&report, &opts); + if let Some(reason) = verdict::block_reason(&report, &opts) { + render::print_refusal(reason); + return 1; + } + exec() +} + +#[cfg(test)] +mod tests { + use super::test_support::*; + use super::*; + + #[test] + fn install_subcommand_recognition() { + assert!(PackageManager::Npm.is_install_subcommand("install")); + assert!(PackageManager::Npm.is_install_subcommand("i")); + assert!(PackageManager::Npm.is_install_subcommand("add")); + assert!(!PackageManager::Npm.is_install_subcommand("update")); + + assert!(PackageManager::Pip.is_install_subcommand("install")); + assert!(!PackageManager::Pip.is_install_subcommand("freeze")); + } + + /// Run `run_parsed_install` for `pip install ` with an exec + /// closure that records whether it ran (returning 42 instead of + /// spawning anything). + fn gate_pip_install(args: &[&str], opts: PrecheckOptions) -> (i32, bool) { + let rest: Vec = args.iter().map(|s| s.to_string()).collect(); + let parsed = parse::parse_install_args(PackageManager::Pip, &rest).expect("parse"); + let mut exec_ran = false; + let code = run_parsed_install( + PackageManager::Pip, + "install", + &rest, + parsed, + || { + exec_ran = true; + 42 + }, + opts, + ); + (code, exec_ran) + } + + #[test] + fn unverifiable_target_skips_and_proceeds() { + // git+ spec → Skipped outcome, no registry hit, install proceeds. 
+ let opts = stub_opts(); + let (code, exec_ran) = gate_pip_install(&["git+https://github.com/psf/requests.git"], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + #[test] + fn bare_install_passes_through_without_verification() { + // Bare `pip install` (no targets) → straight exec, no registry hit. + let opts = stub_opts(); + let (code, exec_ran) = gate_pip_install(&[], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + #[test] + fn requirements_files_note_then_exec() { + // `-r reqs.txt` alone → printed note, no verification, exec runs. + let opts = stub_opts(); + let (code, exec_ran) = gate_pip_install(&["-r", "reqs.txt"], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + #[test] + fn ecosystem_mapping() { + use crate::vuln_api::Ecosystem; + assert_eq!(PackageManager::Pip.ecosystem(), Ecosystem::Pypi); + assert_eq!(PackageManager::Npm.ecosystem(), Ecosystem::Npm); + } +} diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs new file mode 100644 index 0000000..f2873f0 --- /dev/null +++ b/src/precheck/parse.rs @@ -0,0 +1,968 @@ +//! Parse install-command argument lists into structured `InstallTarget`s. +//! +//! The goal is to be liberal with valid inputs (real install commands +//! mix flags, package specs, and pass-through args freely) and clear +//! about anything we can't verify (URLs / git / filesystem refs). + +use std::path::PathBuf; + +use crate::verify_deps::registry::{NpmSpec, PypiSpec}; + +use super::{InstallTarget, PackageManager, TargetKind}; + +#[derive(Debug, Default)] +pub struct ParsedInstall { + pub targets: Vec, + /// `pip install -r foo.txt` — requirements files are only noted + /// (not verified) by the baseline gate. + pub requirements_files: Vec, + /// `pip install --pre` — allow prerelease versions when resolving the + /// version that would install, so the gate verdicts what pip installs + /// rather than the latest stable. 
+ pub allow_prerelease: bool, +} + +fn build_parsed_install( + positionals: PositionalSplit, + parse_spec: impl Fn(&str) -> InstallTarget, +) -> ParsedInstall { + ParsedInstall { + targets: positionals + .specs + .iter() + .map(|raw| parse_spec(raw)) + .collect(), + requirements_files: positionals.requirements_files, + allow_prerelease: false, + } +} + +/// The default npm dist-tag from `--tag ` / `--tag=value`, which +/// changes what a *bare* spec (`pkg`, no `@version`) installs. Stops at `--` +/// (everything after is positional). The gate must resolve that tag rather +/// than `latest`, or a fresh/vulnerable `beta`/`canary` release bypasses +/// both blocks whenever `latest` is old/clean. +fn npm_default_tag(args: &[String]) -> Option { + // npm config is last-wins: `--tag beta --tag canary` installs canary. + // Returning the first match would gate the wrong dist-tag. + let mut tag = None; + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + break; + } + if a == "--tag" { + tag = args.get(i + 1).cloned(); + i += 2; + continue; + } + if let Some(v) = a.strip_prefix("--tag=") { + tag = Some(v.to_string()); + } + i += 1; + } + tag +} + +/// Whether the forwarded pip args request prereleases (`--pre`). Stops at +/// `--` (positional thereafter). 
+fn pip_allows_prerelease(args: &[String]) -> bool { + args.iter() + .take_while(|a| a.as_str() != "--") + .any(|a| a == "--pre") +} + +pub fn parse_install_args( + manager: PackageManager, + args: &[String], +) -> Result { + match manager { + PackageManager::Pip => { + let mut parsed = build_parsed_install(extract_pip_positionals(args)?, parse_pypi_spec); + parsed.allow_prerelease = pip_allows_prerelease(args); + Ok(parsed) + } + PackageManager::Npm => { + let default_tag = npm_default_tag(args); + Ok(build_parsed_install( + extract_node_positionals(manager, args), + |raw| parse_npm_spec(raw, default_tag.as_deref()), + )) + } + } +} + +#[derive(Debug, Default)] +struct PositionalSplit { + specs: Vec, + requirements_files: Vec, +} + +/// Known install flags that take a separate value argument, per manager. +/// The fallback heuristic in [`skip_unknown_flag`] only skips URL/path-like +/// values, so a bare-word value (`-w my-workspace`) would otherwise parse — +/// and get verified or blocked — as a package spec. Not exhaustive; the +/// heuristic still backstops anything unlisted. 
+pub(super) fn takes_value(manager: PackageManager, flag: &str) -> bool { + match manager { + PackageManager::Npm => matches!( + flag, + "-w" | "--workspace" + | "--prefix" + | "--registry" + | "--tag" + | "--omit" + | "--include" + | "--loglevel" + | "--install-strategy" + | "--before" + | "--cpu" + | "--os" + | "--libc" + | "--otp" + | "--location" + | "--cache" + | "--script-shell" + | "--userconfig" + | "--globalconfig" + | "--depth" + ), + PackageManager::Pip => matches!( + flag, + "-i" | "--index-url" + | "--extra-index-url" + | "-f" + | "--find-links" + | "--platform" + | "--python-version" + | "--implementation" + | "--abi" + | "-t" + | "--target" + | "--prefix" + | "--root" + | "--src" + | "--upgrade-strategy" + | "--no-binary" + | "--only-binary" + | "--progress-bar" + | "--proxy" + | "--retries" + | "--timeout" + | "--exists-action" + | "--trusted-host" + | "--cert" + | "--client-cert" + | "--cache-dir" + | "--log" + | "--python" + | "--keyring-provider" + | "--report" + | "--use-feature" + | "--use-deprecated" + | "--config-settings" + | "-C" + | "--global-option" + | "--hash" + ), + } +} + +/// Strip flags from an npm install argument list, returning only the +/// positional package specs. +/// +/// We treat anything starting with `-` as a flag. Boolean flags (`-D`, +/// `--save-dev`, `--no-save`, ...) are dropped on their own. Flags +/// that take a value can be written as either `--flag=value` or +/// `--flag value`; known value-taking flags ([`takes_value`]) skip the +/// next token outright, anything else skips it only if it looks like a +/// value (a URL / path), never like a package spec. +fn extract_node_positionals(manager: PackageManager, args: &[String]) -> PositionalSplit { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + // After `--`, everything is positional. + for rest in &args[i + 1..] 
{ + out.specs.push(rest.clone()); + } + break; + } + if a.starts_with('-') { + if !a.contains('=') && takes_value(manager, a) { + i += 2; + continue; + } + i = skip_unknown_flag(args, i); + continue; + } + out.specs.push(a.clone()); + i += 1; + } + out +} + +/// Advance past an unknown flag at `i`. `--flag=value` is self-contained; +/// otherwise peek at the next arg and skip it too if it doesn't look like +/// a package spec (contains `://` or is path-like) — see the heuristic +/// rationale on [`extract_node_positionals`]. +fn skip_unknown_flag(args: &[String], i: usize) -> usize { + if args[i].contains('=') { + return i + 1; + } + let next_is_value = args + .get(i + 1) + .map(|n| { + !n.starts_with('-') + && (n.contains("://") + || n.starts_with('/') + || n.starts_with("./") + || n.starts_with('~')) + }) + .unwrap_or(false); + i + if next_is_value { 2 } else { 1 } +} + +/// pip's argument grammar is more structured than npm's: there are +/// known flags that take a value (`-r FILE`, `-c FILE`, `-e PATH`, +/// `--index-url URL`, `--target DIR`, ...). We special-case `-r/-c/-e` +/// because they affect behaviour, and treat the rest with the same +/// liberal heuristic as npm. +fn extract_pip_positionals(args: &[String]) -> Result { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + for rest in &args[i + 1..] { + out.specs.push(rest.clone()); + } + break; + } + match a.as_str() { + "-r" | "--requirement" => { + let path = args + .get(i + 1) + .ok_or_else(|| "`-r` / `--requirement` requires a file path".to_string())?; + out.requirements_files.push(PathBuf::from(path)); + i += 2; + continue; + } + "-c" | "--constraint" => { + // Constraints don't add packages, but skip the path. + i += 2; + continue; + } + "-e" | "--editable" => { + // Editable installs are explicit unverifiable targets. 
+ let path = args.get(i + 1).cloned().unwrap_or_default(); + out.specs.push(format!("-e {}", path)); + i += if args.get(i + 1).is_some() { 2 } else { 1 }; + continue; + } + _ => {} + } + // Attached short-option forms (pip's optparse): `-rreqs.txt`, + // `-cfile`, `-e./path`. Missing these would silently skip the + // whole gate (`-rreqs.txt` would read as a boolean flag and the + // install would look bare). + if let Some(path) = attached_short_value(a, "-r") { + out.requirements_files.push(PathBuf::from(path)); + i += 1; + continue; + } + if attached_short_value(a, "-c").is_some() { + i += 1; + continue; + } + if let Some(path) = attached_short_value(a, "-e") { + out.specs.push(format!("-e {}", path)); + i += 1; + continue; + } + // Long-form `--requirement=foo.txt`. + if let Some(rest) = a.strip_prefix("--requirement=") { + out.requirements_files.push(PathBuf::from(rest)); + i += 1; + continue; + } + if a.strip_prefix("--constraint=").is_some() { + i += 1; + continue; + } + if let Some(rest) = a.strip_prefix("--editable=") { + out.specs.push(format!("-e {}", rest)); + i += 1; + continue; + } + if a.starts_with('-') { + if !a.contains('=') && takes_value(PackageManager::Pip, a) { + i += 2; + continue; + } + i = skip_unknown_flag(args, i); + continue; + } + out.specs.push(a.clone()); + i += 1; + } + Ok(out) +} + +/// `-rreqs.txt` → `reqs.txt`: the value attached directly to a short +/// option. `None` for the bare flag itself (handled by the exact-match +/// arms) and for long `--` forms. +fn attached_short_value<'a>(arg: &'a str, flag: &str) -> Option<&'a str> { + arg.strip_prefix(flag).filter(|rest| !rest.is_empty()) +} + +/// Parse a single npm-style positional, e.g. `axios`, `axios@1.0.0`, +/// `axios@^1.0.0`, `axios@latest`, `@types/node@20.10.5`, +/// `git+https://...`, `file:./local`, `./local`, `npm:other@1.0.0`. 
+/// +/// `default_tag` is the `--tag ` from the command, applied only to a +/// *bare* spec (no `@version`): `npm install --tag beta pkg` installs the +/// `beta` dist-tag, so the gate must resolve that, not `latest`. An explicit +/// `pkg@latest` / `pkg@1.0.0` overrides the default tag. +fn parse_npm_spec(raw: &str, default_tag: Option<&str>) -> InstallTarget { + let display = raw.to_string(); + let trimmed = raw.trim(); + + let unverifiable_prefixes = [ + "git+", + "git:", + "git@", + "github:", + "gist:", + "bitbucket:", + "gitlab:", + "ssh://", + "http://", + "https://", + "file:", + "./", + "../", + "/", + "~/", + "npm:", + "workspace:", + ]; + if let Some(p) = unverifiable_prefixes + .iter() + .find(|p| trimmed.starts_with(*p)) + { + let reason = match *p { + "npm:" => "npm: aliased dependency — registry verification skipped", + "workspace:" => "workspace: dependency — registry verification skipped", + _ => "spec is a URL/git/filesystem reference — registry verification skipped", + }; + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: reason.to_string(), + }, + }; + } + + // Bare `.` / `..` install the current/parent directory; `user/repo` + // (one `/`, not an `@scope/` name) is npm's GitHub shorthand. Neither + // is a registry package — resolving them would 404 and block a command + // plain npm accepts. + if trimmed == "." || trimmed == ".." { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a filesystem path — registry verification skipped".to_string(), + }, + }; + } + if !trimmed.starts_with('@') && trimmed.contains('/') { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a GitHub shorthand or path — registry verification skipped" + .to_string(), + }, + }; + } + + // Find the version separator. 
Scoped names start with `@` and the + // version separator is the *next* `@` (if any). Unscoped names + // use the first `@`. + let (name_part, spec_part): (&str, &str) = if let Some(rest) = trimmed.strip_prefix('@') { + match rest.find('@') { + Some(at_in_rest) => { + let split = 1 + at_in_rest; + (&trimmed[..split], &trimmed[split + 1..]) + } + None => (trimmed, ""), + } + } else { + match trimmed.find('@') { + Some(at) => (&trimmed[..at], &trimmed[at + 1..]), + None => (trimmed, ""), + } + }; + + let name = name_part.trim().to_string(); + let spec_str = spec_part.trim(); + + let kind = if spec_str.is_empty() { + // A bare spec picks up the command's `--tag`, if any; otherwise latest. + match default_tag { + Some(tag) => TargetKind::Npm(NpmSpec::Tag(tag.to_string())), + None => TargetKind::Npm(NpmSpec::Latest), + } + } else if spec_str.eq_ignore_ascii_case("latest") { + TargetKind::Npm(NpmSpec::Latest) + } else if semver::Version::parse(spec_str).is_ok() { + TargetKind::Npm(NpmSpec::Exact(spec_str.to_string())) + } else if let Some(rest) = spec_str + .strip_prefix('v') + .filter(|rest| semver::Version::parse(rest).is_ok()) + { + // npm coerces a leading `v` (`pkg@v1.2.3` installs 1.2.3); without + // this it would read as a dist-tag and error. + TargetKind::Npm(NpmSpec::Exact(rest.to_string())) + } else if looks_like_npm_range(spec_str) { + TargetKind::Npm(NpmSpec::Range(spec_str.to_string())) + } else if is_npm_dist_tag(spec_str) { + TargetKind::Npm(NpmSpec::Tag(spec_str.to_string())) + } else { + TargetKind::Unverifiable { + reason: format!( + "could not classify version spec '{}' (not a valid semver, range, or dist-tag)", + spec_str + ), + } + }; + + InstallTarget { + name, + display, + kind, + } +} + +/// Loose check: does this spec look like an npm version range? +/// We accept anything that *starts* with a range metacharacter +/// (`^`, `~`, `>`, `<`, `=`, `*`) or with a digit (so `1.x`, `1.2.x`, +/// and bare ranges still resolve). 
Validation against the registry's +/// version list happens later inside the resolver. +fn looks_like_npm_range(s: &str) -> bool { + matches!( + s.chars().next(), + Some('^') | Some('~') | Some('>') | Some('<') | Some('=') | Some('*') + ) || s + .chars() + .next() + .map(|c| c.is_ascii_digit()) + .unwrap_or(false) +} + +/// A dist-tag is a non-empty alphanumeric string (e.g. `latest`, +/// `next`, `beta`, `alpha-1`). We reject anything that contains +/// version-spec metacharacters. +fn is_npm_dist_tag(s: &str) -> bool { + !s.is_empty() + && s.chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.') + && s.chars() + .next() + .map(|c| c.is_ascii_alphabetic()) + .unwrap_or(false) +} + +/// Parse a single pip-style positional, e.g. `requests`, `requests==2.31.0`, +/// `requests>=2.0`, `requests[security]`, `git+https://...`, `./local`. +fn parse_pypi_spec(raw: &str) -> InstallTarget { + let display = raw.to_string(); + let trimmed = raw.trim(); + + let unverifiable_prefixes = [ + "git+", "hg+", "svn+", "bzr+", "http://", "https://", "file:", "./", "../", "/", "~/", + "-e ", "-e=", + ]; + if unverifiable_prefixes.iter().any(|p| trimmed.starts_with(p)) { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a VCS / URL / editable / filesystem reference — registry verification skipped".to_string(), + }, + }; + } + + // Strip the PEP 508 environment marker first — its comparison operators + // (`; python_version >= "3.7"`) must not be mistaken for version + // operators, which would split the name inside the marker. + let req_part = trimmed.split(';').next().unwrap_or(trimmed).trim(); + + // PEP 508 direct reference: `name @ https://…` — unverifiable like a + // bare URL (never a registry lookup, never a block). 
+ if let Some((_, after_at)) = req_part.split_once('@') { + if after_at.contains("://") { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a PEP 508 direct reference (name @ url) — registry verification skipped".to_string(), + }, + }; + } + } + + // Bare `.` / `..` and anything with a path separator install from the + // filesystem (`pip install .`), not the registry. + if req_part == "." || req_part == ".." || req_part.contains('/') || req_part.contains('\\') { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a filesystem path — registry verification skipped".to_string(), + }, + }; + } + + // Split at the leftmost specifier operator (`==`, `>=`, `<=`, `!=`, + // `~=`, `>`, `<`; PEP 440 also allows `===`). Only the index matters — + // the operator itself stays with the spec part. + let separators = ["===", "==", ">=", "<=", "!=", "~=", ">", "<"]; + let split_at = separators.iter().filter_map(|sep| req_part.find(sep)).min(); + + let (name_part, spec_part): (&str, &str) = match split_at { + Some(idx) => (&req_part[..idx], &req_part[idx..]), + None => (req_part, ""), + }; + + // Strip extras: `requests[security]` -> `requests`. + let name_no_extras = name_part + .split_once('[') + .map_or(name_part, |(n, _)| n) + .trim(); + + let spec_str = spec_part.trim(); + + let kind = if spec_str.is_empty() { + TargetKind::Pypi(PypiSpec::Latest) + } else if let Some(rest) = spec_str.strip_prefix("===") { + TargetKind::Pypi(PypiSpec::Exact(rest.trim().to_string())) + } else if let Some(rest) = spec_str.strip_prefix("==") { + let v = rest.trim(); + if v.is_empty() { + TargetKind::Unverifiable { + reason: "empty `==` specifier".to_string(), + } + } else if v.contains('*') { + // Wildcard pin (`==1.4.*`) — a range, not a literal version; + // the resolver desugars it. 
+ TargetKind::Pypi(PypiSpec::Specifier(spec_str.to_string())) + } else { + TargetKind::Pypi(PypiSpec::Exact(v.to_string())) + } + } else { + TargetKind::Pypi(PypiSpec::Specifier(spec_str.to_string())) + }; + + InstallTarget { + name: name_no_extras.to_string(), + display, + kind, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + #[test] + fn extracts_npm_positionals_skipping_flags() { + let args = vec![ + "axios".to_string(), + "--save-dev".to_string(), + "@types/node@latest".to_string(), + "-D".to_string(), + "--registry".to_string(), + "https://example.com/registry".to_string(), + "lodash@^4.0.0".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!( + p.specs, + vec![ + "axios".to_string(), + "@types/node@latest".to_string(), + "lodash@^4.0.0".to_string(), + ] + ); + } + + #[test] + fn npm_workspace_flag_value_is_not_a_spec() { + // npm's `-w ` / `--workspace ` take a bare-word value; + // it must never be verified (or blocked) as a package spec. + for flag in ["-w", "--workspace"] { + let args = vec![ + flag.to_string(), + "my-workspace".to_string(), + "lodash".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!(p.specs, vec!["lodash".to_string()], "flag {flag}"); + } + // `--workspace=name` is self-contained. 
+ let args = vec!["--workspace=my-workspace".to_string(), "lodash".to_string()]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!(p.specs, vec!["lodash".to_string()]); + } + + #[test] + fn extracts_npm_positionals_after_double_dash() { + let args = vec![ + "--save-dev".to_string(), + "--".to_string(), + "axios".to_string(), + "--this-is-positional-now".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!( + p.specs, + vec!["axios".to_string(), "--this-is-positional-now".to_string()] + ); + } + + #[test] + fn npm_tag_flag_changes_bare_spec_resolution() { + // `--tag beta` (before or after the verb's rest) makes a bare spec + // resolve the beta dist-tag, not latest. An explicit version wins. + for args in [ + vec!["--tag".to_string(), "beta".to_string(), "pkg".to_string()], + vec!["pkg".to_string(), "--tag=beta".to_string()], + ] { + let p = parse_install_args(PackageManager::Npm, &args).unwrap(); + assert_eq!(p.targets.len(), 1, "args {args:?}"); + assert!( + matches!(&p.targets[0].kind, TargetKind::Npm(NpmSpec::Tag(t)) if t == "beta"), + "bare spec must pick up --tag: {:?}", + p.targets[0].kind + ); + } + + // Explicit pin ignores --tag. + let args = vec![ + "--tag".to_string(), + "beta".to_string(), + "pkg@1.0.0".to_string(), + ]; + let p = parse_install_args(PackageManager::Npm, &args).unwrap(); + assert!( + matches!(&p.targets[0].kind, TargetKind::Npm(NpmSpec::Exact(v)) if v == "1.0.0"), + "explicit version must override --tag: {:?}", + p.targets[0].kind + ); + + // No --tag → bare spec stays latest. + let args = vec!["pkg".to_string()]; + let p = parse_install_args(PackageManager::Npm, &args).unwrap(); + assert!(matches!( + &p.targets[0].kind, + TargetKind::Npm(NpmSpec::Latest) + )); + } + + #[test] + fn npm_tag_flag_is_last_wins_like_npm_config() { + // npm's config parser is last-wins: `--tag beta --tag canary` + // installs canary. Gating beta would verdict the wrong release. 
+ let args = vec![ + "--tag".to_string(), + "beta".to_string(), + "pkg".to_string(), + "--tag=canary".to_string(), + ]; + let p = parse_install_args(PackageManager::Npm, &args).unwrap(); + assert!( + matches!(&p.targets[0].kind, TargetKind::Npm(NpmSpec::Tag(t)) if t == "canary"), + "last --tag must win: {:?}", + p.targets[0].kind + ); + } + + #[test] + fn pip_pre_flag_sets_allow_prerelease() { + let with = parse_install_args( + PackageManager::Pip, + &["--pre".to_string(), "flask".to_string()], + ) + .unwrap(); + assert!(with.allow_prerelease, "--pre must set allow_prerelease"); + + let without = parse_install_args(PackageManager::Pip, &["flask".to_string()]).unwrap(); + assert!(!without.allow_prerelease); + } + + #[test] + fn parse_npm_spec_classifies() { + let cases = vec![ + ("axios", NpmSpec::Latest), + ("axios@", NpmSpec::Latest), + ("axios@latest", NpmSpec::Latest), + ("axios@1.0.0", NpmSpec::Exact("1.0.0".to_string())), + ("axios@^1.0.0", NpmSpec::Range("^1.0.0".to_string())), + ("axios@~1.0.0", NpmSpec::Range("~1.0.0".to_string())), + ( + "axios@>=1.0.0 <2.0.0", + NpmSpec::Range(">=1.0.0 <2.0.0".to_string()), + ), + ("axios@next", NpmSpec::Tag("next".to_string())), + ("axios@beta", NpmSpec::Tag("beta".to_string())), + ("@types/node", NpmSpec::Latest), + ("@types/node@20.10.5", NpmSpec::Exact("20.10.5".to_string())), + ("@types/node@^20.0.0", NpmSpec::Range("^20.0.0".to_string())), + ("@types/node@latest", NpmSpec::Latest), + ]; + for (input, expected) in cases { + let target = parse_npm_spec(input, None); + match (&target.kind, &expected) { + (TargetKind::Npm(actual), expected) => { + assert_eq!(actual, expected, "for input '{}'", input); + } + _ => panic!("unexpected kind for '{}'", input), + } + } + } + + #[test] + fn parse_npm_spec_extracts_scoped_names() { + assert_eq!(parse_npm_spec("@types/node", None).name, "@types/node"); + assert_eq!( + parse_npm_spec("@types/node@20.10.5", None).name, + "@types/node" + ); + assert_eq!(parse_npm_spec("axios@1.2.3", 
None).name, "axios"); + assert_eq!(parse_npm_spec("axios", None).name, "axios"); + } + + #[test] + fn parse_npm_spec_skips_unverifiable() { + let unverifiable = vec![ + "git+https://github.com/x/y.git", + "git@github.com:x/y.git", + "github:expressjs/express", + "https://example.com/pkg.tgz", + "file:./local-pkg", + "./local-pkg", + "../sibling", + "/abs/path", + "npm:alias-of-other@1.0.0", + "workspace:*", + // GitHub shorthand and bare paths — registry lookups would 404. + "expressjs/express", + "user/repo#semver:^1.0.0", + ".", + "..", + ]; + for u in unverifiable { + let t = parse_npm_spec(u, None); + assert!( + matches!(t.kind, TargetKind::Unverifiable { .. }), + "for '{}'", + u + ); + } + // Scoped names keep their one `/` and stay verifiable. + assert!(matches!( + parse_npm_spec("@types/node", None).kind, + TargetKind::Npm(NpmSpec::Latest) + )); + } + + #[test] + fn parse_npm_spec_coerces_leading_v() { + // npm installs `pkg@v1.2.3` as 1.2.3; a dist-tag reading would error. + let t = parse_npm_spec("axios@v1.2.3", None); + assert!( + matches!(t.kind, TargetKind::Npm(NpmSpec::Exact(ref v)) if v == "1.2.3"), + "got {:?}", + t.kind + ); + // …but a real tag that merely starts with `v` stays a tag. 
+ let t = parse_npm_spec("node@v8-canary", None); + assert!( + matches!(t.kind, TargetKind::Npm(NpmSpec::Tag(ref s)) if s == "v8-canary"), + "got {:?}", + t.kind + ); + } + + #[test] + fn parse_pypi_spec_classifies() { + let cases = vec![ + ("requests", PypiSpec::Latest), + ("requests==2.31.0", PypiSpec::Exact("2.31.0".to_string())), + ("requests>=2.0", PypiSpec::Specifier(">=2.0".to_string())), + ("requests~=2.0", PypiSpec::Specifier("~=2.0".to_string())), + ("requests<3,>=2", PypiSpec::Specifier("<3,>=2".to_string())), + ("requests[security]", PypiSpec::Latest), + ( + "requests[security]==2.31.0", + PypiSpec::Exact("2.31.0".to_string()), + ), + ]; + for (input, expected) in cases { + let t = parse_pypi_spec(input); + match (&t.kind, &expected) { + (TargetKind::Pypi(actual), expected) => { + assert_eq!(actual, expected, "for '{}'", input); + } + _ => panic!("unexpected kind for '{}'", input), + } + } + } + + #[test] + fn parse_pypi_spec_strips_extras_and_markers() { + assert_eq!( + parse_pypi_spec("requests[security]==2.31.0").name, + "requests" + ); + let t = parse_pypi_spec("requests==2.31.0; python_version >= \"3.7\""); + assert_eq!(t.name, "requests"); + assert!( + matches!(t.kind, TargetKind::Pypi(PypiSpec::Exact(ref v)) if v == "2.31.0"), + "env marker must not leak into the spec: {:?}", + t.kind + ); + + // A marker-only spec must not split inside the marker: the name is + // `pkg` and the (versionless) spec resolves latest. + let marker_only = parse_pypi_spec("pkg; python_version >= \"3.7\""); + assert_eq!(marker_only.name, "pkg"); + assert!( + matches!(marker_only.kind, TargetKind::Pypi(PypiSpec::Latest)), + "got {:?}", + marker_only.kind + ); + } + + #[test] + fn parse_pypi_spec_wildcard_pin_is_a_specifier() { + // `==1.4.*` is a range; matching it as a literal release key would + // always miss and block. 
+ let t = parse_pypi_spec("django==4.2.*"); + assert_eq!(t.name, "django"); + assert!( + matches!(t.kind, TargetKind::Pypi(PypiSpec::Specifier(ref s)) if s == "==4.2.*"), + "got {:?}", + t.kind + ); + } + + #[test] + fn parse_pypi_spec_direct_reference_and_paths_are_unverifiable() { + // PEP 508 direct reference, bare dot, and separator-bearing paths + // must never be looked up (and thus never blocked) as registry names. + for spec in [ + "requests @ https://files.pythonhosted.org/requests-2.31.0.whl", + "pkg @ https://example.com/x.whl ; python_version >= \"3.7\"", + ".", + "..", + "sub/dir", + ] { + let t = parse_pypi_spec(spec); + assert!( + matches!(t.kind, TargetKind::Unverifiable { .. }), + "for '{}': {:?}", + spec, + t.kind + ); + } + } + + #[test] + fn parse_pypi_spec_skips_unverifiable() { + let unverifiable = vec![ + "git+https://github.com/x/y.git", + "https://example.com/pkg.tar.gz", + "./local-pkg", + "/abs/path", + "-e ./local", + ]; + for u in unverifiable { + let t = parse_pypi_spec(u); + assert!( + matches!(t.kind, TargetKind::Unverifiable { .. 
}), + "for '{}'", + u + ); + } + } + + #[test] + fn pip_args_extract_requirements_files() { + let args = vec![ + "-r".to_string(), + "reqs.txt".to_string(), + "requests==2.31.0".to_string(), + "--requirement=other.txt".to_string(), + "--constraint".to_string(), + "constraints.txt".to_string(), + "--constraint=other-constraints.txt".to_string(), + "-e".to_string(), + "./local".to_string(), + ]; + let p = extract_pip_positionals(&args).unwrap(); + assert_eq!( + p.requirements_files, + vec![PathBuf::from("reqs.txt"), PathBuf::from("other.txt")] + ); + assert!(p.specs.contains(&"requests==2.31.0".to_string())); + assert!(p.specs.iter().any(|s| s.starts_with("-e "))); + assert!(!p.specs.contains(&"constraints.txt".to_string())); + assert!(!p.specs.contains(&"other-constraints.txt".to_string())); + assert!(!p + .requirements_files + .contains(&PathBuf::from("constraints.txt"))); + assert!(!p + .requirements_files + .contains(&PathBuf::from("other-constraints.txt"))); + } + + #[test] + fn pip_attached_short_options_are_recognized() { + // pip accepts `-rreqs.txt` (value attached); reading it as a boolean + // flag would make the install look bare and skip the gate entirely. + let args = vec![ + "-rreqs.txt".to_string(), + "-cconstraints.txt".to_string(), + "-e./local".to_string(), + ]; + let p = extract_pip_positionals(&args).unwrap(); + assert_eq!(p.requirements_files, vec![PathBuf::from("reqs.txt")]); + assert!(p.specs.contains(&"-e ./local".to_string())); + assert!(!p.specs.contains(&"-cconstraints.txt".to_string())); + } + + #[test] + fn pip_value_flag_values_are_not_specs() { + // A bare-word value of a known value-taking flag must not be + // verified (or blocked) as a package. 
+ let args = vec![ + "--platform".to_string(), + "win_amd64".to_string(), + "--no-binary".to_string(), + ":all:".to_string(), + "--target".to_string(), + "build".to_string(), + "requests".to_string(), + ]; + let p = extract_pip_positionals(&args).unwrap(); + assert_eq!(p.specs, vec!["requests".to_string()]); + } +} diff --git a/src/precheck/render.rs b/src/precheck/render.rs new file mode 100644 index 0000000..b7ada75 --- /dev/null +++ b/src/precheck/render.rs @@ -0,0 +1,248 @@ +//! Report rendering: text output, refusal line, fix/steer helpers. + +use crate::verify_deps; + +use super::{parse, PrecheckOptions, PrecheckReport, TargetOutcome, VerdictStatus}; + +/// The refusal line on stderr. Messaging only; the block decision and the +/// choice of escape hatch live in `verdict::block_reason`. +pub(super) fn print_refusal(reason: super::verdict::BlockReason) { + use super::verdict::BlockReason; + match reason { + BlockReason::Findings => { + eprintln!("Refusing to run install. Pass --force to proceed despite findings.") + } + BlockReason::RecencyOnly => { + eprintln!("Refusing to run install. Pass --no-fail to proceed anyway.") + } + } +} + +/// Print the "requirements files are not recency-checked" note when the +/// install carried any `-r` files. No-op otherwise. +pub(super) fn requirements_note(parsed: &parse::ParsedInstall) { + if parsed.requirements_files.is_empty() { + return; + } + let files: Vec = parsed + .requirements_files + .iter() + .map(|p| p.display().to_string()) + .collect(); + eprintln!( + "note: requirements files ({}) are not recency-checked by the baseline gate", + files.join(", ") + ); +} + +pub(super) fn warn_public_lookup_failures(report: &PrecheckReport, opts: &PrecheckOptions) { + if opts.verdict.is_some() && report.unverifiable_count() > 0 { + eprintln!("warning: CVE check unavailable; continuing because public mode is fail-open."); + } +} + +/// Suffix for a vulnerable match line: the advisory's fix, if known. 
+fn fix_note(m: &crate::vuln_api::VulnMatch) -> String { + match &m.fixed_version { + Some(v) => format!(" — fixed in {v}"), + None => " — no fixed version known".to_string(), + } +} + +/// Highest of `fixes` after sort/dedup: a single distinct value is returned +/// as-is (no parsing — preserves odd-but-unambiguous forms); several distinct +/// values compare by lenient semver. One unparsable candidate among several +/// poisons the answer (`None`) — certifying a "safe version" from a partial +/// ordering could steer to a still-vulnerable release. +fn highest_fix(mut fixes: Vec<&str>) -> Option { + fixes.sort_unstable(); + fixes.dedup(); + match fixes.as_slice() { + [] => None, + [only] => Some((*only).to_string()), + many => { + let mut parsed = Vec::with_capacity(many.len()); + for raw in many { + match semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) { + Ok(v) => parsed.push((v, *raw)), + Err(_) => return None, + } + } + parsed + .into_iter() + .max_by(|(a, _), (b, _)| a.cmp(b)) + .map(|(_, raw)| raw.to_string()) + } + } +} + +/// The one version certified to clear every match. Requires every match to +/// carry a `fixed_version`; any match without one — or an unparsable +/// candidate among several — means no version can be certified, so `None`. +fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option { + let fixes: Vec<&str> = matches + .iter() + .map(|m| m.fixed_version.as_deref()) + .collect::>()?; + highest_fix(fixes) +} + +/// Per-match advisory lines plus the safe-version steer. Built for agent +/// self-correction: each advisory carries `fixed in `, and the +/// steer names the exact spec to install instead. 
+fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) { + for m in matches { + println!( + " {} ({}){}", + m.advisory_id, + m.severity_level, + fix_note(m) + ); + } + if let Some(safe) = safe_version(matches) { + println!(" → safe version: {name}@{safe}"); + } +} + +pub(super) fn print_text(report: &PrecheckReport) { + // Build the echoed command from non-empty parts: a gated install with + // zero remaining args has nothing to append. + let mut command = format!("{} {}", report.manager.binary_name(), report.subcommand); + if !report.original_args.is_empty() { + command.push(' '); + command.push_str(&report.original_args.join(" ")); + } + + println!( + "Pre-checking `{}` (threshold {})", + command, + verify_deps::format_duration(report.threshold) + ); + println!( + " {} ok, {} recent, {} vulnerable, {} unverifiable, {} skipped, {} errors", + report.ok_count(), + report.recent_count(), + report.vulnerable_count(), + report.unverifiable_count(), + report.skipped_count(), + report.error_count(), + ); + + for o in &report.outcomes { + match o { + TargetOutcome::Resolved { + target, + resolved, + age, + verdict, + } => match verdict { + VerdictStatus::Vulnerable(matches) => { + println!( + " ✗ {} → {}@{} known vulnerable:", + target.display, resolved.name, resolved.version, + ); + print_vulnerable_matches(&resolved.name, matches); + } + VerdictStatus::Unverifiable(error) => { + println!( + " ⚠ {} → {}@{} could not be verified: {}", + target.display, resolved.name, resolved.version, error, + ); + } + VerdictStatus::Clean | VerdictStatus::NotChecked => { + if report.is_recent(*age) { + println!( + " ⚠ {} → {}@{} published {} ago at {} (within threshold)", + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + resolved.published_at.format("%Y-%m-%d %H:%M:%S UTC"), + ); + } else { + println!( + " ✓ {} → {}@{} published {} ago", + target.display, + resolved.name, + resolved.version, + 
verify_deps::format_duration(*age), + ); + } + } + }, + TargetOutcome::Skipped { target, reason } => { + println!(" ? {}: {}", target.display, reason); + } + TargetOutcome::Error { target, error } => { + // Be explicit that an unresolvable target was NOT vetted: + // without this line a resolution failure followed by a + // proceeding install reads like a pass. + println!( + " ✗ {}: {} (not verified — this target is ungated)", + target.display, error + ); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::super::test_support::*; + use super::*; + + #[test] + fn safe_version_single_fix() { + assert_eq!( + safe_version(&[vm("A-1", Some("2.0.0"))]), + Some("2.0.0".to_string()) + ); + } + + #[test] + fn safe_version_duplicate_fixes_collapse_without_parsing() { + // "1.0rc1" is unparsable, but a single distinct value needs no parse. + assert_eq!( + safe_version(&[vm("A-1", Some("1.0rc1")), vm("A-2", Some("1.0rc1"))]), + Some("1.0rc1".to_string()) + ); + } + + #[test] + fn safe_version_picks_highest_of_distinct_fixes() { + // Semver order, not lexical ("1.2.0" > "1.10.0" lexically). 
+ assert_eq!( + safe_version(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]), + Some("1.10.0".to_string()) + ); + } + + #[test] + fn safe_version_two_component_versions_normalize() { + assert_eq!( + safe_version(&[vm("A-1", Some("4.0")), vm("A-2", Some("3.2.5"))]), + Some("4.0".to_string()) + ); + } + + #[test] + fn safe_version_mixed_fix_and_none_is_none() { + assert_eq!( + safe_version(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]), + None + ); + } + + #[test] + fn safe_version_unparsable_among_distinct_is_none() { + assert_eq!( + safe_version(&[vm("A-1", Some("2!1.0")), vm("A-2", Some("1.0.0"))]), + None + ); + } + + #[test] + fn safe_version_empty_matches_is_none() { + assert_eq!(safe_version(&[]), None); + } +} diff --git a/src/precheck/test_support.rs b/src/precheck/test_support.rs new file mode 100644 index 0000000..4b84300 --- /dev/null +++ b/src/precheck/test_support.rs @@ -0,0 +1,95 @@ +//! Shared builders for precheck unit tests (mod.rs, render.rs, verdict.rs). +//! Test-only: declared `#[cfg(test)]` from mod.rs. + +use std::time::Duration; + +use chrono::Utc; + +use super::{ + InstallTarget, PackageManager, PrecheckOptions, PrecheckReport, TargetKind, TargetOutcome, + VerdictConfig, VerdictStatus, +}; + +/// Baseline options: pypi registry at a dead address (a port that +/// refuses connections - these tests never dial it), no verdict config. +/// Override fields per test via struct update. +pub(crate) fn stub_opts() -> PrecheckOptions { + PrecheckOptions { + threshold: Duration::from_secs(2 * 86400), + no_fail: false, + force: false, + verdict: None, + npm_registry: None, + pypi_registry: Some("http://127.0.0.1:9".to_string()), + } +} + +/// `stub_opts()` plus a verdict config pointing at `base_url`. 
+pub(crate) fn verdict_opts(base_url: &str) -> PrecheckOptions { + PrecheckOptions { + verdict: Some(VerdictConfig { + base_url: base_url.to_string(), + }), + ..stub_opts() + } +} + +pub(crate) fn public_opts(no_fail: bool, force: bool) -> PrecheckOptions { + PrecheckOptions { + no_fail, + force, + ..verdict_opts("http://127.0.0.1:9") + } +} + +pub(crate) fn resolved_outcome(name: &str, version: &str, recent: bool) -> TargetOutcome { + // Recency derives from age vs `report_with`'s 2-day threshold: + // one hour => recent, a year => not. + let age = if recent { + Duration::from_secs(3600) + } else { + Duration::from_secs(365 * 86400) + }; + TargetOutcome::Resolved { + target: InstallTarget { + name: name.to_string(), + display: format!("{name}=={version}"), + kind: TargetKind::Unverifiable { + reason: "test".to_string(), + }, + }, + resolved: crate::verify_deps::registry::ResolvedPackage { + name: name.to_string(), + version: version.to_string(), + published_at: Utc::now() - chrono::Duration::from_std(age).unwrap(), + }, + age, + verdict: VerdictStatus::NotChecked, + } +} + +pub(crate) fn report_with(outcomes: Vec) -> PrecheckReport { + PrecheckReport { + manager: PackageManager::Pip, + subcommand: "install".to_string(), + original_args: vec![], + outcomes, + threshold: Duration::from_secs(2 * 86400), + } +} + +pub(crate) fn set_verdict(outcome: &mut TargetOutcome, v: VerdictStatus) { + if let TargetOutcome::Resolved { verdict, .. } = outcome { + *verdict = v; + } +} + +pub(crate) fn vm(advisory: &str, fixed: Option<&str>) -> crate::vuln_api::VulnMatch { + crate::vuln_api::VulnMatch { + advisory_id: advisory.to_string(), + severity_level: "high".to_string(), + tier: 1, + vulnerable_version_range: None, + fixed_version: fixed.map(str::to_string), + } +} diff --git a/src/precheck/verdict.rs b/src/precheck/verdict.rs new file mode 100644 index 0000000..1fb4baa --- /dev/null +++ b/src/precheck/verdict.rs @@ -0,0 +1,328 @@ +//! 
Verdict pass: bounded vuln-api worker pool, registry resolution, and the +//! single block predicate (`block_reason`). + +use std::time::Duration; + +use super::{ + InstallTarget, PackageManager, PrecheckOptions, PrecheckReport, TargetKind, TargetOutcome, + VerdictStatus, +}; + +/// Max parallel vuln-api / registry requests. +const VERDICT_CONCURRENCY: usize = 8; + +/// Vuln-api verdict pass over resolved targets, run through the bounded +/// worker pool. No-op without a `VerdictConfig` (recency-only callers). +/// Any client/call failure becomes `Unverifiable`, which warns but never +/// blocks: public lookups fail open. +pub(super) fn run_verdict_pass( + manager: PackageManager, + outcomes: &mut [TargetOutcome], + opts: &PrecheckOptions, +) { + let Some(cfg) = &opts.verdict else { return }; + + let jobs: Vec<(usize, String, String)> = outcomes + .iter() + .enumerate() + .filter_map(|(i, o)| match o { + TargetOutcome::Resolved { resolved, .. } => { + Some((i, resolved.name.clone(), resolved.version.clone())) + } + _ => None, + }) + .collect(); + if jobs.is_empty() { + return; + } + + let client = crate::vuln_api::http_client(); + let ecosystem = manager.ecosystem(); + let verdicts = pooled_map(&jobs, VERDICT_CONCURRENCY, |(_, name, version)| { + let client = match &client { + Ok(c) => c, + Err(e) => return VerdictStatus::Unverifiable(e.clone()), + }; + match crate::vuln_api::check_package_version( + client, + &cfg.base_url, + ecosystem, + name, + version, + ) { + Ok(resp) if resp.is_vulnerable => VerdictStatus::Vulnerable(resp.matches), + Ok(_) => VerdictStatus::Clean, + Err(e) => VerdictStatus::Unverifiable(e.to_string()), + } + }); + + for ((i, _, _), v) in jobs.into_iter().zip(verdicts) { + if let TargetOutcome::Resolved { verdict, .. } = &mut outcomes[i] { + *verdict = v; + } + } +} + +/// Order-preserving bounded worker pool: `results[i]` is `f(&items[i])`. 
/// Order-preserving bounded worker pool: `results[i]` is `f(&items[i])`.
///
/// Workers pull the next unclaimed index from a shared atomic counter, so a
/// slow item never holds up the items queued behind it. At most
/// `min(concurrency, items.len())` scoped threads are spawned; when that is
/// zero or one (empty input, a single item, or `concurrency <= 1`) the items
/// are mapped on the calling thread and no thread is created.
///
/// `T: Sync` because every worker reads `items` through a shared reference;
/// `R: Send` because results are produced on worker threads and handed back
/// to the caller; `f` must be `Sync` because all workers call it through the
/// same reference.
///
/// # Panics
/// A panic inside `f` propagates to the caller once the scope has joined its
/// threads.
fn pooled_map<T: Sync, R: Send>(
    items: &[T],
    concurrency: usize,
    f: impl Fn(&T) -> R + Sync,
) -> Vec<R> {
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Mutex;

    let workers = concurrency.min(items.len());
    if workers <= 1 {
        return items.iter().map(&f).collect();
    }

    // Next index to claim. Relaxed is enough: the counter only hands out
    // distinct indices, and the scope join publishes the results.
    let cursor = AtomicUsize::new(0);
    let slots: Mutex<Vec<Option<R>>> = Mutex::new(items.iter().map(|_| None).collect());

    std::thread::scope(|scope| {
        for _ in 0..workers {
            scope.spawn(|| loop {
                let i = cursor.fetch_add(1, Ordering::Relaxed);
                let Some(item) = items.get(i) else { break };
                // Run `f` outside the lock; the lock is held only for the store.
                let value = f(item);
                slots.lock().unwrap()[i] = Some(value);
            });
        }
    });

    slots
        .into_inner()
        .unwrap()
        .into_iter()
        .map(|slot| slot.expect("pooled_map worker filled every slot"))
        .collect()
}
+pub(super) fn verify_all( + targets: &[InstallTarget], + opts: &PrecheckOptions, + now: &chrono::DateTime, + allow_prerelease: bool, +) -> Vec { + pooled_map(targets, VERDICT_CONCURRENCY, |t| { + verify_one(t, opts, now, allow_prerelease) + }) +} + +fn verify_one( + target: &InstallTarget, + opts: &PrecheckOptions, + now: &chrono::DateTime, + allow_prerelease: bool, +) -> TargetOutcome { + use crate::verify_deps::registry; + + let resolved = match &target.kind { + TargetKind::Unverifiable { reason } => { + return TargetOutcome::Skipped { + target: target.clone(), + reason: reason.clone(), + }; + } + TargetKind::Npm(spec) => { + registry::npm_resolve(&target.name, spec, opts.npm_registry.as_deref()) + } + TargetKind::Pypi(spec) => registry::pypi_resolve( + &target.name, + spec, + opts.pypi_registry.as_deref(), + allow_prerelease, + ), + }; + + match resolved { + Ok(resolved) => { + // Future publish dates clamp to zero — maximally recent. + let age = now + .signed_duration_since(resolved.published_at) + .to_std() + .unwrap_or_else(|_| Duration::from_secs(0)); + TargetOutcome::Resolved { + target: target.clone(), + resolved, + age, + verdict: VerdictStatus::NotChecked, + } + } + Err(e) => TargetOutcome::Error { + target: target.clone(), + error: e, + }, + } +} + +#[cfg(test)] +mod tests { + use super::super::test_support::*; + use super::super::{InstallTarget, PackageManager, TargetKind, TargetOutcome, VerdictStatus}; + use super::*; + + fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { + block_reason(report, opts).is_some() + } + + /// Predicate matrix: force ⇒ never block; vulnerable always blocks + /// (`--no-fail` must not waive it); unverifiable findings and resolution + /// errors never block (public mode fails open); recency blocks unless + /// `--no-fail` demotes it. 
+ #[test] + fn block_predicate_matrix() { + let clean = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Clean); + report_with(vec![o]) + }; + let recent = report_with(vec![resolved_outcome("pkg", "1.0.0", true)]); + let vulnerable = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Vulnerable(vec![])); + report_with(vec![o]) + }; + let unverifiable = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Unverifiable("503".to_string())); + report_with(vec![o]) + }; + let resolution_error = report_with(vec![TargetOutcome::Error { + target: InstallTarget { + name: "pkg".to_string(), + display: "pkg==1.0.0".to_string(), + kind: TargetKind::Unverifiable { + reason: "test".to_string(), + }, + }, + error: "registry unavailable".to_string(), + }]); + + assert!(!should_block_install(&clean, &public_opts(false, false))); + assert!(should_block_install(&recent, &public_opts(false, false))); + assert!(!should_block_install(&recent, &public_opts(true, false))); + assert!(should_block_install( + &vulnerable, + &public_opts(false, false) + )); + assert!( + should_block_install(&vulnerable, &public_opts(true, false)), + "--no-fail must not waive a vulnerable block" + ); + assert!( + !should_block_install(&unverifiable, &public_opts(false, false)), + "public mode must fail open on lookup errors" + ); + assert!( + !should_block_install(&resolution_error, &public_opts(false, false)), + "public mode must fail open when no verdict can be obtained" + ); + for report in [ + &clean, + &recent, + &vulnerable, + &unverifiable, + &resolution_error, + ] { + assert!( + !should_block_install(report, &public_opts(false, true)), + "--force must never block" + ); + } + } + + /// Verdict pass against an in-process stub: vulnerable body → Vulnerable + /// with matches; 503 override → Unverifiable; no VerdictConfig → outcomes + /// keep NotChecked. 
+ #[test] + fn verdict_pass_maps_stub_responses() { + use std::collections::HashMap; + + let key = |name: &str| crate::vuln_api_stub::key("pypi", name, "1.0.0"); + let mut checks = HashMap::new(); + checks.insert( + key("evil"), + crate::vuln_api_stub::vulnerable_body("pypi", "evil", "1.0.0", "MAL-2024-0001", None), + ); + checks.insert(key("flaky"), "{}".to_string()); + let mut statuses = HashMap::new(); + statuses.insert(key("flaky"), 503u16); + let stub = crate::vuln_api_stub::spawn_with_statuses(checks, statuses); + + let opts = verdict_opts(&stub.base_url); + + let mut outcomes = vec![ + resolved_outcome("evil", "1.0.0", false), + resolved_outcome("flaky", "1.0.0", false), + resolved_outcome("goodpkg", "1.0.0", false), // unknown → stub default clean + ]; + run_verdict_pass(PackageManager::Pip, &mut outcomes, &opts); + + let verdicts: Vec<_> = outcomes + .iter() + .map(|o| match o { + TargetOutcome::Resolved { verdict, .. } => verdict.clone(), + _ => unreachable!(), + }) + .collect(); + assert!( + matches!(&verdicts[0], VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") + ); + assert!(matches!(&verdicts[1], VerdictStatus::Unverifiable(_))); + assert!(matches!(&verdicts[2], VerdictStatus::Clean)); + + // Without a VerdictConfig the pass is a no-op. + let mut untouched = vec![resolved_outcome("evil", "1.0.0", false)]; + let no_verdict = stub_opts(); + run_verdict_pass(PackageManager::Pip, &mut untouched, &no_verdict); + assert!(matches!( + &untouched[0], + TargetOutcome::Resolved { + verdict: VerdictStatus::NotChecked, + .. + } + )); + } + + /// `pooled_map` maps every item and preserves order at any concurrency + /// (1 = serial, 8 > item count = all workers spawn but some drain empty). 
+ #[test] + fn pooled_map_preserves_order_at_any_concurrency() { + let items: Vec = (0..6).collect(); + for concurrency in [1usize, 8] { + assert_eq!( + pooled_map(&items, concurrency, |i| i * 2), + vec![0, 2, 4, 6, 8, 10], + "concurrency {concurrency}" + ); + } + } +} diff --git a/src/verify_deps/mod.rs b/src/verify_deps/mod.rs new file mode 100644 index 0000000..b813529 --- /dev/null +++ b/src/verify_deps/mod.rs @@ -0,0 +1,137 @@ +//! Slim slice of #89's verify_deps: registry resolution + threshold helpers. + +pub mod registry; + +use std::time::Duration; + +/// Parse a human-friendly duration like `2d`, `48h`, `30m`, `45s`, or +/// a bare integer (interpreted as days). Returns the parsed duration. +pub fn parse_threshold(input: &str) -> Result { + let s = input.trim(); + if s.is_empty() { + return Err("threshold cannot be empty".to_string()); + } + + let (num_str, unit) = match s.chars().last() { + Some(c) if c.is_ascii_alphabetic() => { + (&s[..s.len() - c.len_utf8()], c.to_ascii_lowercase()) + } + _ => (s, 'd'), + }; + + let value: f64 = num_str + .trim() + .parse() + .map_err(|_| format!("invalid threshold number: '{}'", num_str))?; + + if value < 0.0 || !value.is_finite() { + return Err(format!( + "threshold must be a non-negative finite number: '{}'", + input + )); + } + + let secs = match unit { + 's' => value, + 'm' => value * 60.0, + 'h' => value * 3600.0, + 'd' => value * 86400.0, + 'w' => value * 7.0 * 86400.0, + other => { + return Err(format!( + "unknown threshold unit '{}'. Use s, m, h, d, or w.", + other + )) + } + }; + + let d = Duration::try_from_secs_f64(secs).map_err(|_| "threshold too large".to_string())?; + // Establish the invariant every consumer relies on: the threshold + // must also fit in a `chrono::Duration` (see precheck's from_std). + chrono::Duration::from_std(d).map_err(|_| "threshold too large".to_string())?; + Ok(d) +} + +/// Format a Duration as a short human-readable string (e.g. `1d 4h`). 
/// Format a `Duration` as a short human-readable string (e.g. `1d 4h`).
///
/// At most two units are shown and everything smaller is truncated:
/// under a minute prints seconds, under an hour prints whole minutes,
/// under a day prints hours plus any non-zero minutes, and anything longer
/// prints days plus any non-zero hours. Sub-second precision is dropped.
pub fn format_duration(d: Duration) -> String {
    const MINUTE: u64 = 60;
    const HOUR: u64 = 60 * MINUTE;
    const DAY: u64 = 24 * HOUR;

    match d.as_secs() {
        secs if secs < MINUTE => format!("{}s", secs),
        secs if secs < HOUR => format!("{}m", secs / MINUTE),
        secs if secs < DAY => match (secs / HOUR, (secs % HOUR) / MINUTE) {
            (hours, 0) => format!("{}h", hours),
            (hours, mins) => format!("{}h {}m", hours, mins),
        },
        secs => match (secs / DAY, (secs % DAY) / HOUR) {
            (days, 0) => format!("{}d", days),
            (days, hours) => format!("{}d {}h", days, hours),
        },
    }
}
+ assert!(parse_threshold("1e308d").is_err()); + } + + #[test] + fn format_duration_short() { + assert_eq!(format_duration(Duration::from_secs(5)), "5s"); + assert_eq!(format_duration(Duration::from_secs(120)), "2m"); + assert_eq!(format_duration(Duration::from_secs(3600)), "1h"); + assert_eq!(format_duration(Duration::from_secs(3700)), "1h 1m"); + assert_eq!(format_duration(Duration::from_secs(86400)), "1d"); + assert_eq!(format_duration(Duration::from_secs(90000)), "1d 1h"); + } +} diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs new file mode 100644 index 0000000..8168fc5 --- /dev/null +++ b/src/verify_deps/registry.rs @@ -0,0 +1,997 @@ +//! Registry lookups for npm and PyPI publish times. +//! +//! These talk to public registries (no auth) and are kept independent +//! of the rest of the CLI's HTTP client because: +//! * we must not send the user's Corgea auth header to a third-party, +//! * the timeouts and retry policy are different. +//! +//! Both resolvers turn a version spec into the concrete version that +//! would be installed, plus its publish time as a UTC timestamp. + +use chrono::{DateTime, Utc}; +use serde::Deserialize; +use std::sync::OnceLock; +use std::time::Duration; + +const DEFAULT_NPM_REGISTRY: &str = "https://registry.npmjs.org"; +const DEFAULT_PYPI_REGISTRY: &str = "https://pypi.org"; + +// Matches `vuln_api::REQUEST_TIMEOUT` so a gate run degrades uniformly: +// both legs of a verdict pass give up at the same horizon. 
+const REQUEST_TIMEOUT: Duration = Duration::from_secs(30); + +use crate::vuln_api::{encode_npm_name, user_agent}; + +fn http_client() -> &'static reqwest::blocking::Client { + static CLIENT: OnceLock = OnceLock::new(); + CLIENT.get_or_init(|| { + reqwest::blocking::Client::builder() + .timeout(REQUEST_TIMEOUT) + .user_agent(user_agent("deps")) + .build() + .expect("registry http client") + }) +} + +/// Shared fetch/parse boilerplate for registry metadata GETs: 404 → "not +/// found", other non-success → status error, then parse the JSON body. +/// `label` names the registry in error messages ("npm registry" / "PyPI"). +fn fetch_registry_json( + url: &str, + label: &str, + name: &str, + base: &str, +) -> Result { + let resp = http_client() + .get(url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("{} request failed: {}", label, e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!( + "package '{}' not found on {} ({})", + name, label, base + )); + } + if !status.is_success() { + return Err(format!( + "{} returned status {} for '{}'", + label, status, name + )); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read {} response: {}", label, e))?; + serde_json::from_str(&body) + .map_err(|e| format!("failed to parse {} response for '{}': {}", label, name, e)) +} + +#[derive(Debug, Deserialize)] +struct PypiUrl { + upload_time_iso_8601: Option, + upload_time: Option, + /// PEP 592. PyPI's JSON API emits a bool; some mirrors emit the + /// yank reason string instead. Either form means yanked. + #[serde(default)] + yanked: Option, +} + +impl PypiUrl { + fn is_yanked(&self) -> bool { + match &self.yanked { + Some(serde_json::Value::Bool(b)) => *b, + Some(serde_json::Value::String(_)) => true, + _ => false, + } + } +} + +/// Parse an ISO-8601 timestamp from npm or PyPI. 
PyPI sometimes emits +/// a naive timestamp like `2023-05-22T18:30:00` (no offset) which +/// chrono's RFC3339 parser rejects, so we accept both shapes. +fn parse_iso8601(raw: &str) -> Result, String> { + if let Ok(dt) = DateTime::parse_from_rfc3339(raw) { + return Ok(dt.with_timezone(&Utc)); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S") { + return Ok(DateTime::::from_naive_utc_and_offset(naive, Utc)); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S%.f") { + return Ok(DateTime::::from_naive_utc_and_offset(naive, Utc)); + } + Err(format!("unrecognised timestamp format: {}", raw)) +} + +/// What the user typed after `pkg@` in an install command. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum NpmSpec { + /// `axios`, `axios@`, or no spec — resolve to the `latest` dist-tag. + Latest, + /// `axios@latest`, `axios@next`, etc. + Tag(String), + /// `axios@1.2.3` — already resolved. + Exact(String), + /// `axios@^1.0.0`, `axios@~1.2.0`, `axios@>=1.0.0 <2.0.0`, etc. + Range(String), +} + +#[derive(Debug, Clone)] +pub struct ResolvedPackage { + pub name: String, + pub version: String, + pub published_at: DateTime, +} + +#[derive(Debug, Deserialize)] +struct NpmFullMetadata { + #[serde(default, rename = "dist-tags")] + dist_tags: std::collections::BTreeMap, + /// Only the keys (published version strings) are used; `IgnoredAny` + /// avoids allocating multi-MB JSON trees for big packuments. + #[serde(default)] + versions: std::collections::BTreeMap, + #[serde(default)] + time: std::collections::BTreeMap, +} + +/// Resolve an `NpmSpec` against the npm registry and return the +/// concrete version + publish time. Used by install wrappers when the +/// install command says e.g. `axios@^1.0.0` and we need to know what +/// would actually be installed before the install runs. 
+pub fn npm_resolve( + name: &str, + spec: &NpmSpec, + registry: Option<&str>, +) -> Result { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry + .unwrap_or(DEFAULT_NPM_REGISTRY) + .trim_end_matches('/'); + let url = format!("{}/{}", base, encode_npm_name(name)); + let meta: NpmFullMetadata = fetch_registry_json(&url, "npm registry", name, base)?; + + let resolved_version = match spec { + NpmSpec::Latest => meta.dist_tags.get("latest").cloned().ok_or_else(|| { + format!( + "package '{}' has no 'latest' dist-tag on the npm registry", + name + ) + })?, + NpmSpec::Tag(tag) => meta.dist_tags.get(tag).cloned().ok_or_else(|| { + format!( + "package '{}' has no dist-tag named '{}' (available: {})", + name, + tag, + meta.dist_tags + .keys() + .cloned() + .collect::>() + .join(", "), + ) + })?, + NpmSpec::Exact(v) => { + if !meta.versions.contains_key(v) { + return Err(format!( + "version '{}' for package '{}' was not found on the npm registry", + v, name + )); + } + v.clone() + } + NpmSpec::Range(range) => { + npm_pick_highest_matching(&meta.versions, range).ok_or_else(|| { + format!( + "no published version of '{}' satisfies range '{}'", + name, range + ) + })? + } + }; + + let raw_time = meta.time.get(&resolved_version).ok_or_else(|| { + format!( + "publish time missing for {}@{} on the npm registry", + name, resolved_version + ) + })?; + + let published_at = parse_iso8601(raw_time).map_err(|e| { + format!( + "could not parse publish time '{}' for {}@{}: {}", + raw_time, name, resolved_version, e + ) + })?; + + Ok(ResolvedPackage { + name: name.to_string(), + version: resolved_version, + published_at, + }) +} + +/// Translate an npm-style version range to `semver::VersionReq` +/// alternatives (one per `||` branch — any-match). 
Handles npm grammar +/// the Rust crate doesn't: whitespace AND separators, hyphen ranges +/// (`1.0.0 - 2.0.0`), `||` unions, and bare partials (`1.0`, which npm +/// reads as `1.0.x` but Cargo would read as `^1.0`). +fn parse_npm_range(range: &str) -> Option> { + range + .split("||") + .map(|alt| parse_npm_range_alternative(alt.trim())) + .collect() +} + +fn parse_npm_range_alternative(alt: &str) -> Option { + if let Some((lo, hi)) = alt.split_once(" - ") { + return hyphen_range(lo.trim(), hi.trim()); + } + if let Some(tilde) = bare_partial_to_tilde(alt) { + return semver::VersionReq::parse(&tilde).ok(); + } + if let Ok(req) = semver::VersionReq::parse(alt) { + return Some(req); + } + let normalised = alt.split_whitespace().collect::>().join(","); + semver::VersionReq::parse(&normalised).ok() +} + +/// node-semver hyphen range `A - B`. A partial low bound fills with zeros +/// (`1.2` → `>=1.2.0`); a partial high bound excludes the next component +/// (`- 2.3` → `<2.4.0`, `- 2` → `<3.0.0`), matching npm. +fn hyphen_range(lo: &str, hi: &str) -> Option { + let lo_v = pad_partial(lo)?; + let hi_segments = hi.split('.').count(); + let hi_v = pad_partial(hi)?; + let expr = match hi_segments { + 1 => format!(">={lo_v}, <{}", semver::Version::new(hi_v.major + 1, 0, 0)), + 2 => format!( + ">={lo_v}, <{}", + semver::Version::new(hi_v.major, hi_v.minor + 1, 0) + ), + _ => format!(">={lo_v}, <={hi_v}"), + }; + semver::VersionReq::parse(&expr).ok() +} + +/// `1.2` → `1.2.0` (accepts an optional leading `v`, like npm). +fn pad_partial(v: &str) -> Option { + let v = v.trim(); + let v = v.strip_prefix('v').unwrap_or(v); + let mut segments: Vec<&str> = v.split('.').collect(); + while segments.len() < 3 { + segments.push("0"); + } + semver::Version::parse(&segments.join(".")).ok() +} + +/// npm desugars a bare two-component version (`1.0`) to the x-range +/// `1.0.x`; Cargo's `VersionReq` would read it as caret (`^1.0`, matching +/// 1.9). 
/// Rewrite a bare two-component version (`1.0`) as the tilde requirement
/// `~1.0`.
///
/// npm desugars `1.0` to the x-range `1.0.x`, whereas `VersionReq` would
/// read it as caret (`^1.0`, which also matches `1.9`); tilde has npm's
/// bounds. Returns `None` for anything that is not exactly two non-empty
/// runs of ASCII digits separated by a single dot, leaving operators,
/// wildcards, full versions and single numbers to the other parsers.
fn bare_partial_to_tilde(alt: &str) -> Option<String> {
    let (major, minor) = alt.split_once('.')?;
    // A second dot ends up in `minor` and fails the digit check below.
    let is_number = |part: &str| !part.is_empty() && part.bytes().all(|b| b.is_ascii_digit());
    if is_number(major) && is_number(minor) {
        Some(format!("~{alt}"))
    } else {
        None
    }
}
+/// The canonical name is accepted only when PEP 503-equivalent to the +/// request, so a hostile mirror can't redirect the verdict to a different +/// package's (clean) identity. +fn canonical_pypi_name(requested: &str, info_name: Option<&str>) -> String { + use crate::deps::ecosystems::pypi::normalize_pypi_name; + match info_name { + Some(n) if !n.is_empty() && normalize_pypi_name(n) == normalize_pypi_name(requested) => { + n.to_string() + } + _ => requested.to_string(), + } +} + +/// Resolve a `PypiSpec` against PyPI and return the concrete version plus +/// publish time. The latest non-prerelease, non-yanked release is preferred. +/// +/// The `allow_prerelease` flag mirrors pip's `--pre`: when set, prerelease +/// versions become eligible for `Latest`/specifier resolution so the gate +/// verdicts the version pip would actually install, not the latest stable. +pub fn pypi_resolve( + name: &str, + spec: &PypiSpec, + registry: Option<&str>, + allow_prerelease: bool, +) -> Result { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry + .unwrap_or(DEFAULT_PYPI_REGISTRY) + .trim_end_matches('/'); + let url = format!("{}/pypi/{}/json", base, urlencoding::encode(name)); + let meta: PypiInfoResponse = fetch_registry_json(&url, "PyPI", name, base)?; + + let candidates = collect_pypi_candidates(&meta); + // A yanked release resolves only via an exact pin (PEP 592), matching + // pip — otherwise we'd gate a version pip would never choose. + let installable: Vec = + candidates.iter().filter(|c| !c.yanked).cloned().collect(); + let chosen = match spec { + PypiSpec::Latest => { + pick_latest_stable(&installable, allow_prerelease).map(|c| c.version.clone()) + } + // PEP 440 equality, not string equality: `==2.31` must match the + // release key `2.31.0` (and resolve to the key, so the publish-time + // lookup below finds it). 
+ PypiSpec::Exact(v) => { + let want = PypiVersion::parse(v); + candidates + .iter() + .find(|c| { + &c.version == v + || matches!( + (&want, PypiVersion::parse(&c.version)), + (Some(w), Some(cv)) if *w == cv + ) + }) + .map(|c| c.version.clone()) + } + PypiSpec::Specifier(spec_str) => { + pypi_resolve_specifier(&installable, spec_str, allow_prerelease) + .map_err(|e| format!("{} for '{}'", e, name))? + } + }; + + let chosen = chosen.ok_or_else(|| match spec { + PypiSpec::Exact(v) => { + format!( + "version '{}' for package '{}' was not found on PyPI", + v, name + ) + } + _ => format!("no installable version found for '{}' on PyPI", name), + })?; + + let published_at = candidates + .iter() + .find(|c| c.version == chosen) + .map(|c| c.uploaded) + .ok_or_else(|| { + format!( + "no upload timestamp for '{}' version '{}' on PyPI", + name, chosen + ) + })?; + + Ok(ResolvedPackage { + // Carry the registry's canonical spelling forward so the vuln-api + // check hits the advisory row keyed by it (see canonical_pypi_name). + name: canonical_pypi_name(name, meta.info.name.as_deref()), + version: chosen, + published_at, + }) +} + +/// One published release a `PypiSpec` can resolve to. +#[derive(Debug, Clone)] +struct PypiCandidate { + version: String, + uploaded: DateTime, + /// Every artifact of this release is yanked (PEP 592) — pip skips + /// it for anything but an exact pin, so non-exact resolution must too. + yanked: bool, +} + +/// Returns a candidate for every release that has at least one uploaded, +/// timestamped artifact. Empty or timestampless release entries (which +/// PyPI sometimes keeps around for deleted / private versions) are +/// filtered out so we never pick them. 
+fn collect_pypi_candidates(meta: &PypiInfoResponse) -> Vec { + let mut out = Vec::new(); + for (ver, files) in &meta.releases { + if files.is_empty() { + continue; + } + let earliest = files + .iter() + .filter_map(|f| { + f.upload_time_iso_8601 + .as_deref() + .or(f.upload_time.as_deref()) + }) + .filter_map(|raw| parse_iso8601(raw).ok()) + .min(); + if let Some(dt) = earliest { + out.push(PypiCandidate { + version: ver.clone(), + uploaded: dt, + yanked: files.iter().all(PypiUrl::is_yanked), + }); + } + } + out +} + +/// PEP 440-ish ordering key: the semver-parsed release plus its `.postN` +/// number. Post-releases order after their base (`1.0.post1` > `1.0`) and +/// pip installs them by default — dropping them from candidates would +/// verdict a different version than the install. Prereleases (`1.0rc1`, +/// `1.0a2`, `1.0.dev3`) parse with a rank-encoded semver prerelease so they +/// order dev < a < b < rc and all below the plain release; they are filtered +/// out at resolution time unless `--pre` is set. Epochs (`1!2.0`) and local +/// versions (`1.0+abc`) remain unsupported and are skipped. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +struct PypiVersion { + base: semver::Version, + /// `.postN` number; `None` for a plain release. Ordering: derive(Ord) + /// compares `base` first, then `post` (`None` < `Some(_)`), which is + /// exactly PEP 440's post-release ordering. 
/// Split a PEP 440 release string into its numeric release and an optional
/// semver-style prerelease identifier.
///
/// The identifier is `<rank><label>.<number>` with ranks `0dev` < `1a` <
/// `2b` < `3rc`, so comparing the identifiers as semver prereleases orders
/// them dev < a < b < rc. `c` is accepted as a spelling of `rc`; separators
/// (`.`, `-`, `_`) around the marker are ignored; a missing number counts
/// as 0 (`1.0a` is `1.0a0`).
///
/// Returns:
/// * `Some((v, None))` when `v` contains no ASCII letter (a plain release);
/// * `Some((release, Some(pre)))` for a recognized prerelease;
/// * `None` when the text after the first letter is not exactly one
///   recognized marker followed by digits (for example `1.0a1.dev2`,
///   `1.0+abc`, `1.0.post1`), or when the number does not fit in `u64`.
///   The caller skips such versions instead of guessing their order.
fn split_pep440_prerelease(v: &str) -> Option<(&str, Option<String>)> {
    // (spelling, rank, canonical label); `rc` must be tried before `c`
    // cannot matter here since no other marker starts with `r`.
    const MARKERS: [(&str, u8, &str); 5] = [
        ("rc", 3, "rc"),
        ("dev", 0, "dev"),
        ("a", 1, "a"),
        ("b", 2, "b"),
        ("c", 3, "rc"),
    ];
    const SEPARATORS: [char; 3] = ['.', '-', '_'];

    let Some(first_letter) = v.find(|c: char| c.is_ascii_alphabetic()) else {
        return Some((v, None));
    };
    let release = v[..first_letter].trim_end_matches(SEPARATORS);
    let suffix = &v[first_letter..];

    let (rank, label, rest) = MARKERS.iter().find_map(|&(spelling, rank, label)| {
        suffix
            .strip_prefix(spelling)
            .map(|rest| (rank, label, rest))
    })?;

    let digits = rest.trim_start_matches(SEPARATORS);
    // Anything left over that is not a plain number means the suffix was
    // not fully understood.
    if !digits.chars().all(|c| c.is_ascii_digit()) {
        return None;
    }
    let number: u64 = if digits.is_empty() {
        0
    } else {
        // An out-of-range number is rejected rather than treated as 0,
        // which would order the version incorrectly.
        digits.parse().ok()?
    };
    Some((release, Some(format!("{rank}{label}.{number}"))))
}
+pub(crate) fn normalize_for_semver(v: &str) -> String { + if v.contains('!') + || v.contains('a') + || v.contains('b') + || v.contains("rc") + || v.contains(".dev") + { + return v.to_string(); + } + let parts: Vec<&str> = v.split('.').collect(); + match parts.len() { + 1 => format!("{}.0.0", parts[0]), + 2 => format!("{}.{}.0", parts[0], parts[1]), + _ => v.to_string(), + } +} + +/// Apply a PEP 440-style specifier expression to the candidate list +/// and return the highest match (`Ok(None)` when nothing satisfies it). +/// Supported operators: `==` (incl. wildcards `==1.4.*`), `>=`, `>`, +/// `<=`, `<`, `~=`, `!=`. An expression we can't parse (unknown operator, +/// exotic version) is `Err` — resolving anything else would gate a +/// different version than the package manager installs. +fn pypi_resolve_specifier( + candidates: &[PypiCandidate], + spec: &str, + allow_prerelease: bool, +) -> Result, String> { + let parts: Vec<&str> = spec.split(',').map(|s| s.trim()).collect(); + let mut requirements: Vec<(&'static str, semver::Version)> = Vec::new(); + + // Longest prefixes first so `>=` never matches as `>`. + const OPERATORS: &[(&str, &str)] = &[ + ("===", "=="), + ("==", "=="), + (">=", ">="), + ("<=", "<="), + ("!=", "!="), + ("~=", "~="), + (">", ">"), + ("<", "<"), + ]; + for p in &parts { + let unsupported = || format!("unsupported version specifier '{}'", spec); + let (op, val) = OPERATORS + .iter() + .find_map(|(prefix, op)| p.strip_prefix(prefix).map(|v| (*op, v.trim()))) + .ok_or_else(unsupported)?; + // Wildcard pin `==X.Y.*` — desugar to the half-open range it means. 
+ if op == "==" { + if let Some(prefix) = val.strip_suffix(".*") { + let (lo, hi) = wildcard_bounds(prefix).ok_or_else(unsupported)?; + requirements.push((">=", lo)); + requirements.push(("<", hi)); + continue; + } + } + if val.contains('*') { + return Err(unsupported()); + } + let v = semver::Version::parse(&normalize_for_semver(val)).map_err(|_| unsupported())?; + // PEP 440 `~=X.Y` bumps the LAST release component of the written + // spec: `~=1.4` means `<2.0`, `~=1.4.5` means `<1.5.0`. Desugar + // here — the padded `v` has lost the component count. + if op == "~=" { + let hi = match val.split('.').count() { + 2 => semver::Version::new(v.major + 1, 0, 0), + 3 => semver::Version::new(v.major, v.minor + 1, 0), + _ => return Err(unsupported()), + }; + requirements.push((">=", v)); + requirements.push(("<", hi)); + continue; + } + requirements.push((op, v)); + } + + // PEP 440 comparison against a candidate that may be a post-release: + // `>=V` includes V's posts, `>V`/`<=V` exclude them, `==V` matches + // only the plain release. + let satisfies = |c: &PypiVersion| { + requirements.iter().all(|(op, want)| match *op { + "==" => c.base == *want && c.post.is_none(), + ">=" => c.base >= *want, + "<=" => c.base < *want || (c.base == *want && c.post.is_none()), + "!=" => !(c.base == *want && c.post.is_none()), + ">" => c.base > *want, + "<" => c.base < *want, + _ => false, + }) + }; + Ok(candidates + .iter() + .filter_map(|c| PypiVersion::parse(&c.version).map(|v| (v, &c.version))) + .filter(|(v, _)| (allow_prerelease || !v.is_prerelease()) && satisfies(v)) + .max_by(|(a, _), (b, _)| a.cmp(b)) + .map(|(_, raw)| raw.clone())) +} + +/// `==X.*` / `==X.Y.*` / `==X.Y.Z.*` bounds: everything the written prefix +/// covers, half-open at the bumped last component. 
+fn wildcard_bounds(prefix: &str) -> Option<(semver::Version, semver::Version)> { + let lo = semver::Version::parse(&normalize_for_semver(prefix)).ok()?; + let hi = match prefix.split('.').count() { + 1 => semver::Version::new(lo.major + 1, 0, 0), + 2 => semver::Version::new(lo.major, lo.minor + 1, 0), + 3 => semver::Version::new(lo.major, lo.minor, lo.patch + 1), + _ => return None, + }; + Some((lo, hi)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn candidates(versions: &[&str]) -> Vec { + versions + .iter() + .map(|v| PypiCandidate { + version: v.to_string(), + uploaded: Utc::now(), + yanked: false, + }) + .collect() + } + + #[test] + fn specifier_resolves_highest_match() { + let c = candidates(&["1.0.0", "2.5.0", "3.0.0"]); + assert_eq!( + pypi_resolve_specifier(&c, ">=1.0,<3", false).expect("parse"), + Some("2.5.0".to_string()) + ); + } + + #[test] + fn prerelease_eligible_only_with_allow_prerelease() { + // `2.0.0rc1` is a prerelease. pip's default skips it; `--pre` selects + // it as the newest, so the gate must verdict it instead of 1.0.0. + let c = candidates(&["1.0.0", "2.0.0rc1"]); + assert_eq!( + pick_latest_stable(&c, false).map(|c| c.version.as_str()), + Some("1.0.0"), + "default resolution excludes the prerelease" + ); + assert_eq!( + pick_latest_stable(&c, true).map(|c| c.version.as_str()), + Some("2.0.0rc1"), + "--pre makes the prerelease eligible" + ); + // Same for specifier resolution. 
+ assert_eq!( + pypi_resolve_specifier(&c, ">=1.0", false).expect("parse"), + Some("1.0.0".to_string()) + ); + assert_eq!( + pypi_resolve_specifier(&c, ">=1.0", true).expect("parse"), + Some("2.0.0rc1".to_string()) + ); + } + + #[test] + fn specifier_with_no_match_is_ok_none() { + let c = candidates(&["1.0.0"]); + assert_eq!( + pypi_resolve_specifier(&c, ">=9.0", false).expect("parse"), + None + ); + } + + #[test] + fn canonical_pypi_name_accepts_equivalent_rejects_other() { + // Registry canonical spelling wins when PEP 503-equivalent… + assert_eq!( + canonical_pypi_name("Flask_Cors", Some("Flask-Cors")), + "Flask-Cors" + ); + assert_eq!( + canonical_pypi_name("zope-interface", Some("zope.interface")), + "zope.interface" + ); + // …but a non-equivalent name (hostile mirror) keeps the request. + assert_eq!( + canonical_pypi_name("flask-cors", Some("requests")), + "flask-cors" + ); + assert_eq!(canonical_pypi_name("flask-cors", None), "flask-cors"); + assert_eq!(canonical_pypi_name("flask-cors", Some("")), "flask-cors"); + } + + #[test] + fn latest_with_no_parseable_version_is_none_not_a_guess() { + // When nothing parses as PEP 440, guessing by upload time could + // pick a prerelease without --pre. None → a visible resolution + // error, consistent with unparseable specifiers. + let c = candidates(&["2!1.0", "weird-version"]); + assert!(pick_latest_stable(&c, false).is_none()); + assert!(pick_latest_stable(&c, true).is_none()); + } + + #[test] + fn unparseable_specifier_errors_instead_of_falling_back() { + // Resolving "latest stable" for an expression we can't represent + // would gate the wrong version. 
+ let c = candidates(&["1.0.0", "2.0.0"]); + for spec in ["@weird", ">= not-a-version", "!=1.*"] { + let err = pypi_resolve_specifier(&c, spec, false).expect_err(spec); + assert!( + err.contains("unsupported version specifier"), + "{spec}: {err}" + ); + } + } + + #[test] + fn wildcard_pin_resolves_as_a_range() { + // pip: `==4.2.*` matches the 4.2 series, highest first. + let c = candidates(&["4.1.0", "4.2.0", "4.2.9", "4.3.0"]); + assert_eq!( + pypi_resolve_specifier(&c, "==4.2.*", false).expect("parse"), + Some("4.2.9".to_string()) + ); + let c = candidates(&["0.9.0", "1.0.0", "1.9.0", "2.0.0"]); + assert_eq!( + pypi_resolve_specifier(&c, "==1.*", false).expect("parse"), + Some("1.9.0".to_string()) + ); + } + + #[test] + fn compatible_release_bumps_the_written_component() { + // PEP 440: `~=4.0` means `>=4.0, <5.0` (NOT `<4.1`) — pip installs + // 4.2.x, so the gate must verdict the same series. + let c = candidates(&["4.0.0", "4.0.5", "4.2.9", "5.0.0"]); + assert_eq!( + pypi_resolve_specifier(&c, "~=4.0", false).expect("parse"), + Some("4.2.9".to_string()) + ); + // `~=1.4.5` means `>=1.4.5, <1.5.0`. + let c = candidates(&["1.4.4", "1.4.6", "1.5.0"]); + assert_eq!( + pypi_resolve_specifier(&c, "~=1.4.5", false).expect("parse"), + Some("1.4.6".to_string()) + ); + } + + #[test] + fn post_releases_resolve_and_outrank_their_base() { + // pip installs post-releases by default; dropping them would + // verdict a different version than the install. + let c = candidates(&["1.0", "1.0.post1", "0.9.0"]); + assert_eq!( + pypi_resolve_specifier(&c, ">=1.0", false).expect("parse"), + Some("1.0.post1".to_string()) + ); + assert_eq!( + pick_latest_stable(&c, false).map(|c| c.version.as_str()), + Some("1.0.post1") + ); + // …but a plain `==1.0` pin means the base release, not its posts. + assert_eq!( + pypi_resolve_specifier(&c, "==1.0", false).expect("parse"), + Some("1.0".to_string()) + ); + // PEP 440: `>V` excludes V's own post-releases. 
+ assert_eq!( + pypi_resolve_specifier(&c, ">1.0", false).expect("parse"), + None + ); + } + + #[test] + fn yanked_only_releases_are_flagged() { + // 2.0.0 has every file yanked (one bool, one mirror-style reason + // string); 1.0.0 has a non-yanked file. Timestamps alone must not + // decide yanked status — yanked files keep theirs. + let meta: PypiInfoResponse = serde_json::from_str( + r#"{"releases":{ + "1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z","yanked":false}], + "2.0.0":[{"upload_time_iso_8601":"2021-01-01T00:00:00Z","yanked":true}, + {"upload_time_iso_8601":"2021-01-01T00:00:00Z","yanked":"broken build"}] + }}"#, + ) + .expect("parse pypi json"); + let candidates = collect_pypi_candidates(&meta); + let yanked_of = |v: &str| candidates.iter().find(|c| c.version == v).unwrap().yanked; + assert!(!yanked_of("1.0.0")); + assert!(yanked_of("2.0.0")); + + // Latest/specifier resolution must skip the yanked release… + let installable: Vec = + candidates.iter().filter(|c| !c.yanked).cloned().collect(); + assert_eq!( + pick_latest_stable(&installable, false).map(|c| c.version.as_str()), + Some("1.0.0") + ); + assert_eq!( + pypi_resolve_specifier(&installable, ">=1.0", false).expect("parse"), + Some("1.0.0".to_string()) + ); + // …while an exact pin still finds it (pip installs it with a warning). 
+ assert!(candidates.iter().any(|c| c.version == "2.0.0")); + } + + #[test] + fn release_with_partially_yanked_files_stays_installable() { + let meta: PypiInfoResponse = serde_json::from_str( + r#"{"releases":{"1.5.0":[ + {"upload_time_iso_8601":"2020-06-01T00:00:00Z","yanked":true}, + {"upload_time_iso_8601":"2020-06-01T00:00:00Z","yanked":false} + ]}}"#, + ) + .expect("parse pypi json"); + let candidates = collect_pypi_candidates(&meta); + assert!(!candidates[0].yanked); + } + + #[test] + fn parses_iso8601_variants() { + assert!(parse_iso8601("2024-01-02T03:04:05Z").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05.123Z").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05+00:00").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05").is_ok()); + assert!(parse_iso8601("not a date").is_err()); + } + + /// Network-touching integration tests. Skipped by default (#[ignore]) + /// so unit-test runs stay hermetic. Run with: + /// cargo test -- --ignored verify_deps::registry::tests::live + #[test] + #[ignore] + fn live_npm_resolve_latest() { + let r = npm_resolve("left-pad", &NpmSpec::Latest, None).expect("npm resolve latest"); + assert_eq!(r.name, "left-pad"); + assert_eq!(r.version, "1.3.0"); + assert_eq!(r.published_at.format("%Y-%m-%d").to_string(), "2018-04-09"); + } + + #[test] + #[ignore] + fn live_npm_resolve_exact() { + let r = npm_resolve("left-pad", &NpmSpec::Exact("1.3.0".to_string()), None) + .expect("npm resolve exact"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_range() { + let r = npm_resolve("left-pad", &NpmSpec::Range("^1.0.0".to_string()), None) + .expect("npm resolve range"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_npm_style_range() { + // npm uses spaces, the Rust crate uses commas — we should + // accept both. 
+ let r = npm_resolve( + "left-pad", + &NpmSpec::Range(">=1.0.0 <2.0.0".to_string()), + None, + ) + .expect("npm resolve space-range"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_unknown_tag() { + let err = npm_resolve( + "left-pad", + &NpmSpec::Tag("does-not-exist".to_string()), + None, + ) + .err() + .unwrap(); + assert!(err.contains("dist-tag"), "got: {}", err); + } + + #[test] + #[ignore] + fn live_pypi_resolve_latest() { + let r = pypi_resolve("flask", &PypiSpec::Latest, None, false).expect("pypi resolve latest"); + assert_eq!(r.name, "flask"); + assert!(!r.version.is_empty()); + } + + #[test] + #[ignore] + fn live_pypi_resolve_exact() { + let r = pypi_resolve( + "requests", + &PypiSpec::Exact("2.31.0".to_string()), + None, + false, + ) + .expect("pypi resolve exact"); + assert_eq!(r.version, "2.31.0"); + assert_eq!(r.published_at.format("%Y-%m-%d").to_string(), "2023-05-22"); + } + + #[test] + #[ignore] + fn live_pypi_resolve_specifier() { + let r = pypi_resolve( + "requests", + &PypiSpec::Specifier(">=2.30,<2.32".to_string()), + None, + false, + ) + .expect("pypi resolve specifier"); + // `requests==2.31.0` is the only release in [2.30, 2.32). + assert_eq!(r.version, "2.31.0"); + } +} diff --git a/tests/cli_exec_fallback.rs b/tests/cli_exec_fallback.rs new file mode 100644 index 0000000..87dcdbd --- /dev/null +++ b/tests/cli_exec_fallback.rs @@ -0,0 +1,111 @@ +//! Hermetic e2e tests for package-manager binary resolution: the pip→pip3 +//! fallback and the missing-binary error (exit 127). +//! +//! Same harness shape as `cli_install.rs`: the real `corgea` binary, a local +//! TcpListener stub standing in for PyPI, and a controlled `PATH` dir that +//! either holds a fake `pip3` (recording its argv to a marker file) or +//! nothing at all. Unix-only — the fake manager is a shell script. 
+ +#![cfg(unix)] + +mod common; + +use common::GateHarness; + +#[test] +fn pip_install_falls_back_to_pip3_when_pip_missing() { + let mut h = GateHarness::new() + .fake_recorder("pip3", 0) + .oldpkg_registry() + .build(); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install oldpkg==1.0.0"), + "the install must run via pip3 with forwarded args" + ); +} + +#[test] +fn pip_passthrough_falls_back_to_pip3() { + let mut h = GateHarness::new() + .fake_recorder("pip3", 0) + .oldpkg_registry() + .build(); + let out = h.cmd.args(["pip", "list"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("list")); +} + +#[test] +fn pip_missing_both_pip_and_pip3_exits_127_with_message() { + let mut h = GateHarness::new().oldpkg_registry().build(); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(127)); + assert_eq!(h.recorded_argv(), None, "nothing must have run"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: 'pip' not found on PATH (also tried 'pip3')"), + "stderr: {stderr}" + ); +} + +#[test] +fn pip3_top_level_command_prints_pip_wrapper_suggestion() { + let mut h = GateHarness::new() + .fake_recorder("pip3", 0) + .oldpkg_registry() + .build(); + let out = h + .cmd + .args(["pip3", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip3 must not run"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: unknown package manager `pip3`."), + "stderr: {stderr}" + ); + assert!( + 
stderr.contains("Did you mean `corgea pip install oldpkg==1.0.0`?"), + "stderr: {stderr}" + ); + assert!( + String::from_utf8_lossy(&out.stdout).is_empty(), + "stdout: {}", + String::from_utf8_lossy(&out.stdout) + ); +} + +#[test] +fn npm_missing_binary_error_names_binary_without_fallback() { + let mut h = GateHarness::new().oldpkg_registry().build(); + let out = h.cmd.args(["npm", "list"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(127)); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: 'npm' not found on PATH"), + "stderr: {stderr}" + ); + assert!( + !stderr.contains("also tried"), + "npm has no fallback alias; stderr: {stderr}" + ); +} diff --git a/tests/cli_install.rs b/tests/cli_install.rs new file mode 100644 index 0000000..4914d1f --- /dev/null +++ b/tests/cli_install.rs @@ -0,0 +1,370 @@ +//! Hermetic end-to-end tests for the install wrappers (`corgea pip|npm …`). +//! +//! Each test spawns the real binary (`CARGO_BIN_EXE_corgea`) against: +//! * a local TcpListener stub standing in for PyPI / the npm registry +//! (wired up via `CORGEA_PYPI_REGISTRY` / `CORGEA_NPM_REGISTRY`), and +//! * a fake package manager on `PATH` — a shell script that records its +//! argv to a marker file, proving whether the install actually ran. +//! +//! No live network. The fake package managers are Unix shell scripts, so +//! the whole file is Unix-only (matching the repo's Linux/macOS CI). + +#![cfg(unix)] + +mod common; + +use common::{ + npm_packument, pip_harness, pypi_release_json, spawn_http_stub, GateHarness, NOT_FOUND_JSON, + OLD_TS, +}; +use std::collections::HashMap; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; + +/// Spawn a registry stub serving both the PyPI and npm routes the +/// resolver hits. Returns the base URL and a counter of accepted +/// connections (used to prove "no registry hit" for passthroughs). 
+/// +/// Routes: +/// * `/pypi/oldpkg/json` — one release, published 2020-01-01 +/// * `/pypi/freshpkg/json` — one release, published one hour ago +/// * `/oldpkg` — npm metadata, published 2020-01-01 +/// * `/freshpkg` — npm metadata, published one hour ago +/// * anything else — 404 +fn spawn_registry_stub() -> (String, Arc) { + let hits = Arc::new(AtomicUsize::new(0)); + let hits_in_stub = Arc::clone(&hits); + let base_url = spawn_http_stub(move |path| { + hits_in_stub.fetch_add(1, Ordering::SeqCst); + let fresh_ts = (chrono::Utc::now() - chrono::Duration::hours(1)) + .format("%Y-%m-%dT%H:%M:%SZ") + .to_string(); + match path { + "/pypi/oldpkg/json" => ("200 OK", pypi_release_json("oldpkg", "1.0.0", OLD_TS)), + "/pypi/freshpkg/json" => ("200 OK", pypi_release_json("freshpkg", "9.9.9", &fresh_ts)), + "/oldpkg" => ("200 OK", npm_packument("1.0.0", OLD_TS)), + "/freshpkg" => ("200 OK", npm_packument("9.9.9", &fresh_ts)), + _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), + } + }); + (base_url, hits) +} + +fn wrapper(binary: &str, registry_env: &str, pm_exit_code: i32) -> GateHarness { + wrapper_with_hits(binary, registry_env, pm_exit_code).0 +} + +fn wrapper_with_hits( + binary: &str, + registry_env: &str, + pm_exit_code: i32, +) -> (GateHarness, Arc) { + let (base_url, registry_hits) = spawn_registry_stub(); + let h = GateHarness::new() + .fake_recorder(binary, pm_exit_code) + .registry_env(registry_env, &base_url) + .build(); + (h, registry_hits) +} + +#[test] +fn pip_fresh_pin_blocks_without_running_install() { + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "freshpkg==9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip must not run when blocked"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("within threshold"), "stdout: {stdout}"); + assert!( + 
String::from_utf8_lossy(&out.stderr).contains("Refusing to run install"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn pip_old_pin_runs_install_with_forwarded_args() { + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("published"), "stdout: {stdout}"); +} + +#[test] +fn pip_no_fail_demotes_block_and_installs() { + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "--no-fail", "install", "freshpkg==9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install freshpkg==9.9.9"), + "--no-fail must still run the install" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("within threshold"), "stdout: {stdout}"); +} + +#[test] +fn pip_non_install_subcommand_passes_through_without_registry_hit() { + let (mut h, registry_hits) = wrapper_with_hits("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "list"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("list")); + assert_eq!( + registry_hits.load(Ordering::SeqCst), + 0, + "passthrough must not touch the registry" + ); +} + +#[test] +fn pip_add_blocks_with_install_suggestion_without_running_pip() { + let (mut h, registry_hits) = wrapper_with_hits("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "add", 
"oldpkg"]) + .output() + .expect("failed to run corgea"); + + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip must not run"); + assert_eq!( + registry_hits.load(Ordering::SeqCst), + 0, + "invalid pip command must not touch the registry" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: pip does not support `add`."), + "stderr: {stderr}" + ); + assert!( + stderr.contains("Did you mean `corgea pip install oldpkg`?"), + "stderr: {stderr}" + ); +} + +#[test] +fn pip_resolution_error_prints_error_but_install_proceeds() { + // `nosuchpkg` hits the stub's 404 route → an error outcome, which + // warns but does not block: public mode fails open when no verdict + // can be obtained — the install must still run. + let (mut h, registry_hits) = wrapper_with_hits("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "nosuchpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!( + registry_hits.load(Ordering::SeqCst) >= 1, + "the 404 route must have been hit" + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install nosuchpkg==1.0.0"), + "a resolution error must not block the install" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("not found"), "stdout: {stdout}"); + assert!(stdout.contains("1 errors"), "stdout: {stdout}"); +} + +#[test] +fn pip_mixed_fresh_and_old_pins_block_without_running_install() { + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "freshpkg==9.9.9", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!( + h.recorded_argv(), + None, + "one recent target must block the whole install" + ); + let stdout = 
String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("within threshold"), "stdout: {stdout}"); + assert!(stdout.contains("1 ok, 1 recent"), "stdout: {stdout}"); +} + +#[test] +fn npm_fresh_pin_blocks_without_running_install() { + let mut h = wrapper("npm", "CORGEA_NPM_REGISTRY", 0); + let out = h + .cmd + .args(["npm", "install", "freshpkg@9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "npm must not run when blocked"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("Refusing to run install"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn npm_old_pin_runs_install_with_forwarded_args() { + let mut h = wrapper("npm", "CORGEA_NPM_REGISTRY", 0); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); +} + +#[test] +fn npm_install_verb_behind_global_flags_is_still_gated() { + // SKILL.md promises `npm --loglevel silent install x` is still gated: + // the verb is found behind global flags, and the flag's value is not + // mistaken for the verb. 
+ let mut h = wrapper("npm", "CORGEA_NPM_REGISTRY", 0); + let out = h + .cmd + .args(["npm", "--loglevel", "silent", "install", "freshpkg@9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1), "gate must fire behind flags"); + assert_eq!(h.recorded_argv(), None, "npm must not run when blocked"); +} + +#[test] +fn wrapper_forwards_package_manager_exit_code() { + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 7); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(7), + "the package manager's exit code must be forwarded" + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); +} + +// SKILL.md promises "Git/URL/path specs … are noted, never blocked". The +// three tests below pin that end-to-end. + +#[test] +fn pip_git_url_spec_skips_verification_and_execs() { + let mut h = pip_harness(HashMap::new(), HashMap::new(), 0); + let out = h + .cmd + .args(["pip", "install", "git+https://github.com/x/y.git"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install git+https://github.com/x/y.git"), + "pip must receive the raw spec" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("registry verification skipped"), + "stdout: {stdout}" + ); +} + +#[test] +fn pip_filesystem_path_spec_skips_verification_and_execs() { + let mut h = pip_harness(HashMap::new(), HashMap::new(), 0); + let out = h + .cmd + .args(["pip", "install", "."]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + 
assert_eq!(h.recorded_argv().as_deref(), Some("install .")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("registry verification skipped"), + "stdout: {stdout}" + ); +} + +#[test] +fn npm_github_shorthand_skips_verification_and_execs() { + let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .build(); + let out = h + .cmd + .args(["npm", "install", "user/repo"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install user/repo")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("registry verification skipped"), + "stdout: {stdout}" + ); +} diff --git a/tests/cli_remediation.rs b/tests/cli_remediation.rs new file mode 100644 index 0000000..f8ce5e9 --- /dev/null +++ b/tests/cli_remediation.rs @@ -0,0 +1,74 @@ +//! Hermetic e2e tests for remediation steering: a blocked install names the +//! safe version from the verdict's `fixed_version` data — the highest fix +//! covering every advisory. When any advisory has no known fix, no steer +//! prints. +//! +//! Uses the shared `common::pip_harness` (pypi stub published 2020 so recency +//! never blocks, a fake pip recording its argv, and the in-crate vuln-api +//! stub) — every block here is the verdict's doing. 
+
+#![cfg(unix)]
+
+mod common;
+
+use common::{key, pip_harness, vulnerable_body};
+use std::collections::HashMap;
+
+fn fixed_body() -> String {
+    vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0"))
+}
+
+fn no_fix_body() -> String {
+    vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0002", None)
+}
+
+#[test]
+fn fixed_match_blocks_and_names_safe_version() {
+    let mut checks = HashMap::new();
+    checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body());
+    let mut h = pip_harness(checks, HashMap::new(), 0);
+    let out = h
+        .cmd
+        .args(["pip", "install", "oldpkg==1.0.0"])
+        .output()
+        .expect("run corgea");
+    assert_eq!(out.status.code(), Some(1));
+    assert_eq!(
+        h.recorded_argv(),
+        None,
+        "pip must not run on a vulnerable verdict"
+    );
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    assert!(stdout.contains("fixed in 2.0.0"), "stdout: {stdout}");
+    assert!(
+        stdout.contains("safe version: oldpkg@2.0.0"),
+        "stdout: {stdout}"
+    );
+}
+
+#[test]
+fn no_fix_match_reports_no_fixed_version_known() {
+    let mut checks = HashMap::new();
+    checks.insert(key("pypi", "oldpkg", "1.0.0"), no_fix_body());
+    let mut h = pip_harness(checks, HashMap::new(), 0);
+    let out = h
+        .cmd
+        .args(["pip", "install", "oldpkg==1.0.0"])
+        .output()
+        .expect("run corgea");
+    assert_eq!(out.status.code(), Some(1));
+    assert_eq!(
+        h.recorded_argv(),
+        None,
+        "pip must not run on a vulnerable verdict"
+    );
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    assert!(
+        stdout.contains("no fixed version known"),
+        "stdout: {stdout}"
+    );
+    assert!(
+        !stdout.contains("safe version:"),
+        "no steer line when the fix is unknown: {stdout}"
+    );
+}
diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs
new file mode 100644
index 0000000..8f84fa3
--- /dev/null
+++ b/tests/cli_verdict.rs
@@ -0,0 +1,183 @@
+//! Hermetic e2e tests for the install-gate vuln-api verdict
+//! (`corgea pip install …` with a public `CORGEA_VULN_API_URL` stub).
+//!
+//! Composes the `cli_install.rs` harness pattern (fake package manager on a
+//! private PATH + local pypi registry stub) with the in-crate vuln-api stub —
+//! the shared `common::pip_harness`. Every package is published in 2020, so
+//! recency never blocks here — every block in this file is the verdict's
+//! doing. Lookups are public: outages warn and fail open.
+
+#![cfg(unix)]
+
+mod common;
+
+use common::{key, pip_harness, vulnerable_body};
+use std::collections::HashMap;
+
+#[test]
+fn vulnerable_pin_blocks_without_running_install() {
+    let mut checks = HashMap::new();
+    checks.insert(
+        key("pypi", "oldpkg", "1.0.0"),
+        vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")),
+    );
+    let mut h = pip_harness(checks, HashMap::new(), 0);
+    let out = h
+        .cmd
+        .args(["pip", "install", "oldpkg==1.0.0"])
+        .output()
+        .expect("run corgea");
+    assert_eq!(out.status.code(), Some(1));
+    assert_eq!(
+        h.recorded_argv(),
+        None,
+        "pip must not run on a vulnerable verdict"
+    );
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    assert!(stdout.contains("MAL-2024-0001"), "stdout: {stdout}");
+    assert!(stdout.contains("critical"), "stdout: {stdout}");
+    assert!(
+        String::from_utf8_lossy(&out.stderr).contains("--force"),
+        "block message must name --force"
+    );
+}
+
+#[test]
+fn alternate_pypi_spelling_hits_canonical_verdict() {
+    // Advisories are keyed by lowercase(canonical) — the server does NOT
+    // apply PEP 503. `pip install Flask_Cors` must still block on the
+    // `flask-cors` row: resolution adopts the registry's canonical
+    // spelling (`info.name`, like real PyPI, which answers any PEP 503-
+    // equivalent request) and the verdict checks that.
+    let mut checks = HashMap::new();
+    checks.insert(
+        key("pypi", "flask-cors", "1.0.0"),
+        vulnerable_body("pypi", "flask-cors", "1.0.0", "GHSA-TEST-0001", None),
+    );
+    // Model real PyPI: serve the alternate request spelling, echo the
+    // canonical name in info.name.
+    let registry = common::spawn_http_stub(|path| match path {
+        "/pypi/Flask_Cors/json" | "/pypi/flask-cors/json" => (
+            "200 OK",
+            common::pypi_release_json("Flask-Cors", "1.0.0", common::OLD_TS),
+        ),
+        _ => ("404 Not Found", common::NOT_FOUND_JSON.to_string()),
+    });
+    let mut h = common::GateHarness::new()
+        .fake_recorder("pip", 0)
+        .registry_env("CORGEA_PYPI_REGISTRY", &registry)
+        .vuln_checks(checks)
+        .build();
+    let out = h
+        .cmd
+        .args(["pip", "install", "Flask_Cors==1.0.0"])
+        .output()
+        .expect("run corgea");
+    assert_eq!(
+        out.status.code(),
+        Some(1),
+        "alternate spelling must not bypass the gate"
+    );
+    assert_eq!(h.recorded_argv(), None);
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    assert!(stdout.contains("GHSA-TEST-0001"), "stdout: {stdout}");
+}
+
+#[test]
+fn force_overrides_vulnerable_block_and_propagates_exit_code() {
+    let mut checks = HashMap::new();
+    checks.insert(
+        key("pypi", "oldpkg", "1.0.0"),
+        vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")),
+    );
+    let mut h = pip_harness(checks, HashMap::new(), 7);
+    let out = h
+        .cmd
+        .args(["pip", "--force", "install", "oldpkg==1.0.0"])
+        .output()
+        .expect("run corgea");
+    assert_eq!(
+        out.status.code(),
+        Some(7),
+        "manager exit code must propagate under --force"
+    );
+    assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0"));
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    assert!(
+        stdout.contains("MAL-2024-0001"),
+        "findings must still print under --force: {stdout}"
+    );
+}
+
+#[test]
+fn no_fail_does_not_waive_vulnerable_block() {
+    let mut checks = HashMap::new();
+    checks.insert(
+        key("pypi", "oldpkg", "1.0.0"),
+        vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")),
+    );
+    let mut h = pip_harness(checks, HashMap::new(), 0);
+    let out = h
+        .cmd
+        .args(["pip", "--no-fail", "install", "oldpkg==1.0.0"])
+        .output()
+        .expect("run corgea");
+    assert_eq!(
+        out.status.code(),
+        Some(1),
+        "--no-fail demotes recency only, never a vulnerable verdict"
+    );
+    assert_eq!(h.recorded_argv(), None);
+}
+
+#[test]
+fn verdict_503_warns_and_fails_open() {
+    let mut checks = HashMap::new();
+    checks.insert(key("pypi", "oldpkg", "1.0.0"), "{}".to_string());
+    let mut statuses = HashMap::new();
+    statuses.insert(key("pypi", "oldpkg", "1.0.0"), 503u16);
+    let mut h = pip_harness(checks, statuses, 0);
+    let out = h
+        .cmd
+        .args(["pip", "install", "oldpkg==1.0.0"])
+        .output()
+        .expect("run corgea");
+    assert_eq!(
+        out.status.code(),
+        Some(0),
+        "a 503 verdict must fail open in public mode; stderr: {}",
+        String::from_utf8_lossy(&out.stderr)
+    );
+    assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0"));
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    assert!(stdout.contains("could not be verified"), "stdout: {stdout}");
+    assert!(
+        String::from_utf8_lossy(&out.stderr)
+            .contains("CVE check unavailable; continuing because public mode is fail-open"),
+        "stderr: {}",
+        String::from_utf8_lossy(&out.stderr)
+    );
+}
+
+#[test]
+fn vuln_api_outage_warns_but_installs() {
+    let mut h = pip_harness(HashMap::new(), HashMap::new(), 0);
+    // Point the gate at a dead vuln-api: connection refused on every check.
+    h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1");
+    let out = h
+        .cmd
+        .args(["pip", "install", "oldpkg==1.0.0"])
+        .output()
+        .expect("run corgea");
+    assert_eq!(
+        out.status.code(),
+        Some(0),
+        "public lookup outage must fail open"
+    );
+    assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0"));
+    let stderr = String::from_utf8_lossy(&out.stderr);
+    assert!(
+        stderr.contains("CVE check unavailable; continuing because public mode is fail-open"),
+        "stderr: {stderr}"
+    );
+}
diff --git a/tests/common/mod.rs b/tests/common/mod.rs
index f2a1a8e..fc7b70d 100644
--- a/tests/common/mod.rs
+++ b/tests/common/mod.rs
@@ -284,3 +284,22 @@ impl GateHarness {
         std::fs::read_to_string(&self.marker).ok()
     }
 }
+
+/// `corgea` wired to the wildcard pypi registry stub (every package
+/// published 2020 → recency never blocks), a fake pip recording its argv
+/// to a marker, and a vuln-api stub. Every block in a `pip_harness` test
+/// is the verdict's doing.
+#[cfg(unix)]
+#[allow(dead_code)]
+pub fn pip_harness(
+    checks: HashMap<String, String>,
+    statuses: HashMap<String, u16>,
+    pip_exit_code: i32,
+) -> GateHarness {
+    GateHarness::new()
+        .fake_recorder("pip", pip_exit_code)
+        .wildcard_pypi_registry()
+        .vuln_checks(checks)
+        .vuln_statuses(statuses)
+        .build()
+}