diff --git a/.github/workflows/staging-contract.yml b/.github/workflows/staging-contract.yml new file mode 100644 index 0000000..7397075 --- /dev/null +++ b/.github/workflows/staging-contract.yml @@ -0,0 +1,34 @@ +name: Staging vuln-api contract + +# The deterministic staging-target contract tests are #[ignore]d so the +# default `./harness ci` gate stays hermetic (no network). This scheduled, +# non-blocking job runs them against the live staging worker so endpoint / +# schema / seed drift is caught out-of-band instead of shipping undetected. +# It does not gate PRs — failures here signal "the staging contract moved", +# not "this PR is broken". + +on: + schedule: + # Daily at 07:00 UTC. + - cron: "0 7 * * *" + workflow_dispatch: {} + +permissions: + contents: read + +jobs: + staging-contract: + name: vuln-api staging contract (non-blocking) + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Cache cargo + uses: Swatinem/rust-cache@v2 + + - name: Run ignored staging contract tests + run: cargo test --test vuln_api_contract -- --ignored diff --git a/Cargo.lock b/Cargo.lock index 2b9c8e7..58bd8d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -344,6 +344,7 @@ version = "1.8.8" dependencies = [ "chrono", "clap", + "corgea", "dirs", "env_logger", "git2", diff --git a/Cargo.toml b/Cargo.toml index d60edad..5228045 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,18 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[[bin]] +name = "corgea" +path = "src/main.rs" + +[features] +# Compiles the in-crate vuln-api test stub (`vuln_api_stub`). Enabled for all +# test builds via the self dev-dependency below; never part of release builds. 
+test-stub = [] + +[dev-dependencies] +corgea = { path = ".", features = ["test-stub"] } + [dependencies] clap = { version = "4.4.13", features = ["derive"] } dirs = "5.0.1" diff --git a/harness b/harness index 84b5076..ce9e7c1 100755 --- a/harness +++ b/harness @@ -268,7 +268,7 @@ cmd_pre_commit() { printf "\n%s[pre-commit]%s\n\n" "$BLUE" "$RESET" # Check-only: never rewrite the working tree behind the commit. # Mirrors the CI gate so anything that passes here passes there. - run "Clippy (strict)" 0 -- cargo clippy -- -D warnings + run "Clippy (strict)" 0 -- cargo clippy --all-targets -- -D warnings run "Format check" 0 -- cargo fmt --check cmd_test } @@ -282,7 +282,7 @@ cmd_check() { [ $? -eq 0 ] && passed=$(( passed + 1 )) || failed=$(( failed + 1 )) run "Format" 1 -- cargo fmt [ $? -eq 0 ] && passed=$(( passed + 1 )) || failed=$(( failed + 1 )) - run "Clippy (strict)" 1 -- cargo clippy -- -D warnings + run "Clippy (strict)" 1 -- cargo clippy --all-targets -- -D warnings [ $? -eq 0 ] && passed=$(( passed + 1 )) || failed=$(( failed + 1 )) run_with_summary "Tests" 1 -- cargo test [ $? -eq 0 ] && passed=$(( passed + 1 )) || failed=$(( failed + 1 )) @@ -304,7 +304,7 @@ cmd_check() { cmd_ci() { printf "\n%s[ci]%s\n\n" "$BLUE" "$RESET" - run "Clippy (strict)" 0 -- cargo clippy -- -D warnings + run "Clippy (strict)" 0 -- cargo clippy --all-targets -- -D warnings run "Format check" 0 -- cargo fmt --check _cmd_audit_inner 1 if ! cargo llvm-cov --version >/dev/null 2>&1; then diff --git a/src/authorize.rs b/src/authorize.rs index 7271cf9..6535847 100644 --- a/src/authorize.rs +++ b/src/authorize.rs @@ -539,6 +539,18 @@ mod tests { use tokio::runtime::Runtime; use tokio::time::{timeout, Duration}; + // Serializes tests that bind ephemeral ports. 
`cargo test` runs the module's + // tests on parallel threads; without this lock one test's just-freed port can + // be handed to another between a `drop` and a re-check, which made + // `port_is_available_reflects_current_port_usage` flake. + static PORT_TEST_LOCK: Mutex<()> = Mutex::new(()); + + fn lock_ports() -> std::sync::MutexGuard<'static, ()> { + PORT_TEST_LOCK + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + } + fn reserve_ephemeral_port() -> u16 { let listener = StdTcpListener::bind("127.0.0.1:0").expect("failed to bind ephemeral port"); listener @@ -624,6 +636,7 @@ mod tests { #[test] fn port_is_available_reflects_current_port_usage() { + let _guard = lock_ports(); let listener = StdTcpListener::bind("127.0.0.1:0").expect("failed to bind ephemeral port"); let port = listener .local_addr() @@ -637,6 +650,7 @@ mod tests { #[test] fn find_available_port_skips_ports_that_are_in_use() { + let _guard = lock_ports(); let listener = StdTcpListener::bind("127.0.0.1:0").expect("failed to bind ephemeral port"); let occupied_port = listener .local_addr() @@ -650,7 +664,12 @@ mod tests { #[tokio::test] async fn start_callback_server_returns_without_waiting_for_second_connection() { - let port = reserve_ephemeral_port(); + // Reserve under the lock so the :0 probe can't race the sync port tests; + // the guard drops before the await, so no lock is held across `.await`. 
+ let port = { + let _guard = lock_ports(); + reserve_ephemeral_port() + }; let auth_code = Arc::new(Mutex::new(Some("test-code".to_string()))); let returned_code = timeout( @@ -666,6 +685,7 @@ mod tests { #[test] fn start_callback_server_returns_bind_error_if_port_is_occupied() { + let _guard = lock_ports(); let listener = StdTcpListener::bind("127.0.0.1:0").expect("failed to bind ephemeral port"); let occupied_port = listener .local_addr() @@ -684,6 +704,7 @@ mod tests { #[test] fn callback_server_serves_waiting_error_and_success_pages_then_returns_code() { + let _guard = lock_ports(); let port = reserve_ephemeral_port(); let auth_code = Arc::new(Mutex::new(None::<String>)); let result_rx = spawn_callback_server(port, auth_code); diff --git a/src/deps/ecosystems/pypi.rs b/src/deps/ecosystems/pypi.rs index 062f13c..3a77faa 100644 --- a/src/deps/ecosystems/pypi.rs +++ b/src/deps/ecosystems/pypi.rs @@ -367,7 +367,10 @@ fn exact_version_from_declared(name: &str, declared: &str) -> Option<String> { Some(declared.trim_start_matches('=').trim().to_string()) } -fn normalize_pypi_name(name: &str) -> String { +/// PEP 503 name normalization: lowercase, runs of `-`/`_`/`.` collapse to `-`. +/// Also used by the vuln-api client (`vuln_api`) so both features share one +/// canonical pypi name form. +pub(crate) fn normalize_pypi_name(name: &str) -> String { let mut out = String::new(); let mut last_was_separator = false; for c in name.trim().chars() { diff --git a/src/lib.rs b/src/lib.rs index 49bc6d0..2f8423e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,11 @@ pub mod deps; +// Also declared in the binary crate (src/main.rs); re-declared here so library modules +// (e.g. vuln_api) can use `crate::log::debug`. src/log.rs is a thin `::log` facade that +// compiles cleanly in both crates. +mod log; +pub mod vuln_api; +// Test-only HTTP stub for the vuln-api.
Gated out of release builds; the +// `test-stub` feature is enabled for every test build by the self +// dev-dependency in Cargo.toml, so integration tests can use it too. +#[cfg(any(test, feature = "test-stub"))] +pub mod vuln_api_stub; diff --git a/src/utils/api.rs b/src/utils/api.rs index 9b9a445..23e53ae 100644 --- a/src/utils/api.rs +++ b/src/utils/api.rs @@ -18,8 +18,9 @@ use std::path::Path; const CHUNK_SIZE: usize = 50 * 1024 * 1024; // 50 MB const API_BASE: &str = "/api/v1"; -fn get_source() -> String { - std::env::var("CORGEA_SOURCE").unwrap_or_else(|_| "cli".to_string()) +fn get_source() -> &'static str { + // One definition of the CORGEA-SOURCE value (cached there). + corgea::vuln_api::source() } fn is_jwt(token: &str) -> bool { diff --git a/src/vuln_api/mod.rs b/src/vuln_api/mod.rs new file mode 100644 index 0000000..c92f7df --- /dev/null +++ b/src/vuln_api/mod.rs @@ -0,0 +1,656 @@ +//! Corgea vuln-api client. +//! +//! Deliberately independent of `utils::api::SHARED_CLIENT` because: +//! * the vuln-api host is user-configurable via `CORGEA_VULN_API_URL`, +//! so we must never silently replay Corgea cookies or auth headers +//! via redirect following or the shared cookie jar. +//! * the shared client's `check_for_warnings` exits the process on +//! HTTP 410, which is wrong for per-dep CVE lookups. +//! +//! This phase attaches no Corgea credential: the staging deployment +//! (`VULN_API_REQUIRE_AUTH=false`) accepts anonymous checks. The +//! production /check route requires a Corgea token — wired in with +//! authenticated mode, and never sent to a user-configured host +//! without explicit opt-in. + +use serde::Deserialize; +use std::sync::OnceLock; +use std::time::Duration; + +use crate::log::debug; + +const REQUEST_TIMEOUT: Duration = Duration::from_secs(30); + +/// Cap on how much of an error response body we splice into the +/// user-facing error message. 
Fits a CLI line, captures +/// `{"error":"…"}`-class messages comfortably, and truncates +/// Cloudflare HTML before it gets ugly. +const ERROR_BODY_SNIPPET_LEN: usize = 300; + +/// Registry ecosystem a package check targets. Typed so the URL path +/// segment and the per-ecosystem name encoding can't drift apart on a +/// string spelling. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Ecosystem { + Npm, + Pypi, +} + +impl Ecosystem { + pub fn path_segment(self) -> &'static str { + match self { + Ecosystem::Npm => "npm", + Ecosystem::Pypi => "pypi", + } + } + + /// Canonical package name for IDENTITY COMPARISONS: PEP 503 for pypi + /// (shared with `deps`), verbatim for npm. The server lowercases every + /// ecosystem for lookup (worker.js `normalizePackageName`) and the + /// identity check below compares case-insensitively, so npm casing is + /// not load-bearing. Not the wire spelling — see `request_name`. + pub fn normalize_name(self, name: &str) -> String { + match self { + Ecosystem::Npm => name.to_string(), + Ecosystem::Pypi => crate::deps::ecosystems::pypi::normalize_pypi_name(name), + } + } + + /// Wire spelling for the request path. The server normalizes lookups + /// with lowercase + trim only (worker.js `normalizePackageName`), NOT + /// PEP 503 — collapsing `zope.interface` to `zope-interface` here + /// would miss the stored advisory row and read a vulnerable package + /// as clean. Match the server's rule exactly. 
+ pub fn request_name(self, name: &str) -> String { + match self { + Ecosystem::Npm => name.to_string(), + Ecosystem::Pypi => name.trim().to_lowercase(), + } + } +} + +#[derive(Debug, Clone, Deserialize)] +pub struct VulnCheckResponse { + pub ecosystem: String, + pub package_name: String, + pub version: String, + pub is_vulnerable: bool, + pub matches: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +pub struct VulnMatch { + pub advisory_id: String, + pub severity_level: String, + pub tier: Option, + pub vulnerable_version_range: Option, + pub fixed_version: Option, +} + +/// `corgea-cli/ (