diff --git a/.cargo/audit.toml b/.cargo/audit.toml new file mode 100644 index 0000000000..09d5425505 --- /dev/null +++ b/.cargo/audit.toml @@ -0,0 +1,21 @@ +# cargo-audit configuration (read by `cargo audit`). +# +# The advisories below are all "unmaintained" warnings — NOT security +# vulnerabilities. Each crate is pulled in transitively only by the `starlark` +# 0.13.0 family (starlark / starlark_syntax / starlark_map), which crates/tui and +# crates/whaleflow depend on directly for Starlark-based Fleet/workflow config. +# `cargo tree -i <crate>` confirms starlark is the sole path for each. +# +# There is no fix available without an upstream `starlark` release that drops +# these deps, and none is exploitable here. They are accepted for now and +# tracked in this file so `cargo audit` stays clean for genuinely new advisories. +# Remove an entry once a starlark upgrade/removal drops the transitive dep +# (re-check with `cargo tree -i derivative` and `cargo audit`). +# +# Audit #11, scratchpad/bug-audit-2026-06-24.md. +[advisories] +ignore = [ + "RUSTSEC-2024-0388", # derivative 2.2.0 unmaintained — transitive via starlark 0.13.0 + "RUSTSEC-2025-0057", # fxhash 0.2.1 unmaintained — transitive via starlark_map 0.13.0 + "RUSTSEC-2024-0436", # paste 1.0.15 unmaintained — transitive via starlark 0.13.0 +] diff --git a/AGENTS.md b/AGENTS.md index 3beca978b2..f825d045d8 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,15 +6,18 @@ **not** hard-code a device-specific checkout path here — work in whichever local checkout you have and always **confirm with `git branch --show-current` before editing.** -- **Active branch:** `codex/v0.8.63-integration` (also at - `origin/codex/v0.8.63-integration`) for the current fix/integration lane. - If a newer handoff or objective file names a different branch, verify with - `git branch --show-current` and follow the live branch. -- **Workspace version is `0.8.63`** in `Cargo.toml`. 
Do not bump versions - opportunistically; version bumps, tags, release artifacts, publishing, and - GitHub Releases require Hunter's explicit approval. -- **Milestone guidepost:** GitHub milestone `v0.8.63`. Check live state with - `gh issue list --repo Hmbown/CodeWhale --milestone "v0.8.63" --state open`. +- **Active branch:** start from live truth, not a hard-coded lane. Confirm the + current fix/integration branch from the latest handoff/objective file and + `git branch --show-current`; recent work has landed on `main` through small + PRs rather than a long-lived `codex/...` integration branch, so don't assume a + named integration branch still exists — verify before relying on it. +- **Workspace version:** read it from `Cargo.toml` (`[workspace.package] + version`); it advances per release lane, so don't trust a number memorized + here. Do not bump versions opportunistically; version bumps, tags, release + artifacts, publishing, and GitHub Releases require Hunter's explicit approval. +- **Milestone guidepost:** use the current release milestone named in the active + handoff and list it live, e.g. + `gh issue list --repo Hmbown/CodeWhale --milestone "<milestone>" --state open`. - **Default branch is `main`.** Never commit directly to `main`; work on the active integration branch or a fresh `codex/...` branch/worktree off it for an isolated change. Open a PR into `main` only when a unit of work is @@ -105,7 +108,8 @@ - Close or update issues and PRs only after verifying the landed commit on the relevant branch. If the release branch already contains equivalent behavior, leave a clear note linking the commit and describing any remaining delta. -- For the active release queue, start from the GitHub `v0.8.63` milestone - (`gh issue list --repo Hmbown/CodeWhale --milestone "v0.8.63"`) and refresh - state before acting. 
Older per-version triage docs under `docs/` are +- For the active release queue, start from the current GitHub release milestone + named in the active handoff + (`gh issue list --repo Hmbown/CodeWhale --milestone "<milestone>"`) and + refresh state before acting. Older per-version triage docs under `docs/` are historical reference only. diff --git a/CHANGELOG.md b/CHANGELOG.md index 891277cbf4..f88c2fd108 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,21 +7,90 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.8.65] - 2026-06-24 + ### Added +- **Provider/model/route resolution (EPIC #2608).** Canonical provider, model, + offering, and route types with a single `RouteResolver` that produces a + resolved `ReadyRouteCandidate` (endpoint, wire protocol, model id, context + limit, price) for every switch (#3458, #3084, #3384). The executing client is + now constructed from the resolved candidate rather than re-derived from config + (#3384). A committed, network-free Models.dev-shaped catalog gives models real + context windows and pricing, with a secret-free live cache (#3497, #3498, + #3385). Offering pricing with provenance is projected onto candidates (#3501, + #3085), and route limits feed a route-aware context-budget service (#3508, + #3523, #3086). +- **Fleet execution substrate (EPIC #3154).** Fleet profile types and config + (#3469), durable manager resume, workspace agent-profile loading resolved into + the worker runtime (#3367), loadout intent carried in task specs (#3512), and + receipts that persist the resolved route for inspection (#3154, #3166). Worker + status is folded into the unified `/fleet` surface and exposed through the + Runtime API. 
+- **Provider surfaces.** A `/provider` readiness dashboard with reasoning + readiness, an experimental/supported maturity marker, and an "open models for + this provider" action (#3083, #2984, #3485); cross-provider `/model` search + with scroll and provider type-ahead (#3484, #3075); inline `<think>` + reasoning-stream routing with per-provider overrides (#3222); usage telemetry + normalized into canonical token classes including Responses cache-miss and + reasoning tokens (#2961, #3509); and remote MCP OAuth login with bearer/header + auth precedence (#3527). +- **More providers and routes.** User-defined OpenAI-compatible custom providers + via `[providers.<name>]` (#1519); a DeepSeek Anthropic-compatible route (#2963, + #3449); a Qianfan route (#3425); Zhipu folded into Z.ai with equal-treatment + model normalization (#3539); DashScope/Together fixtures. - **Localized mode picker and composer indicators.** The `/mode` picker prompt, mode names, and hints, plus the composer's Vim mode indicator, now render in all seven shipped locales (model-facing mode labels stay English). Harvested from #2239 by @gordonlu. +- **Website and automation.** A runtime/integrations page, provenance and + mirror-trust copy, a fact-drift CI gate, a published install script, and a + weekly community digest archive on codewhale.net (#3419, #3421, #3415, #3482, + #3420); per-automation mode/shell/trust/approval settings (#3467). ### Changed +- **Config modularization (#3311).** `ProviderKind` (#3505), harness posture + (#3507), and provider default seeds (#3503) moved into dedicated modules, and + the `config.rs` monolith split into clean leaf modules (paths, search, + model/base-URL constants, sub-agent limits) behind a `pub use` facade. + `AppMode` helpers were centralized (#3510), and mode-vs-permission policy is + now derived through a single `base_policy_for_mode` resolver instead of + scattered mutation (#3386, advisory review-intent behavior preserved). 
+- **Leaner tool surface.** Dropped `task_shell_*` from the active set and folded + `tool_search_*` (#3463); ablated the in-turn loop_guard and encoded reasoning + dispositions (#3462); added the Orchestration disposition to the constitution. +- **Routing.** Provider/model switches and the capability-aware fallback chain + resolve through `RouteResolver`; reasoning effort is normalized for the + *resolved* provider; the fallback chain now skips providers that lack auth + (#2574); and context window and memory-pressure come from the resolved route + (#3086). +- **UX.** Approval modal gained a group divider and selected-row caret (#3515); + picker scroll/type-ahead and selection contrast hardened (#3500); the README + was rewritten as an architecture end-cap (#3087); and repo agent guidance was + de-hardcoded to live truth. - **Restored contributor credit.** Threaded machine-readable credit (`docs/CONTRIBUTORS.md` + `.github/AUTHOR_MAP`) for earlier merged work that shipped without it, including the `/jobs cancel-all` action and the npm retry-timeout hint (#1538) by @jieshu666, and the community ACP adapter reference by @rockeverm3m. +### Fixed + +- **Release hygiene.** The strict `cargo clippy --workspace --all-targets --locked + -- -D warnings` gate passes; `npm run build` no longer dirties the generated + web facts; the site sets `metadataBase`; the community digest page parses each + record independently and localizes its chrome; and `cargo audit` is clean with + the starlark-transitive unmaintained advisories documented. +- **Routing and mode correctness.** Ordinary prompt text is no longer + interpreted as a mode switch (#3387, #3491); model candidates are scoped to the + active provider; Together-owned DeepSeek routes are accepted (#3426); insecure + `http://` custom endpoints raise an advisory warning (#1519); and the Fleet + setup planner's role/model selection now drives the generated profile. 
+- **Runtime stability.** MCP connection drops are explicit (#3524), HTTP API + calls reuse a shared MCP pool (#3532), and per-agent sub-agent mailbox + telemetry is throttled to cut UI lag (#3454). + ## [0.8.64] - 2026-06-22 ### Added @@ -2361,6 +2430,7 @@ overflow report and `/theme` picker edge-wrapping patch in #1814. Older releases (v0.8.39 and earlier) are archived in [docs/CHANGELOG_ARCHIVE.md](docs/CHANGELOG_ARCHIVE.md). [Unreleased]: https://github.com/Hmbown/CodeWhale/compare/v0.8.64...HEAD +[0.8.65]: https://github.com/Hmbown/CodeWhale/compare/v0.8.64...v0.8.65 [0.8.64]: https://github.com/Hmbown/CodeWhale/compare/v0.8.63...v0.8.64 [0.8.63]: https://github.com/Hmbown/CodeWhale/compare/v0.8.62...v0.8.63 [0.8.62]: https://github.com/Hmbown/CodeWhale/compare/v0.8.61...v0.8.62 diff --git a/CLAUDE.md b/CLAUDE.md index 6f21ab5bfd..2151165315 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -38,16 +38,17 @@ for Claude-based agents working in this repository. ## Current Release Work -- The active branch for this release lane is `codex/v0.8.63-integration` - (also at `origin/codex/v0.8.63-integration`). This repo lives on multiple - devices, so do not hard-code a checkout path; work in whichever local - checkout you have and confirm with `git branch --show-current` before - editing. Never commit directly to `main`. -- The workspace version is `0.8.63`. Do not tag, publish, create a GitHub - Release, push release artifacts, or merge to `main` without Hunter's - explicit approval. -- Base release triage on the GitHub `v0.8.63` milestone - (`gh issue list --repo Hmbown/CodeWhale --milestone "v0.8.63" --state open`) +- Confirm the active branch for the current release lane from the latest handoff + and `git branch --show-current`; recent work has landed on `main` through small + PRs rather than a long-lived `codex/...` integration branch. 
This repo lives on + multiple devices, so do not hard-code a checkout path; work in whichever local + checkout you have and confirm the branch before editing. Never commit directly + to `main`. +- Read the workspace version from `Cargo.toml`; it advances per release lane. Do + not tag, publish, create a GitHub Release, push release artifacts, or merge to + `main` without Hunter's explicit approval. +- Base release triage on the current GitHub release milestone named in the active + handoff (`gh issue list --repo Hmbown/CodeWhale --milestone "<milestone>" --state open`) unless Hunter gives a newer branch/milestone. - Work the queue in this order: release blockers, recently approved PRs, clean PRs with small scope, blocked PRs with obvious fixes, dirty PRs that can be diff --git a/Cargo.lock b/Cargo.lock index 71b2949b1d..a349627c41 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -777,7 +777,7 @@ checksum = "e9b18233253483ce2f65329a24072ec414db782531bdbb7d0bbc4bd2ce6b7e21" [[package]] name = "codewhale-agent" -version = "0.8.64" +version = "0.8.65" dependencies = [ "codewhale-config", "serde", @@ -785,7 +785,7 @@ dependencies = [ [[package]] name = "codewhale-app-server" -version = "0.8.64" +version = "0.8.65" dependencies = [ "anyhow", "axum", @@ -813,7 +813,7 @@ dependencies = [ [[package]] name = "codewhale-cli" -version = "0.8.64" +version = "0.8.65" dependencies = [ "anyhow", "chrono", @@ -843,7 +843,7 @@ dependencies = [ [[package]] name = "codewhale-config" -version = "0.8.64" +version = "0.8.65" dependencies = [ "anyhow", "codewhale-execpolicy", @@ -860,7 +860,7 @@ dependencies = [ [[package]] name = "codewhale-core" -version = "0.8.64" +version = "0.8.65" dependencies = [ "anyhow", "chrono", @@ -879,7 +879,7 @@ dependencies = [ [[package]] name = "codewhale-execpolicy" -version = "0.8.64" +version = "0.8.65" dependencies = [ "anyhow", "codewhale-protocol", @@ -888,7 +888,7 @@ dependencies = [ [[package]] name = "codewhale-hooks" -version = "0.8.64" +version = "0.8.65" 
dependencies = [ "anyhow", "async-trait", @@ -902,7 +902,7 @@ dependencies = [ [[package]] name = "codewhale-mcp" -version = "0.8.64" +version = "0.8.65" dependencies = [ "anyhow", "serde", @@ -911,7 +911,7 @@ dependencies = [ [[package]] name = "codewhale-protocol" -version = "0.8.64" +version = "0.8.65" dependencies = [ "chrono", "serde", @@ -921,7 +921,7 @@ dependencies = [ [[package]] name = "codewhale-release" -version = "0.8.64" +version = "0.8.65" dependencies = [ "anyhow", "reqwest 0.13.4", @@ -932,7 +932,7 @@ dependencies = [ [[package]] name = "codewhale-secrets" -version = "0.8.64" +version = "0.8.65" dependencies = [ "dirs", "keyring", @@ -945,7 +945,7 @@ dependencies = [ [[package]] name = "codewhale-state" -version = "0.8.64" +version = "0.8.65" dependencies = [ "anyhow", "chrono", @@ -957,7 +957,7 @@ dependencies = [ [[package]] name = "codewhale-tools" -version = "0.8.64" +version = "0.8.65" dependencies = [ "anyhow", "async-trait", @@ -971,7 +971,7 @@ dependencies = [ [[package]] name = "codewhale-tui" -version = "0.8.64" +version = "0.8.65" dependencies = [ "anyhow", "arboard", @@ -1046,7 +1046,7 @@ dependencies = [ [[package]] name = "codewhale-whaleflow" -version = "0.8.64" +version = "0.8.65" dependencies = [ "anyhow", "serde", diff --git a/Cargo.toml b/Cargo.toml index da8baa4246..77c2643a45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"] resolver = "2" [workspace.package] -version = "0.8.64" +version = "0.8.65" edition = "2024" # Rust 1.88 stabilized `let_chains` in `if`/`while` conditions, which the # codebase relies on extensively. 
Cargo enforces this so users on older diff --git a/README.ja-JP.md b/README.ja-JP.md index ee2822ee0a..853e6e0e1e 100644 --- a/README.ja-JP.md +++ b/README.ja-JP.md @@ -44,8 +44,8 @@ nix run github:Hmbown/CodeWhale scoop install codewhale # または GitHub Releases の NSIS インストーラ # GitHub に安定して到達できない場合の CNB ミラー -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.64 codewhale-cli --locked --force -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.64 codewhale-tui --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.65 codewhale-cli --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.65 codewhale-tui --locked --force # 旧 Homebrew 互換。formula の改名が完了するまで deepseek-tui 名のままです brew tap Hmbown/deepseek-tui diff --git a/README.md b/README.md index 4a9d0701a9..ce191343fd 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # CodeWhale -> An open source terminal coding agent, built to bring the best available models -> to as many people as possible. +> The terminal coding agent for any model — open models first. CodeWhale is a terminal coding agent — a TUI and a CLI. You point it at a model and a project, and it gets to work: reading code, making edits, running @@ -9,14 +8,17 @@ commands, checking results, planning multi-step tasks, and correcting itself when something fails. It's open source (MIT, Rust), it runs on your machine, and it works with the -models people actually use. DeepSeek and open-weight models are first-class, -but Claude, GPT, Kimi, and a local vLLM/Ollama box on your LAN are all full -peers. The goal is simple: keep the local terminal workflow current with the -best research and practical features in coding agents. - -Developers from all over the world have shaped CodeWhale into what it is. If -there's a model, endpoint, or feature you don't see that you want, open an issue -— that's how the project grows. +models people actually use. 
DeepSeek and open-weight models are first-class, and +a local vLLM/SGLang/Ollama box on your LAN needs no key at all — but Claude, GPT, +Kimi, and GLM are full peers through the same runtime and the same tools. You +pick a provider and a model; CodeWhale resolves a real route and runs. + +The project began as `deepseek-tui`, a coding harness built around DeepSeek +workflows. The developer community — much of it in China — adopted it, filed +reports, and contributed fixes, and it became clear the harness was bigger than +one model. Multi-provider support followed, and the project became CodeWhale to +match. If there's a model, endpoint, or feature you don't see that you want, +open an issue — that's how the project grows. [简体中文 README](README.zh-CN.md) · [日本語 README](README.ja-JP.md) · [Tiếng Việt README](README.vi.md) · [codewhale.net](https://codewhale.net/) · [Install guide](docs/INSTALL.md) · [Provider registry](docs/PROVIDERS.md) · [Changelog](CHANGELOG.md) @@ -60,8 +62,8 @@ nix run github:Hmbown/CodeWhale scoop install codewhale # or the NSIS installer from GitHub Releases # CNB mirror for users who cannot reliably reach GitHub -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.64 codewhale-cli --locked --force -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.64 codewhale-tui --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.65 codewhale-cli --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.65 codewhale-tui --locked --force # Legacy Homebrew compatibility while the formula is renamed brew tap Hmbown/deepseek-tui @@ -77,7 +79,7 @@ China mirrors, Windows specifics, and troubleshooting live in skills, and MCP settings are preserved. See [docs/REBRAND.md](docs/REBRAND.md), then run `codewhale doctor` to confirm. -## First Run +## First run ```bash codewhale auth set --provider deepseek @@ -97,8 +99,12 @@ read for compatibility. 
Useful in-session commands: -- `/provider` and `/model` switch the route and model mid-session. +- `/provider` opens the readiness dashboard — per provider it shows auth state, + the resolved default route, and the cost/usage meter. `/model` picks the model + and reasoning effort. Both also take arguments (`/provider nvidia-nim`, + `/model auto`) to switch mid-session. - `/restore` rolls back a prior turn from side-git snapshots. +- `/fleet` opens the Fleet setup view — roles, profiles, loadouts, and policy. - `/skills` loads reusable workflows from `~/.codewhale/skills/`. - `/config` edits runtime settings; `/statusline` shows the current route, cost, and session state. @@ -111,46 +117,112 @@ Headless, for scripts and CI: codewhale exec --allowed-tools read_file,exec_shell --max-turns 10 "fix the failing test" ``` -## The models +## Providers and routing + +You pick a provider and a model, and CodeWhale resolves a **real route** — a +concrete endpoint, wire protocol, model ID, context limit, and price — instead +of just swapping a base URL. A `RouteResolver` is the only thing that can mint a +resolved route, so the same selection logic backs the TUI picker, the CLI, and +headless runs. The catalog behind it is a committed, network-free snapshot in +the Models.dev shape, optionally refreshed from a provider's live `/models` +endpoint. + +Because the route is resolved, the rest of the harness can be honest about it: + +- **Route-aware context budgets.** The compaction threshold and usable window + come from the resolved route's real context limit, not a hardcoded guess. +- **Honest cost display.** A route reports exactly one cost state: per-token + pricing, a subscription/quota meter, account credits, *local / not + applicable*, or *unknown / stale*. CodeWhale never invents a price it doesn't + have — an unmatched model shows as unknown rather than $0. 
+- **Explicit wire protocol.** Whether a route speaks Chat Completions, the + OpenAI Responses API, or native Anthropic Messages is carried on the resolved + route, not inferred from a prompt. Reasoning effort is translated into each + provider's own dialect. + +Switch the route mid-session with `/provider` and `/model`. The full registry — +credentials, base URLs, capability boundaries — lives in +[docs/PROVIDERS.md](docs/PROVIDERS.md). -Supported providers route through the same runtime and the same tools. If the -one you want isn't here, that's a good issue to open. +### Supported providers -- **Open models, hosted:** `deepseek` (first among equals), `openrouter`, - `huggingface` (Inference Providers), `moonshot` (Kimi — OAuth temporarily - broken), `zai` (GLM — recommended), `minimax`, `volcengine` (Ark), - `nvidia-nim`, `together`, `fireworks`, `novita`, `siliconflow` / - `siliconflow-CN`, `arcee`, `xiaomi-mimo`, `deepinfra`, `stepfun`, - `atlascloud`, `wanjie-ark`, plus a generic `openai`-compatible route for any - gateway. +Every provider routes through the same runtime and the same tools. If the one +you want isn't here, that's a good issue to open. + +- **Open models, hosted:** `deepseek` (the default), `openrouter`, + `huggingface` (Inference Providers), `moonshot` (Kimi), `zai` (GLM), + `minimax`, `volcengine` (Ark), `nvidia-nim`, `together`, `fireworks`, + `novita`, `siliconflow` / `siliconflow-CN`, `arcee`, `xiaomi-mimo`, + `deepinfra`, `stepfun`, `atlascloud`, `qianfan`, `wanjie-ark`, plus a generic + `openai`-compatible route for any gateway. - **Open models, self-hosted:** `vllm`, `sglang`, and `ollama` against your own localhost endpoints — no key required. - **Closed providers, natively:** `anthropic` through a dedicated `/v1/messages` adapter with adaptive thinking, prompt-cache breakpoints, and - signed-thinking replay — and `openai-codex`, which reuses an existing - ChatGPT/Codex CLI login (working). 
+ signed-thinking replay; `deepseek-anthropic`, DeepSeek's opt-in Messages-API + route; and `openai-codex` (experimental), which reuses an existing + ChatGPT/Codex CLI login instead of an API key. -Routing is more than a base URL swap: `/reasoning` effort is translated into -each provider's wire dialect, sub-agent tiers resolve per provider, and the -system prompt's model facts are templated per-model instead of hardcoded. -Switch mid-session with `/provider` and `/model`. The full registry — -credentials, base URLs, capability boundaries — lives in -[docs/PROVIDERS.md](docs/PROVIDERS.md). +## Fleet + +Fleet is CodeWhale's durable control plane for multi-worker runs. A fleet worker +is a headless `codewhale exec` run, but the fleet launches and tracks it durably: +work is recorded in an append-only ledger (`.codewhale/fleet.jsonl`), so a run +survives a manager exit, laptop sleep, or a runtime restart. -Sub-agent fanout is config-first. Set global `[subagents]` defaults, then add -`[subagents.providers.deepseek]`, `[subagents.providers.glm]`, -`[subagents.providers.openrouter]`, or other provider profiles to match the API -you are actually using. Direct DeepSeek can stay wide; subscription or -rate-limited routes can stay at 3-5 concurrent agents without changing prompts -or code. See [docs/SUBAGENTS.md](docs/SUBAGENTS.md#concurrency-cap). +```bash +codewhale fleet run tasks.json --max-workers 4 +codewhale fleet status +codewhale fleet resume +``` -Atlas Cloud is included as an OpenAI-compatible hosted route for users who want -its curated catalog behind one key: set `DEEPSEEK_PROVIDER=atlascloud`, -`ATLASCLOUD_API_KEY`, and optionally `ATLASCLOUD_MODEL`, for example -`deepseek-ai/deepseek-v4-pro`. Atlas model IDs pass through as selected; use -Atlas's model catalog or Coding Plan page for the current list and pricing. 
+`fleet resume` replays the ledger, reconciles any in-flight task whose worker +stopped heartbeating (retrying within budget, else failing and escalating), and +is idempotent — safe to run after anything that interrupted the manager. Each +worker records a typed receipt (`pass` / `fail` / `partial` / `skip` / +`timeout`) so `fleet status` can report what actually happened. -## What makes CodeWhale different +Workers are shaped by **roles**, **profiles**, **loadouts**, and **slots**, +configured under `[fleet]` in your config or authored from the in-app Fleet +setup view. Loadouts express model intent as a class — `strong`, `balanced`, or +`fast` — and the route resolver turns that into a concrete provider/model. This +is the same headless runtime that backs in-session sub-agents; Fleet is the +durable layer on top. See [docs/FLEET.md](docs/FLEET.md). + +## Safety + +CodeWhale edits files and runs commands, so the safety posture is part of the +product, not an afterthought. + +- **Three modes.** Plan (read-only investigation), Agent (executes, asks per + action), and YOLO (auto-approve). Switch with `Tab` or `/mode`. +- **Approval-gated tools.** A `.codewhale/hooks.toml` hook system can allow, + deny, or ask before any tool call, and the exec policy decides whether a + command runs, needs approval, or is forbidden outright. +- **OS sandboxing.** Seatbelt on macOS, Landlock plus a seccomp syscall filter + on Linux, and bubblewrap (bwrap) where it's available. +- **Rollback.** Side-git snapshots live outside your repo's `.git`, so + `/restore` can undo a turn without ever touching your real history. + +## Features + +- **Persistent goal loop.** Set an objective with `/goal` and the agent keeps + working across turns — reading, editing, running, checking results — until the + goal is done, it's blocked, or you stop it. No turn cap. `/task` tracks + background tasks; the Work sidebar shows live plan and checklist state. 
+- **Durable sessions.** Persist across restarts and system sleep; a task that + takes forty tool calls survives the forty-first. +- **Headless mode.** `codewhale exec` with `--allowed-tools`, + `--disallowed-tools` (deny wins), `--max-turns`, and `--append-system-prompt` + for scripts and CI. +- **MCP, bidirectionally.** Consume tools from external MCP servers, or expose + CodeWhale itself as an MCP server via `codewhale mcp`. +- **Skills.** Reusable workflows in `~/.codewhale/skills/`, loaded with + `/skills`. +- **Embedded everywhere.** HTTP/SSE and ACP runtime APIs, a VS Code extension, + and Telegram/Feishu bridges (Weixin experimental). + +## How instructions are ranked As a project evolves, the instructions pile up and they inevitably conflict: the original spec, a later refactor that contradicts it, stale memory, a previous @@ -172,47 +244,34 @@ code (there are tests asserting it can't drift): 4. **Live evidence** — what the tools actually returned. Ground truth; the model may be ordered past it, but it may never report a fact that isn't there. -When two instructions conflict, each yields to the one above. The model isn't -renegotiating the stack each turn — the order is fixed, so it can act on the -mountain of overlapping context without being paralyzed or quietly wrong. And -because the law lives in the harness, not the model, swapping models keeps the -structure intact. +When two instructions conflict, each yields to the one above. Because the law +lives in the harness, not the model, swapping models keeps the structure intact. -## Features +## Where details live -- **Three modes.** Plan (read-only investigation), Agent (executes, asks per - action), YOLO (auto-approve). Switch with `Tab` or `/mode`. -- **Persistent goal loop.** Set an objective with `/goal` and the agent keeps - working across turns — reading, editing, running, checking results — until the - goal is done, it's blocked, or you stop it. No turn cap. 
`/task` tracks - background tasks; the Work sidebar shows live plan and checklist state. -- **Sub-agents.** Independent investigations and implementation slices run in - parallel with provider-specific fanout caps, clean context, and - provider-aware model tiers (big vs. cheap). -- **Broad provider support.** DeepSeek, GLM, Claude, GPT, Kimi, MiniMax, - OpenRouter, and local vLLM/SGLang/Ollama, all behind the same runtime and tools. Switch - mid-session with `/provider` and `/model`. -- **Rollback.** Side-git snapshots and `/restore`, kept outside your repo's - `.git` — undoing a turn never touches your history. -- **Sandboxing & approval gates.** OS sandboxing (bwrap, Landlock, Seatbelt, - seccomp) and a `.codewhale/hooks.toml` hook system that can allow, deny, or ask - before any tool call. -- **Durable sessions.** Persist across restarts and system sleep; a task that - takes forty tool calls survives the forty-first. -- **Headless mode.** `codewhale exec` with `--allowed-tools`, `--disallowed-tools` - (deny wins), `--max-turns`, and `--append-system-prompt` for scripts and CI. -- **MCP, bidirectionally.** Consume tools from external servers, or expose - CodeWhale itself as an MCP server via `codewhale mcp`. -- **Skills.** Reusable workflows in `~/.codewhale/skills/`, loaded with `/skills`. -- **Embedded everywhere.** HTTP/SSE and ACP runtime APIs, a VS Code extension, - and Telegram/Feishu bridges (Weixin experimental). +The README is the short version. The rest is in docs and on +[codewhale.net](https://codewhale.net/): + +- [User guide](docs/GUIDE.md) · [Install guide](docs/INSTALL.md) · + [Configuration](docs/CONFIGURATION.md) · [Provider registry](docs/PROVIDERS.md) +- [Modes](docs/MODES.md) — Agent, Plan, and YOLO. +- [Fleet](docs/FLEET.md) · [Sub-agents](docs/SUBAGENTS.md) — roles, lifecycle, + output contract, and recovery behavior. +- [Architecture](docs/ARCHITECTURE.md) — crate layout, runtime flow, tool system, + extension points, and security model. 
+- [WhaleFlow authoring](docs/WHALEFLOW_AUTHORING.md) · [MCP](docs/MCP.md) · + [Runtime API](docs/RUNTIME_API.md) · [Model Lab](docs/MODEL_LAB.md) +- [Keybindings](docs/KEYBINDINGS.md) · [Sandbox & approvals](docs/SANDBOX.md) + · [Accessibility](docs/ACCESSIBILITY.md) · [Docker](docs/DOCKER.md) + · [Memory](docs/MEMORY.md) +- [Full docs index](docs) — everything else. ## The project CodeWhale started as one person's DeepSeek side project. Developers from countries all over the world have made it what it is — the contributor list on -every release is the proof. The project is built in the open, issues are -triaged in the open, and releases cut from `main`. +every release is the proof. The project is built in the open, issues are triaged +in the open, and releases cut from `main`. Something I learned early in teaching: **all feedback is a gift.** Issues, PRs, bug reports, feature ideas, "first PR"s, and curious questions all count as real @@ -230,26 +289,6 @@ most useful thing you can tell the project. Support: [Buy me a coffee](https://www.buymeacoffee.com/hmbown). -## Where details live - -The README is the short version. The rest is in docs and on -[codewhale.net](https://codewhale.net/): - -- [User guide](docs/GUIDE.md) · [Install guide](docs/INSTALL.md) · - [Configuration](docs/CONFIGURATION.md) · [Provider registry](docs/PROVIDERS.md) -- [Modes](docs/MODES.md) — Agent, Plan, and YOLO. -- [Sub-agents](docs/SUBAGENTS.md) — roles, lifecycle, output contract, and - recovery behavior. -- [Architecture](docs/ARCHITECTURE.md) — crate layout, runtime flow, tool system, - extension points, and security model. 
-- [Fleet](docs/FLEET.md) · [WhaleFlow authoring](docs/WHALEFLOW_AUTHORING.md) · - [MCP](docs/MCP.md) · [Runtime API](docs/RUNTIME_API.md) · - [Model Lab](docs/MODEL_LAB.md) -- [Keybindings](docs/KEYBINDINGS.md) · [Sandbox & approvals](docs/SANDBOX.md) - · [Accessibility](docs/ACCESSIBILITY.md) · [Docker](docs/DOCKER.md) - · [Memory](docs/MEMORY.md) -- [Full docs index](docs) — everything else. - ## Thanks CodeWhale exists because of the people who use it, break it, and fix it. diff --git a/README.vi.md b/README.vi.md index 43c3239504..bd79961a44 100644 --- a/README.vi.md +++ b/README.vi.md @@ -50,8 +50,8 @@ nix run github:Hmbown/CodeWhale scoop install codewhale # hoặc trình cài NSIS từ GitHub Releases # CNB mirror cho người dùng khó truy cập GitHub ổn định -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.64 codewhale-cli --locked --force -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.64 codewhale-tui --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.65 codewhale-cli --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.65 codewhale-tui --locked --force # Homebrew legacy trong lúc formula đang được đổi tên brew tap Hmbown/deepseek-tui diff --git a/README.zh-CN.md b/README.zh-CN.md index 9db8237734..fd00001fae 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -49,8 +49,8 @@ nix run github:Hmbown/CodeWhale scoop install codewhale # 或使用 GitHub Releases 中的 NSIS 安装包 # CNB 镜像:适合无法稳定访问 GitHub 的用户 -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.64 codewhale-cli --locked --force -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.64 codewhale-tui --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.65 codewhale-cli --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.65 codewhale-tui --locked --force # 旧 Homebrew 兼容路径:formula 
改名期间仍沿用 deepseek-tui brew tap Hmbown/deepseek-tui diff --git a/benchmark_results/deepseek-anthropic-comparison-2026-06-24.md b/benchmark_results/deepseek-anthropic-comparison-2026-06-24.md new file mode 100644 index 0000000000..d416236407 --- /dev/null +++ b/benchmark_results/deepseek-anthropic-comparison-2026-06-24.md @@ -0,0 +1,524 @@ +# DeepSeek Anthropic-Compatible Endpoint — Comparison Report & Decision (#2963) + +- **Issue:** [#2963](https://github.com/Hmbown/CodeWhale/issues/2963) — *v0.8.65: DeepSeek Anthropic-compatible endpoint wire-protocol spike* +- **Release lane:** v0.8.65 +- **Date:** 2026-06-24 +- **Status:** Implementation **landed and Experimental**. Keep-vs-promote decision **PENDING live numbers** (Section 4). +- **Scope of this document:** A *report*. It changes no Rust code and makes no live API calls — no DeepSeek credentials are available in this environment, so all live figures below are left as a checklist for a human operator to fill in. + +> Do **not** reimplement the route. It already exists on `main` (commit +> `5b8a5ac0b2c478261740f49756d29c4a7f83d89c`, PR +> [#3449](https://github.com/Hmbown/CodeWhale/pull/3449)). This document +> verifies what landed, derives what can be concluded from the code without a +> network, and specifies the exact live procedure to settle the open question. + +All file:line citations below are against the tree at this report's commit +(verified ancestry: `5b8a5ac0b` is an ancestor of `HEAD`). + +--- + +## 1. What's landed + +The opt-in DeepSeek route that speaks the **Anthropic Messages** wire protocol +is implemented end to end. 
**It is already in `main`; do not re-implement it.** + +### 1.1 Provider descriptor / route selection + +- `crates/config/src/provider.rs:140-178` — `DeepseekAnthropic` provider: + - id `deepseek-anthropic` (`provider.rs:143-145`) + - display name `DeepSeek (Anthropic-compatible)` (`provider.rs:151-153`) + - aliases `deepseek_anthropic`, `deepseek-claude`, `deepseek_claude` + (`provider.rs:171-173`) + - **wire format `WireFormat::AnthropicMessages`** (`provider.rs:175-177`) + - API-key env var: **`DEEPSEEK_API_KEY` only** (`provider.rs:163-165`) — it + does **not** fall back to `ANTHROPIC_API_KEY`. +- `crates/config/src/provider.rs:31-38` — `WireFormat` enum + (`ChatCompletions` / `Responses` / `AnthropicMessages`). +- Registry wiring: static entry `provider.rs:544`, registered at + `provider.rs:573`. +- Defaults (`crates/config/src/provider_defaults.rs`): + - base URL `https://api.deepseek.com/anthropic` + (`provider_defaults.rs:14`) + - default model `deepseek-v4-pro` — `DEFAULT_DEEPSEEK_ANTHROPIC_MODEL` + aliases `DEFAULT_DEEPSEEK_MODEL` (`provider_defaults.rs:8-9`) +- The Chat-Completions DeepSeek route, for contrast, defaults to base URL + `https://api.deepseek.com/beta` (`provider_defaults.rs:13`) with the same + default model `deepseek-v4-pro`. + +### 1.2 Dispatch + +- `crates/tui/src/client.rs:1331-1339` (`create_message`) and + `client.rs:1341-1352` (`create_message_stream`) route to the Anthropic + adapter when `api_provider_uses_anthropic_messages(self.api_provider)` is + true. +- `client.rs:864-869` — `api_provider_uses_anthropic_messages` returns true for + `ApiProvider::Anthropic | ApiProvider::DeepseekAnthropic`. +- Request payload mode is selected by route, not prompt: + `crates/tui/src/config.rs:526-530` sets + `RequestPayloadMode::AnthropicMessages` for `DeepseekAnthropic`, else + `ChatCompletions`. 
+ +### 1.3 Auth dialect + +- `crates/tui/src/client.rs:805-838` builds headers: + - injects `anthropic-version: 2023-06-01` for Anthropic-wire providers + (`client.rs:808-815`) + - uses **`x-api-key`** (never `Authorization: Bearer`) for the API key + (`client.rs:817-819`, applied `client.rs:831-837`) +- `client.rs:846-862` strips any caller-supplied `Authorization` / `api-key` / + `x-api-key` extra headers so a stale OpenAI-style auth header cannot leak onto + the Anthropic wire (`is_auth_dialect_header`, `client.rs:858-862`). +- Tests: `deepseek_anthropic_uses_anthropic_header_dialect` + (`client.rs:2216`+) asserts `x-api-key` + `anthropic-version` are present and + that Bearer / MiMo headers are absent. + +### 1.4 Request encoding (Messages body) + +- `crates/tui/src/client/anthropic.rs:40-143` — `build_anthropic_body`: + - `model` / `max_tokens` / `stream` (`anthropic.rs:41-45`) + - `system` as text or cache-aware blocks (`anthropic.rs:47-66`) + - `messages` via `message_to_anthropic` (`anthropic.rs:68-74`, + `anthropic.rs:291-301`) + - `tools` with `strict` + `cache_control` (`anthropic.rs:76-98`) + - `tool_choice` mapped from OpenAI-style string/object to Anthropic object + form (`anthropic.rs:100-102`, `anthropic.rs:279-287`) + - reasoning → `thinking: {type: adaptive}` + `output_config.effort` + (low/medium/high/max), gated on `model_supports_reasoning` + (`anthropic.rs:104-128`) + - sampling-parameter rules: send at most one of temperature/top_p, or neither + for models that reject them (`anthropic.rs:130-139`, + `anthropic.rs:269-275`) + - `cache_control` breakpoint placement, capped at 4 + (`anthropic.rs:141`, `anthropic.rs:367-446`) +- Endpoint URL builder tolerates a `/v1` suffix + (`anthropic.rs:259-266`); `https://api.deepseek.com/anthropic` → + `…/anthropic/v1/messages`. + +### 1.5 Response & stream decoding + +- Non-streaming: `anthropic.rs:240-254` (`handle_anthropic_message`) parses the + JSON body and normalizes `usage`. 
+- Streaming: `anthropic.rs:170-237` (`handle_anthropic_stream`) is an SSE + pass-through; `convert_anthropic_sse_data` (`anthropic.rs:450-494`) decodes + `message_start` / `content_block_*` / `message_delta` / `message_stop` / + `ping` / `error`, tolerates unknown event types, and normalizes usage on + `message_start` / `message_delta`. +- Send/error path: `anthropic.rs:145-167` (`send_anthropic_request`) sets + `Accept: text/event-stream`, maps non-2xx into a typed error via + `parse_anthropic_error_envelope` (`anthropic.rs:528-548`). + +### 1.6 Usage / cache normalization (#2961 convention) + +- `anthropic.rs:499-523` (`parse_anthropic_usage`): + - `prompt_cache_hit_tokens = cache_read_input_tokens` + - `prompt_cache_miss_tokens = input_tokens + cache_creation_input_tokens` + - normalized `input_tokens = input_tokens + cache_creation + cache_read` + (total prompt — the DeepSeek convention) + +### 1.7 Operational guardrails added with the route + +- Health check **skips the `/anthropic/v1/models` probe** for this route + (`client.rs:871-873`, `api_provider_skips_models_probe`); test + `deepseek_anthropic_health_check_skips_models_probe` (`client.rs:2301`+). +- **FIM is unsupported** on this route and fails locally with a clear message + (`client.rs:1722-1727`); test `deepseek_anthropic_fim_fails_without_http_request` + (`client.rs:2314`+). +- Base-URL env override is route-aware: `CODEWHALE_BASE_URL` / `DEEPSEEK_BASE_URL` + writes into `providers.deepseek_anthropic.base_url` + (`crates/tui/src/config.rs:3928-3939`). +- Translation helper uses the Messages endpoint for this provider + (`client.rs:974-977`); test + `deepseek_anthropic_translate_uses_messages_endpoint` (`client.rs:2251`+). 
+ +### 1.8 Docs framing + +- `docs/PROVIDERS.md:48-51`, `:81`, `:111-112`, `:237` document the route as + **Anthropic *wire-protocol* compatibility** (not Anthropic model/provider + semantics), list the aliases, and state "Keep `provider = "deepseek"` for the + default Chat Completions path." + +### 1.9 Test coverage already present (no live calls) + +In `crates/tui/src/client/anthropic.rs` `#[cfg(test)]` (from `anthropic.rs:550`): +body cache-control placement, reasoning→effort mapping, sampling-param dropping, +signed/unsigned thinking replay, breakpoint cap, full SSE fixture decode +(text + thinking + signature + tool_use + usage), error/unknown-event handling, +usage mapping with missing cache fields, error-envelope parsing, URL `/v1` +tolerance. In `crates/tui/src/client.rs`: the auth-dialect, models-probe-skip, +translate-endpoint, and FIM-unsupported tests cited above. + +--- + +## 2. Code-derived findings (no live calls needed) + +These are behavioral facts that can be stated **from the code today**, before +any live comparison. They are the deltas a reviewer most needs to know. + +### 2.1 Server tools / web search are NOT exercised via this route today + +`content_block_to_anthropic` **drops** the server-tool block types on encode: + +``` +crates/tui/src/client/anthropic.rs:359-364 + // Server-tool block types are DeepSeek/internal concepts with no + // Anthropic client-side wire equivalent. + ContentBlock::ServerToolUse { .. } + | ContentBlock::ToolSearchToolResult { .. } + | ContentBlock::CodeExecutionToolResult { .. } => None, +``` + +Consequence: any server-tool / web-search content the engine holds is filtered +out before the request is sent on this route. There is also no encode-side path +that *injects* an Anthropic-style server-tool definition (e.g. a `web_search` +tool) into the outbound body — `build_anthropic_body` only forwards +caller-supplied client tools (`anthropic.rs:76-98`). 
So **server-side web +search / code execution is not exercised through the DeepSeek Anthropic route as +implemented.** Whether DeepSeek's endpoint would *accept* such a tool is a +separate, still-open question that only live testing (Section 4, Test E) can +answer; the code neither offers nor depends on it. + +### 2.2 Usage telemetry: two real deltas vs the Chat-Completions path + +Compare the two usage parsers: + +| Field | Anthropic route (`anthropic.rs:499-523`) | Chat-Completions route (`client.rs:1643-1711`) | +|---|---|---| +| `input_tokens` (normalized) | `input + cache_creation + cache_read` | `input_tokens`/`prompt_tokens` as-is | +| `prompt_cache_hit_tokens` | `cache_read_input_tokens` | `prompt_cache_hit_tokens`, else `prompt_tokens_details.cached_tokens` | +| `prompt_cache_miss_tokens` | `input + cache_creation` | `prompt_cache_miss_tokens`, else `input − hit` | +| `reasoning_tokens` | **always `None`** (`anthropic.rs:519`) | parsed from `completion_tokens_details.reasoning_tokens` (`client.rs:1658-1685`) | +| `reasoning_replay_tokens` | `None` (`anthropic.rs:520`) | `None` (`client.rs:1708`) | +| `server_tool_use` | **always `None`** (`anthropic.rs:521`) | parsed from `server_tool_use.{code_execution,tool_search}_requests` (`client.rs:1687-1700`) | +| `output_tokens` | Anthropic `output_tokens` | `output_tokens`/`completion_tokens`, with fallbacks to reasoning or `total − input` (`client.rs:1648-1670`) | + +Two concrete deltas to record honestly in any telemetry comparison: + +1. **`reasoning_tokens` is never populated on the Anthropic route.** Reasoning + *content* still flows (thinking blocks decode and signed blocks replay — + `anthropic.rs:315-330`, `anthropic.rs:822-868` fixture), but the **count** + is dropped. On the Chat-Completions route the count is read from + `completion_tokens_details.reasoning_tokens`. 
This is per the #2961/#3085 + "explicit unknown/null for unsupported fields" rule, but it means + reasoning-token *accounting parity* between the two routes cannot be + expected — confirm in Test C. +2. **`server_tool_use` is never populated on the Anthropic route** (consistent + with §2.1: the route doesn't drive server tools). + +### 2.3 Thinking / reasoning request shaping differs by design + +The Anthropic route maps `reasoning_effort` tiers to +`thinking: {type: adaptive}` + `output_config.effort` +(`anthropic.rs:104-128`), gated on `model_supports_reasoning`. The +Chat-Completions DeepSeek path uses its own reasoning-split / payload +conventions (`config.rs:526-530` selects the payload mode; DeepSeek-family +reasoning handling lives on the Chat path). Equivalent *output* is the bar to +test (Section 3/4), not byte-identical requests. + +### 2.4 Caching model differs in shape + +The Anthropic route places explicit `cache_control` breakpoints (max 4) on the +prefix and latest user turn (`anthropic.rs:367-446`) and reports cache +hit/miss from Anthropic's `cache_read` / `cache_creation` fields. The +Chat-Completions route relies on DeepSeek's automatic prefix caching and reads +`prompt_cache_hit_tokens` / `prompt_cache_miss_tokens` (or +`prompt_tokens_details.cached_tokens`). Both normalize into the same #2961 +fields, so cache *telemetry* is comparable even though the *mechanism* differs. + +### 2.5 Capability/operational deltas (route-level, from code) + +- **FIM**: supported on Chat-Completions DeepSeek; **unsupported** on the + Anthropic route (`client.rs:1722-1727`). +- **Models probe**: skipped on the Anthropic route (`client.rs:871-873`); the + Chat path probes `/models`. +- **Auth**: `x-api-key` + `anthropic-version` (Anthropic route) vs + `Authorization: Bearer` (Chat route) — `client.rs:817-827`. +- **Endpoint**: `…/anthropic/v1/messages` vs `…/beta` chat completions. 
+ +### 2.6 What is *equivalent* by construction + +Tool-call and tool-result mapping, image blocks, system prompt, and stop +reasons all have direct encoders (`anthropic.rs:303-358`) and the SSE decoder +reconstructs tool-use input JSON (fixture `anthropic.rs:816-897`). So for an +ordinary "prompt → text / tool_use" exchange, the two routes are expected to be +functionally equivalent; the open questions are the *quantitative* ones +(latency, token counts) and the *server-tool* one. + +--- + +## 3. Comparison methodology + +Compare DeepSeek's **Chat-Completions** route (`provider = "deepseek"`) against +its **Anthropic-Messages** route (`provider = "deepseek-anthropic"`) for the +**same model** (`deepseek-v4-pro`, and `deepseek-v4-flash` if the account has +it). Hold everything else constant (same prompt, same `max_tokens`, same +reasoning effort, same temperature where accepted). + +Dimensions: + +1. **Correctness / output equivalence** — same prompt → semantically equivalent + answer; same tool selection and arguments for a tool-use prompt; valid JSON + for a structured prompt. +2. **Latency** — wall-clock total and (for streaming) time-to-first-token, over + N≥5 runs each; report median + spread, not a single sample. +3. **Token / usage accounting parity** — compare `input_tokens` (normalized), + `output_tokens`, `prompt_cache_hit_tokens`, `prompt_cache_miss_tokens`, + `reasoning_tokens`. **Expect `reasoning_tokens` to be null on the Anthropic + route** (§2.2) — record it, don't treat it as a bug. +4. **Telemetry fields** — which of the #2961/#3085 normalized fields are + populated vs null on each route; note `server_tool_use` is null on the + Anthropic route by construction. +5. **Server-tool / web-search support** — does DeepSeek's Anthropic endpoint + *accept*, *ignore*, or *reject* an Anthropic-style server tool (e.g. + `web_search`)? Capture the raw request/response. 
(Recall the engine does not + send such a tool today — §2.1 — so this is an endpoint-capability probe with + a hand-built request, not a test of CodeWhale's encoder.) +6. **Error envelopes & rate limiting** — confirm 4xx/5xx map cleanly + (`anthropic.rs:528-548`) and that the route honors the same retry/backoff. + +Pass bar for "comparable" (issue Acceptance Criteria): equivalent correctness on +the smoke tasks, latency within a reasonable band, and usage telemetry that maps +into the normalized fields (with explicit nulls where unsupported). + +--- + +## 4. Runnable live checklist (human, with `DEEPSEEK_API_KEY` set) + +All commands are copy-pasteable. They assume the repo root and a DeepSeek key. +**No credentials exist in this environment; these are for a human to run.** + +### 4.0 One-time setup + +```bash +export DEEPSEEK_API_KEY="sk-..." # your DeepSeek key +MODEL="deepseek-v4-pro" # also repeat with deepseek-v4-flash if available +CHAT_BASE="https://api.deepseek.com" # Chat Completions (OpenAI-compatible) +ANTH_BASE="https://api.deepseek.com/anthropic" # Anthropic Messages +cd "$(git rev-parse --show-toplevel)" && mkdir -p benchmark_results/2963-live +``` + +### Test A — correctness, single turn (text) + +Chat Completions: + +```bash +curl -sS -w '\n[http %{http_code} | total %{time_total}s | ttfb %{time_starttransfer}s]\n' \ + -X POST "$CHAT_BASE/v1/chat/completions" \ + -H "Authorization: Bearer $DEEPSEEK_API_KEY" \ + -H "Content-Type: application/json" \ + -d "{\"model\":\"$MODEL\",\"max_tokens\":64,\"stream\":false, + \"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly the word: PONG\"}]}" \ + | tee benchmark_results/2963-live/A_chat.json +``` + +Anthropic Messages (note `x-api-key` + `anthropic-version`, no Bearer): + +```bash +curl -sS -w '\n[http %{http_code} | total %{time_total}s | ttfb %{time_starttransfer}s]\n' \ + -X POST "$ANTH_BASE/v1/messages" \ + -H "x-api-key: $DEEPSEEK_API_KEY" \ + -H "anthropic-version: 2023-06-01" \ + -H
"Content-Type: application/json" \ + -d "{\"model\":\"$MODEL\",\"max_tokens\":64,\"stream\":false, + \"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly the word: PONG\"}]}" \ + | tee benchmark_results/2963-live/A_anthropic.json +``` + +Record: does each return "PONG"? HTTP status, total time. + +### Test B — usage / token accounting (read the `usage` object on both) + +```bash +echo "Chat usage:"; jq '.usage' benchmark_results/2963-live/A_chat.json +echo "Anthropic usage:"; jq '.usage' benchmark_results/2963-live/A_anthropic.json +``` + +Fill in the table: + +| Field | Chat Completions | Anthropic Messages | +|---|---|---| +| prompt/input tokens | | | +| completion/output tokens | | | +| cache hit (`prompt_cache_hit_tokens` / `cache_read_input_tokens`) | | | +| cache miss (`prompt_cache_miss_tokens` / `input_tokens + cache_creation_input_tokens`) | | | +| reasoning tokens (`completion_tokens_details.reasoning_tokens`) | | (expected absent) | + +### Test C — reasoning / thinking + +Chat Completions (DeepSeek reasoner-style): + +```bash +curl -sS -X POST "$CHAT_BASE/v1/chat/completions" \ + -H "Authorization: Bearer $DEEPSEEK_API_KEY" -H "Content-Type: application/json" \ + -d "{\"model\":\"$MODEL\",\"max_tokens\":512,\"stream\":false, + \"messages\":[{\"role\":\"user\",\"content\":\"A bat and ball cost \$1.10. The bat costs \$1 more than the ball. How much is the ball? Think, then answer.\"}]}" \ + | tee benchmark_results/2963-live/C_chat.json | jq '{content:.choices[0].message, usage:.usage}' +``` + +Anthropic Messages with adaptive thinking: + +```bash +curl -sS -X POST "$ANTH_BASE/v1/messages" \ + -H "x-api-key: $DEEPSEEK_API_KEY" -H "anthropic-version: 2023-06-01" -H "Content-Type: application/json" \ + -d "{\"model\":\"$MODEL\",\"max_tokens\":512,\"stream\":false, + \"thinking\":{\"type\":\"adaptive\"},\"output_config\":{\"effort\":\"high\"}, + \"messages\":[{\"role\":\"user\",\"content\":\"A bat and ball cost \$1.10. The bat costs \$1 more than the ball.
How much is the ball? Think, then answer.\"}]}" \ + | tee benchmark_results/2963-live/C_anthropic.json | jq '{content:.content, usage:.usage}' +``` + +Record: both should answer **\$0.05**. Note whether a `thinking` block is +returned by the Anthropic route and whether reasoning tokens appear anywhere. + +### Test D — tool use (same tool both routes) + +Chat Completions: + +```bash +curl -sS -X POST "$CHAT_BASE/v1/chat/completions" \ + -H "Authorization: Bearer $DEEPSEEK_API_KEY" -H "Content-Type: application/json" \ + -d "{\"model\":\"$MODEL\",\"max_tokens\":256, + \"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\", + \"description\":\"Get weather for a city\", + \"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}], + \"messages\":[{\"role\":\"user\",\"content\":\"What's the weather in Paris? Use the tool.\"}]}" \ + | tee benchmark_results/2963-live/D_chat.json | jq '.choices[0].message.tool_calls' +``` + +Anthropic Messages: + +```bash +curl -sS -X POST "$ANTH_BASE/v1/messages" \ + -H "x-api-key: $DEEPSEEK_API_KEY" -H "anthropic-version: 2023-06-01" -H "Content-Type: application/json" \ + -d "{\"model\":\"$MODEL\",\"max_tokens\":256, + \"tools\":[{\"name\":\"get_weather\",\"description\":\"Get weather for a city\", + \"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}], + \"messages\":[{\"role\":\"user\",\"content\":\"What's the weather in Paris? Use the tool.\"}]}" \ + | tee benchmark_results/2963-live/D_anthropic.json | jq '.content' +``` + +Record: does each emit a `get_weather` call with `city = "Paris"`? + +### Test E — server-tool / web-search capability probe (the open question) + +Send an Anthropic-style server tool and **record whether DeepSeek accepts, +ignores, or rejects it** (capture the full body). The engine does not send this +today (§2.1); this is a raw endpoint probe. 
+ +```bash +curl -sS -w '\n[http %{http_code}]\n' -X POST "$ANTH_BASE/v1/messages" \ + -H "x-api-key: $DEEPSEEK_API_KEY" -H "anthropic-version: 2023-06-01" -H "Content-Type: application/json" \ + -d "{\"model\":\"$MODEL\",\"max_tokens\":256, + \"tools\":[{\"type\":\"web_search_20250305\",\"name\":\"web_search\",\"max_uses\":2}], + \"messages\":[{\"role\":\"user\",\"content\":\"Search the web: what is the latest stable Rust version? Cite a source.\"}]}" \ + | tee benchmark_results/2963-live/E_websearch.json +``` + +Classify the outcome: +- **Accepted + worked** — response contains server-tool-use / search results. +- **Ignored** — 200 OK, plain answer, no tool activity. +- **Rejected** — 4xx with an error envelope (record `error.type` / message). + +### Test F — streaming smoke (both routes) + +```bash +# Anthropic SSE +curl -N -sS -X POST "$ANTH_BASE/v1/messages" \ + -H "x-api-key: $DEEPSEEK_API_KEY" -H "anthropic-version: 2023-06-01" \ + -H "Accept: text/event-stream" -H "Content-Type: application/json" \ + -d "{\"model\":\"$MODEL\",\"max_tokens\":64,\"stream\":true, + \"messages\":[{\"role\":\"user\",\"content\":\"Count: one two three\"}]}" \ + | tee benchmark_results/2963-live/F_anthropic.sse | head -40 +``` + +Confirm `message_start` → `content_block_*` → `message_delta` → `message_stop` +arrive (the shapes `convert_anthropic_sse_data` decodes, `anthropic.rs:450-494`). + +### Test G — end-to-end through CodeWhale (optional, exercises the real adapter) + +```bash +# Anthropic route through the built binary +cargo run -q -p codewhale -- --provider deepseek-anthropic --model "$MODEL" \ + --print "Reply with exactly: PONG" +# Chat route for comparison +cargo run -q -p codewhale -- --provider deepseek --model "$MODEL" \ + --print "Reply with exactly: PONG" +``` + +(Adjust the binary/flag names to the project's actual non-interactive entry +point if different; the point is to run one prompt through each resolved route.) 
+ +### 4.1 Results table to fill in + +| Dimension | Chat Completions | Anthropic Messages | Verdict | +|---|---|---|---| +| Correctness (A/C/D) | | | | +| Latency median (N=…) | | | | +| TTFT (streaming) | | | | +| Token accounting (B) | | | | +| reasoning_tokens present | | (expected no) | | +| Tool use (D) | | | | +| Web search (E) | n/a | accept / ignore / reject | | +| Streaming (F) | | | | + +--- + +## 5. Decision + +**Recommendation: KEEP as Experimental. The keep-vs-promote decision is PENDING +the live numbers in Section 4. This report does not assert a "verified" verdict +because no live calls were made.** + +Rationale, grounded in code: + +- **Keep (not reject):** the route is fully implemented, isolated behind opt-in + provider selection (`deepseek-anthropic` / `deepseek-claude`), guarded + (FIM-unsupported message, models-probe skip, auth-header hygiene), and covered + by unit + SSE-fixture tests. It does not touch or regress the default + Chat-Completions DeepSeek path (separate dispatch at `client.rs:1331-1352`; + docs say keep `provider = "deepseek"` for the default). Nothing in the code + argues for ripping it out. +- **Do not promote yet:** the issue's promotion bar requires the Anthropic route + to be *at least comparable* on a live A/B, plus explicit server-tool evidence. + That evidence does not exist here. Two code-derived caveats that promotion + must weigh: (a) `reasoning_tokens` accounting is dropped on this route + (§2.2 #1), and (b) server tools / web search are not exercised through it + (§2.1) — so if web search is a requirement for "preferred," this route does + not satisfy it today regardless of what Test E shows about the endpoint. +- **Gate to flip the decision:** complete Section 4 (especially Tests A–E), + fill the §4.1 table, and confirm equivalent correctness + comparable latency + + clean telemetry mapping. If all green and web search is not a blocker → + candidate to promote to preferred for DeepSeek V4. 
Otherwise → remain + Experimental, or reject the *promotion* (not the route) if telemetry/latency + regress. + +### Suggested issue note (after live numbers are in) + +> Implementation verified landed (#3449 / `5b8a5ac0b`); see +> `benchmark_results/deepseek-anthropic-comparison-2026-06-24.md`. Live A/B +> results: [fill in]. Server-tool/web-search probe (Test E): [accept/ignore/ +> reject + evidence]. Decision: [keep experimental | promote to preferred]. + +--- + +## Appendix — citation index + +| Topic | Location | +|---|---| +| `WireFormat` enum | `crates/config/src/provider.rs:31-38` | +| `DeepseekAnthropic` descriptor | `crates/config/src/provider.rs:140-178` | +| Registry entry | `crates/config/src/provider.rs:544`, `:573` | +| Base URL / model defaults | `crates/config/src/provider_defaults.rs:8-9,13-14` | +| Dispatch to Anthropic adapter | `crates/tui/src/client.rs:1331-1352` | +| `api_provider_uses_anthropic_messages` | `crates/tui/src/client.rs:864-869` | +| Auth header build (`x-api-key`/`anthropic-version`) | `crates/tui/src/client.rs:805-862` | +| Models-probe skip | `crates/tui/src/client.rs:871-873` | +| FIM unsupported | `crates/tui/src/client.rs:1722-1727` | +| Chat-Completions usage parser | `crates/tui/src/client.rs:1643-1711` | +| Base-URL env override (route-aware) | `crates/tui/src/config.rs:3928-3939` | +| Payload-mode selection | `crates/tui/src/config.rs:526-530` | +| `build_anthropic_body` | `crates/tui/src/client/anthropic.rs:40-143` | +| Messages URL builder | `crates/tui/src/client/anthropic.rs:259-266` | +| **Server-tool blocks dropped on encode** | `crates/tui/src/client/anthropic.rs:359-364` | +| Anthropic usage normalizer | `crates/tui/src/client/anthropic.rs:499-523` | +| Error-envelope parser | `crates/tui/src/client/anthropic.rs:528-548` | +| Docs framing | `docs/PROVIDERS.md:48-51,81,111-112,237` | +| Landed commit / PR | `5b8a5ac0b2c478261740f49756d29c4a7f83d89c` / [#3449](https://github.com/Hmbown/CodeWhale/pull/3449) | diff 
--git a/crates/agent/Cargo.toml b/crates/agent/Cargo.toml index addf453d7c..749089991b 100644 --- a/crates/agent/Cargo.toml +++ b/crates/agent/Cargo.toml @@ -7,5 +7,5 @@ repository.workspace = true description = "Model/provider registry and fallback strategy for CodeWhale" [dependencies] -codewhale-config = { path = "../config", version = "0.8.64" } +codewhale-config = { path = "../config", version = "0.8.65" } serde.workspace = true diff --git a/crates/app-server/Cargo.toml b/crates/app-server/Cargo.toml index 949eff39ef..1d9141b595 100644 --- a/crates/app-server/Cargo.toml +++ b/crates/app-server/Cargo.toml @@ -12,15 +12,15 @@ autobins = false anyhow.workspace = true axum.workspace = true clap.workspace = true -codewhale-agent = { path = "../agent", version = "0.8.64" } -codewhale-config = { path = "../config", version = "0.8.64" } -codewhale-core = { path = "../core", version = "0.8.64" } -codewhale-execpolicy = { path = "../execpolicy", version = "0.8.64" } -codewhale-hooks = { path = "../hooks", version = "0.8.64" } -codewhale-mcp = { path = "../mcp", version = "0.8.64" } -codewhale-protocol = { path = "../protocol", version = "0.8.64" } -codewhale-state = { path = "../state", version = "0.8.64" } -codewhale-tools = { path = "../tools", version = "0.8.64" } +codewhale-agent = { path = "../agent", version = "0.8.65" } +codewhale-config = { path = "../config", version = "0.8.65" } +codewhale-core = { path = "../core", version = "0.8.65" } +codewhale-execpolicy = { path = "../execpolicy", version = "0.8.65" } +codewhale-hooks = { path = "../hooks", version = "0.8.65" } +codewhale-mcp = { path = "../mcp", version = "0.8.65" } +codewhale-protocol = { path = "../protocol", version = "0.8.65" } +codewhale-state = { path = "../state", version = "0.8.65" } +codewhale-tools = { path = "../tools", version = "0.8.65" } serde.workspace = true serde_json.workspace = true rustls.workspace = true diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 
ba1a7ab066..f0e3336dd9 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -19,14 +19,14 @@ path = "src/bin/codew_legacy_shim.rs" anyhow.workspace = true clap.workspace = true clap_complete.workspace = true -codewhale-agent = { path = "../agent", version = "0.8.64" } -codewhale-app-server = { path = "../app-server", version = "0.8.64" } -codewhale-config = { path = "../config", version = "0.8.64" } -codewhale-execpolicy = { path = "../execpolicy", version = "0.8.64" } -codewhale-mcp = { path = "../mcp", version = "0.8.64" } -codewhale-release = { path = "../release", version = "0.8.64" } -codewhale-secrets = { path = "../secrets", version = "0.8.64" } -codewhale-state = { path = "../state", version = "0.8.64" } +codewhale-agent = { path = "../agent", version = "0.8.65" } +codewhale-app-server = { path = "../app-server", version = "0.8.65" } +codewhale-config = { path = "../config", version = "0.8.65" } +codewhale-execpolicy = { path = "../execpolicy", version = "0.8.65" } +codewhale-mcp = { path = "../mcp", version = "0.8.65" } +codewhale-release = { path = "../release", version = "0.8.65" } +codewhale-secrets = { path = "../secrets", version = "0.8.65" } +codewhale-state = { path = "../state", version = "0.8.65" } chrono.workspace = true dirs.workspace = true serde.workspace = true diff --git a/crates/config/Cargo.toml b/crates/config/Cargo.toml index 4b5ee4927c..9b864b2641 100644 --- a/crates/config/Cargo.toml +++ b/crates/config/Cargo.toml @@ -8,8 +8,8 @@ description = "Config schema and precedence model for CodeWhale" [dependencies] anyhow.workspace = true -codewhale-execpolicy = { path = "../execpolicy", version = "0.8.64" } -codewhale-secrets = { path = "../secrets", version = "0.8.64" } +codewhale-execpolicy = { path = "../execpolicy", version = "0.8.65" } +codewhale-secrets = { path = "../secrets", version = "0.8.65" } dirs.workspace = true libc = "0.2" serde.workspace = true diff --git a/crates/config/assets/models_dev.bundled.json 
b/crates/config/assets/models_dev.bundled.json new file mode 100644 index 0000000000..c5802c1256 --- /dev/null +++ b/crates/config/assets/models_dev.bundled.json @@ -0,0 +1,425 @@ +{ + "_meta": { + "about": "Bundled, network-free Models.dev-shaped catalog snapshot for CodeWhale (#3385).", + "schema": "Matches crates/config/src/models_dev.rs ModelsDevCatalog ({ models, providers }).", + "source": "Curated from in-repo verified facts, NOT a live models.dev dump. Context windows and max-output are sourced from crates/tui/src/models.rs (context_window_for_model / max_output_tokens_for_model); USD-per-million pricing is sourced from crates/tui/src/pricing.rs. The public models.dev catalog tracks a different (real) model generation than CodeWhale's curated forward-dated model set, so a live transform would disagree with the repo's own model registry and tests. Curated-but-accurate is preferred per the issue.", + "honesty": "Pricing is intentionally OMITTED where the repo does not publish a trustworthy per-token rate: DeepSeek-native rows (priced via the time-aware DeepSeek table elsewhere, kept UnknownOrStale at the route layer), aggregator-hosted DeepSeek rows (aggregator account terms, not DeepSeek Platform pricing), Anthropic rows (no in-repo per-token table), and Xiaomi MiMo Token-Plan rows (credit/quota based). Absent pricing surfaces as PricingSku::UnknownOrStale, never a fabricated zero.", + "default_rows": "Each provider's `default: true` wire id equals that provider's built-in DEFAULT_*_MODEL so RouteResolver::new() and the descriptor stay in agreement.", + "coverage": "13 providers, 27 chat offerings." 
+ }, + "models": { + "deepseek-v4-pro": { + "id": "deepseek-v4-pro", + "name": "DeepSeek V4 Pro", + "family": "deepseek", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 384000 }, + "open_weights": true + }, + "deepseek-v4-flash": { + "id": "deepseek-v4-flash", + "name": "DeepSeek V4 Flash", + "family": "deepseek", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 384000 }, + "open_weights": true + } + }, + "providers": { + "deepseek": { + "id": "deepseek", + "name": "DeepSeek", + "api": "https://api.deepseek.com", + "npm": "@ai-sdk/openai-compatible", + "env": ["DEEPSEEK_API_KEY"], + "models": { + "deepseek-v4-pro": { + "id": "deepseek-v4-pro", + "base_model": "deepseek-v4-pro", + "name": "DeepSeek V4 Pro", + "family": "deepseek", + "default": true, + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 384000 } + }, + "deepseek-v4-flash": { + "id": "deepseek-v4-flash", + "base_model": "deepseek-v4-flash", + "name": "DeepSeek V4 Flash", + "family": "deepseek", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 384000 } + } + } + }, + "zai": { + "id": "zai", + "name": "Zhipu AI / Z.ai", + "api": "https://api.z.ai/api/paas/v4", + "npm": "@ai-sdk/openai-compatible", + "env": ["ZAI_API_KEY", "ZHIPU_API_KEY", "GLM_API_KEY"], + "models": { + "GLM-5.2": { + "id": "GLM-5.2", + "name": "GLM-5.2", + "family": "glm", + "default": true, + "reasoning": true, + "reasoning_options": [{ "type": "effort", "values": ["high", "max"] }], + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 131072 } + }, + "glm-5.1": { + "id": "glm-5.1", + "name": 
"GLM-5.1", + "family": "glm", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 202752, "output": 131072 }, + "cost": { "input": 0.98, "output": 3.08, "cache_read": 0.182 } + } + } + }, + "moonshot": { + "id": "moonshot", + "name": "Moonshot / Kimi", + "api": "https://api.moonshot.ai/v1", + "npm": "@ai-sdk/openai-compatible", + "env": ["MOONSHOT_API_KEY", "KIMI_API_KEY"], + "models": { + "kimi-k2.7-code": { + "id": "kimi-k2.7-code", + "name": "Kimi K2.7 Code", + "family": "kimi", + "default": true, + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 262144, "output": 262144 } + }, + "kimi-k2.6": { + "id": "kimi-k2.6", + "name": "Kimi K2.6", + "family": "kimi", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 262144, "output": 262144 }, + "cost": { "input": 0.68, "output": 3.41, "cache_read": 0.34 } + } + } + }, + "minimax": { + "id": "minimax", + "name": "MiniMax", + "api": "https://api.minimax.io/v1", + "npm": "@ai-sdk/openai-compatible", + "env": ["MINIMAX_API_KEY"], + "models": { + "MiniMax-M3": { + "id": "MiniMax-M3", + "name": "MiniMax M3", + "family": "minimax", + "default": true, + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 524288 }, + "cost": { "input": 0.30, "output": 1.20, "cache_read": 0.06 } + }, + "minimax-m2.7": { + "id": "minimax-m2.7", + "name": "MiniMax M2.7", + "family": "minimax", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 204800, "output": 204800 } + } + } + }, + "openai": { + "id": "openai", + "name": "OpenAI-compatible", + "api": "https://api.openai.com/v1", + "npm": "@ai-sdk/openai", + "env": ["OPENAI_API_KEY"], + "models": { + 
"deepseek-v4-pro": { + "id": "deepseek-v4-pro", + "name": "DeepSeek V4 Pro (OpenAI-compatible default)", + "family": "deepseek", + "default": true, + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 384000 } + }, + "gpt-5.5": { + "id": "gpt-5.5", + "name": "GPT-5.5", + "family": "gpt", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1050000, "output": 128000 }, + "cost": { "input": 5.00, "output": 30.00, "cache_read": 0.50 } + }, + "gpt-5.5-pro": { + "id": "gpt-5.5-pro", + "name": "GPT-5.5 Pro", + "family": "gpt", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1050000, "output": 128000 }, + "cost": { "input": 30.00, "output": 180.00 } + } + } + }, + "anthropic": { + "id": "anthropic", + "name": "Anthropic", + "api": "https://api.anthropic.com", + "npm": "@ai-sdk/anthropic", + "env": ["ANTHROPIC_API_KEY"], + "models": { + "claude-sonnet-4-6": { + "id": "claude-sonnet-4-6", + "name": "Claude Sonnet 4.6", + "family": "claude", + "default": true, + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 64000 } + }, + "claude-opus-4-8": { + "id": "claude-opus-4-8", + "name": "Claude Opus 4.8", + "family": "claude", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 128000 } + }, + "claude-haiku-4-5": { + "id": "claude-haiku-4-5", + "name": "Claude Haiku 4.5", + "family": "claude", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 200000, "output": 64000 } + } + } + }, + "openrouter": { + "id": "openrouter", + "name": "OpenRouter", + "api": "https://openrouter.ai/api/v1", + 
"npm": "@openrouter/ai-sdk-provider", + "env": ["OPENROUTER_API_KEY"], + "models": { + "deepseek/deepseek-v4-pro": { + "id": "deepseek/deepseek-v4-pro", + "base_model": "deepseek-v4-pro", + "name": "DeepSeek V4 Pro (OpenRouter)", + "family": "deepseek", + "default": true, + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 384000 } + }, + "qwen/qwen3.6-flash": { + "id": "qwen/qwen3.6-flash", + "name": "Qwen3.6 Flash", + "family": "qwen", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 65536 }, + "cost": { "input": 0.1875, "output": 1.125 } + }, + "qwen/qwen3.6-plus": { + "id": "qwen/qwen3.6-plus", + "name": "Qwen3.6 Plus", + "family": "qwen", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 65536 }, + "cost": { "input": 0.325, "output": 1.95 } + }, + "qwen/qwen3.6-35b-a3b": { + "id": "qwen/qwen3.6-35b-a3b", + "name": "Qwen3.6 35B-A3B", + "family": "qwen", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 262144, "output": 262140 }, + "cost": { "input": 0.15, "output": 1.00, "cache_read": 0.05 } + }, + "minimax/minimax-m3": { + "id": "minimax/minimax-m3", + "name": "MiniMax M3 (OpenRouter)", + "family": "minimax", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 524288 }, + "cost": { "input": 0.30, "output": 1.20, "cache_read": 0.06 } + } + } + }, + "together": { + "id": "together", + "name": "Together AI", + "api": "https://api.together.xyz/v1", + "npm": "@ai-sdk/openai-compatible", + "env": ["TOGETHER_API_KEY"], + "models": { + "deepseek-ai/DeepSeek-V4-Pro": { + "id": "deepseek-ai/DeepSeek-V4-Pro", + "base_model": 
"deepseek-v4-pro", + "name": "DeepSeek V4 Pro (Together)", + "family": "deepseek", + "default": true, + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 384000 } + } + } + }, + "fireworks": { + "id": "fireworks", + "name": "Fireworks AI", + "api": "https://api.fireworks.ai/inference/v1", + "npm": "@ai-sdk/openai-compatible", + "env": ["FIREWORKS_API_KEY"], + "models": { + "accounts/fireworks/models/deepseek-v4-pro": { + "id": "accounts/fireworks/models/deepseek-v4-pro", + "base_model": "deepseek-v4-pro", + "name": "DeepSeek V4 Pro (Fireworks)", + "family": "deepseek", + "default": true, + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 384000 } + } + } + }, + "novita": { + "id": "novita", + "name": "Novita AI", + "api": "https://api.novita.ai/v3/openai", + "npm": "@ai-sdk/openai-compatible", + "env": ["NOVITA_API_KEY"], + "models": { + "deepseek/deepseek-v4-pro": { + "id": "deepseek/deepseek-v4-pro", + "base_model": "deepseek-v4-pro", + "name": "DeepSeek V4 Pro (Novita)", + "family": "deepseek", + "default": true, + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 384000 } + } + } + }, + "siliconflow": { + "id": "siliconflow", + "name": "SiliconFlow", + "api": "https://api.siliconflow.com/v1", + "npm": "@ai-sdk/openai-compatible", + "env": ["SILICONFLOW_API_KEY"], + "models": { + "deepseek-ai/DeepSeek-V4-Pro": { + "id": "deepseek-ai/DeepSeek-V4-Pro", + "base_model": "deepseek-v4-pro", + "name": "DeepSeek V4 Pro (SiliconFlow)", + "family": "deepseek", + "default": true, + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 384000 } + } + } + }, + "arcee": { + "id": "arcee", + "name": "Arcee AI", + 
"api": "https://api.arcee.ai/v1", + "npm": "@ai-sdk/openai-compatible", + "env": ["ARCEE_API_KEY"], + "models": { + "trinity-large-thinking": { + "id": "trinity-large-thinking", + "name": "Trinity Large Thinking", + "family": "trinity", + "default": true, + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 262144, "output": 262144 }, + "cost": { "input": 0.22, "output": 0.85, "cache_read": 0.06 } + }, + "trinity-mini": { + "id": "trinity-mini", + "name": "Trinity Mini", + "family": "trinity", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 128000 } + } + } + }, + "xiaomi-mimo": { + "id": "xiaomi-mimo", + "name": "Xiaomi MiMo", + "api": "https://api-mimo.xiaomi.com/v1", + "npm": "@ai-sdk/openai-compatible", + "env": ["XIAOMI_MIMO_API_KEY", "MIMO_API_KEY"], + "models": { + "mimo-v2.5-pro": { + "id": "mimo-v2.5-pro", + "name": "MiMo v2.5 Pro", + "family": "mimo", + "default": true, + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 131072 } + }, + "mimo-v2.5": { + "id": "mimo-v2.5", + "name": "MiMo v2.5", + "family": "mimo", + "reasoning": true, + "tool_call": true, + "modalities": { "input": ["text"], "output": ["text"] }, + "limit": { "context": 1000000, "output": 131072 } + } + } + } + } +} diff --git a/crates/config/src/catalog.rs b/crates/config/src/catalog.rs index 974928d45e..299ae51e7d 100644 --- a/crates/config/src/catalog.rs +++ b/crates/config/src/catalog.rs @@ -112,7 +112,11 @@ impl CatalogOffering { /// Project the minimal routing identity the resolver consumes. /// /// The catalog deliberately carries richer facts than routing needs; this - /// drops them so `RouteResolver::from_offerings` stays the single seam. + /// drops most of them so `RouteResolver::from_offerings` stays the single + /// seam. 
The route-facing pricing meter is the exception: it is projected + /// here (where the offering's sourced `cost` is in scope) via + /// [`crate::pricing::route_pricing_sku`] so a resolved candidate can carry + /// honest pricing without the route layer ever seeing raw cost (#3085). #[must_use] pub fn to_offering(&self) -> ProviderModelOffering { ProviderModelOffering { @@ -126,6 +130,7 @@ impl CatalogOffering { .as_ref() .map(RouteLimits::from) .unwrap_or_default(), + pricing: crate::pricing::route_pricing_sku(self), } } @@ -135,6 +140,39 @@ impl CatalogOffering { } } +/// The committed, network-free Models.dev-shaped catalog snapshot (#3385). +/// +/// Curated from in-repo verified model facts (context windows / output caps from +/// `crates/tui/src/models.rs`, USD pricing from `crates/tui/src/pricing.rs`) +/// rather than a live models.dev dump, because the public catalog tracks a +/// different real model generation than CodeWhale's curated forward-dated set. +/// This is the default bundled layer feeding [`crate::route::RouteResolver::new`]. +/// See the asset's `_meta` block for sourcing and the honesty rule on omitted +/// pricing (`UnknownOrStale`, never a fabricated zero). +pub const BUNDLED_MODELS_DEV_JSON: &str = include_str!("../assets/models_dev.bundled.json"); + +/// Parse the committed bundled Models.dev snapshot. +/// +/// # Panics +/// Panics only if the committed asset is not valid Models.dev JSON. The +/// `tests::bundled_asset_parses` guard makes that a test-time (CI) failure, so +/// this should never panic in shipped builds. +#[must_use] +pub fn bundled_models_dev_catalog() -> ModelsDevCatalog { + ModelsDevCatalog::parse_json(BUNDLED_MODELS_DEV_JSON) + .expect("committed bundled Models.dev asset must be valid JSON") +} + +/// The bundled-layer [`CatalogOffering`] rows from the committed snapshot.
+/// +/// This is the real-data source for the default resolver: every text-chat row +/// from [`BUNDLED_MODELS_DEV_JSON`], tagged [`CatalogSource::Bundled`], with +/// honest limits and pricing. +#[must_use] +pub fn bundled_catalog_offerings() -> Vec { + bundled_offerings_from_models_dev(&bundled_models_dev_catalog()) +} + /// Hydrate bundled [`CatalogOffering`] rows from a parsed Models.dev catalog. /// /// Only text-chat offerings are emitted (TTS/audio-only rows stay in the parsed diff --git a/crates/config/src/catalog/tests.rs b/crates/config/src/catalog/tests.rs index 10485b76ca..12c6348a88 100644 --- a/crates/config/src/catalog/tests.rs +++ b/crates/config/src/catalog/tests.rs @@ -519,3 +519,82 @@ fn snapshot_feeds_route_resolver_offerings() { .any(|o| o.wire_model_id.as_str() == "glm-voice") ); } + +// --------------------------------------------------------------------------- +// #3385: the committed bundled Models.dev asset. +// --------------------------------------------------------------------------- + +#[test] +fn bundled_asset_parses() { + // The committed asset must `include_str!`-load and deserialize into the + // parser's `ModelsDevCatalog` shape. This is the build-time guard that keeps + // `bundled_models_dev_catalog()` panic-free in shipped builds. + let catalog = ModelsDevCatalog::parse_json(BUNDLED_MODELS_DEV_JSON) + .expect("committed bundled asset must be valid Models.dev JSON"); + assert!( + !catalog.providers.is_empty(), + "bundled asset must carry provider rows" + ); + // The helper returns the same parsed catalog. + assert_eq!(bundled_models_dev_catalog(), catalog); +} + +#[test] +fn bundled_asset_yields_real_chat_offerings_for_key_models() { + let rows = bundled_catalog_offerings(); + assert!( + rows.len() >= 20, + "expected dozens of bundled chat offerings, got {}", + rows.len() + ); + + // A GLM and a Kimi row carry their real (non-default) context windows, + // proving real facts flow rather than `RouteLimits::default()` (unknown). 
+ let glm = find(&rows, "zai", "GLM-5.2"); + assert_eq!(glm.limit.as_ref().and_then(|l| l.context), Some(1_000_000)); + assert!(glm.default_for_provider); + + let kimi = find(&rows, "moonshot", "kimi-k2.7-code"); + assert_eq!(kimi.limit.as_ref().and_then(|l| l.context), Some(262_144)); + + // Audio/TTS rows are absent (the asset only ships chat models, but assert + // the filter contract anyway). + assert!( + rows.iter().all(|r| !r.wire_model_id.contains("tts")), + "no TTS rows should reach the offering layer" + ); +} + +#[test] +fn bundled_asset_pricing_is_honest() { + let rows = bundled_catalog_offerings(); + + // DeepSeek-native rows are intentionally unpriced here (priced via the + // time-aware DeepSeek table elsewhere); pricing them would also break the + // route layer's `unpriced_offering_stays_unknown` invariant. + let deepseek = find(&rows, "deepseek", "deepseek-v4-pro"); + assert!( + deepseek.cost.is_none(), + "DeepSeek-native rows must stay unpriced in the bundled asset" + ); + + // Any row that *does* carry a cost must expose a usable input/output rate + // (the honesty rule: no cache-only / empty cost objects that would render as + // a rate-less Token at the route layer). + for row in &rows { + if let Some(cost) = row.cost.as_ref() { + assert!( + cost.input.is_some() || cost.output.is_some(), + "{}/{}: priced row must have an input or output rate", + row.provider, + row.wire_model_id + ); + } + } + + // A sampled priced row matches the in-repo USD table (crates/tui pricing). 
+ let glm51 = find(&rows, "zai", "glm-5.1"); + let cost = glm51.cost.as_ref().expect("glm-5.1 is priced"); + assert_eq!(cost.input, Some(0.98)); + assert_eq!(cost.output, Some(3.08)); +} diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index 0b8045dd57..c1bd3fa6c8 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -144,6 +144,13 @@ pub struct ProvidersToml { pub minimax: ProviderConfigToml, #[serde(default, alias = "deep-infra", alias = "deep_infra")] pub deepinfra: ProviderConfigToml, + /// Catch-all table for the dynamic OpenAI-compatible custom provider + /// identity (#1519). Arbitrary `[providers.]` tables are handled by + /// the tui-side flatten map; this named slot keeps the canonical + /// `ProviderKind::Custom` lookups total without leaking into another + /// provider's config. + #[serde(default)] + pub custom: ProviderConfigToml, } /// Sibling `permissions.toml` schema. @@ -201,6 +208,7 @@ impl ProvidersToml { ProviderKind::Stepfun => &self.stepfun, ProviderKind::Minimax => &self.minimax, ProviderKind::Deepinfra => &self.deepinfra, + ProviderKind::Custom => &self.custom, } } @@ -233,6 +241,7 @@ impl ProvidersToml { ProviderKind::Stepfun => &mut self.stepfun, ProviderKind::Minimax => &mut self.minimax, ProviderKind::Deepinfra => &mut self.deepinfra, + ProviderKind::Custom => &mut self.custom, } } } @@ -1890,6 +1899,10 @@ impl ConfigToml { ProviderKind::Stepfun => DEFAULT_STEPFUN_BASE_URL.to_string(), ProviderKind::Minimax => DEFAULT_MINIMAX_BASE_URL.to_string(), ProviderKind::Deepinfra => DEFAULT_DEEPINFRA_BASE_URL.to_string(), + // The custom provider has no built-in endpoint; fall back to its + // descriptor placeholder so the lookup is total. Real custom + // routes always supply a configured base_url before this point. + ProviderKind::Custom => provider.provider().default_base_url().to_string(), }) }; // CLI flag wins outright. Otherwise: config-file → injected secrets/env. 
@@ -2460,6 +2473,8 @@ fn default_model_for_provider(provider: ProviderKind) -> &'static str { ProviderKind::Stepfun => DEFAULT_STEPFUN_MODEL, ProviderKind::Minimax => DEFAULT_MINIMAX_MODEL, ProviderKind::Deepinfra => DEFAULT_DEEPINFRA_MODEL, + // No built-in default model; the registry placeholder keeps this total. + ProviderKind::Custom => provider.provider().default_model(), } } @@ -2492,6 +2507,8 @@ fn default_base_url_for_provider(provider: ProviderKind) -> &'static str { ProviderKind::Stepfun => DEFAULT_STEPFUN_BASE_URL, ProviderKind::Minimax => DEFAULT_MINIMAX_BASE_URL, ProviderKind::Deepinfra => DEFAULT_DEEPINFRA_BASE_URL, + // No built-in default base URL; the registry placeholder keeps this total. + ProviderKind::Custom => provider.provider().default_base_url(), } } @@ -4225,6 +4242,9 @@ impl EnvRuntimeOverrides { ProviderKind::Stepfun => self.stepfun_base_url.clone(), ProviderKind::Minimax => self.minimax_base_url.clone(), ProviderKind::Deepinfra => self.deepinfra_base_url.clone(), + // No dedicated CODEWHALE_CUSTOM_BASE_URL env override: a custom + // provider's base URL comes from its `[providers.]` table. 
+ ProviderKind::Custom => None, } } diff --git a/crates/config/src/models_dev.rs b/crates/config/src/models_dev.rs index e2a3862298..1b603449f1 100644 --- a/crates/config/src/models_dev.rs +++ b/crates/config/src/models_dev.rs @@ -94,6 +94,7 @@ impl ModelsDevCatalog { .as_ref() .map(RouteLimits::from) .unwrap_or_default(), + pricing: crate::pricing::route_pricing_sku_from_cost(model.cost.as_ref()), }) } @@ -123,6 +124,7 @@ impl ModelsDevCatalog { .as_ref() .map(RouteLimits::from) .unwrap_or_default(), + pricing: crate::pricing::route_pricing_sku_from_cost(model.cost.as_ref()), }) .collect(), ) diff --git a/crates/config/src/pricing.rs b/crates/config/src/pricing.rs index aeb63e14f5..88c07cf056 100644 --- a/crates/config/src/pricing.rs +++ b/crates/config/src/pricing.rs @@ -21,6 +21,7 @@ use serde::{Deserialize, Serialize}; use crate::catalog::{CatalogOffering, CatalogSource}; +use crate::models_dev::ModelsDevCost; use crate::route::PricingSku; /// Billing currency for a pricing row. Models.dev publishes USD per-million @@ -217,6 +218,30 @@ pub fn route_pricing_sku(offering: &CatalogOffering) -> PricingSku { .map_or(PricingSku::UnknownOrStale, |pricing| pricing.to_route_sku()) } +/// The honest route-facing pricing meter for a raw Models.dev `cost` block. +/// +/// Same honesty rule as [`route_pricing_sku`], but for callers that hold a +/// [`ModelsDevCost`] directly (the route-offering builders in +/// [`crate::models_dev`]) rather than a full [`CatalogOffering`]. An absent or +/// concretely-empty cost, or a cache-only cost, yields +/// [`PricingSku::UnknownOrStale`]; only a usable input/output rate yields +/// [`PricingSku::Token`]. 
+#[must_use] +pub(crate) fn route_pricing_sku_from_cost(cost: Option<&ModelsDevCost>) -> PricingSku { + let Some(cost) = cost else { + return PricingSku::UnknownOrStale; + }; + if cost.input.is_none() && cost.output.is_none() { + // No input/output rate: a cache-only or empty cost would render as a + // rate-less `Token` at the route layer, so it stays honestly unknown. + return PricingSku::UnknownOrStale; + } + PricingSku::Token { + input_per_mtok: cost.input, + output_per_mtok: cost.output, + } +} + fn provenance_from_source(source: &CatalogSource) -> PricingProvenance { match source { CatalogSource::Bundled => PricingProvenance::ModelsDevBundled, diff --git a/crates/config/src/provider.rs b/crates/config/src/provider.rs index e9cb1e55ff..8fc8e41c71 100644 --- a/crates/config/src/provider.rs +++ b/crates/config/src/provider.rs @@ -540,6 +540,59 @@ provider!( aliases: ["deep-infra", "deep_infra"] ); +/// User-defined OpenAI-compatible endpoint (#1519). +/// +/// A single dynamic provider identity for arbitrary `[providers.] +/// kind="openai-compatible"` config entries. Unlike the built-in providers it +/// carries no real default base URL/model/env var: the concrete endpoint, model +/// id, and auth env var all arrive from the named `[providers.]` config +/// table at route time. The placeholder base URL/model here exist only so the +/// descriptor stays well-formed (non-empty) for conformance; runtime routing +/// always supplies a `base_url_override` and a wire model id, so these +/// placeholders are never used to reach the network. +pub struct Custom; + +impl Provider for Custom { + fn id(&self) -> &'static str { + "custom" + } + + fn kind(&self) -> ProviderKind { + ProviderKind::Custom + } + + fn display_name(&self) -> &'static str { + "Custom (OpenAI-compatible)" + } + + fn default_base_url(&self) -> &'static str { + // Placeholder only; the real endpoint comes from the named config table + // via the route's base_url_override. 
Loopback so a misconfigured custom + // provider fails closed locally rather than reaching a public host. + "http://localhost/v1" + } + + fn default_model(&self) -> &'static str { + // Placeholder only; the real model id comes from config and is preserved + // verbatim as the wire model id. + "custom-model" + } + + fn env_vars(&self) -> &'static [&'static str] { + // No built-in env var: the auth env var is named per-entry via + // `[providers.] api_key_env = "..."`. + &[] + } + + fn provider_config_key(&self) -> &'static str { + "custom" + } + + fn wire(&self) -> WireFormat { + WireFormat::ChatCompletions + } +} + static DEEPSEEK: Deepseek = Deepseek; static DEEPSEEK_ANTHROPIC: DeepseekAnthropic = DeepseekAnthropic; static NVIDIA_NIM: NvidiaNim = NvidiaNim; @@ -567,8 +620,9 @@ static ZAI: Zai = Zai; static STEPFUN: Stepfun = Stepfun; static MINIMAX: Minimax = Minimax; static DEEPINFRA: Deepinfra = Deepinfra; +static CUSTOM: Custom = Custom; -static PROVIDER_REGISTRY: [&dyn Provider; 27] = [ +static PROVIDER_REGISTRY: [&dyn Provider; 28] = [ &DEEPSEEK, &DEEPSEEK_ANTHROPIC, &NVIDIA_NIM, @@ -596,6 +650,7 @@ static PROVIDER_REGISTRY: [&dyn Provider; 27] = [ &STEPFUN, &MINIMAX, &DEEPINFRA, + &CUSTOM, ]; /// Return all built-in provider metadata entries in `ProviderKind::ALL` order. diff --git a/crates/config/src/provider_kind.rs b/crates/config/src/provider_kind.rs index 10c3ae8df8..08370106e0 100644 --- a/crates/config/src/provider_kind.rs +++ b/crates/config/src/provider_kind.rs @@ -96,10 +96,18 @@ pub enum ProviderKind { Minimax, #[serde(alias = "deep-infra", alias = "deep_infra")] Deepinfra, + /// User-defined OpenAI-compatible endpoint (#1519). + /// + /// A single dynamic identity for arbitrary `[providers.] + /// kind="openai-compatible"` entries. 
It speaks the OpenAI Chat Completions + /// wire protocol and carries no built-in base URL/model — the concrete + /// endpoint and model arrive via config (`base_url` / `model`) and the + /// route's `base_url_override`, never from this static descriptor. + Custom, } impl ProviderKind { - pub const ALL: [Self; 27] = [ + pub const ALL: [Self; 28] = [ Self::Deepseek, Self::DeepseekAnthropic, Self::NvidiaNim, @@ -127,6 +135,7 @@ impl ProviderKind { Self::Stepfun, Self::Minimax, Self::Deepinfra, + Self::Custom, ]; #[must_use] diff --git a/crates/config/src/route/candidate.rs b/crates/config/src/route/candidate.rs index c531048b6e..432283d783 100644 --- a/crates/config/src/route/candidate.rs +++ b/crates/config/src/route/candidate.rs @@ -63,7 +63,11 @@ pub enum ResolvedAuthSource { /// Pricing/quota class for the resolved route. /// /// Carries only coarse, non-sensitive shape; never secrets or account ids. -#[derive(Debug, Clone, Serialize, Deserialize)] +/// +/// `PartialEq` (but not `Eq`: the `Token` rates are `f64`) lets offerings and +/// candidates be compared in tests and lets +/// [`super::offering::ProviderModelOffering`] carry a pricing meter. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum PricingSku { /// Per-token pricing. diff --git a/crates/config/src/route/offering.rs b/crates/config/src/route/offering.rs index 91ac190956..56b8321abb 100644 --- a/crates/config/src/route/offering.rs +++ b/crates/config/src/route/offering.rs @@ -13,6 +13,7 @@ use serde::{Deserialize, Serialize}; +use super::candidate::PricingSku; use super::ids::{ModelId, ProviderId, WireModelId}; /// Token limits for one resolved route/offering. @@ -42,7 +43,10 @@ impl RouteLimits { } /// One provider's way of serving a (possibly canonical) model. -#[derive(Debug, Clone, PartialEq, Eq)] +/// +/// `Eq` is intentionally NOT derived: [`PricingSku::Token`] carries `f64` rates, +/// so the offering is only `PartialEq`. 
No caller keys a set/map on offerings. +#[derive(Debug, Clone, PartialEq)] pub struct ProviderModelOffering { /// Provider serving this offering. pub provider: ProviderId, @@ -56,6 +60,14 @@ pub struct ProviderModelOffering { pub default_for_provider: bool, /// Provider/offering-scoped token limits, when known. pub limits: RouteLimits, + /// Coarse route-facing pricing meter for this offering (#3085). + /// + /// Projected from the offering's sourced cost at the layer that owns it + /// (`CatalogOffering::to_offering` → [`crate::pricing::route_pricing_sku`]). + /// The resolver carries this verbatim onto the candidate; it is + /// [`PricingSku::UnknownOrStale`] whenever no price was sourced — never a + /// fabricated zero (the #2608 / #3085 honesty rule). + pub pricing: PricingSku, } /// A static, lazily-materialized seam catalog. @@ -119,6 +131,9 @@ pub fn bundled_offerings() -> Vec { endpoint_key: seed.endpoint_key.to_string(), default_for_provider: seed.default_for_provider, limits: RouteLimits::default(), + // The bundled seam carries no sourced cost, so pricing is honestly + // unknown here (never a fabricated zero). + pricing: PricingSku::UnknownOrStale, }) .collect() } diff --git a/crates/config/src/route/resolver.rs b/crates/config/src/route/resolver.rs index ea2751cfa7..4692c19767 100644 --- a/crates/config/src/route/resolver.rs +++ b/crates/config/src/route/resolver.rs @@ -35,6 +35,7 @@ use super::errors::RouteError; use super::ids::{LogicalModelRef, ModelId, ProviderId, WireModelId}; use super::offering::{ProviderModelOffering, RouteLimits, bundled_offerings}; use crate::ProviderKind; +use crate::catalog::{CatalogOffering, bundled_catalog_offerings}; /// A request to resolve into an executable route. /// @@ -66,9 +67,20 @@ impl Default for RouteResolver { impl RouteResolver { /// Construct a resolver with CodeWhale's bundled offline offerings. 
+ /// + /// The default offerings are the committed Models.dev-shaped catalog asset + /// (`crate::catalog::bundled_catalog_offerings`, real context windows and + /// honest per-row `cost`) merged with the tiny hand seam + /// ([`bundled_offerings`]). The hand seam is kept and given precedence on a + /// `(provider, wire id)` collision: it encodes the curated canonical-model + /// joins the route invariants depend on (e.g. a DeepSeek-native row and the + /// aggregator rows that map a prefixed wire id back to `deepseek-v4-pro`), + /// which generated Models.dev JSON does not prove. Asset-only rows (GLM, + /// Kimi, MiniMax, Qwen, …) add the real provider/model facts the picker and + /// candidates were previously missing. #[must_use] pub fn new() -> Self { - Self::from_offerings(bundled_offerings()) + Self::from_offerings(default_offerings()) } /// Construct a resolver from a provider-scoped offering catalog. @@ -132,7 +144,7 @@ impl RouteResolver { } else { classify(provider_kind) }; - let (wire_model_id, canonical_model, endpoint_key, limits) = if is_auto { + let (wire_model_id, canonical_model, endpoint_key, limits, pricing) = if is_auto { default_offering.map_or_else( || { ( @@ -140,6 +152,9 @@ impl RouteResolver { None, "chat".to_string(), RouteLimits::default(), + // No offering in hand on the default branch: pricing is + // honestly unknown (#3085), never a fabricated zero. + PricingSku::UnknownOrStale, ) }, |offering| { @@ -148,6 +163,8 @@ impl RouteResolver { offering.canonical_model.clone(), offering.endpoint_key.clone(), offering.limits, + // Matched offering: carry its sourced pricing meter. + offering.pricing.clone(), ) }, ) @@ -164,10 +181,16 @@ impl RouteResolver { protocol: descriptor.protocol(), }; - let validation = ValidationReport { - ok: true, - messages: Vec::new(), - }; + // Advisory validation (#1519): a non-loopback `http://` endpoint sends + // credentials in plaintext. 
This is advisory, not a hard fail, so + // `ok` stays true and local `http://localhost` runtimes (Ollama / vLLM / + // SGLang defaults) stay clean. + let mut messages = Vec::new(); + if endpoint_uses_insecure_http(&endpoint.base_url) { + messages + .push("endpoint uses insecure http:// (credentials sent in plaintext)".to_string()); + } + let validation = ValidationReport { ok: true, messages }; Ok(ReadyRouteCandidate::new( provider_id, @@ -179,7 +202,10 @@ impl RouteResolver { ResolvedAuthSource::Missing, descriptor.protocol(), limits, - Some(PricingSku::UnknownOrStale), + // #3085: honest pricing projected from the matched offering (the + // catalog layer maps sourced cost → SKU); `UnknownOrStale` whenever + // no offering was matched or the offering carried no price. + Some(pricing), validation, )) } @@ -191,7 +217,16 @@ impl RouteResolver { provider_id: &ProviderId, logical_model: &LogicalModelRef, class: ProviderClass, - ) -> Result<(WireModelId, Option, String, RouteLimits), RouteError> { + ) -> Result< + ( + WireModelId, + Option, + String, + RouteLimits, + PricingSku, + ), + RouteError, + > { let raw = logical_model.raw(); // Try to match a catalog offering owned by THIS provider, either by @@ -212,6 +247,8 @@ impl RouteResolver { offering.canonical_model.clone(), offering.endpoint_key.clone(), offering.limits, + // Matched offering: carry its sourced pricing meter (#3085). + offering.pricing.clone(), )); } } @@ -235,23 +272,27 @@ impl RouteResolver { }); } // A bare, unknown model on a strict direct provider is passed - // through verbatim (the provider validates it server-side). + // through verbatim (the provider validates it server-side). No + // offering matched, so pricing is honestly unknown (#3085). Ok(( WireModelId::from(raw), None, "chat".to_string(), RouteLimits::default(), + PricingSku::UnknownOrStale, )) } // Aggregators, local runtimes, and custom OpenAI-compatible // endpoints legitimately accept arbitrary / prefixed ids verbatim. 
ProviderClass::Aggregator | ProviderClass::LocalOrCustom => { let _ = provider_kind; + // No offering matched: pricing is honestly unknown (#3085). Ok(( WireModelId::from(raw), None, "chat".to_string(), RouteLimits::default(), + PricingSku::UnknownOrStale, )) } } @@ -289,6 +330,35 @@ impl RouteResolver { } } +/// Build the default resolver offerings: the bundled Models.dev asset rows +/// merged under the hand seam, with the seam winning a `(provider, wire id)` +/// collision. +/// +/// The seam is iterated *before* the asset rows and de-duplicated keeping the +/// first-seen row per identity, so seam rows (which carry the curated canonical +/// joins and the deliberately unpriced DeepSeek-native entries the route +/// invariants assert) shadow any asset row with the same `(provider, wire id)`. +/// Order is otherwise preserved for deterministic resolution. +fn default_offerings() -> Vec { + let mut seen: std::collections::HashSet<(String, String)> = std::collections::HashSet::new(); + let mut out = Vec::new(); + let asset_rows = bundled_catalog_offerings() + .iter() + .map(CatalogOffering::to_offering) + .collect::>(); + // Seam first so it wins identity collisions, then asset-only rows follow. + for offering in bundled_offerings().into_iter().chain(asset_rows) { + let key = ( + offering.provider.as_str().to_string(), + offering.wire_model_id.as_str().to_string(), + ); + if seen.insert(key) { + out.push(offering); + } + } + out +} + /// The resolver's minimal route classification. /// /// Intentionally narrower than tui's `validate_route`. @@ -350,3 +420,55 @@ fn normalize_route_base_url(base_url: &str) -> String { } trimmed.to_ascii_lowercase() } + +/// True when `base_url` is an `http://` endpoint whose host is NOT loopback +/// (#1519). Such an endpoint sends credentials in plaintext over the network; +/// loopback (`localhost` / `127.0.0.1` / `::1`) is exempt because local +/// runtimes (Ollama / vLLM / SGLang) default to plain `http://localhost`.
+fn endpoint_uses_insecure_http(base_url: &str) -> bool { + let trimmed = base_url.trim(); + // Scheme match is case-insensitive but must be `http`, not `https`. + let Some(rest) = strip_http_scheme(trimmed) else { + return false; + }; + !is_loopback_host(host_of_authority(rest)) +} + +/// Strip a leading case-insensitive `http://` scheme, returning the remainder. +/// Returns `None` for any other scheme (including `https://`) or no scheme. +fn strip_http_scheme(base_url: &str) -> Option<&str> { + let idx = base_url.find("://")?; + let (scheme, rest) = base_url.split_at(idx); + if scheme.eq_ignore_ascii_case("http") { + Some(&rest[3..]) + } else { + None + } +} + +/// Extract the bare host from an authority+path string: take the authority up +/// to the first `/`, drop any `user@` userinfo and `:port` suffix, and unwrap +/// `[..]` IPv6 brackets. +fn host_of_authority(rest: &str) -> &str { + let authority = rest.split('/').next().unwrap_or(rest); + // Drop userinfo (`user:pass@host`) if present. + let authority = authority.rsplit('@').next().unwrap_or(authority); + if let Some(inner) = authority.strip_prefix('[') { + // Bracketed IPv6 literal: host is everything up to the closing bracket. + return inner.split(']').next().unwrap_or(inner); + } + // Otherwise strip a trailing `:port`. + authority.split(':').next().unwrap_or(authority) +} + +/// Whether `host` is an IPv4/IPv6/name loopback address. +fn is_loopback_host(host: &str) -> bool { + let host = host.trim().trim_matches(|c| c == '[' || c == ']'); + // `localhost` by name, otherwise a parsed IP literal. Parsing (rather than + // matching a `127.` prefix) covers all of 127.0.0.0/8 and `::1` without + // exempting hostnames such as `127.evil.example.com`.
+ host.eq_ignore_ascii_case("localhost") + || host + .parse::<std::net::IpAddr>() + .is_ok_and(|ip| ip.is_loopback()) +} diff --git a/crates/config/src/route/tests.rs b/crates/config/src/route/tests.rs index 55062275d4..9dcdf36e12 100644 --- a/crates/config/src/route/tests.rs +++ b/crates/config/src/route/tests.rs @@ -424,6 +424,30 @@ fn resolver_custom_endpoint_allows_namespaced_selector_for_strict_provider() { assert_eq!(out.endpoint.base_url, "https://example.local/v1"); } +#[test] +fn resolver_explicit_custom_with_base_url_override_passes_model_through_verbatim() { + // #1519: an explicit `Custom` provider with a base_url override resolves via + // the LocalOrCustom pass-through, preserving even a namespaced selector as + // the verbatim wire id and binding the override endpoint + Chat Completions. + let r = RouteResolver::new(); + let request = RouteRequest { + explicit_provider: Some(ProviderKind::Custom), + model_selector: Some(LogicalModelRef::from("vendor/custom-model-v1")), + saved_provider_model: None, + base_url_override: Some("https://api.example.com/v1".to_string()), + }; + let out = r + .resolve(&request) + .expect("custom provider should resolve via pass-through"); + assert_eq!(out.provider_kind, ProviderKind::Custom); + assert_eq!(out.provider_id.as_str(), "custom"); + assert_eq!(out.wire_model_id.as_str(), "vendor/custom-model-v1"); + assert_eq!(out.endpoint.base_url, "https://api.example.com/v1"); + assert_eq!(out.protocol, crate::route::RequestProtocol::ChatCompletions); + assert!(out.validation.ok); + assert!(out.validation.messages.is_empty()); +} + #[test] fn resolver_strict_direct_rejects_models_dev_offering_from_another_provider() { let r = models_dev_route_resolver(); @@ -437,6 +461,86 @@ fn resolver_strict_direct_rejects_models_dev_offering_from_another_provider() { } } +// --------------------------------------------------------------------------- +// #3385: the DEFAULT resolver now sources the bundled Models.dev catalog asset, +// so real provider/model facts (context
windows) reach candidates. +// --------------------------------------------------------------------------- + +#[test] +fn default_resolver_yields_real_facts_from_bundled_catalog() { + let r = RouteResolver::new(); + + // A GLM row (Z.ai) resolves to a real, non-default context window — proof + // the bundled asset feeds the default resolver rather than the old 4-row + // seam, which only knew deepseek/together/openrouter and left everything + // else at `RouteLimits::default()` (unknown). + let glm = r + .resolve(&req(Some(ProviderKind::Zai), Some("GLM-5.2"))) + .expect("Z.ai GLM-5.2 should resolve from the bundled catalog"); + assert_eq!(glm.provider_kind, ProviderKind::Zai); + assert_eq!(glm.wire_model_id.as_str(), "GLM-5.2"); + assert_eq!( + glm.limits.context_tokens, + Some(1_000_000), + "GLM-5.2 must carry its real context window, not the unknown default" + ); + assert_eq!(glm.limits.output_tokens, Some(131_072)); + assert!(glm.limits.has_known_limit()); + + // A Kimi row (Moonshot) likewise resolves with its real window — a model + // the 4-row seam never knew about at all. + let kimi = r + .resolve(&req(Some(ProviderKind::Moonshot), Some("kimi-k2.7-code"))) + .expect("Moonshot kimi-k2.7-code should resolve from the bundled catalog"); + assert_eq!(kimi.limits.context_tokens, Some(262_144)); + assert_eq!(kimi.limits.output_tokens, Some(262_144)); + + // With the #3085 pricing keystone present on the release branch, the asset's + // provider-scoped `cost` now projects onto the candidate via + // `route_pricing_sku`, so a priced Z.ai row carries a real per-token meter + // rather than `UnknownOrStale` — the "lighting up" that #3385 + #3085 deliver + // together. + let glm51 = r + .resolve(&req(Some(ProviderKind::Zai), Some("glm-5.1"))) + .expect("Z.ai glm-5.1 should resolve from the bundled catalog"); + assert_eq!(glm51.limits.context_tokens, Some(202_752)); + assert!(matches!( + glm51.pricing, + Some(super::candidate::PricingSku::Token { .. 
}) + )); +} + +#[test] +fn default_resolver_preserves_seam_canonical_joins() { + // The bundled asset is merged UNDER the hand seam, so the seam's curated + // canonical-model joins still win: a DeepSeek-native selector keeps its + // canonical id, and an aggregator-prefixed wire id still maps back to the + // canonical DeepSeek model. (This is what keeps the existing route + // invariants green after the asset was wired in.) + let r = RouteResolver::new(); + + let direct = r + .resolve(&req(Some(ProviderKind::Deepseek), Some("deepseek-v4-pro"))) + .expect("deepseek-v4-pro resolves"); + assert_eq!( + direct.canonical_model.as_ref().map(ModelId::as_str), + Some("deepseek-v4-pro") + ); + + let hosted = r + .resolve(&req( + Some(ProviderKind::Together), + Some("deepseek-ai/DeepSeek-V4-Pro"), + )) + .expect("together hosted deepseek resolves"); + assert_eq!( + hosted.canonical_model.as_ref().map(ModelId::as_str), + Some("deepseek-v4-pro"), + "seam canonical join must survive the asset merge" + ); + assert_eq!(hosted.wire_model_id.as_str(), "deepseek-ai/DeepSeek-V4-Pro"); +} + #[test] fn resolver_deepseek_none_selector_uses_default_wire_id() { let r = RouteResolver::new(); @@ -540,3 +644,158 @@ fn resolver_protocol_matches_descriptor_for_every_provider() { ); } } + +// --------------------------------------------------------------------------- +// #3085: honest pricing on resolved candidates. +// --------------------------------------------------------------------------- + +/// A resolver whose single offering is a DeepSeek-priced catalog row, projected +/// through the wired `CatalogOffering::to_offering` pricing seam. 
+fn priced_deepseek_resolver() -> RouteResolver { + use crate::catalog::{CatalogOffering, CatalogSource}; + use crate::models_dev::ModelsDevCost; + + let priced = CatalogOffering { + provider: "deepseek".into(), + wire_model_id: "deepseek-v4-pro".into(), + canonical_model: Some("deepseek-v4-pro".into()), + endpoint_key: "chat".into(), + default_for_provider: true, + cost: Some(ModelsDevCost { + input: Some(0.28), + output: Some(0.42), + cache_read: Some(0.028), + cache_write: None, + }), + source: CatalogSource::Bundled, + ..Default::default() + }; + RouteResolver::from_offerings(vec![priced.to_offering()]) +} + +#[test] +fn priced_offering_yields_token_pricing_sku() { + use super::candidate::PricingSku; + + let r = priced_deepseek_resolver(); + let out = r + .resolve(&req(Some(ProviderKind::Deepseek), Some("deepseek-v4-pro"))) + .expect("priced DeepSeek route should resolve"); + + match out.pricing { + Some(PricingSku::Token { + input_per_mtok, + output_per_mtok, + }) => { + assert_eq!(input_per_mtok, Some(0.28)); + assert_eq!(output_per_mtok, Some(0.42)); + } + other => panic!("expected Some(Token), got {other:?}"), + } +} + +#[test] +fn unpriced_offering_stays_unknown() { + use super::candidate::PricingSku; + + // The bundled seam (`RouteResolver::new`) carries no sourced cost, so a + // matched offering must surface honest UnknownOrStale, never a fabricated + // zero price (#2608 / #3085 honesty rule). + let r = RouteResolver::new(); + let out = r + .resolve(&req(Some(ProviderKind::Deepseek), Some("deepseek-v4-pro"))) + .expect("bundled DeepSeek route should resolve"); + assert!( + matches!(out.pricing, Some(PricingSku::UnknownOrStale)), + "bundled offering carries no price → UnknownOrStale, got {:?}", + out.pricing + ); + + // A pass-through route with no matched offering is likewise unknown. 
+ let passthrough = r + .resolve(&req(Some(ProviderKind::Ollama), Some("my-local:7b"))) + .expect("local passthrough should resolve"); + assert!(matches!( + passthrough.pricing, + Some(PricingSku::UnknownOrStale) + )); +} + +// --------------------------------------------------------------------------- +// #1519: advisory insecure-http warning, loopback-exempt. +// --------------------------------------------------------------------------- + +/// Build a request with an explicit base-URL override. +fn req_with_base(provider: ProviderKind, model: &str, base_url: &str) -> RouteRequest { + RouteRequest { + explicit_provider: Some(provider), + model_selector: Some(LogicalModelRef::from(model)), + saved_provider_model: None, + base_url_override: Some(base_url.to_string()), + } +} + +#[test] +fn http_custom_endpoint_emits_insecure_warning() { + let r = RouteResolver::new(); + let out = r + .resolve(&req_with_base( + ProviderKind::Openai, + "gpt-whatever", + "http://example.com/v1", + )) + .expect("custom http endpoint should still resolve"); + + // Advisory only: the route stays usable. + assert!( + out.validation.ok, + "insecure http is advisory, not a hard fail" + ); + assert!( + out.validation + .messages + .iter() + .any(|m| m.contains("insecure http")), + "expected an insecure-http advisory, got {:?}", + out.validation.messages + ); +} + +#[test] +fn loopback_http_endpoint_does_not_warn() { + let r = RouteResolver::new(); + // localhost, 127.0.0.1, and ::1 are all loopback and must stay clean. 
+ for base in [ + "http://localhost:11434/v1", + "http://127.0.0.1:8000/v1", + "http://[::1]:8080/v1", + ] { + let out = r + .resolve(&req_with_base(ProviderKind::Ollama, "my-local:7b", base)) + .unwrap_or_else(|e| panic!("loopback route {base} should resolve: {e}")); + assert!(out.validation.ok); + assert!( + out.validation.messages.is_empty(), + "loopback {base} must not warn, got {:?}", + out.validation.messages + ); + } +} + +#[test] +fn https_endpoint_has_no_warning() { + let r = RouteResolver::new(); + let out = r + .resolve(&req_with_base( + ProviderKind::Openai, + "gpt-whatever", + "https://example.com/v1", + )) + .expect("https endpoint should resolve"); + assert!(out.validation.ok); + assert!( + out.validation.messages.is_empty(), + "https must not warn, got {:?}", + out.validation.messages + ); +} diff --git a/crates/config/src/tests.rs b/crates/config/src/tests.rs index e7cd7ccb0e..43ca09811a 100644 --- a/crates/config/src/tests.rs +++ b/crates/config/src/tests.rs @@ -2985,7 +2985,13 @@ fn provider_metadata_defaults_match_runtime_helpers() { default_base_url_for_provider(kind) ); assert!(!provider.display_name().trim().is_empty()); - assert!(!provider.env_vars().is_empty()); + // The dynamic custom provider (#1519) intentionally declares no + // built-in auth env var: the key env var name is supplied per entry via + // `[providers.] api_key_env = "..."`. Every built-in provider + // still must declare at least one. + if kind != ProviderKind::Custom { + assert!(!provider.env_vars().is_empty()); + } // OpenAI Codex (ChatGPT) speaks the Responses API and Anthropic // speaks the native Messages API; every other built-in provider // is OpenAI-compatible Chat Completions. 
diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 1452a176eb..8457a779e2 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -9,14 +9,14 @@ description = "Core runtime boundaries for CodeWhale" [dependencies] anyhow.workspace = true chrono.workspace = true -codewhale-agent = { path = "../agent", version = "0.8.64" } -codewhale-config = { path = "../config", version = "0.8.64" } -codewhale-execpolicy = { path = "../execpolicy", version = "0.8.64" } -codewhale-hooks = { path = "../hooks", version = "0.8.64" } -codewhale-mcp = { path = "../mcp", version = "0.8.64" } -codewhale-protocol = { path = "../protocol", version = "0.8.64" } -codewhale-state = { path = "../state", version = "0.8.64" } -codewhale-tools = { path = "../tools", version = "0.8.64" } +codewhale-agent = { path = "../agent", version = "0.8.65" } +codewhale-config = { path = "../config", version = "0.8.65" } +codewhale-execpolicy = { path = "../execpolicy", version = "0.8.65" } +codewhale-hooks = { path = "../hooks", version = "0.8.65" } +codewhale-mcp = { path = "../mcp", version = "0.8.65" } +codewhale-protocol = { path = "../protocol", version = "0.8.65" } +codewhale-state = { path = "../state", version = "0.8.65" } +codewhale-tools = { path = "../tools", version = "0.8.65" } serde_json.workspace = true tracing.workspace = true uuid.workspace = true diff --git a/crates/execpolicy/Cargo.toml b/crates/execpolicy/Cargo.toml index 186a206d73..0bb35a1822 100644 --- a/crates/execpolicy/Cargo.toml +++ b/crates/execpolicy/Cargo.toml @@ -8,5 +8,5 @@ description = "Execution policy and approval model for CodeWhale" [dependencies] anyhow.workspace = true -codewhale-protocol = { path = "../protocol", version = "0.8.64" } +codewhale-protocol = { path = "../protocol", version = "0.8.65" } serde.workspace = true diff --git a/crates/hooks/Cargo.toml b/crates/hooks/Cargo.toml index adc9200805..8fd760d017 100644 --- a/crates/hooks/Cargo.toml +++ b/crates/hooks/Cargo.toml @@ -10,7 +10,7 
@@ description = "Hook dispatch and notifications support for CodeWhale" anyhow.workspace = true async-trait.workspace = true chrono.workspace = true -codewhale-protocol = { path = "../protocol", version = "0.8.64" } +codewhale-protocol = { path = "../protocol", version = "0.8.65" } reqwest.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/protocol/src/fleet.rs b/crates/protocol/src/fleet.rs index e17ff23ef3..83a1ac2e53 100644 --- a/crates/protocol/src/fleet.rs +++ b/crates/protocol/src/fleet.rs @@ -838,6 +838,40 @@ impl FleetAlertEndpoint { } } +/// Resolved-route detail persisted on a [`FleetReceipt`] (#3154). +/// +/// This is an additive, *plain-strings* snapshot of the route a fleet worker +/// resolved to. It deliberately does NOT depend on any `codewhale-config` route +/// type so the protocol crate stays free of the route model. +/// +/// CRITICAL no-secrets invariant: this struct carries ONLY non-sensitive route +/// shape — provider id/kind, model ids, wire protocol, role/loadout intent, and +/// the resolution source. It must NEVER hold a credential, API key, bearer +/// token, or a base URL that embeds credentials. There is intentionally no +/// field that could carry a secret. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct FleetResolvedRoute { + /// Resolved provider canonical id (e.g. `"deepseek"`). + pub provider_id: String, + /// Resolved provider kind (e.g. `"deepseek"`). + pub provider_kind: String, + /// Canonical, provider-agnostic model identity, when known. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub canonical_model: Option, + /// Provider-owned wire model id placed on the request. + pub wire_model_id: String, + /// Selected wire protocol (e.g. `"chat_completions"`). + pub protocol: String, + /// Effective Fleet role intent, when one applied. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub role: Option, + /// Effective Fleet loadout intent, when one applied. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub loadout: Option, + /// How the route was produced (e.g. `"resolver"`). + pub source: String, +} + /// Receipt produced when a task completes verification. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FleetReceipt { @@ -852,6 +886,12 @@ pub struct FleetReceipt { pub artifacts: Vec, #[serde(default)] pub score: Option, + /// Resolved-route snapshot for this task (#3154). + /// + /// `#[serde(default)]` keeps older ledgers (written before this field + /// existed) deserializable. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub resolved_route: Option, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] @@ -1167,6 +1207,7 @@ mod tests { max: Some(1.0), notes: None, }), + resolved_route: None, }; let json = serde_json::to_string(&receipt).unwrap(); let back: FleetReceipt = serde_json::from_str(&json).unwrap(); @@ -1189,6 +1230,7 @@ mod tests { max: Some(1.0), notes: Some("manual verification required".to_string()), }), + resolved_route: None, }; let json = serde_json::to_string(&receipt).unwrap(); @@ -1325,4 +1367,106 @@ mod tests { assert!(!FleetTrustLevel::Sandbox.may_write_workspace()); assert!(FleetTrustLevel::Operator.may_write_workspace()); } + + fn sample_receipt_with_route() -> FleetReceipt { + FleetReceipt { + run_id: FleetRunId::from("run-route"), + task_id: "task-route".to_string(), + worker_id: "worker-route".to_string(), + completed_at: "2026-06-23T00:00:00Z".to_string(), + result: FleetTaskResult::Pass, + failure_kind: None, + artifacts: vec![], + score: None, + resolved_route: Some(FleetResolvedRoute { + provider_id: "deepseek".to_string(), + provider_kind: "deepseek".to_string(), + canonical_model: Some("deepseek-v4-pro".to_string()), + wire_model_id: "deepseek-v4-pro".to_string(), + protocol: 
"chat_completions".to_string(), + role: Some("builder".to_string()), + loadout: Some("auto".to_string()), + source: "resolver".to_string(), + }), + } + } + + #[test] + fn fleet_resolved_route_round_trips() { + let receipt = sample_receipt_with_route(); + let json = serde_json::to_string(&receipt).unwrap(); + let back: FleetReceipt = serde_json::from_str(&json).unwrap(); + assert_eq!(back.resolved_route, receipt.resolved_route); + let route = back.resolved_route.unwrap(); + assert_eq!(route.provider_id, "deepseek"); + assert_eq!(route.wire_model_id, "deepseek-v4-pro"); + assert_eq!(route.protocol, "chat_completions"); + assert_eq!(route.role.as_deref(), Some("builder")); + assert_eq!(route.source, "resolver"); + } + + #[test] + fn fleet_receipt_without_resolved_route_still_deserializes() { + // An old ledger receipt JSON written before #3154 has no + // `resolved_route` key; `#[serde(default)]` must keep it readable. + let legacy = r#"{ + "run_id": "run-legacy", + "task_id": "task-legacy", + "worker_id": "worker-legacy", + "completed_at": "2026-06-01T00:00:00Z", + "result": "pass", + "artifacts": [], + "score": null + }"#; + let receipt: FleetReceipt = serde_json::from_str(legacy).unwrap(); + assert_eq!(receipt.task_id, "task-legacy"); + assert!(receipt.resolved_route.is_none()); + } + + #[test] + fn fleet_resolved_route_serialization_carries_no_secrets() { + let receipt = sample_receipt_with_route(); + // Scan the serialized resolved-route object: this is the field whose + // no-secrets invariant we are asserting. Scoping to the route value + // avoids false positives from unrelated envelope ids (e.g. a task id + // such as "task-foo" innocently contains the substring "sk-"). + let route_json = serde_json::to_string(receipt.resolved_route.as_ref().unwrap()).unwrap(); + assert_no_secret_markers(&route_json); + // The envelope as a whole must also stay credential-free. 
+ let receipt_json = serde_json::to_string(&receipt).unwrap(); + for needle in SECRET_KEY_MARKERS { + assert!( + !receipt_json.to_ascii_lowercase().contains(needle), + "receipt JSON must not contain secret-key marker {needle:?}: {receipt_json}" + ); + } + } + + /// Substrings that indicate a leaked credential field/value. These are + /// deliberately specific so legitimate ids/model names do not trip them. + const SECRET_KEY_MARKERS: &[&str] = &[ + "api_key", + "apikey", + "api-key", + "authorization", + "bearer ", + "auth_token", + "auth-token", + "password", + "credential", + "sk-ant-", + "sk-proj-", + "sk-or-", + "secret", + ]; + + fn assert_no_secret_markers(json: &str) { + let haystack = json.to_ascii_lowercase(); + for needle in SECRET_KEY_MARKERS { + assert!( + !haystack.contains(needle), + "resolved-route JSON must not contain secret marker {needle:?}: {json}" + ); + } + } } diff --git a/crates/tools/Cargo.toml b/crates/tools/Cargo.toml index c0947878a4..3c4ed1eaa3 100644 --- a/crates/tools/Cargo.toml +++ b/crates/tools/Cargo.toml @@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral [dependencies] anyhow.workspace = true async-trait.workspace = true -codewhale-protocol = { path = "../protocol", version = "0.8.64" } +codewhale-protocol = { path = "../protocol", version = "0.8.65" } serde.workspace = true serde_json.workspace = true thiserror.workspace = true diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md index 7caab3856b..1515a89722 100644 --- a/crates/tui/CHANGELOG.md +++ b/crates/tui/CHANGELOG.md @@ -7,21 +7,90 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.8.65] - 2026-06-24 + ### Added +- **Provider/model/route resolution (EPIC #2608).** Canonical provider, model, + offering, and route types with a single `RouteResolver` that produces a + resolved `ReadyRouteCandidate` (endpoint, wire protocol, model id, context + limit, price) for 
every switch (#3458, #3084, #3384). The executing client is + now constructed from the resolved candidate rather than re-derived from config + (#3384). A committed, network-free Models.dev-shaped catalog gives models real + context windows and pricing, with a secret-free live cache (#3497, #3498, + #3385). Offering pricing with provenance is projected onto candidates (#3501, + #3085), and route limits feed a route-aware context-budget service (#3508, + #3523, #3086). +- **Fleet execution substrate (EPIC #3154).** Fleet profile types and config + (#3469), durable manager resume, workspace agent-profile loading resolved into + the worker runtime (#3367), loadout intent carried in task specs (#3512), and + receipts that persist the resolved route for inspection (#3154, #3166). Worker + status is folded into the unified `/fleet` surface and exposed through the + Runtime API. +- **Provider surfaces.** A `/provider` readiness dashboard with reasoning + readiness, an experimental/supported maturity marker, and an "open models for + this provider" action (#3083, #2984, #3485); cross-provider `/model` search + with scroll and provider type-ahead (#3484, #3075); inline `` + reasoning-stream routing with per-provider overrides (#3222); usage telemetry + normalized into canonical token classes including Responses cache-miss and + reasoning tokens (#2961, #3509); and remote MCP OAuth login with bearer/header + auth precedence (#3527). +- **More providers and routes.** User-defined OpenAI-compatible custom providers + via `[providers.]` (#1519); a DeepSeek Anthropic-compatible route (#2963, + #3449); a Qianfan route (#3425); Zhipu folded into Z.ai with equal-treatment + model normalization (#3539); DashScope/Together fixtures. - **Localized mode picker and composer indicators.** The `/mode` picker prompt, mode names, and hints, plus the composer's Vim mode indicator, now render in all seven shipped locales (model-facing mode labels stay English). 
Harvested from #2239 by @gordonlu. +- **Website and automation.** A runtime/integrations page, provenance and + mirror-trust copy, a fact-drift CI gate, a published install script, and a + weekly community digest archive on codewhale.net (#3419, #3421, #3415, #3482, + #3420); per-automation mode/shell/trust/approval settings (#3467). ### Changed +- **Config modularization (#3311).** `ProviderKind` (#3505), harness posture + (#3507), and provider default seeds (#3503) moved into dedicated modules, and + the `config.rs` monolith split into clean leaf modules (paths, search, + model/base-URL constants, sub-agent limits) behind a `pub use` facade. + `AppMode` helpers were centralized (#3510), and mode-vs-permission policy is + now derived through a single `base_policy_for_mode` resolver instead of + scattered mutation (#3386, advisory review-intent behavior preserved). +- **Leaner tool surface.** Dropped `task_shell_*` from the active set and folded + `tool_search_*` (#3463); ablated the in-turn loop_guard and encoded reasoning + dispositions (#3462); added the Orchestration disposition to the constitution. +- **Routing.** Provider/model switches and the capability-aware fallback chain + resolve through `RouteResolver`; reasoning effort is normalized for the + *resolved* provider; the fallback chain now skips providers that lack auth + (#2574); and context window and memory-pressure come from the resolved route + (#3086). +- **UX.** Approval modal gained a group divider and selected-row caret (#3515); + picker scroll/type-ahead and selection contrast hardened (#3500); the README + was rewritten as an architecture end-cap (#3087); and repo agent guidance was + de-hardcoded to live truth. 
- **Restored contributor credit.** Threaded machine-readable credit (`docs/CONTRIBUTORS.md` + `.github/AUTHOR_MAP`) for earlier merged work that shipped without it, including the `/jobs cancel-all` action and the npm retry-timeout hint (#1538) by @jieshu666, and the community ACP adapter reference by @rockeverm3m. +### Fixed + +- **Release hygiene.** The strict `cargo clippy --workspace --all-targets --locked + -- -D warnings` gate passes; `npm run build` no longer dirties the generated + web facts; the site sets `metadataBase`; the community digest page parses each + record independently and localizes its chrome; and `cargo audit` is clean with + the starlark-transitive unmaintained advisories documented. +- **Routing and mode correctness.** Ordinary prompt text is no longer + interpreted as a mode switch (#3387, #3491); model candidates are scoped to the + active provider; Together-owned DeepSeek routes are accepted (#3426); insecure + `http://` custom endpoints raise an advisory warning (#1519); and the Fleet + setup planner's role/model selection now drives the generated profile. +- **Runtime stability.** MCP connection drops are explicit (#3524), HTTP API + calls reuse a shared MCP pool (#3532), and per-agent sub-agent mailbox + telemetry is throttled to cut UI lag (#3454). + ## [0.8.64] - 2026-06-22 ### Added @@ -1393,102 +1462,6 @@ Thanks to **@xyuai** (#2587), **@IcedOranges** (#2584), **@BH8GCJ** (#2588), **@gordonlu**, **@encyc**, and **@simuusang** (#2603, #2620) for reports, patches, retesting, and release-stabilization signals that shaped this pass. -## [0.8.51] - 2026-06-02 - -### Added - -- **Arcee AI as a direct provider.** New `[providers.arcee]` config block and - `ARCEE_API_KEY` / `ARCEE_BASE_URL` / `ARCEE_MODEL` environment variables, - wired through CLI auth (`codewhale auth set --provider arcee`), the TUI - provider picker, and the model registry. 
The default direct-API model is - `trinity-large-thinking` (reasoning-capable, 262K context and 262K max - output); `trinity-large-preview` (262K context, non-reasoning) and - `trinity-mini` (128K context) are also selectable. OpenRouter's - `arcee-ai/trinity-large-thinking` route remains separate. -- **Arcee Cloudflare-WAF compatibility.** The opening turn to the Arcee gateway - uses a benign read-only tool surface (`read_file`, `list_dir`, `file_search`, - `grep_files`, `git_status`, `git_diff`, `checklist_write`, `update_plan`) and - splits example payloads such as `python -c …` out of the system prompt, so the - WAF does not reject the first request; the full tool catalog stays reachable - through tool-search. `trinity-large-thinking`'s `reasoning_content` is - recognized and replayed on tool-call turns. -- **Expanded model catalog.** Added context-window, max-output, and - reasoning-capability metadata for additional model IDs, including - `qwen/qwen3.6-flash`, `qwen/qwen3.6-plus`, `qwen/qwen3.6-max-preview`, and - Xiaomi MiMo v2.5 chat/ASR/TTS variants; `trinity-large-preview`'s context - window was corrected to 262K. -- **Provider-aware model picker.** The picker groups models by provider, shows - per-model hints, and remembers a saved model per provider. - -### Changed - -- **Auto-compaction is now percentage- and model-aware.** The per-model - threshold helper is `compaction_threshold_for_model_at_percent(model, - percent)` (replacing the effort-based variant), and the default - `auto_compact_threshold_percent` is 80%. Auto-compaction defaults on for - models with a context window of 256K or smaller and stays opt-in for 1M-token - models (e.g. DeepSeek V4) to protect prefix-cache economics, unless the user - has explicitly set `auto_compact`. 
-- **Clearer provider/gateway errors.** HTTP error bodies are sanitized before - display — HTML interstitials and Cloudflare "Access Denied" pages collapse to - a one-line reason (with the ray/error ID) instead of dumping raw markup into - the transcript — and 403s are split into authentication vs. authorization - (gateway/WAF block) categories. -- The invalid-model error now names the active provider and lists Arcee among - the options. - -### Removed - -- **The session "cycle" / checkpoint-restart system.** Removed the `/cycles`, - `/cycle `, and `/recall` commands, the `recall_archive` tool, the - cycle-handoff briefing prompt, the sidebar "cycles" lines, and the - `cycle_manager` engine plumbing (`EngineConfig.cycle`, `Event::CycleAdvanced`, - seam-manager cycle thresholds and flash briefings). Long sessions no longer - auto-reset their context at a fixed token boundary — reclaim budget with - `/compact` or model-aware auto-compaction instead. Existing on-disk cycle - archives are left untouched but are no longer read or written. - -### Fixed - -- Assistant turns no longer leave an orphaned role glyph (the stray "blue dot") - when a turn streams only whitespace between reasoning and a tool call. -- Scrolling the mouse wheel over the right-hand sidebar no longer leaks into the - transcript scroll. -- The sidebar hover tooltip now appears only for truncated lines, sits below the - cursor, and uses a neutral surface color instead of the warning-orange - highlight that overlapped neighbouring rows. -- Corrected the README's description of the Constitution (Article VII is the - hierarchy itself; Article II's truth duty overrides even a user request) to - match `prompts/base.md`. -- Repaired release-blocking unit and integration tests left failing by the - cycle-removal and compaction-threshold refactors (relay instruction, - model-reject message, compaction budget, mock-LLM threshold helper). 
-- Fixed DEC private-mode CSI fragment leakage into composer text after - terminal resets, restoring clean prompt editing (#2592). -- The engine now recovers from turn-level panics instead of killing the - main event loop, keeping the session alive through transient failures - (#2583, #1269). -- Deeply nested files are now discoverable via @-mention and Ctrl+P file - picker; the default walk depth was relaxed to handle monorepo layouts (#2488). -- Command-palette selection stays visible when scrolling through long lists - instead of scrolling off-screen (#2590). -- exec_shell child processes now inherit .NET/NuGet and Windows app-data - environment variables, fixing toolchain resolution on Windows (#1857). -- A warning is emitted when shell/sandbox config keys are nested under - unknown top-level sections instead of being silently ignored (#2589). -- Diff-render now preserves leading whitespace in patch content lines, - fixing an extra-space regression in PR previews (#2591). Thanks @zlh124. -- Model selection from the /model command now persists per-provider across - restarts, with a warning when persistence fails. - -### Community - -Thanks to **@zlh124** (#2591) and **@reidliu41** (#2601) for the fixes -harvested into this release. Thanks also to **@idling11** (#2602), -**@gordonlu** (#2585), **@cyq1017** (#2593), **@xyuai** (#2587, #2584), -and **@IcedOranges** (#2584) for reports, drafts, and investigations -that shaped this release cycle. - --- Older releases: [CHANGELOG.md](https://github.com/Hmbown/CodeWhale/blob/main/CHANGELOG.md) and [docs/CHANGELOG_ARCHIVE.md](https://github.com/Hmbown/CodeWhale/blob/main/docs/CHANGELOG_ARCHIVE.md). 
diff --git a/crates/tui/Cargo.toml b/crates/tui/Cargo.toml index d75cf228ec..3ada3b82a9 100644 --- a/crates/tui/Cargo.toml +++ b/crates/tui/Cargo.toml @@ -21,12 +21,12 @@ path = "src/main.rs" [dependencies] anyhow = "1.0.100" -codewhale-config = { path = "../config", version = "0.8.64" } -codewhale-execpolicy = { path = "../execpolicy", version = "0.8.64" } -codewhale-protocol = { path = "../protocol", version = "0.8.64" } -codewhale-release = { path = "../release", version = "0.8.64" } -codewhale-secrets = { path = "../secrets", version = "0.8.64" } -codewhale-tools = { path = "../tools", version = "0.8.64" } +codewhale-config = { path = "../config", version = "0.8.65" } +codewhale-execpolicy = { path = "../execpolicy", version = "0.8.65" } +codewhale-protocol = { path = "../protocol", version = "0.8.65" } +codewhale-release = { path = "../release", version = "0.8.65" } +codewhale-secrets = { path = "../secrets", version = "0.8.65" } +codewhale-tools = { path = "../tools", version = "0.8.65" } schemaui = { version = "0.12.0", default-features = false, optional = true } async-stream = "0.3.6" async-trait = "0.1" diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs index a670782a52..4636bbdace 100644 --- a/crates/tui/src/client.rs +++ b/crates/tui/src/client.rs @@ -14,6 +14,8 @@ use serde::{Deserialize, Serialize}; use serde_json::{Value, json}; use tokio::sync::Mutex as AsyncMutex; +use codewhale_config::route::ReadyRouteCandidate; + use crate::config::{ApiProvider, Config, RetryPolicy, wire_model_for_provider}; use crate::llm_client::{ LlmClient, LlmError, RetryConfig as LlmRetryConfig, extract_retry_after, @@ -640,12 +642,36 @@ fn add_extra_root_certs( impl DeepSeekClient { /// Create a DeepSeek client from CLI configuration. pub fn new(config: &Config) -> Result { + Self::from_parts(config.deepseek_base_url(), config.default_model(), config) + } + + /// Create a DeepSeek client whose transport is bound to a runtime-resolved + /// route (#3384). 
+ /// + /// The base URL and default model come from the executable `candidate`, so + /// the client talks to exactly the endpoint and wire model the resolver + /// chose instead of re-deriving them from `Config`. Secrets stay in + /// `Config`: `ReadyRouteCandidate` is secret-free by design (it carries only + /// an auth-source *class*), so the API key and provider are still read from + /// `config`. + pub fn from_candidate(config: &Config, candidate: &ReadyRouteCandidate) -> Result { + Self::from_parts( + candidate.endpoint.base_url.clone(), + candidate.wire_model_id.as_str().to_string(), + config, + ) + } + + /// Shared constructor body for [`Self::new`] and [`Self::from_candidate`]. + /// + /// `base_url` and `default_model` are the only inputs that differ between + /// the two entry points; everything else (auth, provider, retry, headers, + /// timeouts) is derived from `config` so the two paths cannot drift. + fn from_parts(base_url: String, default_model: String, config: &Config) -> Result { let api_key = config.deepseek_api_key()?; - let base_url = config.deepseek_base_url(); let api_provider = config.api_provider(); validate_base_url_security(&base_url)?; let retry = config.retry_policy(); - let default_model = config.default_model(); let stream_idle_timeout = Duration::from_secs(config.stream_chunk_timeout_secs()); let http_headers = config.http_headers(); let insecure_skip_tls_verify = config.insecure_skip_tls_verify(); @@ -1432,7 +1458,8 @@ pub(super) fn apply_reasoning_effort( | ApiProvider::WanjieArk | ApiProvider::Qianfan | ApiProvider::Arcee - | ApiProvider::Huggingface => {} + | ApiProvider::Huggingface + | ApiProvider::Custom => {} ApiProvider::Moonshot => { // #3024: Kimi models accept thinking enable/disable. 
body["thinking"] = json!({ "type": "disabled" }); @@ -1512,7 +1539,8 @@ pub(super) fn apply_reasoning_effort( ApiProvider::Openai | ApiProvider::WanjieArk | ApiProvider::Qianfan - | ApiProvider::OpenaiCodex => {} + | ApiProvider::OpenaiCodex + | ApiProvider::Custom => {} ApiProvider::Moonshot => { // #3024: Kimi models accept thinking enable. body["thinking"] = json!({ "type": "enabled" }); @@ -1579,7 +1607,8 @@ pub(super) fn apply_reasoning_effort( ApiProvider::Openai | ApiProvider::WanjieArk | ApiProvider::Qianfan - | ApiProvider::OpenaiCodex => {} + | ApiProvider::OpenaiCodex + | ApiProvider::Custom => {} ApiProvider::Moonshot => { // #3024: Kimi models accept thinking enable. body["thinking"] = json!({ "type": "enabled" }); @@ -1789,11 +1818,13 @@ mod tests { fn deepseek_anthropic_client(server: &MockServer) -> DeepSeekClient { let _ = rustls::crypto::ring::default_provider().install_default(); - let mut providers = ProvidersConfig::default(); - providers.deepseek_anthropic = ProviderConfig { - api_key: Some("ds-test".to_string()), - base_url: Some(server.uri()), - ..ProviderConfig::default() + let providers = ProvidersConfig { + deepseek_anthropic: ProviderConfig { + api_key: Some("ds-test".to_string()), + base_url: Some(server.uri()), + ..ProviderConfig::default() + }, + ..ProvidersConfig::default() }; DeepSeekClient::new(&Config { provider: Some("deepseek-anthropic".to_string()), @@ -4457,4 +4488,112 @@ mod tests { assert_eq!(extract_sse_data_value("event: message"), None); assert_eq!(extract_sse_data_value(": heartbeat"), None); } + + /// Build a DeepSeek config with an inline key/base URL plus the resolved + /// runtime route for it. `RouteResolver` (reached through + /// `resolve_runtime_route`) is the only producer of `ReadyRouteCandidate`, + /// so we mint candidates the same way the engine does at switch time. 
+ fn deepseek_route_for_test( + base_url: &str, + model: &str, + ) -> (Config, crate::route_runtime::ResolvedRuntimeRoute) { + let config = Config { + provider: Some("deepseek".to_string()), + api_key: Some("ds-test".to_string()), + base_url: Some(base_url.to_string()), + default_text_model: Some(model.to_string()), + ..Config::default() + }; + let route = crate::route_runtime::resolve_runtime_route( + &config, + ApiProvider::Deepseek, + Some(model), + ) + .expect("deepseek route should resolve"); + (config, route) + } + + #[test] + fn from_candidate_uses_candidate_base_url_and_wire_model() { + let (_config, route) = + deepseek_route_for_test("https://route.example.com/v1", "deepseek-v4-pro"); + + let client = DeepSeekClient::from_candidate(&route.config, &route.candidate) + .expect("client should construct from candidate"); + + // The transport is bound to the candidate, not re-derived from Config. + assert_eq!(client.base_url, route.candidate.endpoint.base_url); + assert_eq!(client.default_model, route.candidate.wire_model_id.as_str()); + } + + #[test] + fn from_candidate_matches_new_when_config_agrees() { + // For a normal route, the resolver writes the candidate's wire model and + // endpoint back into `route.config`, so constructing from the candidate + // must be byte-identical to constructing from that config. This pins the + // "no behavior change today" guarantee for Slice A. 
+ let (_config, route) = + deepseek_route_for_test("https://api.deepseek.com/v1", "deepseek-v4-pro"); + + let from_new = DeepSeekClient::new(&route.config).expect("new client"); + let from_candidate = DeepSeekClient::from_candidate(&route.config, &route.candidate) + .expect("candidate client"); + + assert_eq!(from_candidate.base_url, from_new.base_url); + assert_eq!(from_candidate.default_model, from_new.default_model); + assert_eq!(from_candidate.api_provider, from_new.api_provider); + } + + #[test] + fn from_candidate_binds_custom_provider_base_url_and_model() { + // #1519: a custom OpenAI-compatible provider resolves to a candidate + // whose endpoint/model come from the named `[providers.]` table, + // and `from_candidate` must bind that verbatim base URL + wire model. + let mut custom = std::collections::HashMap::new(); + custom.insert( + "my_thing".to_string(), + ProviderConfig { + kind: Some("openai-compatible".to_string()), + base_url: Some("https://api.example.com/v1".to_string()), + model: Some("custom-model-v1".to_string()), + api_key_env: Some("EXAMPLE_API_KEY_FROM_CANDIDATE_TEST".to_string()), + ..Default::default() + }, + ); + let config = Config { + provider: Some("my_thing".to_string()), + providers: Some(ProvidersConfig { + custom, + ..Default::default() + }), + ..Config::default() + }; + + // The config names a custom provider, so it must resolve as Custom. + assert_eq!(config.api_provider(), ApiProvider::Custom); + + let route = crate::route_runtime::resolve_runtime_route(&config, ApiProvider::Custom, None) + .expect("custom route should resolve"); + + // Provide the key the route's auth path will read. + // SAFETY: single-threaded unit test mutating a uniquely-named var. 
+ unsafe { + std::env::set_var("EXAMPLE_API_KEY_FROM_CANDIDATE_TEST", "sk-custom"); + } + let client = DeepSeekClient::from_candidate(&route.config, &route.candidate) + .expect("client should construct from custom candidate"); + unsafe { + std::env::remove_var("EXAMPLE_API_KEY_FROM_CANDIDATE_TEST"); + } + + assert_eq!(client.base_url, "https://api.example.com/v1"); + assert_eq!(client.default_model, "custom-model-v1"); + assert_eq!(client.api_provider, ApiProvider::Custom); + // The candidate carried the custom endpoint + verbatim wire model. + assert_eq!( + route.candidate.endpoint.base_url, + "https://api.example.com/v1" + ); + assert_eq!(route.candidate.wire_model_id.as_str(), "custom-model-v1"); + } } diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index dbccee9b46..e72d244e1b 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -2624,6 +2624,10 @@ enum SseDataFrame { Events(Vec), } +// The six `&mut` streaming-state fields plus the style flag are a deliberate, +// shared parser-state set (mirrored by `parse_sse_chunk*`); bundling them into a +// struct would only add reborrow noise on this hot SSE path. +#[allow(clippy::too_many_arguments)] fn parse_sse_data_frame( data: &str, content_index: &mut u32, @@ -2685,6 +2689,8 @@ pub(super) fn parse_sse_chunk( ) } +// Same deliberate shared parser-state set as `parse_sse_data_frame`. 
+#[allow(clippy::too_many_arguments)] fn parse_sse_chunk_with_reasoning_style( chunk: &Value, content_index: &mut u32, diff --git a/crates/tui/src/client/responses.rs b/crates/tui/src/client/responses.rs index 96d7f4b57b..49d8613b9b 100644 --- a/crates/tui/src/client/responses.rs +++ b/crates/tui/src/client/responses.rs @@ -686,12 +686,22 @@ fn parse_responses_usage(val: &Value) -> Usage { .and_then(|d| d.get("cached_tokens")) .and_then(|v| v.as_u64()) .unwrap_or(0) as u32; + // Mirror the Chat-Completions parser: derive cache-miss as input minus the + // cached hit when the payload reports cached input tokens. Responses nests + // reasoning under `output_tokens_details` (not `completion_tokens_details`). + let prompt_cache_hit_tokens = if cached > 0 { Some(cached) } else { None }; + let prompt_cache_miss_tokens = prompt_cache_hit_tokens.map(|hit| input.saturating_sub(hit)); + let reasoning_tokens = val + .get("output_tokens_details") + .and_then(|d| d.get("reasoning_tokens")) + .and_then(|v| v.as_u64()) + .map(|v| v as u32); Usage { input_tokens: input, output_tokens: output, - prompt_cache_hit_tokens: if cached > 0 { Some(cached) } else { None }, - prompt_cache_miss_tokens: None, - reasoning_tokens: None, + prompt_cache_hit_tokens, + prompt_cache_miss_tokens, + reasoning_tokens, reasoning_replay_tokens: None, server_tool_use: None, } @@ -1003,6 +1013,33 @@ mod tests { assert_eq!(message, "response incomplete: content_filter"); } + #[test] + fn parse_responses_usage_derives_cache_miss_and_reasoning() { + let usage = json!({ + "input_tokens": 1000, + "output_tokens": 200, + "input_tokens_details": { "cached_tokens": 600 }, + "output_tokens_details": { "reasoning_tokens": 120 } + }); + + let parsed = parse_responses_usage(&usage); + + assert_eq!(parsed.input_tokens, 1000); + assert_eq!(parsed.output_tokens, 200); + assert_eq!(parsed.prompt_cache_hit_tokens, Some(600)); + // Cache-miss is derived as input minus the cached hit when cached > 0. 
+ assert_eq!(parsed.prompt_cache_miss_tokens, Some(400)); + // Reasoning surfaces from output_tokens_details (Responses dialect). + assert_eq!(parsed.reasoning_tokens, Some(120)); + + // Without cached/reasoning details, the derived fields stay None. + let bare = json!({ "input_tokens": 1000, "output_tokens": 200 }); + let parsed_bare = parse_responses_usage(&bare); + assert_eq!(parsed_bare.prompt_cache_hit_tokens, None); + assert_eq!(parsed_bare.prompt_cache_miss_tokens, None); + assert_eq!(parsed_bare.reasoning_tokens, None); + } + #[test] fn responses_input_includes_user_role_tool_results() { let request = MessageRequest { diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 938b5c7e59..acdafd96f4 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -19,217 +19,22 @@ use crate::audit::log_sensitive_event; use crate::features::{Feature, Features, FeaturesToml, is_known_feature_key}; use crate::hooks::HooksConfig; -pub const DEFAULT_MAX_SUBAGENTS: usize = 20; -pub const MAX_SUBAGENTS: usize = 20; -/// Upper bound for queued + running sub-agent admissions. This is deliberately -/// higher than the instantaneous concurrency cap so Workflow-style fanout can -/// opt into large bounded populations without unbounded queue growth. -pub const MAX_SUBAGENT_ADMISSION: usize = 200; -/// Default per-step DeepSeek API timeout for sub-agent requests, in seconds. -/// Matches the legacy hardcoded value so existing configs keep their old -/// behavior when `[subagents] api_timeout_secs` is unset (#1806, #1808). -pub const DEFAULT_SUBAGENT_API_TIMEOUT_SECS: u64 = 120; -/// Minimum accepted `[subagents] api_timeout_secs`. Anything lower (including -/// `0`, which would otherwise produce an immediate timeout footgun) clamps -/// up to this value before the runtime sees it. -pub const MIN_SUBAGENT_API_TIMEOUT_SECS: u64 = 1; -/// Maximum accepted `[subagents] api_timeout_secs` (30 minutes). 
The cap -/// keeps a misconfigured per-step timeout from masking real model/network -/// hangs forever. -pub const MAX_SUBAGENT_API_TIMEOUT_SECS: u64 = 1800; -/// Default wall-clock interval without manager-visible sub-agent progress -/// before a running child can be auto-cancelled to release its slot (#2614). -pub const DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS: u64 = 300; -/// Minimum accepted `[subagents] heartbeat_timeout_secs`. -pub const MIN_SUBAGENT_HEARTBEAT_TIMEOUT_SECS: u64 = 30; -/// Maximum accepted `[subagents] heartbeat_timeout_secs` (1 hour). -pub const MAX_SUBAGENT_HEARTBEAT_TIMEOUT_SECS: u64 = 3600; -/// Default per-SSE-chunk idle timeout, in seconds. -pub const DEFAULT_STREAM_CHUNK_TIMEOUT_SECS: u64 = 300; -/// Minimum accepted stream chunk timeout. -pub const MIN_STREAM_CHUNK_TIMEOUT_SECS: u64 = 1; -/// Maximum accepted stream chunk timeout. -pub const MAX_STREAM_CHUNK_TIMEOUT_SECS: u64 = 3600; -pub(crate) const STREAM_CHUNK_TIMEOUT_ENV: &str = "DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS"; - -fn resolve_subagent_api_timeout_secs(raw: Option) -> u64 { - let raw = raw.unwrap_or(DEFAULT_SUBAGENT_API_TIMEOUT_SECS); - if raw == 0 { - return DEFAULT_SUBAGENT_API_TIMEOUT_SECS; - } - raw.clamp(MIN_SUBAGENT_API_TIMEOUT_SECS, MAX_SUBAGENT_API_TIMEOUT_SECS) -} - -fn resolve_subagent_heartbeat_timeout_secs(raw: Option, api_timeout_secs: u64) -> u64 { - let raw = raw.unwrap_or(DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS); - let configured = if raw == 0 { - DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS - } else { - raw.clamp( - MIN_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, - MAX_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, - ) - }; - let min_for_api = api_timeout_secs.saturating_add(30).clamp( - MIN_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, - MAX_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, - ); - configured.max(min_for_api) -} - -pub const DEFAULT_TEXT_MODEL: &str = "deepseek-v4-pro"; -pub const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com/beta"; -pub const DEFAULT_DEEPSEEK_ANTHROPIC_MODEL: &str = 
DEFAULT_TEXT_MODEL; -pub const DEFAULT_DEEPSEEK_ANTHROPIC_BASE_URL: &str = "https://api.deepseek.com/anthropic"; -pub const DEFAULT_NVIDIA_NIM_MODEL: &str = "deepseek-ai/deepseek-v4-pro"; -pub const DEFAULT_NVIDIA_NIM_FLASH_MODEL: &str = "deepseek-ai/deepseek-v4-flash"; -pub const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1"; -pub const DEFAULT_OPENAI_MODEL: &str = "deepseek-v4-pro"; -pub const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1"; -pub const DEFAULT_ATLASCLOUD_MODEL: &str = "deepseek-ai/deepseek-v4-flash"; -pub const DEFAULT_ATLASCLOUD_BASE_URL: &str = "https://api.atlascloud.ai/v1"; -pub const DEFAULT_WANJIE_ARK_MODEL: &str = "deepseek-reasoner"; -pub const DEFAULT_VOLCENGINE_MODEL: &str = "DeepSeek-V4-Pro"; -pub const DEFAULT_VOLCENGINE_FLASH_MODEL: &str = "DeepSeek-V4-Flash"; -pub const DEFAULT_VOLCENGINE_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/coding/v3"; -pub const DEFAULT_WANJIE_ARK_BASE_URL: &str = "https://maas-openapi.wanjiedata.com/api/v1"; -pub const DEFAULT_OPENROUTER_MODEL: &str = "deepseek/deepseek-v4-pro"; -pub const DEFAULT_OPENROUTER_FLASH_MODEL: &str = "deepseek/deepseek-v4-flash"; -pub const OPENROUTER_ARCEE_TRINITY_LARGE_THINKING_MODEL: &str = "arcee-ai/trinity-large-thinking"; -pub const OPENROUTER_GEMMA_4_31B_MODEL: &str = "google/gemma-4-31b-it"; -pub const OPENROUTER_GEMMA_4_26B_A4B_MODEL: &str = "google/gemma-4-26b-a4b-it"; -pub const OPENROUTER_GLM_5_1_MODEL: &str = "z-ai/glm-5.1"; -pub const OPENROUTER_GLM_5_2_MODEL: &str = "z-ai/glm-5.2"; -pub const OPENROUTER_GLM_5_TURBO_MODEL: &str = "z-ai/glm-5-turbo"; -pub const OPENROUTER_KIMI_K2_7_CODE_MODEL: &str = "moonshotai/kimi-k2.7-code"; -pub const OPENROUTER_KIMI_K2_6_MODEL: &str = "moonshotai/kimi-k2.6"; -pub const OPENROUTER_MINIMAX_M3_MODEL: &str = "minimax/minimax-m3"; -pub const OPENROUTER_NEMOTRON_3_NANO_OMNI_MODEL: &str = - "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free"; -pub const 
OPENROUTER_QWEN_3_6_FLASH_MODEL: &str = "qwen/qwen3.6-flash"; -pub const OPENROUTER_QWEN_3_6_35B_A3B_MODEL: &str = "qwen/qwen3.6-35b-a3b"; -pub const OPENROUTER_QWEN_3_6_MAX_PREVIEW_MODEL: &str = "qwen/qwen3.6-max-preview"; -pub const OPENROUTER_QWEN_3_6_27B_MODEL: &str = "qwen/qwen3.6-27b"; -pub const OPENROUTER_QWEN_3_6_PLUS_MODEL: &str = "qwen/qwen3.6-plus"; -pub const OPENROUTER_QWEN_3_7_MAX_MODEL: &str = "qwen/qwen3.7-max"; -pub const OPENROUTER_MINIMAX_2_7_MODEL: &str = "minimax/minimax-2.7"; -pub const OPENROUTER_NEMOTRON_3_ULTRA_MODEL: &str = "nvidia/nemotron-3-ultra-550b-a55b"; -pub const OPENROUTER_TENCENT_HY3_PREVIEW_MODEL: &str = "tencent/hy3-preview"; -pub const OPENROUTER_XIAOMI_MIMO_V2_5_PRO_MODEL: &str = "xiaomi/mimo-v2.5-pro"; -pub const OPENROUTER_XIAOMI_MIMO_V2_5_MODEL: &str = "xiaomi/mimo-v2.5"; -pub const RECENT_OPENROUTER_LARGE_MODELS: &[&str] = &[ - OPENROUTER_ARCEE_TRINITY_LARGE_THINKING_MODEL, - OPENROUTER_MINIMAX_M3_MODEL, - OPENROUTER_XIAOMI_MIMO_V2_5_PRO_MODEL, - OPENROUTER_XIAOMI_MIMO_V2_5_MODEL, - OPENROUTER_QWEN_3_6_FLASH_MODEL, - OPENROUTER_QWEN_3_6_35B_A3B_MODEL, - OPENROUTER_QWEN_3_6_MAX_PREVIEW_MODEL, - OPENROUTER_QWEN_3_6_27B_MODEL, - OPENROUTER_QWEN_3_6_PLUS_MODEL, - OPENROUTER_QWEN_3_7_MAX_MODEL, - OPENROUTER_MINIMAX_2_7_MODEL, - OPENROUTER_NEMOTRON_3_ULTRA_MODEL, - OPENROUTER_KIMI_K2_7_CODE_MODEL, - OPENROUTER_KIMI_K2_6_MODEL, - OPENROUTER_GLM_5_1_MODEL, - OPENROUTER_GLM_5_2_MODEL, - OPENROUTER_TENCENT_HY3_PREVIEW_MODEL, - OPENROUTER_GEMMA_4_31B_MODEL, - OPENROUTER_GEMMA_4_26B_A4B_MODEL, - OPENROUTER_NEMOTRON_3_NANO_OMNI_MODEL, -]; -pub const DEFAULT_OPENROUTER_BASE_URL: &str = "https://openrouter.ai/api/v1"; -pub const DEFAULT_XIAOMI_MIMO_MODEL: &str = "mimo-v2.5-pro"; -pub const XIAOMI_MIMO_V2_5_PRO_ULTRASPEED_MODEL: &str = "mimo-v2.5-pro-ultraspeed"; -pub const XIAOMI_MIMO_PAY_AS_YOU_GO_BASE_URL: &str = "https://api.xiaomimimo.com/v1"; -pub const DEFAULT_XIAOMI_MIMO_BASE_URL: &str = 
"https://token-plan-sgp.xiaomimimo.com/v1"; -pub const XIAOMI_MIMO_TOKEN_PLAN_CN_BASE_URL: &str = "https://token-plan-cn.xiaomimimo.com/v1"; -pub const XIAOMI_MIMO_TOKEN_PLAN_SGP_BASE_URL: &str = DEFAULT_XIAOMI_MIMO_BASE_URL; -pub const XIAOMI_MIMO_TOKEN_PLAN_AMS_BASE_URL: &str = "https://token-plan-ams.xiaomimimo.com/v1"; -pub const XIAOMI_MIMO_V2_5_OMNI_MODEL: &str = "mimo-v2.5"; -pub const XIAOMI_MIMO_ASR_MODEL: &str = "mimo-v2.5-asr"; -pub const XIAOMI_MIMO_TTS_MODEL: &str = "mimo-v2.5-tts"; -pub const XIAOMI_MIMO_TTS_VOICE_DESIGN_MODEL: &str = "mimo-v2.5-tts-voicedesign"; -pub const XIAOMI_MIMO_TTS_VOICE_CLONE_MODEL: &str = "mimo-v2.5-tts-voiceclone"; -pub const XIAOMI_MIMO_V2_TTS_MODEL: &str = "mimo-v2-tts"; -pub const DEFAULT_NOVITA_MODEL: &str = "deepseek/deepseek-v4-pro"; -pub const DEFAULT_NOVITA_FLASH_MODEL: &str = "deepseek/deepseek-v4-flash"; -pub const DEFAULT_NOVITA_BASE_URL: &str = "https://api.novita.ai/openai/v1"; -pub const DEFAULT_FIREWORKS_MODEL: &str = "accounts/fireworks/models/deepseek-v4-pro"; -pub const DEFAULT_FIREWORKS_BASE_URL: &str = "https://api.fireworks.ai/inference/v1"; -pub const DEFAULT_SILICONFLOW_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; -pub const DEFAULT_SILICONFLOW_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; -pub const DEFAULT_SILICONFLOW_BASE_URL: &str = "https://api.siliconflow.com/v1"; -pub const DEFAULT_SILICONFLOW_CN_BASE_URL: &str = "https://api.siliconflow.cn/v1"; -pub const DEFAULT_ARCEE_MODEL: &str = "trinity-large-thinking"; -pub const ARCEE_TRINITY_LARGE_PREVIEW_MODEL: &str = "trinity-large-preview"; -pub const ARCEE_TRINITY_MINI_MODEL: &str = "trinity-mini"; -pub const DEFAULT_ARCEE_BASE_URL: &str = "https://api.arcee.ai/api/v1"; -pub const DEFAULT_MOONSHOT_MODEL: &str = "kimi-k2.7-code"; -pub const MOONSHOT_KIMI_K2_6_MODEL: &str = "kimi-k2.6"; -pub const DEFAULT_MOONSHOT_BASE_URL: &str = "https://api.moonshot.ai/v1"; -pub const DEFAULT_KIMI_CODE_MODEL: &str = "kimi-for-coding"; -pub const 
DEFAULT_KIMI_CODE_BASE_URL: &str = "https://api.kimi.com/coding/v1"; -pub const DEFAULT_SGLANG_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; -pub const DEFAULT_SGLANG_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; -pub const DEFAULT_SGLANG_BASE_URL: &str = "http://localhost:30000/v1"; -pub const DEFAULT_VLLM_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; -pub const DEFAULT_VLLM_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; -pub const DEFAULT_VLLM_BASE_URL: &str = "http://localhost:8000/v1"; -pub const DEFAULT_OLLAMA_MODEL: &str = "deepseek-coder:1.3b"; -pub const DEFAULT_OLLAMA_BASE_URL: &str = "http://localhost:11434/v1"; -pub const DEFAULT_HUGGINGFACE_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; -pub const DEFAULT_HUGGINGFACE_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; -pub const DEFAULT_HUGGINGFACE_BASE_URL: &str = "https://router.huggingface.co/v1"; -pub const DEFAULT_DEEPINFRA_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; -pub const DEFAULT_DEEPINFRA_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; -pub const DEFAULT_DEEPINFRA_BASE_URL: &str = "https://api.deepinfra.com/v1/openai"; -pub const DEFAULT_TOGETHER_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; -pub const DEFAULT_TOGETHER_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; -pub const DEFAULT_TOGETHER_BASE_URL: &str = "https://api.together.xyz/v1"; -pub const DEFAULT_QIANFAN_MODEL: &str = "ernie-4.0-turbo-8k"; -pub const DEFAULT_QIANFAN_BASE_URL: &str = "https://api.baiduqianfan.ai/v1"; -pub const DEFAULT_OPENAI_CODEX_MODEL: &str = "gpt-5.5"; -pub const DEFAULT_OPENAI_CODEX_BASE_URL: &str = "https://chatgpt.com/backend-api"; -pub const OPENAI_CODEX_EFFECTIVE_CONTEXT_WINDOW_TOKENS: u32 = 400_000; -/// Legacy `deepseek-cn` provider alias. -/// -/// DeepSeek's official API host is the same worldwide. Keep this alias for -/// old configs, but route it through the normal beta-enabled DeepSeek default. 
-/// Legacy typo hostname `api.deepseeki.com` remains recognized in URL -/// heuristics for backward compatibility. -pub const DEFAULT_DEEPSEEKCN_BASE_URL: &str = DEFAULT_DEEPSEEK_BASE_URL; +// Sub-agent concurrency/timeout limit constants and their clamp resolvers live +// in the `subagent_limits` leaf module. The constants are re-exported (keeping +// each item's visibility) so `crate::config::` paths resolve unchanged; +// the private resolvers are pulled back in without widening external surface +// (#3311). +mod subagent_limits; +pub use subagent_limits::*; +use subagent_limits::{resolve_subagent_api_timeout_secs, resolve_subagent_heartbeat_timeout_secs}; + +// Provider model-name and base-URL constants live in the `models` leaf module +// and are re-exported below so every `crate::config::` path is unchanged +// (#3311). +mod models; +pub use models::*; + const API_KEYRING_SENTINEL: &str = "__KEYRING__"; -pub const COMMON_DEEPSEEK_MODELS: &[&str] = &[ - "deepseek-v4-pro", - "deepseek-v4-flash", - "deepseek-ai/deepseek-v4-pro", - "deepseek-ai/deepseek-v4-flash", - "deepseek/deepseek-v4-pro", - "deepseek/deepseek-v4-flash", -]; -pub const OFFICIAL_DEEPSEEK_MODELS: &[&str] = &["deepseek-v4-pro", "deepseek-v4-flash"]; -pub const DEFAULT_ZAI_MODEL: &str = "GLM-5.2"; -pub const ZAI_GLM_5_1_MODEL: &str = "GLM-5.1"; -pub const ZAI_GLM_5_2_MODEL: &str = "GLM-5.2"; -pub const ZAI_GLM_5_TURBO_MODEL: &str = "GLM-5-Turbo"; -pub const DEFAULT_ZAI_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4"; -pub const DEFAULT_STEPFUN_MODEL: &str = "step-3.7-flash"; -pub const DEFAULT_STEPFUN_BASE_URL: &str = "https://api.stepfun.ai/v1"; -pub const DEFAULT_ANTHROPIC_MODEL: &str = "claude-sonnet-4-6"; -pub const ANTHROPIC_OPUS_MODEL: &str = "claude-opus-4-8"; -pub const ANTHROPIC_HAIKU_MODEL: &str = "claude-haiku-4-5"; -pub const DEFAULT_ANTHROPIC_BASE_URL: &str = "https://api.anthropic.com"; -pub const DEFAULT_MINIMAX_MODEL: &str = "MiniMax-M3"; -pub const MINIMAX_M2_7_MODEL: 
&str = "MiniMax-M2.7"; -pub const MINIMAX_M2_7_HIGHSPEED_MODEL: &str = "MiniMax-M2.7-highspeed"; -pub const MINIMAX_M2_5_MODEL: &str = "MiniMax-M2.5"; -pub const MINIMAX_M2_5_HIGHSPEED_MODEL: &str = "MiniMax-M2.5-highspeed"; -pub const MINIMAX_M2_1_MODEL: &str = "MiniMax-M2.1"; -pub const MINIMAX_M2_1_HIGHSPEED_MODEL: &str = "MiniMax-M2.1-highspeed"; -pub const MINIMAX_M2_MODEL: &str = "MiniMax-M2"; -pub const DEFAULT_MINIMAX_BASE_URL: &str = "https://api.minimax.io/v1"; #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[serde(rename_all = "snake_case")] @@ -262,6 +67,14 @@ pub enum ApiProvider { Stepfun, Minimax, Deepinfra, + /// User-defined OpenAI-compatible endpoint (#1519). + /// + /// Selected when `provider = ""` names a `[providers.] + /// kind="openai-compatible"` table. A single dynamic identity that maps to + /// [`codewhale_config::ProviderKind::Custom`] and routes via the OpenAI Chat + /// Completions wire protocol; the concrete endpoint/model/auth come from the + /// named config table, not from this variant. + Custom, } impl ApiProvider { @@ -386,6 +199,9 @@ impl ApiProvider { Self::Minimax => "https://platform.minimax.io/docs/guides/quickstart-preparation", Self::Deepinfra => "https://deepinfra.com/dash/api_keys", Self::OpenaiCodex | Self::Sglang | Self::Vllm | Self::Ollama => return None, + // Custom endpoints have no canonical credential page; the user + // supplies the key via their own `api_key_env`. + Self::Custom => return None, }) } @@ -397,7 +213,7 @@ impl ApiProvider { /// `ApiProvider` discriminant → `ProviderKind` lookup. /// Index 1 is `None` for the legacy `DeepseekCN` variant. 
- const KIND_LOOKUP: [Option; 28] = [ + const KIND_LOOKUP: [Option; 29] = [ Some(codewhale_config::ProviderKind::Deepseek), None, // DeepseekCN Some(codewhale_config::ProviderKind::DeepseekAnthropic), @@ -426,10 +242,11 @@ impl ApiProvider { Some(codewhale_config::ProviderKind::Stepfun), Some(codewhale_config::ProviderKind::Minimax), Some(codewhale_config::ProviderKind::Deepinfra), + Some(codewhale_config::ProviderKind::Custom), ]; /// `ProviderKind` discriminant → `ApiProvider` lookup. - const FROM_KIND_LOOKUP: [Self; 27] = [ + const FROM_KIND_LOOKUP: [Self; 28] = [ Self::Deepseek, Self::DeepseekAnthropic, Self::NvidiaNim, @@ -457,6 +274,7 @@ impl ApiProvider { Self::Stepfun, Self::Minimax, Self::Deepinfra, + Self::Custom, ]; /// Map to the config-level `ProviderKind`. @@ -1311,6 +1129,9 @@ pub fn model_completion_names_for_provider(provider: ApiProvider) -> Vec<&'stati MINIMAX_M2_1_HIGHSPEED_MODEL, MINIMAX_M2_MODEL, ], + // Custom endpoints expose no built-in completion names; the user + // supplies their own model id (#1519). + ApiProvider::Custom => Vec::new(), } } @@ -1545,132 +1366,11 @@ impl SnapshotsConfig { } } -/// Search provider enumeration — selects which backend `web_search` uses. -#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)] -#[serde(rename_all = "snake_case")] -pub enum SearchProvider { - /// Bing HTML scraping. No API key needed. - Bing, - /// DuckDuckGo HTML scraping with Bing fallback. No API key needed. - #[default] - #[serde(alias = "duckduckgo")] - DuckDuckGo, - /// Tavily AI Search API (). Requires api_key. - Tavily, - /// Bocha AI Search API (). Requires api_key. - Bocha, - /// Metaso AI Search API (). Uses built-in default key - /// or `METASO_API_KEY` env var; configurable via `[search] api_key`. - #[serde(alias = "metaso")] - Metaso, - /// SearXNG JSON search API. Requires a trusted/self-hosted `base_url`. 
- #[serde(alias = "searx", alias = "searx-ng", alias = "searx_ng")] - Searxng, - /// Baidu AI Search API (). Requires api_key. - #[serde( - alias = "baidu-search", - alias = "baidu_ai_search", - alias = "baidu_search", - alias = "baidu-ai-search" - )] - Baidu, - /// Volcengine Ark web_search via Responses API. Requires api_key. - /// Free tier: 20K queries/month per API key. Falls back to - /// `VOLCENGINE_API_KEY` / `VOLCENGINE_ARK_API_KEY` / `ARK_API_KEY` - /// env vars when `[search] api_key` is not set. - #[serde( - alias = "volcengine", - alias = "ark", - alias = "volc", - alias = "volcengine-ark", - alias = "volcengine_ark", - alias = "volc-ark" - )] - Volcengine, - /// Sofya web search API (). Requires api_key - /// (`ay_live_...`). Returns full extracted page content rather than - /// snippets; falls back to the `SOFYA_API_KEY` env var when - /// `[search] api_key` is not set. - Sofya, -} - -impl SearchProvider { - #[must_use] - pub fn parse(value: &str) -> Option { - match value.trim().to_ascii_lowercase().as_str() { - "bing" => Some(Self::Bing), - "duckduckgo" | "duck-duck-go" | "duck_duck_go" | "ddg" => Some(Self::DuckDuckGo), - "tavily" => Some(Self::Tavily), - "bocha" => Some(Self::Bocha), - "metaso" => Some(Self::Metaso), - "searxng" | "searx" | "searx-ng" | "searx_ng" => Some(Self::Searxng), - "baidu" | "baidu-search" | "baidu_search" | "baidu-ai-search" | "baidu_ai_search" => { - Some(Self::Baidu) - } - "volcengine" | "ark" | "volc" | "volcengine-ark" => Some(Self::Volcengine), - "sofya" => Some(Self::Sofya), - _ => None, - } - } - - #[must_use] - pub fn as_str(self) -> &'static str { - match self { - Self::Bing => "bing", - Self::DuckDuckGo => "duckduckgo", - Self::Tavily => "tavily", - Self::Bocha => "bocha", - Self::Metaso => "metaso", - Self::Searxng => "searxng", - Self::Baidu => "baidu", - Self::Volcengine => "volcengine", - Self::Sofya => "sofya", - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum SearchProviderSource { - 
Default, - Config, - EnvOverride, -} - -impl SearchProviderSource { - #[must_use] - pub fn as_str(self) -> &'static str { - match self { - Self::Default => "default", - Self::Config => "config", - Self::EnvOverride => "env override", - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct SearchProviderResolution { - pub provider: SearchProvider, - pub source: SearchProviderSource, -} - -/// Web search provider configuration (`[search]` table in config.toml). -#[derive(Debug, Clone, Deserialize, Default)] -pub struct SearchConfig { - /// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha` | `metaso` | `searxng` | `baidu` | `volcengine`. Default: `duckduckgo`. - #[serde(default)] - pub provider: Option, - /// Optional search endpoint. With `duckduckgo`, this is a - /// DuckDuckGo-compatible HTML endpoint. With `searxng`, this is the trusted - /// SearXNG instance root or `/search` endpoint. - #[serde(default)] - pub base_url: Option, - /// API key for Tavily, Bocha, Metaso, Baidu, or Volcengine. Not required for Bing, DuckDuckGo, or SearXNG. - /// Metaso also falls back to `METASO_API_KEY` env var, then a built-in default. - /// Baidu also falls back to `BAIDU_SEARCH_API_KEY` env var. - /// Volcengine also falls back to `VOLCENGINE_API_KEY` / `VOLCENGINE_ARK_API_KEY` / `ARK_API_KEY` env vars. - #[serde(default)] - pub api_key: Option, -} +// Web-search `[search]` table types live in the `search` leaf module and are +// re-exported below so `crate::config::SearchProvider` (and siblings) resolve +// unchanged (#3311). +mod search; +pub use search::*; /// Model-visible tool catalog controls (`[tools]` table in config.toml). #[derive(Debug, Clone, Deserialize, Default)] @@ -2683,6 +2383,33 @@ pub struct ProviderConfig { #[serde(alias = "reasoningStyle", alias = "reasoningStreamStyle")] pub reasoning_stream_style: Option, pub auth: Option, + /// Wire-protocol selector for a custom `[providers.]` entry (#1519). 
+ /// + /// Only `"openai-compatible"` is accepted for now; any other value is + /// rejected at selection time so unsupported wire formats fail loudly rather + /// than silently routing as OpenAI. Built-in providers leave this unset. + #[serde(default)] + pub kind: Option, + /// Name of the environment variable holding this custom provider's API key + /// (#1519), e.g. `api_key_env = "EXAMPLE_API_KEY"`. The key value itself is + /// never stored in config; only the env var name is. + #[serde(default, alias = "apiKeyEnv")] + pub api_key_env: Option, +} + +impl ProviderConfig { + /// True when this entry selects the OpenAI-compatible custom wire protocol. + /// + /// `kind` is matched case-insensitively against `openai-compatible` (and the + /// `openai_compatible` underscore spelling). Returns `false` when `kind` is + /// unset (built-in providers) or names any other value. + #[must_use] + pub fn is_openai_compatible_custom(&self) -> bool { + self.kind.as_deref().is_some_and(|kind| { + let normalized = kind.trim().to_ascii_lowercase().replace('_', "-"); + normalized == "openai-compatible" + }) + } } #[derive(Debug, Clone, Default, Deserialize)] @@ -2777,6 +2504,23 @@ pub struct ProvidersConfig { pub stepfun: ProviderConfig, #[serde(default)] pub minimax: ProviderConfig, + /// Arbitrary user-named custom providers (#1519). + /// + /// Captures every `[providers.]` table whose key is not one of the + /// built-in providers above. Each entry is an OpenAI-compatible custom + /// endpoint selected via `provider = ""`; routing reads its + /// `base_url` / `model` / `api_key_env` through [`ApiProvider::Custom`]. + #[serde(flatten, default)] + pub custom: HashMap, +} + +impl ProvidersConfig { + /// Look up a user-defined custom provider table by its `[providers.]` + /// key (#1519). Returns `None` when no entry with that exact name exists. 
+ #[must_use] + pub fn custom_provider_config(&self, name: &str) -> Option<&ProviderConfig> { + self.custom.get(name) + } } #[derive(Debug, Clone, Deserialize, Default)] @@ -3020,26 +2764,47 @@ impl Config { #[must_use] pub fn api_provider(&self) -> ApiProvider { - self.provider + if let Some(provider) = self.provider.as_deref().and_then(ApiProvider::parse) { + return provider; + } + // #1519 safety fix: when `provider = ""` is not a built-in provider + // but names a `[providers.]` custom table, route as the dynamic + // custom identity. This MUST precede the DeepSeek fallback below so an + // arbitrary custom name can never silently misroute to DeepSeek. + if let Some(name) = self.provider.as_deref() + && self + .providers + .as_ref() + .and_then(|providers| providers.custom_provider_config(name)) + .is_some() + { + return ApiProvider::Custom; + } + self.base_url .as_deref() - .and_then(ApiProvider::parse) - .unwrap_or_else(|| { + .filter(|base| base.contains("integrate.api.nvidia.com")) + .map(|_| ApiProvider::NvidiaNim) + .or_else(|| { self.base_url .as_deref() - .filter(|base| base.contains("integrate.api.nvidia.com")) - .map(|_| ApiProvider::NvidiaNim) - .or_else(|| { - self.base_url - .as_deref() - .filter(|base| base.contains("api.deepseeki.com")) - .map(|_| ApiProvider::DeepseekCN) - }) - .unwrap_or(ApiProvider::Deepseek) + .filter(|base| base.contains("api.deepseeki.com")) + .map(|_| ApiProvider::DeepseekCN) }) + .unwrap_or(ApiProvider::Deepseek) } pub(crate) fn provider_config_for(&self, provider: ApiProvider) -> Option<&ProviderConfig> { let providers = self.providers.as_ref()?; + // The custom provider's config lives in the flatten map, keyed by the + // selected `provider = ""` value, not in a fixed field (#1519). + // Resolve it by name so every existing reader (auth, headers, base_url) + // transparently sees the named table. 
+ if provider == ApiProvider::Custom { + return self + .provider + .as_deref() + .and_then(|name| providers.custom_provider_config(name)); + } Some(match provider { ApiProvider::Deepseek => &providers.deepseek, ApiProvider::DeepseekCN => &providers.deepseek_cn, @@ -3069,6 +2834,8 @@ impl Config { ApiProvider::Zai => &providers.zai, ApiProvider::Stepfun => &providers.stepfun, ApiProvider::Minimax => &providers.minimax, + // Handled by the name-keyed early return above (#1519). + ApiProvider::Custom => unreachable!("custom provider resolved by name above"), }) } @@ -3083,7 +2850,19 @@ impl Config { } pub(crate) fn provider_config_for_mut(&mut self, provider: ApiProvider) -> &mut ProviderConfig { + // The custom provider's mutable slot is keyed by the selected + // `provider = ""` value in the flatten map (#1519). Capture the + // name before borrowing `providers` mutably; fall back to a private + // sentinel key so the accessor stays total when no name is set. + let custom_key = (provider == ApiProvider::Custom).then(|| { + self.provider + .clone() + .unwrap_or_else(|| "__custom__".to_string()) + }); let providers = self.providers.get_or_insert_with(ProvidersConfig::default); + if let Some(key) = custom_key { + return providers.custom.entry(key).or_default(); + } match provider { ApiProvider::Deepseek => &mut providers.deepseek, ApiProvider::DeepseekCN => &mut providers.deepseek_cn, @@ -3113,6 +2892,8 @@ impl Config { ApiProvider::Zai => &mut providers.zai, ApiProvider::Stepfun => &mut providers.stepfun, ApiProvider::Minimax => &mut providers.minimax, + // Handled by the name-keyed early return above (#1519). 
+ ApiProvider::Custom => unreachable!("custom provider resolved by name above"), } } @@ -3262,6 +3043,11 @@ impl Config { ApiProvider::Stepfun => DEFAULT_STEPFUN_MODEL, ApiProvider::Anthropic => DEFAULT_ANTHROPIC_MODEL, ApiProvider::Minimax => DEFAULT_MINIMAX_MODEL, + // Custom endpoints have no built-in default model; pass through the + // descriptor placeholder when nothing is configured (#1519). + ApiProvider::Custom => codewhale_config::ProviderKind::Custom + .provider() + .default_model(), } .to_string() } @@ -3307,7 +3093,10 @@ impl Config { | ApiProvider::OpenaiCodex | ApiProvider::Zai | ApiProvider::Stepfun - | ApiProvider::Minimax => None, + | ApiProvider::Minimax + // Custom reads its base_url from the named `[providers.]` + // table (via provider_base), never from the legacy root field. + | ApiProvider::Custom => None, }; let configured_base_url = provider_base.or(root_base); let base = if provider == ApiProvider::XiaomiMimo { @@ -3363,6 +3152,12 @@ impl Config { ApiProvider::Stepfun => DEFAULT_STEPFUN_BASE_URL, ApiProvider::Anthropic => DEFAULT_ANTHROPIC_BASE_URL, ApiProvider::Minimax => DEFAULT_MINIMAX_BASE_URL, + // No built-in endpoint; descriptor placeholder keeps the + // fallback total. A real custom route configures + // `[providers.] base_url` which wins above (#1519). + ApiProvider::Custom => codewhale_config::ProviderKind::Custom + .provider() + .default_base_url(), } .to_string() }) @@ -3443,6 +3238,23 @@ impl Config { return Ok(configured); } + // 1b. Custom providers (#1519) name their auth env var per-entry via + // `[providers.] api_key_env = "..."`. Resolve it before the + // generic env step, since the custom identity declares no built-in env + // var. The env var NAME is read from config; the secret value is read + // from the process environment and never persisted. 
+ if provider == ApiProvider::Custom + && let Some(env_name) = self + .provider_config_for(provider) + .and_then(|entry| entry.api_key_env.as_deref()) + .map(str::trim) + .filter(|name| !name.is_empty()) + && let Ok(value) = std::env::var(env_name) + && !value.trim().is_empty() + { + return Ok(value); + } + // 2. Environment variables. Do not query platform credential stores // here; routine startup and doctor checks must stay prompt-free. if provider == ApiProvider::XiaomiMimo { @@ -3515,6 +3327,29 @@ impl Config { // Self-hosted deployments commonly run without auth on localhost. // Return an empty key and let the client omit the Authorization header. ApiProvider::Sglang | ApiProvider::Vllm | ApiProvider::Ollama => Ok(String::new()), + // Custom OpenAI-compatible endpoints (#1519): the key comes from the + // env var named by `[providers.] api_key_env`. If we reached + // here it is unset/empty (and the endpoint is not loopback). + ApiProvider::Custom => { + let provider_name = self.provider.as_deref().unwrap_or(""); + match self + .provider_config_for(provider) + .and_then(|entry| entry.api_key_env.as_deref()) + .map(str::trim) + .filter(|name| !name.is_empty()) + { + Some(env_name) => anyhow::bail!( + "Custom provider '{provider_name}' API key not found.\n\ + Set the environment variable {env_name} to your key, \ + or add api_key to [providers.{provider_name}]." + ), + None => anyhow::bail!( + "Custom provider '{provider_name}' has no auth configured.\n\ + Add api_key_env = \"YOUR_ENV_VAR\" (or api_key) to \ + [providers.{provider_name}] in ~/.codewhale/config.toml." 
+ ), + } + } _ => anyhow::bail!("{}", missing_provider_api_key_message(provider)?), } } @@ -4047,65 +3882,17 @@ fn root_deepseek_model_is_foreign_to_direct_provider(provider: ApiProvider, mode // === Defaults === -fn default_config_path() -> Option { - env_config_path().or_else(home_config_path) -} - -fn codewhale_home_dir() -> Option { - std::env::var_os("CODEWHALE_HOME").and_then(|path| { - let path = PathBuf::from(path); - (!path.as_os_str().is_empty()).then_some(path) - }) -} - -pub(crate) fn effective_home_dir() -> Option { - if let Some(path) = std::env::var_os("HOME") { - let path = PathBuf::from(path); - if !path.as_os_str().is_empty() { - return Some(path); - } - } - - if let Some(path) = std::env::var_os("USERPROFILE") { - let path = PathBuf::from(path); - if !path.as_os_str().is_empty() { - return Some(path); - } - } - - #[cfg(windows)] - { - if let (Some(drive), Some(homepath)) = - (std::env::var_os("HOMEDRIVE"), std::env::var_os("HOMEPATH")) - { - let mut path = PathBuf::from(drive); - path.push(homepath); - if !path.as_os_str().is_empty() { - return Some(path); - } - } - } - - dirs::home_dir() -} - -fn home_config_path() -> Option { - if let Some(home) = codewhale_home_dir() { - return Some(home.join("config.toml")); - } - - effective_home_dir().map(|home| { - let primary = home.join(".codewhale").join("config.toml"); - if primary.exists() { - return primary; - } - let legacy = home.join(".deepseek").join("config.toml"); - if legacy.exists() { - return legacy; - } - primary - }) -} +// Pure filesystem path helpers live in the `paths` leaf module. The two +// `pub(crate)` entry points are re-exported so external `crate::config::` +// callers resolve unchanged; the remaining helpers are imported privately for +// the workspace-trust/config-load logic that stays in this file (#3311). 
+mod paths; +use paths::{ + canonicalize_or_keep, codewhale_home_dir, default_config_path, default_managed_config_path, + default_mcp_config_path, default_memory_path, default_notes_path, default_requirements_path, + default_skills_dir, env_config_path, expand_pathbuf, home_config_path, workspace_config_key, +}; +pub(crate) use paths::{effective_home_dir, expand_path}; pub(crate) fn workspace_trust_config_candidate_paths() -> Vec { if let Some(path) = env_config_path() { @@ -4192,39 +3979,6 @@ fn is_trusted_level(level: &str) -> bool { level.trim().eq_ignore_ascii_case("trusted") } -fn workspace_config_key(workspace: &Path) -> String { - canonicalize_or_keep(workspace) - .to_string_lossy() - .into_owned() -} - -fn canonicalize_or_keep(path: &Path) -> PathBuf { - path.canonicalize().unwrap_or_else(|_| path.to_path_buf()) -} - -fn env_config_path() -> Option { - if let Ok(path) = std::env::var("CODEWHALE_CONFIG_PATH") { - let trimmed = path.trim(); - if !trimmed.is_empty() { - return Some(expand_path(trimmed)); - } - } - if let Ok(path) = std::env::var("DEEPSEEK_CONFIG_PATH") { - let trimmed = path.trim(); - if !trimmed.is_empty() { - return Some(expand_path(trimmed)); - } - } - None -} - -fn expand_pathbuf(path: PathBuf) -> PathBuf { - if let Some(raw) = path.to_str() { - return expand_path(raw); - } - path -} - pub(crate) fn resolve_load_config_path(path: Option) -> Option { if let Some(path) = path { return Some(expand_pathbuf(path)); @@ -4289,102 +4043,6 @@ check_for_updates = true Ok(Some(config_path)) } -fn default_managed_config_path() -> Option { - #[cfg(unix)] - { - Some(PathBuf::from("/etc/deepseek/managed_config.toml")) - } - #[cfg(not(unix))] - { - effective_home_dir().map(|home| { - let primary = home.join(".codewhale").join("managed_config.toml"); - if primary.exists() { - return primary; - } - home.join(".deepseek").join("managed_config.toml") - }) - } -} - -fn default_requirements_path() -> Option { - #[cfg(unix)] - { - 
Some(PathBuf::from("/etc/deepseek/requirements.toml")) - } - #[cfg(not(unix))] - { - effective_home_dir().map(|home| { - let primary = home.join(".codewhale").join("requirements.toml"); - if primary.exists() { - return primary; - } - home.join(".deepseek").join("requirements.toml") - }) - } -} - -pub(crate) fn expand_path(path: &str) -> PathBuf { - if let Some(stripped) = path.strip_prefix('~') - && (stripped.is_empty() || stripped.starts_with('/') || stripped.starts_with('\\')) - && let Some(mut home) = effective_home_dir() - { - let suffix = stripped.trim_start_matches(['/', '\\']); - if !suffix.is_empty() { - home.push(suffix); - } - return home; - } - - let expanded = shellexpand::tilde(path); - PathBuf::from(expanded.as_ref()) -} - -fn default_skills_dir() -> Option { - effective_home_dir().map(|home| home.join(".codewhale").join("skills")) -} - -fn default_mcp_config_path() -> Option { - effective_home_dir().map(|home| { - let primary = home.join(".codewhale").join("mcp.json"); - if primary.exists() { - return primary; - } - let legacy = home.join(".deepseek").join("mcp.json"); - if legacy.exists() { - return legacy; - } - primary - }) -} - -fn default_notes_path() -> Option { - effective_home_dir().map(|home| { - let primary = home.join(".codewhale").join("notes.txt"); - if primary.exists() { - return primary; - } - let legacy = home.join(".deepseek").join("notes.txt"); - if legacy.exists() { - return legacy; - } - primary - }) -} - -fn default_memory_path() -> Option { - effective_home_dir().map(|home| { - let primary = home.join(".codewhale").join("memory.md"); - if primary.exists() { - return primary; - } - let legacy = home.join(".deepseek").join("memory.md"); - if legacy.exists() { - return legacy; - } - primary - }) -} - // === Environment Overrides === /// Read the `DEEPSEEK_BASE_URL` / `CODEWHALE_BASE_URL` env var that the CLI @@ -4605,6 +4263,11 @@ fn apply_env_overrides(config: &mut Config) { .minimax .base_url = Some(value); } + // Custom resolves 
to the named `[providers.]` table; route the + // override through the name-keyed mutable accessor (#1519). + ApiProvider::Custom => { + config.provider_config_for_mut(ApiProvider::Custom).base_url = Some(value); + } } } if matches!(config.api_provider(), ApiProvider::NvidiaNim) @@ -4786,6 +4449,14 @@ fn apply_env_overrides(config: &mut Config) { config.http_headers = Some(root_headers); let provider = config.api_provider(); + // Capture the custom entry key (the selected provider name) before the + // mutable borrow of `providers` below (#1519). + let custom_key = (provider == ApiProvider::Custom).then(|| { + config + .provider + .clone() + .unwrap_or_else(|| "__custom__".to_string()) + }); let providers = config .providers .get_or_insert_with(ProvidersConfig::default); @@ -4818,6 +4489,10 @@ fn apply_env_overrides(config: &mut Config) { ApiProvider::Zai => &mut providers.zai, ApiProvider::Stepfun => &mut providers.stepfun, ApiProvider::Minimax => &mut providers.minimax, + ApiProvider::Custom => providers + .custom + .entry(custom_key.expect("custom key captured for custom provider")) + .or_default(), }; let mut provider_headers = entry.http_headers.clone().unwrap_or_default(); provider_headers.extend(headers); @@ -4983,6 +4658,13 @@ fn apply_env_overrides(config: &mut Config) { // (issue #1714). Mirror the OPENAI_MODEL branch above for every // non-DeepSeek provider. let provider = config.api_provider(); + // Capture the custom entry key before the mutable borrow below (#1519). 
+ let custom_key = (provider == ApiProvider::Custom).then(|| { + config + .provider + .clone() + .unwrap_or_else(|| "__custom__".to_string()) + }); if matches!( provider, ApiProvider::Deepseek | ApiProvider::DeepseekCN | ApiProvider::DeepseekAnthropic @@ -4998,6 +4680,10 @@ fn apply_env_overrides(config: &mut Config) { | ApiProvider::DeepseekAnthropic => unreachable!( "DeepSeek providers are handled in the if branch above (issue #1714)" ), + ApiProvider::Custom => providers + .custom + .entry(custom_key.expect("custom key captured for custom provider")) + .or_default(), ApiProvider::NvidiaNim => &mut providers.nvidia_nim, ApiProvider::Openai => &mut providers.openai, ApiProvider::Atlascloud => &mut providers.atlascloud, @@ -5223,6 +4909,9 @@ pub(crate) fn provider_passes_model_through(provider: ApiProvider) -> bool { | ApiProvider::Qianfan | ApiProvider::Ollama | ApiProvider::Huggingface + // Custom OpenAI-compatible endpoints preserve user-supplied model + // ids verbatim (#1519); never normalize/rewrite them. + | ApiProvider::Custom ) } @@ -5719,7 +5408,26 @@ fn merge_provider_config(base: ProviderConfig, override_cfg: ProviderConfig) -> .reasoning_stream_style .or(base.reasoning_stream_style), auth: override_cfg.auth.or(base.auth), + kind: override_cfg.kind.or(base.kind), + api_key_env: override_cfg.api_key_env.or(base.api_key_env), + } +} + +/// Merge the per-name custom provider maps (#1519): the union of both key sets, +/// with each shared key deep-merged via [`merge_provider_config`] (override +/// wins field-by-field). Keys present in only one map are carried through as-is. 
+fn merge_custom_providers( + mut base: HashMap, + override_cfg: HashMap, +) -> HashMap { + for (name, entry) in override_cfg { + let merged = match base.remove(&name) { + Some(base_entry) => merge_provider_config(base_entry, entry), + None => entry, + }; + base.insert(name, merged); } + base } fn merge_providers( @@ -5762,6 +5470,7 @@ fn merge_providers( zai: merge_provider_config(base.zai, override_cfg.zai), stepfun: merge_provider_config(base.stepfun, override_cfg.stepfun), minimax: merge_provider_config(base.minimax, override_cfg.minimax), + custom: merge_custom_providers(base.custom, override_cfg.custom), }), } } diff --git a/crates/tui/src/config/models.rs b/crates/tui/src/config/models.rs new file mode 100644 index 0000000000..3dea85d580 --- /dev/null +++ b/crates/tui/src/config/models.rs @@ -0,0 +1,160 @@ +//! Static provider model-name and base-URL constants. +//! +//! These are pure data tables (default model identifiers, base URLs, and +//! curated model lists) extracted verbatim from `config.rs` to keep the +//! configuration monolith focused on loading/normalization logic. They are +//! re-exported from `crate::config` via `pub use models::*;`, so every existing +//! `crate::config::` path keeps resolving unchanged (#3311). 
+ +pub const DEFAULT_TEXT_MODEL: &str = "deepseek-v4-pro"; +pub const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com/beta"; +pub const DEFAULT_DEEPSEEK_ANTHROPIC_MODEL: &str = DEFAULT_TEXT_MODEL; +pub const DEFAULT_DEEPSEEK_ANTHROPIC_BASE_URL: &str = "https://api.deepseek.com/anthropic"; +pub const DEFAULT_NVIDIA_NIM_MODEL: &str = "deepseek-ai/deepseek-v4-pro"; +pub const DEFAULT_NVIDIA_NIM_FLASH_MODEL: &str = "deepseek-ai/deepseek-v4-flash"; +pub const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1"; +pub const DEFAULT_OPENAI_MODEL: &str = "deepseek-v4-pro"; +pub const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1"; +pub const DEFAULT_ATLASCLOUD_MODEL: &str = "deepseek-ai/deepseek-v4-flash"; +pub const DEFAULT_ATLASCLOUD_BASE_URL: &str = "https://api.atlascloud.ai/v1"; +pub const DEFAULT_WANJIE_ARK_MODEL: &str = "deepseek-reasoner"; +pub const DEFAULT_VOLCENGINE_MODEL: &str = "DeepSeek-V4-Pro"; +pub const DEFAULT_VOLCENGINE_FLASH_MODEL: &str = "DeepSeek-V4-Flash"; +pub const DEFAULT_VOLCENGINE_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/coding/v3"; +pub const DEFAULT_WANJIE_ARK_BASE_URL: &str = "https://maas-openapi.wanjiedata.com/api/v1"; +pub const DEFAULT_OPENROUTER_MODEL: &str = "deepseek/deepseek-v4-pro"; +pub const DEFAULT_OPENROUTER_FLASH_MODEL: &str = "deepseek/deepseek-v4-flash"; +pub const OPENROUTER_ARCEE_TRINITY_LARGE_THINKING_MODEL: &str = "arcee-ai/trinity-large-thinking"; +pub const OPENROUTER_GEMMA_4_31B_MODEL: &str = "google/gemma-4-31b-it"; +pub const OPENROUTER_GEMMA_4_26B_A4B_MODEL: &str = "google/gemma-4-26b-a4b-it"; +pub const OPENROUTER_GLM_5_1_MODEL: &str = "z-ai/glm-5.1"; +pub const OPENROUTER_GLM_5_2_MODEL: &str = "z-ai/glm-5.2"; +pub const OPENROUTER_GLM_5_TURBO_MODEL: &str = "z-ai/glm-5-turbo"; +pub const OPENROUTER_KIMI_K2_7_CODE_MODEL: &str = "moonshotai/kimi-k2.7-code"; +pub const OPENROUTER_KIMI_K2_6_MODEL: &str = "moonshotai/kimi-k2.6"; +pub const 
OPENROUTER_MINIMAX_M3_MODEL: &str = "minimax/minimax-m3"; +pub const OPENROUTER_NEMOTRON_3_NANO_OMNI_MODEL: &str = + "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free"; +pub const OPENROUTER_QWEN_3_6_FLASH_MODEL: &str = "qwen/qwen3.6-flash"; +pub const OPENROUTER_QWEN_3_6_35B_A3B_MODEL: &str = "qwen/qwen3.6-35b-a3b"; +pub const OPENROUTER_QWEN_3_6_MAX_PREVIEW_MODEL: &str = "qwen/qwen3.6-max-preview"; +pub const OPENROUTER_QWEN_3_6_27B_MODEL: &str = "qwen/qwen3.6-27b"; +pub const OPENROUTER_QWEN_3_6_PLUS_MODEL: &str = "qwen/qwen3.6-plus"; +pub const OPENROUTER_QWEN_3_7_MAX_MODEL: &str = "qwen/qwen3.7-max"; +pub const OPENROUTER_MINIMAX_2_7_MODEL: &str = "minimax/minimax-2.7"; +pub const OPENROUTER_NEMOTRON_3_ULTRA_MODEL: &str = "nvidia/nemotron-3-ultra-550b-a55b"; +pub const OPENROUTER_TENCENT_HY3_PREVIEW_MODEL: &str = "tencent/hy3-preview"; +pub const OPENROUTER_XIAOMI_MIMO_V2_5_PRO_MODEL: &str = "xiaomi/mimo-v2.5-pro"; +pub const OPENROUTER_XIAOMI_MIMO_V2_5_MODEL: &str = "xiaomi/mimo-v2.5"; +pub const RECENT_OPENROUTER_LARGE_MODELS: &[&str] = &[ + OPENROUTER_ARCEE_TRINITY_LARGE_THINKING_MODEL, + OPENROUTER_MINIMAX_M3_MODEL, + OPENROUTER_XIAOMI_MIMO_V2_5_PRO_MODEL, + OPENROUTER_XIAOMI_MIMO_V2_5_MODEL, + OPENROUTER_QWEN_3_6_FLASH_MODEL, + OPENROUTER_QWEN_3_6_35B_A3B_MODEL, + OPENROUTER_QWEN_3_6_MAX_PREVIEW_MODEL, + OPENROUTER_QWEN_3_6_27B_MODEL, + OPENROUTER_QWEN_3_6_PLUS_MODEL, + OPENROUTER_QWEN_3_7_MAX_MODEL, + OPENROUTER_MINIMAX_2_7_MODEL, + OPENROUTER_NEMOTRON_3_ULTRA_MODEL, + OPENROUTER_KIMI_K2_7_CODE_MODEL, + OPENROUTER_KIMI_K2_6_MODEL, + OPENROUTER_GLM_5_1_MODEL, + OPENROUTER_GLM_5_2_MODEL, + OPENROUTER_TENCENT_HY3_PREVIEW_MODEL, + OPENROUTER_GEMMA_4_31B_MODEL, + OPENROUTER_GEMMA_4_26B_A4B_MODEL, + OPENROUTER_NEMOTRON_3_NANO_OMNI_MODEL, +]; +pub const DEFAULT_OPENROUTER_BASE_URL: &str = "https://openrouter.ai/api/v1"; +pub const DEFAULT_XIAOMI_MIMO_MODEL: &str = "mimo-v2.5-pro"; +pub const XIAOMI_MIMO_V2_5_PRO_ULTRASPEED_MODEL: &str = 
"mimo-v2.5-pro-ultraspeed"; +pub const XIAOMI_MIMO_PAY_AS_YOU_GO_BASE_URL: &str = "https://api.xiaomimimo.com/v1"; +pub const DEFAULT_XIAOMI_MIMO_BASE_URL: &str = "https://token-plan-sgp.xiaomimimo.com/v1"; +pub const XIAOMI_MIMO_TOKEN_PLAN_CN_BASE_URL: &str = "https://token-plan-cn.xiaomimimo.com/v1"; +pub const XIAOMI_MIMO_TOKEN_PLAN_SGP_BASE_URL: &str = DEFAULT_XIAOMI_MIMO_BASE_URL; +pub const XIAOMI_MIMO_TOKEN_PLAN_AMS_BASE_URL: &str = "https://token-plan-ams.xiaomimimo.com/v1"; +pub const XIAOMI_MIMO_V2_5_OMNI_MODEL: &str = "mimo-v2.5"; +pub const XIAOMI_MIMO_ASR_MODEL: &str = "mimo-v2.5-asr"; +pub const XIAOMI_MIMO_TTS_MODEL: &str = "mimo-v2.5-tts"; +pub const XIAOMI_MIMO_TTS_VOICE_DESIGN_MODEL: &str = "mimo-v2.5-tts-voicedesign"; +pub const XIAOMI_MIMO_TTS_VOICE_CLONE_MODEL: &str = "mimo-v2.5-tts-voiceclone"; +pub const XIAOMI_MIMO_V2_TTS_MODEL: &str = "mimo-v2-tts"; +pub const DEFAULT_NOVITA_MODEL: &str = "deepseek/deepseek-v4-pro"; +pub const DEFAULT_NOVITA_FLASH_MODEL: &str = "deepseek/deepseek-v4-flash"; +pub const DEFAULT_NOVITA_BASE_URL: &str = "https://api.novita.ai/openai/v1"; +pub const DEFAULT_FIREWORKS_MODEL: &str = "accounts/fireworks/models/deepseek-v4-pro"; +pub const DEFAULT_FIREWORKS_BASE_URL: &str = "https://api.fireworks.ai/inference/v1"; +pub const DEFAULT_SILICONFLOW_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; +pub const DEFAULT_SILICONFLOW_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; +pub const DEFAULT_SILICONFLOW_BASE_URL: &str = "https://api.siliconflow.com/v1"; +pub const DEFAULT_SILICONFLOW_CN_BASE_URL: &str = "https://api.siliconflow.cn/v1"; +pub const DEFAULT_ARCEE_MODEL: &str = "trinity-large-thinking"; +pub const ARCEE_TRINITY_LARGE_PREVIEW_MODEL: &str = "trinity-large-preview"; +pub const ARCEE_TRINITY_MINI_MODEL: &str = "trinity-mini"; +pub const DEFAULT_ARCEE_BASE_URL: &str = "https://api.arcee.ai/api/v1"; +pub const DEFAULT_MOONSHOT_MODEL: &str = "kimi-k2.7-code"; +pub const MOONSHOT_KIMI_K2_6_MODEL: &str = 
"kimi-k2.6"; +pub const DEFAULT_MOONSHOT_BASE_URL: &str = "https://api.moonshot.ai/v1"; +pub const DEFAULT_KIMI_CODE_MODEL: &str = "kimi-for-coding"; +pub const DEFAULT_KIMI_CODE_BASE_URL: &str = "https://api.kimi.com/coding/v1"; +pub const DEFAULT_SGLANG_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; +pub const DEFAULT_SGLANG_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; +pub const DEFAULT_SGLANG_BASE_URL: &str = "http://localhost:30000/v1"; +pub const DEFAULT_VLLM_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; +pub const DEFAULT_VLLM_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; +pub const DEFAULT_VLLM_BASE_URL: &str = "http://localhost:8000/v1"; +pub const DEFAULT_OLLAMA_MODEL: &str = "deepseek-coder:1.3b"; +pub const DEFAULT_OLLAMA_BASE_URL: &str = "http://localhost:11434/v1"; +pub const DEFAULT_HUGGINGFACE_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; +pub const DEFAULT_HUGGINGFACE_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; +pub const DEFAULT_HUGGINGFACE_BASE_URL: &str = "https://router.huggingface.co/v1"; +pub const DEFAULT_DEEPINFRA_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; +pub const DEFAULT_DEEPINFRA_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; +pub const DEFAULT_DEEPINFRA_BASE_URL: &str = "https://api.deepinfra.com/v1/openai"; +pub const DEFAULT_TOGETHER_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; +pub const DEFAULT_TOGETHER_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; +pub const DEFAULT_TOGETHER_BASE_URL: &str = "https://api.together.xyz/v1"; +pub const DEFAULT_QIANFAN_MODEL: &str = "ernie-4.0-turbo-8k"; +pub const DEFAULT_QIANFAN_BASE_URL: &str = "https://api.baiduqianfan.ai/v1"; +pub const DEFAULT_OPENAI_CODEX_MODEL: &str = "gpt-5.5"; +pub const DEFAULT_OPENAI_CODEX_BASE_URL: &str = "https://chatgpt.com/backend-api"; +pub const OPENAI_CODEX_EFFECTIVE_CONTEXT_WINDOW_TOKENS: u32 = 400_000; +/// Legacy `deepseek-cn` provider alias. +/// +/// DeepSeek's official API host is the same worldwide. 
Keep this alias for +/// old configs, but route it through the normal beta-enabled DeepSeek default. +/// Legacy typo hostname `api.deepseeki.com` remains recognized in URL +/// heuristics for backward compatibility. +pub const DEFAULT_DEEPSEEKCN_BASE_URL: &str = DEFAULT_DEEPSEEK_BASE_URL; +pub const COMMON_DEEPSEEK_MODELS: &[&str] = &[ + "deepseek-v4-pro", + "deepseek-v4-flash", + "deepseek-ai/deepseek-v4-pro", + "deepseek-ai/deepseek-v4-flash", + "deepseek/deepseek-v4-pro", + "deepseek/deepseek-v4-flash", +]; +pub const OFFICIAL_DEEPSEEK_MODELS: &[&str] = &["deepseek-v4-pro", "deepseek-v4-flash"]; +pub const DEFAULT_ZAI_MODEL: &str = "GLM-5.2"; +pub const ZAI_GLM_5_1_MODEL: &str = "GLM-5.1"; +pub const ZAI_GLM_5_2_MODEL: &str = "GLM-5.2"; +pub const ZAI_GLM_5_TURBO_MODEL: &str = "GLM-5-Turbo"; +pub const DEFAULT_ZAI_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4"; +pub const DEFAULT_STEPFUN_MODEL: &str = "step-3.7-flash"; +pub const DEFAULT_STEPFUN_BASE_URL: &str = "https://api.stepfun.ai/v1"; +pub const DEFAULT_ANTHROPIC_MODEL: &str = "claude-sonnet-4-6"; +pub const ANTHROPIC_OPUS_MODEL: &str = "claude-opus-4-8"; +pub const ANTHROPIC_HAIKU_MODEL: &str = "claude-haiku-4-5"; +pub const DEFAULT_ANTHROPIC_BASE_URL: &str = "https://api.anthropic.com"; +pub const DEFAULT_MINIMAX_MODEL: &str = "MiniMax-M3"; +pub const MINIMAX_M2_7_MODEL: &str = "MiniMax-M2.7"; +pub const MINIMAX_M2_7_HIGHSPEED_MODEL: &str = "MiniMax-M2.7-highspeed"; +pub const MINIMAX_M2_5_MODEL: &str = "MiniMax-M2.5"; +pub const MINIMAX_M2_5_HIGHSPEED_MODEL: &str = "MiniMax-M2.5-highspeed"; +pub const MINIMAX_M2_1_MODEL: &str = "MiniMax-M2.1"; +pub const MINIMAX_M2_1_HIGHSPEED_MODEL: &str = "MiniMax-M2.1-highspeed"; +pub const MINIMAX_M2_MODEL: &str = "MiniMax-M2"; +pub const DEFAULT_MINIMAX_BASE_URL: &str = "https://api.minimax.io/v1"; diff --git a/crates/tui/src/config/paths.rs b/crates/tui/src/config/paths.rs new file mode 100644 index 0000000000..a30aa44e33 --- /dev/null +++ 
b/crates/tui/src/config/paths.rs @@ -0,0 +1,203 @@ +//! Filesystem path resolution helpers for config/cache/workspace locations. +//! +//! Pure path-building helpers extracted verbatim from `config.rs`. They depend +//! only on `std`, `dirs`, and `shellexpand` plus one another, so they form a +//! clean leaf. `config.rs` pulls them back in (`use paths::{...}`) for the +//! workspace-trust and config-loading logic that stays there, and re-exports +//! the two `pub(crate)` entry points (`effective_home_dir`, `expand_path`) so +//! external `crate::config::` callers resolve unchanged (#3311). +//! +//! Visibility note: helpers that were file-private `fn` in `config.rs` are +//! `pub(crate)` here purely so the parent module can name them; none are +//! re-exported publicly, so the crate's external surface is unchanged. + +use std::path::{Path, PathBuf}; + +pub(crate) fn default_config_path() -> Option { + env_config_path().or_else(home_config_path) +} + +pub(crate) fn codewhale_home_dir() -> Option { + std::env::var_os("CODEWHALE_HOME").and_then(|path| { + let path = PathBuf::from(path); + (!path.as_os_str().is_empty()).then_some(path) + }) +} + +pub(crate) fn effective_home_dir() -> Option { + if let Some(path) = std::env::var_os("HOME") { + let path = PathBuf::from(path); + if !path.as_os_str().is_empty() { + return Some(path); + } + } + + if let Some(path) = std::env::var_os("USERPROFILE") { + let path = PathBuf::from(path); + if !path.as_os_str().is_empty() { + return Some(path); + } + } + + #[cfg(windows)] + { + if let (Some(drive), Some(homepath)) = + (std::env::var_os("HOMEDRIVE"), std::env::var_os("HOMEPATH")) + { + let mut path = PathBuf::from(drive); + path.push(homepath); + if !path.as_os_str().is_empty() { + return Some(path); + } + } + } + + dirs::home_dir() +} + +pub(crate) fn home_config_path() -> Option { + if let Some(home) = codewhale_home_dir() { + return Some(home.join("config.toml")); + } + + effective_home_dir().map(|home| { + let primary = 
home.join(".codewhale").join("config.toml"); + if primary.exists() { + return primary; + } + let legacy = home.join(".deepseek").join("config.toml"); + if legacy.exists() { + return legacy; + } + primary + }) +} + +pub(crate) fn workspace_config_key(workspace: &Path) -> String { + canonicalize_or_keep(workspace) + .to_string_lossy() + .into_owned() +} + +pub(crate) fn canonicalize_or_keep(path: &Path) -> PathBuf { + path.canonicalize().unwrap_or_else(|_| path.to_path_buf()) +} + +pub(crate) fn env_config_path() -> Option { + if let Ok(path) = std::env::var("CODEWHALE_CONFIG_PATH") { + let trimmed = path.trim(); + if !trimmed.is_empty() { + return Some(expand_path(trimmed)); + } + } + if let Ok(path) = std::env::var("DEEPSEEK_CONFIG_PATH") { + let trimmed = path.trim(); + if !trimmed.is_empty() { + return Some(expand_path(trimmed)); + } + } + None +} + +pub(crate) fn expand_pathbuf(path: PathBuf) -> PathBuf { + if let Some(raw) = path.to_str() { + return expand_path(raw); + } + path +} + +pub(crate) fn default_managed_config_path() -> Option { + #[cfg(unix)] + { + Some(PathBuf::from("/etc/deepseek/managed_config.toml")) + } + #[cfg(not(unix))] + { + effective_home_dir().map(|home| { + let primary = home.join(".codewhale").join("managed_config.toml"); + if primary.exists() { + return primary; + } + home.join(".deepseek").join("managed_config.toml") + }) + } +} + +pub(crate) fn default_requirements_path() -> Option { + #[cfg(unix)] + { + Some(PathBuf::from("/etc/deepseek/requirements.toml")) + } + #[cfg(not(unix))] + { + effective_home_dir().map(|home| { + let primary = home.join(".codewhale").join("requirements.toml"); + if primary.exists() { + return primary; + } + home.join(".deepseek").join("requirements.toml") + }) + } +} + +pub(crate) fn expand_path(path: &str) -> PathBuf { + if let Some(stripped) = path.strip_prefix('~') + && (stripped.is_empty() || stripped.starts_with('/') || stripped.starts_with('\\')) + && let Some(mut home) = effective_home_dir() + { + let 
suffix = stripped.trim_start_matches(['/', '\\']); + if !suffix.is_empty() { + home.push(suffix); + } + return home; + } + + let expanded = shellexpand::tilde(path); + PathBuf::from(expanded.as_ref()) +} + +pub(crate) fn default_skills_dir() -> Option { + effective_home_dir().map(|home| home.join(".codewhale").join("skills")) +} + +pub(crate) fn default_mcp_config_path() -> Option { + effective_home_dir().map(|home| { + let primary = home.join(".codewhale").join("mcp.json"); + if primary.exists() { + return primary; + } + let legacy = home.join(".deepseek").join("mcp.json"); + if legacy.exists() { + return legacy; + } + primary + }) +} + +pub(crate) fn default_notes_path() -> Option { + effective_home_dir().map(|home| { + let primary = home.join(".codewhale").join("notes.txt"); + if primary.exists() { + return primary; + } + let legacy = home.join(".deepseek").join("notes.txt"); + if legacy.exists() { + return legacy; + } + primary + }) +} + +pub(crate) fn default_memory_path() -> Option { + effective_home_dir().map(|home| { + let primary = home.join(".codewhale").join("memory.md"); + if primary.exists() { + return primary; + } + let legacy = home.join(".deepseek").join("memory.md"); + if legacy.exists() { + return legacy; + } + primary + }) +} diff --git a/crates/tui/src/config/search.rs b/crates/tui/src/config/search.rs new file mode 100644 index 0000000000..26b14ab496 --- /dev/null +++ b/crates/tui/src/config/search.rs @@ -0,0 +1,135 @@ +//! Web-search provider configuration types. +//! +//! Self-contained `[search]` table types extracted verbatim from `config.rs`. +//! Re-exported from `crate::config` via `pub use search::*;` so existing +//! `crate::config::SearchProvider` (and sibling) paths resolve unchanged +//! (#3311). + +use serde::{Deserialize, Serialize}; + +/// Search provider enumeration — selects which backend `web_search` uses. 
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum SearchProvider { + /// Bing HTML scraping. No API key needed. + Bing, + /// DuckDuckGo HTML scraping with Bing fallback. No API key needed. + #[default] + #[serde(alias = "duckduckgo")] + DuckDuckGo, + /// Tavily AI Search API (). Requires api_key. + Tavily, + /// Bocha AI Search API (). Requires api_key. + Bocha, + /// Metaso AI Search API (). Uses built-in default key + /// or `METASO_API_KEY` env var; configurable via `[search] api_key`. + #[serde(alias = "metaso")] + Metaso, + /// SearXNG JSON search API. Requires a trusted/self-hosted `base_url`. + #[serde(alias = "searx", alias = "searx-ng", alias = "searx_ng")] + Searxng, + /// Baidu AI Search API (). Requires api_key. + #[serde( + alias = "baidu-search", + alias = "baidu_ai_search", + alias = "baidu_search", + alias = "baidu-ai-search" + )] + Baidu, + /// Volcengine Ark web_search via Responses API. Requires api_key. + /// Free tier: 20K queries/month per API key. Falls back to + /// `VOLCENGINE_API_KEY` / `VOLCENGINE_ARK_API_KEY` / `ARK_API_KEY` + /// env vars when `[search] api_key` is not set. + #[serde( + alias = "volcengine", + alias = "ark", + alias = "volc", + alias = "volcengine-ark", + alias = "volcengine_ark", + alias = "volc-ark" + )] + Volcengine, + /// Sofya web search API (). Requires api_key + /// (`ay_live_...`). Returns full extracted page content rather than + /// snippets; falls back to the `SOFYA_API_KEY` env var when + /// `[search] api_key` is not set. 
+ Sofya, +} + +impl SearchProvider { + #[must_use] + pub fn parse(value: &str) -> Option { + match value.trim().to_ascii_lowercase().as_str() { + "bing" => Some(Self::Bing), + "duckduckgo" | "duck-duck-go" | "duck_duck_go" | "ddg" => Some(Self::DuckDuckGo), + "tavily" => Some(Self::Tavily), + "bocha" => Some(Self::Bocha), + "metaso" => Some(Self::Metaso), + "searxng" | "searx" | "searx-ng" | "searx_ng" => Some(Self::Searxng), + "baidu" | "baidu-search" | "baidu_search" | "baidu-ai-search" | "baidu_ai_search" => { + Some(Self::Baidu) + } + "volcengine" | "ark" | "volc" | "volcengine-ark" => Some(Self::Volcengine), + "sofya" => Some(Self::Sofya), + _ => None, + } + } + + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::Bing => "bing", + Self::DuckDuckGo => "duckduckgo", + Self::Tavily => "tavily", + Self::Bocha => "bocha", + Self::Metaso => "metaso", + Self::Searxng => "searxng", + Self::Baidu => "baidu", + Self::Volcengine => "volcengine", + Self::Sofya => "sofya", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SearchProviderSource { + Default, + Config, + EnvOverride, +} + +impl SearchProviderSource { + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::Default => "default", + Self::Config => "config", + Self::EnvOverride => "env override", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct SearchProviderResolution { + pub provider: SearchProvider, + pub source: SearchProviderSource, +} + +/// Web search provider configuration (`[search]` table in config.toml). +#[derive(Debug, Clone, Deserialize, Default)] +pub struct SearchConfig { + /// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha` | `metaso` | `searxng` | `baidu` | `volcengine` | `sofya`. Default: `duckduckgo`. + #[serde(default)] + pub provider: Option, + /// Optional search endpoint. With `duckduckgo`, this is a + /// DuckDuckGo-compatible HTML endpoint.
With `searxng`, this is the trusted + /// SearXNG instance root or `/search` endpoint. + #[serde(default)] + pub base_url: Option, + /// API key for Tavily, Bocha, Metaso, Baidu, Volcengine, or Sofya. Not required for Bing, DuckDuckGo, or SearXNG. + /// Metaso also falls back to `METASO_API_KEY` env var, then a built-in default. + /// Baidu also falls back to `BAIDU_SEARCH_API_KEY` env var. + /// Volcengine also falls back to `VOLCENGINE_API_KEY` / `VOLCENGINE_ARK_API_KEY` / `ARK_API_KEY` env vars. + #[serde(default)] + pub api_key: Option, +} diff --git a/crates/tui/src/config/subagent_limits.rs b/crates/tui/src/config/subagent_limits.rs new file mode 100644 index 0000000000..a4a98bf114 --- /dev/null +++ b/crates/tui/src/config/subagent_limits.rs @@ -0,0 +1,69 @@ +//! Sub-agent concurrency/timeout limits and their clamp resolvers. +//! +//! Pure numeric/string limit constants plus the two private clamp helpers that +//! operate solely on them. Extracted verbatim from `config.rs`; the constants +//! are re-exported via `pub use subagent_limits::*;` (preserving each item's +//! `pub`/`pub(crate)` visibility) and the resolvers are pulled back into +//! `config.rs` with a private `use`, so no new external surface is created +//! (#3311). + +pub const DEFAULT_MAX_SUBAGENTS: usize = 20; +pub const MAX_SUBAGENTS: usize = 20; +/// Upper bound for queued + running sub-agent admissions. This is deliberately +/// higher than the instantaneous concurrency cap so Workflow-style fanout can +/// opt into large bounded populations without unbounded queue growth. +pub const MAX_SUBAGENT_ADMISSION: usize = 200; +/// Default per-step DeepSeek API timeout for sub-agent requests, in seconds. +/// Matches the legacy hardcoded value so existing configs keep their old +/// behavior when `[subagents] api_timeout_secs` is unset (#1806, #1808). +pub const DEFAULT_SUBAGENT_API_TIMEOUT_SECS: u64 = 120; +/// Minimum accepted `[subagents] api_timeout_secs`.
A configured `0` (which +/// would otherwise produce an immediate timeout footgun) falls back to the +/// default instead of clamping up to this value before the runtime sees it. +pub const MIN_SUBAGENT_API_TIMEOUT_SECS: u64 = 1; +/// Maximum accepted `[subagents] api_timeout_secs` (30 minutes). The cap +/// keeps a misconfigured per-step timeout from masking real model/network +/// hangs forever. +pub const MAX_SUBAGENT_API_TIMEOUT_SECS: u64 = 1800; +/// Default wall-clock interval without manager-visible sub-agent progress +/// before a running child can be auto-cancelled to release its slot (#2614). +pub const DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS: u64 = 300; +/// Minimum accepted `[subagents] heartbeat_timeout_secs`. +pub const MIN_SUBAGENT_HEARTBEAT_TIMEOUT_SECS: u64 = 30; +/// Maximum accepted `[subagents] heartbeat_timeout_secs` (1 hour). +pub const MAX_SUBAGENT_HEARTBEAT_TIMEOUT_SECS: u64 = 3600; +/// Default per-SSE-chunk idle timeout, in seconds. +pub const DEFAULT_STREAM_CHUNK_TIMEOUT_SECS: u64 = 300; +/// Minimum accepted stream chunk timeout. +pub const MIN_STREAM_CHUNK_TIMEOUT_SECS: u64 = 1; +/// Maximum accepted stream chunk timeout.
+pub const MAX_STREAM_CHUNK_TIMEOUT_SECS: u64 = 3600; +pub(crate) const STREAM_CHUNK_TIMEOUT_ENV: &str = "DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS"; + +pub(crate) fn resolve_subagent_api_timeout_secs(raw: Option) -> u64 { + let raw = raw.unwrap_or(DEFAULT_SUBAGENT_API_TIMEOUT_SECS); + if raw == 0 { + return DEFAULT_SUBAGENT_API_TIMEOUT_SECS; + } + raw.clamp(MIN_SUBAGENT_API_TIMEOUT_SECS, MAX_SUBAGENT_API_TIMEOUT_SECS) +} + +pub(crate) fn resolve_subagent_heartbeat_timeout_secs( + raw: Option, + api_timeout_secs: u64, +) -> u64 { + let raw = raw.unwrap_or(DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS); + let configured = if raw == 0 { + DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS + } else { + raw.clamp( + MIN_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, + MAX_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, + ) + }; + let min_for_api = api_timeout_secs.saturating_add(30).clamp( + MIN_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, + MAX_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, + ); + configured.max(min_for_api) +} diff --git a/crates/tui/src/config/tests.rs b/crates/tui/src/config/tests.rs index 5d35be19f4..8e935b8fe7 100644 --- a/crates/tui/src/config/tests.rs +++ b/crates/tui/src/config/tests.rs @@ -6840,3 +6840,128 @@ fn huggingface_short_env_fallbacks_configure_route() -> Result<()> { assert_eq!(config.default_model(), "org/short-model"); Ok(()) } + +// === #1519 custom OpenAI-compatible provider slice === + +#[test] +fn custom_provider_flatten_map_parses_alongside_named_provider() { + // A custom `[providers.my_thing]` table lands in the flatten map while a + // built-in `[providers.openai]` table still binds its named field. 
+ let config: Config = toml::from_str( + r#" +provider = "my_thing" + +[providers.openai] +api_key = "openai-key" + +[providers.my_thing] +kind = "openai-compatible" +base_url = "https://api.example.com/v1" +model = "custom-model-v1" +api_key_env = "EXAMPLE_API_KEY" +"#, + ) + .expect("config with a custom provider table should parse"); + + let providers = config.providers.as_ref().expect("providers table present"); + // Built-in named field still works. + assert_eq!(providers.openai.api_key.as_deref(), Some("openai-key")); + // The custom entry is captured by name in the flatten map. + let custom = providers + .custom_provider_config("my_thing") + .expect("custom entry parsed into flatten map"); + assert_eq!(custom.kind.as_deref(), Some("openai-compatible")); + assert_eq!( + custom.base_url.as_deref(), + Some("https://api.example.com/v1") + ); + assert_eq!(custom.model.as_deref(), Some("custom-model-v1")); + assert_eq!(custom.api_key_env.as_deref(), Some("EXAMPLE_API_KEY")); + assert!(custom.is_openai_compatible_custom()); + // A built-in provider name never leaks into the custom map. + assert!(providers.custom_provider_config("openai").is_none()); +} + +#[test] +fn api_provider_returns_custom_for_custom_name_and_deepseek_for_junk() { + // Names a real custom table → Custom (the #1519 silent-misroute fix). + let mut custom = HashMap::new(); + custom.insert( + "my_thing".to_string(), + ProviderConfig { + kind: Some("openai-compatible".to_string()), + base_url: Some("https://api.example.com/v1".to_string()), + ..Default::default() + }, + ); + let config = Config { + provider: Some("my_thing".to_string()), + providers: Some(ProvidersConfig { + custom, + ..Default::default() + }), + ..Config::default() + }; + assert_eq!(config.api_provider(), ApiProvider::Custom); + + // Genuine junk that matches no built-in provider AND no custom table → + // falls back to DeepSeek, exactly as before this slice. 
+ let junk = Config { + provider: Some("totally-not-a-provider".to_string()), + ..Config::default() + }; + assert_eq!(junk.api_provider(), ApiProvider::Deepseek); +} + +#[test] +fn custom_provider_kind_only_accepts_openai_compatible() { + let ok = ProviderConfig { + kind: Some("openai-compatible".to_string()), + ..Default::default() + }; + assert!(ok.is_openai_compatible_custom()); + + // Underscore spelling and case are tolerated. + let underscore = ProviderConfig { + kind: Some("OpenAI_Compatible".to_string()), + ..Default::default() + }; + assert!(underscore.is_openai_compatible_custom()); + + // Any other declared wire format is rejected (callers error on these). + let other = ProviderConfig { + kind: Some("anthropic-messages".to_string()), + ..Default::default() + }; + assert!(!other.is_openai_compatible_custom()); + + // Built-in providers leave `kind` unset. + assert!(!ProviderConfig::default().is_openai_compatible_custom()); +} + +#[test] +fn custom_provider_base_url_and_model_resolve_from_named_table() { + let mut custom = HashMap::new(); + custom.insert( + "my_thing".to_string(), + ProviderConfig { + kind: Some("openai-compatible".to_string()), + base_url: Some("https://api.example.com/v1".to_string()), + model: Some("custom-model-v1".to_string()), + ..Default::default() + }, + ); + let config = Config { + provider: Some("my_thing".to_string()), + providers: Some(ProvidersConfig { + custom, + ..Default::default() + }), + ..Config::default() + }; + + // Resolution reads the named table, not a DeepSeek default. 
+ assert_eq!(config.api_provider(), ApiProvider::Custom); + assert_eq!(config.deepseek_base_url(), "https://api.example.com/v1"); + assert_eq!(config.default_model(), "custom-model-v1"); +} diff --git a/crates/tui/src/config_persistence.rs b/crates/tui/src/config_persistence.rs index 0731e86166..165c46e177 100644 --- a/crates/tui/src/config_persistence.rs +++ b/crates/tui/src/config_persistence.rs @@ -315,6 +315,13 @@ fn provider_base_url_table_key(provider: ApiProvider) -> anyhow::Result<&'static ApiProvider::Zai => Ok("zai"), ApiProvider::Stepfun => Ok("stepfun"), ApiProvider::Minimax => Ok("minimax"), + // Custom providers live under a user-chosen `[providers.]` table, + // not a fixed key. Persisting base_url through this static-key path is + // out of scope for the #1519 constrained slice; users edit the named + // table directly. + ApiProvider::Custom => { + anyhow::bail!("custom providers store base_url in their named [providers.] table") + } } } diff --git a/crates/tui/src/context_budget.rs b/crates/tui/src/context_budget.rs index 521a2b298d..9095ff4905 100644 --- a/crates/tui/src/context_budget.rs +++ b/crates/tui/src/context_budget.rs @@ -36,6 +36,11 @@ // indicator (those consumers are wired in a later pass). Allow dead_code so the // substrate can land warning-clean ahead of its callers, matching how other // not-yet-wired primitives in this crate are gated. +// +// Note: the context report now consumes `PressureLevel::from_usage_percent` and +// `label`, but the rest of the substrate (`ContextBudget` and its methods, +// `PressureLevel::suggests_compaction`) is still pending its engine/TUI +// consumers, so the blanket allow stays until those land. 
#![allow(dead_code)] /// Fraction of the window, expressed as a percentage, at or above which diff --git a/crates/tui/src/context_report.rs b/crates/tui/src/context_report.rs index 435d5bd418..347961dd52 100644 --- a/crates/tui/src/context_report.rs +++ b/crates/tui/src/context_report.rs @@ -10,10 +10,14 @@ use std::path::Path; use chrono::{SecondsFormat, Utc}; use serde::Serialize; +use codewhale_config::route::RouteLimits; + use crate::compaction::{estimate_input_tokens_conservative, estimate_text_tokens_conservative}; -use crate::config::Config; -use crate::models::{ContentBlock, Message, context_window_for_model}; +use crate::config::{ApiProvider, Config, provider_capability}; +use crate::context_budget::PressureLevel; +use crate::models::{ContentBlock, Message}; use crate::prompts::{COMPACT_TEMPLATE, Personality}; +use crate::route_budget::route_context_window_tokens; use crate::tui::app::App; #[derive(Debug, Clone, Serialize)] @@ -157,7 +161,9 @@ impl ReportBuilder { fn finish( self, + provider: ApiProvider, model: &str, + route_limits: Option, active_context_estimated_tokens: usize, note: impl Into, ) -> PromptSourceMap { @@ -166,7 +172,11 @@ impl ReportBuilder { .iter() .map(|entry| entry.estimated_tokens) .sum(); - let context_window_tokens = context_window_for_model(model); + // Overlay the resolved route's context window when known, falling back + // to the provider+model capability matrix (route_context_window_tokens + // always yields a concrete value, so this is never None at runtime). 
+ let context_window_tokens = + Some(route_context_window_tokens(provider, model, route_limits)); let budget_used_percent = context_window_tokens.map(|window| { ((active_context_estimated_tokens as f64 / f64::from(window)) * 100.0).clamp(0.0, 100.0) }); @@ -188,7 +198,9 @@ pub fn build_context_report(app: &App) -> PromptSourceMap { let active_context_estimated_tokens = estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref()); builder.finish( + app.api_provider, &app.model, + app.active_route_limits, active_context_estimated_tokens, "Diagnostic source map. Token counts are conservative estimates and may differ from provider billing.", ) @@ -229,10 +241,12 @@ pub fn build_headless_context_report(config: &Config, workspace: &Path) -> Promp None, ActivationReason::RuntimeState, &format!( - "provider: {}\nmodel: {}\ncontext_window: {:?}", + "provider: {}\nmodel: {}\ncontext_window: {}", config.api_provider().as_str(), model, - context_window_for_model(&model) + // Route limits aren't resolved in the headless doctor path, so report + // the provider+model capability window (route overlay is unavailable). + provider_capability(config.api_provider(), &model).context_window ), CountingConfidence::Approximate, None, @@ -244,7 +258,10 @@ pub fn build_headless_context_report(config: &Config, workspace: &Path) -> Promp .map(|entry| entry.estimated_tokens) .sum(); builder.finish( + config.api_provider(), &model, + // Route limits aren't resolved in the headless doctor path. + None, active_context_estimated_tokens, "Headless diagnostic source map. Conversation, tool results, and live TUI state are unavailable in doctor mode.", ) @@ -565,11 +582,11 @@ fn content_block_text(block: &ContentBlock) -> String { } fn pressure_label(percent: Option) -> &'static str { + // Delegate to the unified pressure thresholds so this diagnostic label can't + // drift from `context_budget::PressureLevel`. 
`None` (unknown window) keeps + // its own sentinel since a level requires a usage percentage. match percent { - Some(value) if value >= 90.0 => "critical", - Some(value) if value >= 70.0 => "high", - Some(value) if value >= 40.0 => "moderate", - Some(_) => "low", + Some(value) => PressureLevel::from_usage_percent(value).label(), None => "unknown", } } @@ -704,7 +721,7 @@ mod tests { Some(1), )); add_message_entries(&mut builder, &messages); - let report = builder.finish("deepseek-v4-pro", 123, "test"); + let report = builder.finish(ApiProvider::Deepseek, "deepseek-v4-pro", None, 123, "test"); let json = context_report_json(&report); assert!(json.contains("\"source_kind\": \"tool_result\"")); @@ -732,13 +749,64 @@ mod tests { CountingConfidence::High, Some(7), )); - let report = builder.finish("deepseek-v4-pro", 525, "test"); + let report = builder.finish(ApiProvider::Deepseek, "deepseek-v4-pro", None, 525, "test"); let summary = format_context_summary(&report); assert!(summary.contains("Context Summary")); assert!(summary.contains("Tool schemas (500)")); } + #[test] + fn finish_reflects_route_context_window_over_model_default() { + // deepseek-v4-pro defaults to a 1M window; a resolved route advertising a + // smaller window must win in the report's context_window_tokens. 
+ let route_window = 128_000u64; + let model_default = crate::models::context_window_for_model("deepseek-v4-pro") + .expect("model has a default window"); + assert_ne!( + u64::from(model_default), + route_window, + "test fixture must differ from the model default to be meaningful" + ); + + let limits = RouteLimits { + context_tokens: Some(route_window), + input_tokens: None, + output_tokens: None, + }; + let builder = ReportBuilder::new(); + let report = builder.finish( + ApiProvider::Deepseek, + "deepseek-v4-pro", + Some(limits), + 10_000, + "test", + ); + + assert_eq!(report.context_window_tokens, Some(route_window as u32)); + // Budget percent is computed against the route window, not the default. + let expected = (10_000.0 / route_window as f64) * 100.0; + let actual = report.budget_used_percent.expect("window known"); + assert!( + (actual - expected).abs() < 1e-6, + "got {actual}, want {expected}" + ); + } + + #[test] + fn pressure_label_matches_unified_pressure_levels() { + // Boundaries mirror context_budget::PressureLevel. 
+ assert_eq!(pressure_label(None), "unknown"); + assert_eq!(pressure_label(Some(0.0)), "low"); + assert_eq!(pressure_label(Some(39.9)), "low"); + assert_eq!(pressure_label(Some(40.0)), "moderate"); + assert_eq!(pressure_label(Some(74.9)), "moderate"); + assert_eq!(pressure_label(Some(75.0)), "high"); + assert_eq!(pressure_label(Some(89.9)), "high"); + assert_eq!(pressure_label(Some(90.0)), "critical"); + assert_eq!(pressure_label(Some(100.0)), "critical"); + } + #[test] fn tool_schema_entry_serializes_like_runtime_catalog() { let tool = Tool { diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 983faa1f07..85fbd33cef 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -763,7 +763,7 @@ impl Engine { ) })?; let route_config = route.config; - match DeepSeekClient::new(&route_config) { + match DeepSeekClient::from_candidate(&route_config, &route.candidate) { Ok(client) => { self.api_provider = provider; self.api_config = route_config; @@ -3407,6 +3407,10 @@ pub(super) fn auto_review_run_origin_for_plan( } } +// The parameter list intentionally mirrors `AutoReviewContext::from_tool_call`, +// which this thin wrapper builds; the 8 call sites (1 prod + tests) read clearer +// passing the fields than constructing a context first. +#[allow(clippy::too_many_arguments)] pub(super) fn auto_review_plan_decision( policy: &crate::tui::auto_review::AutoReviewPolicy, tool_name: &str, diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index 038532c981..9864b5eb53 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -344,6 +344,7 @@ impl Engine { let effective_reasoning_effort = resolve_auto_effort( self.session.reasoning_effort.as_deref(), &self.session.messages, + self.api_provider, ); // Check prefix-cache stability before building the request. 
@@ -2809,7 +2810,11 @@ fn should_emit_thinking_only_status( /// When the configured effort is `"auto"`, inspects the last user message /// and calls [`crate::auto_reasoning::select`] to pick the actual tier. /// Non-`"auto"` values pass through unchanged. -fn resolve_auto_effort(reasoning_effort: Option<&str>, messages: &[Message]) -> Option { +fn resolve_auto_effort( + reasoning_effort: Option<&str>, + messages: &[Message], + provider: crate::config::ApiProvider, +) -> Option { match reasoning_effort { Some("auto") => { // Find the last user message in the conversation. @@ -2841,7 +2846,10 @@ fn resolve_auto_effort(reasoning_effort: Option<&str>, messages: &[Message]) -> // their own turn pass and can pass is_subagent=true when they // call this function directly. let tier = crate::auto_reasoning::select(false, &last_msg); - let resolved = tier.as_setting().to_string(); + let resolved = + crate::model_routing::normalize_auto_route_effort_for_provider(provider, tier) + .as_setting() + .to_string(); tracing::debug!( reasoning_effort = %resolved, is_subagent = false, @@ -3077,7 +3085,11 @@ mod tests { }]; assert_eq!( - resolve_auto_effort(Some("auto"), &messages), + resolve_auto_effort( + Some("auto"), + &messages, + crate::config::ApiProvider::Deepseek + ), Some("high".to_string()), "auto thinking should classify the user request, not stored metadata" ); diff --git a/crates/tui/src/fleet/alerts.rs b/crates/tui/src/fleet/alerts.rs index 9f2f7e47a5..663c5f1858 100644 --- a/crates/tui/src/fleet/alerts.rs +++ b/crates/tui/src/fleet/alerts.rs @@ -716,6 +716,7 @@ mod tests { max: Some(1.0), notes: Some("regex scorer could not be compiled".to_string()), }), + resolved_route: None, }; let alert = FleetAlertEvent::verifier_failed(&receipt).unwrap(); diff --git a/crates/tui/src/fleet/ledger.rs b/crates/tui/src/fleet/ledger.rs index 0dc530cbe8..bb651b5f8d 100644 --- a/crates/tui/src/fleet/ledger.rs +++ b/crates/tui/src/fleet/ledger.rs @@ -936,6 +936,7 @@ mod tests { 
failure_kind: None, artifacts: vec![], score: None, + resolved_route: None, }) .unwrap(); @@ -968,6 +969,7 @@ mod tests { failure_kind: None, artifacts: vec![], score: None, + resolved_route: None, }; ledger.record_receipt(receipt.clone()).unwrap(); let state = ledger.rebuild_state().unwrap(); diff --git a/crates/tui/src/fleet/manager.rs b/crates/tui/src/fleet/manager.rs index 3dfa6062f7..127c6f40a5 100644 --- a/crates/tui/src/fleet/manager.rs +++ b/crates/tui/src/fleet/manager.rs @@ -898,12 +898,17 @@ impl FleetManager { &task.entry.task_id, &task.worker_id, )?; + // Mint the resolved-route snapshot once (#3154) so every receipt path — + // verification and the simulated/transport fallback below — persists the + // same honest, secret-free route detail. + let resolved_route = self.resolve_task_route(&task.task_spec); let verification_input = FleetTaskVerificationInput { run_id: task.entry.run_id.clone(), task_id: task.entry.task_id.clone(), worker_id: task.worker_id.clone(), exit_code, artifacts, + resolved_route, }; if task.task_spec.scorer.is_some() { let verification = @@ -948,10 +953,24 @@ impl FleetManager { failure_kind, artifacts: verification_input.artifacts, score: None, + resolved_route: verification_input.resolved_route, })?; Ok(true) } + /// Resolve the route snapshot to persist on a task's receipt (#3154). + /// + /// Loads workspace agent profiles so role/loadout intent composes the same + /// way as the worker-spec path, then mints a secret-free route candidate via + /// the hermetic resolver bridge. Returns `None` (never a fabricated route) + /// when profiles or resolution are unavailable. 
+ fn resolve_task_route(&self, task_spec: &FleetTaskSpec) -> Option { + let agent_profiles = super::profile::load_workspace_agent_profiles(&self.workspace) + .ok() + .unwrap_or_default(); + worker_runtime::resolve_fleet_route(task_spec, &agent_profiles) + } + fn task_artifacts_for_receipt( &self, run_id: &FleetRunId, @@ -1662,6 +1681,7 @@ mod tests { failure_kind: None, artifacts: Vec::new(), score: None, + resolved_route: None, }) .unwrap(); } @@ -2339,6 +2359,55 @@ esac ); assert_eq!(state.receipts.len(), 10); + // #3166 scope #10: every receipt persists a resolved-route snapshot + // (#3154) with non-empty provider/wire-model, a role, and the resolver + // source — and the serialized receipt leaks no credential material. + for (key, receipt) in &state.receipts { + let route = receipt + .resolved_route + .as_ref() + .unwrap_or_else(|| panic!("receipt {key} should carry a resolved route")); + assert!( + !route.provider_id.is_empty(), + "receipt {key} resolved-route provider_id must be non-empty" + ); + assert!( + !route.wire_model_id.is_empty(), + "receipt {key} resolved-route wire_model_id must be non-empty" + ); + assert!( + route.role.as_deref().is_some_and(|role| !role.is_empty()), + "receipt {key} resolved-route should record a role" + ); + assert_eq!( + route.source, "resolver", + "receipt {key} resolved-route source must be the resolver" + ); + + let receipt_json = serde_json::to_string(receipt).unwrap(); + let haystack = receipt_json.to_ascii_lowercase(); + for needle in [ + "api_key", + "apikey", + "api-key", + "authorization", + "bearer ", + "auth_token", + "auth-token", + "password", + "credential", + "sk-ant-", + "sk-proj-", + "sk-or-", + "secret", + ] { + assert!( + !haystack.contains(needle), + "receipt {key} JSON must not contain secret marker {needle:?}: {receipt_json}" + ); + } + } + let failed_receipt = &state.receipts[&format!("{}:verifier-4-fail", report.run_id.0)]; assert_eq!(failed_receipt.result, FleetTaskResult::Fail); assert_eq!( diff 
--git a/crates/tui/src/fleet/task_spec.rs b/crates/tui/src/fleet/task_spec.rs index 5814989b61..56c9b3a431 100644 --- a/crates/tui/src/fleet/task_spec.rs +++ b/crates/tui/src/fleet/task_spec.rs @@ -74,6 +74,8 @@ pub struct FleetTaskVerificationInput { pub worker_id: String, pub exit_code: Option, pub artifacts: Vec, + /// Resolved-route snapshot to persist on the receipt (#3154). + pub resolved_route: Option, } #[derive(Debug, Clone)] @@ -299,6 +301,7 @@ pub fn record_verification_receipt( failure_kind: verification.failure_kind, artifacts, score: Some(verification.score), + resolved_route: input.resolved_route.clone(), }; ledger.record_receipt(receipt.clone())?; Ok(receipt) @@ -796,6 +799,7 @@ mod tests { worker_id: "worker-1".to_string(), exit_code: Some(0), artifacts: vec![], + resolved_route: None, }; let pass = verify_task_result( @@ -902,6 +906,7 @@ mod tests { worker_id: "worker-1".to_string(), exit_code: Some(1), artifacts: vec![log], + resolved_route: None, }; let verification = verify_task_result( tmp.path(), diff --git a/crates/tui/src/fleet/worker_runtime.rs b/crates/tui/src/fleet/worker_runtime.rs index 96b8aa6e01..7cbc2c6993 100644 --- a/crates/tui/src/fleet/worker_runtime.rs +++ b/crates/tui/src/fleet/worker_runtime.rs @@ -15,11 +15,14 @@ use anyhow::{Result, bail}; use codewhale_protocol::fleet::{ - FleetHostSpec, FleetTaskSpec, FleetTaskWorkerProfile, FleetWorkerEventPayload, FleetWorkerSpec, + FleetHostSpec, FleetResolvedRoute, FleetTaskSpec, FleetTaskWorkerProfile, + FleetWorkerEventPayload, FleetWorkerSpec, }; use super::host::FleetHostKind; use super::profile::AgentProfile; +use crate::config::ApiProvider; +use crate::route_runtime::resolve_route_candidate; use crate::tools::subagent::{ AgentWorkerSpec, AgentWorkerStatus, AgentWorkerToolProfile, SubAgentType, }; @@ -172,6 +175,82 @@ pub fn fleet_task_to_worker_spec_with_profiles( }) } +/// Mint a [`FleetResolvedRoute`] snapshot for a fleet task (#3154). 
+/// +/// This calls the existing hermetic resolver bridge +/// ([`resolve_route_candidate`]) so the persisted route reflects the same +/// resolution semantics the runtime would use, then records only non-sensitive +/// shape (provider id/kind, model ids, protocol) combined with the already +/// computed effective role/loadout intent. `source` is `"resolver"`. +/// +/// Honesty rules: +/// - `canonical_model` stays `None` when the resolver could not pin one. +/// - The provider comes from the resolver default (the worker profile carries +/// no provider authority); a task-level `model` selector is forwarded as the +/// model selector. No reasoning/pricing fields are fabricated. +/// +/// Returns `None` (never a fabricated route) when resolution fails, so callers +/// degrade gracefully without inventing detail. +pub(crate) fn resolve_fleet_route( + task_spec: &FleetTaskSpec, + agent_profiles: &[AgentProfile], +) -> Option { + let agent_profile = resolve_task_agent_profile(task_spec, agent_profiles) + .ok() + .flatten(); + let worker_profile = task_spec.worker.as_ref(); + let role = effective_fleet_role(worker_profile, agent_profile); + let loadout = effective_fleet_loadout(worker_profile, agent_profile); + + // A task-level explicit model is the only model selector the spec carries + // with provider-resolution intent; otherwise let the resolver pick the + // provider default. Provider authority belongs to route resolution, so we + // do not infer a provider here. + let model_selector = worker_profile + .and_then(|worker| worker.model.as_deref()) + .map(str::trim) + .filter(|model| !model.is_empty() && *model != "auto"); + + // The worker profile carries no provider authority, so resolve within the + // default provider scope (mirrors `ProviderKind::default()`). The resolver + // is fully offline/hermetic and never reads secrets, env, or config. 
+ let candidate = + resolve_route_candidate(ApiProvider::Deepseek, model_selector, None, None).ok()?; + + Some(FleetResolvedRoute { + provider_id: candidate.provider_id.as_str().to_string(), + provider_kind: candidate.provider_kind.as_str().to_string(), + canonical_model: candidate + .canonical_model + .as_ref() + .map(|model| model.as_str().to_string()), + wire_model_id: candidate.wire_model_id.as_str().to_string(), + protocol: route_protocol_label(candidate.protocol).to_string(), + role, + loadout: loadout_intent_label(&loadout), + source: "resolver".to_string(), + }) +} + +/// Plain-string label for a resolved wire protocol (no config type leaks). +fn route_protocol_label(protocol: codewhale_config::route::RequestProtocol) -> &'static str { + use codewhale_config::route::RequestProtocol; + match protocol { + RequestProtocol::ChatCompletions => "chat_completions", + RequestProtocol::Responses => "responses", + RequestProtocol::AnthropicMessages => "anthropic_messages", + } +} + +/// Collapse an `inherit` (no-op) loadout to `None` for the receipt. +fn loadout_intent_label(loadout: &codewhale_config::FleetLoadout) -> Option { + if *loadout == codewhale_config::FleetLoadout::Inherit { + None + } else { + Some(loadout.as_str().to_string()) + } +} + pub(crate) fn fleet_task_prompt(task_spec: &FleetTaskSpec) -> String { fleet_task_prompt_with_profile(task_spec, None) } @@ -664,6 +743,75 @@ mod tests { ); } + #[test] + fn resolve_fleet_route_mints_secret_free_snapshot_from_resolver() { + let task = fleet_task( + "route-1", + Some(worker_profile( + None, + Some("builder"), + Some("fast"), + None, + None, + vec!["read_file"], + )), + ); + let route = resolve_fleet_route(&task, &[]).expect("default route should resolve offline"); + + // Honest, non-empty route shape from the resolver. 
+ assert!(!route.provider_id.is_empty()); + assert!(!route.provider_kind.is_empty()); + assert!(!route.wire_model_id.is_empty()); + assert_eq!(route.protocol, "chat_completions"); + assert_eq!(route.role.as_deref(), Some("builder")); + assert_eq!(route.loadout.as_deref(), Some("fast")); + assert_eq!(route.source, "resolver"); + + // No-secrets: the serialized snapshot carries no credential markers. + let json = serde_json::to_string(&route).unwrap(); + let haystack = json.to_ascii_lowercase(); + for needle in [ + "api_key", + "apikey", + "api-key", + "authorization", + "bearer ", + "auth_token", + "auth-token", + "password", + "credential", + "sk-ant-", + "sk-proj-", + "sk-or-", + "secret", + ] { + assert!( + !haystack.contains(needle), + "resolved-route JSON must not contain secret marker {needle:?}: {json}" + ); + } + } + + #[test] + fn resolve_fleet_route_omits_inherit_loadout() { + // No loadout/model_class intent → `inherit` collapses to None, never an + // "inherit" string on the receipt. 
+ let task = fleet_task( + "route-2", + Some(worker_profile( + None, + Some("scout"), + None, + None, + None, + vec!["read_file"], + )), + ); + let route = resolve_fleet_route(&task, &[]).expect("route should resolve"); + assert_eq!(route.role.as_deref(), Some("scout")); + assert!(route.loadout.is_none()); + } + #[test] fn fleet_tool_profile_empty_uses_inherited() { let profile = FleetTaskWorkerProfile { diff --git a/crates/tui/src/model_routing.rs b/crates/tui/src/model_routing.rs index a2bc227037..fdcb320d43 100644 --- a/crates/tui/src/model_routing.rs +++ b/crates/tui/src/model_routing.rs @@ -522,7 +522,12 @@ pub(crate) fn resolve_explicit_route_with_inventory( return Some(AutoRouteSelection { provider: candidate.provider, model: candidate.model.clone(), - reasoning_effort: config.reasoning_effort().map(ReasoningEffort::from_setting), + reasoning_effort: config.reasoning_effort().map(|setting| { + normalize_auto_route_effort_for_provider( + candidate.provider, + ReasoningEffort::from_setting(setting), + ) + }), source: AutoRouteSource::Heuristic, }); } @@ -539,7 +544,12 @@ pub(crate) fn resolve_explicit_route_with_inventory( Some(AutoRouteSelection { provider: candidate.provider, model: candidate.model.clone(), - reasoning_effort: config.reasoning_effort().map(ReasoningEffort::from_setting), + reasoning_effort: config.reasoning_effort().map(|setting| { + normalize_auto_route_effort_for_provider( + candidate.provider, + ReasoningEffort::from_setting(setting), + ) + }), source: AutoRouteSource::Heuristic, }) } @@ -1044,6 +1054,35 @@ mod tests { assert_eq!(route.reasoning_effort, Some(ReasoningEffort::Off)); } + #[test] + fn explicit_route_to_nonactive_provider_uses_that_providers_effort() { + // Active provider is DeepSeek (whose effort floor is low/medium), but the + // explicit model `GLM-5.2` only routes to Z.ai. The resolved effort must + // be normalized for Z.ai — not left at DeepSeek's raw `low` setting. 
+ let _env_lock = crate::test_support::lock_test_env(); + let _deepseek = crate::test_support::EnvVarGuard::set("DEEPSEEK_API_KEY", "ds-key"); + let _zai = crate::test_support::EnvVarGuard::set("ZAI_API_KEY", "zai-key"); + let config = Config { + provider: Some("deepseek".to_string()), + reasoning_effort: Some("low".to_string()), + ..Default::default() + }; + + let route = resolve_explicit_route_with_inventory(&config, "GLM-5.2") + .expect("explicit GLM route should resolve to its provider"); + + assert_eq!( + route.provider, + ApiProvider::Zai, + "GLM-5.2 must route to Z.ai, not the active DeepSeek provider" + ); + assert_eq!( + route.reasoning_effort, + Some(ReasoningEffort::High), + "low must be normalized up to high for the Z.ai route, not passed through" + ); + } + #[tokio::test] #[allow(clippy::await_holding_lock)] async fn inventory_auto_route_resolves_active_authenticated_provider() { diff --git a/crates/tui/src/route_runtime.rs b/crates/tui/src/route_runtime.rs index ba4a83fae3..6827b53041 100644 --- a/crates/tui/src/route_runtime.rs +++ b/crates/tui/src/route_runtime.rs @@ -60,7 +60,14 @@ fn prepared_route_config( model_selector: Option<&str>, ) -> Config { let mut route_config = config.clone(); - route_config.provider = Some(provider.as_str().to_string()); + // For built-in providers, stamp the canonical provider id. For the dynamic + // custom identity (#1519) the original `provider = ""` IS the lookup + // key into the `[providers.]` flatten map, so it must be preserved — + // overwriting it with the literal "custom" id would break base_url/model + // resolution and silently misroute. 
+ if provider != ApiProvider::Custom { + route_config.provider = Some(provider.as_str().to_string()); + } if matches!(provider, ApiProvider::NvidiaNim) && route_config .base_url @@ -175,4 +182,78 @@ mod tests { None ); } + + fn custom_config(base_url: &str, model: &str) -> Config { + let mut custom = std::collections::HashMap::new(); + custom.insert( + "my_thing".to_string(), + ProviderConfig { + kind: Some("openai-compatible".to_string()), + base_url: Some(base_url.to_string()), + model: Some(model.to_string()), + api_key_env: Some("EXAMPLE_API_KEY".to_string()), + ..Default::default() + }, + ); + Config { + provider: Some("my_thing".to_string()), + providers: Some(ProvidersConfig { + custom, + ..Default::default() + }), + ..Default::default() + } + } + + #[test] + fn custom_provider_resolves_to_custom_endpoint_and_verbatim_model() { + use codewhale_config::route::RequestProtocol; + + let config = custom_config("https://api.example.com/v1", "vendor/custom-model-v1"); + let route = resolve_runtime_route(&config, ApiProvider::Custom, None) + .expect("custom provider should resolve"); + + // Endpoint + model come from the named table; the prefixed model id is + // preserved verbatim as the wire id (no provider-prefix sniffing). + assert_eq!( + route.candidate.endpoint.base_url, + "https://api.example.com/v1" + ); + assert_eq!( + route.candidate.wire_model_id.as_str(), + "vendor/custom-model-v1" + ); + assert_eq!(route.model, "vendor/custom-model-v1"); + assert_eq!(route.candidate.protocol, RequestProtocol::ChatCompletions); + // HTTPS endpoint: route is valid with no insecure-http advisory. + assert!(route.candidate.validation.ok); + assert!(route.candidate.validation.messages.is_empty()); + // The selected provider name is preserved (not overwritten with "custom"). 
+ assert_eq!(route.config.provider.as_deref(), Some("my_thing")); + } + + #[test] + fn custom_provider_http_non_loopback_fires_insecure_advisory() { + let config = custom_config("http://gpu.internal.example:8000/v1", "custom-model-v1"); + let route = resolve_runtime_route(&config, ApiProvider::Custom, None) + .expect("custom http provider should resolve"); + + // Advisory only: the route still validates (ok == true) but warns that + // credentials would be sent in plaintext over a non-loopback http URL. + assert!(route.candidate.validation.ok); + assert!( + route + .candidate + .validation + .messages + .iter() + .any(|message| message.contains("insecure http")), + "expected insecure-http advisory, got {:?}", + route.candidate.validation.messages + ); + assert_eq!( + route.candidate.endpoint.base_url, + "http://gpu.internal.example:8000/v1" + ); + } } diff --git a/crates/tui/src/tools/js_execution.rs b/crates/tui/src/tools/js_execution.rs index 8e3c93a425..864b95f4cd 100644 --- a/crates/tui/src/tools/js_execution.rs +++ b/crates/tui/src/tools/js_execution.rs @@ -282,6 +282,9 @@ mod tests { ); } + // The env lock must stay held across the await so no other env-mutating test + // races the process env while the child node run reads it. + #[allow(clippy::await_holding_lock)] #[tokio::test] async fn execute_js_does_not_inherit_parent_secret_env() { if !node_present() { diff --git a/crates/tui/src/tools/pandoc.rs b/crates/tui/src/tools/pandoc.rs index 0dcd46a94f..92285d596d 100644 --- a/crates/tui/src/tools/pandoc.rs +++ b/crates/tui/src/tools/pandoc.rs @@ -214,6 +214,8 @@ mod tests { msg.contains("getXdgDirectory") || msg.contains("sHGetFolderPath") } + // Test-only skip diagnostic; the module-wide print_stderr deny targets prod code. 
+ #[allow(clippy::print_stderr)] async fn execute_pandoc_or_skip(input: Value, ctx: &ToolContext) -> Option { match PandocConvertTool.execute(input, ctx).await { Ok(result) => Some(result), diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index 2eb31fc49a..37d66edea8 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -15,7 +15,8 @@ use crate::artifacts::ArtifactRecord; use crate::client::{CacheWarmupKey, PromptInspection}; use crate::compaction::CompactionConfig; use crate::config::{ - ApiProvider, Config, DEFAULT_TEXT_MODEL, SavedCredential, has_api_key, save_api_key, + ApiProvider, Config, DEFAULT_TEXT_MODEL, SavedCredential, has_api_key, has_api_key_for, + save_api_key, }; use crate::config_ui::ConfigUiMode; use crate::hooks::{HookContext, HookEvent, HookExecutor, HookResult}; @@ -1069,17 +1070,76 @@ pub enum InitialInput { Submit(String), } +/// Durable Agent-era permission baseline that Plan/YOLO restore to (#3386). +/// +/// Mode cycling used to be tangled with permission policy: each mode mutated +/// `allow_shell`/`trust_mode`/`approval_mode` directly and ad-hoc +/// `YoloRestoreState`/`PlanRestoreState` snapshots tried to put things back on +/// exit. That made it easy to leak YOLO's elevated authority into Agent. +/// +/// Instead we keep one canonical baseline here — the permission surface the +/// user has chosen for Agent mode — and derive every mode's effective policy +/// from it via [`base_policy_for_mode`]. `set_mode` refreshes this from the +/// live fields whenever the user leaves Agent, so toggling shell/trust/approval +/// in Agent (wherever that happens in the UI) is captured before any transient +/// Plan/YOLO policy overwrites the live mirrors. #[derive(Debug, Clone, Copy)] -struct YoloRestoreState { +struct ModeSessionPrefs { + agent_allow_shell: bool, + agent_trust_mode: bool, + agent_approval_mode: ApprovalMode, +} + +/// The permission policy a given [`AppMode`] resolves to (#3386). 
+/// +/// This is a pure projection of `(mode, prefs)` — see [`base_policy_for_mode`]. +/// The App keeps `allow_shell`/`trust_mode`/`approval_mode`/`yolo` as derived +/// mirrors of these values so the rest of the crate can keep reading the plain +/// booleans without a type migration. +#[derive(Debug, Clone, Copy)] +struct EffectiveModePolicy { + #[allow(dead_code)] + mode: AppMode, allow_shell: bool, trust_mode: bool, approval_mode: ApprovalMode, + /// Whether tool calls auto-approve (YOLO authority). Mirrors `self.yolo`. + auto_approve: bool, } -/// Saved approval mode to restore when leaving Plan mode (#3279). -#[derive(Debug, Clone, Copy)] -struct PlanRestoreState { - approval_mode: ApprovalMode, +/// Resolve a mode's effective permission policy from the durable Agent baseline. +/// +/// This is the single source of truth for the mode/permission table (#3386): +/// - `Plan` → read-only: no shell, no trust, `Suggest` approvals. +/// - `Agent` → the user's durable baseline (`prefs`). +/// - `Yolo` → full authority: shell + trust + `Auto` approvals. +/// +/// Pure and side-effect free so it can be unit-tested directly and reused by +/// any policy consumer. 
+fn base_policy_for_mode(mode: AppMode, prefs: &ModeSessionPrefs) -> EffectiveModePolicy { + match mode { + AppMode::Plan => EffectiveModePolicy { + mode, + allow_shell: false, + trust_mode: false, + approval_mode: ApprovalMode::Suggest, + auto_approve: false, + }, + AppMode::Agent => EffectiveModePolicy { + mode, + allow_shell: prefs.agent_allow_shell, + trust_mode: prefs.agent_trust_mode, + approval_mode: prefs.agent_approval_mode, + auto_approve: false, + }, + AppMode::Yolo => EffectiveModePolicy { + mode, + allow_shell: true, + trust_mode: true, + approval_mode: ApprovalMode::Auto, + auto_approve: true, + }, + } } // === Sub-state structs for App field organization (#377) === @@ -1531,6 +1591,15 @@ pub struct App { pub api_provider: ApiProvider, /// Primary provider plus configured fallback providers for this session. pub provider_chain: Option, + /// Per-provider auth/local readiness snapshot for the fallback chain (#2574). + /// + /// Captured at startup alongside `provider_chain` (where the live `Config` is + /// in scope). `advance_fallback` consults it to skip chain entries that + /// cannot serve a turn — hosted providers missing a key — while local + /// providers (Ollama/vLLM/SGLang) are always ready. Stored as `(provider, + /// ready)` pairs; lookups fall back to "ready" for providers not present so + /// an unknown entry is tried rather than silently skipped. + provider_readiness: Vec<(ApiProvider, bool)>, /// Human-readable description of the last provider fallback event. pub last_fallback_reason: Option, /// True when the active provider/base URL accepts arbitrary model IDs @@ -1732,8 +1801,10 @@ pub struct App { pub hooks: HookExecutor, #[allow(dead_code)] pub yolo: bool, - yolo_restore: Option, - plan_restore: Option, + /// Durable Agent-era permission baseline that Plan/YOLO derive from and + /// restore to (#3386). Refreshed from the live fields whenever the user + /// leaves Agent mode; see [`base_policy_for_mode`] and `set_mode`. 
+ mode_prefs: ModeSessionPrefs, // Clipboard handler pub clipboard: ClipboardHandler, // Tool approval session allowlist @@ -2227,6 +2298,24 @@ impl App { .map(|kind| ProviderChain::new(kind, &config.fallback_providers)) .filter(|chain| chain.providers().len() > 1); + // Snapshot per-provider readiness for the fallback chain (#2574). Uses + // the same `has_api_key_for` helper the provider picker uses, so hosted + // providers require a key and self-hosted ones (Ollama/vLLM/SGLang) are + // reported ready without one. Empty when there is no fallback chain. + let provider_readiness = provider_chain + .as_ref() + .map(|chain| { + chain + .providers() + .iter() + .map(|kind| { + let provider = ApiProvider::from_kind(*kind); + (provider, has_api_key_for(config, provider)) + }) + .collect() + }) + .unwrap_or_default(); + // Check if the effective provider has an API key. This must happen // after settings.default_provider is applied; otherwise a saved // third-party provider can be pushed back into DeepSeek onboarding. @@ -2338,18 +2427,27 @@ impl App { needs_workspace_trust, ); - let yolo_restore = if initial_mode == AppMode::Yolo { - Some(YoloRestoreState { - allow_shell: config.allow_shell(), - trust_mode: false, - approval_mode: config - .approval_policy - .as_deref() - .and_then(ApprovalMode::from_config_value) - .unwrap_or_default(), - }) - } else { - None + // Durable Agent-era permission baseline (#3386). Plan/YOLO derive from + // and restore to this. When the user starts in YOLO the live shell flag + // is force-enabled below, so the baseline shell value is taken from + // config (the pre-YOLO surface) rather than the live mirror; otherwise + // it mirrors the resolved `allow_shell` option. Trust is never part of + // the Agent baseline (it is YOLO-only authority). Approval mirrors the + // configured policy. This preserves the exact values the previous + // `YoloRestoreState`/`PlanRestoreState` snapshots restored. 
+ let configured_approval_mode = config + .approval_policy + .as_deref() + .and_then(ApprovalMode::from_config_value) + .unwrap_or_default(); + let mode_prefs = ModeSessionPrefs { + agent_allow_shell: if initial_mode == AppMode::Yolo { + config.allow_shell() + } else { + allow_shell + }, + agent_trust_mode: false, + agent_approval_mode: configured_approval_mode, }; let allow_shell = allow_shell || initial_mode == AppMode::Yolo; let shell_manager = new_shared_shell_manager(workspace.clone()); @@ -2446,6 +2544,7 @@ impl App { last_effective_model: None, api_provider: provider, provider_chain, + provider_readiness, last_fallback_reason: None, model_ids_passthrough, active_route_limits: None, @@ -2533,8 +2632,7 @@ impl App { api_key_cursor: 0, hooks, yolo: initial_mode == AppMode::Yolo, - yolo_restore, - plan_restore: None, + mode_prefs, clipboard: ClipboardHandler::new(), approval_session_approved: HashSet::new(), approval_session_denied: HashSet::new(), @@ -2740,51 +2838,36 @@ impl App { return false; } - let entering_yolo = mode == AppMode::Yolo && previous_mode != AppMode::Yolo; - let leaving_yolo = previous_mode == AppMode::Yolo && mode != AppMode::Yolo; - let entering_plan = mode == AppMode::Plan && previous_mode != AppMode::Plan; - let leaving_plan = previous_mode == AppMode::Plan && mode != AppMode::Plan; self.mode = mode; self.status_message = Some(format!("Switched to {} mode", mode.label())); - // Restore outgoing mode state before capturing incoming mode state. This - // keeps cross-mode hops such as Plan -> YOLO and YOLO -> Plan from - // saving transient policy values as the next mode's baseline. 
- if leaving_yolo && let Some(restore) = self.yolo_restore.take() { - self.allow_shell = restore.allow_shell; - self.trust_mode = restore.trust_mode; - self.approval_mode = restore.approval_mode; - } - - // Plan save/restore (#3279): Plan mode derives its write-blocking from - // the mode itself (turn_loop), but the TUI approval surface reads - // `app.approval_mode` without consulting `app.mode`. Save the Agent-era - // approval mode when entering Plan so it is restored when the user - // switches back to Agent. - if leaving_plan && let Some(restore) = self.plan_restore.take() { - self.approval_mode = restore.approval_mode; - } - - if entering_plan { - self.plan_restore = Some(PlanRestoreState { - approval_mode: self.approval_mode, - }); + // Mode cycling is untangled from permission policy (#3386). The user + // only edits the durable permission surface while in Agent mode, so + // refresh the baseline from the live mirrors whenever we leave Agent — + // before any transient Plan/YOLO policy overwrites them. This subsumes + // the old per-mode `YoloRestoreState`/`PlanRestoreState` snapshots: + // cross-mode hops (Plan -> YOLO, YOLO -> Plan) do not touch the baseline, + // so YOLO's elevated authority never bleeds into the restored Agent + // surface (#3279). + if previous_mode == AppMode::Agent { + self.mode_prefs = ModeSessionPrefs { + agent_allow_shell: self.allow_shell, + agent_trust_mode: self.trust_mode, + agent_approval_mode: self.approval_mode, + }; } - // YOLO save/restore: captures the full pre-YOLO permission surface so - // exiting YOLO puts the user back exactly where they were. 
- if entering_yolo { - self.yolo_restore = Some(YoloRestoreState { - allow_shell: self.allow_shell, - trust_mode: self.trust_mode, - approval_mode: self.approval_mode, - }); - self.allow_shell = true; - self.trust_mode = true; - self.approval_mode = ApprovalMode::Auto; - } + // Derive the effective permission policy for the incoming mode from the + // single source of truth and apply it to the live mirrors in one block. + // Plan's write-blocking still comes from `self.mode` in turn_loop; this + // also keeps the TUI approval surface (which reads `self.approval_mode` + // without consulting `self.mode`) consistent with the active mode. + let policy = base_policy_for_mode(mode, &self.mode_prefs); + self.allow_shell = policy.allow_shell; + self.trust_mode = policy.trust_mode; + self.approval_mode = policy.approval_mode; + self.yolo = policy.auto_approve; - self.yolo = mode == AppMode::Yolo; if mode != AppMode::Plan { self.plan_prompt_pending = false; self.plan_tool_used_in_turn = false; @@ -5560,20 +5643,71 @@ impl App { .map_or(0, |chain| chain.providers().len()) } + /// Whether a fallback chain entry can serve a turn right now (#2574). + /// + /// Mirrors the provider picker's eligibility: hosted providers need a key + /// (`has_api_key_for`, captured into `provider_readiness` at startup) while + /// self-hosted providers (Ollama/vLLM/SGLang) are always ready. Providers + /// absent from the snapshot default to ready so an unknown entry is tried + /// rather than silently skipped. + fn fallback_provider_is_ready(&self, provider: ApiProvider) -> bool { + self.provider_readiness + .iter() + .find_map(|(candidate, ready)| (*candidate == provider).then_some(*ready)) + .unwrap_or(true) + } + + /// Advance to the next *eligible* provider in the fallback chain (#2574). + /// + /// Walks the chain from the current position, skipping entries that are not + /// ready (hosted providers missing auth) and recording a clear note for each + /// skip. 
Local providers are always eligible. Returns the first ready + /// provider, or `None` (with an exhaustion reason) when every remaining entry + /// is unready or the end of the chain is reached. `ProviderChain::advance` + /// stays pure — the readiness filtering lives here at the App level. + /// + /// Note: auth-rejection (401) failures never reach this path; the caller + /// excludes them from fallback so a bad key does not silently rotate + /// providers (see `apply_engine_error_to_app`). pub fn advance_fallback(&mut self, reason: impl Into) -> Option { let reason = reason.into(); - let chain = self.provider_chain.as_mut()?; - let Some(next_kind) = chain.advance() else { + self.provider_chain.as_ref()?; + + let mut skip_notes: Vec = Vec::new(); + let mut chosen: Option = None; + while let Some(next_kind) = self + .provider_chain + .as_mut() + .and_then(ProviderChain::advance) + { + let candidate = ApiProvider::from_kind(next_kind); + if self.fallback_provider_is_ready(candidate) { + chosen = Some(candidate); + break; + } + skip_notes.push(format!("skipped {}: needs auth", candidate.as_str())); + } + + let skipped = if skip_notes.is_empty() { + String::new() + } else { + format!(" ({})", skip_notes.join("; ")) + }; + + let Some(next_provider) = chosen else { + let total = self + .provider_chain + .as_ref() + .map_or(0, |chain| chain.providers().len()); self.last_fallback_reason = Some(format!( - "Fallback chain exhausted after {} provider(s): {reason}", - chain.providers().len() + "Fallback chain exhausted after {total} provider(s): {reason}{skipped}" )); return None; }; - let next_provider = ApiProvider::from_kind(next_kind); + self.api_provider = next_provider; self.last_fallback_reason = Some(format!( - "Fell back to {} after recoverable provider error: {reason}", + "Fell back to {} after recoverable provider error: {reason}{skipped}", next_provider.as_str() )); Some(next_provider) diff --git a/crates/tui/src/tui/app/tests.rs b/crates/tui/src/tui/app/tests.rs 
index b020e6fc7b..beacc95609 100644 --- a/crates/tui/src/tui/app/tests.rs +++ b/crates/tui/src/tui/app/tests.rs @@ -1633,6 +1633,144 @@ fn set_mode_plan_to_yolo_keeps_yolo_permissions_and_restores_agent_baseline() { assert_eq!(app.approval_mode, ApprovalMode::Never); } +#[test] +fn base_policy_for_mode_projects_the_mode_permission_table() { + // Pure projection of (mode, prefs) — the single source of truth for #3386. + let prefs = ModeSessionPrefs { + agent_allow_shell: true, + agent_trust_mode: true, + agent_approval_mode: ApprovalMode::Never, + }; + + // Plan: read-only, no shell, no trust, Suggest, no auto-approve — and it + // never inherits the (here elevated) Agent baseline. + let plan = base_policy_for_mode(AppMode::Plan, &prefs); + assert_eq!(plan.mode, AppMode::Plan); + assert!(!plan.allow_shell); + assert!(!plan.trust_mode); + assert_eq!(plan.approval_mode, ApprovalMode::Suggest); + assert!(!plan.auto_approve); + + // Agent: exactly the durable baseline. + let agent = base_policy_for_mode(AppMode::Agent, &prefs); + assert_eq!(agent.mode, AppMode::Agent); + assert!(agent.allow_shell); + assert!(agent.trust_mode); + assert_eq!(agent.approval_mode, ApprovalMode::Never); + assert!(!agent.auto_approve); + + // YOLO: full authority regardless of the baseline. + let yolo = base_policy_for_mode(AppMode::Yolo, &prefs); + assert_eq!(yolo.mode, AppMode::Yolo); + assert!(yolo.allow_shell); + assert!(yolo.trust_mode); + assert_eq!(yolo.approval_mode, ApprovalMode::Auto); + assert!(yolo.auto_approve); + + // A minimal Agent baseline projects through Agent unchanged. 
+ let minimal = ModeSessionPrefs { + agent_allow_shell: false, + agent_trust_mode: false, + agent_approval_mode: ApprovalMode::Suggest, + }; + let agent_min = base_policy_for_mode(AppMode::Agent, &minimal); + assert!(!agent_min.allow_shell); + assert!(!agent_min.trust_mode); + assert_eq!(agent_min.approval_mode, ApprovalMode::Suggest); +} + +#[test] +fn set_mode_agent_to_yolo_to_agent_restores_baseline_without_yolo_leak() { + // Round-trip Agent -> YOLO -> Agent must not leave YOLO's elevated authority + // (shell/trust/Auto) bleeding into the restored Agent surface (#3386). + let mut options = test_options(false); + options.allow_shell = false; + options.start_in_agent_mode = true; + let mut app = App::new(options, &Config::default()); + // User's chosen Agent surface: shell on, trust off, Suggest approvals. + app.allow_shell = true; + app.trust_mode = false; + app.approval_mode = ApprovalMode::Suggest; + + app.set_mode(AppMode::Yolo); + assert!(app.allow_shell); + assert!(app.trust_mode); + assert_eq!(app.approval_mode, ApprovalMode::Auto); + assert!(app.yolo); + + app.set_mode(AppMode::Agent); + assert_eq!(app.mode, AppMode::Agent); + assert!(app.allow_shell, "shell baseline preserved"); + assert!( + !app.trust_mode, + "YOLO trust authority must not leak into Agent" + ); + assert_eq!( + app.approval_mode, + ApprovalMode::Suggest, + "YOLO Auto approvals must not leak into Agent" + ); + assert!(!app.yolo); +} + +#[test] +fn set_mode_plan_to_yolo_to_agent_does_not_bleed_yolo_into_agent() { + // Plan -> YOLO -> Agent: the Agent baseline captured before leaving Agent is + // what we land on, untouched by the transient Plan or YOLO policies (#3386). 
+ let mut options = test_options(false); + options.allow_shell = false; + options.start_in_agent_mode = true; + let mut app = App::new(options, &Config::default()); + app.allow_shell = false; + app.trust_mode = false; + app.approval_mode = ApprovalMode::Never; + + app.set_mode(AppMode::Plan); + // Plan is read-only regardless of the baseline. + assert!(!app.allow_shell); + assert!(!app.trust_mode); + assert_eq!(app.approval_mode, ApprovalMode::Suggest); + + app.set_mode(AppMode::Yolo); + assert!(app.allow_shell); + assert!(app.trust_mode); + assert_eq!(app.approval_mode, ApprovalMode::Auto); + + app.set_mode(AppMode::Agent); + assert_eq!(app.mode, AppMode::Agent); + assert!(!app.allow_shell); + assert!(!app.trust_mode); + assert_eq!(app.approval_mode, ApprovalMode::Never); +} + +#[test] +fn set_mode_captures_agent_edits_as_the_durable_baseline() { + // Editing the permission surface in Agent updates the baseline that a later + // Plan -> Agent (or YOLO -> Agent) restores to (#3386). + let mut options = test_options(false); + options.allow_shell = false; + options.start_in_agent_mode = true; + let mut app = App::new(options, &Config::default()); + assert_eq!(app.mode, AppMode::Agent); + + // Initial baseline restores to no-shell / Suggest. + app.set_mode(AppMode::Plan); + app.set_mode(AppMode::Agent); + assert!(!app.allow_shell); + assert_eq!(app.approval_mode, ApprovalMode::Suggest); + + // User now turns shell on and tightens approvals while in Agent. + app.allow_shell = true; + app.approval_mode = ApprovalMode::Never; + + // A Plan hop and back must restore the *edited* baseline, not the original. 
+ app.set_mode(AppMode::Plan); + assert!(!app.allow_shell, "Plan is read-only"); + app.set_mode(AppMode::Agent); + assert!(app.allow_shell, "edited shell baseline restored"); + assert_eq!(app.approval_mode, ApprovalMode::Never); +} + #[test] fn leaving_yolo_after_startup_restores_baseline_policies() { let config = Config { @@ -2851,3 +2989,138 @@ fn delete_selection_noop_when_no_selection() { assert_eq!(app.input, "hello"); assert_eq!(app.cursor_position, 3); } + +// === #2574: capability-aware fallback eligibility =============================== + +/// Build an `App` whose fallback chain is `[active, fallbacks...]` with each +/// provider's auth controlled via `config.providers` keys. Env-var keys for the +/// providers under test are cleared so readiness is driven solely by config. +fn app_with_fallback_chain( + active: ApiProvider, + fallbacks: &[codewhale_config::ProviderKind], + keyed: &[ApiProvider], +) -> App { + let mut providers = ProvidersConfig::default(); + for provider in keyed { + let entry = ProviderConfig { + api_key: Some(format!("test-key-{}", provider.as_str())), + ..Default::default() + }; + match provider { + ApiProvider::Openai => providers.openai = entry, + ApiProvider::Openrouter => providers.openrouter = entry, + ApiProvider::Together => providers.together = entry, + ApiProvider::Fireworks => providers.fireworks = entry, + other => panic!("unhandled keyed provider in test helper: {other:?}"), + } + } + + let config = Config { + provider: Some(active.as_str().to_string()), + fallback_providers: fallbacks.to_vec(), + providers: Some(providers), + ..Default::default() + }; + + let mut options = test_options(false); + options.start_in_agent_mode = true; + options.skip_onboarding = true; + App::new(options, &config) +} + +#[test] +fn advance_fallback_skips_unauthed_middle_provider_and_lands_on_next_ready() { + let _lock = lock_test_env(); + let _openai = EnvVarGuard::remove("OPENAI_API_KEY"); + let _openrouter = 
EnvVarGuard::remove("OPENROUTER_API_KEY"); + let _together = EnvVarGuard::remove("TOGETHER_API_KEY"); + + // Chain: Openai (active, keyed) -> Openrouter (no key) -> Together (keyed). + let mut app = app_with_fallback_chain( + ApiProvider::Openai, + &[ + codewhale_config::ProviderKind::Openrouter, + codewhale_config::ProviderKind::Together, + ], + &[ApiProvider::Openai, ApiProvider::Together], + ); + assert_eq!(app.fallback_chain_position(), Some(0)); + + // Openrouter is skipped (needs auth); we land on Together. + let next = app.advance_fallback("network error"); + assert_eq!(next, Some(ApiProvider::Together)); + assert_eq!(app.api_provider, ApiProvider::Together); + assert_eq!(app.fallback_chain_position(), Some(2)); + + let reason = app.last_fallback_reason.as_deref().unwrap_or_default(); + assert!( + reason.contains("Fell back to together"), + "reason should name the landed provider: {reason}" + ); + assert!( + reason.contains("skipped openrouter: needs auth"), + "reason should note the skipped provider: {reason}" + ); +} + +#[test] +fn advance_fallback_local_provider_is_eligible_without_a_key() { + let _lock = lock_test_env(); + let _openai = EnvVarGuard::remove("OPENAI_API_KEY"); + + // Chain: Openai (active, keyed) -> Ollama (local, no key needed). 
+ let mut app = app_with_fallback_chain( + ApiProvider::Openai, + &[codewhale_config::ProviderKind::Ollama], + &[ApiProvider::Openai], + ); + + let next = app.advance_fallback("timeout"); + assert_eq!( + next, + Some(ApiProvider::Ollama), + "self-hosted providers are ready without a key" + ); + assert_eq!(app.api_provider, ApiProvider::Ollama); + let reason = app.last_fallback_reason.as_deref().unwrap_or_default(); + assert!(reason.contains("Fell back to ollama"), "{reason}"); + assert!( + !reason.contains("skipped"), + "no providers should be skipped: {reason}" + ); +} + +#[test] +fn advance_fallback_all_unready_exhausts_with_clear_reason() { + let _lock = lock_test_env(); + let _openai = EnvVarGuard::remove("OPENAI_API_KEY"); + let _openrouter = EnvVarGuard::remove("OPENROUTER_API_KEY"); + let _together = EnvVarGuard::remove("TOGETHER_API_KEY"); + + // Chain: Openai (active, keyed) -> Openrouter (no key) -> Together (no key). + // Every fallback entry is unready, so the chain exhausts. + let mut app = app_with_fallback_chain( + ApiProvider::Openai, + &[ + codewhale_config::ProviderKind::Openrouter, + codewhale_config::ProviderKind::Together, + ], + &[ApiProvider::Openai], + ); + + let next = app.advance_fallback("rate limited"); + assert_eq!(next, None, "no ready fallback remains"); + // Active provider is unchanged on exhaustion. 
+ assert_eq!(app.api_provider, ApiProvider::Openai); + + let reason = app.last_fallback_reason.as_deref().unwrap_or_default(); + assert!( + reason.contains("Fallback chain exhausted"), + "reason should state exhaustion: {reason}" + ); + assert!( + reason.contains("skipped openrouter: needs auth") + && reason.contains("skipped together: needs auth"), + "reason should note every skipped provider: {reason}" + ); +} diff --git a/crates/tui/src/tui/provider_picker.rs b/crates/tui/src/tui/provider_picker.rs index 6318d2a6b3..12bbfbb95f 100644 --- a/crates/tui/src/tui/provider_picker.rs +++ b/crates/tui/src/tui/provider_picker.rs @@ -61,6 +61,7 @@ pub struct ProviderDashboardRow { pub default_route: ProviderDefaultRoute, pub usage_meter: String, pub readiness: ProviderReadiness, + pub maturity: ProviderMaturity, pub messages: Vec, pub is_active: bool, has_key: bool, @@ -99,6 +100,36 @@ pub enum ProviderReadiness { Invalid, } +/// How battle-tested a provider integration is, independent of whether the +/// user has credentials configured (which `ProviderReadiness` already tracks). +/// Kept intentionally minimal — the only two honest states today are an +/// experimental integration and a supported one (#2984). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ProviderMaturity { + Experimental, + Supported, +} + +impl ProviderMaturity { + /// Maturity is seeded from a small table keyed by provider. Only the + /// OpenAI Codex bridge is experimental today; everything else is supported. + fn for_provider(provider: ApiProvider) -> Self { + match provider { + ApiProvider::OpenaiCodex => Self::Experimental, + _ => Self::Supported, + } + } + + /// Compact tag for the picker hint. Returns `None` when the integration is + /// supported so the common case stays noise-free (#2984). 
+ fn tag(self) -> Option<&'static str> { + match self { + Self::Experimental => Some("experimental"), + Self::Supported => None, + } + } +} + impl ProviderDashboardRow { fn from_config(provider: ApiProvider, active: ApiProvider, config: &Config) -> Self { let has_key = has_api_key_for(config, provider); @@ -135,6 +166,7 @@ impl ProviderDashboardRow { }, usage_meter, readiness: ProviderReadiness::Legacy, + maturity: ProviderMaturity::for_provider(provider), messages: vec![ "legacy DeepSeek China alias; routing maps through DeepSeek compatibility" .to_string(), @@ -226,6 +258,7 @@ impl ProviderDashboardRow { default_route, usage_meter: resolved_pricing, readiness, + maturity: ProviderMaturity::for_provider(provider), messages, is_active: provider == active, has_key, @@ -234,7 +267,7 @@ impl ProviderDashboardRow { fn compact_hint(&self) -> String { format!( - "{} | auth:{} | {} | {} | base:{} | route:{}{} | catalog:{}", + "{} | auth:{} | {} | {} | base:{} | route:{}{} | catalog:{}{}", self.readiness.label(), self.auth_status.label(), self.usage_meter, @@ -242,7 +275,13 @@ impl ProviderDashboardRow { compact_base_url(&self.base_url), self.default_route.logical_model, route_wire_suffix(&self.default_route), - self.catalog_label() + self.catalog_label(), + // Only experimental integrations add a tag; supported ones stay + // noise-free (#2984). + self.maturity + .tag() + .map(|tag| format!(" | {tag}")) + .unwrap_or_default(), ) } @@ -554,6 +593,8 @@ impl ProviderPickerView { Span::raw(format!("{enter_action} ")), Span::styled(" R ", Style::default().fg(palette::TEXT_MUTED)), Span::raw("edit key "), + Span::styled(" M ", Style::default().fg(palette::TEXT_MUTED)), + Span::raw("models "), Span::styled(" Esc ", Style::default().fg(palette::TEXT_MUTED)), Span::raw("cancel "), ])) @@ -754,6 +795,13 @@ impl ModalView for ProviderPickerView { self.enter_key_entry(); ViewAction::None } + // Jump to the `/model` picker pre-filtered to this provider + // (#3083). 
Handled before the type-ahead arm so `m`/`M` opens + // models instead of seeking a provider whose name starts with m. + KeyCode::Char(c) if key.modifiers.is_empty() && c.eq_ignore_ascii_case(&'m') => { + let provider = self.selected_provider(); + ViewAction::EmitAndClose(ViewEvent::ProviderPickerOpenModels { provider }) + } // Type-ahead: any other letter jumps to the next provider whose // name starts with it (e.g. `z` -> "Z.ai"). KeyCode::Char(c) if key.modifiers.is_empty() && c.is_ascii_alphabetic() => { @@ -1000,6 +1048,42 @@ mod tests { assert!(row.is_active); } + #[test] + fn openai_codex_row_is_experimental_and_tagged_in_hint() { + let config = Config::default(); + let row = ProviderDashboardRow::from_config( + ApiProvider::OpenaiCodex, + ApiProvider::Deepseek, + &config, + ); + + // #2984: maturity is a separate axis from auth/readiness. + assert_eq!(row.maturity, ProviderMaturity::Experimental); + assert!( + row.compact_hint().contains("experimental"), + "experimental maturity must surface in the hint, got {:?}", + row.compact_hint() + ); + } + + #[test] + fn mainstream_provider_is_supported_without_experimental_tag() { + let config = Config::default(); + let row = ProviderDashboardRow::from_config( + ApiProvider::Deepseek, + ApiProvider::Deepseek, + &config, + ); + + // #2984: supported integrations stay noise-free (no tag). 
+ assert_eq!(row.maturity, ProviderMaturity::Supported); + assert!( + !row.compact_hint().contains("experimental"), + "supported providers must omit the experimental tag, got {:?}", + row.compact_hint() + ); + } + #[test] fn provider_dashboard_row_uses_route_resolver_for_custom_openai_endpoint() { let config = Config { @@ -1135,6 +1219,40 @@ mod tests { } } + #[test] + fn pressing_m_opens_models_for_selected_provider() { + let config = Config::default(); + let mut picker = ProviderPickerView::new(ApiProvider::Deepseek, &config); + move_to_provider(&mut picker, ApiProvider::Openrouter); + + let action = picker.handle_key(key(KeyCode::Char('m'))); + + // #3083: `m` jumps to the model picker scoped to the highlighted + // provider rather than acting as a type-ahead seek. + match action { + ViewAction::EmitAndClose(ViewEvent::ProviderPickerOpenModels { provider }) => { + assert_eq!(provider, ApiProvider::Openrouter); + } + other => panic!("expected ProviderPickerOpenModels, got {other:?}"), + } + } + + #[test] + fn pressing_uppercase_m_also_opens_models() { + let config = Config::default(); + let mut picker = ProviderPickerView::new(ApiProvider::Deepseek, &config); + + // Case-insensitive like the `R` edit-key affordance: a bare `M` works. 
+ let action = picker.handle_key(key(KeyCode::Char('M'))); + + match action { + ViewAction::EmitAndClose(ViewEvent::ProviderPickerOpenModels { provider }) => { + assert_eq!(provider, ApiProvider::Deepseek); + } + other => panic!("expected ProviderPickerOpenModels, got {other:?}"), + } + } + #[test] fn picker_marks_active_provider_as_initial_selection() { let config = Config::default(); diff --git a/crates/tui/src/tui/session_picker.rs b/crates/tui/src/tui/session_picker.rs index f10a8281f8..377fda1418 100644 --- a/crates/tui/src/tui/session_picker.rs +++ b/crates/tui/src/tui/session_picker.rs @@ -1152,7 +1152,7 @@ mod tests { view.render(area, &mut buf); let y = - row_containing(&buf, area, &selected_id).expect("selected session row should render"); + row_containing(&buf, area, selected_id).expect("selected session row should render"); let rendered_row = buffer_row_text(&buf, area, y); let highlighted_cells = (area.x..area.x.saturating_add(area.width)) .filter(|&x| { diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 7f78223d46..9f58f176fc 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -6730,7 +6730,7 @@ async fn switch_provider( let next_config = resolved_route.config; let new_model = resolved_route.model; - if let Err(err) = DeepSeekClient::new(&next_config) { + if let Err(err) = DeepSeekClient::from_candidate(&next_config, &resolved_route.candidate) { app.pending_provider_switch = None; app.add_message(HistoryCell::System { content: format!( @@ -6868,7 +6868,7 @@ async fn apply_provider_fallback_switch( let next_config = resolved_route.config; let new_model = resolved_route.model; - if let Err(err) = DeepSeekClient::new(&next_config) { + if let Err(err) = DeepSeekClient::from_candidate(&next_config, &resolved_route.candidate) { app.api_provider = previous_provider; app.last_fallback_reason = Some(format!( "Fallback provider {} was unavailable: {err}", @@ -8348,6 +8348,7 @@ fn render(f: &mut Frame, app: &mut App) { 
crate::config::ApiProvider::Zai => Some("Z.ai"), crate::config::ApiProvider::Stepfun => Some("StepFun"), crate::config::ApiProvider::Minimax => Some("MiniMax"), + crate::config::ApiProvider::Custom => Some("Custom"), }; let status_indicator_started_at = if app.low_motion { None @@ -8747,6 +8748,27 @@ fn toggle_live_transcript_overlay(app: &mut App) { app.needs_redraw = true; } +/// Open the `/model` picker pre-filtered to `provider` (#3083). The model +/// picker's search already scopes rows by provider display name, so we reuse +/// the standard "open model picker" path and seed its query by replaying the +/// provider's display name as character input through the public view-stack +/// key path — no model-picker internals are touched. +fn open_model_picker_for_provider(app: &mut App, provider: crate::config::ApiProvider) { + if app.view_stack.top_kind() != Some(ModalKind::ModelPicker) { + app.view_stack + .push(crate::tui::model_picker::ModelPickerView::new(app)); + } + for ch in provider.display_name().chars() { + // Char input updates the query and never emits a ViewEvent, so the + // returned (empty) event list is safe to drop. + let _ = app.view_stack.handle_key(crossterm::event::KeyEvent::new( + KeyCode::Char(ch), + KeyModifiers::NONE, + )); + } + app.needs_redraw = true; +} + #[allow(clippy::too_many_arguments)] async fn handle_view_events( terminal: &mut AppTerminal, @@ -9094,6 +9116,9 @@ async fn handle_view_events( ) .await; } + ViewEvent::ProviderPickerOpenModels { provider } => { + open_model_picker_for_provider(app, provider); + } ViewEvent::ModeSelected { mode } => { let prior_mode = app.mode; let msg = commands::switch_mode(app, mode); @@ -9457,6 +9482,13 @@ async fn apply_provider_picker_api_key( if matches!(provider, ApiProvider::Deepseek | ApiProvider::DeepseekCN) { config.api_key = Some(api_key); } else { + // Capture the custom entry key before borrowing `providers` (#1519). 
+ let custom_key = (provider == ApiProvider::Custom).then(|| { + config + .provider + .clone() + .unwrap_or_else(|| "__custom__".to_string()) + }); let providers = config .providers .get_or_insert_with(ProvidersConfig::default); @@ -9465,6 +9497,10 @@ async fn apply_provider_picker_api_key( // Guarded by the outer `if` above; safety net against refactors. return; } + ApiProvider::Custom => providers + .custom + .entry(custom_key.expect("custom key captured for custom provider")) + .or_default(), ApiProvider::DeepseekAnthropic => &mut providers.deepseek_anthropic, ApiProvider::NvidiaNim => &mut providers.nvidia_nim, ApiProvider::Openai => &mut providers.openai, @@ -9526,11 +9562,23 @@ async fn apply_provider_picker_auth_mode( } fn set_provider_auth_mode_in_memory(config: &mut Config, provider: ApiProvider, auth_mode: String) { + // Capture the custom entry key (the selected provider name) before the + // mutable borrow of `providers` below (#1519). + let custom_key = (provider == ApiProvider::Custom).then(|| { + config + .provider + .clone() + .unwrap_or_else(|| "__custom__".to_string()) + }); let providers = config .providers .get_or_insert_with(ProvidersConfig::default); let entry: &mut ProviderConfig = match provider { ApiProvider::Deepseek | ApiProvider::DeepseekCN => return, + ApiProvider::Custom => providers + .custom + .entry(custom_key.expect("custom key captured for custom provider")) + .or_default(), ApiProvider::DeepseekAnthropic => &mut providers.deepseek_anthropic, ApiProvider::NvidiaNim => &mut providers.nvidia_nim, ApiProvider::Openai => &mut providers.openai, diff --git a/crates/tui/src/tui/views/fleet_setup.rs b/crates/tui/src/tui/views/fleet_setup.rs index 5e0cf90371..acd55ca318 100644 --- a/crates/tui/src/tui/views/fleet_setup.rs +++ b/crates/tui/src/tui/views/fleet_setup.rs @@ -1,4 +1,12 @@ //! Fleet setup and loadout planner. +//! +//! NOTE (audit #7 / #3167): the modal title, footer hints, and the lane/row +//! 
taxonomy below are intentionally English for now. #3167 reworks this view +//! from a read-only summary into an interactive provider/model picker, which +//! will churn most of this text; localizing the ~90 volatile technical strings +//! into all shipped locales before that lands would be throwaway work. The +//! command entry (`CmdFleetDescription`) is already localized, and the +//! functional selection wiring (audit #8) is handled here regardless of locale. use std::path::{Path, PathBuf}; @@ -132,12 +140,21 @@ impl FleetSetupSnapshot { } } +/// Lane index of the role picker (lane "1 Role"). +const ROLE_LANE: usize = 0; +/// Lane index of the model-class picker (lane "2 Model"). +const MODEL_LANE: usize = 1; + pub struct FleetSetupView { lanes: Vec, selected_lane: usize, selected_rows: Vec, scrolls: Vec, - profile_prompt: String, + // The route context the profile prompt is generated against. The prompt + // itself is built on demand from the *current* role/model selection (see + // `insert_profile_prompt_action`) so the planner selection has a functional + // outcome. + snapshot: FleetSetupSnapshot, } impl FleetSetupView { @@ -147,7 +164,6 @@ impl FleetSetupView { } fn from_snapshot(snapshot: FleetSetupSnapshot) -> Self { - let profile_prompt = profile_authoring_prompt(&snapshot); let lanes = build_lanes(&snapshot); let len = lanes.len(); Self { @@ -155,10 +171,31 @@ impl FleetSetupView { selected_lane: 0, selected_rows: vec![0; len], scrolls: vec![0; len], - profile_prompt, + snapshot, } } + /// Label of the row currently selected in `lane`, if any. + fn selected_label(&self, lane: usize) -> Option<&str> { + let row = self.selected_rows.get(lane).copied().unwrap_or_default(); + self.lanes + .get(lane) + .and_then(|lane| lane.rows.get(row)) + .map(|row| row.label.as_str()) + } + + /// The planner role chosen in the Role lane (drives the profile file name + /// and `role_hint`). Falls back to `custom` when unresolved. 
+ fn selected_role(&self) -> &str { + self.selected_label(ROLE_LANE).unwrap_or("custom") + } + + /// The model class chosen in the Model lane, mapped to a profile schema + /// `model_class_hint` value. + fn selected_model_class(&self) -> &'static str { + model_class_hint(self.selected_label(MODEL_LANE).unwrap_or("inherit")) + } + fn selected_row(&self) -> usize { self.selected_rows .get(self.selected_lane) @@ -203,14 +240,20 @@ impl FleetSetupView { fn insert_profile_prompt_action(&self) -> ViewAction { ViewAction::EmitAndClose(ViewEvent::CommandPaletteSelected { action: CommandPaletteAction::InsertText { - text: self.profile_prompt.clone(), + text: self.profile_prompt(), }, }) } - #[cfg(test)] - fn profile_prompt(&self) -> &str { - &self.profile_prompt + /// Build the profile authoring prompt for the *current* role/model + /// selection. Re-evaluated each time so navigating the planner changes what + /// `g`/Enter inserts. + fn profile_prompt(&self) -> String { + profile_authoring_prompt( + &self.snapshot, + self.selected_role(), + self.selected_model_class(), + ) } } @@ -602,17 +645,52 @@ fn profile_file_status(workspace: &Path) -> (String, String) { } } -fn profile_authoring_prompt(snapshot: &FleetSetupSnapshot) -> String { +/// Map a Model-lane row label to a profile-schema `model_class_hint` value. +/// Route-context rows (`current route`, `fixed model`) and anything unknown +/// resolve to `inherit` so the generated profile reuses the active route. +fn model_class_hint(label: &str) -> &'static str { + match label { + "fast" => "fast", + "balanced" => "balanced", + // "strong" = security/release/architecture work → the strongest schema class. + "strong" => "deep-reasoning", + "deep-reasoning" => "deep-reasoning", + "tool-heavy" => "tool-heavy", + _ => "inherit", + } +} + +/// Sanitize a planner role label into a safe TOML file stem. 
+fn profile_file_stem(role: &str) -> String { + let stem: String = role + .chars() + .map(|c| if c.is_ascii_alphanumeric() { c } else { '-' }) + .collect(); + let stem = stem.trim_matches('-').to_ascii_lowercase(); + if stem.is_empty() { + "custom".to_string() + } else { + stem + } +} + +fn profile_authoring_prompt( + snapshot: &FleetSetupSnapshot, + role: &str, + model_class: &str, +) -> String { + let file_stem = profile_file_stem(role); format!( "Create a safe CodeWhale Fleet agent profile file for this workspace.\n\n\ - Target path: {PROFILE_DIR}/reviewer.toml\n\ + Selected planner role: {role}. Selected model class: {model_class}.\n\ + Target path: {PROFILE_DIR}/{file_stem}.toml\n\ Current route context only: provider = {provider}, model = {model}, reasoning = {reasoning}\n\n\ Write TOML using only this schema:\n\ - name\n\ - display_name\n\ - description\n\ - - role_hint\n\ - - model_class_hint (inherit, fast, balanced, deep-reasoning, code, review, or tool-heavy)\n\ + - role_hint (set to \"{role}\")\n\ + - model_class_hint (set to \"{model_class}\"; one of inherit, fast, balanced, deep-reasoning, code, review, or tool-heavy)\n\ - model (optional explicit model id on the active/resolved route; omit for loadout auto)\n\ - [instructions].text\n\ - [tools].posture = \"read-only\"\n\n\ @@ -684,7 +762,9 @@ mod tests { ViewAction::EmitAndClose(ViewEvent::CommandPaletteSelected { action: CommandPaletteAction::InsertText { text }, }) => { - assert!(text.contains("Target path: .codewhale/agents/reviewer.toml")); + // Default selection is the first Role row ("manager"). 
+ assert!(text.contains("Target path: .codewhale/agents/manager.toml")); + assert!(text.contains("role_hint (set to \"manager\")")); assert!(text.contains("provider = DeepSeek")); assert!(text.contains("model (optional explicit model id")); assert!(text.contains("Do not include provider, base_url")); @@ -694,6 +774,35 @@ mod tests { } } + #[test] + fn selected_role_and_model_class_drive_generated_profile() { + let mut view = FleetSetupView::from_snapshot(snapshot()); + + // Role lane: manager(0) main(1) scout(2) builder(3) ... → move to builder. + view.handle_key(key(KeyCode::Down)); + view.handle_key(key(KeyCode::Down)); + view.handle_key(key(KeyCode::Down)); + // Model lane: current route(0) inherit(1) fast(2) ... → move right then to fast. + view.handle_key(key(KeyCode::Right)); + view.handle_key(key(KeyCode::Down)); + view.handle_key(key(KeyCode::Down)); + + let prompt = view.profile_prompt(); + assert!( + prompt.contains("Target path: .codewhale/agents/builder.toml"), + "selection should drive the profile file name; got: {prompt}" + ); + assert!( + prompt.contains("role_hint (set to \"builder\")"), + "selection should drive role_hint; got: {prompt}" + ); + assert!( + prompt.contains("model_class_hint (set to \"fast\""), + "selection should drive model_class_hint; got: {prompt}" + ); + assert!(prompt.contains("Selected planner role: builder. Selected model class: fast.")); + } + #[test] fn profile_prompt_uses_current_route_only_as_context() { let view = FleetSetupView::from_snapshot(snapshot()); diff --git a/crates/tui/src/tui/views/mod.rs b/crates/tui/src/tui/views/mod.rs index ca7b0ccefc..7a8da19feb 100644 --- a/crates/tui/src/tui/views/mod.rs +++ b/crates/tui/src/tui/views/mod.rs @@ -176,6 +176,11 @@ pub enum ViewEvent { ProviderPickerKimiOAuthEnabled { provider: crate::config::ApiProvider, }, + /// Emitted by the `/provider` picker (the `M` action) to jump straight to + /// the `/model` picker pre-filtered to the highlighted provider (#3083). 
+ ProviderPickerOpenModels { + provider: crate::config::ApiProvider, + }, /// Emitted by the `/mode` picker when the user chooses a mode. ModeSelected { mode: crate::tui::app::AppMode, diff --git a/crates/tui/src/tui/widgets/mod.rs b/crates/tui/src/tui/widgets/mod.rs index 447afa76ed..17345e069f 100644 --- a/crates/tui/src/tui/widgets/mod.rs +++ b/crates/tui/src/tui/widgets/mod.rs @@ -3980,7 +3980,7 @@ mod tests { assert!(!normal_rendered.contains("Draft")); assert!( !normal_rendered - .contains(&normal_app.tr(crate::localization::MessageId::HistorySearchTitle)) + .contains(normal_app.tr(crate::localization::MessageId::HistorySearchTitle)) ); let mut draft_app = create_test_app(); @@ -4001,7 +4001,7 @@ mod tests { search_widget.render(area, &mut search_buf); assert!( buffer_text(&search_buf, area) - .contains(&search_app.tr(crate::localization::MessageId::HistorySearchTitle)) + .contains(search_app.tr(crate::localization::MessageId::HistorySearchTitle)) ); } diff --git a/npm/codewhale/package.json b/npm/codewhale/package.json index 342e0e8f73..d470cfe90e 100644 --- a/npm/codewhale/package.json +++ b/npm/codewhale/package.json @@ -1,7 +1,7 @@ { "name": "codewhale", - "version": "0.8.64", - "codewhaleBinaryVersion": "0.8.64", + "version": "0.8.65", + "codewhaleBinaryVersion": "0.8.65", "description": "Install and run CodeWhale, the agentic terminal for open-source and open-weight coding models, from GitHub release artifacts.", "author": "Hmbown", "license": "MIT", diff --git a/scripts/check-provider-registry.py b/scripts/check-provider-registry.py index 805946539e..06a513bf8a 100644 --- a/scripts/check-provider-registry.py +++ b/scripts/check-provider-registry.py @@ -25,11 +25,20 @@ PROVIDER_KIND_RS = ROOT / "crates" / "config" / "src" / "provider_kind.rs" PROVIDER_RS = ROOT / "crates" / "config" / "src" / "provider.rs" TUI_CONFIG_RS = ROOT / "crates" / "tui" / "src" / "config.rs" +# Default provider model/base-URL constants were split out of config.rs into +# this 
leaf module (#3311); read them from there for the default-string check. +TUI_CONFIG_MODELS_RS = ROOT / "crates" / "tui" / "src" / "config" / "models.rs" AGENT_RS = ROOT / "crates" / "agent" / "src" / "lib.rs" PROVIDERS_MD = ROOT / "docs" / "PROVIDERS.md" API_PROVIDER_ONLY_IDS = {"deepseek-cn"} + +# `custom` is the dynamic OpenAI-compatible meta-provider (#1519): a single +# catch-all `[providers.custom]` table that backs arbitrary user-defined +# endpoints, not a canonical shipped provider with a docs row. It is excluded +# from the provider-table drift check. +META_PROVIDER_TABLES = {"custom"} SHARED_PROVIDER_TABLES = { "siliconflow-CN": "siliconflow_cn", } @@ -201,10 +210,13 @@ def model_registry_providers(agent_rs: str, variant_to_id: dict[str, str]) -> se def default_strings(tui_config_rs: str) -> set[str]: + # Model/base-URL constants now live in config/models.rs (#3311); scan it + # alongside config.rs so the check follows the leaf split. + sources = tui_config_rs + "\n" + read(TUI_CONFIG_MODELS_RS) defaults = set() for name, value in re.findall( r'const\s+(DEFAULT_[A-Z0-9_]+(?:MODEL|BASE_URL)):\s*&str\s*=\s*"([^"]+)"', - tui_config_rs, + sources, ): if name == "DEFAULT_DEEPSEEKCN_BASE_URL": continue @@ -362,7 +374,11 @@ def main() -> int: canonical_ids, shipped_provider_rows(providers_md), ) - errors += report_set("provider TOML tables", expected_tables, provider_tables(config_rs)) + errors += report_set( + "provider TOML tables", + expected_tables, + provider_tables(config_rs) - META_PROVIDER_TABLES, + ) errors += report_set( "documented provider TOML tables", expected_tables, diff --git a/web/app/[locale]/digest/page.tsx b/web/app/[locale]/digest/page.tsx index 8bdb534c4f..9faf381cd3 100644 --- a/web/app/[locale]/digest/page.tsx +++ b/web/app/[locale]/digest/page.tsx @@ -1,6 +1,6 @@ -import Link from "next/link"; // Bypassing the '@' alias to force TypeScript to find the file import { getEnv } from "../../../lib/kv"; +import { buildPageMetadata } from 
"../../../lib/page-meta"; // Define the exact structure of the Digest data to fix all the 'any' type errors interface DigestSection { @@ -20,7 +20,23 @@ interface WeeklyDigest { export const revalidate = 3600; // Cache page updates hourly -export default async function DigestArchivePage() { +export async function generateMetadata({ params }: { params: Promise<{ locale: string }> }) { + const { locale } = await params; + const isZh = locale === "zh"; + return buildPageMetadata({ + path: "/digest", + locale, + title: isZh ? "社区摘要 · CodeWhale" : "Community Digest · CodeWhale", + description: isZh + ? "CodeWhale 每周社区更新存档:由维护者审核的摘要。" + : "Archive of weekly CodeWhale community updates — maintainer-approved summaries.", + }); +} + +export default async function DigestArchivePage({ params }: { params: Promise<{ locale: string }> }) { + const { locale } = await params; + const isZh = locale === "zh"; + // 1. Get the correct project environment bindings const env = await getEnv(); const kv = env.CURATED_KV; @@ -32,7 +48,7 @@ export default async function DigestArchivePage() { try { // Fetch all weekly digest keys generated by the agent tasks const { keys } = await kv.list({ prefix: "digest:weekly-" }); - + if (keys && keys.length > 0) { const digestsRaw = await Promise.all( keys.map(async (k: { name: string }) => { @@ -41,11 +57,19 @@ export default async function DigestArchivePage() { }) ); - // Explicitly defining item types so TypeScript stops panicking + // Parse each entry independently so a single malformed record can't + // blank the entire archive — skip only the bad ones. 
digests = digestsRaw .filter((item: string | null): item is string => Boolean(item)) - .map((item: string) => JSON.parse(item) as WeeklyDigest) - .sort((a: WeeklyDigest, b: WeeklyDigest) => + .flatMap((item: string) => { + try { + return [JSON.parse(item) as WeeklyDigest]; + } catch (e) { + console.error("Skipping malformed digest entry:", e); + return []; + } + }) + .sort((a: WeeklyDigest, b: WeeklyDigest) => new Date(b.generatedAt).getTime() - new Date(a.generatedAt).getTime() ); } @@ -58,9 +82,13 @@ export default async function DigestArchivePage() { if (digests.length === 0) { return (
-

Community Digest

+

+ {isZh ? "社区摘要" : "Community Digest"} +

- We are gathering this week's community updates. Check back soon! + {isZh + ? "本周社区动态整理中,请稍后再来!" + : "We are gathering this week's community updates. Check back soon!"}

); @@ -68,9 +96,13 @@ export default async function DigestArchivePage() { return (
-

Weekly Community Updates

-

Maintainer-approved summaries from CodeWhale

- +

+ {isZh ? "每周社区更新" : "Weekly Community Updates"} +

+

+ {isZh ? "由 CodeWhale 维护者审核的摘要" : "Maintainer-approved summaries from CodeWhale"} +

+
{digests.map((digest: WeeklyDigest) => (
@@ -104,4 +136,4 @@ export default async function DigestArchivePage() {
); -} \ No newline at end of file +} diff --git a/web/app/layout.tsx b/web/app/layout.tsx new file mode 100644 index 0000000000..dd235500a1 --- /dev/null +++ b/web/app/layout.tsx @@ -0,0 +1,17 @@ +import type { Metadata } from "next"; +import { SITE_URL } from "@/lib/page-meta"; + +/** + * Root metadata boundary. The per-locale ``/``, fonts, and content + * metadata live in app/[locale]/layout.tsx; this root only exists to give every + * route — including the framework-generated `/_not-found` and the root-segment + * `opengraph-image` — a resolvable `metadataBase` so Next stops falling back to + * `http://localhost:3000` for social image URLs. + */ +export const metadata: Metadata = { + metadataBase: new URL(SITE_URL), +}; + +export default function RootLayout({ children }: { children: React.ReactNode }) { + return children; +} diff --git a/web/lib/facts-drift.ts b/web/lib/facts-drift.ts index a1a78eebbb..8f24107410 100644 --- a/web/lib/facts-drift.ts +++ b/web/lib/facts-drift.ts @@ -105,9 +105,10 @@ function deriveProvidersFromConfig(cfg: string): ProviderFact[] { Minimax: { id: "minimax", label: "MiniMax", env: "MINIMAX_API_KEY" }, }; // Log loudly on unmapped variants so a new provider can never be silently - // dropped from the drift-derived facts again. DeepseekCN is the one - // deliberate exclusion (see comment above / issue #1104). - const EXCLUDED = new Set(["DeepseekCN"]); + // dropped from the drift-derived facts again. DeepseekCN (#1104) and the + // dynamic Custom meta-provider (#1519, user-defined endpoints) are the + // deliberate exclusions. 
+ const EXCLUDED = new Set(["DeepseekCN", "Custom"]); const unmapped = variants.filter((v) => !EXCLUDED.has(v) && !labelMap[v]); if (unmapped.length > 0) { console.warn( @@ -119,7 +120,9 @@ function deriveProvidersFromConfig(cfg: string): ProviderFact[] { } function deriveDefaultModel(cfg: string): string | null { - const m = cfg.match(/DEFAULT_TEXT_MODEL[^"]*"([^"]+)"/); + // Match the const *definition* (`= "..."`); the definition moved to + // config/models.rs in the #3311 split, so callers pass config.rs + models.rs. + const m = cfg.match(/DEFAULT_TEXT_MODEL\s*(?::\s*&str\s*)?=\s*"([^"]+)"/); return m ? m[1] : null; } @@ -161,9 +164,10 @@ function deriveLicense(licText: string): string | null { } export async function deriveFactsFromRemote(ghToken?: string): Promise { - const [cargo, configRs, sandboxFiles, npmPkg, licText, toolFiles, latestRelease] = await Promise.all([ + const [cargo, configRs, configModels, sandboxFiles, npmPkg, licText, toolFiles, latestRelease] = await Promise.all([ fetchText("Cargo.toml", ghToken), fetchText("crates/tui/src/config.rs", ghToken), + fetchText("crates/tui/src/config/models.rs", ghToken), fetchListing("crates/tui/src/sandbox", ghToken), fetchText("npm/codewhale/package.json", ghToken), fetchText("LICENSE", ghToken), @@ -180,7 +184,7 @@ export async function deriveFactsFromRemote(ghToken?: string): Promise { try { return npmPkg ? JSON.parse(npmPkg).engines?.node ?? 
null : null; } catch { return null; } })(), diff --git a/web/lib/facts.generated.ts b/web/lib/facts.generated.ts index 6510f819c3..5932491abd 100644 --- a/web/lib/facts.generated.ts +++ b/web/lib/facts.generated.ts @@ -18,8 +18,8 @@ export interface RepoFacts { } export const FACTS: RepoFacts = { - "generatedAt": "2026-06-23T17:27:32.451Z", - "version": "0.8.64", + "generatedAt": "2026-06-24T09:06:20.600Z", + "version": "0.8.65", "crates": [ "agent", "app-server", diff --git a/web/scripts/derive-facts.mjs b/web/scripts/derive-facts.mjs index 3595d40892..edede0c7e9 100644 --- a/web/scripts/derive-facts.mjs +++ b/web/scripts/derive-facts.mjs @@ -8,15 +8,40 @@ * This script delegates all derivation logic to facts-lib.mjs so that * check-facts.mjs can reuse the same code for the CI drift gate. */ -import { writeFileSync } from "node:fs"; +import { writeFileSync, readFileSync, existsSync } from "node:fs"; import { dirname, resolve } from "node:path"; import { fileURLToPath } from "node:url"; import { buildFacts } from "./facts-lib.mjs"; const __dirname = dirname(fileURLToPath(import.meta.url)); +const target = resolve(__dirname, "..", "lib", "facts.generated.ts"); const out = buildFacts(); +// Preserve the committed `generatedAt` when every *checked* fact is unchanged, +// so a clean rebuild doesn't dirty the tracked file on every run. The drift +// gate (check-facts.mjs) ignores generatedAt + latestRelease; we mirror that +// volatile set here. Only when a real fact changes do we stamp a fresh time. 
+const VOLATILE = new Set(["generatedAt", "latestRelease"]); +function readCommittedFacts() { + if (!existsSync(target)) return null; + const src = readFileSync(target, "utf-8"); + const m = src.match(/export const FACTS\s*:\s*\w+\s*=\s*([\s\S]*?);?\s*$/); + if (!m) return null; + try { + return JSON.parse(m[1]); + } catch { + return null; + } +} +const committed = readCommittedFacts(); +if (committed && typeof committed.generatedAt === "string") { + const sameChecked = Object.keys(out).every( + (k) => VOLATILE.has(k) || JSON.stringify(out[k]) === JSON.stringify(committed[k]), + ); + if (sameChecked) out.generatedAt = committed.generatedAt; +} + // latestRelease is intentionally null at build time — populated at runtime by the drift cron. const RUNTIME_ONLY = new Set(["latestRelease"]); const missing = Object.entries(out).filter( @@ -52,7 +77,6 @@ export interface RepoFacts { export const FACTS: RepoFacts = ${JSON.stringify(out, null, 2)}; `; -const target = resolve(__dirname, "..", "lib", "facts.generated.ts"); writeFileSync(target, ts); console.log(`[derive-facts] wrote ${target}`); console.log( diff --git a/web/scripts/facts-lib.mjs b/web/scripts/facts-lib.mjs index a1a595eb97..0d6f9643ca 100644 --- a/web/scripts/facts-lib.mjs +++ b/web/scripts/facts-lib.mjs @@ -89,7 +89,10 @@ const PROVIDER_LABEL_MAP = { Minimax: { id: "minimax", label: "MiniMax", env: "MINIMAX_API_KEY" }, }; -const EXCLUDED_PROVIDERS = new Set(["DeepseekCN"]); +// DeepseekCN: not wired through shared ProviderKind (#1104). +// Custom: the dynamic OpenAI-compatible meta-provider (#1519) — a runtime +// catch-all for user-defined endpoints, not a website-listable provider. 
+const EXCLUDED_PROVIDERS = new Set(["DeepseekCN", "Custom"]); export function deriveProviders() { const cfg = read("crates/tui/src/config.rs"); @@ -112,9 +115,15 @@ export function deriveProviders() { } export function deriveDefaultModel() { - const cfg = read("crates/tui/src/config.rs"); - if (!cfg) return null; - const m = cfg.match(/DEFAULT_TEXT_MODEL[^"]*"([^"]+)"/); + // DEFAULT_TEXT_MODEL's definition moved to config/models.rs in the #3311 split; + // read both and match the const *definition* specifically (`= "..."`) so we + // don't mis-bind to a later string at a mere use site. + const cfg = + (read("crates/tui/src/config/models.rs") ?? "") + + "\n" + + (read("crates/tui/src/config.rs") ?? ""); + if (!cfg.trim()) return null; + const m = cfg.match(/DEFAULT_TEXT_MODEL\s*(?::\s*&str\s*)?=\s*"([^"]+)"/); return m ? m[1] : null; }