From adbbc8e83174ba0820015d25942e611c9ef31b84 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 01/20] add flake --- flake.lock | 77 +++++++++++++++++++++++++++++++++++++++++++++ flake.nix | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 flake.lock create mode 100644 flake.nix diff --git a/flake.lock b/flake.lock new file mode 100644 index 000000000..428032335 --- /dev/null +++ b/flake.lock @@ -0,0 +1,77 @@ +{ + "nodes": { + "crane": { + "locked": { + "lastModified": 1775839657, + "narHash": "sha256-SPm9ck7jh3Un9nwPuMGbRU04UroFmOHjLP56T10MOeM=", + "owner": "ipetkov", + "repo": "crane", + "rev": "7cf72d978629469c4bd4206b95c402514c1f6000", + "type": "github" + }, + "original": { + "owner": "ipetkov", + "repo": "crane", + "type": "github" + } + }, + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1776067740, + "narHash": "sha256-B35lpsqnSZwn1Lmz06BpwF7atPgFmUgw1l8KAV3zpVQ=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "7e495b747b51f95ae15e74377c5ce1fe69c1765f", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-25.11", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "crane": "crane", + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": 
"nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 000000000..14bce88dd --- /dev/null +++ b/flake.nix @@ -0,0 +1,91 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.11"; + flake-utils.url = "github:numtide/flake-utils"; + crane.url = "github:ipetkov/crane"; + }; + + outputs = { self, nixpkgs, flake-utils, crane }: + flake-utils.lib.eachSystem flake-utils.lib.allSystems (system: + let + pkgs = import nixpkgs { inherit system; }; + craneLib = crane.mkLib pkgs; + stdenv' = p: p.stdenvAdapters.withCFlags [ "-O" ] (p.stdenvAdapters.useMoldLinker p.clangStdenv); + stdenv = stdenv' pkgs; + + devShell = craneLib.devShell.override { + mkShell = pkgs.mkShell.override { + inherit stdenv; + }; + }; + + env = { + LIBCLANG_PATH = "${pkgs.libclang.lib}/lib"; + CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER = "${stdenv.cc}/bin/cc"; + CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS = "-C link-arg=--ld-path=${stdenv.cc}/bin/ld"; + }; + + commonArgs = { + src = let + unfilteredSrc = ./.; + fs = pkgs.lib.fileset; + in fs.toSource { + root = unfilteredSrc; + fileset = fs.unions [ + (craneLib.fileset.cargoTomlAndLock unfilteredSrc) + (craneLib.fileset.rust unfilteredSrc) + (fs.fileFilter + (file: file.hasExt "c" || file.hasExt "h" || file.hasExt "sql") + unfilteredSrc + ) + ]; + }; + strictDeps = true; + + stdenv = stdenv'; + nativeBuildInputs = with pkgs; [ + pkg-config + ]; + buildInputs = with pkgs; [ + openssl + ]; + + inherit env; + } // (craneLib.crateNameFromCargoToml { cargoToml = ./pgdog/Cargo.toml; }); + + cargoArtifacts = craneLib.buildDepsOnly commonArgs; + + pgDog = craneLib.buildPackage (commonArgs // { + inherit cargoArtifacts; + doCheck = false; + cargoExtraArgs = "-p pgdog"; + }); + + in { + packages.default = pgDog; + + devShells.default = devShell { + checks = self.checks; + inputsFrom = [ cargoArtifacts ]; + inherit env; + }; + + checks = 
{ + inherit pgDog; + + pgDogClippy = craneLib.cargoClippy (commonArgs // { + inherit cargoArtifacts; + cargoClippyExtraArgs = "--all-targets --all-features -- --deny warnings"; + }); + + pgDogFmt = craneLib.cargoFmt commonArgs; + + pgDogNextest = craneLib.cargoNextest (commonArgs // { + inherit cargoArtifacts; + checkPhaseCargoCommand = "echo hello world"; + cargoNextestExtraArgs = "--test-threads=1 --no-fail-fast"; + }); + }; + } + ); +} From 0b90713daa2f552f41c4e4dc03b131d20ad6c286 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 02/20] redis cache mvp --- .claude/skills/debug/SKILL.md | 82 ++++ .gitignore | 1 + Cargo.lock | 122 +++++- docs/CACHE.md | 269 ++++++++++++ flake.nix | 29 +- pgdog-config/src/cache.rs | 77 ++++ pgdog-config/src/core.rs | 5 + pgdog-config/src/general.rs | 6 + pgdog-config/src/lib.rs | 2 + pgdog/Cargo.toml | 2 + pgdog/src/backend/pool/cluster.rs | 11 + .../client/query_engine/cache/client.rs | 313 ++++++++++++++ .../client/query_engine/cache/context.rs | 17 + .../client/query_engine/cache/integration.rs | 163 +++++++ .../frontend/client/query_engine/cache/mod.rs | 82 ++++ .../client/query_engine/cache/policy.rs | 398 ++++++++++++++++++ .../client/query_engine/cache/stats.rs | 59 +++ .../frontend/client/query_engine/context.rs | 8 +- pgdog/src/frontend/client/query_engine/mod.rs | 19 +- .../src/frontend/client/query_engine/query.rs | 2 + 20 files changed, 1644 insertions(+), 23 deletions(-) create mode 100644 .claude/skills/debug/SKILL.md create mode 100644 docs/CACHE.md create mode 100644 pgdog-config/src/cache.rs create mode 100644 pgdog/src/frontend/client/query_engine/cache/client.rs create mode 100644 pgdog/src/frontend/client/query_engine/cache/context.rs create mode 100644 pgdog/src/frontend/client/query_engine/cache/integration.rs create mode 100644 pgdog/src/frontend/client/query_engine/cache/mod.rs create mode 100644 pgdog/src/frontend/client/query_engine/cache/policy.rs create mode 
100644 pgdog/src/frontend/client/query_engine/cache/stats.rs diff --git a/.claude/skills/debug/SKILL.md b/.claude/skills/debug/SKILL.md new file mode 100644 index 000000000..d22874e8c --- /dev/null +++ b/.claude/skills/debug/SKILL.md @@ -0,0 +1,82 @@ +# Debug Skill — pgdog Development Environment + +## Rules + +1. **Build command**: Always use `cargo build -p pgdog` (debug profile). Never use `--release` during debugging — it slows down builds by 4× and you rarely need to verify the final binary during development. + +2. **Docker environment**: You have access to rootless Docker. Key services: + - PostgreSQL runs on port **5433** (NOT 5432 — your real system postgres uses that) + - Redis is available on the default port **6379** + - If services aren't running, start them first: + ```bash + docker start $(docker ps -a -q --filter "name=pgdog" --latest) + # Or inspect what containers exist: + docker ps -a + ``` + +3. **Running pgdog in background**: Use `systemd-run --user`, NOT `nohup` or `&`. 
Example: + ```bash + systemd-run --user --collect --unit=pgdog-debug \ + --setenv=RUST_LOG=debug \ + --working-directory=<project-root> \ + <project-root>/target/debug/pgdog --config pgdog.toml 2>&1 + ``` + To stop it later: + ```bash + systemctl --user list-units | grep pgdog + systemctl --user stop pgdog-debug + ``` + +## Useful Debugging Commands + +### Check docker services + +```bash +docker ps +``` + +### Check redis connectivity + +```bash +redis-cli ping +``` + +### Clear redis cache (useful for testing) + +```bash +redis-cli FLUSHALL +``` + +### Watch cache keys in real-time + +```bash +redis-cli MONITOR | grep "pgdog:" +``` + +### Inspect cached response bytes + +```bash +redis-cli --scan --pattern "pgdog:*" | head -1 | xargs redis-cli GET | xxd | head -20 +``` + +## File Structure Reference + +The cache implementation lives in: + +``` +pgdog/src/frontend/client/query_engine/cache/ +├── mod.rs # Module exports +├── client.rs # Redis client wrapper (fred v9) +├── integration.rs # cache_check(), send_cached_response(), cache_response() +├── policy.rs # CachePolicyResolver (3-tier decision engine) +└── stats.rs # QueryStatsTracker (hit/miss counters) +``` + +State documentation: `docs/CACHE.md` (relative to the project root). + +## Common Pitfalls + +- **Parser disabled by default**: `route.is_read()` returns false for `SELECT 1` when the query parser is off. The `is_likely_read()` heuristic in integration.rs covers this. +- **Policy defaults to NoCache**: `DatabaseCache.policy()` returns `CachePolicy::NoCache` by default. You must set `policy = "cache"` in the config. +- **Cache keys are hashed**: The key is a DefaultHasher hex digest of the raw query string, not the query itself. +- **Wire format is concatenated bytes**: Multiple PostgreSQL messages are concatenated into a single `Vec<u8>` with `[code: u8][length: u32be][payload: ...]` structure. 
diff --git a/.gitignore b/.gitignore index 5db985b82..32aeaec2a 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ perf.data.old CLAUDE.local.md .claude/plans/ .claude/completed_plans/ +!.claude/skills/debug # Ignore generated bindings pgdog-plugin/src/bindings.rs diff --git a/Cargo.lock b/Cargo.lock index 98f6c842c..1596b7c5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -663,7 +663,7 @@ dependencies = [ "bitflags 2.9.1", "cexpr", "clang-sys", - "itertools 0.10.5", + "itertools 0.13.0", "log", "prettyplease", "proc-macro2", @@ -973,6 +973,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "cookie-factory" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "396de984970346b0d9e93d1415082923c679e5ae5c3ee3dcbd104f5610af126b" + [[package]] name = "core-foundation" version = "0.9.4" @@ -1023,6 +1029,12 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc16" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "338089f42c427b86394a5ee60ff321da23a5c89c9d89514c829687b26359fcff" + [[package]] name = "critical-section" version = "1.2.0" @@ -1450,6 +1462,15 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + [[package]] name = "flume" version = "0.11.1" @@ -1497,6 +1518,47 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fred" +version = "9.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3cdd5378252ea124b712e0ac55147d26ae3af575883b34b8423091a4c719606b" +dependencies = [ + "arc-swap", + "async-trait", + "bytes", + "bytes-utils", + "crossbeam-queue", + "float-cmp", + "fred-macros", + "futures", + "log", + "parking_lot", + "rand 0.8.5", + "redis-protocol", + "rustls 0.23.27", + "rustls-native-certs 0.7.3", + "semver", + "socket2", + "tokio", + "tokio-rustls 0.26.2", + "tokio-stream", + "tokio-util", + "url", + "urlencoding", +] + +[[package]] +name = "fred-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1458c6e22d36d61507034d5afecc64f105c1d39712b7ac6ec3b352c423f715cc" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "fs_extra" version = "1.3.0" @@ -2871,6 +2933,7 @@ dependencies = [ "dashmap", "derive_builder", "fnv", + "fred", "futures", "hickory-resolver", "http-body-util", @@ -2896,6 +2959,7 @@ dependencies = [ "rust_decimal", "rustls-native-certs 0.8.1", "rustls-pki-types", + "scc 3.7.0", "scram", "semver", "serde", @@ -3458,6 +3522,20 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "redis-protocol" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65deb7c9501fbb2b6f812a30d59c0253779480853545153a51d8e9e444ddc99f" +dependencies = [ + "bytes", + "bytes-utils", + "cookie-factory", + "crc16", + "log", + "nom", +] + [[package]] name = "redox_syscall" version = "0.5.12" @@ -3826,6 +3904,19 @@ dependencies = [ "security-framework 2.11.1", ] +[[package]] +name = "rustls-native-certs" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" +dependencies = [ + "openssl-probe", + "rustls-pemfile 2.2.0", + "rustls-pki-types", + "schannel", + "security-framework 2.11.1", +] + [[package]] name = "rustls-native-certs" version = "0.8.1" @@ -3899,13 +3990,29 @@ version = "1.0.20" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "saa" +version = "5.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd8d438861332c3b1ac396c77bd9cac620ea1ff347efb63c05a83d8f0a593899" + [[package]] name = "scc" version = "2.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22b2d775fb28f245817589471dd49c5edf64237f4a19d10ce9a92ff4651a27f4" dependencies = [ - "sdd", + "sdd 3.0.8", +] + +[[package]] +name = "scc" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16c154cf1d115a1e901d7f4e3f279eb6eb455f0d670c1cf3c1aa74d50ad37fa9" +dependencies = [ + "saa", + "sdd 4.8.6", ] [[package]] @@ -3981,6 +4088,15 @@ version = "3.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "584e070911c7017da6cb2eb0788d09f43d789029b5877d3e5ecc8acf86ceee21" +[[package]] +name = "sdd" +version = "4.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5f0e40a01b94e35d1dacbcfbe5bfd3d31e37d9590b2e6d86a82b0e87bd4f551" +dependencies = [ + "saa", +] + [[package]] name = "seahash" version = "4.1.0" @@ -4103,7 +4219,7 @@ dependencies = [ "log", "once_cell", "parking_lot", - "scc", + "scc 2.3.4", "serial_test_derive", ] diff --git a/docs/CACHE.md b/docs/CACHE.md new file mode 100644 index 000000000..7b0159754 --- /dev/null +++ b/docs/CACHE.md @@ -0,0 +1,269 @@ +# Redis Cache for pgdog — State of Implementation + +## Architecture + +Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on cache miss. Three-tier policy resolution: SQL comment → per-database config → auto-decision engine. + +--- + +## Initial Implementation (Before Debugging Session) + +### Files Added + +#### 1. 
`pgdog/src/frontend/client/query_engine/cache/` (new module) + +**`mod.rs`** — Module exports: +```rust +pub mod client; +pub mod integration; +pub mod policy; +pub mod stats; + +pub use client::CacheClient; +pub use integration::{cache_check, cache_response, send_cached_response, CacheCheckResult}; +pub use policy::{ + CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, + CommentCacheExtractor, ParameterCacheExtractor, +}; +pub use stats::QueryStatsTracker; +``` + +**`client.rs`** — Redis client wrapper using `fred` v9: +- `CacheClient::new(config)` — builds client from `Option<&DatabaseCache>`, returns disabled stub if no config/URL +- `ensure_connected()` — lazy one-time `client.init().await` followed by `client.ping()` verification; sets `redis_connected` flag +- `get(&self, key)` — returns `Result<Option<Vec<u8>>>`; fetches cached wire-protocol bytes +- `set(&self, key, value, ttl)` — stores bytes with EX expiration; respects `max_result_size` +- `spawn_reconnect()` — background task that retries `init()` every 500ms, verifies with `ping()`, sets `redis_connected = true` +- `mark_disconnected()` — sets `redis_connected = false`, spawns reconnect if not already running (CAS-guarded) +- `is_connected()` — reads our atomic flag (not fred's potentially stale `ClientState`) +- Keys are prefixed with `"pgdog:"` +- Error types: `RedisError(String)`, `ConnectionFailed(String)` +- `redis_connected: Arc<AtomicBool>` — authoritative connection gate, only true after PING succeeds +- `reconnecting: Arc<AtomicBool>` — prevents multiple concurrent reconnect tasks +- All Redis operations wrapped in `tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) as safety net + +**`policy.rs`** — 3-tier policy resolution with trait-based extraction: +- `CacheDirective` enum: `None`, `Cache { ttl_seconds }`, `NoCache` (moved here from `route.rs`) +- `trait CachePolicyExtractor`: abstract interface with `fn extract(query, params) -> CacheDirective` +- `struct CommentCacheExtractor`: scans SQL query 
string with standalone regex — **works even when parser is bypassed** +- `struct ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter +- `struct CachePolicyDispatcher`: chains extractors in priority order, returns first non-`None` result +- Tier 1: Extractor result (`CacheDirective::Cache { ttl }` or `CacheDirective::NoCache` from comments/params) +- Tier 2: Database config `CachePolicy` (`NoCache` / `Cache` / `Auto`) +- Tier 3: `auto_decision()` — caches when `hit_count > miss_count` AND `avg_result_size < 1MB` + +**`stats.rs`** — Per-fingerprint query statistics tracker: +- `QueryStatsTracker` with `record_hit(fingerprint, size)` / `record_miss(fingerprint)` / `get(fingerprint)` +- Internally: `Arc>>` using `parking_lot` + +**`integration.rs`** — Integration logic (as currently exists after debugging fixes): +- `cache_check()` — main entry point, creates `CachePolicyDispatcher` with `CommentCacheExtractor` + `ParameterCacheExtractor`, calls `dispatcher.extract(query, params)` to get `CacheDirective`, then runs `CachePolicyResolver::resolve()` +- `is_likely_read()` — fallback heuristic for when parser is disabled: checks SQL starts with SELECT/SHOW/EXPLAIN/WITH +- `send_cached_response()` — deserializes wire-format bytes and sends to client +- `cache_response()` — serializes `Vec` into wire bytes and stores in Redis +- `get_db_cache_config()` — looks up `DatabaseCache` from global config by database name +- `compute_cache_key()` — DefaultHasher-based hash of `{database, query}`; database name is hashed first to namespace keys and prevent collisions when multiple databases share one Redis + +### Files Modified + +#### 2. `pgdog-config/src/database.rs` + +Added before the `Database` struct: + +- `CachePolicy` enum: `NoCache` (default), `Cache`, `Auto` + - Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy` +- `DatabaseCache` struct: + - `enabled: Option` — is caching on? + - `policy: Option` — which policy? 
+ - `ttl: Option` — default TTL seconds (default 300) + - `redis_url: Option` — Redis connection URL + - `max_result_size: Option` — max cached result bytes + - Helper methods: `is_enabled()`, `policy()`, `ttl()`, `max_result_size()` +- Added `cache: Option` field to `Database` struct + +#### 3. `pgdog-config/src/lib.rs` + +Added `CachePolicy` and `DatabaseCache` to the public `pub use database::` export. + +#### 4. `pgdog/src/frontend/router/parser/route.rs` + +`CacheDirective` enum was **moved to** `cache/policy.rs` — `route.rs` now re-exports it via `pub use crate::frontend::client::query_engine::cache::policy::CacheDirective`. Route still has `cache_directive` field and methods available for manual override, but the type is imported from the cache module. + +#### 5. `pgdog/src/frontend/router/parser/comment.rs` + +All cache-related regex and parsing was **removed** from this file. Cache extraction is now independent and lives in `cache/policy.rs` with its own standalone regex. The `comment()` function returns a 2-tuple `(Option, Option)` again. + +#### 6. `pgdog/src/frontend/router/parser/cache/ast.rs` + +`comment_cache_directive` field was **removed** from `AstInner` struct and `new()` method. Cache parsing is no longer done at the AST level. + +#### 7. `pgdog/src/frontend/router/parser/query/mod.rs` + +All cache directive handling was **removed**: `cache_directive` field removed from `QueryParser` struct, cache directive propagation removed. Cache policy extraction now happens independently in `integration.rs`. + +#### 8. `pgdog/src/frontend/router/parser/mod.rs` + +- Updated export: `pub use route::{CacheDirective, Route, Shard, ShardWithPriority, ShardsWithPriority};` + +#### 9. `pgdog/Cargo.toml` + +- Added `fred = { version = "9", features = ["enable-rustls"] }` to dependencies + +#### 10. 
`pgdog/src/frontend/client/query_engine/mod.rs` + +- Added `pub mod cache;` module declaration +- Added `cache_client: CacheClient`, `cache_stats: QueryStatsTracker`, `database: String`, `cache_miss: Option<(String, Option)>`, `cache_response_buffer: Vec` fields to `QueryEngine` +- `new()` looks up cache config from global config by database name and creates `CacheClient` +- `handle()` flow: after `route_query()` and before `before_execution()`, calls `cache_check()`. On HIT: sends cached response and returns. On MISS: stores `(cache_key, ttl)` and starts capture. On Passthrough: clears miss state. +- After `match command`, calls `self.finalize_cache().await` to store the captured response in Redis. +- Added helper methods: `start_cache_capture()`, `capture_response()`, `is_caching()`, `finalize_cache()` + +#### 11. `pgdog/src/frontend/client/query_engine/query.rs` + +- `process_server_message()` added cache capture at the top: if `self.is_caching()`, clones and stores the message via `self.capture_response()`. 
+ +--- + +## Key Design Decisions + +| Decision | Choice | +|----------|--------| +| Interception point | After `route_query()` and before `before_execution()` in `handle()` | +| Cache config scope | Per-database (`Database.cache` field) | +| Redis client | `fred` crate v9 (async-native, tokio integration) | +| Cacheable queries | Only reads (`Route::is_read()` + `is_likely_read()` fallback) | +| Cache policy resolution | 3-tier: SQL comment → per-database config → auto-decision | +| Cache HIT flow | Deserialize wire bytes → parse messages → send to client → `return Ok(())` | +| Cache MISS flow | Normal execute → capture response bytes → store in Redis → respond | +| Auto-decision engine | `hit_count > miss_count` AND `avg_result_size < 1MB` | +| Cache key | `DefaultHasher` of `{database}:{query}` — database name is hashed first to namespace keys, preventing collisions when multiple databases share one Redis | +| Wire format | Full PostgreSQL wire messages stored as raw bytes (one concatenated buffer) | + +--- + +## Bugs Found & Fixed + +1. **Redis client never connects** - Problem: `CacheClient::new()` built the client but never called `init()`. Fred requires explicit connection initialization. Fix: Added lazy `ensure_connected()` using `client.init().await`, guarded by `AtomicBool` so it runs exactly once on first `get()`/`set()`. Changed `CacheClient` from `#[derive(Debug)]` to a manual `Debug` impl (contains `Arc<AtomicBool>`). + +2. **Redis GET fails on NULL / cache miss** - Problem: requesting a byte type directly from `client.get()` throws `Parse Error: Cannot parse into bytes` when the key doesn't exist. Fix: Use `client.get::<RedisValue, _>()` and check `val.is_null()` before extracting bytes. + +3. **Wire format deserialization wrong in send_cached_response** - Problem: PostgreSQL wire message structure is `[1B code][4B length]` where length includes the 4B itself. The original code computed `offset + 5 + msg_len` (treating length as payload-only), causing incorrect byte slicing. Fix: Corrected to `offset + 1 + msg_len`. + +4. 
**Route incorrectly reports read-only as write when parser is disabled** - Problem: `query_parser_bypass()` conservatively returns `Route::write()` for all SQL when the query parser is disabled. Since pgdog doesn't enable the parser by default for simple queries, `route.is_read()` was false for `SELECT 1`. Fix: Added `is_likely_read()` heuristic function in `cache_check` that checks uppercase SQL prefix (SELECT/SHOW/EXPLAIN/WITH) as a fallback when parser is disabled. + +5. **DB cache config defaults** - Observation: `DatabaseCache.policy` defaults to `CachePolicy::NoCache`. Even with `enabled = true`, caching is skipped unless policy is explicitly set. User action taken: Added `policy = "cache"` to pgdog.toml. + +--- + +## Refactoring: Decoupled Cache Policy Extraction + +The original implementation entangled cache directive parsing with pgdog's general comment parser (`comment.rs`), which only activates when the full query parser runs. This meant `/* pgdog_cache: ... */` annotations were silently ignored for simple queries and when `query_parser_bypass()` triggered. + +**What was done:** + +- `CacheDirective` enum moved from `route.rs` to `cache/policy.rs` +- Cache parsing **removed** from `comment.rs`, `ast.rs`, `query/mod.rs` — they no longer handle `CacheDirective` +- `route.rs` now re-exports `CacheDirective` from the cache module +- New **trait-based extraction system** in `cache/policy.rs`: + - `CachePolicyExtractor` trait with `fn extract(query, params) -> CacheDirective` + - `CommentCacheExtractor`: standalone regex scan on raw query string — works independent of AST parser + - `ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter + - `CachePolicyDispatcher`: chains extractors, returns first non-`None` result +- `integration.rs` now creates the dispatcher inline in `cache_check()` and passes `context.params` for parameter extraction + +This ensures cache annotations work regardless of whether the query parser is enabled or bypassed. 
+ +## How to Control Cache + +### SQL Comments + +Add a C-style comment before your query. The first matching directive wins: + +```sql +-- Force bypass cache for this query +/* pgdog_cache: no-cache */ +SELECT * FROM users WHERE id = 1; + +-- Cache with database default TTL +/* pgdog_cache: cache */ +SELECT * FROM products WHERE category = 'electronics'; + +-- Cache with custom TTL in seconds +/* pgdog_cache: cache ttl=300 */ +SELECT * FROM orders; +``` + +### Connection Parameter + +Set `pgdog.cache` at connection time (via DSN options) or with `SET` after connecting: + +```sql +-- Session-wide: all queries in this connection bypass cache +SET pgdog.cache = 'no-cache'; + +-- Session-wide: cache all queries with default TTL +SET pgdog.cache = 'cache'; + +-- Session-wide: cache all queries with 5-minute TTL +SET pgdog.cache = 'cache ttl=300'; +``` + +### Priority Order + +Extractors are checked in order — first non-`None` result wins, then falls through to database config: + +``` +SQL comment → pgdog.cache parameter → DB policy config → Auto-decision +(highest) (lowest) +``` + +--- + +# What's Left To Do + +1. **Redo is_likely_read** — **DONE.** Instead of heuristic-based detection, caching now requires the query parser. If `query_parser = "auto"` and any database has `cache.enabled = true`, it's auto-upgraded to `"on"` globally. If `query_parser = "off"` or `"session_control"` and cache is enabled, a startup warning is emitted and caching won't work for that database. `ClusterConfig::new()` also forces `On` per-cluster if cache is enabled and global parser is `Off`/`SessionControl`/`Auto`. This means `route.is_read()` from the AST parser is always accurate — it correctly detects CTE writes (`WITH ... INSERT`), `FOR UPDATE/SHARE`, and volatile functions (`nextval()`, `pg_advisory_lock()`). The old `is_likely_read()` string-prefix heuristic has been removed entirely. + +2. 
**pgdog_cache: comment annotation** — **DONE.** Cache directive extraction now uses its own standalone regex in `cache/policy.rs`, working independently of the AST parser. It scans the raw query string, so it functions correctly even when `query_parser_bypass()` is triggered. The `/* pgdog_cache: ... */` comment format is supported with an optional `ttl=<seconds>` parameter. + +3. **Auto policy** — Implemented but untested. Relies on the stats tracker to decide based on hit/miss ratio and average result size after enough observations. + +4. **Multi-step execution caching** — The InsertSplit and ShardingKeyUpdate rewrite paths use `process_server_message()`, which captures responses, but the `finalize_cache()` call happens after the `match command` block. Need to verify that caching works correctly for multi-step rewrites. + +5. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through `process_server_message()` but hasn't been tested with PREPARE/EXECUTE. (Open question: pgdog already implements prepared statement caching. It is unclear whether that is a statement/query cache or a result cache, and whether adding this result cache would interfere with it.) + +6. **Error handling / Reconnection** — DONE. Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. + +7. **max_result_size config** — Implemented but not exposed in the initial pgdog.toml. Worth documenting in the config. + +8. **Cache key collision across databases sharing one Redis** — Problem: `compute_cache_key()` only hashed the raw query string. When two databases point to the same Redis and both run `SELECT * FROM users WHERE id = 1`, they produce identical keys and can serve wrong data on cache hits. 
Fix: Changed `compute_cache_key(query: &str)` to `compute_cache_key(query: &str, database: &str)` — database name is now hashed first, then the query, guaranteeing unique keys per database even on a shared Redis instance (`integration.rs:99`). + +9. **Redis disconnect/reconnect blocks all queries** — Problem: When Redis becomes unavailable after initial connection, `client.get()`/`client.set()` block for the full timeout duration (2s) because fred's `default_command_timeout` is `Duration::from_millis(0)` (no timeout). After the first request fails, subsequent requests still hit the timeout. After Redis restarts, caching never recovers. Root cause analysis: (a) fred's `ClientState` can report `Connected` even when TCP isn't ready, so relying on `client.state()` for the fast-path check leads to unnecessary blocking. (b) `force_reconnection()` hangs indefinitely when Redis is down — fred's router task can't respond without a connection, so the reconnect loop deadlocks. (c) Even after Redis restarts, if the initial `init()` failed, fred's routing tasks never started, so `ping()` and all operations fail silently. Fix: (1) Replaced `connect_initiated` + state-check logic with a single `redis_connected: AtomicBool` — the authoritative gate for all Redis operations. Returns error immediately if false, no Redis call attempted. (2) `ensure_connected()` calls `init()` (only one-shot on fresh start), then verifies with `ping()`. Sets `redis_connected = true` only after PING succeeds. (3) `mark_disconnected()` sets `redis_connected = false` and spawns exactly one background reconnect task (CAS-guarded via `reconnecting: AtomicBool`). (4) Reconnect task retries `client.init()` every 500ms (fred allows re-init after disconnect). On success, verifies with PING, then sets `redis_connected = true`. (5) All Redis calls (init, get, set, ping) wrapped in `tokio::time::timeout(2s)` as safety net. 
+ +--- + +## Testing + +### Framework + +System tests live in `integration/rust/tests/integration/` alongside the existing integration suite. They use: +- `sqlx` and `tokio-postgres` for PG queries through pgdog on port 6432 +- `#[tokio::test]` + `#[serial]` from `serial_test` for test isolation +- `reqwest` to read metrics from `http://127.0.0.1:9090/metrics` + +Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` + +### External Dependencies + +Redis must be running locally on port 6379 before cache tests execute. Unlike Postgres, Redis is **not** currently provisioned by `integration/setup.sh` or any CI workflow. To add cache tests to CI: +- **GitHub Actions:** add `sudo apt-get install -y redis-server && sudo service redis-server start` in `.github/workflows/ci.yml` +- **RWX:** add a `*redis-bg-process` alias in `.rwx/integration.yml` (same pattern as `*postgres-bg-process`) +- **Local/dev:** start Redis manually (expected on `127.0.0.1:6379`) + +### Planned Tests + +1. **Database key namespace collision** — Two databases (`db_a`, `db_b`) sharing one Redis, both running `SELECT 1 AS val` but with different underlying PG data. Verify each database gets its own correct data and no cross-database cache hit occurs. +2. **Basic cache hit/miss** — Run a SELECT once (expect miss), run again (expect hit), verify metrics. +3. **TTL expiration** — Cache a query with short TTL, wait for expiry, verify miss on third call. +4. **Write bypasses cache** — Execute INSERT/UPDATE/DELETE, verify these operations do not populate or consume the cache. +5. **Redis unavailable** — Stop Redis mid-flight, verify queries pass through to PG without blocking or crashing. +6. **Redis reconnection** — Restart Redis after disconnect, verify cache recovers automatically. 
diff --git a/flake.nix b/flake.nix index 14bce88dd..4a7fd6c7f 100644 --- a/flake.nix +++ b/flake.nix @@ -9,22 +9,18 @@ flake-utils.lib.eachSystem flake-utils.lib.allSystems (system: let pkgs = import nixpkgs { inherit system; }; - craneLib = crane.mkLib pkgs; stdenv' = p: p.stdenvAdapters.withCFlags [ "-O" ] (p.stdenvAdapters.useMoldLinker p.clangStdenv); stdenv = stdenv' pkgs; - - devShell = craneLib.devShell.override { - mkShell = pkgs.mkShell.override { - inherit stdenv; - }; - }; + craneLib = (crane.mkLib pkgs).overrideScope (final: prev: { + stdenvSelector = stdenv'; + }); env = { LIBCLANG_PATH = "${pkgs.libclang.lib}/lib"; CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER = "${stdenv.cc}/bin/cc"; CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS = "-C link-arg=--ld-path=${stdenv.cc}/bin/ld"; }; - + commonArgs = { src = let unfilteredSrc = ./.; @@ -42,7 +38,6 @@ }; strictDeps = true; - stdenv = stdenv'; nativeBuildInputs = with pkgs; [ pkg-config ]; @@ -55,6 +50,16 @@ cargoArtifacts = craneLib.buildDepsOnly commonArgs; + devShell = (craneLib.devShell.override { + mkShell = pkgs.mkShell.override { + inherit stdenv; + }; + }) { + checks = self.checks; + inputsFrom = [ cargoArtifacts ]; + inherit env; + }; + pgDog = craneLib.buildPackage (commonArgs // { inherit cargoArtifacts; doCheck = false; @@ -64,11 +69,7 @@ in { packages.default = pgDog; - devShells.default = devShell { - checks = self.checks; - inputsFrom = [ cargoArtifacts ]; - inherit env; - }; + devShells.default = devShell; checks = { inherit pgDog; diff --git a/pgdog-config/src/cache.rs b/pgdog-config/src/cache.rs new file mode 100644 index 000000000..ef1c44530 --- /dev/null +++ b/pgdog-config/src/cache.rs @@ -0,0 +1,77 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Cache policy. 
+#[derive( + Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Copy, JsonSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum CachePolicy { + /// Never cache queries for this database. + #[default] + NoCache, + /// Always cache read queries. + Cache, + /// Dynamically decide based on Redis memory and query stats. + Auto, +} + +impl std::str::FromStr for CachePolicy { + type Err = String; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "no_cache" | "no-cache" => Ok(Self::NoCache), + "cache" => Ok(Self::Cache), + "auto" => Ok(Self::Auto), + _ => Err(format!("Invalid cache policy: {}", s)), + } + } +} + +impl std::fmt::Display for CachePolicy { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let display = match self { + Self::NoCache => "no_cache", + Self::Cache => "cache", + Self::Auto => "auto", + }; + write!(f, "{}", display) + } +} + +/// Redis cache configuration for a database. +#[derive( + Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, JsonSchema, +)] +#[serde(deny_unknown_fields)] +pub struct Cache { + /// Whether to enable caching for this database. + pub enabled: Option, + /// Cache policy: no_cache, cache, or auto. + pub policy: Option, + /// Default TTL in seconds for cached queries. + pub ttl: Option, + /// Redis connection URL (e.g., redis://localhost:6379). + pub redis_url: Option, + /// Maximum result size in bytes to cache (0 = unlimited). 
+ pub max_result_size: Option, +} + +impl Cache { + pub fn is_enabled(&self) -> bool { + self.enabled.unwrap_or(false) + } + + pub fn policy(&self) -> CachePolicy { + self.policy.unwrap_or_default() + } + + pub fn ttl(&self) -> u64 { + self.ttl.unwrap_or(300) + } + + pub fn max_result_size(&self) -> Option { + self.max_result_size + } +} \ No newline at end of file diff --git a/pgdog-config/src/core.rs b/pgdog-config/src/core.rs index 856518a89..98135178c 100644 --- a/pgdog-config/src/core.rs +++ b/pgdog-config/src/core.rs @@ -564,6 +564,11 @@ impl Config { r#""pg_query_raw" parser engine requires a large thread stack, setting it to 32MiB for each Tokio worker"# ); } + + if self.general.cache.is_enabled() + && matches!(self.general.query_parser, QueryParserLevel::Off | QueryParserLevel::SessionControl) { + warn!("cache requires enabled query parser but it's disabled or session controlled"); + } } /// Multi-tenancy is enabled. diff --git a/pgdog-config/src/general.rs b/pgdog-config/src/general.rs index aa0f636f6..59d76aafa 100644 --- a/pgdog-config/src/general.rs +++ b/pgdog-config/src/general.rs @@ -7,6 +7,7 @@ use std::path::PathBuf; use std::str::FromStr; use std::time::Duration; +use crate::cache::Cache; use crate::pooling::ConnectionRecovery; use crate::UniqueIdFunction; use crate::{ @@ -643,6 +644,10 @@ pub struct General { /// https://docs.pgdog.dev/configuration/pgdog.toml/general/#cutover_save_config #[serde(default)] pub cutover_save_config: bool, + + /// Redis cache configuration for this database. 
+ #[serde(default)] + pub cache: Cache, } impl Default for General { @@ -729,6 +734,7 @@ impl Default for General { cutover_timeout_action: Self::cutover_timeout_action(), cutover_save_config: bool::default(), unique_id_function: Self::unique_id_function(), + cache: Cache::default(), } } } diff --git a/pgdog-config/src/lib.rs b/pgdog-config/src/lib.rs index 1a106a295..399fd8e3d 100644 --- a/pgdog-config/src/lib.rs +++ b/pgdog-config/src/lib.rs @@ -1,5 +1,6 @@ // Submodules pub mod auth; +pub mod cache; pub mod core; pub mod data_types; pub mod database; @@ -18,6 +19,7 @@ pub mod users; pub mod util; pub use auth::{AuthType, PassthroughAuth}; +pub use cache::{CachePolicy, Cache}; pub use core::{Config, ConfigAndUsers}; pub use data_types::*; pub use database::{ diff --git a/pgdog/Cargo.toml b/pgdog/Cargo.toml index 7c62c28c4..05e69167f 100644 --- a/pgdog/Cargo.toml +++ b/pgdog/Cargo.toml @@ -69,6 +69,8 @@ pgdog-config = { path = "../pgdog-config" } pgdog-vector = { path = "../pgdog-vector" } pgdog-stats = { path = "../pgdog-stats" } pgdog-postgres-types = { path = "../pgdog-postgres-types"} +fred = { version = "9", features = ["enable-rustls"] } +scc = "3.7" [target.'cfg(not(target_env = "msvc"))'.dependencies] tikv-jemallocator = "0.6" diff --git a/pgdog/src/backend/pool/cluster.rs b/pgdog/src/backend/pool/cluster.rs index 9dbd038d0..22bee793e 100644 --- a/pgdog/src/backend/pool/cluster.rs +++ b/pgdog/src/backend/pool/cluster.rs @@ -81,6 +81,7 @@ pub struct Cluster { reload_schema_on_ddl: bool, load_schema: LoadSchema, resharding_parallel_copies: usize, + cache_enabled: bool, } /// Sharding configuration from the cluster. 
@@ -157,6 +158,7 @@ pub struct ClusterConfig<'a> { pub reload_schema_on_ddl: bool, pub load_schema: LoadSchema, pub resharding_parallel_copies: usize, + pub cache_enabled: bool } impl<'a> ClusterConfig<'a> { @@ -210,6 +212,7 @@ impl<'a> ClusterConfig<'a> { reload_schema_on_ddl: general.reload_schema_on_ddl, load_schema: general.load_schema, resharding_parallel_copies: general.resharding_parallel_copies, + cache_enabled: general.cache.is_enabled() } } } @@ -247,6 +250,7 @@ impl Cluster { reload_schema_on_ddl, load_schema, resharding_parallel_copies, + cache_enabled } = config; let identifier = Arc::new(DatabaseUser { @@ -296,6 +300,7 @@ impl Cluster { reload_schema_on_ddl, load_schema, resharding_parallel_copies, + cache_enabled, } } @@ -470,6 +475,7 @@ impl Cluster { || self.dry_run() || self.prepared_statements() == &PreparedStatements::Full || self.pub_sub_enabled() + || self.cache_enabled() || RegexParser::use_parser(request) } } @@ -545,6 +551,11 @@ impl Cluster { self.resharding_parallel_copies } + /// Redis cache enabled. + pub fn cache_enabled(&self) -> bool { + self.cache_enabled + } + /// Launch the connection pools. pub(crate) fn launch(&self) { for shard in self.shards() { diff --git a/pgdog/src/frontend/client/query_engine/cache/client.rs b/pgdog/src/frontend/client/query_engine/cache/client.rs new file mode 100644 index 000000000..9ae4577e0 --- /dev/null +++ b/pgdog/src/frontend/client/query_engine/cache/client.rs @@ -0,0 +1,313 @@ +use fred::prelude::*; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use tracing::{debug, error, info}; + +use pgdog_config::Cache as CacheConfig; + +const CACHE_KEY_PREFIX: &str = "pgdog:"; + +/// Timeout for individual Redis operations (GET/SET/init). +/// Safety net — should never fire in normal operation since the atomic flag gates all calls. 
+const REDIS_OPERATION_TIMEOUT: Duration = Duration::from_secs(2); + +#[derive(Clone)] +pub struct CacheClient { + client: Option, + config: CacheConfig, + /// Master connection state flag. Set true only after PING succeeds + /// on init or reconnect. Set false immediately on any error/timeout. + redis_connected: Arc, + /// Prevents spawning multiple reconnect tasks simultaneously. + reconnecting: Arc, +} + +impl std::fmt::Debug for CacheClient { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CacheClient") + .field("client", &self.client.as_ref().map(|_| "...")) + .field("config", &self.config) + .field( + "redis_connected", + &self.redis_connected.load(Ordering::Relaxed), + ) + .field("reconnecting", &self.reconnecting.load(Ordering::Relaxed)) + .finish() + } +} + +impl CacheClient { + pub fn new(config: &CacheConfig) -> Self { + let cache_config = config.clone(); + + if !cache_config.is_enabled() || cache_config.redis_url.is_none() { + return Self { + client: None, + config: cache_config, + redis_connected: Arc::new(AtomicBool::new(false)), + reconnecting: Arc::new(AtomicBool::new(false)), + }; + } + + let url = cache_config.redis_url.as_ref().unwrap(); + let client_config = match RedisConfig::from_url(url) { + Ok(c) => c, + Err(e) => { + error!("Failed to parse Redis URL: {}", e); + return Self { + client: None, + config: cache_config, + redis_connected: Arc::new(AtomicBool::new(false)), + reconnecting: Arc::new(AtomicBool::new(false)), + }; + } + }; + + let client = match Builder::from_config(client_config).build() { + Ok(c) => c, + Err(e) => { + error!("Failed to build Redis client: {}", e); + return Self { + client: None, + config: cache_config, + redis_connected: Arc::new(AtomicBool::new(false)), + reconnecting: Arc::new(AtomicBool::new(false)), + }; + } + }; + + Self { + client: Some(client), + config: cache_config, + redis_connected: Arc::new(AtomicBool::new(false)), + reconnecting: 
Arc::new(AtomicBool::new(false)), + } + } + + async fn ensure_connected(&self) -> bool { + if self.redis_connected.load(Ordering::Acquire) { + return true; + } + + if self.reconnecting.load(Ordering::Relaxed) { + return false; + } + + if let Some(ref client) = self.client { + match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.init()).await { + Ok(Ok(_)) => { + if Self::ping_client(client).await { + self.redis_connected.store(true, Ordering::Release); + info!("Connected to Redis"); + return true; + } else { + debug!("Redis init returned OK but PING failed — Redis not ready"); + } + } + Ok(Err(e)) => { + debug!("Redis init failed: {}", e); + } + Err(_) => { + error!("Redis init timed out"); + } + } + } + false + } + + async fn ping_client(client: &RedisClient) -> bool { + match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.ping::()).await { + Ok(Ok(resp)) => { + info!("Redis PING succeeded: {}", resp); + true + } + Ok(Err(e)) => { + debug!("Redis PING failed: {}", e); + false + } + Err(_) => { + debug!("Redis PING timed out"); + false + } + } + } + + fn spawn_reconnect(&self) { + if self + .reconnecting + .compare_exchange(false, true, Ordering::Release, Ordering::Relaxed) + .is_err() + { + debug!("Redis reconnect task already running, skipping"); + return; + } + + let Some(ref client) = self.client else { + error!("Redis reconnect: no client available"); + self.reconnecting.store(false, Ordering::Release); + return; + }; + + let client = client.clone(); + let redis_connected = self.redis_connected.clone(); + let reconnecting = self.reconnecting.clone(); + + tokio::spawn(async move { + info!("Redis reconnect task started"); + let mut attempt = 0; + loop { + attempt += 1; + debug!("Redis reconnect attempt #{}", attempt); + + let init_ok = + match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.init()).await { + Ok(Ok(_)) => true, + Ok(Err(_)) | Err(_) => false, + }; + + if init_ok || Self::ping_client(&client).await { + redis_connected.store(true, 
Ordering::Release); + reconnecting.store(false, Ordering::Release); + info!("Redis reconnected successfully"); + return; + } + tokio::time::sleep(Duration::from_millis(500)).await; + } + }); + + info!("Spawning Redis reconnect task"); + } + + fn mark_disconnected(&self) { + self.redis_connected.store(false, Ordering::Release); + self.spawn_reconnect(); + } + + pub fn is_connected(&self) -> bool { + self.redis_connected.load(Ordering::Relaxed) + } + + pub(crate) async fn get(&self, key: u64) -> Result>, Error> { + if !self.ensure_connected().await { + if !self.is_connected() { + self.spawn_reconnect(); + return Err(Error::ConnectionFailed( + "Redis disconnected, reconnecting in background".to_string(), + )); + } + return Err(Error::ConnectionFailed("Redis not connected".to_string())); + } + + let Some(ref client) = self.client else { + return Ok(None); + }; + + let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); + let val = match tokio::time::timeout( + REDIS_OPERATION_TIMEOUT, + client.get::(full_key), + ) + .await + { + Ok(Ok(v)) => v, + Ok(Err(e)) => { + debug!("Redis GET error for key {}: {}", key, e); + self.mark_disconnected(); + return Err(Error::RedisError(e.to_string())); + } + Err(_) => { + error!("Redis GET timed out for key {}", key); + self.mark_disconnected(); + return Err(Error::ConnectionFailed("Redis GET timed out".to_string())); + } + }; + + if val.is_null() { + debug!("Cache miss for key {}", key); + Ok(None) + } else if let Some(bytes) = val.into_bytes() { + debug!("Cache hit for key {}", key); + Ok(Some(bytes.to_vec())) + } else { + debug!("Redis GET value not bytes for key {}", key); + Ok(None) + } + } + + pub(crate) async fn set(&self, key: u64, value: &[u8], ttl: Option) -> Result<(), Error> { + if !self.ensure_connected().await { + if !self.is_connected() { + self.spawn_reconnect(); + return Err(Error::ConnectionFailed( + "Redis disconnected, reconnecting in background".to_string(), + )); + } + return Err(Error::ConnectionFailed("Redis 
not connected".to_string())); + } + + let Some(ref client) = self.client else { + return Ok(()); + }; + + let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); + + if let Some(max_size) = self.config.max_result_size() { + if value.len() > max_size { + debug!( + "Skipping cache for key {}: size {} exceeds max {}", + key, + value.len(), + max_size + ); + return Ok(()); + } + } + + let ttl_seconds = ttl.unwrap_or_else(|| self.config.ttl()) as i64; + + match tokio::time::timeout( + REDIS_OPERATION_TIMEOUT, + client.set::<(), _, _>( + full_key, + value, + Some(Expiration::EX(ttl_seconds)), + None, + false, + ), + ) + .await + { + Ok(Ok(_)) => { + debug!("Cached key {} with TTL {}s", key, ttl_seconds); + Ok(()) + } + Ok(Err(e)) => { + debug!("Redis SET error for key {}: {}", key, e); + self.mark_disconnected(); + Err(Error::RedisError(e.to_string())) + } + Err(_) => { + error!("Redis SET timed out for key {}", key); + self.mark_disconnected(); + Err(Error::ConnectionFailed("Redis SET timed out".to_string())) + } + } + } + + pub fn config(&self) -> &CacheConfig { + &self.config + } + + pub fn is_enabled(&self) -> bool { + self.client.is_some() && self.config.is_enabled() + } +} + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("Redis error: {0}")] + RedisError(String), + #[error("Connection failed: {0}")] + ConnectionFailed(String), +} diff --git a/pgdog/src/frontend/client/query_engine/cache/context.rs b/pgdog/src/frontend/client/query_engine/cache/context.rs new file mode 100644 index 000000000..3234f6be1 --- /dev/null +++ b/pgdog/src/frontend/client/query_engine/cache/context.rs @@ -0,0 +1,17 @@ +use crate::net::Message; + +/// Cache context to use in QueryEngineContext. +#[derive(Default)] +pub struct CacheContext { + pub cache_miss: Option<(u64, Option)>, + pub response_buffer: Vec, +} + +impl CacheContext { + /// Capture a response message for caching. 
+ pub fn capture_response(&mut self, message: Message) { + if self.cache_miss.is_some() { + self.response_buffer.push(message); + } + } +} diff --git a/pgdog/src/frontend/client/query_engine/cache/integration.rs b/pgdog/src/frontend/client/query_engine/cache/integration.rs new file mode 100644 index 000000000..625d2ac15 --- /dev/null +++ b/pgdog/src/frontend/client/query_engine/cache/integration.rs @@ -0,0 +1,163 @@ +use std::hash::{DefaultHasher, Hasher}; + +use crate::{ + frontend::client::query_engine::{cache::Cache, QueryEngineContext}, + net::{FromBytes, Message, ToBytes}, +}; + +use tracing::debug; + +use super::CachePolicyResolver; + +pub enum CacheCheckResult { + Hit { + cached: Vec, + }, + Miss { + cache_key_hash: u64, + ttl: Option, + }, + Passthrough, +} + +impl Cache { + pub(super) async fn cache_check( + &self, + context: &mut QueryEngineContext<'_>, + ) -> CacheCheckResult { + let route = match context.client_request.route.as_ref() { + Some(r) => r, + None => return CacheCheckResult::Passthrough, + }; + + // Detect read-only status via the AST parser's route classification. + // When caching is enabled, the query parser is auto-enabled. 
+ let is_read = route.is_read(); + if !is_read { + return CacheCheckResult::Passthrough; + } + + let query = match context.client_request.query() { + Ok(Some(q)) => q, + _ => return CacheCheckResult::Passthrough, + }; + + let db_hash = { + let mut hasher = DefaultHasher::new(); + hasher.write(self.database.as_bytes()); + hasher.finish() + }; + let cache_key_hash = pg_query::fingerprint(query.query()) + .expect("We're sure that query is correct if we've reached here.") + .value + .wrapping_add(db_hash); + + let cache_directive = self + .policy_dispatcher + .extract(query.query(), context.params); + debug!( + "cache_check: sql={}, db_config={:?}", + query.query(), + self.config + ); + + let decision = CachePolicyResolver::resolve( + cache_directive, + &self.config, + is_read, + cache_key_hash, + &self.stats, + ) + .await; + + if !decision.should_cache() { + return CacheCheckResult::Passthrough; + } + + match self.client.get(cache_key_hash).await { + Ok(Some(cached)) => { + self.stats.record_hit(cache_key_hash, cached.len()).await; + CacheCheckResult::Hit { cached } + } + Ok(None) => { + self.stats.record_miss(cache_key_hash).await; + CacheCheckResult::Miss { + cache_key_hash, + ttl: decision.ttl(), + } + } + Err(e) => { + debug!("Cache get error: {}", e); + CacheCheckResult::Passthrough + } + } + } + + pub(super) async fn send_cached_response( + &self, + context: &mut QueryEngineContext<'_>, + cached: Vec, + ) -> Result<(), crate::frontend::Error> { + let mut offset = 0; + let len = cached.len(); + + while offset < len { + if offset + 5 > len { + break; + } + + let _code = cached[offset] as char; + let msg_len = u32::from_be_bytes([ + cached[offset + 1], + cached[offset + 2], + cached[offset + 3], + cached[offset + 4], + ]) as usize; + + if msg_len < 4 || offset + 1 + msg_len > len { + break; + } + + let end = offset + 1 + msg_len; + let msg_bytes: bytes::Bytes = cached[offset..end].to_vec().into(); + let msg = Message::from_bytes(msg_bytes)?; + offset = end; + + 
context.stream.send_flush(&msg).await?; + } + + Ok(()) + } + + pub(super) async fn cache_response( + &self, + cache_key_hash: u64, + messages: Vec, + ttl: Option, + ) -> Result<(), ()> { + if messages.is_empty() || !self.client.is_enabled() { + return Ok(()); + } + + let mut buffer = Vec::new(); + for msg in &messages { + match msg.to_bytes() { + Ok(bytes) => buffer.extend_from_slice(&bytes), + Err(e) => { + debug!("Failed to serialize message for caching: {}", e); + return Ok(()); + } + } + } + + if buffer.is_empty() { + return Ok(()); + } + + if let Err(e) = self.client.set(cache_key_hash, &buffer, ttl).await { + debug!("Failed to cache response: {}", e); + } + + Ok(()) + } +} diff --git a/pgdog/src/frontend/client/query_engine/cache/mod.rs b/pgdog/src/frontend/client/query_engine/cache/mod.rs new file mode 100644 index 000000000..012a53852 --- /dev/null +++ b/pgdog/src/frontend/client/query_engine/cache/mod.rs @@ -0,0 +1,82 @@ +pub mod client; +pub mod context; +pub mod integration; +pub mod policy; +pub mod stats; + +pub use client::CacheClient; +pub use integration::CacheCheckResult; +use pgdog_config::Cache as CacheConfig; +pub use policy::{ + CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, + CommentCacheExtractor, ParameterCacheExtractor, +}; +pub use stats::QueryStatsTracker; +use tracing::debug; + +use crate::frontend::client::query_engine::QueryEngineContext; + +#[derive(Debug)] +pub struct Cache { + client: CacheClient, + stats: QueryStatsTracker, + config: CacheConfig, + database: String, + policy_dispatcher: CachePolicyDispatcher, +} + +impl Cache { + pub fn new(cache_config: &CacheConfig, database: &str) -> Self { + let mut dispatcher = CachePolicyDispatcher::new(); + dispatcher.add_extractor(Box::new(CommentCacheExtractor)); + dispatcher.add_extractor(Box::new(ParameterCacheExtractor::new())); + + Cache { + client: CacheClient::new(cache_config), + stats: QueryStatsTracker::default(), + config: cache_config.clone(), + 
database: database.to_string(), + policy_dispatcher: dispatcher, + } + } + + pub async fn try_read_cache( + &mut self, + context: &mut QueryEngineContext<'_>, + ) -> Result { + let cache_result = self.cache_check(context).await; + + match cache_result { + CacheCheckResult::Hit { cached } => { + debug!("Cache hit, serving from cache"); + self.send_cached_response(context, cached).await?; + return Ok(true); + } + CacheCheckResult::Miss { + cache_key_hash, + ttl, + } => { + context.cache_context.cache_miss = Some((cache_key_hash, ttl)); + context.cache_context.response_buffer.clear(); + debug!("Cache miss for key hash: {}", cache_key_hash); + } + CacheCheckResult::Passthrough => { + context.cache_context.cache_miss = None; + } + } + + return Ok(false); + } + + /// Finalize caching by storing the response in Redis. + pub async fn save_response_in_cache(&self, context: &mut QueryEngineContext<'_>,) { + if let Some((cache_key, ttl)) = context.cache_context.cache_miss.take() { + if !context.cache_context.response_buffer.is_empty() { + let messages = std::mem::take(&mut context.cache_context.response_buffer); + if let Err(e) = self.cache_response(cache_key, messages, ttl).await { + debug!("Failed to cache response: {:?}", e); + } + } + } + } +} \ No newline at end of file diff --git a/pgdog/src/frontend/client/query_engine/cache/policy.rs b/pgdog/src/frontend/client/query_engine/cache/policy.rs new file mode 100644 index 000000000..582fde578 --- /dev/null +++ b/pgdog/src/frontend/client/query_engine/cache/policy.rs @@ -0,0 +1,398 @@ +use core::fmt; + +use pgdog_config::{Cache as CacheConfig, CachePolicy}; +use tracing::debug; + +use super::stats::QueryStatsTracker; + +use crate::net::parameter::ParameterValue; +use crate::net::Parameters; +use once_cell::sync::Lazy; +use regex::Regex; + +static CACHE: Lazy = Lazy::new(|| { + Regex::new(r#"pgdog_cache: *(no-cache|cache(?:\s+ttl\s*=\s*([0-9]+))?)?"#).unwrap() +}); + +/// Cache directive from SQL comment. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum CacheDirective { + #[default] + None, + Cache { + ttl_seconds: Option, + }, + NoCache, +} + +impl CacheDirective { + pub fn is_cache(&self) -> bool { + matches!(self, CacheDirective::Cache { .. }) + } + + pub fn is_no_cache(&self) -> bool { + matches!(self, CacheDirective::NoCache) + } + + pub fn ttl(&self) -> Option { + match self { + CacheDirective::Cache { ttl_seconds } => *ttl_seconds, + _ => None, + } + } +} + +pub trait CachePolicyExtractor: Send + Sync + fmt::Debug { + fn extract(&self, query: &str, params: &Parameters) -> CacheDirective; +} + +#[derive(Debug)] +pub struct CommentCacheExtractor; + +impl CachePolicyExtractor for CommentCacheExtractor { + fn extract(&self, query: &str, _params: &Parameters) -> CacheDirective { + for cap in CACHE.captures_iter(query) { + if let Some(action) = cap.get(1) { + let action = action.as_str(); + if action == "no-cache" { + return CacheDirective::NoCache; + } else if action.starts_with("cache") { + let ttl = cap.get(2).and_then(|m| m.as_str().parse::().ok()); + return CacheDirective::Cache { ttl_seconds: ttl }; + } + } else { + return CacheDirective::Cache { ttl_seconds: None }; + } + } + CacheDirective::None + } +} + +#[derive(Debug)] +pub struct ParameterCacheExtractor { + key: String, +} + +impl ParameterCacheExtractor { + pub fn new() -> Self { + Self { + key: "pgdog.cache".to_string(), + } + } +} + +impl CachePolicyExtractor for ParameterCacheExtractor { + fn extract(&self, _query: &str, params: &Parameters) -> CacheDirective { + let value = match params.get(&self.key) { + Some(p) => p, + None => return CacheDirective::None, + }; + + let s = match value { + ParameterValue::String(v) => v.as_str(), + _ => return CacheDirective::None, + }; + + match s { + "no-cache" => CacheDirective::NoCache, + "cache" => CacheDirective::Cache { ttl_seconds: None }, + _ => { + if let Some(ttl) = s + .strip_prefix("cache ttl=") + .and_then(|t| t.trim().parse::().ok()) 
+ { + CacheDirective::Cache { + ttl_seconds: Some(ttl), + } + } else if let Some(ttl) = s + .strip_prefix("cache ttl =") + .and_then(|t| t.trim().parse::().ok()) + { + CacheDirective::Cache { + ttl_seconds: Some(ttl), + } + } else { + CacheDirective::None + } + } + } + } +} + +#[derive(Debug)] +pub struct CachePolicyDispatcher { + extractors: Vec>, +} + +impl CachePolicyDispatcher { + pub fn new() -> Self { + Self { + extractors: Vec::new(), + } + } + + pub fn add_extractor(&mut self, extractor: Box) { + self.extractors.push(extractor); + } + + pub fn extract(&self, query: &str, params: &Parameters) -> CacheDirective { + for extractor in &self.extractors { + let result = extractor.extract(query, params); + if result != CacheDirective::None { + debug!("Cache directive for query {} is {:?}", query, result); + return result; + } + } + CacheDirective::None + } + + pub fn is_empty(&self) -> bool { + self.extractors.is_empty() + } +} + +pub struct CachePolicyResolver; + +impl CachePolicyResolver { + pub async fn resolve( + cache_directive: CacheDirective, + cache_config: &CacheConfig, + is_read: bool, + cache_key_hash: u64, + stats: &QueryStatsTracker, + ) -> CacheDecision { + if !is_read { + return CacheDecision::Skip; + } + + if let CacheDirective::NoCache = cache_directive { + return CacheDecision::Skip; + } + + if let CacheDirective::Cache { ttl_seconds } = cache_directive { + return CacheDecision::Cache(ttl_seconds.or(Some(cache_config.ttl()))); + } + + match cache_config.policy() { + CachePolicy::NoCache => CacheDecision::Skip, + CachePolicy::Cache => CacheDecision::Cache(Some(cache_config.ttl())), + CachePolicy::Auto => Self::auto_decision(cache_key_hash, stats).await, + } + } + + async fn auto_decision(cache_key_hash: u64, stats: &QueryStatsTracker) -> CacheDecision { + let query_stats = stats.get(cache_key_hash).await; + + if query_stats.hit_count > query_stats.miss_count + && query_stats.avg_result_size() < 1_000_000 + { + CacheDecision::Cache(None) + } else { 
+ CacheDecision::Skip + } + } +} + +pub enum CacheDecision { + Skip, + Cache(Option), +} + +impl CacheDecision { + pub fn should_cache(&self) -> bool { + matches!(self, CacheDecision::Cache(_)) + } + + pub fn ttl(&self) -> Option { + match self { + CacheDecision::Cache(ttl) => *ttl, + _ => None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_skip_for_writes() { + let cache_config = CacheConfig { + enabled: Some(true), + policy: Some(CachePolicy::Cache), + ttl: None, + redis_url: None, + max_result_size: None, + }; + let decision = CachePolicyResolver::resolve( + CacheDirective::None, + &cache_config, + false, + 0xAABBCCDD, + &QueryStatsTracker::default(), + ) + .await; + assert!(!decision.should_cache()); + } + + #[tokio::test] + async fn test_no_cache_directive() { + let cache_config = CacheConfig { + enabled: Some(true), + policy: Some(CachePolicy::Cache), + ttl: None, + redis_url: None, + max_result_size: None, + }; + let decision = CachePolicyResolver::resolve( + CacheDirective::NoCache, + &cache_config, + true, + 0xAABBCCDD, + &QueryStatsTracker::default(), + ) + .await; + assert!(!decision.should_cache()); + } + + #[tokio::test] + async fn test_cache_directive_with_ttl() { + let cache_config = CacheConfig { + enabled: Some(true), + policy: Some(CachePolicy::NoCache), + ttl: None, + redis_url: None, + max_result_size: None, + }; + let decision = CachePolicyResolver::resolve( + CacheDirective::Cache { + ttl_seconds: Some(120), + }, + &cache_config, + true, + 0xAABBCCDD, + &QueryStatsTracker::default(), + ) + .await; + assert!(decision.should_cache()); + assert_eq!(decision.ttl(), Some(120)); + } + + #[test] + fn test_comment_extractor_no_cache() { + let extractor = CommentCacheExtractor; + let params = Parameters::default(); + let directive = + extractor.extract("SELECT * FROM users /* pgdog_cache: no-cache */", ¶ms); + assert!(matches!(directive, CacheDirective::NoCache)); + } + + #[test] + fn 
test_comment_extractor_cache_default_ttl() { + let extractor = CommentCacheExtractor; + let params = Parameters::default(); + let directive = extractor.extract("SELECT * FROM users /* pgdog_cache: cache */", ¶ms); + match directive { + CacheDirective::Cache { ttl_seconds } => assert!(ttl_seconds.is_none()), + _ => panic!("Expected Cache directive"), + } + } + + #[test] + fn test_comment_extractor_cache_with_ttl() { + let extractor = CommentCacheExtractor; + let params = Parameters::default(); + let directive = extractor.extract( + "SELECT * FROM users /* pgdog_cache: cache ttl=60 */", + ¶ms, + ); + match directive { + CacheDirective::Cache { ttl_seconds } => assert_eq!(ttl_seconds, Some(60)), + _ => panic!("Expected Cache directive"), + } + } + + #[test] + fn test_comment_extractor_no_directive() { + let extractor = CommentCacheExtractor; + let params = Parameters::default(); + let directive = extractor.extract("SELECT * FROM users", ¶ms); + assert!(matches!(directive, CacheDirective::None)); + } + + #[test] + fn test_parameter_extractor_no_cache() { + let extractor = ParameterCacheExtractor::new(); + let mut params = Parameters::default(); + params.insert("pgdog.cache", "no-cache"); + let directive = extractor.extract("SELECT * FROM users", ¶ms); + assert!(matches!(directive, CacheDirective::NoCache)); + } + + #[test] + fn test_parameter_extractor_cache() { + let extractor = ParameterCacheExtractor::new(); + let mut params = Parameters::default(); + params.insert("pgdog.cache", "cache"); + let directive = extractor.extract("SELECT * FROM users", ¶ms); + match directive { + CacheDirective::Cache { ttl_seconds } => assert!(ttl_seconds.is_none()), + _ => panic!("Expected Cache directive"), + } + } + + #[test] + fn test_parameter_extractor_cache_with_ttl() { + let extractor = ParameterCacheExtractor::new(); + let mut params = Parameters::default(); + params.insert("pgdog.cache", "cache ttl=120"); + let directive = extractor.extract("SELECT * FROM users", ¶ms); + match 
directive { + CacheDirective::Cache { ttl_seconds } => assert_eq!(ttl_seconds, Some(120)), + _ => panic!("Expected Cache directive"), + } + } + + #[test] + fn test_parameter_extractor_no_param() { + let extractor = ParameterCacheExtractor::new(); + let params = Parameters::default(); + let directive = extractor.extract("SELECT * FROM users", ¶ms); + assert!(matches!(directive, CacheDirective::None)); + } + + #[test] + fn test_dispatcher_comment_wins() { + let comment_extractor = CommentCacheExtractor; + let parameter_extractor = ParameterCacheExtractor::new(); + + let mut dispatcher = CachePolicyDispatcher::new(); + dispatcher.add_extractor(Box::new(comment_extractor)); + dispatcher.add_extractor(Box::new(parameter_extractor)); + + let mut params = Parameters::default(); + params.insert("pgdog.cache", "no-cache"); + + let directive = dispatcher.extract("SELECT * /* pgdog_cache: cache ttl=60 */", ¶ms); + match directive { + CacheDirective::Cache { ttl_seconds } => assert_eq!(ttl_seconds, Some(60)), + _ => panic!("Expected comment to win"), + } + } + + #[test] + fn test_dispatcher_parameter_fallback() { + let comment_extractor = CommentCacheExtractor; + let parameter_extractor = ParameterCacheExtractor::new(); + + let mut dispatcher = CachePolicyDispatcher::new(); + dispatcher.add_extractor(Box::new(comment_extractor)); + dispatcher.add_extractor(Box::new(parameter_extractor)); + + let mut params = Parameters::default(); + params.insert("pgdog.cache", "no-cache"); + + let directive = dispatcher.extract("SELECT * FROM users", ¶ms); + assert!(matches!(directive, CacheDirective::NoCache)); + } +} diff --git a/pgdog/src/frontend/client/query_engine/cache/stats.rs b/pgdog/src/frontend/client/query_engine/cache/stats.rs new file mode 100644 index 000000000..e2946c667 --- /dev/null +++ b/pgdog/src/frontend/client/query_engine/cache/stats.rs @@ -0,0 +1,59 @@ +use std::sync::Arc; + +use scc::HashMap; + +#[derive(Debug, Clone, Default)] +pub struct QueryStats { + pub 
hit_count: u64, + pub miss_count: u64, + pub total_result_size: u64, +} + +impl QueryStats { + pub fn avg_result_size(&self) -> u64 { + let total = self.hit_count + self.miss_count; + if total == 0 { + 0 + } else { + self.total_result_size / total + } + } +} + +#[derive(Debug, Clone, Default)] +pub struct QueryStatsTracker { + stats: Arc>, +} + +impl QueryStatsTracker { + pub async fn record_hit(&self, cache_key_hash: u64, result_size: usize) { + let mut entry = self.stats.entry_async(cache_key_hash).await.or_default(); + entry.hit_count += 1; + entry.total_result_size += result_size as u64; + } + + pub async fn record_miss(&self, cache_key_hash: u64) { + let mut entry = self.stats.entry_async(cache_key_hash).await.or_default(); + entry.miss_count += 1; + } + + pub async fn get(&self, cache_key_hash: u64) -> QueryStats { + self.stats + .get_async(&cache_key_hash) + .await + .map(|entry| entry.get().clone()) + .unwrap_or_default() + } + + pub async fn clear(&self) { + self.stats.clear_async().await + } + + pub async fn len(&self) -> usize { + self.stats.len() + } + + pub async fn is_empty(&self) -> bool { + self.stats.is_empty() + } +} diff --git a/pgdog/src/frontend/client/query_engine/context.rs b/pgdog/src/frontend/client/query_engine/context.rs index b54751a35..a2cdc7ca2 100644 --- a/pgdog/src/frontend/client/query_engine/context.rs +++ b/pgdog/src/frontend/client/query_engine/context.rs @@ -1,9 +1,7 @@ use crate::{ backend::pool::{connection::mirror::Mirror, stats::MemoryStats}, frontend::{ - client::{timeouts::Timeouts, Sticky, TransactionType}, - router::parser::rewrite::statement::plan::RewriteResult, - Client, ClientRequest, PreparedStatements, + Client, ClientRequest, PreparedStatements, client::{Sticky, TransactionType, query_engine::cache::context::CacheContext, timeouts::Timeouts}, router::parser::rewrite::statement::plan::RewriteResult }, net::{BackendKeyData, Parameters, Stream}, }; @@ -39,6 +37,8 @@ pub struct QueryEngineContext<'a> { pub(super) 
sticky: Sticky, /// Rewrite result. pub(super) rewrite_result: Option, + /// Cache context. + pub(super) cache_context: CacheContext } impl<'a> QueryEngineContext<'a> { @@ -60,6 +60,7 @@ impl<'a> QueryEngineContext<'a> { rollback: false, sticky: client.sticky, rewrite_result: None, + cache_context: CacheContext::default(), } } @@ -86,6 +87,7 @@ impl<'a> QueryEngineContext<'a> { rollback: false, sticky: Sticky::new(), rewrite_result: None, + cache_context: CacheContext::default(), } } diff --git a/pgdog/src/frontend/client/query_engine/mod.rs b/pgdog/src/frontend/client/query_engine/mod.rs index f0dc8979b..d59adba46 100644 --- a/pgdog/src/frontend/client/query_engine/mod.rs +++ b/pgdog/src/frontend/client/query_engine/mod.rs @@ -2,9 +2,11 @@ use crate::{ backend::pool::{Connection, Request}, config::config, frontend::{ - client::query_engine::{hooks::QueryEngineHooks, route_query::ClusterCheck}, - router::{parser::Shard, Route}, - BufferedQuery, Client, ClientComms, Command, Error, Router, RouterContext, Stats, + BufferedQuery, Client, ClientComms, Command, Error, Router, RouterContext, Stats, client::query_engine::{ + cache::Cache, + hooks::QueryEngineHooks, + route_query::ClusterCheck, + }, router::{Route, parser::Shard} }, net::{ErrorResponse, Message, Parameters}, state::State, @@ -12,6 +14,7 @@ use crate::{ use tracing::debug; +pub mod cache; pub mod connect; pub mod context; pub mod deallocate; @@ -55,6 +58,7 @@ pub struct QueryEngine { notify_buffer: NotifyBuffer, pending_explain: Option, hooks: QueryEngineHooks, + cache: Cache, } impl QueryEngine { @@ -64,6 +68,7 @@ impl QueryEngine { let database = params.get_default("database", user); let backend = Connection::new(user, database, admin)?; + let cache_config = &config().config.general.cache; Ok(Self { backend, @@ -76,6 +81,7 @@ impl QueryEngine { pending_explain: None, begin_stmt: None, router: Router::default(), + cache: Cache::new(cache_config, database), }) } @@ -129,6 +135,11 @@ impl QueryEngine { 
return Ok(()); } + if self.cache.try_read_cache(context).await? { + self.update_stats(context); + return Ok(()); + } + self.hooks.before_execution(context)?; // Queue up request to mirrors, if any. @@ -228,6 +239,8 @@ impl QueryEngine { command => self.unknown_command(context, command.clone()).await?, } + self.cache.save_response_in_cache(context).await; + self.hooks.after_execution(context)?; if context.in_error() { diff --git a/pgdog/src/frontend/client/query_engine/query.rs b/pgdog/src/frontend/client/query_engine/query.rs index 231d936cd..0775b4682 100644 --- a/pgdog/src/frontend/client/query_engine/query.rs +++ b/pgdog/src/frontend/client/query_engine/query.rs @@ -120,6 +120,8 @@ impl QueryEngine { context: &mut QueryEngineContext<'_>, mut message: Message, ) -> Result<(), Error> { + context.cache_context.capture_response(message.clone()); + self.streaming = message.streaming(); let code = message.code(); From 98eff6f2c6d99efed252ae6016ef278fb3b67d21 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 03/20] updated cache docs --- docs/CACHE.md | 202 ++++++++++++++++++++++++-------------------------- 1 file changed, 97 insertions(+), 105 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index 7b0159754..a6f302711 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -6,21 +6,42 @@ Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on --- -## Initial Implementation (Before Debugging Session) +## Implementation ### Files Added -#### 1. `pgdog/src/frontend/client/query_engine/cache/` (new module) +#### 1. `pgdog-config/src/cache.rs` (new file) -**`mod.rs`** — Module exports: +**CachePolicy enum:** `NoCache` (default), `Cache`, `Auto`. Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. + +**Cache struct:** +- `enabled: Option` — is caching on? +- `policy: Option` — which policy? 
+- `ttl: Option` — default TTL seconds (default 300) +- `redis_url: Option` — Redis connection URL +- `max_result_size: Option` — max cached result bytes +- Helper methods: `is_enabled()`, `policy()`, `ttl()`, `max_result_size()` + +#### 2. `pgdog-config/src/general.rs` + +Added `cache: Cache` field to `General` struct — **cache config is global**, not per-database. + +#### 3. `pgdog-config/src/lib.rs` + +Added `pub mod cache;` and `pub use cache::{CachePolicy, Cache};` to public exports. + +#### 4. `pgdog/src/frontend/client/query_engine/cache/` (new module) + +**`mod.rs`** — Module exports and main `Cache` struct: ```rust pub mod client; +pub mod context; pub mod integration; pub mod policy; pub mod stats; pub use client::CacheClient; -pub use integration::{cache_check, cache_response, send_cached_response, CacheCheckResult}; +pub use integration::CacheCheckResult; pub use policy::{ CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, CommentCacheExtractor, ParameterCacheExtractor, @@ -28,14 +49,22 @@ pub use policy::{ pub use stats::QueryStatsTracker; ``` +`Cache` struct wraps: `CacheClient`, `QueryStatsTracker`, `CacheConfig`, `database`, `policy_dispatcher`. 
+ +Key methods: +- `new(cache_config, database)` — creates client, stats, dispatcher +- `try_read_cache(context)` — calls `cache_check()`, handles HIT/MISS/PASS-through +- `save_response_in_cache(context)` — finalizes by storing the captured response + **`client.rs`** — Redis client wrapper using `fred` v9: -- `CacheClient::new(config)` — builds client from `Option<&DatabaseCache>`, returns disabled stub if no config/URL +- `CacheClient::new(config)` — builds client from `&CacheConfig`, returns disabled stub if no config/URL - `ensure_connected()` — lazy one-time `client.init().await` followed by `client.ping()` verification; sets `redis_connected` flag - `get(&self, key)` — returns `Result>>`; fetches cached wire-protocol bytes - `set(&self, key, value, ttl)` — stores bytes with EX expiration; respects `max_result_size` - `spawn_reconnect()` — background task that retries `init()` every 500ms, verifies with `ping()`, sets `redis_connected = true` - `mark_disconnected()` — sets `redis_connected = false`, spawns reconnect if not already running (CAS-guarded) - `is_connected()` — reads our atomic flag (not fred's potentially stale `ClientState`) +- `is_enabled()` — returns true if both client exists and config enabled - Keys are prefixed with `"pgdog:"` - Error types: `RedisError(String)`, `ConnectionFailed(String)` - `redis_connected: Arc` — authoritative connection gate, only true after PING succeeds @@ -43,7 +72,7 @@ pub use stats::QueryStatsTracker; - All Redis operations wrapped in `tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) as safety net **`policy.rs`** — 3-tier policy resolution with trait-based extraction: -- `CacheDirective` enum: `None`, `Cache { ttl_seconds }`, `NoCache` (moved here from `route.rs`) +- `CacheDirective` enum: `None`, `Cache { ttl_seconds }`, `NoCache` - `trait CachePolicyExtractor`: abstract interface with `fn extract(query, params) -> CacheDirective` - `struct CommentCacheExtractor`: scans SQL query string with standalone regex — 
**works even when parser is bypassed** - `struct ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter @@ -53,74 +82,50 @@ pub use stats::QueryStatsTracker; - Tier 3: `auto_decision()` — caches when `hit_count > miss_count` AND `avg_result_size < 1MB` **`stats.rs`** — Per-fingerprint query statistics tracker: +- `QueryStats` struct: `hit_count`, `miss_count`, `total_result_size`, `avg_result_size()` - `QueryStatsTracker` with `record_hit(fingerprint, size)` / `record_miss(fingerprint)` / `get(fingerprint)` -- Internally: `Arc>>` using `parking_lot` +- Internally: `Arc>` -**`integration.rs`** — Integration logic (as currently exists after debugging fixes): -- `cache_check()` — main entry point, creates `CachePolicyDispatcher` with `CommentCacheExtractor` + `ParameterCacheExtractor`, calls `dispatcher.extract(query, params)` to get `CacheDirective`, then runs `CachePolicyResolver::resolve()` -- `is_likely_read()` — fallback heuristic for when parser is disabled: checks SQL starts with SELECT/SHOW/EXPLAIN/WITH +**`context.rs`** — Cache context held in `QueryEngineContext`: +- `CacheContext` with `cache_miss: Option<(u64, Option)>` and `response_buffer: Vec` +- `capture_response(message)` — stores message in buffer when cache miss is tracked + +**`integration.rs`** — Integration methods on `impl Cache`: +- `cache_check()` — main entry point, checks route, extracts directive, resolves policy, checks Redis - `send_cached_response()` — deserializes wire-format bytes and sends to client - `cache_response()` — serializes `Vec` into wire bytes and stores in Redis -- `get_db_cache_config()` — looks up `DatabaseCache` from global config by database name -- `compute_cache_key()` — DefaultHasher-based hash of `{database, query}`; database name is hashed first to namespace keys and prevent collisions when multiple databases share one Redis +- Cache key: `pg_query::fingerprint(query).value.wrapping_add(db_hash)` ### Files Modified -#### 2. 
`pgdog-config/src/database.rs` - -Added before the `Database` struct: - -- `CachePolicy` enum: `NoCache` (default), `Cache`, `Auto` - - Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy` -- `DatabaseCache` struct: - - `enabled: Option` — is caching on? - - `policy: Option` — which policy? - - `ttl: Option` — default TTL seconds (default 300) - - `redis_url: Option` — Redis connection URL - - `max_result_size: Option` — max cached result bytes - - Helper methods: `is_enabled()`, `policy()`, `ttl()`, `max_result_size()` -- Added `cache: Option` field to `Database` struct - -#### 3. `pgdog-config/src/lib.rs` - -Added `CachePolicy` and `DatabaseCache` to the public `pub use database::` export. - -#### 4. `pgdog/src/frontend/router/parser/route.rs` - -`CacheDirective` enum was **moved to** `cache/policy.rs` — `route.rs` now re-exports it via `pub use crate::frontend::client::query_engine::cache::policy::CacheDirective`. Route still has `cache_directive` field and methods available for manual override, but the type is imported from the cache module. - -#### 5. `pgdog/src/frontend/router/parser/comment.rs` +#### 5. `pgdog/Cargo.toml` -All cache-related regex and parsing was **removed** from this file. Cache extraction is now independent and lives in `cache/policy.rs` with its own standalone regex. The `comment()` function returns a 2-tuple `(Option, Option)` again. +- Added `fred = { version = "9", features = ["enable-rustls"] }` to dependencies -#### 6. `pgdog/src/frontend/router/parser/cache/ast.rs` +#### 6. `pgdog/src/frontend/client/query_engine/mod.rs` -`comment_cache_directive` field was **removed** from `AstInner` struct and `new()` method. Cache parsing is no longer done at the AST level. 
+- Added `pub mod cache;` module declaration +- Added `cache: Cache` field to `QueryEngine` +- `new()` loads `cache_config` from `config().config.general.cache` and creates `Cache::new(cache_config, database)` +- `handle()` flow: after `route_query()` and before `before_execution()`, calls `self.cache.try_read_cache(context)`. If HIT: sends cached response and returns. On MISS: stores state in `context.cache_context`. +- After `match command`, calls `self.cache.save_response_in_cache(context)` to store the captured response in Redis. -#### 7. `pgdog/src/frontend/router/parser/query/mod.rs` +#### 7. `pgdog/src/frontend/client/query_engine/query.rs` -All cache directive handling was **removed**: `cache_directive` field removed from `QueryParser` struct, cache directive propagation removed. Cache policy extraction now happens independently in `integration.rs`. +- `process_server_message()` added cache capture: `context.cache_context.capture_response(message.clone())`. -#### 8. `pgdog/src/frontend/router/parser/mod.rs` +#### 8. `pgdog/src/frontend/client/query_engine/context.rs` -- Updated export: `pub use route::{CacheDirective, Route, Shard, ShardWithPriority, ShardsWithPriority};` +- Added `cache_context: CacheContext` field to `QueryEngineContext`. -#### 9. `pgdog/Cargo.toml` +#### 9. `pgdog/src/backend/pool/cluster.rs` -- Added `fred = { version = "9", features = ["enable-rustls"] }` to dependencies +- Added `cache_enabled: bool` field to `ClusterConfig` and `Cluster` +- `cluster.rs` adds `|| self.cache_enabled()` in query parser requirement check — when caching is on, the query parser is forced on alongside `dry_run`, `prepared_statements`, `pub_sub`, and `regex_parser` -#### 10. `pgdog/src/frontend/client/query_engine/mod.rs` +#### 10. 
`pgdog-config/src/core.rs` -- Added `pub mod cache;` module declaration -- Added `cache_client: CacheClient`, `cache_stats: QueryStatsTracker`, `database: String`, `cache_miss: Option<(String, Option)>`, `cache_response_buffer: Vec` fields to `QueryEngine` -- `new()` looks up cache config from global config by database name and creates `CacheClient` -- `handle()` flow: after `route_query()` and before `before_execution()`, calls `cache_check()`. On HIT: sends cached response and returns. On MISS: stores `(cache_key, ttl)` and starts capture. On Passthrough: clears miss state. -- After `match command`, calls `self.finalize_cache().await` to store the captured response in Redis. -- Added helper methods: `start_cache_capture()`, `capture_response()`, `is_caching()`, `finalize_cache()` - -#### 11. `pgdog/src/frontend/client/query_engine/query.rs` - -- `process_server_message()` added cache capture at the top: if `self.is_caching()`, clones and stores the message via `self.capture_response()`. 
+- Added startup warning: `cache requires enabled query parser but it's disabled or session controlled` when `cache.is_enabled()` and parser is `Off` or `SessionControl` --- @@ -128,15 +133,15 @@ All cache directive handling was **removed**: `cache_directive` field removed fr | Decision | Choice | |----------|--------| -| Interception point | Between `parse_and_rewrite()` and `route_query()` in `handle()` | -| Cache config scope | Per-database (`Database.cache` field) | +| Interception point | Between `route_query()` and `before_execution()` in `handle()` | +| Cache config scope | **Global** (`config.general.cache`) | | Redis client | `fred` crate v9 (async-native, tokio integration) | -| Cacheable queries | Only reads (`Route::is_read()` + `is_likely_read()` fallback) | -| Cache policy resolution | 3-tier: SQL comment → per-database config → auto-decision | -| Cache HIT flow | Deserialize wire bytes → parse messages → send to client → `return Ok(())` | -| Cache MISS flow | Normal execute → capture response bytes → store in Redis → respond | +| Cacheable queries | Only reads (`route.is_read()`) | +| Cache policy resolution | 3-tier: directive (SQL comment, else pgdog.cache param) → DB policy → auto-decision | +| Cache HIT flow | Deserialize wire bytes → parse messages → send to client → return `Ok(true)` | +| Cache MISS flow | Normal execute → capture response via `CacheContext` → store in Redis → respond | | Auto-decision engine | `hit_count > miss_count` AND `avg_result_size < 1MB` | -| Cache key | `DefaultHasher` of `{database}:{query}` — database name is hashed first to namespace keys, preventing collisions when multiple databases share one Redis | +| Cache key | `pg_query::fingerprint(query).value.wrapping_add(db_hash)` where `db_hash = DefaultHasher of database name` | | Wire format | Full PostgreSQL wire messages stored as raw bytes (one concatenated buffer) | --- @@ -149,29 +154,26 @@ All cache directive handling was **removed**: `cache_directive` field removed fr 3.
**Wire format deserialization wrong in send_cached_response** - Problem: PostgreSQL wire message structure is `[1B code][4B length]` where length includes the 4B itself. I calculated `offset + 5 + msg_len` (treating length as payload-only), causing incorrect byte slicing. Fix: Corrected to `offset + 1 + msg_len`. -4. **Route incorrectly reports read-only as write when parser is disabled** - Problem: `query_parser_bypass()` conservatively returns `Route::write()` for all SQL when the query parser is disabled. Since pgdog doesn't enable the parser by default for simple queries, `route.is_read()` was false for `SELECT 1`. Fix: Added `is_likely_read()` heuristic function in `cache_check` that checks uppercase SQL prefix (SELECT/SHOW/EXPLAIN/WITH) as a fallback when parser is disabled. +4. **Route incorrectly reports read-only as write when parser is disabled** - Problem: `query_parser_bypass()` conservatively returns `Route::write()` for all SQL when the query parser is disabled. Since pgdog doesn't enable the parser by default for simple queries, `route.is_read()` was false for `SELECT 1`. Fix: When any database has `cache.enabled = true`, the query parser level is auto-upgraded to `On` in the cluster config. The `|| self.cache_enabled()` check in `cluster.rs:475` forces the parser on. Cache also emits a startup warning if parser is `Off` or `SessionControl`. The old `is_likely_read()` string-prefix heuristic has been removed entirely. -5. **DB cache config defaults** - Observation: `DatabaseCache.policy` defaults to `CachePolicy::NoCache`. Even with `enabled = true`, caching is skipped unless policy is explicitly set. User action taken: Added `policy = "cache"` to pgdog.toml. +5. **DB cache config defaults** - Observation: `Cache.policy` defaults to `CachePolicy::NoCache`. Even with `enabled = true`, caching is skipped unless policy is explicitly set. User action taken: Add `policy = "cache"` to pgdog.toml. 
--- ## Refactoring: Decoupled Cache Policy Extraction -The original implementation entangled cache directive parsing with pgdog's general comment parser (`comment.rs`), which only activates when the full query parser runs. This meant `/* pgdog_cache: ... */` annotations were silently ignored for simple queries and when `query_parser_bypass()` triggered. +The original implementation entangled cache directive parsing with pgdog's general comment parser, which only activates when the full query parser runs. This meant `/* pgdog_cache: ... */` annotations were silently ignored for simple queries and when `query_parser_bypass()` triggered. **What was done:** -- `CacheDirective` enum moved from `route.rs` to `cache/policy.rs` -- Cache parsing **removed** from `comment.rs`, `ast.rs`, `query/mod.rs` — they no longer handle `CacheDirective` -- `route.rs` now re-exports `CacheDirective` from the cache module -- New **trait-based extraction system** in `cache/policy.rs`: - - `CachePolicyExtractor` trait with `fn extract(query, params) -> CacheDirective` - - `CommentCacheExtractor`: standalone regex scan on raw query string — works independent of AST parser - - `ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter - - `CachePolicyDispatcher`: chains extractors, returns first non-`None` result -- `integration.rs` now creates the dispatcher inline in `cache_check()` and passes `context.params` for parameter extraction - -This ensures cache annotations work regardless of whether the query parser is enabled or bypassed. 
+- New **`cache/`** module created under `query_engine/` +- `CachePolicyExtractor` trait with `fn extract(query, params) -> CacheDirective` +- `CommentCacheExtractor`: standalone regex scan on raw query string — works independent of AST parser +- `ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter +- `CachePolicyDispatcher`: chains extractors, returns first non-`None` result +- `Cache` struct as abstraction layer over client, stats, config, and dispatcher +- `CacheContext` struct holds `cache_miss` and `response_buffer` per-query +- Cache integration happens via `try_read_cache()` and `save_response_in_cache()` methods on `Cache` ## How to Control Cache @@ -219,51 +221,41 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi --- -# What's Left To Do - -1. **Redo is_likely_read** — **DONE.** Instead of heuristic-based detection, caching now requires the query parser. If `query_parser = "auto"` and any database has `cache.enabled = true`, it's auto-upgraded to `"on"` globally. If `query_parser = "off"` or `"session_control"` and cache is enabled, a startup warning is emitted and caching won't work for that database. `ClusterConfig::new()` also forces `On` per-cluster if cache is enabled and global parser is `Off`/`SessionControl`/`Auto`. This means `route.is_read()` from the AST parser is always accurate — it correctly detects CTE writes (`WITH ... INSERT`), `FOR UPDATE/SHARE`, and volatile functions (`nextval()`, `pg_advisory_lock()`). The old `is_likely_read()` string-prefix heuristic has been removed entirely. +## Completed -2. **pgdog_cache: comment annotation** — **DONE.** Cache directive extraction now uses its own standalone regex in `cache/policy.rs`, working independently of the AST parser. It scans the raw query string, so it functions correctly even when `query_parser_bypass()` is triggered. The `/* pgdog_cache: ... */` comment format is supported with optional `ttl=` parameter. +1. 
**Query parser auto-upgrade for caching** — When caching is enabled and parser is `Auto`/`Off`/`SessionControl`, the parser is forced to `On` via `|| self.cache_enabled()` check in `cluster.rs`. A startup warning is emitted in `core.rs` if parser remains incompatible. -3. **Auto policy** — Implemented but untested. Relies on stats tracker to decide based on hit/miss ratio and avg result size after enough observations. +2. **Decoupled cache policy extraction** — Cache directives extracted via standalone regex in `cache/policy.rs`, works regardless of parser state. Supports `/* pgdog_cache: ... */` format with optional `ttl=` parameter. -4. **Multi-step execution caching** — InsertSplit and ShardingKeyUpdate rewrite paths use process_server_message() which captures responses, but the finalize_cache() call happens after match command block. Need to verify caching works correctly for multi-step rewrites. +3. **Error handling / Reconnection** — Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. -5. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: Actually, pgdog implements prepared statements caching. But i don't know what kind of caching is this: just query cache or result cache. And if we'll implement our cache, will this break this prepared statement cache?) +4. **Cache key collision across databases sharing one Redis** — Database name is hashed via `DefaultHasher` and combined with `pg_query::fingerprint(query).value` using `wrapping_add` to produce unique per-database keys even on shared Redis. -6. **Error handling / Reconnection** — DONE. Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. +5. 
**Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. -7. **max_result_size config** — Implemented but not exposed in the initial pgdog.toml. Worth documenting in the config. - -8. **Cache key collision across databases sharing one Redis** — Problem: `compute_cache_key()` only hashed the raw query string. When two databases point to the same Redis and both run `SELECT * FROM users WHERE id = 1`, they produce identical keys and can serve wrong data on cache hits. Fix: Changed `compute_cache_key(query: &str)` to `compute_cache_key(query: &str, database: &str)` — database name is now hashed first, then the query, guaranteeing unique keys per database even on a shared Redis instance (`integration.rs:99`). +--- -9. **Redis disconnect/reconnect blocks all queries** — Problem: When Redis becomes unavailable after initial connection, `client.get()`/`client.set()` block for the full timeout duration (2s) because fred's `default_command_timeout` is `Duration::from_millis(0)` (no timeout). After the first request fails, subsequent requests still hit the timeout. After Redis restarts, caching never recovers. Root cause analysis: (a) fred's `ClientState` can report `Connected` even when TCP isn't ready, so relying on `client.state()` for the fast-path check leads to unnecessary blocking. (b) `force_reconnection()` hangs indefinitely when Redis is down — fred's router task can't respond without a connection, so the reconnect loop deadlocks. (c) Even after Redis restarts, if the initial `init()` failed, fred's routing tasks never started, so `ping()` and all operations fail silently. Fix: (1) Replaced `connect_initiated` + state-check logic with a single `redis_connected: AtomicBool` — the authoritative gate for all Redis operations. Returns error immediately if false, no Redis call attempted. 
(2) `ensure_connected()` calls `init()` (only one-shot on fresh start), then verifies with `ping()`. Sets `redis_connected = true` only after PING succeeds. (3) `mark_disconnected()` sets `redis_connected = false` and spawns exactly one background reconnect task (CAS-guarded via `reconnecting: AtomicBool`). (4) Reconnect task retries `client.init()` every 500ms (fred allows re-init after disconnect). On success, verifies with PING, then sets `redis_connected = true`. (5) All Redis calls (init, get, set, ping) wrapped in `tokio::time::timeout(2s)` as safety net. +## What's Left To Do ---- +1. **Auto policy** — Implemented but untested. Relies on stats tracker to decide based on hit/miss ratio and avg result size after enough observations. -## Testing +2. **Multi-step execution caching** — InsertSplit and ShardingKeyUpdate rewrite paths use process_server_message() which captures responses, but the save_response_in_cache() call happens after match command block. Need to verify caching works correctly for multi-step rewrites. -### Framework +3. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) -System tests live in `integration/rust/tests/integration/` alongside the existing integration suite. They use: -- `sqlx` and `tokio-postgres` for PG queries through pgdog on port 6432 -- `#[tokio::test]` + `#[serial]` from `serial_test` for test isolation -- `reqwest` to read metrics from `http://127.0.0.1:9090/metrics` +4. **max_result_size config** — Implemented but not exposed in the initial pgdog.toml. Worth documenting in the config. -Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` +5.
**Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. -### External Dependencies +6. **Rewrite cache_key_hash computation** — Change from `pg_query::fingerprint(query).value.wrapping_add(db_hash)` to correct combined hashing that doesn't use arithmetic addition, ensuring stronger collision resistance. -Redis must be running locally on port 6379 before cache tests execute. Unlike Postgres, Redis is **not** currently provisioned by `integration/setup.sh` or any CI workflow. To add cache tests to CI: -- **GitHub Actions:** add `sudo apt-get install -y redis-server && sudo service redis-server start` in `.github/workflows/ci.yml` -- **RWX:** add a `*redis-bg-process` alias in `.rwx/integration.yml` (same pattern as `*postgres-bg-process`) -- **Local/dev:** start Redis manually (expected on `127.0.0.1:6379`) +7. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` ### Planned Tests -1. **Database key namespace collision** — Two databases (`db_a`, `db_b`) sharing one Redis, both running `SELECT 1 AS val` but with different underlying PG data. Verify each database gets its own correct data and no cross-database cache hit occurs. +1. **Database key namespace collision** — Two databases sharing one Redis, both running same query but with different underlying PG data. Verify correct isolation. 2. **Basic cache hit/miss** — Run a SELECT once (expect miss), run again (expect hit), verify metrics. 3. **TTL expiration** — Cache a query with short TTL, wait for expiry, verify miss on third call. -4. **Write bypasses cache** — Execute INSERT/UPDATE/DELETE, verify these operations do not populate or consume the cache. -5. 
**Redis unavailable** — Stop Redis mid-flight, verify queries pass through to PG without blocking or crashing. +4. **Write bypasses cache** — Execute INSERT/UPDATE/DELETE, verify these do not populate or consume the cache. +5. **Redis unavailable** — Stop Redis mid-flight, verify queries pass through to PG without blocking. 6. **Redis reconnection** — Restart Redis after disconnect, verify cache recovers automatically. From a7112ffb9a30aba07ee7af03009f536e246b5766 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 04/20] update key hashing algorithm --- Cargo.lock | 7 +++++++ docs/CACHE.md | 8 +++----- pgdog/Cargo.toml | 1 + .../client/query_engine/cache/integration.rs | 13 +++++-------- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1596b7c5b..f7b17c532 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2978,6 +2978,7 @@ dependencies = [ "tracing-subscriber", "url", "uuid", + "xxhash-rust", ] [[package]] @@ -5975,6 +5976,12 @@ version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" +[[package]] +name = "xxhash-rust" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" + [[package]] name = "yoke" version = "0.8.0" diff --git a/docs/CACHE.md b/docs/CACHE.md index a6f302711..fe342a7d1 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -94,7 +94,7 @@ Key methods: - `cache_check()` — main entry point, checks route, extracts directive, resolves policy, checks Redis - `send_cached_response()` — deserializes wire-format bytes and sends to client - `cache_response()` — serializes `Vec` into wire bytes and stores in Redis -- Cache key: `pg_query::fingerprint(query).value.wrapping_add(db_hash)` +- Cache key: XXH3 hash of `database_name + raw_query_string` ### Files Modified @@ 
-229,7 +229,7 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 3. **Error handling / Reconnection** — Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. -4. **Cache key collision across databases sharing one Redis** — Database name is hashed via `DefaultHasher` and combined with `pg_query::fingerprint(query).value` using `wrapping_add` to produce unique per-database keys even on shared Redis. +4. **Cache key collision across databases sharing one Redis** — Database name and raw query string are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. 5. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. @@ -247,9 +247,7 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 5. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. -6. **Rewrite cache_key_hash computation** — Change from `pg_query::fingerprint(query).value.wrapping_add(db_hash)` to correct combined hashing that doesn't use arithmetic addition, ensuring stronger collision resistance. - -7. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` +6. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. 
Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` ### Planned Tests diff --git a/pgdog/Cargo.toml b/pgdog/Cargo.toml index 05e69167f..e3f2e7bbb 100644 --- a/pgdog/Cargo.toml +++ b/pgdog/Cargo.toml @@ -71,6 +71,7 @@ pgdog-stats = { path = "../pgdog-stats" } pgdog-postgres-types = { path = "../pgdog-postgres-types"} fred = { version = "9", features = ["enable-rustls"] } scc = "3.7" +xxhash-rust = { version = "0.8", features = ["xxh3"]} [target.'cfg(not(target_env = "msvc"))'.dependencies] tikv-jemallocator = "0.6" diff --git a/pgdog/src/frontend/client/query_engine/cache/integration.rs b/pgdog/src/frontend/client/query_engine/cache/integration.rs index 625d2ac15..f0986e232 100644 --- a/pgdog/src/frontend/client/query_engine/cache/integration.rs +++ b/pgdog/src/frontend/client/query_engine/cache/integration.rs @@ -1,4 +1,4 @@ -use std::hash::{DefaultHasher, Hasher}; +use std::hash::{Hash, Hasher}; use crate::{ frontend::client::query_engine::{cache::Cache, QueryEngineContext}, @@ -42,15 +42,12 @@ impl Cache { _ => return CacheCheckResult::Passthrough, }; - let db_hash = { - let mut hasher = DefaultHasher::new(); - hasher.write(self.database.as_bytes()); + let cache_key_hash = { + let mut hasher = xxhash_rust::xxh3::Xxh3Default::new(); + self.database.hash(&mut hasher); + query.query().hash(&mut hasher); hasher.finish() }; - let cache_key_hash = pg_query::fingerprint(query.query()) - .expect("We're sure that query is correct if we've reached here.") - .value - .wrapping_add(db_hash); let cache_directive = self .policy_dispatcher From 093e7ba6ca833284d4f2af9a25421c70a1262f65 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 05/20] avoid caching error responses --- docs/CACHE.md | 9 ++++++--- .../frontend/client/query_engine/cache/context.rs | 13 ++++++++++++- .../client/query_engine/cache/integration.rs | 4 ++++ pgdog/src/frontend/client/query_engine/cache/mod.rs | 11 +++++++---- 4 files 
changed, 29 insertions(+), 8 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index fe342a7d1..a93c87611 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -86,9 +86,10 @@ Key methods: - `QueryStatsTracker` with `record_hit(fingerprint, size)` / `record_miss(fingerprint)` / `get(fingerprint)` - Internally: `Arc>` -**`context.rs`** — Cache context held in `QueryEngineContext`: -- `CacheContext` with `cache_miss: Option<(u64, Option)>` and `response_buffer: Vec` -- `capture_response(message)` — stores message in buffer when cache miss is tracked + **`context.rs`** — Cache context held in `QueryEngineContext`: + - `CacheContext` with `cache_miss: Option<(u64, Option)>`, `response_buffer: Vec`, and `had_error: bool` + - `capture_response(message)` — stores message in buffer when cache miss is tracked; sets `had_error = true` on `E` messages + - `reset()` — clears all state for per-query isolation **`integration.rs`** — Integration methods on `impl Cache`: - `cache_check()` — main entry point, checks route, extracts directive, resolves policy, checks Redis @@ -233,6 +234,8 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 5. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. +6. **Do not cache error responses**. + --- ## What's Left To Do diff --git a/pgdog/src/frontend/client/query_engine/cache/context.rs b/pgdog/src/frontend/client/query_engine/cache/context.rs index 3234f6be1..2aaede4ff 100644 --- a/pgdog/src/frontend/client/query_engine/cache/context.rs +++ b/pgdog/src/frontend/client/query_engine/cache/context.rs @@ -1,17 +1,28 @@ -use crate::net::Message; +use crate::net::{Message, messages::Protocol}; /// Cache context to use in QueryEngineContext. 
#[derive(Default)] pub struct CacheContext { pub cache_miss: Option<(u64, Option)>, pub response_buffer: Vec, + pub had_error: bool, } impl CacheContext { /// Capture a response message for caching. pub fn capture_response(&mut self, message: Message) { if self.cache_miss.is_some() { + if message.code() == 'E' { + self.had_error = true; + } self.response_buffer.push(message); } } + + /// Reset the cache context for a new query. + pub fn reset(&mut self) { + self.cache_miss = None; + self.response_buffer.clear(); + self.had_error = false; + } } diff --git a/pgdog/src/frontend/client/query_engine/cache/integration.rs b/pgdog/src/frontend/client/query_engine/cache/integration.rs index f0986e232..39e7d966a 100644 --- a/pgdog/src/frontend/client/query_engine/cache/integration.rs +++ b/pgdog/src/frontend/client/query_engine/cache/integration.rs @@ -25,6 +25,10 @@ impl Cache { &self, context: &mut QueryEngineContext<'_>, ) -> CacheCheckResult { + if context.in_transaction() { + return CacheCheckResult::Passthrough; + } + let route = match context.client_request.route.as_ref() { Some(r) => r, None => return CacheCheckResult::Passthrough, diff --git a/pgdog/src/frontend/client/query_engine/cache/mod.rs b/pgdog/src/frontend/client/query_engine/cache/mod.rs index 012a53852..726243b19 100644 --- a/pgdog/src/frontend/client/query_engine/cache/mod.rs +++ b/pgdog/src/frontend/client/query_engine/cache/mod.rs @@ -50,6 +50,7 @@ impl Cache { CacheCheckResult::Hit { cached } => { debug!("Cache hit, serving from cache"); self.send_cached_response(context, cached).await?; + context.cache_context.reset(); return Ok(true); } CacheCheckResult::Miss { @@ -58,20 +59,22 @@ impl Cache { } => { context.cache_context.cache_miss = Some((cache_key_hash, ttl)); context.cache_context.response_buffer.clear(); + context.cache_context.had_error = false; debug!("Cache miss for key hash: {}", cache_key_hash); } CacheCheckResult::Passthrough => { - context.cache_context.cache_miss = None; + 
context.cache_context.reset(); } } - return Ok(false); + Ok(false) } /// Finalize caching by storing the response in Redis. - pub async fn save_response_in_cache(&self, context: &mut QueryEngineContext<'_>,) { + pub async fn save_response_in_cache(&self, context: &mut QueryEngineContext<'_>) { if let Some((cache_key, ttl)) = context.cache_context.cache_miss.take() { - if !context.cache_context.response_buffer.is_empty() { + if !context.cache_context.had_error && !context.cache_context.response_buffer.is_empty() + { let messages = std::mem::take(&mut context.cache_context.response_buffer); if let Err(e) = self.cache_response(cache_key, messages, ttl).await { debug!("Failed to cache response: {:?}", e); From eb977edb9fd9bc949c181eb4a12fa26f33aeeffe Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 06/20] fixed setting pgdog.cache via DSN options --- docs/CACHE.md | 27 +++++++++++++++++++------ pgdog/src/net/messages/hello.rs | 36 ++++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index a93c87611..5815d7a86 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -211,6 +211,18 @@ SET pgdog.cache = 'cache'; SET pgdog.cache = 'cache ttl=300'; ``` +```sh +# Session-wide: all queries in this connection bypass cache +# Attention: this only supports `no_cache` with underscore +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dno_cache + +# Session-wide: cache all queries with default TTL +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dcache + +# Session-wide: cache all queries with 5-minute TTL +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dcache%5C%20ttl%3D300 +``` + ### Priority Order Extractors are checked in order — first non-`None` result wins, then falls through to database config: @@ -236,21 +248,24 @@ SQL comment → pgdog.cache 
parameter → DB policy config → Auto-decisi 6. **Do not cache error responses**. +7. **Setting pgdog.cache via connection url doesn't work**. + --- ## What's Left To Do 1. **Auto policy** — Implemented but untested. Relies on stats tracker to decide based on hit/miss ratio and avg result size after enough observations. -2. **Multi-step execution caching** — InsertSplit and ShardingKeyUpdate rewrite paths use process_server_message() which captures responses, but the finalize_cache() call happens after match command block. Need to verify caching works correctly for multi-step rewrites. +2. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) + +3. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. -3. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) +4. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` -4. **max_result_size config** — Implemented but not exposed in the initial pgdog.toml. Worth documenting in the config. +5. 
**Magic numbers in send_cached_response()**. -5. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. +6. **Make statistics collection async** — for auto policy. -6. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` ### Planned Tests @@ -259,4 +274,4 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 3. **TTL expiration** — Cache a query with short TTL, wait for expiry, verify miss on third call. 4. **Write bypasses cache** — Execute INSERT/UPDATE/DELETE, verify these do not populate or consume the cache. 5. **Redis unavailable** — Stop Redis mid-flight, verify queries pass through to PG without blocking. -6. **Redis reconnection** — Restart Redis after disconnect, verify cache recovers automatically. +6. **Redis reconnection** — Restart Redis after disconnect, verify cache recovers automatically. 
\ No newline at end of file diff --git a/pgdog/src/net/messages/hello.rs b/pgdog/src/net/messages/hello.rs index 84f901b06..436fb209f 100644 --- a/pgdog/src/net/messages/hello.rs +++ b/pgdog/src/net/messages/hello.rs @@ -58,9 +58,10 @@ impl Startup { let value = search_path(&value); params.insert(name, value); } else if name == "options" { + let value = options_unescape(&value); let kvs = value.split("-c"); for kv in kvs { - let mut nvs = kv.split("="); + let mut nvs = kv.splitn(2, "="); let name = nvs.next(); let value = nvs.next(); @@ -249,6 +250,26 @@ fn search_path(value: &str) -> ParameterValue { ParameterValue::Tuple(value) } +fn options_unescape(input: &str) -> String { + let mut result = String::with_capacity(input.len()); + let mut chars = input.chars().peekable(); + + while let Some(c) = chars.next() { + if c == '\\' { + if let Some(&next) = chars.peek() { + chars.next(); + result.push(next); + } else { + result.push(c); + } + } else { + result.push(c); + } + } + + result +} + #[cfg(test)] mod test { use crate::net::messages::ToBytes; @@ -309,4 +330,17 @@ mod test { let startup = Startup::from_stream(&mut read).await.unwrap(); assert!(matches!(startup, Startup::GssEnc)); } + + #[test] + fn test_options_unescape() { + assert_eq!(options_unescape("cache\\ ttl=5"), "cache ttl=5"); + assert_eq!(options_unescape("cache\\\\ttl=5"), "cache\\ttl=5"); + assert_eq!(options_unescape("simple"), "simple"); + assert_eq!(options_unescape("a\\=b"), "a=b"); + assert_eq!(options_unescape("trail\\"), "trail\\"); + assert_eq!( + options_unescape("cache\\ ttl\\=5"), + "cache ttl=5" + ); + } } From 7a6d81ddfceab9b47f850c9a53598e1c4a0a6402 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 07/20] changed cache scope from connection to global --- docs/CACHE.md | 24 +++++--- .../{client/query_engine => }/cache/client.rs | 25 +++----- .../query_engine => }/cache/context.rs | 0 .../query_engine => }/cache/integration.rs | 55 
+++++++++-------- .../{client/query_engine => }/cache/mod.rs | 59 +++++++++++-------- .../{client/query_engine => }/cache/policy.rs | 0 .../{client/query_engine => }/cache/stats.rs | 0 .../frontend/client/query_engine/context.rs | 4 +- pgdog/src/frontend/client/query_engine/mod.rs | 27 +++++---- pgdog/src/frontend/mod.rs | 1 + 10 files changed, 110 insertions(+), 85 deletions(-) rename pgdog/src/frontend/{client/query_engine => }/cache/client.rs (93%) rename pgdog/src/frontend/{client/query_engine => }/cache/context.rs (100%) rename pgdog/src/frontend/{client/query_engine => }/cache/integration.rs (71%) rename pgdog/src/frontend/{client/query_engine => }/cache/mod.rs (56%) rename pgdog/src/frontend/{client/query_engine => }/cache/policy.rs (100%) rename pgdog/src/frontend/{client/query_engine => }/cache/stats.rs (100%) diff --git a/docs/CACHE.md b/docs/CACHE.md index 5815d7a86..3fc52462a 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -30,9 +30,9 @@ Added `cache: Cache` field to `General` struct — **cache config is global**, n Added `pub mod cache;` and `pub use cache::{CachePolicy, Cache};` to public exports. -#### 4. `pgdog/src/frontend/client/query_engine/cache/` (new module) +#### 4. `pgdog/src/frontend/cache/` (module) -**`mod.rs`** — Module exports and main `Cache` struct: +**`mod.rs`** — Module exports, global singleton, and main `Cache` struct: ```rust pub mod client; pub mod context; @@ -41,6 +41,7 @@ pub mod policy; pub mod stats; pub use client::CacheClient; +pub use context::CacheContext; pub use integration::CacheCheckResult; pub use policy::{ CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, @@ -49,15 +50,17 @@ pub use policy::{ pub use stats::QueryStatsTracker; ``` -`Cache` struct wraps: `CacheClient`, `QueryStatsTracker`, `CacheConfig`, `database`, `policy_dispatcher`. +`Cache` struct wraps: `CacheClient`, `QueryStatsTracker`, `policy_dispatcher`. 
+ +**Global singleton:** Cache is global-scoped, not connection-scoped. Accessed via `cache()` function which returns `Arc` from a `Lazy>` static. `Cache::new()` reads config internally — no parameters needed. Key methods: -- `new(cache_config, database)` — creates client, stats, dispatcher -- `try_read_cache(context)` — calls `cache_check()`, handles HIT/MISS/PASS-through -- `save_response_in_cache(context)` — finalizes by storing the captured response +- `new()` — creates client (reads config internally), stats, dispatcher +- `try_read_cache(cache_context, in_transaction, client_request, params, stream)` — calls `cache_check()`, handles HIT/MISS/PASS-through +- `save_response_in_cache(cache_context)` — finalizes by storing the captured response **`client.rs`** — Redis client wrapper using `fred` v9: -- `CacheClient::new(config)` — builds client from `&CacheConfig`, returns disabled stub if no config/URL +- `CacheClient::new()` — builds client from global `config().config.general.cache`, returns disabled stub if no config/URL - `ensure_connected()` — lazy one-time `client.init().await` followed by `client.ping()` verification; sets `redis_connected` flag - `get(&self, key)` — returns `Result>>`; fetches cached wire-protocol bytes - `set(&self, key, value, ttl)` — stores bytes with EX expiration; respects `max_result_size` @@ -78,7 +81,7 @@ Key methods: - `struct ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter - `struct CachePolicyDispatcher`: chains extractors in priority order, returns first non-`None` result - Tier 1: Extractor result (`CacheDirective::Cache { ttl }` or `CacheDirective::NoCache` from comments/params) -- Tier 2: Database config `CachePolicy` (`NoCache` / `Cache` / `Auto`) +- Tier 2: Global config `CachePolicy` (`NoCache` / `Cache` / `Auto`) - Tier 3: `auto_decision()` — caches when `hit_count > miss_count` AND `avg_result_size < 1MB` **`stats.rs`** — Per-fingerprint query statistics tracker: @@ -250,6 +253,8 @@ SQL 
comment → pgdog.cache parameter → DB policy config → Auto-decisi 7. **Setting pgdog.cache via connection url doesn't work**. +8. **Moved all cache-related structs from QueryEngine to Client** — now all cache structs, including the Redis client, are created once and live for pgdog's whole lifetime. + --- ## What's Left To Do @@ -266,6 +271,9 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 6. **Make statistics collection async** — for auto policy. +7. **Provide config hotswap**. + +8. **Review and rewrite CacheClient**. ### Planned Tests diff --git a/pgdog/src/frontend/client/query_engine/cache/client.rs b/pgdog/src/frontend/cache/client.rs similarity index 93% rename from pgdog/src/frontend/client/query_engine/cache/client.rs rename to pgdog/src/frontend/cache/client.rs index 9ae4577e0..dfede8e1d 100644 --- a/pgdog/src/frontend/client/query_engine/cache/client.rs +++ b/pgdog/src/frontend/cache/client.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use std::time::Duration; use tracing::{debug, error, info}; -use pgdog_config::Cache as CacheConfig; +use crate::config::config; const CACHE_KEY_PREFIX: &str = "pgdog:"; @@ -15,7 +15,6 @@ const REDIS_OPERATION_TIMEOUT: Duration = Duration::from_secs(2); #[derive(Clone)] pub struct CacheClient { client: Option, - config: CacheConfig, /// Master connection state flag. Set true only after PING succeeds /// on init or reconnect. Set false immediately on any error/timeout.
redis_connected: Arc, @@ -27,7 +26,6 @@ impl std::fmt::Debug for CacheClient { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("CacheClient") .field("client", &self.client.as_ref().map(|_| "...")) - .field("config", &self.config) .field( "redis_connected", &self.redis_connected.load(Ordering::Relaxed), @@ -38,13 +36,12 @@ impl std::fmt::Debug for CacheClient { } impl CacheClient { - pub fn new(config: &CacheConfig) -> Self { - let cache_config = config.clone(); + pub fn new() -> Self { + let cache_config = &config().config.general.cache; if !cache_config.is_enabled() || cache_config.redis_url.is_none() { return Self { client: None, - config: cache_config, redis_connected: Arc::new(AtomicBool::new(false)), reconnecting: Arc::new(AtomicBool::new(false)), }; @@ -57,7 +54,6 @@ impl CacheClient { error!("Failed to parse Redis URL: {}", e); return Self { client: None, - config: cache_config, redis_connected: Arc::new(AtomicBool::new(false)), reconnecting: Arc::new(AtomicBool::new(false)), }; @@ -70,7 +66,6 @@ impl CacheClient { error!("Failed to build Redis client: {}", e); return Self { client: None, - config: cache_config, redis_connected: Arc::new(AtomicBool::new(false)), reconnecting: Arc::new(AtomicBool::new(false)), }; @@ -79,7 +74,6 @@ impl CacheClient { Self { client: Some(client), - config: cache_config, redis_connected: Arc::new(AtomicBool::new(false)), reconnecting: Arc::new(AtomicBool::new(false)), } @@ -252,7 +246,9 @@ impl CacheClient { let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); - if let Some(max_size) = self.config.max_result_size() { + let cache_config = &config().config.general.cache; + + if let Some(max_size) = cache_config.max_result_size() { if value.len() > max_size { debug!( "Skipping cache for key {}: size {} exceeds max {}", @@ -264,7 +260,7 @@ impl CacheClient { } } - let ttl_seconds = ttl.unwrap_or_else(|| self.config.ttl()) as i64; + let ttl_seconds = ttl.unwrap_or_else(|| cache_config.ttl()) as 
i64; match tokio::time::timeout( REDIS_OPERATION_TIMEOUT, @@ -295,12 +291,9 @@ impl CacheClient { } } - pub fn config(&self) -> &CacheConfig { - &self.config - } - pub fn is_enabled(&self) -> bool { - self.client.is_some() && self.config.is_enabled() + let cache_config = &config().config.general.cache; + self.client.is_some() && cache_config.is_enabled() } } diff --git a/pgdog/src/frontend/client/query_engine/cache/context.rs b/pgdog/src/frontend/cache/context.rs similarity index 100% rename from pgdog/src/frontend/client/query_engine/cache/context.rs rename to pgdog/src/frontend/cache/context.rs diff --git a/pgdog/src/frontend/client/query_engine/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs similarity index 71% rename from pgdog/src/frontend/client/query_engine/cache/integration.rs rename to pgdog/src/frontend/cache/integration.rs index 39e7d966a..4bf7d0b2b 100644 --- a/pgdog/src/frontend/client/query_engine/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -1,13 +1,14 @@ use std::hash::{Hash, Hasher}; use crate::{ - frontend::client::query_engine::{cache::Cache, QueryEngineContext}, - net::{FromBytes, Message, ToBytes}, + config::config, + frontend::ClientRequest, + net::{FromBytes, Message, Parameters, Stream, ToBytes}, }; use tracing::debug; -use super::CachePolicyResolver; +use super::{Cache, CachePolicyResolver}; pub enum CacheCheckResult { Hit { @@ -23,48 +24,52 @@ pub enum CacheCheckResult { impl Cache { pub(super) async fn cache_check( &self, - context: &mut QueryEngineContext<'_>, - ) -> CacheCheckResult { - if context.in_transaction() { - return CacheCheckResult::Passthrough; + in_transaction: bool, + client_request: &ClientRequest, + params: &Parameters, + ) -> Result { + if in_transaction { + return Ok(CacheCheckResult::Passthrough); } - let route = match context.client_request.route.as_ref() { + let route = match client_request.route.as_ref() { Some(r) => r, - None => return CacheCheckResult::Passthrough, + None 
=> return Ok(CacheCheckResult::Passthrough), }; // Detect read-only status via the AST parser's route classification. // When caching is enabled, the query parser is auto-enabled. let is_read = route.is_read(); if !is_read { - return CacheCheckResult::Passthrough; + return Ok(CacheCheckResult::Passthrough); } - let query = match context.client_request.query() { + let query = match client_request.query() { Ok(Some(q)) => q, - _ => return CacheCheckResult::Passthrough, + _ => return Ok(CacheCheckResult::Passthrough), }; + let user = params.get_required("user")?; + let database = params.get_default("database", user); let cache_key_hash = { let mut hasher = xxhash_rust::xxh3::Xxh3Default::new(); - self.database.hash(&mut hasher); + database.hash(&mut hasher); query.query().hash(&mut hasher); hasher.finish() }; - let cache_directive = self - .policy_dispatcher - .extract(query.query(), context.params); + let cache_directive = self.policy_dispatcher.extract(query.query(), params); + let cache_config = &config().config.general.cache; + debug!( "cache_check: sql={}, db_config={:?}", query.query(), - self.config + cache_config ); let decision = CachePolicyResolver::resolve( cache_directive, - &self.config, + cache_config, is_read, cache_key_hash, &self.stats, @@ -72,31 +77,31 @@ impl Cache { .await; if !decision.should_cache() { - return CacheCheckResult::Passthrough; + return Ok(CacheCheckResult::Passthrough); } match self.client.get(cache_key_hash).await { Ok(Some(cached)) => { self.stats.record_hit(cache_key_hash, cached.len()).await; - CacheCheckResult::Hit { cached } + Ok(CacheCheckResult::Hit { cached }) } Ok(None) => { self.stats.record_miss(cache_key_hash).await; - CacheCheckResult::Miss { + Ok(CacheCheckResult::Miss { cache_key_hash, ttl: decision.ttl(), - } + }) } Err(e) => { debug!("Cache get error: {}", e); - CacheCheckResult::Passthrough + Ok(CacheCheckResult::Passthrough) } } } pub(super) async fn send_cached_response( &self, - context: &mut 
QueryEngineContext<'_>, + stream: &mut Stream, cached: Vec, ) -> Result<(), crate::frontend::Error> { let mut offset = 0; @@ -124,7 +129,7 @@ impl Cache { let msg = Message::from_bytes(msg_bytes)?; offset = end; - context.stream.send_flush(&msg).await?; + stream.send_flush(&msg).await?; } Ok(()) diff --git a/pgdog/src/frontend/client/query_engine/cache/mod.rs b/pgdog/src/frontend/cache/mod.rs similarity index 56% rename from pgdog/src/frontend/client/query_engine/cache/mod.rs rename to pgdog/src/frontend/cache/mod.rs index 726243b19..44d6ef89f 100644 --- a/pgdog/src/frontend/client/query_engine/cache/mod.rs +++ b/pgdog/src/frontend/cache/mod.rs @@ -5,65 +5,79 @@ pub mod policy; pub mod stats; pub use client::CacheClient; +pub use context::CacheContext; pub use integration::CacheCheckResult; -use pgdog_config::Cache as CacheConfig; pub use policy::{ CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, CommentCacheExtractor, ParameterCacheExtractor, }; pub use stats::QueryStatsTracker; + +use once_cell::sync::Lazy; +use std::sync::Arc; use tracing::debug; -use crate::frontend::client::query_engine::QueryEngineContext; +use crate::{ + frontend::ClientRequest, + net::{Parameters, Stream}, +}; #[derive(Debug)] pub struct Cache { client: CacheClient, stats: QueryStatsTracker, - config: CacheConfig, - database: String, policy_dispatcher: CachePolicyDispatcher, } +static CACHE: Lazy> = Lazy::new(|| Arc::new(Cache::new())); + +pub fn cache() -> Arc { + CACHE.clone() +} + impl Cache { - pub fn new(cache_config: &CacheConfig, database: &str) -> Self { + fn new() -> Self { let mut dispatcher = CachePolicyDispatcher::new(); dispatcher.add_extractor(Box::new(CommentCacheExtractor)); dispatcher.add_extractor(Box::new(ParameterCacheExtractor::new())); Cache { - client: CacheClient::new(cache_config), + client: CacheClient::new(), stats: QueryStatsTracker::default(), - config: cache_config.clone(), - database: database.to_string(), policy_dispatcher: 
dispatcher, } } pub async fn try_read_cache( - &mut self, - context: &mut QueryEngineContext<'_>, + &self, + cache_context: &mut CacheContext, + in_transaction: bool, + client_request: &ClientRequest, + params: &Parameters, + stream: &mut Stream, ) -> Result { - let cache_result = self.cache_check(context).await; + let cache_result = self + .cache_check(in_transaction, client_request, params) + .await?; match cache_result { CacheCheckResult::Hit { cached } => { debug!("Cache hit, serving from cache"); - self.send_cached_response(context, cached).await?; - context.cache_context.reset(); + self.send_cached_response(stream, cached).await?; + cache_context.reset(); return Ok(true); } CacheCheckResult::Miss { cache_key_hash, ttl, } => { - context.cache_context.cache_miss = Some((cache_key_hash, ttl)); - context.cache_context.response_buffer.clear(); - context.cache_context.had_error = false; + cache_context.cache_miss = Some((cache_key_hash, ttl)); + cache_context.response_buffer.clear(); + cache_context.had_error = false; debug!("Cache miss for key hash: {}", cache_key_hash); } CacheCheckResult::Passthrough => { - context.cache_context.reset(); + cache_context.reset(); } } @@ -71,15 +85,14 @@ impl Cache { } /// Finalize caching by storing the response in Redis. 
- pub async fn save_response_in_cache(&self, context: &mut QueryEngineContext<'_>) { - if let Some((cache_key, ttl)) = context.cache_context.cache_miss.take() { - if !context.cache_context.had_error && !context.cache_context.response_buffer.is_empty() - { - let messages = std::mem::take(&mut context.cache_context.response_buffer); + pub async fn save_response_in_cache(&self, cache_context: &mut CacheContext) { + if let Some((cache_key, ttl)) = cache_context.cache_miss.take() { + if !cache_context.had_error && !cache_context.response_buffer.is_empty() { + let messages = std::mem::take(&mut cache_context.response_buffer); if let Err(e) = self.cache_response(cache_key, messages, ttl).await { debug!("Failed to cache response: {:?}", e); } } } } -} \ No newline at end of file +} diff --git a/pgdog/src/frontend/client/query_engine/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs similarity index 100% rename from pgdog/src/frontend/client/query_engine/cache/policy.rs rename to pgdog/src/frontend/cache/policy.rs diff --git a/pgdog/src/frontend/client/query_engine/cache/stats.rs b/pgdog/src/frontend/cache/stats.rs similarity index 100% rename from pgdog/src/frontend/client/query_engine/cache/stats.rs rename to pgdog/src/frontend/cache/stats.rs diff --git a/pgdog/src/frontend/client/query_engine/context.rs b/pgdog/src/frontend/client/query_engine/context.rs index a2cdc7ca2..42ef8b21f 100644 --- a/pgdog/src/frontend/client/query_engine/context.rs +++ b/pgdog/src/frontend/client/query_engine/context.rs @@ -1,7 +1,7 @@ use crate::{ backend::pool::{connection::mirror::Mirror, stats::MemoryStats}, frontend::{ - Client, ClientRequest, PreparedStatements, client::{Sticky, TransactionType, query_engine::cache::context::CacheContext, timeouts::Timeouts}, router::parser::rewrite::statement::plan::RewriteResult + Client, ClientRequest, PreparedStatements, client::{Sticky, TransactionType, timeouts::Timeouts}, router::parser::rewrite::statement::plan::RewriteResult, 
cache::context::CacheContext }, net::{BackendKeyData, Parameters, Stream}, }; @@ -38,7 +38,7 @@ pub struct QueryEngineContext<'a> { /// Rewrite result. pub(super) rewrite_result: Option, /// Cache context. - pub(super) cache_context: CacheContext + pub(super) cache_context: CacheContext, } impl<'a> QueryEngineContext<'a> { diff --git a/pgdog/src/frontend/client/query_engine/mod.rs b/pgdog/src/frontend/client/query_engine/mod.rs index d59adba46..32b753205 100644 --- a/pgdog/src/frontend/client/query_engine/mod.rs +++ b/pgdog/src/frontend/client/query_engine/mod.rs @@ -2,11 +2,10 @@ use crate::{ backend::pool::{Connection, Request}, config::config, frontend::{ - BufferedQuery, Client, ClientComms, Command, Error, Router, RouterContext, Stats, client::query_engine::{ - cache::Cache, - hooks::QueryEngineHooks, - route_query::ClusterCheck, - }, router::{Route, parser::Shard} + cache::cache, + client::query_engine::{hooks::QueryEngineHooks, route_query::ClusterCheck}, + router::{parser::Shard, Route}, + BufferedQuery, Client, ClientComms, Command, Error, Router, RouterContext, Stats, }, net::{ErrorResponse, Message, Parameters}, state::State, @@ -14,7 +13,6 @@ use crate::{ use tracing::debug; -pub mod cache; pub mod connect; pub mod context; pub mod deallocate; @@ -58,7 +56,6 @@ pub struct QueryEngine { notify_buffer: NotifyBuffer, pending_explain: Option, hooks: QueryEngineHooks, - cache: Cache, } impl QueryEngine { @@ -68,7 +65,6 @@ impl QueryEngine { let database = params.get_default("database", user); let backend = Connection::new(user, database, admin)?; - let cache_config = &config().config.general.cache; Ok(Self { backend, @@ -81,7 +77,6 @@ impl QueryEngine { pending_explain: None, begin_stmt: None, router: Router::default(), - cache: Cache::new(cache_config, database), }) } @@ -135,7 +130,17 @@ impl QueryEngine { return Ok(()); } - if self.cache.try_read_cache(context).await? 
{ + let in_transaction = context.in_transaction(); + if cache() + .try_read_cache( + &mut context.cache_context, + in_transaction, + context.client_request, + context.params, + context.stream, + ) + .await? + { self.update_stats(context); return Ok(()); } @@ -239,7 +244,7 @@ impl QueryEngine { command => self.unknown_command(context, command.clone()).await?, } - self.cache.save_response_in_cache(context).await; + cache().save_response_in_cache(&mut context.cache_context).await; self.hooks.after_execution(context)?; diff --git a/pgdog/src/frontend/mod.rs b/pgdog/src/frontend/mod.rs index 284b777b0..aa1bbe523 100644 --- a/pgdog/src/frontend/mod.rs +++ b/pgdog/src/frontend/mod.rs @@ -1,6 +1,7 @@ //! pgDog frontend manages connections to clients. pub mod buffered_query; +pub mod cache; pub mod client; pub mod client_request; pub mod comms; From b44783dcf7c42ff674d334a76322b1d5e5c909dc Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 08/20] reexport cache config --- pgdog/src/config/cache.rs | 1 + pgdog/src/config/mod.rs | 2 ++ pgdog/src/frontend/cache/policy.rs | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 pgdog/src/config/cache.rs diff --git a/pgdog/src/config/cache.rs b/pgdog/src/config/cache.rs new file mode 100644 index 000000000..a089ff680 --- /dev/null +++ b/pgdog/src/config/cache.rs @@ -0,0 +1 @@ +pub use pgdog_config::cache::*; \ No newline at end of file diff --git a/pgdog/src/config/mod.rs b/pgdog/src/config/mod.rs index 835a0f10e..6ecd3785d 100644 --- a/pgdog/src/config/mod.rs +++ b/pgdog/src/config/mod.rs @@ -1,6 +1,7 @@ //! Configuration. 
// Submodules +pub mod cache; pub mod convert; pub mod core; pub mod database; @@ -15,6 +16,7 @@ pub mod rewrite; pub mod sharding; pub mod users; +pub use cache::*; pub use core::{Config, ConfigAndUsers}; pub use database::{Database, Role}; pub use error::Error; diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index 582fde578..dcf6965f7 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -1,6 +1,6 @@ use core::fmt; -use pgdog_config::{Cache as CacheConfig, CachePolicy}; +use crate::config::{Cache as CacheConfig, CachePolicy}; use tracing::debug; use super::stats::QueryStatsTracker; From a71ec7c723b509ac0a81ac8f2bb0bb474a8368c9 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 09/20] Use built-in query comment hints --- docs/CACHE.md | 158 +++---- pgdog-config/src/cache.rs | 4 +- pgdog/src/frontend/cache/integration.rs | 22 +- pgdog/src/frontend/cache/mod.rs | 11 +- pgdog/src/frontend/cache/policy.rs | 415 +++--------------- pgdog/src/frontend/router/parser/cache/ast.rs | 8 +- pgdog/src/frontend/router/parser/comment.rs | 119 ++++- pgdog/src/net/parameter.rs | 1 + 8 files changed, 259 insertions(+), 479 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index 3fc52462a..743a2ab25 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -8,9 +8,9 @@ Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on ## Implementation -### Files Added +### Configuration (`pgdog-config`) -#### 1. `pgdog-config/src/cache.rs` (new file) +**`cache.rs`** — Cache configuration types: **CachePolicy enum:** `NoCache` (default), `Cache`, `Auto`. Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. 
@@ -22,15 +22,11 @@ Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on - `max_result_size: Option` — max cached result bytes - Helper methods: `is_enabled()`, `policy()`, `ttl()`, `max_result_size()` -#### 2. `pgdog-config/src/general.rs` +**`general.rs`** — `General` struct holds `cache: Cache` field. **Cache config is global.** -Added `cache: Cache` field to `General` struct — **cache config is global**, not per-database. +**`lib.rs`** — Exports `pub mod cache;` and `pub use cache::{CachePolicy, Cache};`. -#### 3. `pgdog-config/src/lib.rs` - -Added `pub mod cache;` and `pub use cache::{CachePolicy, Cache};` to public exports. - -#### 4. `pgdog/src/frontend/cache/` (module) +### Cache Module (`pgdog/src/frontend/cache/`) **`mod.rs`** — Module exports, global singleton, and main `Cache` struct: ```rust @@ -43,19 +39,16 @@ pub mod stats; pub use client::CacheClient; pub use context::CacheContext; pub use integration::CacheCheckResult; -pub use policy::{ - CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, - CommentCacheExtractor, ParameterCacheExtractor, -}; +pub use policy::CacheDecision; pub use stats::QueryStatsTracker; ``` -`Cache` struct wraps: `CacheClient`, `QueryStatsTracker`, `policy_dispatcher`. +`Cache` struct wraps: `CacheClient`, `QueryStatsTracker`. **Global singleton:** Cache is global-scoped, not connection-scoped. Accessed via `cache()` function which returns `Arc` from a `Lazy>` static. `Cache::new()` reads config internally — no parameters needed. 
Key methods: -- `new()` — creates client (reads config internally), stats, dispatcher +- `new()` — creates client (reads config internally) and stats tracker - `try_read_cache(cache_context, in_transaction, client_request, params, stream)` — calls `cache_check()`, handles HIT/MISS/PASS-through - `save_response_in_cache(cache_context)` — finalizes by storing the captured response @@ -74,13 +67,13 @@ Key methods: - `reconnecting: Arc` — prevents multiple concurrent reconnect tasks - All Redis operations wrapped in `tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) as safety net -**`policy.rs`** — 3-tier policy resolution with trait-based extraction: -- `CacheDirective` enum: `None`, `Cache { ttl_seconds }`, `NoCache` -- `trait CachePolicyExtractor`: abstract interface with `fn extract(query, params) -> CacheDirective` -- `struct CommentCacheExtractor`: scans SQL query string with standalone regex — **works even when parser is bypassed** -- `struct ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter -- `struct CachePolicyDispatcher`: chains extractors in priority order, returns first non-`None` result -- Tier 1: Extractor result (`CacheDirective::Cache { ttl }` or `CacheDirective::NoCache` from comments/params) +**`policy.rs`** — 3-tier policy resolution via free functions: +- `CacheDirective` enum: `Cache { ttl_seconds }`, `NoCache` (default) +- `CacheDecision` enum: `Skip`, `Cache(Option)` +- `resolve(client_request, params, is_read, cache_key_hash, stats)` — main resolver function, chains all tiers +- `get_cache_directive(client_request, params)` — comment hint (from AST) has priority over connection parameter (`pgdog.cache`) +- `extract_parameter_directive(params)` — parses `pgdog.cache` parameter: `no_cache`, `cache`, `cache ttl=N` +- Tier 1: Extractor directive (`CacheDirective::Cache { ttl }` or `CacheDirective::NoCache`) - Tier 2: Global config `CachePolicy` (`NoCache` / `Cache` / `Auto`) - Tier 3: `auto_decision()` — caches when 
`hit_count > miss_count` AND `avg_result_size < 1MB` @@ -89,47 +82,46 @@ Key methods: - `QueryStatsTracker` with `record_hit(fingerprint, size)` / `record_miss(fingerprint)` / `get(fingerprint)` - Internally: `Arc>` - **`context.rs`** — Cache context held in `QueryEngineContext`: - - `CacheContext` with `cache_miss: Option<(u64, Option)>`, `response_buffer: Vec`, and `had_error: bool` - - `capture_response(message)` — stores message in buffer when cache miss is tracked; sets `had_error = true` on `E` messages - - `reset()` — clears all state for per-query isolation +**`context.rs`** — Cache context held in `QueryEngineContext`: +- `CacheContext` with `cache_miss: Option<(u64, Option)>`, `response_buffer: Vec`, and `had_error: bool` +- `capture_response(message)` — stores message in buffer when cache miss is tracked; sets `had_error = true` on `E` messages +- `reset()` — clears all state for per-query isolation **`integration.rs`** — Integration methods on `impl Cache`: -- `cache_check()` — main entry point, checks route, extracts directive, resolves policy, checks Redis +- `cache_check()` — main entry point, checks route, calls `policy::resolve()`, checks Redis - `send_cached_response()` — deserializes wire-format bytes and sends to client - `cache_response()` — serializes `Vec` into wire bytes and stores in Redis - Cache key: XXH3 hash of `database_name + raw_query_string` -### Files Modified - -#### 5. `pgdog/Cargo.toml` - -- Added `fred = { version = "9", features = ["enable-rustls"] }` to dependencies - -#### 6. 
`pgdog/src/frontend/client/query_engine/mod.rs` +### Query Engine Integration -- Added `pub mod cache;` module declaration -- Added `cache: Cache` field to `QueryEngine` -- `new()` loads `cache_config` from `config().config.general.cache` and creates `Cache::new(cache_config, database)` +**`pgdog/src/frontend/client/query_engine/mod.rs`** +- Declares `pub mod cache;` module +- `QueryEngine` holds `cache: Cache` field - `handle()` flow: after `route_query()` and before `before_execution()`, calls `self.cache.try_read_cache(context)`. If HIT: sends cached response and returns. On MISS: stores state in `context.cache_context`. -- After `match command`, calls `self.cache.save_response_in_cache(context)` to store the captured response in Redis. +- After `match command`, calls `self.cache.save_response_in_cache(context)` to finalize caching. -#### 7. `pgdog/src/frontend/client/query_engine/query.rs` +**`pgdog/src/frontend/client/query_engine/query.rs`** +- `process_server_message()` calls `context.cache_context.capture_response(message.clone())`. -- `process_server_message()` added cache capture: `context.cache_context.capture_response(message.clone())`. +**`pgdog/src/frontend/client/query_engine/context.rs`** +- `QueryEngineContext` holds `cache_context: CacheContext` field. -#### 8. `pgdog/src/frontend/client/query_engine/context.rs` +### Backend and Config Integration -- Added `cache_context: CacheContext` field to `QueryEngineContext`. +**`pgdog/src/backend/pool/cluster.rs`** +- `ClusterConfig` and `Cluster` hold `cache_enabled: bool` field +- Query parser requirement check includes `|| self.cache_enabled()` — when caching is on, the query parser is forced on. -#### 9. `pgdog/src/backend/pool/cluster.rs` +**`pgdog-config/src/core.rs`** +- Startup warning emitted when `cache.is_enabled()` and parser is `Off` or `SessionControl`. 
-- Added `cache_enabled: bool` field to `ClusterConfig` and `Cluster` -- `cluster.rs` adds `|| self.cache_enabled()` in query parser requirement check — when caching is on, the query parser is forced on alongside `dry_run`, `prepared_statements`, `pub_sub`, and `regex_parser` +### Dependencies -#### 10. `pgdog-config/src/core.rs` - -- Added startup warning: `cache requires enabled query parser but it's disabled or session controlled` when `cache.is_enabled()` and parser is `Off` or `SessionControl` +**`pgdog/Cargo.toml`** +fred = { version = "9", features = ["enable-rustls"] } +scc = "3.7" +xxhash-rust = { version = "0.8", features = ["xxh3"]} --- @@ -145,40 +137,11 @@ Key methods: | Cache HIT flow | Deserialize wire bytes → parse messages → send to client → return `Ok(true)` | | Cache MISS flow | Normal execute → capture response via `CacheContext` → store in Redis → respond | | Auto-decision engine | `hit_count > miss_count` AND `avg_result_size < 1MB` | -| Cache key | `pg_query::fingerprint(query).value.wrapping_add(db_hash)` where `db_hash = DefaultHasher of database name` | +| Cache key | XXH3 hash of `database_name + raw_query_string` | | Wire format | Full PostgreSQL wire messages stored as raw bytes (one concatenated buffer) | --- -## Bugs Found & Fixed - -1. **Redis client never connects** - Problem: CacheClient::new() built the client but never called init(). Fred requires explicit connection initialization. Fix: Added lazy `ensure_connected()` using `client.init().await`, guarded by `AtomicBool` so it runs exactly once on first get()/set(). Changed CacheClient from `#[derive(Debug)]` to manual Debug impl (contains `Arc`). - -2. **Redis GET fails on NULL / cache miss** - Problem: `client.get::()` throws `Parse Error: Cannot parse into bytes` when the key doesn't exist. Fix: Use `client.get::()` and check `val.is_null()` before extracting bytes. - -3. 
**Wire format deserialization wrong in send_cached_response** - Problem: PostgreSQL wire message structure is `[1B code][4B length]` where length includes the 4B itself. I calculated `offset + 5 + msg_len` (treating length as payload-only), causing incorrect byte slicing. Fix: Corrected to `offset + 1 + msg_len`. - -4. **Route incorrectly reports read-only as write when parser is disabled** - Problem: `query_parser_bypass()` conservatively returns `Route::write()` for all SQL when the query parser is disabled. Since pgdog doesn't enable the parser by default for simple queries, `route.is_read()` was false for `SELECT 1`. Fix: When any database has `cache.enabled = true`, the query parser level is auto-upgraded to `On` in the cluster config. The `|| self.cache_enabled()` check in `cluster.rs:475` forces the parser on. Cache also emits a startup warning if parser is `Off` or `SessionControl`. The old `is_likely_read()` string-prefix heuristic has been removed entirely. - -5. **DB cache config defaults** - Observation: `Cache.policy` defaults to `CachePolicy::NoCache`. Even with `enabled = true`, caching is skipped unless policy is explicitly set. User action taken: Add `policy = "cache"` to pgdog.toml. - ---- - -## Refactoring: Decoupled Cache Policy Extraction - -The original implementation entangled cache directive parsing with pgdog's general comment parser, which only activates when the full query parser runs. This meant `/* pgdog_cache: ... */` annotations were silently ignored for simple queries and when `query_parser_bypass()` triggered. 
- -**What was done:** - -- New **`cache/`** module created under `query_engine/` -- `CachePolicyExtractor` trait with `fn extract(query, params) -> CacheDirective` -- `CommentCacheExtractor`: standalone regex scan on raw query string — works independent of AST parser -- `ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter -- `CachePolicyDispatcher`: chains extractors, returns first non-`None` result -- `Cache` struct as abstraction layer over client, stats, config, and dispatcher -- `CacheContext` struct holds `cache_miss` and `response_buffer` per-query -- Cache integration happens via `try_read_cache()` and `save_response_in_cache()` methods on `Cache` - ## How to Control Cache ### SQL Comments @@ -187,7 +150,7 @@ Add a C-style comment before your query. The first matching directive wins: ```sql -- Force bypass cache for this query -/* pgdog_cache: no-cache */ +/* pgdog_cache: no_cache */ SELECT * FROM users WHERE id = 1; -- Cache with database default TTL @@ -205,7 +168,7 @@ Set `pgdog.cache` at connection time (via DSN options) or with `SET` after conne ```sql -- Session-wide: all queries in this connection bypass cache -SET pgdog.cache = 'no-cache'; +SET pgdog.cache = 'no_cache'; -- Session-wide: cache all queries with default TTL SET pgdog.cache = 'cache'; @@ -216,7 +179,6 @@ SET pgdog.cache = 'cache ttl=300'; ```sh # Session-wide: all queries in this connection bypass cache -# Attention: this only supports `no_cache` with underscore psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dno_cache # Session-wide: cache all queries with default TTL @@ -228,7 +190,7 @@ psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.c ### Priority Order -Extractors are checked in order — first non-`None` result wins, then falls through to database config: +Sources are checked in order — first non-None result wins, then falls through to global config: ``` SQL comment → pgdog.cache parameter → DB 
policy config → Auto-decision @@ -239,21 +201,33 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi ## Completed -1. **Query parser auto-upgrade for caching** — When caching is enabled and parser is `Auto`/`Off`/`SessionControl`, the parser is forced to `On` via `|| self.cache_enabled()` check in `cluster.rs`. A startup warning is emitted in `core.rs` if parser remains incompatible. +1. **Redis client never connects** - Problem: CacheClient::new() built the client but never called init(). Fred requires explicit connection initialization. Fix: Added lazy `ensure_connected()` using `client.init().await`, guarded by `AtomicBool` so it runs exactly once on first get()/set(). Changed CacheClient from `#[derive(Debug)]` to manual Debug impl (contains `Arc`). + +2. **Redis GET fails on NULL / cache miss** - Problem: `client.get::()` throws `Parse Error: Cannot parse into bytes` when the key doesn't exist. Fix: Use `client.get::()` and check `val.is_null()` before extracting bytes. + +3. **Wire format deserialization wrong in send_cached_response** - Problem: PostgreSQL wire message structure is `[1B code][4B length]` where length includes the 4B itself. I calculated `offset + 5 + msg_len` (treating length as payload-only), causing incorrect byte slicing. Fix: Corrected to `offset + 1 + msg_len`. + +4. **Route incorrectly reports read-only as write when parser is disabled** - Problem: `query_parser_bypass()` conservatively returns `Route::write()` for all SQL when the query parser is disabled. Since pgdog doesn't enable the parser by default for simple queries, `route.is_read()` was false for `SELECT 1`. Fix: When any database has `cache.enabled = true`, the query parser level is auto-upgraded to `On` in the cluster config. The `|| self.cache_enabled()` check in `cluster.rs:475` forces the parser on. Cache also emits a startup warning if parser is `Off` or `SessionControl`. The old `is_likely_read()` string-prefix heuristic has been removed entirely. 
+ +5. **DB cache config defaults** - Observation: `Cache.policy` defaults to `CachePolicy::NoCache`. Even with `enabled = true`, caching is skipped unless policy is explicitly set. User action taken: Add `policy = "cache"` to pgdog.toml. -2. **Decoupled cache policy extraction** — Cache directives extracted via standalone regex in `cache/policy.rs`, works regardless of parser state. Supports `/* pgdog_cache: ... */` format with optional `ttl=` parameter. +6. **Query parser auto-upgrade for caching** — When caching is enabled and parser is `Auto`/`Off`/`SessionControl`, the parser is forced to `On` via `|| self.cache_enabled()` check in `cluster.rs`. A startup warning is emitted in `core.rs` if parser remains incompatible. -3. **Error handling / Reconnection** — Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. +7. **Decoupled cache policy extraction** — Cache directives extracted via standalone regex in `cache/policy.rs`, works regardless of parser state. Supports `/* pgdog_cache: ... */` format with optional `ttl=` parameter. -4. **Cache key collision across databases sharing one Redis** — Database name and raw query string are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. +8. **Error handling / Reconnection** — Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. -5. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. +9. 
**Cache key collision across databases sharing one Redis** — Database name and raw query string are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. -6. **Do not cache error responses**. +10. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. -7. **Setting pgdog.cache via connection url doesn't work**. +11. **Do not cache error responses**. -8. **Moved all cache-related structs from QueryEngine to Client** — now all cache structs including redis client are creating for whole pgdog's lifetime. +12. **Setting pgdog.cache via connection url doesn't work** — now works. + +13. **Moved all cache-related structs from QueryEngine to Client** — all cache structs, including the Redis client, are now created once and live for the whole lifetime of pgdog. + +14. **Use built-in query comment hints** — Cache hints (`pgdog_cache:`) are now extracted alongside sharding hints (`pgdog_shard:`, `pgdog_sharding_key:`, `pgdog_role:`) via the unified `comment()` function in `comment.rs`. The `comment_cache` field is stored in `AstInner` and accessed during cache checking via `client_request.ast.comment_cache`. Policy resolution simplified: trait-based extractors (`CachePolicyExtractor`, `CommentCacheExtractor`, `ParameterCacheExtractor`, `CachePolicyDispatcher`, `CachePolicyResolver`) replaced with free functions (`resolve()`, `get_cache_directive()`, `extract_parameter_directive()`). Comment hint (from AST) has priority over connection parameter `pgdog.cache`. `Cache` struct no longer needs `policy_dispatcher` field. `CacheDirective::None` removed in favor of `Option<CacheDirective>` with `NoCache` as default. Parameter format unified to `no_cache` (underscore, not dash). --- @@ -275,6 +249,10 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 8.
**Review and rewrite CacheClient**. +9. **Force-cache hint support**. + +10. **Add cache config to .schema**. + ### Planned Tests 1. **Database key namespace collision** — Two databases sharing one Redis, both running same query but with different underlying PG data. Verify correct isolation. diff --git a/pgdog-config/src/cache.rs b/pgdog-config/src/cache.rs index ef1c44530..0afd7c7ea 100644 --- a/pgdog-config/src/cache.rs +++ b/pgdog-config/src/cache.rs @@ -21,7 +21,7 @@ impl std::str::FromStr for CachePolicy { fn from_str(s: &str) -> Result { match s.to_lowercase().as_str() { - "no_cache" | "no-cache" => Ok(Self::NoCache), + "no_cache" => Ok(Self::NoCache), "cache" => Ok(Self::Cache), "auto" => Ok(Self::Auto), _ => Err(format!("Invalid cache policy: {}", s)), @@ -74,4 +74,4 @@ impl Cache { pub fn max_result_size(&self) -> Option { self.max_result_size } -} \ No newline at end of file +} diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 4bf7d0b2b..59245b1a6 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -1,14 +1,13 @@ use std::hash::{Hash, Hasher}; use crate::{ - config::config, frontend::ClientRequest, net::{FromBytes, Message, Parameters, Stream, ToBytes}, }; use tracing::debug; -use super::{Cache, CachePolicyResolver}; +use super::{policy, Cache}; pub enum CacheCheckResult { Hit { @@ -58,23 +57,8 @@ impl Cache { hasher.finish() }; - let cache_directive = self.policy_dispatcher.extract(query.query(), params); - let cache_config = &config().config.general.cache; - - debug!( - "cache_check: sql={}, db_config={:?}", - query.query(), - cache_config - ); - - let decision = CachePolicyResolver::resolve( - cache_directive, - cache_config, - is_read, - cache_key_hash, - &self.stats, - ) - .await; + let decision = + policy::resolve(client_request, params, is_read, cache_key_hash, &self.stats).await; if !decision.should_cache() { return 
Ok(CacheCheckResult::Passthrough); diff --git a/pgdog/src/frontend/cache/mod.rs b/pgdog/src/frontend/cache/mod.rs index 44d6ef89f..a28ceca3a 100644 --- a/pgdog/src/frontend/cache/mod.rs +++ b/pgdog/src/frontend/cache/mod.rs @@ -7,10 +7,7 @@ pub mod stats; pub use client::CacheClient; pub use context::CacheContext; pub use integration::CacheCheckResult; -pub use policy::{ - CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, - CommentCacheExtractor, ParameterCacheExtractor, -}; +pub use policy::CacheDecision; pub use stats::QueryStatsTracker; use once_cell::sync::Lazy; @@ -26,7 +23,6 @@ use crate::{ pub struct Cache { client: CacheClient, stats: QueryStatsTracker, - policy_dispatcher: CachePolicyDispatcher, } static CACHE: Lazy> = Lazy::new(|| Arc::new(Cache::new())); @@ -37,14 +33,9 @@ pub fn cache() -> Arc { impl Cache { fn new() -> Self { - let mut dispatcher = CachePolicyDispatcher::new(); - dispatcher.add_extractor(Box::new(CommentCacheExtractor)); - dispatcher.add_extractor(Box::new(ParameterCacheExtractor::new())); - Cache { client: CacheClient::new(), stats: QueryStatsTracker::default(), - policy_dispatcher: dispatcher, } } diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index dcf6965f7..b186bee8e 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -1,198 +1,18 @@ -use core::fmt; - -use crate::config::{Cache as CacheConfig, CachePolicy}; -use tracing::debug; - use super::stats::QueryStatsTracker; - +use crate::config::{config, CachePolicy}; +use crate::frontend::ClientRequest; use crate::net::parameter::ParameterValue; use crate::net::Parameters; -use once_cell::sync::Lazy; -use regex::Regex; -static CACHE: Lazy = Lazy::new(|| { - Regex::new(r#"pgdog_cache: *(no-cache|cache(?:\s+ttl\s*=\s*([0-9]+))?)?"#).unwrap() -}); - -/// Cache directive from SQL comment. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum CacheDirective { - #[default] - None, Cache { ttl_seconds: Option, }, + #[default] NoCache, } -impl CacheDirective { - pub fn is_cache(&self) -> bool { - matches!(self, CacheDirective::Cache { .. }) - } - - pub fn is_no_cache(&self) -> bool { - matches!(self, CacheDirective::NoCache) - } - - pub fn ttl(&self) -> Option { - match self { - CacheDirective::Cache { ttl_seconds } => *ttl_seconds, - _ => None, - } - } -} - -pub trait CachePolicyExtractor: Send + Sync + fmt::Debug { - fn extract(&self, query: &str, params: &Parameters) -> CacheDirective; -} - -#[derive(Debug)] -pub struct CommentCacheExtractor; - -impl CachePolicyExtractor for CommentCacheExtractor { - fn extract(&self, query: &str, _params: &Parameters) -> CacheDirective { - for cap in CACHE.captures_iter(query) { - if let Some(action) = cap.get(1) { - let action = action.as_str(); - if action == "no-cache" { - return CacheDirective::NoCache; - } else if action.starts_with("cache") { - let ttl = cap.get(2).and_then(|m| m.as_str().parse::().ok()); - return CacheDirective::Cache { ttl_seconds: ttl }; - } - } else { - return CacheDirective::Cache { ttl_seconds: None }; - } - } - CacheDirective::None - } -} - -#[derive(Debug)] -pub struct ParameterCacheExtractor { - key: String, -} - -impl ParameterCacheExtractor { - pub fn new() -> Self { - Self { - key: "pgdog.cache".to_string(), - } - } -} - -impl CachePolicyExtractor for ParameterCacheExtractor { - fn extract(&self, _query: &str, params: &Parameters) -> CacheDirective { - let value = match params.get(&self.key) { - Some(p) => p, - None => return CacheDirective::None, - }; - - let s = match value { - ParameterValue::String(v) => v.as_str(), - _ => return CacheDirective::None, - }; - - match s { - "no-cache" => CacheDirective::NoCache, - "cache" => CacheDirective::Cache { ttl_seconds: None }, - _ => { - if let Some(ttl) = s - .strip_prefix("cache ttl=") - .and_then(|t| t.trim().parse::().ok()) 
- { - CacheDirective::Cache { - ttl_seconds: Some(ttl), - } - } else if let Some(ttl) = s - .strip_prefix("cache ttl =") - .and_then(|t| t.trim().parse::().ok()) - { - CacheDirective::Cache { - ttl_seconds: Some(ttl), - } - } else { - CacheDirective::None - } - } - } - } -} - -#[derive(Debug)] -pub struct CachePolicyDispatcher { - extractors: Vec>, -} - -impl CachePolicyDispatcher { - pub fn new() -> Self { - Self { - extractors: Vec::new(), - } - } - - pub fn add_extractor(&mut self, extractor: Box) { - self.extractors.push(extractor); - } - - pub fn extract(&self, query: &str, params: &Parameters) -> CacheDirective { - for extractor in &self.extractors { - let result = extractor.extract(query, params); - if result != CacheDirective::None { - debug!("Cache directive for query {} is {:?}", query, result); - return result; - } - } - CacheDirective::None - } - - pub fn is_empty(&self) -> bool { - self.extractors.is_empty() - } -} - -pub struct CachePolicyResolver; - -impl CachePolicyResolver { - pub async fn resolve( - cache_directive: CacheDirective, - cache_config: &CacheConfig, - is_read: bool, - cache_key_hash: u64, - stats: &QueryStatsTracker, - ) -> CacheDecision { - if !is_read { - return CacheDecision::Skip; - } - - if let CacheDirective::NoCache = cache_directive { - return CacheDecision::Skip; - } - - if let CacheDirective::Cache { ttl_seconds } = cache_directive { - return CacheDecision::Cache(ttl_seconds.or(Some(cache_config.ttl()))); - } - - match cache_config.policy() { - CachePolicy::NoCache => CacheDecision::Skip, - CachePolicy::Cache => CacheDecision::Cache(Some(cache_config.ttl())), - CachePolicy::Auto => Self::auto_decision(cache_key_hash, stats).await, - } - } - - async fn auto_decision(cache_key_hash: u64, stats: &QueryStatsTracker) -> CacheDecision { - let query_stats = stats.get(cache_key_hash).await; - - if query_stats.hit_count > query_stats.miss_count - && query_stats.avg_result_size() < 1_000_000 - { - CacheDecision::Cache(None) - } else { 
- CacheDecision::Skip - } - } -} - pub enum CacheDecision { Skip, Cache(Option), @@ -211,188 +31,83 @@ impl CacheDecision { } } -#[cfg(test)] -mod tests { - use super::*; +const KEY: &str = "pgdog.cache"; - #[tokio::test] - async fn test_skip_for_writes() { - let cache_config = CacheConfig { - enabled: Some(true), - policy: Some(CachePolicy::Cache), - ttl: None, - redis_url: None, - max_result_size: None, - }; - let decision = CachePolicyResolver::resolve( - CacheDirective::None, - &cache_config, - false, - 0xAABBCCDD, - &QueryStatsTracker::default(), - ) - .await; - assert!(!decision.should_cache()); - } +pub async fn resolve( + client_request: &ClientRequest, + params: &Parameters, + is_read: bool, + cache_key_hash: u64, + stats: &QueryStatsTracker, +) -> CacheDecision { + let cache_config = &config().config.general.cache; - #[tokio::test] - async fn test_no_cache_directive() { - let cache_config = CacheConfig { - enabled: Some(true), - policy: Some(CachePolicy::Cache), - ttl: None, - redis_url: None, - max_result_size: None, - }; - let decision = CachePolicyResolver::resolve( - CacheDirective::NoCache, - &cache_config, - true, - 0xAABBCCDD, - &QueryStatsTracker::default(), - ) - .await; - assert!(!decision.should_cache()); + if !is_read { + return CacheDecision::Skip; } - #[tokio::test] - async fn test_cache_directive_with_ttl() { - let cache_config = CacheConfig { - enabled: Some(true), - policy: Some(CachePolicy::NoCache), - ttl: None, - redis_url: None, - max_result_size: None, - }; - let decision = CachePolicyResolver::resolve( - CacheDirective::Cache { - ttl_seconds: Some(120), - }, - &cache_config, - true, - 0xAABBCCDD, - &QueryStatsTracker::default(), - ) - .await; - assert!(decision.should_cache()); - assert_eq!(decision.ttl(), Some(120)); - } - - #[test] - fn test_comment_extractor_no_cache() { - let extractor = CommentCacheExtractor; - let params = Parameters::default(); - let directive = - extractor.extract("SELECT * FROM users /* pgdog_cache: 
no-cache */", ¶ms); - assert!(matches!(directive, CacheDirective::NoCache)); - } - - #[test] - fn test_comment_extractor_cache_default_ttl() { - let extractor = CommentCacheExtractor; - let params = Parameters::default(); - let directive = extractor.extract("SELECT * FROM users /* pgdog_cache: cache */", ¶ms); - match directive { - CacheDirective::Cache { ttl_seconds } => assert!(ttl_seconds.is_none()), - _ => panic!("Expected Cache directive"), + let cache_directive = get_cache_directive(client_request, params); + match cache_directive { + Some(CacheDirective::NoCache) => return CacheDecision::Skip, + Some(CacheDirective::Cache { ttl_seconds }) => { + return CacheDecision::Cache(ttl_seconds.or(Some(cache_config.ttl()))) } + _ => (), } - #[test] - fn test_comment_extractor_cache_with_ttl() { - let extractor = CommentCacheExtractor; - let params = Parameters::default(); - let directive = extractor.extract( - "SELECT * FROM users /* pgdog_cache: cache ttl=60 */", - ¶ms, - ); - match directive { - CacheDirective::Cache { ttl_seconds } => assert_eq!(ttl_seconds, Some(60)), - _ => panic!("Expected Cache directive"), - } + match cache_config.policy() { + CachePolicy::NoCache => CacheDecision::Skip, + CachePolicy::Cache => CacheDecision::Cache(Some(cache_config.ttl())), + CachePolicy::Auto => auto_decision(cache_key_hash, stats).await, } +} - #[test] - fn test_comment_extractor_no_directive() { - let extractor = CommentCacheExtractor; - let params = Parameters::default(); - let directive = extractor.extract("SELECT * FROM users", ¶ms); - assert!(matches!(directive, CacheDirective::None)); +async fn auto_decision(cache_key_hash: u64, stats: &QueryStatsTracker) -> CacheDecision { + let query_stats = stats.get(cache_key_hash).await; + if query_stats.hit_count > query_stats.miss_count && query_stats.avg_result_size() < 1_000_000 { + CacheDecision::Cache(None) + } else { + CacheDecision::Skip } +} - #[test] - fn test_parameter_extractor_no_cache() { - let extractor = 
ParameterCacheExtractor::new(); - let mut params = Parameters::default(); - params.insert("pgdog.cache", "no-cache"); - let directive = extractor.extract("SELECT * FROM users", ¶ms); - assert!(matches!(directive, CacheDirective::NoCache)); - } +// Comment hint has priority over connection parameter +fn get_cache_directive( + client_request: &ClientRequest, + params: &Parameters, +) -> Option { + client_request + .ast + .as_ref() + .map(|ast| ast.comment_cache) + .flatten() + .or_else(|| extract_parameter_directive(params)) +} - #[test] - fn test_parameter_extractor_cache() { - let extractor = ParameterCacheExtractor::new(); - let mut params = Parameters::default(); - params.insert("pgdog.cache", "cache"); - let directive = extractor.extract("SELECT * FROM users", ¶ms); - match directive { - CacheDirective::Cache { ttl_seconds } => assert!(ttl_seconds.is_none()), - _ => panic!("Expected Cache directive"), - } - } +fn extract_parameter_directive(params: &Parameters) -> Option { + let value = params.get(KEY)?; + let s = match value { + ParameterValue::String(v) => v.as_str().trim(), + _ => return None, + }; - #[test] - fn test_parameter_extractor_cache_with_ttl() { - let extractor = ParameterCacheExtractor::new(); - let mut params = Parameters::default(); - params.insert("pgdog.cache", "cache ttl=120"); - let directive = extractor.extract("SELECT * FROM users", ¶ms); - match directive { - CacheDirective::Cache { ttl_seconds } => assert_eq!(ttl_seconds, Some(120)), - _ => panic!("Expected Cache directive"), - } + match s { + "no_cache" => return Some(CacheDirective::NoCache), + "cache" => return Some(CacheDirective::Cache { ttl_seconds: None }), + _ => (), } - #[test] - fn test_parameter_extractor_no_param() { - let extractor = ParameterCacheExtractor::new(); - let params = Parameters::default(); - let directive = extractor.extract("SELECT * FROM users", ¶ms); - assert!(matches!(directive, CacheDirective::None)); + if let Some(ttl) = s + .strip_prefix("cache") + 
.map(|s| s.trim_start()) + .map(|s| s.strip_prefix("ttl=")) + .flatten() + .and_then(|t| t.trim().parse::().ok()) + { + return Some(CacheDirective::Cache { + ttl_seconds: Some(ttl), + }); } - #[test] - fn test_dispatcher_comment_wins() { - let comment_extractor = CommentCacheExtractor; - let parameter_extractor = ParameterCacheExtractor::new(); - - let mut dispatcher = CachePolicyDispatcher::new(); - dispatcher.add_extractor(Box::new(comment_extractor)); - dispatcher.add_extractor(Box::new(parameter_extractor)); - - let mut params = Parameters::default(); - params.insert("pgdog.cache", "no-cache"); - - let directive = dispatcher.extract("SELECT * /* pgdog_cache: cache ttl=60 */", ¶ms); - match directive { - CacheDirective::Cache { ttl_seconds } => assert_eq!(ttl_seconds, Some(60)), - _ => panic!("Expected comment to win"), - } - } - - #[test] - fn test_dispatcher_parameter_fallback() { - let comment_extractor = CommentCacheExtractor; - let parameter_extractor = ParameterCacheExtractor::new(); - - let mut dispatcher = CachePolicyDispatcher::new(); - dispatcher.add_extractor(Box::new(comment_extractor)); - dispatcher.add_extractor(Box::new(parameter_extractor)); - - let mut params = Parameters::default(); - params.insert("pgdog.cache", "no-cache"); - - let directive = dispatcher.extract("SELECT * FROM users", ¶ms); - assert!(matches!(directive, CacheDirective::NoCache)); - } + None } diff --git a/pgdog/src/frontend/router/parser/cache/ast.rs b/pgdog/src/frontend/router/parser/cache/ast.rs index c34d865dc..179344a48 100644 --- a/pgdog/src/frontend/router/parser/cache/ast.rs +++ b/pgdog/src/frontend/router/parser/cache/ast.rs @@ -12,6 +12,7 @@ use super::super::{ }; use super::{Fingerprint, Stats}; use crate::backend::schema::Schema; +use crate::frontend::cache::policy::CacheDirective; use crate::frontend::router::parser::rewrite::statement::RewritePlan; use crate::frontend::{BufferedQuery, PreparedStatements}; use crate::net::parameter::ParameterValue; @@ -37,6 +38,8 
@@ pub struct AstInner { pub comment_shard: Option, /// Role. pub comment_role: Option, + /// Cache. + pub comment_cache: Option, /// Rewrite plan. pub rewrite_plan: RewritePlan, /// Fingerprint. @@ -44,13 +47,13 @@ pub struct AstInner { } impl AstInner { - /// Create new AST record, with no rewrite or comment routing. pub fn new(ast: ParseResult) -> Self { Self { ast, stats: Mutex::new(Stats::new()), comment_role: None, comment_shard: None, + comment_cache: None, rewrite_plan: RewritePlan::default(), fingerprint: Fingerprint::default(), } @@ -81,7 +84,7 @@ impl Ast { QueryParserEngine::PgQueryRaw => parse_raw(query), } .map_err(Error::PgQuery)?; - let (comment_shard, comment_role) = comment(query, schema)?; + let (comment_shard, comment_role, comment_cache) = comment(query, schema)?; let fingerprint = Fingerprint::new(query, schema.query_parser_engine).map_err(Error::PgQuery)?; @@ -113,6 +116,7 @@ impl Ast { stats: Mutex::new(stats), comment_shard, comment_role, + comment_cache, ast, rewrite_plan, fingerprint, diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index a87883adb..53cf1431f 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -6,6 +6,7 @@ use regex::Regex; use crate::backend::ShardingSchema; use crate::config::database::Role; +use crate::frontend::cache::policy::CacheDirective; use crate::frontend::router::sharding::ContextBuilder; use super::super::parser::Shard; @@ -16,6 +17,9 @@ static SHARDING_KEY: Lazy = Lazy::new(|| { Regex::new(r#"pgdog_sharding_key: *(?:"([^"]*)"|'([^']*)'|([0-9a-zA-Z-]+))"#).unwrap() }); static ROLE: Lazy = Lazy::new(|| Regex::new(r#"pgdog_role: *(primary|replica)"#).unwrap()); +static CACHE: Lazy = Lazy::new(|| { + Regex::new(r#"pgdog_cache: *(no_cache|cache(?:\s+ttl\s*=\s*([0-9]+))?)?"#).unwrap() +}); fn get_matched_value<'a>(caps: &'a regex::Captures<'a>) -> Option<&'a str> { caps.get(1) @@ -24,23 +28,24 @@ fn 
get_matched_value<'a>(caps: &'a regex::Captures<'a>) -> Option<&'a str> { .map(|m| m.as_str()) } -/// Extract shard number from a comment. +/// Extract shard number, role and cache directive from a comment. /// /// Comment style uses the C-style comments (not SQL comments!) /// as to allow the comment to appear anywhere in the query. /// -/// See [`SHARD`] and [`SHARDING_KEY`] for the style of comment we expect. +/// See [`SHARD`], [`SHARDING_KEY`], [`ROLE`] and [`CACHE`] for the style of comment we expect. /// pub fn comment( query: &str, schema: &ShardingSchema, -) -> Result<(Option, Option), Error> { +) -> Result<(Option, Option, Option), Error> { let tokens = match schema.query_parser_engine { QueryParserEngine::PgQueryProtobuf => scan(query), QueryParserEngine::PgQueryRaw => scan_raw(query), } .map_err(Error::PgQuery)?; let mut role = None; + let mut cache = None; for token in tokens.tokens.iter() { if token.token == Token::CComment as i32 { @@ -54,15 +59,26 @@ pub fn comment( } } } + if let Some(cap) = CACHE.captures(comment) { + if let Some(action) = cap.get(1) { + let action = action.as_str(); + if action == "no_cache" { + cache = Some(CacheDirective::NoCache); + } else { + let ttl = cap.get(2).and_then(|m| m.as_str().parse::().ok()); + cache = Some(CacheDirective::Cache { ttl_seconds: ttl }); + } + } + } if let Some(cap) = SHARDING_KEY.captures(comment) { if let Some(sharding_key) = get_matched_value(&cap) { if let Some(schema) = schema.schemas.get(Some(sharding_key.into())) { - return Ok((Some(schema.shard().into()), role)); + return Ok((Some(schema.shard().into()), role, cache)); } let ctx = ContextBuilder::infer_from_from_and_config(sharding_key, schema)? 
.shards(schema.shards) .build()?; - return Ok((Some(ctx.apply()?), role)); + return Ok((Some(ctx.apply()?), role, cache)); } } if let Some(cap) = SHARD.captures(comment) { @@ -77,13 +93,14 @@ pub fn comment( .unwrap_or(Shard::All), ), role, + cache, )); } } } } - Ok((None, role)) + Ok((None, role, cache)) } #[cfg(test)] @@ -255,4 +272,94 @@ mod tests { let result = comment(query, &schema).unwrap(); assert_eq!(result.0, Some(Shard::Direct(1))); } + + #[test] + fn test_cache_hint_no_cache() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users /* pgdog_cache: no_cache */"; + let result = comment(query, &schema).unwrap(); + assert!(matches!(result.2, Some(CacheDirective::NoCache))); + } + + #[test] + fn test_cache_hint_cache_default_ttl() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users /* pgdog_cache: cache */"; + let result = comment(query, &schema).unwrap(); + assert!(matches!( + result.2, + Some(CacheDirective::Cache { ttl_seconds: None }) + )); + } + + #[test] + fn test_cache_hint_cache_with_ttl() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users /* pgdog_cache: cache ttl=60 */"; + let result = comment(query, &schema).unwrap(); + assert!(matches!( + result.2, + Some(CacheDirective::Cache { + ttl_seconds: Some(60) + }) + )); + } + + #[test] + fn test_cache_hint_no_directive() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, 
SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users"; + let result = comment(query, &schema).unwrap(); + assert!(matches!(result.2, None)); + } + + #[test] + fn test_combined_shard_and_cache_hints() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users /* pgdog_role: replica pgdog_shard: 1 pgdog_cache: cache ttl=300 */"; + let result = comment(query, &schema).unwrap(); + assert_eq!(result.1, Some(Role::Replica)); + assert_eq!(result.0, Some(Shard::Direct(1))); + assert!(matches!( + result.2, + Some(CacheDirective::Cache { + ttl_seconds: Some(300) + }) + )); + } } diff --git a/pgdog/src/net/parameter.rs b/pgdog/src/net/parameter.rs index 1502d0397..4dd0c6114 100644 --- a/pgdog/src/net/parameter.rs +++ b/pgdog/src/net/parameter.rs @@ -33,6 +33,7 @@ static UNTRACKED_PARAMS: Lazy> = Lazy::new(|| { String::from("pgdog.role"), String::from("pgdog.shard"), String::from("pgdog.sharding_key"), + String::from("pgdog.cache"), ]) }); From 9d682c6dcc8087ee4b4792bc55a9247f20ba6368 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 10/20] updated cache config --- .schema/pgdog.schema.json | 74 ++++++++++++++++++++++++++++++ docs/CACHE.md | 17 ++++--- pgdog-config/src/cache.rs | 67 +++++++++++++++++++-------- pgdog-config/src/core.rs | 2 +- pgdog/src/backend/pool/cluster.rs | 2 +- pgdog/src/frontend/cache/client.rs | 26 +++++------ pgdog/src/frontend/cache/policy.rs | 6 +-- 7 files changed, 147 insertions(+), 47 deletions(-) diff --git a/.schema/pgdog.schema.json b/.schema/pgdog.schema.json index 78238e78b..906dae5c1 100644 --- a/.schema/pgdog.schema.json +++ b/.schema/pgdog.schema.json @@ -31,6 +31,13 @@ "ban_timeout": 300000, "broadcast_address": null, "broadcast_port": 6433, + "cache": { + "enabled": 
false, + "max_result_size": 0, + "policy": "auto", + "redis_url": "redis://localhost:6379", + "ttl": 300 + }, "checkout_timeout": 5000, "client_connection_recovery": "drop", "client_idle_in_transaction_timeout": 9223372036854775807, @@ -275,6 +282,62 @@ } ] }, + "Cache": { + "description": "Redis cache configuration for a database.", + "type": "object", + "properties": { + "enabled": { + "description": "Whether to enable caching for this database.\n\n_Default:_ `false`", + "type": "boolean", + "default": false + }, + "max_result_size": { + "description": "Maximum result size in bytes to cache (0 = unlimited).\n\n_Default:_ `0`", + "type": "integer", + "format": "uint", + "default": 0, + "minimum": 0 + }, + "policy": { + "description": "Cache policy: no_cache, cache, or auto.\n\n_Default:_ `auto`", + "$ref": "#/$defs/CachePolicy", + "default": "auto" + }, + "redis_url": { + "description": "Redis connection URL.\n\n_Default:_ `redis://localhost:6379`", + "type": "string", + "default": "redis://localhost:6379" + }, + "ttl": { + "description": "Default TTL in seconds for cached queries.\n\n_Default:_ `300`", + "type": "integer", + "format": "uint64", + "default": 300, + "minimum": 0 + } + }, + "additionalProperties": false + }, + "CachePolicy": { + "description": "Cache policy.", + "oneOf": [ + { + "description": "Never cache queries for this database.", + "type": "string", + "const": "no_cache" + }, + { + "description": "Always cache read queries.", + "type": "string", + "const": "cache" + }, + { + "description": "Dynamically decide based on Redis memory and query stats.", + "type": "string", + "const": "auto" + } + ] + }, "ConnectionRecovery": { "description": "controls if server connections are recovered or dropped if a client abruptly disconnects.\n\nhttps://docs.pgdog.dev/configuration/pgdog.toml/general/#connection_recovery", "oneOf": [ @@ -574,6 +637,17 @@ "maximum": 65535, "minimum": 0 }, + "cache": { + "description": "Redis cache configuration for this 
database.", + "$ref": "#/$defs/Cache", + "default": { + "enabled": false, + "max_result_size": 0, + "policy": "auto", + "redis_url": "redis://localhost:6379", + "ttl": 300 + } + }, "checkout_timeout": { "description": "Maximum amount of time a client is allowed to wait for a connection from the pool.\n\n_Default:_ `5000`\n\nhttps://docs.pgdog.dev/configuration/pgdog.toml/general/#checkout_timeout", "type": "integer", diff --git a/docs/CACHE.md b/docs/CACHE.md index 743a2ab25..bcc7f4840 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -12,15 +12,14 @@ Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on **`cache.rs`** — Cache configuration types: -**CachePolicy enum:** `NoCache` (default), `Cache`, `Auto`. Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. +**CachePolicy enum:** `NoCache`, `Cache`, `Auto` (default). Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. **Cache struct:** -- `enabled: Option` — is caching on? -- `policy: Option` — which policy? -- `ttl: Option` — default TTL seconds (default 300) -- `redis_url: Option` — Redis connection URL -- `max_result_size: Option` — max cached result bytes -- Helper methods: `is_enabled()`, `policy()`, `ttl()`, `max_result_size()` +- `enabled: bool` — is caching on? +- `policy: CachePolicy` — which policy? +- `ttl: u64` — default TTL seconds (default 300) +- `redis_url: String` — Redis connection URL +- `max_result_size: usize` — max cached result bytes **`general.rs`** — `General` struct holds `cache: Cache` field. **Cache config is global.** @@ -229,6 +228,8 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 14. **Use built-in query comment hints** — Cache hints (`pgdog_cache:`) are now extracted alongside sharding hints (`pgdog_shard:`, `pgdog_sharding_key:`, `pgdog_role:`) via the unified `comment()` function in `comment.rs`. 
The `comment_cache` field is stored in `AstInner` and accessed during cache checking via `client_request.ast.comment_cache`. Policy resolution simplified: trait-based extractors (`CachePolicyExtractor`, `CommentCacheExtractor`, `ParameterCacheExtractor`, `CachePolicyDispatcher`, `CachePolicyResolver`) replaced with free functions (`resolve()`, `get_cache_directive()`, `extract_parameter_directive()`). Comment hint (from AST) has priority over connection parameter `pgdog.cache`. `Cache` struct no longer needs `policy_dispatcher` field. `CacheDirective::None` removed in favor of `Option` with `NoCache` as default. Parameter format unified to `no_cache` (underscore, not dash). +15. **Add cache config to .schema**. + --- ## What's Left To Do @@ -251,8 +252,6 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 9. **Force-cache hint support**. -10. **Add cache config to .schema**. - ### Planned Tests 1. **Database key namespace collision** — Two databases sharing one Redis, both running same query but with different underlying PG data. Verify correct isolation. diff --git a/pgdog-config/src/cache.rs b/pgdog-config/src/cache.rs index 0afd7c7ea..57d75adb3 100644 --- a/pgdog-config/src/cache.rs +++ b/pgdog-config/src/cache.rs @@ -8,11 +8,11 @@ use serde::{Deserialize, Serialize}; #[serde(rename_all = "snake_case")] pub enum CachePolicy { /// Never cache queries for this database. - #[default] NoCache, /// Always cache read queries. Cache, /// Dynamically decide based on Redis memory and query stats. + #[default] Auto, } @@ -41,37 +41,66 @@ impl std::fmt::Display for CachePolicy { } /// Redis cache configuration for a database. -#[derive( - Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, JsonSchema, -)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, JsonSchema)] #[serde(deny_unknown_fields)] pub struct Cache { /// Whether to enable caching for this database. 
- pub enabled: Option, + /// + /// _Default:_ `false` + #[serde(default = "Cache::enabled")] + pub enabled: bool, /// Cache policy: no_cache, cache, or auto. - pub policy: Option, + /// + /// _Default:_ `auto` + #[serde(default = "Cache::policy")] + pub policy: CachePolicy, /// Default TTL in seconds for cached queries. - pub ttl: Option, - /// Redis connection URL (e.g., redis://localhost:6379). - pub redis_url: Option, + /// + /// _Default:_ `300` + #[serde(default = "Cache::ttl")] + pub ttl: u64, + /// Redis connection URL. + /// + /// _Default:_ `redis://localhost:6379` + #[serde(default = "Cache::redis_url")] + pub redis_url: String, /// Maximum result size in bytes to cache (0 = unlimited). - pub max_result_size: Option, + /// + /// _Default:_ `0` + #[serde(default = "Cache::max_result_size")] + pub max_result_size: usize, +} + +impl Default for Cache { + fn default() -> Self { + Self { + enabled: Self::enabled(), + policy: Self::policy(), + ttl: Self::ttl(), + redis_url: Self::redis_url(), + max_result_size: Self::max_result_size(), + } + } } impl Cache { - pub fn is_enabled(&self) -> bool { - self.enabled.unwrap_or(false) + fn enabled() -> bool { + false } - pub fn policy(&self) -> CachePolicy { - self.policy.unwrap_or_default() + fn policy() -> CachePolicy { + Default::default() } - pub fn ttl(&self) -> u64 { - self.ttl.unwrap_or(300) + fn ttl() -> u64 { + 300 } - pub fn max_result_size(&self) -> Option { - self.max_result_size + fn redis_url() -> String { + "redis://localhost:6379".to_string() } -} + + fn max_result_size() -> usize { + 0 + } +} \ No newline at end of file diff --git a/pgdog-config/src/core.rs b/pgdog-config/src/core.rs index 98135178c..52e0187fb 100644 --- a/pgdog-config/src/core.rs +++ b/pgdog-config/src/core.rs @@ -565,7 +565,7 @@ impl Config { ); } - if self.general.cache.is_enabled() + if self.general.cache.enabled && matches!(self.general.query_parser, QueryParserLevel::Off | QueryParserLevel::SessionControl) { warn!("cache requires 
enabled query parser but it's disabled or session controlled"); } diff --git a/pgdog/src/backend/pool/cluster.rs b/pgdog/src/backend/pool/cluster.rs index 22bee793e..b636d87fc 100644 --- a/pgdog/src/backend/pool/cluster.rs +++ b/pgdog/src/backend/pool/cluster.rs @@ -212,7 +212,7 @@ impl<'a> ClusterConfig<'a> { reload_schema_on_ddl: general.reload_schema_on_ddl, load_schema: general.load_schema, resharding_parallel_copies: general.resharding_parallel_copies, - cache_enabled: general.cache.is_enabled() + cache_enabled: general.cache.enabled } } } diff --git a/pgdog/src/frontend/cache/client.rs b/pgdog/src/frontend/cache/client.rs index dfede8e1d..5cc9bf37e 100644 --- a/pgdog/src/frontend/cache/client.rs +++ b/pgdog/src/frontend/cache/client.rs @@ -39,7 +39,7 @@ impl CacheClient { pub fn new() -> Self { let cache_config = &config().config.general.cache; - if !cache_config.is_enabled() || cache_config.redis_url.is_none() { + if !cache_config.enabled { return Self { client: None, redis_connected: Arc::new(AtomicBool::new(false)), @@ -47,7 +47,7 @@ impl CacheClient { }; } - let url = cache_config.redis_url.as_ref().unwrap(); + let url = cache_config.redis_url.as_str(); let client_config = match RedisConfig::from_url(url) { Ok(c) => c, Err(e) => { @@ -248,19 +248,17 @@ impl CacheClient { let cache_config = &config().config.general.cache; - if let Some(max_size) = cache_config.max_result_size() { - if value.len() > max_size { - debug!( - "Skipping cache for key {}: size {} exceeds max {}", - key, - value.len(), - max_size - ); - return Ok(()); - } + if cache_config.max_result_size != 0 && value.len() > cache_config.max_result_size { + debug!( + "Skipping cache for key {}: size {} exceeds max {}", + key, + value.len(), + cache_config.max_result_size + ); + return Ok(()); } - let ttl_seconds = ttl.unwrap_or_else(|| cache_config.ttl()) as i64; + let ttl_seconds = ttl.unwrap_or_else(|| cache_config.ttl) as i64; match tokio::time::timeout( REDIS_OPERATION_TIMEOUT, @@ -293,7 
+291,7 @@ impl CacheClient { pub fn is_enabled(&self) -> bool { let cache_config = &config().config.general.cache; - self.client.is_some() && cache_config.is_enabled() + self.client.is_some() && cache_config.enabled } } diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index b186bee8e..e6f92514f 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -50,14 +50,14 @@ pub async fn resolve( match cache_directive { Some(CacheDirective::NoCache) => return CacheDecision::Skip, Some(CacheDirective::Cache { ttl_seconds }) => { - return CacheDecision::Cache(ttl_seconds.or(Some(cache_config.ttl()))) + return CacheDecision::Cache(ttl_seconds.or(Some(cache_config.ttl))) } _ => (), } - match cache_config.policy() { + match cache_config.policy { CachePolicy::NoCache => CacheDecision::Skip, - CachePolicy::Cache => CacheDecision::Cache(Some(cache_config.ttl())), + CachePolicy::Cache => CacheDecision::Cache(Some(cache_config.ttl)), CachePolicy::Auto => auto_decision(cache_key_hash, stats).await, } } From 44ae0ed1f4fc485551f18970ed0147409c8e8cb6 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 11/20] force-cache hint support --- docs/CACHE.md | 28 ++++++++- pgdog/src/frontend/cache/client.rs | 4 +- pgdog/src/frontend/cache/context.rs | 4 +- pgdog/src/frontend/cache/integration.rs | 69 +++++++++++++-------- pgdog/src/frontend/cache/mod.rs | 15 ++--- pgdog/src/frontend/cache/policy.rs | 42 ++++++------- pgdog/src/frontend/router/parser/comment.rs | 47 +++++++++++++- 7 files changed, 143 insertions(+), 66 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index bcc7f4840..ac215d2d4 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -159,6 +159,16 @@ SELECT * FROM products WHERE category = 'electronics'; -- Cache with custom TTL in seconds /* pgdog_cache: cache ttl=300 */ SELECT * FROM orders; + +-- Force cache with database default TTL +-- Query hash computed as 
if comment were like "/* pgdog_cache: cache */" +/* pgdog_cache: force_cache */ +SELECT * FROM products WHERE category = 'electronics'; + +-- Force cache with custom TTL in seconds +-- Query hash computed as if comment were like "/* pgdog_cache: cache ttl=300*/" +/* pgdog_cache: force_cache ttl=300 */ +SELECT * FROM orders; ``` ### Connection Parameter @@ -174,6 +184,12 @@ SET pgdog.cache = 'cache'; -- Session-wide: cache all queries with 5-minute TTL SET pgdog.cache = 'cache ttl=300'; + +-- Session-wide: force cache all queries with default TTL +SET pgdog.cache = 'force_cache'; + +-- Session-wide: force cache all queries with 5-minute TTL +SET pgdog.cache = 'force_cache ttl=300'; ``` ```sh @@ -185,6 +201,12 @@ psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.c # Session-wide: cache all queries with 5-minute TTL psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dcache%5C%20ttl%3D300 + +# Session-wide: force cache all queries with default TTL +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dforce_cache + +# Session-wide: force cache all queries with 5-minute TTL +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dforce_cache%5C%20ttl%3D300 ``` ### Priority Order @@ -230,6 +252,8 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 15. **Add cache config to .schema**. +16. **Force-cache hint support**. + --- ## What's Left To Do @@ -244,14 +268,12 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 5. **Magic numbers in send_cached_response()**. -6. **Make statistics collection async** — for auto policy. +6. **Make statistics collection deferred** — for auto policy. 7. **Provide config hotswap**. 8. **Review and rewrite CacheClient**. -9. **Force-cache hint support**. - ### Planned Tests 1. 
**Database key namespace collision** — Two databases sharing one Redis, both running same query but with different underlying PG data. Verify correct isolation. diff --git a/pgdog/src/frontend/cache/client.rs b/pgdog/src/frontend/cache/client.rs index 5cc9bf37e..de59dc686 100644 --- a/pgdog/src/frontend/cache/client.rs +++ b/pgdog/src/frontend/cache/client.rs @@ -229,7 +229,7 @@ impl CacheClient { } } - pub(crate) async fn set(&self, key: u64, value: &[u8], ttl: Option) -> Result<(), Error> { + pub(crate) async fn set(&self, key: u64, value: &[u8], ttl: u64) -> Result<(), Error> { if !self.ensure_connected().await { if !self.is_connected() { self.spawn_reconnect(); @@ -258,7 +258,7 @@ impl CacheClient { return Ok(()); } - let ttl_seconds = ttl.unwrap_or_else(|| cache_config.ttl) as i64; + let ttl_seconds = ttl as i64; match tokio::time::timeout( REDIS_OPERATION_TIMEOUT, diff --git a/pgdog/src/frontend/cache/context.rs b/pgdog/src/frontend/cache/context.rs index 2aaede4ff..42fd0fecf 100644 --- a/pgdog/src/frontend/cache/context.rs +++ b/pgdog/src/frontend/cache/context.rs @@ -1,9 +1,9 @@ -use crate::net::{Message, messages::Protocol}; +use crate::{frontend::cache::integration::CacheMiss, net::{Message, messages::Protocol}}; /// Cache context to use in QueryEngineContext. 
#[derive(Default)] pub struct CacheContext { - pub cache_miss: Option<(u64, Option)>, + pub cache_miss: Option, pub response_buffer: Vec, pub had_error: bool, } diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 59245b1a6..4d236954b 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -1,7 +1,10 @@ use std::hash::{Hash, Hasher}; +use once_cell::sync::Lazy; +use regex::Regex; + use crate::{ - frontend::ClientRequest, + frontend::{ClientRequest, cache::CacheDecision}, net::{FromBytes, Message, Parameters, Stream, ToBytes}, }; @@ -9,14 +12,20 @@ use tracing::debug; use super::{policy, Cache}; +static FORCE_CACHE_RE: Lazy = Lazy::new(|| { + Regex::new(r#"pgdog_cache:\s*force_cache"#).unwrap() +}); + +pub struct CacheMiss { + pub cache_key_hash: u64, + pub ttl: u64, +} + pub enum CacheCheckResult { Hit { cached: Vec, }, - Miss { - cache_key_hash: u64, - ttl: Option, - }, + Miss(CacheMiss), Passthrough, } @@ -53,33 +62,41 @@ impl Cache { let cache_key_hash = { let mut hasher = xxhash_rust::xxh3::Xxh3Default::new(); database.hash(&mut hasher); - query.query().hash(&mut hasher); + let normalized_query = FORCE_CACHE_RE.replace(query.query(), "pgdog_cache: cache"); + normalized_query.hash(&mut hasher); hasher.finish() }; let decision = policy::resolve(client_request, params, is_read, cache_key_hash, &self.stats).await; - - if !decision.should_cache() { - return Ok(CacheCheckResult::Passthrough); - } - - match self.client.get(cache_key_hash).await { - Ok(Some(cached)) => { - self.stats.record_hit(cache_key_hash, cached.len()).await; - Ok(CacheCheckResult::Hit { cached }) - } - Ok(None) => { + match decision { + CacheDecision::Skip => Ok(CacheCheckResult::Passthrough), + CacheDecision::ForceCache(ttl) => { self.stats.record_miss(cache_key_hash).await; - Ok(CacheCheckResult::Miss { + Ok(CacheCheckResult::Miss(CacheMiss { cache_key_hash, - ttl: decision.ttl(), - }) - } - Err(e) => 
{ - debug!("Cache get error: {}", e); - Ok(CacheCheckResult::Passthrough) - } + ttl, + })) + }, + CacheDecision::Cache(ttl) => { + match self.client.get(cache_key_hash).await { + Ok(Some(cached)) => { + self.stats.record_hit(cache_key_hash, cached.len()).await; + Ok(CacheCheckResult::Hit { cached }) + } + Ok(None) => { + self.stats.record_miss(cache_key_hash).await; + Ok(CacheCheckResult::Miss(CacheMiss { + cache_key_hash, + ttl: ttl, + })) + } + Err(e) => { + debug!("Cache get error: {}", e); + Ok(CacheCheckResult::Passthrough) + } + } + }, } } @@ -123,7 +140,7 @@ impl Cache { &self, cache_key_hash: u64, messages: Vec, - ttl: Option, + ttl: u64, ) -> Result<(), ()> { if messages.is_empty() || !self.client.is_enabled() { return Ok(()); diff --git a/pgdog/src/frontend/cache/mod.rs b/pgdog/src/frontend/cache/mod.rs index a28ceca3a..337e258ec 100644 --- a/pgdog/src/frontend/cache/mod.rs +++ b/pgdog/src/frontend/cache/mod.rs @@ -15,7 +15,7 @@ use std::sync::Arc; use tracing::debug; use crate::{ - frontend::ClientRequest, + frontend::{ClientRequest, cache::integration::CacheMiss}, net::{Parameters, Stream}, }; @@ -58,14 +58,11 @@ impl Cache { cache_context.reset(); return Ok(true); } - CacheCheckResult::Miss { - cache_key_hash, - ttl, - } => { - cache_context.cache_miss = Some((cache_key_hash, ttl)); + CacheCheckResult::Miss(cache_miss) => { + debug!("Cache miss for key hash: {}", cache_miss.cache_key_hash); + cache_context.cache_miss = Some(cache_miss); cache_context.response_buffer.clear(); cache_context.had_error = false; - debug!("Cache miss for key hash: {}", cache_key_hash); } CacheCheckResult::Passthrough => { cache_context.reset(); @@ -77,10 +74,10 @@ impl Cache { /// Finalize caching by storing the response in Redis. 
pub async fn save_response_in_cache(&self, cache_context: &mut CacheContext) { - if let Some((cache_key, ttl)) = cache_context.cache_miss.take() { + if let Some(CacheMiss { cache_key_hash, ttl } ) = cache_context.cache_miss.take() { if !cache_context.had_error && !cache_context.response_buffer.is_empty() { let messages = std::mem::take(&mut cache_context.response_buffer); - if let Err(e) = self.cache_response(cache_key, messages, ttl).await { + if let Err(e) = self.cache_response(cache_key_hash, messages, ttl).await { debug!("Failed to cache response: {:?}", e); } } diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index e6f92514f..2a2f6a4ba 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -9,26 +9,17 @@ pub enum CacheDirective { Cache { ttl_seconds: Option, }, + ForceCache { + ttl_seconds: Option, + }, #[default] NoCache, } pub enum CacheDecision { Skip, - Cache(Option), -} - -impl CacheDecision { - pub fn should_cache(&self) -> bool { - matches!(self, CacheDecision::Cache(_)) - } - - pub fn ttl(&self) -> Option { - match self { - CacheDecision::Cache(ttl) => *ttl, - _ => None, - } - } + Cache(u64), + ForceCache(u64), } const KEY: &str = "pgdog.cache"; @@ -50,22 +41,26 @@ pub async fn resolve( match cache_directive { Some(CacheDirective::NoCache) => return CacheDecision::Skip, Some(CacheDirective::Cache { ttl_seconds }) => { - return CacheDecision::Cache(ttl_seconds.or(Some(cache_config.ttl))) + return CacheDecision::Cache(ttl_seconds.unwrap_or(cache_config.ttl)) + }, + Some(CacheDirective::ForceCache { ttl_seconds }) => { + return CacheDecision::ForceCache(ttl_seconds.unwrap_or(cache_config.ttl)) } _ => (), } match cache_config.policy { CachePolicy::NoCache => CacheDecision::Skip, - CachePolicy::Cache => CacheDecision::Cache(Some(cache_config.ttl)), + CachePolicy::Cache => CacheDecision::Cache(cache_config.ttl), CachePolicy::Auto => auto_decision(cache_key_hash, stats).await, } } async 
fn auto_decision(cache_key_hash: u64, stats: &QueryStatsTracker) -> CacheDecision { + let cache_config = &config().config.general.cache; let query_stats = stats.get(cache_key_hash).await; if query_stats.hit_count > query_stats.miss_count && query_stats.avg_result_size() < 1_000_000 { - CacheDecision::Cache(None) + CacheDecision::Cache(cache_config.ttl) } else { CacheDecision::Skip } @@ -93,20 +88,25 @@ fn extract_parameter_directive(params: &Parameters) -> Option { match s { "no_cache" => return Some(CacheDirective::NoCache), + "force_cache" => return Some(CacheDirective::ForceCache { ttl_seconds: None }), "cache" => return Some(CacheDirective::Cache { ttl_seconds: None }), _ => (), } if let Some(ttl) = s - .strip_prefix("cache") + .strip_prefix("force_cache") + .or_else(|| s.strip_prefix("cache")) .map(|s| s.trim_start()) .map(|s| s.strip_prefix("ttl=")) .flatten() .and_then(|t| t.trim().parse::().ok()) { - return Some(CacheDirective::Cache { - ttl_seconds: Some(ttl), - }); + let ttl_seconds = Some(ttl); + if s.starts_with("force_cache") { + return Some(CacheDirective::ForceCache { ttl_seconds }); + } else { + return Some(CacheDirective::Cache { ttl_seconds }); + } } None diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index 53cf1431f..29494c287 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -18,7 +18,7 @@ static SHARDING_KEY: Lazy = Lazy::new(|| { }); static ROLE: Lazy = Lazy::new(|| Regex::new(r#"pgdog_role: *(primary|replica)"#).unwrap()); static CACHE: Lazy = Lazy::new(|| { - Regex::new(r#"pgdog_cache: *(no_cache|cache(?:\s+ttl\s*=\s*([0-9]+))?)?"#).unwrap() + Regex::new(r#"pgdog_cache: *(no_cache|force_cache(?:\s+ttl\s*=\s*([0-9]+))?|cache(?:\s+ttl\s*=\s*([0-9]+))?)?"#).unwrap() }); fn get_matched_value<'a>(caps: &'a regex::Captures<'a>) -> Option<&'a str> { @@ -64,8 +64,11 @@ pub fn comment( let action = action.as_str(); if action == 
"no_cache" { cache = Some(CacheDirective::NoCache); - } else { + } else if action.starts_with("force_cache") { let ttl = cap.get(2).and_then(|m| m.as_str().parse::().ok()); + cache = Some(CacheDirective::ForceCache { ttl_seconds: ttl }); + } else { + let ttl = cap.get(3).and_then(|m| m.as_str().parse::().ok()); cache = Some(CacheDirective::Cache { ttl_seconds: ttl }); } } @@ -350,7 +353,7 @@ mod tests { tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), ..Default::default() }; - + let query = "SELECT * FROM users /* pgdog_role: replica pgdog_shard: 1 pgdog_cache: cache ttl=300 */"; let result = comment(query, &schema).unwrap(); assert_eq!(result.1, Some(Role::Replica)); @@ -362,4 +365,42 @@ mod tests { }) )); } + + #[test] + fn test_cache_hint_force_cache() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users /* pgdog_cache: force_cache */"; + let result = comment(query, &schema).unwrap(); + assert!(matches!( + result.2, + Some(CacheDirective::ForceCache { ttl_seconds: None }) + )); + } + + #[test] + fn test_cache_hint_force_cache_with_ttl() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users /* pgdog_cache: force_cache ttl=60 */"; + let result = comment(query, &schema).unwrap(); + assert!(matches!( + result.2, + Some(CacheDirective::ForceCache { + ttl_seconds: Some(60) + }) + )); + } } From efa318dfa2c2d04144916d6d01579e405e3af421 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 12/20] return comment --- pgdog/src/frontend/router/parser/cache/ast.rs | 1 + 1 file changed, 1 insertion(+) diff --git 
a/pgdog/src/frontend/router/parser/cache/ast.rs b/pgdog/src/frontend/router/parser/cache/ast.rs index 179344a48..855792f96 100644 --- a/pgdog/src/frontend/router/parser/cache/ast.rs +++ b/pgdog/src/frontend/router/parser/cache/ast.rs @@ -47,6 +47,7 @@ pub struct AstInner { } impl AstInner { + /// Create new AST record, with no rewrite or comment routing. pub fn new(ast: ParseResult) -> Self { Self { ast, From 1aceafc3ab5ff561a5f2ed9e703a5e943de242f0 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 13/20] remove unescaping on pgdog's side --- docs/CACHE.md | 4 ++-- pgdog/src/net/messages/hello.rs | 34 --------------------------------- 2 files changed, 2 insertions(+), 36 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index ac215d2d4..f59fb2403 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -200,13 +200,13 @@ psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.c psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dcache # Session-wide: cache all queries with 5-minute TTL -psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dcache%5C%20ttl%3D300 +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dcache%20ttl%3D300 # Session-wide: force cache all queries with default TTL psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dforce_cache # Session-wide: force cache all queries with 5-minute TTL -psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dforce_cache%5C%20ttl%3D300 +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dforce_cache%20ttl%3D300 ``` ### Priority Order diff --git a/pgdog/src/net/messages/hello.rs b/pgdog/src/net/messages/hello.rs index 436fb209f..5c989221b 100644 --- a/pgdog/src/net/messages/hello.rs +++ b/pgdog/src/net/messages/hello.rs @@ -58,7 +58,6 @@ impl 
Startup { let value = search_path(&value); params.insert(name, value); } else if name == "options" { - let value = options_unescape(&value); let kvs = value.split("-c"); for kv in kvs { let mut nvs = kv.splitn(2, "="); @@ -250,26 +249,6 @@ fn search_path(value: &str) -> ParameterValue { ParameterValue::Tuple(value) } -fn options_unescape(input: &str) -> String { - let mut result = String::with_capacity(input.len()); - let mut chars = input.chars().peekable(); - - while let Some(c) = chars.next() { - if c == '\\' { - if let Some(&next) = chars.peek() { - chars.next(); - result.push(next); - } else { - result.push(c); - } - } else { - result.push(c); - } - } - - result -} - #[cfg(test)] mod test { use crate::net::messages::ToBytes; @@ -330,17 +309,4 @@ mod test { let startup = Startup::from_stream(&mut read).await.unwrap(); assert!(matches!(startup, Startup::GssEnc)); } - - #[test] - fn test_options_unescape() { - assert_eq!(options_unescape("cache\\ ttl=5"), "cache ttl=5"); - assert_eq!(options_unescape("cache\\\\ttl=5"), "cache\\ttl=5"); - assert_eq!(options_unescape("simple"), "simple"); - assert_eq!(options_unescape("a\\=b"), "a=b"); - assert_eq!(options_unescape("trail\\"), "trail\\"); - assert_eq!( - options_unescape("cache\\ ttl\\=5"), - "cache ttl=5" - ); - } } From bd4a264c4c664c846255c2b433983c2346f2120e Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 14/20] remove auto policy and stats tracker --- .schema/pgdog.schema.json | 13 ++---- Cargo.lock | 30 +------------ pgdog-config/src/cache.rs | 10 ++--- pgdog/Cargo.toml | 1 - pgdog/src/frontend/cache/integration.rs | 49 +++++++------------- pgdog/src/frontend/cache/mod.rs | 4 -- pgdog/src/frontend/cache/policy.rs | 14 ------ pgdog/src/frontend/cache/stats.rs | 59 ------------------------- 8 files changed, 26 insertions(+), 154 deletions(-) delete mode 100644 pgdog/src/frontend/cache/stats.rs diff --git a/.schema/pgdog.schema.json b/.schema/pgdog.schema.json 
index 906dae5c1..b4997615d 100644 --- a/.schema/pgdog.schema.json +++ b/.schema/pgdog.schema.json @@ -34,7 +34,7 @@ "cache": { "enabled": false, "max_result_size": 0, - "policy": "auto", + "policy": "no_cache", "redis_url": "redis://localhost:6379", "ttl": 300 }, @@ -299,9 +299,9 @@ "minimum": 0 }, "policy": { - "description": "Cache policy: no_cache, cache, or auto.\n\n_Default:_ `auto`", + "description": "Cache policy: no_cache or cache.\n\n_Default:_ `no_cache`", "$ref": "#/$defs/CachePolicy", - "default": "auto" + "default": "no_cache" }, "redis_url": { "description": "Redis connection URL.\n\n_Default:_ `redis://localhost:6379`", @@ -330,11 +330,6 @@ "description": "Always cache read queries.", "type": "string", "const": "cache" - }, - { - "description": "Dynamically decide based on Redis memory and query stats.", - "type": "string", - "const": "auto" } ] }, @@ -643,7 +638,7 @@ "default": { "enabled": false, "max_result_size": 0, - "policy": "auto", + "policy": "no_cache", "redis_url": "redis://localhost:6379", "ttl": 300 } diff --git a/Cargo.lock b/Cargo.lock index f7b17c532..d2a7fa197 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2959,7 +2959,6 @@ dependencies = [ "rust_decimal", "rustls-native-certs 0.8.1", "rustls-pki-types", - "scc 3.7.0", "scram", "semver", "serde", @@ -3991,29 +3990,13 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" -[[package]] -name = "saa" -version = "5.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd8d438861332c3b1ac396c77bd9cac620ea1ff347efb63c05a83d8f0a593899" - [[package]] name = "scc" version = "2.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22b2d775fb28f245817589471dd49c5edf64237f4a19d10ce9a92ff4651a27f4" dependencies = [ - "sdd 3.0.8", -] - -[[package]] -name = "scc" -version = "3.7.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "16c154cf1d115a1e901d7f4e3f279eb6eb455f0d670c1cf3c1aa74d50ad37fa9" -dependencies = [ - "saa", - "sdd 4.8.6", + "sdd", ] [[package]] @@ -4089,15 +4072,6 @@ version = "3.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "584e070911c7017da6cb2eb0788d09f43d789029b5877d3e5ecc8acf86ceee21" -[[package]] -name = "sdd" -version = "4.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5f0e40a01b94e35d1dacbcfbe5bfd3d31e37d9590b2e6d86a82b0e87bd4f551" -dependencies = [ - "saa", -] - [[package]] name = "seahash" version = "4.1.0" @@ -4220,7 +4194,7 @@ dependencies = [ "log", "once_cell", "parking_lot", - "scc 2.3.4", + "scc", "serial_test_derive", ] diff --git a/pgdog-config/src/cache.rs b/pgdog-config/src/cache.rs index 57d75adb3..7450c4730 100644 --- a/pgdog-config/src/cache.rs +++ b/pgdog-config/src/cache.rs @@ -8,12 +8,10 @@ use serde::{Deserialize, Serialize}; #[serde(rename_all = "snake_case")] pub enum CachePolicy { /// Never cache queries for this database. + #[default] NoCache, /// Always cache read queries. Cache, - /// Dynamically decide based on Redis memory and query stats. - #[default] - Auto, } impl std::str::FromStr for CachePolicy { @@ -23,7 +21,6 @@ impl std::str::FromStr for CachePolicy { match s.to_lowercase().as_str() { "no_cache" => Ok(Self::NoCache), "cache" => Ok(Self::Cache), - "auto" => Ok(Self::Auto), _ => Err(format!("Invalid cache policy: {}", s)), } } @@ -34,7 +31,6 @@ impl std::fmt::Display for CachePolicy { let display = match self { Self::NoCache => "no_cache", Self::Cache => "cache", - Self::Auto => "auto", }; write!(f, "{}", display) } @@ -49,9 +45,9 @@ pub struct Cache { /// _Default:_ `false` #[serde(default = "Cache::enabled")] pub enabled: bool, - /// Cache policy: no_cache, cache, or auto. + /// Cache policy: no_cache or cache. 
/// - /// _Default:_ `auto` + /// _Default:_ `no_cache` #[serde(default = "Cache::policy")] pub policy: CachePolicy, /// Default TTL in seconds for cached queries. diff --git a/pgdog/Cargo.toml b/pgdog/Cargo.toml index e3f2e7bbb..461daec45 100644 --- a/pgdog/Cargo.toml +++ b/pgdog/Cargo.toml @@ -70,7 +70,6 @@ pgdog-vector = { path = "../pgdog-vector" } pgdog-stats = { path = "../pgdog-stats" } pgdog-postgres-types = { path = "../pgdog-postgres-types"} fred = { version = "9", features = ["enable-rustls"] } -scc = "3.7" xxhash-rust = { version = "0.8", features = ["xxh3"]} [target.'cfg(not(target_env = "msvc"))'.dependencies] diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 4d236954b..d585c3fe4 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -4,7 +4,7 @@ use once_cell::sync::Lazy; use regex::Regex; use crate::{ - frontend::{ClientRequest, cache::CacheDecision}, + frontend::{cache::CacheDecision, ClientRequest}, net::{FromBytes, Message, Parameters, Stream, ToBytes}, }; @@ -12,9 +12,8 @@ use tracing::debug; use super::{policy, Cache}; -static FORCE_CACHE_RE: Lazy = Lazy::new(|| { - Regex::new(r#"pgdog_cache:\s*force_cache"#).unwrap() -}); +static FORCE_CACHE_RE: Lazy = + Lazy::new(|| Regex::new(r#"pgdog_cache:\s*force_cache"#).unwrap()); pub struct CacheMiss { pub cache_key_hash: u64, @@ -22,9 +21,7 @@ pub struct CacheMiss { } pub enum CacheCheckResult { - Hit { - cached: Vec, - }, + Hit { cached: Vec }, Miss(CacheMiss), Passthrough, } @@ -67,34 +64,22 @@ impl Cache { hasher.finish() }; - let decision = - policy::resolve(client_request, params, is_read, cache_key_hash, &self.stats).await; + let decision = policy::resolve(client_request, params, is_read).await; match decision { CacheDecision::Skip => Ok(CacheCheckResult::Passthrough), - CacheDecision::ForceCache(ttl) => { - self.stats.record_miss(cache_key_hash).await; - Ok(CacheCheckResult::Miss(CacheMiss { + 
CacheDecision::ForceCache(ttl) => Ok(CacheCheckResult::Miss(CacheMiss { + cache_key_hash, + ttl, + })), + CacheDecision::Cache(ttl) => match self.client.get(cache_key_hash).await { + Ok(Some(cached)) => Ok(CacheCheckResult::Hit { cached }), + Ok(None) => Ok(CacheCheckResult::Miss(CacheMiss { cache_key_hash, - ttl, - })) - }, - CacheDecision::Cache(ttl) => { - match self.client.get(cache_key_hash).await { - Ok(Some(cached)) => { - self.stats.record_hit(cache_key_hash, cached.len()).await; - Ok(CacheCheckResult::Hit { cached }) - } - Ok(None) => { - self.stats.record_miss(cache_key_hash).await; - Ok(CacheCheckResult::Miss(CacheMiss { - cache_key_hash, - ttl: ttl, - })) - } - Err(e) => { - debug!("Cache get error: {}", e); - Ok(CacheCheckResult::Passthrough) - } + ttl: ttl, + })), + Err(e) => { + debug!("Cache get error: {}", e); + Ok(CacheCheckResult::Passthrough) } }, } diff --git a/pgdog/src/frontend/cache/mod.rs b/pgdog/src/frontend/cache/mod.rs index 337e258ec..85ddb948d 100644 --- a/pgdog/src/frontend/cache/mod.rs +++ b/pgdog/src/frontend/cache/mod.rs @@ -2,13 +2,11 @@ pub mod client; pub mod context; pub mod integration; pub mod policy; -pub mod stats; pub use client::CacheClient; pub use context::CacheContext; pub use integration::CacheCheckResult; pub use policy::CacheDecision; -pub use stats::QueryStatsTracker; use once_cell::sync::Lazy; use std::sync::Arc; @@ -22,7 +20,6 @@ use crate::{ #[derive(Debug)] pub struct Cache { client: CacheClient, - stats: QueryStatsTracker, } static CACHE: Lazy> = Lazy::new(|| Arc::new(Cache::new())); @@ -35,7 +32,6 @@ impl Cache { fn new() -> Self { Cache { client: CacheClient::new(), - stats: QueryStatsTracker::default(), } } diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index 2a2f6a4ba..35d4bef17 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -1,4 +1,3 @@ -use super::stats::QueryStatsTracker; use crate::config::{config, CachePolicy}; use 
crate::frontend::ClientRequest; use crate::net::parameter::ParameterValue; @@ -28,8 +27,6 @@ pub async fn resolve( client_request: &ClientRequest, params: &Parameters, is_read: bool, - cache_key_hash: u64, - stats: &QueryStatsTracker, ) -> CacheDecision { let cache_config = &config().config.general.cache; @@ -52,17 +49,6 @@ pub async fn resolve( match cache_config.policy { CachePolicy::NoCache => CacheDecision::Skip, CachePolicy::Cache => CacheDecision::Cache(cache_config.ttl), - CachePolicy::Auto => auto_decision(cache_key_hash, stats).await, - } -} - -async fn auto_decision(cache_key_hash: u64, stats: &QueryStatsTracker) -> CacheDecision { - let cache_config = &config().config.general.cache; - let query_stats = stats.get(cache_key_hash).await; - if query_stats.hit_count > query_stats.miss_count && query_stats.avg_result_size() < 1_000_000 { - CacheDecision::Cache(cache_config.ttl) - } else { - CacheDecision::Skip } } diff --git a/pgdog/src/frontend/cache/stats.rs b/pgdog/src/frontend/cache/stats.rs deleted file mode 100644 index e2946c667..000000000 --- a/pgdog/src/frontend/cache/stats.rs +++ /dev/null @@ -1,59 +0,0 @@ -use std::sync::Arc; - -use scc::HashMap; - -#[derive(Debug, Clone, Default)] -pub struct QueryStats { - pub hit_count: u64, - pub miss_count: u64, - pub total_result_size: u64, -} - -impl QueryStats { - pub fn avg_result_size(&self) -> u64 { - let total = self.hit_count + self.miss_count; - if total == 0 { - 0 - } else { - self.total_result_size / total - } - } -} - -#[derive(Debug, Clone, Default)] -pub struct QueryStatsTracker { - stats: Arc>, -} - -impl QueryStatsTracker { - pub async fn record_hit(&self, cache_key_hash: u64, result_size: usize) { - let mut entry = self.stats.entry_async(cache_key_hash).await.or_default(); - entry.hit_count += 1; - entry.total_result_size += result_size as u64; - } - - pub async fn record_miss(&self, cache_key_hash: u64) { - let mut entry = self.stats.entry_async(cache_key_hash).await.or_default(); - 
entry.miss_count += 1; - } - - pub async fn get(&self, cache_key_hash: u64) -> QueryStats { - self.stats - .get_async(&cache_key_hash) - .await - .map(|entry| entry.get().clone()) - .unwrap_or_default() - } - - pub async fn clear(&self) { - self.stats.clear_async().await - } - - pub async fn len(&self) -> usize { - self.stats.len() - } - - pub async fn is_empty(&self) -> bool { - self.stats.is_empty() - } -} From 8d5cf1b26bd780d50944867ac6d73adc1431a540 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 16:59:23 +0300 Subject: [PATCH 15/20] bring doc to current state --- docs/CACHE.md | 73 ++++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 44 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index f59fb2403..8ddf2d8ab 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -1,8 +1,8 @@ -# Redis Cache for pgdog — State of Implementation +# Cache for pgdog — State of Implementation ## Architecture -Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on cache miss. Three-tier policy resolution: SQL comment → per-database config → auto-decision engine. +Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on cache miss. Two-tier policy resolution: SQL comment/connection parameter → pgdog's config. --- @@ -12,7 +12,7 @@ Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on **`cache.rs`** — Cache configuration types: -**CachePolicy enum:** `NoCache`, `Cache`, `Auto` (default). Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. +**CachePolicy enum:** `NoCache` (default), `Cache`. Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. **Cache struct:** - `enabled: bool` — is caching on? 
@@ -33,21 +33,19 @@ pub mod client; pub mod context; pub mod integration; pub mod policy; -pub mod stats; pub use client::CacheClient; pub use context::CacheContext; pub use integration::CacheCheckResult; pub use policy::CacheDecision; -pub use stats::QueryStatsTracker; ``` -`Cache` struct wraps: `CacheClient`, `QueryStatsTracker`. +`Cache` struct wraps: `CacheClient`. **Global singleton:** Cache is global-scoped, not connection-scoped. Accessed via `cache()` function which returns `Arc` from a `Lazy>` static. `Cache::new()` reads config internally — no parameters needed. Key methods: -- `new()` — creates client (reads config internally) and stats tracker +- `new()` — creates client (reads config internally) - `try_read_cache(cache_context, in_transaction, client_request, params, stream)` — calls `cache_check()`, handles HIT/MISS/PASS-through - `save_response_in_cache(cache_context)` — finalizes by storing the captured response @@ -66,23 +64,17 @@ Key methods: - `reconnecting: Arc` — prevents multiple concurrent reconnect tasks - All Redis operations wrapped in `tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) as safety net -**`policy.rs`** — 3-tier policy resolution via free functions: -- `CacheDirective` enum: `Cache { ttl_seconds }`, `NoCache` (default) -- `CacheDecision` enum: `Skip`, `Cache(Option)` -- `resolve(client_request, params, is_read, cache_key_hash, stats)` — main resolver function, chains all tiers +**`policy.rs`** — 2-tier policy resolution: +- `CacheDirective` enum: `Cache { ttl_seconds }`, `ForceCache { ttl_seconds }`, `NoCache` (default) +- `CacheDecision` enum: `Skip`, `Cache(u64)`, `ForceCache(u64)` +- `resolve(client_request, params, is_read)` — main resolver function, chains all tiers - `get_cache_directive(client_request, params)` — comment hint (from AST) has priority over connection parameter (`pgdog.cache`) -- `extract_parameter_directive(params)` — parses `pgdog.cache` parameter: `no_cache`, `cache`, `cache ttl=N` -- Tier 1: 
Extractor directive (`CacheDirective::Cache { ttl }` or `CacheDirective::NoCache`) -- Tier 2: Global config `CachePolicy` (`NoCache` / `Cache` / `Auto`) -- Tier 3: `auto_decision()` — caches when `hit_count > miss_count` AND `avg_result_size < 1MB` - -**`stats.rs`** — Per-fingerprint query statistics tracker: -- `QueryStats` struct: `hit_count`, `miss_count`, `total_result_size`, `avg_result_size()` -- `QueryStatsTracker` with `record_hit(fingerprint, size)` / `record_miss(fingerprint)` / `get(fingerprint)` -- Internally: `Arc>` +- `extract_parameter_directive(params)` — parses `pgdog.cache` parameter: `no_cache`, `cache`, `cache ttl=N`, `force_cache`, `force_cache ttl=N` +- Tier 1: Extractor directive (`CacheDirective::Cache { ttl }`, `CacheDirective::ForceCache { ttl }`, or `CacheDirective::NoCache`) +- Tier 2: Global config `CachePolicy` (`NoCache` / `Cache`) **`context.rs`** — Cache context held in `QueryEngineContext`: -- `CacheContext` with `cache_miss: Option<(u64, Option)>`, `response_buffer: Vec`, and `had_error: bool` +- `CacheContext` with `cache_miss: Option`, `response_buffer: Vec`, and `had_error: bool` - `capture_response(message)` — stores message in buffer when cache miss is tracked; sets `had_error = true` on `E` messages - `reset()` — clears all state for per-query isolation @@ -95,10 +87,9 @@ Key methods: ### Query Engine Integration **`pgdog/src/frontend/client/query_engine/mod.rs`** -- Declares `pub mod cache;` module -- `QueryEngine` holds `cache: Cache` field -- `handle()` flow: after `route_query()` and before `before_execution()`, calls `self.cache.try_read_cache(context)`. If HIT: sends cached response and returns. On MISS: stores state in `context.cache_context`. -- After `match command`, calls `self.cache.save_response_in_cache(context)` to finalize caching. +- Imports global `cache()` from `frontend::cache` +- `handle()` flow: after `route_query()` and before `before_execution()`, calls `cache().try_read_cache(context)`. 
If HIT: sends cached response and returns. On MISS: stores state in `context.cache_context`. +- After `match command`, calls `cache().save_response_in_cache(context)` to finalize caching. **`pgdog/src/frontend/client/query_engine/query.rs`** - `process_server_message()` calls `context.cache_context.capture_response(message.clone())`. @@ -119,7 +110,6 @@ Key methods: **`pgdog/Cargo.toml`** fred = { version = "9", features = ["enable-rustls"] } -scc = "3.7" xxhash-rust = { version = "0.8", features = ["xxh3"]} --- @@ -132,10 +122,9 @@ xxhash-rust = { version = "0.8", features = ["xxh3"]} | Cache config scope | **Global** (`config.general.cache`) | | Redis client | `fred` crate v9 (async-native, tokio integration) | | Cacheable queries | Only reads (`route.is_read()`) | -| Cache policy resolution | 3-tier: SQL comment → pgdog.cache param → DB policy → auto-decision | +| Cache policy resolution | 2-tier: SQL comment/param → DB policy | | Cache HIT flow | Deserialize wire bytes → parse messages → send to client → return `Ok(true)` | | Cache MISS flow | Normal execute → capture response via `CacheContext` → store in Redis → respond | -| Auto-decision engine | `hit_count > miss_count` AND `avg_result_size < 1MB` | | Cache key | XXH3 hash of `database_name + raw_query_string` | | Wire format | Full PostgreSQL wire messages stored as raw bytes (one concatenated buffer) | @@ -214,8 +203,8 @@ psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.c Sources are checked in order — first non-None result wins, then falls through to global config: ``` -SQL comment → pgdog.cache parameter → DB policy config → Auto-decision -(highest) (lowest) +SQL comment → pgdog.cache parameter → DB policy config +(highest) (lowest) ``` --- @@ -234,11 +223,11 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 6. 
**Query parser auto-upgrade for caching** — When caching is enabled and parser is `Auto`/`Off`/`SessionControl`, the parser is forced to `On` via `|| self.cache_enabled()` check in `cluster.rs`. A startup warning is emitted in `core.rs` if parser remains incompatible. -7. **Decoupled cache policy extraction** — Cache directives extracted via standalone regex in `cache/policy.rs`, works regardless of parser state. Supports `/* pgdog_cache: ... */` format with optional `ttl=` parameter. +7. **Decoupled cache policy extraction** — Cache directives extracted via standalone regex in `cache/policy.rs`, works regardless of parser state. Supports `/* pgdog_cache: ... */` format with optional `ttl=` parameter. Unified with sharding hints via `comment()` function in `comment.rs`. 8. **Error handling / Reconnection** — Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. -9. **Cache key collision across databases sharing one Redis** — Database name and raw query string are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. +9. **Cache key collision across databases sharing one Redis** — Database name and raw query string are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. `force_cache` hints normalize the query in the hash to use the same key as regular `cache`. 10. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. @@ -248,31 +237,27 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 13. 
**Moved all cache-related structs from QueryEngine to Client** — now all cache structs including redis client are creating for whole pgdog's lifetime. -14. **Use built-in query comment hints** — Cache hints (`pgdog_cache:`) are now extracted alongside sharding hints (`pgdog_shard:`, `pgdog_sharding_key:`, `pgdog_role:`) via the unified `comment()` function in `comment.rs`. The `comment_cache` field is stored in `AstInner` and accessed during cache checking via `client_request.ast.comment_cache`. Policy resolution simplified: trait-based extractors (`CachePolicyExtractor`, `CommentCacheExtractor`, `ParameterCacheExtractor`, `CachePolicyDispatcher`, `CachePolicyResolver`) replaced with free functions (`resolve()`, `get_cache_directive()`, `extract_parameter_directive()`). Comment hint (from AST) has priority over connection parameter `pgdog.cache`. `Cache` struct no longer needs `policy_dispatcher` field. `CacheDirective::None` removed in favor of `Option` with `NoCache` as default. Parameter format unified to `no_cache` (underscore, not dash). +14. **Use built-in query comment hints** — Cache hints (`pgdog_cache:`) are now extracted alongside sharding hints (`pgdog_shard:`, `pgdog_sharding_key:`, `pgdog_role:`) via the unified `comment()` function in `comment.rs`. The `comment_cache` field is stored in `AstInner` and accessed during cache checking via `client_request.ast.comment_cache`. Policy resolution simplified: trait-based extractors replaced with free functions (`resolve()`, `get_cache_directive()`, `extract_parameter_directive()`). Comment hint (from AST) has priority over connection parameter `pgdog.cache`. `Cache` struct no longer needs `policy_dispatcher` field. Parameter format unified to `no_cache` (underscore, not dash). 15. **Add cache config to .schema**. -16. **Force-cache hint support**. +16. 
**Force-cache hint support** — `/* pgdog_cache: force_cache */` and `/* pgdog_cache: force_cache ttl=N */` directives always attempt to cache (cache key normalized), bypassing normal cache miss flow considerations. --- ## What's Left To Do -1. **Auto policy** — Implemented but untested. Relies on stats tracker to decide based on hit/miss ratio and avg result size after enough observations. - -2. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) - -3. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. +1. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) -4. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` +2. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. -5. **Magic numbers in send_cached_response()**. +3. 
**Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` -6. **Make statistics collection deferred** — for auto policy. +4. **Magic numbers in send_cached_response()**. -7. **Provide config hotswap**. +5. **Provide config hotswap**. -8. **Review and rewrite CacheClient**. +6. **Review and rewrite CacheClient**. ### Planned Tests From 48d67be859aa5cdf2825a4bdd45f3ab872cb5ac3 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Thu, 14 May 2026 13:07:04 +0300 Subject: [PATCH 16/20] cache processing and client responding refined --- docs/CACHE.md | 33 ++++--- pgdog/src/frontend/cache/client.rs | 60 +++++++------ pgdog/src/frontend/cache/integration.rs | 89 +++++++++++++------ pgdog/src/frontend/cache/mod.rs | 25 +++--- pgdog/src/frontend/client/query_engine/mod.rs | 6 +- 5 files changed, 136 insertions(+), 77 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index 8ddf2d8ab..365da7dad 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -46,20 +46,23 @@ pub use policy::CacheDecision; Key methods: - `new()` — creates client (reads config internally) -- `try_read_cache(cache_context, in_transaction, client_request, params, stream)` — calls `cache_check()`, handles HIT/MISS/PASS-through +- `try_read_cache(cache_context, in_transaction, client_request, params)` — calls `cache_check()`, returns `Ok(Some(Vec))` on HIT (caller replays through pipeline), `Ok(None)` on MISS/PASSTHROUGH - `save_response_in_cache(cache_context)` — finalizes by storing the captured response **`client.rs`** — Redis client wrapper using `fred` v9: - `CacheClient::new()` — builds client from global `config().config.general.cache`, returns disabled stub if no config/URL - `ensure_connected()` — lazy one-time `client.init().await` followed by `client.ping()` verification; sets `redis_connected` flag -- `get(&self, key)` — returns `Result>>`; 
fetches cached wire-protocol bytes +- `get(&self, key)` — returns `Result, Error>`; fetches cached wire-protocol bytes. Returns `Err(Error::CacheMiss)` on a key miss (distinct from connection errors) - `set(&self, key, value, ttl)` — stores bytes with EX expiration; respects `max_result_size` - `spawn_reconnect()` — background task that retries `init()` every 500ms, verifies with `ping()`, sets `redis_connected = true` - `mark_disconnected()` — sets `redis_connected = false`, spawns reconnect if not already running (CAS-guarded) - `is_connected()` — reads our atomic flag (not fred's potentially stale `ClientState`) - `is_enabled()` — returns true if both client exists and config enabled - Keys are prefixed with `"pgdog:"` -- Error types: `RedisError(String)`, `ConnectionFailed(String)` +- Error types: + - `RedisError { cmd: &'static str, key: u64, err: RedisError }` — Redis command failed (includes command name and key for context) + - `ConnectionFailed(&'static str)` — not connected or not configured + - `CacheMiss(u64)` — key not present in Redis (not an error condition, used for control flow) - `redis_connected: Arc` — authoritative connection gate, only true after PING succeeds - `reconnecting: Arc` — prevents multiple concurrent reconnect tasks - All Redis operations wrapped in `tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) as safety net @@ -80,7 +83,7 @@ Key methods: **`integration.rs`** — Integration methods on `impl Cache`: - `cache_check()` — main entry point, checks route, calls `policy::resolve()`, checks Redis -- `send_cached_response()` — deserializes wire-format bytes and sends to client +- `deserialize_cached(Vec) -> Vec` — parses a flat blob of concatenated PostgreSQL wire messages into individual `Message` values. Wire format: `[1B code][4B length (incl. itself)][payload]`. Named constants `HEADER_CODE_LEN`, `HEADER_LEN_SIZE`, `HEADER_TOTAL` replace the former magic numbers. 
Not Redis-specific — usable with any cache backend that stores raw bytes. - `cache_response()` — serializes `Vec` into wire bytes and stores in Redis - Cache key: XXH3 hash of `database_name + raw_query_string` @@ -88,7 +91,7 @@ Key methods: **`pgdog/src/frontend/client/query_engine/mod.rs`** - Imports global `cache()` from `frontend::cache` -- `handle()` flow: after `route_query()` and before `before_execution()`, calls `cache().try_read_cache(context)`. If HIT: sends cached response and returns. On MISS: stores state in `context.cache_context`. +- `handle()` flow: after `route_query()` and before `before_execution()`, calls `cache().try_read_cache(context)`. If HIT: replays each cached `Message` through `process_server_message()` (same pipeline as live backend responses — stats, transaction state, hooks all fire correctly), then returns. On MISS: stores state in `context.cache_context`. - After `match command`, calls `cache().save_response_in_cache(context)` to finalize caching. **`pgdog/src/frontend/client/query_engine/query.rs`** @@ -123,7 +126,7 @@ xxhash-rust = { version = "0.8", features = ["xxh3"]} | Redis client | `fred` crate v9 (async-native, tokio integration) | | Cacheable queries | Only reads (`route.is_read()`) | | Cache policy resolution | 2-tier: SQL comment/param → DB policy | -| Cache HIT flow | Deserialize wire bytes → parse messages → send to client → return `Ok(true)` | +| Cache HIT flow | Deserialize wire bytes → `Vec` → replay each through `process_server_message()` | | Cache MISS flow | Normal execute → capture response via `CacheContext` → store in Redis → respond | | Cache key | XXH3 hash of `database_name + raw_query_string` | | Wire format | Full PostgreSQL wire messages stored as raw bytes (one concatenated buffer) | @@ -213,9 +216,9 @@ SQL comment → pgdog.cache parameter → DB policy config 1. **Redis client never connects** - Problem: CacheClient::new() built the client but never called init(). 
Fred requires explicit connection initialization. Fix: Added lazy `ensure_connected()` using `client.init().await`, guarded by `AtomicBool` so it runs exactly once on first get()/set(). Changed CacheClient from `#[derive(Debug)]` to manual Debug impl (contains `Arc`). -2. **Redis GET fails on NULL / cache miss** - Problem: `client.get::()` throws `Parse Error: Cannot parse into bytes` when the key doesn't exist. Fix: Use `client.get::()` and check `val.is_null()` before extracting bytes. +2. **Redis GET fails on NULL / cache miss** - Problem: `client.get::()` throws `Parse Error: Cannot parse into bytes` when the key doesn't exist. Fix: Use `client.get::()` and check `val.is_null()` before extracting bytes. Later refined: `get()` now returns `Result, Error>` instead of `Result>>` — a missing key yields `Err(Error::CacheMiss)`, which is matched explicitly in `cache_check()` and converted to `CacheCheckResult::Miss`. Other errors propagate as `Passthrough`. -3. **Wire format deserialization wrong in send_cached_response** - Problem: PostgreSQL wire message structure is `[1B code][4B length]` where length includes the 4B itself. I calculated `offset + 5 + msg_len` (treating length as payload-only), causing incorrect byte slicing. Fix: Corrected to `offset + 1 + msg_len`. +3. **Wire format deserialization wrong in send_cached_response** - Problem: PostgreSQL wire message structure is `[1B code][4B length]` where length includes the 4B itself. I calculated `offset + 5 + msg_len` (treating length as payload-only), causing incorrect byte slicing. Fix: Corrected to `offset + 1 + msg_len`, then replaced magic numbers with named constants `HEADER_CODE_LEN`, `HEADER_LEN_SIZE`, `HEADER_TOTAL`. 4. **Route incorrectly reports read-only as write when parser is disabled** - Problem: `query_parser_bypass()` conservatively returns `Route::write()` for all SQL when the query parser is disabled. 
Since pgdog doesn't enable the parser by default for simple queries, `route.is_read()` was false for `SELECT 1`. Fix: When any database has `cache.enabled = true`, the query parser level is auto-upgraded to `On` in the cluster config. The `|| self.cache_enabled()` check in `cluster.rs:475` forces the parser on. Cache also emits a startup warning if parser is `Off` or `SessionControl`. The old `is_likely_read()` string-prefix heuristic has been removed entirely. @@ -229,7 +232,7 @@ SQL comment → pgdog.cache parameter → DB policy config 9. **Cache key collision across databases sharing one Redis** — Database name and raw query string are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. `force_cache` hints normalize the query in the hash to use the same key as regular `cache`. -10. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. +10. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation expressed via named constants (`HEADER_CODE_LEN = 1`, `HEADER_LEN_SIZE = 4`, `HEADER_TOTAL = 5`). Deserialization extracted into `deserialize_cached()` with inline comments explaining each boundary check. 11. **Do not cache error responses**. @@ -243,21 +246,25 @@ SQL comment → pgdog.cache parameter → DB policy config 16. **Force-cache hint support** — `/* pgdog_cache: force_cache */` and `/* pgdog_cache: force_cache ttl=N */` directives always attempt to cache (cache key normalized), bypassing normal cache miss flow considerations. +17. **Cache HIT replays through the server-message pipeline** — Previously, cache hits sent responses directly to the stream, bypassing `process_server_message()`. 
Now `try_read_cache()` returns `Option>` and the caller (`handle()`) feeds each message through `process_server_message()` — giving correct stats accounting, transaction state updates from `ReadyForQuery`, and hook invocations on every cache hit. + +18. **CacheClient error types refined** — `get()` now returns `Result, Error>` (no more `Option`). `Error::CacheMiss(u64)` is a dedicated variant for key-not-found; `Error::RedisError` is now a struct variant carrying `cmd: &'static str`, `key: u64`, and the underlying error for richer diagnostics. `Error::ConnectionFailed` uses `&'static str` instead of `String` to avoid heap allocation on the hot path. + --- ## What's Left To Do 1. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) -2. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. +2. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. 3. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` -4. **Magic numbers in send_cached_response()**. +4. **Provide config hotswap**. -5. **Provide config hotswap**. +5. **Review and rewrite CacheClient**. -6. **Review and rewrite CacheClient**. +6. 
**Abstract storage backend** — `CacheClient` is Redis-specific. A `CacheStorage` trait (`get`, `set`, `is_enabled`) would allow plugging in other backends (e.g. memcached) via config. `deserialize_cached()` is already backend-agnostic (pure wire-protocol parsing) and would be shared across all backends. ### Planned Tests diff --git a/pgdog/src/frontend/cache/client.rs b/pgdog/src/frontend/cache/client.rs index de59dc686..87ebd8e1a 100644 --- a/pgdog/src/frontend/cache/client.rs +++ b/pgdog/src/frontend/cache/client.rs @@ -182,19 +182,19 @@ impl CacheClient { self.redis_connected.load(Ordering::Relaxed) } - pub(crate) async fn get(&self, key: u64) -> Result>, Error> { + pub(crate) async fn get(&self, key: u64) -> Result, Error> { if !self.ensure_connected().await { if !self.is_connected() { self.spawn_reconnect(); return Err(Error::ConnectionFailed( - "Redis disconnected, reconnecting in background".to_string(), + "Redis disconnected, reconnecting in background", )); } - return Err(Error::ConnectionFailed("Redis not connected".to_string())); + return Err(Error::ConnectionFailed("Redis not connected")); } let Some(ref client) = self.client else { - return Ok(None); + return Err(Error::ConnectionFailed("Redis not configured")); }; let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); @@ -205,27 +205,25 @@ impl CacheClient { .await { Ok(Ok(v)) => v, - Ok(Err(e)) => { - debug!("Redis GET error for key {}: {}", key, e); + Ok(Err(err)) => { self.mark_disconnected(); - return Err(Error::RedisError(e.to_string())); + return Err(Error::RedisError { + cmd: "GET", + key, + err, + }); } Err(_) => { - error!("Redis GET timed out for key {}", key); self.mark_disconnected(); - return Err(Error::ConnectionFailed("Redis GET timed out".to_string())); + return Err(Error::ConnectionFailed("Redis GET timed out")); } }; - if val.is_null() { - debug!("Cache miss for key {}", key); - Ok(None) - } else if let Some(bytes) = val.into_bytes() { + if let Some(bytes) = val.into_bytes() { 
debug!("Cache hit for key {}", key); - Ok(Some(bytes.to_vec())) + Ok(bytes.to_vec()) } else { - debug!("Redis GET value not bytes for key {}", key); - Ok(None) + Err(Error::CacheMiss(key)) } } @@ -234,14 +232,14 @@ impl CacheClient { if !self.is_connected() { self.spawn_reconnect(); return Err(Error::ConnectionFailed( - "Redis disconnected, reconnecting in background".to_string(), + "Redis disconnected, reconnecting in background", )); } - return Err(Error::ConnectionFailed("Redis not connected".to_string())); + return Err(Error::ConnectionFailed("Redis not connected")); } let Some(ref client) = self.client else { - return Ok(()); + return Err(Error::ConnectionFailed("Redis not configured")); }; let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); @@ -276,15 +274,17 @@ impl CacheClient { debug!("Cached key {} with TTL {}s", key, ttl_seconds); Ok(()) } - Ok(Err(e)) => { - debug!("Redis SET error for key {}: {}", key, e); + Ok(Err(err)) => { self.mark_disconnected(); - Err(Error::RedisError(e.to_string())) + Err(Error::RedisError { + cmd: "SET", + key, + err, + }) } Err(_) => { - error!("Redis SET timed out for key {}", key); self.mark_disconnected(); - Err(Error::ConnectionFailed("Redis SET timed out".to_string())) + Err(Error::ConnectionFailed("Redis SET timed out")) } } } @@ -297,8 +297,14 @@ impl CacheClient { #[derive(Debug, thiserror::Error)] pub enum Error { - #[error("Redis error: {0}")] - RedisError(String), + #[error("Redis {cmd} error for key {key}: {err}")] + RedisError { + cmd: &'static str, + key: u64, + err: RedisError, + }, #[error("Connection failed: {0}")] - ConnectionFailed(String), + ConnectionFailed(&'static str), + #[error("Cache miss for key {0}")] + CacheMiss(u64), } diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index d585c3fe4..84beb2aa5 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -4,11 +4,14 @@ use once_cell::sync::Lazy; use 
regex::Regex; use crate::{ - frontend::{cache::CacheDecision, ClientRequest}, - net::{FromBytes, Message, Parameters, Stream, ToBytes}, + frontend::{ + cache::{client::Error as CacheClientError, CacheDecision}, + ClientRequest, + }, + net::{FromBytes, Message, Parameters, ToBytes}, }; -use tracing::debug; +use tracing::{debug, warn}; use super::{policy, Cache}; @@ -26,6 +29,10 @@ pub enum CacheCheckResult { Passthrough, } +const HEADER_CODE_LEN: usize = 1; +const HEADER_LEN_SIZE: usize = 4; +const HEADER_TOTAL: usize = HEADER_CODE_LEN + HEADER_LEN_SIZE; + impl Cache { pub(super) async fn cache_check( &self, @@ -72,33 +79,58 @@ impl Cache { ttl, })), CacheDecision::Cache(ttl) => match self.client.get(cache_key_hash).await { - Ok(Some(cached)) => Ok(CacheCheckResult::Hit { cached }), - Ok(None) => Ok(CacheCheckResult::Miss(CacheMiss { + Ok(cached) => Ok(CacheCheckResult::Hit { cached }), + Err(CacheClientError::CacheMiss(_)) => Ok(CacheCheckResult::Miss(CacheMiss { cache_key_hash, ttl: ttl, })), Err(e) => { - debug!("Cache get error: {}", e); + warn!("{}", e); Ok(CacheCheckResult::Passthrough) } }, } } - pub(super) async fn send_cached_response( - &self, - stream: &mut Stream, - cached: Vec, - ) -> Result<(), crate::frontend::Error> { + /// Deserializes a flat byte blob (N concatenated PostgreSQL wire messages) into `Vec`. + /// + /// Redis stores cache responses as raw wire-format bytes concatenated together without framing. + /// We walk through the blob reading each message boundary, then slice out the individual message. + /// + /// ### PostgreSQL wire protocol message layout: + /// + /// [Source](https://www.postgresql.org/docs/current/protocol-overview.html) + /// + /// ```text + /// +----------+--------------------------+-------------------+ + /// | 1 byte | 4 bytes (big-endian) | N bytes (payload) | + /// | code | length (incl. 
4B itself) | data | + /// +----------+--------------------------+-------------------+ + /// ``` + /// + /// Constants for parsing: + /// - `HEADER_CODE_LEN` = 1 byte (message type code, e.g. 'T' = RowDescription) + /// - `HEADER_LEN_SIZE` = 4 bytes (message length, includes itself but NOT the code byte) + /// - `HEADER_TOTAL` = 5 bytes (minimum bytes needed to read the length field) + pub(super) fn deserialize_cached(cached: Vec) -> Vec { + let mut messages = Vec::new(); let mut offset = 0; let len = cached.len(); while offset < len { - if offset + 5 > len { + // Need at least a full header (code + length) to proceed. + if offset + HEADER_TOTAL > len { + debug!( + "deserializing cached response: not enough bytes for message header (offset={}, len={})", + offset, len + ); break; } let _code = cached[offset] as char; + + // Read the message length field (4 bytes, big-endian). + // This length includes the 4-byte length field itself but NOT the code byte. let msg_len = u32::from_be_bytes([ cached[offset + 1], cached[offset + 2], @@ -106,19 +138,28 @@ impl Cache { cached[offset + 4], ]) as usize; - if msg_len < 4 || offset + 1 + msg_len > len { + // Sanity checks: + // 1. Length must be at least 4 (the length field itself): if < 4 the data is corrupt. + // 2. Must not read past the end of the blob. 
+ if msg_len < 4 || offset + HEADER_CODE_LEN + msg_len > len { + debug!( + "deserializing cached response: invalid msg length {} (offset={}, len={})", + msg_len, offset, len + ); break; } - let end = offset + 1 + msg_len; + // Full message spans: 1 byte (code) + msg_len (length field + payload) + let end = offset + HEADER_CODE_LEN + msg_len; + let msg_bytes: bytes::Bytes = cached[offset..end].to_vec().into(); - let msg = Message::from_bytes(msg_bytes)?; + if let Ok(msg) = Message::from_bytes(msg_bytes) { + messages.push(msg); + } offset = end; - - stream.send_flush(&msg).await?; } - Ok(()) + messages } pub(super) async fn cache_response( @@ -126,9 +167,9 @@ impl Cache { cache_key_hash: u64, messages: Vec, ttl: u64, - ) -> Result<(), ()> { + ) { if messages.is_empty() || !self.client.is_enabled() { - return Ok(()); + return; } let mut buffer = Vec::new(); @@ -137,19 +178,17 @@ impl Cache { Ok(bytes) => buffer.extend_from_slice(&bytes), Err(e) => { debug!("Failed to serialize message for caching: {}", e); - return Ok(()); + return; } } } if buffer.is_empty() { - return Ok(()); + return; } if let Err(e) = self.client.set(cache_key_hash, &buffer, ttl).await { - debug!("Failed to cache response: {}", e); + debug!("Failed to cache response: {:?}", e); } - - Ok(()) } } diff --git a/pgdog/src/frontend/cache/mod.rs b/pgdog/src/frontend/cache/mod.rs index 85ddb948d..42ee811a2 100644 --- a/pgdog/src/frontend/cache/mod.rs +++ b/pgdog/src/frontend/cache/mod.rs @@ -14,7 +14,7 @@ use tracing::debug; use crate::{ frontend::{ClientRequest, cache::integration::CacheMiss}, - net::{Parameters, Stream}, + net::{Message, Parameters}, }; #[derive(Debug)] @@ -35,14 +35,21 @@ impl Cache { } } + /// Check the cache for a query response. + /// + /// On HIT returns `Ok(Some(messages))` — the caller is responsible for + /// replaying these messages through the normal server-message pipeline. 
+ /// + /// On MISS or PASSTHROUGH returns `Ok(None)` and updates `cache_context` + /// so that the response can later be captured and stored via + /// `save_response_in_cache`. pub async fn try_read_cache( &self, cache_context: &mut CacheContext, in_transaction: bool, client_request: &ClientRequest, params: &Parameters, - stream: &mut Stream, - ) -> Result { + ) -> Result>, crate::frontend::Error> { let cache_result = self .cache_check(in_transaction, client_request, params) .await?; @@ -50,22 +57,22 @@ impl Cache { match cache_result { CacheCheckResult::Hit { cached } => { debug!("Cache hit, serving from cache"); - self.send_cached_response(stream, cached).await?; + let messages = Self::deserialize_cached(cached); cache_context.reset(); - return Ok(true); + Ok(Some(messages)) } CacheCheckResult::Miss(cache_miss) => { debug!("Cache miss for key hash: {}", cache_miss.cache_key_hash); cache_context.cache_miss = Some(cache_miss); cache_context.response_buffer.clear(); cache_context.had_error = false; + Ok(None) } CacheCheckResult::Passthrough => { cache_context.reset(); + Ok(None) } } - - Ok(false) } /// Finalize caching by storing the response in Redis. 
@@ -73,9 +80,7 @@ impl Cache { if let Some(CacheMiss { cache_key_hash, ttl } ) = cache_context.cache_miss.take() { if !cache_context.had_error && !cache_context.response_buffer.is_empty() { let messages = std::mem::take(&mut cache_context.response_buffer); - if let Err(e) = self.cache_response(cache_key_hash, messages, ttl).await { - debug!("Failed to cache response: {:?}", e); - } + self.cache_response(cache_key_hash, messages, ttl).await; } } } diff --git a/pgdog/src/frontend/client/query_engine/mod.rs b/pgdog/src/frontend/client/query_engine/mod.rs index 32b753205..e302e717d 100644 --- a/pgdog/src/frontend/client/query_engine/mod.rs +++ b/pgdog/src/frontend/client/query_engine/mod.rs @@ -131,16 +131,18 @@ impl QueryEngine { } let in_transaction = context.in_transaction(); - if cache() + if let Some(cached_messages) = cache() .try_read_cache( &mut context.cache_context, in_transaction, context.client_request, context.params, - context.stream, ) .await? { + for msg in cached_messages { + self.process_server_message(context, msg).await?; + } self.update_stats(context); return Ok(()); } From 6e08a252c174a5198def08043e24b3a29271366a Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Fri, 15 May 2026 16:00:52 +0300 Subject: [PATCH 17/20] added support for multiple backends and provided config hotswap for cache --- .schema/pgdog.schema.json | 61 ++++- docs/CACHE.md | 105 ++++---- pgdog-config/src/cache.rs | 86 +++++- pgdog-config/src/lib.rs | 2 +- pgdog/src/frontend/cache/client.rs | 310 ---------------------- pgdog/src/frontend/cache/integration.rs | 41 +-- pgdog/src/frontend/cache/mod.rs | 72 ++++- pgdog/src/frontend/cache/policy.rs | 6 +- pgdog/src/frontend/cache/storage/mod.rs | 58 ++++ pgdog/src/frontend/cache/storage/redis.rs | 246 +++++++++++++++++ 10 files changed, 581 insertions(+), 406 deletions(-) delete mode 100644 pgdog/src/frontend/cache/client.rs create mode 100644 pgdog/src/frontend/cache/storage/mod.rs create mode 100644 
pgdog/src/frontend/cache/storage/redis.rs diff --git a/.schema/pgdog.schema.json b/.schema/pgdog.schema.json index b4997615d..1b8f6385f 100644 --- a/.schema/pgdog.schema.json +++ b/.schema/pgdog.schema.json @@ -32,10 +32,14 @@ "broadcast_address": null, "broadcast_port": 6433, "cache": { + "backend": "redis", "enabled": false, "max_result_size": 0, "policy": "no_cache", - "redis_url": "redis://localhost:6379", + "redis": { + "cache_key_prefix": "pgdog:", + "url": "redis://localhost:6379" + }, "ttl": 300 }, "checkout_timeout": 5000, @@ -283,11 +287,16 @@ ] }, "Cache": { - "description": "Redis cache configuration for a database.", + "description": "Cache configuration.", "type": "object", "properties": { + "backend": { + "description": "Which storage backend to use.\n\n_Default:_ `redis`", + "$ref": "#/$defs/CacheBackend", + "default": "redis" + }, "enabled": { - "description": "Whether to enable caching for this database.\n\n_Default:_ `false`", + "description": "Whether to enable caching.\n\n_Default:_ `false`", "type": "boolean", "default": false }, @@ -299,14 +308,17 @@ "minimum": 0 }, "policy": { - "description": "Cache policy: no_cache or cache.\n\n_Default:_ `no_cache`", + "description": "Cache policy: `no_cache` or `cache`.\n\n_Default:_ `no_cache`", "$ref": "#/$defs/CachePolicy", "default": "no_cache" }, - "redis_url": { - "description": "Redis connection URL.\n\n_Default:_ `redis://localhost:6379`", - "type": "string", - "default": "redis://localhost:6379" + "redis": { + "description": "Redis backend configuration.\n\nOnly read when `backend = \"redis\"`.", + "$ref": "#/$defs/RedisConfig", + "default": { + "cache_key_prefix": "pgdog:", + "url": "redis://localhost:6379" + } }, "ttl": { "description": "Default TTL in seconds for cached queries.\n\n_Default:_ `300`", @@ -318,6 +330,16 @@ }, "additionalProperties": false }, + "CacheBackend": { + "description": "Cache storage backend discriminator.", + "oneOf": [ + { + "description": "Redis backend (default).", 
+ "type": "string", + "const": "redis" + } + ] + }, "CachePolicy": { "description": "Cache policy.", "oneOf": [ @@ -636,10 +658,14 @@ "description": "Redis cache configuration for this database.", "$ref": "#/$defs/Cache", "default": { + "backend": "redis", "enabled": false, "max_result_size": 0, "policy": "no_cache", - "redis_url": "redis://localhost:6379", + "redis": { + "cache_key_prefix": "pgdog:", + "url": "redis://localhost:6379" + }, "ttl": 300 } }, @@ -1510,6 +1536,23 @@ } ] }, + "RedisConfig": { + "description": "Redis-specific cache backend configuration.\n\nCorresponds to the `[general.cache.redis]` TOML section.", + "type": "object", + "properties": { + "cache_key_prefix": { + "description": "Key prefix prepended to every cache key stored in Redis.\n\n_Default:_ `pgdog:`", + "type": "string", + "default": "pgdog:" + }, + "url": { + "description": "Redis connection URL.\n\n_Default:_ `redis://localhost:6379`", + "type": "string", + "default": "redis://localhost:6379" + } + }, + "additionalProperties": false + }, "ReplicaLag": { "description": "Replica lag banning configuration. When a replica's replication lag exceeds the threshold, it is banned from serving read queries.", "type": "object", diff --git a/docs/CACHE.md b/docs/CACHE.md index 365da7dad..1119ae438 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -14,58 +14,78 @@ Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on **CachePolicy enum:** `NoCache` (default), `Cache`. Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. -**Cache struct:** -- `enabled: bool` — is caching on? -- `policy: CachePolicy` — which policy? -- `ttl: u64` — default TTL seconds (default 300) -- `redis_url: String` — Redis connection URL -- `max_result_size: usize` — max cached result bytes +**CacheBackend enum:** `Redis` (default). Discriminator for selecting the storage backend and for hotswap detection when the backend type changes in config. 
+ +**RedisConfig struct** (`[general.cache.redis]`): +- `url: String` — Redis connection URL (default `redis://localhost:6379`) +- `cache_key_prefix: String` — prefix prepended to every Redis key (default `pgdog:`) + +**Cache struct** (`[general.cache]`): +- `enabled: bool` — is caching on? (default `false`) +- `policy: CachePolicy` — which policy? (default `no_cache`) +- `ttl: u64` — default TTL seconds (default `300`) +- `backend: CacheBackend` — which storage backend (default `redis`) +- `redis: RedisConfig` — Redis-specific settings +- `max_result_size: usize` — max cached result bytes (default `0` = unlimited) + +Example TOML: +```toml +[general.cache] +enabled = true +policy = "cache" +ttl = 300 + +[general.cache.redis] +url = "redis://localhost:6379" +cache_key_prefix = "pgdog:" +``` **`general.rs`** — `General` struct holds `cache: Cache` field. **Cache config is global.** -**`lib.rs`** — Exports `pub mod cache;` and `pub use cache::{CachePolicy, Cache};`. +**`lib.rs`** — Exports `pub use cache::{CacheBackend, CachePolicy, Cache, RedisConfig as CacheRedisConfig};`. ### Cache Module (`pgdog/src/frontend/cache/`) **`mod.rs`** — Module exports, global singleton, and main `Cache` struct: ```rust -pub mod client; pub mod context; pub mod integration; pub mod policy; +pub mod storage; -pub use client::CacheClient; pub use context::CacheContext; pub use integration::CacheCheckResult; pub use policy::CacheDecision; +pub use storage::{CacheStorage, RedisCacheStorage}; ``` -`Cache` struct wraps: `CacheClient`. +`Cache` struct wraps `RwLock<Option<Box<dyn CacheStorage>>>` (tokio `RwLock`). **Global singleton:** Cache is global-scoped, not connection-scoped. Accessed via `cache()` function which returns `Arc<Cache>` from a `Lazy<Arc<Cache>>` static. `Cache::new()` reads config internally — no parameters needed. +**Config hotswap:** `hotswap_if_needed()` is called at the top of `try_read_cache` and `save_response_in_cache`.
It fast-paths with a read-lock; acquires write-lock only if the URL or backend type has changed, then rebuilds the storage. + Key methods: -- `new()` — creates client (reads config internally) -- `try_read_cache(cache_context, in_transaction, client_request, params)` — calls `cache_check()`, returns `Ok(Some(Vec))` on HIT (caller replays through pipeline), `Ok(None)` on MISS/PASSTHROUGH -- `save_response_in_cache(cache_context)` — finalizes by storing the captured response - -**`client.rs`** — Redis client wrapper using `fred` v9: -- `CacheClient::new()` — builds client from global `config().config.general.cache`, returns disabled stub if no config/URL -- `ensure_connected()` — lazy one-time `client.init().await` followed by `client.ping()` verification; sets `redis_connected` flag -- `get(&self, key)` — returns `Result, Error>`; fetches cached wire-protocol bytes. Returns `Err(Error::CacheMiss)` on a key miss (distinct from connection errors) -- `set(&self, key, value, ttl)` — stores bytes with EX expiration; respects `max_result_size` -- `spawn_reconnect()` — background task that retries `init()` every 500ms, verifies with `ping()`, sets `redis_connected = true` -- `mark_disconnected()` — sets `redis_connected = false`, spawns reconnect if not already running (CAS-guarded) -- `is_connected()` — reads our atomic flag (not fred's potentially stale `ClientState`) -- `is_enabled()` — returns true if both client exists and config enabled -- Keys are prefixed with `"pgdog:"` -- Error types: - - `RedisError { cmd: &'static str, key: u64, err: RedisError }` — Redis command failed (includes command name and key for context) - - `ConnectionFailed(&'static str)` — not connected or not configured - - `CacheMiss(u64)` — key not present in Redis (not an error condition, used for control flow) -- `redis_connected: Arc` — authoritative connection gate, only true after PING succeeds +- `new()` — creates storage from current config (or `None` if disabled) +- `hotswap_if_needed()` 
— checks the live config against the active storage's config via `has_config_changed()`; swaps the storage if it returns `true` +- `try_read_cache(cache_context, in_transaction, client_request, params)` — hotswaps, calls `cache_check()`, returns `Ok(Some(Vec<Message>))` on HIT (caller replays through pipeline), `Ok(None)` on MISS/PASSTHROUGH +- `save_response_in_cache(cache_context)` — hotswaps, finalizes by storing the captured response + +**`storage/mod.rs`** — Abstract storage trait and error type: +- `CacheStorage` trait: `get`, `set`, `is_enabled`, `has_config_changed` — implemented by all cache backends +- `Error` enum shared across all backends: `RedisError`, `ConnectionFailed`, `CacheMiss` + +**`storage/redis.rs`** — Redis storage backend (`RedisCacheStorage`) implementing `CacheStorage`: +- `RedisCacheStorage::new(config)` — builds client from given URL; immediately spawns a background connection task; returns `None` if URL is invalid +- Background connect task: retries `init()` in a loop (5ms to 5s exponential backoff); sets `reconnecting = false` on success; CAS-guarded so only one task runs at a time +- `get(&self, key)` — returns `Result<Vec<u8>, Error>`; returns `Err(Error::ConnectionFailed)` immediately (triggering cache miss) if not yet connected; marks `reconnecting` and spawns reconnect on Redis errors +- `set(&self, key, value, ttl)` — stores bytes with EX expiration; returns immediately on disconnect; respects `max_result_size` from live config +- `reconnect()` — spawns reconnect if not already running (CAS-guarded) +- `has_config_changed()` — returns `true` if cache config has changed (used for hotswap detection) +- `is_enabled()` — reads live `config().config.general.cache.enabled` +- Key prefix comes from `config().config.general.cache.redis.cache_key_prefix` - `reconnecting: Arc<AtomicBool>` — prevents multiple concurrent reconnect tasks -- All Redis operations wrapped in `tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) as safety net +- All Redis operations wrapped in
`tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) **`policy.rs`** — 2-tier policy resolution: - `CacheDirective` enum: `Cache { ttl_seconds }`, `ForceCache { ttl_seconds }`, `NoCache` (default) @@ -250,27 +270,20 @@ SQL comment → pgdog.cache parameter → DB policy config 18. **CacheClient error types refined** — `get()` now returns `Result<Vec<u8>, Error>` (no more `Option`). `Error::CacheMiss(u64)` is a dedicated variant for key-not-found; `Error::RedisError` is now a struct variant carrying `cmd: &'static str`, `key: u64`, and the underlying error for richer diagnostics. `Error::ConnectionFailed` uses `&'static str` instead of `String` to avoid heap allocation on the hot path. ---- +19. **Config hotswap** — `Cache` singleton holds `Arc<RwLock<Option<Box<dyn CacheStorage>>>>`. `hotswap_if_needed()` runs at the start of every `try_read_cache` and `save_response_in_cache` call: read-locks to compare the active backend's URL against `config().config.general.cache.redis.url`; if they differ (or the backend type changes) it write-locks and rebuilds the storage. Fast path is a read-lock-only check with no allocation. -## What's Left To Do +20. **CacheClient rewritten as `RedisCacheStorage`** — Replaced `CacheClient` with `RedisCacheStorage` implementing the `CacheStorage` trait. Key improvements: background connect task is spawned immediately in `new()` so the first query never blocks on init; `get`/`set` check only one atomic flag (`reconnecting`) and return immediately if it is `true`, instead of running `ensure_connected`; the `Option` field and the three-condition guard at the top of every operation are gone; `reconnect` is the single place that sets the flag and CAS-guards the reconnect spawn. -1. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching.
But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) +21. **Abstract storage backend** — `storage/mod.rs` defines the `CacheStorage` trait (`get`, `set`, `is_enabled`, `has_config_changed`) and the shared `Error` enum. `storage/redis.rs` is the Redis implementation. `Cache` holds `Box` behind a tokio `RwLock` so any backend (e.g. Memcached) can be plugged in by adding a sub-module under `storage/` and a variant to `CacheBackend`. `deserialize_cached()` remains backend-agnostic in `integration.rs`. -2. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. +22. **Nested backend config** — Backend-specific settings live in their own TOML subtable (`[general.cache.redis]`) rather than flat fields on `[general.cache]`. `RedisConfig` holds `url` and `cache_key_prefix`. When a new backend is added, it gets its own subtable (e.g. `[general.cache.memcached]`) without polluting the top-level cache section. `client.rs` renamed to `storage/redis.rs`. -3. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` - -4. **Provide config hotswap**. +--- -5. **Review and rewrite CacheClient**. +## What's Left To Do -6. **Abstract storage backend** — `CacheClient` is Redis-specific. A `CacheStorage` trait (`get`, `set`, `is_enabled`) would allow plugging in other backends (e.g. memcached) via config. `deserialize_cached()` is already backend-agnostic (pure wire-protocol parsing) and would be shared across all backends. +1. 
**Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) -### Planned Tests +2. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but timing edge cases under rapid disconnect/reconnect cycles still need stress-testing. -1. **Database key namespace collision** — Two databases sharing one Redis, both running same query but with different underlying PG data. Verify correct isolation. -2. **Basic cache hit/miss** — Run a SELECT once (expect miss), run again (expect hit), verify metrics. -3. **TTL expiration** — Cache a query with short TTL, wait for expiry, verify miss on third call. -4. **Write bypasses cache** — Execute INSERT/UPDATE/DELETE, verify these do not populate or consume the cache. -5. **Redis unavailable** — Stop Redis mid-flight, verify queries pass through to PG without blocking. -6. **Redis reconnection** — Restart Redis after disconnect, verify cache recovers automatically. \ No newline at end of file +3. **Integration tests**. diff --git a/pgdog-config/src/cache.rs b/pgdog-config/src/cache.rs index 7450c4730..0a7ae9021 100644 --- a/pgdog-config/src/cache.rs +++ b/pgdog-config/src/cache.rs @@ -36,30 +36,89 @@ impl std::fmt::Display for CachePolicy { } } -/// Redis cache configuration for a database. +/// Cache storage backend discriminator. +#[derive( + Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Copy, JsonSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum CacheBackend { + /// Redis backend (default). + #[default] + Redis, +} + +/// Redis-specific cache backend configuration. +/// +/// Corresponds to the `[general.cache.redis]` TOML section. 
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, JsonSchema)] +#[serde(deny_unknown_fields)] +pub struct RedisConfig { + /// Redis connection URL. + /// + /// _Default:_ `redis://localhost:6379` + #[serde(default = "RedisConfig::url")] + pub url: String, + + /// Key prefix prepended to every cache key stored in Redis. + /// + /// _Default:_ `pgdog:` + #[serde(default = "RedisConfig::cache_key_prefix")] + pub cache_key_prefix: String, +} + +impl Default for RedisConfig { + fn default() -> Self { + Self { + url: Self::url(), + cache_key_prefix: Self::cache_key_prefix(), + } + } +} + +impl RedisConfig { + fn url() -> String { + "redis://localhost:6379".to_string() + } + + fn cache_key_prefix() -> String { + "pgdog:".to_string() + } +} + +/// Cache configuration. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, JsonSchema)] #[serde(deny_unknown_fields)] pub struct Cache { - /// Whether to enable caching for this database. - /// + /// Whether to enable caching. + /// /// _Default:_ `false` #[serde(default = "Cache::enabled")] pub enabled: bool, - /// Cache policy: no_cache or cache. + + /// Cache policy: `no_cache` or `cache`. /// /// _Default:_ `no_cache` #[serde(default = "Cache::policy")] pub policy: CachePolicy, + /// Default TTL in seconds for cached queries. /// /// _Default:_ `300` #[serde(default = "Cache::ttl")] pub ttl: u64, - /// Redis connection URL. + + /// Which storage backend to use. /// - /// _Default:_ `redis://localhost:6379` - #[serde(default = "Cache::redis_url")] - pub redis_url: String, + /// _Default:_ `redis` + #[serde(default = "Cache::backend")] + pub backend: CacheBackend, + + /// Redis backend configuration. + /// + /// Only read when `backend = "redis"`. + #[serde(default)] + pub redis: RedisConfig, + /// Maximum result size in bytes to cache (0 = unlimited). 
/// /// _Default:_ `0` @@ -73,7 +132,8 @@ impl Default for Cache { enabled: Self::enabled(), policy: Self::policy(), ttl: Self::ttl(), - redis_url: Self::redis_url(), + backend: Self::backend(), + redis: RedisConfig::default(), max_result_size: Self::max_result_size(), } } @@ -85,18 +145,18 @@ impl Cache { } fn policy() -> CachePolicy { - Default::default() + CachePolicy::default() } fn ttl() -> u64 { 300 } - fn redis_url() -> String { - "redis://localhost:6379".to_string() + fn backend() -> CacheBackend { + CacheBackend::default() } fn max_result_size() -> usize { 0 } -} \ No newline at end of file +} diff --git a/pgdog-config/src/lib.rs b/pgdog-config/src/lib.rs index 399fd8e3d..22ab53404 100644 --- a/pgdog-config/src/lib.rs +++ b/pgdog-config/src/lib.rs @@ -19,7 +19,7 @@ pub mod users; pub mod util; pub use auth::{AuthType, PassthroughAuth}; -pub use cache::{CachePolicy, Cache}; +pub use cache::{CacheBackend, CachePolicy, Cache, RedisConfig as CacheRedisConfig}; pub use core::{Config, ConfigAndUsers}; pub use data_types::*; pub use database::{ diff --git a/pgdog/src/frontend/cache/client.rs b/pgdog/src/frontend/cache/client.rs deleted file mode 100644 index 87ebd8e1a..000000000 --- a/pgdog/src/frontend/cache/client.rs +++ /dev/null @@ -1,310 +0,0 @@ -use fred::prelude::*; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; -use std::time::Duration; -use tracing::{debug, error, info}; - -use crate::config::config; - -const CACHE_KEY_PREFIX: &str = "pgdog:"; - -/// Timeout for individual Redis operations (GET/SET/init). -/// Safety net — should never fire in normal operation since the atomic flag gates all calls. -const REDIS_OPERATION_TIMEOUT: Duration = Duration::from_secs(2); - -#[derive(Clone)] -pub struct CacheClient { - client: Option, - /// Master connection state flag. Set true only after PING succeeds - /// on init or reconnect. Set false immediately on any error/timeout. 
- redis_connected: Arc, - /// Prevents spawning multiple reconnect tasks simultaneously. - reconnecting: Arc, -} - -impl std::fmt::Debug for CacheClient { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("CacheClient") - .field("client", &self.client.as_ref().map(|_| "...")) - .field( - "redis_connected", - &self.redis_connected.load(Ordering::Relaxed), - ) - .field("reconnecting", &self.reconnecting.load(Ordering::Relaxed)) - .finish() - } -} - -impl CacheClient { - pub fn new() -> Self { - let cache_config = &config().config.general.cache; - - if !cache_config.enabled { - return Self { - client: None, - redis_connected: Arc::new(AtomicBool::new(false)), - reconnecting: Arc::new(AtomicBool::new(false)), - }; - } - - let url = cache_config.redis_url.as_str(); - let client_config = match RedisConfig::from_url(url) { - Ok(c) => c, - Err(e) => { - error!("Failed to parse Redis URL: {}", e); - return Self { - client: None, - redis_connected: Arc::new(AtomicBool::new(false)), - reconnecting: Arc::new(AtomicBool::new(false)), - }; - } - }; - - let client = match Builder::from_config(client_config).build() { - Ok(c) => c, - Err(e) => { - error!("Failed to build Redis client: {}", e); - return Self { - client: None, - redis_connected: Arc::new(AtomicBool::new(false)), - reconnecting: Arc::new(AtomicBool::new(false)), - }; - } - }; - - Self { - client: Some(client), - redis_connected: Arc::new(AtomicBool::new(false)), - reconnecting: Arc::new(AtomicBool::new(false)), - } - } - - async fn ensure_connected(&self) -> bool { - if self.redis_connected.load(Ordering::Acquire) { - return true; - } - - if self.reconnecting.load(Ordering::Relaxed) { - return false; - } - - if let Some(ref client) = self.client { - match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.init()).await { - Ok(Ok(_)) => { - if Self::ping_client(client).await { - self.redis_connected.store(true, Ordering::Release); - info!("Connected to Redis"); - return true; - 
} else { - debug!("Redis init returned OK but PING failed — Redis not ready"); - } - } - Ok(Err(e)) => { - debug!("Redis init failed: {}", e); - } - Err(_) => { - error!("Redis init timed out"); - } - } - } - false - } - - async fn ping_client(client: &RedisClient) -> bool { - match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.ping::()).await { - Ok(Ok(resp)) => { - info!("Redis PING succeeded: {}", resp); - true - } - Ok(Err(e)) => { - debug!("Redis PING failed: {}", e); - false - } - Err(_) => { - debug!("Redis PING timed out"); - false - } - } - } - - fn spawn_reconnect(&self) { - if self - .reconnecting - .compare_exchange(false, true, Ordering::Release, Ordering::Relaxed) - .is_err() - { - debug!("Redis reconnect task already running, skipping"); - return; - } - - let Some(ref client) = self.client else { - error!("Redis reconnect: no client available"); - self.reconnecting.store(false, Ordering::Release); - return; - }; - - let client = client.clone(); - let redis_connected = self.redis_connected.clone(); - let reconnecting = self.reconnecting.clone(); - - tokio::spawn(async move { - info!("Redis reconnect task started"); - let mut attempt = 0; - loop { - attempt += 1; - debug!("Redis reconnect attempt #{}", attempt); - - let init_ok = - match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.init()).await { - Ok(Ok(_)) => true, - Ok(Err(_)) | Err(_) => false, - }; - - if init_ok || Self::ping_client(&client).await { - redis_connected.store(true, Ordering::Release); - reconnecting.store(false, Ordering::Release); - info!("Redis reconnected successfully"); - return; - } - tokio::time::sleep(Duration::from_millis(500)).await; - } - }); - - info!("Spawning Redis reconnect task"); - } - - fn mark_disconnected(&self) { - self.redis_connected.store(false, Ordering::Release); - self.spawn_reconnect(); - } - - pub fn is_connected(&self) -> bool { - self.redis_connected.load(Ordering::Relaxed) - } - - pub(crate) async fn get(&self, key: u64) -> Result, 
Error> { - if !self.ensure_connected().await { - if !self.is_connected() { - self.spawn_reconnect(); - return Err(Error::ConnectionFailed( - "Redis disconnected, reconnecting in background", - )); - } - return Err(Error::ConnectionFailed("Redis not connected")); - } - - let Some(ref client) = self.client else { - return Err(Error::ConnectionFailed("Redis not configured")); - }; - - let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); - let val = match tokio::time::timeout( - REDIS_OPERATION_TIMEOUT, - client.get::(full_key), - ) - .await - { - Ok(Ok(v)) => v, - Ok(Err(err)) => { - self.mark_disconnected(); - return Err(Error::RedisError { - cmd: "GET", - key, - err, - }); - } - Err(_) => { - self.mark_disconnected(); - return Err(Error::ConnectionFailed("Redis GET timed out")); - } - }; - - if let Some(bytes) = val.into_bytes() { - debug!("Cache hit for key {}", key); - Ok(bytes.to_vec()) - } else { - Err(Error::CacheMiss(key)) - } - } - - pub(crate) async fn set(&self, key: u64, value: &[u8], ttl: u64) -> Result<(), Error> { - if !self.ensure_connected().await { - if !self.is_connected() { - self.spawn_reconnect(); - return Err(Error::ConnectionFailed( - "Redis disconnected, reconnecting in background", - )); - } - return Err(Error::ConnectionFailed("Redis not connected")); - } - - let Some(ref client) = self.client else { - return Err(Error::ConnectionFailed("Redis not configured")); - }; - - let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); - - let cache_config = &config().config.general.cache; - - if cache_config.max_result_size != 0 && value.len() > cache_config.max_result_size { - debug!( - "Skipping cache for key {}: size {} exceeds max {}", - key, - value.len(), - cache_config.max_result_size - ); - return Ok(()); - } - - let ttl_seconds = ttl as i64; - - match tokio::time::timeout( - REDIS_OPERATION_TIMEOUT, - client.set::<(), _, _>( - full_key, - value, - Some(Expiration::EX(ttl_seconds)), - None, - false, - ), - ) - .await - { - Ok(Ok(_)) => { - 
debug!("Cached key {} with TTL {}s", key, ttl_seconds); - Ok(()) - } - Ok(Err(err)) => { - self.mark_disconnected(); - Err(Error::RedisError { - cmd: "SET", - key, - err, - }) - } - Err(_) => { - self.mark_disconnected(); - Err(Error::ConnectionFailed("Redis SET timed out")) - } - } - } - - pub fn is_enabled(&self) -> bool { - let cache_config = &config().config.general.cache; - self.client.is_some() && cache_config.enabled - } -} - -#[derive(Debug, thiserror::Error)] -pub enum Error { - #[error("Redis {cmd} error for key {key}: {err}")] - RedisError { - cmd: &'static str, - key: u64, - err: RedisError, - }, - #[error("Connection failed: {0}")] - ConnectionFailed(&'static str), - #[error("Cache miss for key {0}")] - CacheMiss(u64), -} diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 84beb2aa5..8a84970a0 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -5,7 +5,7 @@ use regex::Regex; use crate::{ frontend::{ - cache::{client::Error as CacheClientError, CacheDecision}, + cache::{storage::Error as CacheStorageError, CacheDecision}, ClientRequest, }, net::{FromBytes, Message, Parameters, ToBytes}, @@ -78,17 +78,22 @@ impl Cache { cache_key_hash, ttl, })), - CacheDecision::Cache(ttl) => match self.client.get(cache_key_hash).await { - Ok(cached) => Ok(CacheCheckResult::Hit { cached }), - Err(CacheClientError::CacheMiss(_)) => Ok(CacheCheckResult::Miss(CacheMiss { - cache_key_hash, - ttl: ttl, - })), - Err(e) => { - warn!("{}", e); - Ok(CacheCheckResult::Passthrough) + CacheDecision::Cache(ttl) => { + let guard = self.storage.read().await; + match guard.as_ref() { + None => Ok(CacheCheckResult::Passthrough), + Some(storage) => match storage.get(cache_key_hash).await { + Ok(cached) => Ok(CacheCheckResult::Hit { cached }), + Err(CacheStorageError::CacheMiss(_)) => { + Ok(CacheCheckResult::Miss(CacheMiss { cache_key_hash, ttl })) + } + Err(e) => { + warn!("{}", e); + 
Ok(CacheCheckResult::Passthrough) + } + }, } - }, + } } } @@ -168,7 +173,13 @@ impl Cache { messages: Vec, ttl: u64, ) { - if messages.is_empty() || !self.client.is_enabled() { + let guard = self.storage.read().await; + let storage = match guard.as_ref() { + Some(s) if s.is_enabled() => s, + _ => return, + }; + + if messages.is_empty() { return; } @@ -177,7 +188,7 @@ impl Cache { match msg.to_bytes() { Ok(bytes) => buffer.extend_from_slice(&bytes), Err(e) => { - debug!("Failed to serialize message for caching: {}", e); + warn!("Failed to serialize message for caching: {}", e); return; } } @@ -187,8 +198,8 @@ impl Cache { return; } - if let Err(e) = self.client.set(cache_key_hash, &buffer, ttl).await { - debug!("Failed to cache response: {:?}", e); + if let Err(e) = storage.set(cache_key_hash, &buffer, ttl).await { + warn!("{}", e); } } } diff --git a/pgdog/src/frontend/cache/mod.rs b/pgdog/src/frontend/cache/mod.rs index 42ee811a2..6c0023b2d 100644 --- a/pgdog/src/frontend/cache/mod.rs +++ b/pgdog/src/frontend/cache/mod.rs @@ -1,25 +1,37 @@ -pub mod client; pub mod context; pub mod integration; pub mod policy; +pub mod storage; -pub use client::CacheClient; pub use context::CacheContext; pub use integration::CacheCheckResult; pub use policy::CacheDecision; +pub use storage::{CacheStorage, RedisCacheStorage}; use once_cell::sync::Lazy; use std::sync::Arc; +use tokio::sync::RwLock; use tracing::debug; use crate::{ - frontend::{ClientRequest, cache::integration::CacheMiss}, + config::config, + frontend::{ + cache::{integration::CacheMiss, storage::build_storage}, + ClientRequest, + }, net::{Message, Parameters}, }; -#[derive(Debug)] +/// Wraps the active storage backend behind a tokio `RwLock` so it can be +/// hotswapped without restarting pgdog. 
pub struct Cache { - client: CacheClient, + storage: RwLock>>, +} + +impl std::fmt::Debug for Cache { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Cache").field("storage", &"...").finish() + } } static CACHE: Lazy> = Lazy::new(|| Arc::new(Cache::new())); @@ -30,11 +42,47 @@ pub fn cache() -> Arc { impl Cache { fn new() -> Self { + let storage = build_storage(); Cache { - client: CacheClient::new(), + storage: RwLock::new(storage), } } + /// Replace the storage backend if the config has changed (URL or backend type). + /// + /// Acquires the write lock only when a change is detected; otherwise the + /// read-lock path is zero-allocation and very fast. + async fn hotswap_if_needed(&self) { + let cfg = &config().config.general.cache; + + // Fast path: read-lock to check whether anything has changed. + { + let guard = self.storage.read().await; + let needs_swap = match guard.as_ref() { + Some(s) => s.has_config_changed(cfg), + None => cfg.enabled, + }; + if !needs_swap { + return; + } + } + + // Slow path: write-lock and rebuild. + let mut guard = self.storage.write().await; + // Re-check under the write lock (another task may have already swapped). + let needs_swap = match guard.as_ref() { + Some(s) => s.has_config_changed(cfg), + None => cfg.enabled, + }; + + if needs_swap { + debug!("Cache storage config changed — rebuilding backend"); + *guard = build_storage(); + } + } + + // ── public API ─────────────────────────────────────────────────────────── + /// Check the cache for a query response. /// /// On HIT returns `Ok(Some(messages))` — the caller is responsible for @@ -50,6 +98,8 @@ impl Cache { client_request: &ClientRequest, params: &Parameters, ) -> Result>, crate::frontend::Error> { + self.hotswap_if_needed().await; + let cache_result = self .cache_check(in_transaction, client_request, params) .await?; @@ -75,9 +125,15 @@ impl Cache { } } - /// Finalize caching by storing the response in Redis. 
+ /// Finalize caching by storing the response in the active backend. pub async fn save_response_in_cache(&self, cache_context: &mut CacheContext) { - if let Some(CacheMiss { cache_key_hash, ttl } ) = cache_context.cache_miss.take() { + self.hotswap_if_needed().await; + + if let Some(CacheMiss { + cache_key_hash, + ttl, + }) = cache_context.cache_miss.take() + { if !cache_context.had_error && !cache_context.response_buffer.is_empty() { let messages = std::mem::take(&mut cache_context.response_buffer); self.cache_response(cache_key_hash, messages, ttl).await; diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index 35d4bef17..7782dce2a 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -60,8 +60,7 @@ fn get_cache_directive( client_request .ast .as_ref() - .map(|ast| ast.comment_cache) - .flatten() + .and_then(|ast| ast.comment_cache) .or_else(|| extract_parameter_directive(params)) } @@ -83,8 +82,7 @@ fn extract_parameter_directive(params: &Parameters) -> Option { .strip_prefix("force_cache") .or_else(|| s.strip_prefix("cache")) .map(|s| s.trim_start()) - .map(|s| s.strip_prefix("ttl=")) - .flatten() + .and_then(|s| s.strip_prefix("ttl=")) .and_then(|t| t.trim().parse::().ok()) { let ttl_seconds = Some(ttl); diff --git a/pgdog/src/frontend/cache/storage/mod.rs b/pgdog/src/frontend/cache/storage/mod.rs new file mode 100644 index 000000000..13f53af89 --- /dev/null +++ b/pgdog/src/frontend/cache/storage/mod.rs @@ -0,0 +1,58 @@ +pub mod redis; + +pub use redis::RedisCacheStorage; + +use async_trait::async_trait; + +use crate::config::{ + cache::{Cache as CacheConfig, CacheBackend}, + config, +}; + +/// Errors returned by cache storage backends. 
+#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("Redis {cmd} error for key {key}: {err}")] + RedisError { + cmd: &'static str, + key: u64, + err: fred::error::RedisError, + }, + #[error("Connection failed: {0}")] + ConnectionFailed(&'static str), + #[error("Cache miss for key {0}")] + CacheMiss(u64), +} + +/// Abstract cache storage backend. +/// +/// Implementations must be `Send + Sync` so they can be held behind +/// something like `Arc>` and shared across async tasks. +#[async_trait] +pub trait CacheStorage: Send + Sync { + /// Fetch cached bytes for `key`. Returns [`Error::CacheMiss`] when the + /// key is absent (not an error condition — used for control flow). + async fn get(&self, key: u64) -> Result, Error>; + + /// Store `value` under `key` with a `ttl` in seconds. + async fn set(&self, key: u64, value: &[u8], ttl: u64) -> Result<(), Error>; + + /// Returns `true` when the backend is configured and enabled. + fn is_enabled(&self) -> bool; + + /// Returns `true` if cache config has changed (used for hotswap detection). + fn has_config_changed(&self, new_config: &CacheConfig) -> bool; +} + +/// Construct the appropriate storage backend from the current config. 
+pub fn build_storage() -> Option> { + let cfg = &config().config.general.cache; + if !cfg.enabled { + return None; + } + match cfg.backend { + CacheBackend::Redis => { + RedisCacheStorage::new(&cfg).map(|s| Box::new(s) as Box) + } + } +} diff --git a/pgdog/src/frontend/cache/storage/redis.rs b/pgdog/src/frontend/cache/storage/redis.rs new file mode 100644 index 000000000..e0aec87a6 --- /dev/null +++ b/pgdog/src/frontend/cache/storage/redis.rs @@ -0,0 +1,246 @@ +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; + +use async_trait::async_trait; +use fred::prelude::*; +use pgdog_config::CacheBackend; +use tracing::{debug, error, info}; + +use crate::config::{cache::Cache as CacheConfig, config}; + +use super::{CacheStorage, Error}; + +/// Timeout for individual Redis operations (GET/SET/ping). +const REDIS_OPERATION_TIMEOUT: Duration = Duration::from_secs(2); +/// Max time between reconnection attempts +const MAX_REDIS_RECONNECTION_PERIOD: Duration = Duration::from_secs(5); + +/// Redis implementation of [`CacheStorage`]. +/// +/// Connection is established in a background task spawned from [`RedisCacheStorage::new`]. +/// All operations return immediately if the connection is not yet ready — `get` returns +/// [`Error::ConnectionFailed`] (triggering a cache-miss path) and `set` is silently dropped. +/// +/// At most one reconnect task runs at any time, enforced by a CAS on `reconnecting`. +pub struct RedisCacheStorage { + client: RedisClient, + /// Cache config. + config: CacheConfig, + /// Guards against spawning multiple concurrent reconnect tasks. 
+ reconnecting: Arc, +} + +impl std::fmt::Debug for RedisCacheStorage { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RedisCacheStorage") + .field("config", &self.config) + .field("reconnecting", &self.reconnecting.load(Ordering::Relaxed)) + .finish() + } +} + +impl RedisCacheStorage { + /// Build a new storage instance for `url` and immediately start a background + /// connection task. Returns `None` when the URL cannot be parsed. + pub fn new(config: &CacheConfig) -> Option { + let client_config = match RedisConfig::from_url(&config.redis.url) { + Ok(c) => c, + Err(e) => { + error!("Failed to parse Redis URL '{}': {}", config.redis.url, e); + return None; + } + }; + + let client = match Builder::from_config(client_config).build() { + Ok(c) => c, + Err(e) => { + error!("Failed to build Redis client: {}", e); + return None; + } + }; + + let reconnecting = Arc::new(AtomicBool::new(true)); // treat initial connect as "reconnecting" + + let storage = Self { + client, + config: config.clone(), + reconnecting, + }; + + // Fire-and-forget initial connection. + storage.spawn_connect_task(); + + Some(storage) + } + + // ── internal helpers ──────────────────────────────────────────────────── + + /// Spawn the (re)connect background loop. Uses a CAS to ensure only one + /// task is ever running at a time. 
+ fn spawn_connect_task(&self) { + let client = self.client.clone(); + let reconnecting = self.reconnecting.clone(); + + tokio::spawn(async move { + info!("Redis connect task started"); + let mut attempt = 0u32; + + loop { + attempt += 1; + debug!("Redis connect attempt #{}", attempt); + + let init_ok = + match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.init()).await { + Ok(Ok(_)) => true, + Ok(Err(e)) => { + debug!("Redis init error: {}", e); + false + } + Err(_) => { + debug!("Redis init timed out"); + false + } + }; + + if init_ok { + reconnecting.store(false, Ordering::Release); + info!("Redis connected (attempt #{})", attempt); + return; + } + + // Exponential backoff + tokio::time::sleep( + const { Duration::from_millis(5) } + .saturating_mul(1u32 << attempt.min(10)) + .min(MAX_REDIS_RECONNECTION_PERIOD), + ) + .await; + } + }); + } + + /// Mark the reconnecting as true and spawn a reconnect task if one is not + /// already running. + fn reconnect(&self) { + if self + .reconnecting + .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) + .is_ok() + { + self.spawn_connect_task(); + } else { + debug!("Redis reconnect task already running"); + } + } +} + +#[async_trait] +impl CacheStorage for RedisCacheStorage { + async fn get(&self, key: u64) -> Result, Error> { + if self.reconnecting.load(Ordering::Acquire) { + return Err(Error::ConnectionFailed("Redis not connected")); + } + + let full_key = format!("{}{}", self.config.redis.cache_key_prefix, key); + + let redis_result = tokio::time::timeout( + REDIS_OPERATION_TIMEOUT, + self.client.get::(full_key), + ) + .await; + let val = match redis_result { + Ok(Ok(v)) => v, + Ok(Err(err)) => { + self.reconnect(); + return Err(Error::RedisError { + cmd: "GET", + key, + err, + }); + } + Err(_) => { + self.reconnect(); + return Err(Error::ConnectionFailed("Redis GET timed out")); + } + }; + + match val.into_bytes() { + Some(bytes) => { + debug!("Cache hit for key {}", key); + Ok(bytes.to_vec()) + 
} + None => Err(Error::CacheMiss(key)), + } + } + + async fn set(&self, key: u64, value: &[u8], ttl: u64) -> Result<(), Error> { + if self.reconnecting.load(Ordering::Acquire) { + return Err(Error::ConnectionFailed("Redis not connected")); + } + + let max_result_size = config().config.general.cache.max_result_size; + if max_result_size != 0 && value.len() > max_result_size { + debug!( + "Skipping cache for key {}: size {} exceeds max {}", + key, + value.len(), + max_result_size + ); + return Ok(()); + } + + let full_key = format!("{}{}", self.config.redis.cache_key_prefix, key); + let ttl_seconds = ttl as i64; + + match tokio::time::timeout( + REDIS_OPERATION_TIMEOUT, + self.client.set::<(), _, _>( + full_key, + value, + Some(Expiration::EX(ttl_seconds)), + None, + false, + ), + ) + .await + { + Ok(Ok(_)) => { + debug!("Cached key {} with TTL {}s", key, ttl_seconds); + Ok(()) + } + Ok(Err(err)) => { + self.reconnect(); + Err(Error::RedisError { + cmd: "SET", + key, + err, + }) + } + Err(_) => { + self.reconnect(); + Err(Error::ConnectionFailed("Redis SET timed out")) + } + } + } + + fn is_enabled(&self) -> bool { + config().config.general.cache.enabled + } + + fn has_config_changed(&self, new_config: &CacheConfig) -> bool { + new_config.backend != CacheBackend::Redis + || self.config.redis.cmp(&new_config.redis).is_ne() + } +} + +// Avoid shallow copy +impl Clone for RedisCacheStorage { + fn clone(&self) -> Self { + Self { + client: self.client.clone_new(), + config: self.config.clone(), + reconnecting: Arc::new(AtomicBool::new(false)), + } + } +} From 4c771764b7e93b7d0b41268dc3d9477b10a78633 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Mon, 18 May 2026 10:27:40 +0300 Subject: [PATCH 18/20] deleted files that not belong to the feature: claude debug skill was specific to my system flake should be in another feat --- .claude/skills/debug/SKILL.md | 82 ------------------------------- .gitignore | 1 - flake.lock | 77 ----------------------------- flake.nix | 92 
----------------------------------- 4 files changed, 252 deletions(-) delete mode 100644 .claude/skills/debug/SKILL.md delete mode 100644 flake.lock delete mode 100644 flake.nix diff --git a/.claude/skills/debug/SKILL.md b/.claude/skills/debug/SKILL.md deleted file mode 100644 index d22874e8c..000000000 --- a/.claude/skills/debug/SKILL.md +++ /dev/null @@ -1,82 +0,0 @@ -# Debug Skill — pgdog Development Environment - -## Rules - -1. **Build command**: Always use `cargo build -p pgdog` (debug profile). Never use `--release` during debugging — it slows down builds by 4× and you rarely need to verify the final binary during development. - -2. **Docker environment**: You have access to rootless Docker. Key services: - - PostgreSQL runs on port **5433** (NOT 5432 — your real system postgres uses that) - - Redis is available on the default port **6379** - - If services aren't running, start them first: - ```bash - docker start $(docker ps -a -q --filter "name=pgdog" --latest) - # Or inspect what containers exist: - docker ps -a - ``` - -3. **Running pgdog in background**: Use `systemd-run --user`, NOT `nohup` or `&`. 
Example: - ```bash - systemd-run --user --collect --unit=pgdog-debug \ - --setenv=RUST_LOG=debug \ - --working-directory= \ - /target/debug/pgdog --config pgdog.toml 2>&1 - ``` - To stop it later: - ```bash - systemctl --user list-units | grep pgdog - systemctl --user stop - ``` - -## Useful Debugging Commands - -### Check docker services - -```bash -docker ps -``` - -### Check redis connectivity - -```bash -redis-cli ping -``` - -### Clear redis cache (useful for testing) - -```bash -redis-cli FLUSHALL -``` - -### Watch cache keys in real-time - -```bash -redis-cli MONITOR | grep "pgdog:" -``` - -### Inspect cached response bytes - -```bash -redis-cli --scan --pattern "pgdog:*" | head -1 | xargs redis-cli GET | xxd | head -20 -``` - -## File Structure Reference - -The cache implementation lives in: - -``` -pgdog/src/frontend/client/query_engine/cache/ -├── mod.rs # Module exports -├── client.rs # Redis client wrapper (fred v9) -├── integration.rs # cache_check(), send_cached_response(), cache_response() -├── policy.rs # CachePolicyResolver (3-tier decision engine) -└── stats.rs # QueryStatsTracker (hit/miss counters) -``` - -State documentation: `CacheState.md` in the project root. - -## Common Pitfalls - -- **Parser disabled by default**: `route.is_read()` returns false for `SELECT 1` when the query parser is off. The `is_likely_read()` heuristic in integration.rs covers this. -- **Policy defaults to NoCache**: `DatabaseCache.policy()` returns `CachePolicy::NoCache` by default. You must set `policy = "cache"` in the config. -- **Cache keys are hashed**: The key is a DefaultHasher hex digest of the raw query string, not the query itself. -- **Wire format is concatenated bytes**: Multiple PostgreSQL messages are concatenated into a single `Vec` with `[code: u8][length: u32be][payload: ...]` structure. 
diff --git a/.gitignore b/.gitignore index 32aeaec2a..5db985b82 100644 --- a/.gitignore +++ b/.gitignore @@ -51,7 +51,6 @@ perf.data.old CLAUDE.local.md .claude/plans/ .claude/completed_plans/ -!.claude/skills/debug # Ignore generated bindings pgdog-plugin/src/bindings.rs diff --git a/flake.lock b/flake.lock deleted file mode 100644 index 428032335..000000000 --- a/flake.lock +++ /dev/null @@ -1,77 +0,0 @@ -{ - "nodes": { - "crane": { - "locked": { - "lastModified": 1775839657, - "narHash": "sha256-SPm9ck7jh3Un9nwPuMGbRU04UroFmOHjLP56T10MOeM=", - "owner": "ipetkov", - "repo": "crane", - "rev": "7cf72d978629469c4bd4206b95c402514c1f6000", - "type": "github" - }, - "original": { - "owner": "ipetkov", - "repo": "crane", - "type": "github" - } - }, - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1731533236, - "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "nixpkgs": { - "locked": { - "lastModified": 1776067740, - "narHash": "sha256-B35lpsqnSZwn1Lmz06BpwF7atPgFmUgw1l8KAV3zpVQ=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "7e495b747b51f95ae15e74377c5ce1fe69c1765f", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-25.11", - "repo": "nixpkgs", - "type": "github" - } - }, - "root": { - "inputs": { - "crane": "crane", - "flake-utils": "flake-utils", - "nixpkgs": "nixpkgs" - } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - } - }, - "root": "root", - "version": 7 -} diff --git 
a/flake.nix b/flake.nix deleted file mode 100644 index 4a7fd6c7f..000000000 --- a/flake.nix +++ /dev/null @@ -1,92 +0,0 @@ -{ - inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.11"; - flake-utils.url = "github:numtide/flake-utils"; - crane.url = "github:ipetkov/crane"; - }; - - outputs = { self, nixpkgs, flake-utils, crane }: - flake-utils.lib.eachSystem flake-utils.lib.allSystems (system: - let - pkgs = import nixpkgs { inherit system; }; - stdenv' = p: p.stdenvAdapters.withCFlags [ "-O" ] (p.stdenvAdapters.useMoldLinker p.clangStdenv); - stdenv = stdenv' pkgs; - craneLib = (crane.mkLib pkgs).overrideScope (final: prev: { - stdenvSelector = stdenv'; - }); - - env = { - LIBCLANG_PATH = "${pkgs.libclang.lib}/lib"; - CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER = "${stdenv.cc}/bin/cc"; - CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS = "-C link-arg=--ld-path=${stdenv.cc}/bin/ld"; - }; - - commonArgs = { - src = let - unfilteredSrc = ./.; - fs = pkgs.lib.fileset; - in fs.toSource { - root = unfilteredSrc; - fileset = fs.unions [ - (craneLib.fileset.cargoTomlAndLock unfilteredSrc) - (craneLib.fileset.rust unfilteredSrc) - (fs.fileFilter - (file: file.hasExt "c" || file.hasExt "h" || file.hasExt "sql") - unfilteredSrc - ) - ]; - }; - strictDeps = true; - - nativeBuildInputs = with pkgs; [ - pkg-config - ]; - buildInputs = with pkgs; [ - openssl - ]; - - inherit env; - } // (craneLib.crateNameFromCargoToml { cargoToml = ./pgdog/Cargo.toml; }); - - cargoArtifacts = craneLib.buildDepsOnly commonArgs; - - devShell = (craneLib.devShell.override { - mkShell = pkgs.mkShell.override { - inherit stdenv; - }; - }) { - checks = self.checks; - inputsFrom = [ cargoArtifacts ]; - inherit env; - }; - - pgDog = craneLib.buildPackage (commonArgs // { - inherit cargoArtifacts; - doCheck = false; - cargoExtraArgs = "-p pgdog"; - }); - - in { - packages.default = pgDog; - - devShells.default = devShell; - - checks = { - inherit pgDog; - - pgDogClippy = craneLib.cargoClippy 
(commonArgs // { - inherit cargoArtifacts; - cargoClippyExtraArgs = "--all-targets --all-features -- --deny warnings"; - }); - - pgDogFmt = craneLib.cargoFmt commonArgs; - - pgDogNextest = craneLib.cargoNextest (commonArgs // { - inherit cargoArtifacts; - checkPhaseCargoCommand = "echo hello world"; - cargoNextestExtraArgs = "--test-threads=1 --no-fail-fast"; - }); - }; - } - ); -} From d94fdeaf6bca15949cae390ae1525b19aa256f58 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Mon, 18 May 2026 13:37:56 +0300 Subject: [PATCH 19/20] prepared statement's result caching --- docs/CACHE.md | 8 ++++---- pgdog/src/frontend/cache/integration.rs | 22 ++++++++++++++++------ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index 1119ae438..86177fbc1 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -278,12 +278,12 @@ SQL comment → pgdog.cache parameter → DB policy config 22. **Nested backend config** — Backend-specific settings live in their own TOML subtable (`[general.cache.redis]`) rather than flat fields on `[general.cache]`. `RedisConfig` holds `url` and `cache_key_prefix`. When a new backend is added, it gets its own subtable (e.g. `[general.cache.memcached]`) without polluting the top-level cache section. `client.rs` renamed to `storage/redis.rs`. +23. **Cache key must include Bind parameters for extended protocol** — For simple `Query` messages, parameter values are embedded in the SQL string, so the XXH3 hash of `database + query_text` is naturally unique per value. For extended protocol (Parse/Bind/Execute), the SQL contains `$1`/`$2` placeholders and the actual values arrive in the `Bind` message separately. The current hash ignores them, so `SELECT * FROM users WHERE id = $1` with `id = 1` and `id = 2` produce the same cache key — wrong rows are returned on the second call. 
Fix: hash `param.len` (the `i32` field, not the `len()` method which returns wire size) and `param.data` for each entry in `bind.params_raw()` into the hasher in `cache_check()` in `integration.rs`. This affects all production drivers that use extended protocol by default: psycopg3, asyncpg, JDBC, npgsql. Note: pgdog's built-in prepared statement cache (`PreparedStatements` / `GlobalCache`) is a proxy-level plan cache only — it deduplicates backend `Parse` round-trips. It does not cache result rows and is orthogonal to the Redis result cache. + --- ## What's Left To Do -1. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) - -2. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but timing edge cases under rapid disconnect/reconnect cycles still need stress-testing. +1. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but timing edge cases under rapid disconnect/reconnect cycles still need stress-testing. -3. **Integration tests**. +2. **Integration tests**. 
diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 8a84970a0..37b12f34b 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -61,31 +61,41 @@ impl Cache { _ => return Ok(CacheCheckResult::Passthrough), }; - let user = params.get_required("user")?; - let database = params.get_default("database", user); - let cache_key_hash = { + let compute_cache_key_hash = || { + let user = params.get_required("user")?; + let database = params.get_default("database", user); let mut hasher = xxhash_rust::xxh3::Xxh3Default::new(); database.hash(&mut hasher); let normalized_query = FORCE_CACHE_RE.replace(query.query(), "pgdog_cache: cache"); normalized_query.hash(&mut hasher); - hasher.finish() + if let Some(bind) = client_request.parameters()? { + for param in bind.params_raw() { + param.len.hash(&mut hasher); + param.data.hash(&mut hasher); + } + }; + Ok::(hasher.finish()) }; let decision = policy::resolve(client_request, params, is_read).await; match decision { CacheDecision::Skip => Ok(CacheCheckResult::Passthrough), CacheDecision::ForceCache(ttl) => Ok(CacheCheckResult::Miss(CacheMiss { - cache_key_hash, + cache_key_hash: compute_cache_key_hash()?, ttl, })), CacheDecision::Cache(ttl) => { + let cache_key_hash = compute_cache_key_hash()?; let guard = self.storage.read().await; match guard.as_ref() { None => Ok(CacheCheckResult::Passthrough), Some(storage) => match storage.get(cache_key_hash).await { Ok(cached) => Ok(CacheCheckResult::Hit { cached }), Err(CacheStorageError::CacheMiss(_)) => { - Ok(CacheCheckResult::Miss(CacheMiss { cache_key_hash, ttl })) + Ok(CacheCheckResult::Miss(CacheMiss { + cache_key_hash, + ttl, + })) } Err(e) => { warn!("{}", e); From 1bc45852af122c9e830bf3cdcd1213d479e52e7f Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Mon, 18 May 2026 13:54:00 +0300 Subject: [PATCH 20/20] fmt and clippy --- pgdog/src/backend/pool/cluster.rs | 6 +++--- 
pgdog/src/config/cache.rs | 2 +- pgdog/src/frontend/cache/context.rs | 5 ++++- pgdog/src/frontend/cache/integration.rs | 2 +- pgdog/src/frontend/cache/policy.rs | 2 +- pgdog/src/frontend/cache/storage/mod.rs | 2 +- pgdog/src/frontend/client/query_engine/context.rs | 5 ++++- pgdog/src/frontend/client/query_engine/mod.rs | 4 +++- 8 files changed, 18 insertions(+), 10 deletions(-) diff --git a/pgdog/src/backend/pool/cluster.rs b/pgdog/src/backend/pool/cluster.rs index b636d87fc..1dff017a5 100644 --- a/pgdog/src/backend/pool/cluster.rs +++ b/pgdog/src/backend/pool/cluster.rs @@ -158,7 +158,7 @@ pub struct ClusterConfig<'a> { pub reload_schema_on_ddl: bool, pub load_schema: LoadSchema, pub resharding_parallel_copies: usize, - pub cache_enabled: bool + pub cache_enabled: bool, } impl<'a> ClusterConfig<'a> { @@ -212,7 +212,7 @@ impl<'a> ClusterConfig<'a> { reload_schema_on_ddl: general.reload_schema_on_ddl, load_schema: general.load_schema, resharding_parallel_copies: general.resharding_parallel_copies, - cache_enabled: general.cache.enabled + cache_enabled: general.cache.enabled, } } } @@ -250,7 +250,7 @@ impl Cluster { reload_schema_on_ddl, load_schema, resharding_parallel_copies, - cache_enabled + cache_enabled, } = config; let identifier = Arc::new(DatabaseUser { diff --git a/pgdog/src/config/cache.rs b/pgdog/src/config/cache.rs index a089ff680..ece03acb6 100644 --- a/pgdog/src/config/cache.rs +++ b/pgdog/src/config/cache.rs @@ -1 +1 @@ -pub use pgdog_config::cache::*; \ No newline at end of file +pub use pgdog_config::cache::*; diff --git a/pgdog/src/frontend/cache/context.rs b/pgdog/src/frontend/cache/context.rs index 42fd0fecf..aeeab7613 100644 --- a/pgdog/src/frontend/cache/context.rs +++ b/pgdog/src/frontend/cache/context.rs @@ -1,4 +1,7 @@ -use crate::{frontend::cache::integration::CacheMiss, net::{Message, messages::Protocol}}; +use crate::{ + frontend::cache::integration::CacheMiss, + net::{messages::Protocol, Message}, +}; /// Cache context to use in 
QueryEngineContext. #[derive(Default)] diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 37b12f34b..5114c865f 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -72,7 +72,7 @@ impl Cache { for param in bind.params_raw() { param.len.hash(&mut hasher); param.data.hash(&mut hasher); - } + } }; Ok::(hasher.finish()) }; diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index 7782dce2a..60073dcc9 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -39,7 +39,7 @@ pub async fn resolve( Some(CacheDirective::NoCache) => return CacheDecision::Skip, Some(CacheDirective::Cache { ttl_seconds }) => { return CacheDecision::Cache(ttl_seconds.unwrap_or(cache_config.ttl)) - }, + } Some(CacheDirective::ForceCache { ttl_seconds }) => { return CacheDecision::ForceCache(ttl_seconds.unwrap_or(cache_config.ttl)) } diff --git a/pgdog/src/frontend/cache/storage/mod.rs b/pgdog/src/frontend/cache/storage/mod.rs index 13f53af89..91a7b377d 100644 --- a/pgdog/src/frontend/cache/storage/mod.rs +++ b/pgdog/src/frontend/cache/storage/mod.rs @@ -52,7 +52,7 @@ pub fn build_storage() -> Option> { } match cfg.backend { CacheBackend::Redis => { - RedisCacheStorage::new(&cfg).map(|s| Box::new(s) as Box) + RedisCacheStorage::new(cfg).map(|s| Box::new(s) as Box) } } } diff --git a/pgdog/src/frontend/client/query_engine/context.rs b/pgdog/src/frontend/client/query_engine/context.rs index 42ef8b21f..6a1fe3c38 100644 --- a/pgdog/src/frontend/client/query_engine/context.rs +++ b/pgdog/src/frontend/client/query_engine/context.rs @@ -1,7 +1,10 @@ use crate::{ backend::pool::{connection::mirror::Mirror, stats::MemoryStats}, frontend::{ - Client, ClientRequest, PreparedStatements, client::{Sticky, TransactionType, timeouts::Timeouts}, router::parser::rewrite::statement::plan::RewriteResult, cache::context::CacheContext + 
cache::context::CacheContext, + client::{timeouts::Timeouts, Sticky, TransactionType}, + router::parser::rewrite::statement::plan::RewriteResult, + Client, ClientRequest, PreparedStatements, }, net::{BackendKeyData, Parameters, Stream}, }; diff --git a/pgdog/src/frontend/client/query_engine/mod.rs b/pgdog/src/frontend/client/query_engine/mod.rs index e302e717d..9223f5b86 100644 --- a/pgdog/src/frontend/client/query_engine/mod.rs +++ b/pgdog/src/frontend/client/query_engine/mod.rs @@ -246,7 +246,9 @@ impl QueryEngine { command => self.unknown_command(context, command.clone()).await?, } - cache().save_response_in_cache(&mut context.cache_context).await; + cache() + .save_response_in_cache(&mut context.cache_context) + .await; self.hooks.after_execution(context)?;