From 2aef69c356cccaee93d86b9c0a7e5a9eeb594003 Mon Sep 17 00:00:00 2001 From: wenkaifan0720 Date: Thu, 2 Jul 2026 17:44:53 -0700 Subject: [PATCH] =?UTF-8?q?feat(ipc):=20wire-protocol=20version=20handshak?= =?UTF-8?q?e=20=E2=80=94=20refuse=20a=20skewed=20cef=5Fhost=20instead=20of?= =?UTF-8?q?=20silently=20mis-parsing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Architecture-audit follow-up (finding #4): the framed kOp protocol had no version negotiation — opReady carried a single ad-hoc-flag byte, so a plugin paired with a protocol-skewed host (a FLUTTER_CEF_HOST override, a stale from-source build, a stale embedded copy) mis-parsed or silently dropped frames: frozen/blank tiles with zero breadcrumb. The content-hash distribution keeps the pair matched on the normal path; this closes the bypass vectors. - cef_host (main.mm): opReady's payload is now [readyFlags, kCefHostProtocolVersion]. Pre-handshake hosts sent 1 byte and read as v0. Unknown opcodes in the reader are now logged once per opcode via SendLog (previously a silent `default: break`) — a newer-plugin/older-host frame drop leaves a breadcrumb. - CefProfileHost.swift: expected-version constant beside the opcode table (bump BOTH on any wire change); the opReady arm refuses a mismatched host BEFORE flushing any queued create, clears pendingCreates, and fires onProtocolMismatch — reusing the F.5 refuse-at-ready pattern. - FlutterCefPlugin.swift: the host-death teardown is extracted to failHost(_:reason:) and shared; onProtocolMismatch fails every attached session with processGone("protocolMismatch(host=vN)") and tears the host down. Deliberately NO auto-respawn (it would re-resolve the same binary and loop); the consumer's bounded recovery surfaces it. 
Validated live with the example app: v1 plugin + v0 host (the currently-published prebuilt) is REFUSED at the handshake ("wire-protocol version 0 != expected 1", zero renders); v1 plugin + from-source v1 host renders flutter.dev normally; Dart unit suite green. NOTE: changes native/cef_host -> new content hash. After merge, publish the new artifact (work_canvas `make publish-cef-host`) and bump the consumer pin (picks up #17's fetch codesign-verify as well). Co-Authored-By: Claude Fable 5 --- .../macos/Classes/CefProfileHost.swift | 34 +++++++++ .../macos/Classes/FlutterCefPlugin.swift | 69 ++++++++++++------- .../flutter_cef_macos/native/cef_host/main.mm | 34 ++++++++- 3 files changed, 109 insertions(+), 28 deletions(-) diff --git a/packages/flutter_cef_macos/macos/Classes/CefProfileHost.swift b/packages/flutter_cef_macos/macos/Classes/CefProfileHost.swift index d1858aa..7727b31 100644 --- a/packages/flutter_cef_macos/macos/Classes/CefProfileHost.swift +++ b/packages/flutter_cef_macos/macos/Classes/CefProfileHost.swift @@ -32,6 +32,15 @@ final class CefProfileHost { static let opInvalidate: UInt8 = 0x37 // us -> cef_host: force a repaint to re-kick a stalled first frame (C1) static let opSetVisible: UInt8 = 0x35 // us -> cef_host: WasHidden(!visible); peeked to make the C1 watchdog visibility-aware + // Expected kOp wire-protocol version, announced by the host in opReady's payload + // (byte 1; a 1-byte payload = a host predating the handshake = v0). Must equal + // kCefHostProtocolVersion in native/cef_host/main.mm — bump BOTH on any semantic + // wire change. A mismatched host is refused at the handshake (onProtocolMismatch → + // processGone) instead of silently mis-parsing frames into frozen/blank tiles; the + // skew vectors are FLUTTER_CEF_HOST overrides, stale from-source builds, and stale + // embedded copies (the content-hash fetch can't drift on the normal path). + static let protocolVersion: UInt8 = 1 + // Profile identity / config. 
let profileId: String let profileDir: String @@ -183,6 +192,14 @@ final class CefProfileHost { // and respawns an ephemeral one for the same session. var onInsecureProfileRefused: (() -> Void)? + // Invoked ON THE READER THREAD (not main; the handler hops to main itself) when + // the host announces a kOp wire-protocol version other than [protocolVersion] in + // its opReady payload. The host is refused before ANY create flushes (nothing was + // mis-parsed); the plugin emits processGone("protocolMismatch(host=vN)") for every + // attached session and tears the host down. Deliberately NO auto-respawn: + // respawning would re-resolve the same mismatched binary and loop. + var onProtocolMismatch: ((UInt8) -> Void)? + // C1: invoked ON THE MAIN THREAD when the reader loop exits UNEXPECTEDLY // (cef_host died: EOF/ECONNRESET while running, or a writeAll to a dead pipe) // — NOT on a clean shutdown(). Carries the process exit status so the plugin @@ -1317,6 +1334,23 @@ final class CefProfileHost { private func handleProcessFrame(_ op: UInt8, _ payload: [UInt8]) { switch op { case Self.opReady: + // Protocol handshake FIRST: refuse a version-skewed host before anything is + // flushed to it. Byte 1 is the host's wire-protocol version; a legacy 1-byte + // payload (pre-handshake host) reads as v0 and is refused the same way — + // same-framing semantic drift would otherwise mis-parse or silently drop + // frames (frozen/blank tiles with no breadcrumb). + let hostVersion: UInt8 = payload.count >= 2 ? payload[1] : 0 + if hostVersion != Self.protocolVersion { + NSLog("[cef] REFUSING cef_host for profile '\(profileId)': wire-protocol " + + "version \(hostVersion) != expected \(Self.protocolVersion). 
The " + + "resolved cef_host binary does not match this plugin build " + + "(FLUTTER_CEF_HOST override / stale from-source build / stale embed?).") + writeLock.lock() + pendingCreates.removeAll() // never flushed — the plugin fails the sessions via processGone + writeLock.unlock() + onProtocolMismatch?(hostVersion) + return + } let flags = payload.first ?? 0 let adhoc = (flags & 0x01) != 0 // F.5 dev safety-rail: an ad-hoc (mock-keychain) host must NOT load a named diff --git a/packages/flutter_cef_macos/macos/Classes/FlutterCefPlugin.swift b/packages/flutter_cef_macos/macos/Classes/FlutterCefPlugin.swift index e080fb8..a194864 100644 --- a/packages/flutter_cef_macos/macos/Classes/FlutterCefPlugin.swift +++ b/packages/flutter_cef_macos/macos/Classes/FlutterCefPlugin.swift @@ -474,6 +474,39 @@ public class FlutterCefPlugin: NSObject, FlutterPlugin { /// guard unblocks (hasLiveBrowser also goes false via the host's crashed flag), /// and reap the process. `onHostDied` is dispatched on the main thread by the /// host, so the unlocked dictionaries are touched only here on main (H3). + /// Fail every session attached to `host` (emit processGone with `reason` + dispose), + /// drop the host from the profile registry, and reap it. Main-thread only (the maps + /// are main-thread confined — H3). Shared by the host-death and protocol-mismatch + /// paths, which differ only in the reason string. + private func failHost(_ host: CefProfileHost, reason: String) { + dispatchPrecondition(condition: .onQueue(.main)) + // Every session still routed to this host loses its browser. Snapshot first + // (we mutate the maps in the loop). + let goneSessions = sessionHost.compactMap { $0.value === host ? $0.key : nil } + for sid in goneSessions { + emit("processGone", ["sessionId": sid, "reason": reason]) + // F-5: dispose the session BEFORE niling the maps. dispose() is the only caller of + // registry.unregisterTexture (+ frees the CVPixelBuffer / IOSurface / any pending + // buffer). 
If we just nil sessions[sid], the later Dart controller.dispose -> + // disposeSession early-returns on the now-missing session, so the texture + surfaces + // leak for the engine's lifetime — on EVERY host crash, exactly when recovery (a + // fresh create) happens most. (onBrowserFailed / respawn-failure already dispose; + // this path was the asymmetric leak.) + sessions[sid]?.dispose() + sessions[sid] = nil + sessionHost[sid] = nil + sessionKey[sid] = nil + sessionCreateArgs[sid] = nil + } + // Drop the host from the profile registry so a re-create spawns a fresh + // one. Snapshot the matching keys first — never mutate a Dictionary while + // iterating it. + let goneKeys = profiles.compactMap { $0.value === host ? $0.key : nil } + for k in goneKeys { profiles[k] = nil } + // Reap: idempotent SIGTERM(+SIGKILL escalation), a no-op if already exited. + host.shutdown() + } + private func wireHostDied(_ host: CefProfileHost) { host.onHostDied = { [weak self, weak host] status in dispatchPrecondition(condition: .onQueue(.main)) @@ -482,32 +515,18 @@ public class FlutterCefPlugin: NSObject, FlutterPlugin { // when it loses the cache singleton lock to another process. Surface that as // a distinct reason so the widget can say "already open elsewhere" instead of // a generic crash. - let reason = (status == 2) ? "locked" : "crashed" - // Every session still routed to this host loses its browser. Snapshot first - // (we mutate the maps in the loop). - let goneSessions = self.sessionHost.compactMap { $0.value === host ? $0.key : nil } - for sid in goneSessions { - self.emit("processGone", ["sessionId": sid, "reason": reason]) - // F-5: dispose the session BEFORE niling the maps. dispose() is the only caller of - // registry.unregisterTexture (+ frees the CVPixelBuffer / IOSurface / any pending - // buffer). 
If we just nil sessions[sid], the later Dart controller.dispose -> - // disposeSession early-returns on the now-missing session, so the texture + surfaces - // leak for the engine's lifetime — on EVERY host crash, exactly when recovery (a - // fresh create) happens most. (onBrowserFailed / respawn-failure already dispose; - // this path was the asymmetric leak.) - self.sessions[sid]?.dispose() - self.sessions[sid] = nil - self.sessionHost[sid] = nil - self.sessionKey[sid] = nil - self.sessionCreateArgs[sid] = nil + self.failHost(host, reason: (status == 2) ? "locked" : "crashed") + } + // Protocol handshake refusal: the host announced a wire-protocol version this + // plugin doesn't speak (see CefProfileHost.protocolVersion). Nothing was flushed + // to it, so nothing mis-parsed — fail its sessions with a distinct reason and + // tear it down. Deliberately NO auto-respawn (a respawn would re-resolve the + // same mismatched binary and loop); the consumer's bounded recovery surfaces it. + host.onProtocolMismatch = { [weak self, weak host] hostVersion in + DispatchQueue.main.async { + guard let self = self, let host = host else { return } + self.failHost(host, reason: "protocolMismatch(host=v\(hostVersion))") } - // Drop the host from the profile registry so a re-create spawns a fresh - // one. Snapshot the matching keys first — never mutate a Dictionary while - // iterating it. - let goneKeys = self.profiles.compactMap { $0.value === host ? $0.key : nil } - for k in goneKeys { self.profiles[k] = nil } - // Reap: idempotent SIGTERM(+SIGKILL escalation), a no-op if already exited. 
- host.shutdown() } // H7: a SINGLE browser's create failed (host otherwise healthy) — drop just that // session + emit processGone for it, so Dart stops waiting on a browser that will diff --git a/packages/flutter_cef_macos/native/cef_host/main.mm b/packages/flutter_cef_macos/native/cef_host/main.mm index 3eb0e57..4f6200d 100644 --- a/packages/flutter_cef_macos/native/cef_host/main.mm +++ b/packages/flutter_cef_macos/native/cef_host/main.mm @@ -96,6 +96,17 @@ namespace { +// ---- Wire protocol version ---- +// Announced in kOpReady's payload (byte 1; byte 0 stays the ready-flags byte) so the +// Swift plugin can REFUSE a host speaking a different protocol instead of silently +// mis-parsing frames (frozen/blank tiles with no breadcrumb). The content-hash +// distribution keeps host + plugin matched on the normal path; this catches the skew +// vectors that bypass it (FLUTTER_CEF_HOST env override, a stale from-source build, a +// stale embedded copy). BUMP THIS on any semantic change to the kOp wire protocol +// below, together with CefProfileHost.protocolVersion (Swift side) — the two must +// stay equal. Hosts predating the handshake send a 1-byte payload and read as v0. +constexpr uint8_t kCefHostProtocolVersion = 1; + // ---- Opcodes ---- constexpr uint8_t kOpPresent = 0x01; constexpr uint8_t kOpReady = 0x02; @@ -1447,7 +1458,9 @@ void OnBeforeCommandLineProcessing( // this profile). Nothing loads — and nothing is written to the profile cache — // until the first kOpCreateBrowser, which is the safety window the host uses to // refuse a persistent profile under a mock-keychain (ad-hoc) build (F.5). The - // readyFlags byte tells the host whether this is an ad-hoc build (bit0). + // payload is [readyFlags (bit0 = ad-hoc build), protocolVersion] — the version + // byte lets the plugin (CefProfileHost) refuse a protocol-skewed cef_host at the + // handshake instead of silently mis-parsing every later frame. 
void OnContextInitialized() override { CEF_REQUIRE_UI_THREAD(); if (std::getenv("FLUTTER_CEF_DEBUG")) @@ -1456,7 +1469,8 @@ void OnContextInitialized() override { #ifdef CEF_HOST_ADHOC ready_flags |= 0x01; // bit0 = ad-hoc / mock-keychain build #endif - SendFrame(/*browser_id=*/0, kOpReady, &ready_flags, 1); + const uint8_t ready_payload[2] = {ready_flags, kCefHostProtocolVersion}; + SendFrame(/*browser_id=*/0, kOpReady, ready_payload, sizeof(ready_payload)); } IMPLEMENT_REFCOUNTING(HostApp); }; @@ -2320,8 +2334,22 @@ void IpcReadLoop() { ch)); break; } - default: + default: { + // An opcode this build doesn't know = protocol skew (a newer plugin driving an + // older host — the kOpReady version handshake should have refused it, but an + // in-between version or a bypassed handshake still lands here). Log ONCE per + // opcode (this reader is a single thread, so plain statics are safe) instead of + // silently dropping — a silent drop is a frozen tile with no breadcrumb. + static bool logged_unknown[256] = {false}; + if (!logged_unknown[opcode]) { + logged_unknown[opcode] = true; + SendLog(/*browser_id=*/0, + "unknown opcode " + std::to_string(opcode) + + " (protocol skew? plugin newer than host) — dropping this " + "and further frames of this opcode"); + } break; + } } } // Parent died / socket closed: quit.