diff --git a/examples/openclaw-plugin/config.ts b/examples/openclaw-plugin/config.ts index be9c4f916..80233dbcb 100644 --- a/examples/openclaw-plugin/config.ts +++ b/examples/openclaw-plugin/config.ts @@ -37,6 +37,15 @@ export type MemoryOpenVikingConfig = { emitStandardDiagnostics?: boolean; /** When true, log tenant routing for semantic find and session writes (messages/commit) to the plugin logger. */ logFindRequests?: boolean; + // SCCS integration (tool-output compression) + sccsEnabled?: boolean; + sccsCompressThreshold?: number; + sccsSummaryMaxChars?: number; + sccsEnableSmartSummary?: boolean; + sccsStorageTtlSeconds?: number; + sccsStorageDir?: string; + sccsMaxEntries?: number; + }; const DEFAULT_BASE_URL = "http://127.0.0.1:1933"; @@ -58,6 +67,13 @@ const DEFAULT_INGEST_REPLY_ASSIST_MIN_CHARS = 120; const DEFAULT_INGEST_REPLY_ASSIST_IGNORE_SESSION_PATTERNS: string[] = []; const DEFAULT_EMIT_STANDARD_DIAGNOSTICS = false; const DEFAULT_LOCAL_CONFIG_PATH = join(homedir(), ".openviking", "ov.conf"); +const DEFAULT_SCCS_ENABLED = false; +const DEFAULT_SCCS_COMPRESS_THRESHOLD = 3000; +const DEFAULT_SCCS_SUMMARY_MAX_CHARS = 300; +const DEFAULT_SCCS_ENABLE_SMART_SUMMARY = true; +const DEFAULT_SCCS_STORAGE_TTL_SECONDS = 86400; +const DEFAULT_SCCS_STORAGE_DIR = join(homedir(), ".openclaw", "sccs"); +const DEFAULT_SCCS_MAX_ENTRIES = 10000; const DEFAULT_AGENT_ID = "default"; @@ -167,6 +183,13 @@ export const memoryOpenVikingConfigSchema = { "ingestReplyAssistIgnoreSessionPatterns", "emitStandardDiagnostics", "logFindRequests", + "sccsEnabled", + "sccsCompressThreshold", + "sccsSummaryMaxChars", + "sccsEnableSmartSummary", + "sccsStorageTtlSeconds", + "sccsStorageDir", + "sccsMaxEntries", ], "openviking config", ); @@ -270,6 +293,34 @@ export const memoryOpenVikingConfigSchema = { cfg.logFindRequests === true || envFlag("OPENVIKING_LOG_ROUTING") || envFlag("OPENVIKING_DEBUG"), + sccsEnabled: cfg.sccsEnabled === true ? 
true : DEFAULT_SCCS_ENABLED, + sccsCompressThreshold: Math.max( + 2000, + Math.floor(toNumber(cfg.sccsCompressThreshold, DEFAULT_SCCS_COMPRESS_THRESHOLD)), + ), + sccsSummaryMaxChars: Math.max( + 50, + Math.floor(toNumber(cfg.sccsSummaryMaxChars, DEFAULT_SCCS_SUMMARY_MAX_CHARS)), + ), + sccsEnableSmartSummary: + typeof cfg.sccsEnableSmartSummary === "boolean" + ? cfg.sccsEnableSmartSummary + : DEFAULT_SCCS_ENABLE_SMART_SUMMARY, + sccsStorageTtlSeconds: Math.max( + 600, + Math.floor(toNumber(cfg.sccsStorageTtlSeconds, DEFAULT_SCCS_STORAGE_TTL_SECONDS)), + ), + sccsStorageDir: resolvePath( + resolveEnvVars( + typeof cfg.sccsStorageDir === "string" && cfg.sccsStorageDir.trim() + ? cfg.sccsStorageDir + : DEFAULT_SCCS_STORAGE_DIR, + ).replace(/^~/, homedir()), + ), + sccsMaxEntries: Math.max( + 1000, + Math.floor(toNumber(cfg.sccsMaxEntries, DEFAULT_SCCS_MAX_ENTRIES)), + ), }; }, uiHints: { diff --git a/examples/openclaw-plugin/index.ts b/examples/openclaw-plugin/index.ts index 2fca3a0ca..7b5f3d856 100644 --- a/examples/openclaw-plugin/index.ts +++ b/examples/openclaw-plugin/index.ts @@ -47,6 +47,7 @@ import { openClawSessionToOvStorageId, } from "./context-engine.js"; import type { ContextEngineWithCommit } from "./context-engine.js"; +import { createSccsIntegration } from "./sccs/integration.js"; type PluginLogger = { debug?: (message: string) => void; @@ -527,6 +528,18 @@ const contextEnginePlugin = { ? 
(api.pluginConfig as Record) : {}; const cfg = memoryOpenVikingConfigSchema.parse(api.pluginConfig); + const sccs = createSccsIntegration({ + cfg: { + enabled: cfg.sccsEnabled, + compressThreshold: cfg.sccsCompressThreshold, + summaryMaxChars: cfg.sccsSummaryMaxChars, + enableSmartSummary: cfg.sccsEnableSmartSummary, + storageTtlSeconds: cfg.sccsStorageTtlSeconds, + storageDir: cfg.sccsStorageDir, + maxEntries: cfg.sccsMaxEntries, + }, + logger: api.logger, + }); const bypassSessionPatterns = compileSessionPatterns(cfg.bypassSessionPatterns); const rawAgentId = rawCfg.agentId; if (cfg.logFindRequests) { @@ -1062,6 +1075,10 @@ const mergeFindResults = (results: FindResult[]): FindResult => { { name: "memory_recall" }, ); + if (sccs.enabled && sccs.tool) { + api.registerTool(sccs.tool, { name: "fetch_original_data" }); + } + api.registerTool( (ctx: ToolContext) => ({ name: "memory_store", @@ -1589,7 +1606,7 @@ const mergeFindResults = (results: FindResult[]): FindResult => { if (typeof api.registerContextEngine === "function") { api.registerContextEngine(contextEnginePlugin.id, () => { - contextEngineRef = createMemoryOpenVikingContextEngine({ + const baseEngine = createMemoryOpenVikingContextEngine({ id: contextEnginePlugin.id, name: contextEnginePlugin.name, version: "0.1.0", @@ -1603,10 +1620,14 @@ const mergeFindResults = (results: FindResult[]): FindResult => { resolveAgentId, rememberSessionAgentId, }); + // Wrap base engine with SCCS compression layer (no-op when sccsEnabled=false). + // Cast: spread preserves all base engine props (including commitOVSession), + // but TS cannot infer the subtype relationship across the internal ContextEngine types. 
+ contextEngineRef = sccs.wrapContextEngine(baseEngine) as typeof contextEngineRef; return contextEngineRef; }); api.logger.info( - "openviking: registered context-engine (before_prompt_build=auto-recall, afterTurn=auto-capture, assemble=archive+active, session→OV id=uuid-or-sha256 + diag/Phase2 options)", + `openviking: registered context-engine (before_prompt_build=auto-recall, afterTurn=auto-capture, assemble=archive+active, session→OV id=uuid-or-sha256 + diag/Phase2 options)${sccs.enabled ? " + SCCS compression" : ""}`, ); } else { api.logger.warn( diff --git a/examples/openclaw-plugin/openclaw.plugin.json b/examples/openclaw-plugin/openclaw.plugin.json index 5b5d1a2c2..11c3a9573 100644 --- a/examples/openclaw-plugin/openclaw.plugin.json +++ b/examples/openclaw-plugin/openclaw.plugin.json @@ -134,6 +134,34 @@ "label": "Log find requests", "help": "Log tenant routing: /search/find + session messages/commit (X-OpenViking-*; not apiKey). Or set env OPENVIKING_LOG_ROUTING=1 or OPENVIKING_DEBUG=1. 
Local mode: subprocess stderr at info when enabled.", "advanced": true + }, + "sccsEnabled": { + "label": "Enable SCCS Compression", + "help": "Enable SCCS tool-output compression in the context engine" + }, + "sccsCompressThreshold": { + "label": "SCCS Compress Threshold", + "help": "Compress tool outputs longer than this many characters" + }, + "sccsSummaryMaxChars": { + "label": "SCCS Summary Max Chars", + "help": "Maximum characters for SCCS REF_ID summaries" + }, + "sccsEnableSmartSummary": { + "label": "SCCS Smart Summary", + "help": "Enable structured summary extraction for SCCS" + }, + "sccsStorageTtlSeconds": { + "label": "SCCS Storage TTL (seconds)", + "help": "How long REF_ID content is retained" + }, + "sccsStorageDir": { + "label": "SCCS Storage Directory", + "help": "Directory for persisted REF_ID content (default: ~/.openclaw/sccs)" + }, + "sccsMaxEntries": { + "label": "SCCS Max Entries", + "help": "Max in-memory REF_ID entries before eviction" } }, "configSchema": { @@ -220,6 +248,27 @@ }, "logFindRequests": { "type": "boolean" + }, + "sccsEnabled": { + "type": "boolean" + }, + "sccsCompressThreshold": { + "type": "number" + }, + "sccsSummaryMaxChars": { + "type": "number" + }, + "sccsEnableSmartSummary": { + "type": "boolean" + }, + "sccsStorageTtlSeconds": { + "type": "number" + }, + "sccsStorageDir": { + "type": "string" + }, + "sccsMaxEntries": { + "type": "number" } } } diff --git a/examples/openclaw-plugin/sccs/compressor.ts b/examples/openclaw-plugin/sccs/compressor.ts new file mode 100644 index 000000000..75f3957ac --- /dev/null +++ b/examples/openclaw-plugin/sccs/compressor.ts @@ -0,0 +1,68 @@ +import { SummaryExtractor } from "./summarizer.js"; +import { extractTextContent, hasRefId, isToolRole, md5Hex, setTextContent } from "./utils.js"; +import type { RefStore } from "./storage.js"; +export type CompressorConfig = { + compressThreshold: number; + summaryMaxChars: number; + enableSmartSummary: boolean; + storageTtlSeconds: number; +}; + 
/**
 * System-prompt addendum that explains to the model how to treat `[REF_ID: …]`
 * placeholders. Returned as `systemPromptAddition` by `compressToolMessages`
 * whenever at least one message was compressed.
 */
export const REF_ID_INSTRUCTION = `\n=== REF_ID DECISION GUIDELINES ===\nWhen you see any [REF_ID: xxx] in a tool response:\n1. Read the summary carefully.\n2. Ask yourself: 'Does this summary contain enough information for my current task?'\n- Yes → proceed normally, ignore the REF_ID.\n- No → call fetch_original_data for that REF_ID.\n3. Common cases where you SHOULD fetch:\n- You need more than ~30 lines of code\n- You need exact line numbers/indentation\n- You plan to edit/replace and need full context\n4. Common cases where you can skip:\n- You only needed to confirm a function exists\n- You only care about a small part already in the summary\n`;

/**
 * Heading markers of OpenClaw workspace files. A tool output whose first line
 * contains one of these markers is never compressed.
 */
const OPENCLAW_CONFIG_WHITELIST = [
  "# SOUL.md",
  "# MEMORY.md",
  "# USER.md",
  "# AGENTS.md",
  "# HEARTBEAT.md",
  // OpenClaw's workspace identity file is IDENTITY.md; the original list only
  // carried the misspelling below, so that file was never protected.
  "# IDENTITY.md",
  // Kept so anything that relied on the old (misspelled) marker still matches.
  "# IDENTIFY.md",
  "# TOOLS.md",
  "# BOOTSTRAP.md",
];

/**
 * Report whether `text` looks like an OpenClaw workspace file, judged only by
 * its first line (after trimming surrounding whitespace).
 */
function isOpenClawConfigFile(text: string): boolean {
  const firstLine = text.trim().split("\n")[0]?.trim();
  if (!firstLine) return false;
  return OPENCLAW_CONFIG_WHITELIST.some((marker) => firstLine.includes(marker));
}

/**
 * Produce the content-addressed refId (MD5 of the full text) and the short
 * summary for one tool output.
 *
 * @param params.summarizer Optional extractor to reuse across calls; one is
 *   created from `config.summaryMaxChars` when omitted.
 */
function buildSummary(params: {
  text: string;
  config: CompressorConfig;
  summarizer?: SummaryExtractor;
}): { refId: string; summary: string } {
  const summarizer = params.summarizer ?? new SummaryExtractor(params.config.summaryMaxChars);
  return {
    refId: md5Hex(params.text),
    summary: summarizer.summarize(params.text, params.config.enableSmartSummary),
  };
}

/**
 * Replace oversized tool outputs with `[REF_ID: …]` placeholders.
 *
 * A message is compressed only when all of the following hold: its role is a
 * tool role, its text is longer than `config.compressThreshold`, the text does
 * not already contain a REF_ID placeholder, it is not an OpenClaw workspace
 * file, and the placeholder is actually shorter than the original text. The
 * original text is written to `store` under the refId before the message is
 * replaced. The input array and its message objects are not mutated.
 *
 * @returns The (shallow-copied) message list, the number of messages that were
 *   compressed, and `REF_ID_INSTRUCTION` as `systemPromptAddition` when that
 *   number is greater than zero.
 */
export async function compressToolMessages(params: {
  messages: Array<{ role?: unknown; content?: unknown }>;
  config: CompressorConfig;
  store: RefStore;
  logger?: { info?: (msg: string) => void; warn?: (msg: string) => void };
}): Promise<{
  messages: Array<{ role?: unknown; content?: unknown }>;
  systemPromptAddition?: string;
  compressedCount: number;
}> {
  const { messages, config, store, logger } = params;
  // One extractor serves the whole batch; it only holds the max length.
  const summarizer = new SummaryExtractor(config.summaryMaxChars);
  const nextMessages = messages.map((msg) => ({ ...msg }));
  let compressedCount = 0;

  for (const [i, msg] of nextMessages.entries()) {
    if (!isToolRole(msg.role)) continue;
    const text = extractTextContent(msg.content);
    if (!text) continue;
    if (text.length <= config.compressThreshold || hasRefId(text)) continue;
    if (isOpenClawConfigFile(text)) continue;

    const { refId, summary } = buildSummary({ text, config, summarizer });
    const compressed = `[REF_ID: ${refId}] (Summary: ${summary}). NOTE: You can pass this REF_ID directly as a tool parameter.`;
    // summaryMaxChars has no upper bound, so a large setting can make the
    // placeholder as long as the original; skip instead of paying a store write
    // for no saving.
    if (compressed.length >= text.length) continue;

    await store.set(refId, text, config.storageTtlSeconds);
    nextMessages[i] = setTextContent(msg, compressed);
    compressedCount += 1;
    logger?.info?.(`[sccs] compressed tool output #${i} -> REF_ID ${refId.slice(0, 8)}...`);
  }

  return {
    messages: nextMessages,
    compressedCount,
    systemPromptAddition: compressedCount > 0 ? REF_ID_INSTRUCTION : undefined,
  };
}
/**
 * Build the SCCS (tool-output compression) integration.
 *
 * When `cfg.enabled` is false the result is inert: `wrapContextEngine` returns
 * the engine it was given and `tool` is undefined. Otherwise a disk-backed
 * reference store is created under `cfg.storageDir`, `wrapContextEngine`
 * returns a copy of the engine whose `assemble` compresses oversized tool
 * outputs, and `tool` is the `fetch_original_data` tool bound to the same store.
 *
 * @param params.cfg    Resolved SCCS settings.
 * @param params.logger Logger handed to the compressor and the fetch tool.
 */
export function createSccsIntegration(params: { cfg: SccsConfig; logger: Logger }) {
  const { cfg, logger } = params;

  if (!cfg.enabled) {
    return {
      enabled: false as const,
      wrapContextEngine: <T extends ContextEngine>(engine: T): T => engine,
      tool: undefined,
    };
  }

  const store = new DiskBackedStore({
    dir: resolveHomePath(cfg.storageDir),
    maxEntries: cfg.maxEntries,
  });

  // The compressor settings never change after construction, so build them once
  // instead of on every assemble call.
  const compressorConfig = {
    compressThreshold: cfg.compressThreshold,
    summaryMaxChars: cfg.summaryMaxChars,
    enableSmartSummary: cfg.enableSmartSummary,
    storageTtlSeconds: cfg.storageTtlSeconds,
  };

  const wrapContextEngine = <T extends ContextEngine>(engine: T): T => ({
    ...engine,
    assemble: async (assembleParams: Parameters<ContextEngine["assemble"]>[0]) => {
      const base = await engine.assemble(assembleParams);

      let compressed: Awaited<ReturnType<typeof compressToolMessages>>;
      try {
        compressed = await compressToolMessages({
          messages: base.messages,
          config: compressorConfig,
          store,
          logger,
        });
      } catch (err: unknown) {
        // Compression is an optional layer: a failure here must not take down
        // context assembly, so fall back to the base engine's result.
        const reason = err instanceof Error ? err.message : String(err);
        logger.warn?.(`[sccs] compression failed, using uncompressed context: ${reason}`);
        return base;
      }

      // Nothing was replaced: hand back the base result untouched so the base
      // engine's own token estimate is not overwritten by the heuristic below.
      if (compressed.compressedCount === 0) return base;

      const systemPromptAddition =
        base.systemPromptAddition && compressed.systemPromptAddition
          ? `${base.systemPromptAddition}\n\n${compressed.systemPromptAddition}`
          : base.systemPromptAddition || compressed.systemPromptAddition;

      return {
        ...base,
        messages: compressed.messages,
        estimatedTokens: estimateTokensForMessages(compressed.messages),
        ...(systemPromptAddition ? { systemPromptAddition } : {}),
      };
    },
  });

  const tool = createFetchOriginalDataTool({ store, logger });

  return {
    enabled: true as const,
    wrapContextEngine,
    tool,
  };
}
/** A stored payload together with its absolute expiry time (epoch milliseconds). */
export type StoredValue = { content: string; expiresAt: number };

/** Minimal async key/value contract used to keep original tool outputs by refId. */
export interface RefStore {
  /** Resolve to the stored content, or `null` when absent or expired. */
  get(refId: string): Promise<string | null>;
  /** Store `content` under `refId` for `ttlSeconds` seconds. */
  set(refId: string, content: string, ttlSeconds: number): Promise<void>;
}

/**
 * In-process `RefStore` with per-entry expiry and an optional size cap.
 *
 * Entries live in a `Map`, whose iteration order is insertion order; the cap is
 * enforced by deleting from the front, i.e. the least recently written entry.
 */
export class MemoryStore implements RefStore {
  private readonly entries = new Map<string, StoredValue>();
  private readonly maxEntries?: number;

  /** @param maxEntries Size cap; omitted, zero, negative or NaN means unbounded. */
  constructor(maxEntries?: number) {
    this.maxEntries = maxEntries && maxEntries > 0 ? maxEntries : undefined;
  }

  /** Return the content for `refId`, dropping and reporting `null` for an expired entry. */
  async get(refId: string): Promise<string | null> {
    const entry = this.entries.get(refId);
    if (!entry) return null;
    if (entry.expiresAt <= Date.now()) {
      this.entries.delete(refId);
      return null;
    }
    return entry.content;
  }

  /**
   * Store `content` under `refId`. The TTL is clamped to at least one second.
   * Writing an existing key makes it the most recent entry.
   */
  async set(refId: string, content: string, ttlSeconds: number): Promise<void> {
    // Math.max(1, NaN) is NaN, and a NaN expiry compares false against every
    // timestamp, so the entry would never expire. Treat NaN as the minimum TTL.
    const ttl = Number.isNaN(ttlSeconds) ? 1 : Math.max(1, ttlSeconds);
    const expiresAt = Date.now() + ttl * 1000;

    // Map.set on an existing key keeps its original position; delete first so
    // a rewritten entry is not the next one evicted.
    this.entries.delete(refId);
    this.entries.set(refId, { content, expiresAt });

    if (this.maxEntries === undefined) return;
    while (this.entries.size > this.maxEntries) {
      const oldest = this.entries.keys().next();
      if (oldest.done) break;
      this.entries.delete(oldest.value);
    }
  }
}
+ * Resolves against the refs directory and verifies the result stays inside it, + * preventing path traversal even if a non-hash refId slips through. + */ + private pathFor(refId: string): string { + const refsDir = resolve(this.dir, "refs"); + const target = resolve(refsDir, `${refId}.json`); + const rel = relative(refsDir, target); + if (rel.startsWith("..") || resolve(refsDir, rel) !== target) { + throw new Error(`[sccs] path traversal detected for refId: ${refId}`); + } + return target; + } + async get(refId: string): Promise { + const cached = await this.memory.get(refId); + if (cached !== null) return cached; + const path = this.pathFor(refId); + try { + const raw = await readFile(path, "utf8"); + const parsed = JSON.parse(raw) as StoredValue; + if (!parsed || typeof parsed.content !== "string") return null; + if (parsed.expiresAt <= Date.now()) { + await rm(path, { force: true }); + return null; + } + await this.memory.set(refId, parsed.content, Math.ceil((parsed.expiresAt - Date.now()) / 1000)); + return parsed.content; + } catch { + return null; + } + } + async set(refId: string, content: string, ttlSeconds: number): Promise { + await this.memory.set(refId, content, ttlSeconds); + const expiresAt = Date.now() + Math.max(1, ttlSeconds) * 1000; + const path = this.pathFor(refId); + try { + await mkdir(join(this.dir, "refs"), { recursive: true }); + await writeFile(path, JSON.stringify({ content, expiresAt }), "utf8"); + } catch (err) { + // Disk persistence is best-effort, but log the failure for observability. + // Data remains available in memory until evicted or process exits. 
const DEFAULT_SUMMARY_MAX_CHARS = 300;
type JsonValue = null | boolean | number | string | JsonValue[] | { [k: string]: JsonValue };

/** Words ignored by keyword extraction. Built once instead of on every call. */
const KEYWORD_STOP_WORDS: ReadonlySet<string> = new Set([
  "the","and","for","with","from","that","this","have","has","are","was","were","but","you","your","not",
  "can","will","all","any","get","set","out","too","use","using","into","when","where","none","true","false",
  "null","json","line","file","path","http","https","info","debug","warning","error",
]);

/**
 * Take the first `end` UTF-16 code units of `text` without leaving half of a
 * surrogate pair at the cut.
 */
function sliceWholeChars(text: string, end: number): string {
  let cut = Math.min(Math.max(0, Math.floor(end)), text.length);
  if (cut > 0 && cut < text.length) {
    const lastUnit = text.charCodeAt(cut - 1);
    // A high surrogate (U+D800..U+DBFF) at the cut means its partner was cut off.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
  }
  return text.slice(0, cut);
}

/**
 * Produces a short, single-line description of a large tool output.
 *
 * The content is classified as JSON, markdown, delimited table, log, code or
 * plain text (tried in that order) and summarised with a format-specific
 * heuristic. Every summary is capped at `maxChars` characters.
 */
export class SummaryExtractor {
  private readonly maxChars: number;

  /** @param maxChars Upper bound for every summary; defaults to 300. */
  constructor(maxChars?: number) {
    this.maxChars = maxChars ?? DEFAULT_SUMMARY_MAX_CHARS;
  }

  /**
   * Collapse all whitespace runs to single spaces, trim, and cap the length.
   * When text is cut, the last kept position is replaced by `…`. A negative
   * limit is treated as zero, and a cut never splits a surrogate pair.
   *
   * @param maxChars Optional per-call limit; falls back to the instance limit.
   */
  truncate(text: string, maxChars?: number): string {
    const cleaned = text.replace(/\s+/g, " ").trim();
    // slice(0, negative) counts from the end, so a negative limit would return
    // almost the whole string; clamp it.
    const limit = Math.max(0, maxChars ?? this.maxChars);
    if (cleaned.length <= limit) return cleaned;
    if (limit <= 1) return sliceWholeChars(cleaned, limit);
    return `${sliceWholeChars(cleaned, limit - 1)}…`;
  }

  /**
   * Summarise `content`.
   *
   * @param enable When false, skip format detection and just truncate.
   */
  summarize(content: string, enable = true): string {
    if (!enable) return this.truncate(content);
    try {
      return this.summarizeJson(JSON.parse(content) as JsonValue);
    } catch {
      // Not JSON: fall through the text heuristics, most specific first.
      if (this.detectMarkdown(content)) return this.summarizeMarkdown(content);
      const delim = this.detectTable(content);
      if (delim) return this.summarizeTable(content, delim);
      if (this.detectLog(content)) return this.summarizeLog(content);
      if (this.detectCode(content)) return this.summarizeCode(content);
      return this.summarizeText(content);
    }
  }

  /** Describe a parsed JSON value: item count and types for arrays, keys and sample fields for objects. */
  private summarizeJson(data: JsonValue): string {
    if (Array.isArray(data)) {
      // Type histogram over the first 30 items only.
      const types = new Map<string, number>();
      data.slice(0, 30).forEach((item) => {
        const t = Array.isArray(item) ? "array" : item === null ? "null" : typeof item;
        types.set(t, (types.get(t) ?? 0) + 1);
      });
      const typePart = Array.from(types.entries())
        .sort((a, b) => b[1] - a[1])
        .slice(0, 3)
        .map(([t, c]) => `${t}:${c}`)
        .join(",");
      const first = data[0];
      const firstPart =
        first && typeof first === "object" && !Array.isArray(first)
          ? `first_keys=${Object.keys(first).slice(0, 6).join(",")}`
          : `first=${this.truncate(String(first ?? ""), 40)}`;
      return this.truncate(`JSON array: items=${data.length} types=${typePart} ${firstPart}`);
    }
    if (data && typeof data === "object") {
      const keys = Object.keys(data);
      const fields: string[] = [];
      keys.slice(0, 6).forEach((k) => {
        const v = (data as Record<string, JsonValue>)[k];
        if (Array.isArray(v)) fields.push(`${k}[${v.length}]`);
        else if (v && typeof v === "object") fields.push(`${k}{${Object.keys(v).length}}`);
        else if (typeof v === "string") fields.push(`${k}='${this.truncate(v, 20)}'`);
        else fields.push(`${k}=${String(v)}`);
      });
      return this.truncate(
        `JSON object: keys=${keys.length}[${keys.slice(0, 8).join(",")}] sample=${fields.join(";")}`,
      );
    }
    return this.truncate(String(data));
  }

  /** Most frequent identifier-like words of four or more characters, excluding stop words. */
  private extractKeywords(content: string, limit = 4): string[] {
    const words = content.toLowerCase().match(/[A-Za-z_][A-Za-z0-9_]{2,}/g) ?? [];
    const freq = new Map<string, number>();
    for (const w of words) {
      if (w.length < 4 || KEYWORD_STOP_WORDS.has(w)) continue;
      freq.set(w, (freq.get(w) ?? 0) + 1);
    }
    return Array.from(freq.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, limit)
      .map(([k]) => k);
  }

  /**
   * Pick the `limit` highest-scoring distinct lines, each shortened to 60
   * characters. Error markers score highest, then warnings, HTTP 4xx/5xx-like
   * numbers, HTTP verbs, call-like tokens and paths; longer lines get a small bonus.
   */
  private extractKeyLines(lines: string[], limit = 2): string[] {
    const score = (line: string) => {
      const upper = line.toUpperCase();
      let s = 0;
      if (/ERROR|EXCEPTION|TRACEBACK|FATAL|FAILED/.test(upper)) s += 6;
      if (/WARN/.test(upper)) s += 4;
      if (/\b[45]\d{2}\b/.test(line)) s += 3;
      if (/\b(GET|POST|PUT|DELETE|PATCH)\b/.test(upper)) s += 2;
      if (/[A-Za-z_][A-Za-z0-9_]*\(/.test(line)) s += 2;
      if (/(\/[A-Za-z0-9._-]+){2,}|[A-Za-z]:\\/.test(line)) s += 2;
      s += Math.min(line.length, 120) / 30;
      return s;
    };
    // Score each line once up front; scoring inside the comparator would rerun
    // every regex on each comparison. The sort is stable, so ties keep input order.
    const ranked = lines
      .map((l) => l.trim())
      .filter(Boolean)
      .map((line) => ({ line, rank: score(line) }))
      .sort((a, b) => b.rank - a.rank);
    const out: string[] = [];
    const seen = new Set<string>();
    for (const { line } of ranked) {
      const compact = this.truncate(line, 60);
      if (seen.has(compact)) continue;
      seen.add(compact);
      out.push(compact);
      if (out.length >= limit) break;
    }
    return out;
  }

  /** Fallback summary: size and pattern counters, top keywords, first line and key lines. */
  private summarizeText(content: string): string {
    const lines = content.split(/\r?\n/).map((l) => l.trim()).filter(Boolean);
    const count = (re: RegExp) => (content.match(re) ?? []).length;
    const head = `len=${content.length},line=${lines.length},E=${count(/\bERROR\b|\bEXCEPTION\b|\bTRACEBACK\b/gi)},W=${count(/\bWARN(?:ING)?\b/gi)},url=${count(/https?:\/\/\S+/gi)},path=${count(/(?:[A-Za-z]:\\|\/)[A-Za-z0-9._\\/\-]+/g)},num=${count(/\b\d+(?:\.\d+)?\b/g)}`;
    const keywords = this.extractKeywords(content);
    const first = this.truncate(lines[0] ?? content, 55);
    const keyLines = this.extractKeyLines(lines, 2);
    const parts = [head];
    if (keywords.length) parts.push(`kw=${keywords.join(",")}`);
    parts.push(`first=${first}`);
    if (keyLines.length) parts.push(`key=${keyLines.join(" | ")}`);
    return this.truncate(parts.join(" ; "));
  }

  /** True when at least two non-blank lines carry a log level or an ISO-like timestamp. */
  private detectLog(content: string): boolean {
    const lines = content.split(/\r?\n/).filter((l) => l.trim());
    if (lines.length < 2) return false;
    const levelHits = lines.filter((l) => /\b(INFO|DEBUG|WARN|WARNING|ERROR|TRACE|FATAL)\b/.test(l)).length;
    const tsHits = lines.filter((l) => /\b\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}/.test(l)).length;
    return levelHits >= 2 || tsHits >= 2;
  }

  /** Log summary: line count, per-level counts, timestamp range, error type names and key lines. */
  private summarizeLog(content: string): string {
    const lines = content.split(/\r?\n/).map((l) => l.trim()).filter(Boolean);
    const levels = {
      E: lines.filter((l) => /\b(ERROR|FATAL)\b/.test(l)).length,
      W: lines.filter((l) => /\bWARN(?:ING)?\b/.test(l)).length,
      I: lines.filter((l) => /\bINFO\b/.test(l)).length,
    };
    const ts = lines
      .map((l) => l.match(/\b\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}/)?.[0])
      .filter(Boolean) as string[];
    const errTypes = (content.match(/\b([A-Za-z_][A-Za-z0-9_]*(?:Error|Exception))\b/g) ?? []).slice(0, 2);
    const keyLines = this.extractKeyLines(lines, 2);
    const parts = [`log lines=${lines.length} levels=E${levels.E} W${levels.W} I${levels.I}`];
    if (ts.length) parts.push(`ts=${ts[0]}..${ts[ts.length - 1]}`);
    if (errTypes.length) parts.push(`err=${errTypes.join(",")}`);
    if (keyLines.length) parts.push(`key=${keyLines.join(" | ")}`);
    return this.truncate(parts.join(" ; "));
  }

  /** True when there is a heading plus at least two list items, a code fence or a link. */
  private detectMarkdown(content: string): boolean {
    const lines = content.split(/\r?\n/);
    const headings = lines.filter((l) => /^\s*#{1,6}\s+\S+/.test(l)).length;
    const lists = lines.filter((l) => /^\s*([-*+]|\d+\.)\s+\S+/.test(l)).length;
    const fences = lines.filter((l) => /^\s*```/.test(l)).length;
    const links = (content.match(/\[[^\]]+\]\([^)]+\)/g) ?? []).length;
    return headings >= 1 && (lists >= 2 || fences >= 1 || links >= 1);
  }

  /** Markdown summary: line and heading counts, first headings, fenced-code languages, list and link counts. */
  private summarizeMarkdown(content: string): string {
    const lines = content.split(/\r?\n/);
    const headings = lines
      .filter((l) => /^\s*#{1,6}\s+\S+/.test(l))
      .map((l) => l.replace(/^\s*#{1,6}\s+/, "").trim());
    // Only fence lines that name a language are kept.
    const fences = lines
      .map((l) => l.match(/^\s*```([A-Za-z0-9_+-]+)?\s*$/)?.[1] ?? "")
      .filter(Boolean);
    const listItems = lines.filter((l) => /^\s*([-*+]|\d+\.)\s+\S+/.test(l)).length;
    const linkCount = (content.match(/\[[^\]]+\]\([^)]+\)/g) ?? []).length;
    const parts = [`md lines=${lines.length} headings=${headings.length}`];
    if (headings.length) {
      parts.push(`h=[${headings.slice(0, 2).map((h) => this.truncate(h, 18)).join(",")}]`);
    }
    if (fences.length) {
      parts.push(`code=${fences.length}[${fences.slice(0, 2).join(",")}]`);
    }
    if (listItems) parts.push(`lists=${listItems}`);
    if (linkCount) parts.push(`links=${linkCount}`);
    return this.truncate(parts.join(" ; "));
  }

  /**
   * Guess the column delimiter of tabular text from its first line (tab is
   * preferred over comma). Returns `null` unless at least one of the next three
   * non-blank lines has the same column count as the first.
   */
  private detectTable(content: string): "," | "\t" | null {
    const lines = content.split(/\r?\n/).filter((l) => l.trim());
    if (lines.length < 2) return null;
    const delim = lines[0].includes("\t") ? "\t" : lines[0].includes(",") ? "," : null;
    if (!delim) return null;
    const cols = lines[0].split(delim).length;
    const similar = lines.slice(1, 4).filter((l) => l.split(delim).length === cols).length;
    return similar >= 1 ? delim : null;
  }

  /** Table summary: data-row count, non-empty header cell count and the first four header names. */
  private summarizeTable(content: string, delim: "," | "\t"): string {
    const lines = content.split(/\r?\n/).filter((l) => l.trim());
    const header = lines[0].split(delim).map((c) => c.trim()).filter(Boolean);
    const rows = Math.max(0, lines.length - 1);
    return this.truncate(`table rows=${rows} cols=${header.length} header=[${header.slice(0, 4).join(",")}]`);
  }

  /** True when at least four lines contain `{`, `}` or `;`, or at least three contain a definition/import keyword. */
  private detectCode(content: string): boolean {
    const lines = content.split(/\r?\n/);
    const braces = lines.filter((l) => /[{};]/.test(l)).length;
    const defs = lines.filter((l) => /\b(class|def|function|const|let|var|import|from)\b/.test(l)).length;
    return braces >= 4 || defs >= 3;
  }

  /** Code summary: non-blank line count, the first four call-like names and the first two import lines. */
  private summarizeCode(content: string): string {
    const lines = content.split(/\r?\n/).filter((l) => l.trim());
    const funcs = (content.match(/\b([A-Za-z_][A-Za-z0-9_]*)\s*\(/g) ?? []).slice(0, 4);
    const imports = (content.match(/^\s*(import|from)\b.*$/gm) ?? []).slice(0, 2);
    const parts = [`code lines=${lines.length}`];
    if (funcs.length) {
      parts.push(`funcs=${funcs.map((f) => f.replace(/\s*\(/, "")).join(",")}`);
    }
    if (imports.length) {
      parts.push(`imports=${imports.map((l) => this.truncate(l.trim(), 40)).join(" | ")}`);
    }
    return this.truncate(parts.join(" ; "));
  }
}
/**
 * Flatten message content to plain text.
 *
 * Strings are returned unchanged. For arrays, the non-empty string `text`
 * fields of object entries are joined with newlines and everything else is
 * ignored. Any other object is JSON-serialised, falling back to `String()` when
 * serialisation throws. All remaining values yield an empty string.
 */
export function extractTextContent(content: unknown): string {
  if (typeof content === "string") return content;

  if (Array.isArray(content)) {
    const texts: string[] = [];
    for (const block of content as ContentBlock[]) {
      const isTextBlock = block && typeof block === "object" && typeof block.text === "string";
      if (isTextBlock && block.text) texts.push(block.text as string);
    }
    return texts.join("\n");
  }

  if (!content || typeof content !== "object") return "";

  try {
    return JSON.stringify(content);
  } catch {
    return String(content);
  }
}

/** Return a copy of `message` whose content is a single text block holding `text`. */
export function setTextContent(message: MessageLike, text: string): MessageLike {
  const textBlock = { type: "text", text };
  return { ...message, content: [textBlock] };
}

/** True for the role spellings treated as tool output: `tool`, `toolResult`, `tool_result`. */
export function isToolRole(role: unknown): boolean {
  switch (role) {
    case "tool":
    case "toolResult":
    case "tool_result":
      return true;
    default:
      return false;
  }
}

/** Rough token estimate: one token per four characters, rounded up. */
export function estimateTokens(text: string): number {
  const charsPerToken = 4;
  return Math.ceil(text.length / charsPerToken);
}

/** Sum of `estimateTokens` over the flattened text of every message. */
export function estimateTokensForMessages(messages: MessageLike[]): number {
  let total = 0;
  for (const message of messages) {
    total += estimateTokens(extractTextContent(message.content));
  }
  return total;
}
(y/N)", "启用 SCCS 工具输出压缩?(y/N)"), "N")).toLowerCase(); + enableSccs = answer === "y" || answer === "yes"; +} + async function collectRemoteConfig() { if (installYes) return; remoteBaseUrl = await question(tr("OpenViking server URL", "OpenViking 服务器地址"), remoteBaseUrl); @@ -2117,6 +2127,11 @@ async function configureOpenClawPlugin({ await oc(["config", "set", `plugins.entries.${pluginId}.config.autoRecall`, "true", "--json"]); await oc(["config", "set", `plugins.entries.${pluginId}.config.autoCapture`, "true", "--json"]); } + + if (enableSccs) { + await oc(["config", "set", "plugins.entries.openviking.config.sccsEnabled", "true", "--json"]); + info(tr("SCCS compression enabled", "SCCS 压缩已启用")); + } info(tr("OpenClaw plugin configured", "OpenClaw 插件配置完成")); } @@ -2299,6 +2314,7 @@ async function main() { await selectMode(); } info(tr(`Mode: ${selectedMode}`, `模式: ${selectedMode}`)); + await selectSccs(); if (upgradePluginOnly) { await checkOpenClaw(); diff --git a/examples/openclaw-plugin/tests/ut/sccs-path-traversal.test.ts b/examples/openclaw-plugin/tests/ut/sccs-path-traversal.test.ts new file mode 100644 index 000000000..b6f6a94e2 --- /dev/null +++ b/examples/openclaw-plugin/tests/ut/sccs-path-traversal.test.ts @@ -0,0 +1,233 @@ +import { describe, expect, it, vi } from "vitest"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; +import { mkdir, rm, readFile, chmod } from "node:fs/promises"; + +import { normalizeRefId, md5Hex } from "../../sccs/utils.js"; +import { DiskBackedStore, MemoryStore } from "../../sccs/storage.js"; + +// --------------------------------------------------------------------------- +// normalizeRefId — strict MD5 hash validation +// --------------------------------------------------------------------------- + +describe("normalizeRefId", () => { + it("extracts valid hash from [REF_ID: ] format", () => { + const hash = "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4"; + expect(normalizeRefId(`[REF_ID: ${hash}]`)).toBe(hash); + 
}); + + it("accepts raw 32-char hex string", () => { + const hash = "abcdef0123456789abcdef0123456789"; + expect(normalizeRefId(hash)).toBe(hash); + }); + + it("accepts uppercase hex and returns lowercase", () => { + const upper = "A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4"; + const lower = upper.toLowerCase(); + expect(normalizeRefId(upper)).toBe(lower); + }); + + it("trims whitespace before validating", () => { + const hash = "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4"; + expect(normalizeRefId(` ${hash} `)).toBe(hash); + }); + + it("returns null for path traversal attempt with ../", () => { + expect(normalizeRefId("../../etc/passwd")).toBeNull(); + }); + + it("returns null for path traversal attempt with ..\\", () => { + expect(normalizeRefId("..\\..\\windows\\system32")).toBeNull(); + }); + + it("returns null for short hash (16 chars)", () => { + expect(normalizeRefId("a1b2c3d4e5f6a1b2")).toBeNull(); + }); + + it("returns null for hash with non-hex characters", () => { + expect(normalizeRefId("g1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4")).toBeNull(); + }); + + it("returns null for empty string", () => { + expect(normalizeRefId("")).toBeNull(); + }); + + it("returns null for whitespace-only string", () => { + expect(normalizeRefId(" ")).toBeNull(); + }); + + it("returns null for string with special characters", () => { + expect(normalizeRefId("../../../outside")).toBeNull(); + }); + + it("returns null for [REF_ID: ...] 
with invalid inner hash", () => { + expect(normalizeRefId("[REF_ID: not-a-hash]")).toBeNull(); + }); + + it("returns null for hash with spaces", () => { + expect(normalizeRefId("a1b2c3d4 e5f6a1b2 c3d4e5f6 a1b2c3d4")).toBeNull(); + }); +}); + +// --------------------------------------------------------------------------- +// DiskBackedStore.pathFor — path traversal defense (via set/get) +// --------------------------------------------------------------------------- + +describe("DiskBackedStore path traversal prevention", () => { + let testDir: string; + + // Create a temp dir for each test + async function createTestDir(): Promise { + const dir = join(tmpdir(), `sccs-test-${Date.now()}-${Math.random().toString(36).slice(2)}`); + await mkdir(dir, { recursive: true }); + return dir; + } + + it("stores and retrieves a valid refId normally", async () => { + testDir = await createTestDir(); + try { + const store = new DiskBackedStore({ dir: testDir, maxEntries: 100 }); + const refId = md5Hex("normal content"); + await store.set(refId, "normal content", 3600); + const result = await store.get(refId); + expect(result).toBe("normal content"); + } finally { + await rm(testDir, { recursive: true, force: true }); + } + }); + + it("rejects path traversal refId with ../ in set()", async () => { + testDir = await createTestDir(); + try { + const store = new DiskBackedStore({ dir: testDir, maxEntries: 100 }); + await expect(store.set("../../outside", "malicious", 3600)).rejects.toThrow( + "path traversal detected", + ); + } finally { + await rm(testDir, { recursive: true, force: true }); + } + }); + + it("rejects path traversal refId with ../ in get()", async () => { + testDir = await createTestDir(); + try { + const store = new DiskBackedStore({ dir: testDir, maxEntries: 100 }); + await expect(store.get("../../outside")).rejects.toThrow("path traversal detected"); + } finally { + await rm(testDir, { recursive: true, force: true }); + } + }); + + it("rejects path traversal with 
absolute path", async () => { + testDir = await createTestDir(); + try { + const store = new DiskBackedStore({ dir: testDir, maxEntries: 100 }); + await expect(store.set("/etc/passwd", "malicious", 3600)).rejects.toThrow( + "path traversal detected", + ); + } finally { + await rm(testDir, { recursive: true, force: true }); + } + }); + + it("rejects path traversal with ..\\ on any platform", async () => { + testDir = await createTestDir(); + try { + const store = new DiskBackedStore({ dir: testDir, maxEntries: 100 }); + await expect(store.set("..\\..\\windows\\system32", "malicious", 3600)).rejects.toThrow( + "path traversal detected", + ); + } finally { + await rm(testDir, { recursive: true, force: true }); + } + }); + + it("writes file inside refs/ directory for valid refId", async () => { + testDir = await createTestDir(); + try { + const store = new DiskBackedStore({ dir: testDir, maxEntries: 100 }); + const refId = md5Hex("file location test"); + await store.set(refId, "test content", 3600); + // set() now awaits disk write — file should be immediately readable + const filePath = join(testDir, "refs", `${refId}.json`); + const raw = await readFile(filePath, "utf8"); + const parsed = JSON.parse(raw); + expect(parsed.content).toBe("test content"); + expect(typeof parsed.expiresAt).toBe("number"); + } finally { + await rm(testDir, { recursive: true, force: true }); + } + }); + + it("persists data to disk so new store instance can read it", async () => { + testDir = await createTestDir(); + try { + const refId = md5Hex("persistence test"); + // Write with one store instance + const store1 = new DiskBackedStore({ dir: testDir, maxEntries: 100 }); + await store1.set(refId, "persisted content", 3600); + // Read with a fresh store instance (empty memory) + const store2 = new DiskBackedStore({ dir: testDir, maxEntries: 100 }); + const result = await store2.get(refId); + expect(result).toBe("persisted content"); + } finally { + await rm(testDir, { recursive: true, force: 
true }); + } + }); + + it("logs warning when disk write fails", async () => { + testDir = await createTestDir(); + try { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + // Create a read-only refs dir to cause write failure + const refsDir = join(testDir, "refs"); + await mkdir(refsDir, { recursive: true }); + await chmod(refsDir, 0o444); + + const store = new DiskBackedStore({ dir: testDir, maxEntries: 100 }); + const refId = md5Hex("write fail test"); + // set() should not throw, but log a warning + await store.set(refId, "should fail on disk", 3600); + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining(`[sccs] disk write failed for refId ${refId}`), + expect.anything(), + ); + // Data should still be in memory + expect(await store.get(refId)).toBe("should fail on disk"); + warnSpy.mockRestore(); + } finally { + // Restore write permission before cleanup + const refsDir = join(testDir, "refs"); + await chmod(refsDir, 0o755).catch(() => {}); + await rm(testDir, { recursive: true, force: true }); + } + }); +}); + +// --------------------------------------------------------------------------- +// MemoryStore — basic operations (unchanged, smoke test) +// --------------------------------------------------------------------------- + +describe("MemoryStore", () => { + it("stores and retrieves a value", async () => { + const store = new MemoryStore(100); + await store.set("abc123", "hello", 3600); + expect(await store.get("abc123")).toBe("hello"); + }); + + it("returns null for missing key", async () => { + const store = new MemoryStore(100); + expect(await store.get("nonexistent")).toBeNull(); + }); + + it("evicts oldest entry when maxEntries exceeded", async () => { + const store = new MemoryStore(2); + await store.set("first", "a", 3600); + await store.set("second", "b", 3600); + await store.set("third", "c", 3600); + // "first" should be evicted + expect(await store.get("first")).toBeNull(); + expect(await 
store.get("second")).toBe("b"); + expect(await store.get("third")).toBe("c"); + }); +}); diff --git a/examples/openclaw-plugin/tsconfig.json b/examples/openclaw-plugin/tsconfig.json index 158c20095..225e0171b 100644 --- a/examples/openclaw-plugin/tsconfig.json +++ b/examples/openclaw-plugin/tsconfig.json @@ -8,5 +8,5 @@ "skipLibCheck": true, "types": ["node"] }, - "include": ["*.ts"] + "include": ["*.ts", "sccs/**/*.ts"] }