diff --git a/components/runners/claude-code-runner/Dockerfile b/components/runners/claude-code-runner/Dockerfile index b8751c881..0f0ea7e1a 100644 --- a/components/runners/claude-code-runner/Dockerfile +++ b/components/runners/claude-code-runner/Dockerfile @@ -9,7 +9,7 @@ RUN dnf install -y 'dnf-command(config-manager)' && \ dnf install -y git jq && \ dnf clean all - + # Install Node.js # Use UBI AppStream to avoid conflicts with preinstalled nodejs-full-i18n RUN dnf module reset -y nodejs && \ @@ -60,6 +60,9 @@ RUN pip install --no-cache-dir uv pre-commit # Create working directory WORKDIR /app +# Copy shared runner commands (slash commands available in every session) +COPY commands/ /app/commands/ + # Copy claude-runner package (no separate runner-shell needed) COPY claude-code-runner /app/claude-runner diff --git a/components/runners/claude-code-runner/ambient_runner/bridges/claude/bridge.py b/components/runners/claude-code-runner/ambient_runner/bridges/claude/bridge.py index 8c4666c48..40171764a 100644 --- a/components/runners/claude-code-runner/ambient_runner/bridges/claude/bridge.py +++ b/components/runners/claude-code-runner/ambient_runner/bridges/claude/bridge.py @@ -169,7 +169,11 @@ def get_error_context(self) -> str: async def get_mcp_status(self) -> dict: """Get MCP server status via an ephemeral SDK client.""" if not self._context: - return {"servers": [], "totalCount": 0, "message": "Context not initialized"} + return { + "servers": [], + "totalCount": 0, + "message": "Context not initialized", + } try: from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient @@ -217,8 +221,7 @@ async def get_mcp_status(self) -> dict: { "name": t.get("name", ""), "annotations": { - k: v - for k, v in (t.get("annotations") or {}).items() + k: v for k, v in (t.get("annotations") or {}).items() }, } for t in raw_tools @@ -227,9 +230,7 @@ async def get_mcp_status(self) -> dict: servers_list.append( { "name": srv.get("name", ""), - "displayName": server_info.get( - "name", srv.get("name", "") - ), + "displayName": server_info.get("name", srv.get("name", "")), "status": srv.get("status", "unknown"), "version": server_info.get("version", ""), "tools": tools, @@ -278,8 +279,7 @@ async def _ensure_ready(self) -> None: await self._setup_platform() self._ready = True logger.info( - f"Platform ready — model: {self._configured_model}, " - f"cwd: {self._cwd_path}" + f"Platform ready — model: {self._configured_model}, cwd: {self._cwd_path}" ) async def _setup_platform(self) -> None: @@ -291,7 +291,10 @@ async def _setup_platform(self) -> None: # Claude-specific auth from ambient_runner.bridges.claude.auth import setup_sdk_authentication from ambient_runner.platform.auth import populate_runtime_credentials - from ambient_runner.platform.workspace import resolve_workspace_paths, validate_prerequisites + from ambient_runner.platform.workspace import ( + resolve_workspace_paths, + validate_prerequisites, + ) await validate_prerequisites(self._context) _api_key, _use_vertex, configured_model = await setup_sdk_authentication( @@ -302,6 +305,11 @@ async def _setup_platform(self) -> None: # Workspace paths cwd_path, add_dirs = resolve_workspace_paths(self._context) + # Inject platform slash commands (before Claude Code launches) + from ambient_runner.platform.commands import inject_platform_commands + + inject_platform_commands() + # Observability (before MCP so rubric tool can access it) await self._setup_observability(configured_model) @@ -319,9 +327,7 @@ async def _setup_platform(self) -> None: # System prompt from ambient_runner.bridges.claude.prompts import build_sdk_system_prompt - system_prompt = build_sdk_system_prompt( - self._context.workspace_path, cwd_path - ) + system_prompt = build_sdk_system_prompt(self._context.workspace_path, cwd_path) # Store results self._configured_model = configured_model @@ -349,9 +355,7 @@ async def _setup_observability(self, configured_model: str) -> None: ) await obs.initialize( prompt="(pending)", - namespace=self._context.get_env( - "AGENTIC_SESSION_NAMESPACE", "unknown" - ), + namespace=self._context.get_env("AGENTIC_SESSION_NAMESPACE", "unknown"), model=configured_model, ) self._obs = obs @@ -401,4 +405,3 @@ def _stderr_handler(line: str) -> None: adapter._stderr_lines = self._stderr_lines # type: ignore[attr-defined] self._adapter = adapter logger.info("Adapter built (persistent, will be reused across runs)") - diff --git a/components/runners/claude-code-runner/ambient_runner/bridges/claude/feedback_tool.py b/components/runners/claude-code-runner/ambient_runner/bridges/claude/feedback_tool.py new file mode 100644 index 000000000..ee6cd1794 --- /dev/null +++ b/components/runners/claude-code-runner/ambient_runner/bridges/claude/feedback_tool.py @@ -0,0 +1,233 @@ +""" +Global /feedback SDK tool for capturing user satisfaction during sessions. + +When a user expresses satisfaction, dissatisfaction, or provides qualitative +feedback about the session or agent output, this tool records it. When +Langfuse is configured, feedback is logged as a scored event; otherwise it +falls back to stdout (pod logs) so feedback is never lost. + +Available in every session regardless of workflow configuration. +""" + +import logging +import os +from typing import Any + +logger = logging.getLogger(__name__) + + +# ------------------------------------------------------------------ +# Constants +# ------------------------------------------------------------------ + +FEEDBACK_RATINGS = ["positive", "negative"] + +FEEDBACK_TOOL_DESCRIPTION = ( + "Submit user feedback about the session or agent output. Call this when " + "the user explicitly rates the session, expresses satisfaction or " + "dissatisfaction, or provides qualitative feedback about quality.\n\n" + "## When to call\n\n" + "- User says the output is good, great, perfect, or similar praise\n" + "- User says the output is bad, wrong, unhelpful, or similar criticism\n" + "- User explicitly asks to submit feedback or rate the session\n" + "- User gives a thumbs up / thumbs down\n\n" + "## Fields\n\n" + "- `rating`: 'positive' for praise/satisfaction, 'negative' for " + "criticism/dissatisfaction\n" + "- `comment`: the user's exact words or a brief summary of their feedback\n" +) + +FEEDBACK_INPUT_SCHEMA: dict = { + "type": "object", + "properties": { + "rating": { + "type": "string", + "enum": FEEDBACK_RATINGS, + "description": ( + "User sentiment: 'positive' for satisfaction/praise, " + "'negative' for dissatisfaction/criticism." + ), + }, + "comment": { + "type": "string", + "description": ( + "The user's feedback comment. Capture their exact words " + "or a concise summary of what they said." + ), + }, + }, + "required": ["rating", "comment"], +} + + +# ------------------------------------------------------------------ +# Tool factory +# ------------------------------------------------------------------ + + +def create_feedback_mcp_tool( + obs: Any, + session_id: str, + sdk_tool_decorator, +): + """Create the submit_feedback MCP tool. + + Args: + obs: ObservabilityManager instance for trace ID and Langfuse client. + session_id: Current session ID. + sdk_tool_decorator: The ``tool`` decorator from ``claude_agent_sdk``. + + Returns: + Decorated async tool function. + """ + _obs = obs + _session_id = session_id + + @sdk_tool_decorator( + "submit_feedback", + FEEDBACK_TOOL_DESCRIPTION, + FEEDBACK_INPUT_SCHEMA, + ) + async def submit_feedback_tool(args: dict) -> dict: + """Log user feedback to Langfuse.""" + rating = args.get("rating", "") + comment = args.get("comment", "") + + success, error = _log_feedback_to_langfuse( + rating=rating, + comment=comment, + obs=_obs, + session_id=_session_id, + ) + + if success: + return { + "content": [ + { + "type": "text", + "text": ( + f"Feedback recorded (rating={rating}). " + "Thank you for helping improve the platform." + ), + } + ] + } + else: + return { + "content": [ + { + "type": "text", + "text": f"Feedback noted but could not be recorded: {error}", + } + ], + "isError": True, + } + + return submit_feedback_tool + + +# ------------------------------------------------------------------ +# Langfuse logging (with stdout fallback) +# ------------------------------------------------------------------ + + +def _log_feedback_fallback( + reason: str, rating: str, comment: str, session_id: str +) -> tuple[bool, None]: + """Log feedback to stdout when Langfuse is unavailable.""" + logger.info( + f"Feedback ({reason}): rating={rating}, " + f"comment={comment[:500] if comment else ''}, " + f"session_id={session_id}" + ) + return True, None + + +def _log_feedback_to_langfuse( + rating: str, + comment: str, + obs: Any, + session_id: str, +) -> tuple[bool, str | None]: + """Log a user feedback score to Langfuse.""" + try: + langfuse_client = getattr(obs, "langfuse_client", None) if obs else None + using_obs_client = langfuse_client is not None + + if not langfuse_client: + langfuse_enabled = os.getenv("LANGFUSE_ENABLED", "").strip().lower() in ( + "1", + "true", + "yes", + ) + if not langfuse_enabled: + return _log_feedback_fallback( + "no Langfuse", rating, comment, session_id + ) + + from langfuse import Langfuse + + public_key = os.getenv("LANGFUSE_PUBLIC_KEY", "").strip() + secret_key = os.getenv("LANGFUSE_SECRET_KEY", "").strip() + host = os.getenv("LANGFUSE_HOST", "").strip() + + if not (public_key and secret_key and host): + return _log_feedback_fallback( + "Langfuse creds missing", rating, comment, session_id + ) + + langfuse_client = Langfuse( + public_key=public_key, + secret_key=secret_key, + host=host, + ) + + # Prefer obs-owned trace ID; fall back to last_trace_id across turns. + if using_obs_client: + try: + trace_id = obs.get_current_trace_id() if obs else None + if trace_id is None: + trace_id = getattr(obs, "last_trace_id", None) + except Exception: + trace_id = getattr(obs, "last_trace_id", None) + else: + trace_id = None + + value = rating == "positive" + + session_name = os.getenv("AGENTIC_SESSION_NAME", "").strip() + project = os.getenv("AGENTIC_SESSION_NAMESPACE", "").strip() + + metadata: dict = { + "rating": rating, + "session_id": session_id, + "session_name": session_name, + "project": project, + } + + kwargs: dict = { + "name": "session-feedback", + "value": value, + "data_type": "BOOLEAN", + "comment": comment[:500] if comment else None, + "metadata": metadata, + } + if trace_id: + kwargs["trace_id"] = trace_id + + langfuse_client.create_score(**kwargs) + langfuse_client.flush() + + logger.info( + f"Feedback logged to Langfuse: rating={rating}, trace_id={trace_id}" + ) + return True, None + + except ImportError: + return _log_feedback_fallback( + "langfuse not installed", rating, comment, session_id + ) + except Exception as e: + msg = str(e) + logger.error(f"Failed to log feedback to Langfuse: {msg}") + return False, msg diff --git a/components/runners/claude-code-runner/ambient_runner/bridges/claude/mcp.py b/components/runners/claude-code-runner/ambient_runner/bridges/claude/mcp.py index 388ad377b..e1dd8c81e 100644 --- a/components/runners/claude-code-runner/ambient_runner/bridges/claude/mcp.py +++ b/components/runners/claude-code-runner/ambient_runner/bridges/claude/mcp.py @@ -19,8 +19,14 @@ DEFAULT_ALLOWED_TOOLS = [ - "Read", "Write", "Bash", "Glob", "Grep", "Edit", - "MultiEdit", "WebSearch", + "Read", + "Write", + "Bash", + "Glob", + "Grep", + "Edit", + "MultiEdit", + "WebSearch", ] @@ -45,6 +51,7 @@ def build_mcp_servers(context: RunnerContext, cwd_path: str, obs: Any = None) -> load_rubric_content, ) from ambient_runner.bridges.claude.corrections import create_correction_mcp_tool + from ambient_runner.bridges.claude.feedback_tool import create_feedback_mcp_tool mcp_servers = load_mcp_config(context, cwd_path) or {} @@ -91,6 +98,19 @@ def build_mcp_servers(context: RunnerContext, cwd_path: str, obs: Any = None) -> mcp_servers["corrections"] = correction_server logger.info("Added corrections feedback MCP tool (log_correction)") + # Global feedback tool (always available) + feedback_tool = create_feedback_mcp_tool( + obs=obs, + session_id=context.session_id, + sdk_tool_decorator=sdk_tool, + ) + if feedback_tool: + feedback_server = create_sdk_mcp_server( + name="feedback", version="1.0.0", tools=[feedback_tool] + ) + mcp_servers["feedback"] = feedback_server + logger.info("Added global feedback MCP tool (submit_feedback)") + return mcp_servers @@ -118,7 +138,9 @@ def log_auth_status(mcp_servers: dict) -> None: # --------------------------------------------------------------------------- -def _read_google_credentials(workspace_path: Path, secret_path: Path) -> Dict[str, Any] | None: +def _read_google_credentials( + workspace_path: Path, secret_path: Path +) -> Dict[str, Any] | None: cred_path = workspace_path if workspace_path.exists() else secret_path if not cred_path.exists(): return None @@ -144,20 +166,28 @@ def _parse_token_expiry(expiry_str: str) -> datetime | None: return None -def _validate_google_token(user_creds: Dict[str, Any], user_email: str) -> tuple[bool | None, str]: +def _validate_google_token( + user_creds: Dict[str, Any], user_email: str +) -> tuple[bool | None, str]: if not user_creds.get("access_token") or not user_creds.get("refresh_token"): return False, "Google OAuth credentials incomplete - missing or empty tokens" if "token_expiry" in user_creds and user_creds["token_expiry"]: expiry = _parse_token_expiry(user_creds["token_expiry"]) if expiry is None: - return None, f"Google OAuth authenticated as {user_email} (token expiry format invalid)" + return ( + None, + f"Google OAuth authenticated as {user_email} (token expiry format invalid)", + ) now = datetime.now(timezone.utc) if expiry <= now and not user_creds.get("refresh_token"): return False, "Google OAuth token expired - re-authenticate" if expiry <= now: - return None, f"Google OAuth authenticated as {user_email} (token refresh needed)" + return ( + None, + f"Google OAuth authenticated as {user_email} (token refresh needed)", + ) return True, f"Google OAuth authenticated as {user_email}" @@ -165,11 +195,16 @@ def _validate_google_token(user_creds: Dict[str, Any], user_email: str) -> tuple def check_mcp_authentication(server_name: str) -> tuple[bool | None, str | None]: """Check if credentials are available and valid for known MCP servers.""" if server_name == "google-workspace": - workspace_path = Path("/workspace/.google_workspace_mcp/credentials/credentials.json") + workspace_path = Path( + "/workspace/.google_workspace_mcp/credentials/credentials.json" + ) secret_path = Path("/app/.google_workspace_mcp/credentials/credentials.json") creds = _read_google_credentials(workspace_path, secret_path) if creds is None: - return False, "Google OAuth not configured - authenticate via Integrations page" + return ( + False, + "Google OAuth not configured - authenticate via Integrations page", + ) try: user_email = os.environ.get("USER_GOOGLE_EMAIL", "") @@ -195,7 +230,9 @@ def check_mcp_authentication(server_name: str) -> tuple[bool | None, str | None] import urllib.request as _urllib_request base = os.getenv("BACKEND_API_URL", "").rstrip("/") - project = os.getenv("PROJECT_NAME") or os.getenv("AGENTIC_SESSION_NAMESPACE", "") + project = os.getenv("PROJECT_NAME") or os.getenv( + "AGENTIC_SESSION_NAMESPACE", "" + ) session_id = os.getenv("SESSION_ID", "") if base and project and session_id: @@ -208,7 +245,10 @@ def check_mcp_authentication(server_name: str) -> tuple[bool | None, str | None] with _urllib_request.urlopen(req, timeout=3) as resp: data = json.loads(resp.read()) if data.get("apiToken"): - return True, "Jira credentials available (not yet loaded in session)" + return ( + True, + "Jira credentials available (not yet loaded in session)", + ) except Exception: pass except Exception: diff --git a/components/runners/claude-code-runner/ambient_runner/platform/commands.py b/components/runners/claude-code-runner/ambient_runner/platform/commands.py new file mode 100644 index 000000000..b52cd1c56 --- /dev/null +++ b/components/runners/claude-code-runner/ambient_runner/platform/commands.py @@ -0,0 +1,48 @@ +""" +Inject shared platform commands into the Claude Code commands directory. + +Copies command files from /app/commands/ (baked into the runner image) +into /app/.claude/commands/ so Claude Code picks them up as user-level +slash commands. +""" + +import logging +import shutil +from pathlib import Path + +logger = logging.getLogger(__name__) + +BUNDLED_COMMANDS_DIR = Path("/app/commands") +CLAUDE_COMMANDS_DIR = Path("/app/.claude/commands") + + +def inject_platform_commands( + source: Path = BUNDLED_COMMANDS_DIR, + target: Path = CLAUDE_COMMANDS_DIR, +) -> int: + """Copy bundled command files into the Claude Code commands directory. + + Args: + source: Directory containing bundled command ``.md`` files. + target: Claude Code user-level commands directory. + + Returns: + Number of command files injected. + """ + if not source.is_dir(): + logger.debug(f"No bundled commands directory at {source}") + return 0 + + target.mkdir(parents=True, exist_ok=True) + + count = 0 + for src_file in sorted(source.iterdir()): + if not src_file.is_file(): + continue + dst_file = target / src_file.name + shutil.copy2(src_file, dst_file) + count += 1 + logger.debug(f"Injected command: {src_file.name} -> {dst_file}") + + logger.info(f"Injected {count} platform command(s) into {target}") + return count diff --git a/components/runners/claude-code-runner/ambient_runner/platform/prompts.py b/components/runners/claude-code-runner/ambient_runner/platform/prompts.py index bed08a728..a15635612 100644 --- a/components/runners/claude-code-runner/ambient_runner/platform/prompts.py +++ b/components/runners/claude-code-runner/ambient_runner/platform/prompts.py @@ -71,17 +71,27 @@ CORRECTION_DETECTION_INSTRUCTIONS = ( "## CRITICAL: Automatic Correction Logging\n\n" - "**BEFORE acting on user feedback that changes something you already did or assumed, ALWAYS ask yourself: \"Is the user steering me away from a previous action or decision?\"**\n\n" + '**BEFORE acting on user feedback that changes something you already did or assumed, ALWAYS ask yourself: "Is the user steering me away from a previous action or decision?"**\n\n' "If YES → Call `log_correction` FIRST, then fix.\n\n" "**Rule: Any message that redirects, corrects, or changes your previous work or assumptions = log it. When in doubt, log it.**\n\n" "This includes quality issues, but also: redirections, preference changes, missed requirements, wrong scope, or any context that changes what you should have done. Do NOT treat these as simple new requests.\n" ) +FEEDBACK_INSTRUCTIONS = ( + "## Session Feedback\n\n" + "When the user expresses satisfaction, dissatisfaction, or provides " + "qualitative feedback about the session or your output, call " + "`submit_feedback` (via the feedback MCP server) to record it.\n\n" + "**When to call**: user praises the output, criticises the result, " + "gives a thumbs up/down, or explicitly asks to submit feedback.\n" +) + # --------------------------------------------------------------------------- # Prompt builder # --------------------------------------------------------------------------- + def build_workspace_context_prompt( repos_cfg: list, workflow_name: str | None, @@ -117,9 +127,7 @@ def build_workspace_context_prompt( file_uploads_path = Path(workspace_path) / "file-uploads" if file_uploads_path.exists() and file_uploads_path.is_dir(): try: - files = sorted( - [f.name for f in file_uploads_path.iterdir() if f.is_file()] - ) + files = sorted([f.name for f in file_uploads_path.iterdir() if f.is_file()]) if files: max_display = 10 if len(files) <= max_display: @@ -140,9 +148,7 @@ def build_workspace_context_prompt( session_id = os.getenv("AGENTIC_SESSION_NAME", "").strip() feature_branch = f"ambient/{session_id}" if session_id else None - repo_names = [ - repo.get("name", f"repo-{i}") for i, repo in enumerate(repos_cfg) - ] + repo_names = [repo.get("name", f"repo-{i}") for i, repo in enumerate(repos_cfg)] if len(repo_names) <= 5: prompt += ( f"**Repositories**: " @@ -164,9 +170,7 @@ def build_workspace_context_prompt( prompt += "\n" # Git push instructions for auto-push repos - auto_push_repos = [ - repo for repo in repos_cfg if repo.get("autoPush", False) - ] + auto_push_repos = [repo for repo in repos_cfg if repo.get("autoPush", False)] if auto_push_repos: if not feature_branch: logger.warning( @@ -186,21 +190,16 @@ def build_workspace_context_prompt( # Workflow instructions if ambient_config.get("systemPrompt"): - prompt += ( - f"## Workflow Instructions\n" - f"{ambient_config['systemPrompt']}\n\n" - ) + prompt += f"## Workflow Instructions\n{ambient_config['systemPrompt']}\n\n" # Rubric evaluation instructions prompt += _build_rubric_prompt_section(ambient_config) - # Corrections feedback instructions (only when Langfuse is configured) - langfuse_enabled = os.getenv("LANGFUSE_ENABLED", "").strip().lower() in ( - "1", "true", "yes" - ) - if langfuse_enabled: - prompt += "## Corrections Feedback\n\n" - prompt += CORRECTION_DETECTION_INSTRUCTIONS + # Corrections and feedback instructions (always enabled; tools gracefully + # degrade when Langfuse is unavailable by logging to stdout instead) + prompt += "## Corrections Feedback\n\n" + prompt += CORRECTION_DETECTION_INSTRUCTIONS + prompt += FEEDBACK_INSTRUCTIONS return prompt diff --git a/components/runners/claude-code-runner/tests/test_commands.py b/components/runners/claude-code-runner/tests/test_commands.py new file mode 100644 index 000000000..6f12b5b61 --- /dev/null +++ b/components/runners/claude-code-runner/tests/test_commands.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +""" +Test platform command injection logic. + +Validates: +1. Commands are copied from source to target directory +2. Target directory is created when missing +3. No-op when source directory is absent +4. Only files (not subdirectories) are copied +""" + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from ambient_runner.platform.commands import inject_platform_commands + + +def test_injects_command_files(tmp_path): + """Command .md files are copied from source to target.""" + source = tmp_path / "commands" + source.mkdir() + (source / "feedback.md").write_text("# feedback command") + (source / "help.md").write_text("# help command") + + target = tmp_path / ".claude" / "commands" + + count = inject_platform_commands(source=source, target=target) + + assert count == 2 + assert (target / "feedback.md").exists() + assert (target / "help.md").exists() + assert (target / "feedback.md").read_text() == "# feedback command" + + +def test_creates_target_directory(tmp_path): + """Target directory is created if it doesn't exist.""" + source = tmp_path / "commands" + source.mkdir() + (source / "feedback.md").write_text("content") + + target = tmp_path / "deep" / "nested" / "commands" + assert not target.exists() + + inject_platform_commands(source=source, target=target) + + assert target.is_dir() + assert (target / "feedback.md").exists() + + +def test_noop_when_source_missing(tmp_path): + """Returns 0 when source directory doesn't exist.""" + source = tmp_path / "nonexistent" + target = tmp_path / "target" + + count = inject_platform_commands(source=source, target=target) + + assert count == 0 + assert not target.exists() + + +def test_skips_subdirectories(tmp_path): + """Only files are copied, subdirectories are skipped.""" + source = tmp_path / "commands" + source.mkdir() + (source / "feedback.md").write_text("command") + (source / "subdir").mkdir() + + target = tmp_path / "target" + + count = inject_platform_commands(source=source, target=target) + + assert count == 1 + assert not (target / "subdir").exists() + + +def test_overwrites_existing_files(tmp_path): + """Existing files in target are overwritten with source content.""" + source = tmp_path / "commands" + source.mkdir() + (source / "feedback.md").write_text("new content") + + target = tmp_path / "target" + target.mkdir(parents=True) + (target / "feedback.md").write_text("old content") + + inject_platform_commands(source=source, target=target) + + assert (target / "feedback.md").read_text() == "new content" diff --git a/components/runners/claude-code-runner/tests/test_feedback_tool.py b/components/runners/claude-code-runner/tests/test_feedback_tool.py new file mode 100644 index 000000000..d6a0d06b3 --- /dev/null +++ b/components/runners/claude-code-runner/tests/test_feedback_tool.py @@ -0,0 +1,354 @@ +#!/usr/bin/env python3 +""" +Test global feedback MCP tool. + +Validates: +1. Tool creation and schema structure +2. Langfuse score creation with correct parameters +3. Graceful fallback to stdout when Langfuse is unavailable +4. Comment truncation +5. Rating to boolean mapping +""" + +import os +import sys +from pathlib import Path +from unittest.mock import MagicMock, patch + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from ambient_runner.bridges.claude.feedback_tool import ( + FEEDBACK_INPUT_SCHEMA, + FEEDBACK_RATINGS, + FEEDBACK_TOOL_DESCRIPTION, + _log_feedback_to_langfuse, + create_feedback_mcp_tool, +) + + +# ------------------------------------------------------------------ +# Schema validation +# ------------------------------------------------------------------ + + +def test_schema_has_rating_and_comment(): + """Input schema includes both rating and comment fields.""" + props = FEEDBACK_INPUT_SCHEMA["properties"] + assert "rating" in props + assert "comment" in props + + +def test_schema_rating_enum(): + """Rating enum contains positive and negative.""" + enum = FEEDBACK_INPUT_SCHEMA["properties"]["rating"]["enum"] + assert "positive" in enum + assert "negative" in enum + assert enum == FEEDBACK_RATINGS + + +def test_schema_required_fields(): + """Both rating and comment are required.""" + assert "rating" in FEEDBACK_INPUT_SCHEMA["required"] + assert "comment" in FEEDBACK_INPUT_SCHEMA["required"] + + +# ------------------------------------------------------------------ +# Tool creation +# ------------------------------------------------------------------ + + +def test_tool_creation(): + """Tool is created with correct name via decorator.""" + mock_decorator = MagicMock() + mock_decorator.return_value = lambda fn: fn + + tool = create_feedback_mcp_tool( + obs=MagicMock(), + session_id="session-1", + sdk_tool_decorator=mock_decorator, + ) + + assert tool is not None + mock_decorator.assert_called_once() + call_args = mock_decorator.call_args[0] + assert call_args[0] == "submit_feedback" + + +def test_tool_description_content(): + """Tool description explains when to call submit_feedback.""" + mock_decorator = MagicMock() + mock_decorator.return_value = lambda fn: fn + + create_feedback_mcp_tool( + obs=MagicMock(), + session_id="session-1", + sdk_tool_decorator=mock_decorator, + ) + + description = mock_decorator.call_args[0][1] + assert ( + "submit_feedback" not in description or FEEDBACK_TOOL_DESCRIPTION in description + ) + assert "positive" in description or "rating" in description + + +def test_tool_schema_passed_to_decorator(): + """Full input schema is passed as third arg to decorator.""" + mock_decorator = MagicMock() + mock_decorator.return_value = lambda fn: fn + + create_feedback_mcp_tool( + obs=MagicMock(), + session_id="session-1", + sdk_tool_decorator=mock_decorator, + ) + + schema = mock_decorator.call_args[0][2] + assert schema["type"] == "object" + assert "rating" in schema["properties"] + assert "comment" in schema["properties"] + + +# ------------------------------------------------------------------ +# Langfuse logging +# ------------------------------------------------------------------ + + +def test_positive_rating_maps_to_true(): + """Positive rating logs value=True to Langfuse.""" + mock_obs = MagicMock() + mock_obs.langfuse_client = MagicMock() + mock_obs.get_current_trace_id.return_value = "trace-abc" + + with patch.dict( + os.environ, + { + "AGENTIC_SESSION_NAME": "session-1", + "AGENTIC_SESSION_NAMESPACE": "my-project", + }, + ): + success, error = _log_feedback_to_langfuse( + rating="positive", + comment="Great job!", + obs=mock_obs, + session_id="session-1", + ) + + assert success is True + assert error is None + + call_kwargs = mock_obs.langfuse_client.create_score.call_args[1] + assert call_kwargs["value"] is True + assert call_kwargs["name"] == "session-feedback" + assert call_kwargs["data_type"] == "BOOLEAN" + assert call_kwargs["trace_id"] == "trace-abc" + mock_obs.langfuse_client.flush.assert_called_once() + + +def test_negative_rating_maps_to_false(): + """Negative rating logs value=False to Langfuse.""" + mock_obs = MagicMock() + mock_obs.langfuse_client = MagicMock() + mock_obs.get_current_trace_id.return_value = "trace-xyz" + + with patch.dict(os.environ, {}, clear=True): + success, error = _log_feedback_to_langfuse( + rating="negative", + comment="Needs improvement.", + obs=mock_obs, + session_id="session-1", + ) + + assert success is True + call_kwargs = mock_obs.langfuse_client.create_score.call_args[1] + assert call_kwargs["value"] is False + + +def test_logging_metadata_includes_session_context(): + """Metadata captures session_id, session_name, project, and rating.""" + mock_obs = MagicMock() + mock_obs.langfuse_client = MagicMock() + mock_obs.get_current_trace_id.return_value = None + mock_obs.last_trace_id = None + + with patch.dict( + os.environ, + { + "AGENTIC_SESSION_NAME": "my-session", + "AGENTIC_SESSION_NAMESPACE": "my-project", + }, + ): + _log_feedback_to_langfuse( + rating="positive", + comment="Excellent output.", + obs=mock_obs, + session_id="session-42", + ) + + call_kwargs = mock_obs.langfuse_client.create_score.call_args[1] + metadata = call_kwargs["metadata"] + assert metadata["rating"] == "positive" + assert metadata["session_id"] == "session-42" + assert metadata["session_name"] == "my-session" + assert metadata["project"] == "my-project" + + +def test_logging_without_trace_id(): + """Score is created without trace_id when none is available.""" + mock_obs = MagicMock() + mock_obs.langfuse_client = MagicMock() + mock_obs.get_current_trace_id.return_value = None + mock_obs.last_trace_id = None + + with patch.dict(os.environ, {}, clear=True): + success, error = _log_feedback_to_langfuse( + rating="positive", + comment="All good.", + obs=mock_obs, + session_id="session-1", + ) + + assert success is True + call_kwargs = mock_obs.langfuse_client.create_score.call_args[1] + assert "trace_id" not in call_kwargs + + +def test_logging_uses_last_trace_id_fallback(): + """Falls back to obs.last_trace_id when get_current_trace_id returns None.""" + mock_obs = MagicMock() + mock_obs.langfuse_client = MagicMock() + mock_obs.get_current_trace_id.return_value = None + mock_obs.last_trace_id = "last-trace-999" + + with patch.dict(os.environ, {}, clear=True): + _log_feedback_to_langfuse( + rating="negative", + comment="Not what I expected.", + obs=mock_obs, + session_id="session-1", + ) + + call_kwargs = mock_obs.langfuse_client.create_score.call_args[1] + assert call_kwargs["trace_id"] == "last-trace-999" + + +def test_logging_without_langfuse_enabled(): + """Falls back to stdout logging when Langfuse not enabled.""" + mock_obs = MagicMock() + mock_obs.langfuse_client = None + + with patch.dict(os.environ, {"LANGFUSE_ENABLED": "false"}, clear=True): + success, error = _log_feedback_to_langfuse( + rating="positive", + comment="Great!", + obs=mock_obs, + session_id="session-1", + ) + + assert success is True + assert error is None + + +def test_logging_without_credentials(): + """Falls back to stdout logging when Langfuse enabled but credentials missing.""" + mock_obs = MagicMock() + mock_obs.langfuse_client = None + + with patch.dict(os.environ, {"LANGFUSE_ENABLED": "true"}, clear=True): + with patch.dict("sys.modules", {"langfuse": MagicMock()}): + success, error = _log_feedback_to_langfuse( + rating="positive", + comment="Nice work.", + obs=mock_obs, + session_id="session-1", + ) + + assert success is True + assert error is None + + +def test_logging_with_no_obs(): + """Falls back to stdout logging when obs is None and Langfuse not enabled.""" + with patch.dict(os.environ, {}, clear=True): + success, error = _log_feedback_to_langfuse( + rating="positive", + comment="Great.", + obs=None, + session_id="session-1", + ) + + assert success is True + assert error is None + + +def test_comment_truncation(): + """Comment is truncated to 500 chars.""" + mock_obs = MagicMock() + mock_obs.langfuse_client = MagicMock() + mock_obs.get_current_trace_id.return_value = None + mock_obs.last_trace_id = None + + long_comment = "x" * 1000 + + with patch.dict(os.environ, {}, clear=True): + _log_feedback_to_langfuse( + rating="positive", + comment=long_comment, + obs=mock_obs, + session_id="session-1", + ) + + call_kwargs = mock_obs.langfuse_client.create_score.call_args[1] + assert len(call_kwargs["comment"]) == 500 + + +# ------------------------------------------------------------------ +# Runner +# ------------------------------------------------------------------ + + +if __name__ == "__main__": + print("Testing global feedback MCP tool...") + print("=" * 60) + + tests = [ + ("Schema: rating and comment fields", test_schema_has_rating_and_comment), + ("Schema: rating enum values", test_schema_rating_enum), + ("Schema: required fields", test_schema_required_fields), + ("Tool: creation", test_tool_creation), + ("Tool: description content", test_tool_description_content), + ("Tool: schema passed to decorator", test_tool_schema_passed_to_decorator), + ("Logging: positive maps to True", test_positive_rating_maps_to_true), + ("Logging: negative maps to False", test_negative_rating_maps_to_false), + ( + "Logging: metadata has session context", + test_logging_metadata_includes_session_context, + ), + ("Logging: no trace_id", test_logging_without_trace_id), + ("Logging: last_trace_id fallback", test_logging_uses_last_trace_id_fallback), + ("Logging: fallback when not enabled", test_logging_without_langfuse_enabled), + ("Logging: fallback when no credentials", test_logging_without_credentials), + ("Logging: fallback when no obs", test_logging_with_no_obs), + ("Logging: comment truncation", test_comment_truncation), + ] + + passed = 0 + failed = 0 + + for test_name, test_func in tests: + try: + test_func() + print(f" PASS {test_name}") + passed += 1 + except AssertionError as e: + print(f" FAIL {test_name}: {e}") + failed += 1 + except Exception as e: + print(f" FAIL {test_name}: Unexpected error: {e}") + failed += 1 + + print("=" * 60) + print(f"Results: {passed} passed, {failed} failed") + + if failed > 0: + sys.exit(1) diff --git a/components/runners/commands/feedback.md b/components/runners/commands/feedback.md new file mode 100644 index 000000000..74eb5ff6c --- /dev/null +++ b/components/runners/commands/feedback.md @@ -0,0 +1,18 @@ +--- +displayName: Feedback +description: Submit feedback about this session +icon: message-square +order: 100 +--- + +The user wants to submit feedback about this session. Follow these steps: + +1. Ask the user two questions: + - **Rating**: Is their experience positive or negative? + - **Comment**: A brief description of their feedback (what went well, what could improve, etc.) + +2. Once you have both, call the `submit_feedback` tool (via the feedback MCP server) with: + - `rating`: "positive" or "negative" + - `comment`: the user's feedback in their own words + +3. Confirm to the user that their feedback has been recorded and thank them.