Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion components/runners/claude-code-runner/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ RUN dnf install -y 'dnf-command(config-manager)' && \
dnf install -y git jq && \
dnf clean all


# Install Node.js
# Use UBI AppStream to avoid conflicts with preinstalled nodejs-full-i18n
RUN dnf module reset -y nodejs && \
Expand Down Expand Up @@ -60,6 +60,9 @@ RUN pip install --no-cache-dir uv pre-commit
# Create working directory
WORKDIR /app

# Copy shared runner commands (slash commands available in every session)
COPY commands/ /app/commands/

# Copy claude-runner package (no separate runner-shell needed)
COPY claude-code-runner /app/claude-runner

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,11 @@ def get_error_context(self) -> str:
async def get_mcp_status(self) -> dict:
"""Get MCP server status via an ephemeral SDK client."""
if not self._context:
return {"servers": [], "totalCount": 0, "message": "Context not initialized"}
return {
"servers": [],
"totalCount": 0,
"message": "Context not initialized",
}

try:
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
Expand Down Expand Up @@ -217,8 +221,7 @@ async def get_mcp_status(self) -> dict:
{
"name": t.get("name", ""),
"annotations": {
k: v
for k, v in (t.get("annotations") or {}).items()
k: v for k, v in (t.get("annotations") or {}).items()
},
}
for t in raw_tools
Expand All @@ -227,9 +230,7 @@ async def get_mcp_status(self) -> dict:
servers_list.append(
{
"name": srv.get("name", ""),
"displayName": server_info.get(
"name", srv.get("name", "")
),
"displayName": server_info.get("name", srv.get("name", "")),
"status": srv.get("status", "unknown"),
"version": server_info.get("version", ""),
"tools": tools,
Expand Down Expand Up @@ -278,8 +279,7 @@ async def _ensure_ready(self) -> None:
await self._setup_platform()
self._ready = True
logger.info(
f"Platform ready — model: {self._configured_model}, "
f"cwd: {self._cwd_path}"
f"Platform ready — model: {self._configured_model}, cwd: {self._cwd_path}"
)

async def _setup_platform(self) -> None:
Expand All @@ -291,7 +291,10 @@ async def _setup_platform(self) -> None:
# Claude-specific auth
from ambient_runner.bridges.claude.auth import setup_sdk_authentication
from ambient_runner.platform.auth import populate_runtime_credentials
from ambient_runner.platform.workspace import resolve_workspace_paths, validate_prerequisites
from ambient_runner.platform.workspace import (
resolve_workspace_paths,
validate_prerequisites,
)

await validate_prerequisites(self._context)
_api_key, _use_vertex, configured_model = await setup_sdk_authentication(
Expand All @@ -302,6 +305,11 @@ async def _setup_platform(self) -> None:
# Workspace paths
cwd_path, add_dirs = resolve_workspace_paths(self._context)

# Inject platform slash commands (before Claude Code launches)
from ambient_runner.platform.commands import inject_platform_commands

inject_platform_commands()

# Observability (before MCP so rubric tool can access it)
await self._setup_observability(configured_model)

Expand All @@ -319,9 +327,7 @@ async def _setup_platform(self) -> None:
# System prompt
from ambient_runner.bridges.claude.prompts import build_sdk_system_prompt

system_prompt = build_sdk_system_prompt(
self._context.workspace_path, cwd_path
)
system_prompt = build_sdk_system_prompt(self._context.workspace_path, cwd_path)

# Store results
self._configured_model = configured_model
Expand Down Expand Up @@ -349,9 +355,7 @@ async def _setup_observability(self, configured_model: str) -> None:
)
await obs.initialize(
prompt="(pending)",
namespace=self._context.get_env(
"AGENTIC_SESSION_NAMESPACE", "unknown"
),
namespace=self._context.get_env("AGENTIC_SESSION_NAMESPACE", "unknown"),
model=configured_model,
)
self._obs = obs
Expand Down Expand Up @@ -401,4 +405,3 @@ def _stderr_handler(line: str) -> None:
adapter._stderr_lines = self._stderr_lines # type: ignore[attr-defined]
self._adapter = adapter
logger.info("Adapter built (persistent, will be reused across runs)")

Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
"""
Global /feedback SDK tool for capturing user satisfaction during sessions.

When a user expresses satisfaction, dissatisfaction, or provides qualitative
feedback about the session or agent output, this tool records it. When
Langfuse is configured, feedback is logged as a scored event; otherwise it
falls back to stdout (pod logs) so feedback is never lost.

Available in every session regardless of workflow configuration.
"""

import logging
import os
from typing import Any

logger = logging.getLogger(__name__)


# ------------------------------------------------------------------
# Constants
# ------------------------------------------------------------------

# Closed set of sentiments accepted by the tool; mirrored in the JSON
# schema's enum below so the model cannot submit anything else.
FEEDBACK_RATINGS: list[str] = ["positive", "negative"]

# Natural-language guidance surfaced to the model when the tool is
# registered. This is a runtime string consumed by the SDK — edits here
# change what the agent sees.
FEEDBACK_TOOL_DESCRIPTION: str = (
    "Submit user feedback about the session or agent output. Call this when "
    "the user explicitly rates the session, expresses satisfaction or "
    "dissatisfaction, or provides qualitative feedback about quality.\n\n"
    "## When to call\n\n"
    "- User says the output is good, great, perfect, or similar praise\n"
    "- User says the output is bad, wrong, unhelpful, or similar criticism\n"
    "- User explicitly asks to submit feedback or rate the session\n"
    "- User gives a thumbs up / thumbs down\n\n"
    "## Fields\n\n"
    "- `rating`: 'positive' for praise/satisfaction, 'negative' for "
    "criticism/dissatisfaction\n"
    "- `comment`: the user's exact words or a brief summary of their feedback\n"
)

# JSON Schema describing the tool's input; both fields are required, and
# `rating` is constrained to FEEDBACK_RATINGS via the enum.
FEEDBACK_INPUT_SCHEMA: dict = {
    "type": "object",
    "properties": {
        "rating": {
            "type": "string",
            "enum": FEEDBACK_RATINGS,
            "description": (
                "User sentiment: 'positive' for satisfaction/praise, "
                "'negative' for dissatisfaction/criticism."
            ),
        },
        "comment": {
            "type": "string",
            "description": (
                "The user's feedback comment. Capture their exact words "
                "or a concise summary of what they said."
            ),
        },
    },
    "required": ["rating", "comment"],
}


# ------------------------------------------------------------------
# Tool factory
# ------------------------------------------------------------------


def create_feedback_mcp_tool(
    obs: Any,
    session_id: str,
    sdk_tool_decorator,
):
    """Build and return the ``submit_feedback`` MCP tool.

    The returned coroutine is already wrapped by ``sdk_tool_decorator``
    with the shared description and input schema, so callers only need
    to register it with the SDK.

    Args:
        obs: ObservabilityManager instance for trace ID and Langfuse client.
        session_id: Current session ID.
        sdk_tool_decorator: The ``tool`` decorator from ``claude_agent_sdk``.

    Returns:
        Decorated async tool function.
    """

    @sdk_tool_decorator(
        "submit_feedback",
        FEEDBACK_TOOL_DESCRIPTION,
        FEEDBACK_INPUT_SCHEMA,
    )
    async def submit_feedback_tool(args: dict) -> dict:
        """Log user feedback to Langfuse."""
        # The closure captures obs/session_id from the factory directly.
        user_rating = args.get("rating", "")
        user_comment = args.get("comment", "")

        ok, err = _log_feedback_to_langfuse(
            rating=user_rating,
            comment=user_comment,
            obs=obs,
            session_id=session_id,
        )

        # Failure path first (guard clause): surface the error to the model.
        if not ok:
            return {
                "content": [
                    {
                        "type": "text",
                        "text": f"Feedback noted but could not be recorded: {err}",
                    }
                ],
                "isError": True,
            }

        confirmation = (
            f"Feedback recorded (rating={user_rating}). "
            "Thank you for helping improve the platform."
        )
        return {"content": [{"type": "text", "text": confirmation}]}

    return submit_feedback_tool


# ------------------------------------------------------------------
# Langfuse logging (with stdout fallback)
# ------------------------------------------------------------------


def _log_feedback_fallback(
    reason: str, rating: str, comment: str, session_id: str
) -> tuple[bool, None]:
    """Log feedback to stdout (pod logs) when Langfuse is unavailable.

    Args:
        reason: Short explanation of why Langfuse logging was skipped.
        rating: User sentiment ("positive" or "negative").
        comment: Free-form user feedback; truncated to 500 chars in the log.
        session_id: Current session ID, for correlating log lines.

    Returns:
        ``(True, None)`` — the fallback always reports success so that
        feedback is never surfaced as lost to the caller.
    """
    # Lazy %-style args instead of an f-string: the message is only
    # formatted when INFO is enabled, and a literal '%' inside the user
    # comment cannot interfere with the template. Rendered text is
    # identical to the previous f-string version.
    logger.info(
        "Feedback (%s): rating=%s, comment=%s, session_id=%s",
        reason,
        rating,
        comment[:500] if comment else "",
        session_id,
    )
    return True, None


def _log_feedback_to_langfuse(
    rating: str,
    comment: str,
    obs: Any,
    session_id: str,
) -> tuple[bool, str | None]:
    """Record a user feedback score in Langfuse.

    Prefers the client owned by *obs*; otherwise builds an ad-hoc client
    from LANGFUSE_* env vars. Falls back to stdout logging whenever
    Langfuse is disabled, unconfigured, or not installed.

    Returns:
        ``(True, None)`` on success or fallback, ``(False, message)`` when
        the Langfuse call itself fails.
    """
    try:
        client = getattr(obs, "langfuse_client", None) if obs else None
        client_from_obs = client is not None

        if client is None:
            # No obs-owned client: only proceed if Langfuse is explicitly
            # enabled and fully configured via environment variables.
            enabled_flag = os.getenv("LANGFUSE_ENABLED", "").strip().lower()
            if enabled_flag not in ("1", "true", "yes"):
                return _log_feedback_fallback(
                    "no Langfuse", rating, comment, session_id
                )

            from langfuse import Langfuse

            pub = os.getenv("LANGFUSE_PUBLIC_KEY", "").strip()
            sec = os.getenv("LANGFUSE_SECRET_KEY", "").strip()
            host = os.getenv("LANGFUSE_HOST", "").strip()

            if not pub or not sec or not host:
                return _log_feedback_fallback(
                    "Langfuse creds missing", rating, comment, session_id
                )

            client = Langfuse(public_key=pub, secret_key=sec, host=host)

        # Trace association only makes sense for the obs-owned client.
        # Prefer the live trace ID; fall back to last_trace_id across turns.
        trace_id = None
        if client_from_obs:
            try:
                trace_id = obs.get_current_trace_id() if obs else None
            except Exception:
                trace_id = None
            if trace_id is None:
                trace_id = getattr(obs, "last_trace_id", None)

        score_kwargs: dict = {
            "name": "session-feedback",
            # BOOLEAN score: True iff the user sentiment was positive.
            "value": rating == "positive",
            "data_type": "BOOLEAN",
            "comment": comment[:500] if comment else None,
            "metadata": {
                "rating": rating,
                "session_id": session_id,
                "session_name": os.getenv("AGENTIC_SESSION_NAME", "").strip(),
                "project": os.getenv("AGENTIC_SESSION_NAMESPACE", "").strip(),
            },
        }
        if trace_id:
            score_kwargs["trace_id"] = trace_id

        client.create_score(**score_kwargs)
        client.flush()

        logger.info(
            f"Feedback logged to Langfuse: rating={rating}, trace_id={trace_id}"
        )
        return True, None

    except ImportError:
        # langfuse package absent — still record the feedback in pod logs.
        return _log_feedback_fallback(
            "langfuse not installed", rating, comment, session_id
        )
    except Exception as e:
        msg = str(e)
        logger.error(f"Failed to log feedback to Langfuse: {msg}")
        return False, msg
Loading
Loading