diff --git a/a2a/exgentic_agent/.dockerignore b/a2a/exgentic_agent/.dockerignore
new file mode 100644
index 00000000..17aa69e0
--- /dev/null
+++ b/a2a/exgentic_agent/.dockerignore
@@ -0,0 +1,38 @@
+# Git files
+.git
+.gitignore
+.gitattributes
+
+# Documentation
+*.md
+!README.md
+
+# IDE and editor files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# Python cache
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+
+# Virtual environments
+venv/
+env/
+ENV/
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
+
+# Temporary files
+*.tmp
+*.temp
diff --git a/a2a/exgentic_agent/.env.advanced b/a2a/exgentic_agent/.env.advanced
new file mode 100644
index 00000000..3f1ea0de
--- /dev/null
+++ b/a2a/exgentic_agent/.env.advanced
@@ -0,0 +1,4 @@
+OPENAI_API_KEY='{"valueFrom": {"secretKeyRef": {"name": "openai-secret", "key": "apikey"}}}'
+OPENAI_API_BASE='https://api.openai.com/v1'
+AGENT_NAME=advanced_agent
+EXGENTIC_SET_AGENT_MODEL=openai/gpt-4o
diff --git a/a2a/exgentic_agent/.env.example b/a2a/exgentic_agent/.env.example
new file mode 100644
index 00000000..977c2a1d
--- /dev/null
+++ b/a2a/exgentic_agent/.env.example
@@ -0,0 +1,3 @@
+OPENAI_API_KEY='{"valueFrom": {"secretKeyRef": {"name": "openai-secret", "key": "apikey"}}}'
+OPENAI_API_BASE='https://api.openai.com/v1'
+EXGENTIC_SET_AGENT_MODEL=openai/gpt-4o
diff --git a/a2a/exgentic_agent/Dockerfile b/a2a/exgentic_agent/Dockerfile
new file mode 100644
index 00000000..bb15b58e
--- /dev/null
+++ b/a2a/exgentic_agent/Dockerfile
@@ -0,0 +1,79 @@
+FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
+
+# Build arguments
+ARG AGENT_NAME
+ARG RELEASE_VERSION="main"
+
+# Validate AGENT_NAME is provided
+RUN if [ -z "$AGENT_NAME" ]; then \
+        echo "ERROR: AGENT_NAME build argument is required"; \
+        echo "Usage: docker build --build-arg AGENT_NAME=<agent> -t exgentic-a2a-<agent> ."; \
+        exit 1; \
+    fi
+
+# Install system dependencies (git and git-lfs are needed for cloning).
+# --no-install-recommends keeps the layer small; ca-certificates is listed
+# explicitly because the HTTPS clone below depends on it.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        git \
+        git-lfs \
+    && rm -rf /var/lib/apt/lists/* \
+    && git lfs install
+
+# Configure git to handle large repositories and avoid HTTP/2 issues
+RUN git config --global http.postBuffer 524288000 && \
+    git config --global http.version HTTP/1.1 && \
+    git config --global core.compression 0
+
+# Set working directory
+WORKDIR /app
+
+# Clone the Exgentic repository
+RUN git clone https://github.com/Exgentic/exgentic.git /app/exgentic
+
+# Checkout the feature/mcp-command branch
+WORKDIR /app/exgentic
+RUN git checkout feature/mcp-command
+
+# Set UV_SYSTEM_PYTHON before any uv operations to ensure uv uses system Python
+ENV UV_SYSTEM_PYTHON=1
+
+# Install Exgentic and its dependencies using uv
+RUN uv pip install --system --no-cache -e .
+
+# Set HOME to /app before installing the agent, mirroring
+# mcp/exgentic_benchmarks/Dockerfile: whatever the installer writes under $HOME
+# then lands in the directory that is handed to the runtime user below,
+# instead of in root's home where UID 1001 cannot read it.
+ENV HOME=/app
+
+# Setup the agent - UV_SYSTEM_PYTHON is already set above
+RUN exgentic install --agent "${AGENT_NAME}"
+
+# Copy entrypoint script
+WORKDIR /app
+COPY entrypoint.sh /app/entrypoint.sh
+RUN chmod +x /app/entrypoint.sh
+
+# Set environment variables
+ENV AGENT_NAME=${AGENT_NAME} \
+    HOST=0.0.0.0 \
+    PORT=8000 \
+    LOG_LEVEL=INFO \
+    PRODUCTION_MODE=True \
+    RELEASE_VERSION=${RELEASE_VERSION}
+
+# Create non-root user with /app as home directory; /app is owned by
+# 1001:0 and group 0 gets read/write access as well
+RUN useradd -u 1001 -d /app exgentic && \
+    chown -R 1001:0 /app && \
+    chmod -R g+rwX /app
+
+USER 1001
+
+# Expose the default port
+EXPOSE 8000
+
+# Set the entrypoint
+ENTRYPOINT ["/app/entrypoint.sh"]
diff --git a/a2a/exgentic_agent/README.md b/a2a/exgentic_agent/README.md
new file mode 100644
index 00000000..cb40b309
--- /dev/null
+++ b/a2a/exgentic_agent/README.md
@@ -0,0 +1,275 @@
+# Exgentic A2A Agent Wrapper
+
+A Docker-based wrapper that runs [Exgentic](https://github.com/Exgentic/exgentic) agents using the A2A (Agent-to-Agent) protocol. This wrapper clones the Exgentic repository, installs a specific agent at build time, and exposes it via the A2A interface.
These agents are used for evaluation against the [Exgentic benchmarks MCP servers](../../mcp/exgentic_benchmarks/README.md). The test harness used to evaluate the agents against the benchmarks is found in the [workload-harness repo](https://github.com/kagenti/workload-harness/blob/main/exgentic_a2a_runner/README.md). + +## Overview + +This wrapper provides access to Exgentic agents through the A2A protocol. Each Docker image is built with a specific agent pre-installed, making it easy to deploy and run different agents in isolated environments. + +## Features + +- **Agent-Specific Images**: Each Docker image contains a single agent +- **Build-Time Setup**: Agents are installed during image build for faster startup +- **Flexible Configuration**: HOST and PORT configurable at runtime via environment variables +- **Consistent Interface**: Follows A2A protocol standards +- **Security**: Runs as non-root user (UID 1001) +- **Production Ready**: Includes proper error handling and logging +- **Build Script**: Convenient build.sh script with docker/podman auto-detection + +## Prerequisites + +- Docker or Podman installed on your system +- Internet connection (for cloning repository and downloading agent data) +- Sufficient disk space (agent data can be large) + +## Quick Start + +### Using the Build Script (Recommended) + +The easiest way to build an agent image: + +```bash +cd a2a/exgentic_agent + +# Build an agent image +./build.sh tool_calling +``` + +The script will: +- Auto-detect docker or podman +- Build the image with proper tagging +- Provide colored output and progress information + + +### Run the Agent + +```bash +docker run -p 8000:8000 \ + -e MCP_URL=http://host.containers.internal:8000/mcp \ + -e EXGENTIC_SET_AGENT_MODEL='openai/gpt-4o' \ + -e OPENAI_API_KEY \ + -e OPENAI_API_BASE \ + exgentic-a2a-tool_calling:latest + +``` + +The agent will start on `http://0.0.0.0:8000` + +### Test the Agent + +```bash +# Check if agent is running +curl http://localhost:8000/health + +# List 
available capabilities +curl http://localhost:8000/capabilities +``` + +## Build Script Usage + +The `build.sh` script provides a convenient way to build agent images: + +```bash +# Basic usage +./build.sh AGENT_NAME [--tag TAG] [--use-cache] + +# Examples +./build.sh tool_calling # Build without cache (default) +./build.sh tool_calling --tag v1.0.0 # Build v1.0.0 without cache +./build.sh tool_calling --tag dev # Build with 'dev' tag +./build.sh tool_calling --use-cache # Build with cache enabled +./build.sh tool_calling --tag v1.0.0 --use-cache # Build v1.0.0 with cache + +# Get help +./build.sh --help +``` + +**Features:** +- Automatically detects docker or podman +- Colored output for better readability +- Build summary with success/failure counts +- Builds without cache by default for consistency +- Optional cache usage with `--use-cache` flag + +## Configuration + +### Build Arguments + +| Argument | Required | Default | Description | +|----------|----------|---------|-------------| +| `AGENT_NAME` | Yes | - | The agent to install (e.g., tool_calling, agent1, agent2) | +| `RELEASE_VERSION` | No | main | Version tag for tracking | + +### Runtime Environment Variables + +| Variable | Required | Default | Description | +|----------|----------|---------|-------------| +| `HOST` | No | 0.0.0.0 | Server host address | +| `PORT` | No | 8000 | Server port | +| `LOG_LEVEL` | No | INFO | Logging level (DEBUG, INFO, WARNING, ERROR) | +| `AGENT_NAME` | No | (from build) | Agent name (set during build) | +| `EXGENTIC_SET_*` | No | - | Runtime configuration parameters (see below) | + +### Runtime Configuration with --set Parameters + +You can pass runtime configuration parameters to the `exgentic a2a` command using environment variables with the `EXGENTIC_SET_` prefix. These will be converted to `--set` arguments. 
+ +**Format**: `EXGENTIC_SET_<SECTION>_<KEY>=<VALUE>` +- The `<SECTION>` will be separated from `<KEY>` with a dot +- The rest of the underscores in `<KEY>` remain as underscores +- Everything is converted to lowercase +- Example: `EXGENTIC_SET_AGENT_MODEL` → `--set agent.model` + +**Common Parameters**: +- `EXGENTIC_SET_AGENT_MODEL` - Set the agent model (e.g., `openai/gpt-4o`) +- `EXGENTIC_SET_AGENT_MAX_STEPS` - Set maximum steps +- `EXGENTIC_SET_AGENT_TEMPERATURE` - Set temperature +- `EXGENTIC_SET_AGENT_TIMEOUT` - Set timeout +- Any other agent-specific configuration parameter + +### Custom Configuration Examples + + +**Set multiple runtime parameters:** +```bash +docker run -p 8000:8000 \ + -e EXGENTIC_SET_AGENT_MODEL='openai/gpt-4o' \ + -e EXGENTIC_SET_AGENT_MAX_STEPS='50' \ + -e EXGENTIC_SET_AGENT_TEMPERATURE='0.7' \ + exgentic-a2a-tool_calling:latest +``` + +### API Credentials + +When using external models, you need to provide API credentials as environment variables: + +| Variable | Required | Description | +|----------|----------|-------------| +| `OPENAI_API_KEY` | Yes (for OpenAI models) | Your OpenAI API key | +| `OPENAI_API_BASE` | No | Custom API base URL (if using a proxy or alternative endpoint) | + +**Example with OpenAI credentials:** +```bash +docker run -p 8000:8000 \ + -e OPENAI_API_KEY='your-api-key-here' \ + -e EXGENTIC_SET_AGENT_MODEL='openai/gpt-4o' \ + exgentic-a2a-tool_calling:latest +``` + +**Example with custom API base:** +```bash +docker run -p 8000:8000 \ + -e OPENAI_API_KEY='your-api-key-here' \ + -e OPENAI_API_BASE='https://custom-endpoint.example.com/v1' \ + -e EXGENTIC_SET_AGENT_MODEL='openai/gpt-4o' \ + exgentic-a2a-tool_calling:latest +``` + +## Build Process + +``` +1. Clone Exgentic repository (HTTPS) + ↓ +2. Checkout feature/mcp-command branch + ↓ +3. Install Exgentic and dependencies + ↓ +4. Run: exgentic install --agent $AGENT_NAME + ↓ +5. Configure entrypoint and permissions + ↓ +6. 
Create image: exgentic-a2a-{agent}:latest +``` + +## Runtime Process + +``` +1. Container starts with entrypoint.sh + ↓ +2. Read environment variables (HOST, PORT, MCP_URL, EXGENTIC_SET_*) + ↓ +3. Execute: exgentic a2a --agent $AGENT_NAME --host $HOST --port $PORT --mcp $MCP_URL + ↓ +4. A2A agent listens on configured HOST:PORT +``` + +## Image Naming Convention + +Images follow the pattern: `exgentic-a2a-{agent}:latest` + +Examples: +- `exgentic-a2a-tool_calling:latest` +- `exgentic-a2a-agent1:latest` +- `exgentic-a2a-agent2:latest` + + +## Repository Information + +- **Source**: https://github.com/Exgentic/exgentic.git +- **Branch**: feature/mcp-command +- **Protocol**: HTTPS (public repository) + +## A2A Protocol + +This wrapper implements the Agent-to-Agent (A2A) protocol, which allows agents to communicate and collaborate programmatically. + +### Key Features + +- **Agent Discovery**: Agents expose their capabilities via A2A +- **Type Safety**: Strong typing for parameters and returns +- **Documentation**: Built-in documentation for each capability +- **Error Handling**: Standardized error responses +- **Transport**: HTTP transport with streamable support + +## Security Considerations + +- Runs as non-root user (UID 1001) +- No SSH keys or secrets in image +- Public repository access only +- Minimal attack surface +- Production-ready configuration + +## Performance Notes + +- **Build Time**: 5-15 minutes depending on agent size +- **Image Size**: Varies by agent (typically 1-5 GB) +- **Startup Time**: Fast (agent already installed) +- **Memory**: Depends on agent requirements + +## Files in This Directory + +- `Dockerfile` - Single-stage image build configuration +- `entrypoint.sh` - Container startup script +- `build.sh` - Convenient build script with auto-detection +- `.dockerignore` - Files to exclude from build context +- `.env.example` - Basic environment variable template +- `.env.advanced` - Advanced configuration example +- `README.md` - This file + +## Contributing + +When adding support for new 
agents:
+
+1. Verify the agent exists in the Exgentic repository
+2. Test the build process using `./build.sh agent_name`
+3. Document any special requirements
+4. Update this README with examples
+
+## License
+
+See the repository's LICENSE file for details.
+
+## Support
+
+For issues related to:
+- **This A2A wrapper**: Open an issue in the agent-examples repository
+- **Exgentic agents**: Refer to the Exgentic repository
+- **A2A protocol**: See the Agent-to-Agent protocol documentation
+
+## Related Resources
+
+- [Exgentic Repository](https://github.com/Exgentic/exgentic)
+- [Agent-to-Agent Protocol](https://github.com/a2aproject/A2A)
+- [Docker Documentation](https://docs.docker.com/)
+- [Podman Documentation](https://podman.io/)
+- [Other A2A Agents](../README.md)
\ No newline at end of file
diff --git a/a2a/exgentic_agent/build.sh b/a2a/exgentic_agent/build.sh
new file mode 100755
index 00000000..9aaf8f98
--- /dev/null
+++ b/a2a/exgentic_agent/build.sh
@@ -0,0 +1,209 @@
+#!/bin/bash
+set -e
+
+# Script to build exgentic a2a agent images
+# Automatically detects whether to use docker or podman
+
+# Color output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Functions to print colored messages
+print_info() {
+    echo -e "${GREEN}[INFO]${NC} $1"
+}
+
+print_warn() {
+    echo -e "${YELLOW}[WARN]${NC} $1"
+}
+
+# Errors go to stderr so they stay visible when a caller captures stdout,
+# as main does with RUNTIME=$(detect_runtime)
+print_error() {
+    echo -e "${RED}[ERROR]${NC} $1" >&2
+}
+
+# Detect container runtime (docker or podman).
+# Prints the runtime name on stdout; exits 1 if neither is installed.
+detect_runtime() {
+    if command -v docker &> /dev/null; then
+        echo "docker"
+    elif command -v podman &> /dev/null; then
+        echo "podman"
+    else
+        print_error "Neither docker nor podman is installed!"
+        print_error "Please install one of them to continue."
+        exit 1
+    fi
+}
+
+# Build image for a specific agent.
+# Arguments: $1 agent name, $2 container runtime, $3 image tag,
+#            $4 "false" to build with --no-cache, any other value to use the cache
+# Returns 0 if the build succeeds, 1 otherwise.
+build_agent() {
+    local agent=$1
+    local runtime=$2
+    local image_name="localhost/exgentic-a2a-${agent}"
+    local tag=$3
+    local use_cache=$4
+
+    print_info "Building ${image_name}:${tag} using ${runtime}..."
+    if [ "$use_cache" = "false" ]; then
+        print_info "Building without cache (default)"
+    else
+        print_info "Building with cache enabled"
+    fi
+
+    # Assemble the command as an array so each argument stays a single word
+    local build_cmd=("$runtime" build)
+    if [ "$use_cache" = "false" ]; then
+        build_cmd+=(--no-cache)
+    fi
+
+    # Build the image with the agent name as build arg
+    if "${build_cmd[@]}" \
+        --build-arg AGENT_NAME="${agent}" \
+        -t "${image_name}:${tag}" \
+        -f Dockerfile \
+        .; then
+        print_info "✓ Successfully built ${image_name}:${tag}"
+        return 0
+    else
+        print_error "✗ Failed to build ${image_name}:${tag}"
+        return 1
+    fi
+}
+
+# Main script: parse arguments, build the requested agent image, print a summary
+main() {
+    local script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+    cd "$script_dir"
+
+    print_info "Exgentic A2A Agent Image Builder"
+    print_info "================================="
+
+    # Detect container runtime
+    RUNTIME=$(detect_runtime)
+    print_info "Detected container runtime: ${RUNTIME}"
+
+    # Parse command line arguments
+    AGENT=""
+    TAG="latest"
+    USE_CACHE="false"  # Default: do not use cache for consistency
+
+    while [[ $# -gt 0 ]]; do
+        case $1 in
+            --tag)
+                # Without a value, "shift 2" fails and set -e would end the script silently
+                if [[ $# -lt 2 ]]; then
+                    print_error "Option --tag requires a value"
+                    echo "Use --help for usage information"
+                    exit 1
+                fi
+                TAG="$2"
+                shift 2
+                ;;
+            --use-cache)
+                USE_CACHE="true"
+                shift
+                ;;
+            --help|-h)
+                cat << EOF
+Usage: $0 AGENT_NAME [--tag TAG] [--use-cache]
+
+Build exgentic a2a agent Docker/Podman image.
+
+Arguments:
+    AGENT_NAME          Agent name (required, positional)
+    --tag TAG           Image tag (optional, default: latest)
+    --use-cache         Use Docker cache during build (optional, default: no cache for consistency)
+
+Examples:
+    $0 my_agent                           # Build without cache (default)
+    $0 my_agent --tag v1.0.0              # Build v1.0.0 without cache
+    $0 my_agent --use-cache               # Build with cache enabled
+    $0 my_agent --tag v1.0.0 --use-cache  # Build v1.0.0 with cache
+
+The script automatically detects whether to use docker or podman.
+By default, builds do not use cache to ensure consistency.
+EOF
+                exit 0
+                ;;
+            -*)
+                print_error "Unknown option: $1"
+                echo "Use --help for usage information"
+                exit 1
+                ;;
+            *)
+                if [ -z "$AGENT" ]; then
+                    AGENT="$1"
+                    shift
+                else
+                    print_error "Unexpected argument: $1"
+                    echo "Use --help for usage information"
+                    exit 1
+                fi
+                ;;
+        esac
+    done
+
+    # Validate agent name is provided
+    if [ -z "$AGENT" ]; then
+        print_error "Agent name is required!"
+        echo ""
+        echo "Usage: $0 AGENT_NAME [--tag TAG] [--use-cache]"
+        echo ""
+        echo "Example: $0 my_agent --tag v1.0.0"
+        echo "Use --help for more information"
+        exit 1
+    fi
+
+    AGENTS=("$AGENT")
+
+    print_info "Building agent: ${AGENT}"
+    print_info "Image tag: ${TAG}"
+    if [ "$USE_CACHE" = "true" ]; then
+        print_info "Cache: enabled"
+    else
+        print_info "Cache: disabled (default)"
+    fi
+    echo ""
+
+    # Build each agent
+    SUCCESS_COUNT=0
+    FAIL_COUNT=0
+
+    for agent in "${AGENTS[@]}"; do
+        if build_agent "$agent" "$RUNTIME" "$TAG" "$USE_CACHE"; then
+            # Not ((SUCCESS_COUNT++)): it returns status 1 when the old value is 0,
+            # which makes set -e abort the script right after a successful build
+            SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
+        else
+            FAIL_COUNT=$((FAIL_COUNT + 1))
+        fi
+        echo ""
+    done
+
+    # Summary
+    print_info "Build Summary"
+    print_info "============="
+    print_info "Successful: ${SUCCESS_COUNT}"
+    if [ $FAIL_COUNT -gt 0 ]; then
+        print_error "Failed: ${FAIL_COUNT}"
+        exit 1
+    else
+        print_info "All builds completed successfully!"
+        echo ""
+        print_info "Built images:"
+        for agent in "${AGENTS[@]}"; do
+            echo "  - localhost/exgentic-a2a-${agent}:${TAG}"
+        done
+    fi
+}
+
+main "$@"
+
+# Made with Bob
\ No newline at end of file
diff --git a/a2a/exgentic_agent/entrypoint.sh b/a2a/exgentic_agent/entrypoint.sh
new file mode 100644
index 00000000..cd949ccd
--- /dev/null
+++ b/a2a/exgentic_agent/entrypoint.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+set -e
+
+# Read environment variables with defaults
+HOST=${HOST:-0.0.0.0}
+PORT=${PORT:-8000}
+LOG_LEVEL=${LOG_LEVEL:-INFO}
+
+# Validate required environment variables (errors go to stderr)
+if [ -z "$AGENT_NAME" ]; then
+    echo "ERROR: AGENT_NAME environment variable is not set" >&2
+    exit 1
+fi
+
+if [ -z "$MCP_URL" ]; then
+    echo "ERROR: MCP_URL environment variable is not set" >&2
+    exit 1
+fi
+
+# Validate MCP_URL format
+if [[ ! "$MCP_URL" =~ ^https?:// ]]; then
+    echo "ERROR: MCP_URL must start with http:// or https://" >&2
+    exit 1
+fi
+
+echo "Starting Exgentic A2A Agent"
+echo "Agent: $AGENT_NAME"
+echo "Host: $HOST"
+echo "Port: $PORT"
+echo "Log Level: $LOG_LEVEL"
+echo "MCP URL: $MCP_URL"
+
+# Build --set arguments from EXGENTIC_SET_* environment variables.
+# "${!EXGENTIC_SET_@}" expands to the names of all variables with that prefix,
+# so values containing newlines or '=' cannot be mistaken for variable names
+# (which parsing the output of `env` allowed).
+SET_ARGS=()
+for var in "${!EXGENTIC_SET_@}"; do
+    # Extract the key by removing EXGENTIC_SET_ prefix and converting to lowercase
+    key=${var#EXGENTIC_SET_}
+    key=${key,,}
+    # Replace first underscore with dot, keep rest as underscores
+    key=${key/_/.}
+    value="${!var}"
+    SET_ARGS+=("--set" "$key=$value")
+    echo "Setting: $key=$value"
+done
+
+# Change to the exgentic directory
+cd /app/exgentic
+
+# Run the exgentic a2a command with --mcp and --set arguments
+# NOTE(review): this comment used to say --disable-dns-rebinding-protection is
+# added so Kubernetes can reach the service, but the flag is not passed below -
+# confirm whether it is still required
+echo "Command: exgentic a2a --agent $AGENT_NAME --host $HOST --port $PORT --mcp $MCP_URL ${SET_ARGS[*]}"
+exec exgentic a2a --agent "$AGENT_NAME" --host "$HOST" --port "$PORT" --mcp "$MCP_URL" "${SET_ARGS[@]}"
+
+# Made with Bob
diff --git a/mcp/exgentic_benchmarks/.dockerignore b/mcp/exgentic_benchmarks/.dockerignore
new file mode 100644
index 00000000..412dc9c7
--- /dev/null
+++ b/mcp/exgentic_benchmarks/.dockerignore
@@ -0,0 +1,38 @@
+# Git files
+.git
+.gitignore
+.gitattributes
+
+# Documentation
+*.md
+!README.md
+
+# IDE and editor files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# Python cache
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+
+# Virtual environments
+venv/
+env/
+ENV/
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
+
+# Temporary files
+*.tmp
+*.temp
\ No newline at end of file
diff --git a/mcp/exgentic_benchmarks/.env.gsm8k b/mcp/exgentic_benchmarks/.env.gsm8k
new file mode 100644
index 00000000..ac20ad33
--- /dev/null
+++ b/mcp/exgentic_benchmarks/.env.gsm8k
@@ -0,0 +1,2 @@
+BENCHMARK_NAME=gsm8k
+HF_TOKEN='{"valueFrom": {"secretKeyRef": {"name": "hf-secret", "key": "hf-token"}}}'
diff --git a/mcp/exgentic_benchmarks/.env.tau2 b/mcp/exgentic_benchmarks/.env.tau2
new file mode 100644
index 00000000..5c46d5c3
--- /dev/null
+++ b/mcp/exgentic_benchmarks/.env.tau2
@@ -0,0 +1,4 @@
+OPENAI_API_KEY='{"valueFrom": {"secretKeyRef": {"name": "openai-secret", "key": "apikey"}}}'
+OPENAI_API_BASE='https://api.openai.com/v1'
+BENCHMARK_NAME=tau2
+EXGENTIC_SET_BENCHMARK_USER_SIMULATOR_MODEL=openai/gpt-4o
\ No newline at end of file
diff --git a/mcp/exgentic_benchmarks/Dockerfile b/mcp/exgentic_benchmarks/Dockerfile
new file mode 100644
index 00000000..757db319
--- /dev/null
+++ b/mcp/exgentic_benchmarks/Dockerfile
@@ -0,0 +1,83 @@
+FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
+
+# Build arguments
+ARG BENCHMARK_NAME
+ARG RELEASE_VERSION="main"
+
+# Validate BENCHMARK_NAME is provided
+RUN if [ -z "$BENCHMARK_NAME" ]; then \
+        echo "ERROR: BENCHMARK_NAME build argument is required"; \
+        echo "Usage: docker build --build-arg BENCHMARK_NAME=<benchmark> -t exgentic-mcp-<benchmark> ."; \
+        exit 1; \
+    fi
+
+# Install system dependencies (git and git-lfs are needed for cloning).
+# --no-install-recommends keeps the layer small; ca-certificates is listed
+# explicitly because the HTTPS clone below depends on it.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        git \
+        git-lfs \
+    && rm -rf /var/lib/apt/lists/* \
+    && git lfs install
+
+# Configure git to handle large repositories and avoid HTTP/2 issues
+RUN git config --global http.postBuffer 524288000 && \
+    git config --global http.version HTTP/1.1 && \
+    git config --global core.compression 0
+
+# Set working directory
+WORKDIR /app
+
+# Clone the Exgentic repository
+RUN git clone https://github.com/Exgentic/exgentic.git /app/exgentic
+
+# Checkout the feature/mcp-command branch
+WORKDIR /app/exgentic
+RUN git checkout feature/mcp-command
+
+# Set UV_SYSTEM_PYTHON before any uv operations to ensure uv uses system Python
+ENV UV_SYSTEM_PYTHON=1
+
+# Install Exgentic and its dependencies using uv
+RUN uv pip install --system --no-cache -e .
+
+# Install datasets into the system Python to allow direct (not venv) access to
+# GSM8K and other simple benchmarks
+RUN uv pip install --system --no-cache datasets
+
+# Set HOME to /app before installing benchmark so data goes to the right location
+ENV HOME=/app
+
+# Setup the benchmark - this will install to /app/.exgentic/
+RUN exgentic install --benchmark "${BENCHMARK_NAME}"
+
+# Copy entrypoint script
+WORKDIR /app
+COPY entrypoint.sh /app/entrypoint.sh
+RUN chmod +x /app/entrypoint.sh
+
+# Set environment variables
+ENV BENCHMARK_NAME=${BENCHMARK_NAME} \
+    HOST=0.0.0.0 \
+    PORT=8000 \
+    LOG_LEVEL=INFO \
+    PRODUCTION_MODE=True \
+    RELEASE_VERSION=${RELEASE_VERSION}
+
+# Create non-root user with /app as home directory and set permissions
+# Using UID 1000 to match Kubernetes securityContext runAsUser
+RUN useradd -u 1000 -d /app exgentic && \
+    chown -R 1000:0 /app && \
+    chmod -R g+rwX /app
+
+USER 1000
+
+# Set HOME after switching user to ensure it's not overridden
+ENV HOME=/app
+
+# Expose the default port
+EXPOSE 8000
+
+# Set the entrypoint
+ENTRYPOINT ["/app/entrypoint.sh"]
diff --git a/mcp/exgentic_benchmarks/README.md b/mcp/exgentic_benchmarks/README.md
new file mode 100644
index 00000000..89d04563
--- /dev/null
+++
b/mcp/exgentic_benchmarks/README.md @@ -0,0 +1,441 @@ +# Exgentic Benchmarks MCP Server + +A Docker-based MCP (Model Context Protocol) server that runs the [Exgentic](https://github.com/Exgentic/exgentic) benchmark system. This server clones the Exgentic repository, installs a specific benchmark at build time, and exposes it via the MCP protocol. + +The test harness used to evaluate the benchmark against agents is found in the [workload-harness repo](https://github.com/kagenti/workload-harness/blob/main/exgentic_a2a_runner/README.md). + +## Overview + +This MCP server provides access to Exgentic benchmarks through a standardized interface. Each Docker image is built with a specific benchmark pre-installed, making it easy to deploy and run different benchmarks in isolated environments. + +## Features + +- **Benchmark-Specific Images**: Each Docker image contains a single benchmark +- **Build-Time Setup**: Benchmarks are installed during image build for faster startup +- **Flexible Configuration**: HOST and PORT configurable at runtime via environment variables +- **Consistent Interface**: Follows MCP protocol standards +- **Security**: Runs as non-root user (UID 1000) +- **Production Ready**: Includes proper error handling and logging +- **Build Script**: Convenient build.sh script with docker/podman auto-detection + +## Prerequisites + +- Docker installed on your system +- Internet connection (for cloning repository and downloading benchmark data) +- Sufficient disk space (benchmark data can be large) + +## Quick Start + +### Build the Docker Image + +Build an image with the `tau2` benchmark: + +```bash +cd mcp/exgentic_benchmarks + +# Using the build script (recommended) +./build.sh tau2 + +# Or using docker directly +docker build \ + --build-arg BENCHMARK_NAME=tau2 \ + -t exgentic-mcp-tau2:latest \ + . 
+``` + +### Run the Server + +```bash +docker run -p 8000:8000 exgentic-mcp-tau2:latest +``` + +The server will start on `http://0.0.0.0:8000` + +### Test the Server + +```bash +# Check if server is running +curl http://localhost:8000/health + +# List available tools +curl http://localhost:8000/tools +``` + +## Using the Build Script + +The `build.sh` script provides a convenient way to build benchmark images: + +```bash +# Basic usage +./build.sh BENCHMARK [--tag TAG] [--use-cache] + +# Examples +./build.sh tau2 # Build without cache (default) +./build.sh gsm8k --tag v1.0.0 # Build v1.0.0 without cache +./build.sh tau2 --tag dev # Build with 'dev' tag +./build.sh tau2 --use-cache # Build with cache enabled +./build.sh gsm8k --tag v1.0.0 --use-cache # Build v1.0.0 with cache + +# Get help +./build.sh --help +``` + +**Features:** +- Automatically detects docker or podman +- Colored output for better readability +- Build summary with success/failure counts +- Builds without cache by default for consistency +- Optional cache usage with `--use-cache` flag + +## Building for Different Benchmarks + +You can build images for different benchmarks: + +### Using build.sh (Recommended) +```bash +./build.sh tau2 +./build.sh gsm8k +./build.sh webarena +``` + +### Using Docker Directly +```bash +# WebArena Benchmark +docker build \ + --build-arg BENCHMARK_NAME=webarena \ + -t exgentic-mcp-webarena:latest \ + . + +# MiniWoB Benchmark +docker build \ + --build-arg BENCHMARK_NAME=miniwob \ + -t exgentic-mcp-miniwob:latest \ + . + +# Tau2 Benchmark +docker build \ + --build-arg BENCHMARK_NAME=tau2 \ + -t exgentic-mcp-tau2:latest \ + . 
+``` + +## Configuration + +### Build Arguments + +| Argument | Required | Default | Description | +|----------|----------|---------|-------------| +| `BENCHMARK_NAME` | Yes | - | The benchmark to install (e.g., tau2, webarena, miniwob) | +| `RELEASE_VERSION` | No | main | Version tag for tracking | + +### Runtime Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `HOST` | 0.0.0.0 | Server host address | +| `PORT` | 8000 | Server port | +| `LOG_LEVEL` | INFO | Logging level (DEBUG, INFO, WARNING, ERROR) | +| `BENCHMARK_NAME` | (from build) | Benchmark name (set during build) | +| `EXGENTIC_SET_*` | - | Runtime configuration parameters (see below) | + +### Runtime Configuration with --set Parameters + +You can pass runtime configuration parameters to the `exgentic mcp` command using environment variables with the `EXGENTIC_SET_` prefix. These will be converted to `--set` arguments. + +**Format**: `EXGENTIC_SET_<SECTION>_<KEY>=<VALUE>` +- The `<SECTION>` will be separated from `<KEY>` with a dot +- The rest of the underscores in `<KEY>` remain as underscores +- Everything is converted to lowercase +- Example: `EXGENTIC_SET_BENCHMARK_USER_SIMULATOR_MODEL` → `--set benchmark.user_simulator_model` + +**Common Parameters**: +- `EXGENTIC_SET_BENCHMARK_USER_SIMULATOR_MODEL` - Set the user simulator model (e.g., `openai/Azure/gpt-4o`) +- `EXGENTIC_SET_BENCHMARK_AGENT_MODEL` - Set the agent model +- `EXGENTIC_SET_BENCHMARK_MAX_STEPS` - Set maximum steps +- `EXGENTIC_SET_BENCHMARK_MAX_INTERACTIONS` - Set maximum interactions +- `EXGENTIC_SET_BENCHMARK_SEED` - Set random seed +- Any other benchmark-specific configuration parameter + + +### Custom Configuration Examples + +**Run on a different port:** +```bash +docker run -p 9000:9000 \ + -e PORT=9000 \ + exgentic-mcp-tau2:latest +``` + +**Enable debug logging:** +```bash +docker run -p 8000:8000 \ + -e LOG_LEVEL=DEBUG \ + exgentic-mcp-tau2:latest +``` + +**Bind to specific host:** +```bash +docker run -p 8000:8000 
\ + -e HOST=127.0.0.1 \ + exgentic-mcp-tau2:latest +``` + +**Set user simulator model:** +```bash +docker run -p 8000:8000 \ + -e EXGENTIC_SET_BENCHMARK_USER_SIMULATOR_MODEL='openai/Azure/gpt-4o' \ + exgentic-mcp-tau2:latest +``` + +**Set multiple runtime parameters:** +```bash +docker run -p 8000:8000 \ + -e EXGENTIC_SET_BENCHMARK_USER_SIMULATOR_MODEL='openai/Azure/gpt-4o' \ + -e EXGENTIC_SET_BENCHMARK_AGENT_MODEL='anthropic/claude-3-5-sonnet-20241022' \ + -e EXGENTIC_SET_BENCHMARK_MAX_STEPS='50' \ + exgentic-mcp-tau2:latest +``` + + + +### Build Process + +``` +1. Clone Exgentic repository (HTTPS) + ↓ +2. Checkout feature/mcp-command branch + ↓ +3. Install Exgentic and dependencies + ↓ +4. Run: exgentic install --benchmark $BENCHMARK_NAME + ↓ +5. Configure entrypoint and permissions + ↓ +6. Create image: exgentic-mcp-{benchmark}:latest +``` + +### Runtime Process + +``` +1. Container starts with entrypoint.sh + ↓ +2. Read environment variables (HOST, PORT) + ↓ +3. Execute: exgentic mcp --benchmark $BENCHMARK_NAME --host $HOST --port $PORT + ↓ +4. MCP server listens on configured HOST:PORT +``` + +## Image Naming Convention + +Images follow the pattern: `exgentic-mcp-{benchmark}:latest` + +Examples: +- `exgentic-mcp-tau2:latest` +- `exgentic-mcp-webarena:latest` +- `exgentic-mcp-miniwob:latest` + +## Advanced Usage + +### Docker Compose + +Create a `docker-compose.yml`: + +```yaml +version: '3.8' + +services: + exgentic-tau2: + build: + context: . 
+ args: + BENCHMARK_NAME: tau2 + ports: + - "8000:8000" + environment: + - HOST=0.0.0.0 + - PORT=8000 + - LOG_LEVEL=INFO + restart: unless-stopped +``` + +Run with: +```bash +docker-compose up -d +``` + +### Multi-Benchmark Deployment + +Run multiple benchmarks simultaneously on different ports: + +```bash +# Tau2 on port 8000 +docker run -d -p 8000:8000 --name exgentic-tau2 exgentic-mcp-tau2:latest + +# WebArena on port 8001 +docker run -d -p 8001:8001 -e PORT=8001 --name exgentic-webarena exgentic-mcp-webarena:latest + +# MiniWoB on port 8002 +docker run -d -p 8002:8002 -e PORT=8002 --name exgentic-miniwob exgentic-mcp-miniwob:latest +``` + +### Volume Mounting (Optional) + +If you need to persist data or share files: + +```bash +docker run -p 8000:8000 \ + -v $(pwd)/data:/app/data \ + exgentic-mcp-tau2:latest +``` + +## Troubleshooting + +### Build Fails + +**Problem**: `BENCHMARK_NAME build argument is required` +```bash +# Solution: Always provide BENCHMARK_NAME +docker build --build-arg BENCHMARK_NAME=tau2 -t exgentic-mcp-tau2 . 
+``` + +**Problem**: Git clone fails +```bash +# Solution: Check internet connection and GitHub access +# The repository is public and uses HTTPS, so no authentication needed +``` + +**Problem**: Benchmark setup fails +```bash +# Solution: Check if the benchmark name is valid +# Verify the benchmark exists in the Exgentic repository +# Check build logs for specific error messages +``` + +### Runtime Issues + +**Problem**: Port already in use +```bash +# Solution: Use a different port +docker run -p 9000:9000 -e PORT=9000 exgentic-mcp-tau2:latest +``` + +**Problem**: Container exits immediately +```bash +# Solution: Check logs +docker logs <container-id> + +# Run in foreground to see errors +docker run -it exgentic-mcp-tau2:latest +``` + +**Problem**: Cannot connect to server +```bash +# Solution: Verify port mapping and firewall +docker ps # Check if container is running +curl http://localhost:8000/health # Test connection +``` + +### Debugging + +**View container logs:** +```bash +docker logs -f <container-id> +``` + +**Access container shell:** +```bash +docker exec -it <container-id> /bin/bash +``` + +**Check running processes:** +```bash +docker exec <container-id> ps aux +``` + +## Development + +### Local Development + +For development, you can mount the local Exgentic repository: + +```bash +docker run -p 8000:8000 \ + -v /path/to/local/exgentic:/app/exgentic \ + exgentic-mcp-tau2:latest +``` + +### Rebuilding + +After making changes, rebuild the image: + +```bash +# Using build script (builds without cache by default) +./build.sh tau2 + +# Or using docker directly +docker build --no-cache \ + --build-arg BENCHMARK_NAME=tau2 \ + -t exgentic-mcp-tau2:latest \ + . +``` + +## Repository Information + +- **Source**: https://github.com/Exgentic/exgentic.git +- **Branch**: feature/mcp-command +- **Protocol**: HTTPS (public repository) + +## MCP Protocol + +This server implements the Model Context Protocol (MCP), which allows AI assistants to discover and use benchmark tools programmatically. 
+ +### Key Features + +- **Tool Discovery**: Benchmarks expose their capabilities via MCP +- **Type Safety**: Strong typing for parameters and returns +- **Documentation**: Built-in documentation for each tool +- **Error Handling**: Standardized error responses +- **Transport**: HTTP transport with streamable support + +## Security Considerations + +- Runs as non-root user (UID 1001) +- No SSH keys or secrets in image +- Public repository access only +- Minimal attack surface +- Production-ready configuration + +## Performance Notes + +- **Build Time**: 5-15 minutes depending on benchmark size +- **Image Size**: Varies by benchmark (typically 1-5 GB) +- **Startup Time**: Fast (benchmark already installed) +- **Memory**: Depends on benchmark requirements + +## Contributing + +When adding support for new benchmarks: + +1. Verify the benchmark exists in the Exgentic repository +2. Test the build process using `./build.sh benchmark_name` +3. Document any special requirements +4. Update this README with examples + +## License + +See the repository's LICENSE file for details. 
+
+## Support
+
+For issues related to:
+- **This MCP server**: Open an issue in the agent-examples repository
+- **Exgentic benchmarks**: Refer to the Exgentic repository
+- **MCP protocol**: See the Model Context Protocol documentation
+
+## Related Resources
+
+- [Exgentic Repository](https://github.com/Exgentic/exgentic)
+- [Model Context Protocol](https://modelcontextprotocol.io/)
+- [Docker Documentation](https://docs.docker.com/)
+- [Other MCP Tools](../README.md)
\ No newline at end of file
diff --git a/mcp/exgentic_benchmarks/build.sh b/mcp/exgentic_benchmarks/build.sh
new file mode 100755
index 00000000..0c6b301d
--- /dev/null
+++ b/mcp/exgentic_benchmarks/build.sh
@@ -0,0 +1,222 @@
+#!/bin/bash
+set -e
+
+# Script to build an exgentic benchmark image (tau2 or gsm8k)
+# Automatically detects whether to use docker or podman
+
+# Color output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Print an informational message on stdout.
+print_info() {
+    echo -e "${GREEN}[INFO]${NC} $1"
+}
+
+# Print a warning on stderr.
+print_warn() {
+    echo -e "${YELLOW}[WARN]${NC} $1" >&2
+}
+
+# Print an error on stderr so it is still shown when the caller's stdout is
+# captured, as in RUNTIME=$(detect_runtime).
+print_error() {
+    echo -e "${RED}[ERROR]${NC} $1" >&2
+}
+
+# Detect container runtime: print "docker" or "podman" (docker is tried first)
+# on stdout, or report an error and exit 1 when neither is installed.
+detect_runtime() {
+    if command -v docker &> /dev/null; then
+        echo "docker"
+    elif command -v podman &> /dev/null; then
+        echo "podman"
+    else
+        print_error "Neither docker nor podman is installed!"
+        print_error "Please install one of them to continue."
+        exit 1
+    fi
+}
+
+# Build image for a specific benchmark.
+#   $1  benchmark name, passed to the build as BENCHMARK_NAME
+#   $2  container runtime command (docker or podman)
+#   $3  image tag
+#   $4  "false" adds --no-cache; any other value lets the build use the cache
+# Returns 0 when the build succeeds and 1 when it fails.
+build_benchmark() {
+    local benchmark=$1
+    local runtime=$2
+    local image_name="localhost/exgentic-mcp-${benchmark}"
+    local tag=$3
+    local use_cache=$4
+
+    print_info "Building ${image_name}:${tag} using ${runtime}..."
+    if [ "$use_cache" = "false" ]; then
+        print_info "Building without cache (default)"
+    else
+        print_info "Building with cache enabled"
+    fi
+
+    # Keep the command in an array so each argument stays a single word
+    local build_cmd=("$runtime" build)
+    if [ "$use_cache" = "false" ]; then
+        build_cmd+=(--no-cache)
+    fi
+
+    # Build the image with the benchmark name as build arg
+    if "${build_cmd[@]}" \
+        --build-arg BENCHMARK_NAME="${benchmark}" \
+        -t "${image_name}:${tag}" \
+        -f Dockerfile \
+        .; then
+        print_info "✓ Successfully built ${image_name}:${tag}"
+        return 0
+    else
+        print_error "✗ Failed to build ${image_name}:${tag}"
+        return 1
+    fi
+}
+
+# Main script: parse the arguments, build the requested benchmark image and
+# print a summary. Exits 1 on a usage error or when a build fails.
+main() {
+    # Work from the script's own directory; the build below uses the relative
+    # paths "Dockerfile" and ".".
+    local script_dir
+    script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+    cd "$script_dir"
+
+    print_info "Exgentic Benchmark Image Builder"
+    print_info "================================="
+
+    # Detect container runtime
+    RUNTIME=$(detect_runtime)
+    print_info "Detected container runtime: ${RUNTIME}"
+
+    # Parse command line arguments
+    BENCHMARK=""
+    TAG="latest"
+    USE_CACHE="false"  # Default: do not use cache for consistency
+
+    while [[ $# -gt 0 ]]; do
+        case $1 in
+            --tag)
+                # A trailing --tag would otherwise make "shift 2" fail and
+                # end the script under set -e without any message
+                if [[ $# -lt 2 ]]; then
+                    print_error "Option --tag requires a value"
+                    echo "Use --help for usage information"
+                    exit 1
+                fi
+                TAG="$2"
+                shift 2
+                ;;
+            --use-cache)
+                USE_CACHE="true"
+                shift
+                ;;
+            --help|-h)
+                cat << EOF
+Usage: $0 BENCHMARK [--tag TAG] [--use-cache]
+
+Build exgentic benchmark Docker/Podman image.
+
+Arguments:
+  BENCHMARK       Benchmark name (required, positional: tau2 or gsm8k)
+  --tag TAG       Image tag (optional, default: latest)
+  --use-cache     Use Docker cache during build (optional, default: no cache for consistency)
+
+Examples:
+  $0 tau2                              # Build without cache (default)
+  $0 gsm8k --tag v1.0.0                # Build v1.0.0 without cache
+  $0 tau2 --use-cache                  # Build with cache enabled
+  $0 gsm8k --tag v1.0.0 --use-cache    # Build v1.0.0 with cache
+
+Available benchmarks:
+  - tau2
+  - gsm8k
+
+The script automatically detects whether to use docker or podman.
+By default, builds do not use cache to ensure consistency.
+EOF
+                exit 0
+                ;;
+            -*)
+                print_error "Unknown option: $1"
+                echo "Use --help for usage information"
+                exit 1
+                ;;
+            *)
+                if [ -z "$BENCHMARK" ]; then
+                    BENCHMARK="$1"
+                    shift
+                else
+                    print_error "Unexpected argument: $1"
+                    echo "Use --help for usage information"
+                    exit 1
+                fi
+                ;;
+        esac
+    done
+
+    # Validate benchmark name is provided
+    if [ -z "$BENCHMARK" ]; then
+        print_error "Benchmark name is required!"
+        echo ""
+        echo "Usage: $0 BENCHMARK [--tag TAG] [--use-cache]"
+        echo ""
+        echo "Available benchmarks: tau2, gsm8k"
+        echo "Example: $0 tau2 --tag v1.0.0"
+        echo "Use --help for more information"
+        exit 1
+    fi
+
+    BENCHMARKS=("$BENCHMARK")
+
+    print_info "Building benchmark: ${BENCHMARK}"
+    print_info "Image tag: ${TAG}"
+    if [ "$USE_CACHE" = "true" ]; then
+        print_info "Cache: enabled"
+    else
+        print_info "Cache: disabled (default)"
+    fi
+    echo ""
+
+    # Build each benchmark
+    SUCCESS_COUNT=0
+    FAIL_COUNT=0
+
+    for benchmark in "${BENCHMARKS[@]}"; do
+        if build_benchmark "$benchmark" "$RUNTIME" "$TAG" "$USE_CACHE"; then
+            # Plain arithmetic assignment: ((SUCCESS_COUNT++)) returns status 1
+            # when the old value is 0, which would abort the script under set -e
+            SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
+        else
+            FAIL_COUNT=$((FAIL_COUNT + 1))
+        fi
+        echo ""
+    done
+
+    # Summary
+    print_info "Build Summary"
+    print_info "============="
+    print_info "Successful: ${SUCCESS_COUNT}"
+    if [ $FAIL_COUNT -gt 0 ]; then
+        print_error "Failed: ${FAIL_COUNT}"
+        exit 1
+    else
+        print_info "All builds completed successfully!"
+        echo ""
+        print_info "Built images:"
+        for benchmark in "${BENCHMARKS[@]}"; do
+            echo "  - localhost/exgentic-mcp-${benchmark}:${TAG}"
+        done
+    fi
+}
+
+main "$@"
+
+# Made with Bob
diff --git a/mcp/exgentic_benchmarks/entrypoint.sh b/mcp/exgentic_benchmarks/entrypoint.sh
new file mode 100644
index 00000000..6fb2bea3
--- /dev/null
+++ b/mcp/exgentic_benchmarks/entrypoint.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+set -e
+
+# Ensure HOME is set (Kubernetes may override it)
+export HOME=${HOME:-/app}
+
+# Read environment variables with defaults
+HOST=${HOST:-0.0.0.0}
+PORT=${PORT:-8000}
+LOG_LEVEL=${LOG_LEVEL:-INFO}
+
+# Validate BENCHMARK_NAME is set
+if [ -z "$BENCHMARK_NAME" ]; then
+    echo "ERROR: BENCHMARK_NAME environment variable is not set" >&2
+    exit 1
+fi
+
+# Point TAU2_DATA_DIR at the tau2 benchmark directory under /app/.exgentic
+# Temp fix until we have a better way to handle this
+export TAU2_DATA_DIR="/app/.exgentic/benchmarks/tau2"
+
+echo "Starting Exgentic MCP Server"
+echo "Benchmark: $BENCHMARK_NAME"
+echo "Host: $HOST"
+echo "Port: $PORT"
+echo "Log Level: $LOG_LEVEL"
+
+# Build --set arguments from EXGENTIC_SET_* environment variables.
+# They are collected in an array and handed to exec unchanged, so a value is
+# never re-parsed by the shell, whatever quotes or metacharacters it contains.
+SET_ARGS=()
+for var in "${!EXGENTIC_SET_@}"; do
+    # Extract the key by removing EXGENTIC_SET_ prefix and converting to lowercase
+    # Replace first underscore with dot, keep rest as underscores
+    # (EXGENTIC_SET_AGENT_MODEL becomes agent.model)
+    key=${var#EXGENTIC_SET_}
+    key=${key,,}
+    key=${key/_/.}
+    value="${!var}"
+    SET_ARGS+=(--set "$key=$value")
+    echo "Setting: $key=$value"
+done
+
+# Change to the exgentic directory
+cd /app/exgentic
+
+# Run the exgentic MCP server with --set arguments
+# --disable-dns-rebinding-protection is added to allow kubernetes to access the MCP
+echo "Command: exgentic mcp --benchmark $BENCHMARK_NAME --host $HOST --port $PORT ${SET_ARGS[*]} --disable-dns-rebinding-protection"
+exec exgentic mcp --benchmark "$BENCHMARK_NAME" --host "$HOST" --port "$PORT" "${SET_ARGS[@]}" --disable-dns-rebinding-protection
+