From 984320cefbe229b37098d670f2f9eddf5b222931 Mon Sep 17 00:00:00 2001 From: Zig Date: Sat, 21 Feb 2026 11:08:10 -0500 Subject: [PATCH] feat: Add OpenClaw environment for agentic RL training This PR adds a new OpenClaw environment that exposes agentic tool capabilities for reinforcement learning training. OpenClaw is a personal AI assistant framework that provides agents with access to: - File system operations (read, write, edit) - Shell command execution (exec) - Web research tools (web_search, web_fetch) - Memory/context management (memory_search, memory_get) The environment wraps these capabilities as MCP (Model Context Protocol) tools, enabling RL agents to learn real-world agentic workflows like coding, research, and automation tasks. Features: - Full MCP tool integration following the OpenEnv patterns - Isolated workspace per episode for safe exploration - Sandbox mode for web tools (simulated results) - Comprehensive test suite - Docker support with CI integration Usage: ```python from openclaw_env import OpenClawEnv with OpenClawEnv(base_url="http://localhost:8000") as env: env.reset() tools = env.list_tools() result = env.call_tool("exec", command="echo hello") ``` --- .github/workflows/docker-build.yml | 2 + envs/openclaw_env/README.md | 272 ++++++++ envs/openclaw_env/__init__.py | 34 + envs/openclaw_env/client.py | 98 +++ envs/openclaw_env/openenv.yaml | 3 + envs/openclaw_env/pyproject.toml | 47 ++ envs/openclaw_env/server/Dockerfile | 45 ++ envs/openclaw_env/server/__init__.py | 11 + envs/openclaw_env/server/app.py | 57 ++ .../server/openclaw_environment.py | 629 ++++++++++++++++++ envs/openclaw_env/server/requirements.txt | 9 + tests/envs/test_openclaw_environment.py | 348 ++++++++++ 12 files changed, 1555 insertions(+) create mode 100644 envs/openclaw_env/README.md create mode 100644 envs/openclaw_env/__init__.py create mode 100644 envs/openclaw_env/client.py create mode 100644 envs/openclaw_env/openenv.yaml create mode 100644 
envs/openclaw_env/pyproject.toml create mode 100644 envs/openclaw_env/server/Dockerfile create mode 100644 envs/openclaw_env/server/__init__.py create mode 100644 envs/openclaw_env/server/app.py create mode 100644 envs/openclaw_env/server/openclaw_environment.py create mode 100644 envs/openclaw_env/server/requirements.txt create mode 100644 tests/envs/test_openclaw_environment.py diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index b7a96d454..43b56a064 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -99,6 +99,8 @@ jobs: context: envs/openapp_env - name: maze-env dockerfile: envs/maze_env/server/Dockerfile + - name: openclaw-env + dockerfile: envs/openclaw_env/server/Dockerfile steps: - name: Checkout code diff --git a/envs/openclaw_env/README.md b/envs/openclaw_env/README.md new file mode 100644 index 000000000..d51a30f99 --- /dev/null +++ b/envs/openclaw_env/README.md @@ -0,0 +1,272 @@ +# OpenClaw Environment + +An OpenEnv environment that exposes [OpenClaw](https://github.com/openclaw/openclaw)'s agentic tool capabilities for reinforcement learning training. + +## Overview + +OpenClaw is a personal AI assistant framework that provides agents with access to: +- **File system operations**: Read, write, and edit files +- **Shell execution**: Run commands in a sandboxed environment +- **Web research**: Search and fetch web content +- **Memory management**: Search and retrieve context from memory files + +This environment wraps these capabilities as MCP (Model Context Protocol) tools, enabling RL agents to learn agentic workflows like coding, research, and automation tasks. 
+ +## Quick Start + +### Using a Running Server + +```python +from openclaw_env import OpenClawEnv + +# Connect to a running OpenClaw environment +with OpenClawEnv(base_url="http://localhost:8000") as env: + env.reset() + + # List available tools + tools = env.list_tools() + print([t.name for t in tools]) + # ['exec', 'read', 'write', 'edit', 'web_search', 'web_fetch', 'memory_search', 'memory_get'] + + # Execute a shell command + result = env.call_tool("exec", command="echo 'Hello from OpenClaw!'") + print(result) # {"stdout": "Hello from OpenClaw!\n", "exit_code": 0, ...} + + # Create and read a file + env.call_tool("write", path="hello.txt", content="Hello, World!") + result = env.call_tool("read", path="hello.txt") + print(result["content"]) # "Hello, World!" +``` + +### Using Docker + +```python +from openclaw_env import OpenClawEnv + +# Start container automatically +env = OpenClawEnv.from_docker_image("openclaw-env:latest") +try: + env.reset() + result = env.call_tool("exec", command="pwd") + print(result) +finally: + env.close() +``` + +### Using HuggingFace Space + +```python +from openclaw_env import OpenClawEnv + +env = OpenClawEnv.from_env("openenv/openclaw-env") +try: + env.reset() + tools = env.list_tools() +finally: + env.close() +``` + +## Available Tools + +### File System Tools + +#### `read` +Read contents of a file with optional line range. + +```python +result = env.call_tool("read", path="config.py", offset=1, limit=100) +# Returns: {"content": "...", "lines_read": 100, "truncated": False, "path": "/workspace/config.py"} +``` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `path` | str | required | Path to file (relative or absolute) | +| `offset` | int | 1 | Starting line number (1-indexed) | +| `limit` | int | 2000 | Maximum lines to read | + +#### `write` +Write content to a file, creating directories as needed. 
+ +```python +result = env.call_tool("write", path="src/main.py", content="print('hello')") +# Returns: {"success": True, "path": "/workspace/src/main.py", "bytes_written": 14} +``` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `path` | str | required | Path to file | +| `content` | str | required | Content to write | + +#### `edit` +Make precise edits by replacing exact text. + +```python +result = env.call_tool("edit", + path="config.py", + old_string="DEBUG = False", + new_string="DEBUG = True" +) +# Returns: {"success": True, "replacements": 1, "path": "/workspace/config.py"} +``` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `path` | str | required | Path to file | +| `old_string` | str | required | Exact text to find | +| `new_string` | str | required | Replacement text | + +### Shell Execution + +#### `exec` +Execute shell commands in the workspace. + +```python +result = env.call_tool("exec", command="ls -la", timeout=30) +# Returns: {"stdout": "...", "stderr": "", "exit_code": 0, "command": "ls -la"} +``` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `command` | str | required | Shell command to execute | +| `workdir` | str | workspace | Working directory | +| `timeout` | int | 30 | Timeout in seconds | + +### Web Tools + +#### `web_search` +Search the web (simulated in sandbox mode). + +```python +result = env.call_tool("web_search", query="python asyncio tutorial", count=5) +# Returns: {"query": "...", "results": [{"title": "...", "url": "...", "snippet": "..."}]} +``` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `query` | str | required | Search query | +| `count` | int | 5 | Number of results (1-10) | + +#### `web_fetch` +Fetch content from a URL (simulated in sandbox mode). 
+ +```python +result = env.call_tool("web_fetch", url="https://example.com", extract_mode="markdown") +# Returns: {"url": "...", "content": "...", "extract_mode": "markdown"} +``` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `url` | str | required | URL to fetch | +| `extract_mode` | str | "markdown" | "markdown" or "text" | +| `max_chars` | int | 10000 | Maximum characters | + +### Memory Tools + +#### `memory_search` +Search memory files for relevant context. + +```python +result = env.call_tool("memory_search", query="API endpoints", max_results=5) +# Returns: {"query": "...", "results": [{"path": "...", "snippet": "..."}]} +``` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `query` | str | required | Search query | +| `max_results` | int | 5 | Maximum results | + +#### `memory_get` +Get a snippet from a memory file. + +```python +result = env.call_tool("memory_get", path="memory/notes.md", from_line=10, lines=20) +# Returns: {"content": "...", "lines_read": 20, "path": "..."} +``` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `path` | str | required | Path to memory file | +| `from_line` | int | 1 | Starting line (1-indexed) | +| `lines` | int | 50 | Number of lines | + +## Training Examples + +### With TRL (GRPO) + +```python +from trl import GRPOTrainer, GRPOConfig +from openclaw_env import OpenClawEnv + +# Environment factory +def env_factory(): + return OpenClawEnv(base_url="http://localhost:8000") + +config = GRPOConfig( + # ... training config +) + +trainer = GRPOTrainer( + model=model, + config=config, + env_factory=env_factory, +) + +trainer.train() +``` + +### With torchforge + +See [examples/grpo_openclaw/](../../examples/grpo_openclaw/) for a complete training example. 
+ +## Building Docker Image + +```bash +# Build base image first +docker build -t openenv-base:latest -f src/openenv/core/containers/images/Dockerfile . + +# Build OpenClaw environment +docker build -t openclaw-env:latest -f envs/openclaw_env/server/Dockerfile . + +# Run the container +docker run -p 8000:8000 openclaw-env:latest +``` + +## Development + +### Local Testing + +```bash +# Install in development mode +cd envs/openclaw_env +pip install -e ".[dev]" + +# Run tests +PYTHONPATH=../../src:.. pytest tests/ -v + +# Run server locally +uv run --project . server +``` + +### Running Tests + +```bash +# From repository root +PYTHONPATH=src:envs pytest tests/envs/test_openclaw_environment.py -v +``` + +## Security Notes + +- **Sandbox mode**: By default, web tools return simulated results +- **Workspace isolation**: Each episode gets a fresh workspace directory +- **Command restrictions**: Commands run with limited environment variables +- **File access**: Files are constrained to the workspace directory + +For production deployments with real web access, configure the appropriate API keys and security policies. + +## License + +BSD 3-Clause License (see [LICENSE](../../LICENSE)) + +## Contributing + +See [CONTRIBUTING.md](../../CONTRIBUTING.md) for contribution guidelines. diff --git a/envs/openclaw_env/__init__.py b/envs/openclaw_env/__init__.py new file mode 100644 index 000000000..eebb234b0 --- /dev/null +++ b/envs/openclaw_env/__init__.py @@ -0,0 +1,34 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +OpenClaw Environment - An MCP environment for training agents on OpenClaw's tool ecosystem. 
+ +OpenClaw is a personal AI assistant framework with access to local tools: +- File system operations (read, write, edit) +- Shell command execution +- Web search and fetch +- Memory/context management + +This environment exposes OpenClaw's capabilities as MCP tools for RL training, +enabling agents to learn agentic workflows like coding, research, and automation. + +Example: + >>> from openclaw_env import OpenClawEnv + >>> + >>> with OpenClawEnv(base_url="http://localhost:8000") as env: + ... env.reset() + ... tools = env.list_tools() + ... result = env.call_tool("exec", command="echo hello") + ... print(result) +""" + +from .client import OpenClawEnv + +# Re-export MCP types for convenience +from openenv.core.env_server.mcp_types import CallToolAction, ListToolsAction + +__all__ = ["OpenClawEnv", "CallToolAction", "ListToolsAction"] diff --git a/envs/openclaw_env/client.py b/envs/openclaw_env/client.py new file mode 100644 index 000000000..98668c2fd --- /dev/null +++ b/envs/openclaw_env/client.py @@ -0,0 +1,98 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +OpenClaw Environment Client. + +This module provides the client for connecting to an OpenClaw Environment server. +OpenClawEnv extends MCPToolClient to provide tool-calling style interactions +with OpenClaw's agentic capabilities. + +Example: + >>> with OpenClawEnv(base_url="http://localhost:8000") as env: + ... env.reset() + ... + ... # Discover tools + ... tools = env.list_tools() + ... print([t.name for t in tools]) + ... + ... # Execute shell command + ... result = env.call_tool("exec", command="ls -la") + ... print(result) + ... + ... # Read a file + ... result = env.call_tool("read", path="README.md") + ... print(result) + ... + ... # Search the web + ... result = env.call_tool("web_search", query="PyTorch tutorials") + ... 
print(result)
+"""
+
+from openenv.core.mcp_client import MCPToolClient
+
+
+class OpenClawEnv(MCPToolClient):
+    """
+    Client for the OpenClaw Environment.
+
+    This client adds no behavior of its own; it only gives MCPToolClient an
+    environment-specific name. The functionality it inherits from
+    MCPToolClient:
+    - `list_tools()`: Discover available tools
+    - `call_tool(name, **kwargs)`: Call a tool by name
+    - `reset(**kwargs)`: Reset the environment
+    - `step(action)`: Execute an action (for advanced use)
+
+    Available Tools (as registered by the server in this package):
+    - exec: Execute shell commands in the episode workspace
+    - read: Read file contents
+    - write: Write content to files
+    - edit: Make precise edits to files
+    - web_search: Search the web (the bundled server returns simulated results)
+    - web_fetch: Fetch a URL (the bundled server returns simulated content)
+    - memory_search: Search memory/context files
+    - memory_get: Get snippets from memory files
+
+    Example:
+        >>> # Connect to a running server
+        >>> with OpenClawEnv(base_url="http://localhost:8000") as env:
+        ...     env.reset()
+        ...
+        ...     # List available tools
+        ...     tools = env.list_tools()
+        ...     for tool in tools:
+        ...         print(f"{tool.name}: {tool.description}")
+        ...
+        ...     # Execute a shell command
+        ...     result = env.call_tool("exec", command="pwd")
+        ...     print(result)
+        ...
+        ...     # Read a file
+        ...     result = env.call_tool("read", path="setup.py")
+        ...     print(result)
+
+    Example with Docker:
+        >>> # Automatically start container and connect
+        >>> env = OpenClawEnv.from_docker_image("openclaw-env:latest")
+        >>> try:
+        ...     env.reset()
+        ...     tools = env.list_tools()
+        ...     result = env.call_tool("exec", command="echo hello")
+        ... finally:
+        ...     env.close()
+
+    Example with HuggingFace Space:
+        >>> # Run from HuggingFace Space
+        >>> env = OpenClawEnv.from_env("openenv/openclaw-env")
+        >>> try:
+        ...     env.reset()
+        ...     result = env.call_tool("web_search", query="reinforcement learning")
+        ... finally:
+        ...
env.close() + """ + + pass # MCPToolClient provides all needed functionality diff --git a/envs/openclaw_env/openenv.yaml b/envs/openclaw_env/openenv.yaml new file mode 100644 index 000000000..9fc583932 --- /dev/null +++ b/envs/openclaw_env/openenv.yaml @@ -0,0 +1,3 @@ +name: openclaw_env +version: 0.1.0 +description: OpenClaw agentic environment for RL training diff --git a/envs/openclaw_env/pyproject.toml b/envs/openclaw_env/pyproject.toml new file mode 100644 index 000000000..244bec1eb --- /dev/null +++ b/envs/openclaw_env/pyproject.toml @@ -0,0 +1,47 @@ +[project] +name = "openclaw-env" +version = "0.1.0" +description = "OpenClaw agentic environment for RL training" +readme = "README.md" +license = "BSD-3-Clause" +requires-python = ">=3.10" +keywords = ["openenv", "openclaw", "agents", "reinforcement-learning", "mcp"] +authors = [ + { name = "OpenClaw Team" }, + { name = "Meta PyTorch Team" }, +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering :: Artificial Intelligence", +] + +dependencies = [ + "openenv-core>=0.1.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.0.0", + "pytest-asyncio>=0.21.0", +] + +[project.urls] +Homepage = "https://github.com/meta-pytorch/OpenEnv" +Documentation = "https://meta-pytorch.org/OpenEnv/" +Repository = "https://github.com/meta-pytorch/OpenEnv" + +[project.scripts] +server = "server.app:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["."] diff --git a/envs/openclaw_env/server/Dockerfile b/envs/openclaw_env/server/Dockerfile new file mode 100644 index 000000000..db7627b1e --- /dev/null +++ b/envs/openclaw_env/server/Dockerfile @@ -0,0 
+1,45 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# OpenClaw Environment Dockerfile +# Provides an isolated execution environment for training agents on +# OpenClaw's agentic tool capabilities. + +ARG BASE_IMAGE=openenv-base:latest +FROM ${BASE_IMAGE} + +# Install system dependencies for shell execution +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + curl \ + wget \ + jq \ + tree \ + && rm -rf /var/lib/apt/lists/* + +# Install Python dependencies +COPY envs/openclaw_env/server/requirements.txt /tmp/requirements.txt +RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt + +# Copy environment code +COPY src/openenv/core/ /app/src/openenv/core/ +COPY envs/openclaw_env/ /app/envs/openclaw_env/ + +# Create workspace directory +RUN mkdir -p /app/workspace && chmod 755 /app/workspace + +# Set workspace environment variable +ENV OPENCLAW_WORKSPACE=/app/workspace + +# Health check +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 + +# Expose port +EXPOSE 8000 + +# Run server +CMD ["uvicorn", "envs.openclaw_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/envs/openclaw_env/server/__init__.py b/envs/openclaw_env/server/__init__.py new file mode 100644 index 000000000..1d1de7e70 --- /dev/null +++ b/envs/openclaw_env/server/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+
+"""OpenClaw Environment Server."""
+
+from .openclaw_environment import OpenClawEnvironment
+
+__all__ = ["OpenClawEnvironment"]
diff --git a/envs/openclaw_env/server/app.py b/envs/openclaw_env/server/app.py
new file mode 100644
index 000000000..9a5e743a1
--- /dev/null
+++ b/envs/openclaw_env/server/app.py
@@ -0,0 +1,62 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+FastAPI application for the OpenClaw Environment.
+
+This module creates an HTTP server that exposes the OpenClawEnvironment
+over HTTP and WebSocket endpoints, compatible with MCPToolClient.
+
+Usage:
+    # Development (with auto-reload):
+    uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
+
+    # Production:
+    uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
+
+    # Or run directly:
+    uv run --project . server
+"""
+
+# Support both in-repo and standalone imports
+try:
+    from openenv.core.env_server.http_server import create_app
+    from openenv.core.env_server.mcp_types import CallToolAction, CallToolObservation
+    from .openclaw_environment import OpenClawEnvironment
+except ImportError:
+    from openenv.core.env_server.http_server import create_app
+    from openenv.core.env_server.mcp_types import CallToolAction, CallToolObservation
+    from server.openclaw_environment import OpenClawEnvironment
+
+# Create the app with web interface and README integration
+# Pass the class (factory) instead of an instance for WebSocket session support
+# Use MCP types for action/observation since this is a pure MCP environment
+app = create_app(
+    OpenClawEnvironment, CallToolAction, CallToolObservation, env_name="openclaw_env"
+)
+
+
+def main(host: str = "0.0.0.0", port: int = 8000) -> None:
+    """
+    Entry point for direct execution via uv run or python -m.
+
+    This function enables running the server without Docker:
+        uv run --project . server
+        python -m envs.openclaw_env.server.app
+        openenv serve openclaw_env
+
+    Args:
+        host: Interface to bind. Defaults to "0.0.0.0" (all interfaces).
+        port: TCP port to listen on. Defaults to 8000.
+    """
+    # Local import: uvicorn is only needed when the server is launched from Python.
+    import uvicorn
+
+    uvicorn.run(app, host=host, port=port)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/envs/openclaw_env/server/openclaw_environment.py b/envs/openclaw_env/server/openclaw_environment.py
new file mode 100644
index 000000000..b1fca2115
--- /dev/null
+++ b/envs/openclaw_env/server/openclaw_environment.py
@@ -0,0 +1,629 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+OpenClaw Environment Implementation.
+
+An MCP environment that provides OpenClaw's agentic tool capabilities for RL training.
+This enables agents to learn workflows involving file operations, shell commands,
+web research, and memory management.
+
+All interactions happen through MCP tools:
+- exec: Execute shell commands
+- read: Read file contents
+- write: Write to files
+- edit: Make precise file edits
+- web_search: Search the web
+- web_fetch: Fetch URL content
+- memory_search: Search memory files
+- memory_get: Get memory snippets
+
+Example:
+    >>> from openenv.core.env_server.mcp_types import ListToolsAction, CallToolAction
+    >>> env = OpenClawEnvironment()
+    >>> env.reset()
+    >>>
+    >>> # List available tools
+    >>> obs = env.step(ListToolsAction())
+    >>> print([t.name for t in obs.tools])
+    >>>
+    >>> # Execute a command
+    >>> obs = env.step(CallToolAction(tool_name="exec", arguments={"command": "echo hello"}))
+    >>> print(obs.result)
+"""
+
+import os
+import re
+import subprocess
+import tempfile
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from uuid import uuid4
+
+# Support both in-repo and standalone imports
+try:
+    from openenv.core.env_server.mcp_environment import MCPEnvironment
+    from openenv.core.env_server.types import Action, Observation, State
+except ImportError:
+
from openenv.core.env_server.mcp_environment import MCPEnvironment + from openenv.core.env_server.types import Action, Observation, State + +from fastmcp import FastMCP + + +# Maximum file size to read (50KB) +MAX_FILE_SIZE = 50 * 1024 +# Maximum lines to read +MAX_LINES = 2000 +# Command timeout in seconds +COMMAND_TIMEOUT = 30 +# Workspace directory (created per episode) +WORKSPACE_BASE = tempfile.gettempdir() + + +class OpenClawEnvironment(MCPEnvironment): + """ + An MCP environment providing OpenClaw's agentic capabilities. + + This environment exposes file system operations, shell execution, + web tools, and memory management through MCP tools. It's designed + for training agents on real-world agentic tasks. + + The environment maintains an isolated workspace per episode, allowing + safe exploration without affecting the host system. + + Security: + - Commands run in a sandboxed workspace + - File operations are restricted to the workspace + - Network operations (web_search, web_fetch) are simulated in sandbox mode + + Example: + >>> from openenv.core.mcp_client import MCPToolClient + >>> + >>> with MCPToolClient(base_url="http://localhost:8000") as env: + ... env.reset() + ... tools = env.list_tools() + ... result = env.call_tool("exec", command="echo hello") + ... print(result) + """ + + def __init__(self, workspace_dir: Optional[str] = None): + """ + Initialize the OpenClaw environment. + + Args: + workspace_dir: Optional base directory for workspaces. + If not provided, uses system temp directory. 
+        """
+        # Create MCP server and define tools inline
+        mcp = FastMCP("openclaw_env")
+
+        # Store reference to self for tool closures
+        env = self
+        self._workspace_base = workspace_dir or WORKSPACE_BASE
+        self._workspace: Optional[Path] = None
+        self._state = State(episode_id=str(uuid4()), step_count=0)
+
+        # =========================================================================
+        # File System Tools
+        # =========================================================================
+
+        @mcp.tool
+        def read(
+            path: str,
+            offset: int = 1,
+            limit: int = MAX_LINES,
+        ) -> Dict[str, Any]:
+            """
+            Read contents of a file.
+
+            Supports text files with an optional line range. At most MAX_LINES
+            lines and MAX_FILE_SIZE characters are returned per call, and
+            'truncated' is set whenever one of those caps cut the output short.
+
+            Args:
+                path: Path to the file (relative to workspace or absolute)
+                offset: Line number to start reading from (1-indexed)
+                limit: Maximum number of lines to read (negative values read nothing)
+
+            Returns:
+                Dictionary with 'content', 'lines_read', 'total_lines',
+                'truncated', and 'path'; or a dictionary with only 'error'
+                when the file is missing, is not a regular file, or cannot
+                be read.
+            """
+            file_path = env._resolve_path(path)
+
+            if not file_path.exists():
+                return {"error": f"File not found: {path}"}
+
+            if not file_path.is_file():
+                return {"error": f"Not a file: {path}"}
+
+            # Clamp the window up front. A negative limit used to become a
+            # negative slice end (returning the wrong lines), and the
+            # documented MAX_LINES cap was never actually applied.
+            start_idx = max(0, offset - 1)
+            line_cap = max(0, min(limit, MAX_LINES))
+
+            try:
+                with open(file_path, "r", encoding="utf-8", errors="replace") as f:
+                    lines = f.readlines()
+            except OSError as e:
+                return {"error": str(e)}
+
+            total_lines = len(lines)
+            selected_lines = lines[start_idx : start_idx + line_cap]
+            content = "".join(selected_lines)
+
+            # Flag truncation only when the MAX_LINES cap (not the caller's own
+            # limit) hid lines the caller asked for.
+            truncated = limit > MAX_LINES and start_idx + line_cap < total_lines
+
+            # Size cap is measured in characters of the decoded text.
+            if len(content) > MAX_FILE_SIZE:
+                content = content[:MAX_FILE_SIZE]
+                truncated = True
+
+            return {
+                "content": content,
+                "lines_read": len(selected_lines),
+                "total_lines": total_lines,
+                "truncated": truncated,
+                "path": str(file_path),
+            }
+
+        @mcp.tool
+        def write(path: str, content: str) -> Dict[str, Any]:
+            """
+            Write content to a file.
+
+            Creates the file if it doesn't exist, overwrites if it does.
+            Automatically creates parent directories.
+
+            Args:
+                path: Path to the file (relative to workspace or absolute)
+                content: Content to write to the file
+
+            Returns:
+                Dictionary with 'success', 'path', and 'bytes_written'
+            """
+            file_path = env._resolve_path(path)
+
+            try:
+                # Create parent directories if needed
+                file_path.parent.mkdir(parents=True, exist_ok=True)
+
+                with open(file_path, "w", encoding="utf-8") as f:
+                    f.write(content)
+
+                return {
+                    "success": True,
+                    "path": str(file_path),
+                    "bytes_written": len(content.encode("utf-8")),
+                }
+            except Exception as e:
+                return {"error": str(e), "success": False}
+
+        @mcp.tool
+        def edit(
+            path: str,
+            old_string: str,
+            new_string: str,
+        ) -> Dict[str, Any]:
+            """
+            Edit a file by replacing exact text.
+
+            The old_string must match exactly (including whitespace) and must
+            not be empty. Every occurrence is replaced.
+
+            Args:
+                path: Path to the file to edit
+                old_string: Exact, non-empty text to find and replace
+                new_string: New text to replace the old text with
+
+            Returns:
+                Dictionary with 'success', 'replacements', and 'path' on
+                success; on failure a dictionary with 'error' and
+                'success': False.
+            """
+            file_path = env._resolve_path(path)
+
+            if not file_path.exists():
+                return {"error": f"File not found: {path}", "success": False}
+
+            # An empty needle "matches" between every character, so
+            # str.replace would splice new_string throughout the whole file.
+            if not old_string:
+                return {
+                    "error": "old_string must not be empty",
+                    "success": False,
+                    "path": str(file_path),
+                }
+
+            try:
+                with open(file_path, "r", encoding="utf-8") as f:
+                    content = f.read()
+
+                # One scan gives both the "found?" answer and the count.
+                count = content.count(old_string)
+                if count == 0:
+                    return {
+                        "error": "Old string not found in file",
+                        "success": False,
+                        "path": str(file_path),
+                    }
+
+                new_content = content.replace(old_string, new_string)
+
+                with open(file_path, "w", encoding="utf-8") as f:
+                    f.write(new_content)
+
+                return {
+                    "success": True,
+                    "replacements": count,
+                    "path": str(file_path),
+                }
+            except (OSError, UnicodeError) as e:
+                # Unreadable/unwritable file or non-UTF-8 content.
+                return {"error": str(e), "success": False}
+
+        # =========================================================================
+        # Shell Execution Tool
+        # 
=========================================================================
+
+        @mcp.tool
+        def exec(
+            command: str,
+            workdir: Optional[str] = None,
+            timeout: int = COMMAND_TIMEOUT,
+        ) -> Dict[str, Any]:
+            """
+            Execute a shell command.
+
+            Commands run in the workspace directory by default, through the
+            shell, with HOME pointing at the workspace and only a small
+            allow-list of host environment variables passed through.
+
+            Args:
+                command: Shell command to execute
+                workdir: Working directory (defaults to workspace)
+                timeout: Timeout in seconds (default 30)
+
+            Returns:
+                Dictionary with 'stdout', 'stderr', 'exit_code', and 'command';
+                on timeout or failure, 'error', 'command', and 'exit_code': -1.
+            """
+            # Before reset() there is no workspace; str(None) would otherwise be
+            # handed to subprocess as a directory literally named "None".
+            if env._workspace is None:
+                return {
+                    "error": "Workspace not initialized; call reset() first",
+                    "command": command,
+                    "exit_code": -1,
+                }
+
+            cwd = env._resolve_path(workdir) if workdir else env._workspace
+
+            # Do not leak the server's whole environment (API keys, tokens, ...)
+            # into agent-controlled commands: forward only what a shell needs.
+            passthrough = (
+                "PATH",
+                "LANG",
+                "LC_ALL",
+                "LC_CTYPE",
+                "TERM",
+                "TZ",
+                "TMPDIR",
+                "SYSTEMROOT",
+                "COMSPEC",
+                "PATHEXT",
+            )
+            child_env = {
+                name: os.environ[name] for name in passthrough if name in os.environ
+            }
+            child_env["HOME"] = str(env._workspace)
+
+            try:
+                # shell=True is intentional: the tool's contract is "run this
+                # shell command line", pipes and redirects included.
+                result = subprocess.run(
+                    command,
+                    shell=True,
+                    cwd=str(cwd),
+                    capture_output=True,
+                    text=True,
+                    timeout=timeout,
+                    env=child_env,
+                )
+
+                return {
+                    "stdout": result.stdout,
+                    "stderr": result.stderr,
+                    "exit_code": result.returncode,
+                    "command": command,
+                }
+            except subprocess.TimeoutExpired:
+                return {
+                    "error": f"Command timed out after {timeout} seconds",
+                    "command": command,
+                    "exit_code": -1,
+                }
+            except Exception as e:
+                # Tool boundary: report any launch failure to the agent
+                # instead of crashing the server.
+                return {
+                    "error": str(e),
+                    "command": command,
+                    "exit_code": -1,
+                }
+
+        # =========================================================================
+        # Web Tools (Simulated in sandbox mode)
+        # =========================================================================
+
+        @mcp.tool
+        def web_search(
+            query: str,
+            count: int = 5,
+        ) -> Dict[str, Any]:
+            """
+            Search the web using Brave Search API.
+
+            Returns titles, URLs, and snippets for research tasks.
+            Note: In sandbox mode, returns simulated results.
+
+            Args:
+                query: Search query string
+                count: Number of results to return (1-10)
+
+            Returns:
+                Dictionary with 'results' list containing title, url, snippet
+            """
+            # In sandbox mode, return simulated results
+            # In production, this would call the Brave Search API
+            return {
+                "query": query,
+                "results": [
+                    {
+                        "title": f"Search result {i+1} for: {query}",
+                        "url": f"https://example.com/result{i+1}",
+                        "snippet": f"This is a simulated search result for '{query}'. "
+                        f"In production, this would return real search results.",
+                    }
+                    for i in range(min(count, 10))
+                ],
+                "note": "Simulated results in sandbox mode",
+            }
+
+        @mcp.tool
+        def web_fetch(
+            url: str,
+            extract_mode: str = "markdown",
+            max_chars: int = 10000,
+        ) -> Dict[str, Any]:
+            """
+            Fetch and extract readable content from a URL.
+
+            Note: In sandbox mode no request is made; a simulated page that
+            mentions the URL and extract mode is returned instead.
+
+            Args:
+                url: HTTP or HTTPS URL to fetch
+                extract_mode: Extraction mode ("markdown" or "text")
+                max_chars: Maximum characters to return (negative is treated as 0)
+
+            Returns:
+                Dictionary with 'url', 'content', 'extract_mode', 'truncated',
+                and 'note'
+            """
+            # In sandbox mode, return simulated content
+            page = (
+                f"# Simulated Content\n\n"
+                f"This is simulated content for URL: {url}\n\n"
+                f"In production mode, this would fetch and extract "
+                f"the actual page content in {extract_mode} format."
+            )
+
+            # Honor max_chars: it used to be accepted but ignored, with
+            # 'truncated' hard-coded to False.
+            char_cap = max(0, max_chars)
+
+            return {
+                "url": url,
+                "content": page[:char_cap],
+                "extract_mode": extract_mode,
+                "truncated": len(page) > char_cap,
+                "note": "Simulated content in sandbox mode",
+            }
+
+        # =========================================================================
+        # Memory/Context Tools
+        # =========================================================================
+
+        @mcp.tool
+        def memory_search(
+            query: str,
+            max_results: int = 5,
+        ) -> Dict[str, Any]:
+            """
+            Search memory files for relevant context.
+
+            Case-insensitive substring search over workspace memory files (MEMORY.md,
+            memory/*.md); returns matching snippets with file paths and line numbers.
+
+            Args:
+                query: Search query string
+                max_results: Maximum number of results to return
+
+            Returns:
+                Dictionary with 'query', 'results' (each holding path,
+                line_start, line_end, snippet, score) and 'total_found',
+                the number of matches before max_results was applied
+            """
+            results = []
+            memory_dir = env._workspace / "memory"
+
+            # Search MEMORY.md if it exists
+            memory_file = env._workspace / "MEMORY.md"
+            if memory_file.exists():
+                results.extend(env._search_file(memory_file, query))
+
+            # Search memory/*.md files; sorted so equal-score results come
+            # back in a stable order (glob order is filesystem-dependent)
+            if memory_dir.exists():
+                for md_file in sorted(memory_dir.glob("*.md")):
+                    results.extend(env._search_file(md_file, query))
+
+            # Simple relevance scoring (count query term occurrences)
+            needle = query.lower()
+            for result in results:
+                result["score"] = result["snippet"].lower().count(needle)
+
+            # Count before trimming so total_found really is the total
+            total_found = len(results)
+
+            # Sort by score and limit (a negative limit means "no results",
+            # not "drop the last N")
+            results.sort(key=lambda x: x["score"], reverse=True)
+            results = results[: max(0, max_results)]
+
+            return {
+                "query": query,
+                "results": results,
+                "total_found": total_found,
+            }
+
+        @mcp.tool
+        def memory_get(
+            path: str,
+            from_line: int = 1,
+            lines: int = 50,
+        ) -> Dict[str, Any]:
+            """
+            Get a snippet from a memory file.
+
+            Thin wrapper over the read tool; use after memory_search to pull
+            specific context.
+
+            Args:
+                path: Path to the memory file
+                from_line: Starting line number (1-indexed)
+                lines: Number of lines to read
+
+            Returns:
+                Whatever the read tool returns for the same range: 'content',
+                'lines_read', 'total_lines', 'truncated', 'path' (or 'error')
+            """
+            # Some FastMCP releases make @mcp.tool return a tool object that is
+            # not itself callable and keeps the plain function on ``.fn``;
+            # others hand the function back unchanged. Support both.
+            read_impl = getattr(read, "fn", read)
+            return read_impl(path=path, offset=from_line, limit=lines)
+
+        # Pass the MCP server to the base class
+        super().__init__(mcp)
+
+    def _resolve_path(self, path: Optional[str]) -> Path:
+        """
+        Resolve a path relative to the workspace.
+ + Args: + path: Path string (relative or absolute) + + Returns: + Resolved Path object, constrained to workspace + """ + if path is None: + return self._workspace + + path_obj = Path(path) + + # If absolute, check if it's within workspace + if path_obj.is_absolute(): + try: + path_obj.relative_to(self._workspace) + return path_obj + except ValueError: + # Path is outside workspace, treat as relative + return self._workspace / path_obj.name + else: + return self._workspace / path_obj + + def _search_file(self, file_path: Path, query: str) -> List[Dict[str, Any]]: + """ + Search a file for lines matching a query. + + Args: + file_path: Path to the file to search + query: Search query string + + Returns: + List of result dicts with path, line_start, snippet + """ + results = [] + query_lower = query.lower() + + try: + with open(file_path, "r", encoding="utf-8", errors="replace") as f: + lines = f.readlines() + + for i, line in enumerate(lines, 1): + if query_lower in line.lower(): + # Get context (2 lines before and after) + start = max(0, i - 3) + end = min(len(lines), i + 2) + snippet = "".join(lines[start:end]) + + results.append( + { + "path": str(file_path.relative_to(self._workspace)), + "line_start": start + 1, + "line_end": end, + "snippet": snippet.strip(), + } + ) + except Exception: + pass + + return results + + def reset( + self, + seed: Optional[int] = None, + episode_id: Optional[str] = None, + **kwargs: Any, + ) -> Observation: + """ + Reset the environment. + + Creates a fresh workspace directory for the new episode. 
def _step_impl(
    self,
    action: Action,
    timeout_s: Optional[float] = None,
    **kwargs: Any,
) -> Observation:
    """
    Fallback handler for actions that are not MCP actions.

    Only ListToolsAction and CallToolAction are supported by this
    environment, so every action that reaches this method is answered
    with an error observation.

    Args:
        action: The action that was submitted
        timeout_s: Optional timeout (unused here)
        **kwargs: Additional arguments (unused here)

    Returns:
        Observation whose metadata carries an 'error' message naming the
        rejected action type
    """
    action_name = type(action).__name__
    error_text = (
        f"Unknown action type: {action_name}. "
        "Use ListToolsAction or CallToolAction for MCP interactions."
    )
    return Observation(done=False, reward=0.0, metadata={"error": error_text})


def step(
    self,
    action: Action,
    timeout_s: Optional[float] = None,
    **kwargs: Any,
) -> Observation:
    """
    Run one environment step.

    Bumps the step counter first, then hands the action to the base
    class implementation.

    Args:
        action: The MCP action to execute
        timeout_s: Optional timeout for the action
        **kwargs: Additional arguments forwarded to the base class

    Returns:
        Observation produced by the base class for this action
    """
    current = self._state
    current.step_count = current.step_count + 1
    return super().step(action, timeout_s=timeout_s, **kwargs)


@property
def state(self) -> State:
    """
    Expose the state object of the current episode.

    Returns:
        The State stored on this environment (episode_id and step_count)
    """
    current = self._state
    return current


def close(self) -> None:
    """
    Shut the environment down.

    The workspace directory is intentionally left on disk so it can be
    inspected afterwards; only the base class cleanup runs.
    """
    # No workspace removal here by design; a production deployment may
    # want to delete it.
    super().close()
"""Tests for OpenClaw Environment."""

import tempfile
from pathlib import Path

import pytest

# Skip the whole module if dependencies aren't available. This must run
# before the environment import below.
pytest.importorskip("fastmcp")

from envs.openclaw_env.server.openclaw_environment import OpenClawEnvironment
from openenv.core.env_server.mcp_types import CallToolAction, ListToolsAction


def _payload(obs):
    """Return the dict a tool produced, from whichever field carries it."""
    return obs.result.structured_content.get("result", obs.result.data)


def _call(env, tool_name, **arguments):
    """Invoke one tool through ``env.step`` and return its result dict."""
    action = CallToolAction(tool_name=tool_name, arguments=arguments)
    return _payload(env.step(action))


@pytest.fixture
def env():
    """Yield a freshly reset environment rooted in a throwaway directory."""
    with tempfile.TemporaryDirectory() as tmpdir:
        environment = OpenClawEnvironment(workspace_dir=tmpdir)
        environment.reset()
        try:
            yield environment
        finally:
            # Release the environment before the directory is removed.
            environment.close()


class TestOpenClawEnvironment:
    """Test suite for OpenClawEnvironment."""

    def test_reset(self, env):
        """Test environment reset creates workspace."""
        obs = env.reset()

        assert obs.done is False
        assert obs.metadata["status"] == "ready"
        assert "workspace" in obs.metadata
        assert Path(obs.metadata["workspace"]).exists()

    def test_list_tools(self, env):
        """Test that all expected tools are available."""
        obs = env.step(ListToolsAction())
        tool_names = [t.name for t in obs.tools]

        expected_tools = [
            "read", "write", "edit", "exec",
            "web_search", "web_fetch",
            "memory_search", "memory_get",
        ]
        for tool in expected_tools:
            assert tool in tool_names, f"Missing tool: {tool}"

    def test_exec_echo(self, env):
        """Test executing a simple echo command."""
        action = CallToolAction(
            tool_name="exec",
            arguments={"command": "echo 'hello world'"},
        )
        obs = env.step(action)

        assert obs.result is not None
        result_data = _payload(obs)
        assert result_data["exit_code"] == 0
        assert "hello world" in result_data["stdout"]

    def test_write_and_read(self, env):
        """Test writing and reading a file."""
        write_result = _call(
            env, "write", path="test.txt", content="Hello, OpenClaw!"
        )
        assert write_result["success"] is True

        read_result = _call(env, "read", path="test.txt")
        assert read_result["content"] == "Hello, OpenClaw!"

    def test_edit_file(self, env):
        """Test editing a file."""
        _call(
            env,
            "write",
            path="config.txt",
            content="DEBUG = False\nVERBOSE = True",
        )

        edit_result = _call(
            env,
            "edit",
            path="config.txt",
            old_string="DEBUG = False",
            new_string="DEBUG = True",
        )
        assert edit_result["success"] is True
        assert edit_result["replacements"] == 1

        # Verify the change
        read_result = _call(env, "read", path="config.txt")
        assert "DEBUG = True" in read_result["content"]

    def test_read_nonexistent_file(self, env):
        """Test reading a file that doesn't exist."""
        result = _call(env, "read", path="nonexistent.txt")
        assert "error" in result

    def test_exec_with_workdir(self, env):
        """Test executing command in specific directory."""
        _call(
            env,
            "exec",
            command="mkdir -p subdir && echo 'test' > subdir/file.txt",
        )

        result = _call(env, "exec", command="cat file.txt", workdir="subdir")
        assert result["exit_code"] == 0
        assert "test" in result["stdout"]

    def test_web_search_sandbox(self, env):
        """Test web search returns simulated results in sandbox mode."""
        result = _call(
            env, "web_search", query="python reinforcement learning", count=3
        )
        assert "results" in result
        assert len(result["results"]) == 3
        assert "note" in result  # Sandbox mode indicator

    def test_web_fetch_sandbox(self, env):
        """Test web fetch returns simulated content in sandbox mode."""
        result = _call(
            env, "web_fetch", url="https://example.com", extract_mode="markdown"
        )
        assert "content" in result
        assert result["url"] == "https://example.com"
        assert "note" in result  # Sandbox mode indicator

    def test_memory_search_empty(self, env):
        """Test memory search with no memory files."""
        result = _call(env, "memory_search", query="test query")
        assert "results" in result
        assert result["total_found"] == 0

    def test_memory_search_with_content(self, env):
        """Test memory search finds content in memory files."""
        _call(
            env,
            "write",
            path="MEMORY.md",
            content="# Important Notes\n\nRemember to check the API endpoints.\n",
        )

        result = _call(env, "memory_search", query="API endpoints")
        assert result["total_found"] >= 1
        assert "API endpoints" in result["results"][0]["snippet"]

    def test_step_count_increments(self, env):
        """Test that step count increments properly."""
        initial_count = env.state.step_count

        env.step(ListToolsAction())
        assert env.state.step_count == initial_count + 1

        env.step(CallToolAction(tool_name="exec", arguments={"command": "echo hi"}))
        assert env.state.step_count == initial_count + 2

    def test_episode_isolation(self, env):
        """Test that reset creates a fresh episode."""
        _call(env, "write", path="episode1.txt", content="first episode")

        # Reset to new episode
        obs = env.reset()
        new_workspace = Path(obs.metadata["workspace"])

        # File from first episode should not exist in the new workspace,
        # neither on disk nor through the read tool.
        assert not (new_workspace / "episode1.txt").exists()
        result = _call(env, "read", path="episode1.txt")
        assert "error" in result


class TestOpenClawEnvironmentEdgeCases:
    """Edge case tests for OpenClawEnvironment."""

    def test_exec_timeout(self, env):
        """Test command timeout handling."""
        result = _call(env, "exec", command="sleep 10", timeout=1)
        assert "error" in result
        assert "timed out" in result["error"].lower()

    def test_edit_string_not_found(self, env):
        """Test editing when old string doesn't exist."""
        _call(env, "write", path="test.txt", content="hello world")

        result = _call(
            env,
            "edit",
            path="test.txt",
            old_string="not found",
            new_string="replacement",
        )
        assert result["success"] is False

    def test_write_creates_directories(self, env):
        """Test that write creates parent directories."""
        result = _call(
            env, "write", path="deep/nested/dir/file.txt", content="nested content"
        )
        assert result["success"] is True

        # Verify file exists
        read_result = _call(env, "read", path="deep/nested/dir/file.txt")
        assert read_result["content"] == "nested content"

    def test_read_with_offset_and_limit(self, env):
        """Test reading specific line ranges."""
        content = "\n".join(f"Line {i}" for i in range(1, 101))
        _call(env, "write", path="lines.txt", content=content)

        result = _call(env, "read", path="lines.txt", offset=50, limit=10)
        assert result["lines_read"] == 10
        assert "Line 50" in result["content"]
        assert "Line 59" in result["content"]