Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions debug_gym/agents/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from debug_gym.agents.base_agent import BaseAgent, register_agent
from debug_gym.agents.free_agent import FreeAgent
from debug_gym.agents.froggy_agent import FroggyAgent
from debug_gym.agents.solution_agent import AgentSolution
from debug_gym.agents.swe_agent import SWEAgent

__all__ = [
"BaseAgent",
"register_agent",
"FreeAgent",
"FroggyAgent",
"AgentSolution",
"SWEAgent",
Expand Down
44 changes: 44 additions & 0 deletions debug_gym/agents/free_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Simple agent example for interacting with FreeEnv."""

from debug_gym.agents.base_agent import BaseAgent, register_agent


@register_agent
class FreeAgent(BaseAgent):
    """Exploration-oriented agent meant to be paired with FreeEnv.

    The class only customizes the system prompt and the error reporting of
    ``run``; everything else is inherited from ``BaseAgent``.
    """

    name = "free_agent"
    # Default instructions: short on purpose, but they still tell the model
    # how to explore and how to finish (via the submit tool).
    system_prompt = (
        "You are assisting in an exploratory codebase understanding session inside an open-ended container.\n"
        "You have access to a set of tools to inspect and modify the codebase.\n"
        "Your goal is to use the tools to gather as much information about the codebase as possible.\n"
        "Output both your thinking process (if any) and the tool call (must) in the response.\n"
        "When you are done exploring, use the submit tool as the final action to end the session."
    )

    def __init__(self, config, env, llm=None, logger=None):
        """Initialize the agent and honor an optional prompt override.

        Args:
            config: Agent configuration; a non-None ``"system_prompt"`` entry
                replaces the class-level prompt (coerced with ``str``).
            env: Environment forwarded to ``BaseAgent``.
            llm: Optional LLM forwarded to ``BaseAgent``.
            logger: Optional logger forwarded to ``BaseAgent``.
        """
        super().__init__(config=config, env=env, llm=llm, logger=logger)

        configured_prompt = config.get("system_prompt")
        if configured_prompt is None:
            return
        self.system_prompt = str(configured_prompt)

    def run(self, task_name=None, debug=False):
        """Delegate to ``BaseAgent.run`` and clarify one specific failure.

        An ``AttributeError`` whose message mentions a missing ``max_score``
        attribute on ``None`` is logged with a hint about the container image,
        then the exception chained to it (context first, then cause, else the
        error itself) is raised instead. Any other ``AttributeError``
        propagates unchanged.
        """
        try:
            outcome = super().run(task_name=task_name, debug=debug)
        except AttributeError as exc:
            if "'NoneType' object has no attribute 'max_score'" not in str(exc):
                raise

            # Prefer the exception that was in flight when the AttributeError
            # was raised; fall back to the AttributeError itself.
            underlying = exc.__context__ or exc.__cause__ or exc
            self.logger.error(
                "FreeAgent failed to reset the environment before receiving initial observations. "
                "Check that the configured container image exists and is accessible."
            )

            raise underlying
        return outcome
4 changes: 4 additions & 0 deletions debug_gym/gym/envs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from debug_gym.gym.envs.aider import AiderBenchmarkEnv
from debug_gym.gym.envs.env import RepoEnv, TooledEnv
from debug_gym.gym.envs.free_env import FreeEnv
from debug_gym.gym.envs.local import LocalEnv
from debug_gym.gym.envs.mini_nightmare import MiniNightmareEnv
from debug_gym.gym.envs.r2egym import R2EGymEnv
Expand All @@ -12,6 +13,7 @@
"AiderBenchmarkEnv",
"RepoEnv",
"TooledEnv",
"FreeEnv",
"LocalEnv",
"MiniNightmareEnv",
"R2EGymEnv",
Expand Down Expand Up @@ -39,6 +41,8 @@ def select_env(env_type: str = None) -> type[RepoEnv]:
return MiniNightmareEnv
case "r2egym":
return R2EGymEnv
case "free":
return FreeEnv
case _:
raise ValueError(f"Unknown environment {env_type}")

Expand Down
196 changes: 196 additions & 0 deletions debug_gym/gym/envs/free_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
from __future__ import annotations

import shlex
from pathlib import Path
from typing import Any

from debug_gym.gym.envs.env import RepoEnv
from debug_gym.gym.terminals.local import LocalTerminal
from debug_gym.gym.terminals.terminal import Terminal
from debug_gym.logger import DebugGymLogger


class FreeEnv(RepoEnv):
    """Lightweight RepoEnv wrapper for running arbitrary container images.

    The environment exposes a single synthetic task (``DEFAULT_TASK_NAME``),
    uses ``"true"`` as both entrypoints and a ``max_score`` of 0, and keeps the
    attached terminal's image, setup commands, working directory and task name
    in sync with its own settings.
    """

    DEFAULT_TASK_NAME = "free-session"

    def __init__(
        self,
        image: str,
        *,
        terminal: Terminal | None = None,
        mount_path: str | Path | None = None,
        setup_commands: list[str] | None = None,
        instructions: str | None = None,
        init_git: bool = True,
        workspace_dir: str | Path = "/testbed",
        logger: DebugGymLogger | None = None,
        **env_kwargs: Any,
    ) -> None:
        """Create a free-form environment backed by an existing repository terminal.

        Args:
            image: Container image name; mirrored onto ``terminal.base_image``
                when the terminal has that attribute.
            terminal: Terminal forwarded to ``RepoEnv``.
            mount_path: Optional path forwarded to ``RepoEnv`` as ``path``.
            setup_commands: Commands copied onto ``terminal.setup_commands``
                when the terminal has that attribute.
            instructions: Custom instructions; stripped, with ``None`` treated
                as empty (the ``instructions`` property then falls back to a
                generic message).
            init_git: Whether ``setup_terminal`` should delegate to
                ``RepoEnv.setup_terminal`` (only done when git is found).
            workspace_dir: Working directory used for non-local terminals.
            logger: Logger to use; defaults to ``DebugGymLogger("debug-gym")``.
            **env_kwargs: Extra keyword arguments forwarded to ``RepoEnv``.
        """
        self.container_image = image
        self._custom_instructions = (instructions or "").strip()
        self.init_git = init_git
        self._setup_commands = list(setup_commands or [])
        self._workspace_dir = str(workspace_dir)

        shared_logger = logger or DebugGymLogger("debug-gym")

        super().__init__(
            path=str(mount_path) if mount_path is not None else None,
            entrypoint="true",
            debug_entrypoint="true",
            max_score=0,
            terminal=terminal,
            logger=shared_logger,
            **env_kwargs,
        )

        # No-op when there is no terminal (the helper guards against None).
        self._apply_terminal_settings()

    def _apply_terminal_settings(self) -> None:
        """Keep terminal metadata (image/setup commands) in sync with env state."""
        terminal = self.terminal
        if terminal is None:
            return

        if hasattr(terminal, "base_image"):
            terminal.base_image = self.container_image

        if hasattr(terminal, "setup_commands"):
            # Hand over a copy so the terminal cannot mutate our list.
            terminal.setup_commands = list(self._setup_commands)

        if hasattr(terminal, "working_dir") and not isinstance(terminal, LocalTerminal):
            try:
                terminal.working_dir = self._workspace_dir
            except ValueError:
                # The setter rejected the change; keep whatever it has.
                self.logger.debug(
                    "Terminal already active; keeping working_dir=%s",
                    getattr(terminal, "working_dir", self._workspace_dir),
                )

        if hasattr(terminal, "task_name"):
            try:
                terminal.task_name = self.DEFAULT_TASK_NAME
            except ValueError:
                self.logger.debug(
                    "Terminal already active; keeping existing task name."
                )

        terminal.logger = self.logger

    def load_dataset(self, problems: str | list[str] | None = None):
        """Expose a single synthetic task keyed by DEFAULT_TASK_NAME.

        ``problems`` is accepted for interface compatibility and ignored.
        """
        return {self.DEFAULT_TASK_NAME: {"image": self.container_image}}

    def setup_task(self, task_name: str | None, options: dict | None = None) -> None:
        """Record the task name and base image on the env and its terminal.

        A falsy ``task_name`` is replaced by ``DEFAULT_TASK_NAME``; ``options``
        is not used.
        """
        self.task_name = task_name or self.DEFAULT_TASK_NAME
        self.base_image = self.container_image
        if hasattr(self.terminal, "base_image"):
            self.terminal.base_image = self.base_image

    def setup_workspace(self) -> None:
        """Ensure the remote workspace matches the configured working directory.

        Local terminals defer entirely to ``RepoEnv.setup_workspace``. For any
        other terminal the workspace is reset, pointed at the configured
        directory, prepared inside the terminal, and optionally populated from
        ``self.path``.
        """
        if isinstance(self.terminal, LocalTerminal):
            super().setup_workspace()
            return

        self.workspace.reset()
        self.workspace.working_dir = Path(self._workspace_dir)
        if self.terminal is not None:
            current_dir = getattr(self.terminal, "working_dir", None)
            if current_dir != self._workspace_dir:
                try:
                    self.terminal.working_dir = self._workspace_dir
                except ValueError:
                    self.logger.debug(
                        "Terminal already active; keeping working_dir=%s", current_dir
                    )
            # Ensure core utilities exist before RepoEnv renders directory
            # listings. Only reach for apt when `tree` is actually missing, so
            # repeated resets stay offline and images that already ship `tree`
            # but have no apt do not fail here.
            self.terminal.run(
                "command -v tree >/dev/null 2>&1 || "
                "(apt-get update -y && apt-get install -y tree)",
                raises=True,
            )
            self.terminal.run(
                f"mkdir -p {shlex.quote(self._workspace_dir)}",
                raises=True,
            )

        if self.path:
            self.workspace.copy_content(self.path)

        self.workspace.setup_file_filters()

    def setup_terminal(self) -> None:
        """Apply FreeEnv tweaks and reuse RepoEnv git bootstrapping when enabled."""
        self._apply_terminal_settings()

        if self.terminal is not None:
            self.terminal.run("touch .debugignore .debugreadonly")

        if not self.init_git:
            return
        if not self._git_available():
            self.logger.debug(
                "Git is not available in the container; skipping repository setup.",
            )
            return
        super().setup_terminal()

    def _git_available(self) -> bool:
        """Check for git presence before attempting repository initialization."""
        if self.terminal is None:
            return False
        success, _ = self.terminal.run("command -v git")
        return success

    @property
    def instructions(self) -> str:
        """Provide user-facing guidance, falling back to a generic sandbox blurb."""
        return (
            self._custom_instructions
            or "You are placed in an isolated Linux environment, use the available tools to interact with the environment effectively."
        )

    def reset(self, *, options: dict | None = None):
        """Allow callers to mutate container settings before delegating to RepoEnv.

        Recognized ``options`` keys: ``image``, ``workspace_dir``,
        ``setup_commands``, ``instructions`` and ``init_git``. A change to the
        image, workspace directory or setup commands closes the current
        terminal (best effort) before the new settings are applied. The
        (possibly empty) options dict is then passed to ``RepoEnv.reset`` and
        its result returned.
        """
        options = options or {}

        image = options.get("image")
        workspace_dir = options.get("workspace_dir")
        setup_commands = options.get("setup_commands")
        instructions = options.get("instructions")
        init_git = options.get("init_git")

        restart_terminal = False

        if image and image != self.container_image:
            self.container_image = image
            restart_terminal = True

        if workspace_dir and str(workspace_dir) != self._workspace_dir:
            self._workspace_dir = str(workspace_dir)
            restart_terminal = True

        if setup_commands is not None:
            new_commands = list(setup_commands)
            if new_commands != self._setup_commands:
                self._setup_commands = new_commands
                restart_terminal = True

        if instructions is not None:
            # Normalize like __init__ does so a blank override falls back to
            # the default blurb and the property always yields a str.
            self._custom_instructions = str(instructions).strip()

        if init_git is not None:
            self.init_git = bool(init_git)

        if restart_terminal and self.terminal is not None:
            try:
                self.terminal.close()
            except Exception as exc:  # noqa: BLE001 - diagnostics only
                self.logger.debug("Failed to close terminal cleanly: %s", exc)

        self._apply_terminal_settings()

        return super().reset(options=options)
27 changes: 24 additions & 3 deletions debug_gym/gym/terminals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,20 @@ def select_terminal(
if terminal_config is None:
return None

if isinstance(terminal_config, Terminal):
return terminal_config

if not isinstance(terminal_config, dict):
raise TypeError(
"terminal configuration must be a dict, Terminal instance, or None",
)

config = dict(terminal_config)
terminal_type = str(config.pop("type", "")).lower()
if not terminal_type:
raise ValueError("Terminal configuration must include a 'type' key")

logger = logger or DebugGymLogger("debug-gym")
terminal_type = terminal_config["type"]
match terminal_type:
case "docker":
terminal_class = DockerTerminal
Expand All @@ -25,8 +37,17 @@ def select_terminal(
case _:
raise ValueError(f"Unknown terminal {terminal_type}")

extra_labels = config.pop("extra_labels", {}) or {}
if uuid is not None:
extra_labels = {**extra_labels, "uuid": uuid}

if terminal_class is KubernetesTerminal and extra_labels:
config["extra_labels"] = extra_labels

if terminal_class is not KubernetesTerminal:
config.pop("extra_labels", None)

return terminal_class(
**terminal_config,
logger=logger,
extra_labels={"uuid": uuid},
**config,
)
Loading
Loading