Skip to content

Commit 6c5f151

Browse files
committed
Fixes bug in sandbox_client
1 parent a9d9555 commit 6c5f151

File tree

2 files changed

+83
-98
lines changed

2 files changed

+83
-98
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,6 @@ bin/
99
dist/
1010
release_assets/
1111

12-
# Temporary folders
12+
# Ignore temporary folders
1313
tmp/
1414
temp/

clients/python/agentic-sandbox-client/agentic_sandbox/sandbox_client.py

Lines changed: 82 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@
3939
SANDBOX_PLURAL_NAME = "sandboxes"
4040

4141
logging.basicConfig(level=logging.INFO,
42-
format='%(asctime)s - %(levelname)s - %(message)s',
43-
stream=sys.stdout)
42+
format='%(asctime)s - %(levelname)s - %(message)s', stream=sys.stdout)
4443

4544

4645
@dataclass
@@ -59,11 +58,8 @@ class SandboxClient:
5958
def __init__(
6059
self,
6160
template_name: str,
62-
namespace: str = "default", # Where Sandbox lives
63-
gateway_name: str | None = None, # Name of the Gateway
64-
gateway_namespace: str = "default", # Where Gateway lives
65-
api_url: str | None = None, # Allow custom URL (DNS or Localhost)
66-
server_port: int = 8888, # The port the runtime inside the sandbox listens on
61+
namespace: str = "default",
62+
server_port: int = 8888,
6763
sandbox_ready_timeout: int = 180,
6864
gateway_ready_timeout: int = 180,
6965
port_forward_ready_timeout: int = 30,
@@ -82,6 +78,9 @@ def __init__(
8278

8379
self.claim_name: str | None = None
8480
self.sandbox_name: str | None = None
81+
self.pod_name: str | None = None
82+
self.base_url = f"http://127.0.0.1:{self.server_port}"
83+
self.port_forward_process: subprocess.Popen | None = None
8584

8685
try:
8786
config.load_incluster_config()
@@ -129,11 +128,40 @@ def _create_claim(self):
129128
)
130129

131130
def _wait_for_sandbox_ready(self):
132-
"""Waits for the Sandbox custom resource to have a 'Ready' status."""
131+
"""
132+
Waits for the SandboxClaim to be populated with the Sandbox name, and then
133+
waits for the Sandbox custom resource to have a 'Ready' status condition.
134+
This indicates that the underlying pod is running and has passed its checks.
135+
"""
133136
if not self.claim_name:
134137
raise RuntimeError(
135-
"Cannot wait for sandbox; a sandboxclaim has not been created.")
138+
"Cannot wait for sandbox, claim has not been created.")
139+
140+
# Watch for the SandboxClaim to be updated with the sandbox name
141+
w_claim = watch.Watch()
142+
for event in w_claim.stream(
143+
self.custom_objects_api.list_namespaced_custom_object,
144+
namespace=self.namespace,
145+
group=CLAIM_API_GROUP,
146+
version=CLAIM_API_VERSION,
147+
plural=CLAIM_PLURAL_NAME,
148+
field_selector=f"metadata.name={self.claim_name}",
149+
timeout_seconds=self.sandbox_ready_timeout
150+
):
151+
if event["type"] in ["ADDED", "MODIFIED"]:
152+
claim_obj = event["object"]
153+
status = claim_obj.get("status", {})
154+
sandbox_status = status.get("sandbox", {})
155+
if sandbox_status and sandbox_status.get("Name"):
156+
self.sandbox_name = sandbox_status.get("Name")
157+
w_claim.stop()
158+
break
159+
else:
160+
self.__exit__(None, None, None) # Attempt cleanup
161+
raise TimeoutError(
162+
f"SandboxClaim did not become ready within {self.sandbox_ready_timeout} seconds.")
136163

164+
# Watch for the Sandbox to become ready
137165
w = watch.Watch()
138166
logging.info("Watching for Sandbox to become ready...")
139167
for event in w.stream(
@@ -142,56 +170,54 @@ def _wait_for_sandbox_ready(self):
142170
group=SANDBOX_API_GROUP,
143171
version=SANDBOX_API_VERSION,
144172
plural=SANDBOX_PLURAL_NAME,
145-
field_selector=f"metadata.name={self.claim_name}",
173+
field_selector=f"metadata.name={self.sandbox_name}",
146174
timeout_seconds=self.sandbox_ready_timeout
147175
):
148-
sandbox_object = event['object']
149-
status = sandbox_object.get('status', {})
150-
conditions = status.get('conditions', [])
151-
is_ready = False
152-
for cond in conditions:
153-
if cond.get('type') == 'Ready' and cond.get('status') == 'True':
154-
is_ready = True
176+
if event["type"] in ["ADDED", "MODIFIED"]:
177+
sandbox_object = event['object']
178+
status = sandbox_object.get('status', {})
179+
conditions = status.get('conditions', [])
180+
is_ready = False
181+
for cond in conditions:
182+
if cond.get('type') == 'Ready' and cond.get('status') == 'True':
183+
is_ready = True
184+
break
185+
186+
if is_ready:
187+
annotations = sandbox_object.get(
188+
'metadata', {}).get('annotations', {})
189+
pod_name_annotation = "agents.x-k8s.io/pod-name"
190+
if pod_name_annotation in annotations:
191+
self.pod_name = annotations[pod_name_annotation]
192+
logging.info(
193+
f"Found pod name from annotation: {self.pod_name}")
194+
else:
195+
self.pod_name = self.sandbox_name
196+
w.stop()
197+
logging.info(f"Sandbox {self.sandbox_name} is ready.")
155198
break
156199

157-
if is_ready:
158-
self.sandbox_name = sandbox_object['metadata']['name']
159-
w.stop()
160-
logging.info(f"Sandbox {self.sandbox_name} is ready.")
161-
break
162-
163-
if not self.sandbox_name:
200+
if not self.pod_name:
164201
self.__exit__(None, None, None)
165202
raise TimeoutError(
166-
f"Sandbox did not become ready within {self.sandbox_ready_timeout} seconds.")
167-
168-
def _get_free_port(self):
169-
"""Finds a free port on localhost."""
170-
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
171-
s.bind(('', 0))
172-
return s.getsockname()[1]
203+
f"Sandbox did not become ready or pod name could not be determined within {self.sandbox_ready_timeout} seconds.")
173204

174205
def _start_and_wait_for_port_forward(self):
175206
"""
176207
Starts 'kubectl port-forward' to the Router Service.
177208
This allows 'Dev Mode' without needing a public Gateway IP.
178209
"""
179-
local_port = self._get_free_port()
180-
181-
# Assumes the router service name from sandbox_router.yaml
182-
router_svc = "svc/sandbox-router-svc"
183-
210+
if not self.pod_name:
211+
raise RuntimeError(
212+
"Cannot start port-forwarding, sandbox pod name is not known.")
184213
logging.info(
185-
f"Starting Dev Mode tunnel: localhost:{local_port} -> {router_svc}:8080...")
186-
214+
f"Starting port-forwarding for sandbox {self.sandbox_name} with sandbox pod {self.pod_name}...")
187215
self.port_forward_process = subprocess.Popen(
188216
[
189217
"kubectl", "port-forward",
190-
router_svc,
191-
# Tunnel to Router (8080), not Sandbox (8888)
192-
f"{local_port}:8080",
193-
# The router lives in the Gateway/Default NS
194-
"-n", self.gateway_namespace
218+
f"pod/{self.pod_name}",
219+
f"{self.server_port}:{self.server_port}",
220+
"-n", self.namespace
195221
],
196222
stdout=subprocess.PIPE,
197223
stderr=subprocess.PIPE
@@ -206,54 +232,16 @@ def _start_and_wait_for_port_forward(self):
206232
f"Tunnel crashed: {stderr.decode(errors='ignore')}")
207233

208234
try:
209-
# Connect to localhost
210-
with socket.create_connection(("127.0.0.1", local_port), timeout=0.1):
211-
self.base_url = f"http://127.0.0.1:{local_port}"
235+
with socket.create_connection(("127.0.0.1", self.server_port), timeout=0.1):
212236
logging.info(
213-
f"Dev Mode ready. Tunneled to Router at {self.base_url}")
214-
# No need for huge sleeps; the Router service is stable.
215-
time.sleep(0.5)
237+
f"Port-forwarding is ready on port {self.server_port}.")
216238
return
217239
except (socket.timeout, ConnectionRefusedError):
218240
time.sleep(0.5)
219241

220242
self.__exit__(None, None, None)
221-
raise TimeoutError("Failed to establish tunnel to Router Service.")
222-
223-
def _wait_for_gateway_ip(self):
224-
"""Waits for the Gateway to be assigned an external IP."""
225-
# Check if we already have a manually provided URL
226-
if self.base_url:
227-
logging.info(f"Using configured API URL: {self.base_url}")
228-
return
229-
230-
logging.info(
231-
f"Waiting for Gateway '{self.gateway_name}' in namespace '{self.gateway_namespace}'...")
232-
233-
w = watch.Watch()
234-
for event in w.stream(
235-
func=self.custom_objects_api.list_namespaced_custom_object,
236-
namespace=self.gateway_namespace, group=GATEWAY_API_GROUP,
237-
version=GATEWAY_API_VERSION, plural=GATEWAY_PLURAL,
238-
field_selector=f"metadata.name={self.gateway_name}",
239-
timeout_seconds=self.gateway_ready_timeout,
240-
):
241-
if event["type"] in ["ADDED", "MODIFIED"]:
242-
gateway_object = event['object']
243-
status = gateway_object.get('status', {})
244-
addresses = status.get('addresses', [])
245-
if addresses:
246-
ip_address = addresses[0].get('value')
247-
if ip_address:
248-
self.base_url = f"http://{ip_address}"
249-
logging.info(
250-
f"Gateway is ready. Base URL set to: {self.base_url}")
251-
w.stop()
252-
return
253-
254-
if not self.base_url:
255-
raise TimeoutError(
256-
f"Gateway '{self.gateway_name}' in namespace '{self.gateway_namespace}' did not get an IP within {self.gateway_ready_timeout} seconds.")
243+
raise TimeoutError(
244+
f"Port-forwarding did not become ready within {self.port_forward_ready_timeout} seconds.")
257245

258246
def __enter__(self) -> 'SandboxClient':
259247
self._create_claim()
@@ -320,22 +308,15 @@ def _request(self, method: str, endpoint: str, **kwargs) -> requests.Response:
320308
headers["X-Sandbox-Port"] = str(self.server_port)
321309
kwargs["headers"] = headers
322310

311+
url = f"{self.base_url}/{endpoint}"
323312
try:
324313
response = self.session.request(method, url, **kwargs)
325314
response.raise_for_status()
326315
return response
327316
except requests.exceptions.RequestException as e:
328-
# Check if port-forward died DURING request
329-
if self.port_forward_process and self.port_forward_process.poll() is not None:
330-
stdout, stderr = self.port_forward_process.communicate()
331-
raise RuntimeError(
332-
f"Kubectl Port-Forward crashed DURING request!\n"
333-
f"Stderr: {stderr.decode(errors='ignore')}"
334-
) from e
335-
336-
logging.error(f"Request to gateway router failed: {e}")
317+
logging.error(f"Request to sandbox failed: {e}")
337318
raise RuntimeError(
338-
f"Failed to communicate with the sandbox via the gateway at {url}.") from e
319+
f"Failed to communicate with the sandbox at {url}.") from e
339320

340321
def run(self, command: str, timeout: int = 60) -> ExecutionResult:
341322
payload = {"command": command}
@@ -359,6 +340,10 @@ def write(self, path: str, content: bytes | str, timeout: int = 60):
359340
self._request("POST", "upload", files=files_payload, timeout=timeout)
360341
logging.info(f"File '{filename}' uploaded successfully.")
361342

362-
def read(self, path: str, timeout: int = 60) -> bytes:
363-
response = self._request("GET", f"download/{path}", timeout=timeout)
343+
def read(self, path: str) -> bytes:
344+
"""
345+
Downloads a file from the sandbox.
346+
The base path for the download is the root of the sandbox's filesystem.
347+
"""
348+
response = self._request("GET", f"download/{path}")
364349
return response.content

0 commit comments

Comments
 (0)