Skip to content

Commit 8f0eeb9

Browse files
committed
Rebase to main
1 parent 55977f7 commit 8f0eeb9

File tree

1 file changed

+93
-70
lines changed

1 file changed

+93
-70
lines changed

clients/python/agentic-sandbox-client/agentic_sandbox/sandbox_client.py

Lines changed: 93 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
SANDBOX_API_GROUP = "agents.x-k8s.io"
3838
SANDBOX_API_VERSION = "v1alpha1"
3939
SANDBOX_PLURAL_NAME = "sandboxes"
40+
4041
POD_NAME_ANNOTATION = "agents.x-k8s.io/pod-name"
4142

4243
logging.basicConfig(level=logging.INFO,
@@ -60,11 +61,14 @@ class SandboxClient:
6061
def __init__(
6162
self,
6263
template_name: str,
63-
namespace: str = "default",
64-
server_port: int = 8888,
64+
namespace: str = "default", # Where Sandbox lives
65+
gateway_name: str | None = None, # Name of the Gateway
66+
gateway_namespace: str = "default", # Where Gateway lives
67+
api_url: str | None = None, # Allow custom URL (DNS or Localhost)
68+
server_port: int = 8888, # The port the runtime inside the sandbox listens on
6569
sandbox_ready_timeout: int = 180,
70+
gateway_ready_timeout: int = 180,
6671
port_forward_ready_timeout: int = 30,
67-
pod_name_ready_timeout: int = 1
6872
):
6973
self.template_name = template_name
7074
self.namespace = namespace
@@ -75,12 +79,12 @@ def __init__(
7579
self.sandbox_ready_timeout = sandbox_ready_timeout
7680
self.gateway_ready_timeout = gateway_ready_timeout
7781
self.port_forward_ready_timeout = port_forward_ready_timeout
78-
self.pod_name_ready_timeout = pod_name_ready_timeout
82+
83+
self.port_forward_process: subprocess.Popen | None = None
84+
7985
self.claim_name: str | None = None
8086
self.sandbox_name: str | None = None
8187
self.pod_name: str | None = None
82-
self.base_url = f"http://127.0.0.1:{self.server_port}"
83-
self.port_forward_process: subprocess.Popen | None = None
8488

8589
try:
8690
config.load_incluster_config()
@@ -128,13 +132,10 @@ def _create_claim(self):
128132
)
129133

130134
def _wait_for_sandbox_ready(self):
131-
"""
132-
Waits for the Sandbox custom resource to have a 'Ready' status condition.
133-
This indicates that the underlying pod is running and has passed its checks.
134-
"""
135+
"""Waits for the Sandbox custom resource to have a 'Ready' status."""
135136
if not self.claim_name:
136137
raise RuntimeError(
137-
"Cannot wait for sandbox, claim has not been created.")
138+
"Cannot wait for sandbox; a sandboxclaim has not been created.")
138139

139140
w = watch.Watch()
140141
logging.info("Watching for Sandbox to become ready...")
@@ -165,70 +166,51 @@ def _wait_for_sandbox_ready(self):
165166
if not self.sandbox_name:
166167
raise RuntimeError(
167168
"Could not determine sandbox name from sandbox object.")
168-
169169
logging.info(f"Sandbox {self.sandbox_name} is ready.")
170-
self._wait_for_pod_name()
170+
171+
annotations = sandbox_object.get(
172+
'metadata', {}).get('annotations', {})
173+
pod_name = annotations.get(POD_NAME_ANNOTATION)
174+
if pod_name:
175+
self.pod_name = pod_name
176+
logging.info(
177+
f"Found pod name from annotation: {self.pod_name}")
178+
else:
179+
self.pod_name = self.sandbox_name
171180
w.stop()
172181
return
173182

174183
self.__exit__(None, None, None)
175184
raise TimeoutError(
176185
f"Sandbox did not become ready within {self.sandbox_ready_timeout} seconds.")
177186

178-
def _wait_for_pod_name(self, timeout: int = 30):
179-
"""
180-
Waits for the pod-name annotation to be present on the sandbox object.
181-
This wait is only necessary when using SandboxWarmPool.
182-
"""
183-
if self.pod_name_ready_timeout <= 0:
184-
logging.info(
185-
f"pod_name_ready_timeout {self.pod_name_ready_timeout} is <= 0. Defaulting pod to sandbox name {self.sandbox_name}.")
186-
self.pod_name = self.sandbox_name
187-
return
188-
w = watch.Watch()
189-
logging.info(
190-
f"Waiting for pod name annotation on sandbox {self.sandbox_name}...")
191-
for event in w.stream(
192-
func=self.custom_objects_api.list_namespaced_custom_object,
193-
namespace=self.namespace,
194-
group=SANDBOX_API_GROUP,
195-
version=SANDBOX_API_VERSION,
196-
plural=SANDBOX_PLURAL_NAME,
197-
field_selector=f"metadata.name={self.sandbox_name}",
198-
timeout_seconds=self.pod_name_ready_timeout
199-
):
200-
if event["type"] in ["ADDED", "MODIFIED"]:
201-
sandbox_object = event['object']
202-
annotations = sandbox_object.get(
203-
'metadata', {}).get('annotations', {})
204-
pod_name = annotations.get(POD_NAME_ANNOTATION)
205-
if pod_name:
206-
self.pod_name = pod_name
207-
logging.info(
208-
f"Found pod name from annotation: {self.pod_name}")
209-
w.stop()
210-
return
211-
212-
logging.warning(
213-
f"Pod name annotation not found after {self.pod_name_ready_timeout} seconds. Defaulting to sandbox name {self.sandbox_name}.")
214-
self.pod_name = self.sandbox_name
187+
def _get_free_port(self):
188+
"""Finds a free port on localhost."""
189+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
190+
s.bind(('', 0))
191+
return s.getsockname()[1]
215192

216193
def _start_and_wait_for_port_forward(self):
217194
"""
218195
Starts 'kubectl port-forward' to the Router Service.
219196
This allows 'Dev Mode' without needing a public Gateway IP.
220197
"""
221-
if not self.pod_name:
222-
raise RuntimeError(
223-
"Cannot start port-forwarding, sandbox pod name is not known.")
198+
local_port = self._get_free_port()
199+
200+
# Assumes the router service name from sandbox_router.yaml
201+
router_svc = "svc/sandbox-router-svc"
202+
224203
logging.info(
225-
f"Starting port-forwarding for sandbox {self.sandbox_name} in namespace {self.namespace} with sandbox pod {self.pod_name}...")
204+
f"Starting Dev Mode tunnel: localhost:{local_port} -> {router_svc}:8080...")
205+
226206
self.port_forward_process = subprocess.Popen(
227207
[
228208
"kubectl", "port-forward",
229-
f"pod/{self.pod_name}",
230-
f"{self.server_port}:{self.server_port}",
231-
"-n", self.namespace
209+
router_svc,
210+
# Tunnel to Router (8080), not Sandbox (8888)
211+
f"{local_port}:8080",
212+
# The router lives in the Gateway/Default NS
213+
"-n", self.gateway_namespace
232214
],
233215
stdout=subprocess.PIPE,
234216
stderr=subprocess.PIPE
@@ -243,16 +225,54 @@ def _start_and_wait_for_port_forward(self):
243225
f"Tunnel crashed: {stderr.decode(errors='ignore')}")
244226

245227
try:
246-
with socket.create_connection(("127.0.0.1", self.server_port), timeout=0.1):
228+
# Connect to localhost
229+
with socket.create_connection(("127.0.0.1", local_port), timeout=0.1):
230+
self.base_url = f"http://127.0.0.1:{local_port}"
247231
logging.info(
248-
f"Port-forwarding is ready on port {self.server_port}.")
232+
f"Dev Mode ready. Tunneled to Router at {self.base_url}")
233+
# No need for huge sleeps; the Router service is stable.
234+
time.sleep(0.5)
249235
return
250236
except (socket.timeout, ConnectionRefusedError):
251237
time.sleep(0.5)
252238

253239
self.__exit__(None, None, None)
254-
raise TimeoutError(
255-
f"Port-forwarding did not become ready within {self.port_forward_ready_timeout} seconds.")
240+
raise TimeoutError("Failed to establish tunnel to Router Service.")
241+
242+
def _wait_for_gateway_ip(self):
243+
"""Waits for the Gateway to be assigned an external IP."""
244+
# Check if we already have a manually provided URL
245+
if self.base_url:
246+
logging.info(f"Using configured API URL: {self.base_url}")
247+
return
248+
249+
logging.info(
250+
f"Waiting for Gateway '{self.gateway_name}' in namespace '{self.gateway_namespace}'...")
251+
252+
w = watch.Watch()
253+
for event in w.stream(
254+
func=self.custom_objects_api.list_namespaced_custom_object,
255+
namespace=self.gateway_namespace, group=GATEWAY_API_GROUP,
256+
version=GATEWAY_API_VERSION, plural=GATEWAY_PLURAL,
257+
field_selector=f"metadata.name={self.gateway_name}",
258+
timeout_seconds=self.gateway_ready_timeout,
259+
):
260+
if event["type"] in ["ADDED", "MODIFIED"]:
261+
gateway_object = event['object']
262+
status = gateway_object.get('status', {})
263+
addresses = status.get('addresses', [])
264+
if addresses:
265+
ip_address = addresses[0].get('value')
266+
if ip_address:
267+
self.base_url = f"http://{ip_address}"
268+
logging.info(
269+
f"Gateway is ready. Base URL set to: {self.base_url}")
270+
w.stop()
271+
return
272+
273+
if not self.base_url:
274+
raise TimeoutError(
275+
f"Gateway '{self.gateway_name}' in namespace '{self.gateway_namespace}' did not get an IP within {self.gateway_ready_timeout} seconds.")
256276

257277
def __enter__(self) -> 'SandboxClient':
258278
self._create_claim()
@@ -319,15 +339,22 @@ def _request(self, method: str, endpoint: str, **kwargs) -> requests.Response:
319339
headers["X-Sandbox-Port"] = str(self.server_port)
320340
kwargs["headers"] = headers
321341

322-
url = f"{self.base_url}/{endpoint}"
323342
try:
324343
response = self.session.request(method, url, **kwargs)
325344
response.raise_for_status()
326345
return response
327346
except requests.exceptions.RequestException as e:
328-
logging.error(f"Request to sandbox failed: {e}")
347+
# Check if port-forward died DURING request
348+
if self.port_forward_process and self.port_forward_process.poll() is not None:
349+
stdout, stderr = self.port_forward_process.communicate()
350+
raise RuntimeError(
351+
f"Kubectl Port-Forward crashed DURING request!\n"
352+
f"Stderr: {stderr.decode(errors='ignore')}"
353+
) from e
354+
355+
logging.error(f"Request to gateway router failed: {e}")
329356
raise RuntimeError(
330-
f"Failed to communicate with the sandbox at {url}.") from e
357+
f"Failed to communicate with the sandbox via the gateway at {url}.") from e
331358

332359
def run(self, command: str, timeout: int = 60) -> ExecutionResult:
333360
payload = {"command": command}
@@ -351,10 +378,6 @@ def write(self, path: str, content: bytes | str, timeout: int = 60):
351378
self._request("POST", "upload", files=files_payload, timeout=timeout)
352379
logging.info(f"File '{filename}' uploaded successfully.")
353380

354-
def read(self, path: str) -> bytes:
355-
"""
356-
Downloads a file from the sandbox.
357-
The base path for the download is the root of the sandbox's filesystem.
358-
"""
359-
response = self._request("GET", f"download/{path}")
381+
def read(self, path: str, timeout: int = 60) -> bytes:
382+
response = self._request("GET", f"download/{path}", timeout=timeout)
360383
return response.content

0 commit comments

Comments
 (0)