3737SANDBOX_API_GROUP = "agents.x-k8s.io"
3838SANDBOX_API_VERSION = "v1alpha1"
3939SANDBOX_PLURAL_NAME = "sandboxes"
40+
4041POD_NAME_ANNOTATION = "agents.x-k8s.io/pod-name"
4142
4243logging .basicConfig (level = logging .INFO ,
@@ -60,11 +61,14 @@ class SandboxClient:
6061 def __init__ (
6162 self ,
6263 template_name : str ,
63- namespace : str = "default" ,
64- server_port : int = 8888 ,
64+ namespace : str = "default" , # Where Sandbox lives
65+ gateway_name : str | None = None , # Name of the Gateway
66+ gateway_namespace : str = "default" , # Where Gateway lives
67+ api_url : str | None = None , # Allow custom URL (DNS or Localhost)
68+ server_port : int = 8888 , # The port the runtime inside the sandbox listens on
6569 sandbox_ready_timeout : int = 180 ,
70+ gateway_ready_timeout : int = 180 ,
6671 port_forward_ready_timeout : int = 30 ,
67- pod_name_ready_timeout : int = 1
6872 ):
6973 self .template_name = template_name
7074 self .namespace = namespace
@@ -75,12 +79,12 @@ def __init__(
7579 self .sandbox_ready_timeout = sandbox_ready_timeout
7680 self .gateway_ready_timeout = gateway_ready_timeout
7781 self .port_forward_ready_timeout = port_forward_ready_timeout
78- self .pod_name_ready_timeout = pod_name_ready_timeout
82+
83+ self .port_forward_process : subprocess .Popen | None = None
84+
7985 self .claim_name : str | None = None
8086 self .sandbox_name : str | None = None
8187 self .pod_name : str | None = None
82- self .base_url = f"http://127.0.0.1:{ self .server_port } "
83- self .port_forward_process : subprocess .Popen | None = None
8488
8589 try :
8690 config .load_incluster_config ()
@@ -128,13 +132,10 @@ def _create_claim(self):
128132 )
129133
130134 def _wait_for_sandbox_ready (self ):
131- """
132- Waits for the Sandbox custom resource to have a 'Ready' status condition.
133- This indicates that the underlying pod is running and has passed its checks.
134- """
135+ """Waits for the Sandbox custom resource to have a 'Ready' status."""
135136 if not self .claim_name :
136137 raise RuntimeError (
137- "Cannot wait for sandbox, claim has not been created." )
138+ "Cannot wait for sandbox; a sandboxclaim has not been created." )
138139
139140 w = watch .Watch ()
140141 logging .info ("Watching for Sandbox to become ready..." )
@@ -165,70 +166,51 @@ def _wait_for_sandbox_ready(self):
165166 if not self .sandbox_name :
166167 raise RuntimeError (
167168 "Could not determine sandbox name from sandbox object." )
168-
169169 logging .info (f"Sandbox { self .sandbox_name } is ready." )
170- self ._wait_for_pod_name ()
170+
171+ annotations = sandbox_object .get (
172+ 'metadata' , {}).get ('annotations' , {})
173+ pod_name = annotations .get (POD_NAME_ANNOTATION )
174+ if pod_name :
175+ self .pod_name = pod_name
176+ logging .info (
177+ f"Found pod name from annotation: { self .pod_name } " )
178+ else :
179+ self .pod_name = self .sandbox_name
171180 w .stop ()
172181 return
173182
174183 self .__exit__ (None , None , None )
175184 raise TimeoutError (
176185 f"Sandbox did not become ready within { self .sandbox_ready_timeout } seconds." )
177186
178- def _wait_for_pod_name (self , timeout : int = 30 ):
179- """
180- Waits for the pod-name annotation to be present on the sandbox object.
181- This wait is only necessary when using SandboxWarmPool.
182- """
183- if self .pod_name_ready_timeout <= 0 :
184- logging .info (
185- f"pod_name_ready_timeout { self .pod_name_ready_timeout } is <= 0. Defaulting pod to sandbox name { self .sandbox_name } ." )
186- self .pod_name = self .sandbox_name
187- return
188- w = watch .Watch ()
189- logging .info (
190- f"Waiting for pod name annotation on sandbox { self .sandbox_name } ..." )
191- for event in w .stream (
192- func = self .custom_objects_api .list_namespaced_custom_object ,
193- namespace = self .namespace ,
194- group = SANDBOX_API_GROUP ,
195- version = SANDBOX_API_VERSION ,
196- plural = SANDBOX_PLURAL_NAME ,
197- field_selector = f"metadata.name={ self .sandbox_name } " ,
198- timeout_seconds = self .pod_name_ready_timeout
199- ):
200- if event ["type" ] in ["ADDED" , "MODIFIED" ]:
201- sandbox_object = event ['object' ]
202- annotations = sandbox_object .get (
203- 'metadata' , {}).get ('annotations' , {})
204- pod_name = annotations .get (POD_NAME_ANNOTATION )
205- if pod_name :
206- self .pod_name = pod_name
207- logging .info (
208- f"Found pod name from annotation: { self .pod_name } " )
209- w .stop ()
210- return
211-
212- logging .warning (
213- f"Pod name annotation not found after { self .pod_name_ready_timeout } seconds. Defaulting to sandbox name { self .sandbox_name } ." )
214- self .pod_name = self .sandbox_name
187+ def _get_free_port (self ):
188+ """Finds a free port on localhost."""
189+ with socket .socket (socket .AF_INET , socket .SOCK_STREAM ) as s :
190+ s .bind (('' , 0 ))
191+ return s .getsockname ()[1 ]
215192
216193 def _start_and_wait_for_port_forward (self ):
217194 """
218195 Starts 'kubectl port-forward' to the Router Service.
219196 This allows 'Dev Mode' without needing a public Gateway IP.
220197 """
221- if not self .pod_name :
222- raise RuntimeError (
223- "Cannot start port-forwarding, sandbox pod name is not known." )
198+ local_port = self ._get_free_port ()
199+
200+ # Assumes the router service name from sandbox_router.yaml
201+ router_svc = "svc/sandbox-router-svc"
202+
224203 logging .info (
225- f"Starting port-forwarding for sandbox { self .sandbox_name } in namespace { self .namespace } with sandbox pod { self .pod_name } ..." )
204+ f"Starting Dev Mode tunnel: localhost:{ local_port } -> { router_svc } :8080..." )
205+
226206 self .port_forward_process = subprocess .Popen (
227207 [
228208 "kubectl" , "port-forward" ,
229- f"pod/{ self .pod_name } " ,
230- f"{ self .server_port } :{ self .server_port } " ,
231- "-n" , self .namespace
209+ router_svc ,
210+ # Tunnel to Router (8080), not Sandbox (8888)
211+ f"{ local_port } :8080" ,
212+ # The router lives in the Gateway/Default NS
213+ "-n" , self .gateway_namespace
232214 ],
233215 stdout = subprocess .PIPE ,
234216 stderr = subprocess .PIPE
@@ -243,16 +225,54 @@ def _start_and_wait_for_port_forward(self):
243225 f"Tunnel crashed: { stderr .decode (errors = 'ignore' )} " )
244226
245227 try :
246- with socket .create_connection (("127.0.0.1" , self .server_port ), timeout = 0.1 ):
228+ # Connect to localhost
229+ with socket .create_connection (("127.0.0.1" , local_port ), timeout = 0.1 ):
230+ self .base_url = f"http://127.0.0.1:{ local_port } "
247231 logging .info (
248- f"Port-forwarding is ready on port { self .server_port } ." )
232+ f"Dev Mode ready. Tunneled to Router at { self .base_url } " )
233+ # No need for huge sleeps; the Router service is stable.
234+ time .sleep (0.5 )
249235 return
250236 except (socket .timeout , ConnectionRefusedError ):
251237 time .sleep (0.5 )
252238
253239 self .__exit__ (None , None , None )
254- raise TimeoutError (
255- f"Port-forwarding did not become ready within { self .port_forward_ready_timeout } seconds." )
240+ raise TimeoutError ("Failed to establish tunnel to Router Service." )
241+
242+ def _wait_for_gateway_ip (self ):
243+ """Waits for the Gateway to be assigned an external IP."""
244+ # Check if we already have a manually provided URL
245+ if self .base_url :
246+ logging .info (f"Using configured API URL: { self .base_url } " )
247+ return
248+
249+ logging .info (
250+ f"Waiting for Gateway '{ self .gateway_name } ' in namespace '{ self .gateway_namespace } '..." )
251+
252+ w = watch .Watch ()
253+ for event in w .stream (
254+ func = self .custom_objects_api .list_namespaced_custom_object ,
255+ namespace = self .gateway_namespace , group = GATEWAY_API_GROUP ,
256+ version = GATEWAY_API_VERSION , plural = GATEWAY_PLURAL ,
257+ field_selector = f"metadata.name={ self .gateway_name } " ,
258+ timeout_seconds = self .gateway_ready_timeout ,
259+ ):
260+ if event ["type" ] in ["ADDED" , "MODIFIED" ]:
261+ gateway_object = event ['object' ]
262+ status = gateway_object .get ('status' , {})
263+ addresses = status .get ('addresses' , [])
264+ if addresses :
265+ ip_address = addresses [0 ].get ('value' )
266+ if ip_address :
267+ self .base_url = f"http://{ ip_address } "
268+ logging .info (
269+ f"Gateway is ready. Base URL set to: { self .base_url } " )
270+ w .stop ()
271+ return
272+
273+ if not self .base_url :
274+ raise TimeoutError (
275+ f"Gateway '{ self .gateway_name } ' in namespace '{ self .gateway_namespace } ' did not get an IP within { self .gateway_ready_timeout } seconds." )
256276
257277 def __enter__ (self ) -> 'SandboxClient' :
258278 self ._create_claim ()
@@ -319,15 +339,22 @@ def _request(self, method: str, endpoint: str, **kwargs) -> requests.Response:
319339 headers ["X-Sandbox-Port" ] = str (self .server_port )
320340 kwargs ["headers" ] = headers
321341
322- url = f"{ self .base_url } /{ endpoint } "
323342 try :
324343 response = self .session .request (method , url , ** kwargs )
325344 response .raise_for_status ()
326345 return response
327346 except requests .exceptions .RequestException as e :
328- logging .error (f"Request to sandbox failed: { e } " )
347+ # Check if port-forward died DURING request
348+ if self .port_forward_process and self .port_forward_process .poll () is not None :
349+ stdout , stderr = self .port_forward_process .communicate ()
350+ raise RuntimeError (
351+ f"Kubectl Port-Forward crashed DURING request!\n "
352+ f"Stderr: { stderr .decode (errors = 'ignore' )} "
353+ ) from e
354+
355+ logging .error (f"Request to gateway router failed: { e } " )
329356 raise RuntimeError (
330- f"Failed to communicate with the sandbox at { url } ." ) from e
357+ f"Failed to communicate with the sandbox via the gateway at { url } ." ) from e
331358
332359 def run (self , command : str , timeout : int = 60 ) -> ExecutionResult :
333360 payload = {"command" : command }
@@ -351,10 +378,6 @@ def write(self, path: str, content: bytes | str, timeout: int = 60):
351378 self ._request ("POST" , "upload" , files = files_payload , timeout = timeout )
352379 logging .info (f"File '{ filename } ' uploaded successfully." )
353380
354- def read (self , path : str ) -> bytes :
355- """
356- Downloads a file from the sandbox.
357- The base path for the download is the root of the sandbox's filesystem.
358- """
359- response = self ._request ("GET" , f"download/{ path } " )
381+ def read (self , path : str , timeout : int = 60 ) -> bytes :
382+ response = self ._request ("GET" , f"download/{ path } " , timeout = timeout )
360383 return response .content
0 commit comments