3939SANDBOX_PLURAL_NAME = "sandboxes"
4040
4141logging .basicConfig (level = logging .INFO ,
42- format = '%(asctime)s - %(levelname)s - %(message)s' ,
43- stream = sys .stdout )
42+ format = '%(asctime)s - %(levelname)s - %(message)s' , stream = sys .stdout )
4443
4544
4645@dataclass
@@ -59,11 +58,8 @@ class SandboxClient:
5958 def __init__ (
6059 self ,
6160 template_name : str ,
62- namespace : str = "default" , # Where Sandbox lives
63- gateway_name : str | None = None , # Name of the Gateway
64- gateway_namespace : str = "default" , # Where Gateway lives
65- api_url : str | None = None , # Allow custom URL (DNS or Localhost)
66- server_port : int = 8888 , # The port the runtime inside the sandbox listens on
61+ namespace : str = "default" ,
62+ server_port : int = 8888 ,
6763 sandbox_ready_timeout : int = 180 ,
6864 gateway_ready_timeout : int = 180 ,
6965 port_forward_ready_timeout : int = 30 ,
@@ -82,6 +78,9 @@ def __init__(
8278
8379 self .claim_name : str | None = None
8480 self .sandbox_name : str | None = None
81+ self .pod_name : str | None = None
82+ self .base_url = f"http://127.0.0.1:{ self .server_port } "
83+ self .port_forward_process : subprocess .Popen | None = None
8584
8685 try :
8786 config .load_incluster_config ()
@@ -129,11 +128,40 @@ def _create_claim(self):
129128 )
130129
131130 def _wait_for_sandbox_ready (self ):
132- """Waits for the Sandbox custom resource to have a 'Ready' status."""
131+ """
132+ Waits for the SandboxClaim to be populated with the Sandbox name, and then
133+ waits for the Sandbox custom resource to have a 'Ready' status condition.
134+ This indicates that the underlying pod is running and has passed its checks.
135+ """
133136 if not self .claim_name :
134137 raise RuntimeError (
135- "Cannot wait for sandbox; a sandboxclaim has not been created." )
138+ "Cannot wait for sandbox, claim has not been created." )
139+
140+ # Watch for the SandboxClaim to be updated with the sandbox name
141+ w_claim = watch .Watch ()
142+ for event in w_claim .stream (
143+ self .custom_objects_api .list_namespaced_custom_object ,
144+ namespace = self .namespace ,
145+ group = CLAIM_API_GROUP ,
146+ version = CLAIM_API_VERSION ,
147+ plural = CLAIM_PLURAL_NAME ,
148+ field_selector = f"metadata.name={ self .claim_name } " ,
149+ timeout_seconds = self .sandbox_ready_timeout
150+ ):
151+ if event ["type" ] in ["ADDED" , "MODIFIED" ]:
152+ claim_obj = event ["object" ]
153+ status = claim_obj .get ("status" , {})
154+ sandbox_status = status .get ("sandbox" , {})
155+ if sandbox_status and sandbox_status .get ("Name" ):
156+ self .sandbox_name = sandbox_status .get ("Name" )
157+ w_claim .stop ()
158+ break
159+ else :
160+ self .__exit__ (None , None , None ) # Attempt cleanup
161+ raise TimeoutError (
162+ f"SandboxClaim did not become ready within { self .sandbox_ready_timeout } seconds." )
136163
164+ # Watch for the Sandbox to become ready
137165 w = watch .Watch ()
138166 logging .info ("Watching for Sandbox to become ready..." )
139167 for event in w .stream (
@@ -142,56 +170,54 @@ def _wait_for_sandbox_ready(self):
142170 group = SANDBOX_API_GROUP ,
143171 version = SANDBOX_API_VERSION ,
144172 plural = SANDBOX_PLURAL_NAME ,
145- field_selector = f"metadata.name={ self .claim_name } " ,
173+ field_selector = f"metadata.name={ self .sandbox_name } " ,
146174 timeout_seconds = self .sandbox_ready_timeout
147175 ):
148- sandbox_object = event ['object' ]
149- status = sandbox_object .get ('status' , {})
150- conditions = status .get ('conditions' , [])
151- is_ready = False
152- for cond in conditions :
153- if cond .get ('type' ) == 'Ready' and cond .get ('status' ) == 'True' :
154- is_ready = True
176+ if event ["type" ] in ["ADDED" , "MODIFIED" ]:
177+ sandbox_object = event ['object' ]
178+ status = sandbox_object .get ('status' , {})
179+ conditions = status .get ('conditions' , [])
180+ is_ready = False
181+ for cond in conditions :
182+ if cond .get ('type' ) == 'Ready' and cond .get ('status' ) == 'True' :
183+ is_ready = True
184+ break
185+
186+ if is_ready :
187+ annotations = sandbox_object .get (
188+ 'metadata' , {}).get ('annotations' , {})
189+ pod_name_annotation = "agents.x-k8s.io/pod-name"
190+ if pod_name_annotation in annotations :
191+ self .pod_name = annotations [pod_name_annotation ]
192+ logging .info (
193+ f"Found pod name from annotation: { self .pod_name } " )
194+ else :
195+ self .pod_name = self .sandbox_name
196+ w .stop ()
197+ logging .info (f"Sandbox { self .sandbox_name } is ready." )
155198 break
156199
157- if is_ready :
158- self .sandbox_name = sandbox_object ['metadata' ]['name' ]
159- w .stop ()
160- logging .info (f"Sandbox { self .sandbox_name } is ready." )
161- break
162-
163- if not self .sandbox_name :
200+ if not self .pod_name :
164201 self .__exit__ (None , None , None )
165202 raise TimeoutError (
166- f"Sandbox did not become ready within { self .sandbox_ready_timeout } seconds." )
167-
168- def _get_free_port (self ):
169- """Finds a free port on localhost."""
170- with socket .socket (socket .AF_INET , socket .SOCK_STREAM ) as s :
171- s .bind (('' , 0 ))
172- return s .getsockname ()[1 ]
203+ f"Sandbox did not become ready or pod name could not be determined within { self .sandbox_ready_timeout } seconds." )
173204
174205 def _start_and_wait_for_port_forward (self ):
175206 """
176207 Starts 'kubectl port-forward' to the Router Service.
177208 This allows 'Dev Mode' without needing a public Gateway IP.
178209 """
179- local_port = self ._get_free_port ()
180-
181- # Assumes the router service name from sandbox_router.yaml
182- router_svc = "svc/sandbox-router-svc"
183-
210+ if not self .pod_name :
211+ raise RuntimeError (
212+ "Cannot start port-forwarding, sandbox pod name is not known." )
184213 logging .info (
185- f"Starting Dev Mode tunnel: localhost:{ local_port } -> { router_svc } :8080..." )
186-
214+ f"Starting port-forwarding for sandbox { self .sandbox_name } with sandbox pod { self .pod_name } ..." )
187215 self .port_forward_process = subprocess .Popen (
188216 [
189217 "kubectl" , "port-forward" ,
190- router_svc ,
191- # Tunnel to Router (8080), not Sandbox (8888)
192- f"{ local_port } :8080" ,
193- # The router lives in the Gateway/Default NS
194- "-n" , self .gateway_namespace
218+ f"pod/{ self .pod_name } " ,
219+ f"{ self .server_port } :{ self .server_port } " ,
220+ "-n" , self .namespace
195221 ],
196222 stdout = subprocess .PIPE ,
197223 stderr = subprocess .PIPE
@@ -206,54 +232,16 @@ def _start_and_wait_for_port_forward(self):
206232 f"Tunnel crashed: { stderr .decode (errors = 'ignore' )} " )
207233
208234 try :
209- # Connect to localhost
210- with socket .create_connection (("127.0.0.1" , local_port ), timeout = 0.1 ):
211- self .base_url = f"http://127.0.0.1:{ local_port } "
235+ with socket .create_connection (("127.0.0.1" , self .server_port ), timeout = 0.1 ):
212236 logging .info (
213- f"Dev Mode ready. Tunneled to Router at { self .base_url } " )
214- # No need for huge sleeps; the Router service is stable.
215- time .sleep (0.5 )
237+ f"Port-forwarding is ready on port { self .server_port } ." )
216238 return
217239 except (socket .timeout , ConnectionRefusedError ):
218240 time .sleep (0.5 )
219241
220242 self .__exit__ (None , None , None )
221- raise TimeoutError ("Failed to establish tunnel to Router Service." )
222-
223- def _wait_for_gateway_ip (self ):
224- """Waits for the Gateway to be assigned an external IP."""
225- # Check if we already have a manually provided URL
226- if self .base_url :
227- logging .info (f"Using configured API URL: { self .base_url } " )
228- return
229-
230- logging .info (
231- f"Waiting for Gateway '{ self .gateway_name } ' in namespace '{ self .gateway_namespace } '..." )
232-
233- w = watch .Watch ()
234- for event in w .stream (
235- func = self .custom_objects_api .list_namespaced_custom_object ,
236- namespace = self .gateway_namespace , group = GATEWAY_API_GROUP ,
237- version = GATEWAY_API_VERSION , plural = GATEWAY_PLURAL ,
238- field_selector = f"metadata.name={ self .gateway_name } " ,
239- timeout_seconds = self .gateway_ready_timeout ,
240- ):
241- if event ["type" ] in ["ADDED" , "MODIFIED" ]:
242- gateway_object = event ['object' ]
243- status = gateway_object .get ('status' , {})
244- addresses = status .get ('addresses' , [])
245- if addresses :
246- ip_address = addresses [0 ].get ('value' )
247- if ip_address :
248- self .base_url = f"http://{ ip_address } "
249- logging .info (
250- f"Gateway is ready. Base URL set to: { self .base_url } " )
251- w .stop ()
252- return
253-
254- if not self .base_url :
255- raise TimeoutError (
256- f"Gateway '{ self .gateway_name } ' in namespace '{ self .gateway_namespace } ' did not get an IP within { self .gateway_ready_timeout } seconds." )
243+ raise TimeoutError (
244+ f"Port-forwarding did not become ready within { self .port_forward_ready_timeout } seconds." )
257245
258246 def __enter__ (self ) -> 'SandboxClient' :
259247 self ._create_claim ()
@@ -320,22 +308,15 @@ def _request(self, method: str, endpoint: str, **kwargs) -> requests.Response:
320308 headers ["X-Sandbox-Port" ] = str (self .server_port )
321309 kwargs ["headers" ] = headers
322310
311+ url = f"{ self .base_url } /{ endpoint } "
323312 try :
324313 response = self .session .request (method , url , ** kwargs )
325314 response .raise_for_status ()
326315 return response
327316 except requests .exceptions .RequestException as e :
328- # Check if port-forward died DURING request
329- if self .port_forward_process and self .port_forward_process .poll () is not None :
330- stdout , stderr = self .port_forward_process .communicate ()
331- raise RuntimeError (
332- f"Kubectl Port-Forward crashed DURING request!\n "
333- f"Stderr: { stderr .decode (errors = 'ignore' )} "
334- ) from e
335-
336- logging .error (f"Request to gateway router failed: { e } " )
317+ logging .error (f"Request to sandbox failed: { e } " )
337318 raise RuntimeError (
338- f"Failed to communicate with the sandbox via the gateway at { url } ." ) from e
319+ f"Failed to communicate with the sandbox at { url } ." ) from e
339320
340321 def run (self , command : str , timeout : int = 60 ) -> ExecutionResult :
341322 payload = {"command" : command }
@@ -359,6 +340,10 @@ def write(self, path: str, content: bytes | str, timeout: int = 60):
359340 self ._request ("POST" , "upload" , files = files_payload , timeout = timeout )
360341 logging .info (f"File '{ filename } ' uploaded successfully." )
361342
362- def read (self , path : str , timeout : int = 60 ) -> bytes :
363- response = self ._request ("GET" , f"download/{ path } " , timeout = timeout )
343+ def read (self , path : str ) -> bytes :
344+ """
345+ Downloads a file from the sandbox.
346+ The base path for the download is the root of the sandbox's filesystem.
347+ """
348+ response = self ._request ("GET" , f"download/{ path } " )
364349 return response .content
0 commit comments