 5 |  5 | from kernel import App, KernelContext
 6 |  6 | from zenbase_llml import llml
 7 |  7 |
 8 |    | -from lib.ai import AGENT_INSTRUCTIONS, ChatFactory
 9 |    | -from lib.browser import DOWNLOADS_PATH, create_browser
10 |    | -from lib.models import BrowserAgentRequest, BrowserAgentResponse
11 |    | -from lib.storage import upload_files, upload_json
   |  8 | +from lib import storage, ai, browser, models
12 |  9 |
13 | 10 | logger = logging.getLogger(__name__)
14 | 11 |
17 | 14 |
18 | 15 | @app.action("perform")
19 | 16 | async def perform(ctx: KernelContext, params: dict):
20 |    | -    request = BrowserAgentRequest.model_validate(params)
   | 17 | +    request = models.BrowserAgentRequest.model_validate(params)
21 | 18 |
22 |    | -    llm = ChatFactory[request.provider](
   | 19 | +    llm = ai.ChatFactory[request.provider](
23 | 20 |         api_key=request.api_key,
24 | 21 |         model=request.model,
25 | 22 |     )
26 | 23 |
27 |    | -    session, browser = await create_browser(ctx, request)
   | 24 | +    instructions = f"""
   | 25 | +    You are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Decompose the user's query into all required sub-requests, and confirm that each is completed. Do not stop after completing only part of the request. Only terminate your turn when you are sure that the problem is solved. You must be prepared to answer multiple queries and only finish the call once the user has confirmed they're done.
28 | 26 |
29 |    | -    prompt = {
30 |    | -        "instructions": "\n\n".join(
31 |    | -            filter(bool, [request.instructions, AGENT_INSTRUCTIONS])
32 |    | -        ),
33 |    | -        "input": request.input,
34 |    | -    }
   | 27 | +    You must plan extensively in accordance with the workflow steps before making subsequent function calls, and reflect extensively on the outcomes each function call made, ensuring the user's query, and related sub-requests are completely resolved.
35 | 28 |
   | 29 | +    {request.instructions}
   | 30 | +
   | 31 | +    Remember, you are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Decompose the user's query into all required sub-requests, and confirm that each is completed. Do not stop after completing only part of the request. Only terminate your turn when you are sure that the problem is solved. You must be prepared to answer multiple queries and only finish the call once the user has confirmed they're done.
   | 32 | +
   | 33 | +    You must plan extensively in accordance with the workflow steps before making subsequent function calls, and reflect extensively on the outcomes each function call made, ensuring the user's query, and related sub-requests are completely resolved.
   | 34 | +
   | 35 | +    Note that your browser will automatically:
   | 36 | +    1. Download the PDF file upon viewing it. Just wait for it. You do not need to read the PDF.
   | 37 | +    2. Solve CAPTCHAs or similar tests. Just wait for it.
   | 38 | +    """
   | 39 | +
   | 40 | +    session_id, browser_session, downloads_path = await browser.create(ctx, request)
36 | 41 |     agent = Agent(
37 |    | -        task=llml(prompt),
38 |    | -        browser=browser,
   | 42 | +        task=llml({"instructions": instructions, "input": request.input}),
   | 43 | +        browser=browser_session,
39 | 44 |         llm=llm,
40 | 45 |         use_thinking=request.reasoning,
41 | 46 |         flash_mode=request.flash,
42 | 47 |     )
43 |    | -
44 | 48 |     trajectory = await agent.run(max_steps=request.max_steps)
45 | 49 |
46 |    | -    uploads = await asyncio.gather(
47 |    | -        upload_files(dir=session, files=DOWNLOADS_PATH.glob("*")),
48 |    | -        upload_json(trajectory.model_dump(), key=f"{session}/trajectory.json"),
49 |    | -    )
   | 50 | +    if not storage.ENABLED:
   | 51 | +        downloads = {p.name: str(p) async for p in downloads_path.glob("*")}
   | 52 | +    else:
   | 53 | +        downloads, _ = await asyncio.gather(
   | 54 | +            storage.upload_files(dir=session_id, paths=downloads_path.glob("*")),
   | 55 | +            storage.upload_json(
   | 56 | +                trajectory.model_dump(),
   | 57 | +                key=f"{session_id}/trajectory.json",
   | 58 | +            ),
   | 59 | +        )
50 | 60 |
51 |    | -    response = BrowserAgentResponse.from_run(
   | 61 | +    response = models.BrowserAgentResponse.from_run(
52 | 62 |         trajectory,
53 |    | -        session=session,
54 |    | -        downloads=uploads[0],
   | 63 | +        session=session_id,
   | 64 | +        downloads=downloads,
55 | 65 |     )
56 | 66 |     return response.model_dump()
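
The key behavioral change in this diff is the download-handling branch: with remote storage disabled the action returns local file paths, otherwise it uploads the downloaded files and the trajectory dump concurrently and keeps only the file mapping in the response. Below is a minimal, self-contained sketch of that pattern under stated assumptions: `STORAGE_ENABLED`, `upload_files`, and `upload_json` are hypothetical stand-ins rather than the actual `lib.storage` API, and it uses plain synchronous `pathlib.Path.glob` for simplicity, whereas the code above appears to iterate an async path object.

import asyncio
from pathlib import Path
from typing import Iterable

STORAGE_ENABLED = True  # assumption: stand-in for storage.ENABLED


async def upload_files(dir: str, paths: Iterable[Path]) -> dict[str, str]:
    # Hypothetical stand-in: pretend each file is pushed to object storage
    # and return a {filename: remote key} mapping.
    return {p.name: f"{dir}/{p.name}" for p in paths}


async def upload_json(data: dict, key: str) -> str:
    # Hypothetical stand-in: pretend the JSON blob is uploaded under `key`.
    return key


async def collect_downloads(
    session_id: str, downloads_path: Path, trajectory: dict
) -> dict[str, str]:
    if not STORAGE_ENABLED:
        # Storage disabled: expose local paths directly.
        return {p.name: str(p) for p in downloads_path.glob("*")}

    # Storage enabled: run both uploads concurrently; gather returns one
    # result per awaitable, so unpack the file mapping and ignore the JSON key.
    downloads, _ = await asyncio.gather(
        upload_files(dir=session_id, paths=downloads_path.glob("*")),
        upload_json(trajectory, key=f"{session_id}/trajectory.json"),
    )
    return downloads


if __name__ == "__main__":
    result = asyncio.run(collect_downloads("demo-session", Path("."), {"steps": []}))
    print(result)

Unpacking `downloads, _` (rather than a single-element target) matters because `asyncio.gather` returns one result per awaitable passed to it.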