 5 |  5 | from kernel import App, KernelContext
 6 |  6 | from zenbase_llml import llml
 7 |  7 |
 8 |    | -from lib.ai import AGENT_INSTRUCTIONS, ChatFactory
 9 |    | -from lib.browser import DOWNLOADS_PATH, create_browser
10 |    | -from lib.models import BrowserAgentRequest, BrowserAgentResponse
11 |    | -from lib.storage import upload_files, upload_json
   |  8 | +from lib import storage, ai, browser, models
12 |  9 |
13 | 10 | logger = logging.getLogger(__name__)
14 | 11 |
17 | 14 |
18 | 15 | @app.action("perform")
19 | 16 | async def perform(ctx: KernelContext, params: dict):
20 |    | -    request = BrowserAgentRequest.model_validate(params)
   | 17 | +    request = models.BrowserAgentRequest.model_validate(params)
21 | 18 |
22 |    | -    llm = ChatFactory[request.provider](
   | 19 | +    llm = ai.ChatFactory[request.provider](
23 | 20 |         api_key=request.api_key,
24 | 21 |         model=request.model,
25 | 22 |     )
26 | 23 |
27 |    | -    session, browser = await create_browser(ctx, request)
   | 24 | +    instructions = f"""
   | 25 | +    You are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Decompose the user's query into all required sub-requests, and confirm that each is completed. Do not stop after completing only part of the request. Only terminate your turn when you are sure that the problem is solved. You must be prepared to answer multiple queries and only finish the call once the user has confirmed they're done.
28 | 26 |
29 |    | -    prompt = {
30 |    | -        "instructions": "\n\n".join(
31 |    | -            filter(bool, [request.instructions, AGENT_INSTRUCTIONS])
32 |    | -        ),
33 |    | -        "input": request.input,
34 |    | -    }
   | 27 | +    You must plan extensively in accordance with the workflow steps before making subsequent function calls, and reflect extensively on the outcomes each function call made, ensuring the user's query, and related sub-requests are completely resolved.
35 | 28 |
   | 29 | +    {request.instructions}
   | 30 | +
   | 31 | +    Remember, you are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Decompose the user's query into all required sub-requests, and confirm that each is completed. Do not stop after completing only part of the request. Only terminate your turn when you are sure that the problem is solved. You must be prepared to answer multiple queries and only finish the call once the user has confirmed they're done.
   | 32 | +
   | 33 | +    You must plan extensively in accordance with the workflow steps before making subsequent function calls, and reflect extensively on the outcomes each function call made, ensuring the user's query, and related sub-requests are completely resolved.
   | 34 | +
   | 35 | +    Note that your browser will automatically:
   | 36 | +    1. Download the PDF file upon viewing it. Just wait for it. You do not need to read the PDF.
   | 37 | +    2. Solve CAPTCHAs or similar tests. Just wait for it.
   | 38 | +    """
   | 39 | +
   | 40 | +    session_id, browser_session, downloads_path = await browser.create(ctx, request)
36 | 41 |     agent = Agent(
37 |    | -        task=llml(prompt),
38 |    | -        browser=browser,
   | 42 | +        task=llml({"instructions": instructions, "input": request.input}),
   | 43 | +        browser=browser_session,
39 | 44 |         llm=llm,
40 | 45 |         use_thinking=request.reasoning,
41 | 46 |         flash_mode=request.flash,
42 | 47 |     )
43 |    | -
44 | 48 |     trajectory = await agent.run(max_steps=request.max_steps)
45 | 49 |
46 |    | -    uploads = await asyncio.gather(
47 |    | -        upload_files(dir=session, files=DOWNLOADS_PATH.glob("*")),
48 |    | -        upload_json(trajectory.model_dump(), key=f"{session}/trajectory.json"),
49 |    | -    )
   | 50 | +    if not storage.ENABLED:
   | 51 | +        downloads = {p.name: str(p) async for p in downloads_path.glob("*")}
   | 52 | +    else:
   | 53 | +        downloads, _ = await asyncio.gather(
   | 54 | +            storage.upload_files(dir=session_id, paths=downloads_path.glob("*")),
   | 55 | +            storage.upload_json(
   | 56 | +                trajectory.model_dump(),
   | 57 | +                key=f"{session_id}/trajectory.json",
   | 58 | +            ),
   | 59 | +        )
50 | 60 |
51 |    | -    response = BrowserAgentResponse.from_run(
   | 61 | +    response = models.BrowserAgentResponse.from_run(
52 | 62 |         trajectory,
53 |    | -        session=session,
54 |    | -        downloads=uploads[0],
   | 63 | +        session=session_id,
   | 64 | +        downloads=downloads,
55 | 65 |     )
56 | 66 |     return response.model_dump()
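
The key behavioral change in this diff is the download-handling branch: with remote storage disabled the action returns local file paths, otherwise it uploads the downloaded files and the trajectory dump concurrently and keeps only the file mapping in the response. Below is a minimal, self-contained sketch of that pattern under stated assumptions: `STORAGE_ENABLED`, `upload_files`, and `upload_json` are hypothetical stand-ins rather than the actual `lib.storage` API, and it uses plain synchronous `pathlib.Path.glob` for simplicity, whereas the code above appears to iterate an async path object.

import asyncio
from pathlib import Path
from typing import Iterable

STORAGE_ENABLED = True  # assumption: stand-in for storage.ENABLED


async def upload_files(dir: str, paths: Iterable[Path]) -> dict[str, str]:
    # Hypothetical stand-in: pretend each file is pushed to object storage
    # and return a {filename: remote key} mapping.
    return {p.name: f"{dir}/{p.name}" for p in paths}


async def upload_json(data: dict, key: str) -> str:
    # Hypothetical stand-in: pretend the JSON blob is uploaded under `key`.
    return key


async def collect_downloads(
    session_id: str, downloads_path: Path, trajectory: dict
) -> dict[str, str]:
    if not STORAGE_ENABLED:
        # Storage disabled: expose local paths directly.
        return {p.name: str(p) for p in downloads_path.glob("*")}

    # Storage enabled: run both uploads concurrently; gather returns one
    # result per awaitable, so unpack the file mapping and ignore the JSON key.
    downloads, _ = await asyncio.gather(
        upload_files(dir=session_id, paths=downloads_path.glob("*")),
        upload_json(trajectory, key=f"{session_id}/trajectory.json"),
    )
    return downloads


if __name__ == "__main__":
    result = asyncio.run(collect_downloads("demo-session", Path("."), {"steps": []}))
    print(result)

Unpacking `downloads, _` (rather than a single-element target) matters because `asyncio.gather` returns one result per awaitable passed to it.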