58 changes: 58 additions & 0 deletions .github/workflows/e2e-examples-test.yml
@@ -0,0 +1,58 @@
name: e2e-examples-test

permissions:
  contents: read

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

on:
  workflow_dispatch: { }
  pull_request:
    types: [labeled, synchronize]
  push:
    branches:
      - main
    paths:
      - 'apps/agentstack-server/**'
      - 'apps/agentstack-sdk-py/**'
      - 'examples/**'
      - 'helm/**'
jobs:
  e2e-examples-test:
    if: >-
      github.event_name != 'pull_request' ||
      contains(github.event.pull_request.labels.*.name, 'e2e-examples')
    timeout-minutes: 45
    runs-on: ubuntu-latest
    env:
      AGENTSTACK__HOME: ${{ github.workspace }}/.agentstack
    steps:
      - uses: actions/checkout@v4
      - name: Maximize build space
        uses: ./.github/actions/maximize-build-space
        with:
          root-reserve-mb: 15360
          temp-reserve-mb: 2048
          swap-size-mb: 1024
          remove-dotnet: 'true'
      - name: "Set up Lima"
        uses: lima-vm/lima-actions/setup@v1
        id: lima-actions-setup
      - name: "Cache ~/.cache/lima"
        uses: actions/cache@v4
        with:
          path: ~/.cache/lima
          key: lima-${{ steps.lima-actions-setup.outputs.version }}
      - uses: actions/checkout@v4
      - uses: ./.github/actions/setup
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - run: mise run agentstack-server:test:e2e-examples
        env:
          LLM_API_BASE: "${{ secrets.OPENAI_API_BASE }}"
          LLM_MODEL: "${{ vars.OPENAI_MODEL }}"
          EMBEDDING_MODEL: "${{ vars.OPENAI_EMBEDDING_MODEL }}"
          LLM_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
      - run: uv cache prune --ci
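
Note: on pull requests this job runs only when the e2e-examples label is present; pushes to main that touch the listed paths, and manual dispatches, run it unconditionally. A minimal sketch of triggering it from the GitHub CLI (the PR number is a placeholder, and gh is assumed to be authenticated against this repository):

# Label a PR so the next synchronize event runs the examples job (1234 is a placeholder PR number)
gh pr edit 1234 --add-label "e2e-examples"

# Or trigger the workflow_dispatch event directly
gh workflow run e2e-examples-test.yml --ref main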
4 changes: 1 addition & 3 deletions .github/workflows/e2e-test.yml
@@ -52,8 +52,6 @@ jobs:
        env:
          LLM_API_BASE: "${{ secrets.OPENAI_API_BASE }}"
          LLM_MODEL: "${{ vars.OPENAI_MODEL }}"
          EMBEDDING_MODEL: "${{ vars.OPENAI_EMBEDDING_MODEL }}"
          LLM_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
          # LLM_API_BASE: "https://api.groq.com/openai/v1"
          # LLM_MODEL: "groq:groq/compound"
          # LLM_API_KEY: "${{ secrets.GROQ_API_KEY }}"
      - run: uv cache prune --ci
@@ -83,8 +83,6 @@ async def start(
    ] = None,
    vm_name: typing.Annotated[str, typer.Option(hidden=True)] = "agentstack",
    verbose: typing.Annotated[bool, typer.Option("-v", "--verbose", help="Show verbose output")] = False,
    skip_pull: typing.Annotated[bool, typer.Option(hidden=True)] = False,
    skip_restart_deployments: typing.Annotated[bool, typer.Option(hidden=True)] = False,
    skip_login: typing.Annotated[bool, typer.Option(hidden=True)] = False,
    no_wait_for_platform: typing.Annotated[bool, typer.Option(hidden=True)] = False,
):
    import agentstack_cli.commands.server
@@ -140,7 +139,8 @@ async def start(
        style="dim",
    )

    await agentstack_cli.commands.server.server_login("http://localhost:8333")
    if not skip_login:
        await agentstack_cli.commands.server.server_login("http://localhost:8333")


@app.command("stop", help="Stop Agent Stack platform. [Local only]")
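
The new hidden --skip-login option lets automation start the platform without the interactive browser login; the e2e tasks below authenticate with seeded credentials through environment variables instead. A minimal sketch mirroring those tasks (credentials and flags are taken from the config they generate; $CONFIG_FILE stands for that generated file):

# Sketch: non-interactive platform start as used by the e2e tasks in tasks.toml
export AGENTSTACK__USERNAME=admin
export AGENTSTACK__PASSWORD=admin
export AGENTSTACK__CLIENT_ID=agentstack-cli
mise run agentstack:start --vm-name=e2e-examples-test-run --skip-login -f "$CONFIG_FILE" --set ui.enabled=false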
89 changes: 87 additions & 2 deletions apps/agentstack-server/tasks.toml
@@ -263,6 +263,7 @@ VM_NAME=e2e-test-run

export AGENTSTACK__USERNAME=admin
export AGENTSTACK__PASSWORD=admin
export AGENTSTACK__CLIENT_ID=agentstack-cli

NO_CLEAN="${usage_no_clean:-false}"
if [ "$NO_CLEAN" != "true" ]; then
@@ -299,7 +300,7 @@ keycloak:
        roles: ["agentstack-admin"]
' > "$CONFIG_FILE"

{{ mise_bin }} run agentstack:start --vm-name=${VM_NAME} -f "$CONFIG_FILE" --set ui.enabled=false
{{ mise_bin }} run agentstack:start --vm-name=${VM_NAME} --skip-login -f "$CONFIG_FILE" --set ui.enabled=false


eval "$( {{ mise_bin }} run agentstack:shell --vm-name="$VM_NAME" )"
@@ -322,7 +323,91 @@ echo "Waiting for Keycloak provision job to complete..."
kubectl wait --for=condition=complete --timeout=300s job/keycloak-provision

kubectl port-forward svc/postgresql 5432:5432 2>/dev/null 1>&2 &
uv run pytest -m e2e "${usage_only}"
uv run pytest -m e2e --ignore=tests/e2e/examples
result=$?

if [ $result -ne 0 ]; then
echo "Tests failed. Checking pod status..."
echo "------------- pods --------------"
kubectl get pod
echo "------------ events -------------"
kubectl get event
fi

if [ "$NO_CLEAN" != "true" ]; then
{{ mise_bin }} run agentstack-cli:run -- platform delete --vm-name=${VM_NAME}
else
{{ mise_bin }} run agentstack-cli:run -- platform stop --vm-name=${VM_NAME}
fi

rm -f "$CONFIG_FILE"

kill %1
exit $result
"""

["agentstack-server:test:e2e-examples"]
dir = "{{config_root}}/apps/agentstack-server"
usage = 'flag "--no-clean"'
run = """
#!/bin/bash
VM_NAME=e2e-examples-test-run

export AGENTSTACK__USERNAME=admin
export AGENTSTACK__PASSWORD=admin
export AGENTSTACK__CLIENT_ID=agentstack-cli

NO_CLEAN="${usage_no_clean:-false}"
if [ "$NO_CLEAN" != "true" ]; then
{{ mise_bin }} run agentstack:stop-all
{{ mise_bin }} run agentstack:delete --vm-name=${VM_NAME}
curl http://localhost:8333 >/dev/null 2>&1 && echo "Another instance at localhost:8333 is already running" && exit 2
fi

CONFIG_FILE="/tmp/config_e2e_test_$(date +%s).yaml"

echo '
externalRegistries: null
ui:
  enabled: false
auth:
  enabled: true
docling:
  enabled: true
connector:
  presets:
    - url: mcp+stdio://test
      stdio:
        image: mcp/aws-documentation
      metadata:
        name: Test MCP Server
keycloak:
  auth:
    seedAgentstackUsers:
      - username: admin
        password: admin
        firstName: Admin
        lastName: User
        email: admin@beeai.dev
        roles: ["agentstack-admin"]
' > "$CONFIG_FILE"

{{ mise_bin }} run agentstack:start --vm-name=${VM_NAME} --skip-login -f "$CONFIG_FILE" --set ui.enabled=false


eval "$( {{ mise_bin }} run agentstack:shell --vm-name="$VM_NAME" )"

export DB_URL="postgresql+asyncpg://agentstack-user:password@localhost:5432/agentstack"
export LLM_API_BASE="${LLM_API_BASE:-http://host.docker.internal:11434/v1}"

echo "Waiting for agentstack-server deployment to be ready..."
kubectl wait --for=condition=available --timeout=300s deployment/agentstack-server

echo "Waiting for Keycloak provision job to complete..."
kubectl wait --for=condition=complete --timeout=300s job/keycloak-provision

kubectl port-forward svc/postgresql 5432:5432 2>/dev/null 1>&2 &
uv run pytest -m e2e tests/e2e/examples
result=$?

if [ $result -ne 0 ]; then
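
The new agentstack-server:test:e2e-examples task mirrors the existing e2e task but runs only tests/e2e/examples (which the main suite now skips via --ignore), and its --no-clean flag keeps the VM around after a run. A rough local invocation, assuming mise forwards --no-clean to the task's usage spec and an OpenAI-compatible endpoint is available; all values below are placeholders, and without them the task falls back to its Ollama defaults:

export LLM_API_BASE="https://api.openai.com/v1"          # placeholder endpoint
export LLM_MODEL="openai:gpt-4o-mini"                    # placeholder model id (provider:model format as used above)
export EMBEDDING_MODEL="openai:text-embedding-3-small"   # placeholder embedding model
export LLM_API_KEY="sk-placeholder"                      # placeholder key

mise run agentstack-server:test:e2e-examples             # clean run: VM is deleted afterwards
mise run agentstack-server:test:e2e-examples --no-clean  # stop instead of delete the VM, for debugging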
6 changes: 3 additions & 3 deletions apps/agentstack-server/tests/conftest.py
@@ -24,8 +24,8 @@ class Configuration(BaseSettings):
    model_config = SettingsConfigDict(extra="ignore")
    kubeconfig: Path = Path.home() / ".agentstack/lima/agentstack-local-dev/copied-from-guest/kubeconfig.yaml"
    llm_api_base: Secret[str] = Secret("http://localhost:11434/v1")
    # llm_model: str = "other:llama3.1:8b"
    llm_model: str = "other:granite4:latest"
    llm_model: str = "ollama:gpt-oss:20b"
    embedding_model: str = "ollama:nomic-embed-text:latest"
    llm_api_key: Secret[str] = Secret("dummy")
    test_agent_image: str = "agentstack-registry-svc.default:5001/chat-test:latest"
    test_agent_build_repo: str = "https://github.com/i-am-bee/agentstack-starter"
@@ -63,7 +63,7 @@ def pytest_configure(config):
async def _get_kr8s_client():
    api = await kr8s.asyncio.api()
    kubeconfig = api.auth.kubeconfig
    kubeconfig_regex = r".*/.agentstack/lima/(agentstack-local-dev|e2e-test-run|integration-test-run)/copied-from-guest/kubeconfig.yaml$"
    kubeconfig_regex = r".*/.agentstack/lima/(agentstack-local-dev|e2e-test-run|e2e-examples-test-run|integration-test-run)/copied-from-guest/kubeconfig.yaml$"
    if not re.match(kubeconfig_regex, str(kubeconfig.path)):
        raise ValueError(
            f"Preventing kubeconfig operations with invalid kubeconfig path.\n"
4 changes: 3 additions & 1 deletion apps/agentstack-server/tests/e2e/conftest.py
@@ -71,7 +71,9 @@ async def setup_real_llm(test_configuration, setup_platform_client):
        with suppress(Exception):
            ex = Exception(str(f"Failed to setup LLM - {ex}\n{json.dumps(ex.response.text, indent=2)}"))
        raise ex
    await SystemConfiguration.update(default_llm_model=test_configuration.llm_model)
    await SystemConfiguration.update(
        default_llm_model=test_configuration.llm_model, default_embedding_model=test_configuration.embedding_model
    )


@pytest.fixture(scope="session")
@@ -0,0 +1,57 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest

from agentstack_server.domain.constants import AGENT_DETAIL_EXTENSION_URI
from agentstack_server.utils.a2a import get_extension
from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_basic_configuration_example(subtests, get_final_task_from_stream, a2a_client_factory):
    example_path = "agent-integration/agent-details/basic-configuration"

    async with run_example(example_path, a2a_client_factory) as running_example:
        agent_card = running_example.agent_card

        with subtests.test("agent card has correct name"):
            assert agent_card.name == "Example Research Assistant"

        with subtests.test("agent card has two skills"):
            assert agent_card.skills is not None
            assert len(agent_card.skills) == 2

            skill_ids = {skill.id for skill in agent_card.skills}
            assert skill_ids == {"research", "summarization"}

        with subtests.test("agent detail extension is configured"):
            agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
            assert agent_detail is not None

            params = agent_detail.model_dump()["params"]
            assert params["interaction_mode"] == "multi-turn"
            assert (
                params["user_greeting"] == "Hi there! I can help you research topics or summarize uploaded documents."
            )
            assert params["framework"] == "BeeAI Framework"
            assert params["source_code_url"] == "https://github.com/example/example-research-assistant"

        with subtests.test("agent detail has author info"):
            agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
            params = agent_detail.model_dump()["params"]

            assert params["author"]["name"] == "Agent Stack Team"
            assert params["author"]["email"] == "team@example.com"

        with subtests.test("agent detail has tools"):
            agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
            params = agent_detail.model_dump()["params"]

            tools = params["tools"]
            assert len(tools) == 2

            tool_names = {tool["name"] for tool in tools}
            assert tool_names == {"Web Search", "Document Reader"}
@@ -13,8 +13,8 @@


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_settings_form_rendering_example(subtests, get_final_task_from_stream, a2a_client_factory):
    example_path = "agent-integration/forms/settings-form-rendering"
async def test_basic_settings_example(subtests, get_final_task_from_stream, a2a_client_factory):
    example_path = "agent-integration/agent-settings/basic-settings"

    async with run_example(example_path, a2a_client_factory) as running_example:
        spec = FormServiceExtensionSpec.from_agent_card(running_example.provider.agent_card)
@@ -0,0 +1,65 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions.ui.canvas import CanvasExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_canvas_with_llm_example(subtests, get_final_task_from_stream, a2a_client_factory):
    example_path = "agent-integration/canvas/canvas-with-llm"

    async with run_example(example_path, a2a_client_factory) as running_example:
        canvas_uri = CanvasExtensionSpec.URI

        with subtests.test("agent generates code artifact"):
            message = create_text_message_object(content="Write a hello world program")
            message.context_id = running_example.context.id
            task = await get_final_task_from_stream(running_example.client.send_message(message))

            assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"

            # Verify artifact is returned (the agent uses a mocked LLM response)
            assert len(task.artifacts) > 0
            artifact = task.artifacts[0]
            assert artifact.name == "Response"

            # Verify the artifact contains the expected mock response
            artifact_text = "".join(part.root.text for part in artifact.parts if hasattr(part.root, "text"))
            assert "Hello from LLM!" in artifact_text

        with subtests.test("agent updates artifact via canvas edit"):
            # Use the artifact from the previous test
            artifact_id = artifact.artifact_id

            # Get the artifact text to determine indices
            artifact_text = "".join(part.root.text for part in artifact.parts if hasattr(part.root, "text"))

            # Send edit request for a portion of the code
            message = create_text_message_object(content="Change print to use f-string")
            message.context_id = running_example.context.id
            message.metadata = {
                canvas_uri: {
                    "artifact_id": artifact_id,
                    "start_index": 0,
                    "end_index": min(50, len(artifact_text)),  # Select first 50 chars
                    "description": "Change print to use f-string",
                }
            }
            task = await get_final_task_from_stream(running_example.client.send_message(message))

            assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"

            # Verify updated artifact is returned
            assert len(task.artifacts) > 0
            updated_artifact = task.artifacts[0]

            # Verify the response contains the edit prompt context
            updated_text = "".join(part.root.text for part in updated_artifact.parts if hasattr(part.root, "text"))
            assert "editing existing code" in updated_text.lower() or "selected" in updated_text.lower()