58 changes: 58 additions & 0 deletions .github/workflows/e2e-examples-test.yml
@@ -0,0 +1,58 @@
name: e2e-examples-test

permissions:
  contents: read

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

on:
  workflow_dispatch: { }
  pull_request:
    types: [labeled, synchronize]
  push:
    branches:
      - main
    paths:
      - 'apps/agentstack-server/**'
      - 'apps/agentstack-sdk-py/**'
      - 'examples/**'
      - 'helm/**'
jobs:
  e2e-examples-test:
    if: >-
      github.event_name != 'pull_request' ||
      contains(github.event.pull_request.labels.*.name, 'e2e-examples')
    timeout-minutes: 45
    runs-on: ubuntu-latest
    env:
      AGENTSTACK__HOME: ${{ github.workspace }}/.agentstack
    steps:
      - uses: actions/checkout@v4
      - name: Maximize build space
        uses: ./.github/actions/maximize-build-space
        with:
          root-reserve-mb: 15360
          temp-reserve-mb: 2048
          swap-size-mb: 1024
          remove-dotnet: 'true'
      - name: "Set up Lima"
        uses: lima-vm/lima-actions/setup@v1
        id: lima-actions-setup
      - name: "Cache ~/.cache/lima"
        uses: actions/cache@v4
        with:
          path: ~/.cache/lima
          key: lima-${{ steps.lima-actions-setup.outputs.version }}
      - uses: actions/checkout@v4
      - uses: ./.github/actions/setup
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - run: mise run agentstack-server:test:e2e-examples
        env:
          LLM_API_BASE: "${{ secrets.OPENAI_API_BASE }}"
          LLM_MODEL: "${{ vars.OPENAI_MODEL }}"
          EMBEDDING_MODEL: "${{ vars.OPENAI_EMBEDDING_MODEL }}"
          LLM_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
      - run: uv cache prune --ci
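
Note: on pull requests this job runs only when the e2e-examples label is present; pushes to main that touch the listed paths, and manual dispatches, run it unconditionally. A minimal sketch of triggering it from the GitHub CLI (the PR number is a placeholder, and gh is assumed to be authenticated against this repository):

# Label a PR so the next synchronize event runs the examples job (1234 is a placeholder PR number)
gh pr edit 1234 --add-label "e2e-examples"

# Or trigger the workflow_dispatch event directly
gh workflow run e2e-examples-test.yml --ref main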
4 changes: 1 addition & 3 deletions .github/workflows/e2e-test.yml
@@ -52,8 +52,6 @@ jobs:
        env:
          LLM_API_BASE: "${{ secrets.OPENAI_API_BASE }}"
          LLM_MODEL: "${{ vars.OPENAI_MODEL }}"
          EMBEDDING_MODEL: "${{ vars.OPENAI_EMBEDDING_MODEL }}"
          LLM_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
          # LLM_API_BASE: "https://api.groq.com/openai/v1"
          # LLM_MODEL: "groq:groq/compound"
          # LLM_API_KEY: "${{ secrets.GROQ_API_KEY }}"
      - run: uv cache prune --ci
@@ -83,8 +83,6 @@ async def start(
    ] = None,
    vm_name: typing.Annotated[str, typer.Option(hidden=True)] = "agentstack",
    verbose: typing.Annotated[bool, typer.Option("-v", "--verbose", help="Show verbose output")] = False,
    skip_pull: typing.Annotated[bool, typer.Option(hidden=True)] = False,
    skip_restart_deployments: typing.Annotated[bool, typer.Option(hidden=True)] = False,
    skip_login: typing.Annotated[bool, typer.Option(hidden=True)] = False,
    no_wait_for_platform: typing.Annotated[bool, typer.Option(hidden=True)] = False,
):
    import agentstack_cli.commands.server
@@ -140,7 +139,8 @@ async def start(
        style="dim",
    )

    await agentstack_cli.commands.server.server_login("http://localhost:8333")
    if not skip_login:
        await agentstack_cli.commands.server.server_login("http://localhost:8333")


@app.command("stop", help="Stop Agent Stack platform. [Local only]")
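
The new hidden --skip-login option lets automation start the platform without the interactive browser login; the e2e tasks below authenticate with seeded credentials through environment variables instead. A minimal sketch mirroring those tasks (credentials and flags are taken from the config they generate; $CONFIG_FILE stands for that generated file):

# Sketch: non-interactive platform start as used by the e2e tasks in tasks.toml
export AGENTSTACK__USERNAME=admin
export AGENTSTACK__PASSWORD=admin
export AGENTSTACK__CLIENT_ID=agentstack-cli
mise run agentstack:start --vm-name=e2e-examples-test-run --skip-login -f "$CONFIG_FILE" --set ui.enabled=false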
89 changes: 87 additions & 2 deletions apps/agentstack-server/tasks.toml
@@ -263,6 +263,7 @@ VM_NAME=e2e-test-run

export AGENTSTACK__USERNAME=admin
export AGENTSTACK__PASSWORD=admin
export AGENTSTACK__CLIENT_ID=agentstack-cli

NO_CLEAN="${usage_no_clean:-false}"
if [ "$NO_CLEAN" != "true" ]; then
@@ -299,7 +300,7 @@ keycloak:
        roles: ["agentstack-admin"]
' > "$CONFIG_FILE"

{{ mise_bin }} run agentstack:start --vm-name=${VM_NAME} -f "$CONFIG_FILE" --set ui.enabled=false
{{ mise_bin }} run agentstack:start --vm-name=${VM_NAME} --skip-login -f "$CONFIG_FILE" --set ui.enabled=false


eval "$( {{ mise_bin }} run agentstack:shell --vm-name="$VM_NAME" )"
@@ -322,7 +323,91 @@ echo "Waiting for Keycloak provision job to complete..."
kubectl wait --for=condition=complete --timeout=300s job/keycloak-provision

kubectl port-forward svc/postgresql 5432:5432 2>/dev/null 1>&2 &
uv run pytest -m e2e "${usage_only}"
uv run pytest -m e2e --ignore=tests/e2e/examples
result=$?

if [ $result -ne 0 ]; then
echo "Tests failed. Checking pod status..."
echo "------------- pods --------------"
kubectl get pod
echo "------------ events -------------"
kubectl get event
fi

if [ "$NO_CLEAN" != "true" ]; then
{{ mise_bin }} run agentstack-cli:run -- platform delete --vm-name=${VM_NAME}
else
{{ mise_bin }} run agentstack-cli:run -- platform stop --vm-name=${VM_NAME}
fi

rm -f "$CONFIG_FILE"

kill %1
exit $result
"""

["agentstack-server:test:e2e-examples"]
dir = "{{config_root}}/apps/agentstack-server"
usage = 'flag "--no-clean"'
run = """
#!/bin/bash
VM_NAME=e2e-examples-test-run

export AGENTSTACK__USERNAME=admin
export AGENTSTACK__PASSWORD=admin
export AGENTSTACK__CLIENT_ID=agentstack-cli

NO_CLEAN="${usage_no_clean:-false}"
if [ "$NO_CLEAN" != "true" ]; then
{{ mise_bin }} run agentstack:stop-all
{{ mise_bin }} run agentstack:delete --vm-name=${VM_NAME}
curl http://localhost:8333 >/dev/null 2>&1 && echo "Another instance at localhost:8333 is already running" && exit 2
fi

CONFIG_FILE="/tmp/config_e2e_test_$(date +%s).yaml"

echo '
externalRegistries: null
ui:
  enabled: false
auth:
  enabled: true
docling:
  enabled: true
connector:
  presets:
    - url: mcp+stdio://test
      stdio:
        image: mcp/aws-documentation
      metadata:
        name: Test MCP Server
keycloak:
  auth:
    seedAgentstackUsers:
      - username: admin
        password: admin
        firstName: Admin
        lastName: User
        email: admin@beeai.dev
        roles: ["agentstack-admin"]
' > "$CONFIG_FILE"

{{ mise_bin }} run agentstack:start --vm-name=${VM_NAME} --skip-login -f "$CONFIG_FILE" --set ui.enabled=false


eval "$( {{ mise_bin }} run agentstack:shell --vm-name="$VM_NAME" )"

export DB_URL="postgresql+asyncpg://agentstack-user:password@localhost:5432/agentstack"
export LLM_API_BASE="${LLM_API_BASE:-http://host.docker.internal:11434/v1}"

echo "Waiting for agentstack-server deployment to be ready..."
kubectl wait --for=condition=available --timeout=300s deployment/agentstack-server

echo "Waiting for Keycloak provision job to complete..."
kubectl wait --for=condition=complete --timeout=300s job/keycloak-provision

kubectl port-forward svc/postgresql 5432:5432 2>/dev/null 1>&2 &
uv run pytest -m e2e tests/e2e/examples
result=$?

if [ $result -ne 0 ]; then
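
The new agentstack-server:test:e2e-examples task mirrors the existing e2e task but runs only tests/e2e/examples (which the main suite now skips via --ignore), and its --no-clean flag keeps the VM around after a run. A rough local invocation, assuming mise forwards --no-clean to the task's usage spec and an OpenAI-compatible endpoint is available; all values below are placeholders, and without them the task falls back to its Ollama defaults:

export LLM_API_BASE="https://api.openai.com/v1"          # placeholder endpoint
export LLM_MODEL="openai:gpt-4o-mini"                    # placeholder model id (provider:model format as used above)
export EMBEDDING_MODEL="openai:text-embedding-3-small"   # placeholder embedding model
export LLM_API_KEY="sk-placeholder"                      # placeholder key

mise run agentstack-server:test:e2e-examples             # clean run: VM is deleted afterwards
mise run agentstack-server:test:e2e-examples --no-clean  # stop instead of delete the VM, for debugging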
6 changes: 3 additions & 3 deletions apps/agentstack-server/tests/conftest.py
@@ -24,8 +24,8 @@ class Configuration(BaseSettings):
    model_config = SettingsConfigDict(extra="ignore")
    kubeconfig: Path = Path.home() / ".agentstack/lima/agentstack-local-dev/copied-from-guest/kubeconfig.yaml"
    llm_api_base: Secret[str] = Secret("http://localhost:11434/v1")
    # llm_model: str = "other:llama3.1:8b"
    llm_model: str = "other:granite4:latest"
    llm_model: str = "ollama:gpt-oss:20b"
    embedding_model: str = "ollama:nomic-embed-text:latest"
    llm_api_key: Secret[str] = Secret("dummy")
    test_agent_image: str = "agentstack-registry-svc.default:5001/chat-test:latest"
    test_agent_build_repo: str = "https://github.com/i-am-bee/agentstack-starter"
@@ -63,7 +63,7 @@ def pytest_configure(config):
async def _get_kr8s_client():
    api = await kr8s.asyncio.api()
    kubeconfig = api.auth.kubeconfig
    kubeconfig_regex = r".*/.agentstack/lima/(agentstack-local-dev|e2e-test-run|integration-test-run)/copied-from-guest/kubeconfig.yaml$"
    kubeconfig_regex = r".*/.agentstack/lima/(agentstack-local-dev|e2e-test-run|e2e-examples-test-run|integration-test-run)/copied-from-guest/kubeconfig.yaml$"
    if not re.match(kubeconfig_regex, str(kubeconfig.path)):
        raise ValueError(
            f"Preventing kubeconfig operations with invalid kubeconfig path.\n"
4 changes: 3 additions & 1 deletion apps/agentstack-server/tests/e2e/conftest.py
@@ -71,7 +71,9 @@ async def setup_real_llm(test_configuration, setup_platform_client):
        with suppress(Exception):
            ex = Exception(str(f"Failed to setup LLM - {ex}\n{json.dumps(ex.response.text, indent=2)}"))
        raise ex
    await SystemConfiguration.update(default_llm_model=test_configuration.llm_model)
    await SystemConfiguration.update(
        default_llm_model=test_configuration.llm_model, default_embedding_model=test_configuration.embedding_model
    )


@pytest.fixture(scope="session")
@@ -0,0 +1,57 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest

from agentstack_server.domain.constants import AGENT_DETAIL_EXTENSION_URI
from agentstack_server.utils.a2a import get_extension
from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_basic_configuration_example(subtests, get_final_task_from_stream, a2a_client_factory):
    example_path = "agent-integration/agent-details/basic-configuration"

    async with run_example(example_path, a2a_client_factory) as running_example:
        agent_card = running_example.agent_card

        with subtests.test("agent card has correct name"):
            assert agent_card.name == "Example Research Assistant"

        with subtests.test("agent card has two skills"):
            assert agent_card.skills is not None
            assert len(agent_card.skills) == 2

            skill_ids = {skill.id for skill in agent_card.skills}
            assert skill_ids == {"research", "summarization"}

        with subtests.test("agent detail extension is configured"):
            agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
            assert agent_detail is not None

            params = agent_detail.model_dump()["params"]
            assert params["interaction_mode"] == "multi-turn"
            assert (
                params["user_greeting"] == "Hi there! I can help you research topics or summarize uploaded documents."
            )
            assert params["framework"] == "BeeAI Framework"
            assert params["source_code_url"] == "https://github.com/example/example-research-assistant"

        with subtests.test("agent detail has author info"):
            agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
            params = agent_detail.model_dump()["params"]

            assert params["author"]["name"] == "Agent Stack Team"
            assert params["author"]["email"] == "team@example.com"

        with subtests.test("agent detail has tools"):
            agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
            params = agent_detail.model_dump()["params"]

            tools = params["tools"]
            assert len(tools) == 2

            tool_names = {tool["name"] for tool in tools}
            assert tool_names == {"Web Search", "Document Reader"}
@@ -13,8 +13,8 @@


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_settings_form_rendering_example(subtests, get_final_task_from_stream, a2a_client_factory):
    example_path = "agent-integration/forms/settings-form-rendering"
async def test_basic_settings_example(subtests, get_final_task_from_stream, a2a_client_factory):
    example_path = "agent-integration/agent-settings/basic-settings"

    async with run_example(example_path, a2a_client_factory) as running_example:
        spec = FormServiceExtensionSpec.from_agent_card(running_example.provider.agent_card)
@@ -0,0 +1,65 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions.ui.canvas import CanvasExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_canvas_with_llm_example(subtests, get_final_task_from_stream, a2a_client_factory):
    example_path = "agent-integration/canvas/canvas-with-llm"

    async with run_example(example_path, a2a_client_factory) as running_example:
        canvas_uri = CanvasExtensionSpec.URI

        with subtests.test("agent generates code artifact"):
            message = create_text_message_object(content="Write a hello world program")
            message.context_id = running_example.context.id
            task = await get_final_task_from_stream(running_example.client.send_message(message))

            assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"

            # Verify artifact is returned (the agent uses a mocked LLM response)
            assert len(task.artifacts) > 0
            artifact = task.artifacts[0]
            assert artifact.name == "Response"

            # Verify the artifact contains the expected mock response
            artifact_text = "".join(part.root.text for part in artifact.parts if hasattr(part.root, "text"))
            assert "Hello from LLM!" in artifact_text

        with subtests.test("agent updates artifact via canvas edit"):
            # Use the artifact from the previous test
            artifact_id = artifact.artifact_id

            # Get the artifact text to determine indices
            artifact_text = "".join(part.root.text for part in artifact.parts if hasattr(part.root, "text"))

            # Send edit request for a portion of the code
            message = create_text_message_object(content="Change print to use f-string")
            message.context_id = running_example.context.id
            message.metadata = {
                canvas_uri: {
                    "artifact_id": artifact_id,
                    "start_index": 0,
                    "end_index": min(50, len(artifact_text)),  # Select first 50 chars
                    "description": "Change print to use f-string",
                }
            }
            task = await get_final_task_from_stream(running_example.client.send_message(message))

            assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"

            # Verify updated artifact is returned
            assert len(task.artifacts) > 0
            updated_artifact = task.artifacts[0]

            # Verify the response contains the edit prompt context
            updated_text = "".join(part.root.text for part in updated_artifact.parts if hasattr(part.root, "text"))
            assert "editing existing code" in updated_text.lower() or "selected" in updated_text.lower()