Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest

from agentstack_server.domain.constants import AGENT_DETAIL_EXTENSION_URI
from agentstack_server.utils.a2a import get_extension
from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_basic_configuration_example(subtests, get_final_task_from_stream, a2a_client_factory):
    """Verify the basic-configuration example publishes the expected agent card and detail extension.

    Runs the example agent and checks, in independent subtests: card name, declared
    skills, and the agent-detail extension params (interaction mode, greeting,
    framework/source metadata, author, and tools).
    """
    example_path = "agent-integration/agent-details/basic-configuration"

    async with run_example(example_path, a2a_client_factory) as running_example:
        agent_card = running_example.agent_card

        # Resolve the agent-detail extension once instead of in every subtest.
        # If the extension is missing, fall back to an empty params dict so the
        # remaining subtests fail with their own assertions (inside their subtest
        # contexts) rather than an AttributeError on None.
        agent_detail = get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI)
        params = agent_detail.model_dump()["params"] if agent_detail is not None else {}

        with subtests.test("agent card has correct name"):
            assert agent_card.name == "Example Research Assistant"

        with subtests.test("agent card has two skills"):
            assert agent_card.skills is not None
            assert len(agent_card.skills) == 2

            skill_ids = {skill.id for skill in agent_card.skills}
            assert skill_ids == {"research", "summarization"}

        with subtests.test("agent detail extension is configured"):
            assert agent_detail is not None

            assert params["interaction_mode"] == "multi-turn"
            assert (
                params["user_greeting"] == "Hi there! I can help you research topics or summarize uploaded documents."
            )
            assert params["framework"] == "BeeAI Framework"
            assert params["source_code_url"] == "https://github.com/example/example-research-assistant"

        with subtests.test("agent detail has author info"):
            assert params["author"]["name"] == "Agent Stack Team"
            assert params["author"]["email"] == "team@example.com"

        with subtests.test("agent detail has tools"):
            tools = params["tools"]
            assert len(tools) == 2

            tool_names = {tool["name"] for tool in tools}
            assert tool_names == {"Web Search", "Document Reader"}
Comment thread
aleskalfas marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions.ui.settings import SettingsExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_basic_settings_example(subtests, get_final_task_from_stream, a2a_client_factory):
    """Verify the basic-settings example reacts to the thinking checkbox setting.

    Sends the same message twice — once with the setting enabled and once
    disabled — and asserts the agent's reply reflects each value. The two
    scenarios are data-driven to avoid duplicating the metadata payload.
    """
    example_path = "agent-integration/agent-settings/basic-settings"

    async with run_example(example_path, a2a_client_factory) as running_example:
        settings_uri = SettingsExtensionSpec.URI

        # (checkbox value, subtest state label, expected substring in the reply)
        scenarios = (
            (True, "enabled", "Thinking mode is enabled"),
            (False, "disabled", "Thinking mode is disabled"),
        )
        for enabled, state, expected_text in scenarios:
            with subtests.test(f"agent responds based on {state} thinking setting"):
                message = create_text_message_object(content="Hello")
                message.context_id = running_example.context.id
                message.metadata = {
                    settings_uri: {
                        "values": {
                            "thinking_group": {
                                "type": "checkbox_group",
                                "values": {"thinking": {"value": enabled}},
                            }
                        }
                    }
                }
                task = await get_final_task_from_stream(running_example.client.send_message(message))

                assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"
                assert expected_text in task.history[-1].parts[0].root.text
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions.ui.canvas import CanvasExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_canvas_with_llm_example(subtests, get_final_task_from_stream, a2a_client_factory):
    """E2E test of the canvas-with-llm example: generate a code artifact, then edit it via the canvas extension.

    NOTE(review): the second subtest reads ``artifact`` bound inside the first
    subtest's context. With pytest-subtests, a failure in the first subtest is
    recorded but execution continues, so the second subtest would then fail
    with a NameError rather than a meaningful assertion — the subtests are
    intentionally sequential, not independent.
    """
    example_path = "agent-integration/canvas/canvas-with-llm"

    async with run_example(example_path, a2a_client_factory) as running_example:
        canvas_uri = CanvasExtensionSpec.URI

        with subtests.test("agent generates code artifact"):
            message = create_text_message_object(content="Write a hello world program")
            message.context_id = running_example.context.id
            task = await get_final_task_from_stream(running_example.client.send_message(message))

            assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"

            # Verify artifact is returned (the agent uses a mocked LLM response)
            assert len(task.artifacts) > 0
            artifact = task.artifacts[0]
            assert artifact.name == "Response"

            # Verify the artifact contains the expected mock response
            artifact_text = "".join(part.root.text for part in artifact.parts if hasattr(part.root, "text"))
            assert "Hello from LLM!" in artifact_text

        with subtests.test("agent updates artifact via canvas edit"):
            # Use the artifact from the previous test
            artifact_id = artifact.artifact_id

            # Get the artifact text to determine indices
            artifact_text = "".join(part.root.text for part in artifact.parts if hasattr(part.root, "text"))

            # Send edit request for a portion of the code
            message = create_text_message_object(content="Change print to use f-string")
            message.context_id = running_example.context.id
            message.metadata = {
                canvas_uri: {
                    "artifact_id": artifact_id,
                    "start_index": 0,
                    "end_index": min(50, len(artifact_text)),  # Select first 50 chars
                    "description": "Change print to use f-string",
                }
            }
            task = await get_final_task_from_stream(running_example.client.send_message(message))

            assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"

            # Verify updated artifact is returned
            assert len(task.artifacts) > 0
            updated_artifact = task.artifacts[0]

            # Verify the response contains the edit prompt context
            updated_text = "".join(part.root.text for part in updated_artifact.parts if hasattr(part.root, "text"))
            assert "editing existing code" in updated_text.lower() or "selected" in updated_text.lower()
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions import CitationExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_citation_basic_usage_example(subtests, get_final_task_from_stream, a2a_client_factory):
    """Check that the citation-basic-usage example answers with text plus citation metadata."""
    example_path = "agent-integration/citations/citation-basic-usage"

    async with run_example(example_path, a2a_client_factory) as running_example:
        with subtests.test("agent responds with text and citation metadata"):
            msg = create_text_message_object(content="Hello")
            msg.context_id = running_example.context.id
            task = await get_final_task_from_stream(running_example.client.send_message(msg))

            assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"

            final_message = task.history[-1]

            # The reply text should contain the cited claim.
            assert "Python is the most popular programming language" in final_message.parts[0].root.text

            # The citation extension payload must be attached to the message metadata.
            metadata = final_message.metadata
            assert metadata is not None
            assert CitationExtensionSpec.URI in metadata

            # Exactly one citation, pointing at the survey source.
            citation_entries = metadata[CitationExtensionSpec.URI]["citations"]
            assert len(citation_entries) == 1
            first_citation = citation_entries[0]
            assert first_citation["url"] == "https://survey.stackoverflow.com/2023"
            assert first_citation["title"] == "Stack Overflow Developer Survey 2023"
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_basic_environment_variables_example(subtests, get_final_task_from_stream, a2a_client_factory):
    """Check the basic-environment-variables example uses the env variable's default value."""
    example_path = "agent-integration/env-variables/basic-environment-variables"

    async with run_example(example_path, a2a_client_factory) as running_example:
        with subtests.test("agent responds based on default env variable value"):
            msg = create_text_message_object(content="Hello")
            msg.context_id = running_example.context.id
            task = await get_final_task_from_stream(running_example.client.send_message(msg))

            assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}"
            # THINKING_ENABLED defaults to false when not set
            assert "Thinking mode is disabled" in task.history[-1].parts[0].root.text
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions.ui.error import ErrorExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_adding_error_context_example(subtests, get_final_task_from_stream, a2a_client_factory):
    """Check the adding-error-context example attaches context and a stack trace to the error metadata."""
    example_path = "agent-integration/error/adding-error-context"

    async with run_example(example_path, a2a_client_factory) as running_example:
        with subtests.test("agent includes context in error metadata"):
            msg = create_text_message_object(content="Hello")
            msg.context_id = running_example.context.id
            task = await get_final_task_from_stream(running_example.client.send_message(msg))

            assert task.status.state == TaskState.failed

            # The error extension payload must be present in the status message metadata.
            metadata = task.status.message.metadata
            assert metadata is not None
            assert ErrorExtensionSpec.URI in metadata

            payload = metadata[ErrorExtensionSpec.URI]

            # Context supplied by the example must survive round-trip.
            context = payload["context"]
            assert context is not None
            assert context["request_id"] == "req-123"
            assert context["user_id"] == 42

            # The example configures include_stacktrace=True.
            assert payload["stack_trace"] is not None

            # Structured error details.
            error_details = payload["error"]
            assert error_details["title"] == "ValueError"
            assert error_details["message"] == "Something went wrong!"
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions.ui.error import ErrorExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_advanced_error_reporting_example(subtests, get_final_task_from_stream, a2a_client_factory):
    """Check the advanced-error-reporting example surfaces the stack trace in text and metadata."""
    example_path = "agent-integration/error/advanced-error-reporting"

    async with run_example(example_path, a2a_client_factory) as running_example:
        with subtests.test("agent reports error with stack trace"):
            msg = create_text_message_object(content="Hello")
            msg.context_id = running_example.context.id
            task = await get_final_task_from_stream(running_example.client.send_message(msg))

            assert task.status.state == TaskState.failed

            # Human-readable error text mentions the exception and its trace.
            status_text = task.status.message.parts[0].root.text
            for expected in ("ValueError", "Something went wrong!", "Stack Trace"):
                assert expected in status_text

            # Structured metadata carries the stack trace too.
            metadata = task.status.message.metadata
            assert metadata is not None
            assert ErrorExtensionSpec.URI in metadata
            assert metadata[ErrorExtensionSpec.URI]["stack_trace"] is not None
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState
from agentstack_sdk.a2a.extensions.ui.error import ErrorExtensionSpec

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_multiple_errors_handling_example(subtests, get_final_task_from_stream, a2a_client_factory):
    """Check the multiple-errors-handling example reports both errors from an ExceptionGroup."""
    example_path = "agent-integration/error/multiple-errors-handling"

    async with run_example(example_path, a2a_client_factory) as running_example:
        with subtests.test("agent reports multiple errors from ExceptionGroup"):
            msg = create_text_message_object(content="Hello")
            msg.context_id = running_example.context.id
            task = await get_final_task_from_stream(running_example.client.send_message(msg))

            assert task.status.state == TaskState.failed

            # Structured error data must be present in the status message metadata.
            metadata = task.status.message.metadata
            assert metadata is not None
            assert ErrorExtensionSpec.URI in metadata

            # An ExceptionGroup produces an ErrorGroup holding a list of errors.
            error_group = metadata[ErrorExtensionSpec.URI]["error"]
            assert "errors" in error_group
            reported = error_group["errors"]
            assert len(reported) == 2

            # Both exceptions are reported, order-independent.
            assert {entry["title"] for entry in reported} == {"ValueError", "TypeError"}
            assert {entry["message"] for entry in reported} == {"First error", "Second error"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright 2025 © BeeAI a Series of LF Projects, LLC
# SPDX-License-Identifier: Apache-2.0

import pytest
from a2a.client.helpers import create_text_message_object
from a2a.types import TaskState

from tests.e2e.examples.conftest import run_example

pytestmark = pytest.mark.e2e


@pytest.mark.usefixtures("clean_up", "setup_platform_client")
async def test_standard_error_reporting_example(subtests, get_final_task_from_stream, a2a_client_factory):
    """Check the standard-error-reporting example fails the task and reports the raised message."""
    example_path = "agent-integration/error/standard-error-reporting"

    async with run_example(example_path, a2a_client_factory) as running_example:
        with subtests.test("agent reports error when exception is raised"):
            msg = create_text_message_object(content="Hello")
            msg.context_id = running_example.context.id
            task = await get_final_task_from_stream(running_example.client.send_message(msg))

            assert task.status.state == TaskState.failed
            assert "Something went wrong!" in task.status.message.parts[0].root.text
Loading