Add streaming support for chat: fix async generator bug and add documentation #7187

**Documentation** (hunk `@@ -132,6 +132,92 @@`, added after the existing `mo.ui.chat(...)` example):

## Streaming Responses

Chatbots can stream responses in real time, creating a more interactive experience
similar to ChatGPT, where the response appears word by word as it's generated.

### With Built-in Models

For built-in models (OpenAI, Anthropic, Google, Groq, Bedrock), set `stream=True` in the model constructor:

```python
import marimo as mo

chat = mo.ui.chat(
    mo.ai.llm.openai(
        "gpt-4o",
        system_message="You are a helpful assistant.",
        stream=True,  # Enable streaming
    ),
    show_configuration_controls=True
)
chat
```

This works for all built-in models:

- `mo.ai.llm.openai("gpt-4o", stream=True)`
- `mo.ai.llm.anthropic("claude-3-5-sonnet-20240620", stream=True)`
- `mo.ai.llm.google("gemini-1.5-pro-latest", stream=True)`
- `mo.ai.llm.groq("llama-3.1-70b-versatile", stream=True)`
- `mo.ai.llm.bedrock("anthropic.claude-3-7-sonnet-20250219-v1:0", stream=True)`

### With Custom Models

For custom models, you can use either a regular (sync) or an async generator function that yields intermediate results:

**Sync generator (simpler):**

```python
import marimo as mo
import time


def streaming_model(messages, config):
    """Stream responses word by word."""
    response = "This response will appear word by word!"
    words = response.split()
    accumulated = ""

    for word in words:
        accumulated += word + " "
        yield accumulated
        time.sleep(0.1)  # Simulate processing delay


chat = mo.ui.chat(streaming_model)
chat
```

A review thread on the `yield accumulated` line:

> **Contributor:** Should this be just `word`?
>
> **Author:** The accumulation must happen server-side. Each yield sends the full accumulated text to the frontend, which displays it. If we yielded just `word`, the frontend would show only the latest word rather than the growing response.
**Async generator (for async operations):**

```python
import marimo as mo
import asyncio


async def async_streaming_model(messages, config):
    """Stream responses word by word asynchronously."""
    response = "This response will appear word by word!"
    words = response.split()
    accumulated = ""

    for word in words:
        accumulated += word + " "
        yield accumulated
        await asyncio.sleep(0.1)  # Async processing delay


chat = mo.ui.chat(async_streaming_model)
chat
```

Each `yield` sends an update to the frontend, and the chat UI displays
the progressively accumulated response in real time.
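Many SDKs stream deltas (only the newly generated token), while the chat UI displays whatever you yield; as the review thread above explains, each yield should therefore carry the full accumulated text. Below is a minimal sketch of bridging the two, where `fake_token_stream` and `accumulating_model` are hypothetical names standing in for your own delta-yielding source and wrapper (they are not marimo APIs):

```python
import asyncio

import marimo as mo


async def fake_token_stream():
    # Hypothetical stand-in for an SDK that yields deltas (new tokens only).
    for token in ["Streaming ", "one ", "token ", "at ", "a ", "time."]:
        await asyncio.sleep(0.05)
        yield token


async def accumulating_model(messages, config):
    """Bridge a delta-yielding stream to the accumulated-text updates shown in the chat UI."""
    accumulated = ""
    async for delta in fake_token_stream():
        accumulated += delta
        yield accumulated  # Yield the full text so far, not just the new delta


chat = mo.ui.chat(accumulating_model)
chat
```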
!!! tip "See streaming examples"
    For complete working examples, check out:

    - [`streaming_openai.py`](https://github.com/marimo-team/marimo/blob/main/examples/ai/chat/streaming_openai.py) - Streaming with OpenAI models
    - [`streaming_custom.py`](https://github.com/marimo-team/marimo/blob/main/examples/ai/chat/streaming_custom.py) - Custom streaming chatbot

## Built-in Models

marimo provides several built-in AI models that you can use with the chat UI
**`examples/ai/chat/streaming_custom.py`** (new file, hunk `@@ -0,0 +1,102 @@`):

````python
# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "marimo",
# ]
# ///

import marimo

__generated_with = "0.17.8"
app = marimo.App(width="medium")


@app.cell
def _():
    import marimo as mo
    import asyncio
    return asyncio, mo


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    # Custom streaming chatbot

    This example shows how to make a chatbot that streams responses.
    Create an async generator function that yields intermediate results,
    and watch the response appear incrementally!
    """)
    return


@app.cell
def _(asyncio, mo):
    async def streaming_echo_model(messages, config):
        """This chatbot echoes what the user says, word by word."""
        # Get the user's message
        user_message = messages[-1].content

        # Stream the response word by word
        response = f"You said: '{user_message}'. Here's my response streaming word by word!"
        words = response.split()
        accumulated = ""

        for word in words:
            accumulated += word + " "
            yield accumulated
            await asyncio.sleep(0.2)  # Delay to make streaming visible

    chatbot = mo.ui.chat(
        streaming_echo_model,
        prompts=["Hello", "Tell me a story", "What is streaming?"],
        show_configuration_controls=True
    )
    return (chatbot,)


@app.cell
def _(chatbot):
    chatbot
    return


@app.cell
def _(mo):
    mo.md("""
    ## How it works

    The key is to make your model function an **async generator**:

    ```python
    async def my_model(messages, config):
        response = 'Building up text...'
        accumulated = ''
        for part in response.split():
            accumulated += part + ' '
            yield accumulated  # Each yield updates the UI
            await asyncio.sleep(0.1)
    ```

    Each `yield` sends an update to the frontend, creating a smooth streaming effect!
    """)
    return


@app.cell
def _(mo):
    mo.md("""
    Access the chatbot's historical messages with `chatbot.value`.
    """)
    return


@app.cell
def _(chatbot):
    # chatbot.value is the list of chat messages
    chatbot.value
    return


if __name__ == "__main__":
    app.run()
````
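The last two cells surface `chatbot.value`, the chat history. A small, hypothetical helper for turning that history into a readable summary; it assumes each message exposes `role` and `content` attributes, matching the `messages[-1].content` access in the model function above, and the helper name `summarize_history` is illustrative, not a marimo API:

```python
# Hypothetical helper: render the chat history stored in chatbot.value as Markdown.
# Assumes each message exposes `role` and `content` attributes.
def summarize_history(messages):
    if not messages:
        return "_No messages yet._"
    lines = [
        f"- **{getattr(m, 'role', 'unknown')}**: {getattr(m, 'content', '')}"
        for m in messages
    ]
    return "\n".join(lines)


# In a marimo cell, make this the last expression so it renders:
# mo.md(summarize_history(chatbot.value))
```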
**`examples/ai/chat/streaming_openai.py`** (new file, hunk `@@ -0,0 +1,117 @@`):

````python
# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "marimo",
#     "openai>=1.55.3",
# ]
# ///

import marimo

__generated_with = "0.17.8"
app = marimo.App(width="medium")


@app.cell
def _():
    import marimo as mo
    return (mo,)


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    # OpenAI streaming chatbot

    This example shows how to use OpenAI's API with streaming responses.
    The built-in `mo.ai.llm.openai()` model automatically streams tokens
    as they arrive from the API!

    Enter your API key below to try it out.
    """)
    return


@app.cell
def _(mo):
    api_key_input = mo.ui.text(
        placeholder="sk-...",
        label="OpenAI API Key",
        kind="password",
    )
    api_key_input
    return (api_key_input,)


@app.cell
def _(api_key_input, mo):
    if api_key_input.value:
        chatbot = mo.ui.chat(
            mo.ai.llm.openai(
                "gpt-4o-mini",
                system_message="You are a helpful assistant. Keep responses concise and friendly.",
                api_key=api_key_input.value,
                stream=True,  # Enable streaming
            ),
            prompts=[
                "Tell me a short joke",
                "What is Python?",
                "Explain streaming in one sentence",
            ],
            show_configuration_controls=True,
        )
    else:
        chatbot = mo.md("*Enter your OpenAI API key above to start chatting*")
    return (chatbot,)


@app.cell
def _(chatbot):
    chatbot
    return


@app.cell
def _(mo):
    mo.md("""
    ## How it works

    The built-in OpenAI model returns an async generator that yields tokens
    as they stream from the API:

    ```python
    mo.ui.chat(
        mo.ai.llm.openai(
            "gpt-4o-mini",
            api_key="your-key",
            stream=True,  # Enable streaming!
        )
    )
    ```

    Set `stream=True` to enable streaming responses. 🚀

    Other built-in models (`anthropic`, `google`, `groq`) work the same way.
    """)
    return

@app.cell
def _(chatbot, mo):
    # Show chat history count. Keep this as the cell's last expression so it
    # renders; mo.md() inside an `if` statement would not be displayed.
    (
        mo.md(f"**Chat history:** {len(chatbot.value)} messages")
        if hasattr(chatbot, "value")
        else None
    )
    return


@app.cell
def _(chatbot):
    # Display the full history (only when a real chatbot, not the placeholder, is shown)
    chatbot.value if hasattr(chatbot, "value") else None
    return


if __name__ == "__main__":
    app.run()
````
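For readers curious what `stream=True` corresponds to conceptually, here is a rough, hand-rolled equivalent written as a custom model using the OpenAI Python SDK's streaming interface. This is a sketch, not marimo's actual implementation: the name `manual_openai_stream` is hypothetical, it only forwards the latest user message, and the API key is a placeholder.

```python
from openai import OpenAI


def manual_openai_stream(messages, config):
    """Sketch of a custom model that streams from the OpenAI SDK and accumulates deltas."""
    client = OpenAI(api_key="your-key")  # placeholder key
    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": messages[-1].content}],
        stream=True,
    )
    accumulated = ""
    for chunk in stream:
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content or ""
        accumulated += delta
        yield accumulated  # The chat UI displays the full text so far


# chat = mo.ui.chat(manual_openai_stream)
```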
Review discussion:

> **Reviewer:** If this works out of the box with built-in models, should streaming be enabled by default?
>
> **Author:** Good question! I kept it `False` by default for backward compatibility - existing code shouldn't change behavior. But I can see the argument for `True` as default since it's a better UX and works with all built-in models. Happy to change it if you think that's the right call - what do you think?
>
> **Reviewer:** Since all built-in models support streaming, I'd say just remove the argument and always stream the response. I can't think of any case when a user wouldn't want to stream the response (are there any?)