 
 from ..tools import convert_pydantic_to_tool_spec
 from ..types.content import ContentBlock, Messages, SystemContentBlock
+from ..types.event_loop import Usage
 from ..types.exceptions import ContextWindowOverflowException
-from ..types.streaming import StreamEvent
+from ..types.streaming import MetadataEvent, StreamEvent
 from ..types.tools import ToolChoice, ToolSpec
 from ._validation import validate_config_keys
 from .openai import OpenAIModel
@@ -81,11 +82,12 @@ def get_config(self) -> LiteLLMConfig:
 
     @override
     @classmethod
-    def format_request_message_content(cls, content: ContentBlock) -> dict[str, Any]:
+    def format_request_message_content(cls, content: ContentBlock, **kwargs: Any) -> dict[str, Any]:
         """Format a LiteLLM content block.
 
         Args:
             content: Message content.
+            **kwargs: Additional keyword arguments for future extensibility.
 
         Returns:
             LiteLLM formatted content block.
@@ -133,33 +135,28 @@ def _stream_switch_content(self, data_type: str, prev_data_type: str | None) ->
 
     @override
     @classmethod
-    def format_request_messages(
+    def _format_system_messages(
         cls,
-        messages: Messages,
         system_prompt: Optional[str] = None,
         *,
         system_prompt_content: Optional[list[SystemContentBlock]] = None,
-        **kwargs: Any,
     ) -> list[dict[str, Any]]:
-        """Format a LiteLLM compatible messages array with cache point support.
+        """Format system messages for LiteLLM with cache point support.
 
         Args:
-            messages: List of message objects to be processed by the model.
-            system_prompt: System prompt to provide context to the model (for legacy compatibility).
+            system_prompt: System prompt to provide context to the model.
             system_prompt_content: System prompt content blocks to provide context to the model.
-            **kwargs: Additional keyword arguments for future extensibility.
 
         Returns:
-            A LiteLLM compatible messages array.
+            List of formatted system messages.
         """
-        formatted_messages: list[dict[str, Any]] = []
         # Handle backward compatibility: if system_prompt is provided but system_prompt_content is None
         if system_prompt and system_prompt_content is None:
-            system_prompt_content = [{"context": system_prompt}]
+            system_prompt_content = [{"text": system_prompt}]
 
         # For LiteLLM with Bedrock, we can support cache points
-        system_content = []
-        for block in system_prompt_content:
+        system_content: list[dict[str, Any]] = []
+        for block in system_prompt_content or []:
             if "text" in block:
                 system_content.append({"type": "text", "text": block["text"]})
             elif "cachePoint" in block and block["cachePoint"].get("type") == "default":
@@ -169,46 +166,44 @@ def format_request_messages(
                     system_content[-1]["cache_control"] = {"type": "ephemeral"}
 
         # Create single system message with content array
-        if system_content:
-            formatted_messages.append({"role": "system", "content": system_content})
-
-        # Process regular messages
-        for message in messages:
-            contents = message["content"]
-
-            formatted_contents = [
-                cls.format_request_message_content(content)
-                for content in contents
-                if not any(block_type in content for block_type in ["toolResult", "toolUse"])
-            ]
-            formatted_tool_calls = [
-                cls.format_request_message_tool_call(content["toolUse"]) for content in contents if "toolUse" in content
-            ]
-            formatted_tool_messages = [
-                cls.format_request_tool_message(content["toolResult"])
-                for content in contents
-                if "toolResult" in content
-            ]
-
-            formatted_message = {
-                "role": message["role"],
-                "content": formatted_contents,
-                **({"tool_calls": formatted_tool_calls} if formatted_tool_calls else {}),
-            }
-            formatted_messages.append(formatted_message)
-            formatted_messages.extend(formatted_tool_messages)
+        return [{"role": "system", "content": system_content}] if system_content else []
+
+    @override
+    @classmethod
+    def format_request_messages(
+        cls,
+        messages: Messages,
+        system_prompt: Optional[str] = None,
+        *,
+        system_prompt_content: Optional[list[SystemContentBlock]] = None,
+        **kwargs: Any,
+    ) -> list[dict[str, Any]]:
+        """Format a LiteLLM compatible messages array with cache point support.
+
+        Args:
+            messages: List of message objects to be processed by the model.
+            system_prompt: System prompt to provide context to the model (for legacy compatibility).
+            system_prompt_content: System prompt content blocks to provide context to the model.
+            **kwargs: Additional keyword arguments for future extensibility.
+
+        Returns:
+            A LiteLLM compatible messages array.
+        """
+        formatted_messages = cls._format_system_messages(system_prompt, system_prompt_content=system_prompt_content)
+        formatted_messages.extend(cls._format_regular_messages(messages))
 
         return [message for message in formatted_messages if message["content"] or "tool_calls" in message]
 
     @override
-    def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
+    def format_chunk(self, event: dict[str, Any], **kwargs: Any) -> StreamEvent:
         """Format a LiteLLM response event into a standardized message chunk.
 
         This method overrides OpenAI's format_chunk to handle the metadata case
         with prompt caching support. All other chunk types use the parent implementation.
 
         Args:
             event: A response event from the LiteLLM model.
+            **kwargs: Additional keyword arguments for future extensibility.
 
         Returns:
             The formatted chunk.
@@ -218,30 +213,29 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
         """
         # Handle metadata case with prompt caching support
         if event["chunk_type"] == "metadata":
-            usage_data = {
+            usage_data: Usage = {
                 "inputTokens": event["data"].prompt_tokens,
                 "outputTokens": event["data"].completion_tokens,
                 "totalTokens": event["data"].total_tokens,
             }
 
             # Only LiteLLM over Anthropic supports cache write tokens
             # Waiting until a more general approach is available to set cacheWriteInputTokens
-
-            tokens_details = getattr(event["data"], "prompt_tokens_details", None)
-            if tokens_details and getattr(tokens_details, "cached_tokens", None):
-                usage_data["cacheReadInputTokens"] = event["data"].prompt_tokens_details.cached_tokens
-
 
+            if tokens_details := getattr(event["data"], "prompt_tokens_details", None):
+                if cached := getattr(tokens_details, "cached_tokens", None):
+                    usage_data["cacheReadInputTokens"] = cached
+                if creation := getattr(tokens_details, "cache_creation_tokens", None):
+                    usage_data["cacheWriteInputTokens"] = creation
 
-            return {
-                "metadata": {
-                    "usage": usage_data,
-                    "metrics": {
+            return StreamEvent(
+                metadata=MetadataEvent(
+                    metrics={
                         "latencyMs": 0,  # TODO
                     },
-                },
-            }
-
+                    usage=usage_data,
+                )
+            )
         # For all other cases, use the parent implementation
         return super().format_chunk(event)
 
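For reference, a minimal sketch (not part of the diff) of what the new metadata path produces. The `SimpleNamespace` stand-in, the placeholder `model_id`, and the `cache_creation_tokens` attribute name are assumptions; LiteLLM's real usage object varies by provider, and constructing the model here is assumed not to contact any backend.

```python
# Hypothetical illustration of the metadata branch of format_chunk above.
from types import SimpleNamespace

from strands.models.litellm import LiteLLMModel

model = LiteLLMModel(model_id="bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0")  # placeholder id

usage = SimpleNamespace(
    prompt_tokens=1200,
    completion_tokens=50,
    total_tokens=1250,
    # prompt_tokens_details is provider-dependent; cache_creation_tokens is the
    # assumed Anthropic-style attribute read by the new walrus checks.
    prompt_tokens_details=SimpleNamespace(cached_tokens=1024, cache_creation_tokens=0),
)

chunk = model.format_chunk({"chunk_type": "metadata", "data": usage})
# Expected shape (cacheWriteInputTokens omitted because cache_creation_tokens is falsy):
# {"metadata": {"metrics": {"latencyMs": 0},
#               "usage": {"inputTokens": 1200, "outputTokens": 50,
#                         "totalTokens": 1250, "cacheReadInputTokens": 1024}}}
```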
@@ -263,13 +257,16 @@ async def stream(
             tool_specs: List of tool specifications to make available to the model.
             system_prompt: System prompt to provide context to the model.
             tool_choice: Selection strategy for tool invocation.
+            system_prompt_content: System prompt content blocks to provide context to the model.
             **kwargs: Additional keyword arguments for future extensibility.
 
         Yields:
             Formatted message chunks from the model.
         """
         logger.debug("formatting request")
-        request = self.format_request(messages, tool_specs, system_prompt, tool_choice, system_prompt_content=system_prompt_content)
+        request = self.format_request(
+            messages, tool_specs, system_prompt, tool_choice, system_prompt_content=system_prompt_content
+        )
         logger.debug("request=<%s>", request)
 
         logger.debug("invoking model")
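Finally, a hedged usage sketch of the cache-point support this change enables. The import paths follow the diff's relative imports (`..types.content`, the litellm model module); the prompt text and the exact formatted output are illustrative assumptions.

```python
# Hypothetical example of passing a cache point via system_prompt_content.
from strands.models.litellm import LiteLLMModel
from strands.types.content import Messages, SystemContentBlock

system_prompt_content: list[SystemContentBlock] = [
    {"text": "You are a support agent. <long, cacheable instructions>"},
    {"cachePoint": {"type": "default"}},  # folded into the preceding block as cache_control
]
messages: Messages = [{"role": "user", "content": [{"text": "Hello"}]}]

formatted = LiteLLMModel.format_request_messages(
    messages, system_prompt_content=system_prompt_content
)
# formatted[0] is expected to be (assumed), per _format_system_messages above:
# {"role": "system",
#  "content": [{"type": "text",
#               "text": "You are a support agent. ...",
#               "cache_control": {"type": "ephemeral"}}]}
```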