 
 from ..tools import convert_pydantic_to_tool_spec
 from ..types.content import ContentBlock, Messages, SystemContentBlock
+from ..types.event_loop import Usage
 from ..types.exceptions import ContextWindowOverflowException
-from ..types.streaming import StreamEvent
+from ..types.streaming import MetadataEvent, StreamEvent
 from ..types.tools import ToolChoice, ToolSpec
 from ._validation import validate_config_keys
 from .openai import OpenAIModel
@@ -81,11 +82,12 @@ def get_config(self) -> LiteLLMConfig:
 
     @override
     @classmethod
-    def format_request_message_content(cls, content: ContentBlock) -> dict[str, Any]:
+    def format_request_message_content(cls, content: ContentBlock, **kwargs: Any) -> dict[str, Any]:
         """Format a LiteLLM content block.
 
         Args:
             content: Message content.
+            **kwargs: Additional keyword arguments for future extensibility.
 
         Returns:
             LiteLLM formatted content block.
@@ -133,33 +135,28 @@ def _stream_switch_content(self, data_type: str, prev_data_type: str | None) ->
 
     @override
     @classmethod
-    def format_request_messages(
+    def _format_system_messages(
         cls,
-        messages: Messages,
         system_prompt: Optional[str] = None,
         *,
         system_prompt_content: Optional[list[SystemContentBlock]] = None,
-        **kwargs: Any,
     ) -> list[dict[str, Any]]:
-        """Format a LiteLLM compatible messages array with cache point support.
+        """Format system messages for LiteLLM with cache point support.
 
         Args:
-            messages: List of message objects to be processed by the model.
-            system_prompt: System prompt to provide context to the model (for legacy compatibility).
+            system_prompt: System prompt to provide context to the model.
             system_prompt_content: System prompt content blocks to provide context to the model.
-            **kwargs: Additional keyword arguments for future extensibility.
 
         Returns:
-            A LiteLLM compatible messages array.
+            List of formatted system messages.
         """
-        formatted_messages: list[dict[str, Any]] = []
         # Handle backward compatibility: if system_prompt is provided but system_prompt_content is None
         if system_prompt and system_prompt_content is None:
-            system_prompt_content = [{"context": system_prompt}]
+            system_prompt_content = [{"text": system_prompt}]
 
         # For LiteLLM with Bedrock, we can support cache points
-        system_content = []
-        for block in system_prompt_content:
+        system_content: list[dict[str, Any]] = []
+        for block in system_prompt_content or []:
             if "text" in block:
                 system_content.append({"type": "text", "text": block["text"]})
             elif "cachePoint" in block and block["cachePoint"].get("type") == "default":
@@ -169,46 +166,44 @@ def format_request_messages(
                     system_content[-1]["cache_control"] = {"type": "ephemeral"}
 
         # Create single system message with content array
-        if system_content:
-            formatted_messages.append({"role": "system", "content": system_content})
-
-        # Process regular messages
-        for message in messages:
-            contents = message["content"]
-
-            formatted_contents = [
-                cls.format_request_message_content(content)
-                for content in contents
-                if not any(block_type in content for block_type in ["toolResult", "toolUse"])
-            ]
-            formatted_tool_calls = [
-                cls.format_request_message_tool_call(content["toolUse"]) for content in contents if "toolUse" in content
-            ]
-            formatted_tool_messages = [
-                cls.format_request_tool_message(content["toolResult"])
-                for content in contents
-                if "toolResult" in content
-            ]
-
-            formatted_message = {
-                "role": message["role"],
-                "content": formatted_contents,
-                **({"tool_calls": formatted_tool_calls} if formatted_tool_calls else {}),
-            }
-            formatted_messages.append(formatted_message)
-            formatted_messages.extend(formatted_tool_messages)
+        return [{"role": "system", "content": system_content}] if system_content else []
+
+    @override
+    @classmethod
+    def format_request_messages(
+        cls,
+        messages: Messages,
+        system_prompt: Optional[str] = None,
+        *,
+        system_prompt_content: Optional[list[SystemContentBlock]] = None,
+        **kwargs: Any,
+    ) -> list[dict[str, Any]]:
+        """Format a LiteLLM compatible messages array with cache point support.
+
+        Args:
+            messages: List of message objects to be processed by the model.
+            system_prompt: System prompt to provide context to the model (for legacy compatibility).
+            system_prompt_content: System prompt content blocks to provide context to the model.
+            **kwargs: Additional keyword arguments for future extensibility.
+
+        Returns:
+            A LiteLLM compatible messages array.
+        """
+        formatted_messages = cls._format_system_messages(system_prompt, system_prompt_content=system_prompt_content)
+        formatted_messages.extend(cls._format_regular_messages(messages))
 
         return [message for message in formatted_messages if message["content"] or "tool_calls" in message]
 
     @override
-    def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
+    def format_chunk(self, event: dict[str, Any], **kwargs: Any) -> StreamEvent:
         """Format a LiteLLM response event into a standardized message chunk.
 
         This method overrides OpenAI's format_chunk to handle the metadata case
         with prompt caching support. All other chunk types use the parent implementation.
 
         Args:
             event: A response event from the LiteLLM model.
+            **kwargs: Additional keyword arguments for future extensibility.
 
         Returns:
             The formatted chunk.
@@ -218,30 +213,29 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
         """
         # Handle metadata case with prompt caching support
         if event["chunk_type"] == "metadata":
-            usage_data = {
+            usage_data: Usage = {
                 "inputTokens": event["data"].prompt_tokens,
                 "outputTokens": event["data"].completion_tokens,
                 "totalTokens": event["data"].total_tokens,
             }
 
             # Only LiteLLM over Anthropic supports cache write tokens
             # Waiting until a more general approach is available to set cacheWriteInputTokens
-
-            tokens_details = getattr(event["data"], "prompt_tokens_details", None)
-            if tokens_details and getattr(tokens_details, "cached_tokens", None):
-                usage_data["cacheReadInputTokens"] = event["data"].prompt_tokens_details.cached_tokens
-
 
+            if tokens_details := getattr(event["data"], "prompt_tokens_details", None):
+                if cached := getattr(tokens_details, "cached_tokens", None):
+                    usage_data["cacheReadInputTokens"] = cached
+                if creation := getattr(tokens_details, "cache_creation_tokens", None):
+                    usage_data["cacheWriteInputTokens"] = creation
 
-            return {
-                "metadata": {
-                    "usage": usage_data,
-                    "metrics": {
+            return StreamEvent(
+                metadata=MetadataEvent(
+                    metrics={
                         "latencyMs": 0,  # TODO
                     },
-                },
-            }
-
+                    usage=usage_data,
+                )
+            )
         # For all other cases, use the parent implementation
         return super().format_chunk(event)
 
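For reference, a minimal sketch (not part of the diff) of what the new metadata path produces. The `SimpleNamespace` stand-in, the placeholder `model_id`, and the `cache_creation_tokens` attribute name are assumptions; LiteLLM's real usage object varies by provider, and constructing the model here is assumed not to contact any backend.

```python
# Hypothetical illustration of the metadata branch of format_chunk above.
from types import SimpleNamespace

from strands.models.litellm import LiteLLMModel

model = LiteLLMModel(model_id="bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0")  # placeholder id

usage = SimpleNamespace(
    prompt_tokens=1200,
    completion_tokens=50,
    total_tokens=1250,
    # prompt_tokens_details is provider-dependent; cache_creation_tokens is the
    # assumed Anthropic-style attribute read by the new walrus checks.
    prompt_tokens_details=SimpleNamespace(cached_tokens=1024, cache_creation_tokens=0),
)

chunk = model.format_chunk({"chunk_type": "metadata", "data": usage})
# Expected shape (cacheWriteInputTokens omitted because cache_creation_tokens is falsy):
# {"metadata": {"metrics": {"latencyMs": 0},
#               "usage": {"inputTokens": 1200, "outputTokens": 50,
#                         "totalTokens": 1250, "cacheReadInputTokens": 1024}}}
```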
@@ -263,13 +257,16 @@ async def stream(
             tool_specs: List of tool specifications to make available to the model.
             system_prompt: System prompt to provide context to the model.
             tool_choice: Selection strategy for tool invocation.
+            system_prompt_content: System prompt content blocks to provide context to the model.
             **kwargs: Additional keyword arguments for future extensibility.
 
         Yields:
             Formatted message chunks from the model.
         """
         logger.debug("formatting request")
-        request = self.format_request(messages, tool_specs, system_prompt, tool_choice, system_prompt_content=system_prompt_content)
+        request = self.format_request(
+            messages, tool_specs, system_prompt, tool_choice, system_prompt_content=system_prompt_content
+        )
         logger.debug("request=<%s>", request)
 
         logger.debug("invoking model")
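Finally, a hedged usage sketch of the cache-point support this change enables. The import paths follow the diff's relative imports (`..types.content`, the litellm model module); the prompt text and the exact formatted output are illustrative assumptions.

```python
# Hypothetical example of passing a cache point via system_prompt_content.
from strands.models.litellm import LiteLLMModel
from strands.types.content import Messages, SystemContentBlock

system_prompt_content: list[SystemContentBlock] = [
    {"text": "You are a support agent. <long, cacheable instructions>"},
    {"cachePoint": {"type": "default"}},  # folded into the preceding block as cache_control
]
messages: Messages = [{"role": "user", "content": [{"text": "Hello"}]}]

formatted = LiteLLMModel.format_request_messages(
    messages, system_prompt_content=system_prompt_content
)
# formatted[0] is expected to be (assumed), per _format_system_messages above:
# {"role": "system",
#  "content": [{"type": "text",
#               "text": "You are a support agent. ...",
#               "cache_control": {"type": "ephemeral"}}]}
```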