diff --git a/litellm/llms/mistral/chat/transformation.py b/litellm/llms/mistral/chat/transformation.py
index 51fa65244a0e..267386233752 100644
--- a/litellm/llms/mistral/chat/transformation.py
+++ b/litellm/llms/mistral/chat/transformation.py
@@ -8,7 +8,9 @@ from typing import (
     Any,
+    AsyncIterator,
     Coroutine,
+    Iterator,
     List,
     Literal,
     Optional,
@@ -26,11 +28,14 @@
     handle_messages_with_content_list_to_str_conversion,
     strip_none_values_from_message,
 )
-from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
+from litellm.llms.openai.chat.gpt_transformation import (
+    OpenAIGPTConfig,
+    OpenAIChatCompletionStreamingHandler,
+)
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.mistral import MistralThinkingBlock, MistralToolCallMessage
 from litellm.types.llms.openai import AllMessageValues
-from litellm.types.utils import ModelResponse
+from litellm.types.utils import ModelResponse, ModelResponseStream
 from litellm.utils import convert_to_model_response_object
@@ -602,3 +607,77 @@ def transform_response(
         )
         return final_response_obj
+
+    def get_model_response_iterator(
+        self,
+        streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
+        sync_stream: bool,
+        json_mode: Optional[bool] = False,
+    ):
+        return MistralChatResponseIterator(
+            streaming_response=streaming_response,
+            sync_stream=sync_stream,
+            json_mode=json_mode,
+        )
+
+
+class MistralChatResponseIterator(OpenAIChatCompletionStreamingHandler):
+    def chunk_parser(self, chunk: dict) -> ModelResponseStream:
+        try:
+            for choice in chunk.get("choices", []):
+                delta = choice.get("delta", {})
+                content = delta.get("content")
+                if isinstance(content, list):
+                    (
+                        normalized_text,
+                        thinking_blocks,
+                        reasoning_content,
+                    ) = self._normalize_content_blocks(content)
+                    delta["content"] = normalized_text
+                    if thinking_blocks:
+                        delta["thinking_blocks"] = thinking_blocks
+                        delta["reasoning_content"] = reasoning_content
+                    else:
+                        delta.pop("thinking_blocks", None)
+                        delta.pop("reasoning_content", None)
+        except Exception:
+            # Fall back to default parsing if custom handling fails
+            return super().chunk_parser(chunk)
+
+        return super().chunk_parser(chunk)
+
+    @staticmethod
+    def _normalize_content_blocks(
+        content_blocks: List[dict],
+    ) -> Tuple[Optional[str], List[dict], Optional[str]]:
+        """
+        Convert Mistral magistral content blocks into OpenAI-compatible content + thinking_blocks.
+        """
+        text_segments: List[str] = []
+        thinking_blocks: List[dict] = []
+        reasoning_segments: List[str] = []
+
+        for block in content_blocks:
+            block_type = block.get("type")
+            if block_type == "thinking":
+                mistral_thinking = block.get("thinking", [])
+                thinking_text_parts: List[str] = []
+                for thinking_block in mistral_thinking:
+                    if thinking_block.get("type") == "text":
+                        thinking_text_parts.append(thinking_block.get("text", ""))
+                thinking_text = "".join(thinking_text_parts)
+                if thinking_text:
+                    reasoning_segments.append(thinking_text)
+                    thinking_blocks.append(
+                        {
+                            "type": "thinking",
+                            "thinking": thinking_text,
+                            "signature": "mistral",
+                        }
+                    )
+            elif block_type == "text":
+                text_segments.append(block.get("text", ""))
+
+        normalized_text = "".join(text_segments) if text_segments else None
+        reasoning_content = "\n".join(reasoning_segments) if reasoning_segments else None
+        return normalized_text, thinking_blocks, reasoning_content
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index f571dfb52433..47fa66ef795a 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -16199,6 +16199,21 @@
         "supports_response_schema": true,
         "supports_tool_choice": true
     },
+    "mistral/magistral-medium-2509": {
+        "input_cost_per_token": 2e-06,
+        "litellm_provider": "mistral",
+        "max_input_tokens": 40000,
+        "max_output_tokens": 40000,
+        "max_tokens": 40000,
+        "mode": "chat",
+        "output_cost_per_token": 5e-06,
+        "source": "https://mistral.ai/news/magistral",
+        "supports_assistant_prefill": true,
+        "supports_function_calling": true,
+        "supports_reasoning": true,
+        "supports_response_schema": true,
+        "supports_tool_choice": true
+    },
     "mistral/mistral-ocr-latest": {
         "litellm_provider": "mistral",
         "ocr_cost_per_page": 1e-3,
diff --git a/tests/test_litellm/llms/mistral/test_mistral_chat_transformation.py b/tests/test_litellm/llms/mistral/test_mistral_chat_transformation.py
index e6d7ed78d6ef..544788105d34 100644
--- a/tests/test_litellm/llms/mistral/test_mistral_chat_transformation.py
+++ b/tests/test_litellm/llms/mistral/test_mistral_chat_transformation.py
@@ -11,7 +11,10 @@
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-from litellm.llms.mistral.chat.transformation import MistralConfig
+from litellm.llms.mistral.chat.transformation import (
+    MistralChatResponseIterator,
+    MistralConfig,
+)
 from litellm.types.utils import ModelResponse
@@ -361,6 +364,45 @@ def test_end_to_end_reasoning_workflow(self):
         assert "_add_reasoning_prompt" not in result
+
+def test_mistral_streaming_chunk_preserves_thinking_blocks():
+    """Ensure streaming chunks keep magistral reasoning content."""
+    iterator = MistralChatResponseIterator(
+        streaming_response=iter([]), sync_stream=True, json_mode=False
+    )
+
+    streamed_chunk = {
+        "id": "chunk-1",
+        "object": "chat.completion.chunk",
+        "created": 123456,
+        "model": "magistral-medium-2509",
+        "choices": [
+            {
+                "index": 0,
+                "delta": {
+                    "role": "assistant",
+                    "content": [
+                        {
+                            "type": "thinking",
+                            "thinking": [{"type": "text", "text": "Working it out."}],
+                        },
+                        {"type": "text", "text": " Hello"},
+                    ],
+                },
+                "finish_reason": None,
+            }
+        ],
+    }
+
+    parsed_chunk = iterator.chunk_parser(streamed_chunk)
+
+    delta = parsed_chunk.choices[0].delta
+    assert delta.thinking_blocks is not None
+    assert delta.thinking_blocks[0]["thinking"] == "Working it out."
+    assert delta.thinking_blocks[0]["signature"] == "mistral"
+    assert delta.reasoning_content == "Working it out."
+    assert delta.content == " Hello"
+
+
 class TestMistralNameHandling:
     """Test suite for Mistral name handling in messages."""
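
Usage sketch (not part of the patch). For reviewers, this is a minimal, hypothetical consumer of the new streaming path: it assumes MISTRAL_API_KEY is set and that litellm's streaming wrapper passes the reasoning_content / thinking_blocks fields populated by MistralChatResponseIterator through to the chunks the caller receives, which the test above only asserts at the parser level.

    # Illustrative only; assumes MISTRAL_API_KEY is exported in the environment.
    import litellm

    response = litellm.completion(
        model="mistral/magistral-medium-2509",
        messages=[{"role": "user", "content": "How many r's are in 'strawberry'?"}],
        stream=True,
    )

    for chunk in response:
        delta = chunk.choices[0].delta
        # Reasoning tokens arrive separately from the visible answer text.
        if getattr(delta, "reasoning_content", None):
            print("[thinking]", delta.reasoning_content, flush=True)
        if delta.content:
            print(delta.content, end="", flush=True)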