83 changes: 81 additions & 2 deletions litellm/llms/mistral/chat/transformation.py
@@ -8,7 +8,9 @@

from typing import (
Any,
AsyncIterator,
Coroutine,
Iterator,
List,
Literal,
Optional,
@@ -26,11 +28,14 @@
handle_messages_with_content_list_to_str_conversion,
strip_none_values_from_message,
)
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
from litellm.llms.openai.chat.gpt_transformation import (
OpenAIGPTConfig,
OpenAIChatCompletionStreamingHandler,
)
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.mistral import MistralThinkingBlock, MistralToolCallMessage
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import ModelResponse
from litellm.types.utils import ModelResponse, ModelResponseStream
from litellm.utils import convert_to_model_response_object


@@ -602,3 +607,77 @@ def transform_response(
)

return final_response_obj

def get_model_response_iterator(
self,
streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
sync_stream: bool,
json_mode: Optional[bool] = False,
):
return MistralChatResponseIterator(
streaming_response=streaming_response,
sync_stream=sync_stream,
json_mode=json_mode,
)


class MistralChatResponseIterator(OpenAIChatCompletionStreamingHandler):
def chunk_parser(self, chunk: dict) -> ModelResponseStream:
try:
for choice in chunk.get("choices", []):
delta = choice.get("delta", {})
content = delta.get("content")
if isinstance(content, list):
(
normalized_text,
thinking_blocks,
reasoning_content,
) = self._normalize_content_blocks(content)
delta["content"] = normalized_text
if thinking_blocks:
delta["thinking_blocks"] = thinking_blocks
delta["reasoning_content"] = reasoning_content
else:
delta.pop("thinking_blocks", None)
delta.pop("reasoning_content", None)
except Exception:
# Fall back to default parsing if custom handling fails
return super().chunk_parser(chunk)

return super().chunk_parser(chunk)

@staticmethod
def _normalize_content_blocks(
content_blocks: List[dict],
) -> Tuple[Optional[str], List[dict], Optional[str]]:
"""
Convert Mistral magistral content blocks into OpenAI-compatible content + thinking_blocks.
"""
text_segments: List[str] = []
thinking_blocks: List[dict] = []
reasoning_segments: List[str] = []

for block in content_blocks:
block_type = block.get("type")
if block_type == "thinking":
mistral_thinking = block.get("thinking", [])
thinking_text_parts: List[str] = []
for thinking_block in mistral_thinking:
if thinking_block.get("type") == "text":
thinking_text_parts.append(thinking_block.get("text", ""))
thinking_text = "".join(thinking_text_parts)
if thinking_text:
reasoning_segments.append(thinking_text)
thinking_blocks.append(
{
"type": "thinking",
"thinking": thinking_text,
"signature": "mistral",
}
)
elif block_type == "text":
text_segments.append(block.get("text", ""))

normalized_text = "".join(text_segments) if text_segments else None
reasoning_content = "\n".join(reasoning_segments) if reasoning_segments else None
return normalized_text, thinking_blocks, reasoning_content
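
A minimal standalone sketch (not part of the diff) of what the helper above produces for a magistral-style content list; the sample payload is made up for illustration.

content = [
    {"type": "thinking", "thinking": [{"type": "text", "text": "Step 1: reason."}]},
    {"type": "text", "text": "Final answer."},
]
text, blocks, reasoning = MistralChatResponseIterator._normalize_content_blocks(content)
# text == "Final answer."
# blocks == [{"type": "thinking", "thinking": "Step 1: reason.", "signature": "mistral"}]
# reasoning == "Step 1: reason."
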
15 changes: 15 additions & 0 deletions model_prices_and_context_window.json
@@ -16199,6 +16199,21 @@
"supports_response_schema": true,
"supports_tool_choice": true
},
"mistral/magistral-medium-2509": {
"input_cost_per_token": 2e-06,
"litellm_provider": "mistral",
"max_input_tokens": 40000,
"max_output_tokens": 40000,
"max_tokens": 40000,
"mode": "chat",
"output_cost_per_token": 5e-06,
"source": "https://mistral.ai/news/magistral",
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
"mistral/mistral-ocr-latest": {
"litellm_provider": "mistral",
"ocr_cost_per_page": 1e-3,
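
To make the new pricing entry concrete, a rough back-of-the-envelope sketch (the token counts are hypothetical; litellm's own cost tracking is the authoritative path):

# Pricing from the mistral/magistral-medium-2509 entry above.
input_cost_per_token = 2e-06    # $2 per 1M input tokens
output_cost_per_token = 5e-06   # $5 per 1M output tokens

prompt_tokens, completion_tokens = 1_200, 400  # hypothetical request
cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"${cost:.4f}")  # $0.0044
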
@@ -11,7 +11,10 @@
0, os.path.abspath("../..")
) # Adds the parent directory to the system path

from litellm.llms.mistral.chat.transformation import MistralConfig
from litellm.llms.mistral.chat.transformation import (
MistralChatResponseIterator,
MistralConfig,
)
from litellm.types.utils import ModelResponse


@@ -361,6 +364,45 @@ def test_end_to_end_reasoning_workflow(self):
assert "_add_reasoning_prompt" not in result


def test_mistral_streaming_chunk_preserves_thinking_blocks():
"""Ensure streaming chunks keep magistral reasoning content."""
iterator = MistralChatResponseIterator(
streaming_response=iter([]), sync_stream=True, json_mode=False
)

streamed_chunk = {
"id": "chunk-1",
"object": "chat.completion.chunk",
"created": 123456,
"model": "magistral-medium-2509",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": [
{
"type": "thinking",
"thinking": [{"type": "text", "text": "Working it out."}],
},
{"type": "text", "text": " Hello"},
],
},
"finish_reason": None,
}
],
}

parsed_chunk = iterator.chunk_parser(streamed_chunk)

delta = parsed_chunk.choices[0].delta
assert delta.thinking_blocks is not None
assert delta.thinking_blocks[0]["thinking"] == "Working it out."
assert delta.thinking_blocks[0]["signature"] == "mistral"
assert delta.reasoning_content == "Working it out."
assert delta.content == " Hello"


class TestMistralNameHandling:
"""Test suite for Mistral name handling in messages."""

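
Beyond the unit test, a hedged end-to-end sketch of the streaming behaviour this change enables (assumes a valid MISTRAL_API_KEY in the environment; exact chunk contents vary between calls):

import litellm

response = litellm.completion(
    model="mistral/magistral-medium-2509",
    messages=[{"role": "user", "content": "What is 17 * 23?"}],
    stream=True,
)

for chunk in response:
    delta = chunk.choices[0].delta
    # reasoning_content / thinking_blocks are only set on chunks that carried
    # magistral "thinking" blocks; guard with getattr since other chunks omit them.
    if getattr(delta, "reasoning_content", None):
        print("[reasoning]", delta.reasoning_content)
    if delta.content:
        print(delta.content, end="", flush=True)
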