
Commit e4bc223

joschu and claude committed
feat(qwen3): Add strip_thinking_from_history option to Qwen3Renderer
Add a `strip_thinking_from_history` parameter to `Qwen3Renderer.__init__`:

- When `True` (default): strips `<think>...</think>` blocks from assistant messages in multi-turn history
- When `False`: preserves thinking blocks in history (useful for certain RL scenarios)

This matches how Qwen3 models were trained - they only see their own thinking during the current turn, not from previous turns. The default behavior is unchanged (stripping is enabled).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
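As a usage sketch (the tokenizer construction is elided; only the `Qwen3Renderer` constructor signature below comes from this commit):

```python
from tinker_cookbook.renderers import Qwen3Renderer

tokenizer = ...  # any tokenizer accepted by the renderer; setup elided here

# Default: strips <think>...</think> from assistant turns in prior history.
renderer = Qwen3Renderer(tokenizer)

# Preserve thinking blocks in history, e.g. for certain RL scenarios.
rl_renderer = Qwen3Renderer(tokenizer, strip_thinking_from_history=False)
```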
1 parent 46b3bbd commit e4bc223

File tree

1 file changed (+17, -2)


tinker_cookbook/renderers.py

Lines changed: 17 additions & 2 deletions
@@ -470,13 +470,28 @@ class Qwen3Renderer(Renderer):
     I can help you with...<|im_end|>
     """

+    def __init__(self, tokenizer: Tokenizer, strip_thinking_from_history: bool = True):
+        """
+        Args:
+            tokenizer: The tokenizer to use for encoding.
+            strip_thinking_from_history: When True (default), strips <think>...</think> blocks
+                from assistant messages in multi-turn history. This matches how Qwen3 models
+                were trained - they only see their own thinking during the current turn, not
+                from previous turns. Set to False to preserve thinking in history (useful for
+                certain RL scenarios).
+        """
+        super().__init__(tokenizer)
+        self.strip_thinking_from_history = strip_thinking_from_history
+
     def _render_message(self, idx: int, message: Message) -> tuple[list[int], list[int], list[int]]:
         assert message.get("thinking") is None, "TODO: support CoT in Qwen3 renderer"
         maybe_newline = "\n" if idx > 0 else ""
         ob_str = f"{maybe_newline}<|im_start|>{message['role']}\n"
         ac_content = message["content"]
-        if message["role"] == "assistant" and "</think>" in ac_content:
-            # Multi-turn conversation, we remove the thinking section from the assistant message
+        if self.strip_thinking_from_history and message["role"] == "assistant" and "</think>" in ac_content:
+            # Multi-turn conversation, we remove the thinking section from the assistant message.
+            # This matches how Qwen3 models were trained - they only see their own thinking
+            # during the current turn, not from previous turns.
             ac_content = ac_content.split("</think>")[1].lstrip()
         elif message["role"] == "assistant" and "<think>" not in ac_content:
             # Matching the paper, we force the assistant to start with <think>. Some SFT datasets include
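
To make the change concrete, here is a minimal, dependency-free sketch of the stripping step this diff adds. The `strip_thinking` helper is hypothetical and only mirrors the split-on-`</think>` logic in `_render_message` above:

```python
# Hypothetical helper mirroring the history-stripping logic in
# Qwen3Renderer._render_message (not part of the actual module).

def strip_thinking(content: str, strip_thinking_from_history: bool = True) -> str:
    """Drop a leading <think>...</think> block, as the renderer does for prior assistant turns."""
    if strip_thinking_from_history and "</think>" in content:
        # Keep only the text after the thinking block, trimming leading whitespace.
        return content.split("</think>")[1].lstrip()
    return content

msg = "<think>The user wants 2 + 2, which is 4.</think>\nThe answer is 4."

print(strip_thinking(msg))
# -> "The answer is 4."

print(strip_thinking(msg, strip_thinking_from_history=False))
# -> unchanged; thinking block preserved (e.g. for certain RL scenarios)
```

Splitting on the first `</think>` keeps everything after the thinking block, so an assistant reply with no thinking passes through untouched.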
