Skip to content

Commit f996a59

Browse files
authored
Compatible with Qwen3 multi-turn dialogue template (#102)
1 parent 53aaf8d commit f996a59

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

tinker_cookbook/renderers.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -351,16 +351,17 @@ class Qwen3Renderer(Renderer):
351351
352352
</think>
353353
I can help you with...<|im_end|>
354-
355-
It is currently missing Qwen 3's functionality for removing thinking spans in multi-turn conversations.
356354
"""
357355

358356
def _render_message(self, idx: int, message: Message) -> tuple[list[int], list[int], list[int]]:
359357
assert message.get("thinking") is None, "TODO: support CoT in Qwen3 renderer"
360358
maybe_newline = "\n" if idx > 0 else ""
361359
ob_str = f"{maybe_newline}<|im_start|>{message['role']}\n"
362360
ac_content = message["content"]
363-
if message["role"] == "assistant" and "<think>" not in ac_content:
361+
if message["role"] == "assistant" and "</think>" in ac_content:
362+
# In a multi-turn conversation, we remove the thinking section from the assistant message
363+
ac_content = ac_content.split("</think>")[1].lstrip()
364+
elif message["role"] == "assistant" and "<think>" not in ac_content:
364365
# Matching the paper, we force the assistant to start with <think>. Some SFT datasets include
365366
# <think> in the assistant messages, so we don't need to re-add it in those cases.
366367
ob_str += "<think>\n"

0 commit comments

Comments
 (0)