Skip to content

Commit 61499b0

Browse files
authored
refactor(streaming): simplify response display and fix duplication issues (#104)
- Replace the Rich Live widget with direct console printing to eliminate text duplication when scrolling
- Stream content character-by-character for a typing effect, then render the final markdown
- Fix spinner behavior: stop on the first thinking chunk (if show_thinking=True) or on the first content/tool chunk
- Respect the show_thinking parameter throughout the streaming process
- Add the missing return statement to fix a TypeError when unpacking the response
- Maintain the streaming illusion while avoiding terminal scrollback duplication

This approach trades off real-time markdown rendering during streaming for a cleaner terminal history without duplicated text.
1 parent b6485a3 commit 61499b0

File tree

1 file changed

+79
-102
lines changed

1 file changed

+79
-102
lines changed

mcp_client_for_ollama/utils/streaming.py

Lines changed: 79 additions & 102 deletions
Original file line number · Diff line number · Diff line change
@@ -5,10 +5,6 @@
55
StreamingManager: Handles streaming responses from Ollama.
66
"""
77
from rich.markdown import Markdown
8-
from rich.live import Live
9-
from rich.spinner import Spinner
10-
from rich.table import Table
11-
from rich.text import Text
128
from .metrics import display_metrics, extract_metrics
139

1410
class StreamingManager:
@@ -22,36 +18,6 @@ def __init__(self, console):
2218
"""
2319
self.console = console
2420

25-
def _create_working_display(self):
26-
"""Create a display showing working status with spinner"""
27-
table = Table.grid()
28-
spinner = Spinner("dots", style="cyan")
29-
working_text = Text("working...", style="cyan")
30-
header = Table.grid(padding=(0, 1))
31-
header.add_row(spinner, working_text)
32-
table.add_row(header)
33-
return table
34-
35-
def _create_content_display(self, content, thinking_content="", show_thinking=True, has_tool_calls=False):
36-
"""Create a display for content with optional thinking section"""
37-
if thinking_content and show_thinking:
38-
# Only add separator and Answer label if there's actual content
39-
if content:
40-
if has_tool_calls:
41-
combined_content = thinking_content + "\n\n---\n\n" + content
42-
else:
43-
combined_content = thinking_content + "\n\n---\n\n**Answer:**\n\n" + content
44-
else:
45-
# No content, just show thinking
46-
combined_content = thinking_content
47-
return Markdown(combined_content)
48-
else:
49-
# Don't add "Answer:" label when tools are being called or when content is empty
50-
if has_tool_calls or not content:
51-
return Markdown(content)
52-
else:
53-
return Markdown("**Answer:**\n\n" + content)
54-
5521
async def process_streaming_response(self, stream, print_response=True, thinking_mode=False, show_thinking=True, show_metrics=False):
5622
"""Process a streaming response from Ollama with status spinner and content updates
5723
@@ -70,79 +36,86 @@ async def process_streaming_response(self, stream, print_response=True, thinking
7036
accumulated_text = ""
7137
thinking_content = ""
7238
tool_calls = []
73-
showing_working = True # Track if we're still showing the working display
7439
metrics = None # Store metrics from final chunk
7540

7641
if print_response:
77-
with Live(console=self.console, refresh_per_second=10, vertical_overflow='visible') as live:
78-
# Start with working display
79-
live.update(self._create_working_display())
80-
81-
async for chunk in stream:
82-
# Capture metrics when chunk is done
83-
extracted_metrics = extract_metrics(chunk)
84-
if extracted_metrics:
85-
metrics = extracted_metrics
86-
87-
# Handle thinking content
88-
if (thinking_mode and hasattr(chunk, 'message') and
89-
hasattr(chunk.message, 'thinking') and chunk.message.thinking):
90-
91-
if not thinking_content:
92-
thinking_content = "🤔 **Thinking:**\n\n"
93-
thinking_content += chunk.message.thinking
94-
95-
# Hide working display and show thinking content
96-
if showing_working:
97-
showing_working = False
98-
99-
display = self._create_content_display(
100-
accumulated_text, thinking_content, show_thinking=True, has_tool_calls=False
101-
)
102-
live.update(display)
103-
104-
# Handle regular content
105-
if (hasattr(chunk, 'message') and hasattr(chunk.message, 'content') and
106-
chunk.message.content):
107-
108-
accumulated_text += chunk.message.content
109-
110-
# Hide working display and show content
111-
if showing_working:
112-
showing_working = False
113-
114-
# Update display based on thinking mode
115-
display = self._create_content_display(
116-
accumulated_text, thinking_content, show_thinking, has_tool_calls=False
117-
)
118-
live.update(display)
119-
120-
# Handle tool calls
121-
if (hasattr(chunk, 'message') and hasattr(chunk.message, 'tool_calls') and
122-
chunk.message.tool_calls):
123-
# Hide working display and show final content if any before tool calls
124-
showing_working = False
125-
126-
for tool in chunk.message.tool_calls:
127-
tool_calls.append(tool)
128-
129-
# Show final content display if we have any accumulated text
130-
if accumulated_text or thinking_content:
131-
display = self._create_content_display(
132-
accumulated_text, thinking_content, show_thinking, has_tool_calls=True
133-
)
134-
live.update(display)
135-
else:
136-
# Clear the working display by showing empty content
137-
live.update(Markdown(""))
138-
139-
# Add spacing after streaming completes only if we showed content and no tool calls
140-
if not showing_working and not tool_calls:
42+
# Thinking header flag
43+
thinking_started = False
44+
# Show initial working spinner until first chunk arrives
45+
first_chunk = True
46+
status = self.console.status("[cyan]working...", spinner="dots")
47+
status.start()
48+
49+
async for chunk in stream:
50+
# Capture metrics when chunk is done
51+
extracted_metrics = extract_metrics(chunk)
52+
if extracted_metrics:
53+
metrics = extracted_metrics
54+
55+
# Handle thinking content
56+
if (thinking_mode and hasattr(chunk, 'message') and
57+
hasattr(chunk.message, 'thinking') and chunk.message.thinking):
58+
# Stop spinner on first thinking chunk ONLY if show_thinking is True
59+
if first_chunk and show_thinking:
60+
status.stop()
61+
first_chunk = False
62+
63+
if not thinking_content:
64+
thinking_content = "🤔 **Thinking:**\n\n"
65+
if not thinking_started and show_thinking:
66+
self.console.print(Markdown("🤔 **Thinking:**\n"))
67+
self.console.print(Markdown("---"))
68+
self.console.print()
69+
thinking_started = True
70+
thinking_content += chunk.message.thinking
71+
# Print thinking content as plain text only if show_thinking is True
72+
if show_thinking:
73+
self.console.print(chunk.message.thinking, end="")
74+
75+
# Handle regular content
76+
if (hasattr(chunk, 'message') and hasattr(chunk.message, 'content') and
77+
chunk.message.content):
78+
# Stop spinner on first content chunk (always)
79+
if first_chunk:
80+
status.stop()
81+
first_chunk = False
82+
83+
# Print separator and Answer label when transitioning from thinking to content
84+
if not accumulated_text:
85+
self.console.print()
86+
self.console.print(Markdown("📝 **Answer:**"))
87+
self.console.print(Markdown("---"))
88+
self.console.print()
89+
90+
accumulated_text += chunk.message.content
91+
92+
# Print only new content as plain text (will render full markdown at end)
93+
self.console.print(chunk.message.content, end="")
94+
95+
# Handle tool calls
96+
if (hasattr(chunk, 'message') and hasattr(chunk.message, 'tool_calls') and
97+
chunk.message.tool_calls):
98+
# Stop spinner on first tool call chunk (always) - just in case no content arrives
99+
if first_chunk:
100+
status.stop()
101+
first_chunk = False
102+
103+
for tool in chunk.message.tool_calls:
104+
tool_calls.append(tool)
105+
106+
# Print newline at end
107+
self.console.print()
108+
109+
# Render final markdown content properly
110+
if accumulated_text:
111+
# Render in markdown format and state this
112+
self.console.print()
113+
self.console.print(Markdown("📝 **Answer (Markdown):**"))
114+
self.console.print(Markdown("---"))
115+
self.console.print()
116+
self.console.print(Markdown(accumulated_text))
141117
self.console.print()
142118

143-
# Display metrics if requested and available
144-
if show_metrics and metrics and print_response:
145-
display_metrics(self.console, metrics)
146119
else:
147120
# Silent processing without display
148121
async for chunk in stream:
@@ -164,4 +137,8 @@ async def process_streaming_response(self, stream, print_response=True, thinking
164137
for tool in chunk.message.tool_calls:
165138
tool_calls.append(tool)
166139

140+
# Display metrics if requested
141+
if show_metrics and metrics:
142+
display_metrics(self.console, metrics)
143+
167144
return accumulated_text, tool_calls, metrics

0 commit comments

Comments (0)