StreamingManager: Handles streaming responses from Ollama.
"""
from rich.markdown import Markdown
-from rich.live import Live
-from rich.spinner import Spinner
-from rich.table import Table
-from rich.text import Text
from .metrics import display_metrics, extract_metrics

class StreamingManager:
@@ -22,36 +18,6 @@ def __init__(self, console):
        """
        self.console = console

-    def _create_working_display(self):
-        """Create a display showing working status with spinner"""
-        table = Table.grid()
-        spinner = Spinner("dots", style="cyan")
-        working_text = Text("working...", style="cyan")
-        header = Table.grid(padding=(0, 1))
-        header.add_row(spinner, working_text)
-        table.add_row(header)
-        return table
-
-    def _create_content_display(self, content, thinking_content="", show_thinking=True, has_tool_calls=False):
-        """Create a display for content with optional thinking section"""
-        if thinking_content and show_thinking:
-            # Only add separator and Answer label if there's actual content
-            if content:
-                if has_tool_calls:
-                    combined_content = thinking_content + "\n\n---\n\n" + content
-                else:
-                    combined_content = thinking_content + "\n\n---\n\n**Answer:**\n\n" + content
-            else:
-                # No content, just show thinking
-                combined_content = thinking_content
-            return Markdown(combined_content)
-        else:
-            # Don't add "Answer:" label when tools are being called or when content is empty
-            if has_tool_calls or not content:
-                return Markdown(content)
-            else:
-                return Markdown("**Answer:**\n\n" + content)
-
    async def process_streaming_response(self, stream, print_response=True, thinking_mode=False, show_thinking=True, show_metrics=False):
        """Process a streaming response from Ollama with status spinner and content updates

@@ -70,79 +36,86 @@ async def process_streaming_response(self, stream, print_response=True, thinking
        accumulated_text = ""
        thinking_content = ""
        tool_calls = []
-        showing_working = True  # Track if we're still showing the working display
        metrics = None  # Store metrics from final chunk

        if print_response:
-            with Live(console=self.console, refresh_per_second=10, vertical_overflow='visible') as live:
-                # Start with working display
-                live.update(self._create_working_display())
-
-                async for chunk in stream:
-                    # Capture metrics when chunk is done
-                    extracted_metrics = extract_metrics(chunk)
-                    if extracted_metrics:
-                        metrics = extracted_metrics
-
-                    # Handle thinking content
-                    if (thinking_mode and hasattr(chunk, 'message') and
-                            hasattr(chunk.message, 'thinking') and chunk.message.thinking):
-
-                        if not thinking_content:
-                            thinking_content = "🤔 **Thinking:**\n\n"
-                        thinking_content += chunk.message.thinking
-
-                        # Hide working display and show thinking content
-                        if showing_working:
-                            showing_working = False
-
-                        display = self._create_content_display(
-                            accumulated_text, thinking_content, show_thinking=True, has_tool_calls=False
-                        )
-                        live.update(display)
-
-                    # Handle regular content
-                    if (hasattr(chunk, 'message') and hasattr(chunk.message, 'content') and
-                            chunk.message.content):
-
-                        accumulated_text += chunk.message.content
-
-                        # Hide working display and show content
-                        if showing_working:
-                            showing_working = False
-
-                        # Update display based on thinking mode
-                        display = self._create_content_display(
-                            accumulated_text, thinking_content, show_thinking, has_tool_calls=False
-                        )
-                        live.update(display)
-
-                    # Handle tool calls
-                    if (hasattr(chunk, 'message') and hasattr(chunk.message, 'tool_calls') and
-                            chunk.message.tool_calls):
-                        # Hide working display and show final content if any before tool calls
-                        showing_working = False
-
-                        for tool in chunk.message.tool_calls:
-                            tool_calls.append(tool)
-
-                # Show final content display if we have any accumulated text
-                if accumulated_text or thinking_content:
-                    display = self._create_content_display(
-                        accumulated_text, thinking_content, show_thinking, has_tool_calls=True
-                    )
-                    live.update(display)
-                else:
-                    # Clear the working display by showing empty content
-                    live.update(Markdown(""))
-
-            # Add spacing after streaming completes only if we showed content and no tool calls
-            if not showing_working and not tool_calls:
+            # Thinking header flag
+            thinking_started = False
+            # Show the initial working spinner until the first chunk arrives
+            first_chunk = True
+            status = self.console.status("[cyan]working...", spinner="dots")
+            status.start()
+
+            async for chunk in stream:
+                # Capture metrics when chunk is done
+                extracted_metrics = extract_metrics(chunk)
+                if extracted_metrics:
+                    metrics = extracted_metrics
+
+                # Handle thinking content
+                if (thinking_mode and hasattr(chunk, 'message') and
+                        hasattr(chunk.message, 'thinking') and chunk.message.thinking):
+                    # Stop the spinner on the first thinking chunk only if show_thinking is True
+                    if first_chunk and show_thinking:
+                        status.stop()
+                        first_chunk = False
+
+                    if not thinking_content:
+                        thinking_content = "🤔 **Thinking:**\n\n"
+                        if not thinking_started and show_thinking:
+                            self.console.print(Markdown("🤔 **Thinking:**\n"))
+                            self.console.print(Markdown("---"))
+                            self.console.print()
+                            thinking_started = True
+                    thinking_content += chunk.message.thinking
+                    # Print thinking content as plain text only if show_thinking is True
+                    if show_thinking:
+                        self.console.print(chunk.message.thinking, end="")
+
+                # Handle regular content
+                if (hasattr(chunk, 'message') and hasattr(chunk.message, 'content') and
+                        chunk.message.content):
+                    # Stop the spinner on the first content chunk (always)
+                    if first_chunk:
+                        status.stop()
+                        first_chunk = False
+
+                    # Print separator and Answer label when transitioning from thinking to content
+                    if not accumulated_text:
+                        self.console.print()
+                        self.console.print(Markdown("📝 **Answer:**"))
+                        self.console.print(Markdown("---"))
+                        self.console.print()
+
+                    accumulated_text += chunk.message.content
+
+                    # Print only the new content as plain text (the full markdown is re-rendered at the end)
+                    self.console.print(chunk.message.content, end="")
+
+                # Handle tool calls
+                if (hasattr(chunk, 'message') and hasattr(chunk.message, 'tool_calls') and
+                        chunk.message.tool_calls):
+                    # Stop the spinner on the first tool-call chunk (always), in case no content arrives
+                    if first_chunk:
+                        status.stop()
+                        first_chunk = False
+
+                    for tool in chunk.message.tool_calls:
+                        tool_calls.append(tool)
+
+            # Print a trailing newline once streaming finishes
+            self.console.print()
+
+            # Render the final content as proper markdown
+            if accumulated_text:
+                # Re-render the accumulated text as markdown and label it as such
+                self.console.print()
+                self.console.print(Markdown("📝 **Answer (Markdown):**"))
+                self.console.print(Markdown("---"))
+                self.console.print()
+                self.console.print(Markdown(accumulated_text))
                self.console.print()

-            # Display metrics if requested and available
-            if show_metrics and metrics and print_response:
-                display_metrics(self.console, metrics)
        else:
            # Silent processing without display
            async for chunk in stream:
@@ -164,4 +137,8 @@ async def process_streaming_response(self, stream, print_response=True, thinking
                    for tool in chunk.message.tool_calls:
                        tool_calls.append(tool)

+        # Display metrics if requested
+        if show_metrics and metrics:
+            display_metrics(self.console, metrics)
+
        return accumulated_text, tool_calls, metrics
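
A minimal usage sketch of the changed flow, assuming the ollama Python client's AsyncClient.chat(..., stream=True) yields chunks shaped like the ones handled above; the model name, prompt, and import path below are placeholders, not part of this change:

import asyncio
from rich.console import Console
from ollama import AsyncClient
from streaming import StreamingManager  # adjust to the actual module path

async def main():
    console = Console()
    manager = StreamingManager(console)
    client = AsyncClient()
    # Request a streaming chat completion; chunks arrive incrementally.
    stream = await client.chat(
        model="llama3.2",  # placeholder model name
        messages=[{"role": "user", "content": "Summarize asyncio in two sentences."}],
        stream=True,
    )
    # Shows the spinner, streams plain text, re-renders the final answer as
    # markdown, and returns the accumulated text, tool calls, and metrics.
    text, tool_calls, metrics = await manager.process_streaming_response(
        stream, print_response=True, show_metrics=True
    )

asyncio.run(main())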