1414import logging
1515import os
1616import sys
17+ import time
1718import traceback
1819from abc import ABC , abstractmethod
1920from typing import Any , Callable , Dict , List , Optional , Union
2021
2122import dspy
2223from typing_extensions import Literal
2324
25+ from .evaluation import create_evaluator
2426from .utils import map_auto_mode_to_dspy
2527
2628
@@ -144,6 +146,11 @@ def __init__(
144146 fewshot_aware_proposer : bool = True ,
145147 use_llama_tips : bool = True ,
146148 requires_permission_to_run : bool = False ,
149+ # Baseline computation settings
150+ compute_baseline : bool = True ,
151+ # Model name parameters for display
152+ task_model_name : Optional [str ] = None ,
153+ prompt_model_name : Optional [str ] = None ,
147154 ** kwargs ,
148155 ):
149156 """
@@ -179,7 +186,13 @@ def __init__(
179186 tip_aware_proposer: Whether to use tip-aware instruction proposals
180187 fewshot_aware_proposer: Whether to use few-shot aware instruction proposals
181188 requires_permission_to_run: Whether to require user permission to run
182- provide_traceback: Whether to provide tracebacks for errors
189+
190+ # Baseline computation parameters
191+ compute_baseline: Whether to compute baseline score before optimization
192+
193+ # Model name parameters for display
194+ task_model_name: Name of the task model
195+ prompt_model_name: Name of the prompt model
183196
184197 **kwargs: Additional configuration parameters
185198 """
@@ -192,6 +205,7 @@ def __init__(
192205 # Training and validation data
193206 self .trainset = kwargs .get ("trainset" , [])
194207 self .valset = kwargs .get ("valset" , [])
208+ self .testset = kwargs .get ("testset" , [])
195209
196210 # Model-specific optimization settings
197211 self .use_llama_tips = use_llama_tips
@@ -221,6 +235,127 @@ def __init__(
221235 self .fewshot_aware_proposer = fewshot_aware_proposer
222236 self .requires_permission_to_run = requires_permission_to_run
223237
238+ # Baseline computation settings
239+ self .compute_baseline = compute_baseline
240+
241+ # Model name parameters for display
242+ self .task_model_name = task_model_name
243+ self .prompt_model_name = prompt_model_name
244+
245+ def _get_model_name (self , model ) -> str :
246+ """
247+ Get a human-readable name for a model using stored names.
248+
249+ Args:
250+ model: The model object to get the name for
251+
252+ Returns:
253+ A string representation of the model name
254+ """
255+ if model is None :
256+ return "None"
257+
258+ # Use stored model names if available
259+ if model is self .task_model and self .task_model_name :
260+ return self .task_model_name
261+ if model is self .prompt_model and self .prompt_model_name :
262+ return self .prompt_model_name
263+
264+ # Fallback to legacy introspection for backward compatibility
265+ if hasattr (model , "model_name" ):
266+ return str (model .model_name )
267+ if hasattr (model , "model" ):
268+ return str (model .model )
269+ if hasattr (model , "_model" ) and hasattr (model ._model , "model" ):
270+ return str (model ._model .model )
271+
272+ # Final fallback
273+ return str (model )
274+
def _create_signature(self, prompt_data: Dict[str, Any], instructions: str):
    """Build a DSPy signature class from the prompt's field definitions.

    Args:
        prompt_data: Mapping that may provide "inputs"/"outputs" lists of
            field names (defaults: ["question"] / ["answer"]).
        instructions: Instruction text stored as the signature docstring.

    Returns:
        A dynamically created ``dspy.Signature`` subclass.
    """
    # Assemble input fields first, then outputs, mirroring the field
    # order DSPy expects in the generated signature.
    class_members: Dict[str, Any] = {
        name: dspy.InputField(desc="${" + name + "}")
        for name in prompt_data.get("inputs", ["question"])
    }
    class_members.update(
        {
            name: dspy.OutputField(desc="${" + name + "}")
            for name in prompt_data.get("outputs", ["answer"])
        }
    )
    # The docstring carries the instructions, per DSPy convention.
    class_members["__doc__"] = instructions

    return type("DynamicSignature", (dspy.Signature,), class_members)
def _compute_baseline_score(self, prompt_data: Dict[str, Any]) -> Optional[float]:
    """Evaluate the unoptimized prompt to establish a baseline score.

    Evaluation runs on the held-out test set (never train/val) to avoid
    data leakage, and goes through evaluation.py's evaluator so baseline
    and post-optimization scores are directly comparable.

    Args:
        prompt_data: Prompt payload; "text" holds the original prompt and
            "inputs"/"outputs" describe the signature fields.

    Returns:
        The baseline score as a float, or None when baseline computation
        is disabled, prerequisites are missing, or evaluation fails.
    """
    # Check the explicit opt-out first: when the feature is disabled we
    # should log that, not a misleading "missing metric" message.
    if not self.compute_baseline:
        logging.debug("Baseline computation disabled")
        return None

    if not self.metric or not self.testset:
        logging.debug("Skipping baseline computation: missing metric or test set")
        return None

    try:
        start_time = time.time()

        # Build the program from the ORIGINAL prompt text so the score
        # reflects pre-optimization behavior.
        baseline_signature = self._create_signature(
            prompt_data, prompt_data["text"]
        )
        baseline_program = dspy.Predict(baseline_signature)

        print(
            f"\nComputing baseline score on {len(self.testset)} test examples "
            f"using {self.num_threads} threads..."
        )

        evaluator = create_evaluator(
            metric=self.metric,
            devset=self.testset,
            num_threads=self.num_threads,  # reuse the strategy's thread setting
            display_progress=True,
            display_table=False,
        )

        score = evaluator.evaluate(baseline_program)
        duration = time.time() - start_time

        print(f"✅ Baseline Score: {score:.3f} in {duration:.2f}s\n")
        return float(score)

    except Exception as e:
        # Baseline is best-effort: never let it abort the optimization run.
        logging.warning(f"Baseline evaluation failed: {e}")
        return None
224359 def run (self , prompt_data : Dict [str , Any ]) -> Any :
225360 """
226361 Apply basic optimization to the prompt using DSPy's MIPROv2.
@@ -237,6 +372,11 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
237372 if "dspy" not in globals () or not self .trainset :
238373 return f"[Optimized for { self .model_name } ] { text } "
239374
375+ # Display pre-optimization summary using utility function
376+ from .utils .summary_utils import create_and_display_summary
377+
378+ create_and_display_summary (self , prompt_data )
379+
240380 try :
241381 # Add model-specific tips to the prompt if enabled
242382 model_tips = None
@@ -269,29 +409,12 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
269409
270410 # Update the prompt text in prompt_data
271411 prompt_data ["text" ] = text
272- # Create a signature class dynamically with proper field definitions
273- input_fields = {}
274- output_fields = {}
275-
276- # Define input and output fields based on prompt_data
277- for field in prompt_data .get ("inputs" , ["question" ]):
278- input_fields [field ] = dspy .InputField (desc = "${" + field + "}" )
279- for field in prompt_data .get ("outputs" , ["answer" ]):
280- output_fields [field ] = dspy .OutputField (desc = "${" + field + "}" )
281-
282- # Create the signature class with proper field definitions
283- DynamicSignature = type (
284- "DynamicSignature" ,
285- (dspy .Signature ,),
286- {
287- ** input_fields ,
288- ** output_fields ,
289- "__doc__" : text , # Store the instructions as the docstring
290- },
291- )
412+
413+ # Create signature using consistent helper method with enhanced prompt
414+ signature = self ._create_signature (prompt_data , text )
292415
293416 # Create program instance with the signature
294- program = dspy .Predict (DynamicSignature )
417+ program = dspy .Predict (signature )
295418
296419 # Map our naming convention to DSPy's expected values
297420 dspy_auto_mode = map_auto_mode_to_dspy (self .auto )
@@ -338,7 +461,7 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
338461 # Use our custom instruction tips with highest priority
339462 optimizer .proposer_kwargs ["tip" ] = self .proposer_kwargs ["tip" ]
340463 logging .info (
341- f"Using custom instruction tips: { self .proposer_kwargs ['tip' ]} ... "
464+ f"Using custom instruction tips: { self .proposer_kwargs ['tip' ][: 50 ] if self . proposer_kwargs [ 'tip' ] else 'None' } "
342465 )
343466 # Otherwise, if we have model-specific tips, use those
344467 elif model_tips :
@@ -355,7 +478,7 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
355478 )
356479
357480 logging .info (
358- f"Compiling program with { len (self .trainset )} training examples and { len (self .valset )} validation examples"
481+ f"Compiling program with { len (self .trainset )} training examples, { len (self .valset )} validation examples, and { len ( self . testset ) } test examples"
359482 )
360483
361484 # Create a custom compile method that injects our tip directly
0 commit comments