feat: Add pre-optimization summary display (Issue #19) #27
@@ -14,14 +14,17 @@
 import logging
 import os
 import sys
+import time
 import traceback
 from abc import ABC, abstractmethod
 from typing import Any, Callable, Dict, List, Optional, Union

 import dspy
 from typing_extensions import Literal

+from .evaluation import create_evaluator
 from .utils import map_auto_mode_to_dspy
+from .utils.telemetry import PreOptimizationSummary


 class OptimizationError(Exception):
@@ -144,6 +147,8 @@ def __init__(
         fewshot_aware_proposer: bool = True,
         use_llama_tips: bool = True,
         requires_permission_to_run: bool = False,
+        # Baseline computation settings
+        compute_baseline: bool = False,
         **kwargs,
     ):
         """
@@ -179,7 +184,9 @@ def __init__(
             tip_aware_proposer: Whether to use tip-aware instruction proposals
             fewshot_aware_proposer: Whether to use few-shot aware instruction proposals
             requires_permission_to_run: Whether to require user permission to run
             provide_traceback: Whether to provide tracebacks for errors

+            # Baseline computation parameters
+            compute_baseline: Whether to compute baseline score before optimization
             **kwargs: Additional configuration parameters
         """
@@ -192,6 +199,7 @@
         # Training and validation data
         self.trainset = kwargs.get("trainset", [])
         self.valset = kwargs.get("valset", [])
+        self.testset = kwargs.get("testset", [])

         # Model-specific optimization settings
         self.use_llama_tips = use_llama_tips
@@ -221,6 +229,120 @@
         self.fewshot_aware_proposer = fewshot_aware_proposer
         self.requires_permission_to_run = requires_permission_to_run

+        # Baseline computation settings
+        self.compute_baseline = compute_baseline
+
+    def _get_model_name(self, model) -> str:
+        """
+        Extract a human-readable name from a model object.
+
+        Args:
+            model: The model object (could be a DSPy model, adapter, or string)
+
+        Returns:
+            A string representation of the model name
+        """
+        if model is None:
+            return "None"
+
+        # Try to get model_name attribute first
+        if hasattr(model, "model_name"):
+            return str(model.model_name)
+
+        # Try to get model attribute (for adapters)
+        if hasattr(model, "model"):
+            return str(model.model)
+
+        # For DSPyModelAdapter, try to get the underlying model name
+        if hasattr(model, "_model") and hasattr(model._model, "model"):
+            return str(model._model.model)
+
+        # Fall back to string representation
+        return str(model)
+
+    def _create_signature(self, prompt_data: Dict[str, Any], instructions: str):
+        """
+        Create a DSPy signature with explicit field definitions.
+
+        Args:
+            prompt_data: Dictionary containing inputs and outputs field definitions
+            instructions: The instruction text for the signature
+
+        Returns:
+            DSPy signature class
+        """
+        # Create a signature class dynamically with proper field definitions
+        input_fields = {}
+        output_fields = {}
+
+        # Define input and output fields based on prompt_data
+        for field in prompt_data.get("inputs", ["question"]):
+            input_fields[field] = dspy.InputField(desc="${" + field + "}")
+        for field in prompt_data.get("outputs", ["answer"]):
+            output_fields[field] = dspy.OutputField(desc="${" + field + "}")
+
+        # Create the signature class with proper field definitions
+        DynamicSignature = type(
+            "DynamicSignature",
+            (dspy.Signature,),
+            {
+                **input_fields,
+                **output_fields,
+                "__doc__": instructions,  # Store the instructions as the docstring
+            },
+        )
+
+        return DynamicSignature
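Aside for readers unfamiliar with DSPy's dynamic signatures: the snippet below mirrors what _create_signature builds and shows how the resulting class is consumed. The prompt_data contents and instruction text are made up for illustration, the behaviour of type()-built signatures can vary across DSPy versions, and an LM has to be configured before the commented-out call would actually run.

    import dspy

    # Hypothetical prompt_data, mirroring the shape this module expects.
    prompt_data = {"inputs": ["question"], "outputs": ["answer"]}
    instructions = "Answer the question concisely."

    # Same construction as _create_signature: one InputField/OutputField per declared field.
    input_fields = {f: dspy.InputField(desc="${" + f + "}") for f in prompt_data["inputs"]}
    output_fields = {f: dspy.OutputField(desc="${" + f + "}") for f in prompt_data["outputs"]}
    DynamicSignature = type(
        "DynamicSignature",
        (dspy.Signature,),
        {**input_fields, **output_fields, "__doc__": instructions},
    )

    predictor = dspy.Predict(DynamicSignature)
    # Requires a configured LM first, e.g. dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))
    # print(predictor(question="What is the capital of France?").answer)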
+    def _compute_baseline_score(self, prompt_data: Dict[str, Any]) -> Optional[float]:

Member: We should use the built-in evaluator class here.

Member: We should probably have this as a separate component as well: (i) single responsibility principle, (ii) it is hard to reuse across different strategies otherwise, (iii) it allows testing in isolation.
+        """
+        Compute baseline score using the original prompt before optimization.
+        Uses testset to avoid data leakage and evaluation.py for consistency.
+
+        Args:
+            prompt_data: Dictionary containing the prompt text and metadata
+
+        Returns:
+            Baseline score as float, or None if computation fails or is not possible
+        """
+        if not self.metric or not self.testset:
+            logging.debug("Skipping baseline computation: missing metric or test set")
+            return None
+
+        if not self.compute_baseline:
+            logging.debug("Baseline computation disabled")
+            return None
+
+        try:
+            start_time = time.time()
+            logging.info("Computing baseline score using testset...")
+
+            # Use consistent signature creation with original prompt
+            baseline_signature = self._create_signature(
+                prompt_data, prompt_data["text"]
+            )
+            baseline_program = dspy.Predict(baseline_signature)
+
+            # Leverage existing evaluation infrastructure
+            evaluator = create_evaluator(
+                metric=self.metric,
+                devset=self.testset,
+                display_progress=False,
+                display_table=False,
+            )
+
+            score = evaluator.evaluate(baseline_program)
+            duration = time.time() - start_time
+
+            logging.info(
+                f"Baseline evaluation completed in {duration:.2f}s: {score:.3f}"
+            )
+            return float(score)
+
+        except Exception as e:
+            logging.warning(f"Baseline evaluation failed: {e}")
+            return None
+
     def run(self, prompt_data: Dict[str, Any]) -> Any:
         """
         Apply basic optimization to the prompt using DSPy's MIPROv2.
@@ -237,6 +359,54 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
         if "dspy" not in globals() or not self.trainset:
             return f"[Optimized for {self.model_name}] {text}"

+        # Display pre-optimization summary
+        try:
+            # Collect guidance information
+            guidance = None
+            if (
+                hasattr(self, "proposer_kwargs")
+                and self.proposer_kwargs
+                and "tip" in self.proposer_kwargs
+            ):
+                guidance = self.proposer_kwargs["tip"]
+
+            # Compute baseline score if enabled
+            baseline_score = None
+            if self.compute_baseline:
+                try:
+                    baseline_score = self._compute_baseline_score(prompt_data)
+                except Exception as baseline_e:
+                    logging.warning(f"Failed to compute baseline score: {baseline_e}")
+                    baseline_score = None
+
+            # Create and display the pre-optimization summary
+            summary = PreOptimizationSummary(
+                task_model=self._get_model_name(self.task_model),
+                proposer_model=self._get_model_name(self.prompt_model),
+                metric_name=(
+                    getattr(self.metric, "__name__", str(self.metric))
+                    if self.metric
+                    else "None"
+                ),
+                train_size=len(self.trainset or []),
+                val_size=len(self.valset or []),
+                mipro_params={
+                    "auto_user": self.auto,
+                    "auto_dspy": map_auto_mode_to_dspy(self.auto),
+                    "max_labeled_demos": self.max_labeled_demos,
+                    "max_bootstrapped_demos": self.max_bootstrapped_demos,
+                    "num_candidates": self.num_candidates,
+                    "num_threads": self.num_threads,
+                    "init_temperature": self.init_temperature,
+                    "seed": self.seed,
+                },
+                guidance=guidance,
+                baseline_score=baseline_score,
+            )
+            summary.log()
+        except Exception as e:
+            logging.warning(f"Failed to display pre-optimization summary: {str(e)}")

         try:
             # Add model-specific tips to the prompt if enabled
             model_tips = None
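PreOptimizationSummary is imported from .utils.telemetry, which is not part of this diff, so its exact interface is an assumption. Judging only from how run() uses it above (keyword construction followed by a log() call), a minimal illustrative stand-in might look like the following; the real class may differ.

    import logging
    from dataclasses import dataclass, field
    from typing import Any, Dict, Optional


    @dataclass
    class PreOptimizationSummary:
        """Illustrative stand-in for .utils.telemetry.PreOptimizationSummary, not the real implementation."""

        task_model: str
        proposer_model: str
        metric_name: str
        train_size: int
        val_size: int
        mipro_params: Dict[str, Any] = field(default_factory=dict)
        guidance: Optional[str] = None
        baseline_score: Optional[float] = None

        def log(self) -> None:
            # Emit a single readable block before optimization starts.
            lines = [
                "Pre-optimization summary:",
                f"  task model     : {self.task_model}",
                f"  proposer model : {self.proposer_model}",
                f"  metric         : {self.metric_name}",
                f"  train/val size : {self.train_size}/{self.val_size}",
                f"  guidance       : {self.guidance or 'None'}",
                f"  baseline score : {self.baseline_score if self.baseline_score is not None else 'N/A'}",
            ]
            for key, value in self.mipro_params.items():
                lines.append(f"  {key}: {value}")
            logging.info("\n".join(lines))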
@@ -269,29 +439,12 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:

             # Update the prompt text in prompt_data
             prompt_data["text"] = text
-            # Create a signature class dynamically with proper field definitions
-            input_fields = {}
-            output_fields = {}
-
-            # Define input and output fields based on prompt_data
-            for field in prompt_data.get("inputs", ["question"]):
-                input_fields[field] = dspy.InputField(desc="${" + field + "}")
-            for field in prompt_data.get("outputs", ["answer"]):
-                output_fields[field] = dspy.OutputField(desc="${" + field + "}")
-
-            # Create the signature class with proper field definitions
-            DynamicSignature = type(
-                "DynamicSignature",
-                (dspy.Signature,),
-                {
-                    **input_fields,
-                    **output_fields,
-                    "__doc__": text,  # Store the instructions as the docstring
-                },
-            )
-
+            # Create signature using consistent helper method with enhanced prompt
+            signature = self._create_signature(prompt_data, text)

             # Create program instance with the signature
-            program = dspy.Predict(DynamicSignature)
+            program = dspy.Predict(signature)

             # Map our naming convention to DSPy's expected values
             dspy_auto_mode = map_auto_mode_to_dspy(self.auto)
@@ -338,7 +491,7 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
                 # Use our custom instruction tips with highest priority
                 optimizer.proposer_kwargs["tip"] = self.proposer_kwargs["tip"]
                 logging.info(
-                    f"Using custom instruction tips: {self.proposer_kwargs['tip']}..."
+                    f"Using custom instruction tips: {self.proposer_kwargs['tip'][:50] if self.proposer_kwargs['tip'] else 'None'}"
                 )
             # Otherwise, if we have model-specific tips, use those
             elif model_tips:
@@ -355,7 +508,7 @@ def run(self, prompt_data: Dict[str, Any]) -> Any: | |
| ) | ||
|
|
||
| logging.info( | ||
| f"Compiling program with {len(self.trainset)} training examples and {len(self.valset)} validation examples" | ||
| f"Compiling program with {len(self.trainset)} training examples, {len(self.valset)} validation examples, and {len(self.testset)} test examples" | ||
| ) | ||
|
|
||
| # Create a custom compile method that injects our tip directly | ||
|
Review comment: We want to make sure to add type safety here, and also not violate the open/closed principle.
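One illustrative way to act on this comment (not part of the PR; the names and values below are made up): give the mipro_params payload an explicit TypedDict so a type checker can flag misspelled keys or wrongly typed values, and keep PreOptimizationSummary closed to modification by routing strategy-specific details through that single typed mapping rather than adding new summary fields for each strategy.

    from typing import Optional

    from typing_extensions import TypedDict  # the module already imports from typing_extensions


    class MiproParams(TypedDict, total=False):
        """Typed shape of the mipro_params payload passed to PreOptimizationSummary (illustrative)."""

        auto_user: Optional[str]
        auto_dspy: Optional[str]
        max_labeled_demos: int
        max_bootstrapped_demos: int
        num_candidates: int
        num_threads: int
        init_temperature: float
        seed: int


    # Hypothetical usage: the annotation lets mypy/pyright catch typos and wrong value types
    # before they reach the summary, without changing PreOptimizationSummary itself.
    params: MiproParams = {
        "auto_user": "basic",
        "auto_dspy": "light",
        "max_labeled_demos": 4,
        "max_bootstrapped_demos": 4,
        "num_candidates": 10,
        "num_threads": 4,
        "init_temperature": 1.0,
        "seed": 9,
    }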