
Commit 72df7e8

Merge pull request #27 from siddhantparadox/feature/pre-optimization-summary
feat: Add pre-optimization summary display (Issue #19)
2 parents 1620a37 + f86e849 commit 72df7e8

18 files changed: 1,024 additions, 77 deletions

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -111,3 +111,5 @@ use-cases/joule/
 use-cases/dox/
 datasets/dropbox_qa/dropbox_data.json
 publish.sh
+
+scripts/

docs/advanced/logging.md

Lines changed: 44 additions & 0 deletions
@@ -43,3 +43,47 @@ The exported JSON file will contain two main sections:

- `timings`: The duration of each major phase of the optimization process.
- `metrics`: A list of metrics logged during the process, including the metric key, value, and step.

## Pre-Optimization Summary

Before starting the optimization process, `llama-prompt-ops` displays a comprehensive summary of the optimization configuration. This summary provides transparency into what will happen during optimization and helps with debugging and reproducibility.

### Summary Contents

The pre-optimization summary includes:

- **Task Model**: The model used for executing the task
- **Proposer Model**: The model used for generating instruction proposals (MIPROv2)
- **Metric**: The evaluation metric being used
- **Train / Val size**: Number of training and validation examples
- **MIPRO Params**: Key MIPROv2 optimization parameters
- **Guidance**: Any custom instruction tips provided to the proposer
- **Baseline score**: Performance of the original prompt before optimization

### Example Output

```
=== Pre-Optimization Summary ===
Task Model       : openai/gpt-4o-mini
Proposer Model   : openai/gpt-4o
Metric           : facility_metric
Train / Val size : 100 / 25
MIPRO Params     : {"auto_user":"basic","auto_dspy":"light","max_labeled_demos":5,"max_bootstrapped_demos":4,"num_candidates":10,"num_threads":18,"init_temperature":0.5,"seed":9}
Guidance         : Use chain-of-thought reasoning and show your work step by step...
Baseline score   : 0.4200
```

### Controlling Visibility

The pre-optimization summary is logged at the `INFO` level. To see it, ensure your log level is set to `INFO` or lower:

```bash
# Via command line
llama-prompt-ops migrate --config config.yaml --log-level INFO

# Via environment variable
export PROMPT_OPS_LOG_LEVEL=INFO
llama-prompt-ops migrate --config config.yaml
```

The summary provides valuable context for understanding optimization results and can help identify configuration issues before the optimization process begins.
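If you launch `llama-prompt-ops` from a Python script rather than a shell, the same two controls apply. A minimal sketch, assuming only the documented `--log-level` flag and `PROMPT_OPS_LOG_LEVEL` environment variable shown above (the `subprocess` wrapper itself is illustrative, not part of the library):

```python
import os
import subprocess

# Ensure the INFO-level pre-optimization summary is emitted.
# Either mechanism below is sufficient on its own.
env = dict(os.environ, PROMPT_OPS_LOG_LEVEL="INFO")

subprocess.run(
    ["llama-prompt-ops", "migrate", "--config", "config.yaml", "--log-level", "INFO"],
    env=env,      # environment-variable route
    check=True,   # fail loudly if the CLI exits non-zero
)
```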

src/llama_prompt_ops/core/migrator.py

Lines changed: 7 additions & 0 deletions
@@ -160,11 +160,15 @@ def optimize(
         if hasattr(self.strategy, "valset") and valset:
             self.strategy.valset = valset
 
+        if hasattr(self.strategy, "testset") and testset:
+            self.strategy.testset = testset
+
         self.logger.progress(
             f"Applying {self.strategy.__class__.__name__} to optimize prompt"
         )
         self.logger.progress(f"Training set size: {len(trainset) if trainset else 0}")
         self.logger.progress(f"Validation set size: {len(valset) if valset else 0}")
+        self.logger.progress(f"Test set size: {len(testset) if testset else 0}")
 
         with self.logger.phase("Running optimization strategy"):
             optimized_program = self.strategy.run(prompt_data)

@@ -215,6 +219,9 @@ def load_dataset_with_adapter(
         if hasattr(self.strategy, "valset") and self.valset:
             self.strategy.valset = self.valset
 
+        if hasattr(self.strategy, "testset") and self.testset:
+            self.strategy.testset = self.testset
+
         return self.trainset, self.valset, self.testset
 
     def evaluate(

src/llama_prompt_ops/core/model_strategies.py

Lines changed: 7 additions & 0 deletions
@@ -45,6 +45,8 @@ def __init__(
         max_bootstrapped_demos: int = 4,
         max_labeled_demos: int = 5,
         auto: Optional[Literal["basic", "intermediate", "advanced"]] = "basic",
+        task_model_name: Optional[str] = None,
+        prompt_model_name: Optional[str] = None,
         **kwargs,
     ):
         """

@@ -60,6 +62,8 @@ def __init__(
             max_bootstrapped_demos: Maximum number of bootstrapped demos for MIPROv2
             max_labeled_demos: Maximum number of labeled demos for MIPROv2
             auto: Auto mode for MIPROv2 (basic, intermediate, advanced)
+            task_model_name: Name of the task model (for display purposes)
+            prompt_model_name: Name of the prompt/proposer model (for display purposes)
             **kwargs: Additional parameters for BasicOptimizationStrategy
         """
         # Verify that the model is a Llama model

@@ -77,6 +81,8 @@ def __init__(
             max_bootstrapped_demos=max_bootstrapped_demos,
             max_labeled_demos=max_labeled_demos,
             auto=auto,
+            task_model_name=task_model_name,
+            prompt_model_name=prompt_model_name,
             **kwargs,
         )
 
@@ -145,6 +151,7 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
 
         # Ensure the base strategy has the latest models and datasets
         # This is important because these might be set after initialization
+        # and the pre-optimization summary needs them
         self.base_strategy.task_model = self.task_model
         self.base_strategy.prompt_model = self.prompt_model
         self.base_strategy.trainset = self.trainset

src/llama_prompt_ops/core/prompt_strategies.py

Lines changed: 147 additions & 24 deletions
@@ -14,13 +14,15 @@
 import logging
 import os
 import sys
+import time
 import traceback
 from abc import ABC, abstractmethod
 from typing import Any, Callable, Dict, List, Optional, Union
 
 import dspy
 from typing_extensions import Literal
 
+from .evaluation import create_evaluator
 from .utils import map_auto_mode_to_dspy
 
 

@@ -144,6 +146,11 @@ def __init__(
         fewshot_aware_proposer: bool = True,
         use_llama_tips: bool = True,
         requires_permission_to_run: bool = False,
+        # Baseline computation settings
+        compute_baseline: bool = True,
+        # Model name parameters for display
+        task_model_name: Optional[str] = None,
+        prompt_model_name: Optional[str] = None,
         **kwargs,
     ):
         """

@@ -179,7 +186,13 @@ def __init__(
             tip_aware_proposer: Whether to use tip-aware instruction proposals
             fewshot_aware_proposer: Whether to use few-shot aware instruction proposals
             requires_permission_to_run: Whether to require user permission to run
-            provide_traceback: Whether to provide tracebacks for errors
+
+            # Baseline computation parameters
+            compute_baseline: Whether to compute baseline score before optimization
+
+            # Model name parameters for display
+            task_model_name: Name of the task model
+            prompt_model_name: Name of the prompt model
 
             **kwargs: Additional configuration parameters
         """

@@ -192,6 +205,7 @@ def __init__(
         # Training and validation data
         self.trainset = kwargs.get("trainset", [])
         self.valset = kwargs.get("valset", [])
+        self.testset = kwargs.get("testset", [])
 
         # Model-specific optimization settings
         self.use_llama_tips = use_llama_tips

@@ -221,6 +235,127 @@ def __init__(
         self.fewshot_aware_proposer = fewshot_aware_proposer
         self.requires_permission_to_run = requires_permission_to_run
 
+        # Baseline computation settings
+        self.compute_baseline = compute_baseline
+
+        # Model name parameters for display
+        self.task_model_name = task_model_name
+        self.prompt_model_name = prompt_model_name
+
+    def _get_model_name(self, model) -> str:
+        """
+        Get a human-readable name for a model using stored names.
+
+        Args:
+            model: The model object to get the name for
+
+        Returns:
+            A string representation of the model name
+        """
+        if model is None:
+            return "None"
+
+        # Use stored model names if available
+        if model is self.task_model and self.task_model_name:
+            return self.task_model_name
+        if model is self.prompt_model and self.prompt_model_name:
+            return self.prompt_model_name
+
+        # Fallback to legacy introspection for backward compatibility
+        if hasattr(model, "model_name"):
+            return str(model.model_name)
+        if hasattr(model, "model"):
+            return str(model.model)
+        if hasattr(model, "_model") and hasattr(model._model, "model"):
+            return str(model._model.model)
+
+        # Final fallback
+        return str(model)
+
+    def _create_signature(self, prompt_data: Dict[str, Any], instructions: str):
+        """
+        Create a DSPy signature with explicit field definitions.
+
+        Args:
+            prompt_data: Dictionary containing inputs and outputs field definitions
+            instructions: The instruction text for the signature
+
+        Returns:
+            DSPy signature class
+        """
+        # Create a signature class dynamically with proper field definitions
+        input_fields = {}
+        output_fields = {}
+
+        # Define input and output fields based on prompt_data
+        for field in prompt_data.get("inputs", ["question"]):
+            input_fields[field] = dspy.InputField(desc="${" + field + "}")
+        for field in prompt_data.get("outputs", ["answer"]):
+            output_fields[field] = dspy.OutputField(desc="${" + field + "}")
+
+        # Create the signature class with proper field definitions
+        DynamicSignature = type(
+            "DynamicSignature",
+            (dspy.Signature,),
+            {
+                **input_fields,
+                **output_fields,
+                "__doc__": instructions,  # Store the instructions as the docstring
+            },
+        )
+
+        return DynamicSignature
+
+    def _compute_baseline_score(self, prompt_data: Dict[str, Any]) -> Optional[float]:
+        """
+        Compute baseline score using the original prompt before optimization.
+        Uses testset to avoid data leakage and evaluation.py for consistency.
+
+        Args:
+            prompt_data: Dictionary containing the prompt text and metadata
+
+        Returns:
+            Baseline score as float, or None if computation fails or is not possible
+        """
+        if not self.metric or not self.testset:
+            logging.debug("Skipping baseline computation: missing metric or test set")
+            return None
+
+        if not self.compute_baseline:
+            logging.debug("Baseline computation disabled")
+            return None
+
+        try:
+            start_time = time.time()
+
+            # Use consistent signature creation with original prompt
+            baseline_signature = self._create_signature(
+                prompt_data, prompt_data["text"]
+            )
+            baseline_program = dspy.Predict(baseline_signature)
+
+            print(
+                f"\nComputing baseline score on {len(self.testset)} test examples using {self.num_threads} threads..."
+            )
+
+            evaluator = create_evaluator(
+                metric=self.metric,
+                devset=self.testset,
+                num_threads=self.num_threads,  # Use the strategy's num_threads setting
+                display_progress=True,
+                display_table=False,
+            )
+
+            score = evaluator.evaluate(baseline_program)
+            duration = time.time() - start_time
+
+            print(f"✅ Baseline Score: {score:.3f} in {duration:.2f}s\n")
+            return float(score)
+
+        except Exception as e:
+            logging.warning(f"Baseline evaluation failed: {e}")
+            return None
+
     def run(self, prompt_data: Dict[str, Any]) -> Any:
         """
         Apply basic optimization to the prompt using DSPy's MIPROv2.

@@ -237,6 +372,11 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
         if "dspy" not in globals() or not self.trainset:
             return f"[Optimized for {self.model_name}] {text}"
 
+        # Display pre-optimization summary using utility function
+        from .utils.summary_utils import create_and_display_summary
+
+        create_and_display_summary(self, prompt_data)
+
         try:
             # Add model-specific tips to the prompt if enabled
             model_tips = None

@@ -269,29 +409,12 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
 
             # Update the prompt text in prompt_data
             prompt_data["text"] = text
-            # Create a signature class dynamically with proper field definitions
-            input_fields = {}
-            output_fields = {}
-
-            # Define input and output fields based on prompt_data
-            for field in prompt_data.get("inputs", ["question"]):
-                input_fields[field] = dspy.InputField(desc="${" + field + "}")
-            for field in prompt_data.get("outputs", ["answer"]):
-                output_fields[field] = dspy.OutputField(desc="${" + field + "}")
-
-            # Create the signature class with proper field definitions
-            DynamicSignature = type(
-                "DynamicSignature",
-                (dspy.Signature,),
-                {
-                    **input_fields,
-                    **output_fields,
-                    "__doc__": text,  # Store the instructions as the docstring
-                },
-            )
+
+            # Create signature using consistent helper method with enhanced prompt
+            signature = self._create_signature(prompt_data, text)
 
             # Create program instance with the signature
-            program = dspy.Predict(DynamicSignature)
+            program = dspy.Predict(signature)
 
             # Map our naming convention to DSPy's expected values
             dspy_auto_mode = map_auto_mode_to_dspy(self.auto)

@@ -338,7 +461,7 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
                 # Use our custom instruction tips with highest priority
                 optimizer.proposer_kwargs["tip"] = self.proposer_kwargs["tip"]
                 logging.info(
-                    f"Using custom instruction tips: {self.proposer_kwargs['tip']}..."
+                    f"Using custom instruction tips: {self.proposer_kwargs['tip'][:50] if self.proposer_kwargs['tip'] else 'None'}"
                 )
             # Otherwise, if we have model-specific tips, use those
             elif model_tips:

@@ -355,7 +478,7 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
             )
 
             logging.info(
-                f"Compiling program with {len(self.trainset)} training examples and {len(self.valset)} validation examples"
+                f"Compiling program with {len(self.trainset)} training examples, {len(self.valset)} validation examples, and {len(self.testset)} test examples"
             )
 
             # Create a custom compile method that injects our tip directly
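The refactor above centralizes DSPy signature construction in `_create_signature`, so the baseline evaluation and the optimized program are built the same way. Below is a minimal standalone sketch of that dynamic-signature pattern, mirroring the helper in this diff; the field names and instruction text are made up for illustration, and no language model is configured here.

```python
import dspy


def make_signature(prompt_data: dict, instructions: str) -> type:
    """Build a dspy.Signature subclass from declared input/output field names."""
    input_fields = {
        name: dspy.InputField(desc="${" + name + "}")
        for name in prompt_data.get("inputs", ["question"])
    }
    output_fields = {
        name: dspy.OutputField(desc="${" + name + "}")
        for name in prompt_data.get("outputs", ["answer"])
    }
    # The instruction text becomes the class docstring, which DSPy treats as the prompt.
    return type(
        "DynamicSignature",
        (dspy.Signature,),
        {**input_fields, **output_fields, "__doc__": instructions},
    )


# Illustrative prompt_data only; in the strategy it comes from the loaded prompt file.
prompt_data = {"inputs": ["question"], "outputs": ["answer"], "text": "Answer concisely."}
program = dspy.Predict(make_signature(prompt_data, prompt_data["text"]))
# program(question="...") would run once an LM is configured, e.g. via dspy.configure(lm=...).
```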

src/llama_prompt_ops/core/utils/__init__.py

Lines changed: 5 additions & 0 deletions
@@ -10,10 +10,15 @@
 from .format_utils import convert_json_to_yaml, json_to_yaml_file
 from .logging import get_logger
 from .strategy_utils import map_auto_mode_to_dspy
+from .summary_utils import create_and_display_summary, create_pre_optimization_summary
+from .telemetry import PreOptimizationSummary
 
 __all__ = [
     "map_auto_mode_to_dspy",
     "convert_json_to_yaml",
     "json_to_yaml_file",
     "get_logger",
+    "PreOptimizationSummary",
+    "create_pre_optimization_summary",
+    "create_and_display_summary",
 ]
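These re-exports make the summary helpers importable from the package's utils namespace. A hedged sketch of the resulting import surface, based only on the `__all__` list above and the `create_and_display_summary(self, prompt_data)` call site in `prompt_strategies.py`; the `PreOptimizationSummary` constructor is not shown in this diff, so no arguments are assumed here.

```python
from llama_prompt_ops.core.utils import (
    PreOptimizationSummary,
    create_and_display_summary,
    create_pre_optimization_summary,
)

# Call pattern visible in prompt_strategies.py: the summary is built from the
# strategy instance plus the prompt_data dict.
#     create_and_display_summary(strategy, prompt_data)
```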

0 commit comments
