Merged

18 commits
- 8deea72 feat: Add pre-optimization summary display (Issue #19) (siddhantparadox, Jun 27, 2025)
- fad488c feat: validate minimum records in dataset file (Sangamesh26, Jun 28, 2025)
- 85c4c97 chore: lint code (Sangamesh26, Jun 28, 2025)
- cb6bff5 feat: Add pre-optimization summary and fix CI failures (siddhantparadox, Jul 2, 2025)
- 1620a37 Merge pull request #28 from Sangamesh26/feature/validation-min-record… (heyjustinai, Jul 2, 2025)
- 0a7abf6 reimplemented baseline scoring and use test set (heyjustinai, Jul 3, 2025)
- c9032e6 feat: Add pre-optimization summary display (Issue #19) (siddhantparadox, Jun 27, 2025)
- ac3b647 feat: Add pre-optimization summary and fix CI failures (siddhantparadox, Jul 2, 2025)
- 4a43302 reimplemented baseline scoring and use test set (heyjustinai, Jul 3, 2025)
- 61ab1a4 Merge branch 'feature/pre-optimization-summary' of https://github.com… (heyjustinai, Jul 3, 2025)
- e318171 temp file fix in test datasets (heyjustinai, Jul 3, 2025)
- 7b04df4 enhance task and prompt model names for strategy configurations (heyjustinai, Jul 3, 2025)
- dda5d15 refactor pre-optimization summary handling in prompt_strategy (heyjustinai, Jul 3, 2025)
- c54b8c0 update baseline computation settings and enhance strategy parameter e… (heyjustinai, Jul 3, 2025)
- 7e1a799 remove pre opt summary duplication (heyjustinai, Jul 3, 2025)
- f0a81de refactor logging configuration in CLI to allow dynamic log level adju… (heyjustinai, Jul 3, 2025)
- 7bb6487 refactor pre-optimization summary test to validate summary creation a… (heyjustinai, Jul 3, 2025)
- f86e849 fix: resolve CI test failures by fixing atexit handler and removing l… (heyjustinai, Jul 3, 2025)
44 changes: 44 additions & 0 deletions docs/advanced/logging.md
@@ -43,3 +43,47 @@ The exported JSON file will contain two main sections:

- `timings`: The duration of each major phase of the optimization process.
- `metrics`: A list of metrics logged during the process, including the metric key, value, and step.

## Pre-Optimization Summary

Before starting the optimization process, `llama-prompt-ops` displays a comprehensive summary of the optimization configuration. This summary provides transparency into what will happen during optimization and helps with debugging and reproducibility.

### Summary Contents

The pre-optimization summary includes:

- **Task Model**: The model used for executing the task
- **Proposer Model**: The model used for generating instruction proposals (MIPROv2)
- **Metric**: The evaluation metric being used
- **Train / Val size**: Number of training and validation examples
- **MIPRO Params**: Key MIPROv2 optimization parameters
- **Guidance**: Any custom instruction tips provided to the proposer
- **Baseline score**: Performance of the original prompt before optimization

### Example Output

```
=== Pre-Optimization Summary ===
Task Model : openai/gpt-4o-mini
Proposer Model : openai/gpt-4o
Metric : facility_metric
Train / Val size : 100 / 25
MIPRO Params : {"auto_user":"basic","auto_dspy":"light","max_labeled_demos":5,"max_bootstrapped_demos":4,"num_candidates":10,"num_threads":18,"init_temperature":0.5,"seed":9}
Guidance : Use chain-of-thought reasoning and show your work step by step...
Baseline score : 0.4200
```

### Controlling Visibility

The pre-optimization summary is logged at `INFO` level. To see it, ensure your log level is set to `INFO` or a more verbose level such as `DEBUG`:

```bash
# Via command line
llama-prompt-ops migrate --config config.yaml --log-level INFO

# Via environment variable
export PROMPT_OPS_LOG_LEVEL=INFO
llama-prompt-ops migrate --config config.yaml
```

The summary provides valuable context for understanding optimization results and can help identify configuration issues before the optimization process begins.
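
For reference, the summary above is produced by the `PreOptimizationSummary` object this PR adds. Below is a minimal, hypothetical sketch of constructing and logging one directly; the field names are taken from this diff, but the values are illustrative:

```python
# Hypothetical sketch: field names come from the PreOptimizationSummary
# constructor added in this PR; the values below are placeholders.
from llama_prompt_ops.core.utils import PreOptimizationSummary

summary = PreOptimizationSummary(
    task_model="openai/gpt-4o-mini",
    proposer_model="openai/gpt-4o",
    metric_name="facility_metric",
    train_size=100,
    val_size=25,
    mipro_params={"auto_user": "basic", "auto_dspy": "light", "num_threads": 18},
    guidance="Use chain-of-thought reasoning and show your work step by step...",
    baseline_score=0.42,
)
summary.log()  # writes the summary at INFO level
```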
7 changes: 7 additions & 0 deletions src/llama_prompt_ops/core/migrator.py
@@ -160,11 +160,15 @@ def optimize(
if hasattr(self.strategy, "valset") and valset:
self.strategy.valset = valset

if hasattr(self.strategy, "testset") and testset:
self.strategy.testset = testset

self.logger.progress(
f"Applying {self.strategy.__class__.__name__} to optimize prompt"
)
self.logger.progress(f"Training set size: {len(trainset) if trainset else 0}")
self.logger.progress(f"Validation set size: {len(valset) if valset else 0}")
self.logger.progress(f"Test set size: {len(testset) if testset else 0}")

with self.logger.phase("Running optimization strategy"):
optimized_program = self.strategy.run(prompt_data)
@@ -215,6 +219,9 @@ def load_dataset_with_adapter(
if hasattr(self.strategy, "valset") and self.valset:
self.strategy.valset = self.valset

if hasattr(self.strategy, "testset") and self.testset:
self.strategy.testset = self.testset

return self.trainset, self.valset, self.testset

def evaluate(
1 change: 1 addition & 0 deletions src/llama_prompt_ops/core/model_strategies.py
@@ -145,6 +145,7 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:

# Ensure the base strategy has the latest models and datasets
# This is important because these might be set after initialization
# and the pre-optimization summary needs them
self.base_strategy.task_model = self.task_model
self.base_strategy.prompt_model = self.prompt_model
self.base_strategy.trainset = self.trainset
201 changes: 177 additions & 24 deletions src/llama_prompt_ops/core/prompt_strategies.py
@@ -14,14 +14,17 @@
import logging
import os
import sys
import time
import traceback
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, List, Optional, Union

import dspy
from typing_extensions import Literal

from .evaluation import create_evaluator
from .utils import map_auto_mode_to_dspy
from .utils.telemetry import PreOptimizationSummary


class OptimizationError(Exception):
@@ -144,6 +147,8 @@ def __init__(
fewshot_aware_proposer: bool = True,
use_llama_tips: bool = True,
requires_permission_to_run: bool = False,
# Baseline computation settings
compute_baseline: bool = False,
**kwargs,
):
"""
Expand Down Expand Up @@ -179,7 +184,9 @@ def __init__(
tip_aware_proposer: Whether to use tip-aware instruction proposals
fewshot_aware_proposer: Whether to use few-shot aware instruction proposals
requires_permission_to_run: Whether to require user permission to run
provide_traceback: Whether to provide tracebacks for errors

# Baseline computation parameters
compute_baseline: Whether to compute baseline score before optimization

**kwargs: Additional configuration parameters
"""
@@ -192,6 +199,7 @@
# Training and validation data
self.trainset = kwargs.get("trainset", [])
self.valset = kwargs.get("valset", [])
self.testset = kwargs.get("testset", [])

# Model-specific optimization settings
self.use_llama_tips = use_llama_tips
Expand Down Expand Up @@ -221,6 +229,120 @@ def __init__(
self.fewshot_aware_proposer = fewshot_aware_proposer
self.requires_permission_to_run = requires_permission_to_run

# Baseline computation settings
self.compute_baseline = compute_baseline

def _get_model_name(self, model) -> str:
Review comment (Member): we want to make sure to add type safety here, and also not violate the open/closed principle

"""
Extract a human-readable name from a model object.

Args:
model: The model object (could be a DSPy model, adapter, or string)

Returns:
A string representation of the model name
"""
if model is None:
return "None"

# Try to get model_name attribute first
if hasattr(model, "model_name"):
return str(model.model_name)

# Try to get model attribute (for adapters)
if hasattr(model, "model"):
return str(model.model)

# For DSPyModelAdapter, try to get the underlying model name
if hasattr(model, "_model") and hasattr(model._model, "model"):
return str(model._model.model)

# Fall back to string representation
return str(model)

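As a hedged illustration of the reviewer's type-safety and open/closed comment above, one possible direction (hypothetical, not part of this PR; `DSPyModelAdapter` appears only in a commented-out placeholder) is a `functools.singledispatch` helper, so new model types register their own name extraction instead of growing this method:

```python
# Hypothetical sketch illustrating the reviewer's suggestion; not part of this PR.
from functools import singledispatch
from typing import Any


@singledispatch
def model_display_name(model: Any) -> str:
    """Fallback: handle None and unknown types via their attributes or string form."""
    if model is None:
        return "None"
    if hasattr(model, "model_name"):
        return str(model.model_name)
    return str(model)


@model_display_name.register
def _(model: str) -> str:
    # Plain strings are already display names.
    return model


# A concrete adapter type would register its own handler, e.g.:
# @model_display_name.register(DSPyModelAdapter)
# def _(model: DSPyModelAdapter) -> str:
#     return str(model._model.model)
```
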
def _create_signature(self, prompt_data: Dict[str, Any], instructions: str):
"""
Create a DSPy signature with explicit field definitions.

Args:
prompt_data: Dictionary containing inputs and outputs field definitions
instructions: The instruction text for the signature

Returns:
DSPy signature class
"""
# Create a signature class dynamically with proper field definitions
input_fields = {}
output_fields = {}

# Define input and output fields based on prompt_data
for field in prompt_data.get("inputs", ["question"]):
input_fields[field] = dspy.InputField(desc="${" + field + "}")
for field in prompt_data.get("outputs", ["answer"]):
output_fields[field] = dspy.OutputField(desc="${" + field + "}")

# Create the signature class with proper field definitions
DynamicSignature = type(
"DynamicSignature",
(dspy.Signature,),
{
**input_fields,
**output_fields,
"__doc__": instructions, # Store the instructions as the docstring
},
)

return DynamicSignature

def _compute_baseline_score(self, prompt_data: Dict[str, Any]) -> Optional[float]:
Review comment (Member): We should use the built-in evaluator class here

Review comment (Member): We should probably have this separately as well: (i) single responsibility principle, (ii) hard to reuse it across different strategies, (iii) allows testing in isolation

"""
Compute baseline score using the original prompt before optimization.
Uses testset to avoid data leakage and evaluation.py for consistency.

Args:
prompt_data: Dictionary containing the prompt text and metadata

Returns:
Baseline score as float, or None if computation fails or is not possible
"""
if not self.metric or not self.testset:
logging.debug("Skipping baseline computation: missing metric or test set")
return None

if not self.compute_baseline:
logging.debug("Baseline computation disabled")
return None

try:
start_time = time.time()
logging.info("Computing baseline score using testset...")

# Use consistent signature creation with original prompt
baseline_signature = self._create_signature(
prompt_data, prompt_data["text"]
)
baseline_program = dspy.Predict(baseline_signature)

# Leverage existing evaluation infrastructure
evaluator = create_evaluator(
metric=self.metric,
devset=self.testset,
display_progress=False,
display_table=False,
)

score = evaluator.evaluate(baseline_program)
duration = time.time() - start_time

logging.info(
f"Baseline evaluation completed in {duration:.2f}s: {score:.3f}"
)
return float(score)

except Exception as e:
logging.warning(f"Baseline evaluation failed: {e}")
return None

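One way to read the single-responsibility comments above is to pull baseline scoring out into a free-standing helper that any strategy can call and that can be unit-tested in isolation. The sketch below is hypothetical (the function name, module placement, and `signature_factory` parameter are assumptions); it reuses `dspy.Predict` and `create_evaluator` exactly as this diff does:

```python
# Hypothetical sketch; not part of this PR.
import logging
import time
from typing import Any, Callable, Dict, List, Optional

import dspy

from llama_prompt_ops.core.evaluation import create_evaluator


def compute_baseline_score(
    signature_factory: Callable[[Dict[str, Any], str], Any],
    prompt_data: Dict[str, Any],
    metric: Any,
    testset: List[Any],
) -> Optional[float]:
    """Score the unoptimized prompt on the held-out test set."""
    if not metric or not testset:
        logging.debug("Skipping baseline computation: missing metric or test set")
        return None
    try:
        start = time.time()
        # Build a predictor over the original (unoptimized) prompt text.
        program = dspy.Predict(signature_factory(prompt_data, prompt_data["text"]))
        evaluator = create_evaluator(
            metric=metric,
            devset=testset,
            display_progress=False,
            display_table=False,
        )
        score = float(evaluator.evaluate(program))
        logging.info(
            f"Baseline evaluation completed in {time.time() - start:.2f}s: {score:.3f}"
        )
        return score
    except Exception as exc:
        logging.warning(f"Baseline evaluation failed: {exc}")
        return None
```
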
def run(self, prompt_data: Dict[str, Any]) -> Any:
"""
Apply basic optimization to the prompt using DSPy's MIPROv2.
Expand All @@ -237,6 +359,54 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
if "dspy" not in globals() or not self.trainset:
return f"[Optimized for {self.model_name}] {text}"

# Display pre-optimization summary
try:
# Collect guidance information
guidance = None
if (
hasattr(self, "proposer_kwargs")
and self.proposer_kwargs
and "tip" in self.proposer_kwargs
):
guidance = self.proposer_kwargs["tip"]

# Compute baseline score if enabled
baseline_score = None
Review comment (Member): Core requirement for baseline score is disabled; we still need to compute the baseline score

if self.compute_baseline:
try:
baseline_score = self._compute_baseline_score(prompt_data)
except Exception as baseline_e:
logging.warning(f"Failed to compute baseline score: {baseline_e}")
baseline_score = None

# Create and display the pre-optimization summary
summary = PreOptimizationSummary(
task_model=self._get_model_name(self.task_model),
proposer_model=self._get_model_name(self.prompt_model),
metric_name=(
getattr(self.metric, "__name__", str(self.metric))
if self.metric
else "None"
),
train_size=len(self.trainset or []),
val_size=len(self.valset or []),
mipro_params={
"auto_user": self.auto,
"auto_dspy": map_auto_mode_to_dspy(self.auto),
"max_labeled_demos": self.max_labeled_demos,
"max_bootstrapped_demos": self.max_bootstrapped_demos,
"num_candidates": self.num_candidates,
"num_threads": self.num_threads,
"init_temperature": self.init_temperature,
"seed": self.seed,
},
guidance=guidance,
baseline_score=baseline_score,
)
summary.log()
except Exception as e:
logging.warning(f"Failed to display pre-optimization summary: {str(e)}")

try:
# Add model-specific tips to the prompt if enabled
model_tips = None
@@ -269,29 +439,12 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:

# Update the prompt text in prompt_data
prompt_data["text"] = text
# Create a signature class dynamically with proper field definitions
input_fields = {}
output_fields = {}

# Define input and output fields based on prompt_data
for field in prompt_data.get("inputs", ["question"]):
input_fields[field] = dspy.InputField(desc="${" + field + "}")
for field in prompt_data.get("outputs", ["answer"]):
output_fields[field] = dspy.OutputField(desc="${" + field + "}")

# Create the signature class with proper field definitions
DynamicSignature = type(
"DynamicSignature",
(dspy.Signature,),
{
**input_fields,
**output_fields,
"__doc__": text, # Store the instructions as the docstring
},
)

# Create signature using consistent helper method with enhanced prompt
signature = self._create_signature(prompt_data, text)

# Create program instance with the signature
program = dspy.Predict(DynamicSignature)
program = dspy.Predict(signature)

# Map our naming convention to DSPy's expected values
dspy_auto_mode = map_auto_mode_to_dspy(self.auto)
@@ -338,7 +491,7 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
# Use our custom instruction tips with highest priority
optimizer.proposer_kwargs["tip"] = self.proposer_kwargs["tip"]
logging.info(
f"Using custom instruction tips: {self.proposer_kwargs['tip']}..."
f"Using custom instruction tips: {self.proposer_kwargs['tip'][:50] if self.proposer_kwargs['tip'] else 'None'}"
)
# Otherwise, if we have model-specific tips, use those
elif model_tips:
@@ -355,7 +508,7 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
)

logging.info(
f"Compiling program with {len(self.trainset)} training examples and {len(self.valset)} validation examples"
f"Compiling program with {len(self.trainset)} training examples, {len(self.valset)} validation examples, and {len(self.testset)} test examples"
)

# Create a custom compile method that injects our tip directly
2 changes: 2 additions & 0 deletions src/llama_prompt_ops/core/utils/__init__.py
@@ -10,10 +10,12 @@
from .format_utils import convert_json_to_yaml, json_to_yaml_file
from .logging import get_logger
from .strategy_utils import map_auto_mode_to_dspy
from .telemetry import PreOptimizationSummary

__all__ = [
"map_auto_mode_to_dspy",
"convert_json_to_yaml",
"json_to_yaml_file",
"get_logger",
"PreOptimizationSummary",
]