[Plot] Remove hidden data cap and start axis scales at 0 (#66)

CatherineSue · web-flow · commit c9aca7b18db6 · 2025-08-11T17:27:15.000-07:00
diff --git a/genai_bench/analysis/flexible_plot_report.py b/genai_bench/analysis/flexible_plot_report.py
@@ -353,7 +353,7 @@ def _add_plot_annotations(
                 bbox=dict(
                     boxstyle="round,pad=0.2",
                     facecolor="white",
-                    alpha=0.7,
+                    alpha=0.1,
                     edgecolor="none",
                 ),
             )
@@ -481,7 +481,7 @@ def _plot_multi_line_metric(
         ax.set_xlabel(plot_spec.x_label or self._generate_label(plot_spec.x_field))
         ax.set_ylabel(plot_spec.y_label or "Value")
         ax.set_title(plot_spec.title)
-        ax.grid(True, alpha=0.3)
+        ax.grid(True, alpha=0.1)
 
         # Position legend outside plot area for multi-line plots to avoid overlap
         ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", fontsize="small")
@@ -564,7 +564,7 @@ def _save_individual_subplots_multiline(
                     bbox=dict(
                         boxstyle="round,pad=0.2",
                         facecolor="white",
-                        alpha=0.7,
+                        alpha=0.1,
                         edgecolor="none",
                     ),
                 )
@@ -593,7 +593,7 @@ def _save_individual_subplots_multiline(
             # Copy grid
             ax_temp.grid(
                 ax.get_xgridlines()[0].get_visible() if ax.get_xgridlines() else True,
-                alpha=0.3,
+                alpha=0.1,
             )
             ax_temp.minorticks_on()
 
diff --git a/genai_bench/analysis/plot_report.py b/genai_bench/analysis/plot_report.py
@@ -51,25 +51,9 @@ def plot_graph(
     else:
         x_positions = x_data  # type: ignore[assignment]
 
-    # If this is TTFT or E2E latency, filter out values outside [0.1, 100]
-    valid_x = []
-    valid_y = []
-    valid_concurrency = []
-
-    should_cap = any(
-        kw in y_label.lower() for kw in ["ttft", "mean e2e", "p90 e2e", "p99 e2e"]
-    )
-
-    if should_cap:
-        for xx, yy, cc in zip(x_data, y_data, concurrency_levels, strict=False):
-            if 0.1 <= yy <= 100:
-                valid_x.append(xx)
-                valid_y.append(yy)
-                valid_concurrency.append(cc)
-    else:
-        valid_x = x_data
-        valid_y = y_data
-        valid_concurrency = concurrency_levels
+    valid_x = x_data
+    valid_y = y_data
+    valid_concurrency = concurrency_levels
 
     # Plot data
     if plot_type == "line":
@@ -88,7 +72,7 @@ def plot_graph(
             textcoords="offset points",
             ha="left",
             bbox=dict(
-                boxstyle="round,pad=0.2", facecolor="white", alpha=0.8, edgecolor="none"
+                boxstyle="round,pad=0.2", facecolor="white", alpha=0.1, edgecolor="none"
             ),
         )
 
@@ -101,11 +85,17 @@ def plot_graph(
             mticker.LogLocator(base=10.0, subs=np.arange(2, 10) * 0.1, numticks=100)
         )
 
-    # Cap the y-limits if needed
-    if should_cap:
-        ax.set_ylim([0.1, 100])
-    else:
-        ax.set_ylim(bottom=0)
+    # Axis limits: re-enable autoscale on each call; set_xlim/set_ylim turn it off
+    # X-axis: allow Matplotlib to autoscale to include new data, then pin left=0
+    ax.autoscale(enable=True, axis="x", tight=False)
+    x_left, x_right = ax.get_xlim()
+    ax.set_xlim(left=0.0, right=x_right)
+
+    # Y-axis: re-autoscale first, then pin bottom=0 for linear scale only
+    ax.autoscale(enable=True, axis="y", tight=False)
+    if ax.get_yscale() != "log":
+        y_bottom, y_top = ax.get_ylim()
+        ax.set_ylim(bottom=0.0, top=y_top)
 
     ax.set_xlabel(x_label)
     ax.set_ylabel(y_label)
@@ -715,6 +705,10 @@ def plot_error_rates(
     ax.set_xlabel("Concurrency")
     ax.set_ylabel("Error Rate")
     ax.set_title("Error Rates by HTTP Status vs Concurrency")
-    ax.set_ylim(bottom=0)
+    # Re-enable autoscale for y so subsequent groups can extend the top,
+    # then pin bottom at 0 (valid for linear scale used here)
+    ax.autoscale(enable=True, axis="y", tight=False)
+    y_bottom, y_top = ax.get_ylim()
+    ax.set_ylim(bottom=0.0, top=y_top)
     ax.legend()
     ax.grid(True)
diff --git a/genai_bench/cli/cli.py b/genai_bench/cli/cli.py
@@ -10,11 +10,10 @@
 
 from genai_bench.analysis.excel_report import create_workbook
 from genai_bench.analysis.experiment_loader import load_one_experiment
+from genai_bench.analysis.flexible_plot_report import plot_experiment_data_flexible
 from genai_bench.analysis.plot_report import (
     plot_single_scenario_inference_speed_vs_throughput,
 )
-from genai_bench.analysis.flexible_plot_report import plot_experiment_data_flexible
-
 from genai_bench.auth.unified_factory import UnifiedAuthFactory
 from genai_bench.cli.option_groups import (
     api_options,
diff --git a/tests/analysis/test_plot_report.py b/tests/analysis/test_plot_report.py
@@ -250,6 +250,9 @@ def test_plot_single_scenario_rerank(mock_plot_graph, mock_plt, tmp_path, caplog
 
 def test_plot_graph_line():
     ax = MagicMock()
+    ax.get_xlim.return_value = (0, 10)
+    ax.get_ylim.return_value = (0, 10)
+    ax.get_yscale.return_value = "linear"
     x_data = [1, 2, 3]
     y_data = [10, 20, 30]
     x_label = "X Axis"
@@ -280,6 +283,9 @@ def test_plot_graph_line():
 
 def test_plot_graph_scatter():
     ax = MagicMock()
+    ax.get_xlim.return_value = (0, 10)
+    ax.get_ylim.return_value = (0, 10)
+    ax.get_yscale.return_value = "linear"
     x_data = [1, 2, 3]
     y_data = [10, 20, 30]
     x_label = "X Axis"
@@ -307,6 +313,9 @@ def test_plot_graph_scatter():
 def test_plot_graph_concurrency():
     """When x_label is 'Concurrency', x_data is replaced by evenly spaced positions."""
     ax = MagicMock()
+    ax.get_xlim.return_value = (0, 10)
+    ax.get_ylim.return_value = (0, 10)
+    ax.get_yscale.return_value = "linear"
     x_data = [10, 20, 30]
     y_data = [0.5, 1.0, 2.0]
     x_label = "Concurrency"
@@ -324,28 +333,6 @@ def test_plot_graph_concurrency():
     ax.plot.assert_called_once()
 
 
-def test_plot_graph_cap():
-    """
-    When y_label triggers value capping (e.g. contains "ttft"),
-    only y values in [0.1, 100] are plotted.
-    """
-    ax = MagicMock()
-    x_data = [0, 1, 2, 3]
-    y_data = [0.05, 0.5, 50, 150]  # only 0.5 and 50 are within the valid range
-    x_label = "Not Concurrency"
-    y_label = "TTFT"
-    title = "TTFT Plot"
-    concurrency_levels = [10, 20, 30, 40]
-    label = "CapTest"
-
-    plot_graph(ax, x_data, y_data, x_label, y_label, title, concurrency_levels, label)
-
-    # The plotting call should use only the two valid data points.
-    ax.plot.assert_called_once()
-    # And y-limits should be capped to [0.1, 100]
-    ax.set_ylim.assert_called_with([0.1, 100])
-
-
 @patch("genai_bench.analysis.plot_report.plot_graph")
 @patch("genai_bench.analysis.plot_report.plot_error_rates")
 def test_plot_metrics(mock_plot_error_rates, mock_plot_graph):
@@ -552,6 +539,8 @@ def test_plot_error_rates():
     ax = MagicMock()
     # Ensure unpacking of legend handles/labels works.
     ax.get_legend_handles_labels.return_value = ([], [])
+    # Provide y-limits for autoscale+pin logic
+    ax.get_ylim.return_value = (0, 1)
 
     def create_agg(freq, num_requests):
         agg = MagicMock()