Merge branch 'arb/streamline_experiments' of https://github.com/alexbanwell1/tsml-eval into arb/streamline_experiments

alexbanwell1 · alexbanwell1 · commit b6797d61b8bf · 2025-11-09T13:21:14.000Z
diff --git a/tsml_eval/evaluation/storage/forecaster_results.py b/tsml_eval/evaluation/storage/forecaster_results.py
@@ -4,7 +4,10 @@
 from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
 
 from tsml_eval.evaluation.storage.estimator_results import EstimatorResults
-from tsml_eval.utils.results_writing import results_third_line, write_results_to_tsml_format
+from tsml_eval.utils.results_writing import (
+    results_third_line,
+    write_results_to_tsml_format,
+)
 
 
 class ForecasterResults(EstimatorResults):
@@ -157,7 +160,7 @@ def save_to_file(self, file_path, full_path=True):
             benchmark_time=self.benchmark_time,
             memory_usage=self.memory_usage,
         )
-        write_results_to_tsml_format (
+        write_results_to_tsml_format(
             self.predictions,
             self.target_labels,
             self.estimator_name,
@@ -169,7 +172,7 @@ def save_to_file(self, file_path, full_path=True):
             time_unit=self.time_unit,
             first_line_comment=self.description,
             second_line=self.parameter_info,
-            third_line=third_line
+            third_line=third_line,
         )
 
     def load_from_file(self, file_path, verify_values=True):
diff --git a/tsml_eval/evaluation/storage/regressor_results.py b/tsml_eval/evaluation/storage/regressor_results.py
@@ -10,7 +10,10 @@
 )
 
 from tsml_eval.evaluation.storage.estimator_results import EstimatorResults
-from tsml_eval.utils.results_writing import regression_results_third_line, write_results_to_tsml_format
+from tsml_eval.utils.results_writing import (
+    regression_results_third_line,
+    write_results_to_tsml_format,
+)
 
 
 class RegressorResults(EstimatorResults):
diff --git a/tsml_eval/experiments/experiments.py b/tsml_eval/experiments/experiments.py
@@ -443,6 +443,7 @@ def load_and_run_classification_experiment(
         benchmark_time=benchmark_time,
     )
 
+
 def transform_input(
     data_transforms,
     x_train: np.ndarray,
@@ -459,6 +460,7 @@ def transform_input(
             x_test = transform.transform(x_test, y_test)
     return x_train, x_test
 
+
 def cross_validate_train_data(estimator, y_train, X_train):
     cv_size = min(10, len(y_train))
     start = int(round(time.time() * 1000))
@@ -467,6 +469,7 @@ def cross_validate_train_data(estimator, y_train, X_train):
     train_estimate_method = f"{cv_size}F-CV"
     return train_preds, train_time, train_estimate_method
 
+
 class Experiment:
     """Run an experiment and save the results to file.
 
@@ -522,6 +525,7 @@ class Experiment:
         Whether to benchmark the hardware used with a simple function and write the
         results. This will typically take ~2 seconds, but is hardware dependent.
     """
+
     def __init__(
         self,
         estimator,
@@ -547,11 +551,15 @@ def __init__(
         )
 
         if not build_test_file and not build_train_file:
-            warnings.warn("All files exist and not overwriting, skipping.", stacklevel=1)
+            warnings.warn(
+                "All files exist and not overwriting, skipping.", stacklevel=1
+            )
             return None
 
         if write_attributes:
-            attribute_file_path = f"{results_path}/{estimator_name}/Workspace/{dataset_name}/"
+            attribute_file_path = (
+                f"{results_path}/{estimator_name}/Workspace/{dataset_name}/"
+            )
         else:
             attribute_file_path = None
 
@@ -571,28 +579,37 @@ def __init__(
         else:
             self.estimator_name = estimator_name
         self.estimator = self.validate_estimator(estimator=estimator)
-        self.second_comment = str(estimator.get_params()).replace("\n", " ").replace("\r", " ")
+        self.second_comment = (
+            str(estimator.get_params()).replace("\n", " ").replace("\r", " ")
+        )
         if attribute_file_path is not None:
             estimator_attributes_to_file(
                 self.estimator, attribute_file_path, max_list_shape=att_max_shape
             )
 
-
     def run_experiment(self):
         x_train, y_train, x_test, y_test = self.load_experimental_data()
 
         self.first_comment = (
             "Generated by run_experiment on "
             f"{datetime.now().strftime('%m/%d/%Y, %H:%M:%S')}"
         )
-        
-        x_train, x_test = transform_input(data_transforms=self.data_transforms, x_train=x_train, x_test=x_test,y_train=y_train, y_test=y_test)
+
+        x_train, x_test = transform_input(
+            data_transforms=self.data_transforms,
+            x_train=x_train,
+            x_test=x_test,
+            y_train=y_train,
+            y_test=y_test,
+        )
         if self.benchmark_time:
             self.benchmark = timing_benchmark(random_state=self.resample_id)
-        
+
         if self.build_train_file:
             train_preds, train_time = self.generate_train_preds(x_train, y_train)
-            self.write_results("TRAIN", y_train, train_preds, train_time, -1, self.benchmark, -1)
+            self.write_results(
+                "TRAIN", y_train, train_preds, train_time, -1, self.benchmark, -1
+            )
 
         if self.build_test_file:
             if self.needs_fit():
@@ -605,28 +622,37 @@ def run_experiment(self):
                 fit_time += int(round(getattr(self.estimator, "_fit_time_milli", 0)))
             test_preds, test_time = self.generate_test_preds(x_test, y_test)
             test_time += int(round(getattr(self.estimator, "_predict_time_milli", 0)))
-            self.write_results("TEST", y_test, test_preds, fit_time, test_time, self.benchmark, mem_usage)
+            self.write_results(
+                "TEST",
+                y_test,
+                test_preds,
+                fit_time,
+                test_time,
+                self.benchmark,
+                mem_usage,
+            )
 
     def load_experimental_data(self):
         return None, None, None, None
 
     def validate_estimator(self, estimator):
         estimator
 
-
     def generate_train_preds(self, X_train, y_train):
         return time_function(self.estimator.fit_predict, (X_train, y_train))
-    
+
     def generate_test_preds(self, x_test, y_test):
         return time_function(self.estimator.predict, x_test)
 
-
     def needs_fit(self):
         return False
 
-
-    def write_results(self, split, y, preds,fit_time, predict_time, benchmark_time, memory_usage):
-        third_line = self.get_third_line(y, preds,fit_time, predict_time, benchmark_time, memory_usage)
+    def write_results(
+        self, split, y, preds, fit_time, predict_time, benchmark_time, memory_usage
+    ):
+        third_line = self.get_third_line(
+            y, preds, fit_time, predict_time, benchmark_time, memory_usage
+        )
         write_results_to_tsml_format(
             preds,
             y,
@@ -642,16 +668,20 @@ def write_results(self, split, y, preds,fit_time, predict_time, benchmark_time,
             second_line=self.second_comment,
             third_line=third_line,
         )
-    def get_third_line(self, y, preds, fit_time, predict_time, benchmark_time, memory_usage):
+
+    def get_third_line(
+        self, y, preds, fit_time, predict_time, benchmark_time, memory_usage
+    ):
         return results_third_line(
-                y=y,
-                preds=preds,
-                fit_time=fit_time,
-                predict_time=predict_time,
-                benchmark_time=benchmark_time,
-                memory_usage=memory_usage,
-            )
-    
+            y=y,
+            preds=preds,
+            fit_time=fit_time,
+            predict_time=predict_time,
+            benchmark_time=benchmark_time,
+            memory_usage=memory_usage,
+        )
+
+
 class ForecastingExperiment(Experiment):
     def __init__(self):
         pass
@@ -677,21 +707,25 @@ def generate_test_preds(self, x_test, y_test):
     def validate_estimator(self, estimator):
         return validate_forecaster(estimator)
 
+
 class RegressionExperiment(Experiment):
     def __init__(
-            self,
-            ignore_custom_train_estimate=False,
-            predefined_resample = False,
-            problem_path="",
-        ):
+        self,
+        ignore_custom_train_estimate=False,
+        predefined_resample=False,
+        problem_path="",
+    ):
         self.is_fitted = False
         self.ignore_custom_train_estimate = ignore_custom_train_estimate
         self.problem_path = problem_path
         self.predefined_resample = predefined_resample
 
     def load_experimental_data(self):
         X_train, y_train, X_test, y_test, resample = load_experiment_data(
-            self.problem_path, self.dataset_name, self.resample_id, self.predefined_resample
+            self.problem_path,
+            self.dataset_name,
+            self.resample_id,
+            self.predefined_resample,
         )
 
         if resample:
@@ -703,30 +737,38 @@ def load_experimental_data(self):
     def generate_train_preds(self, X_train, y_train):
         if self.estimate_train_data and not self.ignore_custom_train_estimate:
             self.train_estimate_method = "Custom"
-            train_preds, train_time = time_function(self.estimator.fit_predict, (X_train, y_train))
+            train_preds, train_time = time_function(
+                self.estimator.fit_predict, (X_train, y_train)
+            )
             self.is_fitted = True
         else:
-            train_preds, train_time, self.train_estimate_method = cross_validate_train_data(self.estimator,y_train,X_train)
+            train_preds, train_time, self.train_estimate_method = (
+                cross_validate_train_data(self.estimator, y_train, X_train)
+            )
         return train_preds, train_time
 
     def needs_fit(self):
         return not self.is_fitted
 
-    def get_third_line(self, y, preds, fit_time, predict_time, benchmark_time, memory_usage):
+    def get_third_line(
+        self, y, preds, fit_time, predict_time, benchmark_time, memory_usage
+    ):
         return regression_results_third_line(
-                y=y,
-                preds=preds,
-                fit_time=fit_time,
-                predict_time=predict_time,
-                benchmark_time=benchmark_time,
-                memory_usage=memory_usage,
-                train_estimate_method=self.train_estimate_method,
-            )
+            y=y,
+            preds=preds,
+            fit_time=fit_time,
+            predict_time=predict_time,
+            benchmark_time=benchmark_time,
+            memory_usage=memory_usage,
+            train_estimate_method=self.train_estimate_method,
+        )
+
     def validate_estimator(self, estimator):
         estimator, estimate_train_data = validate_regressor(estimator)
         self.estimate_train_data = estimate_train_data
         return estimator
-    
+
+
 def validate_forecaster(estimator):
     if isinstance(estimator, BaseForecaster):
         return estimator
@@ -735,28 +777,34 @@ def validate_forecaster(estimator):
             estimator, _ = validate_regressor(estimator)
             return RegressionForecaster(regressor=estimator)
         except TypeError:
-            raise TypeError("forecaster must be an aeon forecaster or a tsml, aeon or sklearn regressor.")
+            raise TypeError(
+                "forecaster must be an aeon forecaster or a tsml, aeon or sklearn regressor."
+            )
+
 
 def validate_regressor(estimator):
     estimate_train_data = False
     if isinstance(estimator, BaseRegressor):
-        if estimator.get_tag(
-            "capability:train_estimate", False, False
-        ):
+        if estimator.get_tag("capability:train_estimate", False, False):
             estimate_train_data = True
         return estimator, estimate_train_data
     elif isinstance(estimator, BaseTimeSeriesEstimator) and is_regressor(estimator):
         return estimator, estimate_train_data
     elif isinstance(estimator, BaseEstimator) and is_regressor(estimator):
-        return SklearnToTsmlRegressor(
-            regressor=estimator,
-            pad_unequal=True,
-            concatenate_channels=True,
-            clone_estimator=False,
-            random_state=(
-                estimator.random_state if hasattr(estimator, "random_state") else None
+        return (
+            SklearnToTsmlRegressor(
+                regressor=estimator,
+                pad_unequal=True,
+                concatenate_channels=True,
+                clone_estimator=False,
+                random_state=(
+                    estimator.random_state
+                    if hasattr(estimator, "random_state")
+                    else None
+                ),
             ),
-        ), estimate_train_data
+            estimate_train_data,
+        )
     else:
         raise TypeError("regressor must be a tsml, aeon or sklearn regressor.")
 
@@ -1207,6 +1255,7 @@ def load_and_run_clustering_experiment(
         benchmark_time=benchmark_time,
     )
 
+
 def run_forecasting_experiment(
     train,
     y_test,
@@ -1258,7 +1307,6 @@ def run_forecasting_experiment(
     pass
 
 
-
 def load_and_run_forecasting_experiment(
     problem_path,
     results_path,
@@ -1301,7 +1349,6 @@ def load_and_run_forecasting_experiment(
         If set to False, this will only build results if there is not a result file
         already present. If True, it will overwrite anything already there.
     """
-
     tmpdir = tempfile.mkdtemp()
     dataset = load_forecasting(dataset, tmpdir)
     series = (
diff --git a/tsml_eval/utils/functions.py b/tsml_eval/utils/functions.py
@@ -8,6 +8,7 @@
 ]
 
 import time
+
 import numpy as np
 
 
@@ -102,7 +103,8 @@ def rank_array(arr, higher_better=True):
 
     return ranks
 
+
 def time_function(function, args=None, kwargs=None):
     start = int(round(time.time() * 1000))
     output = function(*args, **kwargs)
-    return int(round(time.time() * 1000)) - start, output
+    return int(round(time.time() * 1000)) - start, output
diff --git a/tsml_eval/utils/results_writing.py b/tsml_eval/utils/results_writing.py
diff --git a/tsml_eval/utils/tests/test_results_writing.py b/tsml_eval/utils/tests/test_results_writing.py

Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,10 @@`
`10`	`10`	`)`
`11`	`11`
`12`	`12`	`from tsml_eval.evaluation.storage.estimator_results import EstimatorResults`
`13`		`-from tsml_eval.utils.results_writing import regression_results_third_line, write_results_to_tsml_format`
	`13`	`+from tsml_eval.utils.results_writing import (`
	`14`	`+    regression_results_third_line,`
	`15`	`+    write_results_to_tsml_format,`
	`16`	`+)`
`14`	`17`
`15`	`18`
`16`	`19`	`class RegressorResults(EstimatorResults):`