diff --git a/python/hsfs/builtin_transformations.py b/python/hsfs/builtin_transformations.py
index 4426268cc..e15253482 100644
--- a/python/hsfs/builtin_transformations.py
+++ b/python/hsfs/builtin_transformations.py
@@ -15,6 +15,7 @@
 #
 
 import math
+from typing import Union
 
 import pandas as pd
 from hsfs.hopsworks_udf import udf
@@ -25,26 +26,78 @@
 
 
 @udf(float, drop=["feature"])
-def min_max_scaler(feature: pd.Series, statistics=feature_statistics) -> pd.Series:
+def min_max_scaler(
+    feature: Union[pd.Series, int, float],
+    statistics: TransformationStatistics = feature_statistics,
+) -> Union[pd.Series, int, float]:
+    """
+    Scales the feature to the range [0, 1] using the min and max values of the feature in the training data.
+
+    # Arguments
+        feature: `Union[pd.Series, int, float]`. The feature to scale between 0 and 1.
+        statistics: `TransformationStatistics`. The training dataset statistics of the feature; these are computed and passed automatically by Hopsworks when the transformation function is executed.
+
+    # Returns
+        `Union[pd.Series, int, float]`. The scaled feature.
+    """
     return (feature - statistics.feature.min) / (
         statistics.feature.max - statistics.feature.min
     )
 
 
 @udf(float, drop=["feature"])
-def standard_scaler(feature: pd.Series, statistics=feature_statistics) -> pd.Series:
+def standard_scaler(
+    feature: Union[pd.Series, int, float],
+    statistics: TransformationStatistics = feature_statistics,
+) -> Union[pd.Series, int, float]:
+    """
+    Standardize the feature by transforming it to have a mean of 0 and a standard deviation of 1.
+
+    # Arguments
+        feature: `Union[pd.Series, int, float]`. The feature to standardize.
+        statistics: `TransformationStatistics`. The training dataset statistics of the feature; these are computed and passed automatically by Hopsworks when the transformation function is executed.
+
+    # Returns
+        `Union[pd.Series, int, float]`. The scaled feature.
+    """
     return (feature - statistics.feature.mean) / statistics.feature.stddev
 
 
 @udf(float, drop=["feature"])
-def robust_scaler(feature: pd.Series, statistics=feature_statistics) -> pd.Series:
+def robust_scaler(
+    feature: Union[pd.Series, int, float],
+    statistics: TransformationStatistics = feature_statistics,
+) -> Union[pd.Series, int, float]:
+    """
+    Robustly scale the feature by transforming it to have a median of 0 and an interquartile range of 1.
+
+    # Arguments
+        feature: `Union[pd.Series, int, float]`. The feature to scale robustly.
+        statistics: `TransformationStatistics`. The training dataset statistics of the feature; these are computed and passed automatically by Hopsworks when the transformation function is executed.
+
+    # Returns
+        `Union[pd.Series, int, float]`. The scaled feature.
+    """
     return (feature - statistics.feature.percentiles[49]) / (
         statistics.feature.percentiles[74] - statistics.feature.percentiles[24]
     )
 
 
 @udf(int, drop=["feature"], mode="pandas")
-def label_encoder(feature: pd.Series, statistics=feature_statistics) -> pd.Series:
+def label_encoder(
+    feature: Union[pd.Series, str],
+    statistics: TransformationStatistics = feature_statistics,
+) -> Union[pd.Series, int]:
+    """
+    Encode categorical features into numerical features.
+
+    # Arguments
+        feature: `Union[pd.Series, str]`. The feature to encode.
+        statistics: `TransformationStatistics`. The training dataset statistics of the feature; these are computed and passed automatically by Hopsworks when the transformation function is executed.
+
+    # Returns
+        `Union[pd.Series, int]`. The encoded feature.
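+
+    !!! example
+        A minimal usage sketch. The feature store handle `fs`, the query, and the feature name `city` below are illustrative, not part of this PR:
+        ```python
+        from hsfs.builtin_transformations import label_encoder
+
+        # Attach the built-in label encoder to the categorical feature "city"
+        # as a model-dependent transformation function of a feature view.
+        feature_view = fs.create_feature_view(
+            name="transactions_view",
+            query=query,
+            transformation_functions=[label_encoder("city")],
+        )
+        ```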
+ """ unique_data = sorted([value for value in statistics.feature.unique_values]) value_to_index = {value: index for index, value in enumerate(unique_data)} # Unknown categories not present in training dataset are encoded as -1. @@ -57,7 +110,20 @@ def label_encoder(feature: pd.Series, statistics=feature_statistics) -> pd.Serie @udf(bool, drop=["feature"], mode="pandas") -def one_hot_encoder(feature: pd.Series, statistics=feature_statistics) -> pd.Series: +def one_hot_encoder( + feature: Union[pd.Series, str], + statistics: TransformationStatistics = feature_statistics, +) -> pd.DataFrame: + """ + Encode categorical features as a one-hot numeric array. + + # Arguments + feature: `Union[pd.Series, str]`. The feature to encode. + statistics: `TransformationStatistics`. The training dataset statistics of the feature, this is computed and passed automatically by Hopsworks when the transformation function is executed. + + # Returns + `pd.DataFrame`. A pandas dataframe with the one-hot encoded features. + """ unique_data = [value for value in statistics.feature.unique_values] # One hot encode features. Re-indexing to set missing categories to False and drop categories not in training data statistics. diff --git a/python/hsfs/hopsworks_udf.py b/python/hsfs/hopsworks_udf.py index b5f3221a5..010936284 100644 --- a/python/hsfs/hopsworks_udf.py +++ b/python/hsfs/hopsworks_udf.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from __future__ import annotations import ast import copy @@ -38,7 +39,12 @@ class UDFExecutionMode(Enum): """ - Class that store the possible execution types of UDF's. + Possible execution modes for a transformation function. The execution mode specifies the way in which a transformation function is executed in Hopsworks. + + Currently Hopsworks supports three execution modes for a transformation function. + - `default` - This mode executes the transformation function as a Pandas UDF during training dataset generation or batch inference and as a Python UDF during feature vector reterival. + - `pandas` - This mode would always execute the transformation function as a Pandas UDF. + - `python` - This mode would always execute the transformation function as a Python UDF. """ DEFAULT = "default" @@ -76,36 +82,37 @@ def udf( return_type: Union[List[type], type], drop: Optional[Union[str, List[str]]] = None, mode: Literal["default", "python", "pandas"] = "default", -) -> "HopsworksUdf": +) -> HopsworksUdf: """ - Create an User Defined Function that can be and used within the Hopsworks Feature Store to create transformation functions. - - Hopsworks UDF's are user defined functions that executes as 'pandas_udf' when executing - in spark engine and as pandas functions in the python engine. The pandas udf/pandas functions - gets as inputs pandas Series's and can provide as output a pandas Series or a pandas DataFrame. - A Hopsworks udf is defined using the `hopsworks_udf` decorator. The outputs of the defined UDF - must be mentioned in the decorator as a list of python types. + Decorator used to define a User Defined Function (UDF) in Hopsworks. + UDF's defined using this decorator can be attached to: + - Feature groups: To create On-Demand transformation functions. + - Feature views: To create Model-Dependent transformation functions. !!! 
         ```python
         from hopsworks import udf
 
-        @udf(float)
-        def add_one(data1):
-            return data1 + 1
+        @udf(float, drop=["data2"], mode="python")
+        def add(data1, data2):
+            return data1 + data2
         ```
 
     # Arguments
-        return_type: `Union[List[type], type]`. The output types of the defined UDF
-        drop: `Optional[Union[str, List[str]]]`. The features to be dropped after application of transformation functions. Default's to None.
-        mode: `Literal["default", "python", "pandas"]`. The exection mode of the UDF. Default's to 'default'
+        return_type: The type of the output features returned from the transformation function.
+        drop: List that contains the names of the arguments to be removed from the dataframe after the application of the transformation function. The features mapped to these arguments are removed from the final output dataframe after all transformation functions are executed.
+        mode: The execution mode of the UDF, which determines the way in which the UDF is executed during training dataset generation, batch data retrieval and online feature vector retrieval. Hopsworks currently supports three execution modes for a transformation function:
+            - `default` - Executes the transformation function as a Pandas UDF during training dataset generation and batch inference, and as a Python UDF during feature vector retrieval.
+            - `pandas` - Always executes the transformation function as a Pandas UDF.
+            - `python` - Always executes the transformation function as a Python UDF.
+            Defaults to `default`.
 
     # Returns
-        `HopsworksUdf`: The metadata object for hopsworks UDF's.
+        `HopsworksUdf`: The metadata object that contains all information needed to execute the UDF in both the Python and Spark engines.
 
     # Raises
-        `hopsworks.client.exceptions.FeatureStoreException` : If unable to create UDF.
+        `hopsworks.client.exceptions.FeatureStoreException` : If unable to create the UDF.
     """
 
     def wrapper(func: Callable) -> HopsworksUdf:
@@ -147,21 +154,23 @@ class HopsworksUdf:
     """
     Meta data for user defined functions.
 
-    Stores meta data required to execute the user defined function in both spark and python engine.
-    The class generates uses the metadata to dynamically generate user defined functions based on the
-    engine it is executed in.
+    This class stores all the metadata required to execute the user defined function in both the Spark and Python engines.
+    This metadata is used to generate the source code that executes the function in each engine.
+
+    !!! Note
+        UDFs in Hopsworks should only be created using the `udf` decorator. They should not be created directly using the constructor.
 
     # Arguments
         func : `Union[Callable, str]`. The transformation function object or the source code of the transformation function.
-        return_types : `Union[List[type], type, List[str], str]`. A python type or a list of python types that denotes the data types of the columns output from the transformation functions.
-        name : `Optional[str]`. Name of the transformation function.
-        transformation_features : `Optional[List[TransformationFeature]]`. A list of objects of `TransformationFeature` that maps the feature used for transformation to their corresponding statistics argument names if any
-        transformation_function_argument_names : `Optional[List[str]]`. The argument names of the transformation function.
+        return_types : `Union[List[type], type, List[str], str]`. A python type or a list of python types that denotes the data types of the columns returned from the UDF.
+        name : `Optional[str]`. Name of the UDF.
+        transformation_features : `Optional[List[TransformationFeature]]`. A list of `TransformationFeature` objects that map the argument names of the UDF to the feature names that are passed as input to it when the UDF is executed.
+        transformation_function_argument_names : `Optional[List[str]]`. The names of the arguments used by the UDF.
         dropped_argument_names : `Optional[List[str]]`. The arguments to be dropped from the finial DataFrame after the transformation functions are applied.
-        dropped_feature_names : `Optional[List[str]]`. The feature name corresponding to the arguments names that are dropped
-        feature_name_prefix: `Optional[str]`. Prefixes if any used in the feature view.
-        output_column_names: `Optional[List[str]]`. The names of the output columns returned from the transformation function.
-        generate_output_col_names: `bool`. Generate default output column names for the transformation function. Default's to True.
+        dropped_feature_names : `Optional[List[str]]`. The feature names corresponding to the argument names that are dropped.
+        feature_name_prefix: `Optional[str]`. Prefix, if any, used in the feature view. This is required if an on-demand feature from a feature group is added to a feature view using a prefix.
+        output_column_names: `Optional[List[str]]`. The names of the output columns returned from the UDF.
+        generate_output_col_names: `bool`. Generate default output column names for the UDF. Defaults to `True`.
     """
 
     # Mapping for converting python types to spark types - required for creating pandas UDF's.
@@ -697,9 +706,11 @@ def renaming_wrapper(*args):
 
         # returning executed function object
         return eval("renaming_wrapper", scope)
 
-    def __call__(self, *features: List[str]) -> "HopsworksUdf":
+    def __call__(self, *features: List[str]) -> HopsworksUdf:
         """
-        Set features to be passed as arguments to the user defined functions
+        Set features to be passed as arguments to the user defined function.
+
+        This function maps the UDF argument names to the passed feature names. When executing the UDF, Hopsworks extracts the data corresponding to these features and passes them to the UDF.
 
         # Arguments
             features: Name of features to be passed to the User Defined function
@@ -745,9 +756,16 @@ def __call__(self, *features: List[str]) -> "HopsworksUdf":
         udf.dropped_features = updated_dropped_features
         return udf
 
-    def alias(self, *args: str):
+    def alias(self, *args: str) -> HopsworksUdf:
         """
         Set the names of the transformed features output by the UDF.
+
+        # Arguments
+            args: `str`. The names of the output features returned from the UDF.
+        # Returns
+            `HopsworksUdf`: The UDF object with the updated output feature names.
+        # Raises
+            `FeatureStoreException`: If the output feature names are not strings or if the number of output feature names does not match the number of features returned by the UDF.
         """
         if len(args) == 1 and isinstance(args[0], list):
             # If a single list is passed, use it directly
@@ -893,8 +911,8 @@ def json(self) -> str:
 
     @classmethod
     def from_response_json(
-        cls: "HopsworksUdf", json_dict: Dict[str, Any]
-    ) -> "HopsworksUdf":
+        cls: HopsworksUdf, json_dict: Dict[str, Any]
+    ) -> HopsworksUdf:
         """
         Function that constructs the class object from its json serialization.
@@ -1000,7 +1018,9 @@ def from_response_json(
     @property
     def return_types(self) -> List[str]:
-        """Get the output types of the UDF"""
+        """
+        Types of the output features returned by the UDF. These return types are used to execute the UDF as a Pandas UDF or UDF in the Spark engine.
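+
+        For example, a UDF defined with `@udf(float)` would map to a single `double` return type when executed as a Pandas UDF in the Spark engine (illustrative; the exact types follow the python-to-spark type mapping kept by this class).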
+ """ # Update the number of outputs for one hot encoder to match the number of unique values for the feature if self.function_name == "one_hot_encoder" and self.transformation_statistics: self.update_return_type_one_hot() @@ -1008,24 +1028,31 @@ def return_types(self) -> List[str]: @property def function_name(self) -> str: - """Get the function name of the UDF""" + """The name of the UDF.""" return self._function_name @property def statistics_required(self) -> bool: - """Get if statistics for any feature is required by the UDF""" + """ + True if any features requires training dataset statistics in the defined UDF. + + Arguments in an UDF can request training dataset statistics for it to be computed by setting the default value for the argument `statistics` to an instance of `TransformationStatistics` when defining a UDF. + """ return bool(self.statistics_features) @property def transformation_statistics( self, ) -> Optional[TransformationStatistics]: - """Feature statistics required for the defined UDF""" + """ + The statistics being used when executing the transformation function. + This property is set internally by Hopsworks when generating training datasets and when a feature view is initialized for serving using the `init_serving` function. + """ return self._statistics @property def output_column_names(self) -> List[str]: - """Output columns names of the transformation function""" + """The names of the features output by the UDF. The dataframe output after applying the transformation function will use these names to store the output columns.""" if self._feature_name_prefix: return [ self._feature_name_prefix + output_col_name @@ -1037,7 +1064,10 @@ def output_column_names(self) -> List[str]: @property def transformation_features(self) -> List[str]: """ - List of feature names to be used in the User Defined Function. + The names of the feature whose data should be passed as input to the UDF. When executing the UDF, the data corresponding to these features are extracted and passed to the UDF. + + By default, Hopsworks would use the name of the UDF's arguments as the feature names to be passed as input to the transformation function. + However this behaviour can be over ridden by explicitly passed the required feature name by calling the UDF. """ if self._feature_name_prefix: return [ @@ -1054,7 +1084,7 @@ def transformation_features(self) -> List[str]: @property def unprefixed_transformation_features(self) -> List[str]: """ - List of feature name used in the transformation function without the feature name prefix. + The unprefixed names of the feature whose data should be passed as input to the UDF. """ return [ transformation_feature.feature_name @@ -1063,13 +1093,19 @@ def unprefixed_transformation_features(self) -> List[str]: @property def feature_name_prefix(self) -> Optional[str]: - """The feature name prefix that needs to be added to the feature names""" + """ + The prefix that is applied to input and output feature names of a transformation function. + + A prefix is set internally by Hopsworks if a on-demand feature from a feature group is added to a feature view using a prefix. + """ return self._feature_name_prefix @property def statistics_features(self) -> List[str]: """ - List of feature names that require statistics + List of feature names that requires the training statistics. 
+
+        A UDF can request training dataset statistics by setting the default value of its `statistics` argument to an instance of `TransformationStatistics` when the UDF is defined.
         """
         return [
             transformation_feature.feature_name
@@ -1090,7 +1126,7 @@ def _statistics_argument_mapping(self) -> Dict[str, str]:
 
     @property
     def _statistics_argument_names(self) -> List[str]:
         """
-        List of argument names required for statistics
+        List of argument names required for statistics.
         """
         return [
             transformation_feature.statistic_argument_name
@@ -1101,7 +1137,7 @@ def _statistics_argument_names(self) -> List[str]:
 
     @property
     def dropped_features(self) -> List[str]:
         """
-        List of features that will be dropped after the UDF is applied.
+        List of features that will be dropped from the output dataframe. These features are dropped from the final output dataframe after all UDFs in the feature view/feature group are executed.
         """
         if self._feature_name_prefix and self._dropped_features:
             return [
@@ -1113,6 +1149,9 @@
 
     @property
     def execution_mode(self) -> UDFExecutionMode:
+        """
+        The execution mode of the transformation function, which specifies the way in which the transformation function is executed in Hopsworks.
+        """
         return self._execution_mode
 
     @property
diff --git a/python/hsfs/transformation_function.py b/python/hsfs/transformation_function.py
index d0fc41d13..9c4466377 100644
--- a/python/hsfs/transformation_function.py
+++ b/python/hsfs/transformation_function.py
@@ -50,7 +50,10 @@ class TransformationType(Enum):
 
 class TransformationFunction:
     NOT_FOUND_ERROR_CODE = 270160
     """
-    DTO class for transformation functions.
+    DTO class for transformation functions, used to store and retrieve transformation functions from the backend.
+    A transformation function is created in Hopsworks in two cases:
+    - When a UDF created using the `@udf` decorator is attached to a feature view/feature group.
+    - When a transformation function is fetched from or saved to the backend.
 
     # Arguments
         featurestore_id : `int`. Id of the feature store in which the transformation function is saved.
@@ -165,7 +168,7 @@ def __call__(self, *features: List[str]) -> TransformationFunction:
 
         # Arguments
             features: `List[str]`. Name of features to be passed to the User Defined function
         # Returns
-            `HopsworksUdf`: Meta data class for the user defined function.
+            `TransformationFunction`: The transformation function object with the updated input feature names.
         # Raises
             `hopsworks.client.exceptions.FeatureStoreException`: If the provided number of features do not match the number of arguments in the defined UDF or if the provided feature names are not strings.
         """
@@ -256,6 +259,11 @@ def to_dict(self) -> Dict[str, Any]:
 
     def alias(self, *args: str):
         """
         Set the names of the transformed features output by the transformation function.
+
+        # Arguments
+            args: `str`. The names of the output features returned from the UDF.
+        # Returns
+            `TransformationFunction`: The transformation function object with the updated output feature names.
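+
+        !!! example
+            A minimal sketch; the transformation function name `plus_one` and the feature name `age` are illustrative, not part of this PR:
+            ```python
+            plus_one = fs.get_transformation_function(name="plus_one", version=1)
+
+            # Name the transformed output column "age_plus_one" instead of the
+            # auto-generated default name.
+            plus_one("age").alias("age_plus_one")
+            ```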
""" self.__hopsworks_udf.alias(*args) @@ -343,8 +351,8 @@ def _validate_transformation_type( ) @property - def id(self) -> id: - """Transformation function id.""" + def id(self) -> int: + """The id for the transformation function that can be used to uniquely identify it in the backend.""" return self._id @id.setter @@ -353,7 +361,10 @@ def id(self, id: int) -> None: @property def version(self) -> int: - """Version of the transformation function.""" + """The version number of the transformation function. + Multiple versions of a transformation function can be created and saved in the backend by using the `feature_store.create_transformation_function` method. + These can be retrieved by using the `feature_store.get_transformation_function` method. + """ return self._version @version.setter @@ -362,7 +373,7 @@ def version(self, version: int) -> None: @property def hopsworks_udf(self) -> HopsworksUdf: - """Meta data class for the user defined transformation function.""" + """The metadata object for a "User Defined Function" (UDF) that contains all information about a UDF like the function name, return type, arguments, output column names, and more.""" # Make sure that the output column names for a model-dependent or on-demand transformation function, when accessed externally from the class. if ( self.transformation_type @@ -385,7 +396,7 @@ def transformation_type(self, transformation_type) -> None: def transformation_statistics( self, ) -> Optional[TransformationStatistics]: - """Feature statistics required for the defined UDF""" + """Object that store the statistics for all features that require training dataset statistics in a transformation function.""" return self.__hopsworks_udf.transformation_statistics @transformation_statistics.setter diff --git a/python/hsfs/transformation_statistics.py b/python/hsfs/transformation_statistics.py index bff42c384..5e6a0658d 100644 --- a/python/hsfs/transformation_statistics.py +++ b/python/hsfs/transformation_statistics.py @@ -17,38 +17,17 @@ from __future__ import annotations import json -from dataclasses import dataclass -from typing import Any, Dict, Mapping, Optional, Union +from typing import Any, Dict, List, Mapping, Optional, Union import humps -@dataclass class FeatureTransformationStatistics: """ - Data class that contains all the statistics parameters that can be used for transformations inside a custom transformation function. - """ + Class that stores the statistics for a single feature that is used inside a transformation function. - feature_name: str - count: int = None - # for any feature type - completeness: Optional[float] = None - num_non_null_values: Optional[int] = None - num_null_values: Optional[int] = None - approx_num_distinct_values: Optional[int] = None - # for numerical features - min: Optional[float] = None - max: Optional[float] = None - sum: Optional[float] = None - mean: Optional[float] = None - stddev: Optional[float] = None - percentiles: Optional[Mapping[str, float]] = None - # with exact uniqueness - distinctness: Optional[float] = None - entropy: Optional[float] = None - uniqueness: Optional[float] = None - exact_num_distinct_values: Optional[int] = None - extended_statistics: Optional[Union[dict, str]] = None + These statistics are computed when a training dataset is generated from a feature view. 
+ """ def __init__( self, @@ -100,67 +79,72 @@ def __init__( @property def feature_name(self) -> str: - """Name of the feature.""" + """The name of the feature that is associated with the statistics.""" return self._feature_name @property def count(self) -> int: - """Number of values.""" + """The count that denotes the number of values of the feature in the training data.""" return self._count @property def completeness(self) -> Optional[float]: - """Fraction of non-null values in a column.""" + """The fraction of non-null values for the feature in the training data.""" return self._completeness @property def num_non_null_values(self) -> Optional[int]: - """Number of non-null values.""" + """The count that denotes the number of non-null values for the feature in the training data.""" return self._num_non_null_values @property def num_null_values(self) -> Optional[int]: - """Number of null values.""" + """Number of null values of the feature in the training data.""" return self._num_null_values @property def approx_num_distinct_values(self) -> Optional[int]: - """Approximate number of distinct values.""" + """Approximate number of distinct values of the feature in the training data.""" return self._approx_num_distinct_values @property def min(self) -> Optional[float]: - """Minimum value.""" + """The minimum value of the feature in the training data.""" return self._min @property def max(self) -> Optional[float]: - """Maximum value.""" + """The maximum value of the feature in the training data.""" return self._max @property def sum(self) -> Optional[float]: - """Sum of all feature values.""" + """The sum of all values of the feature in the training data.""" return self._sum @property def mean(self) -> Optional[float]: - """Mean value.""" + """The average value of all the values of the feature in the training data.""" return self._mean @property def stddev(self) -> Optional[float]: - """Standard deviation of the feature values.""" + """Standard deviation of all the values of the feature in the training data.""" return self._stddev @property - def percentiles(self) -> Optional[Mapping[str, float]]: - """Percentiles.""" + def percentiles(self) -> Optional[List[float]]: + """Percentiles of the feature in the training data. Percentiles are the values below which a given percentage of data points in a dataset fall. + + The percentiles are stored as a list of 100 values, one for each percentile. + !!! note "Example" + The 50th percentile is the value below which 50% of the data points in the training data falls is in the 49th index of the list. + """ return self._percentiles @property def distinctness(self) -> Optional[float]: - """Fraction of distinct values of a feature over the number of all its values. Distinct values occur at least once. + """Fraction of distinct values over the total number of values of the feature in the training data. Distinct values occur at least once. !!! note "Example" $[a, a, b]$ contains two distinct values $a$ and $b$, so distinctness is $2/3$. @@ -169,9 +153,10 @@ def distinctness(self) -> Optional[float]: @property def entropy(self) -> Optional[float]: - """Entropy is a measure of the level of information contained in an event (feature value) when considering all possible events (all feature values). - Entropy is estimated using observed value counts as the negative sum of (value_count/total_count) * log(value_count/total_count). + """Entropy of the feature in the training data. 
+
+        Entropy is a measure of the level of information contained in an event (feature value) when considering all possible events (all feature values).
+        Entropy is estimated using observed value counts as the negative sum of (value_count/total_count) * log(value_count/total_count).
 
         !!! note "Example"
             $[a, b, b, c, c]$ has three distinct values with counts $[1, 2, 2]$.
@@ -181,7 +166,7 @@ def entropy(self) -> Optional[float]:
 
     @property
     def uniqueness(self) -> Optional[float]:
-        """Fraction of unique values over the number of all values of a column. Unique values occur exactly once.
+        """Fraction of unique values over the total number of values of the feature in the training data. Unique values occur exactly once.
 
         !!! note "Example"
             $[a, a, b]$ contains one unique value $b$, so uniqueness is $1/3$.
@@ -190,27 +175,27 @@ def uniqueness(self) -> Optional[float]:
 
     @property
     def exact_num_distinct_values(self) -> Optional[int]:
-        """Exact number of distinct values."""
+        """Exact number of distinct values of the feature in the training data."""
         return self._exact_num_distinct_values
 
     @property
     def correlations(self) -> Optional[dict]:
-        """Correlations of feature values."""
+        """The correlations of the feature's values with other features in the training data."""
         return self._correlations
 
     @property
    def histogram(self) -> Optional[dict]:
-        """Histogram of feature values."""
+        """Histogram of all the values of the feature in the training data."""
         return self._histogram
 
     @property
     def kll(self) -> Optional[dict]:
-        """KLL of feature values."""
+        """KLL sketch of all the values of the feature in the training data."""
         return self._kll
 
     @property
     def unique_values(self) -> Optional[dict]:
-        """Number of Unique Values."""
+        """The unique values of the feature in the training data."""
         return self._unique_values
 
     @classmethod
@@ -248,9 +233,31 @@ def __init__(self, *features: str):
         )
 
     def init_statistics(self, feature_name: str) -> FeatureTransformationStatistics:
+        """
+        Initializes a `FeatureTransformationStatistics` object for a specific feature with default values.
+
+        # Arguments
+            feature_name: `str`.
+                The name of the feature for which the statistics are to be initialized.
+
+        # Returns
+            `FeatureTransformationStatistics`.
+                The initialized `FeatureTransformationStatistics` object for the feature.
+        """
         return FeatureTransformationStatistics(feature_name=feature_name)
 
     def set_statistics(self, feature_name: str, statistics: Dict[str, Any]) -> None:
+        """
+        Sets the statistics for a specific feature.
+
+        This function is used to overwrite the default values of the `FeatureTransformationStatistics` object for a specific feature with the values computed from the training data.
+
+        # Arguments
+            feature_name: `str`.
+                The name of the feature for which the statistics are to be set.
+            statistics: `Dict[str, Any]`.
+                The statistics for the feature.
+        """
         self.__dict__[feature_name] = (
             FeatureTransformationStatistics.from_response_json(statistics)
         )