From 61eb5582c192366db05fdc13a4c65f8a69a177c3 Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Fri, 2 Jun 2023 15:02:30 +0000 Subject: [PATCH 01/17] Change config --- batchflow/config.py | 395 ++++++++++++++++++++++---------------------- 1 file changed, 197 insertions(+), 198 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index b4a606dbe..b49404d34 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -1,20 +1,18 @@ """ Config class""" -from pprint import pformat +from pathlib import Path import numpy as np +class IAddDict(dict): + """ dict that supports update via += """ + def __iadd__(self, other): + if isinstance(other, dict): + self.update(other) + else: + raise TypeError(f"unsupported operand type(s) for +=: 'IAddDict' and '{type(other)}'") + return self -class Config: +class Config(dict): """ Class for configs that can be represented as nested dicts with easy indexing by slashes """ - - class IAddDict(dict): - """ dict that supports update via += """ - def __iadd__(self, other): - if isinstance(other, dict): - self.update(other) - else: - raise TypeError(f"unsupported operand type(s) for +=: 'IAddDict' and '{type(other)}'") - return self - def __init__(self, config=None, **kwargs): """ Create Config @@ -23,45 +21,94 @@ def __init__(self, config=None, **kwargs): config : dict, Config or None an object to initialize Config if dict, all keys and values slashes will be parsed into nested structure of dicts - and the resulting dictionary will be saved into self.config - if an instance on Config, config.config will be saved to self.config (not a copy!) + and the resulting dictionary will be saved into self + if an instance on Config, config will be saved to self if None, empty dictionary will be created kwargs : - parameters from kwargs also will be parsed and saved into self.config + parameters from kwargs also will be parsed and saved into self """ if config is None: - self.config = Config.IAddDict() - elif isinstance(config, (dict, list)): - self.config = self.parse(config) + pass elif isinstance(config, Config): - self.config = config.config + super().__init__(config) + elif isinstance(config, (dict, list)): + self.parse(config) else: raise TypeError(f'config must be dict, Config or list but {type(config)} was given') for key, value in kwargs.items(): self.put(key, value) - def pop(self, variables, config=None, **kwargs): - """ Returns variables and remove them from config + def parse(self, config): + """ Parses flatten config with slashes Parameters ---------- - variables : str or list of strs - names of variables. '/' is used to get value from nested dict - config : dict, Config or None - if None, variables will be getted from self.config else from config + config : dict, Config or list Returns ------- - single value or a tuple + new_config : dict """ - if isinstance(config, Config): - value = config.pop(variables, None, **kwargs) + if isinstance(config, dict): + items = config.items() + elif isinstance(config, list): + items = config + if np.any([len(item) != 2 for item in items]): + raise ValueError('tuples in list should represent pairs key-value' + ', and therefore must be always the length of 2') + + for key, value in items: + if not isinstance(key, (str, Path)): + raise TypeError(f'only str and Path keys are supported, "{str(key)}" is of {type(key)} type') + + if isinstance(key, str): + key = '/'.join(filter(None, key.split('/'))) + + self.put(key, value) + + return self + + def put(self, key, value): + """ Put a new variable into config + + Parameters + ---------- + key : str, Path + key to add. '/' is used to put value into nested dict + value : masc + """ + if not isinstance(value, Config) and isinstance(value, dict): + value = Config(value) + + if isinstance(key, str) and '/' in key: + keys = key.split('/') + prefix = keys[:-1] + var_name = keys[-1] + + config = self + for i, p in enumerate(prefix): + if p not in config: + config[p] = {} + if isinstance(config[p], dict): + config = config[p] + else: + value = Config({'/'.join(prefix[i+1:] + [var_name]): value}) + var_name = p + break + + if var_name in config and isinstance(config[var_name], dict) and isinstance(value, Config): + config[var_name].update(value) + else: + config[var_name] = value + else: - value = self._get(variables, config, pop=True, **kwargs) - return value + if key in self and isinstance(self[key], dict) and isinstance(value, Config): + self[key].update(value) + else: + super().__setitem__(key, value) - def get(self, variables, default=None, config=None): + def get(self, variables, config=None, default=None): """ Returns variables from config Parameters @@ -78,14 +125,12 @@ def get(self, variables, default=None, config=None): single value or a tuple """ if isinstance(config, Config): - val = config.get(variables, default=default) + value = config.get(variables, default=default) else: - val = self._get(variables, config, default=default, pop=False) - return val + value = self._get(variables, config=config, default=default) + return value def _get(self, variables, config=None, **kwargs): - if config is None: - config = self.config pop = kwargs.get('pop', False) has_default = 'default' in kwargs default = kwargs.get('default') @@ -97,123 +142,101 @@ def _get(self, variables, config=None, **kwargs): ret_vars = [] for variable in variables: - _config = config - if '/' in variable: - var = variable.split('/') - prefix = var[:-1] - var_name = var[-1] - else: - prefix = [] - var_name = variable + if isinstance(variable, str) and '/' in variable: + keys = variable.split('/') + prefix = keys[:-1] + var_name = keys[-1] + + _config = self if config is None else config + for p in prefix: + if p in _config: + _config = _config[p] + else: + _config = None + break - for p in prefix: - if p in _config: - _config = _config[p] - else: - _config = None - break - if isinstance(_config, dict): - if pop: - if has_default: - val = _config.pop(var_name, default) + if isinstance(_config, dict): + if pop: + value = _config.pop(var_name) else: - val = _config.pop(var_name) + value = _config[var_name] else: if has_default: - val = _config.get(var_name, default) + value = default else: - val = _config[var_name] + raise KeyError(f"Key '{variable}' not found") + else: - if has_default: - val = default - else: - raise KeyError(f"Key '{variable}' not found") + _config = self if config is None else config + value = self._get_var_from_object(variable, has_default, default, pop, _config) - val = Config(val) if isinstance(val, (dict, Config.IAddDict)) else val - ret_vars.append(val) + ret_vars.append(value) if unpack: ret_vars = ret_vars[0] else: ret_vars = tuple(ret_vars) + return ret_vars - def put(self, variable, value, config=None): - """ Put a new variable into config + def _get_var_from_object(self, variable, has_default, default, pop, config): + """ Get variable from the object. + The object can be either Config or dict. + If dict, the parent methods will be used. + """ + if isinstance(config, Config): + obj = super() + else: + obj = config + + if variable in config: + value = obj.pop(variable) if pop else obj.__getitem__(variable) + else: + if has_default: + value = obj.pop(variable, default) if pop else obj.get(variable, default) + value = Config(value) if isinstance(value, dict) and len(value) > 0 else value + else: + raise KeyError(f"Key '{variable}' not found") + + return value + + def update(self, other, **kwargs): + """ Update config with values from other Parameters ---------- - variable : str - variable to add. '/' is used to put value into nested dict - value : masc - config : dict, Config or None - if None value will be putted into self.config else from config + other : dict or Config + + kwargs : + parameters from kwargs also will be included into the resulting config """ - if config is None: - config = self.config - elif isinstance(config, Config): - config = config.config - if isinstance(value, dict): - value = Config(value) - variable = variable.strip('/') - if '/' in variable: - var = variable.split('/') - prefix = var[:-1] - var_name = var[-1] - else: - prefix = [] - var_name = variable - - for i, p in enumerate(prefix): - if p not in config: - config[p] = Config.IAddDict() - if isinstance(config[p], dict): - config = config[p] - else: # for example, we put value with key 'a/b' into `{a: c}` - value = Config({'/'.join(prefix[i+1:] + [var_name]): value}) - var_name = p - break - if var_name in config and isinstance(config[var_name], dict) and isinstance(value, Config): - config[var_name] = Config(config[var_name]) - config[var_name].update(value) - config[var_name] = config[var_name].config + other = {} if other is None else other + if isinstance(other, dict): + for key, value in other.items(): + self.put(key, value) else: - if isinstance(value, Config): - config[var_name] = value.config - else: - config[var_name] = value + for key, value in kwargs.items(): + self.put(key, value) - def parse(self, config): - """ Parses flatten config with slashes + def pop(self, variables, config=None, default=None, **kwargs): + """ Returns variables and remove them from config Parameters ---------- - config : dict, Config or list + variables : str or list of strs + names of variables. '/' is used to get value from nested dict + config : dict, Config or None + if None, variables will be getted from self.config else from config Returns ------- - new_config : dict + single value or a tuple """ if isinstance(config, Config): - return config.config - if isinstance(config, dict): - items = config.items() - elif isinstance(config, list): - items = config - if np.any([len(item) != 2 for item in items]): - raise ValueError('tuples in list should represent pairs key-value' - ', and therefore must be always the length of 2') + value = config.pop(variables, default=default) else: - raise TypeError(f'config must be dict, Config or list but {type(config)} was given') - new_config = Config.IAddDict() - for key, value in items: - if isinstance(value, dict): - value = self.parse(value) - if not isinstance(key, str): - raise TypeError(f'only str keys are supported, "{str(key)}" is of {type(key)} type') - key = '/'.join(filter(None, key.split('/'))) #merge multiple consecutive slashes '/' to one - self.put(key, value, new_config) - return new_config + value = self._get(variables, pop=True, default=default, **kwargs) + return value def flatten(self, config=None): """ Transforms nested dict into flatten dict @@ -227,62 +250,60 @@ def flatten(self, config=None): ------- new_config : dict """ - if config is None: - config = self.config - elif isinstance(config, Config): - config = config.config - new_config = Config.IAddDict() + config = self if config is None else config + + new_config = IAddDict() # Do we really need here IAddDict? for key, value in config.items(): - if isinstance(value, Config): - value = value.config if isinstance(value, dict) and len(value) > 0: value = self.flatten(value) for _key, _value in value.items(): new_config[key+'/'+_key] = _value else: new_config[key] = value + return new_config - def __add__(self, other): + def __getattr__(self, key): + if key in self: + value = self.get(key) + value = Config(value) if isinstance(value, dict) else value + return value + raise AttributeError(key) + + def __iadd__(self, other): if isinstance(other, dict): + self.update(other) + else: + raise TypeError(f"unsupported operand type(s) for +=: 'Config' and '{type(other)}'") + return self + + def __add__(self, other): + if isinstance(other, dict) and not isinstance(other, Config): other = Config(other) if isinstance(other, Config): return Config([*self.flatten().items(), *other.flatten().items()]) return NotImplemented def __radd__(self, other): - if isinstance(other, dict): + if isinstance(other, dict) and not isinstance(other, Config): other = Config(other) return other.__add__(self) - def __getitem__(self, key): - value = self._get(key) - return value - def __setitem__(self, key, value): self.pop(key, default=None) self.put(key, value) + def __getitem__(self, key): + value = self._get(key) + return value + def __delitem__(self, key): self.pop(key) - def __getattr__(self, key): - if key in self.config: - value = self._get(key) - value = Config(value) if isinstance(value, dict) else value - return value - raise AttributeError(key) - - def __getstate__(self): - """ Must be explicitly defined for pickling to work. """ - return vars(self) - - def __setstate__(self, state): - """ Must be explicitly defined for pickling to work. """ - vars(self).update(state) - - def __len__(self): - return len(self.config) + def __eq__(self, other): + self_ = self.flatten() if isinstance(self, Config) else self + other_ = Config(other).flatten() if isinstance(other, dict) and not isinstance(other, Config) else other + return self_.__eq__(other_) def __rshift__(self, other): """ @@ -297,28 +318,17 @@ def __rshift__(self, other): """ return other << self - def __eq__(self, other): - self_ = self.flatten() if isinstance(self, Config) else self - other_ = Config(other).flatten() if isinstance(other, (dict, Config)) else other - return self_.__eq__(other_) - - def items(self, flatten=False): - """ Returns config items + def copy(self): + """ Create a shallow copy of the instance. """ + return Config(super().copy()) - Parameters - ---------- - flatten : bool - if False, keys and values will be getted from first level of nested dict, else from the last + def __getstate__(self): + """ Must be explicitly defined for pickling to work. """ + return vars(self) - Returns - ------- - dict_items - """ - if flatten: - items = self.flatten().items() - else: - items = self.config.items() - return items + def __setstate__(self, state): + """ Must be explicitly defined for pickling to work. """ + vars(self).update(state) def keys(self, flatten=False): """ Returns config keys @@ -335,7 +345,7 @@ def keys(self, flatten=False): if flatten: keys = self.flatten().keys() else: - keys = self.config.keys() + keys = super().keys() return keys def values(self, flatten=False): @@ -353,34 +363,23 @@ def values(self, flatten=False): if flatten: values = self.flatten().values() else: - values = self.config.values() + values = super().values() return values - def update(self, other=None, **kwargs): - """ Update config with values from other + def items(self, flatten=False): + """ Returns config items Parameters ---------- - other : dict or Config + flatten : bool + if False, keys and values will be getted from first level of nested dict, else from the last - kwargs : - parameters from kwargs also will be included into the resulting config + Returns + ------- + dict_items """ - other = {} if other is None else other - if isinstance(other, (dict, Config)): - for key, value in other.items(): - self.put(key, value) + if flatten: + items = self.flatten().items() else: - for key, value in kwargs.items(): - self.put(key, value) - - def copy(self): - """ Create a shallow copy of the instance. """ - return Config(self.config.copy()) - - def __iter__(self): - return iter(self.config) - - def __repr__(self): - lines = ['\n' + 4 * ' ' + line for line in pformat(self.config).split('\n')] - return f"Config({''.join(lines)})" + items = super().items() + return items \ No newline at end of file From 88d2897b3cf40406ca916c6eb4339257fb567a8d Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Fri, 16 Jun 2023 08:59:41 +0000 Subject: [PATCH 02/17] New version of config --- batchflow/config.py | 265 +++++++++++++++++--------------------------- 1 file changed, 100 insertions(+), 165 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index b49404d34..ea409e312 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -1,15 +1,4 @@ """ Config class""" -from pathlib import Path -import numpy as np - -class IAddDict(dict): - """ dict that supports update via += """ - def __iadd__(self, other): - if isinstance(other, dict): - self.update(other) - else: - raise TypeError(f"unsupported operand type(s) for +=: 'IAddDict' and '{type(other)}'") - return self class Config(dict): """ Class for configs that can be represented as nested dicts with easy indexing by slashes """ @@ -18,10 +7,11 @@ def __init__(self, config=None, **kwargs): Parameters ---------- - config : dict, Config or None + config : dict, Config, list, tuple or None an object to initialize Config if dict, all keys and values slashes will be parsed into nested structure of dicts and the resulting dictionary will be saved into self + if list or tuple, should contain key-value pairs with the length of 2 if an instance on Config, config will be saved to self if None, empty dictionary will be created kwargs : @@ -31,175 +21,160 @@ def __init__(self, config=None, **kwargs): pass elif isinstance(config, Config): super().__init__(config) - elif isinstance(config, (dict, list)): + elif isinstance(config, (dict, list, tuple)): self.parse(config) - else: - raise TypeError(f'config must be dict, Config or list but {type(config)} was given') - + for key, value in kwargs.items(): self.put(key, value) - + def parse(self, config): """ Parses flatten config with slashes Parameters ---------- - config : dict, Config or list + config : dict, Config, list or tuple Returns ------- - new_config : dict + self : dict """ if isinstance(config, dict): items = config.items() - elif isinstance(config, list): + elif isinstance(config, (tuple, list)): items = config - if np.any([len(item) != 2 for item in items]): + if any([not isinstance(item, (tuple, list)) for item in items]): + raise ValueError('tuple or list should contain only tuples or lists') + if any([len(item) != 2 for item in items]): raise ValueError('tuples in list should represent pairs key-value' ', and therefore must be always the length of 2') for key, value in items: - if not isinstance(key, (str, Path)): - raise TypeError(f'only str and Path keys are supported, "{str(key)}" is of {type(key)} type') - if isinstance(key, str): key = '/'.join(filter(None, key.split('/'))) - self.put(key, value) return self - + def put(self, key, value): - """ Put a new variable into config + """ Put a new key into config Parameters ---------- - key : str, Path + key : hashable object key to add. '/' is used to put value into nested dict value : masc """ - if not isinstance(value, Config) and isinstance(value, dict): - value = Config(value) - if isinstance(key, str) and '/' in key: - keys = key.split('/') - prefix = keys[:-1] - var_name = keys[-1] - config = self - for i, p in enumerate(prefix): - if p not in config: - config[p] = {} - if isinstance(config[p], dict): - config = config[p] - else: - value = Config({'/'.join(prefix[i+1:] + [var_name]): value}) - var_name = p - break + parent, child = key.split('/', 1) - if var_name in config and isinstance(config[var_name], dict) and isinstance(value, Config): - config[var_name].update(value) + if parent in config and isinstance(config[parent], Config): + config[parent].update(Config({child: value})) else: - config[var_name] = value + config[parent] = Config({child: value}) else: - if key in self and isinstance(self[key], dict) and isinstance(value, Config): - self[key].update(value) + if key in self and isinstance(self[key], dict) and isinstance(value, dict): + self[key].update(Config(value)) else: super().__setitem__(key, value) - def get(self, variables, config=None, default=None): - """ Returns variables from config + def __getitem__(self, key): + value = self._get(key) + return value + + def __setitem__(self, key, value): + if key in self: + self.pop(key) + self.put(key, value) + + def get(self, key, default=None): + """ Returns the value or tuple of values for key from config Parameters ---------- - variables : str or list of str or tuple of str - names of variables. '/' is used to get value from nested dict. - config : dict, Config or None - if None variables will be getted from self.config else from config + key : str or list of hashable objects + '/' is used to get value from nested dict. default : masc - default value if variable doesn't exist in config + default value if key doesn't exist in config Returns ------- single value or a tuple """ - if isinstance(config, Config): - value = config.get(variables, default=default) - else: - value = self._get(variables, config=config, default=default) + value = self._get(key, default=default) return value + + def pop(self, key, **kwargs): + """ Returns the value or tuple of values for key and remove them from config + + Parameters + ---------- + key : str or list of hashable objects + '/' is used to get value from nested dict + + Returns + ------- + single value or a tuple + """ + value = self._get(key, pop=True, **kwargs) + return value + + def _get(self, key, **kwargs): - def _get(self, variables, config=None, **kwargs): pop = kwargs.get('pop', False) - has_default = 'default' in kwargs + has_default = 'default' in kwargs default = kwargs.get('default') + default = Config(default) if isinstance(default, dict) else default + + method = super().get if not pop else super().pop unpack = False - if not isinstance(variables, (list, tuple)): - variables = list([variables]) + if not isinstance(key, list): + key = list([key]) unpack = True ret_vars = [] - for variable in variables: + for variable in key: if isinstance(variable, str) and '/' in variable: keys = variable.split('/') - prefix = keys[:-1] - var_name = keys[-1] - - _config = self if config is None else config - for p in prefix: - if p in _config: - _config = _config[p] + value = self + for k in keys: + if isinstance(value, dict): + parent = value + value = value[k] else: - _config = None - break + if has_default: + return default + raise KeyError(k) + if pop: + del parent[k] - if isinstance(_config, dict): - if pop: - value = _config.pop(var_name) - else: - value = _config[var_name] + else: + if variable in self: + value = method(variable) else: if has_default: - value = default + return default else: - raise KeyError(f"Key '{variable}' not found") - - else: - _config = self if config is None else config - value = self._get_var_from_object(variable, has_default, default, pop, _config) + raise KeyError(variable) ret_vars.append(value) - if unpack: - ret_vars = ret_vars[0] - else: - ret_vars = tuple(ret_vars) + ret_vars = ret_vars[0] if unpack else tuple(ret_vars) return ret_vars - def _get_var_from_object(self, variable, has_default, default, pop, config): - """ Get variable from the object. - The object can be either Config or dict. - If dict, the parent methods will be used. - """ - if isinstance(config, Config): - obj = super() - else: - obj = config - - if variable in config: - value = obj.pop(variable) if pop else obj.__getitem__(variable) - else: - if has_default: - value = obj.pop(variable, default) if pop else obj.get(variable, default) - value = Config(value) if isinstance(value, dict) and len(value) > 0 else value - else: - raise KeyError(f"Key '{variable}' not found") - - return value - + def __delitem__(self, key): + self.pop(key) + + def __getattr__(self, key): + if key in self: + value = self.get(key) + value = Config(value) if isinstance(value, dict) else value + return value + raise AttributeError(key) + def update(self, other, **kwargs): """ Update config with values from other @@ -210,7 +185,6 @@ def update(self, other, **kwargs): kwargs : parameters from kwargs also will be included into the resulting config """ - other = {} if other is None else other if isinstance(other, dict): for key, value in other.items(): self.put(key, value) @@ -218,58 +192,30 @@ def update(self, other, **kwargs): for key, value in kwargs.items(): self.put(key, value) - def pop(self, variables, config=None, default=None, **kwargs): - """ Returns variables and remove them from config - - Parameters - ---------- - variables : str or list of strs - names of variables. '/' is used to get value from nested dict - config : dict, Config or None - if None, variables will be getted from self.config else from config - - Returns - ------- - single value or a tuple - """ - if isinstance(config, Config): - value = config.pop(variables, default=default) - else: - value = self._get(variables, pop=True, default=default, **kwargs) - return value - def flatten(self, config=None): """ Transforms nested dict into flatten dict Parameters ---------- config : dict, Config or None - if None self.config will be parsed else config + if None self will be parsed else config Returns ------- new_config : dict """ config = self if config is None else config - - new_config = IAddDict() # Do we really need here IAddDict? + new_config = {} for key, value in config.items(): if isinstance(value, dict) and len(value) > 0: value = self.flatten(value) for _key, _value in value.items(): - new_config[key+'/'+_key] = _value + new_config[key + '/' + _key] = _value else: new_config[key] = value return new_config - - def __getattr__(self, key): - if key in self: - value = self.get(key) - value = Config(value) if isinstance(value, dict) else value - return value - raise AttributeError(key) - + def __iadd__(self, other): if isinstance(other, dict): self.update(other) @@ -283,28 +229,17 @@ def __add__(self, other): if isinstance(other, Config): return Config([*self.flatten().items(), *other.flatten().items()]) return NotImplemented - + def __radd__(self, other): - if isinstance(other, dict) and not isinstance(other, Config): + if isinstance(other, dict): other = Config(other) return other.__add__(self) - - def __setitem__(self, key, value): - self.pop(key, default=None) - self.put(key, value) - - def __getitem__(self, key): - value = self._get(key) - return value - - def __delitem__(self, key): - self.pop(key) - + def __eq__(self, other): self_ = self.flatten() if isinstance(self, Config) else self - other_ = Config(other).flatten() if isinstance(other, dict) and not isinstance(other, Config) else other + other_ = Config(other).flatten() if isinstance(other, dict) else other return self_.__eq__(other_) - + def __rshift__(self, other): """ Parameters @@ -317,11 +252,7 @@ def __rshift__(self, other): Pipeline object with an updated config """ return other << self - - def copy(self): - """ Create a shallow copy of the instance. """ - return Config(super().copy()) - + def __getstate__(self): """ Must be explicitly defined for pickling to work. """ return vars(self) @@ -329,7 +260,11 @@ def __getstate__(self): def __setstate__(self, state): """ Must be explicitly defined for pickling to work. """ vars(self).update(state) - + + def copy(self): + """ Create a shallow copy of the instance. """ + return Config(super().copy()) + def keys(self, flatten=False): """ Returns config keys From 44269f5efd03f35f2d0893358b8b84b148df53d1 Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Tue, 11 Jul 2023 09:48:32 +0000 Subject: [PATCH 03/17] Add comments and doc --- batchflow/config.py | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index ea409e312..808780ec4 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -120,9 +120,21 @@ def pop(self, key, **kwargs): value = self._get(key, pop=True, **kwargs) return value - def _get(self, key, **kwargs): - - pop = kwargs.get('pop', False) + def _get(self, key, pop=False, **kwargs): + """ Recursively get values corresponding to key + If key doesn't contain '/', get() or pop() from the `dict` class is invoked + """ + # For example, let d = {'a': {'b': {'c': 30}}}. If + # we want to get d['a/b'], the __getitem__ method will invoke + # this method. + # keys = ['a', 'b'] + # value = self (value = {'a': {'b': {'c': 30}}}) + # k = 'a': + # Then parent starts to link to this value, i.e., parent = {'a': {'b': {'c': 30}}} + # Then we get value for 'a', so a new value starts to link to the {'b': {'c': 30}} + # k = 'b': + # Then parent starts to link to the new value, i.e., parent = {'b': {'c': 30}} + # Then we get value for 'b', so a new value starts to link to the {'c': 30} has_default = 'default' in kwargs default = kwargs.get('default') default = Config(default) if isinstance(default, dict) else default @@ -131,33 +143,35 @@ def _get(self, key, **kwargs): unpack = False if not isinstance(key, list): - key = list([key]) + key = [key] unpack = True ret_vars = [] for variable in key: if isinstance(variable, str) and '/' in variable: keys = variable.split('/') - value = self + value = self # value starts to link to self which is original dict for k in keys: if isinstance(value, dict): - parent = value - value = value[k] + parent = value # parent starts to link to value + value = value[k] # this invokes the __getitem__ method and returns the value corresponding to the k, + # value starts to link to the dict inside the previous dict + + # if we want to get, for example, 'a/b/c' from {'a': {'b': 30}} else: if has_default: return default raise KeyError(k) if pop: del parent[k] - + else: if variable in self: value = method(variable) else: if has_default: return default - else: - raise KeyError(variable) + raise KeyError(variable) ret_vars.append(value) @@ -185,12 +199,9 @@ def update(self, other, **kwargs): kwargs : parameters from kwargs also will be included into the resulting config """ - if isinstance(other, dict): - for key, value in other.items(): - self.put(key, value) - else: - for key, value in kwargs.items(): - self.put(key, value) + iterable = other if isinstance(other, dict) else kwargs + for key, value in iterable.items(): + self.put(key, value) def flatten(self, config=None): """ Transforms nested dict into flatten dict From 87708da7913b79906c7ed94afae040489fff44dd Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Sun, 30 Jul 2023 11:50:31 +0000 Subject: [PATCH 04/17] Edit docrstring and signature of `pop` and `get` --- batchflow/config.py | 134 +++++++++++++++++++++++++------------------- 1 file changed, 75 insertions(+), 59 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index 808780ec4..30e45af0b 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -1,21 +1,27 @@ """ Config class""" class Config(dict): - """ Class for configs that can be represented as nested dicts with easy indexing by slashes """ + """ Class for configs that can be represented as nested dicts with easy indexing by slashes. """ def __init__(self, config=None, **kwargs): """ Create Config Parameters ---------- config : dict, Config, list, tuple or None - an object to initialize Config - if dict, all keys and values slashes will be parsed into nested structure of dicts - and the resulting dictionary will be saved into self - if list or tuple, should contain key-value pairs with the length of 2 - if an instance on Config, config will be saved to self - if None, empty dictionary will be created + An object to initialize Config. + + If dict, all keys with slashes and values are parsed into nested structure of dicts, + and the resulting dictionary is saved to self. + For example, `{'a/b': 1, 'c/d/e': 2}` will be parsed into `{'a': {'b': 1}, 'c': {'d': {'e': 2}}}`. + + If list or tuple, should contain key-value pairs with the length of 2. + For example, `[('a/b', 1), ('c/d/e', 2)]` will be parsed into `{'a': {'b': 1}, 'c': {'d': {'e': 2}}}`. + + If an instance of Config, config is saved to self. + + If None, empty dictionary is created. kwargs : - parameters from kwargs also will be parsed and saved into self + Parameters from kwargs also are parsed and saved to self. """ if config is None: pass @@ -23,12 +29,14 @@ def __init__(self, config=None, **kwargs): super().__init__(config) elif isinstance(config, (dict, list, tuple)): self.parse(config) - + else: + raise TypeError(f'Config must be dict, Config, list or tuple but {type(config)} was given') + for key, value in kwargs.items(): self.put(key, value) - + def parse(self, config): - """ Parses flatten config with slashes + """ Parses flatten config with slashes. Parameters ---------- @@ -43,9 +51,9 @@ def parse(self, config): elif isinstance(config, (tuple, list)): items = config if any([not isinstance(item, (tuple, list)) for item in items]): - raise ValueError('tuple or list should contain only tuples or lists') + raise ValueError('Tuple or list should contain only tuples or lists') if any([len(item) != 2 for item in items]): - raise ValueError('tuples in list should represent pairs key-value' + raise ValueError('Tuples in list should represent pairs key-value' ', and therefore must be always the length of 2') for key, value in items: @@ -54,9 +62,9 @@ def parse(self, config): self.put(key, value) return self - + def put(self, key, value): - """ Put a new key into config + """ Put a new key into config recursively. Parameters ---------- @@ -68,13 +76,13 @@ def put(self, key, value): config = self parent, child = key.split('/', 1) - if parent in config and isinstance(config[parent], Config): - config[parent].update(Config({child: value})) + if parent in config and isinstance(config[parent], Config): # for example, we put value=3 with key='a/c' into the + config[parent].update(Config({child: value})) # config = {'a': {'b': 1}} and want to receive {'a': {'b': 1, 'c': 3}} else: config[parent] = Config({child: value}) else: - if key in self and isinstance(self[key], dict) and isinstance(value, dict): + if key in self and isinstance(self[key], dict) and isinstance(value, dict): self[key].update(Config(value)) else: super().__setitem__(key, value) @@ -84,57 +92,64 @@ def __getitem__(self, key): return value def __setitem__(self, key, value): - if key in self: - self.pop(key) + _ = self.pop(key, None) self.put(key, value) def get(self, key, default=None): - """ Returns the value or tuple of values for key from config + """ Returns the value or tuple of values for key in the config. + If not found, returns a default value. Parameters ---------- key : str or list of hashable objects '/' is used to get value from nested dict. default : masc - default value if key doesn't exist in config + Default value if key doesn't exist in config. + Defaults to None, so that this method never raises a KeyError. Returns ------- - single value or a tuple + Single value or a tuple. """ value = self._get(key, default=default) return value - def pop(self, key, **kwargs): - """ Returns the value or tuple of values for key and remove them from config + def pop(self, key, default=None): + """ Returns the value or tuple of values for key and remove them from config. Parameters ---------- key : str or list of hashable objects - '/' is used to get value from nested dict + '/' is used to get value from nested dict. + default : masc + Default value if key doesn't exist in config. + Defaults to None, so that this method never raises a KeyError. Returns ------- - single value or a tuple + Single value or a tuple """ - value = self._get(key, pop=True, **kwargs) + value = self._get(key, pop=True, default=default) return value - + def _get(self, key, pop=False, **kwargs): - """ Recursively get values corresponding to key - If key doesn't contain '/', get() or pop() from the `dict` class is invoked + """ Recursively retrieve values for a given key if the key contains '/'. + If key doesn't contain '/', get or pop from the `dict` class is invoked. + + Example: + Let d = {'a': {'b': {'c': 30}}}. + If we want to get d['a/b'], __getitem__ method will invoke _get method. + + Given the key='a/b', keys will be ['a', 'b']. + value = self (value = {'a': {'b': {'c': 30}}}). + k = 'a': + Then parent starts to link to this value, i.e., parent = {'a': {'b': {'c': 30}}}; + Then we get value for 'a', so a new value starts to link to the {'b': {'c': 30}}. + k = 'b': + Then parent starts to link to the new value, i.e., parent = {'b': {'c': 30}}; + Then we get value for 'b', so a new value starts to link to the {'c': 30}. + Returns {'c': 30} """ - # For example, let d = {'a': {'b': {'c': 30}}}. If - # we want to get d['a/b'], the __getitem__ method will invoke - # this method. - # keys = ['a', 'b'] - # value = self (value = {'a': {'b': {'c': 30}}}) - # k = 'a': - # Then parent starts to link to this value, i.e., parent = {'a': {'b': {'c': 30}}} - # Then we get value for 'a', so a new value starts to link to the {'b': {'c': 30}} - # k = 'b': - # Then parent starts to link to the new value, i.e., parent = {'b': {'c': 30}} - # Then we get value for 'b', so a new value starts to link to the {'c': 30} has_default = 'default' in kwargs default = kwargs.get('default') default = Config(default) if isinstance(default, dict) else default @@ -149,12 +164,12 @@ def _get(self, key, pop=False, **kwargs): ret_vars = [] for variable in key: if isinstance(variable, str) and '/' in variable: - keys = variable.split('/') + keys = variable.split('/') # split variable='a/b' into ['a', 'b'] value = self # value starts to link to self which is original dict for k in keys: if isinstance(value, dict): parent = value # parent starts to link to value - value = value[k] # this invokes the __getitem__ method and returns the value corresponding to the k, + value = value[k] # this invokes the __getitem__ method again and returns the value corresponding to the k, # value starts to link to the dict inside the previous dict # if we want to get, for example, 'a/b/c' from {'a': {'b': 30}} @@ -164,7 +179,8 @@ def _get(self, key, pop=False, **kwargs): raise KeyError(k) if pop: del parent[k] - + + # use dict's 'get' or 'pop' if '/' not in variable else: if variable in self: value = method(variable) @@ -181,7 +197,7 @@ def _get(self, key, pop=False, **kwargs): def __delitem__(self, key): self.pop(key) - + def __getattr__(self, key): if key in self: value = self.get(key) @@ -190,7 +206,7 @@ def __getattr__(self, key): raise AttributeError(key) def update(self, other, **kwargs): - """ Update config with values from other + """ Update config with values from other. Parameters ---------- @@ -204,12 +220,12 @@ def update(self, other, **kwargs): self.put(key, value) def flatten(self, config=None): - """ Transforms nested dict into flatten dict + """ Transforms nested dict into flatten dict. Parameters ---------- config : dict, Config or None - if None self will be parsed else config + If None self will be parsed else config. Returns ------- @@ -231,7 +247,7 @@ def __iadd__(self, other): if isinstance(other, dict): self.update(other) else: - raise TypeError(f"unsupported operand type(s) for +=: 'Config' and '{type(other)}'") + raise TypeError(f"Unsupported operand type(s) for +=: 'Config' and '{type(other)}'") return self def __add__(self, other): @@ -240,12 +256,12 @@ def __add__(self, other): if isinstance(other, Config): return Config([*self.flatten().items(), *other.flatten().items()]) return NotImplemented - + def __radd__(self, other): if isinstance(other, dict): other = Config(other) return other.__add__(self) - + def __eq__(self, other): self_ = self.flatten() if isinstance(self, Config) else self other_ = Config(other).flatten() if isinstance(other, dict) else other @@ -260,7 +276,7 @@ def __rshift__(self, other): Returns ------- Pipeline - Pipeline object with an updated config + Pipeline object with an updated config. """ return other << self @@ -277,12 +293,12 @@ def copy(self): return Config(super().copy()) def keys(self, flatten=False): - """ Returns config keys + """ Returns config keys. Parameters ---------- flatten : bool - if False, keys will be getted from first level of nested dict, else from the last + if False, keys will be getted from first level of nested dict, else from the last. Returns ------- @@ -295,12 +311,12 @@ def keys(self, flatten=False): return keys def values(self, flatten=False): - """ Return config values + """ Return config values. Parameters ---------- flatten : bool - if False, values will be getted from first level of nested dict, else from the last + if False, values will be getted from first level of nested dict, else from the last. Returns ------- @@ -313,12 +329,12 @@ def values(self, flatten=False): return values def items(self, flatten=False): - """ Returns config items + """ Returns config items. Parameters ---------- flatten : bool - if False, keys and values will be getted from first level of nested dict, else from the last + if False, keys and values will be getted from first level of nested dict, else from the last. Returns ------- From 5e0b98923e16193c805738c9516e4e4c67391dd7 Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Wed, 16 Aug 2023 13:48:53 +0000 Subject: [PATCH 05/17] Edit docstrings --- batchflow/config.py | 173 ++++++++++++++++++++++---------------------- 1 file changed, 88 insertions(+), 85 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index 30e45af0b..aaae08624 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -69,8 +69,8 @@ def put(self, key, value): Parameters ---------- key : hashable object - key to add. '/' is used to put value into nested dict - value : masc + Key to add. '/' is used to put value into nested dict. + value : misc """ if isinstance(key, str) and '/' in key: config = self @@ -79,6 +79,9 @@ def put(self, key, value): if parent in config and isinstance(config[parent], Config): # for example, we put value=3 with key='a/c' into the config[parent].update(Config({child: value})) # config = {'a': {'b': 1}} and want to receive {'a': {'b': 1, 'c': 3}} else: + if parent in config: + import warnings + warnings.warn('Note that ...') config[parent] = Config({child: value}) else: @@ -86,15 +89,7 @@ def put(self, key, value): self[key].update(Config(value)) else: super().__setitem__(key, value) - - def __getitem__(self, key): - value = self._get(key) - return value - - def __setitem__(self, key, value): - _ = self.pop(key, None) - self.put(key, value) - + def get(self, key, default=None): """ Returns the value or tuple of values for key in the config. If not found, returns a default value. @@ -102,8 +97,8 @@ def get(self, key, default=None): Parameters ---------- key : str or list of hashable objects - '/' is used to get value from nested dict. - default : masc + A key in the dictionary. '/' is used to get value from nested dict. + default : misc Default value if key doesn't exist in config. Defaults to None, so that this method never raises a KeyError. @@ -114,24 +109,6 @@ def get(self, key, default=None): value = self._get(key, default=default) return value - def pop(self, key, default=None): - """ Returns the value or tuple of values for key and remove them from config. - - Parameters - ---------- - key : str or list of hashable objects - '/' is used to get value from nested dict. - default : masc - Default value if key doesn't exist in config. - Defaults to None, so that this method never raises a KeyError. - - Returns - ------- - Single value or a tuple - """ - value = self._get(key, pop=True, default=default) - return value - def _get(self, key, pop=False, **kwargs): """ Recursively retrieve values for a given key if the key contains '/'. If key doesn't contain '/', get or pop from the `dict` class is invoked. @@ -194,16 +171,24 @@ def _get(self, key, pop=False, **kwargs): ret_vars = ret_vars[0] if unpack else tuple(ret_vars) return ret_vars + + def pop(self, key, default=None): + """ Returns the value or tuple of values for key and remove them from config. - def __delitem__(self, key): - self.pop(key) + Parameters + ---------- + key : str or list of hashable objects + A key in the dictionary. '/' is used to get value from nested dict. + default : misc + Default value if key doesn't exist in config. + Defaults to None, so that this method never raises a KeyError. - def __getattr__(self, key): - if key in self: - value = self.get(key) - value = Config(value) if isinstance(value, dict) else value - return value - raise AttributeError(key) + Returns + ------- + Single value or a tuple + """ + value = self._get(key, pop=True, default=default) + return value def update(self, other, **kwargs): """ Update config with values from other. @@ -243,51 +228,6 @@ def flatten(self, config=None): return new_config - def __iadd__(self, other): - if isinstance(other, dict): - self.update(other) - else: - raise TypeError(f"Unsupported operand type(s) for +=: 'Config' and '{type(other)}'") - return self - - def __add__(self, other): - if isinstance(other, dict) and not isinstance(other, Config): - other = Config(other) - if isinstance(other, Config): - return Config([*self.flatten().items(), *other.flatten().items()]) - return NotImplemented - - def __radd__(self, other): - if isinstance(other, dict): - other = Config(other) - return other.__add__(self) - - def __eq__(self, other): - self_ = self.flatten() if isinstance(self, Config) else self - other_ = Config(other).flatten() if isinstance(other, dict) else other - return self_.__eq__(other_) - - def __rshift__(self, other): - """ - Parameters - ---------- - other : Pipeline - - Returns - ------- - Pipeline - Pipeline object with an updated config. - """ - return other << self - - def __getstate__(self): - """ Must be explicitly defined for pickling to work. """ - return vars(self) - - def __setstate__(self, state): - """ Must be explicitly defined for pickling to work. """ - vars(self).update(state) - def copy(self): """ Create a shallow copy of the instance. """ return Config(super().copy()) @@ -344,4 +284,67 @@ def items(self, flatten=False): items = self.flatten().items() else: items = super().items() - return items \ No newline at end of file + return items + + def __getitem__(self, key): + value = self._get(key) + return value + + def __setitem__(self, key, value): + _ = self.pop(key, None) + self.put(key, value) + + def __delitem__(self, key): + self.pop(key) + + def __getattr__(self, key): + if key in self: + value = self.get(key) + value = Config(value) if isinstance(value, dict) else value + return value + raise AttributeError(key) + + def __iadd__(self, other): + if isinstance(other, dict): + self.update(other) + else: + raise TypeError(f"Unsupported operand type(s) for +=: 'Config' and '{type(other)}'") + return self + + def __add__(self, other): + if isinstance(other, dict) and not isinstance(other, Config): + other = Config(other) + if isinstance(other, Config): + return Config([*self.flatten().items(), *other.flatten().items()]) + return NotImplemented + + def __radd__(self, other): + if isinstance(other, dict): + other = Config(other) + return other.__add__(self) + + def __eq__(self, other): + self_ = self.flatten() if isinstance(self, Config) else self + other_ = Config(other).flatten() if isinstance(other, dict) else other + return self_.__eq__(other_) + + def __rshift__(self, other): + """ + Parameters + ---------- + other : Pipeline + + Returns + ------- + Pipeline + Pipeline object with an updated config. + """ + return other << self + + def __getstate__(self): + """ Must be explicitly defined for pickling to work. """ + return vars(self) + + def __setstate__(self, state): + """ Must be explicitly defined for pickling to work. """ + vars(self).update(state) From 725668dc9977caeb44d5400df72608f893d58b44 Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Wed, 16 Aug 2023 15:44:41 +0000 Subject: [PATCH 06/17] Add warning --- batchflow/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index aaae08624..e19ae20f0 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -1,4 +1,5 @@ """ Config class""" +import warnings class Config(dict): """ Class for configs that can be represented as nested dicts with easy indexing by slashes. """ @@ -80,8 +81,7 @@ def put(self, key, value): config[parent].update(Config({child: value})) # config = {'a': {'b': 1}} and want to receive {'a': {'b': 1, 'c': 3}} else: if parent in config: - import warnings - warnings.warn('Note that ...') + warnings.warn(f'Note that value for `{parent}` was overridden, not updated!') config[parent] = Config({child: value}) else: From ef552567b62efde3998365b30116a432cbde52f2 Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Thu, 17 Aug 2023 09:00:34 +0000 Subject: [PATCH 07/17] Edit docs --- batchflow/config.py | 50 ++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index e19ae20f0..8f2dc36ec 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -4,7 +4,7 @@ class Config(dict): """ Class for configs that can be represented as nested dicts with easy indexing by slashes. """ def __init__(self, config=None, **kwargs): - """ Create Config + """ Create Config. Parameters ---------- @@ -15,7 +15,7 @@ def __init__(self, config=None, **kwargs): and the resulting dictionary is saved to self. For example, `{'a/b': 1, 'c/d/e': 2}` will be parsed into `{'a': {'b': 1}, 'c': {'d': {'e': 2}}}`. - If list or tuple, should contain key-value pairs with the length of 2. + If list or tuples, should contain key-value pairs with the length of 2. For example, `[('a/b', 1), ('c/d/e', 2)]` will be parsed into `{'a': {'b': 1}, 'c': {'d': {'e': 2}}}`. If an instance of Config, config is saved to self. @@ -108,24 +108,29 @@ def get(self, key, default=None): """ value = self._get(key, default=default) return value - + def _get(self, key, pop=False, **kwargs): """ Recursively retrieve values for a given key if the key contains '/'. If key doesn't contain '/', get or pop from the `dict` class is invoked. - Example: - Let d = {'a': {'b': {'c': 30}}}. - If we want to get d['a/b'], __getitem__ method will invoke _get method. + Examples + -------- + >>> config = Config({'a': {'b': {'c': 30}}}) + config.get('a/b') + {'c': 30} - Given the key='a/b', keys will be ['a', 'b']. + Explaining: + + If we want to get d['a/b'], __getitem__ method will invoke _get method. + Given the key = 'a/b', keys will be ['a', 'b']. value = self (value = {'a': {'b': {'c': 30}}}). - k = 'a': - Then parent starts to link to this value, i.e., parent = {'a': {'b': {'c': 30}}}; - Then we get value for 'a', so a new value starts to link to the {'b': {'c': 30}}. - k = 'b': - Then parent starts to link to the new value, i.e., parent = {'b': {'c': 30}}; - Then we get value for 'b', so a new value starts to link to the {'c': 30}. - Returns {'c': 30} + Iterate over keys: + k = 'a': + Then parent starts to link to this value, i.e., parent = {'a': {'b': {'c': 30}}}; + Then we get value for 'a', so a new value starts to link to the {'b': {'c': 30}}. + k = 'b': + Then parent starts to link to the new value, i.e., parent = {'b': {'c': 30}}; + Then we get value for 'b', so a new value starts to link to the {'c': 30}. """ has_default = 'default' in kwargs default = kwargs.get('default') @@ -198,7 +203,7 @@ def update(self, other, **kwargs): other : dict or Config kwargs : - parameters from kwargs also will be included into the resulting config + Parameters from kwargs also will be included into the resulting config. """ iterable = other if isinstance(other, dict) else kwargs for key, value in iterable.items(): @@ -238,7 +243,7 @@ def keys(self, flatten=False): Parameters ---------- flatten : bool - if False, keys will be getted from first level of nested dict, else from the last. + If False, keys will be getted from first level of nested dict, else from the last. Returns ------- @@ -256,7 +261,7 @@ def values(self, flatten=False): Parameters ---------- flatten : bool - if False, values will be getted from first level of nested dict, else from the last. + If False, values will be getted from first level of nested dict, else from the last. Returns ------- @@ -274,7 +279,7 @@ def items(self, flatten=False): Parameters ---------- flatten : bool - if False, keys and values will be getted from first level of nested dict, else from the last. + If False, keys and values will be getted from first level of nested dict, else from the last. Returns ------- @@ -285,7 +290,7 @@ def items(self, flatten=False): else: items = super().items() return items - + def __getitem__(self, key): value = self._get(key) return value @@ -327,10 +332,9 @@ def __eq__(self, other): self_ = self.flatten() if isinstance(self, Config) else self other_ = Config(other).flatten() if isinstance(other, dict) else other return self_.__eq__(other_) - + def __rshift__(self, other): - """ - Parameters + """ Parameters ---------- other : Pipeline @@ -340,7 +344,7 @@ def __rshift__(self, other): Pipeline object with an updated config. """ return other << self - + def __getstate__(self): """ Must be explicitly defined for pickling to work. """ return vars(self) From 28621ca7d7d60b58684353ee0e34d05dad4fc9bb Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Thu, 17 Aug 2023 09:12:02 +0000 Subject: [PATCH 08/17] Replace [] with list() --- batchflow/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batchflow/config.py b/batchflow/config.py index 8f2dc36ec..854066bfd 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -140,7 +140,7 @@ def _get(self, key, pop=False, **kwargs): unpack = False if not isinstance(key, list): - key = [key] + key = list(key) unpack = True ret_vars = [] From b16651eee54628d81aaeef267a1f8ed7c75f4096 Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Thu, 17 Aug 2023 09:14:34 +0000 Subject: [PATCH 09/17] Add tuple case in docs --- batchflow/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index 854066bfd..61c559999 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -96,7 +96,7 @@ def get(self, key, default=None): Parameters ---------- - key : str or list of hashable objects + key : str or list of hashable objects or tuple of hashable objects A key in the dictionary. '/' is used to get value from nested dict. default : misc Default value if key doesn't exist in config. @@ -182,7 +182,7 @@ def pop(self, key, default=None): Parameters ---------- - key : str or list of hashable objects + key : str or list of hashable objects or tuple of hashable objects A key in the dictionary. '/' is used to get value from nested dict. default : misc Default value if key doesn't exist in config. From 2ae9aeca56f5f254df5289f7593f635e9ee704cc Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Thu, 17 Aug 2023 14:25:22 +0000 Subject: [PATCH 10/17] Add special case for get/pop, remove tuples as keys --- batchflow/config.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index 61c559999..f46a66ebd 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -96,7 +96,7 @@ def get(self, key, default=None): Parameters ---------- - key : str or list of hashable objects or tuple of hashable objects + key : str or list of hashable objects A key in the dictionary. '/' is used to get value from nested dict. default : misc Default value if key doesn't exist in config. @@ -132,7 +132,7 @@ def _get(self, key, pop=False, **kwargs): Then parent starts to link to the new value, i.e., parent = {'b': {'c': 30}}; Then we get value for 'b', so a new value starts to link to the {'c': 30}. """ - has_default = 'default' in kwargs + has_default = 'default' in kwargs default = kwargs.get('default') default = Config(default) if isinstance(default, dict) else default @@ -140,7 +140,7 @@ def _get(self, key, pop=False, **kwargs): unpack = False if not isinstance(key, list): - key = list(key) + key = [key] unpack = True ret_vars = [] @@ -151,8 +151,13 @@ def _get(self, key, pop=False, **kwargs): for k in keys: if isinstance(value, dict): parent = value # parent starts to link to value - value = value[k] # this invokes the __getitem__ method again and returns the value corresponding to the k, - # value starts to link to the dict inside the previous dict + + if not has_default: + value = value[k] # this invokes the __getitem__ method again and returns the value corresponding to the k, + # value starts to link to the dict inside the previous dict + + else: # if we want to get/pop, for example, 'a/c' from {'a': 1} and expect default value to be returned + value = value.get(k, default=default) # if we want to get, for example, 'a/b/c' from {'a': {'b': 30}} else: @@ -176,13 +181,13 @@ def _get(self, key, pop=False, **kwargs): ret_vars = ret_vars[0] if unpack else tuple(ret_vars) return ret_vars - + def pop(self, key, default=None): """ Returns the value or tuple of values for key and remove them from config. Parameters ---------- - key : str or list of hashable objects or tuple of hashable objects + key : str or list of hashable objects A key in the dictionary. '/' is used to get value from nested dict. default : misc Default value if key doesn't exist in config. @@ -194,7 +199,7 @@ def pop(self, key, default=None): """ value = self._get(key, pop=True, default=default) return value - + def update(self, other, **kwargs): """ Update config with values from other. @@ -310,10 +315,10 @@ def __getattr__(self, key): raise AttributeError(key) def __iadd__(self, other): - if isinstance(other, dict): - self.update(other) - else: + if not isinstance(other, dict): raise TypeError(f"Unsupported operand type(s) for +=: 'Config' and '{type(other)}'") + + self.update(other) return self def __add__(self, other): From 7e6809873aa503b5d21906d53d43bad43e350f81 Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Fri, 18 Aug 2023 08:00:53 +0000 Subject: [PATCH 11/17] Edit docs, edit parameter in pop in __setitem__ --- batchflow/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index f46a66ebd..0cad41581 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -116,7 +116,7 @@ def _get(self, key, pop=False, **kwargs): Examples -------- >>> config = Config({'a': {'b': {'c': 30}}}) - config.get('a/b') + >>> config.get('a/b') {'c': 30} Explaining: @@ -301,7 +301,7 @@ def __getitem__(self, key): return value def __setitem__(self, key, value): - _ = self.pop(key, None) + self.pop(key) self.put(key, value) def __delitem__(self, key): From 337c16a3aa51c062bb95b0848d89a0ebc326164c Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Wed, 29 Nov 2023 10:21:12 +0000 Subject: [PATCH 12/17] Rm kwarg --- batchflow/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batchflow/config.py b/batchflow/config.py index 0cad41581..35e262975 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -157,7 +157,7 @@ def _get(self, key, pop=False, **kwargs): # value starts to link to the dict inside the previous dict else: # if we want to get/pop, for example, 'a/c' from {'a': 1} and expect default value to be returned - value = value.get(k, default=default) + value = value.get(k, default) # if we want to get, for example, 'a/b/c' from {'a': {'b': 30}} else: From e470bc105ba6a6b4131e7f75ecebf3bf780fb6c2 Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Tue, 5 Dec 2023 08:40:15 +0000 Subject: [PATCH 13/17] Add new version --- batchflow/config.py | 376 ++++++++++++++++++++------------------------ 1 file changed, 168 insertions(+), 208 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index 35e262975..7e19e2bf0 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -1,33 +1,15 @@ -""" Config class""" -import warnings +from pprint import pformat class Config(dict): - """ Class for configs that can be represented as nested dicts with easy indexing by slashes. """ + class IAddDict(dict): + pass def __init__(self, config=None, **kwargs): - """ Create Config. + self.config = {} - Parameters - ---------- - config : dict, Config, list, tuple or None - An object to initialize Config. - - If dict, all keys with slashes and values are parsed into nested structure of dicts, - and the resulting dictionary is saved to self. - For example, `{'a/b': 1, 'c/d/e': 2}` will be parsed into `{'a': {'b': 1}, 'c': {'d': {'e': 2}}}`. - - If list or tuples, should contain key-value pairs with the length of 2. - For example, `[('a/b', 1), ('c/d/e', 2)]` will be parsed into `{'a': {'b': 1}, 'c': {'d': {'e': 2}}}`. - - If an instance of Config, config is saved to self. - - If None, empty dictionary is created. - kwargs : - Parameters from kwargs also are parsed and saved to self. - """ if config is None: pass elif isinstance(config, Config): - super().__init__(config) + self.parse(config.config) elif isinstance(config, (dict, list, tuple)): self.parse(config) else: @@ -35,108 +17,45 @@ def __init__(self, config=None, **kwargs): for key, value in kwargs.items(): self.put(key, value) - + def parse(self, config): - """ Parses flatten config with slashes. - - Parameters - ---------- - config : dict, Config, list or tuple - - Returns - ------- - self : dict - """ - if isinstance(config, dict): - items = config.items() - elif isinstance(config, (tuple, list)): - items = config - if any([not isinstance(item, (tuple, list)) for item in items]): - raise ValueError('Tuple or list should contain only tuples or lists') - if any([len(item) != 2 for item in items]): - raise ValueError('Tuples in list should represent pairs key-value' - ', and therefore must be always the length of 2') + items = config.items() if isinstance(config, dict) else dict(config).items() for key, value in items: if isinstance(key, str): - key = '/'.join(filter(None, key.split('/'))) + key = '/'.join(s for s in key.split('/') if s) self.put(key, value) return self def put(self, key, value): - """ Put a new key into config recursively. - Parameters - ---------- - key : hashable object - Key to add. '/' is used to put value into nested dict. - value : misc - """ - if isinstance(key, str) and '/' in key: - config = self - parent, child = key.split('/', 1) + if isinstance(key, str): + d = self.config + levels = key.split('/') - if parent in config and isinstance(config[parent], Config): # for example, we put value=3 with key='a/c' into the - config[parent].update(Config({child: value})) # config = {'a': {'b': 1}} and want to receive {'a': {'b': 1, 'c': 3}} - else: - if parent in config: - warnings.warn(f'Note that value for `{parent}` was overridden, not updated!') - config[parent] = Config({child: value}) + # Iterate to the last level + for level in levels[:-1]: + prev_d = d + if level not in d: + d[level] = {} + d = d[level] - else: - if key in self and isinstance(self[key], dict) and isinstance(value, dict): - self[key].update(Config(value)) + # Update the last leaf + if isinstance(value, dict) and levels[-1] in d and isinstance(d[levels[-1]], dict): + d[levels[-1]].update(value) else: - super().__setitem__(key, value) - - def get(self, key, default=None): - """ Returns the value or tuple of values for key in the config. - If not found, returns a default value. - - Parameters - ---------- - key : str or list of hashable objects - A key in the dictionary. '/' is used to get value from nested dict. - default : misc - Default value if key doesn't exist in config. - Defaults to None, so that this method never raises a KeyError. - - Returns - ------- - Single value or a tuple. - """ - value = self._get(key, default=default) - return value + if isinstance(d, dict): + d[levels[-1]] = value + else: + prev_d[level] = {levels[-1]: value} + else: + self.config[key] = value - def _get(self, key, pop=False, **kwargs): - """ Recursively retrieve values for a given key if the key contains '/'. - If key doesn't contain '/', get or pop from the `dict` class is invoked. - - Examples - -------- - >>> config = Config({'a': {'b': {'c': 30}}}) - >>> config.get('a/b') - {'c': 30} - - Explaining: - - If we want to get d['a/b'], __getitem__ method will invoke _get method. - Given the key = 'a/b', keys will be ['a', 'b']. - value = self (value = {'a': {'b': {'c': 30}}}). - Iterate over keys: - k = 'a': - Then parent starts to link to this value, i.e., parent = {'a': {'b': {'c': 30}}}; - Then we get value for 'a', so a new value starts to link to the {'b': {'c': 30}}. - k = 'b': - Then parent starts to link to the new value, i.e., parent = {'b': {'c': 30}}; - Then we get value for 'b', so a new value starts to link to the {'c': 30}. - """ - has_default = 'default' in kwargs - default = kwargs.get('default') - default = Config(default) if isinstance(default, dict) else default + def _get(self, key, default=None, has_default=False, pop=False): - method = super().get if not pop else super().pop + method = 'get' if not pop else 'pop' + method = getattr(self.config, method) unpack = False if not isinstance(key, list): @@ -146,44 +65,42 @@ def _get(self, key, pop=False, **kwargs): ret_vars = [] for variable in key: if isinstance(variable, str) and '/' in variable: - keys = variable.split('/') # split variable='a/b' into ['a', 'b'] - value = self # value starts to link to self which is original dict - for k in keys: - if isinstance(value, dict): - parent = value # parent starts to link to value - - if not has_default: - value = value[k] # this invokes the __getitem__ method again and returns the value corresponding to the k, - # value starts to link to the dict inside the previous dict - - else: # if we want to get/pop, for example, 'a/c' from {'a': 1} and expect default value to be returned - value = value.get(k, default) + value = self.config + levels = variable.split('/') + values = [] - # if we want to get, for example, 'a/b/c' from {'a': {'b': 30}} - else: + # Iterate to the last level + for level in levels: + if not isinstance(value, dict): + if has_default: + return default + raise KeyError(level) + if level not in value: if has_default: return default - raise KeyError(k) + raise KeyError(level) + value = value[level] + values.append(value) if pop: - del parent[k] - - # use dict's 'get' or 'pop' if '/' not in variable + del values[-2][level] else: - if variable in self: - value = method(variable) - else: + if variable not in self.config: if has_default: return default - raise KeyError(variable) + raise KeyError + value = method(variable) - ret_vars.append(value) + if isinstance(value, dict): + value = Config(value) + ret_vars.append(value) + ret_vars = ret_vars[0] if unpack else tuple(ret_vars) - return ret_vars - def pop(self, key, default=None): - """ Returns the value or tuple of values for key and remove them from config. + def get(self, key, default=None): + """ Returns the value or tuple of values for key in the config. + If not found, returns a default value. Parameters ---------- @@ -195,60 +112,64 @@ def pop(self, key, default=None): Returns ------- - Single value or a tuple - """ - value = self._get(key, pop=True, default=default) - return value - - def update(self, other, **kwargs): - """ Update config with values from other. - - Parameters - ---------- - other : dict or Config - - kwargs : - Parameters from kwargs also will be included into the resulting config. + Single value or a tuple. """ - iterable = other if isinstance(other, dict) else kwargs - for key, value in iterable.items(): - self.put(key, value) + value = self._get(key, default=default, has_default=True) - def flatten(self, config=None): - """ Transforms nested dict into flatten dict. + return value + + def pop(self, key, **kwargs): + """ Returns the value or tuple of values for key in the config. + If not found, returns a default value. Parameters ---------- - config : dict, Config or None - If None self will be parsed else config. + key : str or list of hashable objects + A key in the dictionary. '/' is used to get value from nested dict. + default : misc + Default value if key doesn't exist in config. + Defaults to None, so that this method never raises a KeyError. Returns ------- - new_config : dict + Single value or a tuple. """ - config = self if config is None else config - new_config = {} - for key, value in config.items(): - if isinstance(value, dict) and len(value) > 0: - value = self.flatten(value) - for _key, _value in value.items(): - new_config[key + '/' + _key] = _value - else: - new_config[key] = value + has_default = 'default' in kwargs + default = kwargs.get('default') + value = self._get(key, has_default=has_default, default=default, pop=True) - return new_config - - def copy(self): - """ Create a shallow copy of the instance. """ - return Config(super().copy()) + return value + + def __repr__(self): + return repr(self.config) + + def __getitem__(self, key): + value = self._get(key) + return value + def update(self, other=None, **kwargs): + iterable = other if isinstance(other, (dict, tuple, list)) else kwargs + + if isinstance(iterable, dict): + items = iterable.items() + elif isinstance(iterable, (tuple, list)): + items = iterable + if any([not isinstance(item, (tuple, list)) for item in items]): + raise ValueError('Tuple or list should contain only tuples or lists') + if any([len(item) != 2 for item in items]): + raise ValueError('Tuples in list should represent pairs key-value' + ', and therefore must be always the length of 2') + + for key, value in iterable.items(): + self.put(key, value) + def keys(self, flatten=False): - """ Returns config keys. + """ Returns config keys Parameters ---------- flatten : bool - If False, keys will be getted from first level of nested dict, else from the last. + if False, keys will be getted from first level of nested dict, else from the last Returns ------- @@ -257,16 +178,16 @@ def keys(self, flatten=False): if flatten: keys = self.flatten().keys() else: - keys = super().keys() + keys = self.config.keys() return keys - + def values(self, flatten=False): - """ Return config values. + """ Return config values Parameters ---------- flatten : bool - If False, values will be getted from first level of nested dict, else from the last. + if False, values will be getted from first level of nested dict, else from the last Returns ------- @@ -275,16 +196,16 @@ def values(self, flatten=False): if flatten: values = self.flatten().values() else: - values = super().values() + values = self.config.values() return values - + def items(self, flatten=False): - """ Returns config items. + """ Returns config items Parameters ---------- flatten : bool - If False, keys and values will be getted from first level of nested dict, else from the last. + if False, keys and values will be getted from first level of nested dict, else from the last Returns ------- @@ -293,51 +214,98 @@ def items(self, flatten=False): if flatten: items = self.flatten().items() else: - items = super().items() + items = self.config.items() return items + + def flatten(self, config=None): + """ Transforms nested dict into flatten dict. - def __getitem__(self, key): - value = self._get(key) - return value + Parameters + ---------- + config : dict, Config or None + If None self will be parsed else config. + + Returns + ------- + new_config : dict + """ + config = self.config if config is None else config + new_config = {} + for key, value in config.items(): + if isinstance(value, dict) and len(value) > 0: + value = self.flatten(value) + for _key, _value in value.items(): + if isinstance(_key, str): + new_config[key + '/' + _key] = _value + else: + new_config[key] = {_key: _value} + else: + new_config[key] = value + return new_config + def __setitem__(self, key, value): - self.pop(key) + if key in self.config: + self.pop(key, default=None) self.put(key, value) def __delitem__(self, key): self.pop(key) - + + def copy(self): + """ Create a shallow copy of the instance. """ + return Config(self.config.copy()) + def __getattr__(self, key): - if key in self: - value = self.get(key) + if key in self.config: + value = self.config.get(key) value = Config(value) if isinstance(value, dict) else value return value raise AttributeError(key) - - def __iadd__(self, other): - if not isinstance(other, dict): - raise TypeError(f"Unsupported operand type(s) for +=: 'Config' and '{type(other)}'") - - self.update(other) - return self - + def __add__(self, other): if isinstance(other, dict) and not isinstance(other, Config): other = Config(other) if isinstance(other, Config): return Config([*self.flatten().items(), *other.flatten().items()]) return NotImplemented + + def __iter__(self): + return iter(self.config) + + def __repr__(self): + lines = ['\n' + 4 * ' ' + line for line in pformat(self.config).split('\n')] + return f"Config({''.join(lines)})" + + def __iadd__(self, other): + if isinstance(other, dict): + self.update(other) + else: + raise TypeError(f"unsupported operand type(s) for +=: 'IAddDict' and '{type(other)}'") + return self def __radd__(self, other): if isinstance(other, dict): other = Config(other) return other.__add__(self) - + + def __len__(self): + return len(self.config) + def __eq__(self, other): - self_ = self.flatten() if isinstance(self, Config) else self + self_ = self.flatten() + print(self_, 'self_') other_ = Config(other).flatten() if isinstance(other, dict) else other return self_.__eq__(other_) + def __getstate__(self): + """ Must be explicitly defined for pickling to work. """ + return vars(self) + + def __setstate__(self, state): + """ Must be explicitly defined for pickling to work. """ + vars(self).update(state) + def __rshift__(self, other): """ Parameters ---------- @@ -349,11 +317,3 @@ def __rshift__(self, other): Pipeline object with an updated config. """ return other << self - - def __getstate__(self): - """ Must be explicitly defined for pickling to work. """ - return vars(self) - - def __setstate__(self, state): - """ Must be explicitly defined for pickling to work. """ - vars(self).update(state) From 7aac9d8cd2440119178ba58c0d00c4803b6ecde3 Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Thu, 7 Dec 2023 15:10:03 +0000 Subject: [PATCH 14/17] Change naming, make config cleaner --- batchflow/config.py | 160 +++++++++++++++++++++++++++++--------------- 1 file changed, 106 insertions(+), 54 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index 7e19e2bf0..6e382272f 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -4,6 +4,26 @@ class Config(dict): class IAddDict(dict): pass def __init__(self, config=None, **kwargs): + """ Create Config. + + Parameters + ---------- + config : dict, Config, list, tuple or None + An object to initialize Config. + + If dict, all keys with slashes and values are parsed into nested structure of dicts, + and the resulting dictionary is saved to self.config. + For example, `{'a/b': 1, 'c/d/e': 2}` will be parsed into `{'a': {'b': 1}, 'c': {'d': {'e': 2}}}`. + + If list or tuples, should contain key-value pairs with the length of 2. + For example, `[('a/b', 1), ('c/d/e', 2)]` will be parsed into `{'a': {'b': 1}, 'c': {'d': {'e': 2}}}`. + + If an instance of Config, config is saved to self.config. + + If None, empty dictionary is created. + kwargs : + Parameters from kwargs also are parsed and saved to self.config. + """ self.config = {} if config is None: @@ -19,41 +39,64 @@ def __init__(self, config=None, **kwargs): self.put(key, value) def parse(self, config): + """ Parses flatten config with slashes. + + Parameters + ---------- + config : dict, Config, list or tuple + + Returns + ------- + self : Config + + """ items = config.items() if isinstance(config, dict) else dict(config).items() for key, value in items: - if isinstance(key, str): + if isinstance(key, str): # if key contains multiple consecutive '/' key = '/'.join(s for s in key.split('/') if s) self.put(key, value) return self def put(self, key, value): + """ Put a new key into config recursively. + + Parameters + ---------- + key : hashable object + Key to add. '/' is used to put value into nested dict. + value : misc + + """ + if isinstance(value, dict): # for example, value = {'a/b': 3}, and we need to parse it before put + value = Config(value).config if isinstance(key, str): - d = self.config + + config = self.config levels = key.split('/') + last_level = levels[-1] - # Iterate to the last level for level in levels[:-1]: - prev_d = d - if level not in d: - d[level] = {} - d = d[level] - - # Update the last leaf - if isinstance(value, dict) and levels[-1] in d and isinstance(d[levels[-1]], dict): - d[levels[-1]].update(value) + prev_config = config + if level not in config: + config[level] = {} + config = config[level] + + if isinstance(value, dict) and last_level in config and isinstance(config[last_level], dict): + config[last_level].update(value) else: - if isinstance(d, dict): - d[levels[-1]] = value + # for example, we try to set config['a/b/c'] = 3, where config = Config({'a/b': 1}) + if isinstance(config, dict): + config[last_level] = value else: - prev_d[level] = {levels[-1]: value} + prev_config[level] = {last_level: value} else: self.config[key] = value def _get(self, key, default=None, has_default=False, pop=False): - + """ Consecutively retrieve values for a given key if the key contains '/'. """ method = 'get' if not pop else 'pop' method = getattr(self.config, method) @@ -62,14 +105,21 @@ def _get(self, key, default=None, has_default=False, pop=False): key = [key] unpack = True + # Provide `default` for each variable in key + if has_default: + if isinstance(default, (list, tuple)) and len(key) != 1 and len(default) != len(key): + raise ValueError() # + elif not isinstance(default, (list, tuple)) and len(key) != 1: + default = [default] * len(key) + ret_vars = [] for variable in key: + if isinstance(variable, str) and '/' in variable: value = self.config levels = variable.split('/') values = [] - # Iterate to the last level for level in levels: if not isinstance(value, dict): if has_default: @@ -81,8 +131,10 @@ def _get(self, key, default=None, has_default=False, pop=False): raise KeyError(level) value = value[level] values.append(value) + if pop: - del values[-2][level] + del values[-2][level] # delete the last level from the parent dict + else: if variable not in self.config: if has_default: @@ -94,7 +146,7 @@ def _get(self, key, default=None, has_default=False, pop=False): value = Config(value) ret_vars.append(value) - + ret_vars = ret_vars[0] if unpack else tuple(ret_vars) return ret_vars @@ -112,7 +164,8 @@ def get(self, key, default=None): Returns ------- - Single value or a tuple. + value : misc + Single value or a tuple. """ value = self._get(key, default=default, has_default=True) @@ -132,7 +185,8 @@ def pop(self, key, **kwargs): Returns ------- - Single value or a tuple. + value : misc + Single value or a tuple. """ has_default = 'default' in kwargs default = kwargs.get('default') @@ -146,88 +200,86 @@ def __repr__(self): def __getitem__(self, key): value = self._get(key) return value - + def update(self, other=None, **kwargs): - iterable = other if isinstance(other, (dict, tuple, list)) else kwargs - - if isinstance(iterable, dict): - items = iterable.items() - elif isinstance(iterable, (tuple, list)): - items = iterable - if any([not isinstance(item, (tuple, list)) for item in items]): - raise ValueError('Tuple or list should contain only tuples or lists') - if any([len(item) != 2 for item in items]): - raise ValueError('Tuples in list should represent pairs key-value' - ', and therefore must be always the length of 2') - - for key, value in iterable.items(): + other = other or {} + if not isinstance(other, (dict, tuple, list)): + raise TypeError(f'{type(other)} object is not iterable') + + self.parse(other) + + for key, value in kwargs.items(): self.put(key, value) - + def keys(self, flatten=False): """ Returns config keys Parameters ---------- flatten : bool - if False, keys will be getted from first level of nested dict, else from the last + If False, keys will be got from first level of nested dict, else from the last. Returns ------- - dict_keys + keys : dict_keys + """ if flatten: keys = self.flatten().keys() else: keys = self.config.keys() return keys - + def values(self, flatten=False): """ Return config values Parameters ---------- flatten : bool - if False, values will be getted from first level of nested dict, else from the last + If False, values will be got from first level of nested dict, else from the last. Returns ------- - dict_values + values : dict_values + """ if flatten: values = self.flatten().values() else: values = self.config.values() return values - + def items(self, flatten=False): """ Returns config items Parameters ---------- flatten : bool - if False, keys and values will be getted from first level of nested dict, else from the last + If False, keys and values will be got from first level of nested dict, else from the last. Returns ------- - dict_items + items : dict_items + """ if flatten: items = self.flatten().items() else: items = self.config.items() return items - + def flatten(self, config=None): """ Transforms nested dict into flatten dict. Parameters ---------- config : dict, Config or None - If None self will be parsed else config. + If None `self.config` will be parsed else config. Returns ------- new_config : dict + """ config = self.config if config is None else config new_config = {} @@ -243,7 +295,7 @@ def flatten(self, config=None): new_config[key] = value return new_config - + def __setitem__(self, key, value): if key in self.config: self.pop(key, default=None) @@ -251,32 +303,32 @@ def __setitem__(self, key, value): def __delitem__(self, key): self.pop(key) - + def copy(self): """ Create a shallow copy of the instance. """ return Config(self.config.copy()) - + def __getattr__(self, key): if key in self.config: value = self.config.get(key) value = Config(value) if isinstance(value, dict) else value return value raise AttributeError(key) - + def __add__(self, other): if isinstance(other, dict) and not isinstance(other, Config): other = Config(other) if isinstance(other, Config): return Config([*self.flatten().items(), *other.flatten().items()]) return NotImplemented - + def __iter__(self): return iter(self.config) - + def __repr__(self): lines = ['\n' + 4 * ' ' + line for line in pformat(self.config).split('\n')] return f"Config({''.join(lines)})" - + def __iadd__(self, other): if isinstance(other, dict): self.update(other) @@ -288,10 +340,10 @@ def __radd__(self, other): if isinstance(other, dict): other = Config(other) return other.__add__(self) - + def __len__(self): return len(self.config) - + def __eq__(self, other): self_ = self.flatten() print(self_, 'self_') From 1877af25661b413d8293d5342da32798f8323bb8 Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Tue, 12 Dec 2023 15:08:22 +0000 Subject: [PATCH 15/17] Allow default to be unique for each variable --- batchflow/config.py | 71 +++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index 6e382272f..cb48baee4 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -1,8 +1,11 @@ from pprint import pformat class Config(dict): + + # Should be defined temporarily for the already pickled configs class IAddDict(dict): pass + def __init__(self, config=None, **kwargs): """ Create Config. @@ -50,7 +53,15 @@ def parse(self, config): self : Config """ - items = config.items() if isinstance(config, dict) else dict(config).items() + if isinstance(config, Config): + items = config.items(flatten=True) # suppose we have config = {'a': {'b': {'c': 1}}}, + # and we try to update config with other = {'a': {'b': {'d': 3}}}, + # and expect to see config = {'a': {'b': {'c': 1, 'd': 3}}} + elif isinstance(config, dict): + items = config.items() + else: + items = dict(config).items() + # items = config.items() if isinstance(config, dict) else dict(config).items() for key, value in items: if isinstance(key, str): # if key contains multiple consecutive '/' @@ -87,7 +98,7 @@ def put(self, key, value): if isinstance(value, dict) and last_level in config and isinstance(config[last_level], dict): config[last_level].update(value) else: - # for example, we try to set config['a/b/c'] = 3, where config = Config({'a/b': 1}) + # for example, we try to set config['a/b/c'] = 3, where config = Config({'a/b': 1}) and don't want error here if isinstance(config, dict): config[last_level] = value else: @@ -96,7 +107,9 @@ def put(self, key, value): self.config[key] = value def _get(self, key, default=None, has_default=False, pop=False): - """ Consecutively retrieve values for a given key if the key contains '/'. """ + """ Consecutively retrieve values for a given key if the key contains '/'. + This method supports the `default` to be unique for each variable in key. + """ method = 'get' if not pop else 'pop' method = getattr(self.config, method) @@ -106,14 +119,12 @@ def _get(self, key, default=None, has_default=False, pop=False): unpack = True # Provide `default` for each variable in key - if has_default: - if isinstance(default, (list, tuple)) and len(key) != 1 and len(default) != len(key): - raise ValueError() # - elif not isinstance(default, (list, tuple)) and len(key) != 1: - default = [default] * len(key) + if default is not None and len(key) != 1 and len(default) != len(key): + raise ValueError('You should provide `default` for each variable in `key`') # edit + default = [default] if not isinstance(default, list) else default ret_vars = [] - for variable in key: + for ix, variable in enumerate(key): if isinstance(variable, str) and '/' in variable: value = self.config @@ -121,33 +132,44 @@ def _get(self, key, default=None, has_default=False, pop=False): values = [] for level in levels: + if not isinstance(value, dict): - if has_default: - return default - raise KeyError(level) - if level not in value: - if has_default: - return default - raise KeyError(level) - value = value[level] - values.append(value) + if not has_default: + raise KeyError(level) + value = default[ix] + ret_vars.append(value) + break + + elif level not in value: + if not has_default: + raise KeyError(level) + value = default[ix] + ret_vars.append(value) + break + + else: + value = value[level] + values.append(value) if pop: del values[-2][level] # delete the last level from the parent dict else: if variable not in self.config: - if has_default: - return default - raise KeyError - value = method(variable) + if not has_default: + raise KeyError(variable) + value = default[ix] + ret_vars.append(value) + + else: + value = method(variable) if isinstance(value, dict): value = Config(value) - ret_vars.append(value) ret_vars = ret_vars[0] if unpack else tuple(ret_vars) + return ret_vars def get(self, key, default=None): @@ -206,7 +228,7 @@ def update(self, other=None, **kwargs): if not isinstance(other, (dict, tuple, list)): raise TypeError(f'{type(other)} object is not iterable') - self.parse(other) + self.parse(Config(other)) for key, value in kwargs.items(): self.put(key, value) @@ -346,7 +368,6 @@ def __len__(self): def __eq__(self, other): self_ = self.flatten() - print(self_, 'self_') other_ = Config(other).flatten() if isinstance(other, dict) else other return self_.__eq__(other_) From 03b36cc69c920b21335e2f854700b2c963b6e2bc Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Mon, 18 Dec 2023 13:08:06 +0000 Subject: [PATCH 16/17] Fix defaults --- batchflow/config.py | 155 +++++++++++++++++++++++--------------------- 1 file changed, 80 insertions(+), 75 deletions(-) diff --git a/batchflow/config.py b/batchflow/config.py index cb48baee4..08dac6014 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -1,6 +1,8 @@ +""" Config class""" from pprint import pformat class Config(dict): + """ Class for configs that can be represented as nested dicts with easy indexing by slashes. """ # Should be defined temporarily for the already pickled configs class IAddDict(dict): @@ -27,6 +29,7 @@ def __init__(self, config=None, **kwargs): kwargs : Parameters from kwargs also are parsed and saved to self.config. """ + # pylint: disable=super-init-not-called self.config = {} if config is None: @@ -40,7 +43,7 @@ def __init__(self, config=None, **kwargs): for key, value in kwargs.items(): self.put(key, value) - + def parse(self, config): """ Parses flatten config with slashes. @@ -54,14 +57,14 @@ def parse(self, config): """ if isinstance(config, Config): - items = config.items(flatten=True) # suppose we have config = {'a': {'b': {'c': 1}}}, - # and we try to update config with other = {'a': {'b': {'d': 3}}}, - # and expect to see config = {'a': {'b': {'c': 1, 'd': 3}}} + # suppose we have config = {'a': {'b': {'c': 1}}}, + # and we try to update config with other = {'a': {'b': {'d': 3}}}, + # and expect to see config = {'a': {'b': {'c': 1, 'd': 3}}} + items = config.items(flatten=True) elif isinstance(config, dict): items = config.items() else: items = dict(config).items() - # items = config.items() if isinstance(config, dict) else dict(config).items() for key, value in items: if isinstance(key, str): # if key contains multiple consecutive '/' @@ -98,11 +101,12 @@ def put(self, key, value): if isinstance(value, dict) and last_level in config and isinstance(config[last_level], dict): config[last_level].update(value) else: - # for example, we try to set config['a/b/c'] = 3, where config = Config({'a/b': 1}) and don't want error here if isinstance(config, dict): config[last_level] = value + # for example, we try to set my_config['a/b/c'] = 3, + # where my_config = Config({'a/b': 1}) and don't want error here else: - prev_config[level] = {last_level: value} + prev_config[level] = {last_level: value} # pylint: disable=undefined-loop-variable else: self.config[key] = value @@ -118,15 +122,19 @@ def _get(self, key, default=None, has_default=False, pop=False): key = [key] unpack = True - # Provide `default` for each variable in key - if default is not None and len(key) != 1 and len(default) != len(key): - raise ValueError('You should provide `default` for each variable in `key`') # edit - default = [default] if not isinstance(default, list) else default + n = len(key) + if n > 1: + default = [default] * n if not isinstance(default, list) else default + if len(default) != n: + raise ValueError('The length of `default` must be equal to the length of `key`') + else: + default = [default] ret_vars = [] for ix, variable in enumerate(key): if isinstance(variable, str) and '/' in variable: + value = self.config levels = variable.split('/') values = [] @@ -137,29 +145,29 @@ def _get(self, key, default=None, has_default=False, pop=False): if not has_default: raise KeyError(level) value = default[ix] - ret_vars.append(value) + values.append(value) break - elif level not in value: + if level not in value: if not has_default: raise KeyError(level) value = default[ix] - ret_vars.append(value) + values.append(value) break - else: - value = value[level] - values.append(value) + value = value[level] + values.append(value) if pop: - del values[-2][level] # delete the last level from the parent dict + # delete the last level from the parent dict + values[-2].pop(level, default[ix]) # pylint: disable=undefined-loop-variable else: + if variable not in self.config: if not has_default: raise KeyError(variable) value = default[ix] - ret_vars.append(value) else: value = method(variable) @@ -182,7 +190,8 @@ def get(self, key, default=None): A key in the dictionary. '/' is used to get value from nested dict. default : misc Default value if key doesn't exist in config. - Defaults to None, so that this method never raises a KeyError. + By default None, so this method never raises a KeyError. + If key has several variables, `default` can be a list with defaults for each variable. Returns ------- @@ -192,7 +201,7 @@ def get(self, key, default=None): value = self._get(key, default=default, has_default=True) return value - + def pop(self, key, **kwargs): """ Returns the value or tuple of values for key in the config. If not found, returns a default value. @@ -203,7 +212,6 @@ def pop(self, key, **kwargs): A key in the dictionary. '/' is used to get value from nested dict. default : misc Default value if key doesn't exist in config. - Defaults to None, so that this method never raises a KeyError. Returns ------- @@ -216,13 +224,6 @@ def pop(self, key, **kwargs): return value - def __repr__(self): - return repr(self.config) - - def __getitem__(self, key): - value = self._get(key) - return value - def update(self, other=None, **kwargs): other = other or {} if not isinstance(other, (dict, tuple, list)): @@ -233,6 +234,34 @@ def update(self, other=None, **kwargs): for key, value in kwargs.items(): self.put(key, value) + def flatten(self, config=None): + """ Transforms nested dict into flatten dict. + + Parameters + ---------- + config : dict, Config or None + If None `self.config` will be parsed else config. + + Returns + ------- + new_config : dict + + """ + config = self.config if config is None else config + new_config = {} + for key, value in config.items(): + if isinstance(value, dict) and len(value) > 0: + value = self.flatten(value) + for _key, _value in value.items(): + if isinstance(_key, str): + new_config[key + '/' + _key] = _value + else: + new_config[key] = {_key: _value} + else: + new_config[key] = value + + return new_config + def keys(self, flatten=False): """ Returns config keys @@ -290,33 +319,13 @@ def items(self, flatten=False): items = self.config.items() return items - def flatten(self, config=None): - """ Transforms nested dict into flatten dict. - - Parameters - ---------- - config : dict, Config or None - If None `self.config` will be parsed else config. - - Returns - ------- - new_config : dict - - """ - config = self.config if config is None else config - new_config = {} - for key, value in config.items(): - if isinstance(value, dict) and len(value) > 0: - value = self.flatten(value) - for _key, _value in value.items(): - if isinstance(_key, str): - new_config[key + '/' + _key] = _value - else: - new_config[key] = {_key: _value} - else: - new_config[key] = value + def copy(self): + """ Create a shallow copy of the instance. """ + return Config(self.config.copy()) - return new_config + def __getitem__(self, key): + value = self._get(key) + return value def __setitem__(self, key, value): if key in self.config: @@ -326,10 +335,6 @@ def __setitem__(self, key, value): def __delitem__(self, key): self.pop(key) - def copy(self): - """ Create a shallow copy of the instance. """ - return Config(self.config.copy()) - def __getattr__(self, key): if key in self.config: value = self.config.get(key) @@ -344,13 +349,6 @@ def __add__(self, other): return Config([*self.flatten().items(), *other.flatten().items()]) return NotImplemented - def __iter__(self): - return iter(self.config) - - def __repr__(self): - lines = ['\n' + 4 * ' ' + line for line in pformat(self.config).split('\n')] - return f"Config({''.join(lines)})" - def __iadd__(self, other): if isinstance(other, dict): self.update(other) @@ -363,21 +361,20 @@ def __radd__(self, other): other = Config(other) return other.__add__(self) - def __len__(self): - return len(self.config) - def __eq__(self, other): self_ = self.flatten() other_ = Config(other).flatten() if isinstance(other, dict) else other return self_.__eq__(other_) - def __getstate__(self): - """ Must be explicitly defined for pickling to work. """ - return vars(self) + def __len__(self): + return len(self.config) - def __setstate__(self, state): - """ Must be explicitly defined for pickling to work. """ - vars(self).update(state) + def __iter__(self): + return iter(self.config) + + def __repr__(self): + lines = ['\n' + 4 * ' ' + line for line in pformat(self.config).split('\n')] + return f"Config({''.join(lines)})" def __rshift__(self, other): """ Parameters @@ -390,3 +387,11 @@ def __rshift__(self, other): Pipeline object with an updated config. """ return other << self + + def __getstate__(self): + """ Must be explicitly defined for pickling to work. """ + return vars(self) + + def __setstate__(self, state): + """ Must be explicitly defined for pickling to work. """ + vars(self).update(state) From c87e766d42e7572f019d9e5ec9dce7db3e038441 Mon Sep 17 00:00:00 2001 From: EvgeniyS99 Date: Mon, 18 Dec 2023 13:22:32 +0000 Subject: [PATCH 17/17] Add `default` description in `po --- batchflow/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/batchflow/config.py b/batchflow/config.py index 08dac6014..cb8430fe1 100644 --- a/batchflow/config.py +++ b/batchflow/config.py @@ -212,6 +212,7 @@ def pop(self, key, **kwargs): A key in the dictionary. '/' is used to get value from nested dict. default : misc Default value if key doesn't exist in config. + If key has several variables, `default` can be a list with defaults for each variable. Returns -------