"""dictools.py
Collection of utility functions related to nested Python dictionaries.
"""
import numpy as np
[docs]
def unroll_nested_dictionary_keys(dict_: dict, max_depth: int = None) -> list:
    """List every key path found in a nested dictionary.

    Each path is the sequence of keys leading from the top level down to
    a value that is not explored further. Handy for visiting all the
    entries of a nested dictionary with a single loop.

    Parameters
    ----------
    dict_: dict
        Nested dictionary.

    max_depth: int, optional
        Maximum number of layers to descend. With `max_depth=1` only the
        top-level keys are returned (no recursion at all). If omitted,
        every nested dictionary is explored down to its leaves.

    Returns
    -------
    : list
        List of key paths, each one a list of keys, in the iteration
        order of the dictionaries.

    """
    return __unroll_nested_dictionary_keys(dict_, max_depth=max_depth)


def __unroll_nested_dictionary_keys(dict_: dict,
                                    *,
                                    max_depth: int,
                                    current_keys: list = None,
                                    current_depth: int = 1) -> list:
    """Recursive worker behind 'unroll_nested_dictionary_keys'.

    'current_keys' is the path accumulated so far and 'current_depth'
    the (1-based) layer being visited. Call the public function instead.

    """
    prefix = [] if current_keys is None else current_keys
    # Whether this layer may be descended does not depend on the entry.
    may_descend = max_depth is None or current_depth < max_depth

    paths = []
    for name, child in dict_.items():
        path = prefix + [name]
        if may_descend and isinstance(child, dict):
            # Collect the paths found one layer below.
            paths.extend(
                __unroll_nested_dictionary_keys(
                    child,
                    max_depth=max_depth,
                    current_keys=path,
                    current_depth=current_depth + 1,
                )
            )
        else:
            paths.append(path)

    return paths
[docs]
def get_value_from_nested_dict(dict_, keys: list):
    """Get a value from an arbitrarily-depth nested dictionary.

    Parameters
    ----------
    dict_: dict
        Nested dictionary.

    keys: list
        Sequence of keys necessary to get to the element inside the nested
        dictionary. An empty sequence returns 'dict_' itself.

    Returns
    -------
    : Any
        Value of the element inside the nested dictionary. It can be of
        any type, including scalars and None.

    Raises
    ------
    TypeError
        If 'dict_' is not a dictionary.

    ValueError
        If the key sequence goes deeper than the nested structure, i.e. a
        key has to be looked up inside a value which is neither a
        dictionary nor an iterable.

    KeyError
        If a key is missing from the dictionary it is looked up in.

    """
    if not isinstance(dict_, dict):
        raise TypeError("'dict_' must be a dictionary")

    value = dict_
    for key in keys:
        # Validate the container BEFORE indexing it, so that only the
        # intermediate layers are constrained and the final value may be
        # of any type (e.g. an int or None).
        if not isinstance(value, dict) and not hasattr(value, '__iter__'):
            raise ValueError("the nested dictionary shape does not match with the input key sequence")
        value = value[key]

    return value
[docs]
def set_value_to_nested_dict(dict_, keys, value, add_missing_keys=False):
    """Set a value to an arbitrarily-depth nested dictionary.

    The assignment is performed inplace.

    Parameters
    ----------
    dict_: dict
        Nested dictionary.

    keys: iterable
        Sequence of keys necessary to get to the element inside the nested
        dictionary. Any iterable is accepted (list, tuple, generator...),
        and it must contain at least one key.

    value: Any
        Value to store under the last key of 'keys'.

    add_missing_keys: bool
        If True, missing keys (layers) will be added to the nested
        dictionary.
        CAUTION: if `add_missing_keys=True`, missing intermediate layers
        are created silently instead of raising an error.

    Raises
    ------
    IndexError
        If 'keys' is empty.

    ValueError
        If an intermediate key is missing and `add_missing_keys=False`.

    """
    # Materialise once so one-shot iterables (e.g. generators) can be
    # split into "path to the parent" and "final key".
    keys = list(keys)
    if not keys:
        raise IndexError("'keys' must contain at least one key")
    *parent_keys, last_key = keys

    for key in parent_keys:
        if key not in dict_:
            if not add_missing_keys:
                raise ValueError(
                    "the nested dictionary shape does not match with the input key sequence"
                )
            dict_[key] = {}
        dict_ = dict_[key]

    dict_[last_key] = value
[docs]
def fill(dict_: dict, value, keys=None, deepcopy=False):
    """Fill an arbitrarily-depth nested dictionary with a value.

    Fill an arbitrarily-depth nested dictionary below the coordinates 'keys'
    with the value 'value'. Every non-dictionary entry found below that
    point is overwritten; the structure (keys and nesting) is preserved.

    The filling is performed inplace.

    Parameters
    ----------
    dict_: dict
        Nested dictionary.

    value: Any
        Value assigned to every non-dictionary entry.

    keys: iterable, optional
        Starting layers from where to fill the dictionary, if only a subset
        of the whole 'dict_' is desired to be filled.

    deepcopy: bool
        If True, each entry receives its own independent deep copy of
        'value' (made with `copy.deepcopy`). By default all elements
        reference the same 'value'.

    """
    if keys is not None:
        # Get to the target layer.
        for key in keys:
            dict_ = dict_[key]

    __fill(dict_, value, deepcopy=deepcopy)


def __fill(dict_: dict, value, deepcopy=False):
    """Fill an arbitrarily-depth nested dictionary with a value.

    This is the recursive function. Use the main function.

    """
    # Imported under an alias so it does not rebind the boolean
    # 'deepcopy' parameter.
    from copy import deepcopy as _deepcopy

    # Only the values of existing keys are replaced, which is safe while
    # iterating over the dictionary.
    for key, current in dict_.items():
        if isinstance(current, dict):
            __fill(current, value, deepcopy=deepcopy)
        elif deepcopy:
            # A true deep copy: 'value.copy()' would be shallow for
            # lists/dicts and leave their contents shared among entries.
            dict_[key] = _deepcopy(value)
        else:
            dict_[key] = value
def _replicate_structure_nested_dict(dict_: dict) -> dict:
    """Build an empty skeleton of a nested dictionary.

    The returned dictionary has the same keys and nesting as the input,
    with every non-dictionary value replaced by None. A non-dictionary
    input yields None.

    """
    if not isinstance(dict_, dict):
        return None

    # The recursive call maps any non-dictionary value to None, so a
    # single comprehension covers both branches.
    return {
        name: _replicate_structure_nested_dict(child)
        for name, child in dict_.items()
    }
[docs]
def get_depth(dict_: dict) -> int:
    """Return the depth of the input nested dictionary.

    A simple (non-nested) dictionary has a depth of 1.

    Assumes a homogeneous nested dictionary, and only looks for the first
    element at each layer.

    An empty dictionary counts as one layer and stops the descent, so
    `get_depth({}) == 1`. A non-dictionary input has a depth of 0.

    """
    depth = 0
    while isinstance(dict_, dict):
        depth += 1
        if not dict_:
            # Nothing to descend into; avoids the StopIteration that
            # `next()` would raise on an empty dictionary.
            break
        dict_ = next(iter(dict_.values()))

    return depth
[docs]
def dict_to_stacked_array(dict_: dict, target_length: int = None) -> tuple[np.ndarray, list]:
    """Pack the 1d-arrays of a flat dictionary into a single 2d-array.

    Each value of the NON-nested 'dict_' becomes one row of the output,
    left-aligned and padded with zeros on the right up to a common length.

    Parameters
    ----------
    dict_ : dict[str: np.ndarray]
        NON-nested Python dictionary containing numpy 1d-arrays, possibly
        of different lengths.

    target_length : int, optional
        Size of the second axis of the returned 2d-array. Defaults to the
        length of the longest array in 'dict_'. Cannot be smaller than
        the longest array in 'dict_'.

    Returns
    -------
    stacked_arrays : 2d-array
        Float array with one row per entry of 'dict_', zero-padded on the
        right wherever the original array was shorter.

    lengths : list
        Original length of each input array, in the same order as the
        rows of 'stacked_arrays'.

    Raises
    ------
    ValueError
        If 'target_length' is omitted and no array has nonzero length, or
        if 'target_length' is smaller than the length of some array.

    """
    if target_length is None:
        target_length = max((len(entry) for entry in dict_.values()), default=0)
        if target_length == 0:
            raise ValueError(
                "no arrays with nonzero length were found inside 'dict_'"
            )

    stacked_arrays = np.zeros((len(dict_), target_length), dtype=float)
    lengths = []

    for row, entry in enumerate(dict_.values()):
        size = len(entry)
        missing = target_length - size
        if missing < 0:
            raise ValueError(
                "given 'target_length' is smaller than the longest array inside 'dict_'"
            )
        # Zero-pad on the right only.
        stacked_arrays[row] = np.pad(entry, (0, missing))
        lengths.append(size)

    return stacked_arrays, lengths
def _find_level0_of_level1(dict_, key: int|str) -> int | str:
"""Finds the top level key containing the second level 'key'.
Finds the key of the uppermost level of the nested 'dict_' which contains
the second level entry with key 'key'.
Parameters
----------
dict_ : dict
Nested dictionary.
key : int | str
Key of the second level of 'dict_'.
Returns
-------
key_top : int | str
Key of the uppermost level of 'dict_'.
Raises
------
ValueError
If 'key' is not found in the second level of 'dict_'.
"""
for key_top, level1 in dict_.items():
if key in level1:
return key_top
else:
raise ValueError(f"key '{key}' was not found inside the second level of the dictionary")
[docs]
def flatten_nested_dict(dict_: dict) -> dict:
    """Collapse a nested dictionary into a single-level one.

    Every non-dictionary value of the input ends up in the output under
    a tuple holding the sequence of keys that leads to it.

    """
    return __flatten_nested_dict(dict_)


def __flatten_nested_dict(dict_, parent_keys=()):
    """Flatten 'dict_', prefixing every key path with 'parent_keys'.

    Does the actual work for 'flatten_nested_dict'.

    """
    def walk(mapping, prefix):
        # Depth-first traversal yielding (key path, value) pairs in the
        # iteration order of the dictionaries.
        for name, child in mapping.items():
            path = prefix + (name,)
            if isinstance(child, dict):
                yield from walk(child, path)
            else:
                yield path, child

    return dict(walk(dict_, parent_keys))
[docs]
def filter_nested_dict(dict_, condition, layer=0) -> dict:
    """Filter a layer of a nested dictionary.

    Filter a nested dictionary based on a condition applied to the keys of the
    specified layer.

    NOTE: Layer numbering begins with 0, as array-likes do.

    Parameters
    ----------
    dict_ : dict
        The nested dictionary to be filtered.

    condition : callable
        The condition function to apply.
        Should take a single argument, the key, and return a boolean indicating
        whether to include its related value.

    layer : int
        The layer at which to apply the condition.
        0 corresponds to the top level, 1 to the second level, and so on.
        Default is 0.

    Returns
    -------
    : dict
        Filtered version of the nested dictionary. New dictionaries are
        built down to (and including) the filtered layer; the values kept
        at that layer, and any non-dictionary value found above it, are
        the same objects as in 'dict_' (they are not copied).

    Caveats
    -------
    - The filtering does not alter the order of kept elements in 'dict_'.
    - Non-dictionary values found above the target layer are kept
      untouched.

    """
    def filter_layer(dictionary, current_layer):
        if current_layer == layer:
            return {k: v for k, v in dictionary.items() if condition(k)}
        # Not at the target layer yet: rebuild this layer and keep
        # descending through the nested dictionaries only.
        return {
            k: filter_layer(v, current_layer + 1) if isinstance(v, dict) else v
            for k, v in dictionary.items()
        }

    return filter_layer(dict_, 0)
[docs]
def get_next_item(dict_):
    """Return the first non-dictionary value of a nested dictionary.

    Follows the first entry of each layer until a value that is not a
    dictionary is reached.

    Returns
    -------
    value : Any
        First non-dictionary value found, or None if an empty dictionary
        is met along the way.

    Raises
    ------
    TypeError
        If 'dict_' is not a dictionary.

    """
    if not isinstance(dict_, dict):
        raise TypeError("'dict_' must be a dictionary")

    current = dict_
    while isinstance(current, dict):
        if not current:
            # Empty dictionary.
            return None
        current = next(iter(current.values()))

    return current
[docs]
def get_number_of_elements(dict_):
    """Count the non-dictionary values stored in a nested dictionary.

    Parameters
    ----------
    dict_ : dict
        Nested dictionary.

    Returns
    -------
    number : int
        Total number of non-dictionary values across all the layers.
        Nested dictionaries themselves are not counted.

    Raises
    ------
    TypeError
        If 'dict_' is not a dictionary.

    """
    if not isinstance(dict_, dict):
        raise TypeError("'dict_' must be a dictionary")

    number = 0
    # Explicit stack of dictionaries still to be scanned, instead of
    # recursing into each of them.
    pending = [dict_]
    while pending:
        for child in pending.pop().values():
            if isinstance(child, dict):
                pending.append(child)
            else:
                number += 1

    return number