"""
Utilities for dictionaries where dots in keys represent nestings
"""
import ubelt as ub
import pygtrie
[docs]
def dotdict_to_nested(d):
auto = ub.AutoDict()
walker = ub.IndexableWalker(auto)
for k, v in d.items():
path = k.split('.')
walker[path] = v
return auto.to_dict()
[docs]
def dotkeys_to_nested(keys):
"""
Args:
keys (List[str]): a list of dotted key names
"""
auto = ub.AutoDict()
walker = ub.IndexableWalker(auto)
for k in keys:
path = k.split('.')
walker[path] = k
# print(ub.urepr(auto))
return auto.to_dict()
[docs]
class DotDict(ub.UDict):
"""
I'm sure this data structure exists on pypi.
This should be replaced with that if we find it.
SeeAlso:
DotDictDataFrame
Example:
>>> from geowatch.utils.util_dotdict import * # NOQA
>>> self = DotDict({
>>> 'proc1.param1': 1,
>>> 'proc1.param2': 2,
>>> 'proc2.param1': 3,
>>> 'proc2.param2': 4,
>>> 'proc3.param1': 5,
>>> 'proc3.param2': 6,
>>> 'proc4.part1.param1': 7,
>>> 'proc4.part1.param2': 8,
>>> 'proc4.part2.param2': 9,
>>> 'proc4.part2.param2': 10,
>>> })
>>> self.get('proc1')
>>> self.prefix_get('proc4')
>>> 'proc1' in self
>>> nested = self.to_nested()
>>> recon = DotDict.from_nested(nested)
>>> assert nested != self
>>> assert recon == self
"""
def __init__(self, /, *args, **kwargs):
super().__init__(*args, **kwargs)
self._trie_cache = {}
[docs]
@classmethod
def from_nested(cls, data):
"""
Args:
data (Dict):
nested data
"""
flat = cls()
walker = ub.IndexableWalker(data, list_cls=tuple())
for path, value in walker:
if not isinstance(value, dict):
spath = list(map(str, path))
key = '.'.join(spath)
flat[key] = value
return flat
[docs]
def to_nested(self):
return dotdict_to_nested(self)
[docs]
def to_nested_keys(self):
return dotkeys_to_nested(self)
@property
def _prefix_trie(self):
if self._trie_cache.get('prefix_trie', None) is None:
_trie_data = ub.dzip(self.keys(), self.keys())
_trie = pygtrie.StringTrie(_trie_data, separator='.')
self._trie_cache['prefix_trie'] = _trie
return self._trie_cache['prefix_trie']
# @property
# def _suffix_trie(self):
# if self._trie_cache.get('suffix_trie', None) is None:
# _trie_data = ub.dzip(self.keys(), self.keys())
# _trie = pygtrie.StringTrie(_trie_data, separator='.')
# self._trie_cache['suffix_trie'] = _trie
# return self._trie_cache['suffix_trie']
[docs]
def prefix_get(self, key, default=ub.NoParam):
try:
suffix_dict = DotDict()
full_keys = self._prefix_trie.values(key)
except KeyError:
if default is not ub.NoParam:
return default
else:
raise
else:
for full_key in full_keys:
sub_key = full_key[len(key) + 1:]
suffix_dict[sub_key] = self[full_key]
return suffix_dict
[docs]
def add_prefix(self, prefix):
"""
Adds a prefix to all items
"""
new = self.__class__([(prefix + '.' + k, v) for k, v in self.items()])
return new
[docs]
def insert_prefix(self, prefix, index):
"""
Adds a prefix to all items
Args:
prefix (str): prefix to insert
index (int): the depth to insert the new param
Example:
>>> from geowatch.utils.util_dotdict import * # NOQA
>>> self = DotDict({
>>> 'proc1.param1': 1,
>>> 'proc1.param2': 2,
>>> 'proc2.param1': 3,
>>> 'proc4.part1.param2': 8,
>>> 'proc4.part2.param2': 9,
>>> 'proc4.part2.param2': 10,
>>> })
>>> new = self.insert_prefix('foo', index=1)
>>> print('self = {}'.format(ub.urepr(self, nl=1)))
>>> print('new = {}'.format(ub.urepr(new, nl=1)))
"""
def _generate_new_items():
sep = '.'
for k, v in self.items():
path = k.split(sep)
path.insert(index, prefix)
k2 = sep.join(path)
yield k2, v
new = self.__class__(_generate_new_items())
return new
[docs]
def print_graph(self):
explore_nested_dict(self)
[docs]
def query_keys(self, col):
"""
Finds columns where one level has this key
Example:
>>> from geowatch.utils.util_dotdict import * # NOQA
>>> self = DotDict({
>>> 'proc1.param1': 1,
>>> 'proc1.param2': 2,
>>> 'proc2.param1': 3,
>>> 'proc4.part1.param2': 8,
>>> 'proc4.part2.param2': 9,
>>> 'proc4.part2.param2': 10,
>>> })
>>> list(self.query_keys('param1'))
Ignore:
could use _trie_iteritems
trie = self._prefix_trie
"""
for key in self.keys():
if col in set(key.split('.')):
yield key
# def __contains__(self, key):
# if super().__contains__(key):
# return True
# else:
# subkeys = []
# subkeys.extend(self._prefix_trie.values(key))
# return bool(subkeys)
# def get(self, key, default=ub.NoParam):
# if default is ub.NoParam:
# return self[key]
# else:
# try:
# return self[key]
# except KeyError:
# return default
# def __getitem__(self, key):
# try:
# return super().__getitem__(key)
# except KeyError:
# subkeys = []
# subkeys.extend(self._prefix_trie.values(key))
# return self.__class__([(k, self[k]) for k in subkeys])
[docs]
def indexable_to_graph(data):
import networkx as nx
graph = nx.DiGraph()
walker = ub.IndexableWalker(data)
for path, value in walker:
spath = list(map(str, path))
key = '.'.join(spath)
graph.add_node(key)
label = spath[-1]
if not isinstance(value, walker.indexable_cls):
label = f'{label} : {type(value).__name__} = {value}'
graph.nodes[key].update({
'path': path,
'value': value,
'label': label,
})
if len(path) > 1:
parent_key = '.'.join(spath[:-1])
graph.add_edge(parent_key, key)
return graph
[docs]
def explore_nested_dict(data):
"""
TODO: some sort of textual interface
"""
graph = indexable_to_graph(data)
from cmd_queue.util.util_networkx import write_network_text
import rich
write_network_text(graph, path=rich.print, end='')