Source code for geowatch.utils.util_dotdict

"""
Utilities for dictionaries where dots in keys represent nestings
"""
import ubelt as ub
import pygtrie


class DotDict(ub.UDict):
    """
    A flat dictionary whose keys use "." to encode nesting levels.

    Prefix and suffix queries are answered with tries that are built lazily
    from the current keys and then cached. The cache is discarded whenever
    the mapping is modified through item assignment / deletion, ``update``,
    ``setdefault``, ``pop``, ``popitem`` or ``clear``, so a query issued
    after a modification always reflects the current keys.

    Note:
        I'm sure this data structure exists on pypi. This should be replaced
        with that if we find it.

    SeeAlso:
        DotDictDataFrame

    Example:
        >>> from geowatch.utils.util_dotdict import *  # NOQA
        >>> self = DotDict({
        >>>     'proc1.param1': 1,
        >>>     'proc1.param2': 2,
        >>>     'proc2.param1': 3,
        >>>     'proc2.param2': 4,
        >>>     'proc3.param1': 5,
        >>>     'proc3.param2': 6,
        >>>     'proc4.part1.param1': 7,
        >>>     'proc4.part1.param2': 8,
        >>>     'proc4.part2.param1': 9,
        >>>     'proc4.part2.param2': 10,
        >>> })
        >>> self.get('proc1')
        >>> self.prefix_get('proc4')
        >>> 'proc1' in self
        >>> nested = self.to_nested()
        >>> recon = DotDict.from_nested(nested)
        >>> assert nested != self
        >>> assert recon == self

    Example:
        >>> # Cached tries are rebuilt after the keys change
        >>> from geowatch.utils.util_dotdict import *  # NOQA
        >>> self = DotDict({'a.b': 1})
        >>> assert self.prefix_get('a') == {'b': 1}
        >>> self['a.c'] = 2
        >>> assert self.prefix_get('a') == {'b': 1, 'c': 2}
        >>> del self['a.b']
        >>> assert self.prefix_get('a') == {'c': 2}
    """

    def __init__(self, /, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Tries work well with prefix stuff, but they may be too complex for
        # what we really need to do here.
        self._trie_cache = {}

    # ------------------------------------------------------------------
    # Cache invalidation: every standard mutator drops the cached tries.
    # ------------------------------------------------------------------

    def _invalidate_trie_cache(self):
        """
        Forget any cached tries so the next query rebuilds them.
        """
        # ``getattr`` guard: a mutator may run before ``__init__`` assigned
        # the cache (e.g. unpickling sets items before restoring attributes).
        # Rebinding (rather than clearing in place) also stops a shallow copy
        # from sharing one cache dict with the object it was copied from.
        if getattr(self, '_trie_cache', None):
            self._trie_cache = {}

    def __setitem__(self, key, value):
        super().__setitem__(key, value)
        self._invalidate_trie_cache()

    def __delitem__(self, key):
        super().__delitem__(key)
        self._invalidate_trie_cache()

    def update(self, /, *args, **kwargs):
        super().update(*args, **kwargs)
        self._invalidate_trie_cache()

    def setdefault(self, key, default=None):
        value = super().setdefault(key, default)
        self._invalidate_trie_cache()
        return value

    def pop(self, key, *args):
        value = super().pop(key, *args)
        self._invalidate_trie_cache()
        return value

    def popitem(self):
        item = super().popitem()
        self._invalidate_trie_cache()
        return item

    def clear(self):
        super().clear()
        self._invalidate_trie_cache()

    # ------------------------------------------------------------------
    # Conversions
    # ------------------------------------------------------------------

    @classmethod
    def from_nested(cls, data):
        """
        Flatten nested dictionaries into a DotDict.

        Every non-dict value reached by the walk becomes one item whose key
        is the "."-joined path (each path element passed through ``str``).

        Args:
            data (Dict): nested data

        Returns:
            DotDict: the flattened mapping
        """
        flat = cls()
        # An empty ``list_cls`` means no type is walked as a list, so
        # sequences are stored as leaf values instead of being expanded.
        walker = ub.IndexableWalker(data, list_cls=tuple())
        for path, value in walker:
            if isinstance(value, dict):
                # Intermediate nesting level, not a leaf.
                continue
            flat['.'.join(map(str, path))] = value
        return flat

    def to_nested(self):
        """
        Converts this flat DotDict into a nested representation. I.e. keys are
        broken using the "." separator, with each separator becoming a new
        nesting level.

        Returns:
            Dict: plain nested dictionaries with the original values as leaves

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict(**{
            >>>     'foo.bar.baz': 1,
            >>>     'foo.bar.biz': 1,
            >>>     'foo.spam': 1,
            >>>     'eggs.spam': 1,
            >>> })
            >>> nested = self.to_nested()
            >>> print(f'nested = {ub.urepr(nested, nl=2)}')
            nested = {
                'foo': {
                    'bar': {'baz': 1, 'biz': 1},
                    'spam': 1,
                },
                'eggs': {
                    'spam': 1,
                },
            }
        """
        auto = ub.AutoDict()
        walker = ub.IndexableWalker(auto)
        # Only ``.items()`` is required of ``self``; the module-level helper
        # ``dotdict_to_nested`` relies on that to pass in plain mappings.
        for key, value in self.items():
            walker[key.split('.')] = value
        return auto.to_dict()

    def to_nested_keys(self):
        """
        Converts this flat DotDict into a nested key representation.

        The nesting is the same as in :func:`to_nested`, but each leaf holds
        the full dotted key it came from instead of the stored value.

        Returns:
            Dict: plain nested dictionaries with full dotted keys as leaves

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict(**{
            >>>     'foo.bar.baz': 1,
            >>>     'foo.bar.biz': 1,
            >>>     'foo.spam': 1,
            >>>     'eggs.spam': 1,
            >>> })
            >>> nested = self.to_nested_keys()
            >>> print(f'nested = {ub.urepr(nested, nl=2)}')
            nested = {
                'foo': {
                    'bar': {'baz': 'foo.bar.baz', 'biz': 'foo.bar.biz'},
                    'spam': 'foo.spam',
                },
                'eggs': {
                    'spam': 'eggs.spam',
                },
            }
        """
        auto = ub.AutoDict()
        walker = ub.IndexableWalker(auto)
        # Only iteration over keys is required of ``self``; the module-level
        # helper ``dotkeys_to_nested`` relies on that to pass in a key list.
        for key in self:
            walker[key.split('.')] = key
        return auto.to_dict()

    # ------------------------------------------------------------------
    # Lazily built, cached tries
    # ------------------------------------------------------------------

    @property
    def _prefix_trie(self):
        """
        Trie over the dotted keys; each key maps to itself.
        """
        trie = self._trie_cache.get('prefix_trie', None)
        if trie is None:
            trie_data = ub.dzip(self.keys(), self.keys())
            trie = pygtrie.StringTrie(trie_data, separator='.')
            self._trie_cache['prefix_trie'] = trie
        return trie

    @property
    def _suffix_trie(self):
        """
        Trie over the keys with their dot-parts reversed; each reversed key
        maps to the original key, so a suffix query becomes a prefix query.
        """
        trie = self._trie_cache.get('suffix_trie', None)
        if trie is None:
            reversed_keys = {
                '.'.join(reversed(key.split('.'))): key
                for key in self.keys()
            }
            trie = pygtrie.StringTrie(reversed_keys, separator='.')
            self._trie_cache['suffix_trie'] = trie
        return trie

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------

    def suffix_get(self, suffix, default=ub.NoParam, backend='trie'):
        """
        Retrieve all key-value pairs whose keys end with a given dot-suffix.

        Both backends behave the same way: when nothing matches, ``default``
        is returned if it was given, otherwise a ``KeyError`` is raised.

        Args:
            suffix (str): dot-separated suffix string
            default: fallback if no matches found
            backend (str): 'trie' or 'loop'

        Returns:
            DotDict: the matching items, keyed by their full original keys

        Raises:
            KeyError: if nothing matches and no default was given
            ValueError: if ``backend`` is not recognized

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict({
            >>>     'a.b.c': 1,
            >>>     'x.b.c': 2,
            >>>     'z.y': 3,
            >>> })
            >>> self.suffix_get('b.c')
            {'a.b.c': 1, 'x.b.c': 2}
            >>> assert self.suffix_get('nope', default=None) is None
            >>> assert self.suffix_get('nope', None, backend='loop') is None
        """
        if backend == 'loop':
            dotted = '.' + suffix
            matched_keys = [
                key for key in self
                if key == suffix or key.endswith(dotted)
            ]
        elif backend == 'trie':
            rev_suffix = '.'.join(reversed(suffix.split('.')))
            try:
                matched_keys = list(self._suffix_trie.values(rev_suffix))
            except KeyError:
                # The trie signals "no node for this prefix" with KeyError.
                matched_keys = []
        else:
            raise ValueError(f'Unknown backend={backend}')

        if not matched_keys:
            if default is ub.NoParam:
                raise KeyError(suffix)
            return default
        return DotDict({key: self[key] for key in matched_keys})

    def prefix_get(self, key, default=ub.NoParam):
        """
        Retrieve the items stored under a dot-prefix, with the prefix (and
        the separator following it) stripped from the returned keys.

        Args:
            key (str): dot-separated prefix
            default: returned instead of raising when nothing matches

        Returns:
            DotDict: items under the prefix, keyed relative to it

        Raises:
            KeyError: if nothing matches and no default was given

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict(**{
            >>>     'foo.bar.baz': 1,
            >>>     'foo.bar.biz': 1,
            >>>     'foo.spam': 1,
            >>>     'eggs.spam': 1,
            >>> })
            >>> self.prefix_get('foo')
            {'bar.baz': 1, 'bar.biz': 1, 'spam': 1}
        """
        try:
            full_keys = self._prefix_trie.values(key)
        except KeyError:
            if default is ub.NoParam:
                raise
            return default
        # Skip the prefix and the "." that follows it.
        offset = len(key) + 1
        return DotDict({
            full_key[offset:]: self[full_key] for full_key in full_keys
        })

    def suffix_subdict(self, suffixes, backend='trie'):
        """
        Filter DotDict to only contain keys ending with any given suffixes.

        Suffixes that match nothing are ignored by both backends.

        Args:
            suffixes (List[str]): list of dot-suffixes
            backend (str): 'trie' or 'loop'

        Returns:
            DotDict: the matching items with their full original keys

        Raises:
            ValueError: if ``backend`` is not recognized

        References:
            https://chatgpt.com/c/6841a161-2cd4-8002-ad9b-5593f5a2d70c

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict({
            >>>     'proc1.param1': 1,
            >>>     'proc2.param1': 2,
            >>>     'proc3.param2': 3,
            >>>     'proc4.part1.param1': 4,
            >>>     'proc4.part2.param2': 5,
            >>> })
            >>> new = self.suffix_subdict(['param1', 'part2.param2'])
            >>> print(f'new = {ub.urepr(new, nl=1, sort=1)}')
            new = {
                'proc1.param1': 1,
                'proc2.param1': 2,
                'proc4.part1.param1': 4,
                'proc4.part2.param2': 5,
            }
            >>> # A suffix without matches is not an error
            >>> assert self.suffix_subdict(['param2', 'nope']) == self.suffix_subdict(['param2'])
        """
        # Materialize once so one-shot iterables work with either backend.
        suffixes = list(suffixes)
        if backend == 'loop':
            exact = set(suffixes)
            dotted = tuple('.' + suf for suf in suffixes)
            result = {
                key: value for key, value in self.items()
                if key in exact or key.endswith(dotted)
            }
        elif backend == 'trie':
            reversed_trie = self._suffix_trie
            # A dict (not a set) de-duplicates keys while keeping a
            # reproducible order: suffix by suffix, in trie iteration order.
            result = {}
            for suf in suffixes:
                rev_suf = '.'.join(reversed(suf.split('.')))
                try:
                    matched = reversed_trie.values(rev_suf)
                except KeyError:
                    continue  # It's okay if a suffix has no matches
                for key in matched:
                    result[key] = self[key]
        else:
            raise ValueError(f'Unknown backend={backend}')
        return self.__class__(result)

    def prefix_subdict(self, prefixes, backend='trie'):
        """
        Filter DotDict to only contain keys starting with any given prefixes.

        Prefixes that match nothing are ignored by both backends.

        Args:
            prefixes (List[str]): list of dot-prefixes
            backend (str): 'trie' or 'loop'

        Returns:
            DotDict: the matching items with their full original keys

        Raises:
            ValueError: if ``backend`` is not recognized

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict({
            >>>     'proc1.param1': 1,
            >>>     'proc1.param2': 2,
            >>>     'proc2.param1': 3,
            >>>     'proc3.param2': 4,
            >>>     'proc4.part1.param1': 5,
            >>>     'proc4.part2.param2': 6,
            >>> })
            >>> new = self.prefix_subdict(['proc1', 'proc4.part1'])
            >>> print(f'new = {ub.urepr(new, nl=1, sort=1)}')
            new = {
                'proc1.param1': 1,
                'proc1.param2': 2,
                'proc4.part1.param1': 5,
            }
        """
        # Materialize once so one-shot iterables work with either backend.
        prefixes = list(prefixes)
        if backend == 'loop':
            exact = set(prefixes)
            dotted = tuple(pref + '.' for pref in prefixes)
            result = {
                key: value for key, value in self.items()
                if key in exact or key.startswith(dotted)
            }
        elif backend == 'trie':
            trie = self._prefix_trie
            # A dict (not a set) de-duplicates keys while keeping a
            # reproducible order: prefix by prefix, in trie iteration order.
            result = {}
            for pref in prefixes:
                try:
                    matched = trie.values(pref)
                except KeyError:
                    continue  # It's okay if a prefix has no matches
                for key in matched:
                    result[key] = self[key]
        else:
            raise ValueError(f'Unknown backend={backend}')
        return self.__class__(result)

    # ------------------------------------------------------------------
    # Key rewriting
    # ------------------------------------------------------------------

    def add_prefix(self, prefix):
        """
        Adds a prefix to all items

        Args:
            prefix (str): new leading level; it is joined to every existing
                key with a "."

        Returns:
            DotDict: a new mapping of the same class; ``self`` is unchanged
        """
        new = self.__class__([(prefix + '.' + k, v) for k, v in self.items()])
        return new

    def insert_prefix(self, prefix, index):
        """
        Inserts a new level into the dotted path of every key.

        Args:
            prefix (str): prefix to insert
            index (int): the depth to insert the new param. Passed to
                ``list.insert`` on the split key, so 0 prepends.

        Returns:
            DotDict: a new mapping of the same class; ``self`` is unchanged

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict({
            >>>     'proc1.param1': 1,
            >>>     'proc1.param2': 2,
            >>>     'proc2.param1': 3,
            >>>     'proc4.part1.param2': 8,
            >>>     'proc4.part2.param1': 9,
            >>>     'proc4.part2.param2': 10,
            >>> })
            >>> new = self.insert_prefix('foo', index=1)
            >>> print('self = {}'.format(ub.urepr(self, nl=1)))
            >>> print('new = {}'.format(ub.urepr(new, nl=1)))
        """
        def _generate_new_items():
            sep = '.'
            for k, v in self.items():
                path = k.split(sep)
                path.insert(index, prefix)
                yield sep.join(path), v
        new = self.__class__(_generate_new_items())
        return new

    def query_keys(self, col):
        """
        Finds columns where one level has this key

        Args:
            col (str): name that must equal one whole dot-separated level
                of the key (substrings of a level do not match)

        Yields:
            str: each matching full key, in iteration order of ``self``

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict({
            >>>     'proc1.param1': 1,
            >>>     'proc1.param2': 2,
            >>>     'proc2.param1': 3,
            >>>     'proc4.part1.param2': 8,
            >>>     'proc4.part2.param1': 9,
            >>>     'proc4.part2.param2': 10,
            >>> })
            >>> list(self.query_keys('param1'))

        Ignore:
            could use _trie_iteritems
            trie = self._prefix_trie
        """
        for key in self.keys():
            # A list membership test suffices; no per-key set is needed.
            if col in key.split('.'):
                yield key

    def print_graph(self):
        """
        Print this mapping as a text graph via ``explore_nested_dict``.
        """
        explore_nested_dict(self)
# def __contains__(self, key): # if super().__contains__(key): # return True # else: # subkeys = [] # subkeys.extend(self._prefix_trie.values(key)) # return bool(subkeys) # def get(self, key, default=ub.NoParam): # if default is ub.NoParam: # return self[key] # else: # try: # return self[key] # except KeyError: # return default # def __getitem__(self, key): # try: # return super().__getitem__(key) # except KeyError: # subkeys = [] # subkeys.extend(self._prefix_trie.values(key)) # return self.__class__([(k, self[k]) for k in subkeys])
def dotdict_to_nested(d):
    """
    Expand a flat mapping with dotted keys into nested dictionaries.

    Functional wrapper that calls ``DotDict.to_nested`` unbound, with ``d``
    taking the place of ``self``.

    Args:
        d (Dict[str, Any]): mapping whose keys use "." to separate levels

    Returns:
        Dict: the nested form produced by ``DotDict.to_nested``
    """
    nested = DotDict.to_nested(d)
    return nested
def dotkeys_to_nested(keys):
    """
    Arrange dotted key names into nested dictionaries.

    Args:
        keys (List[str]): a list of dotted key names

    Returns:
        Dict: the nested form produced by ``DotDict.to_nested_keys``
    """
    # this is abusing duck typing: the method is called unbound with the key
    # list standing in for ``self``.
    nested_keys = DotDict.to_nested_keys(keys)
    return nested_keys
def indexable_to_graph(data):
    """
    Build a directed graph mirroring the nesting structure of ``data``.

    Every path visited by ``ub.IndexableWalker`` becomes a node named by the
    "."-joined path (each element passed through ``str``). A node stores the
    raw ``path``, the ``value`` found there and a display ``label``; for
    values that are not themselves indexable containers the label also shows
    the value's type name and the value. Paths longer than one element get
    an edge from their parent path.

    Args:
        data: nested indexable data accepted by ``ub.IndexableWalker``

    Returns:
        networkx.DiGraph: graph with one node per visited path
    """
    import networkx as nx
    graph = nx.DiGraph()
    walker = ub.IndexableWalker(data)
    for path, value in walker:
        parts = [str(part) for part in path]
        node_id = '.'.join(parts)
        if isinstance(value, walker.indexable_cls):
            # Containers are labeled by name only; their content shows up
            # as child nodes.
            label = parts[-1]
        else:
            label = f'{parts[-1]} : {type(value).__name__} = {value}'
        graph.add_node(node_id, path=path, value=value, label=label)
        if len(path) > 1:
            graph.add_edge('.'.join(parts[:-1]), node_id)
    return graph
def explore_nested_dict(data):
    """
    Print the structure of nested data as a text graph.

    The data is converted with ``indexable_to_graph`` and written with
    ``write_network_text``, using ``rich.print`` as the output function.

    Args:
        data: nested indexable data accepted by ``indexable_to_graph``

    TODO:
        some sort of textual interface
    """
    nested_graph = indexable_to_graph(data)
    from cmd_queue.util.util_networkx import write_network_text
    import rich
    write_network_text(nested_graph, path=rich.print, end='')