Source code for geowatch.utils.util_dotdict

"""
Utilities for dictionaries where dots in keys represent nestings
"""
import ubelt as ub
import pygtrie



[docs]
class DotDict(ub.UDict):
    """
    A flat dictionary whose string keys use "." to separate nesting levels.

    I'm sure this data structure exists on pypi.
    This should be replaced with that if we find it.

    SeeAlso:
        DotDictDataFrame

    Example:
        >>> from geowatch.utils.util_dotdict import *  # NOQA
        >>> self = DotDict({
        >>>     'proc1.param1': 1,
        >>>     'proc1.param2': 2,
        >>>     'proc2.param1': 3,
        >>>     'proc2.param2': 4,
        >>>     'proc3.param1': 5,
        >>>     'proc3.param2': 6,
        >>>     'proc4.part1.param1': 7,
        >>>     'proc4.part1.param2': 8,
        >>>     'proc4.part2.param2': 9,
        >>>     'proc4.part2.param2': 10,
        >>> })
        >>> self.get('proc1')
        >>> self.prefix_get('proc4')
        >>> 'proc1' in self

        >>> nested = self.to_nested()
        >>> recon = DotDict.from_nested(nested)
        >>> assert nested != self
        >>> assert recon == self
    """

    def __init__(self, /, *args, **kwargs):
        """
        Initialize the dictionary and an empty cache for the lazily built tries.

        Args:
            *args: positional arguments forwarded unchanged to the parent
                constructor.
            **kwargs: keyword arguments forwarded unchanged to the parent
                constructor.
        """
        super().__init__(*args, **kwargs)
        # Tries work well with prefix stuff, but they may be too complex for
        # what we really need to do here.
        # The cache starts empty; the ``_prefix_trie`` and ``_suffix_trie``
        # properties fill it on first use.
        # NOTE(review): this constructor only creates the cache. Keeping it in
        # sync with later item insertions / deletions is left to the code that
        # reads it - confirm that is handled there.
        self._trie_cache = {}


[docs]
    @classmethod
    def from_nested(cls, data):
        """
        Flatten a nested dictionary into a new instance with dotted keys.

        Each non-dict value found while walking ``data`` is stored under the
        string forms of its path components joined by ".".

        Args:
            data (Dict):
                nested data

        Returns:
            DotDict: a new instance of ``cls`` holding the flattened items.

        Note:
            Values that are dicts are never stored themselves, so a nested
            dict with no entries contributes no key to the result.
        """
        # The empty ``list_cls`` presumably keeps list-like values as leaves
        # instead of descending into them - confirm against
        # ubelt.IndexableWalker.
        walker = ub.IndexableWalker(data, list_cls=())
        flat = cls()
        for path, value in walker:
            if isinstance(value, dict):
                # Intermediate nesting level, only its contents are recorded
                continue
            flat['.'.join(str(part) for part in path)] = value
        return flat



[docs]
    def to_nested(self):
        """
        Build the nested-dictionary equivalent of this flat mapping.

        Every key is split on the "." separator and each resulting component
        becomes one nesting level; the value is stored at the innermost level.

        Returns:
            dict: nested dictionaries whose leaves are the original values.

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict(**{
            >>>     'foo.bar.baz': 1,
            >>>     'foo.bar.biz': 1,
            >>>     'foo.spam': 1,
            >>>     'eggs.spam': 1,
            >>> })
            >>> nested = self.to_nested()
            >>> print(f'nested = {ub.urepr(nested, nl=2)}')
            nested = {
                'foo': {
                    'bar': {'baz': 1, 'biz': 1},
                    'spam': 1,
                },
                'eggs': {
                    'spam': 1,
                },
            }
        """
        nested = ub.AutoDict()
        # Path-based assignment through the walker creates the intermediate
        # levels in the AutoDict as needed.
        setter = ub.IndexableWalker(nested)
        for dotted_key, value in self.items():
            setter[dotted_key.split('.')] = value
        return nested.to_dict()



[docs]
    def to_nested_keys(self):
        """
        Build a nested dictionary whose leaves are the full dotted keys.

        The nesting structure is the same as in :func:`DotDict.to_nested`, but
        each leaf holds the original dotted key string instead of the value
        stored under that key.

        Only iteration over ``self`` is used, so any iterable of dotted
        strings can stand in for ``self``.

        Returns:
            dict: nested dictionaries whose leaves are dotted key strings.

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict(**{
            >>>     'foo.bar.baz': 1,
            >>>     'foo.bar.biz': 1,
            >>>     'foo.spam': 1,
            >>>     'eggs.spam': 1,
            >>> })
            >>> nested = self.to_nested_keys()
            >>> print(f'nested = {ub.urepr(nested, nl=2)}')
            nested = {
                'foo': {
                    'bar': {'baz': 'foo.bar.baz', 'biz': 'foo.bar.biz'},
                    'spam': 'foo.spam',
                },
                'eggs': {
                    'spam': 'eggs.spam',
                },
            }
        """
        nested = ub.AutoDict()
        setter = ub.IndexableWalker(nested)
        for dotted_key in self:
            setter[dotted_key.split('.')] = dotted_key
        return nested.to_dict()


    @property
    def _prefix_trie(self):
        """
        A ``pygtrie.StringTrie`` (separator ".") mapping each key to itself.

        The trie is memoized in ``self._trie_cache`` under 'prefix_trie',
        together with the key sequence it was built from
        ('prefix_trie_keys'). It is rebuilt whenever the current keys differ
        from that snapshot, so items added or removed after the first lookup
        are reflected instead of silently served from a stale trie.

        Returns:
            pygtrie.StringTrie
        """
        cache = self._trie_cache
        # Comparing against a snapshot costs one pass over the keys per
        # access; that is the price of never returning an out-of-date trie.
        current_keys = tuple(self.keys())
        is_stale = (
            cache.get('prefix_trie', None) is None or
            cache.get('prefix_trie_keys', None) != current_keys
        )
        if is_stale:
            _trie_data = {k: k for k in current_keys}
            cache['prefix_trie'] = pygtrie.StringTrie(_trie_data, separator='.')
            cache['prefix_trie_keys'] = current_keys
        return cache['prefix_trie']

    @property
    def _suffix_trie(self):
        """
        A ``pygtrie.StringTrie`` (separator ".") keyed by component-reversed keys.

        Each key has its dot-separated components reversed (``'a.b.c'``
        becomes ``'c.b.a'``) and maps to the original key, which turns a
        suffix query on the original keys into a prefix query on this trie.

        The trie is memoized in ``self._trie_cache`` under 'suffix_trie',
        together with the key sequence it was built from
        ('suffix_trie_keys'). It is rebuilt whenever the current keys differ
        from that snapshot, so items added or removed after the first lookup
        are reflected instead of silently served from a stale trie.

        Returns:
            pygtrie.StringTrie
        """
        cache = self._trie_cache
        # Comparing against a snapshot costs one pass over the keys per
        # access; that is the price of never returning an out-of-date trie.
        current_keys = tuple(self.keys())
        is_stale = (
            cache.get('suffix_trie', None) is None or
            cache.get('suffix_trie_keys', None) != current_keys
        )
        if is_stale:
            reversed_keys = {
                '.'.join(reversed(k.split('.'))): k
                for k in current_keys
            }
            cache['suffix_trie'] = pygtrie.StringTrie(reversed_keys, separator='.')
            cache['suffix_trie_keys'] = current_keys
        return cache['suffix_trie']


[docs]
    def suffix_get(self, suffix, default=ub.NoParam, backend='trie'):
        """
        Retrieve all key-value pairs whose keys end with a given dot-suffix.

        A key matches when it equals ``suffix`` or ends with ``'.' + suffix``,
        i.e. matching only happens on whole dot-separated components.

        Args:
            suffix (str): dot-separated suffix string
            default: value returned when no key matches. If unspecified, a
                KeyError is raised instead.
            backend (str): 'trie' or 'loop'. Both backends select the same
                keys and treat a miss the same way.

        Returns:
            DotDict: the matching items under their full original keys, or
            ``default`` if nothing matched and a default was given.

        Raises:
            KeyError: if no key matches and no default was given.
            ValueError: if ``backend`` is neither 'trie' nor 'loop'.

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict({
            >>>     'a.b.c': 1,
            >>>     'x.b.c': 2,
            >>>     'z.y': 3,
            >>> })
            >>> self.suffix_get('b.c')
            {'a.b.c': 1, 'x.b.c': 2}
            >>> assert self.suffix_get('nope', default=None, backend='loop') is None
            >>> assert self.suffix_get('nope', default=None, backend='trie') is None
        """
        if backend == 'loop':
            tail = '.' + suffix
            matched_keys = [
                k for k in self.keys() if k.endswith(tail) or k == suffix
            ]
        elif backend == 'trie':
            rev_suffix = '.'.join(reversed(suffix.split('.')))
            try:
                matched_keys = list(self._suffix_trie.values(rev_suffix))
            except KeyError:
                # The trie signals "nothing under this prefix" with KeyError.
                # Normalize to an empty match so a miss is handled in one
                # place for both backends.
                matched_keys = []
        else:
            raise ValueError(f'Unknown backend={backend}')

        if not matched_keys:
            if default is ub.NoParam:
                raise KeyError(suffix)
            return default
        return DotDict({k: self[k] for k in matched_keys})



[docs]
    def prefix_get(self, key, default=ub.NoParam):
        """
        Retrieve the items stored under a dot-prefix, with the prefix removed.

        Args:
            key (str): dot-separated prefix to look up
            default: value returned when the prefix lookup fails. If
                unspecified, the KeyError from the lookup propagates.

        Returns:
            DotDict: matching items, where ``len(key) + 1`` leading characters
            (the prefix and its trailing ".") are stripped from each key. If
            the lookup fails and a default was given, ``default`` is returned.

        Raises:
            KeyError: if the prefix lookup fails and no default was given.

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict(**{
            >>>     'foo.bar.baz': 1,
            >>>     'foo.bar.biz': 1,
            >>>     'foo.spam': 1,
            >>>     'eggs.spam': 1,
            >>> })
            >>> self.prefix_get('foo')
            {'bar.baz': 1, 'bar.biz': 1, 'spam': 1}
        """
        try:
            matched_keys = self._prefix_trie.values(key)
        except KeyError:
            if default is ub.NoParam:
                raise
            return default

        # Number of leading characters to drop: the prefix plus one separator
        n_strip = len(key) + 1
        stripped = DotDict()
        for full_key in matched_keys:
            stripped[full_key[n_strip:]] = self[full_key]
        return stripped



[docs]
    def suffix_subdict(self, suffixes, backend='trie'):
        """
        Filter DotDict to only contain keys ending with any given suffixes.

        A key matches a suffix when it equals it or ends with
        ``'.' + suffix``. Suffixes that match nothing are ignored by both
        backends, so the result is simply empty if no suffix matches.

        Args:
            suffixes (List[str]): list of dot-suffixes
            backend (str): 'trie' or 'loop'

        Returns:
            DotDict: a new instance of this class with the matching items
            under their full original keys. With the 'trie' backend the
            matches are gathered in a set, so the item order is unspecified.

        Raises:
            ValueError: if ``backend`` is neither 'trie' nor 'loop'.

        References:
            https://chatgpt.com/c/6841a161-2cd4-8002-ad9b-5593f5a2d70c

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict({
            >>>     'proc1.param1': 1,
            >>>     'proc2.param1': 2,
            >>>     'proc3.param2': 3,
            >>>     'proc4.part1.param1': 4,
            >>>     'proc4.part2.param2': 5,
            >>> })
            >>> new = self.suffix_subdict(['param1', 'part2.param2'])
            >>> print(f'new = {ub.urepr(new, nl=1, sort=1)}')
            new = {
                'proc1.param1': 1,
                'proc2.param1': 2,
                'proc4.part1.param1': 4,
                'proc4.part2.param2': 5,
            }
            >>> # A suffix without any match is skipped by both backends
            >>> assert self.suffix_subdict(['nope'], backend='trie') == {}
            >>> assert self.suffix_subdict(['nope'], backend='loop') == {}
        """
        if backend == 'loop':
            result = {
                k: v for k, v in self.items()
                if any(k.endswith('.' + suf) or k == suf for suf in suffixes)
            }
        elif backend == 'trie':
            reversed_trie = self._suffix_trie
            result_keys = set()
            for suf in suffixes:
                rev_suf = '.'.join(reversed(suf.split('.')))
                try:
                    result_keys.update(reversed_trie.values(rev_suf))
                except KeyError:
                    # It's okay if a suffix has no matches; this mirrors the
                    # loop backend and prefix_subdict.
                    continue
            result = {k: self[k] for k in result_keys}
        else:
            raise ValueError(f'Unknown backend={backend}')
        return self.__class__(result)



[docs]
    def prefix_subdict(self, prefixes, backend='trie'):
        """
        Select the items whose keys start with any of the given dot-prefixes.

        A key matches a prefix when it equals it or starts with
        ``prefix + '.'``. Prefixes that match nothing are ignored.

        Args:
            prefixes (List[str]): list of dot-prefixes
            backend (str): 'trie' or 'loop'

        Returns:
            DotDict: a new instance of this class with the matching items
            under their full original keys.

        Raises:
            ValueError: if ``backend`` is neither 'trie' nor 'loop'.

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict({
            >>>     'proc1.param1': 1,
            >>>     'proc1.param2': 2,
            >>>     'proc2.param1': 3,
            >>>     'proc3.param2': 4,
            >>>     'proc4.part1.param1': 5,
            >>>     'proc4.part2.param2': 6,
            >>> })
            >>> new = self.prefix_subdict(['proc1', 'proc4.part1'])
            >>> print(f'new = {ub.urepr(new, nl=1, sort=1)}')
            new = {
                'proc1.param1': 1,
                'proc1.param2': 2,
                'proc4.part1.param1': 5,
            }
        """
        if backend == 'trie':
            lookup = self._prefix_trie
            matched = set()
            for prefix in prefixes:
                try:
                    found = lookup.values(prefix)
                except KeyError:
                    # It's okay if a prefix has no matches
                    continue
                matched.update(found)
            selected = {key: self[key] for key in matched}
        elif backend == 'loop':
            selected = {}
            for key, value in self.items():
                if any(key.startswith(pref + '.') or key == pref for pref in prefixes):
                    selected[key] = value
        else:
            raise ValueError(f'Unknown backend={backend}')
        return self.__class__(selected)



[docs]
    def add_prefix(self, prefix):
        """
        Return a copy in which every key is prepended with ``prefix + '.'``.

        Args:
            prefix (str): the new leading component for every key

        Returns:
            DotDict: a new instance of this class; ``self`` is not modified.

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict({'param1': 1, 'part1.param2': 2})
            >>> assert self.add_prefix('proc') == {'proc.param1': 1, 'proc.part1.param2': 2}
        """
        prefixed = {prefix + '.' + key: value for key, value in self.items()}
        return self.__class__(prefixed)



[docs]
    def insert_prefix(self, prefix, index):
        """
        Return a copy with a new component inserted into every key.

        Each key is split on ".", ``prefix`` is inserted into the list of
        components at position ``index`` (with ``list.insert`` semantics), and
        the components are joined back together.

        Args:
            prefix (str): prefix to insert
            index (int): the depth to insert the new param

        Returns:
            DotDict: a new instance of this class; ``self`` is not modified.

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict({
            >>>     'proc1.param1': 1,
            >>>     'proc1.param2': 2,
            >>>     'proc2.param1': 3,
            >>>     'proc4.part1.param2': 8,
            >>>     'proc4.part2.param2': 9,
            >>>     'proc4.part2.param2': 10,
            >>> })
            >>> new = self.insert_prefix('foo', index=1)
            >>> print('self = {}'.format(ub.urepr(self, nl=1)))
            >>> print('new = {}'.format(ub.urepr(new, nl=1)))
        """
        rekeyed = {}
        for old_key, value in self.items():
            parts = old_key.split('.')
            parts.insert(index, prefix)
            rekeyed['.'.join(parts)] = value
        return self.__class__(rekeyed)



[docs]
    def query_keys(self, col):
        """
        Yield every key that has ``col`` as one of its dot-separated parts.

        Args:
            col (str): a single key component to search for, at any depth

        Yields:
            str: each full key containing ``col`` as a whole component.

        Example:
            >>> from geowatch.utils.util_dotdict import *  # NOQA
            >>> self = DotDict({
            >>>     'proc1.param1': 1,
            >>>     'proc1.param2': 2,
            >>>     'proc2.param1': 3,
            >>>     'proc4.part1.param2': 8,
            >>>     'proc4.part2.param2': 9,
            >>>     'proc4.part2.param2': 10,
            >>> })
            >>> list(self.query_keys('param1'))

        Ignore:
            could use _trie_iteritems
            trie = self._prefix_trie
        """
        yield from (
            full_key for full_key in self.keys()
            if col in set(full_key.split('.'))
        )



[docs]
    def print_graph(self):
        """
        Print a text rendering of this dictionary as a graph.

        Delegates to :func:`explore_nested_dict`, which converts the data to a
        graph and writes it out with ``rich.print``.
        """
        # NOTE(review): the flat dictionary itself is passed here, so every
        # dotted key is a single-component path and the graph presumably has
        # no edges - confirm whether ``self.to_nested()`` was intended.
        explore_nested_dict(self)



    # def __contains__(self, key):
    #     if super().__contains__(key):
    #         return True
    #     else:
    #         subkeys = []
    #         subkeys.extend(self._prefix_trie.values(key))
    #         return bool(subkeys)

    # def get(self, key, default=ub.NoParam):
    #     if default is ub.NoParam:
    #         return self[key]
    #     else:
    #         try:
    #             return self[key]
    #         except KeyError:
    #             return default

    # def __getitem__(self, key):
    #     try:
    #         return super().__getitem__(key)
    #     except KeyError:
    #         subkeys = []
    #         subkeys.extend(self._prefix_trie.values(key))
    #         return self.__class__([(k, self[k]) for k in subkeys])



[docs]
def dotdict_to_nested(d):
    """
    Function form of :func:`DotDict.to_nested`.

    Args:
        d (Dict[str, Any]): a flat mapping with dotted string keys. It is
            passed as ``self`` to the unbound method, so it need not be a
            DotDict instance.

    Returns:
        the result of ``DotDict.to_nested`` applied to ``d``.
    """
    return DotDict.to_nested(d)




[docs]
def dotkeys_to_nested(keys):
    """
    Function form of :func:`DotDict.to_nested_keys` for a bare list of keys.

    Args:
        keys (List[str]): a list of dotted key names

    Returns:
        the result of ``DotDict.to_nested_keys`` applied to ``keys``.
    """
    # this is abusing duck typing: ``keys`` is passed as ``self`` to the
    # unbound method instead of a DotDict instance.
    return DotDict.to_nested_keys(keys)




[docs]
def indexable_to_graph(data):
    """
    Convert a nested indexable structure into a directed networkx graph.

    Every entry produced by ``ub.IndexableWalker(data)`` becomes a node named
    by its path components joined with ".". Each node stores the attributes
    'path', 'value' and 'label'. The label is the last path component, and
    for values that are not themselves indexable containers it also includes
    the value's type name and the value. Nodes whose path has more than one
    component get an edge from their parent node.

    Args:
        data: a nested container that ``ub.IndexableWalker`` can traverse

    Returns:
        networkx.DiGraph: the graph described above.
    """
    import networkx as nx
    walker = ub.IndexableWalker(data)
    container_types = walker.indexable_cls
    graph = nx.DiGraph()
    for path, value in walker:
        parts = [str(part) for part in path]
        node = '.'.join(parts)
        if isinstance(value, container_types):
            label = parts[-1]
        else:
            # Leaf values are shown together with their type and content
            label = f'{parts[-1]} : {type(value).__name__} = {value}'
        graph.add_node(node, path=path, value=value, label=label)
        if len(path) > 1:
            graph.add_edge('.'.join(parts[:-1]), node)
    return graph




[docs]
def explore_nested_dict(data):
    """
    Print a text rendering of the graph built from ``data``.

    The data is converted with :func:`indexable_to_graph` and the graph is
    written through ``write_network_text`` using ``rich.print`` as the output
    callable.

    Args:
        data: a nested container accepted by :func:`indexable_to_graph`

    TODO: some sort of textual interface
    """
    network = indexable_to_graph(data)

    from cmd_queue.util.util_networkx import write_network_text
    from rich import print as rich_print
    write_network_text(network, path=rich_print, end='')