"""
Utilities for saving the data that gave rise to particular hash values.
"""
import ubelt as ub
import shelve
import os
try:
from line_profiler import profile
except Exception:
profile = ub.identity
class ReverseHashTable:
    """
    Make a lookup table of hashes we've made, so we can refer to what the heck
    those directory names mean!

    For instance, the ``predcfg_1c530993`` component of a path like:

    /home/joncrall/data/dvc-repos/smart_expt_dvc/models/fusion/Aligned-Drop4-2022-08-08-TA1-S2-L8-ACC/pred/Drop4_BAS_Continue_10GSD_BGR_V003/Drop4_BAS_Continue_10GSD_BGR_V003_epoch=93-step=48128.pt.pt/Aligned-Drop4-2022-08-08-TA1-S2-L8-ACC_data_vali.kwcoco/predcfg_1c530993/pred.kwcoco.json

    Each table is stored under the ``geowatch/hash_rlut/<type>`` application
    directory. The source of truth is a :mod:`shelve` database that maps a
    key to a list of rows (``{'data': ..., 'blake3': ...}``); more than one
    row under a key means different data was registered with the same key.
    Text dumps of the table are written alongside it for convenience.

    Attributes:
        rlut_dpath: directory holding everything for this table type
        shelf_fpath: path of the shelve database
        text_fpath: text dump of the entire shelf
        file_dpath: directory with one text file per registered key
        lock_fpath: path of the lock file given to the Superlock
        lock: lock held while the shelf is read or written

    Example:
        >>> from geowatch.utils.reverse_hashid import *  # NOQA
        >>> data = {'test': 'data'}
        >>> key = ub.hash_data(data)[0:8]
        >>> self = ReverseHashTable(type='test-rhash')
        >>> self.register(key, data)
        >>> self.register('conflict-hash', 'conflict-data1')
        >>> self.register('conflict-hash', 'conflict-data2')
        >>> full_shelf = self.load()
        >>> print('full_shelf = {}'.format(ub.urepr(full_shelf, nl=2)))
    """

    def __init__(self, type='global'):
        """
        Args:
            type (str): name of the table; used as the subdirectory of the
                ``geowatch/hash_rlut`` application directory.
        """
        from kwutil.util_locks import Superlock
        self.rlut_dpath = ub.Path.appdir('geowatch/hash_rlut', type).ensuredir()
        self.shelf_fpath = self.rlut_dpath / 'hash_rlut.shelf'
        self.text_fpath = self.rlut_dpath / 'hash_rlut.txt'
        self.file_dpath = (self.rlut_dpath / 'hash_rlut').ensuredir()
        self.lock_fpath = self.rlut_dpath / 'flock.lock'
        self.lock = Superlock(thread_key='hash_rlut', lock_fpath=self.lock_fpath)

    def load(self):
        """
        Read the entire shelf into memory.

        Returns:
            dict: a plain-dict copy of the shelf (key -> stored value)
        """
        with self.lock:
            # Use the shelf as a context manager so the underlying database
            # handle is closed deterministically instead of being leaked
            # until garbage collection.
            with shelve.open(os.fspath(self.shelf_fpath)) as shelf:
                return dict(shelf)

    def register(self, key, data):
        """
        Record that ``key`` was produced from ``data``.

        Args:
            key (str): the hash
            data (Any): the hashed data (must be serializable)

        Returns:
            dict: has a single ``'status'`` item, which is one of:
                ``'new'`` - the key was not in the shelf before,
                ``'exists'`` - this exact data is already stored under the key,
                ``'conflict'`` - the key exists but with different data; the
                new data is appended to the rows stored under the key.
        """
        # Switches for the human-readable text dumps.
        FULL_TEXT = 1
        DPATH_TEXT = 1

        # Rows under a key are told apart by the blake3 hash of their data.
        blake3 = ub.hash_data(data, hasher='blake3')
        row = {'data': data, 'blake3': blake3}
        info = {}
        with self.lock:
            with shelve.open(os.fspath(self.shelf_fpath)) as shelf:
                if key not in shelf:
                    datas = [row]
                    shelf[key] = datas
                    info['status'] = 'new'
                else:
                    datas = shelf[key]
                    if any(other['blake3'] == blake3 for other in datas):
                        info['status'] = 'exists'
                    else:
                        info['status'] = 'conflict'
                        datas.append(row)
                        # Values read from a shelf are copies; assign back so
                        # the appended row is persisted.
                        shelf[key] = datas

                changed = info['status'] != 'exists'
                # Only copy the whole shelf when it is going to be dumped.
                full_shelf = dict(shelf) if (FULL_TEXT and changed) else None

            if changed:
                # Convenience: keep the text dumps in sync with the shelf.
                if FULL_TEXT:
                    full_text = ub.urepr(full_shelf, nl=3)
                    self.text_fpath.write_text(full_text)
                if DPATH_TEXT:
                    fpath = self.file_dpath / key
                    datas_text = ub.urepr(datas, nl=3)
                    fpath.write_text(datas_text)
        return info

    @classmethod
    def query(cls, key=None, verbose=1):
        """
        If the type of the hash is unknown, we can search in a few different
        locations for it.

        Every table directory under the ``geowatch/hash_rlut`` application
        directory is loaded and searched.

        Args:
            key (str | None): the hash to look for. If None, every entry of
                every table is returned.
            verbose (int): if truthy, show progress and print the results.

        Returns:
            List[dict]: one item per match with the keys ``'found'`` (the
                stored value), ``'type'`` (name of the table directory) and
                ``'key'``. Empty if nothing matched or no table exists yet.
        """
        rlut_root = ub.Path.appdir('geowatch/hash_rlut')
        if rlut_root.exists():
            dpaths = [path for path in rlut_root.iterdir() if path.is_dir()]
        else:
            # Nothing has been registered on this machine yet.
            dpaths = []

        candidates = []
        prog = ub.ProgIter(dpaths, desc='rlut is searching', verbose=verbose)
        for dpath in prog:
            table_type = dpath.name
            full_shelf = cls(table_type).load()
            if key is None:
                candidates.extend(
                    {'found': v, 'type': table_type, 'key': k}
                    for k, v in full_shelf.items()
                )
            elif key in full_shelf:
                candidates.append(
                    {'found': full_shelf[key], 'type': table_type, 'key': key})

        if verbose:
            print(f'Found {len(candidates)} entries for key={key}')
            print('candidates = {}'.format(ub.urepr(candidates, nl=5)))
        return candidates
@profile
def condense_config(params, type, human_opts=None, register=True):
    """
    Build a short identifier string for a parameter dictionary.

    The parameters named in ``human_opts`` are written out in readable form,
    the remaining parameters are reduced to the first 8 characters of their
    hash, and the result is prefixed with ``type``. By default the identifier
    is also registered in the reverse hash lookup table for ``type`` together
    with the complete set of parameters, so it can be decoded later.

    Args:
        params (dict): the parameters to condense
        type (str): prefix of the identifier and name of the lookup table
        human_opts (Iterable | None): keys of ``params`` to keep readable
            instead of hashing. Defaults to none of them.
        register (bool): if True, store the identifier and ``params`` in the
            ``ReverseHashTable`` for ``type``.

    Returns:
        str: ``'<type>_<readable part>_<hash>'``, or ``'<type>_<hash>'`` when
            no readable parameters are present.
    """
    from geowatch.utils.reverse_hashid import ReverseHashTable

    params = ub.udict(params)
    requested = {} if human_opts is None else human_opts

    # Partition the parameters into a readable part and a hashed part.
    readable = params & requested
    hashed = params - readable

    prefix = (ub.urepr(readable, compact=1) + '_') if readable else ''
    digest = ub.hash_data(hashed)[0:8]
    cfgstr = '{}_{}{}'.format(type, prefix, digest)

    if register:
        ReverseHashTable(type=type).register(cfgstr, params)
    return cfgstr