Source code for geowatch.cli.experimental.fixup_predict_kwcoco_metadata
#!/usr/bin/env python3
import scriptconfig as scfg
import ubelt as ub
[docs]
class FixupPredictKwcocoMetadata(scfg.DataConfig):
    """
    Update pre-0.15.1 kwcoco predictions to properly store train-time params.

    The kwcoco info section of kwcoco files produced by geowatch.fusion.predict
    only contained the "data" section of the fit configuration. This script is
    able to fix one or more of those old predicted files as long as the path to
    the model is available. Warnings that direct users to this help document
    will typically give example usage that fixes one file, but multiple files
    can be fixed at once by specifying a glob pattern. The following example
    illustrates this.

    CommandLine:

        # Say you have an old mlops directory of results
        DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase3_expt' --hardware=auto)
        MLOPS_DPATH=$DVC_EXPT_DPATH/_preeval20_bas_grid

        # Construct a glob pattern that matches the kwcoco files that need to
        # be fixed, and pass it to this script.
        python -m geowatch.cli.experimental.fixup_predict_kwcoco_metadata \\
            --coco_fpaths "$MLOPS_DPATH/pred/flat/bas_pxl/*/pred.kwcoco.zip"

    Note:
        This modifies input files INPLACE!
    """
    coco_fpaths = scfg.Value(None, help='Path to one or more predicted kwcoco files to fix')
    workers = scfg.Value(8, help='Maximum number of worker processes used to fix files in parallel')

    @classmethod
    def main(cls, cmdline=1, **kwargs):
        """
        Resolve the requested kwcoco paths and fix each one in a process pool.

        One ``fixup_pxl_pred_node_dpath`` job is submitted per resolved path.
        As jobs finish, their status messages are tallied and the running
        tally is shown through the progress manager.

        Args:
            cmdline: forwarded unchanged to ``cls.cli``.
            **kwargs: forwarded to ``cls.cli`` as its ``data`` argument.

        Example:
            >>> # xdoctest: +SKIP
            >>> from geowatch.cli.experimental.fixup_predict_kwcoco_metadata import *  # NOQA
            >>> cmdline = 0
            >>> kwargs = dict()
            >>> cls = FixupPredictKwcocoMetadata
            >>> cls.main(cmdline=cmdline, **kwargs)
        """
        import rich
        config = cls.cli(cmdline=cmdline, data=kwargs, strict=True)
        rich.print('config = ' + ub.urepr(config, nl=1))

        from kwutil import util_path
        from kwutil import util_progress
        kwcoco_fpaths = util_path.coerce_patterned_paths(config.coco_fpaths)

        pman = util_progress.ProgressManager()
        jobs = ub.JobPool(mode='process', max_workers=config.workers)
        with pman, jobs:
            # Queue one fixup job per kwcoco file.
            for coco_fpath in pman.progiter(kwcoco_fpaths, desc='submit kwcoco fixup jobs'):
                jobs.submit(fixup_pxl_pred_node_dpath, coco_fpath)

            # Tally how many files ended with each status message.
            # ``job.result()`` re-raises any exception from the worker, so a
            # single failing file aborts the collection loop.
            stats = ub.ddict(int)
            for job in pman.progiter(jobs.as_completed(), desc='collect fixup jobs'):
                status = job.result()
                stats[status['message']] += 1
                pman.update_info(f'stats = {ub.urepr(stats, nl=1)}')
[docs]
def fixup_pxl_pred_node_dpath(coco_fpath):
    """
    Ensure one predicted kwcoco file stores the full fit configuration.

    The file's ``info`` section is searched for the single process record
    named ``geowatch.tasks.fusion.predict``. If that record's ``fit_config``
    lacks the ``data`` or ``trainer`` key, the config is re-read from the
    ``package_header/config.yaml`` member of the model package referenced by
    the record, stored in the record, and the dataset is written back out
    with ``dset.dump()`` (i.e. the input file is modified in place).

    Note:
        Despite the name, the argument is a path to a kwcoco file, not a
        node directory.

    Args:
        coco_fpath: path to the predicted kwcoco file to inspect / fix.

    Returns:
        dict: a status dictionary with a single ``'message'`` key. The message
        is ``'Already had updated fit config'``, ``'Updated metadata'``, or the
        empty string when the record has no ``fit_config`` entry at all.

    Raises:
        AssertionError: if the file does not contain exactly one fusion
            predict process record, or if the model package contains no
            ``package_header/config.yaml`` member.
    """
    import kwcoco
    from kwutil.util_yaml import Yaml
    import zipfile

    messages = []
    dset = kwcoco.CocoDataset(coco_fpath)

    # Collect every info record written by the fusion predict process.
    candidates = [
        item for item in dset.dataset['info']
        if item['type'] == 'process' and
        item['properties']['name'] == 'geowatch.tasks.fusion.predict'
    ]
    # Raise explicitly instead of using ``assert`` so the check still runs
    # under ``python -O``; the exception type is unchanged.
    if len(candidates) != 1:
        raise AssertionError(
            f'Expected exactly 1 fusion predict process record in '
            f'{coco_fpath!r}, but found {len(candidates)}')
    pred_item = candidates[0]
    extra = pred_item['properties']['extra']

    # NOTE(review): when 'fit_config' is missing entirely nothing is changed
    # and the returned message is empty. This preserves the existing behavior;
    # confirm whether such files should be fixed as well.
    if 'fit_config' in extra:
        if set(extra['fit_config']).issuperset({'data', 'trainer'}):
            messages.append('Already had updated fit config')
        else:
            package_fpath = pred_item['properties']['config']['package_fpath']
            fit_config = None
            with zipfile.ZipFile(package_fpath) as zfile:
                # Use the first archive member that looks like the packaged
                # training config.
                for name in zfile.namelist():
                    if name.endswith('package_header/config.yaml'):
                        config_text = zfile.read(name).decode('utf8')
                        fit_config = Yaml.loads(config_text, backend='pyyaml')
                        break
            # Never write a missing config back into the file.
            if fit_config is None:
                raise AssertionError(
                    f'No package_header/config.yaml found in model package '
                    f'{package_fpath!r} (referenced by {coco_fpath!r})')
            extra['fit_config'] = fit_config
            dset.dump()
            messages.append('Updated metadata')

    status = {'message': '\n'.join(messages)}
    return status
# Module-level aliases: ``__cli__`` names the config class for this script and
# ``main`` is its classmethod entry point.
__cli__ = FixupPredictKwcocoMetadata
main = FixupPredictKwcocoMetadata.main


if __name__ == '__main__':
    """
    CommandLine:
        python ~/code/geowatch/dev/oneoffs/fixup_predict_kwcoco_metadata.py
    """
    main()