#!/usr/bin/env python3
"""
See Old Version:
../../../scripts/run_sc_fusion_eval3_for_baseline.py
SeeAlso:
~/code/watch-smartflow-dags/KIT_TA2_PREEVAL10_PYENV.py
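
Example Usage (a hedged sketch; the S3 paths, model path, and script name
are placeholders rather than values from this repo):

    python run_dino_sv.py \
        s3://bucket/smartflow/input_items.jsonl \
        s3://bucket/smartflow/region_R001.geojson \
        s3://bucket/smartflow/output_items.jsonl \
        --outbucket s3://bucket/smartflow/dino_sv \
        --dino_detect_config "package_fpath: /models/dino_detector.pt"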
"""
import sys
import traceback
import shutil
import scriptconfig as scfg
import ubelt as ub
from geowatch.mlops.smart_pipeline import DinoBoxDetector, SV_DinoFilter


class DinoSVConfig(scfg.DataConfig):
    """
    Run the TA-2 Dino site validation step (box detection followed by
    building filtering) as a baseline framework component.
    """
    input_path = scfg.Value(None, type=str, position=1, required=True, help=ub.paragraph(
        '''
        Path to the STAC items this step can use as inputs.
        This is usually an S3 Path.
        '''), alias=['input_stac_path'])
    input_region_path = scfg.Value(None, type=str, position=2, required=True, help=ub.paragraph(
        '''
        Path to input T&E Baseline Framework Region definition JSON
        '''))
    output_path = scfg.Value(None, type=str, position=3, required=True, help=ub.paragraph(
        '''
        Path to the STAC items that register the outputs of this stage.
        This is usually an S3 Path.
        '''), alias=['output_stac_path'])
    aws_profile = scfg.Value(None, help=ub.paragraph(
        '''
        AWS Profile to use for AWS S3 CLI commands
        '''))
    dryrun = scfg.Value(False, isflag=True, short_alias=['d'], help='Run AWS CLI commands with --dryrun flag')
    outbucket = scfg.Value(None, type=str, required=True, short_alias=['o'], help=ub.paragraph(
        '''
        S3 output directory for STAC item / asset egress
        '''))
    dino_detect_config = scfg.Value(None, type=str, help=ub.paragraph(
        '''
        Raw json/yaml or a path to a json/yaml file that specifies the
        config for DinoBoxDetector.
        '''))
    dino_filter_config = scfg.Value(None, type=str, help=ub.paragraph(
        '''
        Raw json/yaml or a path to a json/yaml file that specifies the
        config for SV_DinoFilter.
        '''))
    skip_on_fail = scfg.Value(False, help=ub.paragraph(
        '''
        If an error occurs, pass through input region / sites unchanged.
        '''))
    input_region_models_asset_name = scfg.Value('depth_filtered_regions', type=str, required=False, help=ub.paragraph(
        '''
        Which region model assets to use as input
        '''))
    input_site_models_asset_name = scfg.Value('depth_filtered_sites', type=str, required=False, help=ub.paragraph(
        '''
        Which site model assets to use as input
        '''))
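

# A programmatic invocation is also possible (a sketch with placeholder
# values; DinoSVConfig is a scriptconfig DataConfig, so its fields can be
# given as keyword arguments):
#
#     config = DinoSVConfig(
#         input_path='s3://bucket/input_items.jsonl',
#         input_region_path='s3://bucket/region.geojson',
#         output_path='s3://bucket/output_items.jsonl',
#         outbucket='s3://bucket/dino_sv',
#         dino_detect_config='package_fpath: /models/dino_detector.pt')
#     run_dino_sv(config)
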
def main():
    config = DinoSVConfig.cli(strict=True)
    print('config = {}'.format(ub.urepr(dict(config), nl=1, align=':')))
    run_dino_sv(config)


def run_dino_sv(config):
    from geowatch.cli.smartflow_ingress import smartflow_ingress
    from geowatch.cli.smartflow_egress import smartflow_egress
    from geowatch.utils.util_framework import download_region, determine_region_id
    from kwutil.util_yaml import Yaml
    from geowatch.utils import util_framework

    input_path = config.input_path
    input_region_path = config.input_region_path
    output_path = config.output_path
    outbucket = config.outbucket
    aws_profile = config.aws_profile
    dryrun = config.dryrun

    ####
    # DEBUGGING:
    # Print info about what version of the code we are running on
    from geowatch.utils.util_framework import NodeStateDebugger
    node_state = NodeStateDebugger()
    node_state.print_environment()
    node_state.print_local_invocation(config)

    # 1. Ingress data
    print("* Running baseline framework kwcoco ingress *")
    ingress_dir = ub.Path('/tmp/ingress')
    ingressed_assets = smartflow_ingress(
        input_path=input_path,
        assets=[
            'cropped_kwcoco_for_sv',
            'cropped_kwcoco_for_sv_assets',
            config.input_region_models_asset_name,
            config.input_site_models_asset_name,
        ],
        outdir=ingress_dir,
        aws_profile=aws_profile,
        dryrun=dryrun
    )
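    # Note: as used below, smartflow_ingress returns a dict-like mapping of
    # asset names to their local paths under ingress_dir.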

    # 2. Download and prune region file
    print("* Downloading and pruning region file *")
    local_region_path = '/tmp/region.json'
    download_region(
        input_region_path=input_region_path,
        output_region_path=local_region_path,
        aws_profile=aws_profile,
        strip_nonregions=True,
    )
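    # (download_region above fetches the region model to local disk, handling
    # S3 paths via aws_profile; our reading of strip_nonregions is that it
    # additionally prunes non-region features, per the "pruning" note above.)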

    # Determine the region_id in the region file.
    region_id = determine_region_id(local_region_path)
    print(f'region_id={region_id}')
    dino_boxes_kwcoco_path = ingress_dir / 'dino_boxes_kwcoco.json'

    input_region_dpath = ub.Path(ingressed_assets[config.input_region_models_asset_name])
    input_sites_dpath = ub.Path(ingressed_assets[config.input_site_models_asset_name])
    input_region_fpath = input_region_dpath / f'{region_id}.geojson'
    assert input_region_fpath.exists()

    # NOTE: we want to be using the output of SV crop, not necessarily the
    # dzyne output referenced by ingress_kwcoco_path
    # input_kwcoco_fpath = ingress_kwcoco_path
    input_kwcoco_fpath = ingressed_assets['cropped_kwcoco_for_sv']

    # FIXME: these output directories for region / site models should be
    # passed to us from the DAG
    output_sites_dpath = ingress_dir / 'sv_out_site_models'
    output_region_dpath = ingress_dir / 'sv_out_region_models'
    output_site_manifest_dpath = ingress_dir / 'tracking_manifests_sv'
    output_region_fpath = output_region_dpath / f'{region_id}.geojson'
    output_site_manifest_fpath = output_site_manifest_dpath / 'site_models_manifest.json'

    node_state.print_current_state(ingress_dir)

    # 3.1. Check that we have at least one "video" (BAS identified
    # site) to run over; if not, skip SV fusion and KWCOCO to GeoJSON
    import kwcoco
    input_coco_dset = kwcoco.CocoDataset(input_kwcoco_fpath)
    print('input_coco_dset = {}'.format(ub.urepr(input_coco_dset, nl=1)))
    num_videos = input_coco_dset.n_videos
    # Note: can't open with json here because kwcoco will save compressed files
    # with open(input_kwcoco_fpath) as f:
    #     ingress_kwcoco_data = json.load(f)
    # num_videos = len(ingress_kwcoco_data.get('videos', ()))
    print(f'num_videos={num_videos}')

    output_region_dpath.ensuredir()

    if num_videos == 0:
        # Copy the input region model into the region_models outdir to be
        # updated (rather than generated from tracking, which may not have
        # the same bounds as the original).
        # Not sure if the above case is the right comment, but leaving this
        # here to guarantee the region with site summaries is passed forward.
        # TODO: the dino code should just be robust to this.
        input_sites_dpath.copy(output_sites_dpath)
        input_region_fpath.copy(output_region_fpath)
    else:
        output_site_manifest_dpath.ensuredir()
        output_sites_dpath.ensuredir()

        # 3.2 Run DinoBoxDetector
        print("* Running Dino Detect *")
        default_dino_detect_config = ub.udict({
            'coco_fpath': input_kwcoco_fpath,
            'package_fpath': None,
            'batch_size': 1,
            'device': 0})
        dino_detect_config = (default_dino_detect_config
                              | Yaml.coerce(config.dino_detect_config or {}))
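        # Yaml.coerce accepts raw YAML/JSON text, a path to a YAML/JSON file,
        # or an existing dict, so --dino_detect_config can be given inline,
        # e.g. "package_fpath: /models/dino_detector.pt" (illustrative value).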
        if dino_detect_config.get('package_fpath', None) is None:
            raise ValueError('Requires package_fpath')
        dino_box_detector = DinoBoxDetector(root_dpath='/tmp/ingress')
        dino_box_detector.configure({
            'out_coco_fpath': dino_boxes_kwcoco_path,
            **dino_detect_config})
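        # DinoBoxDetector.command() (used below) renders the configured step
        # as a shell invocation; ub.cmd(..., system=True) executes it with
        # output streamed directly to the console.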

        try:
            ub.cmd(dino_box_detector.command(), check=True, verbose=3, system=True)

            # 3.3 Run SV_DinoFilter
            print("* Running Dino Building Filter *")
            default_dino_filter_config = ub.udict({})
            dino_filter_config = (default_dino_filter_config
                                  | Yaml.coerce(config.dino_filter_config or {}))
            dino_building_filter = SV_DinoFilter(root_dpath='/tmp/ingress')
            dino_building_filter.configure({
                'input_kwcoco': dino_boxes_kwcoco_path,
                'input_region': input_region_fpath,
                'input_sites': input_sites_dpath,
                'output_region_fpath': output_region_fpath,
                'output_sites_dpath': output_sites_dpath,
                'output_site_manifest_fpath': output_site_manifest_fpath,
                **dino_filter_config,
            })
            ub.cmd(dino_building_filter.command(), check=True, verbose=3, system=True)
        except Exception:
            if config.skip_on_fail:
                print("WARNING: Exception occurred (printed below), passing input sites / region models as output")
                traceback.print_exception(*sys.exc_info())
                shutil.copytree(input_sites_dpath, output_sites_dpath, dirs_exist_ok=True)
                shutil.copytree(input_region_dpath, output_region_dpath, dirs_exist_ok=True)
            else:
                raise
        else:
            # Validate and fix all outputs
            util_framework.fixup_and_validate_site_and_region_models(
                region_dpath=output_region_fpath.parent,
                site_dpath=output_sites_dpath,
            )

    node_state.print_current_state(ingress_dir)

    # 5. Egress (envelop KWCOCO dataset in a STAC item and egress;
    # will need to recursively copy the kwcoco output directory up to
    # the S3 bucket)
    print("* Egressing KWCOCO dataset and associated STAC item *")
    ingressed_assets['sv_out_site_models'] = output_sites_dpath
    ingressed_assets['sv_out_region_models'] = output_region_dpath
    if dino_boxes_kwcoco_path.exists():
        # Reroot kwcoco files to make downloaded results easier to work with
        ub.cmd(['kwcoco', 'reroot', f'--src={dino_boxes_kwcoco_path}',
                '--inplace=1', '--absolute=0'])
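        # (--absolute=0 rewrites asset paths relative to the kwcoco bundle,
        # so the egressed directory remains relocatable.)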
        ingressed_assets['sv_dino_boxes_kwcoco'] = dino_boxes_kwcoco_path

    smartflow_egress(ingressed_assets,
                     local_region_path,
                     output_path,
                     outbucket,
                     aws_profile=None,
                     dryrun=False,
                     newline=False)
if __name__ == "__main__":
main()