#!/usr/bin/env python3
"""
SeeAlso:
~/code/watch/geowatch/cli/coco_time_combine.py
~/code/watch/geowatch/cli/queue_cli/prepare_teamfeats.py
"""
import scriptconfig as scfg
import ubelt as ub
from cmd_queue.cli_boilerplate import CMDQueueConfig
class PrepareTimeAverages(CMDQueueConfig):
    """
    Prepare a temporally averaged dataset on multiple regions

    Command line configuration consumed by ``main`` in this module. Most
    options are substituted verbatim into the ``geowatch.cli.coco_time_combine``
    command that ``main`` submits once per region. Options that the example
    invocations use but that are not declared here (e.g. ``--backend``,
    ``--run``) are presumably inherited from ``CMDQueueConfig``.
    """
    # Region selection. ``main`` special-cases the strings 'all' and
    # 'all_tne' (both list the contents of ``true_region_dpath``); any other
    # value is parsed as YAML, e.g. "[KR_R001, KR_R002]".
    regions = scfg.Value('all', type=str, help=ub.paragraph(
        '''
        The regions to time average (this is not a robust implementation)
        '''))

    # The following are forwarded to coco_time_combine under the same flag
    # names (--resolution, --time_window, --merge_method).
    resolution = '10GSD'

    time_window = '1year'

    merge_method = 'mean'

    # Joined with commas and forwarded as --remove_seasons; an empty list is
    # forwarded as the literal text "None".
    remove_seasons = scfg.Value([], nargs='+')

    # Forwarded as --spatial_tile_size.
    spatial_tile_size = None

    # Forwarded as --mask_low_quality.
    mask_low_quality = scfg.Value(True)

    # Forwarded to coco_time_combine as --workers.
    combine_workers = scfg.Value(4, help='number of workers per combine job')

    # Both bundle paths are required by ``main``. Inputs are read from
    # <input_bundle_dpath>/<region>/imgonly-<region>-rawbands.kwcoco.zip and
    # outputs are written beneath <output_bundle_dpath>/<region>/.
    input_bundle_dpath = scfg.Value(None)

    output_bundle_dpath = scfg.Value(None)

    # When true, ``main`` does not submit a step whose node reports that its
    # outputs already exist.
    skip_existing = scfg.Value(True)

    # Assigned to ``node.cache`` on every submitted pipeline node.
    cache = scfg.Value(True)

    reproject = scfg.Value(False, isflag=True, help='Enable reprojection of annotations. Requires true_site_dpath and true_region_dpath be specified')

    # Directories searched for "<region>_*.geojson" site files and
    # "<region>.geojson" region files by the reproject step.
    # ``true_region_dpath`` is also listed to discover region names when
    # ``regions`` is 'all' or 'all_tne'.
    true_site_dpath = scfg.Value(None)

    true_region_dpath = scfg.Value(None)

    # Forwarded to coco_time_combine as --max_images_per_group.
    max_images_per_group = scfg.Value(None, help=ub.paragraph(
        '''
        If specified, the averaging operation (e.g. mean/median) will only
        consider a subset of the images within each temporal window. This can
        greatly reduce the resources required to run this script at the cost of
        quality. Currently a heuristic is used to select the "highest quality"
        subset of images.
        '''))

    queue_name = scfg.Value('time-ave-queue', help='overwrite the default queue name', group='cmd-queue')
def _find_valid_regions():
    """
    Print the region name of every non-empty Drop6 ``imgonly`` kwcoco file.

    Developer helper: locates the ``phase2_data`` DVC directory, loads each
    ``Drop6/imgonly*.kwcoco*`` file, and for every dataset that contains at
    least one image prints ``- <region>``. The region is the text between the
    first and second ``-`` of the file name, truncated at the first ``.``.
    """
    import geowatch
    data_root = geowatch.find_dvc_dpath(tags='phase2_data', hardware='auto')
    drop6_dpath = data_root / 'Drop6'
    candidate_fpaths = list(drop6_dpath.glob('imgonly*.kwcoco*'))

    import kwcoco
    # Load everything up front (as a list) before any output is printed.
    loaded = list(kwcoco.CocoDataset.coerce_multiple(
        candidate_fpaths, workers='avail'))

    for coco_dset in loaded:
        if coco_dset.n_images <= 0:
            # Nothing to report for empty datasets.
            continue
        fname = ub.Path(coco_dset.fpath).name
        region_name = fname.split('-')[1].split('.')[0]
        print(f'- {region_name}')
def main(cmdline=1, **kwargs):
    """
    Build (and optionally run) a command queue that time-averages each region.

    For every chosen region up to three chained jobs are submitted:

    1. ``geowatch.cli.coco_time_combine`` reads
       ``<input>/<region>/imgonly-<region>-rawbands.kwcoco.zip`` and writes
       ``<output>/<region>/_unfielded_imgonly-<region>-rawbands.kwcoco.zip``.
    2. ``geowatch add_fields`` writes
       ``<output>/<region>/imgonly-<region>-rawbands.kwcoco.zip``.
    3. Only when ``config.reproject`` is set, ``geowatch reproject`` writes
       ``<output>/<region>/imganns-<region>-rawbands.kwcoco.zip``.

    A step is not submitted when ``config.skip_existing`` is true and its
    node reports that its outputs already exist. The queue is finally handed
    to ``config.run_queue``.

    Args:
        cmdline: forwarded as ``cmdline`` to ``PrepareTimeAverages.cli``.
        **kwargs: forwarded as ``data`` to ``PrepareTimeAverages.cli``.

    Raises:
        ValueError: if ``input_bundle_dpath`` or ``output_bundle_dpath`` is
            unset, if ``regions`` is ``'all'`` / ``'all_tne'`` without
            ``true_region_dpath``, or if ``reproject`` is requested without
            both ``true_site_dpath`` and ``true_region_dpath``.

    Example:
        >>> # xdoctest: +SKIP
        >>> cmdline = 0
        >>> kwargs = dict(
        >>> )
        >>> main(cmdline=cmdline, **kwargs)
    """
    config = PrepareTimeAverages.cli(cmdline=cmdline, data=kwargs, strict=True)
    import rich
    from kwutil.partial_format import subtemplate
    rich.print('config = ' + ub.urepr(config, nl=1))

    # Validate with real exceptions: ``assert`` is stripped under ``python -O``
    # and would let a missing path surface later as an opaque TypeError.
    if config.output_bundle_dpath is None:
        raise ValueError('output_bundle_dpath must be specified')
    if config.input_bundle_dpath is None:
        raise ValueError('input_bundle_dpath must be specified')
    if config.reproject and (config.true_site_dpath is None or
                             config.true_region_dpath is None):
        # Otherwise the reproject command would be rendered with literal
        # "None/<region>.geojson" paths.
        raise ValueError(
            'reproject requires both true_site_dpath and true_region_dpath '
            'to be specified')

    # A tuple (not a set) so an unhashable ``regions`` value passed through
    # kwargs still falls through to the YAML branch.
    if config.regions in ('all', 'all_tne'):
        if config.true_region_dpath is None:
            raise ValueError(
                'true_region_dpath must be specified when regions is '
                f'{config.regions!r}')
        region_dpath = ub.Path(config.true_region_dpath)
        # Region name is the file name up to the first ".".
        all_regions = [p.name.split('.')[0] for p in region_dpath.ls()]
        if config.regions == 'all':
            chosen_regions = all_regions
        else:
            # Keep names whose second "_"-separated token starts with "R"
            # (e.g. KR_R001). Names without an underscore are skipped rather
            # than raising IndexError.
            chosen_regions = []
            for region_name in all_regions:
                parts = region_name.split('_')
                if len(parts) > 1 and parts[1].startswith('R'):
                    chosen_regions.append(region_name)
    else:
        from kwutil.util_yaml import Yaml
        chosen_regions = Yaml.coerce(config.regions)

    from geowatch.mlops.pipeline_nodes import ProcessNode
    rich.print('chosen_regions = {}'.format(ub.urepr(chosen_regions, nl=1)))

    queue = config.create_queue()

    # Need these for landcover
    other_s2_bands = '|coastal|cirrus|B05|B06|B07|B8A|B09'

    def submit_job_step(node, depends=None):
        """
        Submit ``node`` to the queue unless it can be skipped.

        Returns the submitted job, or None when the step was skipped because
        its outputs already exist and ``skip_existing`` is enabled.
        """
        if config.skip_existing and node.outputs_exist:
            return None
        if depends is not None:
            # An upstream step that was skipped yields None; drop those so we
            # never rely on the queue backend tolerating None dependencies.
            depends = [dep for dep in depends if dep is not None] or None
        node.cache = config.cache
        return queue.submit(node.final_command(), depends=depends,
                            name=node.name)

    # Loop-invariant values.
    input_bundle_dpath = ub.Path(config.input_bundle_dpath)
    output_bundle_dpath = ub.Path(config.output_bundle_dpath)
    channels = 'red|green|blue|nir|swir16|swir22|pan' + other_s2_bands
    if config.remove_seasons:
        remove_seasons_str = ','.join(config.remove_seasons)
    else:
        # Rendered into the command as the literal text "None".
        remove_seasons_str = None

    for region in chosen_regions:
        region_out_dpath = output_bundle_dpath / region

        fmtdict = dict(
            INPUT_BUNDLE_DPATH=config.input_bundle_dpath,
            OUTPUT_BUNDLE_DPATH=config.output_bundle_dpath,
            REGION=region,
            TRUE_SITE_DPATH=config.true_site_dpath,
            TRUE_REGION_DPATH=config.true_region_dpath,
            CHANNELS=channels,
            remove_seasons_str=remove_seasons_str,
            mask_low_quality=config.mask_low_quality,
        )
        # Expose every config option (lowercase names) to the templates.
        fmtdict.update(config)

        fmtdict['INPUT_KWCOCO_FPATH'] = input_bundle_dpath / region / f'imgonly-{region}-rawbands.kwcoco.zip'
        fmtdict['TAVE_KWCOCO_FPATH'] = region_out_dpath / f'_unfielded_imgonly-{region}-rawbands.kwcoco.zip'
        fmtdict['FIELDED_KWCOCO_FPATH'] = region_out_dpath / f'imgonly-{region}-rawbands.kwcoco.zip'
        fmtdict['FINAL_KWCOCO_FPATH'] = region_out_dpath / f'imganns-{region}-rawbands.kwcoco.zip'

        # Step 1: temporal combination of the raw bands.
        code = subtemplate(ub.codeblock(
            r'''
            python -m geowatch.cli.coco_time_combine \
                --kwcoco_fpath="$INPUT_KWCOCO_FPATH" \
                --output_kwcoco_fpath="$TAVE_KWCOCO_FPATH" \
                --channels="$CHANNELS" \
                --resolution="$resolution" \
                --time_window=$time_window \
                --remove_seasons=$remove_seasons_str \
                --merge_method=$merge_method \
                --spatial_tile_size=$spatial_tile_size \
                --mask_low_quality=$mask_low_quality \
                --max_images_per_group=$max_images_per_group \
                --start_time=2010-03-01 \
                --assets_dname="raw_bands" \
                --workers=$combine_workers
            '''), fmtdict)
        node = ProcessNode(
            name=f'combine-time-{region}',
            command=code,
            in_paths={'kwcoco_fpath': subtemplate('$INPUT_KWCOCO_FPATH', fmtdict)},
            out_paths={'output_kwcoco_fpath': subtemplate('$TAVE_KWCOCO_FPATH', fmtdict)},
            _no_outarg=True,
            _no_inarg=True,
        )
        combine_job = submit_job_step(node)

        # Step 2: add fields to the combined dataset.
        code = subtemplate(ub.codeblock(
            r'''
            python -m geowatch add_fields \
                --src $TAVE_KWCOCO_FPATH \
                --dst $FIELDED_KWCOCO_FPATH
            '''), fmtdict)
        node = ProcessNode(
            name=f'add-fields-{region}',
            command=code,
            in_paths={'src': subtemplate('$TAVE_KWCOCO_FPATH', fmtdict)},
            out_paths={'dst': subtemplate('$FIELDED_KWCOCO_FPATH', fmtdict)},
            _no_outarg=True,
            _no_inarg=True,
        )
        field_job = submit_job_step(node, depends=[combine_job])

        # Step 3 (optional): project the true annotations onto the result.
        if config.reproject:
            code = subtemplate(ub.codeblock(
                r'''
                python -m geowatch reproject \
                    --src $FIELDED_KWCOCO_FPATH \
                    --dst $FINAL_KWCOCO_FPATH \
                    --status_to_catname="positive_excluded: positive" \
                    --regions="$TRUE_REGION_DPATH/${REGION}.geojson" \
                    --sites="$TRUE_SITE_DPATH/${REGION}_*.geojson"
                '''), fmtdict)
            node = ProcessNode(
                command=code,
                name=f'reproject-ann-{region}',
                in_paths={'src': subtemplate('$FIELDED_KWCOCO_FPATH', fmtdict)},
                out_paths={'dst': subtemplate('$FINAL_KWCOCO_FPATH', fmtdict)},
                _no_outarg=True,
                _no_inarg=True,
            )
            field_job = submit_job_step(node, depends=[field_job])

    config.run_queue(queue)
# Example shell session kept as a module-level string. It shows an invocation
# of this script (median merge with winter removed) followed by the
# prepare_teamfeats / prepare_splits / reproject commands. Nothing in the
# visible part of this file reads this constant.
# NOTE(review): this is NOT a raw string, so every backslash-newline pair below
# is a line continuation that is removed from the runtime value (the multi-line
# commands collapse onto single lines). If this text is ever consumed
# programmatically an ``r`` prefix may be intended -- confirm before relying on it.
SUMMER_CONFIG = """
DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=hdd)
SSD_DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=ssd)
python -m geowatch.cli.queue_cli.prepare_time_combined_dataset \
--regions="[
# T&E Regions
AE_R001, BH_R001, BR_R001, BR_R002, BR_R004, BR_R005, CH_R001,
KR_R001,
KR_R002, LT_R001, NZ_R001, US_R001, US_R004, US_R005,
US_R006, US_R007,
# iMerit Regions
AE_C001,
AE_C002,
AE_C003, PE_C001, QA_C001, SA_C005, US_C000, US_C010,
US_C011, US_C012,
]" \
--input_bundle_dpath=$SSD_DVC_DATA_DPATH/Drop6 \
--output_bundle_dpath=$DVC_DATA_DPATH/Drop6-NoWinterMedian10GSD \
--true_site_dpath=$DVC_DATA_DPATH/annotations/drop6_hard_v1/site_models \
--true_region_dpath=$DVC_DATA_DPATH/annotations/drop6_hard_v1/region_models \
--spatial_tile_size=256 \
--merge_method=median \
--remove_seasons=winter \
--tmux_workers=2 \
--time_window=1y \
--combine_workers=4 \
--resolution=10GSD \
--backend=tmux \
--run=1
# Drop 6
export CUDA_VISIBLE_DEVICES="0,1"
DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase2_expt' --hardware='auto')
BUNDLE_DPATH=$DVC_DATA_DPATH/Drop6-NoWinterMedian10GSD
python -m geowatch.cli.queue_cli.prepare_teamfeats \
--base_fpath "$BUNDLE_DPATH"/imganns-*[0-9].kwcoco.zip \
--expt_dvc_dpath="$DVC_EXPT_DPATH" \
--with_landcover=1 \
--with_invariants2=1 \
--with_materials=0 \
--with_depth=0 \
--with_cold=0 \
--skip_existing=1 \
--assets_dname=teamfeats \
--gres=0,1 --tmux_workers=4 --backend=tmux --run=1
# DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
# python -m geowatch.cli.queue_cli.prepare_splits \
# --base_fpath=$DVC_DATA_DPATH/Drop6-NoWinterMedian10GSD/combo_imganns*_L*.kwcoco.zip \
# --constructive_mode=True \
# --suffix=L \
# --backend=tmux --workers=6 \
# --run=1
DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
python -m geowatch.cli.queue_cli.prepare_splits \
--base_fpath=$DVC_DATA_DPATH/Drop6-NoWinterMedian10GSD/combo_imganns*_I2L*.kwcoco.zip \
--constructive_mode=True \
--suffix=I2L \
--backend=tmux --tmux_workers=6 \
--run=1
# DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
# python -m geowatch.cli.queue_cli.prepare_splits \
# --base_fpath=$DVC_DATA_DPATH/Drop6-NoWinterMedian10GSD/imganns-*.kwcoco.zip \
# --constructive_mode=True \
# --suffix=rawbands \
# --backend=tmux --tmux_workers=6 \
# --run=1
DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase2_expt' --hardware='auto')
TRUE_SITE_DPATH=$DVC_DATA_DPATH/annotations/drop6_hard_v1/site_models
OUTPUT_BUNDLE_DPATH=$DVC_DATA_DPATH/Drop6-NoWinterMedian10GSD
geowatch reproject \
--src $DVC_DATA_DPATH/Drop6-NoWinterMedian10GSD/data_vali_I2L_split6.kwcoco.zip \
--inplace=True \
--status_to_catname="positive_excluded: positive" \
--site_models=$TRUE_SITE_DPATH
geowatch reproject \
--src $DVC_DATA_DPATH/Drop6-NoWinterMedian10GSD/data_train_I2L_split6.kwcoco.zip \
--inplace=True \
--status_to_catname="positive_excluded: positive" \
--site_models=$TRUE_SITE_DPATH
"""
if __name__ == '__main__':
    # The bare string below is reference documentation only (it is evaluated
    # and discarded); the script's behavior is the single ``main()`` call.
    r"""
    CommandLine:

        DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
        python -m geowatch.cli.queue_cli.prepare_time_combined_dataset \
            --regions="[
                    # T&E Regions
                    AE_R001, BH_R001, BR_R001, BR_R002, BR_R004, BR_R005, CH_R001,
                    KR_R001, KR_R002, LT_R001, NZ_R001, US_R001, US_R004, US_R005,
                    US_R006, US_R007,
                    # iMerit Regions
                    AE_C001,
                    AE_C002,
                    AE_C003, PE_C001, QA_C001, SA_C005, US_C000, US_C010,
                    US_C011, US_C012,
            ]" \
            --input_bundle_dpath=$DVC_DATA_DPATH/Drop6 \
            --output_bundle_dpath=$DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2 \
            --true_site_dpath=$DVC_DATA_DPATH/annotations/drop6_hard_v1/site_models \
            --true_region_dpath=$DVC_DATA_DPATH/annotations/drop6_hard_v1/region_models \
            --backend=tmux \
            --tmux_workers=4 \
            --combine_workers=2 \
            --resolution=10GSD \
            --run=1

        DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
        python -m geowatch.cli.queue_cli.prepare_splits \
            --base_fpath=$DVC_DATA_DPATH/Drop6-MeanYear10GSD/imganns-*.kwcoco.zip \
            --constructive_mode=True \
            --suffix=rawbands \
            --backend=tmux --tmux_workers=6 \
            --run=1

        geowatch visualize /home/joncrall/remote/toothbrush/data/dvc-repos/smart_data_dvc-ssd/Drop6-MeanYear10GSD/imganns-NZ_R001.kwcoco.zip --smart

        # Drop 6
        export CUDA_VISIBLE_DEVICES="0,1"
        DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
        DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase2_expt' --hardware='auto')
        BUNDLE_DPATH=$DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2
        python -m geowatch.cli.queue_cli.prepare_teamfeats \
            --base_fpath "$BUNDLE_DPATH"/imganns-*[0-9].kwcoco.zip \
            --expt_dvc_dpath="$DVC_EXPT_DPATH" \
            --with_landcover=1 \
            --with_invariants2=1 \
            --with_materials=0 \
            --with_depth=0 \
            --with_cold=0 \
            --skip_existing=1 \
            --assets_dname=teamfeats \
            --gres=0,1 --tmux_workers=4 --backend=tmux --run=0

        DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
        python -m geowatch.cli.queue_cli.prepare_splits \
            --base_fpath=$DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2/combo_imganns*_I2L*.kwcoco.zip \
            --constructive_mode=True \
            --suffix=I2L \
            --backend=tmux --tmux_workers=6 \
            --run=1

        DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
        DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase2_expt' --hardware='auto')
        TRUE_SITE_DPATH=$DVC_DATA_DPATH/annotations/drop6_hard_v1/site_models
        OUTPUT_BUNDLE_DPATH=$DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2
        python -m geowatch reproject \
            --src data_vali_I2L_split6.kwcoco.zip \
            --dst data_vali_I2L_split6.kwcoco.zip \
            --site_models=$TRUE_SITE_DPATH

        DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
        DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase2_expt' --hardware='auto')
        TRUE_SITE_DPATH=$DVC_DATA_DPATH/annotations/drop6_hard_v1/site_models
        OUTPUT_BUNDLE_DPATH=$DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2
        python -m geowatch reproject \
            --src data_train_I2L_split6.kwcoco.zip \
            --dst data_train_I2L_split6.kwcoco.zip \
            --site_models=$TRUE_SITE_DPATH
    """
    main()