#!/usr/bin/env python3
# PYTHON_ARGCOMPLETE_OK
"""
The following example simply produces the script under different variations.
CommandLine:
xdoctest -m geowatch.cli.queue_cli.prepare_teamfeats __doc__
SeeAlso:
../tasks/invariants/predict.py
../tasks/landcover/predict.py
../tasks/depth/predict.py
../tasks/cold/predict.py
~/code/watch/dev/poc/prepare_time_combined_dataset.py
Example:
>>> from geowatch.cli.queue_cli.prepare_teamfeats import * # NOQA
>>> config = {
>>> 'src_kwcocos': './pretend_bundle/data.kwcoco.json',
>>> 'gres': [0, 1],
>>> 'expt_dvc_dpath': './pretend_expt_dvc',
>>> #
>>> 'virtualenv_cmd': 'conda activate geowatch',
>>> #
>>> #'with_s2_landcover': 1,
>>> #'with_materials': 1,
>>> #'with_invariants2': 1,
>>> 'with_mae': 1,
>>> #
>>> 'run': 0,
>>> 'check': False,
>>> 'skip_existing': False,
>>> 'backend': 'serial',
>>> }
>>> config['backend'] = 'slurm'
>>> outputs = prep_feats(cmdline=False, **config)
>>> outputs['queue'].print_commands(0, 0)
>>> config['backend'] = 'tmux'
>>> outputs = prep_feats(cmdline=False, **config)
>>> outputs['queue'].print_commands(0, 0)
>>> config['backend'] = 'serial'
>>> outputs = prep_feats(cmdline=False, **config)
>>> outputs['queue'].print_commands(0, 0)
Example:
>>> # Test landcover commands
>>> from geowatch.cli.queue_cli.prepare_teamfeats import * # NOQA
>>> config = {
>>> 'src_kwcocos': './PRETEND_BUNDLE/data.kwcoco.json',
>>> 'gres': [0, 1],
>>> 'expt_dvc_dpath': './PRETEND_EXPT_DVC',
>>> 'virtualenv_cmd': 'conda activate geowatch',
>>> 'with_s2_landcover': 1,
>>> 'with_wv_landcover': 1,
>>> 'num_wv_landcover_hidden': 0,
>>> 'num_s2_landcover_hidden': 0,
>>> 'run': 0,
>>> 'check': False,
>>> 'skip_existing': False,
>>> 'backend': 'serial',
>>> }
>>> config['backend'] = 'serial'
>>> outputs = prep_feats(cmdline=False, **config)
>>> outputs['queue'].print_commands(0, 0)
>>> output_paths = outputs['final_output_paths']
>>> print('output_paths = {}'.format(ub.urepr(output_paths, nl=1)))
Example:
>>> # Test COLD commands
>>> from geowatch.cli.queue_cli.prepare_teamfeats import * # NOQA
>>> cold_config = ub.codeblock(
'''
enabled: 1
conse: 42
sensors: 'L8,S2,WV'
resolution: 3GSD
''')
>>> config = {
>>> 'src_kwcocos': './PRETEND_BUNDLE/data.kwcoco.json',
>>> 'gres': [0, 1],
>>> 'expt_dvc_dpath': './PRETEND_EXPT_DVC',
>>> 'virtualenv_cmd': 'conda activate geowatch',
>>> 'cold_config': cold_config,
>>> 'run': 0,
>>> 'check': False,
>>> 'skip_existing': False,
>>> 'backend': 'serial',
>>> }
>>> config['backend'] = 'serial'
>>> outputs = prep_feats(cmdline=False, **config)
>>> outputs['queue'].print_commands(0, 0)
>>> output_paths = outputs['final_output_paths']
>>> print('output_paths = {}'.format(ub.urepr(output_paths, nl=1)))
Example:
>>> # Test that COLD stays disabled when cold_config and with_cold are None
>>> from geowatch.cli.queue_cli.prepare_teamfeats import * # NOQA
>>> cold_config = None
>>> config = {
>>> 'src_kwcocos': './PRETEND_BUNDLE/data.kwcoco.json',
>>> 'gres': [0, 1],
>>> 'expt_dvc_dpath': './PRETEND_EXPT_DVC',
>>> 'virtualenv_cmd': 'conda activate geowatch',
>>> 'cold_config': cold_config,
>>> 'with_cold': cold_config,
>>> 'run': 0,
>>> 'check': False,
>>> 'skip_existing': False,
>>> 'backend': 'serial',
>>> }
>>> config['backend'] = 'serial'
>>> outputs = prep_feats(cmdline=False, **config)
>>> outputs['queue'].print_commands(0, 0)
>>> output_paths = outputs['final_output_paths']
>>> print('output_paths = {}'.format(ub.urepr(output_paths, nl=1)))
Ignore:
# Drop 6
export CUDA_VISIBLE_DEVICES="0,1"
DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase2_expt' --hardware=auto)
BUNDLE_DPATH=$DVC_DATA_DPATH/Drop6
python -m geowatch.cli.queue_cli.prepare_teamfeats \
--src_kwcocos "$BUNDLE_DPATH"/imganns-*.kwcoco.zip \
--expt_dvc_dpath="$DVC_EXPT_DPATH" \
--with_invariants2=0 \
--with_s2_landcover=0 \
--with_materials=0 \
--with_depth=0 \
--with_cold=1 \
--skip_existing=1 \
--gres=0,1 --tmux_workers=4 --backend=tmux --run=0 --print-commands
# Drop 6
export CUDA_VISIBLE_DEVICES="0,1"
DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase2_expt' --hardware=auto)
BUNDLE_DPATH=$DVC_DATA_DPATH/Drop6
python -m geowatch.cli.queue_cli.prepare_teamfeats \
--src_kwcocos "$BUNDLE_DPATH"/imganns-KR_R00*.kwcoco.zip \
--expt_dvc_dpath="$DVC_EXPT_DPATH" \
--with_invariants2=1 \
--with_s2_landcover=0 \
--with_materials=0 \
--with_depth=0 \
--with_cold=0 \
--skip_existing=1 \
--assets_dname=teamfeats \
--gres=0,1 --tmux_workers=4 --backend=tmux --run=0
# TimeCombined V2
export CUDA_VISIBLE_DEVICES="0,1"
DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase2_expt' --hardware='auto')
BUNDLE_DPATH=$DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2
python -m geowatch.cli.queue_cli.prepare_teamfeats \
--src_kwcocos "$BUNDLE_DPATH"/imganns-*[0-9].kwcoco.zip \
--expt_dvc_dpath="$DVC_EXPT_DPATH" \
--with_s2_landcover=1 \
--with_invariants2=1 \
--with_sam=1 \
--with_materials=0 \
--with_depth=0 \
--with_cold=0 \
--skip_existing=1 \
--assets_dname=teamfeats \
--gres=0, --tmux_workers=1 --backend=tmux --run=0
DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
python -m geowatch.cli.queue_cli.prepare_splits \
--src_kwcocos=$DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2/combo_imganns*_I2LS*.kwcoco.zip \
--constructive_mode=True \
--suffix=I2LS \
--backend=tmux --tmux_workers=6 \
--run=1
DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase2_expt' --hardware='auto')
TRUE_SITE_DPATH=$DVC_DATA_DPATH/annotations/drop6_hard_v1/site_models
OUTPUT_BUNDLE_DPATH=$DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2
python -m geowatch reproject \
--src $DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2/data_vali_I2LS_split6.kwcoco.zip \
--inplace \
--site_models=$TRUE_SITE_DPATH
python -m geowatch reproject \
--src $DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2/data_train_I2LS_split6.kwcoco.zip \
--inplace \
--site_models=$TRUE_SITE_DPATH
"""
import scriptconfig as scfg
import ubelt as ub
from cmd_queue.cli_boilerplate import CMDQueueConfig
class TeamFeaturePipelineConfig(CMDQueueConfig):
    """
    Configuration for generating the bash commands that compute team
    features on one or more kwcoco files and combine the results.

    Note:
        The models and parameters to use are hard coded in this script.

    TODO:
        - [ ] jsonargparse use-case: specifying parameters of the subalgos
    """
    # --- inputs ---
    src_kwcocos = scfg.Value(None, help=ub.paragraph(
        '''
        One or more base coco files to compute team-features on.
        '''), nargs='+', alias=['base_fpath'], group='inputs')

    expt_dvc_dpath = scfg.Value('auto', help=ub.paragraph(
        '''
        The DVC directory where team feature model weights can be
        found. If "auto" uses the
        ``geowatch.find_dvc_dpath(tags='phase2_expt')`` mechanism to
        infer the location.
        '''), group='inputs')

    # --- cmd-queue ---
    gres = scfg.Value('auto', help='comma separated list of gpus or auto', group='cmd-queue')

    # --- team feature enablers ---
    with_s2_landcover = scfg.Value(False, help='Include DZYNE S2 landcover features', group='team feature enablers')
    with_wv_landcover = scfg.Value(False, help='Include DZYNE WV landcover features', group='team feature enablers')
    with_materials = scfg.Value(False, help='Include Rutgers material features', group='team feature enablers')
    with_mae = scfg.Value(False, help='Include WU MAE features', group='team feature enablers')
    with_invariants2 = scfg.Value(False, help='Include UKY invariant features', group='team feature enablers')
    with_depth = scfg.Value(False, help='Include DZYNE WorldView depth features', group='team feature enablers')
    # Grouped with the other ``with_*`` switches for a consistent help layout.
    with_sam = scfg.Value(False, help='Include SAM features', group='team feature enablers')

    cold_config = scfg.Value(None, type=str, help=ub.paragraph(
        '''
        Raw json/yaml or a path to a json/yaml file that specifies the
        config for cold teamfeats.
        '''))

    # Forwarded as ``with_hidden`` to the landcover predictors.
    num_s2_landcover_hidden = scfg.Value(32, help='value passed as with_hidden to the S2 landcover predictor')
    num_wv_landcover_hidden = scfg.Value(32, help='value passed as with_hidden to the WV landcover predictor')

    # --- invariants options ---
    invariant_segmentation = scfg.Value(False, help=ub.paragraph(
        '''
        Enable/Disable segmentation part of invariants
        '''), group='invariants options')
    invariant_pca = scfg.Value(0, help='Enable/Disable invariant PCA', group='invariants options')
    invariant_resolution = scfg.Value('10GSD', help='GSD for invariants', group='invariants options')

    virtualenv_cmd = scfg.Value(None, type=str, help=ub.paragraph(
        '''
        Command to start the appropriate virtual environment if your
        bashrc does not start it by default.
        '''))

    # --- common options ---
    skip_existing = scfg.Value(True, help='if True skip completed results', group='common options')
    data_workers = scfg.Value(2, help='dataloader workers for each proc', group='common options')
    kwcoco_ext = scfg.Value('.kwcoco.zip', help=ub.paragraph(
        '''
        use .kwcoco.json or .kwcoco.zip for outputs
        '''), group='common options')
    assets_dname = scfg.Value('_teamfeats', help=ub.paragraph(
        '''
        The name of the top-level directory to write new assets.
        '''), group='common options')
    check = scfg.Value(True, help='if True check files exist where we can', group='common options')

    # --- cold options (deprecated in favor of ``cold_config``) ---
    with_cold = scfg.Value(False, help='Include COLD features. DEPRECATED: pass enabled: 1 in the cold_config', group='team feature enablers')
    cold_workers = scfg.Value(2, help='workers for pycold. DEPRECATED pass as workermode in "cold_config"', group='cold options')
    cold_workermode = scfg.Value('process', help='workers mode for pycold. DEPRECATED pass as workermode in "cold_config"', group='cold options')

    # --- depth options ---
    depth_workers = scfg.Value(2, help=ub.paragraph(
        '''
        workers for depth only. On systems with < 32GB RAM might
        need to set to 0
        '''), group='depth options')
def prep_feats(cmdline=True, **kwargs):
    """
    Build a queue of team-feature jobs and hand it to cmd-queue.

    For each input kwcoco file this creates the process nodes for every
    enabled team feature plus a node that combines them, submits the
    resulting pipeline to a queue created from the config, and then calls
    ``config.run_queue`` on it.

    The idea is that we should have a lightweight scheduler. I think something
    fairly minimal can be implemented with tmux, but it would be nice to have a
    more robust slurm extension.

    Args:
        cmdline (bool): forwarded to ``TeamFeaturePipelineConfig.cli``.
        **kwargs: forwarded as ``data`` to ``TeamFeaturePipelineConfig.cli``.

    Returns:
        dict: with keys ``queue`` (the created queue) and
            ``final_output_paths`` (one combined-feature path per
            non-blocked input file).

    Raises:
        FileNotFoundError: if ``check`` is enabled and an input kwcoco
            file does not exist.

    TODO:
        - [ ] Option to just dump the serial bash script that does everything.
    """
    config = TeamFeaturePipelineConfig.cli(cmdline=cmdline, data=kwargs,
                                           strict=True)
    import rich
    rich.print('config = {}'.format(ub.urepr(config, nl=2)))

    from scriptconfig.smartcast import smartcast
    from kwutil import util_path

    # hack for cmd-queue, will be fixed soon
    config.slurm_options = config.slurm_options or {}

    # Normalize ``gres`` into a list of device indexes.
    gres = smartcast(config['gres'])
    if gres is None:
        gres = 'auto'
    if gres == 'auto':
        import torch
        gres = list(range(torch.cuda.device_count()))
    elif not ub.iterable(gres):
        gres = [gres]

    if config['expt_dvc_dpath'] == 'auto':
        import geowatch
        expt_dvc_dpath = geowatch.find_dvc_dpath(tags='phase2_expt', hardware='auto')
    else:
        expt_dvc_dpath = ub.Path(config['expt_dvc_dpath'])

    # Name fragments of files this script itself writes; inputs matching
    # them are skipped so a glob does not pick up previous outputs.
    blocklist = [
        '_dzyne_landcover',
        '_dzyne_s2_landcover',
        '_dzyne_wv_landcover',
        '_uky_invariants',
        '_rutgers_material_seg_v4',
    ]

    base_fpath_pat = config['src_kwcocos']
    base_fpath_list = list(util_path.coerce_patterned_paths(
        base_fpath_pat, globfallback=True))

    from geowatch.mlops.pipeline_nodes import Pipeline

    dag_nodes = []
    final_output_paths = []
    for src_fpath in base_fpath_list:
        # Hack to prevent doubling up.
        # Should really just choose a better naming scheme so we don't have
        # to break user expectations about glob
        if any(b in src_fpath.name for b in blocklist):
            print(f'blocked src_fpath={src_fpath}')
            continue

        if config.check:
            if not src_fpath.exists():
                # The original literal lacked the ``f`` prefix (and used an
                # invalid ``!r=`` spec), so the path never appeared in the
                # message.
                raise FileNotFoundError(
                    f'Specified kwcoco file: {src_fpath!r} does not exist and check=True')

        aligned_bundle_dpath = src_fpath.parent

        nodes, base_combo_fpath = _make_teamfeat_nodes(
            src_fpath, expt_dvc_dpath,
            aligned_bundle_dpath, config)
        final_output_paths.append(base_combo_fpath)
        dag_nodes.extend(nodes)

    dag = Pipeline(dag_nodes)
    dag.configure(cache=True)

    queue = config.create_queue(gres=gres)

    dag.submit_jobs(
        queue=queue,
        skip_existing=config['skip_existing'],
        enable_links=False,
        write_invocations=False,
        write_configs=False,
    )
    # pipeline._populate_explicit_dependency_queue(queue)

    config.run_queue(queue)

    outputs = {
        'queue': queue,
        'final_output_paths': final_output_paths,
    }
    return outputs
def _make_teamfeat_nodes(src_fpath, expt_dvc_dpath, aligned_bundle_dpath, config):
    """
    Create the process nodes that compute and combine team features for a
    single kwcoco file.

    One ``ProcessNode`` is created for each enabled feature and a final
    ``combine_features`` node is connected to every feature output.

    Args:
        src_fpath: path to the input kwcoco file (must support ``.name`` and
            ``.parent``).
        expt_dvc_dpath: path-like root under which the model weights are
            looked up (joined with ``/``).
        aligned_bundle_dpath: directory where the per-feature and combined
            kwcoco files are written.
        config (TeamFeaturePipelineConfig): parsed configuration. Note that
            ``config['with_cold']`` is overwritten with the resolved COLD
            enabled state.

    Returns:
        Tuple[list, Path]: the list of nodes (combine node first, followed by
            the feature nodes) and the path of the combined kwcoco file.
    """
    from geowatch.mlops.pipeline_nodes import ProcessNode
    from kwutil import util_parallel
    from geowatch.utils import simple_dvc

    data_workers = util_parallel.coerce_num_workers(config['data_workers'])

    model_fpaths = {
        # 'rutgers_materials': expt_dvc_dpath / 'models/rutgers/rutgers_peri_materials_v3/experiments_epoch_18_loss_59.014100193977356_valmF1_0.18694573888313187_valChangeF1_0.0_time_2022-02-01-01:53:20.pth',
        # 'rutgers_materials': dvc_dpath / 'models/rutgers/experiments_epoch_62_loss_0.09470022770735186_valmIoU_0.5901660531463717_time_2021101T16277.pth',
        'rutgers_materials_model_v4': expt_dvc_dpath / 'models/rutgers/ru_model_05_25_2023.ckpt',
        'rutgers_materials_config_v4': expt_dvc_dpath / 'models/rutgers/ru_config_05_25_2023.yaml',

        'wu_mae_v1': expt_dvc_dpath / 'models/wu/wu_mae_2023_04_21/Drop6-epoch=01-val_loss=0.20.ckpt',

        # 'dzyne_s2_landcover': expt_dvc_dpath / 'models/landcover/visnav_remap_s2_subset.pt',
        'dzyne_s2_landcover': expt_dvc_dpath / 'models/landcover/sentinel2.pt',
        'dzyne_wv_landcover': expt_dvc_dpath / 'models/landcover/worldview.pt',

        # 2022-02-11
        # 'uky_pretext': dvc_dpath / 'models/uky/uky_invariants_2022_02_11/TA1_pretext_model/pretext_package.pt',
        # 'uky_pca': dvc_dpath / 'models/uky/uky_invariants_2022_02_11/TA1_pretext_model/pca_projection_matrix.pt',
        # 'uky_segmentation': dvc_dpath / 'models/uky/uky_invariants_2022_02_11/TA1_segmentation_model/segmentation_package.pt',

        # 2022-03-11
        # 'uky_pretext': dvc_dpath / 'models/uky/uky_invariants_2022_03_11/TA1_pretext_model/pretext_package.pt',
        # 'uky_pca': dvc_dpath / 'models/uky/uky_invariants_2022_03_11/TA1_pretext_model/pca_projection_matrix.pt',
        # 'uky_segmentation': dvc_dpath / 'models/uky/uky_invariants_2022_02_11/TA1_segmentation_model/segmentation_package.pt',  # uses old segmentation model

        # 2022-03-21
        'uky_pretext': expt_dvc_dpath / 'models/uky/uky_invariants_2022_03_21/pretext_model/pretext_package.pt',
        'uky_pca': expt_dvc_dpath / 'models/uky/uky_invariants_2022_03_21/pretext_model/pretext_pca_104.pt',

        'uky_pretext2': expt_dvc_dpath / 'models/uky/uky_invariants_2022_12_17/TA1_pretext_model/pretext_package.pt',
        # 'uky_segmentation': dvc_dpath / 'models/uky/uky_invariants_2022_02_21/TA1_segmentation_model/segmentation_package.pt',  # uses old segmentation model

        # TODO: use v1 on RGB and v2 on PAN
        'dzyne_depth': expt_dvc_dpath / 'models/depth/weights_v1.pt',
        # 'dzyne_depth': dvc_dpath / 'models/depth/weights_v2_gray.pt',

        'sam': expt_dvc_dpath / 'models/sam/sam_vit_h_4b8939.pth'
    }

    # Output names are derived from the input name up to its first dot,
    # minus a trailing "-rawbands" marker.
    subset_name = src_fpath.name.split('.')[0]
    if subset_name.endswith('-rawbands'):
        subset_name = subset_name.rsplit('-', 1)[0]

    # Short hash of the input path; appended to every node name so nodes
    # built for different input files get distinct names.
    name_suffix = '_' + ub.hash_data(src_fpath)[0:8]

    outputs = {
        # 'rutgers_materials': aligned_bundle_dpath / (subset_name + '_rutgers_material_seg_v3' + config['kwcoco_ext']),
        'rutgers_materials_v4': aligned_bundle_dpath / (subset_name + '_rutgers_material_seg_v4' + config['kwcoco_ext']),
        'wu_mae': aligned_bundle_dpath / (subset_name + '_wu_mae' + config['kwcoco_ext']),
        'dzyne_s2_landcover': aligned_bundle_dpath / (subset_name + '_dzyne_s2_landcover' + config['kwcoco_ext']),
        'dzyne_wv_landcover': aligned_bundle_dpath / (subset_name + '_dzyne_wv_landcover' + config['kwcoco_ext']),
        'dzyne_depth': aligned_bundle_dpath / (subset_name + '_dzyne_depth' + config['kwcoco_ext']),
        'uky_invariants': aligned_bundle_dpath / (subset_name + '_uky_invariants' + config['kwcoco_ext']),
        'cold': aligned_bundle_dpath / (subset_name + '_cold' + config['kwcoco_ext']),
        'sam': aligned_bundle_dpath / (subset_name + '_sam' + config['kwcoco_ext']),
    }

    # print('Exist check: ')
    # print('model_packages: ' + ub.urepr(ub.map_vals(lambda x: x.exists(), model_fpaths)))
    # print('feature outputs: ' + ub.urepr(ub.map_vals(lambda x: x.exists(), outputs)))

    # Short codes that are joined to name the combined output file.
    # TODO: different versions of features need different codes.
    codes = {
        'with_s2_landcover': 'LS2',
        'with_wv_landcover': 'LWV',
        'with_depth': 'D',
        'with_materials': 'M',
        'with_mae': 'E',
        'with_invariants2': 'I2',
        'with_cold': 'C',
        'with_sam': 'S',
    }

    # tmux queue is still limited. The order of submission matters.
    feature_nodes = []
    combo_code_parts = []

    key = 'with_s2_landcover'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['dzyne_s2_landcover'])
        # Landcover is fairly fast to run
        node = ProcessNode(
            name=key + name_suffix,
            executable='python -m geowatch.tasks.landcover.predict',
            in_paths={
                'dataset': src_fpath,
                'deployed': model_fpaths['dzyne_s2_landcover'],
            },
            out_paths={
                'output': outputs['dzyne_s2_landcover']
            },
            algo_params={
                'with_hidden': config.num_s2_landcover_hidden,
                'select_images': '.sensor_coarse == "S2"',
                'assets_dname': config.assets_dname,
            },
            perf_params={
                'device': 0,
                'num_workers': data_workers,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    key = 'with_wv_landcover'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['dzyne_wv_landcover'])
        # Landcover is fairly fast to run
        node = ProcessNode(
            name=key + name_suffix,
            executable='python -m geowatch.tasks.landcover.predict',
            in_paths={
                'dataset': src_fpath,
                'deployed': model_fpaths['dzyne_wv_landcover'],
            },
            out_paths={
                'output': outputs['dzyne_wv_landcover']
            },
            algo_params={
                'with_hidden': config.num_wv_landcover_hidden,
                'select_images': '.sensor_coarse == "WV"',
                'assets_dname': config.assets_dname,
            },
            perf_params={
                'device': 0,
                'num_workers': data_workers,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    from kwutil.util_yaml import Yaml
    key = 'with_cold'
    cold_config = Yaml.coerce(config.cold_config) or {}

    GRACEFUL_DEPRECATION_WORKAROUNDS = 1
    if GRACEFUL_DEPRECATION_WORKAROUNDS:
        # Fall back to the deprecated top-level options for anything that
        # ``cold_config`` does not specify itself.
        if 'workers' not in cold_config:
            cold_config['workers'] = config.cold_workers
        if 'workermode' not in cold_config:
            # Fixed copy-paste bug: this branch used to assign
            # ``cold_config['workers']`` again, so ``cold_workermode`` was
            # silently ignored.
            cold_config['workermode'] = config.cold_workermode
        config[key] = cold_config.get('enabled', config[key])
    cold_config['enabled'] = config[key]

    if cold_config['enabled']:
        node = ProcessNode(
            name=key + name_suffix,
            executable='python -m geowatch.tasks.cold.predict',
            in_paths={
                'coco_fpath': src_fpath,
            },
            out_paths={
                'mod_coco_fpath': outputs['cold'],
                'out_dpath': src_fpath.parent,
            },
            algo_params={
                'sensors': 'L8',
                'adj_cloud': False,
                'method': 'COLD',
                'prob': 0.99,
                'conse': 6,
                'cm_interval': 60,
                'year_lowbound': None,
                'year_highbound': None,
                'coefs': 'cv,rmse,a0,a1,b1,c1',
                'coefs_bands': '0,1,2,3,4,5',
                'timestamp': False,
                'combine': False,
                'resolution': '30GSD',
            },
            perf_params={
                'workermode': 'process',
                'workers': 2,
            },
            node_dpath='.',
        )
        WITH_S2 = 1  # hard coded
        if WITH_S2:
            node.algo_params.update({
                'sensors': 'L8,S2',
                'conse': 8,
                'resolution': '10GSD',
            })
        # User supplied values are applied on top of the defaults above.
        node.configure(cold_config)
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    key = 'with_depth'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['dzyne_depth'])
        # Only need 1 worker to minimize lag between images, task is GPU bound
        depth_data_workers = config['depth_workers']
        if depth_data_workers == 'auto':
            # Pick the number of workers based on the available system RAM.
            import psutil
            import pint
            reg = pint.UnitRegistry()
            vmem_info = psutil.virtual_memory()
            total_gb = (vmem_info.total * reg.byte).to(reg.gigabyte).m
            avail_gb = (vmem_info.available * reg.byte).to(reg.gigabyte).m
            if avail_gb < 32:
                depth_data_workers = 0
            elif avail_gb < 64:
                depth_data_workers = 1
            else:
                depth_data_workers = 2
            print('total_gb = {!r}'.format(total_gb))
            print('avail_gb = {!r}'.format(avail_gb))

        depth_window_size = 1440
        node = ProcessNode(
            name=key + name_suffix,
            executable='python -m geowatch.tasks.depth.predict',
            in_paths={
                'dataset': src_fpath,
                'deployed': model_fpaths['dzyne_depth'],
            },
            out_paths={
                'output': outputs['dzyne_depth'],
            },
            algo_params={
                'window_size': depth_window_size,
            },
            perf_params={
                # 'skip_existing': 1,
                'data_workers': depth_data_workers,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    key = 'with_materials'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['rutgers_materials_model_v4'])
        node = ProcessNode(
            name=key + name_suffix,
            executable='python -m geowatch.tasks.rutgers_material_seg_v2.predict',
            in_paths={
                'kwcoco_fpath': src_fpath,
                'model_fpath': model_fpaths['rutgers_materials_model_v4'],
                'config_fpath': model_fpaths['rutgers_materials_config_v4'],
            },
            out_paths={
                'output_kwcoco_fpath': outputs['rutgers_materials_v4'],
            },
            algo_params={
            },
            perf_params={
                'workers': data_workers,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    key = 'with_mae'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['wu_mae_v1'])
        node = ProcessNode(
            name=key + name_suffix,
            executable=ub.codeblock(
                '''
                python -m geowatch.tasks.mae.predict
                '''),
            in_paths={
                'input_kwcoco': src_fpath,
                'mae_ckpt_path': model_fpaths['wu_mae_v1'],
            },
            out_paths={
                'output_kwcoco': outputs['wu_mae'],
            },
            algo_params={
                'assets_dname': config.assets_dname,
            },
            perf_params={
                'workers': data_workers,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    key = 'with_invariants2'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['uky_pretext2'])

        if not model_fpaths['uky_pretext2'].exists():
            print('Warning: UKY pretext model does not exist')

        # task['gpus'] = 1
        # all_tasks = 'before_after segmentation pretext'
        node = ProcessNode(
            name=key + name_suffix,
            executable=ub.codeblock(
                '''
                python -m geowatch.tasks.invariants.predict
                '''),
            in_paths={
                'input_kwcoco': src_fpath,
                'pretext_package_path': model_fpaths['uky_pretext2'],
                'pca_projection_path': model_fpaths['uky_pca'],
            },
            out_paths={
                'output_kwcoco': outputs['uky_invariants'],
            },
            algo_params={
                'assets_dname': config.assets_dname,
                'input_resolution': config['invariant_resolution'],
                'window_resolution': config['invariant_resolution'],
                'patch_size': 256,
                'patch_overlap': 0.3,
                'do_pca': config['invariant_pca'],
                'tasks': ['before_after', 'pretext'],
            },
            perf_params={
                'workers': data_workers,
                'io_workers': 0,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    key = 'with_sam'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['sam'])

        if not model_fpaths['sam'].exists():
            print('Warning: SAM model does not exist')

        node = ProcessNode(
            name=key + name_suffix,
            executable=ub.codeblock(
                '''
                python -m geowatch.tasks.sam.predict
                '''),
            in_paths={
                'input_kwcoco': src_fpath,
                'weights_fpath': model_fpaths['sam'],
            },
            out_paths={
                'output_kwcoco': outputs['sam'],
            },
            algo_params={
                'assets_dname': config.assets_dname,
                'window_overlap': 0.3,
            },
            perf_params={
                'data_workers': data_workers,
                'io_workers': 0,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    # Determine what all of the output paths will be
    feature_paths = []
    feature_output_nodes = []
    for node in feature_nodes:
        node_features = []
        for output in node.outputs.values():
            if output.name == 'out_dpath':
                # hack to skip a non-feature output for COLD
                continue
            node_features.append(str(output.final_value))
            feature_output_nodes.append(output)
        assert len(node_features) == 1, (
            'code assumes each node should have 1 feature output')
        feature_paths.extend(node_features)

    # Finalize features by combining them all into combo.kwcoco.json
    tocombine = [str(src_fpath)] + feature_paths
    combo_code = ''.join(sorted(combo_code_parts))

    base_combo_fpath = aligned_bundle_dpath / (f'combo_{subset_name}_{combo_code}' + config['kwcoco_ext'])

    for node in feature_nodes:
        node.configure(node.config, cache=False)

    combine_node = ProcessNode(
        name='combine_features' + name_suffix,
        executable='python -m geowatch.cli.coco_combine_features',
        in_paths={
            'src': tocombine,
        },
        out_paths={
            'dst': base_combo_fpath,
        },
    )
    # TODO: it would be nice if the mlops DAG allowed us to simply specify the
    # process level dependencies and assume we take care of the i/o level
    # dependencies.
    for output in feature_output_nodes:
        output.connect(combine_node.inputs['src'])
    combine_node.configure(combine_node.config, cache=False)

    nodes = [combine_node] + feature_nodes
    return nodes, base_combo_fpath
# Expose the entry point under the conventional ``main`` name.
main = prep_feats


if __name__ == '__main__':
    """
    CommandLine:
        DVC_DPATH=$(geowatch_dvc)
        python -m geowatch.cli.queue_cli.prepare_teamfeats \
            --src_kwcocos="$DVC_DPATH/Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json" \
            --gres=0 \
            --with_depth=0 \
            --run=False --skip_existing=False --virtualenv_cmd "conda activate geowatch" \
            --backend=serial

        python -m geowatch.cli.queue_cli.prepare_teamfeats --gres=0,2 --with_depth=True --keep_sessions=True
        python -m geowatch.cli.queue_cli.prepare_teamfeats --gres=2 --with_materials=False --keep_sessions=True

        # TODO: rename to schedule teamfeatures

        # TO UPDATE ANNOTS
        # Update to whatever the state of the annotations submodule is
        DVC_DPATH=$(geowatch_dvc)
        python -m geowatch reproject_annotations \
            --src $DVC_DPATH/Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json \
            --dst $DVC_DPATH/Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json \
            --site_models="$DVC_DPATH/annotations/site_models/*.geojson"

        kwcoco stats $DVC_DPATH/Drop2-Aligned-TA1-2022-02-15/data_20220203.kwcoco.json $DVC_DPATH/Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json

        # Team Features on Drop2
        DVC_DPATH=$(geowatch_dvc)
        python -m geowatch.cli.queue_cli.prepare_teamfeats \
            --src_kwcocos=$DVC_DPATH/Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json \
            --gres=0,1 --with_depth=0 --with_materials=False \
            --run=0

        ###
        DATASET_CODE=Aligned-Drop2-TA1-2022-02-24
        DVC_DPATH=$(geowatch_dvc)
        DATASET_CODE=Drop2-Aligned-TA1-2022-02-15
        KWCOCO_BUNDLE_DPATH=$DVC_DPATH/$DATASET_CODE
        python -m geowatch.cli.queue_cli.prepare_teamfeats \
            --src_kwcocos=$KWCOCO_BUNDLE_DPATH/data.kwcoco.json \
            --gres=0,1 \
            --with_depth=1 \
            --with_s2_landcover=1 \
            --with_materials=1 \
            --depth_workers=auto \
            --skip_existing=0 --run=0

        # Simple demo
        python -m geowatch.cli.queue_cli.prepare_teamfeats \
            --src_kwcocos=./mydata/data.kwcoco.json \
            --gres=0,1 \
            --with_depth=0 \
            --with_s2_landcover=1 \
            --with_materials=1 \
            --skip_existing=0 \
            --backend=tmux \
            --run=0
    """
    # When run as a script, parse the options from the command line.
    main(cmdline=True)