r"""
CommandLine:
DVC_DPATH=$(geowatch_dvc --hardware="hdd")
echo $DVC_DPATH
python -m geowatch.cli.coco_crop_tracks \
--src="$DVC_DPATH/Aligned-Drop3-TA1-2022-03-10/data.kwcoco.json" \
--dst="$DVC_DPATH/Cropped-Drop3-TA1-2022-03-10/data.kwcoco.json" \
--mode=process --workers=8
# Small test of KR only
DVC_DPATH=$(geowatch_dvc --hardware="hdd")
echo $DVC_DPATH
python -m geowatch.cli.coco_crop_tracks \
--src="$DVC_DPATH/Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json" \
--dst="$DVC_DPATH/Cropped-Drop2-TA1-test/data.kwcoco.json" \
--mode=process --workers=8 --channels="red|green|blue" \
--include_sensors="WV,S2,L8" --select_videos '.name | startswith("KR_R001")' \
--target_gsd=3
TODO:
- [ ] option to merge overlapping regions?
"""
import scriptconfig as scfg
import ubelt as ub
[docs]
class CocoCropTrackConfig(scfg.DataConfig):
    """
    Create a dataset of aligned temporal sequences around objects of interest
    in an unstructured collection of annotated geotiffs.

    High Level Steps:
        * Find a set of geospatial AOIs
        * For each AOI find all images that overlap
        * Orthorectify (or warp) the selected spatial region and its
          annotations to a canonical space.
    """
    __default__ = {
        'src': scfg.Value('in.geojson.json', help='input dataset to chip'),

        'dst': scfg.Value(None, help='bundle directory or kwcoco json file for the output'),

        # Kept as a string so that expressions understood by
        # coerce_num_workers can be given on the command line.
        'workers': scfg.Value(1, type=str, help='number of parallel procs. This can also be an expression accepted by coerce_num_workers.'),

        'mode': scfg.Value('process', type=str, help='process, thread, or serial'),

        'context_factor': scfg.Value(1.8, help=ub.paragraph('scale factor')),

        # Deliberately not declared with ``type=str``: this value is only used
        # in a truthiness test, and a string-typed option would keep
        # ``--sqlmode=0`` / ``--sqlmode=False`` as non-empty (truthy) strings.
        'sqlmode': scfg.Value(False, help='if True use sqlmode'),

        'keep': scfg.Value('img', help='set to None to recompute'),

        'target_gsd': scfg.Value(1, help='GSD of new kwcoco videospace'),

        'channels': scfg.Value(None, help='only crop these channels if specified'),

        'select_images': scfg.Value(
            None, type=str, help=ub.paragraph(
                '''
                A jq query to specify images. See kwcoco subset --help for more details.
                ''')),

        'select_videos': scfg.Value(
            None, help=ub.paragraph(
                '''
                A jq query to specify videos. See kwcoco subset --help for more details.
                ''')),

        'include_sensors': scfg.Value(None, help='if specified can be comma separated valid sensors'),

        'exclude_sensors': scfg.Value(None, help='if specified can be comma separated invalid sensors'),
    }
[docs]
def main(cmdline=0, **kwargs):
    """
    Simple CLI for cropping to tracks

    Loads the source kwcoco dataset, filters its images, submits one crop job
    per generated asset task, and writes a new kwcoco manifest describing the
    cropped assets to ``dst``.

    Args:
        cmdline (int | bool): forwarded to ``CocoCropTrackConfig.cli``.
        **kwargs: values passed as ``data`` to ``CocoCropTrackConfig.cli``;
            see :class:`CocoCropTrackConfig` for the available keys.

    Raises:
        Exception: any exception raised by a crop job is printed and then
            re-raised, aborting the run.

    Ignore:
        from geowatch.cli.coco_crop_tracks import *  # NOQA
        import geowatch
        dvc_dpath = geowatch.find_dvc_dpath(hardware='hdd')
        # dvc_dpath = geowatch.find_dvc_dpath(hardware='ssd')
        base_fpath = dvc_dpath / 'Aligned-Drop3-TA1-2022-03-10/data.kwcoco.json'
        src = base_fpath
        dst = dvc_dpath / 'Cropped-Drop3-TA1-2022-03-10/data.kwcoco.json'
        cmdline = 0
        include_sensors = None
        kwargs = dict(
            workers=21,
            src=src, dst=dst, include_sensors=include_sensors)

    Ignore:
        from geowatch.cli.coco_crop_tracks import *  # NOQA
        import geowatch
        dvc_dpath = geowatch.find_dvc_dpath(hardware='hdd')
        base_fpath = dvc_dpath / 'Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json'
        src = base_fpath
        dst = dvc_dpath / 'Cropped-Drop2-TA1-2022-02-15/data.kwcoco.json'
        cmdline = 0
        include_sensors = ['WV']
        kwargs = dict(src=src, dst=dst, include_sensors=include_sensors)
        kwargs['workers'] = 8
    """
    import kwcoco
    config = CocoCropTrackConfig.cli(cmdline=cmdline, data=kwargs, strict=True)
    print('config = {}'.format(ub.urepr(config, nl=1)))

    src = config['src']
    dst = ub.Path(config['dst'])
    # Cropped assets are written relative to the directory holding ``dst``.
    dst_bundle_dpath = dst.parent

    print('load = {}'.format(ub.urepr(src, nl=1)))
    if config['sqlmode']:
        coco_dset = kwcoco.CocoSqlDatabase.coerce(src)
    else:
        coco_dset = kwcoco.CocoDataset.coerce(src)

    from geowatch.utils import kwcoco_extensions
    valid_gids = kwcoco_extensions.filter_image_ids(
        coco_dset,
        include_sensors=config['include_sensors'],
        exclude_sensors=config['exclude_sensors'],
        select_images=config['select_images'],
        select_videos=config['select_videos'],
    )
    # Only build a subset when the filters actually removed something.
    if len(valid_gids) != coco_dset.n_images:
        coco_dset = coco_dset.subset(valid_gids)

    context_factor = config['context_factor']
    channels = config['channels']
    if channels is not None:
        channels = kwcoco.FusedChannelSpec.coerce(channels)

    print('Generate jobs')
    crop_job_gen = generate_crop_jobs(coco_dset, dst_bundle_dpath,
                                      context_factor=context_factor,
                                      channels=channels)

    keep = config['keep']
    from kwutil import util_parallel
    workers = util_parallel.coerce_num_workers(config['workers'])
    jobs = ub.JobPool(mode=config['mode'], max_workers=workers)

    prog = ub.ProgIter(desc='submit crop jobs')
    last_key = None
    with prog:
        # The number of tasks is not known up front, so the progress bar is
        # advanced manually while the generator is consumed.
        for crop_asset_task in crop_job_gen:
            jobs.submit(run_crop_asset_task, crop_asset_task, keep)
            key = (crop_asset_task['region'], crop_asset_task['tid'])
            if last_key != key:
                # Show which (region, track) is currently being submitted.
                last_key = key
                prog.set_postfix_str(key)
            prog.update()

    results = []
    for job in jobs.as_completed(desc='collect jobs'):
        try:
            result = job.result()
        except Exception as ex:
            # A single failed crop aborts the run; report it first.
            print('ex = {}'.format(ub.urepr(ex, nl=1)))
            print('Failed crop asset task ex = {!r}'.format(ex))
            raise
        else:
            results.append(result)

    # Group assets by the track they belong to
    tid_to_assets = ub.group_items(results, lambda x: x['tid'])
    tid_to_size = ub.map_vals(len, tid_to_assets)

    import kwarray
    import numpy as np
    print(f'{len(tid_to_size)=}')
    arr = np.array(list(tid_to_size.values()))
    stats = kwarray.stats_dict(arr)
    if len(arr):
        # Quantiles are only computed when at least one track exists.
        quantile = [0.25, 0.50, 0.75]
        quant_values = np.quantile(arr, quantile)
        quant_keys = ['q_{:0.2f}'.format(q) for q in quantile]
        for k, v in zip(quant_keys, quant_values):
            stats[k] = v
    print(f'track length stats {ub.urepr(stats, nl=1)!s}')

    # Rebuild the manifest
    target_gsd = config['target_gsd']
    new_dset = make_track_kwcoco_manifest(dst, dst_bundle_dpath, tid_to_assets,
                                          target_gsd=target_gsd)

    import safer
    with safer.open(dst, 'w', temp_file=not ub.WIN32) as file:
        new_dset.dump(file, newlines=True, indent=' ')
r"""
geowatch visualize \
/home/joncrall/data/dvc-repos/smart_watch_dvc-hdd/Cropped-Drop2-TA1-2022-02-15/data.kwcoco.json \
--channels="red|green|blue" \
--animate=True \
--workers=8
DVC_DPATH=$(geowatch_dvc --hardware="hdd")
echo $DVC_DPATH
cp $DVC_DPATH/Cropped-Drop2-TA1-2022-02-15/data.kwcoco.json \
$DVC_DPATH/Cropped-Drop2-TA1-2022-02-15/imgonly.kwcoco.json
python -m geowatch reproject_annotations \
--src "$DVC_DPATH/Cropped-Drop2-TA1-2022-02-15/imgonly.kwcoco.json" \
--dst "$DVC_DPATH/Cropped-Drop2-TA1-2022-02-15/projected.kwcoco.json" \
--site_models="$DVC_DPATH/annotations/site_models/*.geojson" \
--region_models="$DVC_DPATH/annotations/region_models/*.geojson"
# {viz_part}
"""
[docs]
def make_track_kwcoco_manifest(dst, dst_bundle_dpath, tid_to_assets,
                               target_gsd=1):
    """
    Rebundle cropped assets into a new kwcoco dataset.

    One video is added per track id. Within a track, assets sharing the same
    ``gid`` are combined into a single image whose frame index follows the
    sorted ``date_captured`` strings.

    Args:
        dst: value assigned to ``fpath`` of the new dataset.
        dst_bundle_dpath: bundle directory handed to the helpers that
            populate per-asset and per-image geo information.
        tid_to_assets (dict): maps a track id to a list of asset
            dictionaries. Each asset must contain 'gid', 'file_name' (a
            path-like object), 'sensor_coarse', 'datetime' and 'channels';
            'tid' and 'region' are removed when present.
        target_gsd: forwarded to ``coco_populate_geo_video_stats``.

    Returns:
        kwcoco.CocoDataset: the newly populated dataset.

    TODO:
        - [ ] populate auxiliary is taking a long time, speed it up.
    """
    from geowatch.utils import kwcoco_extensions
    import kwimage
    import kwcoco

    def _channel_key(asset):
        return asset['channels']

    # Keys that describe the parent image / track and therefore do not
    # belong in the per-asset dictionaries.
    image_level_keys = ('gid', 'tid', 'region', 'datetime')

    # Make the new kwcoco file where 1 track is mostly 1 video
    # TODO: we could crop the kwcoco annotations here too, but
    # we can punt on that for now and just reproject them.
    manifest = kwcoco.CocoDataset()
    manifest.fpath = dst

    for tid, track_assets in tid_to_assets.items():
        video_id = manifest.add_video(name=tid)

        frames = []
        grouped = ub.group_items(track_assets, lambda item: item['gid'])
        for source_assets in grouped.values():
            region = None
            captured = None
            sensor = None
            assets = []
            for source in source_assets:
                # The image-level values of the last asset in the group win.
                fname = source['file_name']
                sensor = source['sensor_coarse']
                captured = source['datetime']
                region = source.get('region', None)
                asset = {
                    key: value for key, value in source.items()
                    if key not in image_level_keys
                }
                asset['file_name'] = str(fname)
                assets.append(asset)

            assets.sort(key=_channel_key)

            frames.append({
                'name': fname.parent.name,
                'file_name': None,
                'auxiliary': assets,
                'date_captured': captured.isoformat(),
                'sensor_coarse': sensor,
                'parent_region': region,
                'timestamp': captured.timestamp(),
            })

        frames.sort(key=lambda frame: frame['date_captured'])
        for frame_index, frame in enumerate(frames):
            frame['frame_index'] = frame_index
            frame['video_id'] = video_id
            manifest.add_image(**frame)

    images = manifest.dataset['images']

    for img in ub.ProgIter(images, desc='populate auxiliary'):
        for asset in img['auxiliary']:
            kwcoco_extensions._populate_canvas_obj(
                dst_bundle_dpath, asset, overwrite={'warp'}, with_wgs=True)

    for img in images:
        kwcoco_extensions._recompute_auxiliary_transforms(img)

    for img in images:
        coco_img = kwcoco.CocoImage(img)
        coco_img._bundle_dpath = dst_bundle_dpath
        coco_img._video = {}
        kwcoco_extensions._populate_valid_region(coco_img)

    from kwcoco.util.util_json import ensure_json_serializable
    affine_keys = ('warp_to_wld', 'wld_to_pxl')
    for img in ub.ProgIter(images, desc='cleanup imgs'):
        # Rewrite the transforms of the image and each of its assets in
        # their concise form before making everything serializable.
        for obj in [img, *img['auxiliary']]:
            for key in affine_keys:
                if key in obj:
                    obj[key] = kwimage.Affine.coerce(obj[key]).concise()
        img.update(ensure_json_serializable(img))

    # Make the asset order consistent by channel
    for img in images:
        if 'auxiliary' in img:
            img['auxiliary'] = sorted(img['auxiliary'], key=_channel_key)

    for vidid in ub.ProgIter(manifest.videos(), desc='populate videos'):
        kwcoco_extensions.coco_populate_geo_video_stats(
            manifest, target_gsd=target_gsd, video_id=vidid
        )
    return manifest
# @xdev.profile
[docs]
def generate_crop_jobs(coco_dset, dst_bundle_dpath, channels=None, context_factor=1.0):
    """
    Generator that yields parameters to be used to call gdal_translate

    For each track whose first annotation belongs to one of the hard-coded
    valid categories, a square region around the track is computed in video
    space, scaled by ``context_factor`` and clipped to the video bounds. One
    task is then yielded for every asset of every image annotated by the
    track, with the region warped into that asset's pixel space.

    Args:
        coco_dset: dataset providing the tracks, annotations, images and
            videos.
        dst_bundle_dpath: directory that is joined with the generated
            relative file name to form each task's 'dst'.
        channels: if not None, assets that share no channel with this spec
            are skipped.
        context_factor (float): scale applied about the center of the square
            box surrounding the track.

    Yields:
        dict: a crop task with the keys 'gid', 'sensor_coarse', 'datetime',
            'tid', 'parent_file_name', 'file_name', 'region', 'src', 'dst',
            'crop_box_asset_space', 'channels' and 'num_bands'.

    Benchmark:
        # Test kwimage versus shapely warp
        kw_poly = kwimage.Polygon.random()
        kw_poly = kwimage.Boxes.random(1).to_polygons()[0]
        sh_poly = kw_poly.to_shapely()
        transform = kwimage.Affine.random()
        import timerit
        ti = timerit.Timerit(100, bestof=10, verbose=2)
        for timer in ti.reset('shapely'):
            with timer:
                # This is faster than fancy indexing
                a, b, x, d, e, y = transform.matrix.ravel()[0:6]
                sh_transform = (a, b, d, e, x, y)
                sh_warp_poly = affine_transform(sh_poly, sh_transform)
        for timer in ti.reset('kwimage'):
            with timer:
                kw_warp_poly = kw_poly.warp(transform)
        kw_warp_poly2 = kwimage.Polygon.from_shapely(sh_warp_poly)
        print(kw_warp_poly2)
        print(kw_warp_poly)
    """
    # Import the submodule explicitly instead of relying on some other
    # package having already loaded ``shapely.ops``.
    import shapely.ops
    from kwutil import util_time
    import kwcoco
    from shapely.affinity import affine_transform
    import kwimage
    # Build to_extract-like objects so this script can eventually be combined
    # with coco-align-geotiffs to make something that's ultimately better.

    # HACK: only take tracks with specific categories (todo: glob-parameterize)
    valid_categories = {
        'No Activity',
        'Site Preparation',
        'Active Construction',
        'Post Construction',
        'negative',
    }
    print('\n\n')

    try:
        tid_to_aids = coco_dset.index.trackid_to_aids
    except Exception:
        # Fallback for datasets whose index has no track lookup.
        # NOTE(review): ``coco_dset.index.annots()`` is kept from the
        # original; confirm the index object really exposes ``annots``.
        annots = coco_dset.index.annots()
        tid_to_aids = ub.group_items(annots.lookup('id'), annots.lookup('track_id'))

    bundle_dpath = ub.Path(coco_dset.bundle_dpath)
    # Images are looked up once per annotation in two separate passes, so
    # cache the CocoImage objects.
    _lut_coco_image = ub.memoize(coco_dset.coco_image)

    print(f'{len(tid_to_aids)=}')

    # First pass: keep only tracks whose first annotation has a valid
    # category, remembering which video each track lives in.
    vidid_and_tid_list = []
    for tid, aids in ub.ProgIter(tid_to_aids.items(), total=len(tid_to_aids), desc='one loop over tracks'):
        aid0 = ub.peek(aids)
        ann0 = coco_dset.index.anns[aid0]
        cid = ann0['category_id']
        category = coco_dset.index.cats[cid]
        catname = category['name']
        if catname in valid_categories:
            img0 = coco_dset.index.imgs[ann0['image_id']]
            vidid = img0['video_id']
            vidid_and_tid_list.append((vidid, tid))
    vidid_and_tid_list = sorted(vidid_and_tid_list)

    for vidid, tid in vidid_and_tid_list:
        aids = tid_to_aids[tid]
        video = coco_dset.index.videos[vidid]
        region = video['name']

        # One iteration over the track to gather the segmentation region.
        annots = coco_dset.annots(list(aids))
        annot_dates = annots.images.lookup('date_captured')
        sortx = ub.argsort(annot_dates)
        annots = annots.take(sortx)

        # Hack only take some of the frames.
        # Pad with some number of no activity frames: keep at most the
        # first 5 'Post Construction' and the last 5 'No Activity'
        # annotations (in date order); other categories are kept in full.
        cname_to_idxs = ub.group_items(list(range(len(annots))), annots.cnames)
        k = 'Post Construction'
        if k in cname_to_idxs:
            cname_to_idxs[k] = sorted(cname_to_idxs[k])[0:5]
        k = 'No Activity'
        if k in cname_to_idxs:
            cname_to_idxs[k] = sorted(cname_to_idxs[k])[-5:]
        final_idxs = sorted(ub.flatten(cname_to_idxs.values()))
        annots = annots.take(final_idxs)

        anns = annots.objs
        if not anns:
            continue

        track_polys = []
        for ann in anns:
            gid = ann['image_id']
            coco_img = _lut_coco_image(gid)
            vid_sseg = coco_img._annot_segmentation(ann, space='video')
            track_polys.append(vid_sseg.to_shapely())

        # Might need to pad out this region
        vid_track_poly_sh = shapely.ops.unary_union(track_polys).convex_hull

        # Replace the hull by a square box around it, scaled about its
        # center by the context factor.
        vid_track_poly = kwimage.MultiPolygon.from_shapely(vid_track_poly_sh).data[0]
        cx, cy, w, h = vid_track_poly.to_boxes().to_cxywh().data[0]
        w = h = max(w, h)
        vid_track_square = kwimage.Boxes([[cx, cy, w, h]], 'cxywh')
        vid_track_square = vid_track_square.scale(context_factor, about='center')
        vid_track_poly_sh = vid_track_square.to_shapely()[0]

        # Ensure we dont crop past the edges
        video_bounds_sh = kwimage.Boxes([
            [0, 0, video['width'], video['height']]
        ], 'xywh').to_shapely()[0]
        vid_track_poly_sh = vid_track_poly_sh.intersection(video_bounds_sh)

        if vid_track_poly_sh.is_empty:
            # The track lies entirely outside of the video bounds, so there
            # is nothing that could be cropped for it.
            continue

        # Given the segmentation region, generate track tasks
        track_name = tid if isinstance(tid, str) else 'track_{}'.format(tid)
        track_dname = ub.Path(region) / track_name

        # String representing the spatial crop
        space_str = ub.hash_data(vid_track_poly_sh.wkt, base='abc')[0:8]

        crop_track_task = {
            'tid': tid,
        }

        # Assume each annotation corresponds to exactly one image
        for ann in anns:
            gid = ann['image_id']
            coco_img = _lut_coco_image(gid)

            # Skip this image if it corresponds to an invalid region
            try:
                valid_region = coco_img.valid_region(space='video')
                if valid_region is not None:
                    valid_region_sh = valid_region.to_shapely()
                    if not valid_region_sh.intersects(vid_track_poly_sh):
                        continue
            except AttributeError as ex:
                print('warning (might need kwcoco > 0.2.27) ex = {!r}'.format(ex))

            sensor_coarse = coco_img.get('sensor_coarse', 'unknown')
            datetime_ = util_time.coerce_datetime(coco_img['date_captured'])
            iso_time = util_time.isoformat(datetime_, sep='T', timespec='seconds')

            crop_img_task = {
                'gid': gid,
                'sensor_coarse': sensor_coarse,
                'datetime': datetime_,
                **crop_track_task,
            }

            track_img_dname = track_dname / sensor_coarse
            warp_img_from_vid = coco_img.warp_img_from_vid

            for obj in coco_img.iter_asset_objs():
                chan_code = obj['channels']
                obj_channels = kwcoco.FusedChannelSpec.coerce(chan_code)
                if channels is not None:
                    if obj_channels.intersection(channels).numel() == 0:
                        # Skip this channel
                        continue

                warp_img_from_aux = kwimage.Affine.coerce(obj['warp_aux_to_img'])
                warp_aux_from_img = warp_img_from_aux.inv()
                warp_aux_from_vid = warp_aux_from_img @ warp_img_from_vid

                # Warp the crop region into asset space with shapely, which
                # takes the affine coefficients as (a, b, d, e, xoff, yoff).
                a, b, x, d, e, y = warp_aux_from_vid.matrix.ravel()[0:6]
                sh_transform = (a, b, d, e, x, y)
                aux_track_poly_sh = affine_transform(vid_track_poly_sh, sh_transform)
                aux_track_poly = kwimage.Polygon.from_shapely(aux_track_poly_sh)

                crop_box_asset_space = aux_track_poly.bounding_box().quantize().to_xywh().data[0]

                # Construct a name for the subregion to extract.
                num = 0
                # to prevent long names for docker (limit is 242 chars)
                chan_pname = obj_channels.path_sanitize(maxlen=10)
                name = 'crop_{}_{}_{}_{}_{}'.format(iso_time, track_name, space_str, sensor_coarse, num)
                dst_fname = track_img_dname / name / f'{name}_{chan_pname}.tif'

                crop_asset_task = {**crop_img_task}
                crop_asset_task['parent_file_name'] = obj['file_name']
                crop_asset_task['file_name'] = dst_fname
                crop_asset_task['region'] = region
                crop_asset_task['src'] = bundle_dpath / obj['file_name']
                crop_asset_task['dst'] = dst_bundle_dpath / dst_fname
                crop_asset_task['crop_box_asset_space'] = crop_box_asset_space
                crop_asset_task['channels'] = chan_code
                crop_asset_task['num_bands'] = obj['num_bands']
                yield crop_asset_task
[docs]
def run_crop_asset_task(crop_asset_task, keep):
    """
    Crop a single asset as described by one task dictionary.

    Args:
        crop_asset_task (dict): must contain 'src', 'dst' (a path object) and
            'crop_box_asset_space' (an xywh box). The input is not modified.
        keep: when equal to 'img', an already existing ``dst`` is reused
            instead of being written again.

    Returns:
        dict: a copy of the task without the 'src', 'dst' and
            'crop_box_asset_space' entries.
    """
    from osgeo import osr
    # The return value is discarded.
    # NOTE(review): presumably called for its initialization side effect in
    # the worker - confirm.
    osr.GetPROJSearchPaths()
    from kwgis.utils import util_gdal
    import kwimage

    remaining = crop_asset_task.copy()
    src_fpath = remaining.pop('src')
    dst_fpath = remaining.pop('dst')
    xywh = remaining.pop('crop_box_asset_space')

    if keep in {'img'} and dst_fpath.exists():
        # Cache hit: the cropped file is already on disk.
        return remaining

    crop_box = kwimage.Boxes([xywh], 'xywh')
    dst_fpath.parent.ensuredir()
    util_gdal.gdal_single_translate(src_fpath, dst_fpath, crop_box, tries=10)
    return remaining
if __name__ == '__main__':
    # CommandLine:
    #     python ~/code/watch/geowatch/cli/coco_crop_tracks.py
    main(cmdline=1)