"""
Adds fields needed by ndsampler to correctly "watch" a region.
Some of this is done hueristically. We assume images come from certain sensors.
We assume input is orthorectified. We assume some GSD "target" gsd for video
and image processing. Note a video GSD will typically be much higher (i.e.
lower resolution) than an image GSD.
"""
import warnings
import numpy as np
import ubelt as ub
import kwimage
import itertools
import numbers
import kwcoco
from os.path import join
from geowatch.utils import util_raster
from geowatch import exceptions
# import geowatch.gis
try:
from line_profiler import profile
except Exception:
profile = ub.identity
[docs]
def filter_image_ids(coco_dset, gids=None, include_sensors=None,
exclude_sensors=None, select_images=None,
select_videos=None):
"""
Filters to a specific set of images given query parameters
"""
def coerce_set(x):
return set(x.split(',')) if isinstance(x, str) else set(x)
def filter_by_attribute(table, key, include, exclude):
if include is not None or exclude is not None:
if include is not None:
include = coerce_set(include)
if exclude is not None:
exclude = coerce_set(exclude)
values = table.lookup(key, default=None)
if include is None:
flags = [v not in exclude for v in values]
elif exclude is None:
flags = [v in include for v in values]
else:
flags = [v in include and v not in exclude for v in values]
table = table.compress(flags)
return table
valid_images = coco_dset.images(gids)
valid_images = filter_by_attribute(
valid_images, 'sensor_coarse', include_sensors, exclude_sensors)
valid_gids = set(valid_images)
if select_images is not None:
try:
import jq
except Exception:
print('The jq library is required to run a generic image query')
raise
try:
query_text = ".images[] | select({}) | .id".format(select_images)
query = jq.compile(query_text)
image_selected_gids = set(query.input(coco_dset.dataset).all())
valid_gids &= image_selected_gids
except Exception as ex:
print('JQ Query Failed: {}, ex={}'.format(query_text, ex))
raise
if select_videos is not None:
try:
import jq
except Exception:
print('The jq library is required to run a generic image query')
raise
try:
query_text = ".videos[] | select({}) | .id".format(select_videos)
query = jq.compile(query_text)
selected_vidids = query.input(coco_dset.dataset).all()
vid_selected_gids = set(ub.flatten(coco_dset.index.vidid_to_gids[vidid]
for vidid in selected_vidids))
valid_gids &= vid_selected_gids
except Exception:
print('JQ Query Failed: {}'.format(query_text))
raise
valid_gids = sorted(valid_gids)
return valid_gids
[docs]
def populate_watch_fields(
coco_dset,
target_gsd=10.0,
vidids=None,
overwrite=False,
default_gsd=None,
conform=True,
enable_video_stats=True,
enable_valid_region=False,
enable_intensity_stats=False,
workers=0,
mode='thread',
remove_broken=False,
skip_populate_errors=False,
):
"""
Aggregate populate function for fields useful to GeoWATCH.
Args:
coco_dset (Dataset): dataset to work with
target_gsd (float): target gsd in meters
overwrite (bool | List[str]):
if True or False overwrites everything or nothing. Otherwise it can
be a list of strings indicating what is
overwritten. Valid keys are warp, band, and channels.
default_gsd (None | float):
if specified, assumed any images without geo-metadata have this
GSD'
Ignore:
>>> # xdoctest: +REQUIRES(env:DVC_DPATH)
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> import kwcoco
>>> dvc_dpath = geowatch.utils.util_data.find_dvc_dpath()
>>> fpath = dvc_dpath / 'drop0_aligned/data.kwcoco.json')
>>> coco_dset = kwcoco.CocoDataset(fpath)
>>> target_gsd = 5.0
>>> populate_watch_fields(coco_dset, target_gsd)
>>> print('coco_dset.index.videos = {}'.format(ub.urepr(coco_dset.index.videos, nl=-1)))
>>> print('coco_dset.index.imgs[1] = ' + ub.urepr(coco_dset.index.imgs[1], nl=1))
Example:
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> import kwcoco
>>> # TODO: make a demo dataset with some sort of gsd metadata
>>> coco_dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral')
>>> print('coco_dset = {!r}'.format(coco_dset))
>>> target_gsd = 13.0
>>> populate_watch_fields(coco_dset, target_gsd, default_gsd=1)
>>> print('coco_dset.index.imgs[1] = ' + ub.urepr(coco_dset.index.imgs[1], nl=2))
>>> print('coco_dset.index.videos = {}'.format(ub.urepr(coco_dset.index.videos, nl=1)))
>>> # TODO: make a demo dataset with some sort of gsd metadata
>>> coco_dset = kwcoco.CocoDataset.demo('vidshapes8')
>>> print('coco_dset = {!r}'.format(coco_dset))
>>> target_gsd = 13.0
>>> populate_watch_fields(coco_dset, target_gsd, default_gsd=1)
>>> print('coco_dset.index.imgs[1] = ' + ub.urepr(coco_dset.index.imgs[1], nl=2))
>>> print('coco_dset.index.videos = {}'.format(ub.urepr(coco_dset.index.videos, nl=1)))
"""
# Load your KW-COCO dataset (conform populates information like image size)
if conform:
# Note: we will handle imgsize in a later part
coco_dset.conform(pycocotools_info=False, workers=workers,
ensure_imgsize=False)
if 1:
from geowatch import heuristics
heuristics.register_known_fsspec_s3_buckets()
if vidids is None:
vidids = list(coco_dset.index.videos.keys())
gids = list(coco_dset.index.imgs.keys())
else:
gids = list(ub.flatten(coco_dset.images(video_id=video_id) for video_id in vidids))
coco_populate_geo_heuristics(
coco_dset, gids=gids, overwrite=overwrite, default_gsd=default_gsd,
workers=workers, mode=mode,
enable_intensity_stats=enable_intensity_stats,
enable_valid_region=enable_valid_region,
remove_broken=remove_broken,
skip_populate_errors=skip_populate_errors,
)
# Modify videos to include cleared status
if 1:
from geowatch import heuristics
from kwutil import util_pattern
region_id_to_cleared = {d['region_id']: d['cleared'] for d in heuristics.REGION_STATUS}
pat = util_pattern.Pattern.coerce(r'\w+_R\d+(_\d+)?', 'regex')
for video in coco_dset.videos().objs:
video_name = video['name']
if pat.match(video_name):
region_id = '_'.join(video_name.split('_')[0:2])
cleared = region_id_to_cleared.get(region_id, False)
video['cleared'] = cleared
video['domain'] = region_id
if enable_video_stats:
for video_id in ub.ProgIter(vidids, total=len(vidids), desc='populate videos'):
coco_populate_geo_video_stats(coco_dset, video_id, target_gsd=target_gsd)
# serialize intermediate objects
coco_dset._ensure_json_serializable()
[docs]
def coco_populate_geo_heuristics(coco_dset: kwcoco.CocoDataset,
gids=None,
overwrite=False,
default_gsd=None,
workers=0,
mode='thread',
remove_broken=False, **kw):
"""
Example:
>>> # xdoctest: +REQUIRES(env:DVC_DPATH)
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> from geowatch.utils.util_data import find_dvc_dpath
>>> import kwcoco
>>> dvc_dpath = find_dvc_dpath()
>>> coco_fpath = dvc_dpath / 'drop1-S2-L8-aligned/data.kwcoco.json'
>>> coco_dset = kwcoco.CocoDataset(coco_fpath)
>>> coco_populate_geo_heuristics(coco_dset, overwrite=True, workers=12,
>>> keep_geotiff_metadata=False,
>>> mode='process')
Example:
>>> # xdoctest: +REQUIRES(env:DVC_DPATH)
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> from geowatch.utils.util_data import find_dvc_dpath
>>> import kwcoco
>>> dvc_dpath = find_dvc_dpath()
>>> coco_fpath = dvc_dpath / 'drop1-S2-L8-aligned/data.kwcoco.json'
>>> coco_dset = kwcoco.CocoDataset(coco_fpath)
>>> coco_populate_geo_heuristics(coco_dset, overwrite=True, workers=12,
>>> keep_geotiff_metadata=False,
>>> mode='process')
"""
import rich
gids = coco_dset.images(gids)._ids
# Cant multiprocess because of SwigPyObjects... bleh
# keep_geotiff_metadata must be False to use mode=process
keep_geotiff_metadata = kw.get('keep_geotiff_metadata', False)
if keep_geotiff_metadata and mode == 'process':
raise NotImplementedError(ub.paragraph(
'''
Cannot keep keep geotiff metadata when using process parallelism.
Need to serialize gdal objects (i.e. RPC transforms and
SwigPyObject) returned from ``geowatch.gis.geotiff.geotiff_metadata``
to be able do this.
'''))
jobs = ub.JobPool(mode, max_workers=workers, transient=True)
for gid in ub.ProgIter(gids, desc='submit populate imgs'):
coco_img = coco_dset.coco_image(gid)
if mode == 'process':
coco_img = coco_img.detach()
job_args = (coco_img,)
job_kwargs = dict(overwrite=overwrite, default_gsd=default_gsd, **kw)
job = jobs.submit(coco_populate_geo_img_heuristics2,
*job_args, **job_kwargs)
if 0:
# For debugging
job.job_func = coco_populate_geo_img_heuristics2
job.job_args = job_args
job.job_kwargs = job_kwargs
job.gid = gid
broken_image_ids = []
working_image_ids = []
for job in ub.ProgIter(jobs.as_completed(), total=len(jobs), desc='collect populate imgs'):
gid = job.gid
try:
img = job.result()
except Exception as ex:
# Check for known error messages that might cause errors grabbing
# data
known_errors = {}
known_errors['has_404'] = remove_broken and "404" in repr(ex)
known_errors['has_acc_problem'] = "not recognized as a supported file format" in repr(ex)
known_errors['connection_reset'] = "Connection reset by peer" in repr(ex)
known_errors['failed_to_read'] = 'Failed to read' in repr(ex)
print('')
print(f'known_errors = {ub.urepr(known_errors, nl=1)}')
if any(known_errors.values()):
broken_image_ids.append(gid)
print('')
rich.print('[yellow]WARNING: KNOWN ERROR IN GEO HEURISTICS')
print(f'ex={ex!r}')
print(f'ex={ex}')
print(f'ex.__dict__={ex.__dict__}')
print('num_broken = {}'.format(len(broken_image_ids)))
print('num_working = {}'.format(len(working_image_ids)))
rich.print('[yellow]WARNING: KNOWN ERROR IN GEO HEURISTICS')
else:
coco_img = coco_dset.coco_image(gid)
# Check for remote existence and handle the case where the data
# might be at a remote location
from geowatch.utils import util_fsspec
missing_paths = []
existing_paths = []
forbidden_paths = []
HACK_CHECK_EXISTS = 1
if HACK_CHECK_EXISTS:
"""
NOTE:
For L2 data, we need to assume the user called
from geowatch import heuristics
heuristics.register_known_fsspec_s3_buckets()
And populated the appropriate mapping from s3-bucket to
s3 configurations.
"""
for p in coco_img.iter_image_filepaths():
# Use fsspec to check if the files exist
fspath = util_fsspec.FSPath.coerce(p)
try:
if not fspath.exists():
missing_paths.append(fspath)
else:
existing_paths.append(fspath)
except PermissionError:
forbidden_paths.append(fspath)
if missing_paths or forbidden_paths:
broken_image_ids.append(gid)
print('')
rich.print('[yellow]WARNING: OTHER ERROR IN GEO HEURISTICS')
print(f'existing_paths = {ub.urepr(existing_paths, nl=1)}')
print(f'missing_paths = {ub.urepr(missing_paths, nl=1)}')
print(f'forbidden_paths = {ub.urepr(forbidden_paths, nl=1)}')
print(f'ex={ex!r}')
print(f'ex={ex}')
print(f'ex.__dict__={ex.__dict__}')
print('coco_img = {}'.format(ub.urepr(coco_img.img, nl=3)))
print('num_broken = {}'.format(len(broken_image_ids)))
print('num_working = {}'.format(len(working_image_ids)))
rich.print('[yellow]WARNING: OTHER ERROR IN GEO HEURISTICS')
print('')
# raise FileNotFoundError(str(missing_paths))
else:
print('')
rich.print('[red]ERROR: UNKNOWN ERROR IN GEO HEURISTICS')
print(f'ex={ex!r}')
print(f'ex={ex}')
print(f'ex.__dict__={ex.__dict__}')
print('coco_img = {}'.format(ub.urepr(coco_img.img, nl=3)))
print('num_broken = {}'.format(len(broken_image_ids)))
print('num_working = {}'.format(len(working_image_ids)))
rich.print('[red]ERROR: UNKNOWN ERROR IN GEO HEURISTICS')
print('')
# if 0:
# job.job_args
# job.job_kwargs
# coco_img, = job.job_args
# globals().update(**job.job_kwargs)
# # result = coco_populate_geo_img_heuristics2(*job.job_args, **job.job_kwargs)
raise
else:
working_image_ids.append(gid)
if mode == 'process':
# for multiprocessing
real_img = coco_dset.index.imgs[gid]
real_img.update(img)
if broken_image_ids:
print(f'There were {len(broken_image_ids)} / {len(gids)} broken images')
coco_dset.remove_images(broken_image_ids, verbose=True)
[docs]
@profile
def coco_populate_geo_img_heuristics2(
coco_img,
overwrite=False,
default_gsd=None,
keep_geotiff_metadata=False,
enable_intensity_stats=False,
enable_valid_region=False,
skip_populate_errors=False,
):
"""
Note: this will not overwrite existing channel info unless specified
Commandline
xdoctest -m ~/code/watch/geowatch/utils/kwcoco_extensions.py --profile
TODO:
- [ ] Use logic in the align demo classmethod to make an example
that uses a real L8 / S2 image.
Example:
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> import geowatch
>>> import json
>>> coco_dset = geowatch.coerce_kwcoco('geowatch-msi-geodata-dates-heatmap-videos1-frames2-gsize64')
>>> gid = 1
>>> overwrite = {'warp', 'band'}
>>> default_gsd = None
>>> kw = {}
>>> coco_img = coco_dset.coco_image(gid)
>>> before_img_attrs = list(coco_img.img.keys())
>>> before_aux_attr_hist = ub.dict_hist(ub.flatten([list(aux) for aux in coco_img.img['auxiliary']]))
>>> print('before_img_attrs = {!r}'.format(before_img_attrs))
>>> print('before_aux_attr_hist = {}'.format(ub.urepr(before_aux_attr_hist, nl=1)))
>>> coco_populate_geo_img_heuristics2(coco_img)
>>> img = coco_dset.index.imgs[gid]
>>> after_img_attrs = list(coco_img.img.keys())
>>> after_aux_attr_hist = ub.dict_hist(ub.flatten([list(aux) for aux in coco_img.img['auxiliary']]))
>>> new_img_attrs = set(after_img_attrs) - set(before_img_attrs)
>>> new_aux_attrs = {k: after_aux_attr_hist[k] - before_aux_attr_hist.get(k, 0) for k in after_aux_attr_hist}
>>> new_aux_attrs = {k: v for k, v in new_aux_attrs.items() if v > 0}
>>> print('new_img_attrs = {}'.format(ub.urepr(new_img_attrs, nl=1)))
>>> print('new_aux_attrs = {}'.format(ub.urepr(new_aux_attrs, nl=1)))
>>> #print('after_img_attrs = {}'.format(ub.urepr(after_img_attrs, nl=1)))
>>> #print('after_aux_attr_hist = {}'.format(ub.urepr(after_aux_attr_hist, nl=1)))
>>> assert 'geos_corners' in img
>>> #assert 'default_nodata' in img
>>> #assert 'default_nodata' in new_aux_attrs
>>> print(ub.varied_values(list(map(lambda x: ub.map_vals(json.dumps, x), coco_img.img['auxiliary'])), default=None))
Example:
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> import kwcoco
>>> ###
>>> gid = 1
>>> dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral')
>>> coco_img = dset.coco_image(gid)
>>> coco_populate_geo_img_heuristics2(coco_img, overwrite=True)
>>> ###
>>> gid = 1
>>> dset2 = kwcoco.CocoDataset.demo('shapes8')
>>> coco_img = dset2.coco_image(gid)
>>> coco_populate_geo_img_heuristics2(coco_img, overwrite=True)
"""
import geowatch
bundle_dpath = coco_img.bundle_dpath
img = coco_img.img
primary_obj = coco_img.primary_asset()
asset_objs = list(coco_img.iter_asset_objs())
overwrite = _coerce_overwrite(overwrite)
# Note: for non-geotiffs we could use the aux_to_img transformation
# provided with them to determine their geo-properties.
asset_errors = []
for obj in asset_objs:
try:
errors = _populate_canvas_obj(
bundle_dpath, obj, overwrite=overwrite, default_gsd=default_gsd,
keep_geotiff_metadata=keep_geotiff_metadata,
enable_intensity_stats=enable_intensity_stats)
asset_errors.append(errors)
except Exception as ex:
if skip_populate_errors:
asset_errors.append(str(ex))
else:
raise
if all(asset_errors):
info = ub.dict_isect(img, {'name', 'file_name', 'id'})
warnings.warn(f'img {info} has issues introspecting')
if keep_geotiff_metadata:
info = primary_obj.get('geotiff_metadata', None)
if info is None:
dem_hint = primary_obj.get('dem_hint', 'use')
metakw = {}
if dem_hint == 'ignore':
metakw['elevation'] = 0
primary_fname = primary_obj.get('file_name', None)
primary_fpath = join(bundle_dpath, primary_fname)
info = geowatch.gis.geotiff.geotiff_metadata(primary_fpath, **metakw)
primary_obj['geotiff_metadata'] = info
# if 'default_nodata' not in img:
# img['default_nodata'] = primary_obj['default_nodata']
if overwrite:
# Update image space to correspond to one of the assets
# (typically the one with the largest size)
if img.get('file_name', None) is None:
assets = list(coco_img.assets)
asset_dsizes = [
(asset['width'], asset['height'])
for asset in assets
if ("width" in asset) and ("height" in asset)
]
idx = ub.argmax(asset_dsizes)
main_asset = assets[idx]
img['width'] = main_asset['width']
img['height'] = main_asset['height']
if 'warp_to_wld' in main_asset:
# Also update world information (does the image need this?)
img['wld_to_pxl'] = kwimage.Affine.coerce(main_asset['warp_to_wld']).inv().concise()
img['wld_crs_info'] = main_asset['wld_crs_info']
else:
img.pop('wld_to_pxl', None)
img.pop('wld_crs_info', None)
_recompute_auxiliary_transforms(img)
if ('width' not in img or 'height' not in img):
# or overwrite:
# TODO: better test to see if we need to recompute auxiliary transforms
_recompute_auxiliary_transforms(img)
valid_region_utm = img.get('valid_region_utm', None)
if enable_valid_region and (valid_region_utm is None or 'warp' in overwrite):
_populate_valid_region(coco_img)
if keep_geotiff_metadata:
img['geotiff_metadata'] = primary_obj['geotiff_metadata']
if 'date_captured' in img:
from kwutil import util_time
img['timestamp'] = util_time.coerce_datetime(img['date_captured']).timestamp()
if 'geos_corners' in primary_obj:
# FIXME: we are assuming this maps perfectly onto the image
# which is should for the SMART data, but perhaps in the future
# this will not be safe?
img['geos_corners'] = primary_obj['geos_corners']
else:
print('None of the assets had geo information')
return img
@profile
def _populate_valid_region(coco_img):
"""
Ignore:
>>> # Make a dummy image with nodata to test that this works
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> from geowatch.utils.kwcoco_extensions import _populate_valid_region
>>> import kwcoco
>>> H, W = 1024, 1024
>>> nodata = 0 #
>>> dpath = ub.Path.appdir('geowatch/tests/valid_region').ensuredir()
>>> imdata = ((np.random.rand(H, W) * 240) + 3).astype(np.uint8)
>>> # Fill in nodata values
>>> imdata[:, 0:20] = nodata
>>> imdata[:, -20:] = nodata
>>> poly1 = kwimage.Polygon.random(rng=432).scale((W, H))
>>> poly1.data['exterior'] = poly1.data['exterior'].round()
>>> poly1.fill(imdata, nodata)
>>> poly2 = kwimage.Polygon.circle((0, 0), W / 2)
>>> poly2.fill(imdata, nodata)
>>> # Write image to disk with nodata info
>>> img_fpath = dpath / 'test_img.tif'
>>> kwimage.imwrite(img_fpath, imdata, backend='gdal', nodata=0, overviews=3)
>>> # # Register the image with a kwcoco dataset
>>> dset = kwcoco.CocoDataset()
>>> dset.fpath = dpath / 'data.kwcooc.json'
>>> gid = dset.add_image(file_name=img_fpath, channels='gray')
>>> dset.conform()
>>> #
>>> coco_img = dset.coco_image(gid)
>>> _populate_valid_region(coco_img)
>>> valid_region = kwimage.MultiPolygon.coerce(coco_img.img['valid_region'])
>>> print('valid_region.data = {!r}'.format(valid_region.data))
>>> #
>>> import kwplot
>>> kwplot.autompl()
>>> canvas = imdata.copy()
>>> canvas = valid_region.draw_on(canvas, fill=0, border=True)
>>> kwplot.imshow(canvas)
from rasterio import features
shapes = [(valid_region.to_geojson(), 255)]
canvasR = features.rasterize(shapes, out=canvas[:, :, 0].copy())
kwplot.imshow(canvasR, doclf=1)
"""
import geowatch
# _ = ub.cmd('gdalinfo -stats {}'.format(fpath), check=True)
bundle_dpath = coco_img.bundle_dpath
img = coco_img.img
primary_obj = coco_img.primary_asset()
if primary_obj is None:
warnings.warn('No primary asset found for img={}'.format(img))
return
primary_fname = primary_obj.get('file_name', None)
primary_fpath = join(bundle_dpath, primary_fname)
# NOTE: THIS POLYGON IS COMPUTED VIA RASTERIO, NOT SURE HOW WELL THIS AGREES
# WITH OTHER POLYGONS WE DRAW (USUALLY VIA CV2)
# TODO: get a better heuristic here
sh_poly = util_raster.mask(
primary_fpath,
tolerance=4,
# tolerance=None,
default_nodata=primary_obj.get('default_nodata', None),
# max_polys=100,
use_overview=2,
convex_hull=True,
)
kw_asset_poly = kwimage.MultiPolygon.from_shapely(sh_poly)
info = primary_obj.get('geotiff_metadata', None)
if info is None:
metakw = {}
dem_hint = primary_obj.get('dem_hint', 'use')
if dem_hint == 'ignore':
metakw['elevation'] = 0
info = geowatch.gis.geotiff.geotiff_metadata(primary_fpath, **metakw)
warp_img_from_asset = kwimage.Affine.coerce(primary_obj.get('warp_aux_to_img', None))
if warp_img_from_asset.isclose_identity():
kw_img_poly = kw_asset_poly
else:
kw_img_poly = kw_asset_poly.warp(warp_img_from_asset)
# TODO: we probably should not add the valid region to the asset.
# Not sure if its used anywhere though.
primary_obj['valid_region'] = kw_asset_poly.to_coco(style='new')
img['valid_region'] = kw_img_poly.to_coco(style='new')
if 'pxl_to_wld' in info:
wld_from_asset = info['pxl_to_wld']
kw_poly_utm = kw_asset_poly.warp(wld_from_asset).warp(info['wld_to_utm'])
poly_utm = kw_poly_utm.to_geojson()
poly_utm['properties'] = {}
poly_utm['properties']['crs'] = info['utm_crs_info']
# TODO: we probably should only add this to the image and not the
# asset?
primary_obj['valid_region_utm'] = poly_utm
img['valid_region_utm'] = poly_utm
@profile
def _populate_canvas_obj(bundle_dpath, obj, overwrite=False, with_wgs=False,
default_gsd=None, keep_geotiff_metadata=False,
enable_intensity_stats=False):
"""
obj can be an img or aux
Ignore:
from geowatch.utils.kwcoco_extensions import * # NOQA
from geowatch.demo.smart_kwcoco_demodata import demo_kwcoco_with_heatmaps
coco_dset = demo_kwcoco_with_heatmaps()
coco_img = coco_dset.coco_image(1)
obj = coco_img.primary_asset()
bundle_dpath = coco_dset.bundle_dpath
overwrite = True
with_wgs = False
default_gsd = None
keep_geotiff_metadata = False
"""
import geowatch
sensor_coarse = obj.get('sensor_coarse', None) # not reliable
num_bands = obj.get('num_bands', None)
channels = obj.get('channels', None)
fname = obj.get('file_name', None)
warp_to_wld = obj.get('warp_to_wld', None)
approx_meter_gsd = obj.get('approx_meter_gsd', None)
overwrite = _coerce_overwrite(overwrite)
errors = []
# Can only do this for images with file names
if fname is not None:
fpath = join(bundle_dpath, fname)
info = None
dem_hint = obj.get('dem_hint', 'use')
metakw = {}
if dem_hint == 'ignore':
metakw['elevation'] = 0
# TODO: ensure real nodata exists (maybe write helper file to disk?)
sensor_coarse = obj.get('sensor_coarse', None)
# if sensor_coarse in {'S2', 'L8', 'WV'}:
# default_nodata = 0
# else:
# default_nodata = None
# # Heuristic for no-data
# obj['default_nodata'] = default_nodata
if 'warp' in overwrite or warp_to_wld is None or approx_meter_gsd is None:
try:
if info is None:
info = geowatch.gis.geotiff.geotiff_metadata(
fpath, strict=True, **metakw)
if keep_geotiff_metadata:
obj['geotiff_metadata'] = info
try:
height, width = info['img_shape'][0:2]
except Exception:
print('info = {}'.format(ub.urepr(info, nl=1)))
raise
obj['height'] = height
obj['width'] = width
# print('info = {!r}'.format(info))
# WE NEED TO ACCOUNT FOR WLD_CRS TO USE THIS
if info['is_rpc']:
obj_to_wld = None
else:
obj_to_wld = kwimage.Affine.coerce(info['pxl_to_wld'])
geos_corners = info['geos_corners']
wld_crs_info = ub.dict_diff(info['wld_crs_info'], {'type'})
obj.update({
'geos_corners': geos_corners, # always in geojson
'wld_crs_info': wld_crs_info,
})
obj['band_metas'] = info['band_metas']
obj['is_rpc'] = info['is_rpc']
if with_wgs:
obj.update({
'wgs84_to_wld': info['wgs84_to_wld'],
'wld_to_pxl': info['wld_to_pxl'],
})
approx_meter_gsd = info['approx_meter_gsd']
except exceptions.GeoMetadataNotFound as ex:
if default_gsd is not None:
obj['approx_meter_gsd'] = default_gsd
obj['warp_to_wld'] = kwimage.Affine.eye().__json__()
else:
# FIXME: This might not be the best way to report errors
# raise
errors.append('no_crs_info: {!r}'.format(ex))
else:
obj['approx_meter_gsd'] = approx_meter_gsd
if obj_to_wld is not None:
obj['warp_to_wld'] = kwimage.Affine.coerce(obj_to_wld).__json__()
if 'band' in overwrite or num_bands is None:
try:
num_bands = _introspect_num_bands(fpath)
except Exception:
channels = obj.get('channels', None)
if channels is not None:
num_bands = kwcoco.ChannelSpec(channels).numel()
else:
raise
obj['num_bands'] = num_bands
if 'channels' in overwrite or channels is None:
if sensor_coarse is not None:
channels = _sensor_channel_hueristic(sensor_coarse, num_bands)
elif num_bands is not None:
channels = _num_band_hueristic(num_bands)
else:
raise Exception(ub.paragraph(
f'''
no methods to introspect channels
sensor_coarse={sensor_coarse},
num_bands={num_bands}
for obj={obj}
'''))
obj['channels'] = channels
# TODO: determine nodata defaults based on sensor_coarse
if enable_intensity_stats:
from geowatch.cli import coco_spectra
coco_spectra.ensure_intensity_sidecar(fpath)
return errors
@ub.memoize
def _is_writeable(dpath):
" https://stackoverflow.com/questions/2113427/determining-whether-a-directory-is-writeable "
import os
return os.access(dpath, os.W_OK) and os.path.isdir(dpath)
def _coerce_overwrite(overwrite):
"""
Im not a big fan of the way overwrite currently works, might want to
refactor.
"""
valid_overwrites = {'warp', 'band', 'channels'}
default_overwrites = {'warp', 'band'}
if isinstance(overwrite, str):
overwrite = set(overwrite.split(','))
if overwrite is True:
overwrite = default_overwrites
elif overwrite is False:
overwrite = {}
else:
overwrite = set(overwrite)
unexpected = overwrite - valid_overwrites
if unexpected:
raise ValueError(f'Got unexpected overwrites: {unexpected}')
return overwrite
# def single_geotiff_metadata(bundle_dpath, img, serializable=False):
# import geowatch
# from os.path import exists
# import dateutil
# geotiff_metadata = None
# aux_metadata = []
# img['datetime_acquisition'] = (
# dateutil.parser.parse(img['date_captured'])
# )
# # if an image specified its "dem_hint" as ignore, then we set the
# # elevation to 0. NOTE: this convention might be generalized and
# # replaced in the future. I.e. in the future the dem_hint might simply
# # specify the constant elevation to use, or perhaps something else.
# dem_hint = img.get('dem_hint', 'use')
# metakw = {}
# if dem_hint == 'ignore':
# metakw['elevation'] = 0
# # only need rpc info, geos_corners, and and warps
# keys_of_interest = {
# 'rpc_transform',
# 'is_rpc',
# 'wgs84_to_wld',
# 'geos_corners',
# 'wld_to_pxl',
# }
# fname = img.get('file_name', None)
# if fname is not None:
# src_gpath = join(bundle_dpath, fname)
# assert exists(src_gpath)
# img_info = geowatch.gis.geotiff.geotiff_metadata(src_gpath, **metakw)
# if serializable:
# raise NotImplementedError
# else:
# img_info = ub.dict_isect(img_info, keys_of_interest)
# geotiff_metadata = img_info
# for aux in img.get('auxiliary', []):
# aux_fpath = join(bundle_dpath, aux['file_name'])
# assert exists(aux_fpath)
# aux_info = geowatch.gis.geotiff.geotiff_metadata(aux_fpath, **metakw)
# aux_info = ub.dict_isect(aux_info, keys_of_interest)
# if serializable:
# raise NotImplementedError
# else:
# aux_metadata.append(aux_info)
# aux['geotiff_metadata'] = aux_info
# if fname is None:
# # need to choose one of the auxiliary images as the "main" image.
# # We are assuming that there is one auxiliary image that exactly
# # corresponds.
# candidates = []
# for aux in img.get('auxiliary', []):
# if aux['width'] == img['width'] and aux['height'] == img['height']:
# candidates.append(aux)
# if not candidates:
# raise AssertionError(
# 'Assumed at least one auxiliary image has identity '
# 'transform, but this seems to not be the case')
# aux = ub.peek(candidates)
# geotiff_metadata = aux['geotiff_metadata']
# img['geotiff_metadata'] = geotiff_metadata
# return geotiff_metadata, aux_metadata
[docs]
@profile
def coco_populate_geo_video_stats(coco_dset, video_id, target_gsd='max-resolution'):
"""
Create a "video-space" for all images in a video sequence at a specified
resolution.
For this video, this chooses the "best" image as the "video canvas /
region" and registers everything to that canvas/region. This creates the
"video-space" for this image sequence. Currently the "best" image is the
one that has the GSD closest to the target-gsd. This hueristic works well
in most cases, but no all.
Notes:
* Currently the "best image" exactly define the video canvas / region.
* Areas where other images do not overlap the vieo canvas are
effectively lost when sampling in video space, because anything
outside the video canvas is cropped out.
* Auxilary / asset images are required to have an "approx_meter_gsd"
and a "warp_to_wld" attribute to use this function atm.
TODO:
- [ ] Allow choosing of a custom "video-canvas" not based on any one image.
- [ ] Allow choosing a "video-canvas" that encompases all images
- [ ] Allow the base image to contain "approx_meter_gsd" /
"warp_to_wld" instead of the auxiliary image
- [ ] Is computing the scale factor based on approx_meter_gsd safe?
Args:
coco_dset (CocoDataset): coco dataset to be modified inplace
video_id (int): video_id to modify
target_gsd (float | str): string code, or float target gsd
Example:
>>> # xdoctest: +REQUIRES(env:DVC_DPATH)
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> from geowatch.utils.util_data import find_dvc_dpath
>>> import kwcoco
>>> dvc_dpath = find_dvc_dpath()
>>> coco_fpath = dvc_dpath / 'Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json'
>>> video_id = 2
>>> coco_fpath = dvc_dpath / 'Aligned-Drop2-TA1-2022-03-07/data.kwcoco.json'
>>> coco_dset = kwcoco.CocoDataset(coco_fpath)
>>> target_gsd = 10.0
>>> video_id = 1
>>> # We can check transforms before we apply this function
>>> coco_dset.images(video_id=video_id).lookup('warp_img_to_vid', None)
>>> # Apply the function
>>> coco_populate_geo_video_stats(coco_dset, video_id, target_gsd)
>>> # Check these transforms to make sure they look right
>>> popualted_video = coco_dset.index.videos[video_id]
>>> popualted_video = ub.dict_isect(popualted_video, ['width', 'height', 'warp_wld_to_vid', 'target_gsd'])
>>> print('popualted_video = {}'.format(ub.urepr(popualted_video, nl=-1)))
>>> coco_dset.images(video_id=video_id).lookup('warp_img_to_vid')
# TODO: make a demo dataset with some sort of gsd metadata
coco_dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral')
print('coco_dset = {!r}'.format(coco_dset))
coco_fpath = ub.expandpath('~/data/dvc-repos/smart_watch_dvc/drop0_aligned/data.kwcoco.json')
coco_fpath = '/home/joncrall/data/dvc-repos/smart_watch_dvc/drop1-S2-L8-aligned/combo_data.kwcoco.json'
coco_dset = kwcoco.CocoDataset(coco_fpath)
video_id = 1
target_gsd = 2.8
# Check drawing the valid region on the image
frac = 1
valid_regions = coco_dset.images().lookup('valid_region')
cands = []
for idx, valid_region in enumerate(valid_regions):
valid_region_img = kwimage.MultiPolygon.coerce(valid_region)
frac = valid_region_img.to_shapely().area / valid_region_img.bounding_box().area
if frac < 0.6:
cands.append(idx)
gid = coco_dset.images().take(cands).lookup('id')[0]
coco_img = coco_dset.coco_image(gid)
imdata = coco_img.imdelay('blue').finalize(nodata='float')
valid_region_img = kwimage.MultiPolygon.coerce(coco_img.img['valid_region'])
frac = valid_region_img.to_shapely().area / valid_region_img.bounding_box().area
print('frac = {!r}'.format(frac))
canvas_imgspace = kwimage.normalize_intensity(imdata)
kwplot.autompl()
kwplot.imshow(canvas_imgspace, doclf=1)
valid_region_img = kwimage.MultiPolygon.coerce(coco_img.img['valid_region'])
canvas_imgspace = valid_region_img.draw_on(canvas_imgspace, fill=0, color='green')
kwplot.imshow(canvas_imgspace)
# Check the nodata polygon returned raw pixel methods
primary_data = kwimage.imread(primary_fpath, nodata='float')
valid_mask = ~np.isnan(primary_data)
kw_poly = kwimage.Mask(valid_mask.astype(np.uint8), 'c_mask').to_multi_polygon()
print('kwimage kw_poly.data = {!r}'.format(kw_poly.data))
# CHeck the one returned by util_raster
primary_fpath = coco_img.primary_image_filepath()
sh_poly = util_raster.mask(
primary_fpath, tolerance=None,
convex_hull=0)
kw_poly = kwimage.MultiPolygon.from_shapely(sh_poly)
print('rasterio kw_poly.data = {!r}'.format(kw_poly.data))
"""
from kwcoco.coco_image import CocoImage
# Compute an image-to-video transform that aligns all frames to some
# common resolution.
video = coco_dset.index.videos[video_id]
gids = coco_dset.index.vidid_to_gids[video_id]
check_unique_channel_names(coco_dset, gids=gids)
frame_infos = {}
for gid in gids:
img = coco_dset.index.imgs[gid]
coco_img = CocoImage(img)
# If the base dictionary has "warp_to_wld" and "approx_meter_gsd"
# information we use that.
warp_wld_from_img = img.get('warp_to_wld', None)
img_approx_meter_gsd = img.get('approx_meter_gsd', None)
wld_crs_info = img.get('wld_crs_info', None)
# Otherwise we try to obtain it from the auxiliary images
if img_approx_meter_gsd is None or warp_wld_from_img is None:
# Choose any one of the auxiliary images that has the required
# attribute
aux_chosen = coco_img.primary_asset(requires=[
'warp_to_wld', 'approx_meter_gsd'])
if aux_chosen is None:
raise Exception(ub.paragraph(
'''
Image auxiliary images have no warp_to_wld and approx_meter
gsd. The auxiliary images may not have associated geo
metadata.
'''))
warp_wld_from_asset = kwimage.Affine.coerce(aux_chosen.get('warp_to_wld', None))
warp_img_from_asset = kwimage.Affine.coerce(aux_chosen['warp_aux_to_img'])
warp_asset_from_img = warp_img_from_asset.inv()
warp_wld_from_img = warp_wld_from_asset @ warp_asset_from_img
scale_img_from_asset = np.mean(warp_img_from_asset.decompose()['scale'])
asset_approx_meter_gsd = aux_chosen['approx_meter_gsd']
img_approx_meter_gsd = asset_approx_meter_gsd / scale_img_from_asset
wld_crs_info = aux_chosen.get('wld_crs_info', None)
if img_approx_meter_gsd is None or warp_wld_from_img is None:
raise Exception(ub.paragraph(
'''
Both the base image and its auxiliary images do not seem to
have the required warp_to_wld and approx_meter_gsd fields.
The image may not have associated geo metadata.
'''))
warp_wld_from_img = kwimage.Affine.coerce(warp_wld_from_img)
asset_channels = []
asset_gsds = []
for obj in coco_img.iter_asset_objs():
_gsd = obj.get('approx_meter_gsd')
if _gsd is not None:
_gsd = round(_gsd, 1)
asset_gsds.append(_gsd)
asset_channels.append(obj.get('channels', None))
frame_infos[gid] = {
'warp_wld_from_img': warp_wld_from_img,
'wld_crs_info': wld_crs_info,
# Note: division because gsd is inverted. This got me confused, but
# I'm pretty sure this works.
'target_gsd': target_gsd,
'approx_meter_gsd': img_approx_meter_gsd,
'width': img['width'],
'height': img['height'],
'asset_channels': asset_channels,
'asset_gsds': asset_gsds,
}
sorted_gids = ub.argsort(frame_infos, key=lambda x: x['approx_meter_gsd'])
if sorted_gids:
min_gsd_gid = sorted_gids[0]
max_gsd_gid = sorted_gids[-1]
max_example = frame_infos[max_gsd_gid]
min_example = frame_infos[min_gsd_gid]
max_gsd = max_example['approx_meter_gsd']
min_gsd = min_example['approx_meter_gsd']
if target_gsd == 'max-resolution':
target_gsd_ = min_gsd
elif target_gsd == 'min-resolution':
target_gsd_ = max_gsd
else:
target_gsd_ = target_gsd
if not isinstance(target_gsd, numbers.Number):
raise TypeError('target_gsd must be a code or number = {}'.format(type(target_gsd)))
target_gsd_ = float(target_gsd_)
# Compute the scale factor needed to be applied to each image to achieve
# the target videospace GSD.
for info in frame_infos.values():
info['target_gsd'] = target_gsd_
info['to_target_scale_factor'] = info['approx_meter_gsd'] / target_gsd_
available_channels = set()
available_gsds = set()
for gid in gids:
img = coco_dset.index.imgs[gid]
for obj in coco_img.iter_asset_objs():
available_channels.add(obj.get('channels', None))
_gsd = obj.get('approx_meter_gsd')
if _gsd is not None:
available_gsds.add(round(_gsd, 1))
# Align to the base reference frame closest to the target GSD, which
# has the "to_target_scale_factor" that is closest to 1.0
base_gid, base_info = min(
frame_infos.items(),
key=lambda kv: abs(1 - kv[1]['to_target_scale_factor'])
)
scale = base_info['to_target_scale_factor']
base_wld_crs_info = base_info['wld_crs_info']
# Can add an extra transform here if the video is not exactly in
# any specific image space
warp_baseimg_from_wld = base_info['warp_wld_from_img'].inv()
warp_vid_from_wld = kwimage.Affine.scale(scale) @ warp_baseimg_from_wld
video['width'] = int(np.ceil(base_info['width'] * scale))
video['height'] = int(np.ceil(base_info['height'] * scale))
video['wld_crs_info'] = base_wld_crs_info
if 'valid_region_geos' in video:
import geopandas as gpd
from geowatch.utils import util_gis
# Project the valid region onto video space
valid_region_crs84 = kwimage.MultiPolygon.coerce(video['valid_region_geos'])
wld_crs = base_wld_crs_info['auth']
crs84 = util_gis.get_crs84()
crs84_region_gdf = gpd.GeoDataFrame({'geometry': [valid_region_crs84.to_shapely()]}, crs=crs84)
wld_region_gdf = crs84_region_gdf.to_crs(wld_crs)
wld_region_poly = wld_region_gdf['geometry'].iloc[0]
wld_region_kwpoly = kwimage.MultiPolygon.from_shapely(wld_region_poly)
valid_region = wld_region_kwpoly.warp(warp_vid_from_wld).to_geojson()
if 0:
import kwplot
kwplot.autompl()
wld_map_gdf = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
ax = wld_map_gdf.plot()
wld_region_gdf.to_crs('OGC:CRS84').plot(ax=ax, color='red')
wld_region_gdf.plot(ax=ax, color='red')
video['valid_region'] = valid_region
# Store metadata in the video
video['warp_wld_to_vid'] = warp_vid_from_wld.__json__()
video['target_gsd'] = target_gsd_
video['resolution'] = f'{target_gsd_}GSD'
video['min_gsd'] = min_gsd
video['max_gsd'] = max_gsd
video['num_frames'] = len(gids)
# Remove old cruft (can remove in future versions)
video.pop('available_channels', None)
for gid in gids:
img = coco_dset.index.imgs[gid]
frame_info = frame_infos[gid]
wld_crs_info = frame_info['wld_crs_info']
warp_wld_from_img = frame_info['warp_wld_from_img']
warp_vid_from_img = warp_vid_from_wld @ warp_wld_from_img
img['warp_img_to_vid'] = warp_vid_from_img.concise()
if base_wld_crs_info != wld_crs_info:
import warnings
warnings.warn(ub.paragraph(
f'''
Video alignment is warping images with different World
Coordinate Reference Systems, but still treating them as the
same. FIXME
base_wld_crs_info={base_wld_crs_info!r},
wld_crs_info={wld_crs_info!r}
'''))
[docs]
def check_unique_channel_names(coco_dset, gids=None, verbose=0):
"""
Check each image has unique channel names
TODO:
- [ ] move to kwcoco proper
Example:
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> import kwcoco
>>> # TODO: make a demo dataset with some sort of gsd metadata
>>> coco_dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral')
>>> check_unique_channel_names(coco_dset)
>>> # Make some duplicate channels to test
>>> obj = coco_dset.images().objs[0]
>>> obj['auxiliary'][0]['channels'] = 'B1|B1'
>>> obj = coco_dset.images().objs[1]
>>> obj['auxiliary'][0]['channels'] = 'B1|B1'
>>> obj = coco_dset.images().objs[2]
>>> obj['auxiliary'][1]['channels'] = 'B1'
>>> import pytest
>>> with pytest.raises(AssertionError):
>>> check_unique_channel_names(coco_dset)
"""
images = coco_dset.images(gids=gids)
errors = []
for img in images.objs:
coco_img = coco_dset.coco_image(img['id'])
try:
_check_unique_channel_names_in_image(coco_img)
except AssertionError as ex:
if verbose:
print('ERROR: ex = {}'.format(ub.urepr(ex, nl=1)))
errors.append(ex)
if errors:
error_summary = ub.dict_hist(map(str, errors))
raise AssertionError(ub.urepr(error_summary))
def _check_unique_channel_names_in_image(coco_img):
import kwcoco
seen = set()
for obj in coco_img.iter_asset_objs():
chans = kwcoco.FusedChannelSpec.coerce(obj['channels'])
chan_list : list = chans.normalize().parsed
intra_aux_duplicate = ub.find_duplicates(chan_list)
if intra_aux_duplicate:
raise AssertionError(
'Image has internal duplicate bands: {}'.format(
intra_aux_duplicate))
inter_aux_duplicates = seen & set(chan_list)
if inter_aux_duplicates:
raise AssertionError(
'Image has inter-auxiliary duplicate bands: {}'.format(
inter_aux_duplicates))
[docs]
def coco_list_asset_infos(coco_dset):
"""
Get a list of filename and channels for each coco image
"""
asset_infos = []
for gid in coco_dset.images():
coco_img = coco_dset.coco_image(gid)
asset_objs = list(coco_img.iter_asset_objs())
for _asset_idx, obj in enumerate(asset_objs):
fname = obj.get('file_name', None)
if fname is not None:
fpath = join(coco_img.dset.bundle_dpath, fname)
file_info = {
'fpath': fpath,
'channels': obj['channels'],
}
asset_infos.append(file_info)
return asset_infos
[docs]
def rewrite_geotiffs(coco_dset):
import tempfile
import pathlib
blocksize = 96
compress = 'NONE'
asset_infos = coco_list_asset_infos(coco_dset)
for file_info in ub.ProgIter(asset_infos):
fpath = file_info['fpath']
if fpath.endswith(kwimage.im_io.JPG_EXTENSIONS):
print('Skipping jpeg')
# dont touch jpegs
continue
orig_fpath = pathlib.Path(fpath)
info = geotiff_format_info(fpath)
if (info['blocksize'][0] != blocksize or info['compress'] != compress) or True:
tmpdir = orig_fpath.parent / '.tmp_gdal_workspace'
tmpdir.mkdir(exist_ok=True, parents=True)
workdir = tmpdir / 'work'
bakdir = tmpdir / 'backup_v2'
workdir.mkdir(exist_ok=True)
bakdir.mkdir(exist_ok=True)
tmpfile = tempfile.NamedTemporaryFile(suffix=orig_fpath.name, dir=workdir, delete=False)
tmp_fpath = tmpfile.name
options = [
'-co BLOCKSIZE={}'.format(blocksize),
'-co COMPRESS={}'.format(compress),
'-of COG',
'-overwrite',
]
if not info['has_geotransform']:
options += [
'-to SRC_METHOD=NO_GEOTRANSFORM'
]
options += [
fpath,
tmp_fpath,
]
command = 'gdalwarp ' + ' '.join(options)
cmdinfo = ub.cmd(command)
if cmdinfo['ret'] != 0:
print('cmdinfo = {}'.format(ub.urepr(cmdinfo, nl=1)))
raise Exception('Command Errored')
# Backup the original file
import shutil
shutil.move(fpath, bakdir)
# Move the rewritten file into its place
shutil.move(tmp_fpath, fpath)
# info2 = geotiff_format_info(tmp_fpath)
[docs]
def ensure_transfered_geo_data(coco_dset, gids=None):
for gid in ub.ProgIter(coco_dset.images(gids), desc='transfer metadata'):
transfer_geo_metadata(coco_dset, gid)
def _search_video_for_other_geo_assets(coco_img, coco_dset):
### TODO: make use of me as a fallback in the transfer_geo_metadata2
class Found(Exception):
pass
import geowatch
asset_with_geo_info = None
gid = coco_img.img['id']
try:
# If an asset in our local image has no data, we can
# check to see if anyone in the vide has data.
# Check if anything in the video has geo-data
video_id = coco_img.img['video_id']
for other_gid in coco_dset.images(video_id=video_id):
if other_gid != gid:
other_coco_img = coco_dset.coco_image(other_gid)
for obj in other_coco_img.iter_asset_objs():
fname = obj.get('file_name', None)
if fname is not None:
fpath = join(coco_img.dset.bundle_dpath, fname)
try:
# Try until we find an image with real CRS info
info = geowatch.gis.geotiff.geotiff_metadata(fpath)
if info.get('crs_error', None) is not None:
raise Exception
except Exception:
continue
else:
raise Found
except Found:
warp_vid_from_geoimg = kwimage.Affine.coerce(other_coco_img.img['warp_img_to_vid'])
asset_with_geo_info = (obj, info, warp_vid_from_geoimg)
else:
raise exceptions.GeoMetadataNotFound(ub.paragraph(
'''
There are images without geo data, but no other data within
this image has transferable geo-data
'''))
return asset_with_geo_info
def _find_geotiffs_without_metadata(coco_img):
pass
def _execute_transfer_task(task):
fpath = task['fpath']
aff_geo_transform = task['aff_geo_transform']
georef_crs = task['georef_crs']
from osgeo import gdal
dst_ds = gdal.Open(fpath, gdal.GA_Update)
if dst_ds is None:
raise Exception('error handling gdal')
ret = dst_ds.SetGeoTransform(aff_geo_transform)
assert ret == 0
ret = dst_ds.SetSpatialRef(georef_crs)
assert ret == 0
dst_ds.FlushCache()
dst_ds = None
# unused
def _make_coco_img_from_geotiff(tiff_fpath, name=None):
"""
Example:
>>> from geowatch.demo.landsat_demodata import grab_landsat_product # NOQA
>>> product = grab_landsat_product()
>>> tiffs = product['bands'] + [product['meta']['bqa']]
>>> tiff_fpath = product['bands'][0]
>>> name = None
>>> img = _make_coco_img_from_geotiff(tiff_fpath)
>>> print('img = {}'.format(ub.urepr(img, nl=1)))
"""
obj = {}
if name is not None:
obj['name'] = name
bundle_dpath = '.'
obj = {
'file_name': tiff_fpath
}
_populate_canvas_obj(bundle_dpath, obj)
return obj
def _sensor_channel_hueristic(sensor_coarse, num_bands):
"""
Given a sensor and the number of bands in the image, return likely channel
codes for the image
Note these are "pseudo-harmonized" by common_name, but not harmonized
that is, one sensor's 'red' is roughly similar to another's but not corrected to match.
Bands without a common_name will have a sensor-unique prefix appended to prevent this behavior.
"""
from geowatch.utils.util_bands import WORLDVIEW2_PAN, WORLDVIEW2_MS4, WORLDVIEW2_MS8, SENTINEL2, LANDSAT8, LANDSAT7 # NOQA
def code(bands, prefix):
names = []
for band_dict in bands:
if 'common_name' in band_dict:
names.append(band_dict['common_name'])
else:
names.append(prefix + band_dict['name'])
return '|'.join(names)
err = 0
if sensor_coarse == 'WV':
if num_bands == 1:
channels = 'panchromatic'
elif num_bands == 3:
channels = 'r|g|b'
elif num_bands == 4:
channels = code(WORLDVIEW2_MS4, 'w')
elif num_bands == 8:
channels = code(WORLDVIEW2_MS8, 'w')
#channels = 'wv1|wv2|wv3|wv4|wv5|wv6|wv7|wv8'
# channels = 'cb|b|g|y|r|wv6|wv7|wv8'
else:
err = 1
elif sensor_coarse == 'S2':
if num_bands == 1:
channels = 'gray'
elif num_bands == 3:
channels = 'r|g|b'
elif num_bands == 13:
channels = code(SENTINEL2, 's')
# channels = 's1|s2|s3|s4|s4|s6|s7|s8|s8a|s9|s10|s11|s12'
# channels = 'cb|b|g|r|s4|s6|s7|s8|s8a|s9|s10|s11|s12'
else:
err = 1
elif sensor_coarse in {'LC', 'L8', 'LS'}:
if num_bands == 1:
channels = 'panchromatic'
elif num_bands == 3:
channels = 'r|g|b'
elif num_bands == 11:
channels = code(LANDSAT8, 'l8')
# channels = 'lc1|lc2|lc3|lc4|lc5|lc6|lc7|lc8|lc9|lc10|lc11'
# channels = 'cb|b|g|r|lc5|lc6|lc7|pan|lc9|lc10|lc11'
else:
err = 1
elif sensor_coarse in {'LE', 'L7'}:
if num_bands == 1:
channels = 'panchromatic'
elif num_bands == 3:
channels = 'r|g|b'
elif num_bands == 8:
channels = code(LANDSAT7, 'l7')
else:
err = 1
else:
err = 1
if err:
msg = f'sensor_coarse={sensor_coarse}, num_bands={num_bands}'
print('ERROR: mgs = {!r}'.format(msg))
raise NotImplementedError(msg)
return channels
def _introspect_num_bands(fpath):
from geowatch.utils import util_kwimage
shape = util_kwimage.load_image_shape(fpath, backend=['gdal', 'pil'])
if len(shape) == 1:
return 1
elif len(shape) == 3:
return shape[2]
else:
raise Exception(f'unknown format, fpath={fpath}, shape={shape}')
def _num_band_hueristic(num_bands):
if num_bands == 1:
channels = 'gray'
elif num_bands == 3:
channels = 'r|g|b'
elif num_bands == 4:
channels = 'r|g|b|a'
else:
raise Exception(f'num_bands={num_bands}')
return channels
@profile
def _recompute_auxiliary_transforms(img):
"""
Uses geotiff info to repopulate metadata
"""
import kwimage
from kwcoco.coco_image import CocoImage
coco_img = CocoImage(img)
base = coco_img.primary_asset(requires=['warp_to_wld'])
if base is None:
import warnings
warnings.warn(
'Cannot recompute auxiliary/asset transforms if no asset has a '
' warp_to_wld attribute')
return
try:
warp_wld_from_img = kwimage.Affine.coerce(base['warp_to_wld'])
except Exception:
print('img = {}'.format(ub.urepr(img, nl=2)))
raise
warp_img_from_wld = warp_wld_from_img.inv()
img.update(ub.dict_isect(base, {
'geos_corners',
'wld_crs_info',
'width', 'height',
'wgs84_to_wld',
'wld_to_pxl',
}))
if 'width' not in img and 'height' not in img:
if 'width' in base and 'height' in base:
img['width'] = base['width']
img['height'] = base['height']
for asset in coco_img.assets:
if 'warp_to_wld' in asset:
warp_wld_from_asset = kwimage.Affine.coerce(asset['warp_to_wld'])
warp_img_from_asset = warp_img_from_wld @ warp_wld_from_asset
asset['warp_aux_to_img'] = warp_img_from_asset.concise()
else:
import warnings
warnings.warn(
'An asset could not have its warp_aux_to_img transform updated '
'because it does not have a warp_to_wld attribute')
[docs]
def coco_channel_stats(coco_dset):
"""
Return information about what channels are available in the dataset
Example:
>>> import kwcoco
>>> import ubelt as ub
>>> import geowatch
>>> coco_dset = geowatch.coerce_kwcoco('vidshapes-geowatch')
>>> from geowatch.utils import kwcoco_extensions
>>> info = kwcoco_extensions.coco_channel_stats(coco_dset)
>>> print(ub.urepr(info, nl=3))
"""
import kwcoco
from kwcoco.coco_image import CocoImage
sensor_hist = ub.ddict(lambda: 0)
chan_hist = ub.ddict(lambda: 0)
single_chan_hist = ub.ddict(lambda: 0)
sensorchan_hist = ub.ddict(lambda: ub.ddict(lambda: 0))
sensorchan_hist2 = ub.ddict(lambda: 0)
for _gid, img in coco_dset.index.imgs.items():
coco_img: CocoImage = coco_dset.coco_image(_gid)
channels = []
for obj in coco_img.iter_asset_objs():
# TODO: perhaps replace unknown-chan with a "*"?
channels.append(obj.get('channels', 'unknown-chan'))
channels = sorted(channels)
chan = ','.join(channels)
sensor = img.get('sensor_coarse', '*')
chan_hist[chan] += 1
sensor_hist[sensor] += 1
sensorchan_hist[sensor][chan] += 1
# TODO: replace other usages with sensorchan_hist2 then remove other
# uses, and rename sensorchan_hist2 to sensorchan_hist
sensorchan = f'{sensor}:({chan})'
sensorchan_hist2[sensorchan] += 1
for single_chan in kwcoco.ChannelSpec(chan).unique():
single_chan_hist[single_chan] += 1
CS = kwcoco.ChannelSpec
FS = kwcoco.FusedChannelSpec
osets = [CS.coerce(c).fuse().to_oset() for c in chan_hist]
if len(osets) == 0:
common_channels = FS.coerce([])
all_channels = FS.coerce([])
all_sensorchan = kwcoco.SensorChanSpec.coerce('')
else:
common_channels = FS.coerce(list(ub.oset.intersection(*osets))).concise()
all_channels = FS.coerce(list(ub.oset.union(*osets))).concise()
all_sensorchan = kwcoco.SensorChanSpec.late_fuse(*[
kwcoco.SensorChanSpec.coerce(s)
for s in sensorchan_hist2.keys()]).concise()
info = {
'single_chan_hist': single_chan_hist,
'chan_hist': chan_hist,
'sensor_hist': sensor_hist,
'sensorchan_hist': sensorchan_hist,
'sensorchan_hist2': sensorchan_hist2,
'common_channels': common_channels,
'all_channels': all_channels,
'all_sensorchan': all_sensorchan,
}
return info
[docs]
class TrackidGenerator:
"""
Keep track of which trackids have been used and generate new ones on demand
TODO merge this into kwcoco as something like CocoDataset.next_trackid()?
Or expose whatever mechanism is already generating new aids, gids, etc
"""
[docs]
def update_generator(self):
if self.dset is None:
raise Exception('cannot update unconnected generator. Manually exclude trackids')
self.used_trackids.update(self.dset.index.trackid_to_aids.keys())
new_generator = filter(lambda x: x not in self.used_trackids,
itertools.count(start=next(self.generator)))
self.generator = new_generator
[docs]
def exclude_trackids(self, trackids):
if 0 and self.used_trackids.intersection(trackids):
print(f'warning: CocoDataset {self.dset.tag} with trackids '
f'{self.used_trackids} already has trackids in {trackids}')
self.used_trackids.update(trackids)
def __init__(self, coco_dset=None):
self.dset = coco_dset
self.used_trackids = set()
self.generator = itertools.count(start=1)
if coco_dset is not None:
self.update_generator()
def __next__(self):
next_id = next(self.generator)
while next_id in self.used_trackids:
next_id = next(self.generator)
return next_id
[docs]
def coco_img_wld_info(coco_img):
"""
TODO: candidate for kwcoco.CocoImage method
"""
from geowatch.utils import util_gis
asset = coco_img.primary_asset(requires=['geos_corners'])
if asset is None:
raise KeyError(f'Geo-referenced asset not found for {coco_img}')
if 'wld_to_pxl' in asset:
warp_aux_from_wld = kwimage.Affine.coerce(asset['wld_to_pxl'])
elif 'warp_to_wld' in asset:
warp_aux_from_wld = kwimage.Affine.coerce(asset['warp_to_wld']).inv()
else:
print('asset = {}'.format(ub.urepr(asset, nl=1)))
raise KeyError('Asset has no known transform to world coordinates')
if 'wld_crs_info' in asset:
wld_crs_info = asset['wld_crs_info']
elif 'wld_crs_info' in coco_img.img:
wld_crs_info = coco_img.img['wld_crs_info']
else:
print('asset = {}'.format(ub.urepr(asset, nl=1)))
raise KeyError('Asset / image has no registered world CRS info')
# TODO: traditional / authority check? Or assume traditional?
# img['wld_crs_info']['axis_mapping'] == 'OAMS_TRADITIONAL_GIS_ORDER'
wld_crs = util_gis.coerce_crs(wld_crs_info)
warp_img_from_aux = kwimage.Affine.coerce(asset.get('warp_aux_to_img', None))
warp_img_from_wld = warp_img_from_aux @ warp_aux_from_wld
return warp_img_from_wld, wld_crs
[docs]
def warp_annot_segmentations_from_geos(coco_dset):
"""
Uses the segmentation_geos property (which should be crs84) and warps it
into image space based on available geo data.
Args:
coco_dset (kwcoco.CocoDataset):
a CocoDataset where annotations contain a "segmentation_geos"
attribute. The "segmentation" attribute will be modified in-place.
Ignore:
# xdoctest: +REQUIRES(env:DVC_DPATH)
from geowatch.utils.kwcoco_extensions import * # NOQA
import geowatch
dvc_dpath = geowatch.find_dvc_dpath(tags='phase2_data', hardware='auto')
coco_fpath = (dvc_dpath / 'Drop6') / 'imganns-KR_R001.kwcoco.json'
import kwcoco
coco_dset = kwcoco.CocoDataset(coco_fpath)
Example:
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> import geowatch
>>> # creating demodata also uses warp_annot_segmentations_to_geos
>>> orig_dset = geowatch.coerce_kwcoco('geowatch-msi', geodata=True)
>>> coco_dset = orig_dset.copy()
>>> for ann in coco_dset.annots().objs:
... ann.pop('segmentation', None)
>>> warp_annot_segmentations_from_geos(coco_dset)
>>> errors = []
>>> for aid in coco_dset.annots():
>>> ann1 = orig_dset.index.anns[aid]
>>> ann2 = coco_dset.index.anns[aid]
>>> poly1 = kwimage.MultiPolygon.coerce(ann1['segmentation'])
>>> poly2 = kwimage.MultiPolygon.coerce(ann2['segmentation'])
>>> worked = (poly1.is_invalid() and poly2.is_invalid()) or poly1.iou(poly2) > 0.99
>>> errors.append(not worked)
>>> if sum(errors) > 0:
>>> # FIXME: THERE SHOULD BE NO ERRORS HERE. PUNTING TO MAKE
>>> # THE DASHBOARDS GREEN, BUT THIS SHOULD BE REVISITED
>>> #raise AssertionError('transforms should have cyclic consistency')
>>> warnings.warn('Transforms should have cyclic consistency, but some dont. This should be an error, but we will allow it for now')
>>> assert (sum(errors) / len(errors)) < 0.5, 'more than half of the data does not have cyclic consistency'
Ignore:
# TODO: looks like this fails
check_geo_transform_consistency(coco_dset)
"""
import pandas as pd
import geopandas as gpd
from geowatch.utils import util_gis
from shapely.geometry import shape
crs84 = util_gis.get_crs84()
gdfs = []
for gid in coco_dset.images().gids:
aids = []
for ann in coco_dset.annots(image_id=gid).objs:
if 'segmentation_geos' in ann:
aids.append(ann['id'])
annots = coco_dset.annots(aids)
if len(annots) > 0:
# TODO: check crs properties (probably always crs84)
sseg_geos = annots.lookup('segmentation_geos')
warp_img_from_wld, wld_crs = coco_img_wld_info(coco_dset.coco_image(gid))
gdf = gpd.GeoDataFrame(
dict(
geometry=[shape(p) for p in sseg_geos],
aid=annots.aids,
),
crs=crs84)
gdf = gdf.join(
pd.DataFrame(dict(
gid=gid,
wld_crs=wld_crs,
warp_img_from_wld=[warp_img_from_wld.to_shapely()],
)),
how='cross'
)
gdfs.append(gdf)
if len(gdfs) > 0:
gdf = pd.concat(gdfs, axis=0, ignore_index=True)
# Convert to string so we can crs as a group key
gdf['wld_crs_str'] = gdf['wld_crs'].apply(lambda crs: crs.to_string())
crs_groups = gdf.groupby('wld_crs_str')[
['geometry', 'wld_crs', 'warp_img_from_wld', 'aid']]
for wld_crs_str, grp in crs_groups:
wld_geom = grp.to_crs(grp['wld_crs'].iloc[0])
# Some images will have the same transform, which makes the
# warp more efficient.
for img_from_wld, subgrp in wld_geom.groupby('warp_img_from_wld'):
aids = subgrp['aid']
img_geoms = subgrp.affine_transform(img_from_wld)
for aid, img_geom in zip(aids, img_geoms):
sseg = kwimage.MultiPolygon.from_shapely(img_geom)
ann = coco_dset.anns[aid]
ann['segmentation'] = list(sseg.to_coco(style='new'))
ann['bbox'] = list(sseg.bounding_box().quantize().to_coco())[0]
# if __debug__:
# for ann in coco_dset.dataset['annotations']:
# assert 'bbox' in ann
# assert 'segmentation' in ann
[docs]
def warp_annot_segmentations_to_geos(coco_dset):
"""
Example:
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> import geowatch
>>> orig_dset = geowatch.coerce_kwcoco('geowatch-msi', geodata=True)
>>> coco_dset = orig_dset.copy()
>>> for ann in coco_dset.annots().objs:
... ann.pop('segmentation_geos', None)
>>> warp_annot_segmentations_to_geos(coco_dset)
>>> errors = []
>>> for aid in ub.ProgIter(coco_dset.annots()):
>>> ann1 = orig_dset.index.anns[aid]
>>> ann2 = coco_dset.index.anns[aid]
>>> poly1 = kwimage.MultiPolygon.from_geojson(ann1['segmentation_geos'])
>>> poly2 = kwimage.MultiPolygon.from_geojson(ann2['segmentation_geos'])
>>> worked = (poly1.is_invalid() and poly2.is_invalid()) or poly1.iou(poly2) > 0.99
>>> errors.append(not worked)
>>> if sum(errors) > 0:
>>> # FIXME: THERE SHOULD BE NO ERRORS HERE. PUNTING TO MAKE
>>> # THE DASHBOARDS GREEN, BUT THIS SHOULD BE REVISITED
>>> #raise AssertionError('transforms should have cyclic consistency')
>>> warnings.warn('Transforms should have cyclic consistency, but some dont. This should be an error, but we will allow it for now')
>>> assert (sum(errors) / len(errors)) < 0.5, 'more than half of the data does not have cyclic consistency'
"""
import pandas as pd
import geopandas as gpd
from geowatch.utils import util_gis
crs84 = util_gis.get_crs84()
gdfs = []
for gid in coco_dset.images().gids:
aids = []
for ann in coco_dset.annots(image_id=gid).objs:
if 'segmentation' in ann:
aids.append(ann['id'])
annots = coco_dset.annots(aids)
if len(annots) > 0:
# TODO: check crs properties (probably always crs84)
warp_img_from_wld, wld_crs = coco_img_wld_info(coco_dset.coco_image(gid))
ssegs = annots.detections.data['segmentations']
warp_wld_from_img = warp_img_from_wld.inv()
gdf = gpd.GeoDataFrame(
dict(
geometry=[p.to_shapely() for p in ssegs],
aid=annots.aids,
),
crs=None,
)
gdf = gdf.join(
pd.DataFrame(dict(
gid=gid,
wld_crs=wld_crs,
warp_wld_from_img=[warp_wld_from_img.to_shapely()]
)),
how='cross'
)
gdfs.append(gdf)
if len(gdfs) > 0:
gdf = pd.concat(gdfs, axis=0, ignore_index=True)
crs84_info = {
'axis_mapping': 'OAMS_TRADITIONAL_GIS_ORDER',
'auth': ('EPSG', '4326')
}
# Convert to string so we can crs as a group key
gdf['wld_crs_str'] = gdf['wld_crs'].apply(lambda crs: crs.to_string())
crs_groups = gdf.groupby('wld_crs_str')[
['geometry', 'wld_crs', 'warp_wld_from_img', 'aid']]
gdf.loc[:, 'crs84_geoms'] = None
for wld_crs_str, grp in crs_groups:
wld_crs = grp['wld_crs'].iloc[0]
for wld_from_img, subgrp in grp.groupby('warp_wld_from_img'):
wld_geoms = subgrp.affine_transform(wld_from_img)
wld_geoms = wld_geoms.set_crs(wld_crs)
crs84_geoms = wld_geoms.to_crs(crs84)
# this geojson rep also contains bboxes, but don't need to use them
geojsons = crs84_geoms.__geo_interface__['features']
for aid, geoj in zip(subgrp['aid'], geojsons):
# To be compatible with other kwcoco files (might need to
# allow for an entire feature to live here)
sseg_geos = geoj['geometry']
sseg_geos.setdefault('properties', {})
sseg_geos['properties']['crs_info'] = crs84_info
ann = coco_dset.anns[aid]
ann['segmentation_geos'] = sseg_geos
# def coco_geopandas_images(coco_dset):
# """
# TODO:
# - [ ] This is unused in this file and thus should move to the dev
# folder or somewhere else for to keep useful scratch work.
# """
# import geopandas as gpd
# df_input = []
# for gid, img in coco_dset.index.imgs.items():
# info = img['geotiff_metadata']
# kw_img_poly = kwimage.Polygon(exterior=info['geos_corners'])
# sh_img_poly = kw_img_poly.to_shapely()
# df_input.append({
# 'gid': gid,
# 'name': img.get('name', None),
# 'video_id': img.get('video_id', None),
# 'bounds': sh_img_poly,
# })
# img_geos_df = gpd.GeoDataFrame(df_input, geometry='bounds', crs='epsg:4326')
# return img_geos_df
[docs]
def visualize_rois(coco_dset, zoom=None):
"""
Matplotlib visualization of image and annotation regions on a world map
Example:
>>> # xdoctest: +REQUIRES(--slow)
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> from geowatch.demo.smart_kwcoco_demodata import demo_kwcoco_with_heatmaps
>>> coco_dset = demo_kwcoco_with_heatmaps(num_videos=1)
>>> coco_populate_geo_heuristics(coco_dset, overwrite=True)
>>> visualize_rois(coco_dset, zoom=0)
Example:
>>> # xdoctest: +REQUIRES(env:DVC_DPATH)
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> from geowatch.utils.util_data import find_dvc_dpath
>>> import kwcoco
>>> dvc_dpath = find_dvc_dpath()
>>> coco_fpath = dvc_dpath / 'drop1-S2-L8-aligned/combo_data.kwcoco.json'
>>> coco_dset = kwcoco.CocoDataset(coco_fpath)
>>> coco_populate_geo_heuristics(coco_dset, overwrite=True, workers=4)
>>> visualize_rois(coco_dset)
"""
import geopandas as gpd
cov_image_gdf = covered_image_geo_regions(coco_dset)
annot_gdf = covered_annot_geo_regions(coco_dset)
import kwplot
kwplot.autompl()
wld_map_gdf = gpd.read_file(
gpd.datasets.get_path('naturalearth_lowres')
)
ax = wld_map_gdf.plot()
def safe_centroids(gdf):
return gdf.to_crs('+proj=cea').centroid.to_crs(gdf.crs)
cov_centroids = safe_centroids(cov_image_gdf)
cov_image_gdf.plot(ax=ax, facecolor='none', edgecolor='green', alpha=0.5)
cov_centroids.plot(ax=ax, marker='o', facecolor='green', alpha=0.5)
# img_centroids = img_poly_gdf.geometry.centroid
# img_poly_gdf.plot(ax=ax, facecolor='none', edgecolor='red', alpha=0.5)
# img_centroids.plot(ax=ax, marker='o', facecolor='red', alpha=0.5)
annot_centroids = safe_centroids(annot_gdf)
annot_gdf.plot(ax=ax, facecolor='none', edgecolor='orange', alpha=0.5)
annot_centroids.plot(ax=ax, marker='o', facecolor='orange', alpha=0.5)
if zoom is not None:
sh_zoom_roi = cov_image_gdf.geometry.iloc[0]
kw_zoom_roi = kwimage.Polygon.from_shapely(sh_zoom_roi)
bb = kw_zoom_roi.bounding_box()
min_x, min_y, max_x, max_y = bb.scale(1.5, about='center').to_ltrb().data[0]
ax.set_xlim(min_x, max_x)
ax.set_ylim(min_y, max_y)
[docs]
def covered_image_geo_regions(coco_dset, merge=False):
"""
Find the intersection of all image bounding boxes in world space
to see what spatial regions are covered by the imagery.
Returns:
gpd.GeoDataFrame
Example:
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> from geowatch.demo.smart_kwcoco_demodata import demo_kwcoco_with_heatmaps
>>> coco_dset = demo_kwcoco_with_heatmaps(num_frames=1, num_videos=1)
>>> coco_populate_geo_heuristics(coco_dset, overwrite=True)
>>> img = coco_dset.index.imgs[1]
>>> cov_image_gdf = covered_image_geo_regions(coco_dset)
Example:
>>> # Check it works with empty data frame
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> coco_dset = kwcoco.CocoDataset()
>>> cov_image_gdf1 = covered_image_geo_regions(coco_dset, merge=False)
>>> cov_image_gdf2 = covered_image_geo_regions(coco_dset, merge=True)
>>> assert len(cov_image_gdf1) == 0
>>> assert len(cov_image_gdf2) == 0
"""
import geopandas as gpd
from shapely import ops
import shapely
# import geowatch
rows = []
columns = [
'geometry', 'date_captured', 'name', 'height', 'width', 'video_id',
'image_id', 'frame_index']
for gid, img in coco_dset.index.imgs.items():
if 'geos_corners' in img:
geos_corners = img['geos_corners']
else:
coco_img = coco_dset.coco_image(img['id'])
asset = coco_img.primary_asset(requires=['geos_corners'])
geos_corners = asset['geos_corners']
geos_crs_info = geos_corners.get('properties').get('crs_info', None)
if geos_crs_info is not None:
assert geos_crs_info['axis_mapping'] == 'OAMS_TRADITIONAL_GIS_ORDER'
assert list(geos_crs_info['auth']) == ['EPSG', '4326']
sh_img_poly = shapely.geometry.shape(geos_corners)
rows.append({
'geometry': sh_img_poly,
'date_captured': img.get('date_captured', None),
'name': img.get('name', None),
'height': img.get('height', None),
'width': img.get('width', None),
'video_id': img.get('video_id', None),
'image_id': gid,
'frame_index': img.get('frame_index', None),
})
from geowatch.utils import util_gis
cov_poly_crs = util_gis.get_crs84()
if merge:
# df_input = [
# {'gid': gid, 'bounds': poly, 'name': coco_dset.index.imgs[gid].get('name', None),
# 'video_id': coco_dset.index.imgs[gid].get('video_id', None) }
# for gid, poly in gid_to_poly.items()
# ]
# img_geos = gpd.GeoDataFrame(df_input, geometry='bounds', crs='epsg:4326')
# Can merge like this, but we lose membership info
# coverage_df = gpd.GeoDataFrame(img_geos.unary_union)
coverage_rois_ = ops.unary_union([row['geometry'] for row in rows])
if hasattr(coverage_rois_, 'geoms'):
# Iteration over shapely objects was deprecated, test for geoms
# attribute instead.
coverage_rois = list(coverage_rois_.geoms)
else:
coverage_rois = [coverage_rois_]
# geopandas uses traditional crs mappings
cov_image_gdf = gpd.GeoDataFrame(
{'geometry': coverage_rois},
geometry='geometry', crs=cov_poly_crs)
else:
cov_image_gdf = gpd.GeoDataFrame(rows, geometry='geometry',
columns=columns, crs=cov_poly_crs)
return cov_image_gdf
[docs]
def covered_video_geo_regions(coco_dset):
"""
Compute CRS84 bounds for each video in the coco dataset.
Returns:
gpd.GeoDataFrame
Example:
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> from geowatch.demo.smart_kwcoco_demodata import demo_kwcoco_with_heatmaps
>>> coco_dset = demo_kwcoco_with_heatmaps(num_frames=1, num_videos=1)
>>> # coco_populate_geo_heuristics(coco_dset, overwrite=True)
>>> # video_gdf = covered_video_geo_regions(coco_dset)
"""
import geopandas as gpd
from geowatch.utils import util_gis
# TODO: build this more efficiently if possible.
# if 0:
# import geowatch
rows = []
for video_id, video in coco_dset.index.videos.items():
if 'valid_region_geos' in video:
crs84_poly = kwimage.MultiPolygon.coerce(video['valid_region_geos']).to_shapely()
else:
vidspace_poly = kwimage.Boxes(
[[0, 0, video['width'], video['height']]], 'xywh').to_polygons()[0]
if 'warp_wld_to_vid' in video:
auth = video['wld_crs_info']['auth']
vid_from_wld = kwimage.Affine.coerce(video['warp_wld_to_vid'])
wld_form_vid = vid_from_wld.inv()
wld_poly = vidspace_poly.warp(wld_form_vid)
import pyproj
from shapely.ops import transform
crs = pyproj.CRS(':'.join(auth))
crs84 = util_gis.get_crs84()
project = pyproj.Transformer.from_crs(crs, crs84, always_xy=True).transform
wld_poly_ = wld_poly.to_shapely()
crs84_poly = transform(project, wld_poly_)
else:
raise NotImplementedError('We dont have a way to get the geo bounds for a video')
gids = coco_dset.index.vidid_to_gids[video_id]
if gids:
start_gid = gids[0]
stop_gid = gids[-1]
start_img = coco_dset.coco_image(start_gid)
stop_img = coco_dset.coco_image(stop_gid)
start_date = start_img.img['date_captured']
end_date = stop_img.img['date_captured']
else:
warnings.warn('video does not have any images')
continue
row = {
'video_name': video['name'],
'video_id': video['id'],
'geometry': crs84_poly,
'start_date': start_date,
'end_date': end_date,
}
rows.append(row)
crs84 = util_gis.get_crs84()
video_gdf = gpd.GeoDataFrame(rows, geometry='geometry', crs=crs84)
return video_gdf
[docs]
def covered_annot_geo_regions(coco_dset, merge=False):
"""
Given a dataset find spatial regions of interest that contain annotations
"""
import shapely
import geopandas as gpd
from shapely import ops
aid_to_poly = {}
for aid, ann in coco_dset.anns.items():
ann_goes = ann['segmentation_geos']
# TODO: assert the segmentation_geos CRS is (geojson - WGS84-traditional)
if ann_goes is not None:
sh_poly = shapely.geometry.shape(ann_goes)
aid_to_poly[aid] = sh_poly
# annot_crs = 'epsg:4326'
from geowatch.utils import util_gis
annot_crs = util_gis.get_crs84()
# annot_crs = 'OGC:CRS84'
if merge:
gid_to_rois = {}
for gid, aids in coco_dset.index.gid_to_aids.items():
if len(aids):
sh_annot_polys = ub.dict_subset(aid_to_poly, aids)
sh_annot_polys_ = [p.buffer(0) for p in sh_annot_polys.values()]
sh_annot_polys_ = [p.buffer(0.000001) for p in sh_annot_polys_]
# What CRS should we be doing this in? Is WGS84 OK?
# Should we switch to UTM?
img_rois_ = ops.unary_union(sh_annot_polys_)
try:
img_rois = list(img_rois_.geoms)
except Exception:
img_rois = [img_rois_]
kw_img_rois = [
kwimage.Polygon.from_shapely(p.convex_hull).bounding_box().to_polygons()[0]
for p in img_rois]
sh_img_rois = [p.to_shapely() for p in kw_img_rois]
gid_to_rois[gid] = sh_img_rois
# TODO: if there are only midly overlapping regions, we should likely split
# them up. We can also group by UTM coordinates to reduce computation.
sh_rois_ = ops.unary_union([
p.buffer(0) for rois in gid_to_rois.values()
for p in rois
])
try:
sh_rois = list(sh_rois_.geoms)
except Exception:
sh_rois = [sh_rois_]
# geopandas uses traditional crs mappings
cov_annot_gdf = gpd.GeoDataFrame(
{'geometry': sh_rois},
geometry='geometry', crs=annot_crs)
else:
sh_polys = list(aid_to_poly.values())
aids = list(aid_to_poly.keys())
cov_annot_gdf = gpd.GeoDataFrame(
{'geometry': sh_polys, 'aids': aids},
geometry='geometry', crs=annot_crs)
return cov_annot_gdf
# def flip_xy(poly):
# """
# TODO:
# - [ ] This is unused in this file and thus should move to the dev
# folder or somewhere else for to keep useful scratch work.
# """
# if hasattr(poly, 'reorder_axes'):
# new_poly = poly.reorder_axes((1, 0))
# else:
# kw_poly = kwimage.Polygon.from_shapely(poly)
# kw_poly.data['exterior'].data = kw_poly.data['exterior'].data[:, ::-1]
# sh_poly_ = kw_poly.to_shapely()
# new_poly = sh_poly_
# return new_poly
[docs]
def category_category_colors(coco_dset):
"""
Ensures that each category in a CategoryTree has a color
TODO:
- [ ] Add to CategoryTree
- [ ] Consolidate with ~/code/watch/geowatch/tasks/fusion/utils :: category_tree_ensure_color
- [ ] Consolidate with ~/code/watch/geowatch/utils/kwcoco_extensions :: category_category_colors
- [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: ensure_heuristic_category_tree_colors
- [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: ensure_heuristic_coco_colors
"""
cats = coco_dset.dataset['categories']
# backup_colors = iter(kwimage.Color.distinct(len(cats)))
for cat in cats:
color = cat.get('color', None)
if color is None:
# color = next(backup_colors)
# cat['color'] = kwimage.Color(color).as01()
color = kwimage.Color.random()
cat['color'] = color.as01()
[docs]
@profile
def associate_images(dset1, dset2, key_fallback=None):
"""
Builds an association between image-ids in two datasets.
One use for this is if ``dset1`` is a truth dataset and ``dset2`` is a
prediction dataset, and you need the to know which images are in common so
they can be scored.
Args:
dset1 (kwcoco.CocoDataset): a kwcoco datset.
dset2 (kwcoco.CocoDataset): another kwcoco dataset
key_fallback (str):
The fallback key to use if the image "name" is not specified.
This can either be "file_name" or "id" or None.
TODO:
- [ ] port to kwcoco proper
- [ ] use in kwcoco eval as a robust image/video association method
Example:
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> import kwcoco
>>> from kwcoco.demo.perterb import perterb_coco
>>> dset1 = kwcoco.CocoDataset.demo('shapes2')
>>> kwargs = {
>>> 'box_noise': 0.5,
>>> 'n_fp': (0, 10),
>>> 'n_fn': (0, 10),
>>> }
>>> dset2 = perterb_coco(dset1, **kwargs)
>>> matches = associate_images(dset1, dset2, key_fallback='file_name')
>>> assert len(matches['image']['match_gids1'])
>>> assert len(matches['image']['match_gids2'])
>>> assert not len(matches['video'])
Example:
>>> from geowatch.utils.kwcoco_extensions import * # NOQA
>>> import kwcoco
>>> from kwcoco.demo.perterb import perterb_coco
>>> dset1 = kwcoco.CocoDataset.demo('vidshapes2')
>>> kwargs = {
>>> 'box_noise': 0.5,
>>> 'n_fp': (0, 10),
>>> 'n_fn': (0, 10),
>>> }
>>> dset2 = perterb_coco(dset1, **kwargs)
>>> matches = associate_images(dset1, dset2, key_fallback='file_name')
>>> assert not len(matches['image']['match_gids1'])
>>> assert not len(matches['image']['match_gids2'])
>>> assert len(matches['video'])
"""
common_vidnames = (set(dset1.index.name_to_video) &
set(dset2.index.name_to_video))
def image_keys(dset, gids):
# Generate image "keys" that should be compatible between datasets
for gid in gids:
img = dset.imgs[gid]
if img.get('name', None) is not None:
yield img['name']
else:
if key_fallback is None:
raise Exception('images require names to associate')
elif key_fallback == 'id':
yield img['id']
elif key_fallback == 'file_name':
yield img['file_name']
else:
raise KeyError(key_fallback)
all_gids1 = list(dset1.imgs.keys())
all_gids2 = list(dset2.imgs.keys())
all_keys1 = list(image_keys(dset1, all_gids1))
all_keys2 = list(image_keys(dset2, all_gids2))
key_to_gid1 = ub.dzip(all_keys1, all_gids1)
key_to_gid2 = ub.dzip(all_keys2, all_gids2)
gid_to_key1 = ub.invert_dict(key_to_gid1)
gid_to_key2 = ub.invert_dict(key_to_gid2)
video_matches = []
all_match_gids1 = set()
all_match_gids2 = set()
for vidname in common_vidnames:
video1 = dset1.index.name_to_video[vidname]
video2 = dset2.index.name_to_video[vidname]
vidid1 = video1['id']
vidid2 = video2['id']
gids1 = dset1.index.vidid_to_gids[vidid1]
gids2 = dset2.index.vidid_to_gids[vidid2]
keys1 = ub.oset(ub.take(gid_to_key1, gids1))
keys2 = ub.oset(ub.take(gid_to_key2, gids2))
match_keys = ub.oset(keys1) & ub.oset(keys2)
match_gids1 = list(ub.take(key_to_gid1, match_keys))
match_gids2 = list(ub.take(key_to_gid2, match_keys))
all_match_gids1.update(match_gids1)
all_match_gids2.update(match_gids2)
video_matches.append({
'vidname': vidname,
'match_gids1': match_gids1,
'match_gids2': match_gids2,
})
# Associate loose images not belonging to any video
unmatched_gid_to_key1 = ub.dict_diff(gid_to_key1, all_match_gids1)
unmatched_gid_to_key2 = ub.dict_diff(gid_to_key2, all_match_gids2)
remain_keys = (set(unmatched_gid_to_key1.values()) &
set(unmatched_gid_to_key2.values()))
remain_gids1 = [key_to_gid1[key] for key in remain_keys]
remain_gids2 = [key_to_gid2[key] for key in remain_keys]
image_matches = {
'match_gids1': remain_gids1,
'match_gids2': remain_gids2,
}
matches = {
'image': image_matches,
'video': video_matches,
}
return matches
[docs]
def reorder_video_frames(dset):
"""
Reorder the image indexes in each video to ensure temporal ordering
"""
from kwutil import util_time
videos = dset.videos()
info = []
for video, images in zip(videos.objs, videos.images):
date_captured_list = images.lookup('date_captured')
dt_list = [util_time.coerce_datetime(d) for d in date_captured_list]
frame_index_list = images.lookup('frame_index')
video_report = {
'name': video['name'],
'status': 'ok',
}
errors = []
# This should never happen due to kwcoco assumptions
had_bad_index_order = (np.diff(frame_index_list) < 1).any()
if had_bad_index_order:
errors.append('had a critical error')
new_frame_indexes = ub.argsort(dt_list)
# This might happen, and we should fix it.
had_bad_date_order = (np.diff(new_frame_indexes) < 1).any()
if had_bad_date_order:
errors.append('had a bad date ordering')
if errors:
new_image_order = images.take(new_frame_indexes)
for new_frame_index, img in enumerate(new_image_order.objs):
img['frame_index'] = new_frame_index
video_report['errors'] = errors
video_report['stats'] = 'fixed'
info.append(video_report)
dset._build_index()
print('reorder check info = {}'.format(ub.urepr(info, nl=1)))
return info
[docs]
def pick_channels(coco_img, choices):
"""
Choose the set of channels in choices that all exist in this image.
TODO:
- [ ] Add to CocoIamge as a method
Args:
coco_img (CocoImage): an image with channels
choices (List[FusedChannelSpec | str]):
a list of fused channels in priority order to choose from.
Returns:
None | FusedChannelSpec :
The first channel group in ``choices`` where all of those channels
exist in the image.
CommandLine:
xdoctest -m geowatch.utils.kwcoco_extensions pick_channels
Example:
>>> from geowatch.utils import kwcoco_extensions
>>> import kwcoco
>>> choices = ['blue|green|red', 'pan']
>>> # Make different demo CocoImages that contain different bands
>>> coco_img1 = kwcoco.CocoImage({
>>> 'channels': 'red|green|blue', 'file_name': 'dummy'})
>>> coco_img2 = kwcoco.CocoImage({
>>> 'channels': 'green|blue', 'file_name': 'dummy'})
>>> coco_img3 = kwcoco.CocoImage({
>>> 'channels': 'blue|green|red', 'file_name': 'dummy'})
>>> coco_img4 = kwcoco.CocoImage({
>>> 'channels': 'pan', 'file_name': 'dummy'})
>>> # Channels are only found if all bands in a choices item are given
>>> found1 = kwcoco_extensions.pick_channels(coco_img1, choices)
>>> found2 = kwcoco_extensions.pick_channels(coco_img2, choices)
>>> found3 = kwcoco_extensions.pick_channels(coco_img3, choices)
>>> found4 = kwcoco_extensions.pick_channels(coco_img4, choices)
>>> print(f'found1={found1}')
>>> print(f'found2={found2}')
>>> print(f'found3={found3}')
>>> print(f'found4={found4}')
found1=<FusedChannelSpec(blue|green|red)>
found2=None
found3=<FusedChannelSpec(blue|green|red)>
found4=<FusedChannelSpec(pan)>
Example:
>>> # Test case with different choices orders
>>> from geowatch.utils import kwcoco_extensions
>>> channel_priority1 = ['blue|green|red', 'pan']
>>> channel_priority2 = ['pan', 'blue|green|red']
>>> coco_img1 = kwcoco.CocoImage({
>>> 'channels': 'blue|green|red|pan', 'file_name': 'dummy'})
>>> coco_img2 = kwcoco.CocoImage({
>>> 'channels': 'pan|blue|green|red', 'file_name': 'dummy'})
>>> found1 = kwcoco_extensions.pick_channels(coco_img1, channel_priority1)
>>> found2 = kwcoco_extensions.pick_channels(coco_img1, channel_priority2)
>>> found3 = kwcoco_extensions.pick_channels(coco_img2, channel_priority1)
>>> found4 = kwcoco_extensions.pick_channels(coco_img2, channel_priority2)
>>> # The first found band in choices is returned when
>>> # the image contains both, regardless of order in the image.
>>> print(f'found1={found1}')
>>> print(f'found2={found2}')
>>> print(f'found3={found3}')
>>> print(f'found4={found4}')
found1=<FusedChannelSpec(blue|green|red)>
found2=<FusedChannelSpec(pan)>
found3=<FusedChannelSpec(blue|green|red)>
found4=<FusedChannelSpec(pan)>
"""
import kwcoco
have_chans = coco_img.channels.fuse()
for candidate in choices:
candidate = kwcoco.FusedChannelSpec.coerce(candidate)
if candidate.issubset(have_chans):
return candidate