Source code for geowatch.utils.kwcoco_extensions

"""
Adds fields needed by ndsampler to correctly "watch" a region.

Some of this is done hueristically. We assume images come from certain sensors.
We assume input is orthorectified.  We assume some GSD "target" gsd for video
and image processing. Note a video GSD will typically be much higher (i.e.
lower resolution) than an image GSD.
"""
import warnings
import numpy as np
import ubelt as ub
import kwimage
import itertools
import numbers
import kwcoco

from os.path import join
from geowatch.utils import util_raster
from geowatch import exceptions
# import geowatch.gis

try:
    from line_profiler import profile
except Exception:
    profile = ub.identity



[docs]
def filter_image_ids(coco_dset, gids=None, include_sensors=None,
                     exclude_sensors=None, select_images=None,
                     select_videos=None):
    """
    Filters to a specific set of images given query parameters
    """
    def coerce_set(x):
        return set(x.split(',')) if isinstance(x, str) else set(x)

    def filter_by_attribute(table, key, include, exclude):
        if include is not None or exclude is not None:
            if include is not None:
                include = coerce_set(include)
            if exclude is not None:
                exclude = coerce_set(exclude)
            values = table.lookup(key, default=None)
            if include is None:
                flags = [v not in exclude for v in values]
            elif exclude is None:
                flags = [v in include for v in values]
            else:
                flags = [v in include and v not in exclude for v in values]
            table = table.compress(flags)
        return table
    valid_images = coco_dset.images(gids)
    valid_images = filter_by_attribute(
        valid_images, 'sensor_coarse', include_sensors, exclude_sensors)
    valid_gids = set(valid_images)

    if select_images is not None:
        try:
            import jq
        except Exception:
            print('The jq library is required to run a generic image query')
            raise

        try:
            query_text = ".images[] | select({}) | .id".format(select_images)
            query = jq.compile(query_text)
            image_selected_gids = set(query.input(coco_dset.dataset).all())
            valid_gids &= image_selected_gids
        except Exception as ex:
            print('JQ Query Failed: {}, ex={}'.format(query_text, ex))
            raise

    if select_videos is not None:
        try:
            import jq
        except Exception:
            print('The jq library is required to run a generic image query')
            raise

        try:
            query_text = ".videos[] | select({}) | .id".format(select_videos)
            query = jq.compile(query_text)
            selected_vidids = query.input(coco_dset.dataset).all()
            vid_selected_gids = set(ub.flatten(coco_dset.index.vidid_to_gids[vidid]
                                               for vidid in selected_vidids))
            valid_gids &= vid_selected_gids
        except Exception:
            print('JQ Query Failed: {}'.format(query_text))
            raise

    valid_gids = sorted(valid_gids)
    return valid_gids




[docs]
def populate_watch_fields(
    coco_dset,
    target_gsd=10.0,
    vidids=None,
    overwrite=False,
    default_gsd=None,
    conform=True,
    enable_video_stats=True,
    enable_valid_region=False,
    enable_intensity_stats=False,
    workers=0,
    mode='thread',
    remove_broken=False,
    skip_populate_errors=False,
):
    """
    Aggregate populate function for fields useful to GeoWATCH.

    Args:
        coco_dset (Dataset): dataset to work with

        target_gsd (float): target gsd in meters

        overwrite (bool | List[str]):
            if True or False overwrites everything or nothing. Otherwise it can
            be a list of strings indicating what is
            overwritten. Valid keys are warp, band, and channels.

        default_gsd (None | float):
            if specified, assumed any images without geo-metadata have this
            GSD'

    Ignore:
        >>> # xdoctest: +REQUIRES(env:DVC_DPATH)
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> import kwcoco
        >>> dvc_dpath = geowatch.utils.util_data.find_dvc_dpath()
        >>> fpath = dvc_dpath / 'drop0_aligned/data.kwcoco.json')
        >>> coco_dset = kwcoco.CocoDataset(fpath)
        >>> target_gsd = 5.0
        >>> populate_watch_fields(coco_dset, target_gsd)
        >>> print('coco_dset.index.videos = {}'.format(ub.urepr(coco_dset.index.videos, nl=-1)))
        >>> print('coco_dset.index.imgs[1] = ' + ub.urepr(coco_dset.index.imgs[1], nl=1))

    Example:
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> import kwcoco
        >>> # TODO: make a demo dataset with some sort of gsd metadata
        >>> coco_dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral')
        >>> print('coco_dset = {!r}'.format(coco_dset))
        >>> target_gsd = 13.0
        >>> populate_watch_fields(coco_dset, target_gsd, default_gsd=1)
        >>> print('coco_dset.index.imgs[1] = ' + ub.urepr(coco_dset.index.imgs[1], nl=2))
        >>> print('coco_dset.index.videos = {}'.format(ub.urepr(coco_dset.index.videos, nl=1)))

        >>> # TODO: make a demo dataset with some sort of gsd metadata
        >>> coco_dset = kwcoco.CocoDataset.demo('vidshapes8')
        >>> print('coco_dset = {!r}'.format(coco_dset))
        >>> target_gsd = 13.0
        >>> populate_watch_fields(coco_dset, target_gsd, default_gsd=1)
        >>> print('coco_dset.index.imgs[1] = ' + ub.urepr(coco_dset.index.imgs[1], nl=2))
        >>> print('coco_dset.index.videos = {}'.format(ub.urepr(coco_dset.index.videos, nl=1)))
    """
    # Load your KW-COCO dataset (conform populates information like image size)
    if conform:
        # Note: we will handle imgsize in a later part
        coco_dset.conform(pycocotools_info=False, workers=workers,
                          ensure_imgsize=False)

    if 1:
        from geowatch import heuristics
        heuristics.register_known_fsspec_s3_buckets()

    if vidids is None:
        vidids = list(coco_dset.index.videos.keys())
        gids = list(coco_dset.index.imgs.keys())
    else:
        gids = list(ub.flatten(coco_dset.images(video_id=video_id) for video_id in vidids))

    coco_populate_geo_heuristics(
        coco_dset, gids=gids, overwrite=overwrite, default_gsd=default_gsd,
        workers=workers, mode=mode,
        enable_intensity_stats=enable_intensity_stats,
        enable_valid_region=enable_valid_region,
        remove_broken=remove_broken,
        skip_populate_errors=skip_populate_errors,
    )

    # Modify videos to include cleared status
    if 1:
        from geowatch import heuristics
        from kwutil import util_pattern
        region_id_to_cleared = {d['region_id']: d['cleared'] for d in heuristics.REGION_STATUS}
        pat = util_pattern.Pattern.coerce(r'\w+_R\d+(_\d+)?', 'regex')
        for video in coco_dset.videos().objs:
            video_name = video['name']
            if pat.match(video_name):
                region_id = '_'.join(video_name.split('_')[0:2])
                cleared = region_id_to_cleared.get(region_id, False)
                video['cleared'] = cleared
                video['domain'] = region_id

    if enable_video_stats:
        for video_id in ub.ProgIter(vidids, total=len(vidids), desc='populate videos'):
            coco_populate_geo_video_stats(coco_dset, video_id, target_gsd=target_gsd)

    # serialize intermediate objects
    coco_dset._ensure_json_serializable()




[docs]
def coco_populate_geo_heuristics(coco_dset: kwcoco.CocoDataset,
                                 gids=None,
                                 overwrite=False,
                                 default_gsd=None,
                                 workers=0,
                                 mode='thread',
                                 remove_broken=False, **kw):
    """
    Example:
        >>> # xdoctest: +REQUIRES(env:DVC_DPATH)
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> from geowatch.utils.util_data import find_dvc_dpath
        >>> import kwcoco
        >>> dvc_dpath = find_dvc_dpath()
        >>> coco_fpath = dvc_dpath / 'drop1-S2-L8-aligned/data.kwcoco.json'
        >>> coco_dset = kwcoco.CocoDataset(coco_fpath)
        >>> coco_populate_geo_heuristics(coco_dset, overwrite=True, workers=12,
        >>>                              keep_geotiff_metadata=False,
        >>>                              mode='process')

    Example:
        >>> # xdoctest: +REQUIRES(env:DVC_DPATH)
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> from geowatch.utils.util_data import find_dvc_dpath
        >>> import kwcoco
        >>> dvc_dpath = find_dvc_dpath()
        >>> coco_fpath = dvc_dpath / 'drop1-S2-L8-aligned/data.kwcoco.json'
        >>> coco_dset = kwcoco.CocoDataset(coco_fpath)
        >>> coco_populate_geo_heuristics(coco_dset, overwrite=True, workers=12,
        >>>                              keep_geotiff_metadata=False,
        >>>                              mode='process')
    """
    import rich
    gids = coco_dset.images(gids)._ids
    # Cant multiprocess because of SwigPyObjects... bleh
    # keep_geotiff_metadata must be False to use mode=process
    keep_geotiff_metadata = kw.get('keep_geotiff_metadata', False)
    if keep_geotiff_metadata and mode == 'process':
        raise NotImplementedError(ub.paragraph(
            '''
            Cannot keep keep geotiff metadata when using process parallelism.

            Need to serialize gdal objects (i.e. RPC transforms and
            SwigPyObject) returned from ``geowatch.gis.geotiff.geotiff_metadata``
            to be able do this.
            '''))

    jobs = ub.JobPool(mode, max_workers=workers, transient=True)
    for gid in ub.ProgIter(gids, desc='submit populate imgs'):
        coco_img = coco_dset.coco_image(gid)
        if mode == 'process':
            coco_img = coco_img.detach()

        job_args = (coco_img,)
        job_kwargs = dict(overwrite=overwrite, default_gsd=default_gsd, **kw)
        job = jobs.submit(coco_populate_geo_img_heuristics2,
                          *job_args, **job_kwargs)
        if 0:
            # For debugging
            job.job_func = coco_populate_geo_img_heuristics2
            job.job_args = job_args
            job.job_kwargs = job_kwargs
        job.gid = gid

    broken_image_ids = []
    working_image_ids = []
    for job in ub.ProgIter(jobs.as_completed(), total=len(jobs), desc='collect populate imgs'):
        gid = job.gid
        try:
            img = job.result()
        except Exception as ex:
            # Check for known error messages that might cause errors grabbing
            # data
            known_errors = {}
            known_errors['has_404'] = remove_broken and "404" in repr(ex)
            known_errors['has_acc_problem'] = "not recognized as a supported file format" in repr(ex)
            known_errors['connection_reset'] = "Connection reset by peer" in repr(ex)
            known_errors['failed_to_read'] = 'Failed to read' in repr(ex)

            print('')
            print(f'known_errors = {ub.urepr(known_errors, nl=1)}')
            if any(known_errors.values()):
                broken_image_ids.append(gid)
                print('')
                rich.print('[yellow]WARNING: KNOWN ERROR IN GEO HEURISTICS')
                print(f'ex={ex!r}')
                print(f'ex={ex}')
                print(f'ex.__dict__={ex.__dict__}')
                print('num_broken = {}'.format(len(broken_image_ids)))
                print('num_working = {}'.format(len(working_image_ids)))
                rich.print('[yellow]WARNING: KNOWN ERROR IN GEO HEURISTICS')
            else:
                coco_img = coco_dset.coco_image(gid)

                # Check for remote existence and handle the case where the data
                # might be at a remote location
                from geowatch.utils import util_fsspec
                missing_paths = []
                existing_paths = []
                forbidden_paths = []

                HACK_CHECK_EXISTS = 1
                if HACK_CHECK_EXISTS:
                    """
                    NOTE:
                        For L2 data, we need to assume the user called

                        from geowatch import heuristics
                        heuristics.register_known_fsspec_s3_buckets()

                        And populated the appropriate mapping from s3-bucket to
                        s3 configurations.
                    """
                    for p in coco_img.iter_image_filepaths():
                        # Use fsspec to check if the files exist
                        fspath = util_fsspec.FSPath.coerce(p)
                        try:
                            if not fspath.exists():
                                missing_paths.append(fspath)
                            else:
                                existing_paths.append(fspath)
                        except PermissionError:
                            forbidden_paths.append(fspath)

                if missing_paths or forbidden_paths:
                    broken_image_ids.append(gid)
                    print('')
                    rich.print('[yellow]WARNING: OTHER ERROR IN GEO HEURISTICS')
                    print(f'existing_paths = {ub.urepr(existing_paths, nl=1)}')
                    print(f'missing_paths = {ub.urepr(missing_paths, nl=1)}')
                    print(f'forbidden_paths = {ub.urepr(forbidden_paths, nl=1)}')
                    print(f'ex={ex!r}')
                    print(f'ex={ex}')
                    print(f'ex.__dict__={ex.__dict__}')
                    print('coco_img = {}'.format(ub.urepr(coco_img.img, nl=3)))
                    print('num_broken = {}'.format(len(broken_image_ids)))
                    print('num_working = {}'.format(len(working_image_ids)))
                    rich.print('[yellow]WARNING: OTHER ERROR IN GEO HEURISTICS')
                    print('')
                    # raise FileNotFoundError(str(missing_paths))
                else:
                    print('')
                    rich.print('[red]ERROR: UNKNOWN ERROR IN GEO HEURISTICS')
                    print(f'ex={ex!r}')
                    print(f'ex={ex}')
                    print(f'ex.__dict__={ex.__dict__}')
                    print('coco_img = {}'.format(ub.urepr(coco_img.img, nl=3)))
                    print('num_broken = {}'.format(len(broken_image_ids)))
                    print('num_working = {}'.format(len(working_image_ids)))
                    rich.print('[red]ERROR: UNKNOWN ERROR IN GEO HEURISTICS')
                    print('')
                    # if 0:
                    #     job.job_args
                    #     job.job_kwargs
                    #     coco_img, = job.job_args
                    #     globals().update(**job.job_kwargs)
                    #     # result = coco_populate_geo_img_heuristics2(*job.job_args, **job.job_kwargs)
                    raise
        else:
            working_image_ids.append(gid)
            if mode == 'process':
                # for multiprocessing
                real_img = coco_dset.index.imgs[gid]
                real_img.update(img)
    if broken_image_ids:
        print(f'There were {len(broken_image_ids)} / {len(gids)} broken images')
        coco_dset.remove_images(broken_image_ids, verbose=True)




[docs]
@profile
def coco_populate_geo_img_heuristics2(
    coco_img,
    overwrite=False,
    default_gsd=None,
    keep_geotiff_metadata=False,
    enable_intensity_stats=False,
    enable_valid_region=False,
    skip_populate_errors=False,
):
    """
    Note: this will not overwrite existing channel info unless specified

    Commandline
        xdoctest -m ~/code/watch/geowatch/utils/kwcoco_extensions.py --profile

    TODO:
        - [ ] Use logic in the align demo classmethod to make an example
              that uses a real L8 / S2 image.

    Example:
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> import geowatch
        >>> import json
        >>> coco_dset = geowatch.coerce_kwcoco('geowatch-msi-geodata-dates-heatmap-videos1-frames2-gsize64')
        >>> gid = 1
        >>> overwrite = {'warp', 'band'}
        >>> default_gsd = None
        >>> kw = {}
        >>> coco_img = coco_dset.coco_image(gid)
        >>> before_img_attrs = list(coco_img.img.keys())
        >>> before_aux_attr_hist = ub.dict_hist(ub.flatten([list(aux) for aux in coco_img.img['auxiliary']]))
        >>> print('before_img_attrs = {!r}'.format(before_img_attrs))
        >>> print('before_aux_attr_hist = {}'.format(ub.urepr(before_aux_attr_hist, nl=1)))
        >>> coco_populate_geo_img_heuristics2(coco_img)
        >>> img = coco_dset.index.imgs[gid]
        >>> after_img_attrs = list(coco_img.img.keys())
        >>> after_aux_attr_hist = ub.dict_hist(ub.flatten([list(aux) for aux in coco_img.img['auxiliary']]))
        >>> new_img_attrs = set(after_img_attrs) - set(before_img_attrs)
        >>> new_aux_attrs = {k: after_aux_attr_hist[k] - before_aux_attr_hist.get(k, 0) for k in after_aux_attr_hist}
        >>> new_aux_attrs = {k: v for k, v in new_aux_attrs.items() if v > 0}
        >>> print('new_img_attrs = {}'.format(ub.urepr(new_img_attrs, nl=1)))
        >>> print('new_aux_attrs = {}'.format(ub.urepr(new_aux_attrs, nl=1)))
        >>> #print('after_img_attrs = {}'.format(ub.urepr(after_img_attrs, nl=1)))
        >>> #print('after_aux_attr_hist = {}'.format(ub.urepr(after_aux_attr_hist, nl=1)))
        >>> assert 'geos_corners' in img
        >>> #assert 'default_nodata' in img
        >>> #assert 'default_nodata' in new_aux_attrs
        >>> print(ub.varied_values(list(map(lambda x: ub.map_vals(json.dumps, x), coco_img.img['auxiliary'])), default=None))

    Example:
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> import kwcoco
        >>> ###
        >>> gid = 1
        >>> dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral')
        >>> coco_img = dset.coco_image(gid)
        >>> coco_populate_geo_img_heuristics2(coco_img, overwrite=True)
        >>> ###
        >>> gid = 1
        >>> dset2 = kwcoco.CocoDataset.demo('shapes8')
        >>> coco_img = dset2.coco_image(gid)
        >>> coco_populate_geo_img_heuristics2(coco_img, overwrite=True)
    """
    import geowatch
    bundle_dpath = coco_img.bundle_dpath
    img = coco_img.img

    primary_obj = coco_img.primary_asset()
    asset_objs = list(coco_img.iter_asset_objs())

    overwrite = _coerce_overwrite(overwrite)

    # Note: for non-geotiffs we could use the aux_to_img transformation
    # provided with them to determine their geo-properties.
    asset_errors = []
    for obj in asset_objs:
        try:
            errors = _populate_canvas_obj(
                bundle_dpath, obj, overwrite=overwrite, default_gsd=default_gsd,
                keep_geotiff_metadata=keep_geotiff_metadata,
                enable_intensity_stats=enable_intensity_stats)
            asset_errors.append(errors)
        except Exception as ex:
            if skip_populate_errors:
                asset_errors.append(str(ex))
            else:
                raise

    if all(asset_errors):
        info = ub.dict_isect(img, {'name', 'file_name', 'id'})
        warnings.warn(f'img {info} has issues introspecting')

    if keep_geotiff_metadata:
        info = primary_obj.get('geotiff_metadata', None)
        if info is None:
            dem_hint = primary_obj.get('dem_hint', 'use')
            metakw = {}
            if dem_hint == 'ignore':
                metakw['elevation'] = 0
            primary_fname = primary_obj.get('file_name', None)
            primary_fpath = join(bundle_dpath, primary_fname)
            info = geowatch.gis.geotiff.geotiff_metadata(primary_fpath, **metakw)
            primary_obj['geotiff_metadata'] = info

    # if 'default_nodata' not in img:
    #     img['default_nodata'] = primary_obj['default_nodata']
    if overwrite:
        # Update image space to correspond to one of the assets
        # (typically the one with the largest size)
        if img.get('file_name', None) is None:
            assets = list(coco_img.assets)
            asset_dsizes = [
                (asset['width'], asset['height'])
                for asset in assets
                if ("width" in asset) and ("height" in asset)
            ]
            idx = ub.argmax(asset_dsizes)
            main_asset = assets[idx]

            img['width'] = main_asset['width']
            img['height'] = main_asset['height']

            if 'warp_to_wld' in main_asset:
                # Also update world information (does the image need this?)
                img['wld_to_pxl'] = kwimage.Affine.coerce(main_asset['warp_to_wld']).inv().concise()
                img['wld_crs_info'] = main_asset['wld_crs_info']
            else:
                img.pop('wld_to_pxl', None)
                img.pop('wld_crs_info', None)

            _recompute_auxiliary_transforms(img)

    if ('width' not in img or 'height' not in img):
        # or overwrite:
        # TODO: better test to see if we need to recompute auxiliary transforms
        _recompute_auxiliary_transforms(img)

    valid_region_utm = img.get('valid_region_utm', None)
    if enable_valid_region and (valid_region_utm is None or 'warp' in overwrite):
        _populate_valid_region(coco_img)

    if keep_geotiff_metadata:
        img['geotiff_metadata'] = primary_obj['geotiff_metadata']

    if 'date_captured' in img:
        from kwutil import util_time
        img['timestamp'] = util_time.coerce_datetime(img['date_captured']).timestamp()

    if 'geos_corners' in primary_obj:
        # FIXME: we are assuming this maps perfectly onto the image
        # which is should for the SMART data, but perhaps in the future
        # this will not be safe?
        img['geos_corners'] = primary_obj['geos_corners']
    else:
        print('None of the assets had geo information')
    return img



@profile
def _populate_valid_region(coco_img):
    """
    Ignore:
        >>> # Make a dummy image with nodata to test that this works
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> from geowatch.utils.kwcoco_extensions import _populate_valid_region
        >>> import kwcoco
        >>> H, W = 1024, 1024
        >>> nodata = 0  #
        >>> dpath = ub.Path.appdir('geowatch/tests/valid_region').ensuredir()
        >>> imdata = ((np.random.rand(H, W) * 240) + 3).astype(np.uint8)
        >>> # Fill in nodata values
        >>> imdata[:, 0:20] = nodata
        >>> imdata[:, -20:] = nodata
        >>> poly1 = kwimage.Polygon.random(rng=432).scale((W, H))
        >>> poly1.data['exterior'] = poly1.data['exterior'].round()
        >>> poly1.fill(imdata, nodata)
        >>> poly2 = kwimage.Polygon.circle((0, 0), W / 2)
        >>> poly2.fill(imdata, nodata)
        >>> # Write image to disk with nodata info
        >>> img_fpath = dpath / 'test_img.tif'
        >>> kwimage.imwrite(img_fpath, imdata, backend='gdal', nodata=0, overviews=3)
        >>> # # Register the image with a kwcoco dataset
        >>> dset = kwcoco.CocoDataset()
        >>> dset.fpath = dpath / 'data.kwcooc.json'
        >>> gid = dset.add_image(file_name=img_fpath, channels='gray')
        >>> dset.conform()
        >>> #
        >>> coco_img = dset.coco_image(gid)
        >>> _populate_valid_region(coco_img)
        >>> valid_region = kwimage.MultiPolygon.coerce(coco_img.img['valid_region'])
        >>> print('valid_region.data = {!r}'.format(valid_region.data))
        >>> #
        >>> import kwplot
        >>> kwplot.autompl()
        >>> canvas = imdata.copy()
        >>> canvas = valid_region.draw_on(canvas, fill=0, border=True)
        >>> kwplot.imshow(canvas)

        from rasterio import features
        shapes = [(valid_region.to_geojson(), 255)]
        canvasR = features.rasterize(shapes, out=canvas[:, :, 0].copy())
        kwplot.imshow(canvasR, doclf=1)
    """
    import geowatch
    # _ = ub.cmd('gdalinfo -stats {}'.format(fpath), check=True)
    bundle_dpath = coco_img.bundle_dpath
    img = coco_img.img
    primary_obj = coco_img.primary_asset()
    if primary_obj is None:
        warnings.warn('No primary asset found for img={}'.format(img))
        return
    primary_fname = primary_obj.get('file_name', None)
    primary_fpath = join(bundle_dpath, primary_fname)

    # NOTE: THIS POLYGON IS COMPUTED VIA RASTERIO, NOT SURE HOW WELL THIS AGREES
    # WITH OTHER POLYGONS WE DRAW (USUALLY VIA CV2)
    # TODO: get a better heuristic here
    sh_poly = util_raster.mask(
        primary_fpath,
        tolerance=4,
        # tolerance=None,
        default_nodata=primary_obj.get('default_nodata', None),
        # max_polys=100,
        use_overview=2,
        convex_hull=True,
    )
    kw_asset_poly = kwimage.MultiPolygon.from_shapely(sh_poly)

    info = primary_obj.get('geotiff_metadata', None)
    if info is None:
        metakw = {}
        dem_hint = primary_obj.get('dem_hint', 'use')
        if dem_hint == 'ignore':
            metakw['elevation'] = 0
        info = geowatch.gis.geotiff.geotiff_metadata(primary_fpath, **metakw)

    warp_img_from_asset = kwimage.Affine.coerce(primary_obj.get('warp_aux_to_img', None))
    if warp_img_from_asset.isclose_identity():
        kw_img_poly = kw_asset_poly
    else:
        kw_img_poly = kw_asset_poly.warp(warp_img_from_asset)

    # TODO: we probably should not add the valid region to the asset.
    # Not sure if its used anywhere though.
    primary_obj['valid_region'] = kw_asset_poly.to_coco(style='new')
    img['valid_region'] = kw_img_poly.to_coco(style='new')

    if 'pxl_to_wld' in info:
        wld_from_asset = info['pxl_to_wld']
        kw_poly_utm = kw_asset_poly.warp(wld_from_asset).warp(info['wld_to_utm'])
        poly_utm = kw_poly_utm.to_geojson()
        poly_utm['properties'] = {}
        poly_utm['properties']['crs'] = info['utm_crs_info']
        # TODO: we probably should only add this to the image and not the
        # asset?
        primary_obj['valid_region_utm'] = poly_utm
        img['valid_region_utm'] = poly_utm


@profile
def _populate_canvas_obj(bundle_dpath, obj, overwrite=False, with_wgs=False,
                         default_gsd=None, keep_geotiff_metadata=False,
                         enable_intensity_stats=False):
    """
    obj can be an img or aux

    Ignore:
        from geowatch.utils.kwcoco_extensions import *  # NOQA
        from geowatch.demo.smart_kwcoco_demodata import demo_kwcoco_with_heatmaps
        coco_dset = demo_kwcoco_with_heatmaps()
        coco_img = coco_dset.coco_image(1)
        obj = coco_img.primary_asset()
        bundle_dpath = coco_dset.bundle_dpath
        overwrite = True
        with_wgs = False
        default_gsd = None
        keep_geotiff_metadata = False
    """
    import geowatch
    sensor_coarse = obj.get('sensor_coarse', None)  # not reliable
    num_bands = obj.get('num_bands', None)
    channels = obj.get('channels', None)
    fname = obj.get('file_name', None)
    warp_to_wld = obj.get('warp_to_wld', None)
    approx_meter_gsd = obj.get('approx_meter_gsd', None)

    overwrite = _coerce_overwrite(overwrite)

    errors = []
    # Can only do this for images with file names
    if fname is not None:
        fpath = join(bundle_dpath, fname)

        info = None
        dem_hint = obj.get('dem_hint', 'use')
        metakw = {}
        if dem_hint == 'ignore':
            metakw['elevation'] = 0

        # TODO: ensure real nodata exists (maybe write helper file to disk?)
        sensor_coarse = obj.get('sensor_coarse', None)
        # if sensor_coarse in {'S2', 'L8', 'WV'}:
        #     default_nodata = 0
        # else:
        #     default_nodata = None
        # # Heuristic for no-data
        # obj['default_nodata'] = default_nodata

        if 'warp' in overwrite or warp_to_wld is None or approx_meter_gsd is None:
            try:
                if info is None:
                    info = geowatch.gis.geotiff.geotiff_metadata(
                        fpath, strict=True, **metakw)

                if keep_geotiff_metadata:
                    obj['geotiff_metadata'] = info

                try:
                    height, width = info['img_shape'][0:2]
                except Exception:
                    print('info = {}'.format(ub.urepr(info, nl=1)))
                    raise

                obj['height'] = height
                obj['width'] = width
                # print('info = {!r}'.format(info))

                # WE NEED TO ACCOUNT FOR WLD_CRS TO USE THIS
                if info['is_rpc']:
                    obj_to_wld = None
                else:
                    obj_to_wld = kwimage.Affine.coerce(info['pxl_to_wld'])

                geos_corners = info['geos_corners']
                wld_crs_info = ub.dict_diff(info['wld_crs_info'], {'type'})
                obj.update({
                    'geos_corners': geos_corners,  # always in geojson
                    'wld_crs_info': wld_crs_info,
                })
                obj['band_metas'] = info['band_metas']
                obj['is_rpc'] = info['is_rpc']

                if with_wgs:
                    obj.update({
                        'wgs84_to_wld': info['wgs84_to_wld'],
                        'wld_to_pxl': info['wld_to_pxl'],
                    })

                approx_meter_gsd = info['approx_meter_gsd']
            except exceptions.GeoMetadataNotFound as ex:
                if default_gsd is not None:
                    obj['approx_meter_gsd'] = default_gsd
                    obj['warp_to_wld'] = kwimage.Affine.eye().__json__()
                else:
                    # FIXME: This might not be the best way to report errors
                    # raise
                    errors.append('no_crs_info: {!r}'.format(ex))
            else:
                obj['approx_meter_gsd'] = approx_meter_gsd
                if obj_to_wld is not None:
                    obj['warp_to_wld'] = kwimage.Affine.coerce(obj_to_wld).__json__()

        if 'band' in overwrite or num_bands is None:
            try:
                num_bands = _introspect_num_bands(fpath)
            except Exception:
                channels = obj.get('channels', None)
                if channels is not None:
                    num_bands = kwcoco.ChannelSpec(channels).numel()
                else:
                    raise
            obj['num_bands'] = num_bands

        if 'channels' in overwrite or channels is None:
            if sensor_coarse is not None:
                channels = _sensor_channel_hueristic(sensor_coarse, num_bands)
            elif num_bands is not None:
                channels = _num_band_hueristic(num_bands)
            else:
                raise Exception(ub.paragraph(
                    f'''
                    no methods to introspect channels
                    sensor_coarse={sensor_coarse},
                    num_bands={num_bands}
                    for obj={obj}
                    '''))
            obj['channels'] = channels

        # TODO: determine nodata defaults based on sensor_coarse

        if enable_intensity_stats:
            from geowatch.cli import coco_spectra
            coco_spectra.ensure_intensity_sidecar(fpath)

        return errors


@ub.memoize
def _is_writeable(dpath):
    " https://stackoverflow.com/questions/2113427/determining-whether-a-directory-is-writeable "
    import os
    return os.access(dpath, os.W_OK) and os.path.isdir(dpath)


def _coerce_overwrite(overwrite):
    """
    Im not a big fan of the way overwrite currently works, might want to
    refactor.
    """
    valid_overwrites = {'warp', 'band', 'channels'}
    default_overwrites = {'warp', 'band'}
    if isinstance(overwrite, str):
        overwrite = set(overwrite.split(','))
    if overwrite is True:
        overwrite = default_overwrites
    elif overwrite is False:
        overwrite = {}
    else:
        overwrite = set(overwrite)
        unexpected = overwrite - valid_overwrites
        if unexpected:
            raise ValueError(f'Got unexpected overwrites: {unexpected}')
    return overwrite


# def single_geotiff_metadata(bundle_dpath, img, serializable=False):
#     import geowatch
#     from os.path import exists
#     import dateutil
#     geotiff_metadata = None
#     aux_metadata = []

#     img['datetime_acquisition'] = (
#         dateutil.parser.parse(img['date_captured'])
#     )

#     # if an image specified its "dem_hint" as ignore, then we set the
#     # elevation to 0. NOTE: this convention might be generalized and
#     # replaced in the future. I.e. in the future the dem_hint might simply
#     # specify the constant elevation to use, or perhaps something else.
#     dem_hint = img.get('dem_hint', 'use')
#     metakw = {}
#     if dem_hint == 'ignore':
#         metakw['elevation'] = 0

#     # only need rpc info, geos_corners, and and warps
#     keys_of_interest = {
#         'rpc_transform',
#         'is_rpc',
#         'wgs84_to_wld',
#         'geos_corners',
#         'wld_to_pxl',
#     }

#     fname = img.get('file_name', None)
#     if fname is not None:
#         src_gpath = join(bundle_dpath, fname)
#         assert exists(src_gpath)
#         img_info = geowatch.gis.geotiff.geotiff_metadata(src_gpath, **metakw)

#         if serializable:
#             raise NotImplementedError
#         else:
#             img_info = ub.dict_isect(img_info, keys_of_interest)
#             geotiff_metadata = img_info

#     for aux in img.get('auxiliary', []):
#         aux_fpath = join(bundle_dpath, aux['file_name'])
#         assert exists(aux_fpath)
#         aux_info = geowatch.gis.geotiff.geotiff_metadata(aux_fpath, **metakw)
#         aux_info = ub.dict_isect(aux_info, keys_of_interest)
#         if serializable:
#             raise NotImplementedError
#         else:
#             aux_metadata.append(aux_info)
#             aux['geotiff_metadata'] = aux_info

#     if fname is None:
#         # need to choose one of the auxiliary images as the "main" image.
#         # We are assuming that there is one auxiliary image that exactly
#         # corresponds.
#         candidates = []
#         for aux in img.get('auxiliary', []):
#             if aux['width'] == img['width'] and aux['height'] == img['height']:
#                 candidates.append(aux)

#         if not candidates:
#             raise AssertionError(
#                 'Assumed at least one auxiliary image has identity '
#                 'transform, but this seems to not be the case')
#         aux = ub.peek(candidates)
#         geotiff_metadata = aux['geotiff_metadata']

#     img['geotiff_metadata'] = geotiff_metadata
#     return geotiff_metadata, aux_metadata



[docs]
@profile
def coco_populate_geo_video_stats(coco_dset, video_id, target_gsd='max-resolution'):
    """
    Create a "video-space" for all images in a video sequence at a specified
    resolution.

    For this video, this chooses the "best" image as the "video canvas /
    region" and registers everything to that canvas/region. This creates the
    "video-space" for this image sequence. Currently the "best" image is the
    one that has the GSD closest to the target-gsd. This hueristic works well
    in most cases, but no all.

    Notes:
        * Currently the "best image" exactly define the video canvas / region.

        * Areas where other images do not overlap the vieo canvas are
          effectively lost when sampling in video space, because anything
          outside the video canvas is cropped out.

        * Auxilary / asset images are required to have an "approx_meter_gsd"
          and a "warp_to_wld" attribute to use this function atm.

    TODO:
        - [ ] Allow choosing of a custom "video-canvas" not based on any one image.
        - [ ] Allow choosing a "video-canvas" that encompases all images
        - [ ] Allow the base image to contain "approx_meter_gsd" /
              "warp_to_wld" instead of the auxiliary image
        - [ ] Is computing the scale factor based on approx_meter_gsd safe?

    Args:
        coco_dset (CocoDataset): coco dataset to be modified inplace
        video_id (int): video_id to modify
        target_gsd (float | str): string code, or float target gsd


    Example:
        >>> # xdoctest: +REQUIRES(env:DVC_DPATH)
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> from geowatch.utils.util_data import find_dvc_dpath
        >>> import kwcoco
        >>> dvc_dpath = find_dvc_dpath()
        >>> coco_fpath = dvc_dpath / 'Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json'
        >>> video_id = 2

        >>> coco_fpath = dvc_dpath / 'Aligned-Drop2-TA1-2022-03-07/data.kwcoco.json'
        >>> coco_dset = kwcoco.CocoDataset(coco_fpath)
        >>> target_gsd = 10.0
        >>> video_id = 1
        >>> # We can check transforms before we apply this function
        >>> coco_dset.images(video_id=video_id).lookup('warp_img_to_vid', None)
        >>> # Apply the function
        >>> coco_populate_geo_video_stats(coco_dset, video_id, target_gsd)
        >>> # Check these transforms to make sure they look right
        >>> popualted_video = coco_dset.index.videos[video_id]
        >>> popualted_video = ub.dict_isect(popualted_video, ['width', 'height', 'warp_wld_to_vid', 'target_gsd'])
        >>> print('popualted_video = {}'.format(ub.urepr(popualted_video, nl=-1)))
        >>> coco_dset.images(video_id=video_id).lookup('warp_img_to_vid')

        # TODO: make a demo dataset with some sort of gsd metadata
        coco_dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral')
        print('coco_dset = {!r}'.format(coco_dset))

        coco_fpath = ub.expandpath('~/data/dvc-repos/smart_watch_dvc/drop0_aligned/data.kwcoco.json')
        coco_fpath = '/home/joncrall/data/dvc-repos/smart_watch_dvc/drop1-S2-L8-aligned/combo_data.kwcoco.json'
        coco_dset = kwcoco.CocoDataset(coco_fpath)
        video_id = 1

        target_gsd = 2.8


        # Check drawing the valid region on the image
        frac = 1

        valid_regions = coco_dset.images().lookup('valid_region')
        cands = []
        for idx, valid_region in enumerate(valid_regions):
            valid_region_img = kwimage.MultiPolygon.coerce(valid_region)
            frac = valid_region_img.to_shapely().area / valid_region_img.bounding_box().area
            if frac < 0.6:
                cands.append(idx)
        gid = coco_dset.images().take(cands).lookup('id')[0]

        coco_img = coco_dset.coco_image(gid)
        imdata = coco_img.imdelay('blue').finalize(nodata='float')
        valid_region_img = kwimage.MultiPolygon.coerce(coco_img.img['valid_region'])
        frac = valid_region_img.to_shapely().area / valid_region_img.bounding_box().area
        print('frac = {!r}'.format(frac))

        canvas_imgspace = kwimage.normalize_intensity(imdata)
        kwplot.autompl()
        kwplot.imshow(canvas_imgspace, doclf=1)

        valid_region_img = kwimage.MultiPolygon.coerce(coco_img.img['valid_region'])
        canvas_imgspace = valid_region_img.draw_on(canvas_imgspace, fill=0, color='green')
        kwplot.imshow(canvas_imgspace)


        # Check the nodata polygon returned raw pixel methods
        primary_data = kwimage.imread(primary_fpath, nodata='float')
        valid_mask = ~np.isnan(primary_data)
        kw_poly = kwimage.Mask(valid_mask.astype(np.uint8), 'c_mask').to_multi_polygon()
        print('kwimage kw_poly.data = {!r}'.format(kw_poly.data))

        # CHeck the one returned by util_raster
        primary_fpath = coco_img.primary_image_filepath()
        sh_poly = util_raster.mask(
            primary_fpath, tolerance=None,
            convex_hull=0)
        kw_poly = kwimage.MultiPolygon.from_shapely(sh_poly)
        print('rasterio kw_poly.data = {!r}'.format(kw_poly.data))
    """
    from kwcoco.coco_image import CocoImage
    # Compute an image-to-video transform that aligns all frames to some
    # common resolution.
    video = coco_dset.index.videos[video_id]
    gids = coco_dset.index.vidid_to_gids[video_id]

    check_unique_channel_names(coco_dset, gids=gids)

    frame_infos = {}

    for gid in gids:
        img = coco_dset.index.imgs[gid]
        coco_img = CocoImage(img)

        # If the base dictionary has "warp_to_wld" and "approx_meter_gsd"
        # information we use that.
        warp_wld_from_img = img.get('warp_to_wld', None)
        img_approx_meter_gsd = img.get('approx_meter_gsd', None)
        wld_crs_info = img.get('wld_crs_info', None)

        # Otherwise we try to obtain it from the auxiliary images
        if img_approx_meter_gsd is None or warp_wld_from_img is None:
            # Choose any one of the auxiliary images that has the required
            # attribute
            aux_chosen = coco_img.primary_asset(requires=[
                'warp_to_wld', 'approx_meter_gsd'])
            if aux_chosen is None:
                raise Exception(ub.paragraph(
                    '''
                    Image auxiliary images have no warp_to_wld and approx_meter
                    gsd. The auxiliary images may not have associated geo
                    metadata.
                    '''))

            warp_wld_from_asset = kwimage.Affine.coerce(aux_chosen.get('warp_to_wld', None))
            warp_img_from_asset = kwimage.Affine.coerce(aux_chosen['warp_aux_to_img'])

            warp_asset_from_img = warp_img_from_asset.inv()
            warp_wld_from_img = warp_wld_from_asset @ warp_asset_from_img

            scale_img_from_asset = np.mean(warp_img_from_asset.decompose()['scale'])

            asset_approx_meter_gsd = aux_chosen['approx_meter_gsd']
            img_approx_meter_gsd = asset_approx_meter_gsd / scale_img_from_asset

            wld_crs_info = aux_chosen.get('wld_crs_info', None)

        if img_approx_meter_gsd is None or warp_wld_from_img is None:
            raise Exception(ub.paragraph(
                '''
                Both the base image and its auxiliary images do not seem to
                have the required warp_to_wld and approx_meter_gsd fields.
                The image may not have associated geo metadata.
                '''))

        warp_wld_from_img = kwimage.Affine.coerce(warp_wld_from_img)

        asset_channels = []
        asset_gsds = []
        for obj in coco_img.iter_asset_objs():
            _gsd = obj.get('approx_meter_gsd')
            if _gsd is not None:
                _gsd = round(_gsd, 1)
            asset_gsds.append(_gsd)
            asset_channels.append(obj.get('channels', None))

        frame_infos[gid] = {
            'warp_wld_from_img': warp_wld_from_img,
            'wld_crs_info': wld_crs_info,
            # Note: division because gsd is inverted. This got me confused, but
            # I'm pretty sure this works.
            'target_gsd': target_gsd,
            'approx_meter_gsd': img_approx_meter_gsd,
            'width': img['width'],
            'height': img['height'],
            'asset_channels': asset_channels,
            'asset_gsds': asset_gsds,
        }

    sorted_gids = ub.argsort(frame_infos, key=lambda x: x['approx_meter_gsd'])
    if sorted_gids:
        min_gsd_gid = sorted_gids[0]
        max_gsd_gid = sorted_gids[-1]
        max_example = frame_infos[max_gsd_gid]
        min_example = frame_infos[min_gsd_gid]
        max_gsd = max_example['approx_meter_gsd']
        min_gsd = min_example['approx_meter_gsd']

        if target_gsd == 'max-resolution':
            target_gsd_ = min_gsd
        elif target_gsd == 'min-resolution':
            target_gsd_ = max_gsd
        else:
            target_gsd_ = target_gsd
            if not isinstance(target_gsd, numbers.Number):
                raise TypeError('target_gsd must be a code or number = {}'.format(type(target_gsd)))
        target_gsd_ = float(target_gsd_)

        # Compute the scale factor needed to be applied to each image to achieve
        # the target videospace GSD.
        for info in frame_infos.values():
            info['target_gsd'] = target_gsd_
            info['to_target_scale_factor'] = info['approx_meter_gsd'] / target_gsd_

        available_channels = set()
        available_gsds = set()
        for gid in gids:
            img = coco_dset.index.imgs[gid]
            for obj in coco_img.iter_asset_objs():
                available_channels.add(obj.get('channels', None))
                _gsd = obj.get('approx_meter_gsd')
                if _gsd is not None:
                    available_gsds.add(round(_gsd, 1))

        # Align to the base reference frame closest to the target GSD, which
        # has the "to_target_scale_factor" that is closest to 1.0
        base_gid, base_info = min(
            frame_infos.items(),
            key=lambda kv: abs(1 - kv[1]['to_target_scale_factor'])
        )
        scale = base_info['to_target_scale_factor']
        base_wld_crs_info = base_info['wld_crs_info']

        # Can add an extra transform here if the video is not exactly in
        # any specific image space
        warp_baseimg_from_wld = base_info['warp_wld_from_img'].inv()
        warp_vid_from_wld = kwimage.Affine.scale(scale) @ warp_baseimg_from_wld

        video['width'] = int(np.ceil(base_info['width'] * scale))
        video['height'] = int(np.ceil(base_info['height'] * scale))
        video['wld_crs_info'] = base_wld_crs_info

        if 'valid_region_geos' in video:
            import geopandas as gpd
            from geowatch.utils import util_gis
            # Project the valid region onto video space
            valid_region_crs84 = kwimage.MultiPolygon.coerce(video['valid_region_geos'])
            wld_crs = base_wld_crs_info['auth']
            crs84 = util_gis.get_crs84()
            crs84_region_gdf = gpd.GeoDataFrame({'geometry': [valid_region_crs84.to_shapely()]}, crs=crs84)
            wld_region_gdf = crs84_region_gdf.to_crs(wld_crs)
            wld_region_poly = wld_region_gdf['geometry'].iloc[0]
            wld_region_kwpoly = kwimage.MultiPolygon.from_shapely(wld_region_poly)
            valid_region = wld_region_kwpoly.warp(warp_vid_from_wld).to_geojson()

            if 0:
                import kwplot
                kwplot.autompl()
                wld_map_gdf = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
                ax = wld_map_gdf.plot()
                wld_region_gdf.to_crs('OGC:CRS84').plot(ax=ax, color='red')
                wld_region_gdf.plot(ax=ax, color='red')

            video['valid_region'] = valid_region

        # Store metadata in the video
        video['warp_wld_to_vid'] = warp_vid_from_wld.__json__()
        video['target_gsd'] = target_gsd_
        video['resolution'] = f'{target_gsd_}GSD'
        video['min_gsd'] = min_gsd
        video['max_gsd'] = max_gsd

    video['num_frames'] = len(gids)

    # Remove old cruft (can remove in future versions)
    video.pop('available_channels', None)

    for gid in gids:
        img = coco_dset.index.imgs[gid]
        frame_info = frame_infos[gid]
        wld_crs_info = frame_info['wld_crs_info']
        warp_wld_from_img = frame_info['warp_wld_from_img']
        warp_vid_from_img = warp_vid_from_wld @ warp_wld_from_img
        img['warp_img_to_vid'] = warp_vid_from_img.concise()

        if base_wld_crs_info != wld_crs_info:
            import warnings
            warnings.warn(ub.paragraph(
                f'''
                Video alignment is warping images with different World
                Coordinate Reference Systems, but still treating them as the
                same. FIXME
                base_wld_crs_info={base_wld_crs_info!r},
                wld_crs_info={wld_crs_info!r}
                '''))




[docs]
def check_kwcoco_spatial_transforms(coco_dset):
    """
    import kwplot
    kwplot.plt.ion()
    import kwcoco
    dset = kwcoco.CocoDataset('/home/joncrall/remote/toothbrush/data/dvc-repos/smart_data_dvc-ssd/Drop6_MeanYear/imgonly-KR_R001.kwcoco.zip')

    import geowatch
    data_dvc_dpath = geowatch.find_dvc_dpath(tags='phase2_data', hardware='auto')
    dset = kwcoco.CocoDataset(data_dvc_dpath / 'Drop6-MeanYear10GSD/imganns-NZ_R001.kwcoco.zip')

    dset = kwcoco.CocoDataset('/home/joncrall/quicklinks/toothbrush_smart_expt_dvc/_debug/pred.kwcoco.zip')
    """

    import kwimage
    from geowatch.utils import util_gdal
    import numpy as np

    for video in coco_dset.videos().objs:

        video_box = kwimage.Box.from_dsize((video['width'], video['height']))
        print('video_box = {}'.format(ub.urepr(video_box, nl=1)))

        video_target_gsd = video['target_gsd']

        images = coco_dset.images(video_id=video['id'])

        for coco_img in images.coco_images:

            image_box = kwimage.Box.from_dsize((coco_img['width'], coco_img['height']))
            sensor = coco_img['sensor_coarse']

            image_summary = {
                'sensor': sensor,
                'video_box': video_box,
                'image_box': image_box,
            }
            warp_vid_from_img = coco_img.warp_vid_from_img
            scale_vid_from_img = warp_vid_from_img.decompose()['scale']
            recon_video_box = image_box.scale(scale_vid_from_img).quantize()
            image_summary['scale_vid_from_img'] = scale_vid_from_img
            image_summary['reconstructed_video_box'] = recon_video_box
            image_summary['img_gsd'] = video_target_gsd / (1 / np.mean(scale_vid_from_img))

            img = ub.udict(coco_img.img)
            img = img - {'has_predictions'}
            # print('coco_img.img = {}'.format(ub.urepr(img, nl=-1)))
            asset_summaries = []

            for asset in coco_img.assets:
                warp_img_from_asset = kwimage.Affine.coerce(asset['warp_aux_to_img'])
                scale_img_from_asset = warp_img_from_asset.decompose()['scale']

                warp_vid_from_asset = warp_vid_from_img @ warp_img_from_asset

                scale_vid_from_asset = warp_vid_from_asset.decompose()['scale']
                scale_asset_from_vid = np.mean(warp_vid_from_asset.inv().decompose()['scale'])

                asset_box = kwimage.Box.from_dsize((asset['width'], asset['height']))
                recon_img_box = asset_box.scale(scale_img_from_asset)
                recon_vid_box = asset_box.scale(scale_vid_from_asset)
                fpath = ub.Path(coco_img.bundle_dpath) / asset['file_name']
                gdal_dset = util_gdal.GdalOpen(fpath)
                asset_shape = (gdal_dset.RasterYSize, gdal_dset.RasterXSize, gdal_dset.RasterCount)
                info = gdal_dset.info()
                disk_gsd = np.mean(np.abs(kwimage.Affine.from_gdal(info['geoTransform']).decompose()['scale']))
                asset_summaries.append({
                    'video_box': video_box,
                    'image_box': image_box,
                    'asset_box': asset_box,
                    'asset_disk_shape': asset_shape,
                    'asset_disk_gsd': disk_gsd,

                    'asset_gsd': video_target_gsd / scale_asset_from_vid,

                    'scale_img_from_asset': scale_img_from_asset,
                    'scale_vid_from_asset': scale_vid_from_asset,
                    'reconstructed_img_box': recon_img_box.quantize(),
                    'reconstructed_vid_box': recon_vid_box.quantize(),
                })
                # assert len(info['bands']) == coco_img.channels.numel()

            image_summary['assets'] = asset_summaries
            print('image_summary = {}'.format(ub.urepr(image_summary, nl=3, sv=1)))




[docs]
def check_geo_transform_consistency(coco_dset):
    """
    Checks the consistency of transforms between world, video, image, and asset
    space in a coco dataset.

    Ignore:
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> import kwcoco
        >>> #coco_dset = kwcoco.CocoDataset('/home/joncrall/remote/toothbrush/data/dvc-repos/smart_data_dvc-ssd/Drop6-MeanYear10GSD/imgonly-NZ_R001_v2.kwcoco.json')
        >>> coco_dset = kwcoco.CocoDataset('/home/joncrall/quicklinks/toothbrush_smart_data_dvc-ssd/Drop6/imgonly-NZ_R001.kwcoco.json')
        >>> coco_dset = kwcoco.CocoDataset('/home/joncrall/remote/toothbrush/data/dvc-repos/smart_data_dvc-ssd/Drop6-MeanYear10GSD/imganns-NZ_R001_v2.kwcoco.json')
        >>> populate_watch_fields(coco_dset, target_gsd=10, overwrite=True)
        >>> check_geo_transform_consistency(coco_dset)

        coco_dset = kwcoco.CocoDataset('/home/joncrall/quicklinks/toothbrush_smart_data_dvc-ssd/Drop6/imgonly-KR_R001.kwcoco.json')
        coco_dset = kwcoco.CocoDataset('/home/joncrall/quicklinks/toothbrush_smart_data_dvc-ssd/Drop6-MeanYear10GSD/imganns-KR_R001.kwcoco.zip')
    """

    def check_space_graph_concistency(graph_v3):
        errors = []

        # Test for consistency between video / image / asset spaces
        wld_boxes = graph_v3.nodes['wld']['boxes']

        spaces = ['asset', 'img', 'vid']
        space_crs_infos = ub.udict()
        for space in spaces:
            wld_crs_info = graph_v3.nodes[space].get('wld_crs_info', None)
            if wld_crs_info is not None:
                space_crs_infos[space] = wld_crs_info

        space_crs_infos_hash = space_crs_infos.map_values(ub.hash_data)
        if not ub.allsame(space_crs_infos_hash.values()):
            errors.append({
                'message': 'CRS is not the same in all spaces',
                'data': {
                    'space_crs_infos': space_crs_infos,
                    'space_crs_infos_hash': space_crs_infos_hash,
                }
            })

        import itertools as it
        ious = {}
        for s1, s2 in it.combinations(spaces, 2):
            if s1 in wld_boxes and s2 in wld_boxes:
                b1 = wld_boxes[s1]
                b2 = wld_boxes[s2]
                ious[(s1, s2)] = b1.iou(b2)

        if not all(v > 0.95 for v in ious.values()):
            errors.append({
                'message': 'World boxes do not line up',
                'data': {
                    'wld_boxes': wld_boxes,
                    'ious': ious,
                },
            })

        # Compute any remaining warps to complete the graph
        import operator as op
        from functools import reduce
        missing_edges = set(nx.transitive_closure(graph_v3).edges) - graph_v3.edges
        missing_edges = [(u, v) for u, v in missing_edges if u != v]
        for n1, n2 in missing_edges:
            path = nx.shortest_path(graph_v3, n1, n2)
            path_warps = []
            for edge in ub.iter_window(path, 2):
                warp = graph_v3.edges[edge]['warp']
                path_warps.append(warp)
            warp_n2_from_n1 = reduce(op.matmul, path_warps[::-1])
            graph_v3.add_edge(n1, n2, warp=warp_n2_from_n1)

        # For all paths in the graph, determine if the warps are
        # consistent
        pairs1 = list(it.combinations(graph_v3.nodes, 2))
        pairs2 = [e[::-1] for e in pairs1]
        pairs = pairs1 + pairs2

        for n1, n2 in pairs:
            all_paths = list(nx.all_simple_paths(graph_v3, n1, n2))
            for path in all_paths:
                path_warps = []
                for u, v in ub.iter_window(path, 2):
                    warp = graph_v3.edges[(u, v)]['warp']
                    path_warps.append(warp)
                warp_indirect = reduce(op.matmul, path_warps[::-1])
                warp_direct = graph_v3.edges[(n1, n2)]['warp']
                effective = warp_indirect.inv() @ warp_direct
                if not effective.isclose_identity(rtol=1e-3, atol=1e-3):
                    errors.append({
                        'message': 'Transforms are inconsistent',
                        'data': {
                            'nodes': (n1, n2),
                            'path': path,
                            'warp_indirect': warp_indirect.concise(),
                            'warp_direct': warp_direct.concise(),
                        },
                    })

        if 0:
            from cmd_queue.util.util_networkx import write_network_text
            write_network_text(graph_v3)

        if errors:
            import copy
            node_summaries = copy.deepcopy(dict(graph_v3.nodes))
            # print(ub.urepr(dict(graph_v3.nodes)))
            for key, val in node_summaries.items():
                if 'boxes' in val:
                    val['boxes'] = ub.udict(val['boxes']).map_values(lambda x: x.bounding_box().to_xywh().astype(float).data)
                if 'box' in val:
                    val['box'] = val['box'].bounding_box().to_xywh().data
                if 'corners_crs84' in val:
                    val['corners_crs84'] = val['corners_crs84'].bounding_box().to_xywh().data
                if 'wld_crs_info' in val:
                    val['wld_crs_info'] = val['wld_crs_info']['auth'][1]

                val['warp_to'] = {}
                for other, data in sorted(graph_v3.adj[key].items()):
                    val['warp_to'][other] = ub.urepr(data['warp'].concise(), precision=2, nl=0)
                    ...
            print('node_summaries = {}'.format(ub.urepr(node_summaries, nl=True, precision=2)))

        return errors

    error_ids = set()
    seen_ids = set()

    all_errors = {}

    from kwutil import util_progress
    pman = util_progress.ProgressManager()
    with pman:
        # Create graphs with relationships between videos / images / assets
        import networkx as nx
        for video_id in pman.progiter(coco_dset.videos(), desc='check video consistency'):

            video = coco_dset.index.videos[video_id]

            # vid_valid_crs84 = kwimage.MultiPolygon.coerce(video['valid_region_geos'])
            # vid_valid_vidspace = kwimage.MultiPolygon.coerce(video['valid_region'])
            vid_box_vidspace = kwimage.Box.from_dsize((video['width'], video['height'])).to_polygon()
            warp_vid_from_wld = kwimage.Affine.coerce(video['warp_wld_to_vid'])

            graph_v1 = nx.DiGraph()
            graph_v1.add_node('wld')
            graph_v1.add_node('vid')

            # Add a node with video properties
            graph_v1.nodes['vid']['box'] = vid_box_vidspace
            # graph_v1.nodes['vid']['valid_region'] = vid_valid_vidspace
            graph_v1.nodes['vid']['wld_crs_info'] = video['wld_crs_info']

            # Add relationship between video and world space
            graph_v1.add_edge('wld', 'vid', warp=warp_vid_from_wld)
            graph_v1.add_edge('vid', 'wld', warp=warp_vid_from_wld.inv())

            # Warp the video box into world space
            graph_v1.nodes['wld']['boxes'] = {}
            graph_v1.nodes['wld']['boxes']['vid'] = graph_v1.nodes['vid']['box'].warp(graph_v1.edges[('vid', 'wld')]['warp'])

            for gid in pman.progiter(coco_dset.images(), desc='check image consistency'):

                coco_img = coco_dset.coco_image(gid)
                img_corners_crs84 = kwimage.Polygon.coerce(coco_img['geos_corners'])
                img_box_imgspace = kwimage.Box.from_dsize((coco_img['width'], coco_img['height'])).to_polygon()
                warp_vid_from_img = kwimage.Affine.coerce(coco_img['warp_img_to_vid'])

                if coco_img.get('wld_to_pxl', None) is not None:
                    warp_img_from_wld = kwimage.Affine.coerce(coco_img['wld_to_pxl'])
                else:
                    warp_img_from_wld = None

                # Create a copy of the existing video graph for this image
                graph_v2 = graph_v1.copy()

                # Create a node with image properties
                graph_v2.add_node('img')
                graph_v2.nodes['img']['box'] = img_box_imgspace
                graph_v2.nodes['img']['corners_crs84'] = img_corners_crs84

                # Add relationship between image and video / world space
                graph_v2.add_edge('img', 'vid', warp=warp_vid_from_img)
                graph_v2.add_edge('vid', 'img', warp=warp_vid_from_img.inv())

                if warp_img_from_wld is not None:
                    graph_v2.add_edge('wld', 'img', warp=warp_img_from_wld)
                    graph_v2.add_edge('img', 'wld', warp=warp_img_from_wld.inv())
                    # Warp the image box into world space.
                    graph_v2.nodes['wld']['boxes']['img'] = graph_v2.nodes['img']['box'].warp(graph_v2.edges[('img', 'wld')]['warp'])
                    graph_v2.nodes['img']['wld_crs_info'] = coco_img['wld_crs_info']

                for asset in coco_img.assets:

                    channels = asset['channels']

                    asset_box_assetspace = kwimage.Box.from_dsize((asset['width'], asset['height'])).to_polygon()
                    asset_corners_crs84 = kwimage.Polygon.coerce(asset['geos_corners'])

                    warp_wld_from_asset = kwimage.Affine.coerce(asset['warp_to_wld'])
                    warp_img_from_asset = kwimage.Affine.coerce(asset['warp_aux_to_img'])

                    # Create a copy of the existing image graph for this asset
                    graph_v3 = graph_v2.copy()

                    # Create a node with asset properties
                    graph_v3.add_node('asset')
                    graph_v3.nodes['asset']['box'] = asset_box_assetspace
                    graph_v3.nodes['asset']['wld_crs_info'] = asset['wld_crs_info']
                    graph_v3.nodes['asset']['corners_crs84'] = asset_corners_crs84

                    graph_v3.add_edge('asset', 'wld', warp=warp_wld_from_asset)
                    graph_v3.add_edge('asset', 'img', warp=warp_img_from_asset)
                    graph_v3.add_edge('wld', 'asset', warp=warp_wld_from_asset.inv())
                    graph_v3.add_edge('img', 'asset', warp=warp_img_from_asset.inv())

                    # Warp the asset box into world space
                    graph_v3.nodes['wld']['boxes']['asset'] = graph_v3.nodes['asset']['box'].warp(graph_v3.edges[('asset', 'wld')]['warp'])

                    ########
                    # Checks
                    ########
                    errors = check_space_graph_concistency(graph_v3)
                    asset_id = (gid, channels)
                    if errors:
                        print(f'asset_id={asset_id}')
                        print('errors = {}'.format(ub.urepr(errors, nl=True)))
                    seen_ids.add(asset_id)
                    if errors:
                        all_errors[asset_id] = errors
                        error_ids.add(asset_id)

                    # print(ub.urepr(dict(graph_v3.nodes)))
                    # for space1
    ok_ids = seen_ids - error_ids
    print(f'{len(ok_ids)} / {len(seen_ids)} passed')




[docs]
def check_unique_channel_names(coco_dset, gids=None, verbose=0):
    """
    Check each image has unique channel names

    TODO:
        - [ ] move to kwcoco proper

    Example:
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> import kwcoco
        >>> # TODO: make a demo dataset with some sort of gsd metadata
        >>> coco_dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral')
        >>> check_unique_channel_names(coco_dset)
        >>> # Make some duplicate channels to test
        >>> obj = coco_dset.images().objs[0]
        >>> obj['auxiliary'][0]['channels'] = 'B1|B1'
        >>> obj = coco_dset.images().objs[1]
        >>> obj['auxiliary'][0]['channels'] = 'B1|B1'
        >>> obj = coco_dset.images().objs[2]
        >>> obj['auxiliary'][1]['channels'] = 'B1'
        >>> import pytest
        >>> with pytest.raises(AssertionError):
        >>>     check_unique_channel_names(coco_dset)

    """
    images = coco_dset.images(gids=gids)
    errors = []
    for img in images.objs:
        coco_img = coco_dset.coco_image(img['id'])
        try:
            _check_unique_channel_names_in_image(coco_img)
        except AssertionError as ex:
            if verbose:
                print('ERROR: ex = {}'.format(ub.urepr(ex, nl=1)))
            errors.append(ex)

    if errors:
        error_summary = ub.dict_hist(map(str, errors))
        raise AssertionError(ub.urepr(error_summary))



def _check_unique_channel_names_in_image(coco_img):
    import kwcoco
    seen = set()
    for obj in coco_img.iter_asset_objs():
        chans = kwcoco.FusedChannelSpec.coerce(obj['channels'])
        chan_list : list = chans.normalize().parsed
        intra_aux_duplicate = ub.find_duplicates(chan_list)
        if intra_aux_duplicate:
            raise AssertionError(
                'Image has internal duplicate bands: {}'.format(
                    intra_aux_duplicate))

        inter_aux_duplicates = seen & set(chan_list)
        if inter_aux_duplicates:
            raise AssertionError(
                'Image has inter-auxiliary duplicate bands: {}'.format(
                    inter_aux_duplicates))



[docs]
def coco_list_asset_infos(coco_dset):
    """
    Get a list of filename and channels for each coco image
    """
    asset_infos = []
    for gid in coco_dset.images():
        coco_img = coco_dset.coco_image(gid)
        asset_objs = list(coco_img.iter_asset_objs())
        for _asset_idx, obj in enumerate(asset_objs):
            fname = obj.get('file_name', None)
            if fname is not None:
                fpath = join(coco_img.dset.bundle_dpath, fname)
                file_info = {
                    'fpath': fpath,
                    'channels': obj['channels'],
                }
                asset_infos.append(file_info)
    return asset_infos




[docs]
def check_geotiff_formats(coco_dset):
    # Enumerate assests on disk
    infos = []
    asset_infos = coco_list_asset_infos(coco_dset)
    for file_info in ub.ProgIter(asset_infos):
        fpath = file_info['fpath']
        info = geotiff_format_info(fpath)
        info.update(file_info)
        infos.append(info)

    ub.varied_values([ub.dict_diff(d, {'fpath', 'filelist'}) for d in infos])




[docs]
def rewrite_geotiffs(coco_dset):
    import tempfile
    import pathlib
    blocksize = 96
    compress = 'NONE'
    asset_infos = coco_list_asset_infos(coco_dset)

    for file_info in ub.ProgIter(asset_infos):
        fpath = file_info['fpath']
        if fpath.endswith(kwimage.im_io.JPG_EXTENSIONS):
            print('Skipping jpeg')
            # dont touch jpegs
            continue

        orig_fpath = pathlib.Path(fpath)

        info = geotiff_format_info(fpath)
        if (info['blocksize'][0] != blocksize or info['compress'] != compress) or True:
            tmpdir = orig_fpath.parent / '.tmp_gdal_workspace'
            tmpdir.mkdir(exist_ok=True, parents=True)
            workdir = tmpdir / 'work'
            bakdir = tmpdir / 'backup_v2'
            workdir.mkdir(exist_ok=True)
            bakdir.mkdir(exist_ok=True)

            tmpfile = tempfile.NamedTemporaryFile(suffix=orig_fpath.name, dir=workdir, delete=False)
            tmp_fpath = tmpfile.name

            options = [
                '-co BLOCKSIZE={}'.format(blocksize),
                '-co COMPRESS={}'.format(compress),
                '-of COG',
                '-overwrite',
            ]
            if not info['has_geotransform']:
                options += [
                    '-to SRC_METHOD=NO_GEOTRANSFORM'
                ]
            options += [
                fpath,
                tmp_fpath,
            ]
            command = 'gdalwarp ' + ' '.join(options)
            cmdinfo = ub.cmd(command)
            if cmdinfo['ret'] != 0:
                print('cmdinfo = {}'.format(ub.urepr(cmdinfo, nl=1)))
                raise Exception('Command Errored')

            # Backup the original file
            import shutil
            shutil.move(fpath, bakdir)

            # Move the rewritten file into its place
            shutil.move(tmp_fpath, fpath)


            # info2 = geotiff_format_info(tmp_fpath)



[docs]
def geotiff_format_info(fpath):
    from osgeo import gdal
    gdal_ds = gdal.Open(fpath, gdal.GA_ReadOnly)
    filelist = gdal_ds.GetFileList()

    aff_wld_crs = gdal_ds.GetSpatialRef()
    has_geotransform = aff_wld_crs is not None

    filename = gdal_ds.GetDescription()
    main_band = gdal_ds.GetRasterBand(1)
    block_size = main_band.GetBlockSize()

    num_bands = gdal_ds.RasterCount
    width = gdal_ds.RasterXSize
    height = gdal_ds.RasterYSize

    ovr_count = main_band.GetOverviewCount()
    ifd_offset = main_band.GetMetadataItem('IFD_OFFSET', 'TIFF')
    if ifd_offset is not None:
        ifd_offset = int(ifd_offset)
    block_offset = main_band.GetMetadataItem('BLOCK_OFFSET_0_0', 'TIFF')
    structure = gdal_ds.GetMetadata("IMAGE_STRUCTURE")
    compress = structure.get("COMPRESSION", 'NONE')
    interleave = structure.get("INTERLEAVE", None)

    has_external_overview = (filename + '.ovr' in filelist)

    format_info = {
        'fpath': fpath,
        'filelist': filelist,
        'blocksize': block_size,
        'ovr_count': ovr_count,
        'ifd_offset': ifd_offset,
        'block_offset': block_offset,
        'compress': compress,
        'interleave': interleave,
        'has_external_overview': has_external_overview,
        'num_bands': num_bands,
        'has_geotransform': has_geotransform,
        'width': width,
        'height': height,
    }
    return format_info




[docs]
def ensure_transfered_geo_data(coco_dset, gids=None):
    for gid in ub.ProgIter(coco_dset.images(gids), desc='transfer metadata'):
        transfer_geo_metadata(coco_dset, gid)




[docs]
@profile
def transfer_geo_metadata(coco_dset, gid):
    """
    Transfer geo-metadata from source geotiffs to predicted feature images

    THIS FUNCITON MODIFIES THE IMAGE DATA ON DISK! BE CAREFUL!

    ASSUMES THAT EVERYTHING IS ALREADY ALIGNED

    Example:
        # xdoctest: +REQUIRES(env:DVC_DPATH)
        from geowatch.utils.kwcoco_extensions import *  # NOQA
        from geowatch.utils.util_data import find_dvc_dpath
        import kwcoco
        dvc_dpath = find_dvc_dpath()
        coco_fpath = dvc_dpath / 'drop1-S2-L8-aligned/combo_data.kwcoco.json'
        coco_dset = kwcoco.CocoDataset(coco_fpath)
        gid = coco_dset.images().peek()['id']

    Example:
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> import kwcoco
        >>> from geowatch.demo.smart_kwcoco_demodata import hack_seed_geometadata_in_dset
        >>> coco_dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral')
        >>> hack_seed_geometadata_in_dset(coco_dset, force=True, rng=0)
        >>> gid = 2
        >>> transfer_geo_metadata(coco_dset, gid)
        >>> fpath = join(coco_dset.bundle_dpath, coco_dset.coco_image(gid).primary_asset()['file_name'])
        >>> _ = ub.cmd('gdalinfo ' + fpath, verbose=1)
    """
    import geowatch
    from osgeo import gdal
    coco_img = coco_dset.coco_image(gid)

    assets_with_geo_info = {}
    assets_without_geo_info = {}

    asset_objs = list(coco_img.iter_asset_objs())
    for asset_idx, obj in enumerate(asset_objs):
        fname = obj.get('file_name', None)
        if fname is not None:
            fpath = join(coco_img.dset.bundle_dpath, fname)
            try:
                info = geowatch.gis.geotiff.geotiff_metadata(fpath)
                if info.get('crs_error', None) is not None:
                    raise Exception
            except Exception:
                assets_without_geo_info[asset_idx] = obj
            else:
                assets_with_geo_info[asset_idx] = (obj, info)

    warp_vid_from_geoimg = kwimage.Affine.eye()

    if assets_without_geo_info:
        if not assets_with_geo_info:
            class Found(Exception):
                pass
            try:
                # If an asset in our local image has no data, we can
                # check to see if anyone in the vide has data.
                # Check if anything in the video has geo-data
                video_id = coco_img.img['video_id']
                for other_gid in coco_dset.images(video_id=video_id):
                    if other_gid != gid:
                        other_coco_img = coco_dset.coco_image(other_gid)
                        for obj in other_coco_img.iter_asset_objs():
                            fname = obj.get('file_name', None)
                            if fname is not None:
                                fpath = join(coco_img.dset.bundle_dpath, fname)
                                try:
                                    # Try until we find an image with real CRS info
                                    info = geowatch.gis.geotiff.geotiff_metadata(fpath)
                                    if info.get('crs_error', None) is not None:
                                        raise Exception
                                except Exception:
                                    continue
                                else:
                                    raise Found
            except Found:
                assets_with_geo_info[-1] = (obj, info)
                warp_vid_from_geoimg = kwimage.Affine.coerce(other_coco_img.img['warp_img_to_vid'])
            else:
                raise exceptions.GeoMetadataNotFound(ub.paragraph(
                    '''
                    There are images without geo data, but no other data within
                    this image has transferable geo-data
                    '''))

        # Choose an object to register to (not sure if it matters which one)
        # choose arbitrary one for now.
        geo_asset_idx, (geo_obj, geo_info) = ub.peek(assets_with_geo_info.items())

        if geo_info['is_rpc']:
            raise NotImplementedError(
                'Not sure how to do this if the target has RPC information')

        warp_geoimg_from_geoaux = kwimage.Affine.coerce(
            geo_obj.get('warp_aux_to_img', None))
        warp_wld_from_geoaux = kwimage.Affine.coerce(geo_info['pxl_to_wld'])

        georef_crs_info = geo_info['wld_crs_info']
        georef_crs = georef_crs_info['type']

        img = coco_img.img
        warp_vid_from_img = kwimage.Affine.coerce(img['warp_img_to_vid'])

        # In case our reference is from another frame in the video
        warp_geoimg_from_vid = warp_vid_from_geoimg.inv()
        warp_geoaux_from_geoimg = warp_geoimg_from_geoaux.inv()
        warp_wld_from_img = (
            warp_wld_from_geoaux @
            warp_geoaux_from_geoimg @
            warp_geoimg_from_vid @
            warp_vid_from_img)

        for _asset_idx, obj in assets_without_geo_info.items():
            fname = obj.get('file_name', None)
            fpath = join(coco_img.dset.bundle_dpath, fname)

            warp_img_from_aux = kwimage.Affine.coerce(
                obj.get('warp_aux_to_img', None))

            warp_wld_from_aux = (
                warp_wld_from_img @ warp_img_from_aux)

            # Convert to gdal-style
            aff_geo_transform = warp_wld_from_aux.to_gdal()

            dst_ds = gdal.Open(fpath, gdal.GA_Update)
            if dst_ds is None:
                raise exceptions.GeoMetadataNotFound('error handling gdal')
            ret = dst_ds.SetGeoTransform(aff_geo_transform)
            if ret != 0:
                raise AssertionError(f'failed to set SetGeoTransform on {fpath}')
            ret = dst_ds.SetSpatialRef(georef_crs)
            if ret != 0:
                raise AssertionError(f'failed to set SetSpatialRef on {fpath}')
            dst_ds.FlushCache()
            dst_ds = None

        # Matt's transfer metadata code
        """
        geo_ds = gdal.Open(toafile)
        if geo_ds is None:
            log.error('Could not open image')
            sys.exit(1)
        transform = geo_ds.GetGeoTransform()
        proj = geo_ds.GetProjection()
        dst_ds = gdal.Open(boafile, gdal.GA_Update)
        dst_ds.SetGeoTransform(transform)
        dst_ds.SetProjection(proj)
        geo_ds, dst_ds = None, None
        """



def _search_video_for_other_geo_assets(coco_img, coco_dset):
    ### TODO: make use of me as a fallback in the transfer_geo_metadata2
    class Found(Exception):
        pass
    import geowatch
    asset_with_geo_info = None
    gid = coco_img.img['id']
    try:
        # If an asset in our local image has no data, we can
        # check to see if anyone in the vide has data.
        # Check if anything in the video has geo-data
        video_id = coco_img.img['video_id']
        for other_gid in coco_dset.images(video_id=video_id):
            if other_gid != gid:
                other_coco_img = coco_dset.coco_image(other_gid)
                for obj in other_coco_img.iter_asset_objs():
                    fname = obj.get('file_name', None)
                    if fname is not None:
                        fpath = join(coco_img.dset.bundle_dpath, fname)
                        try:
                            # Try until we find an image with real CRS info
                            info = geowatch.gis.geotiff.geotiff_metadata(fpath)
                            if info.get('crs_error', None) is not None:
                                raise Exception
                        except Exception:
                            continue
                        else:
                            raise Found
    except Found:
        warp_vid_from_geoimg = kwimage.Affine.coerce(other_coco_img.img['warp_img_to_vid'])
        asset_with_geo_info = (obj, info, warp_vid_from_geoimg)
    else:
        raise exceptions.GeoMetadataNotFound(ub.paragraph(
            '''
            There are images without geo data, but no other data within
            this image has transferable geo-data
            '''))
    return asset_with_geo_info


def _find_geotiffs_without_metadata(coco_img):
    pass



[docs]
@profile
def transfer_geo_metadata2(coco_img, dry=0):
    """
    Second version of this function to work in process mode

    Transfer geo-metadata from source geotiffs to predicted feature images

    THIS FUNCITON MODIFIES THE IMAGE DATA ON DISK! BE CAREFUL!

    ASSUMES THAT EVERYTHING IS ALREADY ALIGNED
    """
    import geowatch
    # from osgeo import gdal
    import affine
    from os.path import exists

    assets_with_geo_info = {}
    assets_without_geo_info = {}

    asset_objs = list(coco_img.iter_asset_objs())
    for asset_idx, obj in enumerate(asset_objs):
        fname = obj.get('file_name', None)
        if fname is not None:
            fpath = join(coco_img.bundle_dpath, fname)
            if exists(fpath):
                try:
                    info = geowatch.gis.geotiff.geotiff_metadata(fpath)
                    if info.get('crs_error', None) is not None:
                        raise Exception
                except Exception:
                    assets_without_geo_info[asset_idx] = obj
                else:
                    assets_with_geo_info[asset_idx] = (obj, info)

    warp_vid_from_geoimg = kwimage.Affine.eye()

    assert dry == 0, 'cant work in dry mode yet'

    actions = []
    if assets_without_geo_info:
        if not assets_with_geo_info:
            raise exceptions.GeoMetadataNotFound('No other asset in this image has geo metadata')

        # Choose an object to register to (not sure if it matters which one)
        # choose arbitrary one for now.
        geo_asset_idx, (geo_obj, geo_info) = ub.peek(assets_with_geo_info.items())

        if geo_info['is_rpc']:
            raise NotImplementedError(
                'Not sure how to do this if the target has RPC information')

        warp_geoimg_from_geoaux = kwimage.Affine.coerce(
            geo_obj.get('warp_aux_to_img', None))
        warp_wld_from_geoaux = kwimage.Affine.coerce(geo_info['pxl_to_wld'])

        georef_crs_info = geo_info['wld_crs_info']
        georef_crs = georef_crs_info['type']

        img = coco_img.img
        warp_vid_from_img = kwimage.Affine.coerce(img['warp_img_to_vid'])

        # In case our reference is from another frame in the video
        warp_geoimg_from_vid = warp_vid_from_geoimg.inv()
        warp_geoaux_from_geoimg = warp_geoimg_from_geoaux.inv()
        warp_wld_from_img = (
            warp_wld_from_geoaux @
            warp_geoaux_from_geoimg @
            warp_geoimg_from_vid @
            warp_vid_from_img)

        for _asset_idx, obj in assets_without_geo_info.items():
            fname = obj.get('file_name', None)
            fpath = join(coco_img.bundle_dpath, fname)

            warp_img_from_aux = kwimage.Affine.coerce(
                obj.get('warp_aux_to_img', None))

            warp_wld_from_aux = (
                warp_wld_from_img @ warp_img_from_aux)

            # Convert to gdal-style
            a, b, c, d, e, f = warp_wld_from_aux.matrix.ravel()[0:6]
            aff = affine.Affine(a, b, c, d, e, f)
            aff_geo_transform = aff.to_gdal()

            task = {
                'fpath': fpath,
                'aff_geo_transform': aff_geo_transform,
                # TODO: can't pickle the srs.
                # need to figure out how to do this.
                'georef_crs': georef_crs,
            }
            actions.append({
                'fpath': fpath,
            })
            if not dry:
                _execute_transfer_task(task)
    # Just return what we did
    return actions



def _execute_transfer_task(task):
    fpath = task['fpath']
    aff_geo_transform = task['aff_geo_transform']
    georef_crs = task['georef_crs']
    from osgeo import gdal
    dst_ds = gdal.Open(fpath, gdal.GA_Update)
    if dst_ds is None:
        raise Exception('error handling gdal')
    ret = dst_ds.SetGeoTransform(aff_geo_transform)
    assert ret == 0
    ret = dst_ds.SetSpatialRef(georef_crs)
    assert ret == 0
    dst_ds.FlushCache()
    dst_ds = None


# unused
def _make_coco_img_from_geotiff(tiff_fpath, name=None):
    """
    Example:
        >>> from geowatch.demo.landsat_demodata import grab_landsat_product  # NOQA
        >>> product = grab_landsat_product()
        >>> tiffs = product['bands'] + [product['meta']['bqa']]
        >>> tiff_fpath = product['bands'][0]
        >>> name = None
        >>> img = _make_coco_img_from_geotiff(tiff_fpath)
        >>> print('img = {}'.format(ub.urepr(img, nl=1)))
    """
    obj = {}
    if name is not None:
        obj['name'] = name

    bundle_dpath = '.'
    obj = {
        'file_name': tiff_fpath
    }
    _populate_canvas_obj(bundle_dpath, obj)
    return obj


def _sensor_channel_hueristic(sensor_coarse, num_bands):
    """
    Given a sensor and the number of bands in the image, return likely channel
    codes for the image

    Note these are "pseudo-harmonized" by common_name, but not harmonized
    that is, one sensor's 'red' is roughly similar to another's but not corrected to match.
    Bands without a common_name will have a sensor-unique prefix appended to prevent this behavior.
    """
    from geowatch.utils.util_bands import WORLDVIEW2_PAN, WORLDVIEW2_MS4, WORLDVIEW2_MS8, SENTINEL2, LANDSAT8, LANDSAT7  # NOQA

    def code(bands, prefix):
        names = []
        for band_dict in bands:
            if 'common_name' in band_dict:
                names.append(band_dict['common_name'])
            else:
                names.append(prefix + band_dict['name'])
        return '|'.join(names)

    err = 0
    if sensor_coarse == 'WV':
        if num_bands == 1:
            channels = 'panchromatic'
        elif num_bands == 3:
            channels = 'r|g|b'
        elif num_bands == 4:
            channels = code(WORLDVIEW2_MS4, 'w')
        elif num_bands == 8:
            channels = code(WORLDVIEW2_MS8, 'w')
            #channels = 'wv1|wv2|wv3|wv4|wv5|wv6|wv7|wv8'
            # channels = 'cb|b|g|y|r|wv6|wv7|wv8'
        else:
            err = 1
    elif sensor_coarse == 'S2':
        if num_bands == 1:
            channels = 'gray'
        elif num_bands == 3:
            channels = 'r|g|b'
        elif num_bands == 13:
            channels = code(SENTINEL2, 's')
            # channels = 's1|s2|s3|s4|s4|s6|s7|s8|s8a|s9|s10|s11|s12'
            # channels = 'cb|b|g|r|s4|s6|s7|s8|s8a|s9|s10|s11|s12'
        else:
            err = 1
    elif sensor_coarse in {'LC', 'L8', 'LS'}:
        if num_bands == 1:
            channels = 'panchromatic'
        elif num_bands == 3:
            channels = 'r|g|b'
        elif num_bands == 11:
            channels = code(LANDSAT8, 'l8')
            # channels = 'lc1|lc2|lc3|lc4|lc5|lc6|lc7|lc8|lc9|lc10|lc11'
            # channels = 'cb|b|g|r|lc5|lc6|lc7|pan|lc9|lc10|lc11'
        else:
            err = 1
    elif sensor_coarse in {'LE', 'L7'}:
        if num_bands == 1:
            channels = 'panchromatic'
        elif num_bands == 3:
            channels = 'r|g|b'
        elif num_bands == 8:
            channels = code(LANDSAT7, 'l7')
        else:
            err = 1
    else:
        err = 1
    if err:
        msg = f'sensor_coarse={sensor_coarse}, num_bands={num_bands}'
        print('ERROR: mgs = {!r}'.format(msg))
        raise NotImplementedError(msg)
    return channels


def _introspect_num_bands(fpath):
    from geowatch.utils import util_kwimage
    shape = util_kwimage.load_image_shape(fpath, backend=['gdal', 'pil'])
    if len(shape) == 1:
        return 1
    elif len(shape) == 3:
        return shape[2]
    else:
        raise Exception(f'unknown format, fpath={fpath}, shape={shape}')


def _num_band_hueristic(num_bands):
    if num_bands == 1:
        channels = 'gray'
    elif num_bands == 3:
        channels = 'r|g|b'
    elif num_bands == 4:
        channels = 'r|g|b|a'
    else:
        raise Exception(f'num_bands={num_bands}')
    return channels


@profile
def _recompute_auxiliary_transforms(img):
    """
    Uses geotiff info to repopulate metadata
    """
    import kwimage
    from kwcoco.coco_image import CocoImage
    coco_img = CocoImage(img)
    base = coco_img.primary_asset(requires=['warp_to_wld'])
    if base is None:
        import warnings
        warnings.warn(
            'Cannot recompute auxiliary/asset transforms if no asset has a '
            ' warp_to_wld attribute')
        return
    try:
        warp_wld_from_img = kwimage.Affine.coerce(base['warp_to_wld'])
    except Exception:
        print('img = {}'.format(ub.urepr(img, nl=2)))
        raise

    warp_img_from_wld = warp_wld_from_img.inv()
    img.update(ub.dict_isect(base, {
        'geos_corners',
        'wld_crs_info',
        'width', 'height',
        'wgs84_to_wld',
        'wld_to_pxl',
    }))
    if 'width' not in img and 'height' not in img:
        if 'width' in base and 'height' in base:
            img['width'] = base['width']
            img['height'] = base['height']

    for asset in coco_img.assets:
        if 'warp_to_wld' in asset:
            warp_wld_from_asset = kwimage.Affine.coerce(asset['warp_to_wld'])
            warp_img_from_asset = warp_img_from_wld @ warp_wld_from_asset
            asset['warp_aux_to_img'] = warp_img_from_asset.concise()
        else:
            import warnings
            warnings.warn(
                'An asset could not have its warp_aux_to_img transform updated '
                'because it does not have a warp_to_wld attribute')



[docs]
def coco_channel_stats(coco_dset):
    """
    Return information about what channels are available in the dataset

    Example:
        >>> import kwcoco
        >>> import ubelt as ub
        >>> import geowatch
        >>> coco_dset = geowatch.coerce_kwcoco('vidshapes-geowatch')
        >>> from geowatch.utils import kwcoco_extensions
        >>> info = kwcoco_extensions.coco_channel_stats(coco_dset)
        >>> print(ub.urepr(info, nl=3))
    """
    import kwcoco
    from kwcoco.coco_image import CocoImage

    sensor_hist = ub.ddict(lambda: 0)
    chan_hist = ub.ddict(lambda: 0)
    single_chan_hist = ub.ddict(lambda: 0)
    sensorchan_hist = ub.ddict(lambda: ub.ddict(lambda: 0))
    sensorchan_hist2 = ub.ddict(lambda: 0)
    for _gid, img in coco_dset.index.imgs.items():
        coco_img: CocoImage = coco_dset.coco_image(_gid)
        channels = []
        for obj in coco_img.iter_asset_objs():
            # TODO: perhaps replace unknown-chan with a "*"?
            channels.append(obj.get('channels', 'unknown-chan'))
        channels = sorted(channels)
        chan = ','.join(channels)
        sensor = img.get('sensor_coarse', '*')
        chan_hist[chan] += 1
        sensor_hist[sensor] += 1
        sensorchan_hist[sensor][chan] += 1
        # TODO: replace other usages with sensorchan_hist2 then remove other
        # uses, and rename sensorchan_hist2 to sensorchan_hist
        sensorchan = f'{sensor}:({chan})'
        sensorchan_hist2[sensorchan] += 1

        for single_chan in kwcoco.ChannelSpec(chan).unique():
            single_chan_hist[single_chan] += 1

    CS = kwcoco.ChannelSpec
    FS = kwcoco.FusedChannelSpec
    osets = [CS.coerce(c).fuse().to_oset() for c in chan_hist]
    if len(osets) == 0:
        common_channels = FS.coerce([])
        all_channels = FS.coerce([])
        all_sensorchan = kwcoco.SensorChanSpec.coerce('')
    else:
        common_channels = FS.coerce(list(ub.oset.intersection(*osets))).concise()
        all_channels = FS.coerce(list(ub.oset.union(*osets))).concise()
        all_sensorchan = kwcoco.SensorChanSpec.late_fuse(*[
            kwcoco.SensorChanSpec.coerce(s)
            for s in sensorchan_hist2.keys()]).concise()

    info = {
        'single_chan_hist': single_chan_hist,
        'chan_hist': chan_hist,
        'sensor_hist': sensor_hist,
        'sensorchan_hist': sensorchan_hist,
        'sensorchan_hist2': sensorchan_hist2,
        'common_channels': common_channels,
        'all_channels': all_channels,
        'all_sensorchan': all_sensorchan,
    }
    return info




[docs]
class TrackidGenerator:
    """
    Keep track of which trackids have been used and generate new ones on demand

    TODO merge this into kwcoco as something like CocoDataset.next_trackid()?
    Or expose whatever mechanism is already generating new aids, gids, etc
    """


[docs]
    def update_generator(self):
        if self.dset is None:
            raise Exception('cannot update unconnected generator. Manually exclude trackids')
        self.used_trackids.update(self.dset.index.trackid_to_aids.keys())
        new_generator = filter(lambda x: x not in self.used_trackids,
                               itertools.count(start=next(self.generator)))
        self.generator = new_generator



[docs]
    def exclude_trackids(self, trackids):
        if 0 and self.used_trackids.intersection(trackids):
            print(f'warning: CocoDataset {self.dset.tag} with trackids '
                  f'{self.used_trackids} already has trackids in {trackids}')
        self.used_trackids.update(trackids)


    def __init__(self, coco_dset=None):
        self.dset = coco_dset
        self.used_trackids = set()
        self.generator = itertools.count(start=1)
        if coco_dset is not None:
            self.update_generator()

    def __next__(self):
        next_id = next(self.generator)
        while next_id in self.used_trackids:
            next_id = next(self.generator)
        return next_id




[docs]
def coco_img_wld_info(coco_img):
    """
    TODO: candidate for kwcoco.CocoImage method
    """
    from geowatch.utils import util_gis
    asset = coco_img.primary_asset(requires=['geos_corners'])
    if asset is None:
        raise KeyError(f'Geo-referenced asset not found for {coco_img}')
    if 'wld_to_pxl' in asset:
        warp_aux_from_wld = kwimage.Affine.coerce(asset['wld_to_pxl'])
    elif 'warp_to_wld' in asset:
        warp_aux_from_wld = kwimage.Affine.coerce(asset['warp_to_wld']).inv()
    else:
        print('asset = {}'.format(ub.urepr(asset, nl=1)))
        raise KeyError('Asset has no known transform to world coordinates')

    if 'wld_crs_info' in asset:
        wld_crs_info = asset['wld_crs_info']
    elif 'wld_crs_info' in coco_img.img:
        wld_crs_info = coco_img.img['wld_crs_info']
    else:
        print('asset = {}'.format(ub.urepr(asset, nl=1)))
        raise KeyError('Asset / image has no registered world CRS info')
    # TODO: traditional / authority check? Or assume traditional?
    # img['wld_crs_info']['axis_mapping'] == 'OAMS_TRADITIONAL_GIS_ORDER'
    wld_crs = util_gis.coerce_crs(wld_crs_info)

    warp_img_from_aux = kwimage.Affine.coerce(asset.get('warp_aux_to_img', None))
    warp_img_from_wld = warp_img_from_aux @ warp_aux_from_wld
    return warp_img_from_wld, wld_crs




[docs]
def warp_annot_segmentations_from_geos(coco_dset):
    """
    Uses the segmentation_geos property (which should be crs84) and warps it
    into image space based on available geo data.

    Args:
        coco_dset (kwcoco.CocoDataset):
            a CocoDataset where annotations contain a "segmentation_geos"
            attribute. The "segmentation" attribute will be modified in-place.

    Ignore:
        # xdoctest: +REQUIRES(env:DVC_DPATH)
        from geowatch.utils.kwcoco_extensions import *  # NOQA
        import geowatch
        dvc_dpath = geowatch.find_dvc_dpath(tags='phase2_data', hardware='auto')
        coco_fpath = (dvc_dpath / 'Drop6') / 'imganns-KR_R001.kwcoco.json'
        import kwcoco
        coco_dset = kwcoco.CocoDataset(coco_fpath)

    Example:
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> import geowatch
        >>> # creating demodata also uses warp_annot_segmentations_to_geos
        >>> orig_dset = geowatch.coerce_kwcoco('geowatch-msi', geodata=True)
        >>> coco_dset = orig_dset.copy()
        >>> for ann in coco_dset.annots().objs:
        ...     ann.pop('segmentation', None)
        >>> warp_annot_segmentations_from_geos(coco_dset)
        >>> errors = []
        >>> for aid in coco_dset.annots():
        >>>     ann1 = orig_dset.index.anns[aid]
        >>>     ann2 = coco_dset.index.anns[aid]
        >>>     poly1 = kwimage.MultiPolygon.coerce(ann1['segmentation'])
        >>>     poly2 = kwimage.MultiPolygon.coerce(ann2['segmentation'])
        >>>     worked = (poly1.is_invalid() and poly2.is_invalid()) or poly1.iou(poly2) > 0.99
        >>>     errors.append(not worked)
        >>> if sum(errors) > 0:
        >>>     # FIXME: THERE SHOULD BE NO ERRORS HERE. PUNTING TO MAKE
        >>>     # THE DASHBOARDS GREEN, BUT THIS SHOULD BE REVISITED
        >>>     #raise AssertionError('transforms should have cyclic consistency')
        >>>     warnings.warn('Transforms should have cyclic consistency, but some dont. This should be an error, but we will allow it for now')
        >>>     assert (sum(errors) / len(errors)) < 0.5, 'more than half of the data does not have cyclic consistency'

    Ignore:
        # TODO: looks like this fails
        check_geo_transform_consistency(coco_dset)
    """
    import pandas as pd
    import geopandas as gpd
    from geowatch.utils import util_gis
    from shapely.geometry import shape
    crs84 = util_gis.get_crs84()

    gdfs = []
    for gid in coco_dset.images().gids:
        aids = []
        for ann in coco_dset.annots(image_id=gid).objs:
            if 'segmentation_geos' in ann:
                aids.append(ann['id'])
        annots = coco_dset.annots(aids)
        if len(annots) > 0:
            # TODO: check crs properties (probably always crs84)
            sseg_geos = annots.lookup('segmentation_geos')
            warp_img_from_wld, wld_crs = coco_img_wld_info(coco_dset.coco_image(gid))
            gdf = gpd.GeoDataFrame(
                dict(
                    geometry=[shape(p) for p in sseg_geos],
                    aid=annots.aids,
                ),
                crs=crs84)
            gdf = gdf.join(
                pd.DataFrame(dict(
                    gid=gid,
                    wld_crs=wld_crs,
                    warp_img_from_wld=[warp_img_from_wld.to_shapely()],
                )),
                how='cross'
            )
            gdfs.append(gdf)
    if len(gdfs) > 0:
        gdf = pd.concat(gdfs, axis=0, ignore_index=True)

        # Convert to string so we can crs as a group key
        gdf['wld_crs_str'] = gdf['wld_crs'].apply(lambda crs: crs.to_string())

        crs_groups = gdf.groupby('wld_crs_str')[
            ['geometry', 'wld_crs', 'warp_img_from_wld', 'aid']]
        for wld_crs_str, grp in crs_groups:
            wld_geom = grp.to_crs(grp['wld_crs'].iloc[0])
            # Some images will have the same transform, which makes the
            # warp more efficient.
            for img_from_wld, subgrp in wld_geom.groupby('warp_img_from_wld'):
                aids = subgrp['aid']
                img_geoms = subgrp.affine_transform(img_from_wld)
                for aid, img_geom in zip(aids, img_geoms):
                    sseg = kwimage.MultiPolygon.from_shapely(img_geom)
                    ann = coco_dset.anns[aid]
                    ann['segmentation'] = list(sseg.to_coco(style='new'))
                    ann['bbox'] = list(sseg.bounding_box().quantize().to_coco())[0]


    # if __debug__:
    #     for ann in coco_dset.dataset['annotations']:
    #         assert 'bbox' in ann
    #         assert 'segmentation' in ann



[docs]
def warp_annot_segmentations_to_geos(coco_dset):
    """

    Example:
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> import geowatch
        >>> orig_dset = geowatch.coerce_kwcoco('geowatch-msi', geodata=True)
        >>> coco_dset = orig_dset.copy()
        >>> for ann in coco_dset.annots().objs:
        ...     ann.pop('segmentation_geos', None)
        >>> warp_annot_segmentations_to_geos(coco_dset)
        >>> errors = []
        >>> for aid in ub.ProgIter(coco_dset.annots()):
        >>>     ann1 = orig_dset.index.anns[aid]
        >>>     ann2 = coco_dset.index.anns[aid]
        >>>     poly1 = kwimage.MultiPolygon.from_geojson(ann1['segmentation_geos'])
        >>>     poly2 = kwimage.MultiPolygon.from_geojson(ann2['segmentation_geos'])
        >>>     worked = (poly1.is_invalid() and poly2.is_invalid()) or poly1.iou(poly2) > 0.99
        >>>     errors.append(not worked)
        >>> if sum(errors) > 0:
        >>>     # FIXME: THERE SHOULD BE NO ERRORS HERE. PUNTING TO MAKE
        >>>     # THE DASHBOARDS GREEN, BUT THIS SHOULD BE REVISITED
        >>>     #raise AssertionError('transforms should have cyclic consistency')
        >>>     warnings.warn('Transforms should have cyclic consistency, but some dont. This should be an error, but we will allow it for now')
        >>>     assert (sum(errors) / len(errors)) < 0.5, 'more than half of the data does not have cyclic consistency'
    """
    import pandas as pd
    import geopandas as gpd
    from geowatch.utils import util_gis

    crs84 = util_gis.get_crs84()
    gdfs = []
    for gid in coco_dset.images().gids:
        aids = []
        for ann in coco_dset.annots(image_id=gid).objs:
            if 'segmentation' in ann:
                aids.append(ann['id'])
        annots = coco_dset.annots(aids)
        if len(annots) > 0:
            # TODO: check crs properties (probably always crs84)
            warp_img_from_wld, wld_crs = coco_img_wld_info(coco_dset.coco_image(gid))
            ssegs = annots.detections.data['segmentations']
            warp_wld_from_img = warp_img_from_wld.inv()

            gdf = gpd.GeoDataFrame(
                dict(
                    geometry=[p.to_shapely() for p in ssegs],
                    aid=annots.aids,
                ),
                crs=None,
            )
            gdf = gdf.join(
                pd.DataFrame(dict(
                    gid=gid,
                    wld_crs=wld_crs,
                    warp_wld_from_img=[warp_wld_from_img.to_shapely()]
                )),
                how='cross'
            )
            gdfs.append(gdf)
    if len(gdfs) > 0:
        gdf = pd.concat(gdfs, axis=0, ignore_index=True)

        crs84_info = {
            'axis_mapping': 'OAMS_TRADITIONAL_GIS_ORDER',
            'auth': ('EPSG', '4326')
        }
        # Convert to string so we can crs as a group key
        gdf['wld_crs_str'] = gdf['wld_crs'].apply(lambda crs: crs.to_string())
        crs_groups = gdf.groupby('wld_crs_str')[
            ['geometry', 'wld_crs', 'warp_wld_from_img', 'aid']]

        gdf.loc[:, 'crs84_geoms'] = None
        for wld_crs_str, grp in crs_groups:
            wld_crs = grp['wld_crs'].iloc[0]
            for wld_from_img, subgrp in grp.groupby('warp_wld_from_img'):
                wld_geoms = subgrp.affine_transform(wld_from_img)
                wld_geoms = wld_geoms.set_crs(wld_crs)
                crs84_geoms = wld_geoms.to_crs(crs84)
                # this geojson rep also contains bboxes, but don't need to use them
                geojsons = crs84_geoms.__geo_interface__['features']
                for aid, geoj in zip(subgrp['aid'], geojsons):
                    # To be compatible with other kwcoco files (might need to
                    # allow for an entire feature to live here)
                    sseg_geos = geoj['geometry']
                    sseg_geos.setdefault('properties', {})
                    sseg_geos['properties']['crs_info'] = crs84_info
                    ann = coco_dset.anns[aid]
                    ann['segmentation_geos'] = sseg_geos



# def coco_geopandas_images(coco_dset):
#     """
#     TODO:
#         - [ ] This is unused in this file and thus should move to the dev
#         folder or somewhere else for to keep useful scratch work.
#     """
#     import geopandas as gpd
#     df_input = []
#     for gid, img in coco_dset.index.imgs.items():
#         info  = img['geotiff_metadata']
#         kw_img_poly = kwimage.Polygon(exterior=info['geos_corners'])
#         sh_img_poly = kw_img_poly.to_shapely()
#         df_input.append({
#             'gid': gid,
#             'name': img.get('name', None),
#             'video_id': img.get('video_id', None),
#             'bounds': sh_img_poly,
#         })
#     img_geos_df = gpd.GeoDataFrame(df_input, geometry='bounds', crs='epsg:4326')
#     return img_geos_df



[docs]
def visualize_rois(coco_dset, zoom=None):
    """
    Matplotlib visualization of image and annotation regions on a world map

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> from geowatch.demo.smart_kwcoco_demodata import demo_kwcoco_with_heatmaps
        >>> coco_dset = demo_kwcoco_with_heatmaps(num_videos=1)
        >>> coco_populate_geo_heuristics(coco_dset, overwrite=True)
        >>> visualize_rois(coco_dset, zoom=0)

    Example:
        >>> # xdoctest: +REQUIRES(env:DVC_DPATH)
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> from geowatch.utils.util_data import find_dvc_dpath
        >>> import kwcoco
        >>> dvc_dpath = find_dvc_dpath()
        >>> coco_fpath = dvc_dpath / 'drop1-S2-L8-aligned/combo_data.kwcoco.json'
        >>> coco_dset = kwcoco.CocoDataset(coco_fpath)
        >>> coco_populate_geo_heuristics(coco_dset, overwrite=True, workers=4)
        >>> visualize_rois(coco_dset)
    """
    import geopandas as gpd
    cov_image_gdf = covered_image_geo_regions(coco_dset)
    annot_gdf = covered_annot_geo_regions(coco_dset)

    import kwplot
    kwplot.autompl()

    wld_map_gdf = gpd.read_file(
        gpd.datasets.get_path('naturalearth_lowres')
    )
    ax = wld_map_gdf.plot()

    def safe_centroids(gdf):
        return gdf.to_crs('+proj=cea').centroid.to_crs(gdf.crs)

    cov_centroids = safe_centroids(cov_image_gdf)
    cov_image_gdf.plot(ax=ax, facecolor='none', edgecolor='green', alpha=0.5)
    cov_centroids.plot(ax=ax, marker='o', facecolor='green', alpha=0.5)
    # img_centroids = img_poly_gdf.geometry.centroid
    # img_poly_gdf.plot(ax=ax, facecolor='none', edgecolor='red', alpha=0.5)
    # img_centroids.plot(ax=ax, marker='o', facecolor='red', alpha=0.5)

    annot_centroids = safe_centroids(annot_gdf)
    annot_gdf.plot(ax=ax, facecolor='none', edgecolor='orange', alpha=0.5)
    annot_centroids.plot(ax=ax, marker='o', facecolor='orange', alpha=0.5)

    if zoom is not None:
        sh_zoom_roi = cov_image_gdf.geometry.iloc[0]
        kw_zoom_roi = kwimage.Polygon.from_shapely(sh_zoom_roi)
        bb = kw_zoom_roi.bounding_box()
        min_x, min_y, max_x, max_y = bb.scale(1.5, about='center').to_ltrb().data[0]
        ax.set_xlim(min_x, max_x)
        ax.set_ylim(min_y, max_y)




[docs]
def covered_image_geo_regions(coco_dset, merge=False):
    """
    Find the intersection of all image bounding boxes in world space
    to see what spatial regions are covered by the imagery.

    Returns:
        gpd.GeoDataFrame

    Example:
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> from geowatch.demo.smart_kwcoco_demodata import demo_kwcoco_with_heatmaps
        >>> coco_dset = demo_kwcoco_with_heatmaps(num_frames=1, num_videos=1)
        >>> coco_populate_geo_heuristics(coco_dset, overwrite=True)
        >>> img = coco_dset.index.imgs[1]
        >>> cov_image_gdf = covered_image_geo_regions(coco_dset)

    Example:
        >>> # Check it works with empty data frame
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> coco_dset = kwcoco.CocoDataset()
        >>> cov_image_gdf1 = covered_image_geo_regions(coco_dset, merge=False)
        >>> cov_image_gdf2 = covered_image_geo_regions(coco_dset, merge=True)
        >>> assert len(cov_image_gdf1) == 0
        >>> assert len(cov_image_gdf2) == 0
    """
    import geopandas as gpd
    from shapely import ops
    import shapely
    # import geowatch
    rows = []
    columns = [
        'geometry', 'date_captured', 'name', 'height', 'width', 'video_id',
        'image_id', 'frame_index']
    for gid, img in coco_dset.index.imgs.items():
        if 'geos_corners' in img:
            geos_corners = img['geos_corners']
        else:
            coco_img = coco_dset.coco_image(img['id'])
            asset = coco_img.primary_asset(requires=['geos_corners'])
            geos_corners = asset['geos_corners']
        geos_crs_info = geos_corners.get('properties').get('crs_info', None)
        if geos_crs_info is not None:
            assert geos_crs_info['axis_mapping'] == 'OAMS_TRADITIONAL_GIS_ORDER'
            assert list(geos_crs_info['auth']) == ['EPSG', '4326']
        sh_img_poly = shapely.geometry.shape(geos_corners)
        rows.append({
            'geometry': sh_img_poly,
            'date_captured': img.get('date_captured', None),
            'name': img.get('name', None),
            'height': img.get('height', None),
            'width': img.get('width', None),
            'video_id': img.get('video_id', None),
            'image_id': gid,
            'frame_index': img.get('frame_index', None),
        })

    from geowatch.utils import util_gis
    cov_poly_crs = util_gis.get_crs84()
    if merge:
        # df_input = [
        #     {'gid': gid, 'bounds': poly, 'name': coco_dset.index.imgs[gid].get('name', None),
        #      'video_id': coco_dset.index.imgs[gid].get('video_id', None) }
        #     for gid, poly in gid_to_poly.items()
        # ]
        # img_geos = gpd.GeoDataFrame(df_input, geometry='bounds', crs='epsg:4326')

        # Can merge like this, but we lose membership info
        # coverage_df = gpd.GeoDataFrame(img_geos.unary_union)
        coverage_rois_ = ops.unary_union([row['geometry'] for row in rows])
        if hasattr(coverage_rois_, 'geoms'):
            # Iteration over shapely objects was deprecated, test for geoms
            # attribute instead.
            coverage_rois = list(coverage_rois_.geoms)
        else:
            coverage_rois = [coverage_rois_]
        # geopandas uses traditional crs mappings
        cov_image_gdf = gpd.GeoDataFrame(
            {'geometry': coverage_rois},
            geometry='geometry', crs=cov_poly_crs)
    else:
        cov_image_gdf = gpd.GeoDataFrame(rows, geometry='geometry',
                                         columns=columns, crs=cov_poly_crs)

    return cov_image_gdf




[docs]
def covered_video_geo_regions(coco_dset):
    """
    Compute CRS84 bounds for each video in the coco dataset.

    Returns:
        gpd.GeoDataFrame

    Example:
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> from geowatch.demo.smart_kwcoco_demodata import demo_kwcoco_with_heatmaps
        >>> coco_dset = demo_kwcoco_with_heatmaps(num_frames=1, num_videos=1)
        >>> # coco_populate_geo_heuristics(coco_dset, overwrite=True)
        >>> # video_gdf = covered_video_geo_regions(coco_dset)
    """
    import geopandas as gpd
    from geowatch.utils import util_gis

    # TODO: build this more efficiently if possible.

    # if 0:
    # import geowatch
    rows = []
    for video_id, video in coco_dset.index.videos.items():
        if 'valid_region_geos' in video:
            crs84_poly = kwimage.MultiPolygon.coerce(video['valid_region_geos']).to_shapely()
        else:
            vidspace_poly = kwimage.Boxes(
                [[0, 0, video['width'], video['height']]], 'xywh').to_polygons()[0]
            if 'warp_wld_to_vid' in video:
                auth = video['wld_crs_info']['auth']
                vid_from_wld = kwimage.Affine.coerce(video['warp_wld_to_vid'])
                wld_form_vid = vid_from_wld.inv()
                wld_poly = vidspace_poly.warp(wld_form_vid)
                import pyproj
                from shapely.ops import transform
                crs = pyproj.CRS(':'.join(auth))
                crs84 = util_gis.get_crs84()
                project = pyproj.Transformer.from_crs(crs, crs84, always_xy=True).transform
                wld_poly_ = wld_poly.to_shapely()
                crs84_poly = transform(project, wld_poly_)
            else:
                raise NotImplementedError('We dont have a way to get the geo bounds for a video')
        gids = coco_dset.index.vidid_to_gids[video_id]
        if gids:
            start_gid = gids[0]
            stop_gid = gids[-1]
            start_img = coco_dset.coco_image(start_gid)
            stop_img = coco_dset.coco_image(stop_gid)
            start_date = start_img.img['date_captured']
            end_date = stop_img.img['date_captured']
        else:
            warnings.warn('video does not have any images')
            continue

        row = {
            'video_name': video['name'],
            'video_id': video['id'],
            'geometry': crs84_poly,
            'start_date': start_date,
            'end_date': end_date,
        }
        rows.append(row)

    crs84 = util_gis.get_crs84()
    video_gdf = gpd.GeoDataFrame(rows, geometry='geometry', crs=crs84)
    return video_gdf




[docs]
def covered_annot_geo_regions(coco_dset, merge=False):
    """
    Given a dataset find spatial regions of interest that contain annotations
    """
    import shapely
    import geopandas as gpd
    from shapely import ops
    aid_to_poly = {}
    for aid, ann in coco_dset.anns.items():
        ann_goes = ann['segmentation_geos']
        # TODO: assert the segmentation_geos CRS is (geojson - WGS84-traditional)
        if ann_goes is not None:
            sh_poly = shapely.geometry.shape(ann_goes)
            aid_to_poly[aid] = sh_poly

    # annot_crs = 'epsg:4326'
    from geowatch.utils import util_gis
    annot_crs = util_gis.get_crs84()
    # annot_crs = 'OGC:CRS84'
    if merge:
        gid_to_rois = {}
        for gid, aids in coco_dset.index.gid_to_aids.items():
            if len(aids):
                sh_annot_polys = ub.dict_subset(aid_to_poly, aids)
                sh_annot_polys_ = [p.buffer(0) for p in sh_annot_polys.values()]
                sh_annot_polys_ = [p.buffer(0.000001) for p in sh_annot_polys_]

                # What CRS should we be doing this in? Is WGS84 OK?
                # Should we switch to UTM?
                img_rois_ = ops.unary_union(sh_annot_polys_)
                try:
                    img_rois = list(img_rois_.geoms)
                except Exception:
                    img_rois = [img_rois_]

                kw_img_rois = [
                    kwimage.Polygon.from_shapely(p.convex_hull).bounding_box().to_polygons()[0]
                    for p in img_rois]
                sh_img_rois = [p.to_shapely() for p in kw_img_rois]
                gid_to_rois[gid] = sh_img_rois

        # TODO: if there are only midly overlapping regions, we should likely split
        # them up. We can also group by UTM coordinates to reduce computation.
        sh_rois_ = ops.unary_union([
            p.buffer(0) for rois in gid_to_rois.values()
            for p in rois
        ])
        try:
            sh_rois = list(sh_rois_.geoms)
        except Exception:
            sh_rois = [sh_rois_]
        # geopandas uses traditional crs mappings
        cov_annot_gdf = gpd.GeoDataFrame(
            {'geometry': sh_rois},
            geometry='geometry', crs=annot_crs)
    else:
        sh_polys = list(aid_to_poly.values())
        aids = list(aid_to_poly.keys())
        cov_annot_gdf = gpd.GeoDataFrame(
            {'geometry': sh_polys, 'aids': aids},
            geometry='geometry', crs=annot_crs)
    return cov_annot_gdf



# def flip_xy(poly):
#     """
#     TODO:
#         - [ ] This is unused in this file and thus should move to the dev
#         folder or somewhere else for to keep useful scratch work.
#     """
#     if hasattr(poly, 'reorder_axes'):
#         new_poly = poly.reorder_axes((1, 0))
#     else:
#         kw_poly = kwimage.Polygon.from_shapely(poly)
#         kw_poly.data['exterior'].data = kw_poly.data['exterior'].data[:, ::-1]
#         sh_poly_ = kw_poly.to_shapely()
#         new_poly = sh_poly_
#     return new_poly



[docs]
def category_category_colors(coco_dset):
    """
    Ensures that each category in a CategoryTree has a color

    TODO:
        - [ ] Add to CategoryTree
        - [ ] Consolidate with ~/code/watch/geowatch/tasks/fusion/utils :: category_tree_ensure_color
        - [ ] Consolidate with ~/code/watch/geowatch/utils/kwcoco_extensions :: category_category_colors
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: ensure_heuristic_category_tree_colors
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: ensure_heuristic_coco_colors
    """
    cats = coco_dset.dataset['categories']
    # backup_colors = iter(kwimage.Color.distinct(len(cats)))
    for cat in cats:
        color = cat.get('color', None)
        if color is None:
            # color = next(backup_colors)
            # cat['color'] = kwimage.Color(color).as01()
            color = kwimage.Color.random()
            cat['color'] = color.as01()




[docs]
@profile
def associate_images(dset1, dset2, key_fallback=None):
    """
    Builds an association between image-ids in two datasets.

    One use for this is if ``dset1`` is a truth dataset and ``dset2`` is a
    prediction dataset, and you need the to know which images are in common so
    they can be scored.

    Args:
        dset1 (kwcoco.CocoDataset): a kwcoco datset.

        dset2 (kwcoco.CocoDataset): another kwcoco dataset

        key_fallback (str):
            The fallback key to use if the image "name" is not specified.
            This can either be "file_name" or "id" or None.

    TODO:
        - [ ] port to kwcoco proper
        - [ ] use in kwcoco eval as a robust image/video association method

    Example:
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> import kwcoco
        >>> from kwcoco.demo.perterb import perterb_coco
        >>> dset1 = kwcoco.CocoDataset.demo('shapes2')
        >>> kwargs = {
        >>>     'box_noise': 0.5,
        >>>     'n_fp': (0, 10),
        >>>     'n_fn': (0, 10),
        >>> }
        >>> dset2 = perterb_coco(dset1, **kwargs)
        >>> matches = associate_images(dset1, dset2, key_fallback='file_name')
        >>> assert len(matches['image']['match_gids1'])
        >>> assert len(matches['image']['match_gids2'])
        >>> assert not len(matches['video'])

    Example:
        >>> from geowatch.utils.kwcoco_extensions import *  # NOQA
        >>> import kwcoco
        >>> from kwcoco.demo.perterb import perterb_coco
        >>> dset1 = kwcoco.CocoDataset.demo('vidshapes2')
        >>> kwargs = {
        >>>     'box_noise': 0.5,
        >>>     'n_fp': (0, 10),
        >>>     'n_fn': (0, 10),
        >>> }
        >>> dset2 = perterb_coco(dset1, **kwargs)
        >>> matches = associate_images(dset1, dset2, key_fallback='file_name')
        >>> assert not len(matches['image']['match_gids1'])
        >>> assert not len(matches['image']['match_gids2'])
        >>> assert len(matches['video'])
    """
    common_vidnames = (set(dset1.index.name_to_video) &
                       set(dset2.index.name_to_video))

    def image_keys(dset, gids):
        # Generate image "keys" that should be compatible between datasets
        for gid in gids:
            img = dset.imgs[gid]
            if img.get('name', None) is not None:
                yield img['name']
            else:
                if key_fallback is None:
                    raise Exception('images require names to associate')
                elif key_fallback == 'id':
                    yield img['id']
                elif key_fallback == 'file_name':
                    yield img['file_name']
                else:
                    raise KeyError(key_fallback)

    all_gids1 = list(dset1.imgs.keys())
    all_gids2 = list(dset2.imgs.keys())
    all_keys1 = list(image_keys(dset1, all_gids1))
    all_keys2 = list(image_keys(dset2, all_gids2))
    key_to_gid1 = ub.dzip(all_keys1, all_gids1)
    key_to_gid2 = ub.dzip(all_keys2, all_gids2)
    gid_to_key1 = ub.invert_dict(key_to_gid1)
    gid_to_key2 = ub.invert_dict(key_to_gid2)

    video_matches = []

    all_match_gids1 = set()
    all_match_gids2 = set()

    for vidname in common_vidnames:
        video1 = dset1.index.name_to_video[vidname]
        video2 = dset2.index.name_to_video[vidname]
        vidid1 = video1['id']
        vidid2 = video2['id']
        gids1 = dset1.index.vidid_to_gids[vidid1]
        gids2 = dset2.index.vidid_to_gids[vidid2]
        keys1 = ub.oset(ub.take(gid_to_key1, gids1))
        keys2 = ub.oset(ub.take(gid_to_key2, gids2))
        match_keys = ub.oset(keys1) & ub.oset(keys2)
        match_gids1 = list(ub.take(key_to_gid1, match_keys))
        match_gids2 = list(ub.take(key_to_gid2, match_keys))
        all_match_gids1.update(match_gids1)
        all_match_gids2.update(match_gids2)
        video_matches.append({
            'vidname': vidname,
            'match_gids1': match_gids1,
            'match_gids2': match_gids2,
        })

    # Associate loose images not belonging to any video
    unmatched_gid_to_key1 = ub.dict_diff(gid_to_key1, all_match_gids1)
    unmatched_gid_to_key2 = ub.dict_diff(gid_to_key2, all_match_gids2)

    remain_keys = (set(unmatched_gid_to_key1.values()) &
                   set(unmatched_gid_to_key2.values()))
    remain_gids1 = [key_to_gid1[key] for key in remain_keys]
    remain_gids2 = [key_to_gid2[key] for key in remain_keys]

    image_matches = {
        'match_gids1': remain_gids1,
        'match_gids2': remain_gids2,
    }

    matches = {
        'image': image_matches,
        'video': video_matches,
    }
    return matches




[docs]
def reorder_video_frames(dset):
    """
    Reorder the image indexes in each video to ensure temporal ordering
    """
    from kwutil import util_time
    videos = dset.videos()
    info = []
    for video, images in zip(videos.objs, videos.images):
        date_captured_list = images.lookup('date_captured')
        dt_list = [util_time.coerce_datetime(d) for d in date_captured_list]
        frame_index_list = images.lookup('frame_index')

        video_report = {
            'name': video['name'],
            'status': 'ok',
        }
        errors = []

        # This should never happen due to kwcoco assumptions
        had_bad_index_order = (np.diff(frame_index_list) < 1).any()
        if had_bad_index_order:
            errors.append('had a critical error')

        new_frame_indexes = ub.argsort(dt_list)

        # This might happen, and we should fix it.
        had_bad_date_order = (np.diff(new_frame_indexes) < 1).any()
        if had_bad_date_order:
            errors.append('had a bad date ordering')

        if errors:
            new_image_order = images.take(new_frame_indexes)
            for new_frame_index, img in enumerate(new_image_order.objs):
                img['frame_index'] = new_frame_index
            video_report['errors'] = errors
            video_report['stats'] = 'fixed'
        info.append(video_report)

    dset._build_index()
    print('reorder check info = {}'.format(ub.urepr(info, nl=1)))
    return info




[docs]
def pick_channels(coco_img, choices):
    """
    Choose the set of channels in choices that all exist in this image.

    TODO:
        - [ ] Add to CocoIamge as a method

    Args:
        coco_img (CocoImage): an image with channels

        choices (List[FusedChannelSpec | str]):
            a list of fused channels in priority order to choose from.

    Returns:
        None | FusedChannelSpec :
            The first channel group in ``choices`` where all of those channels
            exist in the image.

    CommandLine:
        xdoctest -m geowatch.utils.kwcoco_extensions pick_channels

    Example:
        >>> from geowatch.utils import kwcoco_extensions
        >>> import kwcoco
        >>> choices = ['blue|green|red', 'pan']
        >>> # Make different demo CocoImages that contain different bands
        >>> coco_img1 = kwcoco.CocoImage({
        >>>     'channels': 'red|green|blue', 'file_name': 'dummy'})
        >>> coco_img2 = kwcoco.CocoImage({
        >>>     'channels': 'green|blue', 'file_name': 'dummy'})
        >>> coco_img3 = kwcoco.CocoImage({
        >>>     'channels': 'blue|green|red', 'file_name': 'dummy'})
        >>> coco_img4 = kwcoco.CocoImage({
        >>>     'channels': 'pan', 'file_name': 'dummy'})
        >>> # Channels are only found if all bands in a choices item are given
        >>> found1 = kwcoco_extensions.pick_channels(coco_img1, choices)
        >>> found2 = kwcoco_extensions.pick_channels(coco_img2, choices)
        >>> found3 = kwcoco_extensions.pick_channels(coco_img3, choices)
        >>> found4 = kwcoco_extensions.pick_channels(coco_img4, choices)
        >>> print(f'found1={found1}')
        >>> print(f'found2={found2}')
        >>> print(f'found3={found3}')
        >>> print(f'found4={found4}')
        found1=<FusedChannelSpec(blue|green|red)>
        found2=None
        found3=<FusedChannelSpec(blue|green|red)>
        found4=<FusedChannelSpec(pan)>

    Example:
        >>> # Test case with different choices orders
        >>> from geowatch.utils import kwcoco_extensions
        >>> channel_priority1 = ['blue|green|red', 'pan']
        >>> channel_priority2 = ['pan', 'blue|green|red']
        >>> coco_img1 = kwcoco.CocoImage({
        >>>     'channels': 'blue|green|red|pan', 'file_name': 'dummy'})
        >>> coco_img2 = kwcoco.CocoImage({
        >>>     'channels': 'pan|blue|green|red', 'file_name': 'dummy'})
        >>> found1 = kwcoco_extensions.pick_channels(coco_img1, channel_priority1)
        >>> found2 = kwcoco_extensions.pick_channels(coco_img1, channel_priority2)
        >>> found3 = kwcoco_extensions.pick_channels(coco_img2, channel_priority1)
        >>> found4 = kwcoco_extensions.pick_channels(coco_img2, channel_priority2)
        >>> # The first found band in choices is returned when
        >>> # the image contains both, regardless of order in the image.
        >>> print(f'found1={found1}')
        >>> print(f'found2={found2}')
        >>> print(f'found3={found3}')
        >>> print(f'found4={found4}')
        found1=<FusedChannelSpec(blue|green|red)>
        found2=<FusedChannelSpec(pan)>
        found3=<FusedChannelSpec(blue|green|red)>
        found4=<FusedChannelSpec(pan)>
    """
    import kwcoco
    have_chans = coco_img.channels.fuse()
    for candidate in choices:
        candidate = kwcoco.FusedChannelSpec.coerce(candidate)
        if candidate.issubset(have_chans):
            return candidate