geowatch.cli.coco_clean_geotiffs module

class geowatch.cli.coco_clean_geotiffs.CleanGeotiffConfig(*args, **kwargs)[source]

Bases: DataConfig

Clean geotiff files inplace by masking bad pixels with NODATA.

Replaces large contiguous regions of specific same-valued pixels with NODATA.

Note

This is a destructive operation and overwrites the geotiff image data inplace. Make a copy of your dataset if there is any chance you need to go back. The underlying kwcoco file is not modified.

Usage:

# It is a good idea to do a dry run first to check for issues
# This can be done at a smaller scale for speed.
DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
geowatch clean_geotiffs \
    --src "$DVC_DATA_DPATH/Drop4-BAS/data.kwcoco.json" \
    --channels="red|green|blue|nir|swir16|swir22" \
    --prefilter_channels="red" \
    --min_region_size=256 \
    --nodata_value=-9999 \
    --workers="min(2,avail)" \
    --probe_scale=0.5 \
    --dry=True

# Then execute a real run at full scale - optionally with a probe scale
geowatch clean_geotiffs \
    --src "$DVC_DATA_DPATH/Drop4-BAS/data_vali.kwcoco.json" \
    --channels="red|green|blue|nir|swir16|swir22" \
    --prefilter_channels="red" \
    --min_region_size=256 \
    --nodata_value=-9999 \
    --workers="min(2,avail)" \
    --probe_scale=None \
    --dry=False

Valid options: []

Parameters:
  • *args – positional arguments for this data config

  • **kwargs – keyword arguments for this data config

default = {'channels': <Value('*')>, 'dry': <Value(False)>, 'exclude_channels': <Value('quality|cloudmask')>, 'export_bad_fpath': <Value(None)>, 'min_region_size': <Value(256)>, 'nodata_value': <Value(-9999)>, 'possible_nodata_values': <Value([0])>, 'prefilter_channels': <Value('red')>, 'probe_scale': <Value(None)>, 'scale': <Value(None)>, 'src': <Value(None)>, 'use_fix_stamps': <Value(False)>, 'workers': <Value(0)>}
geowatch.cli.coco_clean_geotiffs.main(cmdline=1, **kwargs)[source]

CommandLine

xdoctest -m geowatch.cli.coco_clean_geotiffs main

Example

>>> # xdoctest: +REQUIRES(env:SLOW_DOCTESTS)
>>> # Generate a dataset that has bad nodata values
>>> from geowatch.cli.coco_clean_geotiffs import *  # NOQA
>>> import kwimage
>>> import geowatch
>>> import kwarray
>>> import numpy as np
>>> # Create a copy of the test dataset to clean inplace
>>> orig_dset = geowatch.coerce_kwcoco('geowatch-msi', geodata=True, bad_nodata=True, num_videos=1, num_frames=2)
>>> orig_dpath = ub.Path(orig_dset.bundle_dpath)
>>> dpath = orig_dpath.augment(stemsuffix='_cleaned')
>>> dpath.delete()
>>> orig_dpath.copy(dpath)
>>> dset = geowatch.coerce_kwcoco(dpath / 'data.kwcoco.json')
>>> coco_img = dset.images().coco_images[0]
>>> kwargs = {
>>>     'src': dset,
>>>     'workers': 0,
>>>     'channels': 'B11',
>>>     'prefilter_channels': 'B11',
>>>     'min_region_size': 32,
>>>     'nodata_value': 2,  # because toydata is uint16
>>> }
>>> cmdline = 0
>>> # Do a dry run first
>>> main(cmdline=cmdline, **kwargs, dry=True)
>>> # Then a real run.
>>> main(cmdline=cmdline, **kwargs)
>>> coco_img1 = orig_dset.images().coco_images[0]
>>> coco_img2 = dset.coco_image(coco_img1.img['id'])
>>> print(ub.urepr(list(coco_img1.iter_image_filepaths())))
>>> print(ub.urepr(list(coco_img2.iter_image_filepaths())))
>>> imdata1 = coco_img1.imdelay('B11', nodata_method='float').finalize()
>>> imdata2 = coco_img2.imdelay('B11', nodata_method='float').finalize()
>>> print(np.isnan(imdata1).sum())
>>> print(np.isnan(imdata2).sum())
>>> canvas1 = kwarray.robust_normalize(imdata1)
>>> canvas2 = kwarray.robust_normalize(imdata2)
>>> canvas1 = kwimage.nodata_checkerboard(canvas1)
>>> canvas2 = kwimage.nodata_checkerboard(canvas2)
>>> # xdoctest: +REQUIRES(--show)
>>> # xdoctest: +REQUIRES(module:kwplot)
>>> import kwplot
>>> kwplot.autompl()
>>> kwplot.imshow(canvas1, pnum=(1, 2, 1), title='before')
>>> kwplot.imshow(canvas2, pnum=(1, 2, 2), title='after')
geowatch.cli.coco_clean_geotiffs.probe_image_issues(coco_img, channels=None, prefilter_channels=None, scale=None, possible_nodata_values=None, min_region_size=256, exclude_channels=None, probe_scale=None, use_fix_stamps=False, nodata_value=-9999)[source]

Inspect a single image, possibly with multiple assets, each with possibly multiple bands, for fixable nodata values.

Parameters:
  • coco_img – the coco image to check

  • channels – the channels to check

  • prefilter_channels – the channels to check first for efficiency. If they do not exist, then all channels are checked.

  • possible_nodata_values (set[int]) – the values that may be nodata if known

  • scale – use a downscaled overview to speed up the computation via approximation. Returns results at this scale. DO NOT USE RIGHT NOW.

  • probe_scale – use a downscaled overview to speed up the computation via approximation. If the probe identifies an issue a full scale probe is done.

Example

>>> import geowatch
>>> from geowatch.cli.coco_clean_geotiffs import *  # NOQA
>>> import numpy as np
>>> dset = geowatch.coerce_kwcoco('geowatch-msi', geodata=True, bad_nodata=True)
>>> coco_img = dset.images().coco_images[4]
>>> channels = 'B11|B10|X.1'
>>> prefilter_channels = 'B11'
>>> scale = None
>>> possible_nodata_values = {0}
>>> min_region_size = 128
>>> image_summary = probe_image_issues(
>>>     coco_img, channels=channels, prefilter_channels=prefilter_channels,
>>>     scale=scale, possible_nodata_values=possible_nodata_values,
>>>     min_region_size=min_region_size)
>>> print(f'image_summary={image_summary}')
geowatch.cli.coco_clean_geotiffs.probe_asset(coco_img, obj, band_idxs=None, scale=None, possible_nodata_values=None, min_region_size=256, use_fix_stamps=False, nodata_value=-9999)[source]

Inspect a specific single-file asset, possibly with multiple bands, for fixable nodata values.

geowatch.cli.coco_clean_geotiffs.probe_asset_imdata(imdata, band_idxs, min_region_size_=256, possible_nodata_values=None)[source]
geowatch.cli.coco_clean_geotiffs.probe_band_imdata(band_imdata, min_region_size_=256, possible_nodata_values=None)[source]
geowatch.cli.coco_clean_geotiffs.fix_single_asset(fpath, dry=False)[source]
geowatch.cli.coco_clean_geotiffs.fix_geotiff_ondisk(asset_summary, correct_nodata_value=-9999)[source]

Updates the nodata value based on a mask inplace on disk. Attempts to preserve all other metadata, but this is not guaranteed or always possible.

Parameters:
  • asset_summary (Dict) – an item from probe_asset().

  • correct_nodata_value (int) – the nodata value to use in the modified geotiff.

Assumptions:
  • The input image uses AVERAGE overview resampling

  • The input image is a tiled geotiff (ideally a COG)

Todo

  • [ ] Can restructure this as a more general context manager.

Example

>>> from geowatch.cli.coco_clean_geotiffs import *  # NOQA
>>> from geowatch.demo.metrics_demo.demo_rendering import write_demo_geotiff
>>> import kwimage
>>> import numpy as np
>>> dpath = ub.Path.appdir('geowatch/tests/clean_geotiff').ensuredir()
>>> fpath1 = dpath / 'test_geotiff.tif'
>>> fpath2 = fpath1.augment(stemsuffix='_fixed')
>>> fpath1.delete()
>>> fpath2.delete()
>>> imdata = kwimage.grab_test_image('amazon', dsize=(512, 512))
>>> poly = kwimage.Polygon.random().scale(imdata.shape[0:2][::-1])
>>> imdata = poly.draw_on(imdata, color='black')
>>> imdata = imdata.astype(np.int16)
>>> #imdata = poly.fill(imdata, value=(0, 0, 0), pixels_are='areas')
>>> imdata = poly.fill(imdata, value=0, pixels_are='areas')
>>> imdata[:256, :256, 0] = 0
>>> write_demo_geotiff(img_fpath=fpath1, imdata=imdata)
>>> fpath1.copy(fpath2)
>>> asset_summary = probe_asset_imdata(imdata, band_idxs=[0, 2], possible_nodata_values={0})
>>> asset_summary['fpath'] = fpath2
>>> assert fpath1.stat().st_size == fpath2.stat().st_size
>>> fix_geotiff_ondisk(asset_summary)
>>> assert fpath1.stat().st_size != fpath2.stat().st_size
>>> imdata1 = kwimage.imread(fpath1, nodata_method='ma')
>>> imdata2 = kwimage.imread(fpath2, nodata_method='ma')
>>> canvas1 = kwimage.normalize_intensity(imdata1)
>>> canvas2 = kwimage.normalize_intensity(imdata2)
>>> canvas1 = kwimage.nodata_checkerboard(canvas1)
>>> canvas2 = kwimage.nodata_checkerboard(canvas2)
>>> # xdoctest: +REQUIRES(--show)
>>> # xdoctest: +REQUIRES(module:kwplot)
>>> import kwplot
>>> kwplot.autompl()
>>> kwplot.imshow(canvas1.data, pnum=(2, 2, 1), title='norm imdata1 vals')
>>> kwplot.imshow(canvas2.data, pnum=(2, 2, 2), title='norm imdata2 vals')
>>> kwplot.imshow(imdata1.mask.any(axis=2), pnum=(2, 2, 3), title='imdata1.mask')
>>> kwplot.imshow(imdata2.mask.any(axis=2), pnum=(2, 2, 4), title='imdata2.mask')
>>> #kwplot.imshow((asset_summary['is_samecolor'] > 0), pnum=(3, 2, 5), title='is samecolor mask')

Example

>>> from geowatch.cli.coco_clean_geotiffs import *  # NOQA
>>> from geowatch.demo.metrics_demo.demo_rendering import write_demo_geotiff
>>> import kwimage
>>> import numpy as np
>>> dpath = ub.Path.appdir('geowatch/tests/clean_geotiff').ensuredir()
>>> fpath1 = dpath / 'test_geotiff.tif'
>>> fpath2 = fpath1.augment(stemsuffix='_fixed')
>>> fpath1.delete()
>>> fpath2.delete()
>>> imdata = kwimage.grab_test_image('amazon', dsize=(512, 512))[..., 0]
>>> poly = kwimage.Polygon.random().scale(imdata.shape[0:2][::-1])
>>> imdata = imdata.astype(np.int16)
>>> imdata = poly.fill(imdata, value=0, pixels_are='areas')
>>> imdata[:256, :256] = 0
>>> write_demo_geotiff(img_fpath=fpath1, imdata=imdata)
>>> fpath1.copy(fpath2)
>>> asset_summary = probe_asset_imdata(imdata, band_idxs=[0], possible_nodata_values={0})
>>> asset_summary['fpath'] = fpath2
>>> assert fpath1.stat().st_size == fpath2.stat().st_size
>>> fix_geotiff_ondisk(asset_summary)
>>> assert fpath1.stat().st_size != fpath2.stat().st_size
>>> imdata1 = kwimage.imread(fpath1, nodata_method='ma')
>>> imdata2 = kwimage.imread(fpath2, nodata_method='ma')
>>> canvas1 = kwimage.normalize_intensity(imdata1)
>>> canvas2 = kwimage.normalize_intensity(imdata2)
>>> canvas1 = kwimage.nodata_checkerboard(canvas1)
>>> canvas2 = kwimage.nodata_checkerboard(canvas2)
>>> # xdoctest: +REQUIRES(--show)
>>> # xdoctest: +REQUIRES(module:kwplot)
>>> import kwplot
>>> kwplot.autompl()
>>> kwplot.imshow(canvas1.data, pnum=(2, 2, 1), title='norm imdata1 vals')
>>> kwplot.imshow(canvas2.data, pnum=(2, 2, 2), title='norm imdata2 vals')
>>> kwplot.imshow(imdata1.mask, pnum=(2, 2, 3), title='imdata1.mask')
>>> kwplot.imshow(imdata2.mask, pnum=(2, 2, 4), title='imdata2.mask')
>>> #kwplot.imshow((asset_summary['is_samecolor'] > 0), pnum=(3, 2, 5), title='is samecolor mask')
geowatch.cli.coco_clean_geotiffs.draw_asset_summary(coco_img, asset_summary)[source]