geowatch.cli.cluster_sites module

Given a set of site summaries, clusters them into groups, ideally with small overlap. Writes new regions to a specified directory using the hash of the contained sites as a subregion identifier.

Limitations:
  • The clustering algorithm is overly simple

Example

DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware='auto')
python -m geowatch.cli.cluster_sites \
    --src "$DVC_DATA_DPATH/annotations/drop6/region_models/KR_R002.geojson" \
    --dst_dpath $DVC_DATA_DPATH/ValiRegionSmall/geojson \
    --draw_clusters True

DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware='auto')
python -m geowatch.cli.coco_align \
    --src $DVC_DATA_DPATH/Drop6/combo_imganns-KR_R002_L.kwcoco.json \
    --dst $DVC_DATA_DPATH/ValiRegionSmall/small_KR_R002_odarcigm.kwcoco.zip \
    --regions $DVC_DATA_DPATH/ValiRegionSmall/geojson/SUB_KR_R002_n007_odarcigm.geojson \
    --minimum_size="128x128@10GSD" \
    --context_factor=1 \
    --geo_preprop=auto \
    --force_nodata=-9999 \
    --site_summary=False \
    --target_gsd=5 \
    --aux_workers=8 \
    --workers=8

DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware='auto')
python -m geowatch.cli.cluster_sites \
    --src "$DVC_DATA_DPATH/annotations/drop6/region_models/NZ_R001.geojson" \
    --dst_dpath $DVC_DATA_DPATH/ValiRegionSmall/geojson/NZ_R001 \
    --draw_clusters True

DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware='auto')
python -m geowatch.cli.coco_align \
    --src $DVC_DATA_DPATH/Drop6/combo_imganns-NZ_R001_L.kwcoco.json \
    --dst $DVC_DATA_DPATH/ValiRegionSmall/small_NZ_R001_swnykmah.kwcoco.zip \
    --regions $DVC_DATA_DPATH/ValiRegionSmall/geojson/NZ_R001/SUB_NZ_R001_n031_swnykmah.geojson \
    --minimum_size="128x128@10GSD" \
    --context_factor=1 \
    --geo_preprop=auto \
    --force_nodata=-9999 \
    --site_summary=False \
    --target_gsd=5 \
    --aux_workers=8 \
    --workers=8

class geowatch.cli.cluster_sites.ClusterSiteConfig(*args, **kwargs)[source]

Bases: DataConfig

Creates a new region file that groups nearby sites.

Valid options: []

Parameters:
  • *args – positional arguments for this data config

  • **kwargs – keyword arguments for this data config

default = {'context_factor': <Value(1.5)>, 'crop_time': <Value(True)>, 'draw_clusters': <Value(False)>, 'dst_dpath': <Value(None)>, 'dst_region_fpath': <Value(None)>, 'ignore_status': <Value("['system_rejected']")>, 'io_workers': <Value(10)>, 'maximum_size': <Value('1024x1024@2GSD')>, 'minimum_size': <Value('128x128@2GSD')>, 'src': <Value(None)>}
main(**kwargs)

CommandLine

xdoctest -m geowatch.cli.cluster_sites main:0
xdoctest -m geowatch.cli.cluster_sites main:2

Example

>>> from geowatch.cli.cluster_sites import *  # NOQA
>>> from geowatch.cli import cluster_sites
>>> import ubelt as ub
>>> dpath = ub.Path.appdir('geowatch', 'doctests', 'cluster_sites1').ensuredir()
>>> src_dpath = (dpath / 'src').ensuredir()
>>> dst_dpath = (dpath / 'dst')
>>> dst_region_fpath = dst_dpath / 'cluster.geojson'
>>> from geowatch.geoannots import geomodels
>>> region = geomodels.RegionModel.random(num_sites=10)
>>> src_fpath = src_dpath / 'demo_region.geojson'
>>> src_fpath.write_text(region.dumps())
>>> kwargs = {
>>>     'src': src_fpath,
>>>     'dst_dpath': dst_dpath,
>>>     'dst_region_fpath': dst_region_fpath,
>>>     'io_workers': 0,
>>>     'draw_clusters': 0,
>>>     'crop_time': True,
>>> }
>>> cmdline = 0
>>> cluster_sites.main(cmdline=cmdline, **kwargs)

Example

>>> from geowatch.cli.cluster_sites import *  # NOQA
>>> from geowatch.cli import cluster_sites
>>> import ubelt as ub
>>> dpath = ub.Path.appdir('geowatch', 'doctests', 'cluster_sites2').ensuredir()
>>> src_dpath = (dpath / 'src').ensuredir()
>>> dst_dpath = (dpath / 'dst')
>>> from geowatch.geoannots import geomodels
>>> region = geomodels.RegionModel.random(num_sites=10)
>>> src_fpath = src_dpath / 'demo_region.geojson'
>>> src_fpath.write_text(region.dumps())
>>> dst_region_fpath = dst_dpath / 'cluster.geojson'
>>> kwargs = {
>>>     'src': src_fpath,
>>>     'dst_dpath': dst_dpath,
>>>     'dst_region_fpath': dst_region_fpath,
>>>     'io_workers': 0,
>>>     'draw_clusters': 1,
>>>     'crop_time': True,
>>> }
>>> cmdline = 0
>>> cluster_sites.main(cmdline=cmdline, **kwargs)

Example

>>> # Test empty case
>>> from geowatch.cli.cluster_sites import *  # NOQA
>>> from geowatch.cli import cluster_sites
>>> import ubelt as ub
>>> dpath = ub.Path.appdir('geowatch', 'doctests', 'cluster_sites3').ensuredir()
>>> src_dpath = (dpath / 'src').ensuredir()
>>> dst_dpath = (dpath / 'dst')
>>> from geowatch.geoannots import geomodels
>>> region = geomodels.RegionModel.random(num_sites=0)
>>> src_fpath = src_dpath / 'demo_region.geojson'
>>> src_fpath.write_text(region.dumps())
>>> dst_region_fpath = dst_dpath / 'cluster.geojson'
>>> kwargs = {
>>>     'src': src_fpath,
>>>     'dst_dpath': dst_dpath,
>>>     'dst_region_fpath': dst_region_fpath,
>>>     'io_workers': 0,
>>>     'draw_clusters': 1,
>>>     'crop_time': True,
>>> }
>>> cmdline = 0
>>> cluster_sites.main(cmdline=cmdline, **kwargs)
geowatch.cli.cluster_sites.main(cmdline=1, **kwargs)[source]

CommandLine

xdoctest -m geowatch.cli.cluster_sites main:0
xdoctest -m geowatch.cli.cluster_sites main:2

Example

>>> from geowatch.cli.cluster_sites import *  # NOQA
>>> from geowatch.cli import cluster_sites
>>> import ubelt as ub
>>> dpath = ub.Path.appdir('geowatch', 'doctests', 'cluster_sites1').ensuredir()
>>> src_dpath = (dpath / 'src').ensuredir()
>>> dst_dpath = (dpath / 'dst')
>>> dst_region_fpath = dst_dpath / 'cluster.geojson'
>>> from geowatch.geoannots import geomodels
>>> region = geomodels.RegionModel.random(num_sites=10)
>>> src_fpath = src_dpath / 'demo_region.geojson'
>>> src_fpath.write_text(region.dumps())
>>> kwargs = {
>>>     'src': src_fpath,
>>>     'dst_dpath': dst_dpath,
>>>     'dst_region_fpath': dst_region_fpath,
>>>     'io_workers': 0,
>>>     'draw_clusters': 0,
>>>     'crop_time': True,
>>> }
>>> cmdline = 0
>>> cluster_sites.main(cmdline=cmdline, **kwargs)

Example

>>> from geowatch.cli.cluster_sites import *  # NOQA
>>> from geowatch.cli import cluster_sites
>>> import ubelt as ub
>>> dpath = ub.Path.appdir('geowatch', 'doctests', 'cluster_sites2').ensuredir()
>>> src_dpath = (dpath / 'src').ensuredir()
>>> dst_dpath = (dpath / 'dst')
>>> from geowatch.geoannots import geomodels
>>> region = geomodels.RegionModel.random(num_sites=10)
>>> src_fpath = src_dpath / 'demo_region.geojson'
>>> src_fpath.write_text(region.dumps())
>>> dst_region_fpath = dst_dpath / 'cluster.geojson'
>>> kwargs = {
>>>     'src': src_fpath,
>>>     'dst_dpath': dst_dpath,
>>>     'dst_region_fpath': dst_region_fpath,
>>>     'io_workers': 0,
>>>     'draw_clusters': 1,
>>>     'crop_time': True,
>>> }
>>> cmdline = 0
>>> cluster_sites.main(cmdline=cmdline, **kwargs)

Example

>>> # Test empty case
>>> from geowatch.cli.cluster_sites import *  # NOQA
>>> from geowatch.cli import cluster_sites
>>> import ubelt as ub
>>> dpath = ub.Path.appdir('geowatch', 'doctests', 'cluster_sites3').ensuredir()
>>> src_dpath = (dpath / 'src').ensuredir()
>>> dst_dpath = (dpath / 'dst')
>>> from geowatch.geoannots import geomodels
>>> region = geomodels.RegionModel.random(num_sites=0)
>>> src_fpath = src_dpath / 'demo_region.geojson'
>>> src_fpath.write_text(region.dumps())
>>> dst_region_fpath = dst_dpath / 'cluster.geojson'
>>> kwargs = {
>>>     'src': src_fpath,
>>>     'dst_dpath': dst_dpath,
>>>     'dst_region_fpath': dst_region_fpath,
>>>     'io_workers': 0,
>>>     'draw_clusters': 1,
>>>     'crop_time': True,
>>> }
>>> cmdline = 0
>>> cluster_sites.main(cmdline=cmdline, **kwargs)
geowatch.cli.cluster_sites.cluster_single_region_sites(input_region_model, scale, min_box_dim, max_box_dim, proc_context, config)[source]
geowatch.cli.cluster_sites.build_clustered_models(input_region_model, region_header, region_sites, region_sites_utm, keep_bbs, config)[source]

Given the clustering output, construct new region models.

Returns:

  1. A new “clustered region” region model with the original region bounds and the clusters as site summaries

  2. A set of region models for each cluster, with the original site summaries that were assigned to that cluster

Return type:

Tuple[RegionModel, List[RegionModel]]