geowatch.tasks.tracking.normalize module

geowatch.tasks.tracking.normalize.dedupe_annots(coco_dset)[source]

Check for annotations with different aids that share the same geometry
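A minimal usage sketch (assuming one of a pair of identical-geometry annotations is removed; the kwcoco 'shapes' demo here is illustrative, not the actual test data):

>>> # xdoctest: +SKIP
>>> # Duplicate an existing annotation under a new aid, then dedupe.
>>> from geowatch.tasks.tracking.normalize import dedupe_annots
>>> import kwcoco
>>> coco_dset = kwcoco.CocoDataset.demo('shapes2')
>>> dup = dict(coco_dset.anns[1])
>>> dup.pop('id')  # a fresh aid will be assigned; geometry is unchanged
>>> coco_dset.add_annotation(**dup)
>>> n_before = coco_dset.n_annots
>>> coco_dset = dedupe_annots(coco_dset)
>>> assert coco_dset.n_annots == n_before - 1  # assumed dedupe behavior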

geowatch.tasks.tracking.normalize.remove_small_annots(coco_dset, min_area_px=1, min_geo_precision=6)[source]

There are several reasons for a detection to be too small to keep. Remove these and return the rest of the dataset.

  1. Detections that aren’t well-formed polygons.

    These are simply errors. They show up fairly often in an arbitrary dset; TODO: figure out why. Possible culprits:

    mask_to_scored_polygons? cropping in propagate_labels?

    >>> # xdoctest: +SKIP
    >>> d = kwcoco.CocoDataset('pred_KR_R01.kwcoco_timeagg_v1.json')
    >>> sum(are_invalid(d.annots())), d.n_annots
    (6686, 13136)
    
  2. Very small detections in pixel-space (area <1 pixel).

    These probably couldn’t represent something visible, unless the GSD is very large. Skip this check by setting min_area_px=0.

  3. Overly-precise geo-detections.

    Because GSD varies, and because lat-lon isn’t equal-area, detections can be trivial in geo space but not pixel space. The GeoJSON spec recommends a precision of 6 decimal places, which is ~10cm [1]. (IARPA annotations conform to this.) This check removes detections that are empty when rounded. Skip this check by setting min_geo_precision=None (a short illustration follows this list).
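As a hedged illustration of check 3 (not the actual implementation, which lives inside remove_small_annots): rounding a sub-centimeter polygon to 6 decimal places collapses it to a degenerate, zero-area geometry.

>>> # xdoctest: +SKIP
>>> from shapely.geometry import Polygon
>>> import shapely.wkt
>>> tiny = Polygon([(0, 0), (1e-8, 0), (1e-8, 1e-8)])  # far below ~10cm
>>> rounded = shapely.wkt.loads(shapely.wkt.dumps(tiny, rounding_precision=6))
>>> assert rounded.area == 0  # empty when rounded -> would be removed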

Sources:

[1] https://pypi.org/project/geojson/#default-and-custom-precision

Example

>>> import kwimage
>>> import numpy as np
>>> from copy import deepcopy
>>> from geowatch.tasks.tracking.normalize import remove_small_annots
>>> from geowatch.demo import smart_kwcoco_demodata
>>> coco_dset = smart_kwcoco_demodata.demo_kwcoco_with_heatmaps()
>>> # This dset has 1 video with all images the same size
>>> # For testing, resize one of the images so there is a meaningful
>>> # difference between img space and vid space
>>> scale_factor = 0.5
>>> aff = kwimage.Affine.coerce({'scale': scale_factor})
>>> img = coco_dset.imgs[1]
>>> img['width'] *= scale_factor
>>> img['height'] *= scale_factor
>>> img['warp_img_to_vid']['scale'] = 1/scale_factor
>>> for aux in img['auxiliary']:
>>>     aux['warp_aux_to_img']['scale'] = np.array(
>>>         aux['warp_aux_to_img'].get('scale', 1)) * scale_factor
>>> annots = coco_dset.annots(gid=img['id'])
>>> old_annots = deepcopy(annots)
>>> dets = annots.detections.warp(aff)
>>> # TODO this doesn't handle keypoints, and is rather brittle; is
>>> # there a way to simply do something like:
>>> #    annots.detections = annots.detections.warp(aff)
>>> annots.set('bbox', dets.boxes.to_coco(style='new'))
>>> annots.set('segmentation', dets.data['segmentations'].to_coco(
>>>     style='new'))
>>> # test that scaling worked
>>> assert np.all(annots.boxes.area < old_annots.boxes.area)
>>> assert np.allclose(annots.boxes.warp(aff.inv()).area,
>>>     old_annots.boxes.area)
>>> # test that remove_small_annots no-ops with no threshold
>>> # (ie there are no invalid annots here)
>>> assert coco_dset.n_annots == remove_small_annots(deepcopy(coco_dset),
>>>     min_area_px=0, min_geo_precision=None).n_annots
>>> # test that annots can be removed
>>> assert remove_small_annots(deepcopy(coco_dset), min_area_px=1e99,
>>>     min_geo_precision=None).n_annots == 0
>>> # test that annotations are filtered in video space
>>> # pick a threshold above the img annot size and below the vid
>>> # annot size; annot should not be removed
>>> thresh = annots.boxes.area[0] + 1
>>> assert annots.aids[0] in remove_small_annots(deepcopy(coco_dset),
>>>     min_area_px=thresh, min_geo_precision=None).annots(
>>>         gid=img['id']).aids
>>> # test that some but not all annots can be removed
>>> filtered = remove_small_annots(
>>>     deepcopy(coco_dset), min_area_px=10000,
>>>     min_geo_precision=None)
>>> assert filtered.n_annots > 0 and filtered.n_annots < coco_dset.n_annots
>>> # TODO test min_geo_precision
geowatch.tasks.tracking.normalize.ensure_videos(coco_dset)[source]

Ensure every image belongs to a video (creating a dummy video if needed) and has a frame_index
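A minimal hedged sketch (assuming the plain kwcoco 'shapes' demo starts without videos):

>>> # xdoctest: +SKIP
>>> from geowatch.tasks.tracking.normalize import ensure_videos
>>> import kwcoco
>>> coco_dset = kwcoco.CocoDataset.demo('shapes2')
>>> coco_dset = ensure_videos(coco_dset)
>>> # every image should now have a (possibly dummy) video and a frame_index
>>> assert all(img.get('video_id') is not None
>>>            for img in coco_dset.imgs.values())
>>> assert all('frame_index' in img for img in coco_dset.imgs.values())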

geowatch.tasks.tracking.normalize.dedupe_tracks(coco_dset)[source]

Assuming that videos are made of disjoint images, ensure that track ids are not shared by two tracks in different videos.
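A minimal hedged sketch (forcing a collision across the two demo videos; the exact renumbering scheme is assumed, not specified here):

>>> # xdoctest: +SKIP
>>> from geowatch.tasks.tracking.normalize import dedupe_tracks
>>> import kwcoco
>>> coco_dset = kwcoco.CocoDataset.demo('vidshapes2')
>>> coco_dset.annots().set('track_id', 1)  # same id in both videos
>>> coco_dset = dedupe_tracks(coco_dset)
>>> # assumed result: the two videos no longer share one track id
>>> assert len(set(coco_dset.annots().get('track_id'))) >= coco_dset.n_videos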

geowatch.tasks.tracking.normalize.shapely_round(geom, precision)[source]

References

https://gis.stackexchange.com/questions/188622
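A minimal sketch of the rounding idea from the reference; shapely_round itself may transform coordinates in place rather than round-tripping through WKT as done here:

>>> # xdoctest: +SKIP
>>> import shapely.wkt
>>> from shapely.geometry import Point
>>> geom = Point(1.23456789, 9.87654321)
>>> rounded = shapely.wkt.loads(shapely.wkt.dumps(geom, rounding_precision=6))
>>> assert (rounded.x, rounded.y) == (1.234568, 9.876543)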

geowatch.tasks.tracking.normalize.normalize_phases(coco_dset, use_viterbi=False, t_probs=None, e_probs=None, baseline_keys={'salient'}, prediction_key='phase_transition_days')[source]

Convert the internal representation of phases to the IARPA standard, insert a baseline guess for activity classification, and remove empty tracks.

HACK: add a Post Construction frame at the end of every track until we support partial sites

The only remaining categories in the returned coco_dset should be:
  • Site Preparation

  • Active Construction

  • Post Construction

TODO: make this a step in track_fn to take advantage of heatmap info?

Example

>>> # test baseline guess
>>> from geowatch.tasks.tracking.normalize import normalize_phases
>>> from geowatch.demo import smart_kwcoco_demodata
>>> dset = smart_kwcoco_demodata.demo_kwcoco_with_heatmaps()
>>> dset.remove_categories([1,3,4,5])
>>> dset.cats[2]['name'] = 'salient'
>>> assert dset.cats == {2: {'id': 2, 'name': 'salient'}}
>>> # HACK, this shouldn't be needed?
>>> # TODO file bug report
>>> dset._build_index()
>>> dset = normalize_phases(dset)
>>> assert (dset.annots(track_id=1).cnames ==
>>>     ((['Site Preparation'] * 10) +
>>>      (['Active Construction'] * 9) +
>>>      (['Post Construction'])))
>>> # try again with smoothing
>>> dset = normalize_phases(dset, use_viterbi=True)
geowatch.tasks.tracking.normalize.dedupe_dates(coco_dset)[source]

Ensure a tracked kwcoco file has at most 1 annot per track per date. [1]

There are several potential ways to do this.
  • take highest-resolution sensor [currently done]

  • take image with best coverage (least nodata)

  • take latest time

  • majority-vote labels/average scores

  • average heatmaps before polygons are created

Given that this probably has a minimal impact on scores, the safest method is chosen (a toy sketch of that rule follows).
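As a toy sketch of the first rule (the tuple layout and gsd field are illustrative stand-ins; the real function works on the kwcoco index):

>>> import ubelt as ub
>>> # toy records: (aid, track_id, date, gsd); smaller GSD means a
>>> # higher-resolution sensor
>>> annots = [(1, 1, '2020-01-01', 10.0),
>>>           (2, 1, '2020-01-01', 0.5),
>>>           (3, 1, '2020-01-02', 10.0)]
>>> groups = ub.group_items(annots, key=lambda a: (a[1], a[2]))
>>> keep = [min(grp, key=lambda a: a[3]) for grp in groups.values()]
>>> assert [a[0] for a in keep] == [2, 3]  # the 0.5m annot wins its date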

References

[1] https://smartgitlab.com/TE/metrics-and-test-framework/-/issues/63

Example

>>> from geowatch.tasks.tracking.normalize import *  # NOQA
>>> import geowatch
>>> import kwarray
>>> import ubelt as ub
>>> coco_dset = geowatch.coerce_kwcoco('geowatch-msi', geodata=True, dates=True)
>>> # Add 0-4 duplicate images to each video
>>> rng = kwarray.ensure_rng(613544)
>>> gids_to_duplicate = list(ub.flatten([rng.choice(gs, rng.randint(0, 4)) for gs in coco_dset.videos().images]))
>>> for gid in gids_to_duplicate:
>>>     img1 = ub.udict(coco_dset.index.imgs[gid]) - {'id'}
>>>     img1['name'] = img1['name'] + '_duplicated'
>>>     coco_dset.add_image(**img1)
>>> coco_dset_with_dups = coco_dset.copy()
>>> coco_dset_fixed = dedupe_dates(coco_dset.copy())
>>> assert coco_dset_fixed.n_images < coco_dset_with_dups.n_images
geowatch.tasks.tracking.normalize.run_tracking_pipeline(coco_dset, track_fn, gt_dset=None, viz_out_dir=None, use_viterbi=False, sensor_warnings=True, **track_kwargs)[source]

Driver function to apply all normalizations

Todo

Rename this to something like run_tracker. This is the entry point to the main tracking pipeline.

Example

>>> import kwcoco as kc
>>> from geowatch.tasks.tracking.normalize import *
>>> from geowatch.tasks.tracking.from_polygon import OverlapTrack
>>> # create demodata
>>> d = kc.CocoDataset.demo()
>>> ann_dct = d.anns[1]
>>> d.remove_annotations(range(1,12))
>>> ann_dct.pop('keypoints')
>>> ann_dct.pop('id')
>>> for i in range(1,4):
>>>     ann_dct.update(image_id=i)
>>>     d.add_annotation(**ann_dct)
>>> for img, sensor in zip(d.imgs.values(), ['WV', 'S2', 'L8']):
>>>     img['sensor_coarse'] = sensor
>>> d.remove_categories(range(2,9))
>>> d.cats[1]['supercategory'] = None
>>> d.cats[1]['name'] = 'change'
>>> d.images().set('channels', 'rgb')
>>> # test everything except geo-info
>>> def _normalize_annots(coco_dset):
>>>     coco_dset = dedupe_annots(coco_dset)
>>>     coco_dset = remove_small_annots(coco_dset,
>>>         min_geo_precision=None)
>>>     return coco_dset
>>> coco_dset = d.copy()
>>> coco_dset = _normalize_annots(coco_dset)
>>> assert coco_dset.anns == d.anns
>>> coco_dset = ensure_videos(coco_dset)
>>> assert coco_dset.index.vidid_to_gids[1] == coco_dset.imgs.keys()
>>> n_existing_annots = coco_dset.n_annots
>>> coco_dset = OverlapTrack().apply_per_video(coco_dset)
>>> assert set(coco_dset.annots().get('track_id')) == {1}
>>> assert coco_dset.n_annots == n_existing_annots
>>> coco_dset = dedupe_tracks(coco_dset)
>>> assert set(coco_dset.annots().get('track_id')) == {1}
>>> coco_dset = normalize_phases(coco_dset, baseline_keys={'change'})
>>> assert (coco_dset.annots().cnames ==
>>>     ['Site Preparation', 'Site Preparation', 'Post Construction'])
>>> from geowatch import heuristics
>>> coco_dset = heuristics.normalize_sensors(
>>>     coco_dset, sensor_warnings=False, format='iarpa')
>>> assert (coco_dset.images().get('sensor_coarse') ==
>>>     ['WorldView', 'Sentinel-2', 'Landsat 8'])
geowatch.tasks.tracking.normalize.normalize(coco_dset, track_fn, gt_dset=None, viz_out_dir=None, use_viterbi=False, sensor_warnings=True, **track_kwargs)

Alias of run_tracking_pipeline(). See that function above for the full description and example.