geowatch.tasks.tracking.normalize module
- geowatch.tasks.tracking.normalize.dedupe_annots(coco_dset)
Check for annotations with different aids but identical geometry, and keep only one of each.
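For intuition, a minimal sketch of one way such duplicates could be found: group annotations by image and geometry, and flag any group holding more than one aid. The grouping key here is illustrative, not necessarily how the module implements the check:

>>> import kwcoco
>>> import ubelt as ub
>>> d = kwcoco.CocoDataset.demo()
>>> # group annotations by (image, geometry); groups with more than one
>>> # aid hold duplicate candidates
>>> key = lambda ann: (ann['image_id'], str(ann.get('segmentation', ann['bbox'])))
>>> groups = ub.group_items(d.anns.values(), key)
>>> dup_aids = [[a['id'] for a in g] for g in groups.values() if len(g) > 1]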
- geowatch.tasks.tracking.normalize.remove_small_annots(coco_dset, min_area_px=1, min_geo_precision=6)
There are several reasons for a detection to be too small to keep. Remove these and return the rest of the dataset.
- Detections that aren’t well-formed polygons.
These are simply errors, and they show up fairly often in an arbitrary dset. TODO: figure out why. Possible culprits: mask_to_scored_polygons? cropping in propagate_labels?
>>> # xdoctest: +SKIP
>>> d = kwcoco.CocoDataset('pred_KR_R01.kwcoco_timeagg_v1.json')
>>> sum(are_invalid(d.annots())), d.n_annots
(6686, 13136)
- Very small detections in pixel space (area < 1 pixel).
These probably couldn’t represent something visible, unless the GSD is very large. Skip this check by setting min_area_px=0
- Overly-precise geo-detections.
Because GSD varies, and because lat-lon is not equal-area, a detection can be trivial in geo space but not in pixel space. The GeoJSON spec recommends a precision of 6 decimal places, which is ~10cm (IARPA annotations conform to this). This check removes detections that are empty when rounded. Skip this check by setting min_geo_precision=None.
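A quick back-of-the-envelope check of the ~10cm claim (one degree of latitude is roughly 111km; the numbers below are illustrative, not from the module):

>>> # 6 decimal places of a degree vs. metric offsets
>>> deg_per_meter = 1 / 111_000
>>> round(0.1 * deg_per_meter, 6)   # a 10cm offset survives rounding
1e-06
>>> round(0.01 * deg_per_meter, 6)  # a 1cm offset rounds away
0.0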
Example
>>> import kwimage
>>> import numpy as np
>>> from copy import deepcopy
>>> from geowatch.tasks.tracking.normalize import remove_small_annots
>>> from geowatch.demo import smart_kwcoco_demodata
>>> coco_dset = smart_kwcoco_demodata.demo_kwcoco_with_heatmaps()
>>> # This dset has 1 video with all images the same size.
>>> # For testing, resize one of the images so there is a meaningful
>>> # difference between img space and vid space.
>>> scale_factor = 0.5
>>> aff = kwimage.Affine.coerce({'scale': scale_factor})
>>> img = coco_dset.imgs[1]
>>> img['width'] *= scale_factor
>>> img['height'] *= scale_factor
>>> img['warp_img_to_vid']['scale'] = 1 / scale_factor
>>> for aux in img['auxiliary']:
>>>     aux['warp_aux_to_img']['scale'] = np.array(
>>>         aux['warp_aux_to_img'].get('scale', 1)) * scale_factor
>>> annots = coco_dset.annots(gid=img['id'])
>>> old_annots = deepcopy(annots)
>>> dets = annots.detections.warp(aff)
>>> # TODO this doesn't handle keypoints, and is rather brittle; is
>>> # there a way to simply do something like:
>>> # annots.detections = annots.detections.warp(aff)
>>> annots.set('bbox', dets.boxes.to_coco(style='new'))
>>> annots.set('segmentation', dets.data['segmentations'].to_coco(style='new'))
>>> # test that scaling worked
>>> assert np.all(annots.boxes.area < old_annots.boxes.area)
>>> assert np.allclose(annots.boxes.warp(aff.inv()).area, old_annots.boxes.area)
>>> # test that remove_small_annots no-ops with no threshold
>>> # (ie there are no invalid annots here)
>>> assert coco_dset.n_annots == remove_small_annots(
>>>     deepcopy(coco_dset), min_area_px=0, min_geo_precision=None).n_annots
>>> # test that annots can be removed
>>> assert remove_small_annots(
>>>     deepcopy(coco_dset), min_area_px=1e99, min_geo_precision=None).n_annots == 0
>>> # test that annotations are filtered in video space:
>>> # pick a threshold above the img annot size and below the vid
>>> # annot size; the annot should not be removed
>>> thresh = annots.boxes.area[0] + 1
>>> assert annots.aids[0] in remove_small_annots(
>>>     deepcopy(coco_dset), min_area_px=thresh,
>>>     min_geo_precision=None).annots(gid=img['id']).aids
>>> # test that some but not all annots can be removed
>>> filtered = remove_small_annots(
>>>     deepcopy(coco_dset), min_area_px=10000, min_geo_precision=None)
>>> assert filtered.n_annots > 0 and filtered.n_annots < coco_dset.n_annots
>>> # TODO test min_geo_precision
- geowatch.tasks.tracking.normalize.ensure_videos(coco_dset)
Ensure every image belongs to a video (adding a dummy video if necessary) and has a frame_index.
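The documented post-condition can be stated directly as asserts; a small sketch on kwcoco demo data (the invariant is from this docstring, but the exact fields checked are assumptions, so the snippet is marked skip):

>>> # xdoctest: +SKIP
>>> import kwcoco
>>> from geowatch.tasks.tracking.normalize import ensure_videos
>>> d = ensure_videos(kwcoco.CocoDataset.demo())
>>> assert all('frame_index' in img for img in d.imgs.values())
>>> assert all(img.get('video_id') is not None for img in d.imgs.values())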
- geowatch.tasks.tracking.normalize.dedupe_tracks(coco_dset)
Assuming that videos are made of disjoint images, ensure that track ids are not shared by two tracks in different videos.
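On toy data the invariant is easy to state: after deduplication, no track id spans two videos. A sketch of that check (illustrative records, not the module's code):

>>> import ubelt as ub
>>> # toy annot records as (track_id, video_id) pairs; track 2 is shared
>>> records = [(1, 'vidA'), (1, 'vidA'), (2, 'vidB'), (2, 'vidC')]
>>> vids_per_track = ub.group_items(
>>>     (v for _, v in records), (t for t, _ in records))
>>> shared = {t for t, vs in vids_per_track.items() if len(set(vs)) > 1}
>>> assert shared == {2}  # track 2 would get a fresh id in one of the videos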
- geowatch.tasks.tracking.normalize.normalize_phases(coco_dset, use_viterbi=False, t_probs=None, e_probs=None, baseline_keys={'salient'}, prediction_key='phase_transition_days')
Convert the internal representation of phases to the IARPA standard, insert a baseline guess for activity classification, and remove empty tracks.
HACK: add a Post Construction frame at the end of every track until partial sites are supported.
- The only remaining categories in the returned coco_dset should be: Site Preparation, Active Construction, Post Construction
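Since use_viterbi smooths the per-frame labels with transition (t_probs) and emission (e_probs) probabilities, a self-contained toy Viterbi pass over the three phase states may help; the matrices below are made up for illustration and are not the module's defaults:

>>> import numpy as np
>>> states = ['Site Preparation', 'Active Construction', 'Post Construction']
>>> # toy transitions: phases mostly persist and never move backward
>>> T = np.array([[0.8, 0.2, 0.0],
>>>               [0.0, 0.8, 0.2],
>>>               [0.0, 0.0, 1.0]])
>>> # toy per-frame emission probabilities (rows are frames)
>>> E = np.array([[0.7, 0.2, 0.1],
>>>               [0.4, 0.5, 0.1],
>>>               [0.1, 0.6, 0.3],
>>>               [0.1, 0.3, 0.6]])
>>> n, k = E.shape
>>> score = np.log(E[0])
>>> back = np.zeros((n, k), dtype=int)
>>> for t in range(1, n):
>>>     # cand[i, j]: best score ending in state i, then moving to j
>>>     cand = score[:, None] + np.log(T + 1e-9)
>>>     back[t] = cand.argmax(axis=0)
>>>     score = cand.max(axis=0) + np.log(E[t])
>>> # backtrack from the best final state to recover the smoothed labels
>>> path = [int(score.argmax())]
>>> for t in range(n - 1, 0, -1):
>>>     path.append(int(back[t][path[-1]]))
>>> smoothed = [states[i] for i in reversed(path)]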
TODO: make this a step in track_fn to take advantage of heatmap info?
Example
>>> # test baseline guess
>>> from geowatch.tasks.tracking.normalize import normalize_phases
>>> from geowatch.demo import smart_kwcoco_demodata
>>> dset = smart_kwcoco_demodata.demo_kwcoco_with_heatmaps()
>>> dset.remove_categories([1, 3, 4, 5])
>>> dset.cats[2]['name'] = 'salient'
>>> assert dset.cats == {2: {'id': 2, 'name': 'salient'}}
>>> # HACK, this shouldn't be needed?
>>> # TODO file bug report
>>> dset._build_index()
>>> dset = normalize_phases(dset)
>>> assert (dset.annots(track_id=1).cnames ==
>>>         ((['Site Preparation'] * 10) +
>>>          (['Active Construction'] * 9) +
>>>          (['Post Construction'])))
>>> # try again with smoothing
>>> dset = normalize_phases(dset, use_viterbi=True)
- geowatch.tasks.tracking.normalize.dedupe_dates(coco_dset)
Ensure a tracked kwcoco file has at most 1 annot per track per date. [1]
There are several potential ways to do this:
- take the highest-resolution sensor [currently done]
- take the image with the best coverage (least nodata)
- take the latest time
- majority-vote labels / average scores
- average heatmaps before polygons are created
Given that this probably has a minimal impact on scores, the safest method is chosen.
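A minimal sketch of the chosen strategy on toy records (the sensor ranking below is illustrative; geowatch keeps its actual sensor preference order elsewhere):

>>> import ubelt as ub
>>> # toy (track, date, sensor) observations; lower rank = higher resolution
>>> rank = {'WV': 0, 'S2': 1, 'L8': 2}
>>> obs = [('trk1', '2020-01-01', 'S2'), ('trk1', '2020-01-01', 'WV'),
>>>        ('trk1', '2020-01-02', 'L8')]
>>> # group by (track, date), then keep the highest-resolution observation
>>> groups = ub.group_items(obs, key=lambda o: o[:2])
>>> kept = [min(g, key=lambda o: rank[o[2]]) for g in groups.values()]
>>> assert len(kept) == 2 and ('trk1', '2020-01-01', 'WV') in kept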
References
[1] https://smartgitlab.com/TE/metrics-and-test-framework/-/issues/63
Example
>>> from geowatch.tasks.tracking.normalize import *  # NOQA
>>> import geowatch
>>> import kwarray
>>> import ubelt as ub
>>> coco_dset = geowatch.coerce_kwcoco('geowatch-msi', geodata=True, dates=True)
>>> # Add 0-4 duplicate images to each video
>>> rng = kwarray.ensure_rng(613544)
>>> gids_to_duplicate = list(ub.flatten([
>>>     rng.choice(gs, rng.randint(0, 4)) for gs in coco_dset.videos().images]))
>>> for gid in gids_to_duplicate:
>>>     img1 = ub.udict(coco_dset.index.imgs[gid]) - {'id'}
>>>     img1['name'] = img1['name'] + '_duplicated'
>>>     coco_dset.add_image(**img1)
>>> coco_dset_with_dups = coco_dset.copy()
>>> coco_dset_fixed = dedupe_dates(coco_dset.copy())
>>> assert coco_dset_fixed.n_images < coco_dset_with_dups.n_images
- geowatch.tasks.tracking.normalize.run_tracking_pipeline(coco_dset, track_fn, gt_dset=None, viz_out_dir=None, use_viterbi=False, sensor_warnings=True, **track_kwargs)
Driver function to apply all normalizations
Todo
Rename this to something like run_tracker. This is the entry point to the main tracking pipeline.
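For orientation, a minimal hedged invocation on demo data; whether track_fn expects the class or an instance is an assumption here, so the snippet is marked skip:

>>> # xdoctest: +SKIP
>>> from geowatch.tasks.tracking.normalize import run_tracking_pipeline
>>> from geowatch.tasks.tracking.from_polygon import OverlapTrack
>>> from geowatch.demo import smart_kwcoco_demodata
>>> coco_dset = smart_kwcoco_demodata.demo_kwcoco_with_heatmaps()
>>> # NOTE: passing the class is an assumption; track_kwargs are
>>> # forwarded to the track_fn
>>> tracked = run_tracking_pipeline(coco_dset, track_fn=OverlapTrack)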
Example
>>> import kwcoco as kc
>>> from geowatch.tasks.tracking.normalize import *  # NOQA
>>> from geowatch.tasks.tracking.from_polygon import OverlapTrack
>>> # create demodata
>>> d = kc.CocoDataset.demo()
>>> ann_dct = d.anns[1]
>>> d.remove_annotations(range(1, 12))
>>> ann_dct.pop('keypoints')
>>> ann_dct.pop('id')
>>> for i in range(1, 4):
>>>     ann_dct.update(image_id=i)
>>>     d.add_annotation(**ann_dct)
>>> for img, sensor in zip(d.imgs.values(), ['WV', 'S2', 'L8']):
>>>     img['sensor_coarse'] = sensor
>>> d.remove_categories(range(2, 9))
>>> d.cats[1]['supercategory'] = None
>>> d.cats[1]['name'] = 'change'
>>> d.images().set('channels', 'rgb')
>>> # test everything except geo-info
>>> def _normalize_annots(coco_dset):
>>>     coco_dset = dedupe_annots(coco_dset)
>>>     coco_dset = remove_small_annots(coco_dset, min_geo_precision=None)
>>>     return coco_dset
>>> coco_dset = d.copy()
>>> coco_dset = _normalize_annots(coco_dset)
>>> assert coco_dset.anns == d.anns
>>> coco_dset = ensure_videos(coco_dset)
>>> assert coco_dset.index.vidid_to_gids[1] == coco_dset.imgs.keys()
>>> n_existing_annots = coco_dset.n_annots
>>> coco_dset = OverlapTrack().apply_per_video(coco_dset)
>>> assert set(coco_dset.annots().get('track_id')) == {1}
>>> assert coco_dset.n_annots == n_existing_annots
>>> coco_dset = dedupe_tracks(coco_dset)
>>> assert set(coco_dset.annots().get('track_id')) == {1}
>>> coco_dset = normalize_phases(coco_dset, baseline_keys={'change'})
>>> assert (coco_dset.annots().cnames ==
>>>         ['Site Preparation', 'Site Preparation', 'Post Construction'])
>>> from geowatch import heuristics
>>> coco_dset = heuristics.normalize_sensors(
>>>     coco_dset, sensor_warnings=False, format='iarpa')
>>> assert (coco_dset.images().get('sensor_coarse') ==
>>>         ['WorldView', 'Sentinel-2', 'Landsat 8'])
- geowatch.tasks.tracking.normalize.normalize(coco_dset, track_fn, gt_dset=None, viz_out_dir=None, use_viterbi=False, sensor_warnings=True, **track_kwargs)
Alias of run_tracking_pipeline(); its docstring and example are identical to the entry above.