Source code for geowatch.heuristics

"""
In the interest of development speed we can't always be perfect about correctly
passing the right metadata to functions that need it. This module serves as a
place to store hard-coded heuristics so we are explicit about where we are
cutting corners or using magic numbers. The idea is that this will make it
easier for us to go back and make the code robust.

References:
    https://gitlab.kitware.com/smart/annotations-wiki/-/blob/main/Annotation-Status-Types.md

"""
import ubelt as ub

# Kept as a named constant to make it easier to switch to "assets" when we
# finally deprecate auxiliary.
# COCO_ASSETS_KEY = 'assets'
COCO_ASSETS_KEY = 'auxiliary'

# Phase category names used as the heuristic start / end states.
HEURISTIC_START_STATES = {'No Activity'}
HEURISTIC_END_STATES = {'Post Construction'}

DEFAULT_QA_ENCODING = 'ARA-4'


# # FIXME: Hard-coded category aliases.
# https://smartgitlab.com/TE/standards


"""
Status Data Info
================

The following is a long-form table, where list-items are rows and keys are
columns to describe how to interpret IARPA annotation status labels in
different cases. More information on status labels can be found in
[TEAnnotStatus]_ and in our internal fork [KWAnnotStatus]_.


Column Info:
    * status:
        the name of the site status label

    * color:
        what color to use for this status in visualizations. These are
        mostly taken from [EvalMetricColors]_, but in cases where they are
        undefined we make them up.

    * kwcoco_catname:
        what category to map this status to in the kwcoco dataset (and thus
        what will be learned), Note: a status is different than a phase label,
        and annotations with phase labels may overwrite the kwcoco category
        defined here. This is used in geowatch reproject.

    * positive_match_confusion
        This is the label the truth is given when it has some match in our set
        of positive predictions.  Denote what type of confusion a truth status
        incurs when it is matched.


The following code prints a concise version of this table and shows a legend
with colors.

.. code:: python

    from geowatch.heuristics import *  # NOQA
    import pandas as pd
    import rich
    df = pd.DataFrame(HUERISTIC_STATUS_DATA)
    rich.print(df.to_string())

    import kwplot
    kwplot.autompl()
    status_to_color = {r['status']: r['color'] for r in HUERISTIC_STATUS_DATA}
    img = kwplot.make_legend_img(status_to_color, dpi=300)
    kwplot.imshow(img, fnum=1)


References
----------
.. [TEAnnotStatus] https://smartgitlab.com/TE/annotations/-/wikis/Annotation-Status-Types
.. [KWAnnotStatus] https://gitlab.kitware.com/smart/annotations-wiki/-/blob/main/Annotation-Status-Types.md
.. [EvalMetricColors] https://smartgitlab.com/TE/metrics-and-test-framework/-/blob/main/iarpa_smart_metrics/evaluation.py#L1205

"""
# Long-form table of site status labels: one dict per status.
# Every row defines 'status' and 'color'. The 'kwcoco_catname' and
# 'positive_match_confusion' columns are optional: the transient rows define
# no 'positive_match_confusion' and the prediction rows define neither, so
# code in this module reads them with ``'kwcoco_catname' in row`` or
# ``row.get('positive_match_confusion', None)``.
HUERISTIC_STATUS_DATA = [
    ### Misc Rows
    {
        'status': 'seen',
        'color': 'cyan',
        'kwcoco_catname': None,
        'positive_match_confusion': 'gt_seen',
    },
    {
        'status': 'train',
        'color': 'cyan',
        'kwcoco_catname': None,
        'positive_match_confusion': 'gt_seen',
    },

    ### Ignore Rows
    {
        'status': 'ignore',
        'color': 'lightsalmon',
        'kwcoco_catname': 'ignore',
        'positive_match_confusion': 'gt_ignore',
    },

    ### Negative Rows
    # Note: colors for these status labels are undefined, using neutral gray
    {
        'status': 'negative',
        'color': 'gray',
        'kwcoco_catname': 'negative',
        'positive_match_confusion': 'gt_false_pos',
    },
    {
        'status': 'negative_unbounded',
        'color': 'gray',
        'kwcoco_catname': 'negative',
        'positive_match_confusion': 'gt_false_pos',
    },

    ### Positive Rows
    {
        'status': 'positive_annotated',
        'color': 'black',
        # Requires a category phase label, do not use status to map
        'kwcoco_catname': None,
        'positive_match_confusion': 'gt_true_pos',
    },
    {
        'status': 'positive_annotated_static',
        'color': 'black',
        # Requires a category phase label, do not use status to map
        'kwcoco_catname': None,
        'positive_match_confusion': 'gt_true_pos',
    },
    {
        'status': 'positive_excluded',
        # Modifying the official color scheme to differentiate this from
        # negative
        # 'color': 'gray',
        'color': 'darkblue',
        # This is positive, but is not "big" enough
        # Setting this to "positive" by default, because it is ignored at
        # evaluation time.
        # 'kwcoco_catname': 'ignore',
        'kwcoco_catname': 'positive',
        # A prediction matching this status counts as a false positive.
        'positive_match_confusion': 'gt_false_pos',
    },
    {
        'status': 'positive_partial',
        'color': 'black',
        'kwcoco_catname': 'positive',
        'positive_match_confusion': 'gt_true_pos',
    },
    {
        'status': 'positive_pending',
        'color': 'black',
        'kwcoco_catname': 'positive',
        'positive_match_confusion': 'gt_true_pos',
    },

    {
        'status': 'positive_unbounded',
        'color': 'darkviolet',
        'kwcoco_catname': 'positive',   # Start or end date might not be defined
        'positive_match_confusion': 'gt_positive_unbounded',
    },

    ### Transient Rows
    # These rows define no 'positive_match_confusion' column.
    {
        'status': 'transient_positive',
        'color': 'steelblue',
        'kwcoco_catname': 'transient',
    },
    {
        'status': 'transient_negative',
        # NOTE(review): confirm the color backend accepts the name 'rust'.
        'color': 'rust',
        'kwcoco_catname': 'negative',
    },
    {
        'status': 'transient_excluded',
        'kwcoco_catname': 'ignore',
        'color': 'lightsalmon',
    },
    {
        'status': 'transient_ignore',
        'kwcoco_catname': 'ignore',
        'color': 'lightsalmon',
    },
    {
        'status': 'transient_pending',
        'kwcoco_catname': 'ignore',
        'color': 'lightsalmon',
    },

    ### Prediction Rows
    # These rows define only 'status' and 'color'.

    # TODO? Add alias of pending for "positive_pending"? For QFabric?
    {
        'status': 'system_confirmed',
        'color': 'kitware_blue'
    },

    {
        'status': 'system_rejected',
        'color': 'kitware_yellow'
    },
]

# Backwards compat: also expose each row's status under the 'name' key.
# Note that the loop variable ``row`` remains bound at module level afterwards.
for row in HUERISTIC_STATUS_DATA:
    row['name'] = row['status']


# Convenience mappings used in reproject annotations

# Maps status -> kwcoco category name for every row that defines the
# 'kwcoco_catname' column (the mapped value may be None).
PHASE_STATUS_TO_KWCOCO_CATNAME = {
    row['name']: row['kwcoco_catname'] for row in HUERISTIC_STATUS_DATA
    if 'kwcoco_catname' in row
}

# Maps status -> its full row. The values are the same dict objects stored in
# HUERISTIC_STATUS_DATA, not copies.
IARPA_STATUS_TO_INFO = {row['status']: row for row in HUERISTIC_STATUS_DATA}

# update HUERISTIC_STATUS_DATA
# for status, row in IARPA_STATUS_TO_INFO.items():
#     if status in PHASE_STATUS_TO_KWCOCO_CATNAME:
#         row['kwcoco_catname'] = PHASE_STATUS_TO_KWCOCO_CATNAME[status]

# for status, row in IARPA_STATUS_TO_INFO.items():
#     if status in PHASE_STATUS_TO_MATCHED_CONFUSION:
#         row['positive_match_confusion'] = PHASE_STATUS_TO_MATCHED_CONFUSION[status]


# Disabled debugging aid: flip the condition to print the table.
if 0:
    import pandas as pd
    print(pd.DataFrame(HUERISTIC_STATUS_DATA).to_string())


# Groups status labels by how they are treated when no prediction matches
# them (see iarpa_assign_truth_confusion in this module).
# NOTE(review): the 'ignore' value is a plain string while the other values
# are lists, so ``x in IARPA_REAL_STATUS['ignore']`` would be a substring
# test - confirm whether a list was intended before relying on it.
IARPA_REAL_STATUS = {
    'positive': ["positive_annotated", "positive_annotated_static", "positive_partial", "positive_pending"],
    'negative': ["positive_excluded", "negative", "negative_unbounded"],
    'ignore': 'ignore',
}


"""
For official color definitions see:

    :func:`iarpa_smart_metrics.evaluation.Evaluation.get_sm_color` and
    :func:`iarpa_smart_metrics.evaluation.Evaluation.get_gt_color`
"""
# Color used to draw each confusion label.
IARPA_CONFUSION_COLORS = {
    # Labels assigned to truth ('gt_*')
    'gt_true_neg': 'green',  # no IARPA color for this, make one up.
    'gt_true_pos': 'lime',
    'gt_false_pos': 'red',
    'gt_false_neg': 'black',
    'gt_positive_unbounded': "darkviolet",
    'gt_ignore': "lightsalmon",
    'gt_seen': "gray",
    # Labels assigned to predictions ('sm_*')
    'sm_pos_match': "aquamarine",
    'sm_partially_wrong': "orange",
    'sm_completely_wrong': "magenta",
    'sm_ignore': "lightsalmon",  # no IARPA color for this, make one up
}



[docs]
def iarpa_assign_truth_confusion(truth_status, has_positive_match):
    """
    Assign a confusion label to a truth site.

    Args:
        truth_status (str): status label of the truth site. When
            ``has_positive_match`` is truthy it must be a key of
            ``IARPA_STATUS_TO_INFO``.
        has_positive_match (bool | int): truthy if the truth site matched
            some positive prediction.

    Returns:
        str | None: a ``'gt_*'`` confusion label, or None when no rule
        applies to the status.

    Example:
        >>> from geowatch.heuristics import *  # NOQA
        >>> import pandas as pd
        >>> rows = []
        >>> for truth_status in IARPA_STATUS_TO_INFO.keys():
        >>>     for has_positive_match in [0, 1]:
        >>>         gt_cfsn = iarpa_assign_truth_confusion(truth_status, has_positive_match)
        >>>         rows.append({
        >>>             'truth_status': truth_status,
        >>>             'has_positive_match': has_positive_match,
        >>>             'confusion': gt_cfsn,
        >>>         })
        >>> print(pd.DataFrame(rows).to_string())
    """
    if has_positive_match:
        # Matched truth: the label comes straight from the status table.
        status_info = IARPA_STATUS_TO_INFO[truth_status]
        return status_info.get('positive_match_confusion', None)

    # Unmatched truth: the first rule containing the status decides the label.
    unmatched_rules = [
        (["positive_unbounded"], 'gt_positive_unbounded'),
        (["ignore"], 'gt_ignore'),
        (['seen', 'train'], 'gt_seen'),
        (IARPA_REAL_STATUS['negative'], 'gt_true_neg'),
        (IARPA_REAL_STATUS['positive'], 'gt_false_neg'),
        ({'transient_positive'}, 'gt_false_neg'),
    ]
    for statuses, label in unmatched_rules:
        if truth_status in statuses:
            return label
    return None




[docs]
def iarpa_assign_pred_confusion(truth_match_statuses):
    """
    Assign a confusion label to a prediction from the truth it matched.

    Args:
        truth_match_statuses (Iterable[str] | None): status labels of the
            truth sites matched by the prediction. Each must be a key of
            ``IARPA_STATUS_TO_INFO``. An empty collection or None means the
            prediction matched nothing.

    Returns:
        str | None: ``'sm_completely_wrong'`` when nothing was matched or
        only false-positive truth was matched, ``'sm_pos_match'`` /
        ``'sm_partially_wrong'`` when positive truth was matched without /
        with false-positive truth, ``'sm_ignore'`` when only ignore truth
        was matched, otherwise None.

    Example:
        >>> from geowatch.heuristics import *  # NOQA
        >>> import itertools as it
        >>> truth_match_statuses = {'positive_partial', 'positive_excluded'}
        >>> for combo in it.combinations(IARPA_STATUS_TO_INFO, 2):
        >>>     truth_match_statuses = combo
        >>>     pred_cfsn = iarpa_assign_pred_confusion(truth_match_statuses)
        >>>     print(f'{pred_cfsn=} for {truth_match_statuses}')
    """
    if not truth_match_statuses:
        # Nothing matched. Return immediately so that None is accepted as
        # well as an empty collection, instead of failing in the loop below.
        return 'sm_completely_wrong'

    # Confusion each matched truth status incurs when it is matched.
    truth_cfsns = {
        IARPA_STATUS_TO_INFO[s].get('positive_match_confusion', None)
        for s in truth_match_statuses
    }
    if truth_cfsns & {'gt_true_pos', 'gt_positive_unbounded'}:
        if 'gt_false_pos' in truth_cfsns:
            return 'sm_partially_wrong'
        return 'sm_pos_match'
    if 'gt_false_pos' in truth_cfsns:
        return 'sm_completely_wrong'
    if truth_cfsns == {'gt_ignore'}:
        return 'sm_ignore'
    return None



# metrics-and-test-framework/evaluation.py:1684
# Note: the condition field is in development the idea is to
# encode when a category should be added as a label.


[docs]
def TAG_IF(tag, condition):
    """
    Build a ``tag_if`` condition node.

    Returns:
        dict: with keys ``type``, ``op``, ``tag`` and ``condition``.
    """
    return dict(type='condition', op='tag_if', tag=tag, condition=condition)




[docs]
def CONDITION(op, args):
    """
    Build a generic condition node for operation ``op`` with ``args``.

    Returns:
        dict: with keys ``type``, ``op`` and ``args``.
    """
    node = {'type': 'condition'}
    node['op'] = op
    node['args'] = args
    return node




[docs]
def ALL(*args):
    """
    Build an ``all`` condition node over the given positional arguments.

    Returns:
        dict: with keys ``type``, ``op`` and ``args``, where ``args`` is the
        tuple of positional arguments.
    """
    return dict(type='condition', op='all', args=args)



# Heuristic category table: one dict per category name.
# Every row has 'name', 'color' and 'scored'; 'required' is only present on
# some rows. Names are unique (a verbatim duplicate of the 'transient' row
# was removed so lookups by name and the derived scored list see it once).
CATEGORIES = [
    {
        'name': 'background',
        'color': 'black',
        'scored': False,
        'required': True,
    },

    {
        'name': 'ignore',
        'color': 'violet',
        'scored': False,
        'required': True,
    },

    {
        'name': 'Unknown',
        'color': 'blueviolet',
        'scored': False,
    },

    {
        'name': 'positive',
        'color': 'palegreen',
        'scored': False,
    },

    {
        'name': 'negative',
        'color': 'gray',
        'scored': False,
    },

    {
        'name': 'Site Preparation',
        'color': 'gold',
        'scored': True,
    },
    {
        'name': 'Active Construction',
        'color': 'lime',
        'scored': True,
    },
    # Conditional classes
    {
        'name': 'Post Construction',
        'color': 'darkturquoise',
        'scored': True,
    },
    {
        'name': 'No Activity',
        'color': 'tomato',
        'scored': True,
    },
    # Transient
    {
        'name': 'transient',
        'color': 'steelblue',
        'scored': True,
    },
]



[docs]
def hack_track_categories(track_catnames, task):
    """
    Remap the category names of the observations in one track for a task.

    Args:
        track_catnames (List[str | None]): category name of each observation
            in the track.
        task (str): either ``'saliency'`` or ``'class'``.

    Returns:
        List[str]: Modified categories

    Raises:
        KeyError: if ``task`` is neither ``'saliency'`` nor ``'class'``.

    Example:
        >>> from geowatch.heuristics import *  # NOQA
        >>> basis = {
        >>>     #'task': ['class', 'saliency'],
        >>>     'task': ['class'],
        >>>     'track_catnames': [
        >>>         ['No Activity'],
        >>>         ['Post Construction'],
        >>>         ['Post Construction', 'Post Construction', ],
        >>>         ['Post Construction', 'Post Construction', 'Post Construction', ],
        >>>         ['No Activity', 'ignore', 'ignore'],
        >>>         ['No Activity', 'Post Construction'],
        >>>         ['No Activity', 'Site Preparation', 'Post Construction'],
        >>>     ],
        >>> }
        >>> for kw in ub.named_product(basis):
        >>>     task = kw['task']
        >>>     track_catnames = kw['track_catnames']
        >>>     kw['new_catnames'] = hack_track_categories(track_catnames, task)
        >>>     print('kw = {}'.format(ub.urepr(kw, nl=1)))

    Example:
        >>> from geowatch.heuristics import *  # NOQA
        >>> track_catnames = ['negative', 'negative']
        >>> task = 'saliency'
        >>> result = hack_track_categories(track_catnames, task)
        >>> print(result)
        ['negative', 'negative']

    """
    # FIXME! These rules are hard coded. They should be encoded as conditions
    # on the categories themselves (possibly via a small parsed
    # mini-language), but getting that right is hard, so it is hacked here.

    if task == 'saliency':
        remapped = []
        for name in track_catnames:
            if name in ('No Activity', 'Post Construction'):
                name = 'background'
            elif name == 'Unknown':
                name = 'ignore'
            remapped.append(name)
        return remapped

    if task != 'class':
        raise KeyError(task)

    phase_names = {'No Activity', 'Site Preparation', 'Active Construction', 'Post Construction'}
    present = set(track_catnames)

    # For each context class: the set of other labels that, when they are the
    # only company in the track, turn the context class into 'ignore'.
    ignorable_company = {
        'No Activity': {'background', 'ignore', 'Unknown'},
        'Post Construction': {'background', 'ignore'},
    }
    # Unconditional renames.
    direct_rename = {
        'positive': 'ignore',
        'negative': 'background',
        'Unknown': 'ignore',
    }

    remapped = []
    for name in track_catnames:
        if name in ignorable_company:
            others = present - {name, None}
            if others & phase_names:
                # Another phase label exists in the track: keep the label.
                pass
            elif not others:
                # The track holds nothing but this label.
                name = 'background'
            elif others.issubset(ignorable_company[name]):
                name = 'ignore'
        elif name in direct_rename:
            name = direct_rename[name]
        remapped.append(name)
    return remapped



# Backwards compat (remove if nothing uses them)
# Rows of CATEGORIES split on their 'scored' flag (a missing flag counts as
# unscored).
CATEGORIES_SCORED = list(filter(lambda cat: cat.get('scored', False), CATEGORIES))
CATEGORIES_UNSCORED = [cat for cat in CATEGORIES if not cat.get('scored', False)]


# Groups of category names.
# Might need to split this up into a finer-grained structure
IGNORE_CLASSNAMES = {'Unknown', 'ignore'}
BACKGROUND_CLASSES = {'background'}
NEGATIVE_CLASSES = {'negative'}
UNDISTINGUISHED_CLASSES = {'positive'}
CONTEXT_CLASSES = {'Post Construction', 'No Activity'}


# # These classes are used in BAS, but not in AC/SC
# UNDISTINGUISHED_CLASSES = {
#     'positive',
# }


# Categories grouped first by polarity ('positive' / 'negative') and then by
# whether they are 'scored' or 'unscored'. Each entry has a name and a color.
CATEGORIES_DCT = {
    'positive': {
        'scored': [
            {'name': 'Site Preparation', 'color': 'gold'},
            {'name': 'Active Construction', 'color': 'lime'},
            {'name': 'Post Construction', 'color': 'darkturquoise'},
        ],
        'unscored': [{'name': 'positive', 'color': 'green'}],
    },
    'negative': {
        # Maybe 'No Activity' should not be marked as "scored", because it
        # isn't
        'scored': [{'name': 'No Activity', 'color': 'tomato'}],
        'unscored': [
            {'name': 'Unknown', 'color': 'blueviolet'},
            {'name': 'ignore', 'color': 'slategray'},
            {'name': 'negative', 'color': 'orangered'},
            {'name': 'background', 'color': 'black'},
        ],
    },
}

# Same nesting as CATEGORIES_DCT, keeping only the 'name' field of each entry.
CNAMES_DCT = {
    polarity: {
        scoring: [entry['name'] for entry in entries]
        for scoring, entries in groups.items()
    }
    for polarity, groups in CATEGORIES_DCT.items()
}


# For passing site summaries from BAS to SC
SITE_SUMMARY_CNAME = 'Site Boundary'



[docs]
def ensure_heuristic_coco_colors(coco_dset, force=False):
    """
    Give every category in a coco dataset a color, modifying it in place.

    Categories whose name appears in ``CATEGORIES`` receive the heuristic
    color; any category still without a color afterwards is filled in by
    ``_ensure_distinct_dict_colors``.

    Args:
        coco_dset (kwcoco.CocoDataset): object to modify
        force (bool): if True, overwrites existing colors if needed

    TODO:
        - [ ] Move this non-heuristic functionality to
            :func:`kwcoco.CocoDataset.ensure_class_colors`

    Example:
        >>> from geowatch.heuristics import *  # NOQA
        >>> import kwcoco
        >>> coco_dset = kwcoco.CocoDataset.demo()
        >>> ensure_heuristic_coco_colors(coco_dset)
        >>> assert all(c['color'] for c in coco_dset.cats.values())
    """
    for heuristic_cat in CATEGORIES:
        cat = coco_dset.index.name_to_cat.get(heuristic_cat['name'], None)
        if cat is None:
            # The dataset does not use this heuristic category.
            continue
        needs_color = force or cat.get('color', None) is None
        if needs_color:
            cat['color'] = heuristic_cat['color']

    # Fill in whatever is still uncolored.
    _ensure_distinct_dict_colors(coco_dset.dataset['categories'])




[docs]
def ensure_heuristic_category_tree_colors(classes, force=False):
    """
    Give every node of a category tree a color, modifying it in place.

    Nodes whose name appears in ``CATEGORIES`` receive the heuristic color;
    any node still without a color afterwards is filled in by
    ``_ensure_distinct_dict_colors``.

    Args:
        classes (kwcoco.CategoryTree): object to modify
        force (bool): if True, overwrites existing colors if needed

    TODO:
        - [ ] Move this non-heuristic functionality to
            :func:`kwcoco.CategoryTree.ensure_colors`
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: category_tree_ensure_color
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: category_category_colors
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: ensure_heuristic_category_tree_colors
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: ensure_heuristic_coco_colors

    Example:
        >>> from geowatch import heuristics
        >>> import kwcoco
        >>> classes = kwcoco.CategoryTree.coerce(['ignore', 'positive', 'Active Construction', 'foobar', 'Unknown', 'baz'])
        >>> heuristics.ensure_heuristic_category_tree_colors(classes)
        >>> assert all(d['color'] for n, d in classes.graph.nodes(data=True))
    """
    # Set any missing class color with the heuristic category
    for heuristic_cat in CATEGORIES:
        attrs = classes.graph.nodes.get(heuristic_cat['name'], None)
        if attrs is None:
            # The tree has no node with this heuristic name.
            continue
        needs_color = force or attrs.get('color', None) is None
        if needs_color:
            attrs['color'] = heuristic_cat['color']

    # Fill in whatever is still uncolored.
    node_attr_dicts = [attrs for _, attrs in classes.graph.nodes(data=True)]
    _ensure_distinct_dict_colors(node_attr_dicts)




[docs]
def category_tree_ensure_color(classes):
    """
    Ensures that each category in a CategoryTree has a color

    Nodes that already have a non-None ``'color'`` are left alone; the rest
    are assigned, in node order, colors drawn from
    ``kwimage.Color.distinct(len(classes))``.

    TODO:
        - [ ] Add to CategoryTree
        - [ ] TODO: better function
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: category_tree_ensure_color
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: category_category_colors
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: ensure_heuristic_category_tree_colors
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: ensure_heuristic_coco_colors

    Example:
        >>> import kwcoco
        >>> classes = kwcoco.CategoryTree.demo()
        >>> assert not any('color' in data for data in classes.graph.nodes.values())
        >>> category_tree_ensure_color(classes)
        >>> assert all('color' in data for data in classes.graph.nodes.values())
    """
    import kwimage
    # One candidate per class, so the supply cannot run out.
    fallback_colors = iter(kwimage.Color.distinct(len(classes)))
    for node in classes.graph.nodes:
        attrs = classes.graph.nodes[node]
        if attrs.get('color', None) is None:
            picked = next(fallback_colors)
            attrs['color'] = kwimage.Color(picked).as01()




[docs]
def category_category_colors(coco_dset):
    """
    Ensures that each category in a coco dataset has a color

    Categories in ``coco_dset.dataset['categories']`` whose ``'color'`` is
    missing or None are assigned ``kwimage.Color.random().as01()`` in place.

    TODO:
        - [ ] Add to CategoryTree
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: category_tree_ensure_color
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: category_category_colors
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: ensure_heuristic_category_tree_colors
        - [ ] Consolidate with ~/code/watch/geowatch/heuristics.py :: ensure_heuristic_coco_colors
    """
    import kwimage
    for cat in coco_dset.dataset['categories']:
        if cat.get('color', None) is not None:
            continue
        # A distinct-color palette was used here previously:
        # backup_colors = iter(kwimage.Color.distinct(len(cats)))
        # cat['color'] = kwimage.Color(next(backup_colors)).as01()
        cat['color'] = kwimage.Color.random().as01()



def _ensure_distinct_dict_colors(data_dicts, force=False):
    # Generalized part that could move to kwcoco
    have_dicts = [d for d in data_dicts if d.get('color', None) is not None]
    miss_dicts = [d for d in data_dicts if d.get('color', None) is None]
    num_uncolored = len(miss_dicts)
    if num_uncolored:
        import kwimage
        existing_colors = [kwimage.Color(d['color']).as01() for d in have_dicts]
        new_colors = kwimage.Color.distinct(
            num_uncolored, existing=existing_colors, legacy=False)
        for d, c in zip(miss_dicts, new_colors):
            d['color'] = c


# Ordered sets of channel names.
HUERISTIC_COMBINABLE_CHANNELS = list(map(ub.oset, [
    ['B04', 'B03', 'B02'],  # for onera
    ['matseg_1', 'matseg_2', 'matseg_3'],  # hack
    # ['snow_or_ice_field', 'built_up', 'grassland'],  # hack
]))

# Color for each confusion type.
# Other candidates considered for TP: 'white', 'snow' (off white)
CONFUSION_COLOR_SCHEME = dict(
    TN='black',
    TP='whitesmoke',  # off white
    FN='teal',
    FP='red',
)



[docs]
def dummy_legend():
    """
    Hack to make legend figures for slides.

    Shows three legends with kwplot: the confusion color scheme (figure 1),
    every category in ``CATEGORIES`` (figure 2), and the phase / Unknown
    subset of those categories (figure 3).

    Usage:
        from geowatch.heuristics import *  # NOQA
        dummy_legend()

    """
    import kwplot
    kwplot.autompl()

    def _show_legend(label_to_color, fnum):
        legend_img = kwplot.make_legend_img(label_to_color)
        kwplot.imshow(legend_img, fnum=fnum)

    _show_legend(CONFUSION_COLOR_SCHEME, 1)

    category_colors = {cat['name']: cat['color'] for cat in CATEGORIES}
    _show_legend(category_colors, 2)

    phase_colors = ub.dict_subset(category_colors, {
        'Post Construction', 'Site Preparation', 'Active Construction',
        'No Activity', 'Unknown'})
    _show_legend(phase_colors, 3)




[docs]
def build_image_header_text(**kwargs):
    """
    A heuristic for what sort of info is useful to plot on the header of an
    image.

    Values for ``sensor_coarse``, ``date_captured`` (falling back to
    ``timestamp``), ``frame_index``, ``id`` and ``name`` are looked up in the
    keyword arguments first and in ``img`` second. The first mapping that
    contains the key wins, even if the value stored there is empty.

    Kwargs:
        img
        coco_dset
        vidname,
        _header_extra

        gid,
        frame_index,
        dset_idstr,
        name,
        sensor_coarse,
        date_captured

    Returns:
        List[str]: the non-empty header lines, in display order.

    Example:
        >>> from geowatch.heuristics import *  # NOQA
        >>> img = {
        >>>     'id': 1,
        >>>     'frame_index': 0,
        >>>     'date_captured': '2020-01-01',
        >>>     'name': 'BLARG',
        >>>     'sensor_coarse': 'Sensor1',
        >>> }
        >>> kwargs = {
        >>>     'img': img,
        >>>     'dset_idstr': '',
        >>>     'name': '',
        >>>     '_header_extra': None,
        >>> }
        >>> header_lines = build_image_header_text(**kwargs)
        >>> print('header_lines = {}'.format(ub.urepr(header_lines, nl=1)))

    Example:
        >>> from geowatch.heuristics import *  # NOQA
        >>> img = {
        >>>     'id': 1,
        >>>     'frame_index': 0,
        >>>     'timestamp': '2020-01-01',
        >>>     'name': 'BLARG',
        >>> }
        >>> kwargs = {
        >>>     'img': img,
        >>>     'dset_idstr': '',
        >>>     'name': '',
        >>>     '_header_extra': None,
        >>> }
        >>> header_lines = build_image_header_text(**kwargs)
        >>> print('header_lines = {}'.format(ub.urepr(header_lines, nl=1)))
    """
    img = kwargs.get('img', {})
    header_extra = kwargs.get('_header_extra', None)
    dset_idstr = kwargs.get('dset_idstr', '')

    def _first_found(key, default=ub.NoParam):
        # Explicit keyword arguments take priority over the image dict.
        sources = (kwargs, img)
        value = next((src[key] for src in sources if key in src), default)
        if value is ub.NoParam:
            raise Exception
        return value

    sensor_coarse = _first_found('sensor_coarse', 'unknown')

    # Prefer 'date_captured', but fall back to 'timestamp' when it is empty.
    date_captured = _first_found('date_captured', '') or _first_found('timestamp', '')

    frame_index = _first_found('frame_index', None)
    gid = _first_found('id', None)
    image_name = _first_found('name', '')

    if 'vidname' in kwargs:
        vidname = kwargs['vidname']
    else:
        # Without a dataset there is no way to resolve the video name.
        vidname = None
        coco_dset = kwargs.get('coco_dset', None)
        if coco_dset is not None:
            video_id = img.get('video_id', None)
            if video_id is None:
                vidname = 'loose-images'
            else:
                vidname = coco_dset.index.videos[video_id]['name']

    image_id_part = f'gid={gid}, frame_index={frame_index}'

    # Each inner list becomes one header line; falsy parts are dropped and
    # lines that end up empty are skipped.
    line_parts = [
        [vidname, image_id_part, header_extra],
        [dset_idstr],
        [image_name],
        [sensor_coarse, date_captured],
    ]
    header_lines = []
    for parts in line_parts:
        text = ' '.join(str(part) for part in parts if part)
        text = text.replace('\\n', '\n')  # hack
        if text:
            header_lines.append(text)
    return header_lines



# TODO: this could be a specially handled frame like ASI.
# NOTE:
# QA Information Moved to ~/code/watch/geowatch/tasks/fusion/datamodules/qa_bands.py

# Keyword arguments passed to geowatch.find_dvc_dpath by auto_expt_dvc and
# auto_data_dvc respectively.
DVC_FIND_EXPT_KWARGS = dict(
    tags='phase2_expt',
    envvar='DVC_EXPT_DPATH',
    hardware='auto',
)
DVC_FIND_DATA_KWARGS = dict(
    tags='phase2_data',
    envvar='DVC_DATA_DPATH',
    hardware='auto',
)



[docs]
def auto_expt_dvc():
    """
    Call ``geowatch.find_dvc_dpath`` with ``DVC_FIND_EXPT_KWARGS`` and return
    its result.
    """
    import geowatch
    expt_dvc_dpath = geowatch.find_dvc_dpath(**DVC_FIND_EXPT_KWARGS)
    return expt_dvc_dpath




[docs]
def auto_data_dvc():
    """
    Call ``geowatch.find_dvc_dpath`` with ``DVC_FIND_DATA_KWARGS`` and return
    its result.
    """
    import geowatch
    data_dvc_dpath = geowatch.find_dvc_dpath(**DVC_FIND_DATA_KWARGS)
    return data_dvc_dpath



# We should be able to figure out a way to robustly introspect these
# Hard-coded parameter names, grouped by prefix / stage.
fit_param_keys = [
    'sensorchan',
    # 'channels',
    'time_steps',
    'chip_dims',
    'chip_overlap',
    'arch_name',
    'optimizer',
    'time_sampling',
    'time_span',
    'true_multimodal',
    'accumulate_grad_batches',
    'modulate_class_weights',
    'tokenizer',
    'use_grid_positives',
    'use_cloudmask',
    'upweight_centers',
    'temporal_dropout',
    'stream_channels',
    'saliency_loss',
    'class_loss',
    'init',
    'learning_rate',
    'decoder',
]


pred_param_keys = ['pred_tta_fliprot', 'pred_tta_time', 'pred_chip_overlap']


trk_param_keys = [
    'trk_thresh', 'trk_morph_kernel', 'trk_agg_fn',
    'trk_thresh_hysteresis', 'trk_moving_window_size',
]


act_param_keys = ['trk_use_viterbi', 'trk_thresh']


# DEPRECATE: hard-coded GSD value for a few dataset codes.
DSET_CODE_TO_GSD = dict([
    ('Aligned-Drop3-L1', 10.0),
    ('Aligned-Drop3-TA1-2022-03-10', 10.0),
    ('Cropped-Drop3-TA1-2022-03-10', 1.0),
])


PHASES = [
    'No Activity',
    'Site Preparation',
    'Active Construction',
    'Post Construction',
]


# Represents default relative weights to give to each sensor in temporal
# sampling.
SENSOR_TEMPORAL_SAMPLING_VALUES = {
    'WV': 10, 'WV1': 9, 'S2': 1, 'PD': 7, 'L8': 0.3,
    'sensor1': 11, 'sensor2': 7, 'sensor3': 5, 'sensor4': 3,
}


# Hard coded values for which regions are cleared
# One row per region; every region listed here is marked as cleared.
REGION_STATUS = [
    {'region_id': region_id, 'cleared': True}
    for region_id in [
        'AE_R001',
        'BH_R001',
        'BR_R001', 'BR_R002', 'BR_R004', 'BR_R005',
        'CH_R001',
        'KR_R001', 'KR_R002',
        'LT_R001',
        'NZ_R001',
        'PE_R001',
        'US_R001', 'US_R004', 'US_R005',
    ]
]


# Mapping from our sensor names to the official T&E sensor names
# One row per internal sensor code ('kit_name') giving its official T&E
# sensor name ('te_name'). Several internal codes may share one T&E name.
SENSOR_TABLE = [
    {'te_name': 'WorldView', 'kit_name': 'WV'},
    {'te_name': 'Sentinel-2', 'kit_name': 'S2'},
    {'te_name': 'Landsat 7', 'kit_name': 'LE'},
    {'te_name': 'Landsat 8', 'kit_name': 'LC'},
    {'te_name': 'Landsat 8', 'kit_name': 'L8'},
    {'te_name': 'WorldView', 'kit_name': 'WV1'},
    {'te_name': 'Planet', 'kit_name': 'PD'},
]
# A reverse mapping could be built as:
# {r['te_name']: r['kit_name'] for r in SENSOR_TABLE[::-1]}

# Internal sensor code -> official T&E sensor name. Derived from SENSOR_TABLE
# so the two cannot drift apart; this yields the same keys, values and order
# as the hand-written dictionary it replaces.
TE_SENSOR_NAMES = {r['kit_name']: r['te_name'] for r in SENSOR_TABLE}

# Priority value per T&E sensor name.
# NOTE(review): 'WorldView 1' is not a te_name in SENSOR_TABLE (the WV1 code
# maps to 'WorldView') - confirm which consumers use that key.
SENSOR_TRACK_PRIORITY = {
    'WorldView': 6,
    'WorldView 1': 5,
    'Planet': 4,
    'Sentinel-2': 3,
    'Landsat 8': 2,
    'Landsat 7': 1
}



[docs]
def normalize_sensors(coco_dset, sensor_warnings=True, format='te'):
    """
    Rewrite each image's ``sensor_coarse`` to the official IARPA T&E name.

    Images whose ``sensor_coarse`` is already one of the T&E names are left
    alone. Internal codes found in ``TE_SENSOR_NAMES`` are replaced by the
    corresponding T&E name. Images with a missing or unrecognized sensor are
    left unchanged and optionally reported with a warning.

    Args:
        coco_dset: dataset whose ``dataset['images']`` dictionaries are
            modified in place.
        sensor_warnings (bool): if True, emit a warning for each image with
            a missing or unrecognized sensor.
        format (str): currently unused by this function.

    Returns:
        the same ``coco_dset`` object that was passed in.
    """
    from geowatch.heuristics import TE_SENSOR_NAMES
    official_names = set(TE_SENSOR_NAMES.values())

    for img in coco_dset.dataset['images']:
        try:
            current = img['sensor_coarse']
            if current in official_names:
                # Already normalized.
                continue
            img['sensor_coarse'] = TE_SENSOR_NAMES[current]
        except KeyError:
            # Either the image has no sensor or the code has no T&E name.
            if not sensor_warnings:
                continue
            # name = img.get('name', img['file_name'])
            unknown = img.get('sensor_coarse', None)
            import warnings
            warnings.warn(
                f'image has unknown sensor {unknown} in tag={coco_dset.tag}')

    return coco_dset




[docs]
def extract_region_id(fname):
    """
    Return the first region-id-like substring found in ``fname``.

    The substring must match ``[A-Z]+_[A-Z]\\d+``: uppercase letters, an
    underscore, one uppercase letter and then digits.

    NOTE(review): there is no handling for the no-match case; presumably the
    search returns None and the ``.groups()`` call then fails - confirm.

    Example:
        >>> fname = 'foobar_KR_R001_otherstuff'
        >>> extract_region_id(fname)
    """
    import kwutil
    # Find a region pattern
    region_pattern = kwutil.util_pattern.Pattern.coerce(
        r'([A-Z]+_[A-Z]\d+)', 'regex')
    match = region_pattern.search(fname)
    captured = match.groups()
    return captured[0]




[docs]
def register_known_fsspec_s3_buckets():
    """
    A workaround to handle requester pays information for particular s3
    endpoints. Ideally the user would be able to specify this mapping via the
    CLI, but for now it is hard coded here.

    The profile is not specified here; the assumption is that the user
    will set the ``AWS_DEFAULT_PROFILE`` environ instead.

    Note: the ``AWS_REQUEST_PAYER`` environ is only respected by gdal, and
    this function does not impact gdal at all, so that environ needs to be
    set in addition to calling this workaround.


    Ignore:
        from geowatch import heuristics
        heuristics.register_known_fsspec_s3_buckets()

        from geowatch.utils.util_fsspec import S3Path
        self = S3Path.coerce('/vsis3/usgs-landsat-ard/collection02')
        self.ls()
    """
    from geowatch.utils import util_fsspec
    requester_pays_buckets = [
        's3://usgs-landsat-ard',
        's3://usgs-landsat',
    ]
    for bucket in requester_pays_buckets:
        util_fsspec.S3Path.register_bucket(bucket, requester_pays=True)