r"""
Helper to fix issues in truth region / site models, particularly issues seen in
iMERIT data.
SeeAlso:
~/code/watch/geowatch/geoannots/geomodels.py
~/code/watch/geowatch/cli/validate_annotation_schemas.py
~/code/watch/geowatch/cli/fix_region_models.py
DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
python -m geowatch.cli.fix_region_models \
--region_models="$DVC_DATA_DPATH"/annotations/drop6/region_models/*.geojson
python -m geowatch.cli.fix_region_models \
--region_models "$DVC_DATA_DPATH"/submodules/annotations/region_models/*.geojson
python -m geowatch.cli.fix_region_models \
--region_models \
"$DVC_DATA_DPATH"/submodules/annotations/region_models/AE_C002.geojson \
"$DVC_DATA_DPATH"/submodules/annotations/region_models/AE_C003.geojson \
"$DVC_DATA_DPATH"/submodules/annotations/region_models/PY_C001.geojson \
"$DVC_DATA_DPATH"/submodules/annotations/region_models/BR_T001.geojson \
"$DVC_DATA_DPATH"/submodules/annotations/region_models/BR_T002.geojson
python -m geowatch.cli.validate_annotation_schemas \
--region_models="$DVC_DATA_DPATH"/annotations/drop6/region_models/AE_C001.geojson
"""
#!/usr/bin/env python3
import decimal
import simplejson
import json
import scriptconfig as scfg
import ubelt as ub
[docs]
class FixRegionModelsCLI(scfg.DataConfig):
    # Command line configuration for this script.
    #
    # NOTE: documented with comments rather than a class docstring on
    # purpose; scriptconfig may surface the class docstring in the generated
    # CLI help, so adding one could change user-visible output.

    # site_models = scfg.Value(None, nargs='+', help='coercable site models')

    # One or more region model inputs; resolved to geojson paths in ``main``.
    region_models = scfg.Value(
        None,
        nargs='+',
        help='coercable region models',
    )
[docs]
class fakefloat(float):
    """
    A float whose text form is the ``str`` of the value it was built from.

    The numeric value comes from ``float.__new__`` as usual, while the
    original object is kept so that ``repr`` / ``str`` can echo it back
    instead of the float formatting.
    """

    def __init__(self, value):
        # Remember the source object; the float value itself is already set
        # by ``float.__new__``.
        self._value = value

    def __str__(self):
        original = self._value
        return str(original)

    def __repr__(self):
        original = self._value
        return str(original)
[docs]
class DecimalEncoder(json.JSONEncoder):
    """
    JSON encoder that accepts ``decimal.Decimal`` values.

    Decimals are wrapped in ``fakefloat`` before being handed back to the
    encoder; every other unsupported type falls through to the base class.
    """
    # https://stackoverflow.com/questions/1960516/python-json-serialize-a-decimal-object

    def default(self, o):
        """
        Convert an object the base encoder cannot serialize.

        Args:
            o: the object to convert.

        Returns:
            fakefloat: when ``o`` is a ``decimal.Decimal``.

        Raises:
            TypeError: raised by the base class for any other type.
        """
        if isinstance(o, decimal.Decimal):
            return fakefloat(o)
        return super().default(o)
[docs]
def defaultencode(o):
    """
    Fallback converter for objects a JSON encoder cannot handle natively.

    Args:
        o: the object to convert.

    Returns:
        fakefloat: wrapper around ``o`` when it is a ``decimal.Decimal``.

    Raises:
        TypeError: for any other type.
    """
    if not isinstance(o, decimal.Decimal):
        raise TypeError(f'{o!r} is not JSON serializable')
    # Subclass float with custom repr?
    return fakefloat(o)
[docs]
def main(cmdline=1, **kwargs):
    """
    Validate each requested region model and rewrite the ones that fail.

    For every region model path, the model is loaded and validated. When
    validation raises, the model is repaired (``fixup`` +
    ``fix_region_model``), validated again, and written back to the same
    path.

    Args:
        cmdline: forwarded to ``FixRegionModelsCLI.cli`` as ``cmdline``.
        **kwargs: forwarded to ``FixRegionModelsCLI.cli`` as ``data``.
    """
    from kwgis.utils import util_gis
    from geowatch.geoannots import geomodels
    import rich
    config = FixRegionModelsCLI.cli(cmdline=cmdline, data=kwargs, strict=True)
    rich.print('config = ' + ub.urepr(config, nl=1))
    # dpath = '/media/joncrall/flash1/smart_data_dvc/submodules/annotations/region_models'
    region_model_fpaths = util_gis.coerce_geojson_paths(config.region_models)
    _iter = iter(region_model_fpaths)
    for fpath in _iter:
        # if fpath.stem in {'AE_C002', 'AE_C003', 'BR_T001', 'BR_T002', 'PY_C001'}:
        #     continue
        # if fpath.stem in {'AE_C002', 'AE_C003', 'PY_C001'}:
        #     continue
        # Floats are parsed as Decimal; presumably this keeps the original
        # textual precision when the file is re-serialized -- TODO confirm.
        region_model = geomodels.RegionModel.coerce(fpath, parse_float=decimal.Decimal)
        # region_model._validate_parts()
        try:
            region_model.validate(verbose=0)
        except Exception:
            # Any validation failure triggers the repair path. The final
            # validate() is not guarded, so an unfixable model raises here.
            print('Attempting a fix')
            region_model.fixup()
            fix_region_model(region_model)
            region_model.fixup()
            region_model.validate()
            # NOTE(review): the write-back below is assumed to belong to the
            # repair path (only files that failed validation are rewritten);
            # confirm against the upstream source.
            if '_C' in fpath.stem:
                # Try to minimize the diff by outputing in a similar style
                new_text = special_dumps(region_model)
            else:
                # T&E regions seem to be normal json outputs
                new_text = simplejson.dumps(region_model, indent=' ')
            # Overwrites the input file in place.
            fpath.write_text(new_text)
            # old_text = fpath.read_text()
            # print(new_text.split('\n')[:6])
            # print(old_text.split('\n')[:6])
            # import xdev
            # print(xdev.difftext(old_text, new_text, colored=True))
[docs]
def special_dumps(region_model):
    """
    Serialize a region model to JSON text with one feature per line.

    Every top-level key other than ``features`` is written on its own line,
    followed by a ``"features"`` array holding one compact line per feature.
    A space is inserted inside every bracket and brace.

    Args:
        region_model: dict-like model supporting ``copy()``, ``pop()`` and
            ``items()``; its ``features`` entry, when present, is a sequence.

    Returns:
        str: the JSON text, terminated by a newline. A model with no
        features (or no ``features`` key) produces an empty array instead of
        raising.
    """
    def oneline_dict(val):
        # Compact dump, then pad brackets/braces. The replacement is purely
        # textual, so bracket characters inside string values are padded too.
        text = simplejson.dumps(val)
        for bare, padded in (('{', '{ '), ('}', ' }'), ('[', '[ '), (']', ' ]')):
            text = text.replace(bare, padded)
        return text

    # Shallow copy so that popping "features" does not mutate the caller's model.
    top_level = region_model.copy()
    features = top_level.pop('features', None)
    if features is None:
        features = []

    lines = ['{']
    for key, val in top_level.items():
        lines.append(f'"{key}": ' + oneline_dict(val) + ',')

    lines.append('"features": [')
    # Decide the trailing comma by position rather than identity, so a repeated
    # object in the list cannot lose its separator.
    last_index = len(features) - 1
    for index, feat in enumerate(features):
        text = oneline_dict(feat)
        if index == last_index:
            lines.append(text)
        else:
            lines.append(text + ',')
    lines.append(']')
    lines.append('}')
    return '\n'.join(lines) + '\n'
[docs]
def fix_region_model(region_model):
    """
    Repair known data-entry problems in a region model, in place.

    The header geometry is coerced to a single 2D polygon, and the properties
    of every feature are normalized: misspelled keys are renamed, string
    scores become floats, stray whitespace is removed, and the MGRS code is
    set from the region centroid. Features whose geometry is null are removed
    from the model.

    Args:
        region_model: the model to modify. This function reads its
            ``geometry``, ``header`` and ``features`` attributes and removes
            entries from ``region_model['features']``.

    Raises:
        Exception: if the header geometry is a MultiPolygon with no parts, or
            a feature MultiPolygon contains a point that is not 2D.
        AssertionError: if a point has a nonzero third coordinate, or a
            feature MultiPolygon does not have exactly one part.
    """
    import mgrs
    import kwimage
    import ubelt as ub

    def strip_zero_z(rings):
        # Truncate (x, y, 0) points to (x, y) in place; a nonzero third
        # coordinate is treated as unexpected data.
        for ring in rings:
            for pt in ring:
                if len(pt) != 2:
                    assert pt[2] == 0
                    pt[:] = pt[0:2]

    (lon,), (lat,) = region_model.geometry.centroid.xy
    mgrs_code = mgrs.MGRS().toMGRS(lat, lon, MGRSPrecision=0)

    # Developer switch: set to 1 to save a plot of every geometry that needed
    # fixing under ~/tmpfig.
    DRAW_BAD_REGIONS = 0
    if DRAW_BAD_REGIONS:
        import kwplot
        kwplot.autompl()

        def draw_bad_region(region_model, region_poly):
            fig = kwplot.figure(fnum=1)
            ax = fig.gca()
            ax.cla()
            region_poly.draw(setlim=1, ax=ax, alpha=0.5)
            ax.set_title('Region Geometry: {}'.format(region_model.region_id))
            dpath = (ub.Path.home() / 'tmpfig/bad_regions').ensuredir()
            # Parenthesized: "/" binds tighter than "+", so the unparenthesized
            # form would try to add a str to a Path.
            fig.savefig(dpath / (region_model.region_id + '_bounds.png'))

        def draw_bad_site(region_model, feat, site_poly):
            fig = kwplot.figure(fnum=1)
            ax = fig.gca()
            ax.cla()
            site_id = feat['properties']['site_id']
            site_poly.draw(setlim=1, ax=ax, alpha=0.5)
            ax.set_title('Site Geometry: {}'.format(site_id))
            dpath = (ub.Path.home() / 'tmpfig/bad_sites').ensuredir()
            fig.savefig(dpath / (site_id + '_bounds.png'))

    # --- Header geometry: collapse a MultiPolygon into one Polygon ---
    if region_model.geometry.geom_type == 'MultiPolygon':
        region_geom = region_model.geometry
        region_poly = kwimage.MultiPolygon.from_shapely(region_geom)
        parts = list(region_geom.geoms)
        if len(parts) == 0:
            raise Exception('region header MultiPolygon has no parts')
        elif len(parts) > 1:
            # HACK! Make a convex hull!
            poly = region_poly.convex_hull
        else:
            poly = kwimage.Polygon.from_shapely(parts[0])
        if DRAW_BAD_REGIONS:
            draw_bad_region(region_model, region_poly)
        print('Fix region header geom')
        region_model.header['geometry'] = poly.to_geojson()

    if region_model.geometry.geom_type == 'Polygon':
        strip_zero_z(region_model.header['geometry']['coordinates'])

    # --- Per-feature property and geometry cleanup ---
    very_bad_feats = []
    for feat in region_model.features:
        props = feat['properties']
        props['mgrs'] = mgrs_code

        # Misspelled "socre" key: only use it when no real score exists.
        if 'socre' in props:
            old_score = props.pop('socre', None)
            if old_score is not None:
                if 'score' not in props or props['score'] is None:
                    props['score'] = float(old_score)
        if 'score' in props:
            if isinstance(props['score'], str):
                props['score'] = float(props['score'])

        if 'model_cont' in props:
            props['model_content'] = props.pop('model_cont')
        if 'model_content' in props:
            if props['model_content'] is None:
                props['model_content'] = 'annotation'

        # Rename the misspelled key first so that its value also goes through
        # the capitalization fix below.
        if 'orginator' in props:
            props['originator'] = props.pop('orginator')
        if 'originator' in props:
            if props['originator'] == 'imerit':
                props['originator'] = 'iMERIT'

        props['type'] = props['type'].replace(' ', '')
        if props['type'] == 'site_summary':
            if props['version'] is None:
                props['version'] = region_model.header['properties']['version']
        if 'version' in props:
            props['version'] = props['version'].strip()
        props['site_id'] = props['site_id'].replace(' ', '')
        props['status'] = props['status'].strip().lower()
        if 'validated' in props:
            props['validated'] = props['validated'].strip()

        if feat['geometry'] is None:
            # Nothing to repair; collect for removal after the loop so the
            # feature list is not mutated while it is being iterated.
            very_bad_feats.append(feat)
        else:
            if feat['geometry']['type'] == 'MultiPolygon':
                print('Fix site summary geom')
                site_poly = kwimage.MultiPolygon.coerce(feat['geometry'])
                if DRAW_BAD_REGIONS:
                    draw_bad_site(region_model, feat, site_poly)
                parts = list(site_poly.to_shapely().geoms)
                assert len(parts) == 1
                poly = kwimage.Polygon.from_shapely(parts[0])
                feat['geometry'] = poly.to_geojson()
                # NOTE(review): the nesting of this "cache" default was
                # reconstructed from de-indented source; confirm it belongs
                # to the MultiPolygon branch.
                if 'cache' not in props:
                    props['cache'] = {}
            if feat['geometry']['type'] == 'Polygon':
                strip_zero_z(feat['geometry']['coordinates'])
            # Presumably unreachable after the conversion above; kept as a
            # defensive check.
            if feat['geometry']['type'] == 'MultiPolygon':
                for poly in feat['geometry']['coordinates']:
                    for ring in poly:
                        for pt in ring:
                            if len(pt) != 2:
                                raise Exception(
                                    'site MultiPolygon contains a non-2D point')

    # region_model._validate_parts()
    for feat in very_bad_feats:
        region_model['features'].remove(feat)
if __name__ == '__main__':
"""
CommandLine:
python ~/code/watch/geowatch/cli/fix_region_models.py
python -m geowatch.cli.fix_region_models
"""
main()