"""
Functional utilities for generating basic components of the demodata.
"""
import ubelt as ub
import kwimage
import kwarray
import numpy as np
import geopandas as gpd
from shapely import geometry
import datetime as datetime_mod
from datetime import datetime as datetime_cls
[docs]
def random_geo_points(num, rng=None):
"""
Generate a uniformly random longitude, latitude.
Based on logic described in [SO68298220]_.
Args:
num (int) : number of random points to generate
rng : random seed or number generator
Returns:
ndarray:
An Nx2 array with columns corresponding to CRS84 points
(i.e. longitude, latitude)
References:
.. [SO68298220] https://stackoverflow.com/questions/68298220/random-geocoords
Example:
>>> from geowatch.demo.metrics_demo.demo_utils import * # NOQA
>>> latlon = random_geo_points(num=3, rng=0)
>>> print(ub.urepr(latlon, precision=4))
np.array([[ 16.1579, 5.6025],
[-27.4843, 25.4916],
[ 52.5219, 11.8603]], dtype=np.float64)
Example:
>>> # This example demonstrates that the points are randomly spread out
>>> from geowatch.demo.metrics_demo.demo_utils import * # NOQA
>>> # xdoctest: +REQUIRES(--show)
>>> # xdoctest: +REQUIRES(module:kwplot)
>>> import kwplot
>>> kwplot.autompl()
>>> # Create random geopoints and place them into a GeoDataFrame
>>> latlons = random_geo_points(300)
>>> crs84 = get_crs84()
>>> pts_gdf = gpd.GeoDataFrame(geometry=[geometry.Point(p) for p in latlons], crs=crs84)
>>> # Plot the map of the world in the background
>>> wld_map_gdf = gpd.read_file(
>>> gpd.datasets.get_path('naturalearth_lowres')
>>> ).to_crs(crs84)
>>> ax = wld_map_gdf.plot()
>>> pts_gdf.plot(ax=ax, color='orange', alpha=0.8)
>>> kwplot.show_if_requested()
"""
rng = kwarray.ensure_rng(rng)
u0, u1 = rng.rand(2, num)
rad_lat = np.arcsin(2 * u0 - 1.0) # angle with Equator - from +pi/2 to -pi/2
rad_lon = (2 * u1 - 1) * np.pi # longitude in radians - from -pi to +pi
rad_lonlat = np.stack([rad_lon, rad_lat], axis=1)
lonlat = np.rad2deg(rad_lonlat)
return lonlat
[docs]
def random_geo_polygon(max_rt_area=10_000, rng=None):
"""
Creates a random polygon of a "reasonable size" for a region in CRS84
Args:
max_rt_area (float):
Maximum root area (i.e. sqrt(area)) of the polygon in meters.
The generated polygon will usually have a root-area that is between
a factor of 0.1 and 0.6 of this number. Defaults to 10,000 meters.
rng : random state or seed
Returns:
kwimage.Polygon : polygon in CRS84 space
Example:
>>> from geowatch.demo.metrics_demo.demo_utils import * # NOQA
>>> max_rt_area = 10000
>>> region_poly = random_geo_polygon(max_rt_area, rng=321)
>>> geo_poly = region_poly.round(6).to_geojson()
>>> print('geo_poly = {}'.format(ub.urepr(geo_poly, nl=-1)))
geo_poly = {
'type': 'Polygon',
'coordinates': [
[
[-151.983974, 50.530122],
[-151.982547, 50.520243],
[-151.951446, 50.506376],
[-151.925555, 50.514069],
[-151.930728, 50.543657],
[-151.941043, 50.541291],
[-151.983974, 50.530122]
]
]
}
Example:
>>> # This example demonstrates the distribution of random polygons
>>> from geowatch.demo.metrics_demo.demo_utils import * # NOQA
>>> # xdoctest: +REQUIRES(--show)
>>> # xdoctest: +REQUIRES(module:kwplot)
>>> import kwplot
>>> kwplot.autompl()
>>> # Create random polygons and place them into a GeoDataFrame
>>> rng = kwarray.ensure_rng(32321)
>>> # Make the polygons very large, so they show up at world scale
>>> max_rt_area = 1_000_000
>>> polys = [random_geo_polygon(max_rt_area, rng=rng) for _ in range(10)]
>>> crs84 = get_crs84()
>>> poly_gdf = gpd.GeoDataFrame(geometry=[p.to_shapely() for p in polys], crs=crs84)
>>> # Plot the map of the world in the background
>>> wld_map_gdf = gpd.read_file(
>>> gpd.datasets.get_path('naturalearth_lowres')
>>> ).to_crs(crs84)
>>> ax = wld_map_gdf.plot()
>>> poly_gdf.plot(ax=ax, color='limegreen', alpha=0.8)
>>> poly_gdf.centroid.plot(ax=ax, color='orangered', alpha=0.5)
>>> kwplot.show_if_requested()
"""
# Choose a random centroid of the polygon
lon, lat = random_geo_points(num=1, rng=rng).ravel()
# Determine what UTM zone this random centroid falls in
epsg_code = utm_epsg_from_latlon(lat, lon)
crs84 = get_crs84()
pt_crs84_gdf = gpd.GeoDataFrame(geometry=[
geometry.Point(lon, lat)], crs=crs84)
# Project the random lon/lat point into a UTM space
pt_utm_gdf = pt_crs84_gdf.to_crs(epsg_code)
utm_xy = pt_utm_gdf['geometry'].iloc[0].xy
utm_x = utm_xy[0][0]
utm_y = utm_xy[1][0]
# Create a random polygon with an area between 0 and 1 (usually in 0.1-0.6)
poly = kwimage.Polygon.random(rng=rng)
# centered it at (0, 0)
poly = poly.translate(-np.array(poly.centroid))
# Scale the polygon to the appropriate area in UTM space, and translate it
# into the randomly chosen center position.
poly_utm = poly.scale(max_rt_area).translate((utm_x, utm_y))
utm_gdf = gpd.GeoDataFrame(geometry=[poly_utm.to_shapely()],
crs=pt_utm_gdf.crs)
crs84_gdf = utm_gdf.to_crs(crs84)
poly_crs84 = crs84_gdf['geometry'].iloc[0]
region_poly = kwimage.Polygon.from_shapely(poly_crs84)
return region_poly
[docs]
def random_time_sequence(min_date_iso, max_date_iso, num_observations, rng=None):
"""
Generate a list of random timestamps between the specified dates
Args:
min_date_iso (str): minimum possible date
max_date_iso (str): maximum possible date
num_observations (int): number of dates to generate
rng: random state or seed
Returns:
List[datetime_cls]: sampled dates in the UTC timezone
Example:
>>> from geowatch.demo.metrics_demo.demo_utils import * # NOQA
>>> min_date_iso = '1996-06-23'
>>> max_date_iso = '2017-10-27'
>>> num_observations = 3
>>> obs_sequence = random_time_sequence(min_date_iso, max_date_iso, num_observations, rng=9320)
>>> print('obs_sequence = {}'.format(ub.urepr(obs_sequence, nl=1)))
obs_sequence = [
datetime.datetime(1997, 9, 17, 20, 49, 7, 888653, tzinfo=datetime.timezone.utc),
datetime.datetime(2001, 6, 29, 17, 43, 36, 108233, tzinfo=datetime.timezone.utc),
datetime.datetime(2009, 4, 2, 10, 26, 54, 429200, tzinfo=datetime.timezone.utc),
]
Ignore:
# In Docker, test that the code provides the same number in different
# locale.
export TZ="Africa/Lusaka"
rm -rf /etc/localtime
ln -s /usr/share/zoneinfo/Africa/Lusaka /etc/localtime
python -c "import datetime as datetime_mod; print(datetime_mod.datetime.fromisoformat('1996-06-23').replace(tzinfo=datetime_mod.timezone.utc).timestamp())"
"""
rng = kwarray.ensure_rng(rng)
from kwutil import util_time
min_date = util_time.coerce_datetime(min_date_iso)
max_date = util_time.coerce_datetime(max_date_iso)
# min_date = datetime_cls.fromisoformat(min_date_iso)
# max_date = datetime_cls.fromisoformat(max_date_iso)
# # Assume user is specifying UTC by default.
# if min_date.tzinfo is None:
# min_date = min_date.replace(tzinfo=datetime_mod.timezone.utc)
# if max_date.tzinfo is None:
# max_date = max_date.replace(tzinfo=datetime_mod.timezone.utc)
min_ts = min_date.timestamp()
max_ts = max_date.timestamp()
obs_timestamps = rng.rand(num_observations) * (max_ts - min_ts) + min_ts
obs_sequence = [
# util_time.coerce_datetime(ts)
datetime_cls.fromtimestamp(ts, tz=datetime_mod.timezone.utc)
for ts in sorted(obs_timestamps)
]
return obs_sequence
[docs]
def utm_epsg_from_latlon(lat, lon):
"""
Find a reasonable UTM CRS for a given lat / lon
The purpose of this function is to get a reasonable CRS for computing
distances in meters. If the region of interest is very large, this may not
be valid.
See [SE190198]_ and [SE365584]_.
Args:
lat (float): degrees in latitude
lon (float): degrees in longitude
Returns:
int : the ESPG code of the UTM zone
References:
.. [SE190198] https://gis.stackexchange.com/questions/190198/how-to-get-appropriate-crs-for-a-position-specified-in-lat-lon-coordinates
.. [SE365584] https://gis.stackexchange.com/questions/365584/convert-utm-zone-into-epsg-code
Example:
>>> from geowatch.demo.metrics_demo.demo_utils import * # NOQA
>>> epsg_code = utm_epsg_from_latlon(0, 0)
>>> print('epsg_code = {!r}'.format(epsg_code))
epsg_code = 32631
"""
import utm
# easting, northing, zone_num, zone_code = utm.from_latlon(min_lat, min_lon)
zone_num = utm.latlon_to_zone_number(lat, lon)
# Construction of EPSG code from UTM zone number
south = lat < 0
epsg_code = 32600
epsg_code += int(zone_num)
if south is True:
epsg_code += 100
return epsg_code
[docs]
def project_gdf_to_local_utm(gdf_crs84, max_utm_zones=None):
"""
Find the local UTM zone for a geo data frame and project to it.
Assumes geometry is in CRS-84.
All geometry in the GDF must be in the same UTM zone.
Args:
gdf_crs84 (geopandas.GeoDataFrame):
The data with CRS-84 geometry to project into a local UTM
max_utm_zones (int | None):
If the data spans more than this many UTM zones, error.
Otherwise, we take the first one.
Returns:
geopandas.GeoDataFrame
Example:
>>> import geopandas as gpd
>>> import kwarray
>>> import kwimage
>>> rng = kwarray.ensure_rng(0)
>>> # Gen lat/lons between 0 and 1, which is in UTM zone 31N
>>> gdf_crs84 = gpd.GeoDataFrame({'geometry': [
>>> kwimage.Polygon.random(rng=rng).to_shapely(),
>>> kwimage.Polygon.random(rng=rng).to_shapely(),
>>> kwimage.Polygon.random(rng=rng).to_shapely(),
>>> ]}, crs=get_crs84())
>>> gdf_utm = project_gdf_to_local_utm(gdf_crs84)
>>> assert gdf_utm.crs.name == 'WGS 84 / UTM zone 31N'
Example:
>>> import geopandas as gpd
>>> import kwarray
>>> import kwimage
>>> # If the data is too big for a single UTM zone,
>>> rng = kwarray.ensure_rng(0)
>>> gdf_crs84 = gpd.GeoDataFrame({'geometry': [
>>> kwimage.Polygon.random(rng=rng).scale(90).to_shapely(),
>>> kwimage.Polygon.random(rng=rng).scale(90).to_shapely(),
>>> kwimage.Polygon.random(rng=rng).scale(90).to_shapely(),
>>> ]}, crs=get_crs84())
>>> import pytest
>>> with pytest.raises(ValueError):
>>> gdf_utm = project_gdf_to_local_utm(gdf_crs84, max_utm_zones=1)
"""
epsg_zones = []
for geom_crs84 in gdf_crs84.geometry:
epsg_zone = find_local_meter_epsg_crs(geom_crs84)
epsg_zones.append(epsg_zone)
if not ub.allsame(epsg_zones):
unique_utm = set(epsg_zones)
if max_utm_zones is not None and len(unique_utm) > max_utm_zones:
raise ValueError(ub.paragraph(
'''
Input data spanned multiple UTM zones.
This is currently not allowed. {}
'''
).format(unique_utm))
# TODO: if there are more than one is there a way to get "the best one?"
epsg_zone = epsg_zones[0]
gdf_utm = gdf_crs84.to_crs(epsg_zone)
return gdf_utm
[docs]
def find_local_meter_epsg_crs(geom_crs84):
"""
Find the "best" meter based CRS for a smallish geographic region.
Currently this only returns UTM zones. Might be better to return an Albers
projection if the geometry spans more than one UTM zone.
Args:
geom_crs84 (shapely.geometry.base.BaseGeometry):
shapely geometry in CRS84 (lon/lat wgs84)
Returns:
int: epsg code
References:
[1] https://gis.stackexchange.com/questions/148181/choosing-projection-crs-for-short-distance-based-analysis/148187
[2] http://projfinder.com/
TODO:
- [ ] Better UTM zone intersection
- [ ] Fix edge cases
Example:
>>> import kwimage
>>> geom_crs84 = kwimage.Polygon.random().translate(-0.5).scale((180, 90)).to_shapely()
>>> epsg_zone = find_local_meter_epsg_crs(geom_crs84)
"""
lonmin, latmin, lonmax, latmax = geom_crs84.bounds
# Hack: this doesnt work on boundries (or for larger regions)
# correct way of doing this would be lookup candiate CRS zones,
# and find the one with highest intersection area weighted by distance
# to the center of the valid region.
latmid = (latmin + latmax) / 2
lonmid = (lonmin + lonmax) / 2
candidate_utm_codes = [
utm_epsg_from_latlon(latmin, lonmin),
utm_epsg_from_latlon(latmax, lonmax),
utm_epsg_from_latlon(latmax, lonmin),
utm_epsg_from_latlon(latmin, lonmax),
utm_epsg_from_latlon(latmid, lonmid),
]
epsg_zone = ub.argmax(ub.dict_hist(candidate_utm_codes))
return epsg_zone
[docs]
@ub.memoize
def get_crs84():
"""
Constructing the CRS84 is slow.
This function memoizes it so it only happens once.
Returns:
pyproj.crs.crs.CRS
"""
from pyproj import CRS
crs84 = CRS.from_user_input('OGC:CRS84')
# crs84 = CRS.from_user_input('crs84')
return crs84