r"""
Helpers for reading data downloaded from DigitalGlobe.
Notes:
The data in the Core3D dataset is public and can be rehosted.
https://spacenet.ai/core3d/
AWS_PROFILE=iarpa aws s3 ls s3://spacenet-dataset/Hosted-Datasets/CORE3D-Public-Data/Satellite-Images/
AWS_PROFILE=iarpa aws s3 ls s3://spacenet-dataset/Hosted-Datasets/CORE3D-Public-Data/Tiled-Examples-for-Urban-3D-Challenge-Comparisons/02_Master/challenge-inputs/
AWS_PROFILE=iarpa aws s3 cp \
s3://spacenet-dataset/Hosted-Datasets/CORE3D-Public-Data/Tiled-Examples-for-Urban-3D-Challenge-Comparisons/02_Master/challenge-inputs/RIC_Tile_000_DSM.tif RIC_Tile_000_DSM.tif
AWS_PROFILE=iarpa aws s3 cp \
s3://spacenet-dataset/Hosted-Datasets/CORE3D-Public-Data/Tiled-Examples-for-Urban-3D-Challenge-Comparisons/02_Master/challenge-inputs/RIC_Tile_000_DTM.tif RIC_Tile_000_DTM.tif
AWS_PROFILE=iarpa aws s3 cp \
s3://spacenet-dataset/Hosted-Datasets/CORE3D-Public-Data/Tiled-Examples-for-Urban-3D-Challenge-Comparisons/02_Master/challenge-inputs/RIC_Tile_000_RGB.tif RIC_Tile_000_RGB.tif
Requirements:
pip install xmltodict
pip install pyshp
pip install cogeotiff
"""
from os.path import exists
from os.path import join
import ubelt as ub
import xmltodict
from os.path import dirname, abspath
[docs]
class DigitalGlobeBundle(ub.NiceRepr):
    """
    Data structure to organize information in digital globe bundles.

    All state lives in the ``self.data`` dictionary, which holds the path to
    the delivery metadata file (``delivery_metadata_fpath``), the product
    metadata (``product_metas``, initialized to None), and the optional
    ``pointer`` path that was used to locate the bundle.

    TODO: need public digital globe demodata for a doctest

    Maybe we can grab them from here?
        https://www.maxar.com/product-samples
        https://ard.maxar.com/samples#v5/
        https://spacenet.ai/core3d/

    Requirements:
        pip install pyshp

    Ignore:
        # This has a different format than our stuff... bleh..
        sample_zip_fpath = ub.grabdata('https://maxar-marketing.s3.amazonaws.com/product-samples/Rome_Colosseum_2022-03-22_WV03_HD.zip', hash_prefix='2a99cea2b37bed9b5867fa21a1bd')
        from kwcoco.util import util_archive
        archive = util_archive.Archive(sample_zip_fpath)
        dpath = (ub.Path(sample_zip_fpath).parent / 'MaxarSample').ensuredir()
        # Extract before globbing, otherwise there is nothing to find yet.
        archive.extractall(dpath)
        metadata_fpath = list(dpath.glob('*.MAN'))[0]
        delivery_metadata_fpath = dpath / '050012575010_01/050012575010_01_README.XML'
        self = DigitalGlobeBundle(delivery_metadata_fpath)
    """

    def __init__(self, delivery_metadata_fpath, pointer=None, autobuild=True):
        """
        Args:
            delivery_metadata_fpath (str | PathLike): path to the delivery
                metadata file of the bundle.

            pointer (str | PathLike | None): optional path that was used to
                locate the bundle (see :func:`from_pointer`).

            autobuild (bool): if True, immediately call
                ``self.parse_delivery_metadata()``.
        """
        self.data = {
            'delivery_metadata_fpath': delivery_metadata_fpath,
            'product_metas': None,
            'pointer': pointer,
        }
        if autobuild:
            # NOTE(review): parse_delivery_metadata is not defined in the
            # visible part of this class; it is assumed to exist elsewhere.
            self.parse_delivery_metadata()

    def __nice__(self):
        """
        Short description used by :class:`ub.NiceRepr` when building the repr.
        """
        return self.data['delivery_metadata_fpath']

    @classmethod
    def from_pointer(cls, pointer, **kw):
        """
        Construct a bundle from a path to any file inside of it.

        Args:
            pointer (str): a path to any file inside a digital globe
                bundle. We will search for the DeliveryMetadata.xml data.

            **kw: forwarded to the class constructor.

        Returns:
            DigitalGlobeBundle

        Raises:
            Exception: if DeliveryMetadata.xml cannot be found in the pointer
                path or any of its ancestors.

        Ignore:
            pointer = '/home/joncrall/data/dvc-repos/smart_watch_dvc/drop0/KR-Pyeongchang-WV/_assets/20170907_a_KRP_011777481_10_0/011777481010_01_003/011777481010_01/011777481010_01_P001_MUL/17SEP07021826-M1BS-011777481010_01_P001.TIF'
            pointer = '/home/joncrall/data/dvc-repos/smart_watch_dvc/drop0/KR-Pyeongchang-WV/_assets/20170907_a_KRP_011777481_10_0/011777481010_01_003/011777481010_01/011777481010_01_P001_PAN/17SEP07021826-P1BS-011777481010_01_P001.TIF'
            cls = DigitalGlobeBundle
            self = DigitalGlobeBundle.from_pointer(pointer)
            for meta in self.data['product_metas']:
                meta['sensorVehicle']
            dict_list = self.data['product_metas']
            # NOTE(review): ``varried`` is never defined in this snippet.
            print(varried['sensorVehicle'])
        """
        dpath = abspath(pointer)
        # Walk upwards from the pointer until the delivery metadata is found.
        delivery_fpath = search_path_ancestors(
            path=dpath, fname='DeliveryMetadata.xml')
        if delivery_fpath is None:
            raise Exception('cannot find DG DeliveryMetadata.xml')
        self = cls(delivery_fpath, pointer=pointer, **kw)
        return self

    @classmethod
    def coerce(cls, key, **kw):
        """
        Best-effort constructor that never raises.

        Args:
            key (str): a path to any file inside a digital globe bundle;
                passed to :func:`from_pointer`.

            **kw: forwarded to :func:`from_pointer`.

        Returns:
            DigitalGlobeBundle | None: the bundle, or None if
                :func:`from_pointer` raised an Exception.
        """
        try:
            # Previously this called the non-existent ``cls.pointer``; the
            # AttributeError was swallowed below, so None was always returned.
            self = cls.from_pointer(key, **kw)
        except Exception:
            # Deliberate best-effort: any failure maps to None.
            self = None
        return self
[docs]
def search_path_ancestors(path, fname, stop_fname=None, max_steps=1000):
    """
    Walk upwards from ``path`` looking for an entry named ``fname``.

    Each directory on the way to the filesystem root is checked in turn,
    starting with ``path`` itself.

    Args:
        path (str): directory to start the search

        fname (str): name of the path to search for

        stop_fname (str | None): if given, abort the search as soon as a
            visited directory contains an entry with this name.

        max_steps (int): abort once the number of upward steps taken so far
            exceeds this value.

    Returns:
        str: ``fname`` joined onto the first visited directory containing it.

    Raises:
        Exception: if the stop file is encountered, the step limit is
            exceeded, or the root is reached without finding ``fname``.
    """
    current = path
    num_steps = 0
    while True:
        candidate = join(current, fname)
        if exists(candidate):
            return candidate

        # A stop file in this directory marks a boundary we must not cross.
        if stop_fname is not None and exists(join(current, stop_fname)):
            raise Exception('found stop fname, cannot find {}'.format(fname))

        if num_steps > max_steps:
            raise Exception('too many steps, cannot find {}'.format(fname))

        parent = dirname(current)
        # dirname is a fixed point at the root, so there is nowhere left to go.
        if parent == current:
            raise Exception('reached the root, cannot find {}'.format(fname))

        current = parent
        num_steps += 1
[docs]
def ensure_unicode(text):
    r"""
    Return ``text`` as a ``str``, decoding it as utf8 if it is ``bytes``.

    Args:
        text (str | bytes):
            text to ensure is decoded as unicode

    Returns:
        str: the input itself if it was already a ``str``, otherwise the
            utf8-decoded bytes.

    Raises:
        ValueError: if ``text`` is neither ``str`` nor ``bytes``.

    References:
        .. [SO_12561063] http://stackoverflow.com/questions/12561063/extract-data-from-file

    Example:
        >>> import codecs  # NOQA
        >>> assert ensure_unicode('my ünicôdé strįng') == 'my ünicôdé strįng'
        >>> assert ensure_unicode('text1') == 'text1'
        >>> assert ensure_unicode('text1'.encode('utf8')) == 'text1'
        >>> assert ensure_unicode('text1'.encode('utf8')) == 'text1'
        >>> assert (codecs.BOM_UTF8 + 'text»¿'.encode('utf8')).decode('utf8')
    """
    if isinstance(text, bytes):
        return text.decode('utf8')
    if not isinstance(text, str):  # nocover
        raise ValueError('unknown input type {!r}'.format(text))
    # Already unicode, nothing to do.
    return text