#!/usr/bin/env python3
import scriptconfig as scfg
import ubelt as ub
[docs]
class RunAllPointgenCLI(scfg.DataConfig):
method = scfg.Value("ellipse", help="pointgen method")
ignore_buffer = scfg.Value("20@10GSD", help="kwcoco ignore buffer size")
size_prior = scfg.Value("20.06063 x 20.0141229 @ 10mGSD")
time_prior = scfg.Value("1year")
[docs]
def main(cmdline=1, **kwargs):
"""
Developer script to get point-based polygons generated.
Needs cleanup.
Ignore:
DVC_DATA_DPATH=$(geowatch_dvc --tags='phase3_data' --hardware=auto)
DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase3_expt' --hardware=auto)
echo "$DVC_DATA_DPATH"
echo "$DVC_EXPT_DPATH"
"""
import rich
from rich.markup import escape
config = RunAllPointgenCLI.cli(cmdline=1, strict=True)
rich.print("config = " + escape(ub.urepr(config, nl=1)))
import cmd_queue
import geowatch
data_dvc_dpath = geowatch.find_dvc_dpath(tags="phase3_data", hardware="ssd")
# expt_dvc_dpath = geowatch.find_dvc_dpath(tags='phase3_expt', hardware='auto')
kwcoco_bundle_dpath = data_dvc_dpath / "Drop8-ARA-Median10GSD-V1"
points_fpath = data_dvc_dpath / "annotations/point_based_annotations.zip"
region_dpath = (data_dvc_dpath / "annotations") / "drop8-v1/region_models"
empty_region_dpath = (
data_dvc_dpath / "annotations"
) / "drop8-v1/empty_region_models"
if 0:
# Based on T&E submodule
points_fpath = (
data_dvc_dpath
/ "submodules/annotations/supplemental_data/point_based_annotations.geojson"
)
region_dpath = data_dvc_dpath / "submodules/annotations/region_models/"
empty_region_dpath = (
data_dvc_dpath / "submodules/annotations/empty_region_models/"
)
assert points_fpath.exists()
import geopandas as gpd
# Read json text directly from the zipfile
if points_fpath.endswith(".zip"):
file = ub.zopen(points_fpath + "/" + "point_based_annotations.geojson", "r")
else:
file = open(points_fpath, "r")
with file:
points_gdf_crs84 = gpd.read_file(file)
points_gdf_crs84["region_id"] = points_gdf_crs84["site_id"].apply(
lambda s: "_".join(s.split("_")[0:2])
)
regions_with_points = sorted(points_gdf_crs84["region_id"].unique())
common_params = dict(
size_prior=config.size_prior,
time_prior=config.time_prior,
filepath_to_points=points_fpath,
)
# TODO: do a hash of the config in this output, or just specify it in config?
dest_annot_dpath = (
data_dvc_dpath / "annotations"
) / f"drop8-{config.method}-points-v1"
dest_region_dpath = (dest_annot_dpath / "region_models").ensuredir()
viz_region_dpath = (dest_annot_dpath / "region_models_viz").ensuredir()
region_model_list = list(region_dpath.glob("*.geojson"))
if 0:
# Hack to add special point-only regions
hacked_regions = ["HK_C001", "HK_C002"]
for r in hacked_regions:
region_fpath = empty_region_dpath / (r + ".geojson")
assert region_fpath.exists()
region_model_list.append(region_fpath)
# Handle sites with "xxx" patterns that may be associated with multiple regions
import kwutil
region_with_points_patterns = []
for region_id in regions_with_points:
if region_id.endswith("xxx"):
region_id = region_id.replace("xxx", "*")
region_with_points_patterns.append(region_id)
region_with_points_pattern = kwutil.util_pattern.MultiPattern.coerce(
region_with_points_patterns
)
status_rows = []
for region_path in region_model_list:
region_id = region_path.stem.strip()
kwcoco_path = (
kwcoco_bundle_dpath
/ region_id
/ (f"imgonly-{region_id}-rawbands.kwcoco.zip")
)
# has_points = region_id in regions_with_points
has_points = region_with_points_pattern.match(region_id)
has_kwcoco = kwcoco_path.exists()
row = {
"region_id": region_id,
"annotation_type": region_id.split("_")[1][0],
"has_points": has_points,
"has_kwcoco": has_kwcoco,
"kwcoco_path": kwcoco_path,
}
status_rows.append(row)
import pandas as pd
import rich
status_df = pd.DataFrame(status_rows)
status_df.value_counts("has_kwcoco")
# rows_to_process = status_df[status_df.has_kwcoco & status_df.has_points]
# rows_to_process['annotation_type'].value_counts()
# status_df[~status_df['has_kwcoco']]
rich.print(status_df)
polygen_template = ub.codeblock(
r"""
python -m geowatch.tasks.poly_from_point.predict \
--method '{method}' \
--region_id "{region_id}" \
--filepath_to_images "{filepath_to_images}" \
--filepath_to_points "{filepath_to_points}" \
--filepath_to_region "{filepath_to_region}" \
--time_prior "{time_prior}" \
--size_prior "{size_prior}" \
--filepath_output "{filepath_output}"
"""
)
regionviz_template = ub.codeblock(
"""
geowatch draw_region "{region_fpath}" --fpath "{viz_fpath}"
"""
)
force_rerun = 1
queue = cmd_queue.Queue.create(
#backend="tmux",
backend="serial",
#size=16,
)
# queue.add_header_command(
# ub.codeblock(
# """
# pyenv shell 3.10.5
# source $(pyenv prefix)/envs/pyenv-geowatch/bin/activate
# """
# )
# )
for row in status_rows:
region_id = row["region_id"]
has_kwcoco = row["has_kwcoco"]
has_points = row["has_points"]
kwcoco_path = row["kwcoco_path"]
filepath_output = dest_region_dpath / f"{region_id}.geojson"
if has_kwcoco and has_points:
fmtkw = {
"method": config.method,
"region_id": region_id,
"filepath_to_images": None,
"filepath_to_region": region_path,
"filepath_output": filepath_output,
**common_params,
}
polygen_cmd = polygen_template.format(**fmtkw)
if not filepath_output.exists() or force_rerun:
polygen_job = queue.submit(polygen_cmd, name=f"PolyGen_{region_id}")
else:
polygen_job = None
DO_DRAW = 1
if DO_DRAW:
viz_fpath = viz_region_dpath / f"point_viz_{region_id}.png"
if not viz_fpath.exists() or force_rerun:
regionviz_cmd = regionviz_template.format(
region_fpath=filepath_output,
viz_fpath=viz_fpath,
)
queue.submit(
regionviz_cmd, name=f"Viz_{region_id}", depends=polygen_job
)
final_out_kwcoco_fpath = (
kwcoco_bundle_dpath
/ region_id
/ (f"pointannv4_ellipse-{region_id}-rawbands.kwcoco.zip")
)
if config.ignore_buffer:
reproject_out_kwcoco_fpath = (
kwcoco_bundle_dpath
/ region_id
/ (f"_preignore-pointannv4_ellipse-{region_id}-rawbands.kwcoco.zip")
)
else:
reproject_out_kwcoco_fpath = final_out_kwcoco_fpath
reproject_cmd = ub.codeblock(
rf"""
geowatch reproject_annotations \
--src "{kwcoco_path}" \
--dst "{reproject_out_kwcoco_fpath}" \
--region_models="{filepath_output}"
"""
)
if not reproject_out_kwcoco_fpath.exists() or force_rerun:
reproject_job = queue.submit(
reproject_cmd, name=f"reproject_{region_id}", depends=polygen_job
)
else:
reproject_job = None
if config.ignore_buffer:
ignore_buffer_kwcoco_fpath = final_out_kwcoco_fpath
add_kwcoco_ignore_buffer_command = ub.codeblock(
rf"""
python -m geowatch.cli.coco_add_ignore_buffer \
--ignore_buffer_size '{config.ignore_buffer}' \
--src "{reproject_out_kwcoco_fpath}" \
--dst "{ignore_buffer_kwcoco_fpath}"
"""
)
if not ignore_buffer_kwcoco_fpath.exists() or force_rerun:
add_ignore_buffer_job = queue.submit(
add_kwcoco_ignore_buffer_command,
name=f"add_ignore_buffer_{region_id}",
depends=reproject_job,
)
else:
add_ignore_buffer_job = None # NOQA
queue.print_graph()
queue.print_commands()
#queue.run()
print(
ub.codeblock(
r"""
Next Step is to run this splitgen:
export DST_DVC_DATA_DPATH=$(geowatch_dvc --tags='phase3_data' --hardware=ssd)
export DST_BUNDLE_DPATH=$DST_DVC_DATA_DPATH/Drop8-ARA-Median10GSD-V1
python -m geowatch.cli.queue_cli.prepare_splits \
--src_kwcocos "$DST_BUNDLE_DPATH"/*/pointannv4_ellipse-*-rawbands.kwcoco.zip \
--dst_dpath "$DST_BUNDLE_DPATH" \
--suffix=rawbands_pointannv4_ellipse \
--backend=tmux --tmux_workers=2 \
--splits split6 \
--run=1
"""
)
)
if __name__ == "__main__":
"""
CommandLine:
python -m geowatch.tasks.poly_from_point.run_all_pointgen
"""
main()