"""Model object dedicated to AOI selection."""
import json
from pathlib import Path, PurePosixPath
from typing import Dict, List, Optional, Tuple, Union
import ee
import geopandas as gpd
import pandas as pd
import pygadm
import pygaul
import traitlets as t
from eeclient.client import EESession
from ipyleaflet import GeoJSON
from typing_extensions import Self
from pysepal import color
from pysepal.frontend import styles as ss
from pysepal.message import ms
from pysepal.model import Model
from pysepal.scripts import utils as su
from pysepal.scripts.gee_interface import GEEInterface
__all__ = ["AoiModel"]
[docs]
class AoiModel(Model):
    """Model object dedicated to AOI selection.

    The class-level section below declares the dataset constants, the
    selection methods, the widget traits, the model parameters and the model
    outputs.
    """

    # ###########################################################################
    # ###                      dataset const                                  ###
    # ###########################################################################
    MAPPING: Path = Path(__file__).parents[1] / "data" / "gaul_iso.json"
    "GAUL -> ISO-3 mapping of country code"
    ASSET_SUFFIX: str = "aoi_"
    "The suffix to identify the asset in GEE"
    # ###########################################################################
    # ###                      const methods                                  ###
    # ###########################################################################
    CUSTOM: str = ms.aoi_sel.custom
    "The word displayed for custom method in the relevant lang"
    ADMIN: str = ms.aoi_sel.administrative
    "The word displayed for admin method in the relevant lang"
    METHODS: Dict[str, Dict[str, str]] = {
        "ADMIN0": {"name": ms.aoi_sel.adm[0], "type": ADMIN},
        "ADMIN1": {"name": ms.aoi_sel.adm[1], "type": ADMIN},
        "ADMIN2": {"name": ms.aoi_sel.adm[2], "type": ADMIN},
        "SHAPE": {"name": ms.aoi_sel.vector, "type": CUSTOM},
        "DRAW": {"name": ms.aoi_sel.draw, "type": CUSTOM},
        "POINTS": {"name": ms.aoi_sel.points, "type": CUSTOM},
        "ASSET": {"name": ms.aoi_sel.asset, "type": CUSTOM},
    }
    "The word displayed for all selection methods in the relevant lang"
    # ###########################################################################
    # ###                      widget related traitlets                       ###
    # ###########################################################################
    method: t.Unicode = t.Unicode(None, allow_none=True).tag(sync=True)
    "str: the currently selected method"
    point_json: t.Dict = t.Dict(None, allow_none=True).tag(sync=True)
    "dict: information that will be use to transform the csv into a gdf"
    vector_json: t.Dict = t.Dict(None, allow_none=True).tag(sync=True)
    "dict: information that will be use to transform the vector file into a gdf"
    geo_json: t.Dict = t.Dict(None, allow_none=True).tag(sync=True)
    "dict: the drawn geojson shape"
    admin: t.Unicode = t.Unicode(None, allow_none=True).tag(sync=True)
    "The admin number selected"
    asset_name: t.Unicode = t.Unicode(None, allow_none=True).tag(sync=True)
    "The asset name (only for GEE model)"
    asset_json: t.Dict = t.Dict(None, allow_none=True).tag(sync=True)
    "The asset json description (only for GEE model)"
    name: t.Unicode = t.Unicode(None, allow_none=True).tag(sync=True)
    "The name of the file to create (used only in drawn shaped)"
    object_set: t.Int = t.Int(0).tag(sync=True)
    "An integer that is incremented each time the object is set.."
    # ###########################################################################
    # ###                      model parameters                               ###
    # ###########################################################################
    gee: bool = True
    "either or not the model is bound to gee"
    folder: Union[str, Path] = ""
    "The folder name used in GEE related component, mainly used for debugging"
    default_vector: Optional[Union[str, Path]] = None
    "The default vector file that will be used to produce the gdf. need to be readable by fiona and/or GDAL/OGR"
    default_admin: Optional[str] = None
    "The default administrative area in GADM or GAUL norm"
    default_asset: Optional[str] = None
    "The default asset name, need to point to a readable FeatureCollection"
    # ###########################################################################
    # ###                      model outputs                                  ###
    # ###########################################################################
    # ``clear_output`` resets this attribute to None, hence the Optional
    dst_asset_id: Optional[str] = ""
    "The exported asset id"
    selected_feature: Optional[Union[ee.Feature, gpd.GeoDataFrame]] = None
    "The Feature associated with a query"
    # NOTE: the ``gdf`` property defined further down in this class rebinds
    # this name; the actual value lives in ``_gdf``.
    gdf: Optional[gpd.GeoDataFrame] = None
    "The geodataframe corresponding to the selected AOI"
    # Backing storage of the ``gdf`` property. Declared at class level so that
    # reading ``self._gdf`` (property getter and the ``self._gdf is None``
    # guards of the query methods) works on a model where no loader has run yet
    # instead of raising AttributeError.
    _gdf: Optional[gpd.GeoDataFrame] = None
    feature_collection: Optional[ee.FeatureCollection] = None
    "The feature Collection generated by the parameters (only for GEE models)"
    ipygeojson: Optional[GeoJSON] = None
    "The representation of the AOI as a ipyleaflet layer"
[docs]
def __init__(
    self,
    gee: bool = True,
    vector: Optional[Union[str, Path]] = None,
    asset: Optional[Union[str, Path]] = None,
    admin: Optional[str] = None,
    folder: Union[str, Path] = "",
    gee_session: Optional[EESession] = None,
    gee_interface: Optional[GEEInterface] = None,
) -> None:
    """A Model object dedicated to the storage and the manipulation of an aoi.

    It is meant to be used with the AoiView object (embedded in the AoiTile).
    By using this you will be able to provide your application with an aoi as an
    ee_object or a gdf, depending on whether the ee binding is activated or not.
    The class also provides insight on your aoi geometry.

    Args:
        gee: whether or not the aoi selector should be using the EarthEngine binding
        vector: the path to the default vector object
        asset: the default asset. Can only work if ee==True
        admin: the administrative code of the default selection. Need to be GADM if ee==False and GAUL 2015 if ee==True.
        folder: the init GEE asset folder where the asset selector should start looking (debugging purpose)
        gee_session: the Earth Engine session to use for the GEE binding (deprecated in favor of gee_interface)
        gee_interface: a shared GEEInterface instance. Cannot be combined with gee_session

    Raises:
        ValueError: if both gee_session and gee_interface are provided

    .. deprecated:: 2.3.2
        'asset_name' will be used as variable to store 'ASSET' method info. To get the destination saved asset id, please use 'dst_asset_id' variable.

    .. versionadded:: 3.0.0
        Added gee_interface parameter for sharing GEEInterface instances across components.
    """
    super().__init__()

    # the two ways of providing an Earth Engine connection are mutually exclusive
    if gee_session and gee_interface:
        raise ValueError(
            "Cannot provide both gee_session and gee_interface. "
            "Use gee_interface for shared instances or gee_session for component-specific sessions."
        )

    # the ee related information
    # NOTE(review): the nesting below is reconstructed; everything that needs
    # Earth Engine is assumed to live under ``if gee`` - confirm against upstream
    self.gee = gee
    if gee:
        su.init_ee()
        self.gee_interface = gee_interface if gee_interface else GEEInterface(gee_session)
        if folder:
            self.folder = str(folder)
        else:
            # no folder requested: ask the interface for one
            self.folder = self.gee_interface.get_folder()

    # build the default object, if any default was provided
    self.set_default(vector, admin, asset)
[docs]
def set_default(
    self,
    vector: Optional[Union[str, Path]] = None,
    admin: Optional[str] = None,
    asset: Optional[Union[str, Path]] = None,
) -> Self:
    """Set the default value of the object and create a gdf/feature_collection out of it.

    When several defaults are given, only one is loaded, in this order of
    precedence: vector, admin, asset.

    Args:
        vector: the default vector file that will be used to produce the gdf. need to be readable by fiona and/or GDAL/OGR
        admin: the default administrative area in GADM or GAUL norm
        asset: the default asset name, need to point to a readable FeatureCollection

    Returns:
        the model itself
    """
    # save the default values
    self.default_vector = vector

    # the asset id is stored as a plain string everywhere: ``asset_json`` is
    # later copied into the ``asset_name`` Unicode trait by ``_from_asset``,
    # which would reject a ``Path`` instance
    asset_id = str(asset) if asset else None
    self.default_asset = self.asset_name = asset_id

    # cast the asset to json
    self.asset_json = (
        {"pathname": asset_id, "column": "ALL", "value": None} if asset else None
    )

    self.default_admin = self.admin = admin

    # cast the vector to json
    self.vector_json = (
        {"pathname": str(vector), "column": "ALL", "value": None} if vector else None
    )

    # set the default gdf if possible
    if self.vector_json is not None:
        self.set_object("SHAPE")
    elif self.admin:
        self.set_object("ADMIN0")  # any level will work
    elif self.asset_json is not None:
        self.set_object("ASSET")

    return self
[docs]
def set_object(self, method: str = "") -> Self:
    """Build the object (gdf/featurecollection) from the current model inputs.

    The stored method is used unless a non-empty ``method`` is given, in which
    case it replaces the stored one.

    Args:
        method: a model loading method

    Returns:
        the model itself
    """
    # drop any previously computed output
    self.clear_output()

    # overwrite self.method
    self.method = method or self.method

    # loading method -> (loader to call, model attribute holding its input)
    loaders = {
        "ADMIN0": ("_from_admin", "admin"),
        "ADMIN1": ("_from_admin", "admin"),
        "ADMIN2": ("_from_admin", "admin"),
        "POINTS": ("_from_points", "point_json"),
        "SHAPE": ("_from_vector", "vector_json"),
        "DRAW": ("_from_geo_json", "geo_json"),
        "ASSET": ("_from_asset", "asset_json"),
    }
    if self.method not in loaders:
        raise Exception(ms.aoi_sel.exception.no_inputs)

    loader_name, input_name = loaders[self.method]
    loader = getattr(self, loader_name)
    loader(getattr(self, input_name))

    # notify the observers that a new object is available
    self.object_set += 1

    return self
def _from_asset(self, asset_json: dict) -> Self:
    """Set the ee.FeatureCollection output from an existing asset.

    Args:
        asset_json: dict with the "pathname" of the asset and the "column" /
            "value" pair used to filter it ("ALL" as column means no filter)

    Returns:
        the model itself
    """
    pathname = asset_json["pathname"]
    if not pathname:
        raise Exception(ms.aoi_sel.exception.no_asset)

    # a filter is requested as soon as a specific column is selected
    column = asset_json["column"]
    is_filtered = column != "ALL"
    if is_filtered and asset_json["value"] is None:
        raise Exception(ms.aoi_sel.exception.no_value)

    # the name is the last part of the asset id without the aoi suffix
    self.name = Path(pathname).stem.replace(self.ASSET_SUFFIX, "")
    self.asset_name = pathname

    collection = ee.FeatureCollection(pathname)
    if is_filtered:
        value = asset_json["value"]
        collection = collection.filterMetadata(column, "equals", value)
        self.name = f"{self.name}_{column}_{value}"

    # set the feature collection
    self.feature_collection = collection

    return self
def _from_points(self, point_json: dict) -> Self:
    """Set the object output from a csv json.

    Args:
        point_json: dict holding the "pathname" of the csv file together with
            the names of the columns to use; "lng_column" and "lat_column"
            are read to build the point geometries

    Returns:
        the model itself

    Raises:
        Exception: if ``point_json`` is missing, empty or has an empty value,
            or if the same value is used for several keys
    """
    # ``point_json`` is None until a file is selected and could be empty: both
    # cases are reported as incomplete inputs instead of crashing on None
    if not point_json or not all(point_json.values()):
        raise Exception(ms.aoi_sel.exception.incomplete)

    # cast the pathname to pathlib Path
    point_file = Path(point_json["pathname"])

    # check that the columns are well set: every entry must be different
    values = list(point_json.values())
    if len(values) != len(set(values)):
        raise Exception(ms.aoi_sel.exception.duplicate_key)

    # create the gdf, the separator is sniffed by the python engine
    df = pd.read_csv(point_file, sep=None, engine="python")
    self.gdf = gpd.GeoDataFrame(
        df,
        crs="EPSG:4326",
        geometry=gpd.points_from_xy(df[point_json["lng_column"]], df[point_json["lat_column"]]),
    )

    # set the name
    self.name = point_file.stem

    if self.gee:
        # transform the gdf to ee.FeatureCollection
        self.feature_collection = ee.FeatureCollection(self.gdf.__geo_interface__)
        # export as a GEE asset
        self.export_to_asset()

    return self
def _from_vector(self, vector_json: dict) -> Self:
    """Set the object output from a vector json.

    Args:
        vector_json: the dict describing the vector file ("pathname"), and the
            column filter ("column" / "value", "ALL" as column means no filter)

    Returns:
        the model itself

    Raises:
        Exception: if no file is provided or if a column is selected without
            any value
    """
    # ``vector_json`` is None until a file is selected
    if not vector_json or not vector_json["pathname"]:
        raise Exception(ms.aoi_sel.exception.no_file)

    column = vector_json["column"]
    value = vector_json["value"]

    # a single flag drives both the validation and the filtering, so a
    # leftover value combined with the "ALL" column can no longer trigger a
    # lookup on a column literally named "ALL"
    is_filtered = column != "ALL"
    if is_filtered and value is None:
        raise Exception(ms.aoi_sel.exception.no_value)

    # cast the pathname to pathlib Path
    vector_file = Path(vector_json["pathname"])

    # create the gdf
    gdf = gpd.read_file(vector_file).to_crs("EPSG:4326")

    # set the name using the file stem
    self.name = vector_file.stem

    # filter it if necessary
    if is_filtered:
        gdf = gdf[gdf[column] == value]
        self.name = f"{self.name}_{column}_{value}"

    self.gdf = gdf

    if self.gee:
        # transform the gdf to ee.FeatureCollection
        self.feature_collection = su.geojson_to_ee(self.gdf.__geo_interface__)
        # export as a GEE asset
        self.export_to_asset()

    return self
def _from_geo_json(self, geo_json: dict) -> Self:
    """Set the gdf output from a geo_json.

    Args:
        geo_json: the __geo_interface__ dict of a geometry drawn on the map.
            The "style" property of its features is removed in place.

    Returns:
        the model itself
    """
    if not geo_json:
        raise Exception(ms.aoi_sel.exception.no_draw)

    # the style property is not recognized by geopandas and gee: drop it
    for feature in geo_json["features"]:
        feature["properties"].pop("style", None)

    # create the gdf
    drawn = gpd.GeoDataFrame.from_features(geo_json)
    self.gdf = drawn.set_crs(epsg=4326)

    # normalize the name
    self.name = su.normalize_str(self.name)

    if self.gee:
        # transform the gdf to ee.FeatureCollection
        self.feature_collection = su.geojson_to_ee(self.gdf.__geo_interface__)
        # export as a GEE asset
        self.export_to_asset()
        return self

    # without gee the geojson is saved in the downloads folder, which doesn't
    # exist if nothing has been run before
    dst_dir = Path("~", "downloads", "aoi").expanduser()
    dst_dir.mkdir(exist_ok=True, parents=True)
    self.gdf.to_file(dst_dir / f"{self.name}.geojson", driver="GeoJSON")

    return self
def _from_admin(self, admin: str) -> Self:
    """Set the object from an administrative code in the GADM/GAUL codes.

    Args:
        admin: the admin code corresponding to FAO GAUL (if gee) or GADM

    Returns:
        the model itself
    """
    if not admin:
        raise Exception(ms.aoi_sel.exception.no_admlyr)

    if not self.gee:
        # without gee the data come from the pygadm lib
        gadm = pygadm.Items(admin=admin)
        self.gdf = gadm

        # generate the name from the NAME columns of the first row, the first
        # item being replaced by the first 3 characters of GID_0
        first_row = gadm.iloc[0]
        names = [su.normalize_str(first_row[c]) for c in gadm.columns if "NAME" in c]
        names[0] = first_row.GID_0[:3]
        self.name = "_".join(names)

        return self

    # with gee the data come from the pygaul lib
    self.feature_collection = pygaul.Items(admin=admin)

    # read all the properties of the first feature in a single request
    first = self.feature_collection.first()
    properties = self.gee_interface.get_info(first.toDictionary(first.propertyNames()))

    # use the iso3_code property when available; a missing code or one
    # starting with 'x' (disputed/unknown) falls back to the GAUL -> ISO mapping
    iso = properties.get("iso3_code", "")
    is_disputed = isinstance(iso, str) and iso.startswith("x")
    if not iso or is_disputed:
        gaul0_code = str(properties.get("gaul0_code", ""))
        mapping = json.loads(self.MAPPING.read_text())
        iso = mapping.get(gaul0_code, "UNK")

    # generate the name from every "*_name*" property, the first item being
    # replaced by the iso code
    names = [su.normalize_str(value) for key, value in properties.items() if "_name" in key]
    names[0] = iso
    self.name = "_".join(names)

    return self
[docs]
def clear_output(self) -> Self:
    """Reset every model output to None, leaving traits and parameters untouched.

    Returns:
        the model itself
    """
    outputs = ("gdf", "feature_collection", "ipygeojson", "selected_feature", "dst_asset_id")
    for output in outputs:
        setattr(self, output, None)

    return self
[docs]
def clear_attributes(self) -> Self:
    """Return all attributes to their default state.

    Note:
        The defaults given to the model are applied again once everything is
        cleared.

    Returns:
        the model itself
    """
    # remember the defaults before they get wiped
    vector, admin, asset = self.default_vector, self.default_admin, self.default_asset

    # empty every trait but the object_set counter
    for trait_name in self.trait_names():
        if trait_name != "object_set":
            setattr(self, trait_name, None)

    # reset the outputs
    self.clear_output()

    # reset the default
    self.set_default(vector, admin, asset)

    return self
[docs]
def get_columns(self) -> List[str]:
    """Retrieve the columns or variables from self excluding geometries and gee index.

    Returns:
        sorted list of column names

    Raises:
        Exception: if neither a gdf nor a feature collection is set
    """
    # use _gdf to evaluate the condition to avoid accessing the gdf property
    if self._gdf is None and not self.feature_collection:
        raise Exception(ms.aoi_sel.exception.no_gdf)

    if self.gee:
        aoi_ee = ee.Feature(self.feature_collection.first())
        columns = self.gee_interface.get_info(aoi_ee.propertyNames())
        list_ = [col for col in columns if col not in ["system:index", "Shape_Area"]]
    else:
        # a plain difference: the symmetric difference would ADD a "geometry"
        # entry whenever the frame has no column with that name
        list_ = set(self.gdf.columns.to_list()) - {"geometry"}

    return sorted(list_)
[docs]
def get_fields(self, column: str) -> List[str]:
    """Retrieve the distinct fields from a column.

    Args:
        column: a column name to query over the aoi

    Returns:
        sorted list of the distinct field values

    Raises:
        Exception: if neither a gdf nor a feature collection is set
    """
    # use _gdf to evaluate the condition to avoid accessing the gdf property
    if self._gdf is None and not self.feature_collection:
        raise Exception(ms.aoi_sel.exception.no_gdf)

    if self.gee:
        fields = self.feature_collection.distinct(column).aggregate_array(column)
        list_ = self.gee_interface.get_info(fields)
    else:
        # keep each value once, as the ``distinct`` call does in the gee branch
        list_ = set(self.gdf[column].to_list())

    return sorted(list_)
[docs]
def get_selected(self, column: str, field: str) -> Union[ee.Feature, gpd.GeoDataFrame]:
    """Select the part of the aoi where ``column`` equals ``field``.

    Args:
        column: the selected column in the dataset
        field: the value to search in the selected column

    Returns:
        The filtered feature collection (gee) or the filtered gdf
    """
    # use _gdf to evaluate the condition to avoid accessing the gdf property
    nothing_loaded = self._gdf is None and not self.feature_collection
    if nothing_loaded:
        raise Exception(ms.aoi_sel.exception.no_gdf)

    if not self.gee:
        matches = self.gdf[column] == field
        return self.gdf[matches]

    return self.feature_collection.filterMetadata(column, "equals", field)
[docs]
def total_bounds(self) -> Tuple[float, float, float, float]:
    """Reproduce the behaviour of the total_bounds method from geopandas.

    Returns:
        minx, miny, maxx, maxy rounded to 4 decimals

    Raises:
        ValueError: if neither a gdf nor a feature collection is set
    """
    # use _gdf to evaluate the condition to avoid accessing the gdf property
    nothing_loaded = self._gdf is None and not self.feature_collection
    if nothing_loaded:
        raise ValueError(ms.aoi_sel.exception.no_gdf)

    if not self.gee:
        raw_bounds = self.gdf.total_bounds.tolist()
    else:
        # first ring of the bounding box: items 0 and 2 are opposite corners
        ring = self.feature_collection.geometry().bounds().coordinates().get(0)
        corners = self.gee_interface.get_info(ring)
        lower, upper = corners[0], corners[2]
        raw_bounds = [lower[0], lower[1], upper[0], upper[1]]

    return [round(value, 4) for value in raw_bounds]
[docs]
def export_to_asset(self) -> Self:
    """Export the feature_collection as an asset (only for ee model).

    The destination id is stored in ``dst_asset_id``. Nothing is exported when
    the asset already exists or when a task with the same name is running.

    Returns:
        the model itself
    """
    description = self.ASSET_SUFFIX + self.name
    destination = str(PurePosixPath(self.folder, description))
    self.dst_asset_id = destination

    # skip the export if the table already exists or is being created
    interface = self.gee_interface
    if interface.get_asset(destination, not_exists_ok=True) or interface.is_running(description):
        return self

    # run the task
    interface.export_table_to_asset(
        collection=self.feature_collection,
        description=description,
        asset_id=destination,
    )

    return self
[docs]
async def export_to_asset_async(self) -> Self:
    """Export the feature_collection as an asset without blocking on the export (only for ee model).

    The destination id is stored in ``dst_asset_id``. Nothing is exported when
    the asset already exists or when a task with the same name is running.

    Returns:
        the model itself, on every path
    """
    asset_name = self.ASSET_SUFFIX + self.name
    asset_id = str(PurePosixPath(self.folder, asset_name))
    self.dst_asset_id = asset_id

    # check if the table already exist
    if self.gee_interface.get_asset(asset_id, not_exists_ok=True):
        return self

    # check if the task is running
    if self.gee_interface.is_running(asset_name):
        return self

    # run the task
    task_config = {
        "collection": self.feature_collection,
        "description": asset_name,
        "asset_id": asset_id,
    }
    await self.gee_interface.export_table_to_asset_async(**task_config)

    # return the model here as well, as the early exits above and the
    # ``Self`` annotation promise
    return self
[docs]
def get_ipygeojson(self, style: Optional[dict] = None) -> GeoJSON:
    """Convert the current geopandas object into an ipyleaflet GeoJSON layer.

    Args:
        style: the style of the aoi. When omitted, the style is read from the "aoi.json" file of the frontend styles and its ``color`` and ``fillColor`` are set to the primary color. It can be completely replaced by a fully qualified `style dictionary <https://ipyleaflet.readthedocs.io/en/latest/layers/geo_json.html>`__.

    Returns:
        The geojson layer of the aoi gdf, ready to use in a Map
    """
    # This function aims to work in the same way in both gee and non-gee mode
    # It's why the gdf property is used to evaluate the condition
    aoi_gdf = self.gdf
    if aoi_gdf is None:
        raise Exception(ms.aoi_sel.exception.no_gdf)

    # Convert to a regular GeoDataFrame to avoid issues with pygadm subclasses.
    # pygadm 0.5.3 has a bug with pandas 2.3+ where the __init__ method contains
    # a DataFrame comparison that fails during geopandas' internal to_json()
    # process. Converting to a plain GeoDataFrame first avoids triggering the
    # buggy pygadm constructor.
    # See: https://github.com/12rambau/pygadm/issues/81
    plain_gdf = gpd.GeoDataFrame(aoi_gdf)
    data = json.loads(plain_gdf.to_json())

    # add the name as a property of each shape,
    # useful when handlers are added from ipyleaflet
    for feature in data["features"]:
        feature["properties"]["name"] = self.name

    # adapt the style to the theme
    if style is None:
        style_file = ss.JSON_DIR / "aoi.json"
        style = json.loads(style_file.read_text())
        style.update({"color": color.primary, "fillColor": color.primary})

    # create a GeoJSON object
    # attribution="SEPAL(c)" is not recognized yet
    # https://github.com/jupyter-widgets/ipyleaflet/issues/847
    self.ipygeojson = GeoJSON(data=data, style=style, name="aoi")

    return self.ipygeojson
@property
def gdf(self):
    """Get the geodataframe associated with the AOI.

    In gee mode the frame is built from the feature collection the first time
    it is requested; None is returned when there is nothing to build it from.
    """
    # outside of gee, or when a frame is already stored (it happens when it
    # comes from vector or geojson), the stored value is the answer
    if not self.gee or self._gdf is not None:
        return self._gdf

    if not self.feature_collection:
        return None

    self._load_gdf()

    return self._gdf


@gdf.setter
def gdf(self, value):
    """Store the gdf value. Used mainly to reset the gdf value."""
    self._gdf = value
def _load_gdf(self):
    """Build ``_gdf`` from the feature collection; nothing is returned.

    For the administrative methods an "ISO" column holding the country code is
    added to the frame.
    """
    collection_info = self.gee_interface.get_info(self.feature_collection)
    frame = gpd.GeoDataFrame.from_features(collection_info["features"]).set_crs(epsg=4326)
    self._gdf = frame

    if self.method not in ("ADMIN0", "ADMIN1", "ADMIN2"):
        return

    # use the iso3_code column when available; a missing code or one starting
    # with 'x' falls back to the GAUL -> ISO mapping
    iso = frame.iso3_code.unique()[0] if "iso3_code" in frame.columns else None
    is_disputed = isinstance(iso, str) and iso.startswith("x")
    if not iso or is_disputed:
        gaul_country = str(frame.gaul0_code.unique()[0])
        mapping = json.loads(self.MAPPING.read_text())
        iso = mapping.get(gaul_country, "UNK")

    frame["ISO"] = iso