Skip to content

Commit

Permalink
experimenting with interactive maps
Browse files Browse the repository at this point in the history
  • Loading branch information
betolink committed Nov 15, 2023
1 parent 8461cfa commit 8cc5a23
Show file tree
Hide file tree
Showing 11 changed files with 2,656 additions and 1,156 deletions.
3 changes: 2 additions & 1 deletion earthaccess/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
import threading
from importlib.metadata import version
from typing import Any

from .api import (
auth_environ,
Expand All @@ -16,6 +15,7 @@
open,
search_data,
search_datasets,
search_widget
)
from .auth import Auth
from .search import DataCollections, DataGranules
Expand All @@ -39,6 +39,7 @@
"DataCollections",
"Auth",
"Store",
"search_widget",
"auth_environ",
]

Expand Down
7 changes: 7 additions & 0 deletions earthaccess/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import s3fs
from fsspec import AbstractFileSystem

from .widgets import SearchWidget

from .auth import Auth
from .results import DataGranule
from .search import CollectionQuery, DataCollections, DataGranules, GranuleQuery
Expand Down Expand Up @@ -335,6 +337,11 @@ def get_edl_token() -> str:
return token


def search_widget(projection: str = "global", map: Any = None) -> Any:
    """Build and return a ``SearchWidget``.

    Parameters:
        projection: forwarded unchanged to ``SearchWidget(projection=...)``.
        map: forwarded unchanged to ``SearchWidget(map=...)``.

    Returns:
        The newly constructed ``SearchWidget`` instance.
    """
    return SearchWidget(projection=projection, map=map)


def auth_environ() -> Dict[str, str]:
auth = earthaccess.__auth__
if not auth.authenticated:
Expand Down
2 changes: 1 addition & 1 deletion earthaccess/formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def _repr_collection_html() -> str:
def _repr_granule_html(granule: Any) -> str:
css_styles = _load_static_files()
css_inline = f"""<div id="{uuid4()}" style="height: 0px; display: none">
{''.join([f"<style>{style}</style>" for style in css_styles])}
{''.join([f"<style>{style}</style>" for style in css_styles])} # TODO: this has to happen just one time like other libraries do it
</div>"""
style = "max-height: 120px;"
dataviz_img = "".join(
Expand Down
11 changes: 10 additions & 1 deletion earthaccess/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,16 @@ def __init__(
self.cloud_hosted = cloud_hosted
# TODO: maybe add area, start date and all that as an instance value
self["size"] = self.size()
self.uuid = str(uuid.uuid4())
if "ConceptId" in self["umm"]["CollectionReference"]:
self["dataset-id"] = self["umm"]["CollectionReference"]["ConceptId"]
elif "ShortName" in self["umm"]["CollectionReference"]:
self["dataset-id"] = self["umm"]["CollectionReference"]["ShortName"]
elif "EntryTitle" in self["umm"]["CollectionReference"]:
self["dataset-id"] = self["umm"]["CollectionReference"]["EntryTitle"]
else:
self["dataset-id"] = self["meta"]["provider-id"] + self["meta"]["native-id"][0:4]


self.render_dict: Any
if fields is None:
self.render_dict = self
Expand Down
12 changes: 5 additions & 7 deletions earthaccess/search.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import datetime as dt
from inspect import getmembers, ismethod
from typing import Any, Dict, List, Optional, Tuple, Type
from uuid import uuid4

import dateutil.parser as parser # type: ignore
from cmr import CollectionQuery, GranuleQuery # type: ignore
Expand Down Expand Up @@ -404,12 +405,7 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]:
if not isinstance(cloud_hosted, bool):
raise TypeError("cloud_hosted must be of type bool")

if "short_name" in self.params:
provider = find_provider_by_shortname(
self.params["short_name"], cloud_hosted
)
if provider is not None:
self.params["provider"] = provider
self.params["cloud_hosted"] = cloud_hosted
return self

def granule_name(self, granule_name: str) -> Type[CollectionQuery]:
Expand Down Expand Up @@ -527,6 +523,7 @@ def get(self, limit: int = 2000) -> list:
results: List = []
page = 1
headers: Dict[str, str] = {}

while len(results) < limit:
params = {"page_size": page_size}
# TODO: should be in a logger
Expand Down Expand Up @@ -556,7 +553,8 @@ def get(self, limit: int = 2000) -> list:
else:
cloud = False
latest = list(
DataGranule(granule, cloud_hosted=cloud)
DataGranule(granule,
cloud_hosted=cloud)
for granule in response.json()["items"]
)
else:
Expand Down
272 changes: 272 additions & 0 deletions earthaccess/widgets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
from typing import List, Any, Tuple, Union, Dict
import geopandas
import pandas

import re

from shapely import geometry
from shapely.geometry.polygon import orient
from shapely.geometry import Polygon
import shapely

import matplotlib.pyplot as plt
import matplotlib

from .results import DataGranule

import ipyleaflet
import ipywidgets


class SearchWidget:
    """Interactive map for drawing a region of interest and browsing granule footprints.

    The widget wraps an ``ipyleaflet.Map`` (created here, or supplied by the
    caller through ``map``) and attaches a draw control to it. Every shape
    drawn on the map is stored in ``search_params`` and ``roi``.
    ``explore`` adds one footprint layer per dataset and ``update_html``
    fills an info panel when a footprint is hovered.

    Third-party types in annotations are quoted (forward references) so they
    are not evaluated when the class body is executed.
    """

    # Related-URL types kept by ``to_geopandas``.
    _KEPT_URL_TYPES = (
        "GET DATA",
        "GET DATA VIA DIRECT ACCESS",
        "GET RELATED VISUALIZATION",
    )
    # Runs of capital letters, used to turn CamelCase keys into snake_case.
    _UPPERCASE_RUN = re.compile(r"([A-Z]+)")

    def __init__(
        self,
        params: Union[Dict[str, Any], None] = None,
        projection: str = "global",
        map: Any = None,
    ):
        """Set up widget state and build the map.

        Parameters:
            params: search parameters to start from. When given, the same
                dict object is stored (and later updated) as
                ``search_params``; when omitted a fresh dict is created for
                this instance.
            projection: key selecting basemap/CRS/center; one of
                ``"global"``, ``"south"`` or ``"north"``. Only consulted
                when ``map`` is ``None``.
            map: an existing map to reuse instead of creating a new one.
        """
        self.current_geometry = None
        self.active_layers = []
        self.roi = []
        self.overview = None
        self.map = map
        # NOTE(review): _flattent_column_names leaves hyphenated keys such as
        # "native-id" untouched, so "concept_id" only matches if the flattened
        # column is really spelled with an underscore - confirm.
        self.default_fields = [
            "size",
            "concept_id",
            "dataset-id",
            "native-id",
            "provider-id",
            "_related_urls",
            "_beginning_date_time",
            "_ending_date_time",
            "geometry",
        ]
        self.html = ipywidgets.HTML(
            """
            <h4>File Info</h4>
            Hover over a file footprint
            """
        )
        self.html.layout.margin = "0px 20px 20px 20px"

        self._build_widget(params, projection)

    def _build_widget(
        self,
        params: Union[Dict[str, Any], None] = None,
        projection: str = "global",
    ) -> None:
        """Create (or adopt) the map and attach the controls.

        Parameters:
            params: search parameters; ``None`` means "start with an empty
                dict". A ``"polygon"`` entry is added if it is missing.
            projection: key into the projection table below; only used when
                no map was supplied to the constructor.
        """
        self.map_projection = projection
        # A fresh dict per call: a shared ``{}`` default would leak the ROI
        # written below from one widget instance into the next.
        self.search_params = {} if params is None else params

        if "polygon" not in self.search_params:
            self.search_params["polygon"] = ""

        self.dc = ipyleaflet.DrawControl(
            marker={"shapeOptions": {"color": "#0000FF"}},
            circlemarker={},
            rectangle={"shapeOptions": {"color": "#0000FF"}},
        )
        self.dc.on_draw(self._handle_draw)

        proj_dict = {
            "global": {
                "basemap": ipyleaflet.basemaps.Esri.WorldImagery,
                "crs": ipyleaflet.projections.EPSG3857,
                "map_center": [0, 0],
            },
            "south": {
                "basemap": ipyleaflet.basemaps.NASAGIBS.BlueMarble3031,
                "crs": ipyleaflet.projections.EPSG3031.NASAGIBS,
                "map_center": [-90, 0],
            },
            "north": {
                "basemap": ipyleaflet.basemaps.NASAGIBS.BlueMarble3413,
                "crs": ipyleaflet.projections.EPSG3413.NASAGIBS,
                "map_center": [90, 0],
            },
        }

        if self.map is None:
            settings = proj_dict[self.map_projection]
            self.m = ipyleaflet.Map(
                center=settings["map_center"],
                zoom=3,
                prefer_canvas=True,
                basemap=settings["basemap"],
                crs=settings["crs"],
            )
        else:
            # Reuse the caller's map: drop its controls and put zoom back.
            self.m = self.map
            self.m.clear_controls()
            self.m.add(ipyleaflet.ZoomControl())

        self.m.add(ipyleaflet.LayersControl())
        self.m.add(ipyleaflet.FullScreenControl())
        self.m.add_control(
            ipyleaflet.MeasureControl(
                position="topleft",
                active_color="orange",
                primary_length_unit="kilometers",
            )
        )
        self.m.layout.height = '600px'

        self.m.add(self.dc)

    def _flattent_column_names(self, df: "pandas.DataFrame") -> "pandas.DataFrame":
        """Rename columns in place to the snake_cased last segment of their dotted path.

        For each column name, the text after the last ``"."`` is kept, every
        run of capital letters is prefixed with ``"_"`` and the result is
        lower-cased (``"umm.RelatedUrls"`` becomes ``"_related_urls"``).

        Returns:
            The same ``df`` object, with ``columns`` reassigned.
        """
        df.columns = [
            self._UPPERCASE_RUN.sub(r"_\1", col.split(".")[-1]).lower()
            for col in df.columns
        ]
        return df

    @staticmethod
    def _filter_related_urls(links: Any) -> List[Any]:
        """Keep only the related-URL entries whose ``Type`` is in ``_KEPT_URL_TYPES``.

        Anything that is not a list (e.g. a missing-value placeholder) yields
        an empty list instead of raising.
        """
        if not isinstance(links, list):
            return []
        return [
            link
            for link in links
            if link.get("Type") in SearchWidget._KEPT_URL_TYPES
        ]

    def to_geopandas(
        self,
        results: List["DataGranule"],
        fields: Union[List[str], None] = None,
    ) -> "geopandas.GeoDataFrame":
        """Convert granule results into a GeoDataFrame of footprints.

        Parameters:
            results: granules to convert; indexed by position to build the
                geometry of each row.
            fields: flattened column names to keep. ``None`` or an empty
                list selects ``self.default_fields``.

        Returns:
            A GeoDataFrame in ``EPSG:4326`` with one row per result. The
            geometry is ``None`` for results whose footprint could not be
            derived.
        """
        results_df = pandas.json_normalize(list(results), errors="ignore")
        results_df = self._flattent_column_names(results_df)
        if not fields:
            fields = self.default_fields

        results_df = results_df.drop(
            columns=[col for col in results_df.columns if col not in fields]
        )

        # The column is absent when the caller's ``fields`` leaves it out.
        if "_related_urls" in results_df.columns:
            results_df["_related_urls"] = results_df["_related_urls"].apply(
                self._filter_related_urls
            )

        # Create shapely geometries for each result
        geometries = [
            self._get_shapely_object(results[index])
            for index in results_df.index.to_list()
        ]
        # Convert to GeoDataFrame
        return geopandas.GeoDataFrame(results_df, geometry=geometries, crs="EPSG:4326")

    def _orient_polygon(self, coords) -> List[Tuple[float, float]]:
        """Return the exterior ring of ``coords`` after ``shapely`` ``orient``."""
        polygon = orient(Polygon(coords))
        return list(polygon.exterior.coords)

    def _extract_geometry_info(self, geometry) -> Any:
        """Record a drawn GeoJSON geometry in ``search_params`` and return its coordinates.

        Parameters:
            geometry: mapping with ``"type"`` and ``"coordinates"`` keys.

        Returns:
            The oriented exterior ring for a ``Polygon``, the raw
            coordinates for a ``Point`` or ``LineString``, and ``None``
            (after printing a message) for any other type.
        """
        geometry_type = geometry['type']
        coordinates = geometry['coordinates']

        if geometry_type == 'Polygon':
            # Only the first ring (the exterior) is used.
            coords = self._orient_polygon(coordinates[0])
            self.search_params["polygon"] = coords
            return coords
        if geometry_type == 'Point':
            self.search_params["point"] = coordinates
            return coordinates
        if geometry_type == 'LineString':
            self.search_params["line"] = coordinates
            return coordinates

        print("Unsupported geometry type:", geometry_type)
        return None

    def _handle_draw(self, target, action, geo_json):
        """Draw-control callback: replace the current ROI with the new shape.

        Removes the layers added by ``explore`` and the previous ROI layer,
        clears the draw control, stores the new geometry and shows it on
        the map. ``target`` and ``action`` are not used.
        """
        for layer in self.active_layers:
            self.m.remove_layer(layer)
        self.active_layers = []

        self.dc.clear()
        if self.current_geometry:
            self.m.remove_layer(self.current_geometry)

        self.current_geometry = ipyleaflet.GeoJSON(
            name="ROI",
            data=geo_json,
            style={
                "color": "red",
                "opacity": 0.9,
                "fillOpacity": 0.1,
            },
        )

        self.roi = self._extract_geometry_info(geo_json["geometry"])

        self.m.add(self.current_geometry)

    def _get_shapely_object(self, result: "DataGranule") -> "Union[shapely.Geometry, None]":
        """Build a shapely footprint from a granule's horizontal spatial domain.

        Uses the first entry of ``BoundingRectangles`` if present, otherwise
        the boundary of the first entry of ``GPolygons``.

        Returns:
            The geometry, or ``None`` (after printing the reason) when the
            metadata is missing, malformed or of an unsupported kind.
        """
        shape = None
        try:
            geo = result['umm']['SpatialExtent']['HorizontalSpatialDomain']['Geometry']
            if 'BoundingRectangles' in geo:
                rectangle = geo['BoundingRectangles'][0]
                # (west, south, east, north)
                bbox_coords = (
                    rectangle['WestBoundingCoordinate'],
                    rectangle['SouthBoundingCoordinate'],
                    rectangle['EastBoundingCoordinate'],
                    rectangle['NorthBoundingCoordinate'],
                )
                shape = geometry.box(*bbox_coords, ccw=True)
            elif 'GPolygons' in geo:
                points = geo['GPolygons'][0]['Boundary']['Points']
                shape = geometry.Polygon(
                    [[p['Longitude'], p['Latitude']] for p in points]
                )
            else:
                raise ValueError('Provided result does not contain bounding boxes/polygons or is incompatible.')
        except (KeyError, IndexError, TypeError, ValueError, AttributeError) as e:
            # Best effort: one unusable footprint must not abort the batch.
            print(e)

        return shape

    def display(self):
        """Return the underlying map so a notebook can render it."""
        return self.m

    def _calculate_bbox_center(self, bbox):
        """Return ``[center_x, center_y]`` of ``bbox`` given as (x0, y0, x1, y1)."""
        center_x = (bbox[0] + bbox[2]) / 2
        center_y = (bbox[1] + bbox[3]) / 2

        return [center_x, center_y]

    def update_html(self, feature, **kwargs):
        """Hover callback: show the hovered footprint's details in the info panel.

        Parameters:
            feature: GeoJSON-like mapping whose ``"properties"`` hold
                ``native-id``, ``_beginning_date_time``,
                ``_ending_date_time``, ``size`` and ``_related_urls``.
            **kwargs: accepted and ignored.
        """
        from html import escape

        properties = feature["properties"]
        native_id = properties["native-id"]
        start = properties["_beginning_date_time"]
        end = properties["_ending_date_time"]
        size = properties["size"]
        related = properties["_related_urls"]

        # Values come from remote metadata: escape them and quote attributes.
        links = [
            f'<a href="{escape(link["URL"], quote=True)}">link</a>'
            for link in related
            if link["Type"] == "GET DATA" and link["URL"].startswith("https")
        ]
        previews = [
            f'<img src="{escape(link["URL"], quote=True)}" width="200px"/>'
            for link in related
            if link["Type"] == "GET RELATED VISUALIZATION"
            and link["URL"].startswith("https")
        ]
        # Join into strings; formatting the lists themselves would render
        # their Python repr (brackets and quotes) into the panel.
        url = ", ".join(links)
        browse = "".join(previews[:2])  # at most two previews

        self.html.value = """
            <h4>{}</h4>
            Size: {} MB<br>
            Start: {}<br>
            End: {}<br>
            Url: {}<br>
            <div>{}</div>
        """.format(
            escape(str(native_id)),
            round(size, 2),
            escape(str(start)),
            escape(str(end)),
            url,
            browse,
        )
        # The control wraps ``self.html``, so it only has to be added once;
        # later hovers just update the widget's value.
        if self.overview is None:
            self.overview = ipyleaflet.WidgetControl(
                widget=self.html, position="bottomright"
            )
            self.m.add(self.overview)

    def explore(self, results: List["DataGranule"]) -> Any:
        """Add one hoverable footprint layer per dataset and return the map.

        Parameters:
            results: granules to display.

        Returns:
            The map, with the new layers added and tracked in
            ``active_layers``.
        """
        gdf = self.to_geopandas(results)
        dataset_ids = list(gdf["dataset-id"].unique())
        palette = [matplotlib.colors.to_hex(c) for c in plt.cm.tab10.colors]

        for position, dataset_id in enumerate(dataset_ids):
            # Wrap around the palette so datasets beyond its length are
            # still drawn instead of being silently skipped.
            color = palette[position % len(palette)]
            df = gdf.loc[gdf['dataset-id'] == dataset_id]
            total_size = round(df["size"].sum() / 1024, 2)
            total_granules = len(df)
            layer = ipyleaflet.GeoData(
                geo_dataframe=df,
                style={
                    "color": color,
                    "fillColor": color,
                    "opacity": 0.15,
                    "weight": 0.04,
                    "fillOpacity": 0.1,
                    "stroke-width": 0.05,
                },
                hover_style={"fillColor": "red", "fillOpacity": 0.6},
                name=f"{dataset_id} [Count: {total_granules:,} | Size: {total_size} GB]",
            )
            layer.on_hover(self.update_html)
            self.active_layers.append(layer)
            self.m.add(layer)

        return self.m
8 changes: 8 additions & 0 deletions notebooks/bosque_primavera.json

Large diffs are not rendered by default.

Loading

0 comments on commit 8cc5a23

Please sign in to comment.