diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..da51ada --- /dev/null +++ b/.env.example @@ -0,0 +1 @@ +MAPILLARY_CLIENT_TOKEN = "MY_MAPILLARY_CLIENT_TOKEN" \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1cec038..2382d1c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -23,12 +23,11 @@ jobs: cache: "pip" cache-dependency-path: | pyproject.toml - requirements.txt - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt + pip install . - name: Lint package run: | diff --git a/.gitignore b/.gitignore index f87dce6..de76bff 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# .env file has secrets +.env + # exclude data from source control by default data/ diff --git a/Makefile b/Makefile index 772a002..2e2f92a 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ endif ## Install Python Dependencies requirements: test_environment $(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel - $(PYTHON_INTERPRETER) -m pip install -r requirements.txt + $(PYTHON_INTERPRETER) -m pip install . ## Make Dataset data: requirements diff --git a/README.md b/README.md index 175cf3c..fac674e 100644 --- a/README.md +++ b/README.md @@ -38,12 +38,14 @@ If you are interested in joining the project, please check out [`CONTRIBUTING.md - You can use the shortcut command `make create_environment`. 2. Install requirements. ```bash - pip install -r requirements.txt + pip install . ``` - You can use the shortcut command `make requirements` to do the same thing. 3. Put your raw OpenStreetMaps road vector data in `data/raw`. - Your raw data should be geospatial vector features of type `LineString`. The features must include standard OpenStreetMap keys `osm_id` and `highway`. 
- For example, download [`Three_Rivers_Michigan_USA_line.zip`](https://drive.google.com/file/d/1fpI4I5KP2WyVD5PeytW_hoXZswOt0dwA/view?usp=drive_link) to `data/raw/Three_Rivers_Michigan_USA_line.zip`. Note that this Google Drive link is only accessible to approved project members. +4. Make a copy of the `.env.example` file, removing the `.example` from the end of the filename. + - To download images from [Mapillary](https://www.mapillary.com/) you will need to create a (free) account and replace `MY_MAPILLARY_CLIENT_TOKEN` in the `.env` file with your own token. See the "Setting up API access and obtaining a client token" section on this [Mapillary help page](https://help.mapillary.com/hc/en-us/articles/360010234680-Accessing-imagery-and-data-through-the-Mapillary-API). You only need to enable READ access scope on your token. ### 1. Sample points from roads data @@ -69,6 +71,19 @@ python -m src.create_points --help Both the input files and output files support any file formats that geopandas supports, so long as it can correctly infer the format from the file extension. See the [geopandas documentation](https://geopandas.org/en/stable/docs/user_guide/io.html) for more details. +### 2. Download an image for each point + +We want to fetch a 360 image for each sampled point. You can use the [`download_images.py`](./src/download_images.py) script to find the closest image to each point and download it to local file storage. + +#### Example + +For example, if you're continuing from the example in previous steps and already generated a `Three_Rivers_Michigan_USA_points.gpkg` file: + +```bash +python -m src.download_images data/interim/Three_Rivers_Michigan_USA_points.gpkg data/raw/mapillary +``` + + ## Project Organization ├── LICENSE @@ -83,10 +98,7 @@ Both the input files and output files support any file formats that geopandas su │ the creator's initials, and a short `-` delimited description, e.g. │ `1.0-jqp-initial-data-exploration`. 
│ - ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. - │ generated with `pip freeze > requirements.txt` - │ - ├── setup.py <- makes project pip installable (pip install -e .) so src can be imported + ├── pyproject.toml <- Single source of truth for dependencies, build system, etc └── src <- Source code for use in this project.    └── __init__.py <- Makes src a Python module diff --git a/pyproject.toml b/pyproject.toml index a34cb9f..8b1c0d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,22 @@ authors = [{ name = "The American National Red Cross" }] classifiers = [ ] +dependencies = [ + "folium", + "geopandas", + "mapclassify", + "matplotlib", + "numpy", + "ruff", + "requests", + "shapely", + "typer", + "pandas", + "geopy", + "tqdm", + "python-dotenv" +] + ## TOOLS ## [tool.ruff] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index bab4b5f..0000000 --- a/requirements.txt +++ /dev/null @@ -1,14 +0,0 @@ -# local package --e . 
-
-folium
-geopandas
-jupyterlab
-mapclassify
-matplotlib
-notebook
-numpy
-ruff
-shapely
-typer
-typing_extensions; python_version<'3.9'
diff --git a/src/__init__.py b/src/__init__.py
index e69de29..bf6bd6c 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -0,0 +1,3 @@
+from dotenv import load_dotenv
+
+load_dotenv()
diff --git a/src/download_images.py b/src/download_images.py
new file mode 100644
index 0000000..2055566
--- /dev/null
+++ b/src/download_images.py
@@ -0,0 +1,215 @@
+"""Download the closest Mapillary 360 image for each sampled point."""
+
+import logging
+from os import getenv
+from pathlib import Path
+from threading import Lock
+from typing import Annotated, Optional
+
+import geopandas as gpd
+from geopy.distance import ELLIPSOIDS, distance
+from pandas import Series
+from requests import Session
+from requests.adapters import HTTPAdapter
+from requests.exceptions import HTTPError, RetryError
+from tqdm.contrib.concurrent import thread_map
+from typer import Argument, Exit, Option, Typer
+
+log = logging.getLogger(__name__)
+log.setLevel(logging.INFO)
+log.addHandler(logging.StreamHandler())
+app = Typer()
+
+# Default folder for downloaded images, inside the repository checkout.
+DEFAULT_IMAGES_PATH = Path(Path(__file__).parent.parent, "data/raw/mapillary")
+
+
+class Mapillary:
+    """Find and download panoramic images through the Mapillary images API."""
+
+    url = "https://graph.mapillary.com/images"
+
+    def __init__(self, access_token, basepath=DEFAULT_IMAGES_PATH):
+        self.access_token = access_token
+        self.basepath = basepath
+        self.basepath.mkdir(parents=True, exist_ok=True)
+        self.client = Session()
+        self.client.mount("https://", HTTPAdapter(max_retries=3))
+        self.downloaded_images = set()
+        # `main` calls get_image_from_coordinates from several threads, so
+        # choosing an image and marking it as taken must be one atomic step.
+        self._selection_lock = Lock()
+
+    def get_image_from_coordinates(self, latitude: float, longitude: float) -> dict:
+        """Download the closest not-yet-used panorama near a point.
+
+        Returns a dict with the keys ``image_lat``, ``image_lon``, ``residual``
+        (distance from the point to the image, in metres), ``image_id`` and
+        ``image_path``. Every value is ``None`` when the API request fails or
+        the search box holds no image that is still unassigned.
+        """
+        log.debug("Get Image From Coordinates: %s, %s", latitude, longitude)
+        results = {
+            "image_lat": None,
+            "image_lon": None,
+            "residual": None,
+            "image_id": None,
+            "image_path": None,
+        }
+        bounds = self._bounds(latitude, longitude)
+
+        try:
+            response = self.client.get(
+                self.url,
+                params={
+                    "access_token": self.access_token,
+                    "fields": "id,thumb_original_url,geometry",
+                    "is_pano": "true",
+                    "bbox": bounds,
+                },
+            )
+            response.raise_for_status()
+        except (HTTPError, RetryError) as e:
+            log.error(e)
+            return results
+
+        images = response.json()["data"]
+        log.debug("Successfully Retrieved Image Data: %s", images)
+        if not images:
+            log.debug("No Images in Bounding Box: %s", bounds)
+            return results
+
+        closest_image = None
+        closest_distance = None
+        with self._selection_lock:
+            for image in images:
+                if image["id"] in self.downloaded_images:
+                    continue
+                # GeoJSON stores coordinates as (longitude, latitude).
+                image_lon, image_lat = image["geometry"]["coordinates"][:2]
+                residual = distance(
+                    (latitude, longitude),
+                    (image_lat, image_lon),
+                    ellipsoid=ELLIPSOIDS["WGS-84"],
+                )
+                if closest_distance is None or residual < closest_distance:
+                    closest_image = image
+                    closest_distance = residual
+            if closest_image is not None:
+                # Reserve the image before releasing the lock so that no
+                # other thread can pick it while it is being downloaded.
+                self.downloaded_images.add(closest_image["id"])
+
+        if closest_image is None:
+            log.debug("No Unused Images in Bounding Box: %s", bounds)
+            return results
+
+        log.debug("Closest Image: %s", closest_image["id"])
+        results["image_id"] = closest_image["id"]
+        results["image_lat"] = closest_image["geometry"]["coordinates"][1]
+        results["image_lon"] = closest_image["geometry"]["coordinates"][0]
+        results["residual"] = closest_distance.m
+        results["image_path"] = self._download_image(
+            closest_image["thumb_original_url"], results["image_id"]
+        )
+        return results
+
+    def _bounds(self, latitude, longitude) -> str:
+        """Return the "left,bottom,right,top" search box around a point."""
+        # One degree of latitude is roughly 111,111 m, so this is about 10 m.
+        # NOTE: longitude uses the same offset, so away from the equator the
+        # box covers less than 10 m in the east-west direction.
+        offset = 10 / 111_111
+        left = longitude - offset
+        bottom = latitude - offset
+        right = longitude + offset
+        top = latitude + offset
+        return f"{left},{bottom},{right},{top}"
+
+    def _download_image(self, image_url, image_id) -> Optional[Path]:
+        """Return the local path of an image, downloading it if necessary.
+
+        Returns ``None`` when the download request fails.
+        """
+        image_path = Path(self.basepath, f"{image_id}.jpeg")
+        if image_path.is_file():
+            # Already fetched (e.g. by a previous run); skip the download.
+            log.debug("Image Already Downloaded: %s", image_path)
+            return image_path
+
+        log.debug("Downloading Image: %s", image_id)
+        try:
+            response = self.client.get(image_url)
+            response.raise_for_status()
+        except (HTTPError, RetryError) as e:
+            log.error(e)
+            return None
+        log.debug("Successfully Retrieved Image: %s", image_id)
+
+        log.debug("Writing Image To: %s", image_path)
+        image_path.write_bytes(response.content)
+        log.debug("Successfully Wrote Image: %s", image_path)
+        return image_path
+
+
+@app.command()
+def main(
+    points_file: Annotated[
+        Path,
+        Argument(help="Path to Input Points File"),
+    ],
+    images_path: Annotated[
+        Path,
+        Argument(help="Folder to Write Image Data"),
+    ] = DEFAULT_IMAGES_PATH,
+    verbose: Annotated[bool, Option(help="Enable Debug Logging")] = False,
+):
+    """Download the closest Mapillary 360 image for each point in POINTS_FILE.
+
+    Results are saved next to the input file as "{stem}_images.gpkg".
+    """
+    if verbose:
+        log.setLevel(logging.DEBUG)
+
+    access_token = getenv("MAPILLARY_CLIENT_TOKEN")
+    if not access_token:
+        # Without a token every API request is rejected, so stop early.
+        log.error("MAPILLARY_CLIENT_TOKEN is not set; see .env.example")
+        raise Exit(code=1)
+
+    mapillary = Mapillary(access_token, images_path)
+    gdf = gpd.read_file(points_file)
+
+    def download_image_for_point(point) -> dict:
+        return mapillary.get_image_from_coordinates(point.y, point.x)
+
+    log.info("Downloading %s Images...", len(gdf))
+    # Workers only return their results; the columns are filled in afterwards
+    # from this thread so the GeoDataFrame is never written concurrently.
+    results = thread_map(download_image_for_point, gdf.geometry)
+
+    for column in ("image_lat", "image_lon", "residual"):
+        gdf[column] = Series(
+            [result[column] for result in results], index=gdf.index, dtype="float64"
+        )
+    gdf["image_id"] = Series(
+        [result["image_id"] for result in results], index=gdf.index, dtype="object"
+    )
+    # Keep missing paths as real nulls rather than the string "None".
+    gdf["image_path"] = Series(
+        [
+            None if result["image_path"] is None else str(result["image_path"])
+            for result in results
+        ],
+        index=gdf.index,
+        dtype="object",
+    )
+    log.info(gdf.head(20))
+
+    gdf.to_file(
+        Path(points_file.parent, f"{points_file.stem}_images.gpkg"), driver="GPKG"
+    )
+
+
+if __name__ == "__main__":
+    app()