From a9148a0cd45942a2c0a03644c4bd22ad963472b1 Mon Sep 17 00:00:00 2001 From: Evan Tung Date: Wed, 28 Feb 2024 21:49:00 -0500 Subject: [PATCH 1/7] feat: Add Mapillary Image downloader, add GeoPandas Parser, replace requirements.txt with dependencies in pyproject.toml --- .github/workflows/tests.yml | 3 +- Makefile | 2 +- README.md | 7 +- pyproject.toml | 12 +++ requirements.txt | 14 ---- src/data_parsing/__init__.py | 0 src/data_parsing/geopandas.py | 22 +++++ src/mapillary.py | 149 ++++++++++++++++++++++++++++++++++ 8 files changed, 187 insertions(+), 22 deletions(-) delete mode 100644 requirements.txt create mode 100644 src/data_parsing/__init__.py create mode 100644 src/data_parsing/geopandas.py create mode 100644 src/mapillary.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1cec038..2382d1c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -23,12 +23,11 @@ jobs: cache: "pip" cache-dependency-path: | pyproject.toml - requirements.txt - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt + pip install . - name: Lint package run: | diff --git a/Makefile b/Makefile index 772a002..2e2f92a 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ endif ## Install Python Dependencies requirements: test_environment $(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel - $(PYTHON_INTERPRETER) -m pip install -r requirements.txt + $(PYTHON_INTERPRETER) -m pip install . ## Make Dataset data: requirements diff --git a/README.md b/README.md index 175cf3c..45db414 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ If you are interested in joining the project, please check out [`CONTRIBUTING.md - You can use the shortcut command `make create_environment`. 2. Install requirements. ```bash - pip install -r requirements.txt + pip install . ``` - You can use the shortcut command `make requirements` to do the same thing. 3. Put your raw OpenStreetMaps road vector data in `data/raw`. @@ -83,10 +83,7 @@ Both the input files and output files support any file formats that geopandas su │ the creator's initials, and a short `-` delimited description, e.g. │ `1.0-jqp-initial-data-exploration`. │ - ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. - │ generated with `pip freeze > requirements.txt` - │ - ├── setup.py <- makes project pip installable (pip install -e .) so src can be imported + ├── pyproject.toml <- Single source of truth for dependencies, build system, etc └── src <- Source code for use in this project.    └── __init__.py <- Makes src a Python module diff --git a/pyproject.toml b/pyproject.toml index a34cb9f..ab99e1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,18 @@ authors = [{ name = "The American National Red Cross" }] classifiers = [ ] +dependencies = [ + "folium", + "geopandas", + "mapclassify", + "matplotlib", + "numpy", + "ruff", + "requests", + "shapely", + "typer" +] + ## TOOLS ## [tool.ruff] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index bab4b5f..0000000 --- a/requirements.txt +++ /dev/null @@ -1,14 +0,0 @@ -# local package --e . - -folium -geopandas -jupyterlab -mapclassify -matplotlib -notebook -numpy -ruff -shapely -typer -typing_extensions; python_version<'3.9' diff --git a/src/data_parsing/__init__.py b/src/data_parsing/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/data_parsing/geopandas.py b/src/data_parsing/geopandas.py new file mode 100644 index 0000000..8ad398d --- /dev/null +++ b/src/data_parsing/geopandas.py @@ -0,0 +1,22 @@ +import logging +from pathlib import Path + +import geopandas as gpd +from shapely.geometry import Point + +log = logging.getLogger(__name__) +log.setLevel(logging.INFO) +log.addHandler(logging.StreamHandler()) + + +class GeoPandasParser: + def __init__(self, gpkg_path: Path): + self.gdf = gpd.read_file(gpkg_path) + log.debug(self.gdf) + + def get_coordinates(self) -> list[Point]: + log.info("Get Coordinates") + points = self.gdf["geometry"] + log.debug(points) + + return list(points) diff --git a/src/mapillary.py b/src/mapillary.py new file mode 100644 index 0000000..6eec25f --- /dev/null +++ b/src/mapillary.py @@ -0,0 +1,149 @@ +import json +import logging +from multiprocessing import Pool +from pathlib import Path +from typing import Annotated, Optional + +from requests import RequestException, Session +from requests.adapters import HTTPAdapter +from shapely import Point +from typer import Argument, Option, Typer + +from src.data_parsing.geopandas import GeoPandasParser + +log = logging.getLogger(__name__) +log.setLevel(logging.INFO) +log.addHandler(logging.StreamHandler()) +app = Typer() + + +class Mapillary: + url = "https://graph.mapillary.com/images" + + def __init__( + self, + access_token, + basepath=Path(Path(__file__).parent.parent, "data/raw/mapillary"), + ): + self.access_token = access_token + self.basepath = basepath + self.basepath.mkdir(parents=True, exist_ok=True) + self.client = Session() + self.client.mount("https://", HTTPAdapter(max_retries=3)) + + def get_image_from_coordinates(self, point: Point) -> dict: + longitude, latitude = point.x, point.y + log.info("Get Image From Coordinates: %s, %s", latitude, longitude) + try: + response = self.client.get( + self.url, + params={ + "access_token": self.access_token, + "fields": "id,thumb_original_url", + "is_pano": "true", + "bbox": self._bounds(latitude, longitude), + }, + ) + response.raise_for_status() + except RequestException as e: + log.error(e) + return { + "latitude": latitude, + "longitude": longitude, + "image_id": None, + "image_path": None, + } + + images = response.json()["data"] + log.debug("Successfully Retrieved Image Data: %s", images) + if len(images) == 0: + log.debug( + "No Images in Bounding Box: %s", self._bounds(latitude, longitude) + ) + return { + "latitude": latitude, + "longitude": longitude, + "image_id": None, + "image_path": None, + } + + image_id = images[0]["id"] + image_url = images[0]["thumb_original_url"] + image_path = self._download_image(image_url, image_id) + + return { + "latitude": latitude, + "longitude": longitude, + "image_id": image_id, + "image_path": str(image_path), + } + + def _bounds(self, latitude, longitude) -> str: + left = longitude - 10 / 111_111 + bottom = latitude - 10 / 111_111 + right = longitude + 10 / 111_111 + top = latitude + 10 / 111_111 + return f"{left},{bottom},{right},{top}" + + def _download_image(self, image_url, image_id) -> Optional[Path]: + log.info("Downloading Image: %s", image_id) + try: + response = self.client.get(image_url, stream=True) + response.raise_for_status() + except RequestException as e: + log.error(e) + return None + image_content = response.content + log.debug("Successfully Retrieved Image: %s", image_id) + image_path = Path(self.basepath, f"{image_id}.jpeg") + log.debug("Writing Image To: %s", image_path) + + if not image_path.is_file(): + with open(image_path, "wb") as img: + img.write(image_content) + log.debug("Successfully Wrote Image: %s", image_path) + + return image_path + + +@app.command() +def main( + client_token: Annotated[ + str, + Argument(help="Mapillary Client Token from Developer Portal"), + ], + points_file: Annotated[ + Path, + Argument( + help=( + "Path to Input Points File" + ) + ), + ], + image_path: Annotated[ + Path, + Argument(help="Folder to Write Image Data"), + ] = Path(Path(__file__).parent.parent, "data/raw/mapillary"), + verbose: Annotated[bool, Option] = False, +): + if verbose: + log.setLevel(logging.DEBUG) + + if points_file.suffix == ".gpkg": + parser = GeoPandasParser(points_file) + else: + raise ValueError(f"Unsupported File Extension: {points_file.suffix}") + + mapillary = Mapillary(client_token, image_path) + coordinates = parser.get_coordinates() + + with Pool() as pool: + image_data = list(pool.map(mapillary.get_image_from_coordinates, coordinates)) + log.debug(image_data) + + with open(Path(image_path, "image_data.json"), "w") as f: + json.dump(image_data, f) + + +if __name__ == "__main__": + app() From 7178660a6bdc53f47ee8a3c2a06cd9a0a8a95d82 Mon Sep 17 00:00:00 2001 From: Evan Tung Date: Wed, 28 Feb 2024 22:01:51 -0500 Subject: [PATCH 2/7] fix: run Ruff on mapillary.py --- src/mapillary.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/mapillary.py b/src/mapillary.py index 6eec25f..aab53ce 100644 --- a/src/mapillary.py +++ b/src/mapillary.py @@ -114,11 +114,7 @@ def main( ], points_file: Annotated[ Path, - Argument( - help=( - "Path to Input Points File" - ) - ), + Argument(help=("Path to Input Points File")), ], image_path: Annotated[ Path, From 83f6a35ef847c27d3595ca54b7c45a0a220a9d6d Mon Sep 17 00:00:00 2001 From: Dan Joseph Date: Sat, 2 Mar 2024 11:19:27 -0500 Subject: [PATCH 3/7] docs for step 2 --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 45db414..1703613 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,20 @@ python -m src.create_points --help Both the input files and output files support any file formats that geopandas supports, so long as it can correctly infer the format from the file extension. See the [geopandas documentation](https://geopandas.org/en/stable/docs/user_guide/io.html) for more details. +### 2. Download an image for each point + +We want to fetch a 360 image for each sampled point. You can use the [`mapillary.py`](./src/mapillary.py) script to find the closest image to each point and download it to local file storage. + +#### Example + +For example, if you're continuing from the example in previous steps and already generated a `Three_Rivers_Michigan_USA_points.gpkg` file: + +```bash +python -m src.mapillary "[MAPILLARY_CLIENT_TOKEN]" data/interim/Three_Rivers_Michigan_USA_points.gpkg data/interim/images/ +``` + +To download images from [Mapillary](https://www.mapillary.com/) you will need to create a (free) account and replace `[MAPILLARY_CLIENT_TOKEN]` with your own token. See the "Setting up API access and obtaining a client token" section on this [Mapillary help page](https://help.mapillary.com/hc/en-us/articles/360010234680-Accessing-imagery-and-data-through-the-Mapillary-API). You only need to enable READ access scope on your token. + ## Project Organization ├── LICENSE From 751bae01489e5c7f5071b0be6aa13faf0ee366ec Mon Sep 17 00:00:00 2001 From: Evan Tung Date: Thu, 7 Mar 2024 21:49:40 -0500 Subject: [PATCH 4/7] fix: Parse GPKG in single function, ensure unique images --- .env | 1 + pyproject.toml | 6 +- src/__init__.py | 3 + src/data_parsing/__init__.py | 0 src/data_parsing/geopandas.py | 22 ----- src/download_images.py | 173 ++++++++++++++++++++++++++++++++++ src/mapillary.py | 145 ---------------------------- 7 files changed, 182 insertions(+), 168 deletions(-) create mode 100644 .env delete mode 100644 src/data_parsing/__init__.py delete mode 100644 src/data_parsing/geopandas.py create mode 100644 src/download_images.py delete mode 100644 src/mapillary.py diff --git a/.env b/.env new file mode 100644 index 0000000..1cc4185 --- /dev/null +++ b/.env @@ -0,0 +1 @@ +MAPILLARY_CLIENT_TOKEN = "MLY|___" \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index ab99e1a..8b1c0d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,11 @@ dependencies = [ "ruff", "requests", "shapely", - "typer" + "typer", + "pandas", + "geopy", + "tqdm", + "python-dotenv" ] ## TOOLS ## diff --git a/src/__init__.py b/src/__init__.py index e69de29..bf6bd6c 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -0,0 +1,3 @@ +from dotenv import load_dotenv + +load_dotenv() diff --git a/src/data_parsing/__init__.py b/src/data_parsing/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/data_parsing/geopandas.py b/src/data_parsing/geopandas.py deleted file mode 100644 index 8ad398d..0000000 --- a/src/data_parsing/geopandas.py +++ /dev/null @@ -1,22 +0,0 @@ -import logging -from pathlib import Path - -import geopandas as gpd -from shapely.geometry import Point - -log = logging.getLogger(__name__) -log.setLevel(logging.INFO) -log.addHandler(logging.StreamHandler()) - - -class GeoPandasParser: - def __init__(self, gpkg_path: Path): - self.gdf = gpd.read_file(gpkg_path) - log.debug(self.gdf) - - def get_coordinates(self) -> list[Point]: - log.info("Get Coordinates") - points = self.gdf["geometry"] - log.debug(points) - - return list(points) diff --git a/src/download_images.py b/src/download_images.py new file mode 100644 index 0000000..818c750 --- /dev/null +++ b/src/download_images.py @@ -0,0 +1,173 @@ +import logging +from os import getenv +from pathlib import Path +from typing import Annotated, Optional + +import geopandas as gpd +from geopy.distance import ELLIPSOIDS, distance +import numpy as np +from pandas import Series +from requests import Session +from requests.adapters import HTTPAdapter +from requests.exceptions import HTTPError, RetryError +from tqdm.contrib.concurrent import thread_map +from typer import Argument, Option, Typer + +log = logging.getLogger(__name__) +log.setLevel(logging.INFO) +log.addHandler(logging.StreamHandler()) +app = Typer() + + +class Mapillary: + url = "https://graph.mapillary.com/images" + + def __init__( + self, + access_token, + basepath=Path(Path(__file__).parent.parent, "data/raw/mapillary"), + ): + self.access_token = access_token + self.basepath = basepath + self.basepath.mkdir(parents=True, exist_ok=True) + self.client = Session() + self.client.mount( + "https://", + HTTPAdapter(max_retries=3), + ) + self.downloaded_images = np.array([], dtype=str) + + def get_image_from_coordinates(self, latitude: int, longitude: int) -> dict: + log.debug("Get Image From Coordinates: %s, %s", latitude, longitude) + results = { + "image_lat": None, + "image_lon": None, + "residual": None, + "image_id": None, + "image_path": None, + } + + try: + response = self.client.get( + self.url, + params={ + "access_token": self.access_token, + "fields": "id,thumb_original_url,geometry", + "is_pano": "true", + "bbox": self._bounds(latitude, longitude), + }, + ) + response.raise_for_status() + except HTTPError or RetryError as e: + log.error(e) + return results + + images = response.json()["data"] + log.debug("Successfully Retrieved Image Data: %s", images) + if len(images) == 0: + log.debug( + "No Images in Bounding Box: %s", self._bounds(latitude, longitude) + ) + return results + + closest = 0 + closest_distance = np.inf + + for i, image in enumerate( + filter(lambda img: not np.isin(img["id"], self.downloaded_images), images) + ): + image_coordinates = ( + image["geometry"]["coordinates"][1], + image["geometry"]["coordinates"][0], + ) + residual = distance( + (latitude, longitude), image_coordinates, ellipsoid=ELLIPSOIDS["WGS-84"] + ) + if residual < closest_distance: + closest = i + closest_distance = residual + + image = images[closest] + log.debug("Closest Image: %s", image["id"]) + results["image_id"] = image["id"] + np.append(self.downloaded_images, image["id"]) + results["image_lat"] = image["geometry"]["coordinates"][1] + results["image_lon"] = image["geometry"]["coordinates"][0] + results["residual"] = closest_distance.m + image_url = image["thumb_original_url"] + results["image_path"] = self._download_image(image_url, results["image_id"]) + + return results + + def _bounds(self, latitude, longitude) -> str: + left = longitude - 10 / 111_111 + bottom = latitude - 10 / 111_111 + right = longitude + 10 / 111_111 + top = latitude + 10 / 111_111 + return f"{left},{bottom},{right},{top}" + + def _download_image(self, image_url, image_id) -> Optional[Path]: + log.debug("Downloading Image: %s", image_id) + try: + response = self.client.get(image_url, stream=True) + response.raise_for_status() + except HTTPError or RetryError as e: + log.error(e) + return None + image_content = response.content + log.debug("Successfully Retrieved Image: %s", image_id) + image_path = Path(self.basepath, f"{image_id}.jpeg") + log.debug("Writing Image To: %s", image_path) + + if not image_path.is_file(): + with open(image_path, "wb") as img: + img.write(image_content) + log.debug("Successfully Wrote Image: %s", image_path) + + return image_path + + +@app.command() +def main( + points_file: Annotated[ + Path, + Argument(help="Path to Input Points File"), + ], + images_path: Annotated[ + Path, + Argument(help="Folder to Write Image Data"), + ] = Path(Path(__file__).parent.parent, "data/raw/mapillary"), + verbose: Annotated[bool, Option] = False, +): + if verbose: + log.setLevel(logging.DEBUG) + + mapillary = Mapillary(getenv("MAPILLARY_CLIENT_TOKEN"), images_path) + gdf = gpd.read_file(points_file) + gdf["image_lat"] = Series() + gdf["image_lon"] = Series() + gdf["residual"] = Series() + gdf["image_id"] = Series() + gdf["image_path"] = Series() + + def download_image_for_gdf_row(row: int): + latitude = gdf.at[row, "geometry"].y + longitude = gdf.at[row, "geometry"].x + results = mapillary.get_image_from_coordinates(latitude, longitude) + gdf.at[row, "image_lat"] = results["image_lat"] + gdf.at[row, "image_lon"] = results["image_lon"] + gdf.at[row, "residual"] = results["residual"] + gdf.at[row, "image_id"] = results["image_id"] + gdf.at[row, "image_path"] = str(results["image_path"]) + + log.info("Downloading %s Images...", len(gdf)) + thread_map(download_image_for_gdf_row, range(len(gdf))) + log.info(gdf.head(20)) + + gdf.to_file( + Path(points_file.parent, f"{points_file.stem}_images.gpkg"), driver="GPKG" + ) + + +if __name__ == "__main__": + app() diff --git a/src/mapillary.py b/src/mapillary.py deleted file mode 100644 index aab53ce..0000000 --- a/src/mapillary.py +++ /dev/null @@ -1,145 +0,0 @@ -import json -import logging -from multiprocessing import Pool -from pathlib import Path -from typing import Annotated, Optional - -from requests import RequestException, Session -from requests.adapters import HTTPAdapter -from shapely import Point -from typer import Argument, Option, Typer - -from src.data_parsing.geopandas import GeoPandasParser - -log = logging.getLogger(__name__) -log.setLevel(logging.INFO) -log.addHandler(logging.StreamHandler()) -app = Typer() - - -class Mapillary: - url = "https://graph.mapillary.com/images" - - def __init__( - self, - access_token, - basepath=Path(Path(__file__).parent.parent, "data/raw/mapillary"), - ): - self.access_token = access_token - self.basepath = basepath - self.basepath.mkdir(parents=True, exist_ok=True) - self.client = Session() - self.client.mount("https://", HTTPAdapter(max_retries=3)) - - def get_image_from_coordinates(self, point: Point) -> dict: - longitude, latitude = point.x, point.y - log.info("Get Image From Coordinates: %s, %s", latitude, longitude) - try: - response = self.client.get( - self.url, - params={ - "access_token": self.access_token, - "fields": "id,thumb_original_url", - "is_pano": "true", - "bbox": self._bounds(latitude, longitude), - }, - ) - response.raise_for_status() - except RequestException as e: - log.error(e) - return { - "latitude": latitude, - "longitude": longitude, - "image_id": None, - "image_path": None, - } - - images = response.json()["data"] - log.debug("Successfully Retrieved Image Data: %s", images) - if len(images) == 0: - log.debug( - "No Images in Bounding Box: %s", self._bounds(latitude, longitude) - ) - return { - "latitude": latitude, - "longitude": longitude, - "image_id": None, - "image_path": None, - } - - image_id = images[0]["id"] - image_url = images[0]["thumb_original_url"] - image_path = self._download_image(image_url, image_id) - - return { - "latitude": latitude, - "longitude": longitude, - "image_id": image_id, - "image_path": str(image_path), - } - - def _bounds(self, latitude, longitude) -> str: - left = longitude - 10 / 111_111 - bottom = latitude - 10 / 111_111 - right = longitude + 10 / 111_111 - top = latitude + 10 / 111_111 - return f"{left},{bottom},{right},{top}" - - def _download_image(self, image_url, image_id) -> Optional[Path]: - log.info("Downloading Image: %s", image_id) - try: - response = self.client.get(image_url, stream=True) - response.raise_for_status() - except RequestException as e: - log.error(e) - return None - image_content = response.content - log.debug("Successfully Retrieved Image: %s", image_id) - image_path = Path(self.basepath, f"{image_id}.jpeg") - log.debug("Writing Image To: %s", image_path) - - if not image_path.is_file(): - with open(image_path, "wb") as img: - img.write(image_content) - log.debug("Successfully Wrote Image: %s", image_path) - - return image_path - - -@app.command() -def main( - client_token: Annotated[ - str, - Argument(help="Mapillary Client Token from Developer Portal"), - ], - points_file: Annotated[ - Path, - Argument(help=("Path to Input Points File")), - ], - image_path: Annotated[ - Path, - Argument(help="Folder to Write Image Data"), - ] = Path(Path(__file__).parent.parent, "data/raw/mapillary"), - verbose: Annotated[bool, Option] = False, -): - if verbose: - log.setLevel(logging.DEBUG) - - if points_file.suffix == ".gpkg": - parser = GeoPandasParser(points_file) - else: - raise ValueError(f"Unsupported File Extension: {points_file.suffix}") - - mapillary = Mapillary(client_token, image_path) - coordinates = parser.get_coordinates() - - with Pool() as pool: - image_data = list(pool.map(mapillary.get_image_from_coordinates, coordinates)) - log.debug(image_data) - - with open(Path(image_path, "image_data.json"), "w") as f: - json.dump(image_data, f) - - -if __name__ == "__main__": - app() From d1575a01bdfc5b3f6323519025071d5984fcaffe Mon Sep 17 00:00:00 2001 From: Dan Joseph Date: Fri, 8 Mar 2024 19:12:56 -0500 Subject: [PATCH 5/7] remove .env file --- .env | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .env diff --git a/.env b/.env deleted file mode 100644 index 1cc4185..0000000 --- a/.env +++ /dev/null @@ -1 +0,0 @@ -MAPILLARY_CLIENT_TOKEN = "MLY|___" \ No newline at end of file From 13b3ad0e0f8ffe262a22b9da09372f75d81c4ea9 Mon Sep 17 00:00:00 2001 From: Dan Joseph Date: Fri, 8 Mar 2024 19:19:16 -0500 Subject: [PATCH 6/7] use .env.example file --- .env.example | 1 + .gitignore | 3 +++ README.md | 5 +++-- 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..da51ada --- /dev/null +++ b/.env.example @@ -0,0 +1 @@ +MAPILLARY_CLIENT_TOKEN = "MY_MAPILLARY_CLIENT_TOKEN" \ No newline at end of file diff --git a/.gitignore b/.gitignore index f87dce6..de76bff 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# .env file has secrets +.env + # exclude data from source control by default data/ diff --git a/README.md b/README.md index 1703613..99a48f5 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,8 @@ If you are interested in joining the project, please check out [`CONTRIBUTING.md 3. Put your raw OpenStreetMaps road vector data in `data/raw`. - Your raw data should be geospatial vector features of type `LineString`. The features must include standard OpenStreetMap keys `osm_id` and `highway`. - For example, download [`Three_Rivers_Michigan_USA_line.zip`](https://drive.google.com/file/d/1fpI4I5KP2WyVD5PeytW_hoXZswOt0dwA/view?usp=drive_link) to `data/raw/Three_Rivers_Michigan_USA_line.zip`. Note that this Google Drive link is only accessible to approved project members. +4. Make a copy of the `.env.example` file, removing the `.example` from the end of the filename. + - To download images from [Mapillary](https://www.mapillary.com/) you will need to create a (free) account and replace `MY_MAPILLARY_CLIENT_TOKEN` in the `.env` file with your own token. See the "Setting up API access and obtaining a client token" section on this [Mapillary help page](https://help.mapillary.com/hc/en-us/articles/360010234680-Accessing-imagery-and-data-through-the-Mapillary-API). You only need to enable READ access scope on your token. ### 1. Sample points from roads data @@ -78,10 +80,9 @@ We want to fetch a 360 image for each sampled point. You can use the [`mapillary For example, if you're continuing from the example in previous steps and already generated a `Three_Rivers_Michigan_USA_points.gpkg` file: ```bash -python -m src.mapillary "[MAPILLARY_CLIENT_TOKEN]" data/interim/Three_Rivers_Michigan_USA_points.gpkg data/interim/images/ +python -m src.mapillary data/interim/Three_Rivers_Michigan_USA_points.gpkg data/raw/mapillary/ ``` -To download images from [Mapillary](https://www.mapillary.com/) you will need to create a (free) account and replace `[MAPILLARY_CLIENT_TOKEN]` with your own token. See the "Setting up API access and obtaining a client token" section on this [Mapillary help page](https://help.mapillary.com/hc/en-us/articles/360010234680-Accessing-imagery-and-data-through-the-Mapillary-API). You only need to enable READ access scope on your token. ## Project Organization From 6115e3a738658aaad530f9ec9142bccbd455fa39 Mon Sep 17 00:00:00 2001 From: Evan Tung Date: Wed, 13 Mar 2024 21:36:54 -0400 Subject: [PATCH 7/7] fix: Change downloaded_images to be a set, update README for download_images.py command --- README.md | 4 ++-- src/download_images.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 99a48f5..fac674e 100644 --- a/README.md +++ b/README.md @@ -73,14 +73,14 @@ Both the input files and output files support any file formats that geopandas su ### 2. Download an image for each point -We want to fetch a 360 image for each sampled point. You can use the [`mapillary.py`](./src/mapillary.py) script to find the closest image to each point and download it to local file storage. +We want to fetch a 360 image for each sampled point. You can use the [`download_images.py`](./src/download_images.py) script to find the closest image to each point and download it to local file storage. #### Example For example, if you're continuing from the example in previous steps and already generated a `Three_Rivers_Michigan_USA_points.gpkg` file: ```bash -python -m src.mapillary data/interim/Three_Rivers_Michigan_USA_points.gpkg data/raw/mapillary/ +python -m src.download_images data/interim/Three_Rivers_Michigan_USA_points.gpkg data/raw/mapillary ``` diff --git a/src/download_images.py b/src/download_images.py index 818c750..2055566 100644 --- a/src/download_images.py +++ b/src/download_images.py @@ -35,7 +35,7 @@ def __init__( "https://", HTTPAdapter(max_retries=3), ) - self.downloaded_images = np.array([], dtype=str) + self.downloaded_images = set() def get_image_from_coordinates(self, latitude: int, longitude: int) -> dict: log.debug("Get Image From Coordinates: %s, %s", latitude, longitude) @@ -74,7 +74,7 @@ def get_image_from_coordinates(self, latitude: int, longitude: int) -> dict: closest_distance = np.inf for i, image in enumerate( - filter(lambda img: not np.isin(img["id"], self.downloaded_images), images) + filter(lambda img: img["id"] not in self.downloaded_images, images) ): image_coordinates = ( image["geometry"]["coordinates"][1], @@ -90,12 +90,12 @@ def get_image_from_coordinates(self, latitude: int, longitude: int) -> dict: image = images[closest] log.debug("Closest Image: %s", image["id"]) results["image_id"] = image["id"] - np.append(self.downloaded_images, image["id"]) results["image_lat"] = image["geometry"]["coordinates"][1] results["image_lon"] = image["geometry"]["coordinates"][0] results["residual"] = closest_distance.m image_url = image["thumb_original_url"] results["image_path"] = self._download_image(image_url, results["image_id"]) + self.downloaded_images.add(results["image_id"]) return results