Skip to content

Commit

Permalink
Merge branch 'feature/street' of https://github.com/mapswipe/python-m…
Browse files Browse the repository at this point in the history
…apswipe-workers into feature/street
  • Loading branch information
Gigaszi committed Nov 28, 2024
2 parents 8593a3c + 9f2ac81 commit 81d44ed
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
build_multipolygon_from_layer_geometries,
check_if_layer_has_too_many_geometries,
save_geojson_to_file,
multipolygon_to_wkt
multipolygon_to_wkt,
)
from mapswipe_workers.project_types.project import BaseProject, BaseTask, BaseGroup
from mapswipe_workers.utils.process_mapillary import get_image_metadata
Expand Down Expand Up @@ -56,7 +56,6 @@ def __init__(self, project_draft):
sampling_threshold=project_draft.get("samplingThreshold", None),
)


self.imageIds = ImageMetadata["ids"]
self.imageGeometries = ImageMetadata["geometries"]

Expand All @@ -83,7 +82,9 @@ def validate_geometries(self):
self.inputGeometriesFileName = save_geojson_to_file(
self.projectId, self.geometry
)
layer, datasource = load_geojson_to_ogr(self.projectId, self.inputGeometriesFileName)
layer, datasource = load_geojson_to_ogr(
self.projectId, self.inputGeometriesFileName
)

# check if inputs fit constraints
check_if_layer_is_empty(self.projectId, layer)
Expand All @@ -97,7 +98,9 @@ def validate_geometries(self):
del datasource
del layer

logger.info(f"{self.projectId}" f" - validate geometry - " f"input geometry is correct.")
logger.info(
f"{self.projectId}" f" - validate geometry - " f"input geometry is correct."
)
wkt_geometry = multipolygon_to_wkt(multi_polygon)
return wkt_geometry

Expand Down
40 changes: 26 additions & 14 deletions mapswipe_workers/mapswipe_workers/utils/process_mapillary.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,26 @@ def coordinate_download(
return pd.DataFrame(downloaded_metadata)

target_columns = [
"id", "geometry", "captured_at", "is_pano", "compass_angle", "sequence", "organization_id"
"id",
"geometry",
"captured_at",
"is_pano",
"compass_angle",
"sequence",
"organization_id",
]
for col in target_columns:
if col not in downloaded_metadata.columns:
downloaded_metadata[col] = None

if downloaded_metadata.isna().all().all() == False or downloaded_metadata.empty == True:
if (
downloaded_metadata.isna().all().all() == False
or downloaded_metadata.empty == True
):
downloaded_metadata = downloaded_metadata[
downloaded_metadata['geometry'].apply(lambda point: point.within(polygon))
downloaded_metadata["geometry"].apply(
lambda point: point.within(polygon)
)
]

return downloaded_metadata
Expand Down Expand Up @@ -187,9 +198,7 @@ def filter_results(
df = results_df.copy()
if is_pano is not None:
if df["is_pano"].isna().all():
logger.exception(
"No Mapillary Feature in the AoI has a 'is_pano' value."
)
logger.exception("No Mapillary Feature in the AoI has a 'is_pano' value.")
return None
df = df[df["is_pano"] == is_pano]

Expand Down Expand Up @@ -220,25 +229,28 @@ def get_image_metadata(
organization_id: str = None,
start_time: str = None,
end_time: str = None,
sampling_threshold = None,
sampling_threshold=None,
):
aoi_polygon = geojson_to_polygon(aoi_geojson)
downloaded_metadata = coordinate_download(
aoi_polygon, level, attempt_limit
)
downloaded_metadata = coordinate_download(aoi_polygon, level, attempt_limit)
downloaded_metadata = downloaded_metadata[
downloaded_metadata['geometry'].apply(lambda geom: isinstance(geom, Point))
downloaded_metadata["geometry"].apply(lambda geom: isinstance(geom, Point))
]

downloaded_metadata = filter_results(
downloaded_metadata, is_pano, organization_id, start_time, end_time
)
if sampling_threshold is not None:
downloaded_metadata = spatial_sampling(downloaded_metadata, sampling_threshold)
if downloaded_metadata.isna().all().all() == False or downloaded_metadata.empty == False:
if (
downloaded_metadata.isna().all().all() == False
or downloaded_metadata.empty == False
):
if len(downloaded_metadata) > 100000:
err = (f"Too many Images with selected filter "
f"options for the AoI: {len(downloaded_metadata)}")
err = (
f"Too many Images with selected filter "
f"options for the AoI: {len(downloaded_metadata)}"
)
raise ValueError(err)
else:
return {
Expand Down
59 changes: 38 additions & 21 deletions mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from shapely import wkt
from shapely.geometry import Point


def distance_on_sphere(p1, p2):
"""
p1 and p2 are two lists that have two elements. They are numpy arrays of the long and lat
Expand Down Expand Up @@ -30,13 +31,19 @@ def distance_on_sphere(p1, p2):
delta_lat = p2[1] - p1[1]
delta_long = p2[0] - p1[0]

a = np.sin(delta_lat / 2) ** 2 + np.cos(p1[1]) * np.cos(p2[1]) * np.sin(delta_long / 2) ** 2
a = (
np.sin(delta_lat / 2) ** 2
+ np.cos(p1[1]) * np.cos(p2[1]) * np.sin(delta_long / 2) ** 2
)
c = 2 * np.arcsin(np.sqrt(a))

distances = earth_radius * c
return distances


"""-----------------------------------Filtering Points------------------------------------------------"""


def filter_points(df, threshold_distance):
"""
Filter points from a DataFrame based on a threshold distance.
Expand All @@ -61,31 +68,37 @@ def filter_points(df, threshold_distance):
lat = df["lat"].to_numpy()
long = df["long"].to_numpy()


distances = distance_on_sphere([long[1:],lat[1:]],
[long[:-1],lat[:-1]])
distances = distance_on_sphere([long[1:], lat[1:]], [long[:-1], lat[:-1]])
road_length = np.sum(distances)

#save the last point if the road segment is relavitely small (< 2*road_length)
# save the last point if the road segment is relatively short (threshold_distance <= road_length < 2 * threshold_distance)
if threshold_distance <= road_length < 2 * threshold_distance:
mask[-1] = True

accumulated_distance = 0
for i, distance in enumerate(distances):
accumulated_distance += distance
if accumulated_distance >= threshold_distance:
mask[i+1] = True
mask[i + 1] = True
accumulated_distance = 0 # Reset accumulated distance

to_be_returned_df = df[mask]
# since the last point has to be omitted in the vectorized distance calculation, it is being checked manually
p2 = to_be_returned_df.iloc[0]
distance = distance_on_sphere([float(p2["long"]),float(p2["lat"])],[long[-1],lat[-1]])

#last point will be added if it suffices the length condition
#last point will be added in case there is only one point returned
if distance >= threshold_distance or len(to_be_returned_df) ==1:
to_be_returned_df = pd.concat([to_be_returned_df,pd.DataFrame(df.iloc[-1],columns=to_be_returned_df.columns)],axis=0)
distance = distance_on_sphere(
[float(p2["long"]), float(p2["lat"])], [long[-1], lat[-1]]
)

# the last point is added if it satisfies the distance condition
# the last point is also added when only one point has been selected
if distance >= threshold_distance or len(to_be_returned_df) == 1:
to_be_returned_df = pd.concat(
[
to_be_returned_df,
pd.DataFrame(df.iloc[-1], columns=to_be_returned_df.columns),
],
axis=0,
)
return to_be_returned_df


Expand All @@ -109,19 +122,23 @@ def spatial_sampling(df, interval_length):
if len(df) == 1:
return df

df['long'] = df['geometry'].apply(lambda geom: geom.x if geom.geom_type == 'Point' else None)
df['lat'] = df['geometry'].apply(lambda geom: geom.y if geom.geom_type == 'Point' else None)
sorted_df = df.sort_values(by=['captured_at'])
df["long"] = df["geometry"].apply(
lambda geom: geom.x if geom.geom_type == "Point" else None
)
df["lat"] = df["geometry"].apply(
lambda geom: geom.y if geom.geom_type == "Point" else None
)
sorted_df = df.sort_values(by=["captured_at"])

sampled_sequence_df = pd.DataFrame()

# loop through each sequence
for sequence in sorted_df['sequence_id'].unique():
sequence_df = sorted_df[sorted_df['sequence_id'] == sequence]

filtered_sorted_sub_df = filter_points(sequence_df,interval_length)
sampled_sequence_df = pd.concat([sampled_sequence_df,filtered_sorted_sub_df],axis=0)

for sequence in sorted_df["sequence_id"].unique():
sequence_df = sorted_df[sorted_df["sequence_id"] == sequence]

filtered_sorted_sub_df = filter_points(sequence_df, interval_length)
sampled_sequence_df = pd.concat(
[sampled_sequence_df, filtered_sorted_sub_df], axis=0
)

return sampled_sequence_df

0 comments on commit 81d44ed

Please sign in to comment.