Merge pull request #1108 from NASA-IMPACT/1107-ej-integrate-original-spreadsheet-data-with-cmr-records-in-api

remove destination_server and add datasource
Showing 8 changed files with 395 additions and 15 deletions.
@@ -0,0 +1,86 @@
# Environmental Justice API

## Overview
This API provides access to Environmental Justice data from multiple sources. It supports retrieving data from individual sources or as a combined dataset with defined precedence rules.

## Endpoints

### GET /api/environmental-justice/

Retrieves environmental justice data based on the specified data source.

#### Query Parameters

| Parameter | Description | Default | Options |
|-------------|-------------|------------|----------------------------------------------|
| data_source | Data source filter | "combined" | "spreadsheet", "ml_production", "ml_testing", "combined" |

#### Data Source Behavior

1. **Single Source**
   - `?data_source=spreadsheet`: Returns only spreadsheet data
   - `?data_source=ml_production`: Returns only ML production data
   - `?data_source=ml_testing`: Returns only ML testing data

2. **Combined Data** (Default)
   - Access via `?data_source=combined` or no parameter
   - Merges data from 'spreadsheet' and 'ml_production' sources
   - Precedence rules:
     - If the same dataset exists in both sources, the spreadsheet version is used
     - Unique datasets from ml_production are included
     - ML testing data is not included in the combined view

#### Example Requests

```bash
# Get combined data (default)
GET /api/environmental-justice/

# Get combined data (explicit)
GET /api/environmental-justice/?data_source=combined

# Get only spreadsheet data
GET /api/environmental-justice/?data_source=spreadsheet

# Get only ML production data
GET /api/environmental-justice/?data_source=ml_production

# Get only ML testing data
GET /api/environmental-justice/?data_source=ml_testing
```
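For client code, a minimal Python sketch is shown below. The host is a placeholder, and the paginated `count`/`results` shape follows the behavior exercised by the tests in this commit.

```python
# Illustrative client sketch only; "https://example.org" is a placeholder host.
import requests

BASE_URL = "https://example.org/api/environmental-justice/"

# Fetch only the spreadsheet-sourced records.
response = requests.get(BASE_URL, params={"data_source": "spreadsheet"}, timeout=30)
response.raise_for_status()

payload = response.json()
print(f"{payload['count']} record(s) returned")
for record in payload["results"]:
    print(record["dataset"], "-", record["data_source"])
```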

#### Response Fields

Each record includes the following fields (an illustrative response shape follows this list):
- dataset
- description
- description_simplified
- indicators
- intended_use
- latency
- limitations
- project
- source_link
- strengths
- format
- geographic_coverage
- data_visualization
- spatial_resolution
- temporal_extent
- temporal_resolution
- sde_link
- data_source
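As a hedged illustration (the values below are invented, not taken from the diff), a list response is paginated and wraps records like this:

```python
# Hypothetical example of a paginated list response; field values are invented.
example_response = {
    "count": 1,
    "results": [
        {
            "dataset": "example_dataset",
            "description": "Example description of the dataset.",
            "data_source": "spreadsheet",
            # ...the remaining fields listed above (indicators, latency, sde_link, etc.)
        }
    ],
}
```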

## Data Source Definitions

- **spreadsheet**: Primary source data from environmental justice spreadsheets
- **ml_production**: Production machine learning processed data
- **ml_testing**: Testing/staging machine learning processed data

## Precedence Rules
When retrieving combined data (a merge sketch follows this list):
1. If a dataset exists in both spreadsheet and ml_production:
   - The spreadsheet version takes precedence
   - The ml_production version is excluded
2. Datasets unique to ml_production are included in the response
3. ML testing data is never included in combined results
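To make the rules concrete, here is a hedged, client-side sketch of the same merge (a hypothetical helper, not code from this commit); ordering by dataset name matches the sorting checked in the tests.

```python
# Hypothetical helper illustrating the combined-view precedence rules.
def combine(spreadsheet_rows, ml_production_rows):
    """Spreadsheet rows win; ml_production rows are kept only for datasets the spreadsheet lacks."""
    spreadsheet_datasets = {row["dataset"] for row in spreadsheet_rows}
    combined = list(spreadsheet_rows)
    combined += [row for row in ml_production_rows if row["dataset"] not in spreadsheet_datasets]
    # Combined results are returned sorted by dataset name.
    return sorted(combined, key=lambda row: row["dataset"])
```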
52 changes: 52 additions & 0 deletions
...tal_justice/migrations/0006_remove_environmentaljusticerow_destination_server_and_more.py
@@ -0,0 +1,52 @@
# Generated by Django 4.2.9 on 2024-11-23 03:18

from django.db import migrations, models


def migrate_destination_server_to_data_source(apps, schema_editor):
    EnvironmentalJusticeRow = apps.get_model("environmental_justice", "EnvironmentalJusticeRow")

    # Migrate prod to spreadsheet
    EnvironmentalJusticeRow.objects.filter(destination_server="prod").update(
        data_source="spreadsheet", destination_server=""
    )

    # Migrate dev to ml_production
    EnvironmentalJusticeRow.objects.filter(destination_server="dev").update(
        data_source="ml_production", destination_server=""
    )

    # Migrate test to ml_testing
    EnvironmentalJusticeRow.objects.filter(destination_server="test").update(
        data_source="ml_testing", destination_server=""
    )


class Migration(migrations.Migration):

    dependencies = [
        ("environmental_justice", "0005_environmentaljusticerow_destination_server"),
    ]

    operations = [
        migrations.AddField(
            model_name="environmentaljusticerow",
            name="data_source",
            field=models.CharField(
                blank=True,
                choices=[
                    ("spreadsheet", "Spreadsheet"),
                    ("ml_production", "ML Production"),
                    ("ml_testing", "ML Testing"),
                ],
                default="",
                max_length=20,
                verbose_name="Data Source",
            ),
        ),
        migrations.RunPython(migrate_destination_server_to_data_source, reverse_code=migrations.RunPython.noop),
        migrations.RemoveField(
            model_name="environmentaljusticerow",
            name="destination_server",
        ),
    ]
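For orientation, here is a hedged sketch of the model field this migration implies. The actual models.py is not shown in this excerpt, so the class layout is an assumption; the choice values, field options, and the `DataSourceChoices` names come from the migration above and the tests below.

```python
# Hypothetical reconstruction of the model side; only the choice values and
# field options are taken from the migration above.
from django.db import models


class EnvironmentalJusticeRow(models.Model):
    class DataSourceChoices(models.TextChoices):
        SPREADSHEET = "spreadsheet", "Spreadsheet"
        ML_PRODUCTION = "ml_production", "ML Production"
        ML_TESTING = "ml_testing", "ML Testing"

    data_source = models.CharField(
        "Data Source",
        max_length=20,
        blank=True,
        default="",
        choices=DataSourceChoices.choices,
    )
    # ...other fields (dataset, description, source_link, etc.) omitted here.
```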
This file was deleted.
@@ -0,0 +1,30 @@
import pytest
from django.urls import include, path
from rest_framework.routers import DefaultRouter
from rest_framework.test import APIClient

from environmental_justice.views import EnvironmentalJusticeRowViewSet

# Create router and register our viewset
router = DefaultRouter()
router.register(r"environmental-justice", EnvironmentalJusticeRowViewSet)

# Create temporary urlpatterns for testing
urlpatterns = [
    path("api/", include(router.urls)),
]


@pytest.fixture
def client():
    """Return a Django REST framework API client"""
    return APIClient()


# Override the default URL conf for testing
@pytest.fixture(autouse=True)
def setup_urls():
    """Setup URLs for testing"""
    from django.conf import settings

    settings.ROOT_URLCONF = __name__
@@ -0,0 +1,28 @@
import factory
from factory.django import DjangoModelFactory

from environmental_justice.models import EnvironmentalJusticeRow


class EnvironmentalJusticeRowFactory(DjangoModelFactory):
    class Meta:
        model = EnvironmentalJusticeRow

    dataset = factory.Sequence(lambda n: f"dataset_{n}")
    description = factory.Faker("sentence")
    description_simplified = factory.Faker("sentence")
    indicators = factory.Faker("sentence")
    intended_use = factory.Faker("sentence")
    latency = factory.Faker("word")
    limitations = factory.Faker("sentence")
    project = factory.Faker("word")
    source_link = factory.Faker("url")
    strengths = factory.Faker("sentence")
    format = factory.Faker("file_extension")
    geographic_coverage = factory.Faker("country")
    data_visualization = factory.Faker("sentence")
    spatial_resolution = factory.Faker("word")
    temporal_extent = factory.Faker("date")
    temporal_resolution = factory.Faker("word")
    sde_link = factory.Faker("url")
    data_source = EnvironmentalJusticeRow.DataSourceChoices.SPREADSHEET
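As a brief, hedged usage note (not part of the diff): factory_boy factories like this one can be called directly or via `create_batch`, overriding any default per call, which is how the tests below use it.

```python
# Illustrative usage only; assumes a pytest-django test with database access.
from environmental_justice.models import EnvironmentalJusticeRow
from environmental_justice.tests.factories import EnvironmentalJusticeRowFactory

# Three ml_testing rows with generated field values.
rows = EnvironmentalJusticeRowFactory.create_batch(
    3, data_source=EnvironmentalJusticeRow.DataSourceChoices.ML_TESTING
)

# A single row overriding just the dataset name; other fields come from the factory defaults.
row = EnvironmentalJusticeRowFactory(dataset="custom_dataset")
```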
@@ -0,0 +1,153 @@
# docker-compose -f local.yml run --rm django pytest environmental_justice/tests/test_views.py
import pytest
from rest_framework import status

from environmental_justice.models import EnvironmentalJusticeRow
from environmental_justice.tests.factories import EnvironmentalJusticeRowFactory


@pytest.mark.django_db
class TestEnvironmentalJusticeRowViewSet:
    """Test suite for the EnvironmentalJusticeRow API endpoints"""

    def setup_method(self):
        """Setup URL for API endpoint"""
        self.url = "/api/environmental-justice/"

    def test_empty_database_returns_empty_list(self, client):
        """Should return empty list when no records exist"""
        response = client.get(self.url)
        assert response.status_code == status.HTTP_200_OK
        assert response.json()["results"] == []
        assert response.json()["count"] == 0

    def test_single_source_filtering(self, client):
        """Should return records only from requested data source"""
        # Create records for each data source
        spreadsheet_record = EnvironmentalJusticeRowFactory(
            dataset="test_dataset", data_source=EnvironmentalJusticeRow.DataSourceChoices.SPREADSHEET
        )
        ml_prod_record = EnvironmentalJusticeRowFactory(
            dataset="another_dataset", data_source=EnvironmentalJusticeRow.DataSourceChoices.ML_PRODUCTION
        )
        ml_test_record = EnvironmentalJusticeRowFactory(
            dataset="test_dataset_3", data_source=EnvironmentalJusticeRow.DataSourceChoices.ML_TESTING
        )

        # Test spreadsheet filter
        response = client.get(f"{self.url}?data_source=spreadsheet")
        assert response.status_code == status.HTTP_200_OK
        data = response.json()["results"]
        assert len(data) == 1
        assert data[0]["dataset"] == spreadsheet_record.dataset

        # Test ml_production filter
        response = client.get(f"{self.url}?data_source=ml_production")
        assert response.status_code == status.HTTP_200_OK
        data = response.json()["results"]
        assert len(data) == 1
        assert data[0]["dataset"] == ml_prod_record.dataset

        # Test ml_testing filter
        response = client.get(f"{self.url}?data_source=ml_testing")
        assert response.status_code == status.HTTP_200_OK
        data = response.json()["results"]
        assert len(data) == 1
        assert data[0]["dataset"] == ml_test_record.dataset

    def test_combined_data_precedence(self, client):
        """
        Should return combined data with spreadsheet taking precedence over ml_production
        for matching datasets
        """
        # Create spreadsheet record
        EnvironmentalJusticeRowFactory(
            dataset="common_dataset",
            description="spreadsheet version",
            data_source=EnvironmentalJusticeRow.DataSourceChoices.SPREADSHEET,
        )

        # Create ML production record with same dataset
        EnvironmentalJusticeRowFactory(
            dataset="common_dataset",
            description="ml version",
            data_source=EnvironmentalJusticeRow.DataSourceChoices.ML_PRODUCTION,
        )

        # Create unique ML production record
        EnvironmentalJusticeRowFactory(
            dataset="unique_ml_dataset", data_source=EnvironmentalJusticeRow.DataSourceChoices.ML_PRODUCTION
        )

        # Test combined view (default)
        response = client.get(self.url)
        assert response.status_code == status.HTTP_200_OK
        data = response.json()["results"]
        assert len(data) == 2  # Should only return 2 records (not 3)

        # Verify correct records are returned
        datasets = [record["dataset"] for record in data]
        assert "common_dataset" in datasets
        assert "unique_ml_dataset" in datasets

        # Verify precedence - should get spreadsheet version of common dataset
        common_record = next(r for r in data if r["dataset"] == "common_dataset")
        assert common_record["description"] == "spreadsheet version"

    def test_combined_explicit_parameter(self, client):
        """Should handle explicit 'combined' parameter same as default"""
        EnvironmentalJusticeRowFactory(data_source=EnvironmentalJusticeRow.DataSourceChoices.SPREADSHEET)
        EnvironmentalJusticeRowFactory(
            dataset="unique_ml_dataset",  # Ensure different dataset
            data_source=EnvironmentalJusticeRow.DataSourceChoices.ML_PRODUCTION,
        )

        # Compare default and explicit combined responses
        default_response = client.get(self.url)
        combined_response = client.get(f"{self.url}?data_source=combined")

        assert default_response.status_code == status.HTTP_200_OK
        assert combined_response.status_code == status.HTTP_200_OK
        assert default_response.json()["results"] == combined_response.json()["results"]

    def test_invalid_data_source(self, client):
        """Should return 400 error for invalid data_source parameter"""
        response = client.get(f"{self.url}?data_source=invalid")
        assert response.status_code == status.HTTP_400_BAD_REQUEST
        assert "Invalid data_source" in str(response.json())

    def test_sorting_in_combined_view(self, client):
        """Should return combined results sorted by dataset name"""
        # Create records in non-alphabetical order
        EnvironmentalJusticeRowFactory(
            dataset="zebra_dataset", data_source=EnvironmentalJusticeRow.DataSourceChoices.SPREADSHEET
        )
        EnvironmentalJusticeRowFactory(
            dataset="alpha_dataset", data_source=EnvironmentalJusticeRow.DataSourceChoices.ML_PRODUCTION
        )

        response = client.get(self.url)
        assert response.status_code == status.HTTP_200_OK
        data = response.json()["results"]

        # Verify sorting
        datasets = [record["dataset"] for record in data]
        assert datasets == sorted(datasets)

    def test_http_methods_allowed(self, client):
        """Should only allow GET requests"""
        # Test GET (should work)
        get_response = client.get(self.url)
        assert get_response.status_code == status.HTTP_200_OK

        # Test POST (should fail)
        post_response = client.post(self.url, {})
        assert post_response.status_code == status.HTTP_405_METHOD_NOT_ALLOWED

        # Test PUT (should fail)
        put_response = client.put(self.url, {})
        assert put_response.status_code == status.HTTP_405_METHOD_NOT_ALLOWED

        # Test DELETE (should fail)
        delete_response = client.delete(self.url)
        assert delete_response.status_code == status.HTTP_405_METHOD_NOT_ALLOWED
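The corresponding views.py changes are not visible in this excerpt, so as a hedged sketch only, a DRF read-only viewset along the following lines would satisfy what the tests above assert; the serializer import path and queryset details are assumptions.

```python
# Rough sketch only -- not the actual views.py from this commit.
from rest_framework import viewsets
from rest_framework.exceptions import ValidationError

from environmental_justice.models import EnvironmentalJusticeRow
from environmental_justice.serializers import EnvironmentalJusticeRowSerializer  # assumed path

VALID_SOURCES = {"spreadsheet", "ml_production", "ml_testing", "combined"}


class EnvironmentalJusticeRowViewSet(viewsets.ReadOnlyModelViewSet):
    """Read-only viewset: the router only exposes GET actions, so POST/PUT/DELETE return 405."""

    queryset = EnvironmentalJusticeRow.objects.all()
    serializer_class = EnvironmentalJusticeRowSerializer

    def get_queryset(self):
        data_source = self.request.query_params.get("data_source", "combined")
        if data_source not in VALID_SOURCES:
            # DRF turns ValidationError into an HTTP 400 response.
            raise ValidationError({"data_source": f"Invalid data_source: {data_source}"})

        queryset = EnvironmentalJusticeRow.objects.all()
        if data_source != "combined":
            return queryset.filter(data_source=data_source).order_by("dataset")

        # Combined view: spreadsheet rows win; ml_production rows are kept only for
        # datasets the spreadsheet does not cover; ml_testing is excluded entirely.
        spreadsheet = queryset.filter(data_source="spreadsheet")
        ml_only = queryset.filter(data_source="ml_production").exclude(
            dataset__in=spreadsheet.values_list("dataset", flat=True)
        )
        return (spreadsheet | ml_only).order_by("dataset")
```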