Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/topic modeling #47

Merged
Merged 3 commits on Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,5 @@ TEXTEXTRACTION_ECS_ENDPOINT=
SUMMARIZATION_V3_ECS_ENDPOINT=
ENTRYEXTRACTION_ECS_ENDPOINT=
GEOLOCATION_ECS_ENDPOINT=
TOPICMODEL_ECS_ENDPOINT=

1 change: 1 addition & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ jobs:
SUMMARIZATION_V3_ECS_ENDPOINT: ''
ENTRYEXTRACTION_ECS_ENDPOINT: ''
GEOLOCATION_ECS_ENDPOINT: ''
TOPICMODEL_ECS_ENDPOINT: ''

# Celery
CELERY_BROKER_URL: ''
Expand Down
2 changes: 1 addition & 1 deletion analysis_module/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def process_topicmodeling(body) -> Any:
for x in range(0, len(excerpt_ids), ceil(len(excerpt_ids) / clusters))
]

data = {key: val for key, val in enumerate(data)}
data = dict(enumerate(data))

filepath = save_data_local_and_get_url(
dir_name="topicmodel", client_id=client_id, data=data
Expand Down
13 changes: 1 addition & 12 deletions analysis_module/tests/test_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,10 @@ def test_topicmodel_incomplete_data(self):
errors = resp.json()["field_errors"]
assert param in errors

@patch('analysis_module.views.analysis_module.spin_ecs_container')
def test_topicmodel_valid_request(self, spin_ecs_mock):
def test_topicmodel_valid_request(self):
"""
This tests for a topicmodel api with valid data
"""
requests_count = NLPRequest.objects.count()
valid_data = {
"entries_url": "https://someurl.com/entries",
"cluster_size": 2,
Expand All @@ -52,15 +50,6 @@ def test_topicmodel_valid_request(self, spin_ecs_mock):
self.set_credentials()
resp = self.client.post(self.TOPICMODELING_URL, valid_data)
assert resp.status_code == 202
spin_ecs_mock.delay.assert_called_once()
new_requests_count = NLPRequest.objects.count()
assert \
new_requests_count == requests_count + 1, \
"One more NLPRequest object should be created"
assert NLPRequest.objects.filter(
type="topicmodel",
created_by=self.user,
).exists()

def test_ngrams_incomplete_data(self):
"""
Expand Down
7 changes: 6 additions & 1 deletion analysis_module/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
SUMMARIZATION_V3_ECS_ENDPOINT,
TEXT_EXTRACTION_ECS_ENDPOINT,
ENTRYEXTRACTION_ECS_ENDPOINT,
GEOLOCATION_ECS_ENDPOINT
GEOLOCATION_ECS_ENDPOINT,
TOPICMODEL_ECS_ENDPOINT
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -253,6 +254,8 @@ def get_ecs_id_param_name(request_type: NLPRequest.FeaturesType):
return "entryextraction_id" # not needed probably, just to be in line with the rest.
if request_type == NLPRequest.FeaturesType.GEOLOCATION:
return "geolocation_id"
if request_type == NLPRequest.FeaturesType.TOPICMODEL:
return "topicmodel_id"
return None


Expand All @@ -265,4 +268,6 @@ def get_ecs_url(request_type: NLPRequest.FeaturesType):
return urljoin(ENTRYEXTRACTION_ECS_ENDPOINT, "/extract_entries")
elif request_type == NLPRequest.FeaturesType.GEOLOCATION:
return urljoin(GEOLOCATION_ECS_ENDPOINT, "/get_geolocations")
elif request_type == NLPRequest.FeaturesType.TOPICMODEL:
return urljoin(TOPICMODEL_ECS_ENDPOINT, "/get_excerpt_clusters")
return None
sudan45 marked this conversation as resolved.
Show resolved Hide resolved
27 changes: 26 additions & 1 deletion analysis_module/views/analysis_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,32 @@ def process_request(
@api_view(["POST"])
@permission_classes([IsAuthenticated])
def topic_modeling(request: Request):
return process_request(TopicModelDeepRequest, request, "topicmodel")
serializer = TopicModelDeepRequest(data=request.data)
serializer.is_valid(raise_exception=True)

if serializer.validated_data.get("mock") or IS_MOCKSERVER:
return process_mock_request(
request=serializer.validated_data,
request_type=NLPRequest.FeaturesType.TOPICMODEL
)

nlp_request = NLPRequest.objects.create(
client_id=serializer.validated_data["client_id"],
type=NLPRequest.FeaturesType.TOPICMODEL,
request_params=serializer.validated_data,
created_by=request.user
)
transaction.on_commit(lambda: send_ecs_http_request(nlp_request))
resp = {
"client_id": serializer.data.get("client_id"),
"type": NLPRequest.FeaturesType.TOPICMODEL,
"message": "Request has been successfully processed.",
"request_id": str(nlp_request.unique_id),
}
return Response(
resp,
status=status.HTTP_202_ACCEPTED,
)


@api_view(["POST"])
Expand Down
1 change: 1 addition & 0 deletions core_server/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
TEXT_EXTRACTION_ECS_ENDPOINT = env("TEXTEXTRACTION_ECS_ENDPOINT")
ENTRYEXTRACTION_ECS_ENDPOINT = env("ENTRYEXTRACTION_ECS_ENDPOINT")
GEOLOCATION_ECS_ENDPOINT = env("GEOLOCATION_ECS_ENDPOINT")
TOPICMODEL_ECS_ENDPOINT = env("TOPICMODEL_ECS_ENDPOINT")


CALLBACK_MAX_RETRIES_LIMIT = env("CALLBACK_MAX_RETRIES_LIMIT")
Expand Down
1 change: 1 addition & 0 deletions docker-compose-prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ x-server: &base-server-config
TEXTEXTRACTION_ECS_ENDPOINT: ${TEXTEXTRACTION_ECS_ENDPOINT:?Provide text extraction endpoint}
ENTRYEXTRACTION_ECS_ENDPOINT: ${ENTRYEXTRACTION_ECS_ENDPOINT:?Provide entry extraction endpoint}
GEOLOCATION_ECS_ENDPOINT: ${GEOLOCATION_ECS_ENDPOINT:?Provide geolocation endpoint}
TOPICMODEL_ECS_ENDPOINT: ${TOPICMODEL_ECS_ENDPOINT:?Provide topic model endpoint}

# MODEL_INFO
CLASSIFICATION_MODEL_ID: ${CLASSIFICATION_MODEL_ID:-classification-model}
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ x-server: &base-server-config
TEXTEXTRACTION_ECS_ENDPOINT: ${TEXTEXTRACTION_ECS_ENDPOINT:?Provide text extraction endpoint}
ENTRYEXTRACTION_ECS_ENDPOINT: ${ENTRYEXTRACTION_ECS_ENDPOINT:?Provide entry extraction endpoint}
GEOLOCATION_ECS_ENDPOINT: ${GEOLOCATION_ECS_ENDPOINT:?Provide geolocation endpoint}
TOPICMODEL_ECS_ENDPOINT: ${TOPICMODEL_ECS_ENDPOINT:?Provide topic model endpoint}

# SENTRY
SENTRY_DSN: ${SENTRY_DSN:-}
Expand Down
Loading