From 620e2242859f374e491032697228e6e862f7acd6 Mon Sep 17 00:00:00 2001 From: Ranjan Shrestha Date: Thu, 8 Feb 2024 10:27:44 +0545 Subject: [PATCH 1/3] updated topicmodel to use endpoint to send request to corresponding ecs task; --- .env.sample | 1 + .github/workflows/build.yml | 1 + analysis_module/mockserver.py | 2 +- analysis_module/utils.py | 7 +++++- analysis_module/views/analysis_module.py | 27 +++++++++++++++++++++++- core_server/settings.py | 1 + docker-compose-prod.yml | 1 + docker-compose.yml | 1 + 8 files changed, 38 insertions(+), 3 deletions(-) diff --git a/.env.sample b/.env.sample index 1e4b138..f46cea4 100644 --- a/.env.sample +++ b/.env.sample @@ -55,4 +55,5 @@ TEXTEXTRACTION_ECS_ENDPOINT= SUMMARIZATION_V3_ECS_ENDPOINT= ENTRYEXTRACTION_ECS_ENDPOINT= GEOLOCATION_ECS_ENDPOINT= +TOPICMODEL_ECS_ENDPOINT= diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f63c838..ee13340 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -58,6 +58,7 @@ jobs: SUMMARIZATION_V3_ECS_ENDPOINT: '' ENTRYEXTRACTION_ECS_ENDPOINT: '' GEOLOCATION_ECS_ENDPOINT: '' + TOPICMODEL_ECS_ENDPOINT: '' # Celery CELERY_BROKER_URL: '' diff --git a/analysis_module/mockserver.py b/analysis_module/mockserver.py index f9c305d..421be44 100644 --- a/analysis_module/mockserver.py +++ b/analysis_module/mockserver.py @@ -188,7 +188,7 @@ def process_topicmodeling(body) -> Any: for x in range(0, len(excerpt_ids), ceil(len(excerpt_ids) / clusters)) ] - data = {key: val for key, val in enumerate(data)} + data = dict(enumerate(data)) filepath = save_data_local_and_get_url( dir_name="topicmodel", client_id=client_id, data=data diff --git a/analysis_module/utils.py b/analysis_module/utils.py index 9cbe2d9..7c83a71 100644 --- a/analysis_module/utils.py +++ b/analysis_module/utils.py @@ -17,7 +17,8 @@ SUMMARIZATION_V3_ECS_ENDPOINT, TEXT_EXTRACTION_ECS_ENDPOINT, ENTRYEXTRACTION_ECS_ENDPOINT, - GEOLOCATION_ECS_ENDPOINT + GEOLOCATION_ECS_ENDPOINT, + TOPICMODEL_ECS_ENDPOINT ) logger = logging.getLogger(__name__) @@ -253,6 +254,8 @@ def get_ecs_id_param_name(request_type: NLPRequest.FeaturesType): return "entryextraction_id" # not needed probably, just to be in line with the rest. if request_type == NLPRequest.FeaturesType.GEOLOCATION: return "geolocation_id" + if request_type == NLPRequest.FeaturesType.TOPICMODEL: + return "topicmodel_id" return None @@ -265,4 +268,6 @@ def get_ecs_url(request_type: NLPRequest.FeaturesType): return urljoin(ENTRYEXTRACTION_ECS_ENDPOINT, "/extract_entries") elif request_type == NLPRequest.FeaturesType.GEOLOCATION: return urljoin(GEOLOCATION_ECS_ENDPOINT, "/get_geolocations") + elif request_type == NLPRequest.FeaturesType.TOPICMODEL: + return urljoin(TOPICMODEL_ECS_ENDPOINT, "/get_excerpt_clusters") return None diff --git a/analysis_module/views/analysis_module.py b/analysis_module/views/analysis_module.py index af4e475..a1a981b 100644 --- a/analysis_module/views/analysis_module.py +++ b/analysis_module/views/analysis_module.py @@ -65,7 +65,32 @@ def process_request( @api_view(["POST"]) @permission_classes([IsAuthenticated]) def topic_modeling(request: Request): - return process_request(TopicModelDeepRequest, request, "topicmodel") + serializer = TopicModelDeepRequest(data=request.data) + serializer.is_valid(raise_exception=True) + + if serializer.validated_data.get("mock") or IS_MOCKSERVER: + return process_mock_request( + request=serializer.validated_data, + request_type=NLPRequest.FeaturesType.TOPICMODEL + ) + + nlp_request = NLPRequest.objects.create( + client_id=serializer.validated_data["client_id"], + type=NLPRequest.FeaturesType.TOPICMODEL, + request_params=serializer.validated_data, + created_by=request.user + ) + transaction.on_commit(lambda: send_ecs_http_request(nlp_request)) + resp = { + "client_id": serializer.data.get("client_id"), + "type": NLPRequest.FeaturesType.TOPICMODEL, + "message": "Request has been successfully processed.", + "request_id": str(nlp_request.unique_id), + } + return Response( + resp, + status=status.HTTP_202_ACCEPTED, + ) @api_view(["POST"]) diff --git a/core_server/settings.py b/core_server/settings.py index 3ed5f15..2dba760 100644 --- a/core_server/settings.py +++ b/core_server/settings.py @@ -57,6 +57,7 @@ TEXT_EXTRACTION_ECS_ENDPOINT = env("TEXTEXTRACTION_ECS_ENDPOINT") ENTRYEXTRACTION_ECS_ENDPOINT = env("ENTRYEXTRACTION_ECS_ENDPOINT") GEOLOCATION_ECS_ENDPOINT = env("GEOLOCATION_ECS_ENDPOINT") +TOPICMODEL_ECS_ENDPOINT = env("TOPICMODEL_ECS_ENDPOINT") CALLBACK_MAX_RETRIES_LIMIT = env("CALLBACK_MAX_RETRIES_LIMIT") diff --git a/docker-compose-prod.yml b/docker-compose-prod.yml index bb6709f..fff00dc 100644 --- a/docker-compose-prod.yml +++ b/docker-compose-prod.yml @@ -47,6 +47,7 @@ x-server: &base-server-config TEXTEXTRACTION_ECS_ENDPOINT: ${TEXTEXTRACTION_ECS_ENDPOINT:?Provide text extraction endpoint} ENTRYEXTRACTION_ECS_ENDPOINT: ${ENTRYEXTRACTION_ECS_ENDPOINT:?Provide entry extraction endpoint} GEOLOCATION_ECS_ENDPOINT: ${GEOLOCATION_ECS_ENDPOINT:?Provide geolocation endpoint} + TOPICMODEL_ECS_ENDPOINT: ${TOPICMODEL_ECS_ENDPOINT:?Provide topic model endpoint} # MODEL_INFO CLASSIFICATION_MODEL_ID: ${CLASSIFICATION_MODEL_ID:-classification-model} diff --git a/docker-compose.yml b/docker-compose.yml index 42b6a8d..1f06fef 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -56,6 +56,7 @@ x-server: &base-server-config TEXTEXTRACTION_ECS_ENDPOINT: ${TEXTEXTRACTION_ECS_ENDPOINT:?Provide text extraction endpoint} ENTRYEXTRACTION_ECS_ENDPOINT: ${ENTRYEXTRACTION_ECS_ENDPOINT:?Provide entry extraction endpoint} GEOLOCATION_ECS_ENDPOINT: ${GEOLOCATION_ECS_ENDPOINT:?Provide geolocation endpoint} + TOPICMODEL_ECS_ENDPOINT: ${TOPICMODEL_ECS_ENDPOINT:?Provide topic model endpoint} # SENTRY SENTRY_DSN: ${SENTRY_DSN:-} From fccf0550d1c203c8188a10bf4192bb860d4e0611 Mon Sep 17 00:00:00 2001 From: Ranjan Shrestha Date: Thu, 8 Feb 2024 11:17:29 +0545 Subject: [PATCH 2/3] test api for topic model updated for valid request; flake8 fixes; --- analysis_module/tests/test_apis.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/analysis_module/tests/test_apis.py b/analysis_module/tests/test_apis.py index 17d0713..55b6446 100644 --- a/analysis_module/tests/test_apis.py +++ b/analysis_module/tests/test_apis.py @@ -36,12 +36,10 @@ def test_topicmodel_incomplete_data(self): errors = resp.json()["field_errors"] assert param in errors - @patch('analysis_module.views.analysis_module.spin_ecs_container') - def test_topicmodel_valid_request(self, spin_ecs_mock): + def test_topicmodel_valid_request(self): """ This tests for a topicmodel api with valid data """ - requests_count = NLPRequest.objects.count() valid_data = { "entries_url": "https://someurl.com/entries", "cluster_size": 2, @@ -52,15 +50,6 @@ def test_topicmodel_valid_request(self, spin_ecs_mock): self.set_credentials() resp = self.client.post(self.TOPICMODELING_URL, valid_data) assert resp.status_code == 202 - spin_ecs_mock.delay.assert_called_once() - new_requests_count = NLPRequest.objects.count() - assert \ - new_requests_count == requests_count + 1, \ - "One more NLPRequest object should be created" - assert NLPRequest.objects.filter( - type="topicmodel", - created_by=self.user, - ).exists() def test_ngrams_incomplete_data(self): """ From 01ede78d8985bd5e9069f754c59ef20a9491b197 Mon Sep 17 00:00:00 2001 From: Ranjan Shrestha Date: Thu, 8 Feb 2024 12:05:57 +0545 Subject: [PATCH 3/3] use the dict instead of many if conditions; --- analysis_module/utils.py | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/analysis_module/utils.py b/analysis_module/utils.py index 7c83a71..90b8cd7 100644 --- a/analysis_module/utils.py +++ b/analysis_module/utils.py @@ -246,28 +246,22 @@ def send_ecs_http_request(nlp_request: NLPRequest): def get_ecs_id_param_name(request_type: NLPRequest.FeaturesType): - if request_type == NLPRequest.FeaturesType.SUMMARIZATION_V3: - return "summarization_id" - if request_type == NLPRequest.FeaturesType.TEXT_EXTRACTION: - return "textextraction_id" - if request_type == NLPRequest.FeaturesType.ENTRY_EXTRACTION: - return "entryextraction_id" # not needed probably, just to be in line with the rest. - if request_type == NLPRequest.FeaturesType.GEOLOCATION: - return "geolocation_id" - if request_type == NLPRequest.FeaturesType.TOPICMODEL: - return "topicmodel_id" - return None + mapper = { + NLPRequest.FeaturesType.TOPICMODEL: "topicmodel_id", + NLPRequest.FeaturesType.GEOLOCATION: "geolocation_id", + NLPRequest.FeaturesType.ENTRY_EXTRACTION: "entryextraction_id", + NLPRequest.FeaturesType.TEXT_EXTRACTION: "textextraction_id", + NLPRequest.FeaturesType.SUMMARIZATION_V3: "summarization_id" + } + return mapper.get(request_type, None) def get_ecs_url(request_type: NLPRequest.FeaturesType): - if request_type == NLPRequest.FeaturesType.SUMMARIZATION_V3: - return urljoin(SUMMARIZATION_V3_ECS_ENDPOINT, "/generate_report") - elif request_type == NLPRequest.FeaturesType.TEXT_EXTRACTION: - return urljoin(TEXT_EXTRACTION_ECS_ENDPOINT, "/extract_document") - elif request_type == NLPRequest.FeaturesType.ENTRY_EXTRACTION: - return urljoin(ENTRYEXTRACTION_ECS_ENDPOINT, "/extract_entries") - elif request_type == NLPRequest.FeaturesType.GEOLOCATION: - return urljoin(GEOLOCATION_ECS_ENDPOINT, "/get_geolocations") - elif request_type == NLPRequest.FeaturesType.TOPICMODEL: - return urljoin(TOPICMODEL_ECS_ENDPOINT, "/get_excerpt_clusters") - return None + mapper = { + NLPRequest.FeaturesType.TOPICMODEL: urljoin(TOPICMODEL_ECS_ENDPOINT, "/get_excerpt_clusters"), + NLPRequest.FeaturesType.GEOLOCATION: urljoin(GEOLOCATION_ECS_ENDPOINT, "/get_geolocations"), + NLPRequest.FeaturesType.ENTRY_EXTRACTION: urljoin(ENTRYEXTRACTION_ECS_ENDPOINT, "/extract_entries"), + NLPRequest.FeaturesType.TEXT_EXTRACTION: urljoin(TEXT_EXTRACTION_ECS_ENDPOINT, "/extract_document"), + NLPRequest.FeaturesType.SUMMARIZATION_V3: urljoin(SUMMARIZATION_V3_ECS_ENDPOINT, "/generate_report") + } + return mapper.get(request_type, None)