Skip to content

Commit

Permalink
Merge pull request #51 from the-deep-nlp/feature/new_endpoints
Browse files Browse the repository at this point in the history
Feature/new endpoints
  • Loading branch information
ranjan-stha authored Dec 5, 2024
2 parents 91f74f9 + 75d58d5 commit f86c3d9
Show file tree
Hide file tree
Showing 21 changed files with 3,345 additions and 820 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ jobs:
CSRF_TRUSTED_ORIGINS: ''
SUMMARIZATION_V3_ECS_ENDPOINT: ''
ENTRYEXTRACTION_ECS_ENDPOINT: ''
ENTRYEXTRACTION_LLM_ECS_ENDPOINT: ''
GEOLOCATION_ECS_ENDPOINT: ''
TOPICMODEL_ECS_ENDPOINT: ''

Expand Down Expand Up @@ -88,6 +89,7 @@ jobs:
RELIABILITY_MODEL_ID: ''
RELIABILITY_MODEL_VERSION: ''

OPENAI_API_KEY: ''
steps:
- name: Checkout
uses: actions/checkout@v3
Expand Down
25 changes: 0 additions & 25 deletions .github/workflows/flake8.yml

This file was deleted.

156 changes: 156 additions & 0 deletions analysis_module/mock_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,69 @@
]
}

# Mock callback payload for the LLM-based entry classification endpoint.
# The opaque keys under "model_tags" (e.g. "pillar-0", "t31unid23fntmwrb")
# are framework widget/tag identifiers — presumably matching the analysis
# framework referenced below; TODO confirm against the real NLP response.
MOCK_ENTRY_CLASSIFICATION_LLM = {
    "client_id": "entry-classification-llm-client-6000",
    # This model prediction refers to framework_id: 1623,
    # entry_id: 510021, project_id: 2587 (2021 IFRC ESSN Turkey).
    "model_tags": {
        "element1": {
            "pillar-0": {
                "subpillar-4": {
                    "sector-1": [
                        "t31unid23fntmwrb"
                    ],
                    "sector-4": [
                        "subsector-1"
                    ]
                },
                "subpillar-3": {
                    "sector-4": [
                        "subsector-1"
                    ]
                }
            },
            "pillar-1": {
                "subpillar-1": {
                    "sector-4": [
                        "subsector-1"
                    ]
                }
            }
        },
        "overview-matrix1dWidget-d48u7z4yohwuu7zg": {
            "8lowwhswgb5j9f5s": {
                "gjsbosuej330kl45": True,
                "iwsqjtrs2u5z8qgk": True
            }
        },
        "element0": {
            "pillar-3": {
                "subpillar-21": True,
                "subpillar-20": True
            }
        }
    },
    # Sample geolocation entities with character offsets into the entry text
    # and point coordinates (latitude/longitude).
    "geolocations": [
        {
            "entity": "Somalia",
            "meta": {
                "offset_start": 88,
                "offset_end": 94,
                "latitude": -10,
                "longitude": -55
            }
        },
        {
            "entity": "Portugal",
            "meta": {
                "offset_start": 183,
                "offset_end": 191,
                "latitude": 39.6945,
                "longitude": -8.13057
            }
        }
    ]
}

"""
it's a huge output (and it can be bigger than this one). Maybe we can truncate it.
Expand Down Expand Up @@ -1745,3 +1808,96 @@
}
]
}

# Mock callback payload for the LLM-based entry extraction endpoint:
# document metadata plus per-block text, classification tags and geolocations.
# The opaque identifiers are framework widget/tag ids — presumably from the
# framework referenced below; TODO confirm against the real NLP response.
MOCK_ENTRY_EXTRACTION_LLM = {
    # This model prediction refers to framework_id: 1623,
    # lead_id 67027, url: 'https://reliefweb.int/sites/reliefweb.int/files/resources/UNHCR-Turkey-Operational-Update-October-2019.pdf' # noqa
    # project_id: 2587 (2021 IFRC ESSN Turkey)
    "client_id": "entry-classification-llm-client-6000",
    "metadata": {
        "total_pages": 10,
        "total_words_count": 5876
    },
    # One entry per extracted text block, in reading order ("textOrder").
    "blocks": [{
        "type": "text",
        "text": "4 million Refugees and asylum-seekers in Turkey including over 3.6 million Syrian nationals and close to 400,000 registered refugees and asylum-seekers of other nationalities. Over 98% of Syrian refugees live across Turkey in 81 provinces",
        "page": 0,
        "textOrder": 2,
        "relevant": True,
        "prediction_status": True,
        "classification": {
            "element1": {
                "pillar-0": {
                    "o9kyhltzmplk0a1k": {
                        "sector-9": []
                    }
                },
                "46bg6n1o50obgx77": {
                    "v2kfnyjbn41vv46j": {
                        "sector-9": []
                    }
                }
            },
            "overview-matrix1dWidget-d48u7z4yohwuu7zg": {
                "8lowwhswgb5j9f5s": {
                    "qycslaise1s014vm": True,
                    "7wps5hbnemt59dv9": True
                }
            },
            "element0": {
                "pillar-0": {
                    "nxjm8rsprb9fu2wq": True
                },
                "kyiciutprwct1vph": {
                    "4ftpwnssu2ugeekk": True
                }
            }
        },
        # Geolocations may carry null coordinates when only the entity span
        # (offsets) could be resolved.
        "geolocations": [
            {
                "entity": "Niger",
                "meta": {
                    "offset_start": 88,
                    "offset_end": 94,
                    "latitude": -10,
                    "longitude": -55
                }
            },
            {
                "entity": "Nigeria",
                "meta": {
                    "offset_start": 183,
                    "offset_end": 191,
                    "latitude": None,
                    "longitude": None
                }
            }
        ],
    },
    {
        "type": "text",
        "text": "9,700 Refugees departed for resettlement in 2019 as of end of October, over 78 per cent of whom are Syrians",
        "page": 0,
        "textOrder": 3,
        "relevant": True,
        "prediction_status": True,
        "classification": {
            "element1": {
                "pillar-0": {
                    "o9kyhltzmplk0a1k": {
                        "sector-9": []
                    }
                }
            },
            "overview-matrix1dWidget-d48u7z4yohwuu7zg": {
                "8lowwhswgb5j9f5s": True
            },
            "element0": {
                "kyiciutprwct1vph": {
                    "4ftpwnssu2ugeekk": True
                }
            }
        }
    }
    ]
}
96 changes: 94 additions & 2 deletions analysis_module/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@

from core.models import NLPRequest
from core_server.settings import ENDPOINT_NAME
from .mock_templates import MOCK_ENTRY_CLASSIFICATION, MOCK_ENTRY_CLASSIFICATION_FORMATTED, MOCK_GEOLOCATION # noqa
from .mock_templates import (MOCK_ENTRY_CLASSIFICATION,
MOCK_ENTRY_CLASSIFICATION_LLM,
MOCK_ENTRY_CLASSIFICATION_FORMATTED,
MOCK_ENTRY_EXTRACTION_LLM,
MOCK_GEOLOCATION) # noqa
from .utils import send_callback_url_request


Expand Down Expand Up @@ -497,13 +501,76 @@ def process_entry_extraction_mock(body) -> Any:
logger.error("Could not send data to callback url", exc_info=True)


def entry_extraction_llm_mock(body) -> Any:
    """Accept an LLM entry-extraction request and schedule the mock task.

    The actual processing runs asynchronously via celery with a short delay;
    the caller immediately gets an acknowledgement body and an HTTP 200 code.
    """
    # Defer the heavy lifting to the celery task, triggered after 2 seconds.
    process_entry_extraction_llm_mock.apply_async(args=(body,), countdown=2)
    acknowledgement = json.dumps({"status": "Successfully received the request."})
    return acknowledgement, 200


@shared_task
def process_entry_extraction_llm_mock(body) -> Any:
    """Celery task: simulate LLM entry extraction and notify the callback URL.

    For each document in ``body["documents"]`` a mock extraction payload is
    built from ``MOCK_ENTRY_EXTRACTION_LLM``, saved locally, and the resulting
    file path is POSTed to ``body["callback_url"]``. Silently returns when
    either the documents list or the callback URL is missing. Delivery
    failures are logged, never raised.
    """
    documents = body.get("documents") or []
    callback_url = body.get("callback_url")
    if not documents or not callback_url:
        return

    for document in documents:
        client_id = document["client_id"]
        text_extraction_id = document["text_extraction_id"]
        # Build the payload as a shallow copy of the mock template instead of
        # calling .update() on it: the original code mutated the shared
        # module-level MOCK_ENTRY_EXTRACTION_LLM constant, leaking one
        # request's ids into every later task run.
        random_entry_extraction_classification = {
            **MOCK_ENTRY_EXTRACTION_LLM,
            "classification_model_info": {
                "name": "llm_model",
                "version": "1.0.0"
            },
            "client_id": client_id,
            "entry_extraction_id": "73f9ca13-deb2-4f39-8e86-a856490bfc0d",  # random
            "text_extraction_id": text_extraction_id
        }
        filepath = save_data_local_and_get_url(
            "entry_extraction", client_id, random_entry_extraction_classification
        )

        # NOTE(review): the text_extraction_id is not easy to retrieve when the
        # request was made with a "url". In both cases (url or
        # text_extraction_id) the text was already extracted, and the id cannot
        # easily be recovered from the presigned url; with an id-based request
        # the right document can be fetched directly.
        callback_data = {
            "client_id": client_id,
            "entry_extraction_classification_path": filepath,
            "text_extraction_id": text_extraction_id,
            "status": 1
        }
        try:
            requests.post(
                callback_url,
                json=callback_data,
                timeout=30,
            )
            logger.info("Successfully sent data to callback url for entry extraction.")
        except Exception:
            logger.error("Could not send data to callback url", exc_info=True)


def entry_classification_mock(body) -> Any:
    """Accept an entry-classification request and schedule the mock task.

    Processing happens asynchronously via celery with a short delay; the
    caller immediately gets an acknowledgement body and an HTTP 200 code.
    """
    # Defer the work to the celery task, triggered after 2 seconds.
    process_entry_classification_mock.apply_async(args=(body,), countdown=2)
    acknowledgement = json.dumps({"status": "Successfully received the request."})
    return acknowledgement, 200


def entry_classification_llm_mock(body) -> Any:
    """Accept an LLM entry-classification request and schedule the mock task.

    Processing happens asynchronously via celery with a short delay; the
    caller immediately gets an acknowledgement body and an HTTP 200 code.
    """
    # Defer the work to the celery task, triggered after 2 seconds.
    process_entry_classification_llm_mock.apply_async(args=(body,), countdown=2)
    acknowledgement = json.dumps({"status": "Successfully received the request."})
    return acknowledgement, 200


@shared_task
def process_entry_classification_mock(body) -> Any:
callback_payload = MOCK_ENTRY_CLASSIFICATION
Expand All @@ -527,6 +594,29 @@ def process_entry_classification_mock(body) -> Any:
logger.error("Could not send data to callback url", exc_info=True)


@shared_task
def process_entry_classification_llm_mock(body) -> Any:
    """Celery task: simulate LLM entry classification and notify the callback.

    Builds a mock classification payload from ``MOCK_ENTRY_CLASSIFICATION_LLM``
    for the first entry's client id and POSTs it to ``body["callback_url"]``.
    Delivery failures are logged, never raised.
    """
    # Build the payload as a shallow copy of the mock template instead of
    # calling .update() on it: the original code mutated the shared
    # module-level MOCK_ENTRY_CLASSIFICATION_LLM constant, leaking one
    # request's client_id into every later task run.
    callback_payload = {
        **MOCK_ENTRY_CLASSIFICATION_LLM,
        "client_id": body["entries"][0]["client_id"],
        "model_info": {
            "id": "llm_model",
            "version": "1.0.0"
        },
        "prediction_status": True
    }
    callback_url = body["callback_url"]
    try:
        requests.post(
            callback_url,
            json=callback_payload,
            timeout=30
        )
        logger.info("Successfully sent data to callback url for entry classification")
    except Exception:
        logger.error("Could not send data to callback url", exc_info=True)


TYPE_ACTIONS_MOCK = {
"topicmodel": topicmodeling_mock_model,
"summarization": summarization_mock_model,
Expand All @@ -535,7 +625,9 @@ def process_entry_classification_mock(body) -> Any:
"geolocation": geolocation_mock_model,
"text-extraction": text_extraction_mock,
"entry-extraction-classification": entry_extraction_mock,
"entry-classification": entry_classification_mock
"entry-extraction-classification-llm": entry_extraction_llm_mock,
"entry-classification": entry_classification_mock,
"entry-classification-llm": entry_classification_llm_mock
}


Expand Down
21 changes: 21 additions & 0 deletions analysis_module/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,16 @@ class PredictionRequestSerializer(serializers.Serializer):
mock = serializers.BooleanField(default=False)


class PredictionRequestSerializerV2(serializers.Serializer):
    """Request payload for v2 (LLM-based) entry classification predictions.

    Extends the v1 payload with a framework id and organization metadata.
    """
    entries = PredictionEntrySerializer(many=True)
    af_id = serializers.IntegerField()  # presumably the analysis framework id — confirm with callers
    project_id = serializers.IntegerField()
    publishing_organization = serializers.CharField()
    authoring_organization = serializers.ListField()
    callback_url = serializers.CharField()  # URL to POST results back to
    mock = serializers.BooleanField(default=False)  # when True, serve the mock response


class ExtractionDocumentSerializer(serializers.Serializer):
url = serializers.CharField()
client_id = serializers.CharField()
Expand Down Expand Up @@ -137,8 +147,19 @@ def to_representation(self, value):


class EntryExtractionSerializer(serializers.Serializer):
    """Request payload for entry extraction over a set of documents."""
    documents = DocumentEntryExtractionUnionField()
    callback_url = serializers.CharField()  # URL to POST results back to
    request_type = serializers.ChoiceField(
        choices=ExtractionRequestTypeChoices,
        default=ExtractionRequestTypeChoices.USER,
    )
    mock = serializers.BooleanField(default=False)  # when True, serve the mock response


class EntryExtractionSerializerLLM(serializers.Serializer):
documents = DocumentEntryExtractionUnionField()
af_id = serializers.IntegerField()
project_id = serializers.IntegerField()
callback_url = serializers.CharField()
request_type = serializers.ChoiceField(
choices=ExtractionRequestTypeChoices,
Expand Down
Loading

0 comments on commit f86c3d9

Please sign in to comment.