Skip to content

Commit

Permalink
Update text_extraction_id in all callback serializers
Browse files: browse the repository at this point in the history
  • Loading branch information
thenav56 committed Dec 27, 2023
1 parent ff1984e commit 63bf6a3
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 17 deletions.
9 changes: 4 additions & 5 deletions apps/deepl_integration/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ def auto_trigger_request_to_extractor(cls, lead):
"documents": [
{
"client_id": cls.get_client_id(lead),
"text_extraction_id": str(lead_preview.text_extraction_id)
"text_extraction_id": str(lead_preview.text_extraction_id),
}
],
"callback_url": cls.get_callback_url()
Expand Down Expand Up @@ -625,18 +625,17 @@ def save_data(
images_uri: List[str],
word_count: int,
page_count: int,
text_extraction_id: str
text_extraction_id: str,
):
LeadPreview.objects.filter(lead=lead).delete()
LeadPreviewImage.objects.filter(lead=lead).delete()
word_count, page_count = word_count, page_count
# and create new one
LeadPreview.objects.create(
lead=lead,
text_extract=RequestHelper(url=text_source_uri, ignore_error=True).get_text(sanitize=True) or '',
word_count=word_count,
page_count=page_count,
text_extraction_id=text_extraction_id
text_extraction_id=text_extraction_id,
)
# Save extracted images as LeadPreviewImage instances
# TODO: The logic is same for unified_connector leads as well. Maybe have a single func?
Expand Down Expand Up @@ -668,7 +667,7 @@ def save_lead_data_using_connector_lead(
text_extract=connector_lead.simplified_text,
word_count=connector_lead.word_count,
page_count=connector_lead.page_count,
text_extraction_id=connector_lead.text_extraction_id
text_extraction_id=connector_lead.text_extraction_id,
)
# Save extracted images as LeadPreviewImage instances
# TODO: The logic is same for unified_connector leads as well. Maybe have a single func?
Expand Down
14 changes: 7 additions & 7 deletions apps/deepl_integration/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class LeadExtractCallbackSerializer(DeeplServerBaseCallbackSerializer):
text_path = serializers.CharField(required=False, allow_null=True)
total_words_count = serializers.IntegerField(required=False, default=0, allow_null=True)
total_pages = serializers.IntegerField(required=False, default=0, allow_null=True)
text_extraction_id = serializers.CharField(required=True)
text_extraction_id = serializers.UUIDField(required=False, allow_null=True)
nlp_handler = LeadExtractionHandler

def validate(self, data):
Expand All @@ -85,9 +85,9 @@ def validate(self, data):
})
if data['status'] == self.Status.SUCCESS:
errors = {}
for key in ['text_path', 'total_words_count', 'total_pages']:
if key not in data:
errors[key] = f'<{key}> is missing. Required when the extraction status is Success'
for key in ['text_path', 'total_words_count', 'total_pages', 'text_extraction_id']:
if key not in data or data[key] is None:
errors[key] = f"<{key=} or {data.get('key')=}> is missing. Required when the extraction status is Success"
if errors:
raise serializers.ValidationError(errors)
return data
Expand All @@ -102,7 +102,7 @@ def create(self, data):
data.get('images_path', [])[:10], # TODO: Support for more images, too much image will error.
data.get('total_words_count'),
data.get('total_pages'),
data.get('text_extraction_id')
data.get('text_extraction_id'),
)
# Add to deduplication index
transaction.on_commit(lambda: index_lead_and_calculate_duplicates.delay(lead.id))
Expand Down Expand Up @@ -134,7 +134,7 @@ def validate(self, data):
errors = {}
for key in ['text_path', 'total_words_count', 'total_pages', 'text_extraction_id']:
if key not in data or data[key] is None:
errors[key] = f'<{key}> is missing. Required when the extraction is Success'
errors[key] = f"<{key=} or {data.get('key')=}> is missing. Required when the extraction status is Success"
if errors:
raise serializers.ValidationError(errors)
return data
Expand All @@ -149,7 +149,7 @@ def create(self, data):
data.get('images_path', [])[:10], # TODO: Support for more images, to much image will error.
data['total_words_count'],
data['total_pages'],
data['text_extraction_id']
data['text_extraction_id'],
)
connector_lead.update_extraction_status(ConnectorLead.ExtractionStatus.FAILED)
return connector_lead
Expand Down
18 changes: 13 additions & 5 deletions apps/unified_connector/tests/test_mutation.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,19 +498,24 @@ def _check_connector_lead_status(connector_lead, status):
total_pages=10,
status=DeeplServerBaseCallbackSerializer.Status.FAILED.value,
text_extraction_id='c4c3c256-f307-4a85-a50e-5516a6f1ce8e',

)

response = self.client.post(url, data)
self.assert_400(response)
_check_connector_lead_status(connector_lead1, ConnectorLead.ExtractionStatus.PENDING)
connector_lead1.refresh_from_db()
assert connector_lead1.text_extraction_id is None

data['client_id'] = UnifiedConnectorLeadHandler.get_client_id(connector_lead1)
data['status'] = DeeplServerBaseCallbackSerializer.Status.SUCCESS.value
data['status'] = DeeplServerBaseCallbackSerializer.Status.FAILED.value
response = self.client.post(url, data)
self.assert_200(response)
connector_lead1.refresh_from_db()
_check_connector_lead_status(connector_lead1, ConnectorLead.ExtractionStatus.SUCCESS)
_check_connector_lead_status(connector_lead1, ConnectorLead.ExtractionStatus.FAILED)
assert connector_lead1.text_extraction_id is None
assert connector_lead1.simplified_text is None
assert connector_lead1.word_count is None
assert connector_lead1.page_count is None

# ------ Extraction SUCCESS
data = dict(
Expand All @@ -530,14 +535,17 @@ def _check_connector_lead_status(connector_lead, status):
response = self.client.post(url, data)
self.assert_200(response)
_check_connector_lead_status(connector_lead2, ConnectorLead.ExtractionStatus.SUCCESS)
assert connector_lead1.text_extraction_id is data['text_extraction_id']
assert connector_lead1.simplified_text is not None
assert connector_lead1.word_count == 100
assert connector_lead1.page_count == 10

data['url'] = connector_lead2.url
response = self.client.post(url, data)
self.assert_200(response)
connector_lead2.refresh_from_db()
_check_connector_lead_status(connector_lead2, ConnectorLead.ExtractionStatus.SUCCESS)
preview_image_qs = ConnectorLeadPreviewImage.objects.filter(connector_lead=connector_lead2)
preview_image = preview_image_qs.first()
self.assertEqual(connector_lead2.simplified_text, SAMPLE_SIMPLIFIED_TEXT)
self.assertEqual(preview_image_qs.count(), 4)
self.assertEqual(preview_image_qs.count(), 2)
self.assertIsNotNone(preview_image and preview_image.image.name)

0 comments on commit 63bf6a3

Please sign in to comment.