diff --git a/apps/assisted_tagging/migrations/0012_auto_20231222_0554.py b/apps/assisted_tagging/migrations/0012_auto_20231222_0554.py new file mode 100644 index 0000000000..967d85bd3b --- /dev/null +++ b/apps/assisted_tagging/migrations/0012_auto_20231222_0554.py @@ -0,0 +1,23 @@ +# Generated by Django 3.2.17 on 2023-12-22 05:54 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('assisted_tagging', '0011_draftentry_draft_entry_type_squashed_0013_rename_draft_entry_type_draftentry_type'), + ] + + operations = [ + migrations.AddField( + model_name='draftentry', + name='page', + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name='draftentry', + name='text_order', + field=models.IntegerField(default=0), + ), + ] diff --git a/apps/assisted_tagging/models.py b/apps/assisted_tagging/models.py index 1c7af87028..85875daf3b 100644 --- a/apps/assisted_tagging/models.py +++ b/apps/assisted_tagging/models.py @@ -93,7 +93,8 @@ class PredictionStatus(models.IntegerChoices): class Type(models.IntegerChoices): AUTO = 0, 'Auto Extraction' # NLP defiend extraction text MANUAL = 1, 'Manual Extraction' # manual defined extraction text - + page = models.IntegerField(default=0) + text_order = models.IntegerField(default=0) project = models.ForeignKey(Project, on_delete=models.CASCADE, related_name='+') lead = models.ForeignKey(Lead, on_delete=models.CASCADE, related_name='+') excerpt = models.TextField() diff --git a/apps/assisted_tagging/schema.py b/apps/assisted_tagging/schema.py index 6cc3718637..80decd2f06 100644 --- a/apps/assisted_tagging/schema.py +++ b/apps/assisted_tagging/schema.py @@ -100,8 +100,8 @@ def resolve_prediction_tags(root, info, **kwargs): # -- Project Level -def get_draft_entry_qs(info): # TODO use dataloder - qs = DraftEntry.objects.filter(project=info.context.active_project) +def get_draft_entry_qs(info): # TODO use dataloader + qs = 
DraftEntry.objects.filter(project=info.context.active_project).order_by('page', 'text_order') if PP.check_permission(info, PP.Permission.VIEW_ENTRY): return qs.prefetch_related( Prefetch( diff --git a/apps/deepl_integration/handlers.py b/apps/deepl_integration/handlers.py index e024a13faa..d26b19cbaf 100644 --- a/apps/deepl_integration/handlers.py +++ b/apps/deepl_integration/handlers.py @@ -531,6 +531,8 @@ def save_data(cls, lead, data_url): ]) draft = DraftEntry.objects.create( + page=model_preds['page'], + text_order=model_preds['textOrder'], project=lead.project, lead=lead, excerpt=model_preds['text'], @@ -690,10 +692,12 @@ def save_data( images_uri: List[str], word_count: int, page_count: int, + text_extraction_id: str, ): connector_lead.simplified_text = RequestHelper(url=text_source_uri, ignore_error=True).get_text(sanitize=True) or '' connector_lead.word_count = word_count connector_lead.page_count = page_count + connector_lead.text_extraction_id = text_extraction_id image_base_path = f'{connector_lead.pk}' for image_uri in images_uri: lead_image = ConnectorLeadPreviewImage(connector_lead=connector_lead) diff --git a/apps/deepl_integration/serializers.py b/apps/deepl_integration/serializers.py index 0f0924dcf5..b55f3db590 100644 --- a/apps/deepl_integration/serializers.py +++ b/apps/deepl_integration/serializers.py @@ -122,9 +122,10 @@ class UnifiedConnectorLeadExtractCallbackSerializer(DeeplServerBaseCallbackSeria child=serializers.CharField(allow_blank=True), required=False, default=[], ) - text_path = serializers.CharField(required=False) - total_words_count = serializers.IntegerField(required=False, default=0) - total_pages = serializers.IntegerField(required=False, default=0) + text_path = serializers.CharField(required=False, allow_null=True) + total_words_count = serializers.IntegerField(required=False, default=0, allow_null=True) + total_pages = serializers.IntegerField(required=False, default=0, allow_null=True) + text_extraction_id = 
serializers.UUIDField(required=False, allow_null=True) nlp_handler = UnifiedConnectorLeadHandler @@ -136,7 +137,7 @@ def validate(self, data): }) if data['status'] == self.Status.SUCCESS: errors = {} - for key in ['text_path', 'total_words_count', 'total_pages']: + for key in ['text_path', 'total_words_count', 'total_pages', 'text_extraction_id']: if key not in data: errors[key] = f'<{key}> is missing. Required when the extraction is Success' if errors: @@ -153,6 +154,7 @@ def create(self, data): data.get('images_path', [])[:10], # TODO: Support for more images, to much image will error. data['total_words_count'], data['total_pages'], + data['text_extraction_id'], ) connector_lead.update_extraction_status(ConnectorLead.ExtractionStatus.FAILED) return connector_lead @@ -232,6 +234,8 @@ def create(self, validated_data): class AutoAssistedBlockPredicationCallbackSerializer(serializers.Serializer): + page = serializers.IntegerField() + textOrder = serializers.IntegerField() text = serializers.CharField() relevant = serializers.BooleanField() prediction_status = serializers.BooleanField() diff --git a/apps/unified_connector/migrations/0008_connectorlead_text_extraction_id.py b/apps/unified_connector/migrations/0008_connectorlead_text_extraction_id.py new file mode 100644 index 0000000000..f4c587633c --- /dev/null +++ b/apps/unified_connector/migrations/0008_connectorlead_text_extraction_id.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.17 on 2023-12-22 06:38 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('unified_connector', '0007_alter_connectorsource_source'), + ] + + operations = [ + migrations.AddField( + model_name='connectorlead', + name='text_extraction_id', + field=models.UUIDField(blank=True, null=True), + ), + ] diff --git a/apps/unified_connector/models.py b/apps/unified_connector/models.py index 66199ee721..efb04a7293 100644 --- a/apps/unified_connector/models.py +++
b/apps/unified_connector/models.py @@ -40,6 +40,7 @@ class ExtractionStatus(models.IntegerChoices): simplified_text = models.TextField(blank=True) word_count = models.IntegerField(blank=True, null=True) page_count = models.IntegerField(blank=True, null=True) + text_extraction_id = models.UUIDField(blank=True, null=True) created_at = models.DateTimeField(auto_now_add=True) modified_at = models.DateTimeField(auto_now=True) diff --git a/apps/unified_connector/tests/test_mutation.py b/apps/unified_connector/tests/test_mutation.py index 000460172b..eb4ad89b48 100644 --- a/apps/unified_connector/tests/test_mutation.py +++ b/apps/unified_connector/tests/test_mutation.py @@ -498,6 +498,8 @@ def _check_connector_lead_status(connector_lead, status): total_words_count=100, total_pages=10, status=DeeplServerBaseCallbackSerializer.Status.FAILED.value, + text_extraction_id='c4c3c256-f307-4a85-a50e-5516a6f1ce8e', + ) response = self.client.post(url, data) @@ -524,6 +526,7 @@ def _check_connector_lead_status(connector_lead, status): total_words_count=100, total_pages=10, status=DeeplServerBaseCallbackSerializer.Status.SUCCESS.value, + text_extraction_id='c4c3c256-f307-4a85-a50e-5516a6f1ce8e', ) response = self.client.post(url, data) self.assert_400(response)