Skip to content

Commit

Permalink
Merge pull request #1397 from the-deep/fix/draftentry-order
Browse files Browse the repository at this point in the history
Page number and order added
  • Loading branch information
subinasr authored Dec 26, 2023
2 parents e4b60d0 + 5832e0d commit 7538859
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 7 deletions.
23 changes: 23 additions & 0 deletions apps/assisted_tagging/migrations/0012_auto_20231222_0554.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 3.2.17 on 2023-12-22 05:54

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the `page` and `text_order` integer columns to `draftentry`.

    Both columns are created with a default of 0.
    """

    # Must run after the squashed draft-entry-type migration of this app.
    dependencies = [
        ('assisted_tagging', '0011_draftentry_draft_entry_type_squashed_0013_rename_draft_entry_type_draftentry_type'),
    ]

    operations = [
        migrations.AddField(
            model_name='draftentry', name='page', field=models.IntegerField(default=0),
        ),
        migrations.AddField(
            model_name='draftentry', name='text_order', field=models.IntegerField(default=0),
        ),
    ]
3 changes: 2 additions & 1 deletion apps/assisted_tagging/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ class PredictionStatus(models.IntegerChoices):
# Integer choices telling apart automatically and manually extracted text.
class Type(models.IntegerChoices):
AUTO = 0, 'Auto Extraction' # NLP-defined extraction text
MANUAL = 1, 'Manual Extraction' # manually defined extraction text

page = models.IntegerField(default=0)
text_order = models.IntegerField(default=0)
project = models.ForeignKey(Project, on_delete=models.CASCADE, related_name='+')
lead = models.ForeignKey(Lead, on_delete=models.CASCADE, related_name='+')
excerpt = models.TextField()
Expand Down
4 changes: 2 additions & 2 deletions apps/assisted_tagging/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ def resolve_prediction_tags(root, info, **kwargs):


# -- Project Level
def get_draft_entry_qs(info): # TODO use dataloder
qs = DraftEntry.objects.filter(project=info.context.active_project)
def get_draft_entry_qs(info): # TODO use dataloader
qs = DraftEntry.objects.filter(project=info.context.active_project).order_by('page', 'text_order')
if PP.check_permission(info, PP.Permission.VIEW_ENTRY):
return qs.prefetch_related(
Prefetch(
Expand Down
4 changes: 4 additions & 0 deletions apps/deepl_integration/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,8 @@ def save_data(cls, lead, data_url):
])

draft = DraftEntry.objects.create(
page=model_preds['page'],
text_order=model_preds['textOrder'],
project=lead.project,
lead=lead,
excerpt=model_preds['text'],
Expand Down Expand Up @@ -690,10 +692,12 @@ def save_data(
images_uri: List[str],
word_count: int,
page_count: int,
text_extraction_id: str,
):
connector_lead.simplified_text = RequestHelper(url=text_source_uri, ignore_error=True).get_text(sanitize=True) or ''
connector_lead.word_count = word_count
connector_lead.page_count = page_count
connector_lead.text_extraction_id = text_extraction_id
image_base_path = f'{connector_lead.pk}'
for image_uri in images_uri:
lead_image = ConnectorLeadPreviewImage(connector_lead=connector_lead)
Expand Down
12 changes: 8 additions & 4 deletions apps/deepl_integration/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@ class UnifiedConnectorLeadExtractCallbackSerializer(DeeplServerBaseCallbackSeria
child=serializers.CharField(allow_blank=True),
required=False, default=[],
)
text_path = serializers.CharField(required=False)
total_words_count = serializers.IntegerField(required=False, default=0)
total_pages = serializers.IntegerField(required=False, default=0)
text_path = serializers.CharField(required=False, allow_null=True)
total_words_count = serializers.IntegerField(required=False, default=0, allow_null=True)
total_pages = serializers.IntegerField(required=False, default=0, allow_null=True)
text_extraction_id = serializers.CharField(required=False, allow_null=True)

nlp_handler = UnifiedConnectorLeadHandler

Expand All @@ -136,7 +137,7 @@ def validate(self, data):
})
if data['status'] == self.Status.SUCCESS:
errors = {}
for key in ['text_path', 'total_words_count', 'total_pages']:
for key in ['text_path', 'total_words_count', 'total_pages', 'text_extraction_id']:
if key not in data:
errors[key] = f'<{key}> is missing. Required when the extraction is Success'
if errors:
Expand All @@ -153,6 +154,7 @@ def create(self, data):
data.get('images_path', [])[:10], # TODO: Support for more images, to much image will error.
data['total_words_count'],
data['total_pages'],
data['text_extraction_id']
)
connector_lead.update_extraction_status(ConnectorLead.ExtractionStatus.FAILED)
return connector_lead
Expand Down Expand Up @@ -232,6 +234,8 @@ def create(self, validated_data):


class AutoAssistedBlockPredicationCallbackSerializer(serializers.Serializer):
page = serializers.IntegerField()
textOrder = serializers.IntegerField()
text = serializers.CharField()
relevant = serializers.BooleanField()
prediction_status = serializers.BooleanField()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.17 on 2023-12-22 06:38

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the optional `text_extraction_id` UUID column to `connectorlead`.

    The column is nullable and may be left blank.
    """

    dependencies = [('unified_connector', '0007_alter_connectorsource_source')]

    operations = [
        migrations.AddField(
            model_name='connectorlead',
            name='text_extraction_id',
            field=models.UUIDField(null=True, blank=True),
        ),
    ]
1 change: 1 addition & 0 deletions apps/unified_connector/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class ExtractionStatus(models.IntegerChoices):
simplified_text = models.TextField(blank=True)
word_count = models.IntegerField(blank=True, null=True)
page_count = models.IntegerField(blank=True, null=True)
text_extraction_id = models.UUIDField(blank=True, null=True)

created_at = models.DateTimeField(auto_now_add=True)
modified_at = models.DateTimeField(auto_now=True)
Expand Down
3 changes: 3 additions & 0 deletions apps/unified_connector/tests/test_mutation.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,8 @@ def _check_connector_lead_status(connector_lead, status):
total_words_count=100,
total_pages=10,
status=DeeplServerBaseCallbackSerializer.Status.FAILED.value,
text_extraction_id='c4c3c256-f307-4a85-a50e-5516a6f1ce8e',

)

response = self.client.post(url, data)
Expand All @@ -524,6 +526,7 @@ def _check_connector_lead_status(connector_lead, status):
total_words_count=100,
total_pages=10,
status=DeeplServerBaseCallbackSerializer.Status.SUCCESS.value,
text_extraction_id='c4c3c256-f307-4a85-a50e-5516a6f1ce8e',
)
response = self.client.post(url, data)
self.assert_400(response)
Expand Down

0 comments on commit 7538859

Please sign in to comment.