From 8a7c3bd6f04b72374f29e6a88e4018964f7724d6 Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Mon, 24 Jun 2024 11:33:51 -0500 Subject: [PATCH 01/14] added migration file --- ...ter_candidateurl_document_type_and_more.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 sde_collections/migrations/0056_alter_candidateurl_document_type_and_more.py diff --git a/sde_collections/migrations/0056_alter_candidateurl_document_type_and_more.py b/sde_collections/migrations/0056_alter_candidateurl_document_type_and_more.py new file mode 100644 index 00000000..50168981 --- /dev/null +++ b/sde_collections/migrations/0056_alter_candidateurl_document_type_and_more.py @@ -0,0 +1,54 @@ +# Generated by Django 4.2.9 on 2024-06-24 16:31 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0055_alter_workflowhistory_old_status_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="candidateurl", + name="document_type", + field=models.IntegerField( + choices=[ + (1, "Images"), + (2, "Data"), + (3, "Documentation"), + (4, "Software and Tools"), + (5, "Missions and Instruments"), + ], + null=True, + ), + ), + migrations.AlterField( + model_name="collection", + name="document_type", + field=models.IntegerField( + choices=[ + (1, "Images"), + (2, "Data"), + (3, "Documentation"), + (4, "Software and Tools"), + (5, "Missions and Instruments"), + ], + default=3, + ), + ), + migrations.AlterField( + model_name="documenttypepattern", + name="document_type", + field=models.IntegerField( + choices=[ + (1, "Images"), + (2, "Data"), + (3, "Documentation"), + (4, "Software and Tools"), + (5, "Missions and Instruments"), + ] + ), + ), + ] From ab7f1a468afda684948051a438cfa12201b88219 Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Mon, 24 Jun 2024 11:34:17 -0500 Subject: [PATCH 02/14] removed training and education from model choices --- 
sde_collections/models/collection_choice_fields.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sde_collections/models/collection_choice_fields.py b/sde_collections/models/collection_choice_fields.py index 37ac9412..9ab5b2e7 100644 --- a/sde_collections/models/collection_choice_fields.py +++ b/sde_collections/models/collection_choice_fields.py @@ -30,7 +30,6 @@ class DocumentTypes(models.IntegerChoices): DOCUMENTATION = 3, "Documentation" SOFTWARETOOLS = 4, "Software and Tools" MISSIONSINSTRUMENTS = 5, "Missions and Instruments" - TRAININGANDEDUCATION = 6, "Training and Education" @classmethod def lookup_by_text(cls, text: str) -> int | None: From a3188044fc9aee2de612df37b5abf2bb44c21c6c Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Mon, 24 Jun 2024 11:35:18 -0500 Subject: [PATCH 03/14] removed training and education from front-end components --- .../static/js/candidate_url_list.js | 12 ++++-------- .../sde_collections/candidate_urls_list.html | 4 +--- .../sde_collections/collection_detail.html | 18 ++++++++---------- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/sde_indexing_helper/static/js/candidate_url_list.js b/sde_indexing_helper/static/js/candidate_url_list.js index c1bbfa90..971503e4 100644 --- a/sde_indexing_helper/static/js/candidate_url_list.js +++ b/sde_indexing_helper/static/js/candidate_url_list.js @@ -134,7 +134,6 @@ function initializeDataTable() { 3: "Documentation", 4: "Software and Tools", 5: "Missions and Instruments", - 6: "Training and Education", }; this.api() .columns() @@ -498,7 +497,6 @@ function initializeDataTable() { Documentation: 3, "Software and Tools": 4, "Missions and Instruments": 5, - "Training and Education": 6, }, }, }; @@ -663,7 +661,6 @@ function getDocumentTypeColumn() { 3: "Documentation", 4: "Software and Tools", 5: "Missions and Instruments", - 6: "Training and Education", }; button_text = data ? dict[data] : "Select"; button_color = data ? 
"btn-success" : "btn-secondary"; @@ -681,7 +678,6 @@ function getDocumentTypeColumn() { Documentation Software and Tools Missions and Instruments - Training and Education `; }, @@ -843,7 +839,7 @@ function postDocumentTypePatterns( newDocumentTypePatternsCount = newDocumentTypePatternsCount + 1; $("#documentTypePatternsTab").html( `Document Type Patterns ` + - newDocumentTypePatternsCount + " new" + + newDocumentTypePatternsCount + " new" + `` ); } @@ -890,7 +886,7 @@ function postExcludePatterns(match_pattern, match_pattern_type = 0, force) { newExcludePatternsCount = newExcludePatternsCount + 1; $("#excludePatternsTab").html( `Exclude Patterns ` + - newExcludePatternsCount + " new" + + newExcludePatternsCount + " new" + `` ); } @@ -932,7 +928,7 @@ function postIncludePatterns(match_pattern, match_pattern_type = 0) { newIncludePatternsCount = newIncludePatternsCount + 1; $("#includePatternsTab").html( `Include Patterns ` + - newIncludePatternsCount + " new" + + newIncludePatternsCount + " new" + `` ); } @@ -971,7 +967,7 @@ function postTitlePatterns( newTitlePatternsCount = newTitlePatternsCount + 1; $("#titlePatternsTab").html( `Title Patterns ` + - newTitlePatternsCount + " new" + + newTitlePatternsCount + " new" + `` ); } diff --git a/sde_indexing_helper/templates/sde_collections/candidate_urls_list.html b/sde_indexing_helper/templates/sde_collections/candidate_urls_list.html index c64778c3..4250c104 100644 --- a/sde_indexing_helper/templates/sde_collections/candidate_urls_list.html +++ b/sde_indexing_helper/templates/sde_collections/candidate_urls_list.html @@ -96,7 +96,6 @@

- @@ -236,7 +235,6 @@

- @@ -402,4 +400,4 @@

Customize Column -{% endblock content %} \ No newline at end of file +{% endblock content %} diff --git a/sde_indexing_helper/templates/sde_collections/collection_detail.html b/sde_indexing_helper/templates/sde_collections/collection_detail.html index 0f534b10..09553e3d 100644 --- a/sde_indexing_helper/templates/sde_collections/collection_detail.html +++ b/sde_indexing_helper/templates/sde_collections/collection_detail.html @@ -26,7 +26,7 @@

{{ colle data-match-pattern remove_protocol row - url> + url> - + {{ entry.curated_by }} {{entry.created_at|timesince}} @@ -261,8 +259,8 @@ @@ -285,10 +283,10 @@
@@ -300,4 +298,4 @@

{% block javascripts %} -{% endblock javascripts %} \ No newline at end of file +{% endblock javascripts %} From 88ff2e2bf688770eeeb814586141c38e6626979c Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Tue, 2 Jul 2024 11:11:58 -0500 Subject: [PATCH 04/14] adding env variables to settings --- config/settings/base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/config/settings/base.py b/config/settings/base.py index 616b9c92..46dfe692 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -339,3 +339,7 @@ SINEQUA_CONFIGS_REPO_DEV_BRANCH = env("SINEQUA_CONFIGS_REPO_DEV_BRANCH") SINEQUA_CONFIGS_REPO_WEBAPP_PR_BRANCH = env("SINEQUA_CONFIGS_REPO_WEBAPP_PR_BRANCH") SLACK_WEBHOOK_URL = env("SLACK_WEBHOOK_URL") +XLI_USER = env("XLI_USER") +XLI_PASSWORD = env("XLI_PASSWORD") +LRM_USER = env("LRM_USER") +LRM_PASSWORD = env("LRM_PASSWORD") From 91a9678f2e990a48af0d0a20ff71b889d6d84270 Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Tue, 2 Jul 2024 11:12:31 -0500 Subject: [PATCH 05/14] removed dev server creds from code --- sde_collections/sinequa_api.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sde_collections/sinequa_api.py b/sde_collections/sinequa_api.py index 192a8176..17c03b33 100644 --- a/sde_collections/sinequa_api.py +++ b/sde_collections/sinequa_api.py @@ -2,6 +2,7 @@ import requests import urllib3 +from django.conf import settings urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) @@ -50,6 +51,10 @@ def __init__(self, server_name: str) -> None: self.app_name: str = server_configs[server_name]["app_name"] self.query_name: str = server_configs[server_name]["query_name"] self.base_url: str = server_configs[server_name]["base_url"] + self.xli_user = settings.XLI_USER + self.xli_password = settings.XLI_PASSWORD + self.lrm_user = settings.LRM_USER + self.lrm_password = settings.LRM_PASSWORD def process_response(self, url: str, payload: dict[str, Any]) -> Any: response = requests.post(url, 
headers={}, json=payload, verify=False) @@ -63,9 +68,9 @@ def process_response(self, url: str, payload: dict[str, Any]) -> Any: def query(self, page: int, collection_config_folder: str = "") -> Any: if self.server_name == "lis_server": - url = f"{self.base_url}/api/v1/search.query?Password=admin&User=admin" + url = f"{self.base_url}/api/v1/search.query?Password={self.xli_password}&User={self.xli_user}" elif self.server_name == "lrm_dev_server": - url = f"{self.base_url}/api/v1/search.query?Password=QDZ8ASZagUpRCHR&User=lrmdev" + url = f"{self.base_url}/api/v1/search.query?Password={self.lrm_password}&User={self.lrm_user}" else: url = f"{self.base_url}/api/v1/search.query" payload = { From 953fdc69371822df02653a29107783aa339c8cf8 Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Wed, 3 Jul 2024 13:12:06 -0500 Subject: [PATCH 06/14] fix candidate urls that overflow exclude button --- .../static/css/candidate_url_list.css | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/sde_indexing_helper/static/css/candidate_url_list.css b/sde_indexing_helper/static/css/candidate_url_list.css index 304887af..52a6145a 100644 --- a/sde_indexing_helper/static/css/candidate_url_list.css +++ b/sde_indexing_helper/static/css/candidate_url_list.css @@ -90,11 +90,11 @@ border-color: #fafafa; font-size: 0.6875rem; box-shadow: 0 2px 2px 0 rgba(153, 153, 153, 0.14), 0 3px 1px -2px rgba(153, 153, 153, 0.2), 0 1px 5px 0 rgba(153, 153, 153, 0.12); } - + .select-dropdown:hover { box-shadow: 0 14px 26px -12px rgba(250, 250, 250, 0.42), 0 4px 23px 0px rgba(0, 0, 0, 0.12), 0 8px 10px -5px rgba(250, 250, 250, 0.2); } - + .select-dropdown:focus, .select-dropdown.focus { box-shadow: none, 0 0 0 0.2rem rgba(76, 175, 80, 0.5); @@ -194,7 +194,7 @@ letter-spacing: -0.02em; display: flex; align-items: baseline; } - + .checkbox-wrapper label { font-weight: 600; font-size: 16px; @@ -228,7 +228,7 @@ letter-spacing: -0.02em; width: 600px; color: #65B1EF; } - + 
.title-dropdown { width: fit-content !important; margin-top:20px; @@ -237,7 +237,7 @@ letter-spacing: -0.02em; .table tbody tr:nth-child(odd) { background-color: #050E19 !important; } - + .table tbody tr:nth-child(even) { background-color: #3F4A58 !important; } @@ -247,7 +247,7 @@ letter-spacing: -0.02em; } - + .custom-select, .buttons-csv, .customizeColumns, .addPattern{ border-style: solid !important; border-color: #A7BACD !important; @@ -346,7 +346,7 @@ div.dt-buttons .btn.processing:after { align-items: center; /* justify-content: space-between; */ } - + .headerDiv{ display: flex; justify-content: space-between; @@ -356,6 +356,12 @@ div.dt-buttons .btn.processing:after { display:flex; align-items: center; justify-content: space-between; + word-wrap: break-word; + word-break: break-all; + white-space: normal; + overflow-wrap: break-word; + min-width: 700px; + max-width: 700px; } .url-icon { @@ -415,4 +421,4 @@ div.dt-buttons .btn.processing:after { div.dt-container div.dt-paging ul.pagination { position: absolute; right: 60px; -} \ No newline at end of file +} From 342ab23417309a237ff6c36a9b40158fd69dd4d0 Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Wed, 3 Jul 2024 13:54:10 -0500 Subject: [PATCH 07/14] further fix candidate url lengths --- sde_indexing_helper/static/css/candidate_url_list.css | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sde_indexing_helper/static/css/candidate_url_list.css b/sde_indexing_helper/static/css/candidate_url_list.css index 52a6145a..5eb37028 100644 --- a/sde_indexing_helper/static/css/candidate_url_list.css +++ b/sde_indexing_helper/static/css/candidate_url_list.css @@ -360,8 +360,8 @@ div.dt-buttons .btn.processing:after { word-break: break-all; white-space: normal; overflow-wrap: break-word; - min-width: 700px; - max-width: 700px; + min-width: 550px; + max-width: 550px; } .url-icon { From a8b8330bdf840c87294831dd5c3f75aabc2856f0 Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Wed, 10 Jul 2024 
11:53:02 -0500 Subject: [PATCH 08/14] added widths to all the columns --- sde_indexing_helper/static/js/candidate_url_list.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sde_indexing_helper/static/js/candidate_url_list.js b/sde_indexing_helper/static/js/candidate_url_list.js index 1e082654..4e9b0ae6 100644 --- a/sde_indexing_helper/static/js/candidate_url_list.js +++ b/sde_indexing_helper/static/js/candidate_url_list.js @@ -705,10 +705,11 @@ function setupClickHandlers() { function getURLColumn() { return { data: "url", + width: "30%", render: function (data, type, row) { return `
${remove_protocol( data - )} + )} open_in_new
`; @@ -719,6 +720,7 @@ function getURLColumn() { function getScrapedTitleColumn() { return { data: "scraped_title", + width: "30%", render: function (data, type, row) { return `${data}`; }, @@ -728,6 +730,7 @@ function getScrapedTitleColumn() { function getGeneratedTitleColumn() { return { data: "generated_title", + width: "20%", render: function (data, type, row) { return ` Date: Wed, 10 Jul 2024 11:53:39 -0500 Subject: [PATCH 09/14] added css changes for the url column to occupy 100% of the assigned space --- sde_indexing_helper/static/css/candidate_url_list.css | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sde_indexing_helper/static/css/candidate_url_list.css b/sde_indexing_helper/static/css/candidate_url_list.css index 5eb37028..004da9b8 100644 --- a/sde_indexing_helper/static/css/candidate_url_list.css +++ b/sde_indexing_helper/static/css/candidate_url_list.css @@ -360,8 +360,8 @@ div.dt-buttons .btn.processing:after { word-break: break-all; white-space: normal; overflow-wrap: break-word; - min-width: 550px; - max-width: 550px; + min-width: 100%; + max-width: 100%; } .url-icon { @@ -422,3 +422,9 @@ div.dt-container div.dt-paging ul.pagination { position: absolute; right: 60px; } + +.individual_title_input { + width: 100%; + max-width: 100%; + min-width: 100%; +} From 237d7aa1ec8ec457174136c1f82a8210ea7787c4 Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Wed, 10 Jul 2024 13:04:31 -0500 Subject: [PATCH 10/14] change status automatically to ready for curation when urls are pulled --- sde_collections/tasks.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sde_collections/tasks.py b/sde_collections/tasks.py index 659f4cc8..fa754efc 100644 --- a/sde_collections/tasks.py +++ b/sde_collections/tasks.py @@ -10,7 +10,7 @@ from config import celery_app -from .models.collection import Collection +from .models.collection import Collection, WorkflowStatusChoices from .sinequa_api import Api from 
.utils.github_helper import GitHubHandler @@ -90,6 +90,14 @@ def import_candidate_urls_from_api(server_name="test", collection_ids=[]): print("Applying existing patterns; this may take a while") collection.apply_all_patterns() + if collection.workflow_status == WorkflowStatusChoices.READY_FOR_ENGINEERING: + collection.workflow_status = WorkflowStatusChoices.ENGINEERING_IN_PROGRESS + collection.save() + + # Finally set the status to READY_FOR_CURATION + collection.workflow_status = WorkflowStatusChoices.READY_FOR_CURATION + collection.save() + print("Deleting temp files") shutil.rmtree(TEMP_FOLDER_NAME) From d894b519de15840f73d56fd4b615ad53dc1a4f6b Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Thu, 11 Jul 2024 12:31:52 -0500 Subject: [PATCH 11/14] add a function to clear the search results on navigation --- sde_indexing_helper/static/js/collection_list.js | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sde_indexing_helper/static/js/collection_list.js b/sde_indexing_helper/static/js/collection_list.js index de8ed457..5a00c1a4 100644 --- a/sde_indexing_helper/static/js/collection_list.js +++ b/sde_indexing_helper/static/js/collection_list.js @@ -391,6 +391,9 @@ function postCurator(collection_id, curator_id) { $(document).ready(function () { setupClickHandlers(); + // Clear search values and redraw table + clearSearchValues(); + // Remove the search input and add custom titles var paneTitles = [ null, @@ -424,3 +427,8 @@ function setupClickHandlers() { handleWorkflowStatusSelect(); handleCuratorSelect(); } + +function clearSearchValues() { + let table = $("#collection_table").DataTable(); + table.columns().search("").draw(); +} From 6ea0df58485d7e9a9be503a5a9a57be07a669550 Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Thu, 11 Jul 2024 13:37:24 -0500 Subject: [PATCH 12/14] make sure the affected urls on title patterns are updated correctly --- sde_collections/models/pattern.py | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 
deletions(-) diff --git a/sde_collections/models/pattern.py b/sde_collections/models/pattern.py index 29248e50..938b5eea 100644 --- a/sde_collections/models/pattern.py +++ b/sde_collections/models/pattern.py @@ -2,18 +2,9 @@ from django.apps import apps from django.core.exceptions import ValidationError -from django.db import models, transaction -from django.db.models.signals import post_save -from django.dispatch import receiver - -from sde_collections.tasks import resolve_title_pattern - -from ..utils.title_resolver import ( - is_valid_fstring, - is_valid_xpath, - parse_title, - resolve_title, -) +from django.db import models + +from ..utils.title_resolver import is_valid_fstring, is_valid_xpath, parse_title from .collection_choice_fields import DocumentTypes @@ -170,12 +161,6 @@ def apply(self) -> None: ResolvedTitleError = apps.get_model("sde_collections", "ResolvedTitleError") for candidate_url in matched_urls: - context = { - "url": candidate_url.url, - "title": candidate_url.scraped_title, - "collection": self.collection.name, - } - try: # generated_title = resolve_title(self.title_pattern, context) generated_title = self.title_pattern @@ -190,6 +175,7 @@ def apply(self) -> None: candidate_url.generated_title = generated_title candidate_url.save() + updated_urls.append(candidate_url) except (ValueError, ValidationError) as e: message = str(e) From ee0641cbeb9b80ffbdfe8cb0bb2fabba0218ce5f Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Fri, 12 Jul 2024 14:28:51 -0500 Subject: [PATCH 13/14] update the unapply method and add delete method to update generated title field to blank when a title pattern is deleted --- sde_collections/models/pattern.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sde_collections/models/pattern.py b/sde_collections/models/pattern.py index 938b5eea..b6f44549 100644 --- a/sde_collections/models/pattern.py +++ b/sde_collections/models/pattern.py @@ -196,7 +196,15 @@ def apply(self) -> None: 
TitlePatternCandidateURL.objects.bulk_create(pattern_url_associations, ignore_conflicts=True) def unapply(self) -> None: - self.candidate_urls.update(generated_title="") + candidate_urls = self.candidate_urls.all() + for candidate_url in candidate_urls: + candidate_url.generated_title = "" + candidate_url.save() + self.candidate_urls.clear() + + def delete(self, *args, **kwargs): + self.unapply() + super().delete(*args, **kwargs) class Meta: """Meta definition for TitlePattern.""" From 97c76ac71e5dcc628a306996dc083a888d51f89e Mon Sep 17 00:00:00 2001 From: Bishwas Praveen Date: Tue, 16 Jul 2024 12:04:46 -0500 Subject: [PATCH 14/14] the resolve title function was not called and has been fixed --- sde_collections/models/pattern.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/sde_collections/models/pattern.py b/sde_collections/models/pattern.py index b6f44549..0849c6fd 100644 --- a/sde_collections/models/pattern.py +++ b/sde_collections/models/pattern.py @@ -4,7 +4,12 @@ from django.core.exceptions import ValidationError from django.db import models -from ..utils.title_resolver import is_valid_fstring, is_valid_xpath, parse_title +from ..utils.title_resolver import ( + is_valid_fstring, + is_valid_xpath, + parse_title, + resolve_title, +) from .collection_choice_fields import DocumentTypes @@ -161,9 +166,13 @@ def apply(self) -> None: ResolvedTitleError = apps.get_model("sde_collections", "ResolvedTitleError") for candidate_url in matched_urls: + context = { + "url": candidate_url.url, + "title": candidate_url.scraped_title, + "collection": self.collection.name, + } try: - # generated_title = resolve_title(self.title_pattern, context) - generated_title = self.title_pattern + generated_title = resolve_title(self.title_pattern, context) # check to see if the candidate url has an existing resolved title and delete it ResolvedTitle.objects.filter(candidate_url=candidate_url).delete()