diff --git a/.env_sample b/.env_sample new file mode 100644 index 00000000..e370568f --- /dev/null +++ b/.env_sample @@ -0,0 +1,18 @@ +CELERY_BROKER_URL="" +CELERY_FLOWER_PASSWORD="" +CELERY_FLOWER_USER="" +DATABASE_URL='postgresql://:@localhost:5432/' +DJANGO_ACCOUNT_ALLOW_REGISTRATION=False +DJANGO_AWS_ACCESS_KEY_ID="" +DJANGO_AWS_SECRET_ACCESS_KEY="" +DJANGO_AWS_STORAGE_BUCKET_NAME="" +GITHUB_ACCESS_TOKEN="" +GITHUB_BRANCH_FOR_WEBAPP="" +IPYTHONDIR="" +REDIS_URL="" +SINEQUA_CONFIGS_GITHUB_REPO="" +SINEQUA_CONFIGS_REPO_DEV_BRANCH="" +SINEQUA_CONFIGS_REPO_MASTER_BRANCH="" +SINEQUA_CONFIGS_REPO_WEBAPP_PR_BRANCH="" +SLACK_WEBHOOK_URL="" +USE_DOCKER=no diff --git a/.envs/.local/.django b/.envs/.local/.django index 026be76d..7ae4db1c 100644 --- a/.envs/.local/.django +++ b/.envs/.local/.django @@ -29,3 +29,7 @@ SINEQUA_CONFIGS_GITHUB_REPO='NASA-IMPACT/sde-backend' SINEQUA_CONFIGS_REPO_MASTER_BRANCH='master' SINEQUA_CONFIGS_REPO_DEV_BRANCH='dev' SINEQUA_CONFIGS_REPO_WEBAPP_PR_BRANCH='dummy_branch' + +# Slack Webhook +# ------------------------------------------------------------------------------ +SLACK_WEBHOOK_URL='https://hooks.slack.com/services/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' diff --git a/README.md b/README.md index 01efe654..ceabb8dd 100644 --- a/README.md +++ b/README.md @@ -12,15 +12,46 @@ Moved to [settings](http://cookiecutter-django.readthedocs.io/en/latest/settings ## Basic Commands ### Building The Project + ```bash $ docker-compose -f local.yml build ``` ### Running The Necessary Containers + ```bash $ docker-compose -f local.yml up ``` +### Non-Docker Local Setup + +If you want to run the project without Docker, you will need the following: + +- Postgres + +Run the following commands: + +``` +$ psql postgres +postgres=# create database <db_name>; +postgres=# create user <db_user> with password '<db_password>'; +postgres=# grant all privileges on database <db_name> to <db_user>; + +# This next one is optional, but it will allow the user to create databases for testing + +postgres=# alter role <db_user> with superuser; +``` + +Now copy .env_sample in the root directory to .env. Note that this setup does not use the .envs/ directory; it uses the .env file instead. + +In the .env file, replace the placeholders in the line `DATABASE_URL='postgresql://:@localhost:5432/'` with your user, password, and database. Change the port if yours is different. + +You don't need to change any other variables unless you want to use specific modules (for example, the GitHub integration requires a GitHub token). + +There is a section in `config/settings/base.py` which reads environment variables from this file. The line should look like `READ_DOT_ENV_FILE = env.bool("DJANGO_READ_DOT_ENV_FILE", default=True)`. Make sure the default is True there (which it should already be), or run `export DJANGO_READ_DOT_ENV_FILE=True` in your terminal. + +Run `python manage.py runserver` to check that your setup works. You might have to run an initial migration with `python manage.py migrate` first. + ### Setting Up Your Users - To create a **normal user account**, just go to Sign Up and fill out the form. Once you submit it, you'll see a "Verify Your E-mail Address" page. Go to your console to see a simulated email verification message. Copy the link into your browser. Now the user's email should be verified and ready to go. @@ -144,7 +175,7 @@ To run a celery worker: ```bash cd sde_indexing_helper celery -A config.celery_app worker -l info -``` +``` Please note: For Celery's import magic to work, it is important _where_ the celery commands are run. If you are in the same folder with _manage.py_, you should be right.
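As a concrete illustration of the Celery wiring described above, here is a minimal sketch of how the new `resolve_title_pattern` task (added to `sde_collections/tasks.py` later in this diff) can be queued by hand from `python manage.py shell`. It assumes the broker and a worker started as shown above, and that at least one `TitlePattern` already exists; the lookup itself is illustrative.

```python
# Minimal sketch: queue the new title-resolution task by hand.
from sde_collections.models.pattern import TitlePattern
from sde_collections.tasks import resolve_title_pattern

pattern = TitlePattern.objects.first()  # any existing title pattern
if pattern is not None:
    # The worker picks this up and calls pattern.apply(), which records
    # ResolvedTitle / ResolvedTitleError rows for the matched candidate URLs.
    resolve_title_pattern.delay(pattern.pk)
```

In normal operation the same task is queued automatically by the `post_save` signal on `TitlePattern` via `transaction.on_commit`, as added in `sde_collections/models/pattern.py` below.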
@@ -186,7 +217,6 @@ Run against the files : It's usually a good idea to run the hooks against all of the files when adding new hooks (usually `pre-commit` will only run on the changed files during git hooks). - ### Sentry Sentry is an error logging aggregator service. You can sign up for a free account at or download and host it yourself. diff --git a/config/settings/base.py b/config/settings/base.py index 93842d73..616b9c92 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -338,3 +338,4 @@ SINEQUA_CONFIGS_REPO_MASTER_BRANCH = env("SINEQUA_CONFIGS_REPO_MASTER_BRANCH") SINEQUA_CONFIGS_REPO_DEV_BRANCH = env("SINEQUA_CONFIGS_REPO_DEV_BRANCH") SINEQUA_CONFIGS_REPO_WEBAPP_PR_BRANCH = env("SINEQUA_CONFIGS_REPO_WEBAPP_PR_BRANCH") +SLACK_WEBHOOK_URL = env("SLACK_WEBHOOK_URL") diff --git a/config/urls.py b/config/urls.py index 9fbb8e39..1c1f2f45 100644 --- a/config/urls.py +++ b/config/urls.py @@ -5,10 +5,10 @@ from django.views import defaults as default_views admin.site.site_header = ( - "SDE Indexing Administration" # default: "Django Administration" + "SDE Indexing Helper Administration" # default: "Django Administration" ) -admin.site.index_title = "SDE Indexing" # default: "Site administration" -admin.site.site_title = "SDE Indexing" # default: "Django site admin" +admin.site.index_title = "SDE Indexing Helper" # default: "Site administration" +admin.site.site_title = "SDE Indexing Helper" # default: "Django site admin" urlpatterns = [ path("", include("sde_collections.urls", namespace="sde_collections")), @@ -18,8 +18,7 @@ # User management path("users/", include("sde_indexing_helper.users.urls", namespace="users")), path("accounts/", include("allauth.urls")), - path("api-auth/", include("rest_framework.urls")) - # Your stuff: custom urls includes go here + path("api-auth/", include("rest_framework.urls")), ] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) diff --git a/feedback/models.py b/feedback/models.py index 1e1a228f..0666080f 100644 --- a/feedback/models.py +++ b/feedback/models.py @@ -1,6 +1,8 @@ from django.db import models from django.utils import timezone +from sde_collections.utils.slack_utils import send_slack_message + class Feedback(models.Model): name = models.CharField(max_length=150) @@ -17,8 +19,30 @@ class Meta: def save(self, *args, **kwargs): if not self.id: self.created_at = timezone.now() + is_new = self._state.adding + if is_new: + message = self.format_notification_message() + try: + send_slack_message(message) + except Exception as e: + print(f"Failed to send slack message: {e}") super().save(*args, **kwargs) + def format_notification_message(self): + """ + Returns a formatted notification message containing details from this Feedback instance. + """ + notification_message = ( + f" New Feedback Received : \n" + f"Name: {self.name}\n" + f"Email: {self.email}\n" + f"Subject: {self.subject}\n" + f"Comments: {self.comments}\n" + f"Source: {self.source}\n" + f"Received on: {self.created_at.strftime('%Y-%m-%d %H:%M:%S')}" + ) + return notification_message + + class ContentCurationRequest(models.Model): name = models.CharField(max_length=150)
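For reviewers who want to exercise the notification path added above, a minimal sketch from `python manage.py shell` (the field values are purely illustrative, and it assumes `SLACK_WEBHOOK_URL` is configured as added to `config/settings/base.py` above):

```python
# Illustrative smoke test of the new Feedback -> Slack notification path.
from feedback.models import Feedback

feedback = Feedback(
    name="Jane Doe",  # example data only
    email="jane.doe@example.com",
    subject="Test notification",
    comments="Illustrative comment used to verify the Slack webhook.",
)
feedback.save()  # a brand-new instance, so save() formats and sends the Slack message
```

Note that the message is sent before `super().save()` runs, and any exception from the webhook call is caught and printed, so a Slack outage will not block the database write.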
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..55ec8d78 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.black] +line-length = 120 diff --git a/requirements/base.txt b/requirements/base.txt index fcd9ac27..dfbdf806 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -28,3 +28,5 @@ PyGithub==2.2.0 tqdm==4.66.1 xmltodict==0.13.0 django-cors-headers==4.3.1 +unidecode==1.3.8 +lxml==4.9.2 diff --git a/scripts/bulk_create_sources_on_webapp.py b/scripts/bulk_create_sources_on_webapp.py new file mode 100644 index 00000000..8c26de28 --- /dev/null +++ b/scripts/bulk_create_sources_on_webapp.py @@ -0,0 +1,64 @@ +from sde_collections.models.collection import Collection +from sde_collections.models.collection_choice_fields import Divisions + +DIVISION_MAPPING = { + "Helio": Divisions.HELIOPHYSICS, + "Astro": Divisions.ASTROPHYSICS, + "PDS": Divisions.PLANETARY, + "Earth": Divisions.EARTH_SCIENCE, + "BPS": Divisions.BIOLOGY, + "Multiple": Divisions.GENERAL, +} + +sources = [ + { + "Name": "Source name", + "Link": "Base link to the source", + "Division": "Division of the source from the spreadsheet", + "Notes": "Any notes available from the spreadsheet", + }, +] + + +def get_division_id(division_name): + division_name = division_name.strip() + return DIVISION_MAPPING.get(division_name, None) + + +def create_collection(source): + name = source["Name"] + link = source["Link"] + division_text = source["Division"] + notes = source["Notes"] + + division_id = get_division_id(division_text) + if division_id is None: + print(f"No valid division found for '{division_text}'. Skipping creation for {name}.") + return False + + try: + if Collection.objects.filter(name=name).exists(): + print(f"Collection with name '{name}' already exists. Skipping.") + return False + if Collection.objects.filter(url=link).exists(): + print(f"Collection with link '{link}' already exists.
Skipping.") + return False + new_collection = Collection(name=name, url=link, division=division_id, notes=notes) + new_collection.save() + print(f"Collection '{name}' created successfully.") + return True + except Exception as e: + print(f"Failed to create collection '{name}': {e}") + return False + + +def main(): + created_count = 0 + for source in sources: + if create_collection(source): + created_count += 1 + print(f"Total new collections created: {created_count}") + + +if __name__ == "__main__": + main() diff --git a/scripts/quality_and_indexing/add_perfect_to_prod_query.py b/scripts/quality_and_indexing/add_perfect_to_prod_query.py new file mode 100644 index 00000000..013be440 --- /dev/null +++ b/scripts/quality_and_indexing/add_perfect_to_prod_query.py @@ -0,0 +1,11 @@ +""" +adds collections marked as ready for public prod to the public query +after running this code, you will need to merge in the webapp branch +""" + +from sde_collections.models.collection import Collection +from sde_collections.models.collection_choice_fields import WorkflowStatusChoices + +for collection in Collection.objects.filter(workflow_status=WorkflowStatusChoices.READY_FOR_PUBLIC_PROD): + print(collection.config_folder) + collection.add_to_public_query() diff --git a/scripts/quality_and_indexing/change_statuses_on_webapp.py b/scripts/quality_and_indexing/change_statuses_on_webapp.py new file mode 100644 index 00000000..cb9abb30 --- /dev/null +++ b/scripts/quality_and_indexing/change_statuses_on_webapp.py @@ -0,0 +1,66 @@ +""" +take emily's notes from slack and change the appropriate statuses in the webapp +""" + +from sde_collections.models.collection import Collection +from sde_collections.models.collection_choice_fields import WorkflowStatusChoices + +RESEARCH_IN_PROGRESS = 1, "Research in Progress" +READY_FOR_ENGINEERING = 2, "Ready for Engineering" +ENGINEERING_IN_PROGRESS = 3, "Engineering in Progress" +READY_FOR_CURATION = 4, "Ready for Curation" +CURATION_IN_PROGRESS = 5, "Curation in Progress" +CURATED = 6, "Curated" +QUALITY_FIXED = 7, "Quality Fixed" +SECRET_DEPLOYMENT_STARTED = 8, "Secret Deployment Started" +SECRET_DEPLOYMENT_FAILED = 9, "Secret Deployment Failed" +READY_FOR_LRM_QUALITY_CHECK = 10, "Ready for LRM Quality Check" +READY_FOR_FINAL_QUALITY_CHECK = 11, "Ready for Quality Check" +QUALITY_CHECK_FAILED = 12, "Quality Check Failed" +READY_FOR_PUBLIC_PROD = 13, "Ready for Public Production" +PERFECT_ON_PROD = 14, "Perfect and on Production" +LOW_PRIORITY_PROBLEMS_ON_PROD = 15, "Low Priority Problems on Production" +HIGH_PRIORITY_PROBLEMS_ON_PROD = 16, "High Priority Problems on Production, only for old sources" +MERGE_PENDING = 17, "Code Merge Pending" + +perfect = [ + # "WIND_Spacecraft", + # "gamma_ray_data_tools_core_package", + # "land_processes_distributed_active_archive_center", + # "mdscc_deep_space_network", + # "HelioAnalytics", + # "nasa_infrared_telescope_facility_irtf", + # "gmao_fluid", + # "starchild_a_learning_center_for_young_astronomers", + # "voyager_Cosmic_Ray_Subsystem", + "ldas_land_data_assimilatin_system", + "ppi_node", +] + +low_priority = [ + "nasa_applied_sciences", + "parker_solar_probe", + "virtual_wave_observatory", + "explorer_program_acquisition", + "lisa_consortium", + "astropy", + "fermi_at_gsfc", + "microobservatory_robotic_telescope_network", +] + +for config in perfect: + print(config) + collection = Collection.objects.get(config_folder=config) + collection.workflow_status = WorkflowStatusChoices.PERFECT_ON_PROD + collection.save() + +for config in 
low_priority: + print(config) + collection = Collection.objects.get(config_folder=config) + collection.workflow_status = WorkflowStatusChoices.LOW_PRIORITY_PROBLEMS_ON_PROD + collection.save() + +# for config in perfect: +# collection = Collection.objects.get(config_folder=config) +# collection.workflow_status = WorkflowStatusChoices.PERFECT_ON_PROD +# collection.save() diff --git a/scripts/quality_and_indexing/find_missing_folders.py b/scripts/quality_and_indexing/find_missing_folders.py new file mode 100644 index 00000000..a91fdc06 --- /dev/null +++ b/scripts/quality_and_indexing/find_missing_folders.py @@ -0,0 +1,60 @@ +"""you run this in the shell on the server to find sources to index and find any that are missing plugin folders""" + +import os + +from sde_collections.models.collection import Collection +from sde_collections.models.collection_choice_fields import WorkflowStatusChoices +from sde_collections.utils.github_helper import GitHubHandler + + +def get_sources_to_fix(): + return Collection.objects.filter(workflow_status__in=[WorkflowStatusChoices.QUALITY_FIXED]) + + +def get_sources_to_index(): + return Collection.objects.filter(workflow_status__in=[WorkflowStatusChoices.CURATED]) + + +def get_all_relevant_sources(): + return Collection.objects.filter( + workflow_status__in=[WorkflowStatusChoices.QUALITY_FIXED, WorkflowStatusChoices.CURATED] + ) + + +def get_missing_folders(collections, base_directory): + gh = GitHubHandler() + missing = [] + for source in collections: + folder_path = os.path.join(base_directory, source.config_folder, "default.xml") + if not gh.check_file_exists(folder_path): + missing.append(source) + return missing + + +def print_configs(queryset): + for source in queryset: + print(source.config_folder) + print("---" * 20) + print() + + +print("sources_to_fix") +sources_to_fix = get_sources_to_fix() +print_configs(sources_to_fix) + + +print("sources_to_index") +sources_to_index = get_sources_to_index() +print_configs(sources_to_index) + + +all_relevant_sources = get_all_relevant_sources() + +print("missing_scraper_folders") +missing_folders = get_missing_folders(all_relevant_sources, "sources/scrapers/") +print_configs(missing_folders) + + +print("missing_plugin_folders") +missing_folders = get_missing_folders(all_relevant_sources, "sources/SDE/") +print_configs(missing_folders) diff --git a/scripts/xpath_cleanup/find_xpath_patterns.py b/scripts/xpath_cleanup/find_xpath_patterns.py new file mode 100644 index 00000000..0205a46a --- /dev/null +++ b/scripts/xpath_cleanup/find_xpath_patterns.py @@ -0,0 +1,18 @@ +# flake8: noqa +"""this script is used to find all the xpath patterns in the database, so that they can be mapped to new patterns in xpath_mappings.py""" + +from sde_collections.models.pattern import TitlePattern + +print( + "there are", TitlePattern.objects.filter(title_pattern__contains="xpath").count(), "xpath patterns in the database" +) + +# Get all the xpath patterns and their candidate urls +xpath_patterns = TitlePattern.objects.filter(title_pattern__contains="xpath") +for xpath_pattern in xpath_patterns: + print(xpath_pattern.title_pattern) + # for url in xpath_pattern.candidate_urls.all(): + # print(url.url) + print() + +# not every xpath pattern has a candidate url, but I went ahead and fixed all of them anyway diff --git a/scripts/xpath_cleanup/xpath_mappings.py b/scripts/xpath_cleanup/xpath_mappings.py new file mode 100644 index 00000000..31a66496 --- /dev/null +++ b/scripts/xpath_cleanup/xpath_mappings.py @@ -0,0 +1,73 @@ +# flake8: noqa 
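For context on the mapping file that follows: each key is a title pattern as it currently exists in the database, and each value appears to be the same pattern rewritten in the plain syntax used by the new title resolver introduced in this diff. One pair, quoted verbatim from the mapping below, for illustration:

```python
# Illustrative entry from xpath_mappings: old database pattern -> rewritten pattern
old_pattern = 'concat("FIRMS Layer Information: ",xpath://*[@id="layerid"])'
new_pattern = 'FIRMS Layer Information: xpath://*[@id="layerid"]'
```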
+xpath_mappings = { + 'Concat(xpath://*[@id="cpad"]/h2, xpath://*[@id="cpad"]/h3)': 'xpath://*[@id="cpad"]/h2 xpath://*[@id="cpad"]/h3', + 'Concat(xpath://*[@id="cpad"]/h2, doc.title)': 'xpath://*[@id="cpad"]/h2 {title}', + 'xpath://*[@id="cpad"]/h2': 'xpath://*[@id="cpad"]/h2', + 'xpath://*[@id="cpad"]/h3': 'xpath://*[@id="cpad"]/h3', + 'Concat("GCN ", xpath://*[@id="gcn-news-and-events"]/a)': 'GCN xpath://*[@id="gcn-news-and-events"]/a', + 'Concat("GCN", xpath://*[@id="super-kamioka-neutrino-detection-experiment-super-kamiokande"]/a)': 'GCN xpath://*[@id="super-kamioka-neutrino-detection-experiment-super-kamiokande"]/a', + 'concat("MAST - Missions and Data - ",xpath://*[@id="page-title"])': 'MAST - Missions and Data - xpath://*[@id="page-title"]', + 'concat("HEK Observation Details: ",xpath://*[@id="event-detail"]/div[1])': 'HEK Observation Details: xpath://*[@id="event-detail"]/div[1]', + 'concat("The Martian Meteorite Compendium ",xpath://*[@id="main_content_wrapper"]/h4/text())': 'The Martian Meteorite Compendium xpath://*[@id="main_content_wrapper"]/h4/text()', + 'concat("Antarctic Meteorite Sample Preparation - ",xpath://*[@id="main_content_wrapper"]/h4)': 'Antarctic Meteorite Sample Preparation - xpath://*[@id="main_content_wrapper"]/h4', + 'concat("My NASA Data: ",xpath://*[@id="block-mynasadata-theme-content"]/article/div/div[1]/h1/span)': 'My NASA Data: xpath://*[@id="block-mynasadata-theme-content"]/article/div/div[1]/h1/span', + 'concat("My NASA Data: Phenomenon - ",xpath:/html/body/div[1]/div/div[1]/div[2]/div/div[1]/div/section/div/div[1]/h1/text())': "My NASA Data: Phenomenon - xpath:/html/body/div[1]/div/div[1]/div[2]/div/div[1]/div/section/div/div[1]/h1/text()", + 'concat("My NASA Data: Mini Lessons - ",xpath://*[@id="block-mynasadata-theme-content"]/article/div/div[1]/h1/span)': 'My NASA Data: Mini Lessons - xpath://*[@id="block-mynasadata-theme-content"]/article/div/div[1]/h1/span', + 'concat("My NASA Data: Lesson Plans - ",xpath://*[@id="block-mynasadata-theme-content"]/article/div/div[1]/h1/span)': 'My NASA Data: Lesson Plans - xpath://*[@id="block-mynasadata-theme-content"]/article/div/div[1]/h1/span', + 'concat("My NASA Data: Interactive Models - ",xpath://*[@id="block-mynasadata-theme-content"]/article/div/div[1]/h1/span)': 'My NASA Data: Interactive Models - xpath://*[@id="block-mynasadata-theme-content"]/article/div/div[1]/h1/span', + 'concat("FIRMS Layer Information: ",xpath://*[@id="layerid"])': 'FIRMS Layer Information: xpath://*[@id="layerid"]', + "concat(“Artwork: “, xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b)": "Artwork: xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b", + "concat(“Calibration: “, xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b)": "Calibration: xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b", + "concat(“Canyons: “, xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b)": "Canyons: xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b", + "concat(“Craters: “, xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b)": "Craters: xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b", + "concat(“Dust Storms: “, 
xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b)": "Dust Storms: xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b", + "concat(“Martian Terrain: “, xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b)": "Martian Terrain: xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b", + "concat(“Sand Dunes: “, xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b)": "Sand Dunes: xpath:/html/body/div/table/tbody/tr/td/table[7]/tbody/tr/td[3]/table[2]/tbody/tr[4]/td/p[1]/b", + 'concat(“MER Mission: “, xpath://*[@id="white-blur"]/table/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr/td[3]/table/tbody/tr[1]/td)': 'MER Mission: xpath://*[@id="white-blur"]/table/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr/td[3]/table/tbody/tr[1]/td', + 'concat(“MER Spacecraft: “, xpath://*[@id="white-blur"]/table/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr/td[3]/table/tbody/tr[1]/td)': 'MER Spacecraft: xpath://*[@id="white-blur"]/table/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr/td[3]/table/tbody/tr[1]/td', + 'concat(“MER Spotlight: “, xpath://*[@id="white-blur"]/table/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr/td[3]/table/tbody/tr[1]/td)': 'MER Spotlight: xpath://*[@id="white-blur"]/table/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr/td[3]/table/tbody/tr[1]/td', + 'concat(“MER Videos: “, xpath://*[@id="white-blur"]/table/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr/td[3]/table/tbody/tr[1]/td)': 'MER Videos: xpath://*[@id="white-blur"]/table/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr/td[3]/table/tbody/tr[1]/td', + 'concat(“Imagine Mars: “, xpath://*[@id="centeredcontent2"]/table[3]/tbody/tr/td/table/tbody/tr/td/table[1]/tbody/tr/td[2]/div)': 'Imagine Mars: xpath://*[@id="centeredcontent2"]/table[3]/tbody/tr/td/table/tbody/tr/td/table[1]/tbody/tr/td[2]/div', + 'concat(“Imagine Mars Webcasts: “, xpath://*[@id="centeredcontent2"]/table[4]/tbody/tr/td/table/tbody/tr/td/table/tbody/tr[2]/td/div/p[1]/text()[1])': 'Imagine Mars Webcasts: xpath://*[@id="centeredcontent2"]/table[4]/tbody/tr/td/table/tbody/tr/td/table/tbody/tr[2]/td/div/p[1]/text()[1]', + 'STEREO Learning Center - {xpath://*[@id="content"]/div/h3}': 'STEREO Learning Center - xpath://*[@id="content"]/div/h3', + '{xpath://*[@id="content"]/div/h1}': 'xpath://*[@id="content"]/div/h1', + "{xpath:/html/body/center[1]/font/h1/i}": "xpath:/html/body/center[1]/font/h1/i", + "{xpath:/html/body/div[2]/section[1]/div/div/h5/text()} - Images": "xpath:/html/body/div[2]/section[1]/div/div/h5/text() - Images", + "{xpath:/html/body/div[1]/div[2]/div/div[1]/div/div/div/div/div/h2/text()}": "xpath:/html/body/div[1]/div[2]/div/div[1]/div/div/div/div/div/h2/text()", + "{xpath:/html/body/div[2]/div[2]/div/div[1]/div/div/div/div[1]/div/h2/text()}": "xpath:/html/body/div[2]/div[2]/div/div[1]/div/div/div/div[1]/div/h2/text()", + "{xpath:/html/body/div[1]/div[2]/div/div[1]/div/div/div/div[1]/div/h2/text()}": "xpath:/html/body/div[1]/div[2]/div/div[1]/div/div/div/div[1]/div/h2/text()", + '{xpath://*[@id="ascl_body"]/div/h2}': 'xpath://*[@id="ascl_body"]/div/h2', + '{xpath://*[@id="rightcontent"]/h1} NASA - NSSDCA - Experiment - Details': 'xpath://*[@id="rightcontent"]/h1 NASA - NSSDCA - Experiment - Details', + '{xpath://*[@id="rightcontent"]/h1} NASA - NSSDCA - Spacecraft - Details': 'xpath://*[@id="rightcontent"]/h1 NASA - NSSDCA - Spacecraft - Details', 
+ '{xpath://*[@id="rightcontent"]/h1} NASA - NSSDCA - Dataset - Details': 'xpath://*[@id="rightcontent"]/h1 NASA - NSSDCA - Dataset - Details', + '{xpath://*[@id="rightcontent"]/h1} NASA - NSSDCA - Publication - Details': 'xpath://*[@id="rightcontent"]/h1 NASA - NSSDCA - Publication - Details', + '{xpath://*[@id="contentwrapper"]/center/h2} - Abstract': 'xpath://*[@id="contentwrapper"]/center/h2 - Abstract', + '{xpath://*[@id="contentwrapper"]/center/h1} - Publications and Abstracts': 'xpath://*[@id="contentwrapper"]/center/h1 - Publications and Abstracts', + '{xpath://*[@id="page"]/section[3]/div/article/header/h2} - Blogs by Author': 'xpath://*[@id="page"]/section[3]/div/article/header/h2 - Blogs by Author', + "{xpath:/html/body/h2} - {xpath:/html/body/h4[2]}": "xpath:/html/body/h2} - xpath:/html/body/h4[2]", + "{title} - {xpath:/html/body/div/div/h2}": "{title} - xpath:/html/body/div/div/h2", + "{title} - {xpath:/html/body/h3[1]}": "{title} - xpath:/html/body/h3[1]", + '{title} - {xpath://*[@id="OneColumn"]/div[2]/table/tbody/tr/td/blockquote/h2}': '{title} - xpath://*[@id="OneColumn"]/div[2]/table/tbody/tr/td/blockquote/h2', + '{title} - {xpath://*[@id="content-wrapper"]/h1}': '{title} - xpath://*[@id="content-wrapper"]/h1', + "{xpath:/html/body/div/main/div[2]/section/div[2]/h1} | Astrobiology": "xpath:/html/body/div/main/div[2]/section/div[2]/h1 | Astrobiology", + "{xpath:/html/body/div/main/section/div[2]/h1} | The Classroom | Astrobiology": "xpath:/html/body/div/main/section/div[2]/h1 | The Classroom | Astrobiology", + "{xpath:/html/body/div/section[2]/div[1]/article/h1} | About FameLab - Finalist Bios": "xpath:/html/body/div/section[2]/div[1]/article/h1 | About FameLab - Finalist Bios", + "{xpath:/html/body/div/section[2]/div[2]/h1} | About FameLab - Videos": "xpath:/html/body/div/section[2]/div[2]/h1 | About FameLab - Videos", + '{xpath://*[@id="container-body"]/div[2]/div[2]/h2} - {xpath://*[@id="container-body"]/div[2]/div[2]/h4/span[1]/text() | NASA Astrobiology Institute': 'xpath://*[@id="container-body"]/div[2]/div[2]/h2} - {xpath://*[@id="container-body"]/div[2]/div[2]/h4/span[1]/text()} | NASA Astrobiology Institute', + '{xpath://*[@id="container-body"]/div[2]/div[2]/h3/text()} - Annual Report | NASA Astrobiology Institute': 'xpath://*[@id="container-body"]/div[2]/div[2]/h3/text() - Annual Report | NASA Astrobiology Institute', + '{xpath://*[@id="container-body"]/div[2]/div[2]/ol/li/h3} - Article | NASA Astrobiology Institute': 'xpath://*[@id="container-body"]/div[2]/div[2]/ol/li/h3 - Article | NASA Astrobiology Institute', + "All Things Electric and Magnetic - {xpath:/html/body/div[1]/center[1]/table/tbody/tr/td[2]/font/center/h1/i}": "All Things Electric and Magnetic - xpath:/html/body/div[1]/center[1]/table/tbody/tr/td[2]/font/center/h1/i", + 'Tutorial - {xpath://*[@id="Analyzing-interstellar-reddening-and-calculating-synthetic-photometry"]}': 'Tutorial - xpath://*[@id="Analyzing-interstellar-reddening-and-calculating-synthetic-photometry"]', + 'Health & Air Quality - {xpath://*[@id="block-views-block-hero-block-7"]/div/div/div[2]/div/div/div[2]/div/p}': 'Health & Air Quality - xpath://*[@id="block-views-block-hero-block-7"]/div/div/div[2]/div/div/div[2]/div/p', + 'News - {xpath://*[@id="left-column"]/h2} - {xpath://*[@id="left-column"]/p[1]}': 'News - xpath://*[@id="left-column"]/h2} - xpath://*[@id="left-column"]/p[1]', + 'JWST {xpath://*[@id="stsci-content"]/div/div/h2} - {title}': 'JWST xpath://*[@id="stsci-content"]/div/div/h2 - {title}', + 
'{xpath://*[@id="container-body"]/div[2]/div[2]/ol/li/h2} | NASA Astrobiology Institute': 'xpath://*[@id="container-body"]/div[2]/div[2]/ol/li/h2 | NASA Astrobiology Institute', + 'Directory - {xpath://*[@id="container-body"]/div[2]/div[2]/div[1]/h2/text()} | NASA Astrobiology Institute': 'Directory - xpath://*[@id="container-body"]/div[2]/div[2]/div[1]/h2/text() | NASA Astrobiology Institute', + 'Conference and School Funding - {xpath://*[@id="container-body"]/div[2]/div[2]/ol/li/h3/text()} | NASA Astrobiology Institute': 'Conference and School Funding - xpath://*[@id="container-body"]/div[2]/div[2]/ol/li/h3/text() | NASA Astrobiology Institute', + 'Seminars - {xpath://*[@id="container-body"]/div[2]/div[2]/div/h2} | NASA Astrobiology Institute': 'Seminars - xpath://*[@id="container-body"]/div[2]/div[2]/div/h2 | NASA Astrobiology Institute', + 'Team Members - {xpath://*[@id="container-body"]/div[2]/div[2]/div[1]/h2/text()} | NASA Astrobiology Institute': 'Team Members - xpath://*[@id="container-body"]/div[2]/div[2]/div[1]/h2/text() | NASA Astrobiology Institute', + 'Teams - {xpath://*[@id="container-body"]/div[2]/div[2]/div[1]/h2/text()} | NASA Astrobiology Institute': 'Teams - xpath://*[@id="container-body"]/div[2]/div[2]/div[1]/h2/text() | NASA Astrobiology Institute', + '{xpath://*[@id="page"]/section[3]/div/article/header/h2}': 'xpath://*[@id="page"]/section[3]/div/article/header/h2', + '{xpath://*[@id="page"]/section[1]/div/header/h2}': 'xpath://*[@id="page"]/section[1]/div/header/h2', + '{xpath://*[@id="page"]/section[1]/div/header/h2} - News by Column': 'xpath://*[@id="page"]/section[1]/div/header/h2 - News by Column', +} diff --git a/sde_collections/admin.py b/sde_collections/admin.py index d5bab102..6cec2983 100644 --- a/sde_collections/admin.py +++ b/sde_collections/admin.py @@ -3,8 +3,8 @@ from django.contrib import admin, messages from django.http import HttpResponse -from .models.candidate_url import CandidateURL from .models.collection import Collection, WorkflowHistory +from .models.candidate_url import CandidateURL, ResolvedTitle from .models.pattern import IncludePattern, TitlePattern from .tasks import import_candidate_urls_from_api @@ -22,10 +22,7 @@ def generate_deployment_message(modeladmin, request, queryset): Collections Now Live in Prod:\n""" message_middle = "\n\n".join( - [ - f"- {collection.name} | {collection.server_url_prod}" - for collection in queryset.all() - ] + [f"- {collection.name} | {collection.server_url_prod}" for collection in queryset.all()] ) message_end = """ @@ -46,14 +43,10 @@ def download_candidate_urls_as_csv(modeladmin, request, queryset): writer = csv.writer(response) if len(queryset) > 1: - messages.add_message( - request, messages.ERROR, "You can only export one collection at a time." 
- ) + messages.add_message(request, messages.ERROR, "You can only export one collection at a time.") return - urls = CandidateURL.objects.filter(collection=queryset.first()).values_list( - "url", flat=True - ) + urls = CandidateURL.objects.filter(collection=queryset.first()).values_list("url", flat=True) # Write your headers here writer.writerow(["candidate_url"]) @@ -137,9 +130,7 @@ def import_candidate_urls_secret_test(modeladmin, request, queryset): @admin.action(description="Import candidate URLs from Secret Production") def import_candidate_urls_secret_production(modeladmin, request, queryset): - import_candidate_urls_from_api_caller( - modeladmin, request, queryset, "secret_production" - ) + import_candidate_urls_from_api_caller(modeladmin, request, queryset, "secret_production") @admin.action(description="Import candidate URLs from Li's Server") @@ -149,9 +140,7 @@ def import_candidate_urls_lis_server(modeladmin, request, queryset): @admin.action(description="Import candidate URLs from LRM Dev Server") def import_candidate_urls_lrm_dev_server(modeladmin, request, queryset): - import_candidate_urls_from_api_caller( - modeladmin, request, queryset, "lrm_dev_server" - ) + import_candidate_urls_from_api_caller(modeladmin, request, queryset, "lrm_dev_server") class ExportCsvMixin: @@ -291,7 +280,12 @@ class WorkflowHistoryAdmin(admin.ModelAdmin): search_fields = ["collection__name"] list_filter = ["workflow_status", "old_status"] + +class ResolvedTitleAdmin(admin.ModelAdmin): + list_display = ["title_pattern", "candidate_url", "resolved_title", "created_at"] + admin.site.register(WorkflowHistory, WorkflowHistoryAdmin) admin.site.register(CandidateURL, CandidateURLAdmin) admin.site.register(TitlePattern, TitlePatternAdmin) -admin.site.register(IncludePattern) \ No newline at end of file +admin.site.register(IncludePattern) +admin.site.register(ResolvedTitle, ResolvedTitleAdmin) diff --git a/sde_collections/migrations/0045_alter_collection_workflow_status.py b/sde_collections/migrations/0045_alter_collection_workflow_status.py new file mode 100644 index 00000000..3580be38 --- /dev/null +++ b/sde_collections/migrations/0045_alter_collection_workflow_status.py @@ -0,0 +1,39 @@ +# Generated by Django 4.2.9 on 2024-05-03 13:41 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0044_alter_collection_document_type"), + ] + + operations = [ + migrations.AlterField( + model_name="collection", + name="workflow_status", + field=models.IntegerField( + choices=[ + (1, "Research in Progress"), + (2, "Ready for Engineering"), + (3, "Engineering in Progress"), + (4, "Ready for Curation"), + (5, "Curation in Progress"), + (6, "Curated"), + (7, "Quality Fixed"), + (8, "Secret Deployment Started"), + (9, "Secret Deployment Failed"), + (10, "Ready for LRM Quality Check"), + (11, "Ready for Quality Check"), + (12, "Quality Check Failed"), + (13, "Ready for Public Production"), + (14, "Perfect and on Production"), + (15, "Low Priority Problems on Production"), + (16, "High Priority Problems on Production, only for old sources"), + (17, "Code Merge Pending"), + ], + default=1, + ), + ), + ] diff --git a/sde_collections/migrations/0046_resolvedtitle_candidateurl_resolved_title.py b/sde_collections/migrations/0046_resolvedtitle_candidateurl_resolved_title.py new file mode 100644 index 00000000..4d1e6961 --- /dev/null +++ b/sde_collections/migrations/0046_resolvedtitle_candidateurl_resolved_title.py @@ -0,0 +1,61 @@ +# Generated by 
Django 4.2.9 on 2024-05-21 19:01 + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0045_alter_collection_workflow_status"), + ] + + operations = [ + migrations.CreateModel( + name="ResolvedTitle", + fields=[ + ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ( + "resolution_status", + models.BooleanField(default=False, help_text="True if resolved, False if unresolved"), + ), + ("resolution_date_time", models.DateTimeField(default=django.utils.timezone.now)), + ("resolved_title", models.CharField(blank=True, max_length=1024)), + ("error_string", models.TextField(blank=True)), + ("http_status_code", models.IntegerField(blank=True, null=True)), + ( + "candidate_url", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="resolved_titles", + to="sde_collections.candidateurl", + ), + ), + ( + "title_pattern", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="resolved_titles", + to="sde_collections.titlepattern", + ), + ), + ], + options={ + "verbose_name": "Resolved Title", + "verbose_name_plural": "Resolved Titles", + }, + ), + migrations.AddField( + model_name="candidateurl", + name="resolved_title", + field=models.ForeignKey( + blank=True, + help_text="Link to the resolved title data", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="candidate_urls", + to="sde_collections.resolvedtitle", + ), + ), + ] diff --git a/sde_collections/migrations/0047_remove_candidateurl_resolved_title_and_more.py b/sde_collections/migrations/0047_remove_candidateurl_resolved_title_and_more.py new file mode 100644 index 00000000..46720186 --- /dev/null +++ b/sde_collections/migrations/0047_remove_candidateurl_resolved_title_and_more.py @@ -0,0 +1,27 @@ +# Generated by Django 4.2.9 on 2024-05-21 21:18 + +from django.db import migrations, models +import sde_collections.models.pattern + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0046_resolvedtitle_candidateurl_resolved_title"), + ] + + operations = [ + migrations.RemoveField( + model_name="candidateurl", + name="resolved_title", + ), + migrations.AlterField( + model_name="titlepattern", + name="title_pattern", + field=models.CharField( + help_text="This is the pattern for the new title. 
You can either write an exact replacement string (no quotes required) or you can write sinequa-valid code", + validators=[sde_collections.models.pattern.validate_title_pattern], + verbose_name="Title Pattern", + ), + ), + ] diff --git a/sde_collections/migrations/0048_alter_resolvedtitle_candidate_url.py b/sde_collections/migrations/0048_alter_resolvedtitle_candidate_url.py new file mode 100644 index 00000000..0a2a7c82 --- /dev/null +++ b/sde_collections/migrations/0048_alter_resolvedtitle_candidate_url.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.9 on 2024-05-21 21:20 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0047_remove_candidateurl_resolved_title_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="resolvedtitle", + name="candidate_url", + field=models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + related_name="resolved_titles", + to="sde_collections.candidateurl", + ), + ), + ] diff --git a/sde_collections/migrations/0049_alter_resolvedtitle_resolution_date_time.py b/sde_collections/migrations/0049_alter_resolvedtitle_resolution_date_time.py new file mode 100644 index 00000000..f51f9fb0 --- /dev/null +++ b/sde_collections/migrations/0049_alter_resolvedtitle_resolution_date_time.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.9 on 2024-05-21 21:23 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0048_alter_resolvedtitle_candidate_url"), + ] + + operations = [ + migrations.AlterField( + model_name="resolvedtitle", + name="resolution_date_time", + field=models.DateTimeField(auto_now_add=True), + ), + ] diff --git a/sde_collections/migrations/0050_alter_resolvedtitle_resolved_title.py b/sde_collections/migrations/0050_alter_resolvedtitle_resolved_title.py new file mode 100644 index 00000000..3c47818f --- /dev/null +++ b/sde_collections/migrations/0050_alter_resolvedtitle_resolved_title.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.9 on 2024-05-21 21:24 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0049_alter_resolvedtitle_resolution_date_time"), + ] + + operations = [ + migrations.AlterField( + model_name="resolvedtitle", + name="resolved_title", + field=models.CharField(blank=True), + ), + ] diff --git a/sde_collections/migrations/0051_alter_resolvedtitle_error_string_and_more.py b/sde_collections/migrations/0051_alter_resolvedtitle_error_string_and_more.py new file mode 100644 index 00000000..f6e69f23 --- /dev/null +++ b/sde_collections/migrations/0051_alter_resolvedtitle_error_string_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.9 on 2024-05-22 15:24 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0050_alter_resolvedtitle_resolved_title"), + ] + + operations = [ + migrations.AlterField( + model_name="resolvedtitle", + name="error_string", + field=models.TextField(blank=True, default=""), + ), + migrations.AlterField( + model_name="resolvedtitle", + name="resolved_title", + field=models.CharField(blank=True, default=""), + ), + ] diff --git a/sde_collections/migrations/0052_rename_resolution_date_time_resolvedtitle_created_at_and_more.py b/sde_collections/migrations/0052_rename_resolution_date_time_resolvedtitle_created_at_and_more.py new file mode 100644 index 
00000000..621f8f3e --- /dev/null +++ b/sde_collections/migrations/0052_rename_resolution_date_time_resolvedtitle_created_at_and_more.py @@ -0,0 +1,63 @@ +# Generated by Django 4.2.9 on 2024-05-22 17:39 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0051_alter_resolvedtitle_error_string_and_more"), + ] + + operations = [ + migrations.RenameField( + model_name="resolvedtitle", + old_name="resolution_date_time", + new_name="created_at", + ), + migrations.RemoveField( + model_name="resolvedtitle", + name="error_string", + ), + migrations.RemoveField( + model_name="resolvedtitle", + name="http_status_code", + ), + migrations.RemoveField( + model_name="resolvedtitle", + name="resolution_status", + ), + migrations.AlterField( + model_name="resolvedtitle", + name="candidate_url", + field=models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to="sde_collections.candidateurl"), + ), + migrations.AlterField( + model_name="resolvedtitle", + name="title_pattern", + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="sde_collections.titlepattern"), + ), + migrations.CreateModel( + name="ResolvedTitleError", + fields=[ + ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("error_string", models.TextField()), + ("http_status_code", models.IntegerField(blank=True, null=True)), + ( + "candidate_url", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, to="sde_collections.candidateurl" + ), + ), + ( + "title_pattern", + models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="sde_collections.titlepattern"), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/sde_collections/migrations/0053_alter_collection_url.py b/sde_collections/migrations/0053_alter_collection_url.py new file mode 100644 index 00000000..3378e82a --- /dev/null +++ b/sde_collections/migrations/0053_alter_collection_url.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.9 on 2024-05-23 21:06 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0052_rename_resolution_date_time_resolvedtitle_created_at_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="collection", + name="url", + field=models.URLField(max_length=2048, verbose_name="URL"), + ), + ] diff --git a/sde_collections/migrations/0054_merge_20240531_1332.py b/sde_collections/migrations/0054_merge_20240531_1332.py new file mode 100644 index 00000000..1b54568a --- /dev/null +++ b/sde_collections/migrations/0054_merge_20240531_1332.py @@ -0,0 +1,13 @@ +# Generated by Django 4.2.9 on 2024-05-31 18:32 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0046_workflowhistory_old_status"), + ("sde_collections", "0053_alter_collection_url"), + ] + + operations = [] diff --git a/sde_collections/migrations/0055_alter_workflowhistory_old_status_and_more.py b/sde_collections/migrations/0055_alter_workflowhistory_old_status_and_more.py new file mode 100644 index 00000000..8d098cfd --- /dev/null +++ b/sde_collections/migrations/0055_alter_workflowhistory_old_status_and_more.py @@ -0,0 +1,65 @@ +# Generated by Django 4.2.9 on 2024-05-31 18:33 + +from django.db import migrations, models + + +class Migration(migrations.Migration): 
+ + dependencies = [ + ("sde_collections", "0054_merge_20240531_1332"), + ] + + operations = [ + migrations.AlterField( + model_name="workflowhistory", + name="old_status", + field=models.IntegerField( + choices=[ + (1, "Research in Progress"), + (2, "Ready for Engineering"), + (3, "Engineering in Progress"), + (4, "Ready for Curation"), + (5, "Curation in Progress"), + (6, "Curated"), + (7, "Quality Fixed"), + (8, "Secret Deployment Started"), + (9, "Secret Deployment Failed"), + (10, "Ready for LRM Quality Check"), + (11, "Ready for Quality Check"), + (12, "Quality Check Failed"), + (13, "Ready for Public Production"), + (14, "Perfect and on Production"), + (15, "Low Priority Problems on Production"), + (16, "High Priority Problems on Production, only for old sources"), + (17, "Code Merge Pending"), + ], + null=True, + ), + ), + migrations.AlterField( + model_name="workflowhistory", + name="workflow_status", + field=models.IntegerField( + choices=[ + (1, "Research in Progress"), + (2, "Ready for Engineering"), + (3, "Engineering in Progress"), + (4, "Ready for Curation"), + (5, "Curation in Progress"), + (6, "Curated"), + (7, "Quality Fixed"), + (8, "Secret Deployment Started"), + (9, "Secret Deployment Failed"), + (10, "Ready for LRM Quality Check"), + (11, "Ready for Quality Check"), + (12, "Quality Check Failed"), + (13, "Ready for Public Production"), + (14, "Perfect and on Production"), + (15, "Low Priority Problems on Production"), + (16, "High Priority Problems on Production, only for old sources"), + (17, "Code Merge Pending"), + ], + default=1, + ), + ), + ] diff --git a/sde_collections/models/candidate_url.py b/sde_collections/models/candidate_url.py index acef4114..8f4e423e 100644 --- a/sde_collections/models/candidate_url.py +++ b/sde_collections/models/candidate_url.py @@ -6,16 +6,14 @@ from .collection import Collection from .collection_choice_fields import DocumentTypes -from .pattern import ExcludePattern +from .pattern import ExcludePattern, TitlePattern class CandidateURLQuerySet(models.QuerySet): def with_exclusion_status(self): return self.annotate( excluded=models.Exists( - ExcludePattern.candidate_urls.through.objects.filter( - candidateurl=models.OuterRef("pk") - ) + ExcludePattern.candidate_urls.through.objects.filter(candidateurl=models.OuterRef("pk")) ) ) @@ -28,9 +26,7 @@ def get_queryset(self): class CandidateURL(models.Model): """A candidate URL scraped for a given collection.""" - collection = models.ForeignKey( - Collection, on_delete=models.CASCADE, related_name="candidate_urls" - ) + collection = models.ForeignKey(Collection, on_delete=models.CASCADE, related_name="candidate_urls") url = models.CharField("URL") hash = models.CharField("Hash", max_length=32, blank=True, default="1") scraped_title = models.CharField( @@ -57,9 +53,7 @@ class CandidateURL(models.Model): blank=True, help_text="This is the title present on Production Server", ) - level = models.IntegerField( - "Level", default=0, blank=True, help_text="Level in the tree. Based on /." - ) + level = models.IntegerField("Level", default=0, blank=True, help_text="Level in the tree. 
Based on /.") visited = models.BooleanField(default=False) objects = CandidateURLManager() document_type = models.IntegerField(choices=DocumentTypes.choices, null=True) @@ -143,3 +137,30 @@ def save(self, *args, **kwargs): self.hash = hash_value super().save(*args, **kwargs) + + +class ResolvedTitleBase(models.Model): + title_pattern = models.ForeignKey(TitlePattern, on_delete=models.CASCADE) + candidate_url = models.OneToOneField(CandidateURL, on_delete=models.CASCADE) + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + abstract = True + + +class ResolvedTitle(ResolvedTitleBase): + resolved_title = models.CharField(blank=True, default="") + + class Meta: + verbose_name = "Resolved Title" + verbose_name_plural = "Resolved Titles" + + def save(self, *args, **kwargs): + # Finds the linked candidate URL and deletes ResolvedTitleError objects linked to it + ResolvedTitleError.objects.filter(candidate_url=self.candidate_url).delete() + super().save(*args, **kwargs) + + +class ResolvedTitleError(ResolvedTitleBase): + error_string = models.TextField(null=False, blank=False) + http_status_code = models.IntegerField(null=True, blank=True) diff --git a/sde_collections/models/collection.py b/sde_collections/models/collection.py index b3fc7010..5f082a81 100644 --- a/sde_collections/models/collection.py +++ b/sde_collections/models/collection.py @@ -12,6 +12,11 @@ from config_generation.db_to_xml import XmlEditor from ..utils.github_helper import GitHubHandler +from ..utils.slack_utils import ( + STATUS_CHANGE_NOTIFICATIONS, + format_slack_message, + send_slack_message, +) from .collection_choice_fields import ( ConnectorChoices, CurationStatusChoices, @@ -30,7 +35,7 @@ class Collection(models.Model): name = models.CharField("Name", max_length=1024) config_folder = models.CharField("Config Folder", max_length=2048, unique=True, editable=False) - url = models.URLField("URL", max_length=2048, blank=True) + url = models.URLField("URL", max_length=2048) division = models.IntegerField(choices=Divisions.choices) turned_on = models.BooleanField("Turned On", default=True) connector = models.IntegerField(choices=ConnectorChoices.choices, default=ConnectorChoices.CRAWLER2) @@ -118,7 +123,7 @@ def tree_root(self) -> str: def server_url_secret_prod(self) -> str: base_url = "https://sciencediscoveryengine.nasa.gov" payload = { - "name": "query-sde-primary", + "name": "secret-prod", "scope": "All", "text": "", "advanced": { @@ -126,7 +131,7 @@ def server_url_secret_prod(self) -> str: }, } encoded_payload = urllib.parse.quote(json.dumps(payload)) - return f"{base_url}/app/nasa-sba-sde/#/search?query={encoded_payload}" + return f"{base_url}/app/secret-prod/#/search?query={encoded_payload}" @property def server_url_prod(self) -> str: @@ -175,6 +180,7 @@ def workflow_status_button_color(self) -> str: 14: "btn-primary", 15: "btn-info", 16: "btn-secondary", + 17: "btn-light", } return color_choices[self.workflow_status] @@ -453,6 +459,15 @@ def save(self, *args, **kwargs): if not self.config_folder: self.config_folder = self._compute_config_folder_name() + if not self._state.adding: + old_status = Collection.objects.get(id=self.id).workflow_status + new_status = self.workflow_status + if old_status != new_status: + transition = (old_status, new_status) + if transition in STATUS_CHANGE_NOTIFICATIONS: + details = STATUS_CHANGE_NOTIFICATIONS[transition] + message = format_slack_message(self.name, details, self.id) + send_slack_message(message) # Call the parent class's save method super().save(*args, 
**kwargs) @@ -486,24 +501,20 @@ class Comments(models.Model): def __str__(self): return self.text + class WorkflowHistory(models.Model): - collection = models.ForeignKey( - Collection, on_delete=models.CASCADE, related_name="workflow_history", null=True - ) + collection = models.ForeignKey(Collection, on_delete=models.CASCADE, related_name="workflow_history", null=True) workflow_status = models.IntegerField( choices=WorkflowStatusChoices.choices, default=WorkflowStatusChoices.RESEARCH_IN_PROGRESS, ) - old_status = models.IntegerField( - choices=WorkflowStatusChoices.choices, null=True - ) + old_status = models.IntegerField(choices=WorkflowStatusChoices.choices, null=True) curated_by = models.ForeignKey(User, on_delete=models.DO_NOTHING, null=True, blank=True) created_at = models.DateTimeField(auto_now_add=True) - def __str__(self): - return (str(self.collection) + str(self.workflow_status)) - + return str(self.collection) + str(self.workflow_status) + @property def workflow_status_button_color(self) -> str: color_choices = { @@ -523,9 +534,11 @@ def workflow_status_button_color(self) -> str: 14: "btn-primary", 15: "btn-info", 16: "btn-secondary", + 17: "btn-light", } return color_choices[self.workflow_status] + @receiver(post_save, sender=Collection) def log_workflow_history(sender, instance, created, **kwargs): if instance.workflow_status != instance.old_workflow_status: @@ -533,13 +546,13 @@ def log_workflow_history(sender, instance, created, **kwargs): collection=instance, workflow_status=instance.workflow_status, curated_by=instance.curated_by, - old_status=instance.old_workflow_status + old_status=instance.old_workflow_status, ) @receiver(post_save, sender=Collection) def create_configs_on_status_change(sender, instance, created, **kwargs): - """ + """ Creates various config files on certain workflow status changes """ diff --git a/sde_collections/models/collection_choice_fields.py b/sde_collections/models/collection_choice_fields.py index 5d0a78e2..37ac9412 100644 --- a/sde_collections/models/collection_choice_fields.py +++ b/sde_collections/models/collection_choice_fields.py @@ -95,3 +95,4 @@ class WorkflowStatusChoices(models.IntegerChoices): PERFECT_ON_PROD = 14, "Perfect and on Production" LOW_PRIORITY_PROBLEMS_ON_PROD = 15, "Low Priority Problems on Production" HIGH_PRIORITY_PROBLEMS_ON_PROD = 16, "High Priority Problems on Production, only for old sources" + MERGE_PENDING = 17, "Code Merge Pending" diff --git a/sde_collections/models/pattern.py b/sde_collections/models/pattern.py index 203f3d9c..a1bd8044 100644 --- a/sde_collections/models/pattern.py +++ b/sde_collections/models/pattern.py @@ -1,9 +1,19 @@ import re from django.apps import apps -from django.db import models - -from ..pattern_interpreter import safe_f_string_evaluation +from django.core.exceptions import ValidationError +from django.db import models, transaction +from django.db.models.signals import post_save +from django.dispatch import receiver + +from sde_collections.tasks import resolve_title_pattern + +from ..utils.title_resolver import ( + is_valid_fstring, + is_valid_xpath, + parse_title, + resolve_title, +) from .collection_choice_fields import DocumentTypes @@ -129,30 +139,68 @@ class Meta: unique_together = ("collection", "match_pattern") +def validate_title_pattern(title_pattern_string): + parsed_title = parse_title(title_pattern_string) + + for element in parsed_title: + element_type, element_value = element + + if element_type == "xpath": + if not is_valid_xpath(element_value): + raise 
ValidationError(f"'xpath:{element_value}' is not a valid xpath.") + elif element_type == "brace": + try: + is_valid_fstring(element_value) + except ValueError as e: + raise ValidationError(str(e)) + + class TitlePattern(BaseMatchPattern): title_pattern = models.CharField( "Title Pattern", help_text="This is the pattern for the new title. You can either write an exact replacement string" " (no quotes required) or you can write sinequa-valid code", + validators=[validate_title_pattern], ) def apply(self) -> None: - CandidateURL = apps.get_model("sde_collections", "CandidateURL") matched_urls = self.matched_urls() updated_urls = [] + ResolvedTitle = apps.get_model("sde_collections", "ResolvedTitle") + ResolvedTitleError = apps.get_model("sde_collections", "ResolvedTitleError") for candidate_url in matched_urls: - context = {"url": candidate_url.url, "title": candidate_url.scraped_title} + context = { + "url": candidate_url.url, + "title": candidate_url.scraped_title, + "collection": self.collection.name, + } try: - generated_title = safe_f_string_evaluation(self.title_pattern, context) + generated_title = resolve_title(self.title_pattern, context) + + # check to see if the candidate url has an existing resolved title and delete it + ResolvedTitle.objects.filter(candidate_url=candidate_url).delete() + + resolved_title = ResolvedTitle.objects.create( + title_pattern=self, candidate_url=candidate_url, resolved_title=generated_title + ) + resolved_title.save() + candidate_url.generated_title = generated_title - updated_urls.append(candidate_url) - except ValueError as e: - print(f"Error applying title pattern to {candidate_url.url}: {e}") + candidate_url.save() + + except (ValueError, ValidationError) as e: + message = str(e) + resolved_title_error = ResolvedTitleError.objects.create( + title_pattern=self, candidate_url=candidate_url, error_string=message + ) + + status_code = re.search(r"Status code: (\d+)", message) + if status_code: + resolved_title_error.http_status_code = int(status_code.group(1)) - if updated_urls: - CandidateURL.objects.bulk_update(updated_urls, ["generated_title"]) + resolved_title_error.save() TitlePatternCandidateURL = TitlePattern.candidate_urls.through pattern_url_associations = [ @@ -196,3 +244,9 @@ class Meta: verbose_name = "Document Type Pattern" verbose_name_plural = "Document Type Patterns" unique_together = ("collection", "match_pattern") + + +@receiver(post_save, sender=TitlePattern) +def post_save_handler(sender, instance, created, **kwargs): + if created: + transaction.on_commit(lambda: resolve_title_pattern.delay(instance.pk)) diff --git a/sde_collections/pattern_interpreter.py b/sde_collections/pattern_interpreter.py deleted file mode 100644 index 4ae5d3aa..00000000 --- a/sde_collections/pattern_interpreter.py +++ /dev/null @@ -1,18 +0,0 @@ -import _ast -import ast - - -def safe_f_string_evaluation(pattern, context): - """Safely interpolates the variables in an f-string pattern using the provided context.""" - parsed = ast.parse(f"f'''{pattern}'''", mode="eval") - - # Walk through the AST to ensure it only contains safe expressions - for node in ast.walk(parsed): - if isinstance(node, _ast.FormattedValue): - if not isinstance(node.value, _ast.Name): - raise ValueError("Unsupported expression in f-string pattern.") - if node.value.id not in context: - raise ValueError(f"Variable {node.value.id} not allowed in f-string pattern.") - - compiled = compile(parsed, "", "eval") - return eval(compiled, {}, context) diff --git a/sde_collections/tasks.py 
b/sde_collections/tasks.py index 761d92a1..659f4cc8 100644 --- a/sde_collections/tasks.py +++ b/sde_collections/tasks.py @@ -3,6 +3,7 @@ import shutil import boto3 +from django.apps import apps from django.conf import settings from django.core import management from django.core.management.commands import loaddata @@ -31,9 +32,7 @@ def _get_data_to_import(collection, server_name): page = 1 while True: print(f"Getting page: {page}") - response = api.query( - page=page, collection_config_folder=collection.config_folder - ) + response = api.query(page=page, collection_config_folder=collection.config_folder) if response["cursorRowCount"] == 0: break @@ -74,9 +73,7 @@ def import_candidate_urls_from_api(server_name="test", collection_ids=[]): urls_file = f"{TEMP_FOLDER_NAME}/{collection.config_folder}.json" print("Getting responses from API") - data_to_import = _get_data_to_import( - server_name=server_name, collection=collection - ) + data_to_import = _get_data_to_import(server_name=server_name, collection=collection) print(f"Got {len(data_to_import)} records for {collection.config_folder}") print("Dumping django fixture to file") @@ -129,3 +126,10 @@ def pull_latest_collection_metadata_from_github(): aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY, ) s3_client.upload_file(FILENAME, s3_bucket_name, s3_key) + + +@celery_app.task() +def resolve_title_pattern(title_pattern_id): + TitlePattern = apps.get_model("sde_collections", "TitlePattern") + title_pattern = TitlePattern.objects.get(id=title_pattern_id) + title_pattern.apply() diff --git a/sde_collections/urls.py b/sde_collections/urls.py index 261207af..98a1df06 100644 --- a/sde_collections/urls.py +++ b/sde_collections/urls.py @@ -56,4 +56,5 @@ view=views.CandidateURLAPIView.as_view(), name="candidate-url-api", ), + path("titles-and-errors/", views.TitlesAndErrorsView.as_view(), name="titles-and-errors-list"), ] diff --git a/sde_collections/utils/slack_utils.py b/sde_collections/utils/slack_utils.py new file mode 100644 index 00000000..796d9b48 --- /dev/null +++ b/sde_collections/utils/slack_utils.py @@ -0,0 +1,100 @@ +import requests +from django.conf import settings + +from ..models.collection_choice_fields import WorkflowStatusChoices + +SLACK_ID_MAPPING = { + "Carson Davis": "@UESJLQXH6", + "Bishwas Praveen": "@U05QZUF182J", + "Xiang Li": "@U03PPLNDZA7", + "Shravan Vishwanathan": "@U056B4HMGEP", + "Advait Yogaonkar": "@U06L5SKQ5QA", + "Emily Foshee": "@UPKDARB9P", + "Ashish Acharya": "@UC97PNAF6", + "channel": "!here", +} + + +STATUS_CHANGE_NOTIFICATIONS = { + (WorkflowStatusChoices.RESEARCH_IN_PROGRESS, WorkflowStatusChoices.READY_FOR_ENGINEERING): { + "message": "Research on {name} is complete. Ready for engineering! :rocket:", + "tags": [ + SLACK_ID_MAPPING["Xiang Li"], + SLACK_ID_MAPPING["Shravan Vishwanathan"], + SLACK_ID_MAPPING["Advait Yogaonkar"], + ], + }, + (WorkflowStatusChoices.ENGINEERING_IN_PROGRESS, WorkflowStatusChoices.READY_FOR_CURATION): { + "message": "Engineering on {name} is complete. Ready for curation! :mag:", + "tags": [SLACK_ID_MAPPING["Emily Foshee"]], + }, + (WorkflowStatusChoices.CURATION_IN_PROGRESS, WorkflowStatusChoices.CURATED): { + "message": "Curation on {name} is complete. It's now curated! 
:checkered_flag:", + "tags": [ + SLACK_ID_MAPPING["Carson Davis"], + SLACK_ID_MAPPING["Bishwas Praveen"], + SLACK_ID_MAPPING["Ashish Acharya"], + ], + }, + (WorkflowStatusChoices.SECRET_DEPLOYMENT_STARTED, WorkflowStatusChoices.SECRET_DEPLOYMENT_FAILED): { + "message": "Alert: Secret deployment of {name} has failed! :warning:", + "tags": [ + SLACK_ID_MAPPING["Carson Davis"], + SLACK_ID_MAPPING["Bishwas Praveen"], + SLACK_ID_MAPPING["Ashish Acharya"], + ], + }, + (WorkflowStatusChoices.SECRET_DEPLOYMENT_STARTED, WorkflowStatusChoices.READY_FOR_LRM_QUALITY_CHECK): { + "message": "Indexing of {name} on Secret Prod completed successfully. Ready for LRM QC! :clipboard:", + "tags": [SLACK_ID_MAPPING["Shravan Vishwanathan"], SLACK_ID_MAPPING["Advait Yogaonkar"]], + }, + (WorkflowStatusChoices.READY_FOR_LRM_QUALITY_CHECK, WorkflowStatusChoices.READY_FOR_FINAL_QUALITY_CHECK): { + "message": "LRM QC passed for {name}. Ready for final quality check! :white_check_mark:", + "tags": [SLACK_ID_MAPPING["Emily Foshee"]], + }, + (WorkflowStatusChoices.READY_FOR_FINAL_QUALITY_CHECK, WorkflowStatusChoices.QUALITY_CHECK_FAILED): { + "message": "Quality check on {name} has failed. Changes needed! :x:", + "tags": [ + SLACK_ID_MAPPING["Xiang Li"], + SLACK_ID_MAPPING["Shravan Vishwanathan"], + SLACK_ID_MAPPING["Advait Yogaonkar"], + SLACK_ID_MAPPING["Carson Davis"], + SLACK_ID_MAPPING["Bishwas Praveen"], + SLACK_ID_MAPPING["Ashish Acharya"], + ], + }, + (WorkflowStatusChoices.READY_FOR_FINAL_QUALITY_CHECK, WorkflowStatusChoices.READY_FOR_PUBLIC_PROD): { + "message": "{name} has passed all quality checks and is ready for public production! :trophy:", + "tags": [ + SLACK_ID_MAPPING["Carson Davis"], + SLACK_ID_MAPPING["Bishwas Praveen"], + SLACK_ID_MAPPING["Ashish Acharya"], + ], + }, + (WorkflowStatusChoices.READY_FOR_FINAL_QUALITY_CHECK, WorkflowStatusChoices.LOW_PRIORITY_PROBLEMS_ON_PROD): { + "message": "{name} is now on Public Prod and is almost perfect, with minor issues noted. Please review! :memo:", + "tags": [SLACK_ID_MAPPING["channel"]], + }, + (WorkflowStatusChoices.READY_FOR_PUBLIC_PROD, WorkflowStatusChoices.PERFECT_ON_PROD): { + "message": "{name} is now live on Public Prod! Congrats team! 
:sparkles:", + "tags": [SLACK_ID_MAPPING["channel"]], + }, +} + + +def format_slack_message(name, details, collection_id): + message_template = details["message"] + tags = " ".join([f"<{user}>" for user in details["tags"]]) + link = f"https://sde-indexing-helper.nasa-impact.net/{collection_id}/" + linked_name = f"<{link}|{name}>" + return tags + " " + message_template.format(name=linked_name) + + +def send_slack_message(message): + webhook_url = settings.SLACK_WEBHOOK_URL + payload = {"text": message} + response = requests.post(webhook_url, json=payload) + if response.status_code != 200: + raise ValueError( + f"Request to Slack returned an error {response.status_code}, the response is:\n{response.text}" + ) diff --git a/sde_collections/utils/title_resolver.py b/sde_collections/utils/title_resolver.py new file mode 100644 index 00000000..b9171de3 --- /dev/null +++ b/sde_collections/utils/title_resolver.py @@ -0,0 +1,147 @@ +import _ast +import ast +import html as html_lib +import re +from typing import Any + +import requests +from lxml import etree, html +from unidecode import unidecode + + +def is_valid_xpath(xpath: str) -> bool: + try: + etree.XPath(xpath) + return True + except etree.XPathSyntaxError: + return False + + +def is_valid_fstring(pattern: str) -> bool: + context = { + "url": "", + "title": "", + "collection": "", + } + parsed = ast.parse(f"f'''{pattern}'''", mode="eval") + # Walk through the AST to ensure it only contains safe expressions + for node in ast.walk(parsed): + if isinstance(node, _ast.FormattedValue): + if not isinstance(node.value, _ast.Name): + raise ValueError("Unsupported expression in f-string pattern.") + if node.value.id not in context: + variables_allowed = ", ".join([key for key in context.keys()]) + raise ValueError( + f"Variable '{node.value.id}' not allowed in f-string pattern." + f" Allowed variables are: {variables_allowed}" + ) + + +def clean_text(text: str) -> str: + text_content = unidecode(text) + text_content = html_lib.unescape(text_content) + # remove tabs and newlines, replace them with a single space + text_content = re.sub(r"[\t\n\r]+", " ", text_content) + # remove multiple spaces + text_content = re.sub(r"\s+", " ", text_content) + # strip leading and trailing whitespace + text_content = text_content.strip() + return text_content + + +def resolve_brace(pattern: str, context: dict[str, Any]) -> str: + """Safely interpolates the variables in an f-string pattern using the provided context.""" + parsed = ast.parse(f"f'''{pattern}'''", mode="eval") + + is_valid_fstring(pattern) # Refactor this + + compiled = compile(parsed, "", "eval") + return str(eval(compiled, {}, context)) + + +def resolve_xpath(xpath: str, url: str) -> str: + if not is_valid_xpath(xpath): + raise ValueError(f"The xpath, {xpath}, is not valid.") + + response = requests.get(url) + + if response.ok: + tree = html.fromstring(response.content) + values = tree.xpath(xpath) + + if len(values) == 1: + if isinstance(values[0], str): + text_content = values[0] + else: + text_content = values[0].text + + if text_content: + text_content = clean_text(text_content) + return text_content + else: + raise ValueError(f"The element at the xpath, {xpath}, does not contain any text content.") + elif len(values) > 1: + raise ValueError(f"More than one element found for the xpath, {xpath}") + else: + raise ValueError(f"No element found for the xpath, {xpath}") + else: + raise ValueError(f"Failed to retrieve the {url}. 
Status code: {response.status_code}") + + +def parse_title(input_string: str) -> list[tuple[str, str]]: + brace_pattern = re.compile(r"\{([^\}]+)\}") + xpath_pattern = re.compile(r"xpath:(//[^\s]+)") + + result = [] + current_index = 0 + + while current_index < len(input_string): + # Try to match brace pattern + brace_match = brace_pattern.match(input_string, current_index) + if brace_match: + result.append(("brace", "{" + brace_match.group(1) + "}")) + current_index = brace_match.end() + continue + + # Try to match xpath pattern + xpath_match = xpath_pattern.match(input_string, current_index) + if xpath_match: + result.append(("xpath", xpath_match.group(1))) + current_index = xpath_match.end() + continue + + # Otherwise, accumulate as a normal string until the next special pattern + next_special_index = min( + ( + brace_pattern.search(input_string, current_index).start() + if brace_pattern.search(input_string, current_index) + else len(input_string) + ), + ( + xpath_pattern.search(input_string, current_index).start() + if xpath_pattern.search(input_string, current_index) + else len(input_string) + ), + ) + + result.append(("str", input_string[current_index:next_special_index])) + current_index = next_special_index + + return result + + +def resolve_title(raw_title: str, context: dict[str, Any]) -> str: + parsed_title = parse_title(raw_title) + final_string = "" + + for element in parsed_title: + element_type, element_value = element + + if element_type == "xpath": + final_string += resolve_xpath(element_value, context["url"]) + elif element_type == "brace": + final_string += resolve_brace(element_value, context) + elif element_type == "str": + final_string += element_value + + return final_string diff --git a/sde_collections/views.py b/sde_collections/views.py index af056607..595bc2ef 100644 --- a/sde_collections/views.py +++ b/sde_collections/views.py @@ -4,10 +4,10 @@ from django.contrib.auth import get_user_model from django.contrib.auth.mixins import LoginRequiredMixin from django.db import models -from django.shortcuts import get_object_or_404, redirect +from django.shortcuts import get_object_or_404, redirect, render from django.urls import reverse from django.utils import timezone -from django.views.generic import TemplateView +from django.views.generic import TemplateView, View from django.views.generic.detail import DetailView from django.views.generic.edit import DeleteView from django.views.generic.list import ListView @@ -18,8 +18,8 @@ from rest_framework.views import APIView from .forms import CollectionGithubIssueForm, CommentsForm, RequiredUrlForm -from .models.candidate_url import CandidateURL -from .models.collection import Collection, Comments, RequiredUrls, WorkflowHistory +from .models.candidate_url import CandidateURL, ResolvedTitle, ResolvedTitleError +from .models.collection import Collection, Comments, RequiredUrls,WorkflowHistory from .models.collection_choice_fields import ( ConnectorChoices, CurationStatusChoices, @@ -465,3 +465,24 @@ def get_context_data(self, **kwargs): context["differences"] = self.data return context + + +class ResolvedTitleListView(ListView): + model = ResolvedTitle + context_object_name = "resolved_titles" + + +class ResolvedTitleErrorListView(ListView): + model = ResolvedTitleError + context_object_name = "resolved_title_errors" + + +class TitlesAndErrorsView(View): + def get(self, request, *args, **kwargs): + resolved_titles = ResolvedTitle.objects.select_related("title_pattern", "candidate_url").all() + resolved_title_errors = 
ResolvedTitleError.objects.select_related("title_pattern", "candidate_url").all() + context = { + "resolved_titles": resolved_titles, + "resolved_title_errors": resolved_title_errors, + } + return render(request, "sde_collections/titles_and_errors_list.html", context) diff --git a/sde_indexing_helper/static/css/candidate_url_list.css b/sde_indexing_helper/static/css/candidate_url_list.css index 9568b431..3b45355e 100644 --- a/sde_indexing_helper/static/css/candidate_url_list.css +++ b/sde_indexing_helper/static/css/candidate_url_list.css @@ -322,6 +322,43 @@ letter-spacing: -0.02em; white-space: normal; } +.page-link{ + color:white !important; + border:0.5px solid !important; + margin-left:3px; + margin-right:3px; +} +.page-link:hover{ + background-color: #0066CA !important; + +} + +.page-item.disabled .page-link { + color:grey!important; +} +.dt-paging-input{ + color:white; +} + +.dt-paging-input input{ + background-color: #3F4A58; + color: white; + border:solid 0.5px !important; +} + +.dt-inputpaging{ + position: absolute; + right: 16px; + top: -27px; +} +.ml-auto{ + width:50%; +} + +.custom-select-sm{ + margin-left:5px; +} + .selected{ background-color: inherit !important; } @@ -352,6 +389,15 @@ div.dt-buttons .btn.processing:after { justify-content: space-between; } +.url-cell { + display:flex; + align-items: center; + justify-content: space-between; + } + + .url-icon { + color: #65B1EF; + } #match_pattern_input, #title_pattern_input { background: #3F4A58; border-radius: 4px; @@ -399,5 +445,4 @@ div.dt-buttons .btn.processing:after { .dropdown-item:hover{ background-color: #0066CA !important; - - } \ No newline at end of file + } diff --git a/sde_indexing_helper/static/css/collection_detail.css b/sde_indexing_helper/static/css/collection_detail.css index 244c9465..4eb5caec 100644 --- a/sde_indexing_helper/static/css/collection_detail.css +++ b/sde_indexing_helper/static/css/collection_detail.css @@ -19,7 +19,7 @@ margin-top: -3; } .comment { - background-color: #f8f9fa; + background-color: transparent; border: 1px solid #ddd; padding: 10px; margin-bottom: 10px; @@ -29,7 +29,6 @@ color: #007bff; } .comment span { - color: #6c6840; font-size: 0.9em; margin-bottom: 10px; } diff --git a/sde_indexing_helper/static/css/project.css b/sde_indexing_helper/static/css/project.css index 8150b59a..7beb44c1 100644 --- a/sde_indexing_helper/static/css/project.css +++ b/sde_indexing_helper/static/css/project.css @@ -50,7 +50,7 @@ #candidate_urls_table_wrapper div.dt-info:first-of-type { display: inline-block; - width: 25%; + /* width: 25%; */ } #candidate_urls_table_wrapper div.dt-length, #exclude_patterns_table_wrapper div.dt-length, #include_patterns_table_wrapper div.dt-length, #document_type_patterns_table_wrapper div.dt-length, #title_patterns_table_wrapper div.dt-length{ @@ -59,7 +59,8 @@ } #candidate_urls_table_wrapper div.dt-buttons { - width: 64%; + /* width: 64%; */ + float:right; justify-content: end; } @@ -234,6 +235,7 @@ body { .dt-info{ font-weight:900; font-size:16px; + margin-top:15px; } .buttons-csv, .customizeColumns{ @@ -357,3 +359,209 @@ body { box-shadow: 0px 8px 16px 0px #0037FA4D; } + +/* base_auth.html layout css */ + +.auth-wrapper { + height: 100vh; +} +.auth-col-1, .auth-col-2 { + width: 50%; +} + +.auth-col-1 { + background-image: url("../images/Content.png") !important; + height: 100vh; + background: no-repeat; +} +.auth-col-2-wrapper { + position: relative; +} + +.auth-sde-idx-helper { + justify-content: center; + display: flex; +} + +.auth-col-2 { + padding: 0; + 
background-color: rgba(5, 14, 25, 1); + margin: auto; +} + +.auth-content { + height: calc(100vh - 6rem); + display: flex; +} + +/* LOGIN PAGE CSS */ + +.signin { + padding-right: 30px; +} + +.login-links { + padding-top: 25px; + display: flex; + justify-content: center; + color: var(--NASA-DM-Link, rgba(101, 177, 239, 1)); + font-size: 13px; + cursor: pointer; +} + +.login-links a { + color: var(--NASA-DM-Link, rgba(101, 177, 239, 1)); +} + +#signup_form .form-label { + display: none; +} + +#id_password_helptext { + display: none; +} + +.login-card-wrapper { + width: 66%; +} + +.login-card { + margin-top: 70px; + background: rgba(21, 35, 46, 1); + padding: 32px 32px 40px 32px; + border: 1px solid var(--NASA-Secondary-Blue, rgba(167, 186, 205, 1)) +} + +.login { + justify-content: center; + align-items: center; + height: 100%; + margin-left: 0 !important; +} + +.login-title { +font-size: 32px; +font-weight: 600; +line-height: 48px; +letter-spacing: -0.03em; +text-align: center; +color: rgba(255, 255, 255, 1); +padding-bottom: 30px; +} + +.login-button { + background-color: rgba(0, 102, 202, 1) !important; + color: rgba(255, 255, 255, 1); + border: none !important; + width: 100%; +font-size: 15px; +font-weight: 500; +line-height: 17.58px; +border-radius: 5px; +padding: 11px 0; +cursor: pointer; +box-shadow: 0px 8px 16px 0px #0037FA4D; + +} + +#div_id_login .form-label, #div_id_password .form-label, #div_id_email .form-label{ + display: none; +} + +#placeholder { + color: rgba(255, 255, 255, 1); +} + +.title-wrapper h4 { + padding-bottom: 0; +} + +.title-wrapper{ + padding-bottom: 30px; +} + +.title-wrapper p { + margin-bottom: 0; + text-align: center; + color: rgba(255, 255, 255, 1); +} + +#id_password1_helptext ul { + font-size: 12px; + color: rgba(255, 255, 255, 1); +} + +.auth-col-2-wrapper input { + background: var(--NASA-Table, rgba(63, 74, 88, 1)); + border-radius: 5px; + color: rgba(255, 255, 255, 1); + padding: 9px 14px; + background-image: none !important; +} + +.auth-col-2-wrapper .form-control:focus { + background: var(--NASA-Table, rgba(63, 74, 88, 1)); + color: rgba(255, 255, 255, 1); +} + +.signup-errors { + margin: 0; +} + +/* FOOTER CSS */ + +.footer-wrapper { + position: absolute; + bottom: 0; + background: transparent; + width: 100%; + color: rgba(255, 255, 255, 1); + display: flex; + justify-content: center; +} + +.footer { + width: 93%; + height: 6rem; +} + +.footer-container { + border-top: 1px solid rgba(255, 255, 255, 1); + display: flex; + flex-direction: row; + justify-content: space-between; + align-items: center; + padding: 0; +} + +.footer-right-side-links { + display: flex; + align-items: center; +} + +.footer-nasa-link, .footer-nasa-link:hover, .footer-nasa-link:focus, .footer-copyright-link { + padding-right: 20px; + color: rgba(255, 255, 255, 1) !important; +} + +.footer-copyright-link a, .footer-contact-us-link:hover, .footer-contact-us-link:focus, .footer-contact-us-link { + color: rgba(255, 255, 255, 1); +} + +.footer-left-side-link{ + background-image: url("../images/NASA\ Logo.svg"); + width: 47px; +height: 38px; +} + +.footer-contact-us-link, .footer-left-side-link, .footer-nasa-link, .footer-copyright-link { + cursor: pointer; +} + +.base-html-footer-wrapper { + display: flex; + justify-content: center; +} + + + diff --git a/sde_indexing_helper/static/images/Content.png b/sde_indexing_helper/static/images/Content.png new file mode 100644 index 00000000..6d23288c Binary files /dev/null and b/sde_indexing_helper/static/images/Content.png differ diff 
--git a/sde_indexing_helper/static/images/NASA Logo.svg b/sde_indexing_helper/static/images/NASA Logo.svg new file mode 100644 index 00000000..748978f5 --- /dev/null +++ b/sde_indexing_helper/static/images/NASA Logo.svg @@ -0,0 +1,56 @@ [56 added lines of SVG markup omitted] diff --git a/sde_indexing_helper/static/js/candidate_url_list.js b/sde_indexing_helper/static/js/candidate_url_list.js index 9ba7650b..14f63a17 100644 --- a/sde_indexing_helper/static/js/candidate_url_list.js +++ b/sde_indexing_helper/static/js/candidate_url_list.js @@ -14,6 +14,14 @@ var matchPatternTypeMap = { "Multi-URL Pattern": 2, }; var uniqueId; //used for logic related to contents on column customization modal +const dict = { + 1: "Images", + 2: "Data", + 3: "Documentation", + 4: "Software and Tools", + 5: "Missions and Instruments", + 6: "Training and Education", +}; //fix table allignment when changing around tabs $('a[data-toggle="tab"]').on("shown.bs.tab", function (e) { @@ -35,11 +43,17 @@ function modalContents(tableName) { var checkboxCount = $("#modalBody input[type='checkbox']").length; if (checkboxCount > 0 && tableName === uniqueId) { - $modal = $("#hideShowColumnsModal").modal(); + $modal = $("#hideShowColumnsModal").modal({ + backdrop: 'static', + keyboard: true, + }); return; } - $modal = $("#hideShowColumnsModal").modal(); + $modal = $("#hideShowColumnsModal").modal({ + backdrop: 'static', + keyboard: true, + }); var table = $(tableName).DataTable(); if (tableName !== uniqueId) { $("#modalBody").html(""); @@ -83,20 +97,81 @@ function initializeDataTable() { var false_icon = 'close'; var candidate_urls_table = $("#candidate_urls_table").DataTable({ - // scrollY: true, - lengthMenu: [ - [25, 50, 100, 500], - ["Show 25", "Show 50", "Show 100", "Show 500"], - ], pageLength: 100, + colReorder: true, stateSave: true, + layout: { + bottomEnd: 'inputPaging', + topEnd: null, + topStart: { + info:true, + pageLength: { + menu: [[25, 50, 100, 500],["Show 25", "Show 50", "Show 100", "Show 500"]] + }, + buttons: [ + "spacer", + "csv", + "spacer", + { + text: "Customize Columns", + className: "customizeColumns", + action: function () { + modalContents("#candidate_urls_table"); + }, + }, + ], + } + }, serverSide: true, orderCellsTop: true, pagingType: "input", - dom: "ilBrtip", buttons: [ - "spacer", - "csv", + { + extend: "csv", + exportOptions: { + columns: [0, 11, 2, 12, 10], + }, + customize: function (csv) { + var lines = csv.split("\n"); + // Reorder the header columns + var headers = lines[0].split(","); + var reorderedHeaders = [ + headers[0], + headers[3], + headers[4], + headers[1], + headers[2], + ]; + lines[0] = reorderedHeaders.join(","); + + // Add filter information in the footer + const secondRowFilters = [ + "Applied filters:", + `URL: ${$("#candidateUrlFilter").val() || "No input"}`, + `Exclude: ${$(".dropdown-1").val() || "No selection"}`, + `Scraped Title: ${ + $("#candidateNewTitleFilter").val() || "No input" + }`, + `New Title: ${dict[$(".dropdown-5").val()] || "No input"}`, + `Document Type: ${ + $("#candidateScrapedTitleFilter").val() || "No selection" + }`, + ]; + var appliedFiltersInfo = secondRowFilters.join("\n"); + + // Remove the second row with the filters + if (lines.length > 2) { + lines.splice(1, 1); + } + let alteredLines = []; + lines.forEach((line) => { + let newLine = ""; + newLine = line.replace("open_in_new",""); + alteredLines.push(newLine); + }) + return alteredLines.join("\n") + appliedFiltersInfo;
+ }, + }, "spacer", { text: "Customize Columns", @@ -158,10 +233,36 @@ function initializeDataTable() { { data: "match_pattern_type", visible: false, searchable: false }, { data: "candidate_urls_count", visible: false, searchable: false }, { data: "excluded", visible: false, searchable: false }, + { + data: null, + render: function (data, type, row) { + if (!row.document_type) return "Select"; + return dict[row.document_type]; + }, + visible: false, + }, + { + data: null, + render: function (data, type, row) { + const excludedDict = { + true: "Yes", + false: "No", + }; + return excludedDict[row.excluded]; + }, + visible: false, + }, + { + data: null, + render: function (data, type, row) { + return row.generated_title; + }, + visible: false, + }, ], createdRow: function (row, data, dataIndex) { if (data["excluded"]) { - $(row).attr("style", "background-color: #ab387d !important"); + $(row).attr("style", "background-color: rgba(255, 61, 87, 0.36) !important"); } }, }); @@ -189,7 +290,6 @@ function initializeDataTable() { var exclude_patterns_table = $("#exclude_patterns_table").DataTable({ // scrollY: true, - serverSide: true, dom: "lBrtip", buttons: [ { @@ -247,8 +347,8 @@ function initializeDataTable() { { data: "candidate_urls_count", class: "text-center whiteText", - sortable: false, - }, + sortable: true, + }, { data: null, sortable: false, @@ -301,7 +401,6 @@ function initializeDataTable() { ], pageLength: 100, orderCellsTop: true, - serverSide: true, ajax: `/api/include-patterns/?format=datatables&collection_id=${collection_id}`, initComplete: function (data) { var table = $("#include_patterns_table").DataTable(); @@ -334,7 +433,7 @@ function initializeDataTable() { { data: "candidate_urls_count", class: "text-center whiteText", - sortable: false, + sortable: true, }, { data: null, @@ -358,7 +457,6 @@ function initializeDataTable() { var title_patterns_table = $("#title_patterns_table").DataTable({ // scrollY: true, - serverSide: true, dom: "lBrtip", buttons: [ { @@ -416,7 +514,7 @@ function initializeDataTable() { { data: "candidate_urls_count", class: "text-center whiteText", - sortable: false, + sortable: true, }, { data: null, @@ -466,7 +564,6 @@ function initializeDataTable() { }, }, ], - serverSide: true, lengthMenu: [ [25, 50, 100, 500], ["Show 25", "Show 50", "Show 100", "Show 500"], @@ -536,7 +633,7 @@ function initializeDataTable() { { data: "candidate_urls_count", class: "text-center whiteText", - sortable: false, + sortable: true, }, { data: null, @@ -581,6 +678,7 @@ function handleTabsClick() { function setupClickHandlers() { handleHideorShowSubmitButton(); + handleHideorShowKeypress(); handleAddNewPatternClick(); handleDeleteDocumentTypeButtonClick(); @@ -602,11 +700,12 @@ function getURLColumn() { return { data: "url", render: function (data, type, row) { - return ` open_in_new ${remove_protocol( + return `
${remove_protocol( data - )}`; + )} + open_in_new
`; }, }; } @@ -666,7 +765,9 @@ function getDocumentTypeColumn() { button_text = data ? dict[data] : "Select"; button_color = data ? "btn-success" : "btn-secondary"; return ` -
+