Skip to content

Commit

Permalink
Merge branch 'dev' into 107-doc-type-filter-not-working
Browse files Browse the repository at this point in the history
  • Loading branch information
Kshaw362 committed Jun 5, 2024
2 parents 7013c83 + f3c38a0 commit be9413b
Show file tree
Hide file tree
Showing 53 changed files with 1,935 additions and 191 deletions.
18 changes: 18 additions & 0 deletions .env_sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
CELERY_BROKER_URL=""
CELERY_FLOWER_PASSWORD=""
CELERY_FLOWER_USER=""
DATABASE_URL='postgresql://<user>:<password>@localhost:5432/<database>'
DJANGO_ACCOUNT_ALLOW_REGISTRATION=False
DJANGO_AWS_ACCESS_KEY_ID=""
DJANGO_AWS_SECRET_ACCESS_KEY=""
DJANGO_AWS_STORAGE_BUCKET_NAME=""
GITHUB_ACCESS_TOKEN=""
GITHUB_BRANCH_FOR_WEBAPP=""
IPYTHONDIR=""
REDIS_URL=""
SINEQUA_CONFIGS_GITHUB_REPO=""
SINEQUA_CONFIGS_REPO_DEV_BRANCH=""
SINEQUA_CONFIGS_REPO_MASTER_BRANCH=""
SINEQUA_CONFIGS_REPO_WEBAPP_PR_BRANCH=""
SLACK_WEBHOOK_URL=""
USE_DOCKER=no
4 changes: 4 additions & 0 deletions .envs/.local/.django
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,7 @@ SINEQUA_CONFIGS_GITHUB_REPO='NASA-IMPACT/sde-backend'
SINEQUA_CONFIGS_REPO_MASTER_BRANCH='master'
SINEQUA_CONFIGS_REPO_DEV_BRANCH='dev'
SINEQUA_CONFIGS_REPO_WEBAPP_PR_BRANCH='dummy_branch'

# Slack Webhook
# ------------------------------------------------------------------------------
# NOTE(review): a live webhook token was committed on this line — revoke it in Slack and keep the real value out of git
SLACK_WEBHOOK_URL='https://hooks.slack.com/services/<redacted>'
34 changes: 32 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,46 @@ Moved to [settings](http://cookiecutter-django.readthedocs.io/en/latest/settings
## Basic Commands

### Building The Project

```bash
$ docker-compose -f local.yml build
```

### Running The Necessary Containers

```bash
$ docker-compose -f local.yml up
```

### Non-docker Local Setup

If you want to run the project without docker, you will need the following:

- Postgres

Run the following commands:

````
$ psql postgres
postgres=# create database <some database>;
postgres=# create user <some username> with password '<some password>';
postgres=# grant all privileges on database <some database> to <some username>;
# This next one is optional, but it will allow the user to create databases for testing
postgres=# alter role <some username> with superuser;
```
Now copy .env_sample in the root directory to .env. Note that in this setup we don't end up using the .envs/ directory, but instead we use the .env file.
Replace the variables in this line in the .env file: `DATABASE_URL='postgresql://<user>:<password>@localhost:5432/<database>'` with your user, password and database. Change the port if you have a different one.
You don't need to change any other variable, unless you want to use specific modules (like the GitHub code will require a GitHub token etc).
There is a section in `config/settings/base.py` which reads environment variables from this file. The line should look like `READ_DOT_ENV_FILE = env.bool("DJANGO_READ_DOT_ENV_FILE", default=True)`. Make sure either the default is True here (which it should already be), or run `export DJANGO_READ_DOT_ENV_FILE=True` in your terminal.
Run `python manage.py runserver` to test if your setup worked. You might have to run an initial migration with `python manage.py migrate`.
### Setting Up Your Users
- To create a **normal user account**, just go to Sign Up and fill out the form. Once you submit it, you'll see a "Verify Your E-mail Address" page. Go to your console to see a simulated email verification message. Copy the link into your browser. Now the user's email should be verified and ready to go.
Expand Down Expand Up @@ -144,7 +175,7 @@ To run a celery worker:
```bash
cd sde_indexing_helper
celery -A config.celery_app worker -l info
```
````

Please note: For Celery's import magic to work, it is important _where_ the celery commands are run. If you are in the same folder with _manage.py_, you should be right.

Expand Down Expand Up @@ -186,7 +217,6 @@ Run against the files :

It's usually a good idea to run the hooks against all of the files when adding new hooks (usually `pre-commit` will only run on the changed files during git hooks).

### Sentry

Sentry is an error logging aggregator service. You can sign up for a free account at <https://sentry.io/signup/?code=cookiecutter> or download and host it yourself.
Expand Down
1 change: 1 addition & 0 deletions config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,3 +338,4 @@
# Branch names used when pushing Sinequa config changes — presumably consumed
# by the GitHub integration; confirm against sde_collections' GitHub helpers.
SINEQUA_CONFIGS_REPO_MASTER_BRANCH = env("SINEQUA_CONFIGS_REPO_MASTER_BRANCH")
SINEQUA_CONFIGS_REPO_DEV_BRANCH = env("SINEQUA_CONFIGS_REPO_DEV_BRANCH")
SINEQUA_CONFIGS_REPO_WEBAPP_PR_BRANCH = env("SINEQUA_CONFIGS_REPO_WEBAPP_PR_BRANCH")
# Slack incoming webhook URL (see the "Slack Webhook" section of the env files).
SLACK_WEBHOOK_URL = env("SLACK_WEBHOOK_URL")
9 changes: 4 additions & 5 deletions config/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
from django.views import defaults as default_views

admin.site.site_header = (
"SDE Indexing Administration" # default: "Django Administration"
"SDE Indexing Helper Administration" # default: "Django Administration"
)
admin.site.index_title = "SDE Indexing" # default: "Site administration"
admin.site.site_title = "SDE Indexing" # default: "Django site admin"
admin.site.index_title = "SDE Indexing Helper" # default: "Site administration"
admin.site.site_title = "SDE Indexing Helper" # default: "Django site admin"

urlpatterns = [
path("", include("sde_collections.urls", namespace="sde_collections")),
Expand All @@ -18,8 +18,7 @@
# User management
path("users/", include("sde_indexing_helper.users.urls", namespace="users")),
path("accounts/", include("allauth.urls")),
path("api-auth/", include("rest_framework.urls"))
# Your stuff: custom urls includes go here
path("api-auth/", include("rest_framework.urls")),
] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)


Expand Down
24 changes: 24 additions & 0 deletions feedback/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from django.db import models
from django.utils import timezone

from sde_collections.utils.slack_utils import send_slack_message


class Feedback(models.Model):
name = models.CharField(max_length=150)
Expand All @@ -17,8 +19,30 @@ class Meta:
def save(self, *args, **kwargs):
    """
    Persist the feedback row and, for newly created rows, notify Slack.

    Fix: the notification is now sent only AFTER super().save() succeeds,
    so a failed INSERT can no longer produce a Slack message for a row
    that was never stored.  Notification failures are swallowed (printed)
    so a Slack outage never blocks feedback submission.
    """
    if not self.id:
        self.created_at = timezone.now()
    is_new = self._state.adding  # capture before save() marks the row as persisted
    super().save(*args, **kwargs)
    if is_new:
        try:
            send_slack_message(self.format_notification_message())
        except Exception as e:
            # best-effort delivery; consider switching print to logging
            print(f"Failed to send slack message: {e}")

def format_notification_message(self):
    """Return the Slack notification text describing this Feedback instance."""
    details = [
        "<!here> New Feedback Received : ",
        f"Name: {self.name}",
        f"Email: {self.email}",
        f"Subject: {self.subject}",
        f"Comments: {self.comments}",
        f"Source: {self.source}",
        f"Received on: {self.created_at.strftime('%Y-%m-%d %H:%M:%S')}",
    ]
    return "\n".join(details)


class ContentCurationRequest(models.Model):
name = models.CharField(max_length=150)
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[tool.black]
line-length = 120
2 changes: 2 additions & 0 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,5 @@ PyGithub==2.2.0
tqdm==4.66.1
xmltodict==0.13.0
django-cors-headers==4.3.1
unidecode==1.3.8
lxml==4.9.2
64 changes: 64 additions & 0 deletions scripts/bulk_create_sources_on_webapp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from sde_collections.models.collection import Collection
from sde_collections.models.collection_choice_fields import Divisions

# Spreadsheet division label -> Divisions choice value.
# Labels not listed here cause the source to be skipped (see get_division_id).
DIVISION_MAPPING = {
    "Helio": Divisions.HELIOPHYSICS,
    "Astro": Divisions.ASTROPHYSICS,
    "PDS": Divisions.PLANETARY,
    "Earth": Divisions.EARTH_SCIENCE,
    "BPS": Divisions.BIOLOGY,
    "Multiple": Divisions.GENERAL,
}

# Template row: replace with real entries copied from the spreadsheet before running.
sources = [
    {
        "Name": "Source name",
        "Link": "Base link to the source",
        "Division": "Division of the source from the spread sheet",
        "Notes": "Any notes available from the spreadsheet",
    },
]


def get_division_id(division_name):
    """Map a spreadsheet division label to a Divisions value, or None if unknown."""
    return DIVISION_MAPPING.get(division_name.strip())


def create_collection(source):
    """
    Create one Collection from a spreadsheet row dict.

    Returns True when a new collection was created; False when the division
    is unrecognized, a collection with the same name or url already exists,
    or the save fails.
    """
    name, link = source["Name"], source["Link"]
    division_text, notes = source["Division"], source["Notes"]

    division_id = get_division_id(division_text)
    if division_id is None:
        print(f"No valid division found for '{division_text}'. Skipping creation for {name}.")
        return False

    try:
        if Collection.objects.filter(name=name).exists():
            print(f"Collection with name '{name}' already exists. Skipping.")
            return False
        if Collection.objects.filter(url=link).exists():
            print(f"Collection with link '{link}' already exists. Skipping.")
            return False
        Collection(name=name, url=link, division=division_id, notes=notes).save()
        print(f"Collection '{name}' created successfully.")
        return True
    except Exception as e:
        print(f"Failed to create collection '{name}': {e}")
        return False


def main():
    """Attempt to create every listed source and report how many were new."""
    created_count = sum(1 for source in sources if create_collection(source))
    print(f"Total new collections created: {created_count}")


if __name__ == "__main__":
    main()
11 changes: 11 additions & 0 deletions scripts/quality_and_indexing/add_perfect_to_prod_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
adds collections marked as ready for public prod to the public query
after running this code, you will need to merge in the webapp branch
"""

from sde_collections.models.collection import Collection
from sde_collections.models.collection_choice_fields import WorkflowStatusChoices

for collection in Collection.objects.filter(workflow_status=WorkflowStatusChoices.READY_FOR_PUBLIC_PROD):
print(collection.config_folder)
collection.add_to_public_query()
66 changes: 66 additions & 0 deletions scripts/quality_and_indexing/change_statuses_on_webapp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""
take emily's notes from slack and change the appropriate statuses in the webapp
"""

from sde_collections.models.collection import Collection
from sde_collections.models.collection_choice_fields import WorkflowStatusChoices

RESEARCH_IN_PROGRESS = 1, "Research in Progress"
READY_FOR_ENGINEERING = 2, "Ready for Engineering"
ENGINEERING_IN_PROGRESS = 3, "Engineering in Progress"
READY_FOR_CURATION = 4, "Ready for Curation"
CURATION_IN_PROGRESS = 5, "Curation in Progress"
CURATED = 6, "Curated"
QUALITY_FIXED = 7, "Quality Fixed"
SECRET_DEPLOYMENT_STARTED = 8, "Secret Deployment Started"
SECRET_DEPLOYMENT_FAILED = 9, "Secret Deployment Failed"
READY_FOR_LRM_QUALITY_CHECK = 10, "Ready for LRM Quality Check"
READY_FOR_FINAL_QUALITY_CHECK = 11, "Ready for Quality Check"
QUALITY_CHECK_FAILED = 12, "Quality Check Failed"
READY_FOR_PUBLIC_PROD = 13, "Ready for Public Production"
PERFECT_ON_PROD = 14, "Perfect and on Production"
LOW_PRIORITY_PROBLEMS_ON_PROD = 15, "Low Priority Problems on Production"
HIGH_PRIORITY_PROBLEMS_ON_PROD = 16, "High Priority Problems on Production, only for old sources"
MERGE_PENDING = 17, "Code Merge Pending"

perfect = [
# "WIND_Spacecraft",
# "gamma_ray_data_tools_core_package",
# "land_processes_distributed_active_archive_center",
# "mdscc_deep_space_network",
# "HelioAnalytics",
# "nasa_infrared_telescope_facility_irtf",
# "gmao_fluid",
# "starchild_a_learning_center_for_young_astronomers",
# "voyager_Cosmic_Ray_Subsystem",
"ldas_land_data_assimilatin_system",
"ppi_node",
]

low_priority = [
"nasa_applied_sciences",
"parker_solar_probe",
"virtual_wave_observatory",
"explorer_program_acquisition",
"lisa_consortium",
"astropy",
"fermi_at_gsfc",
"microobservatory_robotic_telescope_network",
]

for config in perfect:
print(config)
collection = Collection.objects.get(config_folder=config)
collection.workflow_status = WorkflowStatusChoices.PERFECT_ON_PROD
collection.save()

for config in low_priority:
print(config)
collection = Collection.objects.get(config_folder=config)
collection.workflow_status = WorkflowStatusChoices.LOW_PRIORITY_PROBLEMS_ON_PROD
collection.save()

# for config in perfect:
# collection = Collection.objects.get(config_folder=config)
# collection.workflow_status = WorkflowStatusChoices.PERFECT_ON_PROD
# collection.save()
60 changes: 60 additions & 0 deletions scripts/quality_and_indexing/find_missing_folders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""you run this in the shell on the server to find sources to index and find any that are missing plugin folders"""

import os

from sde_collections.models.collection import Collection
from sde_collections.models.collection_choice_fields import WorkflowStatusChoices
from sde_collections.utils.github_helper import GitHubHandler


def get_sources_to_fix():
    """Collections whose quality issues were fixed and now need re-indexing."""
    return Collection.objects.filter(workflow_status=WorkflowStatusChoices.QUALITY_FIXED)


def get_sources_to_index():
    """Collections that have finished curation and are ready to be indexed."""
    return Collection.objects.filter(workflow_status=WorkflowStatusChoices.CURATED)


def get_all_relevant_sources():
    """Union of the quality-fixed and curated collections."""
    relevant_statuses = [WorkflowStatusChoices.QUALITY_FIXED, WorkflowStatusChoices.CURATED]
    return Collection.objects.filter(workflow_status__in=relevant_statuses)


def get_missing_folders(collections, base_directory):
    """Return the collections whose <base_directory>/<config_folder>/default.xml
    does not exist in the GitHub repo (i.e. the config folder is missing)."""
    handler = GitHubHandler()
    return [
        collection
        for collection in collections
        if not handler.check_file_exists(os.path.join(base_directory, collection.config_folder, "default.xml"))
    ]


def print_configs(queryset):
    """Print each collection's config_folder, then a 60-dash divider and a blank line."""
    for collection in queryset:
        print(collection.config_folder)
    print("-" * 60)
    print()


print("sources_to_fix")
sources_to_fix = get_sources_to_fix()
print_configs(sources_to_fix)


print("sources_to_index")
sources_to_index = get_sources_to_index()
print_configs(sources_to_index)


all_relevant_sources = get_all_relevant_sources()

print("missing_scraper_folders")
missing_folders = get_missing_folders(all_relevant_sources, "sources/scrapers/")
print_configs(missing_folders)


print("missing_plugin_folders")
missing_folders = get_missing_folders(all_relevant_sources, "sources/SDE/")
print_configs(missing_folders)
18 changes: 18 additions & 0 deletions scripts/xpath_cleanup/find_xpath_patterns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# flake8: noqa
"""this script is used to find all the xpath patterns in the database, so that they can be mapped to new patterns in xpath_mappings.py"""

from sde_collections.models.pattern import TitlePattern

# All title patterns that contain an xpath expression.
xpath_patterns = TitlePattern.objects.filter(title_pattern__contains="xpath")

print("there are", xpath_patterns.count(), "xpath patterns in the database")

for xpath_pattern in xpath_patterns:
    print(xpath_pattern.title_pattern)
    # xpath_pattern.candidate_urls.all() lists the urls each pattern applies to,
    # if you need them for the mapping work.
    print()

# not every xpath pattern has a candidate url, but I went ahead and fixed all of them anyway
Loading

0 comments on commit be9413b

Please sign in to comment.