-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'dev' into 107-doc-type-filter-not-working
- Loading branch information
Showing
53 changed files
with
1,935 additions
and
191 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
CELERY_BROKER_URL="" | ||
CELERY_FLOWER_PASSWORD="" | ||
CELERY_FLOWER_USER="" | ||
DATABASE_URL='postgresql://<user>:<password>@localhost:5432/<database>' | ||
DJANGO_ACCOUNT_ALLOW_REGISTRATION=False | ||
DJANGO_AWS_ACCESS_KEY_ID="" | ||
DJANGO_AWS_SECRET_ACCESS_KEY="" | ||
DJANGO_AWS_STORAGE_BUCKET_NAME="" | ||
GITHUB_ACCESS_TOKEN="" | ||
GITHUB_BRANCH_FOR_WEBAPP="" | ||
IPYTHONDIR="" | ||
REDIS_URL="" | ||
SINEQUA_CONFIGS_GITHUB_REPO="" | ||
SINEQUA_CONFIGS_REPO_DEV_BRANCH="" | ||
SINEQUA_CONFIGS_REPO_MASTER_BRANCH="" | ||
SINEQUA_CONFIGS_REPO_WEBAPP_PR_BRANCH="" | ||
SLACK_WEBHOOK_URL="" | ||
USE_DOCKER=no |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[tool.black] | ||
line-length = 120 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,3 +28,5 @@ PyGithub==2.2.0 | |
tqdm==4.66.1 | ||
xmltodict==0.13.0 | ||
django-cors-headers==4.3.1 | ||
unidecode==1.3.8 | ||
lxml==4.9.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from sde_collections.models.collection import Collection | ||
from sde_collections.models.collection_choice_fields import Divisions | ||
|
||
DIVISION_MAPPING = { | ||
"Helio": Divisions.HELIOPHYSICS, | ||
"Astro": Divisions.ASTROPHYSICS, | ||
"PDS": Divisions.PLANETARY, | ||
"Earth": Divisions.EARTH_SCIENCE, | ||
"BPS": Divisions.BIOLOGY, | ||
"Multiple": Divisions.GENERAL, | ||
} | ||
|
||
sources = [ | ||
{ | ||
"Name": "Source name", | ||
"Link": "Base link to the source", | ||
"Division": "Division of the source from the spread sheet", | ||
"Notes": "Any notes available from the spreadsheet", | ||
}, | ||
] | ||
|
||
|
||
def get_division_id(division_name):
    """Translate a spreadsheet division label into its ``DIVISION_MAPPING`` value.

    Args:
        division_name: Division label such as "Helio" or "Astro"; surrounding
            whitespace is ignored.

    Returns:
        The mapped division, or ``None`` when the label is missing, is not a
        string, is blank, or has no entry in ``DIVISION_MAPPING``.
    """
    # A missing (None) or non-text value cannot be stripped; report it as
    # "no division" instead of raising AttributeError.
    if not isinstance(division_name, str):
        return None
    label = division_name.strip()
    if not label:
        # The mapping has no empty key, so a blank label can never match.
        return None
    return DIVISION_MAPPING.get(label)
|
||
|
||
def create_collection(source):
    """Create a ``Collection`` from one source entry unless it is unusable or a duplicate.

    Args:
        source: Mapping with the keys "Name", "Link", "Division" and "Notes".

    Returns:
        ``True`` when a new collection was saved. ``False`` when the division
        label cannot be mapped, when a collection with the same name or url
        already exists, or when any exception is raised while querying or saving.
    """
    # Keys are read in the same order as before: Name, Link, Division, Notes.
    name, link, division_text, notes = (source[key] for key in ("Name", "Link", "Division", "Notes"))

    division = get_division_id(division_text)
    if division is None:
        print(f"No valid division found for '{division_text}'. Skipping creation for {name}.")
        return False

    try:
        manager = Collection.objects
        # Duplicate checks: first by name, then by url.
        if manager.filter(name=name).exists():
            print(f"Collection with name '{name}' already exists. Skipping.")
            return False
        if manager.filter(url=link).exists():
            print(f"Collection with link '{link}' already exists. Skipping.")
            return False
        Collection(name=name, url=link, division=division, notes=notes).save()
        print(f"Collection '{name}' created successfully.")
    except Exception as e:
        print(f"Failed to create collection '{name}': {e}")
        return False
    return True
|
||
|
||
def main():
    """Try to create a collection for every entry in ``sources`` and print how many were created."""
    # create_collection returns True only for newly saved collections.
    created_count = sum(1 for source in sources if create_collection(source))
    print(f"Total new collections created: {created_count}")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
""" | ||
adds collections marked as ready for public prod to the public query | ||
after running this code, you will need to merge in the webapp branch | ||
""" | ||
|
||
from sde_collections.models.collection import Collection | ||
from sde_collections.models.collection_choice_fields import WorkflowStatusChoices | ||
|
||
# Collections whose workflow status is READY_FOR_PUBLIC_PROD.
ready_for_public_prod = Collection.objects.filter(workflow_status=WorkflowStatusChoices.READY_FOR_PUBLIC_PROD)

for collection in ready_for_public_prod:
    # Show which collection is being handled, then add it to the public query.
    print(collection.config_folder)
    collection.add_to_public_query()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
""" | ||
take emily's notes from slack and change the appropriate statuses in the webapp | ||
""" | ||
|
||
from sde_collections.models.collection import Collection | ||
from sde_collections.models.collection_choice_fields import WorkflowStatusChoices | ||
|
||
RESEARCH_IN_PROGRESS = 1, "Research in Progress" | ||
READY_FOR_ENGINEERING = 2, "Ready for Engineering" | ||
ENGINEERING_IN_PROGRESS = 3, "Engineering in Progress" | ||
READY_FOR_CURATION = 4, "Ready for Curation" | ||
CURATION_IN_PROGRESS = 5, "Curation in Progress" | ||
CURATED = 6, "Curated" | ||
QUALITY_FIXED = 7, "Quality Fixed" | ||
SECRET_DEPLOYMENT_STARTED = 8, "Secret Deployment Started" | ||
SECRET_DEPLOYMENT_FAILED = 9, "Secret Deployment Failed" | ||
READY_FOR_LRM_QUALITY_CHECK = 10, "Ready for LRM Quality Check" | ||
READY_FOR_FINAL_QUALITY_CHECK = 11, "Ready for Quality Check" | ||
QUALITY_CHECK_FAILED = 12, "Quality Check Failed" | ||
READY_FOR_PUBLIC_PROD = 13, "Ready for Public Production" | ||
PERFECT_ON_PROD = 14, "Perfect and on Production" | ||
LOW_PRIORITY_PROBLEMS_ON_PROD = 15, "Low Priority Problems on Production" | ||
HIGH_PRIORITY_PROBLEMS_ON_PROD = 16, "High Priority Problems on Production, only for old sources" | ||
MERGE_PENDING = 17, "Code Merge Pending" | ||
|
||
perfect = [ | ||
# "WIND_Spacecraft", | ||
# "gamma_ray_data_tools_core_package", | ||
# "land_processes_distributed_active_archive_center", | ||
# "mdscc_deep_space_network", | ||
# "HelioAnalytics", | ||
# "nasa_infrared_telescope_facility_irtf", | ||
# "gmao_fluid", | ||
# "starchild_a_learning_center_for_young_astronomers", | ||
# "voyager_Cosmic_Ray_Subsystem", | ||
"ldas_land_data_assimilatin_system", | ||
"ppi_node", | ||
] | ||
|
||
low_priority = [ | ||
"nasa_applied_sciences", | ||
"parker_solar_probe", | ||
"virtual_wave_observatory", | ||
"explorer_program_acquisition", | ||
"lisa_consortium", | ||
"astropy", | ||
"fermi_at_gsfc", | ||
"microobservatory_robotic_telescope_network", | ||
] | ||
|
||
def _set_workflow_status(config_folders, workflow_status):
    """Set ``workflow_status`` on the collection behind each config folder.

    Each config folder is printed before it is looked up. A folder with no
    matching collection is reported and skipped, so one unknown name does not
    abort the run and leave the remaining folders unprocessed.

    Args:
        config_folders: Iterable of ``config_folder`` values to update.
        workflow_status: The ``WorkflowStatusChoices`` value to assign.

    Returns:
        The list of config folders for which no collection was found.
    """
    missing = []
    for config in config_folders:
        print(config)
        try:
            collection = Collection.objects.get(config_folder=config)
        except Collection.DoesNotExist:
            print(f"No collection found for config folder '{config}'. Skipping.")
            missing.append(config)
            continue
        collection.workflow_status = workflow_status
        collection.save()
    return missing


_set_workflow_status(perfect, WorkflowStatusChoices.PERFECT_ON_PROD)
_set_workflow_status(low_priority, WorkflowStatusChoices.LOW_PRIORITY_PROBLEMS_ON_PROD)
|
||
# for config in perfect: | ||
# collection = Collection.objects.get(config_folder=config) | ||
# collection.workflow_status = WorkflowStatusChoices.PERFECT_ON_PROD | ||
# collection.save() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
"""you run this in the shell on the server to find sources to index and find any that are missing plugin folders""" | ||
|
||
import os | ||
|
||
from sde_collections.models.collection import Collection | ||
from sde_collections.models.collection_choice_fields import WorkflowStatusChoices | ||
from sde_collections.utils.github_helper import GitHubHandler | ||
|
||
|
||
def get_sources_to_fix():
    """Return the collections whose workflow status is ``QUALITY_FIXED``."""
    wanted_statuses = [WorkflowStatusChoices.QUALITY_FIXED]
    return Collection.objects.filter(workflow_status__in=wanted_statuses)
|
||
|
||
def get_sources_to_index():
    """Return the collections whose workflow status is ``CURATED``."""
    wanted_statuses = [WorkflowStatusChoices.CURATED]
    return Collection.objects.filter(workflow_status__in=wanted_statuses)
|
||
|
||
def get_all_relevant_sources():
    """Return the collections whose workflow status is ``QUALITY_FIXED`` or ``CURATED``."""
    wanted_statuses = [WorkflowStatusChoices.QUALITY_FIXED, WorkflowStatusChoices.CURATED]
    return Collection.objects.filter(workflow_status__in=wanted_statuses)
|
||
|
||
def get_missing_folders(collections, base_directory):
    """Return the collections that have no ``default.xml`` under ``base_directory``.

    Args:
        collections: Iterable of objects exposing a ``config_folder`` attribute.
        base_directory: Path prefix joined with each ``config_folder`` and
            ``"default.xml"`` to form the path that is checked.

    Returns:
        A list, in input order, of the collections for which
        ``GitHubHandler.check_file_exists`` returned a falsy value.
    """
    handler = GitHubHandler()

    def config_file_path(collection):
        # <base_directory>/<config_folder>/default.xml
        return os.path.join(base_directory, collection.config_folder, "default.xml")

    return [
        collection for collection in collections if not handler.check_file_exists(config_file_path(collection))
    ]
|
||
|
||
def print_configs(queryset):
    """Print the ``config_folder`` of every item, then a 60-dash separator and a blank line.

    Args:
        queryset: Iterable of objects exposing a ``config_folder`` attribute.
    """
    for entry in queryset:
        print(entry.config_folder)
    # Separator line followed by one empty line.
    print("---" * 20, end="\n\n")
|
||
|
||
# Build the (lazy) querysets up front; the names stay available in the shell afterwards.
sources_to_fix = get_sources_to_fix()
sources_to_index = get_sources_to_index()

# One labelled listing per workflow status.
for report_title, report_sources in (
    ("sources_to_fix", sources_to_fix),
    ("sources_to_index", sources_to_index),
):
    print(report_title)
    print_configs(report_sources)

all_relevant_sources = get_all_relevant_sources()

# Run the same default.xml check against the scraper and the plugin directories.
for report_title, base_directory in (
    ("missing_scraper_folders", "sources/scrapers/"),
    ("missing_plugin_folders", "sources/SDE/"),
):
    print(report_title)
    missing_folders = get_missing_folders(all_relevant_sources, base_directory)
    print_configs(missing_folders)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# flake8: noqa | ||
"""this script is used to find all the xpath patterns in the database, so that they can be mapped to new patterns in xpath_mappings.py""" | ||
|
||
from sde_collections.models.pattern import TitlePattern | ||
|
||
# Every title pattern whose pattern text contains "xpath".
xpath_patterns = TitlePattern.objects.filter(title_pattern__contains="xpath")

print("there are", xpath_patterns.count(), "xpath patterns in the database")

# Print each pattern followed by a blank line.
for xpath_pattern in xpath_patterns:
    print(xpath_pattern.title_pattern)
    # for url in xpath_pattern.candidate_urls.all():
    #     print(url.url)
    print()
|
||
# not every xpath pattern has a candidate url, but I went ahead and fixed all of them anyway |
Oops, something went wrong.