Skip to content

Commit

Permalink
Merge pull request #505 from NASA-IMPACT/feature-update_pipelines_with_source
Browse files Browse the repository at this point in the history

Feature update pipelines with source
  • Loading branch information
CarsonDavis authored Nov 8, 2023
2 parents cb9c726 + 1544196 commit 5b5cbd8
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 8 deletions.
11 changes: 8 additions & 3 deletions config_generation/delete_config_folders.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import shutil

from config import collections_to_delete as collection_names
from config import source


def delete_folders_by_name(collection_names, directory):
Expand Down Expand Up @@ -41,6 +42,10 @@ def delete_xml_files_by_name(collection_names, directory):
print(f"Error deleting file {file_path}: {e.strerror}")


delete_folders_by_name(collection_names, "../sinequa_configs/sources/SMD/")
delete_xml_files_by_name(collection_names, "../sinequa_configs/commands/")
delete_xml_files_by_name(collection_names, "../sinequa_configs/jobs/")
delete_folders_by_name(collection_names, f"../sinequa_configs/sources/{source}/")
delete_xml_files_by_name(
collection_names, "../sinequa_configs/commands/"
) # this might delete commands from any source, however not a huge issue right now
delete_xml_files_by_name(
collection_names, "../sinequa_configs/jobs/"
) # this might delete jobs from any source, however not a huge issue right now
9 changes: 7 additions & 2 deletions config_generation/delete_server_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@

from db_to_xml_file_based import XmlEditor

from config import collections_to_delete, indexes_to_delete_from, name_of_delete_file
from config import (
collections_to_delete,
indexes_to_delete_from,
name_of_delete_file,
source,
)

COMMAND_FILES_PATH = "../sinequa_configs/commands/"
DELETE_COMMAND_TEMPLATE_PATH = "xmls/delete_template.xml"
Expand All @@ -11,7 +16,7 @@

for collection in collections_to_delete:
for index in indexes_to_delete_from:
sql = f"delete from {index} where collection='{collection}'"
sql = f"delete from {index} where collection='/{source}/{collection}/'"
command_file.update_or_add_element_value(element_name="SQL", element_value=sql, add_duplicate=True)
file_name = f"{COMMAND_FILES_PATH}{name_of_delete_file}.xml"
command_file._update_config_xml(file_name)
6 changes: 5 additions & 1 deletion config_generation/generate_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from db_to_xml_file_based import XmlEditor
from generate_jobs import ParallelJobCreator

from config import source


# note that there is an xmls/ folder that contains the templates
class CommandGenerator:
Expand All @@ -13,12 +15,14 @@ def __init__(
command_batch_name,
template_root_path="xmls/",
command_root_path="../sinequa_configs/commands/",
source=source,
):
self.command_batch_name = command_batch_name # this is used to name the commands
self.template_root_path = template_root_path
self.command_template_path = f"{template_root_path}command_template.xml"
self.job_command_template_path = f"{template_root_path}job_command_template.xml"
self.command_root_path = command_root_path
self.source = source

def _generate_job_command_name(self, collection_name):
# TODO
Expand All @@ -39,7 +43,7 @@ def generate_command_file(self, collection_name, commands):
command_file = XmlEditor(self.command_template_path)
command_file.update_or_add_element_value(
element_name="WhereClause",
element_value=f"collection='/SMD/{collection_name}'",
element_value=f"collection='/{self.source}/{collection_name}'",
)
for command in commands:
command_file.add_column_update(
Expand Down
6 changes: 4 additions & 2 deletions config_generation/generate_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from db_to_xml_file_based import XmlEditor

from config import available_indexers, collection_list, date_of_batch
from config import available_indexers, collection_list, date_of_batch, source


class ParallelJobCreator:
Expand All @@ -16,6 +16,7 @@ def __init__(
collection_list,
template_root_path="xmls/",
job_path_root="../sinequa_configs/jobs/",
source=source,
):
"""
these default values rely on the old file structure, where the sinequa_configs were a
Expand All @@ -27,6 +28,7 @@ def __init__(
self.template_root_path = template_root_path
self.joblist_template_path = f"{template_root_path}joblist_template.xml"
self.job_path_root = job_path_root
self.source = source

def _create_job_name(self, collection_name):
"""
Expand Down Expand Up @@ -57,7 +59,7 @@ def _create_collection_jobs(self):
# create single jobs to run each collection
for collection in self.collection_list:
job = XmlEditor(f"{self.template_root_path}job_template.xml")
job.update_or_add_element_value("Collection", f"/SMD/{collection}/")
job.update_or_add_element_value("Collection", f"/{self.source}/{collection}/")
job._update_config_xml(f"{self.job_path_root}{self._create_job_name(collection)}")

def make_all_parallel_jobs(self):
Expand Down

0 comments on commit 5b5cbd8

Please sign in to comment.