From edd58a121ca15684fa9e64fdbc92485b112d6049 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Thu, 5 Oct 2023 10:51:43 +0200 Subject: [PATCH 01/17] progress logging --- cdci_data_analysis/analysis/renku_helper.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cdci_data_analysis/analysis/renku_helper.py b/cdci_data_analysis/analysis/renku_helper.py index 7d4cc9380..24191f6de 100644 --- a/cdci_data_analysis/analysis/renku_helper.py +++ b/cdci_data_analysis/analysis/renku_helper.py @@ -7,7 +7,7 @@ import giturlparse import copy -from git import Repo, Actor +from git import Repo, Actor, RemoteProgress from configparser import ConfigParser from ..app_logging import app_logging @@ -16,6 +16,13 @@ from .hash import make_hash logger = app_logging.getLogger('renku_helper') +progress_logger = app_logging.getLogger('progress_git_commands_renku_helper') + + +class MyProgressPrinter(RemoteProgress): + def update(self, op_code, cur_count, max_count=None, message=""): + message = message or "NO MESSAGE" + progress_logger.info(f"op_code: {op_code}, cur_count: {cur_count}, max_count: {max_count}, message: {message}") def push_api_code(api_code, @@ -161,7 +168,7 @@ def clone_renku_repo(renku_repository_url, repo_dir=None, renku_gitlab_ssh_key_p # TODO or store known hosts on build/boot git_ssh_cmd = f'ssh -i {renku_gitlab_ssh_key_path} -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no' - repo = Repo.clone_from(renku_repository_url, repo_dir, branch='master', env=dict(GIT_SSH_COMMAND=git_ssh_cmd)) + repo = Repo.clone_from(renku_repository_url, repo_dir, branch='master', env=dict(GIT_SSH_COMMAND=git_ssh_cmd), progress=MyProgressPrinter()) logger.info(f'repository {renku_repository_url} successfully cloned') From a554ef49677308f768adf7a9f1706fca733abd5d Mon Sep 17 00:00:00 2001 From: burnout87 Date: Thu, 5 Oct 2023 10:53:02 +0200 Subject: [PATCH 02/17] update test renku gitlab url --- cdci_data_analysis/pytest_fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdci_data_analysis/pytest_fixtures.py b/cdci_data_analysis/pytest_fixtures.py index 6371da90a..19705a3a7 100644 --- a/cdci_data_analysis/pytest_fixtures.py +++ b/cdci_data_analysis/pytest_fixtures.py @@ -484,7 +484,7 @@ def dispatcher_test_conf_with_renku_options_fn(dispatcher_test_conf_fn): f.write(f_default.read()) f.write('\n renku_options:' - '\n renku_gitlab_repository_url: "git@renkulab.io:gabriele.barni/old-test-dispatcher-endpoint.git"' + '\n renku_gitlab_repository_url: "git@gitlab.renkulab.io:gabriele.barni/old-test-dispatcher-endpoint.git"' '\n renku_base_project_url: "http://renkulab.io/projects"' f'\n ssh_key_path: "{os.getenv("SSH_KEY_FILE", "ssh_key_file")}"') From 250b1126f1b72af11e2b802beed3551a6c041805 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Thu, 5 Oct 2023 14:14:16 +0200 Subject: [PATCH 03/17] using argument logger --- cdci_data_analysis/analysis/email_helper.py | 32 ++++++++------------- cdci_data_analysis/flask_app/app.py | 3 +- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/cdci_data_analysis/analysis/email_helper.py b/cdci_data_analysis/analysis/email_helper.py index 2e1b755e2..f9130d325 100644 --- a/cdci_data_analysis/analysis/email_helper.py +++ b/cdci_data_analysis/analysis/email_helper.py @@ -17,7 +17,6 @@ import glob import black import base64 -import logging from urllib import parse import zlib import json @@ -31,8 +30,6 @@ from datetime import datetime -logger = logging.getLogger() - num_email_sending_max_tries = 5 email_sending_retry_sleep_s = .5 @@ -44,7 +41,7 @@ class EMailNotSent(BadRequest): pass -def timestamp2isot(timestamp_or_string: typing.Union[str, float]): +def timestamp2isot(timestamp_or_string: typing.Union[str, float], logger): try: timestamp_or_string = validate_time(timestamp_or_string).strftime("%Y-%m-%d %H:%M:%S") except (ValueError, OverflowError, TypeError, OSError) as e: @@ -89,7 +86,7 @@ def invalid_email_line_length(body): return False # TODO: not currently fully used, not critical, but should finish this too since it will make nice short permanent urls -def compress_request_url_params(request_url, consider_args=['selected_catalog', 'string_like_name']): +def compress_request_url_params(request_url, logger, consider_args=['selected_catalog', 'string_like_name']): parsed_url = parse.urlparse(request_url) parsed_qs = parse.parse_qs(parsed_url.query) @@ -135,7 +132,7 @@ def generate_products_url_from_par_dict(products_url, par_dict) -> str: request_url = '%s?%s' % (products_url, urlencode(par_dict)) return request_url -def wrap_python_code(code, max_length=100, max_str_length=None): +def wrap_python_code(code, logger, max_length=100, max_str_length=None): # this black currently does not split strings without spaces @@ -178,7 +175,7 @@ def check_scw_list_length( return False -def get_first_submitted_email_time(scratch_dir): +def get_first_submitted_email_time(scratch_dir, logger): first_submitted_email_time = None submitted_email_pattern = os.path.join( scratch_dir, @@ -220,8 +217,7 @@ def send_incident_report_email( decoded_token, incident_content=None, incident_time=None, - scratch_dir=None, - sentry_dsn=None): + scratch_dir=None): sending_time = time_.time() @@ -264,8 +260,7 @@ def send_incident_report_email( scratch_dir=scratch_dir, smtp_server_password=config.smtp_server_password, sending_time=sending_time, - logger=logger, - sentry_dsn=sentry_dsn) + logger=logger) store_incident_report_email_info(message, scratch_dir, sending_time=sending_time) @@ -286,8 +281,7 @@ def send_job_email( time_request=None, request_url="", api_code="", - scratch_dir=None, - sentry_dsn=None): + scratch_dir=None): sending_time = time_.time() # let's get the needed email template; @@ -394,8 +388,7 @@ def send_job_email( sending_time=sending_time, scratch_dir=scratch_dir, logger=logger, - attachment=api_code_email_attachment, - sentry_dsn=sentry_dsn) + attachment=api_code_email_attachment) store_status_email_info(message, status, scratch_dir, sending_time=sending_time, first_submitted_time=time_request) @@ -416,8 +409,7 @@ def send_email(smtp_server, logger, sending_time=None, scratch_dir=None, - attachment=None, - sentry_dsn=None + attachment=None ): server = None @@ -502,7 +494,7 @@ def send_email(smtp_server, server.quit() -def store_status_email_info(message, status, scratch_dir, sending_time=None, first_submitted_time=None): +def store_status_email_info(message, status, scratch_dir, logger, sending_time=None, first_submitted_time=None): path_email_history_folder = os.path.join(scratch_dir, 'email_history') current_time = time_.time() if not os.path.exists(path_email_history_folder): @@ -605,7 +597,7 @@ def log_email_sending_info(logger, status, time_request, scratch_dir, job_id, ad logger.info(f"logging email sending attempt into {email_history_log_fn} file") -def is_email_to_send_run_query(logger, status, time_original_request, scratch_dir, job_id, config, decoded_token=None, sentry_dsn=None): +def is_email_to_send_run_query(logger, status, time_original_request, scratch_dir, job_id, config, decoded_token=None): log_additional_info_obj = {} sending_ok = False time_check = time_.time() @@ -692,7 +684,7 @@ def is_email_to_send_run_query(logger, status, time_original_request, scratch_di return sending_ok -def is_email_to_send_callback(logger, status, time_original_request, scratch_dir, config, job_id, decoded_token=None, sentry_dsn=None): +def is_email_to_send_callback(logger, status, time_original_request, scratch_dir, config, job_id, decoded_token=None): log_additional_info_obj = {} sending_ok = False time_check = time_.time() diff --git a/cdci_data_analysis/flask_app/app.py b/cdci_data_analysis/flask_app/app.py index 983dcf2e2..54153173c 100644 --- a/cdci_data_analysis/flask_app/app.py +++ b/cdci_data_analysis/flask_app/app.py @@ -1030,8 +1030,7 @@ def report_incident(): decoded_token=decoded_token, incident_content=incident_content, incident_time=incident_time, - scratch_dir=scratch_dir, - sentry_dsn=sentry_dsn + scratch_dir=scratch_dir ) report_incident_status = 'incident report email successfully sent' except email_helper.EMailNotSent as e: From 029f2590075e72a6c34664ee73b16fd7c0b647e0 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 6 Oct 2023 16:30:21 +0200 Subject: [PATCH 04/17] setting up a temp job_id, then renaming scratch_dir --- .../flask_app/dispatcher_query.py | 74 +++++++++++++++---- 1 file changed, 59 insertions(+), 15 deletions(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index 938624d9f..b685c220a 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -244,7 +244,52 @@ def __init__(self, app, # TODO is also the case of call_back to handle ? if not data_server_call_back: self.set_instrument(self.instrument_name, roles, email) - verbose = self.par_dic.get('verbose', 'False') == 'True' + # verbose = self.par_dic.get('verbose', 'False') == 'True' + # try: + # self.set_temp_dir(self.par_dic['session_id'], verbose=verbose) + # except Exception as e: + # sentry.capture_message(f"problem creating temp directory: {e}") + # + # raise InternalError("we have encountered an internal error! " + # "Our team is notified and is working on it. We are sorry! " + # "When we find a solution we will try to reach you", status_code=500) + # if self.instrument is not None and not isinstance(self.instrument, str): + # self.instrument.parse_inputs_files( + # par_dic=self.par_dic, + # request=request, + # temp_dir=self.temp_dir, + # verbose=verbose, + # use_scws=self.use_scws, + # sentry_dsn=self.sentry_dsn + # ) + # self.par_dic = self.instrument.set_pars_from_dic(self.par_dic, verbose=verbose) + + # TODO: if not callback! + # if 'query_status' not in self.par_dic: + # raise MissingRequestParameter('no query_status!') + + if not (data_server_call_back or resolve_job_url): + query_status = self.par_dic['query_status'] + self.job_id = None + if query_status == 'new': + # this will overwrite any job_id provided, it should be validated or ignored + #if 'job_id' in self.par_dic: + # self.job_id = self.par_dic['job_id'] + # self.validate_job_id() + + # let's generate a temporary job_id used for the creation of the scratch_dir + self.generate_job_id() + temp_job_id = self.job_id + else: + if 'job_id' not in self.par_dic: + raise RequestNotUnderstood( + f"job_id must be present if query_status != \"new\" (it is \"{query_status}\")") + + self.job_id = self.par_dic['job_id'] + + # verbose = self.par_dic.get('verbose', 'False') == 'True' + self.set_scratch_dir(self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) + if not data_server_call_back: try: self.set_temp_dir(self.par_dic['session_id'], verbose=verbose) except Exception as e: @@ -263,25 +308,18 @@ def __init__(self, app, sentry_dsn=self.sentry_dsn ) self.par_dic = self.instrument.set_pars_from_dic(self.par_dic, verbose=verbose) - - # TODO: if not callback! - # if 'query_status' not in self.par_dic: - # raise MissingRequestParameter('no query_status!') - + # update the job_id if not (data_server_call_back or resolve_job_url): query_status = self.par_dic['query_status'] self.job_id = None if query_status == 'new': - # this will overwrite any job_id provided, it should be validated or ignored - #if 'job_id' in self.par_dic: - # self.job_id = self.par_dic['job_id'] - # self.validate_job_id() - provided_job_id = self.par_dic.get('job_id', None) if provided_job_id == "": # frontend sends this provided_job_id = None + # let's generate the definitive job_id self.generate_job_id() + self.update_scratch_dir_job_id(old_job_id=temp_job_id) if provided_job_id is not None and self.job_id != provided_job_id: raise RequestNotUnderstood(( @@ -295,9 +333,6 @@ def __init__(self, app, self.job_id = self.par_dic['job_id'] - self.set_scratch_dir( - self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) - self.log_query_progression("before move_temp_content") self.move_temp_content() self.log_query_progression("after move_temp_content") @@ -745,7 +780,7 @@ def set_temp_dir(self, session_id, job_id=None, verbose=False): if job_id is not None: suffix += '_jid_'+job_id - td = tempfile.mkdtemp(suffix=suffix) + td = tempfile.mkdtemp(suffix=suffix, dir=self.scratch_dir) self.temp_dir = td def move_temp_content(self): @@ -755,6 +790,15 @@ def move_temp_content(self): file_full_path = os.path.join(self.temp_dir, f) shutil.copy(file_full_path, self.scratch_dir) + def update_scratch_dir_job_id(self, old_job_id=None): + if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir) \ + and os.path.exists(self.scratch_dir): + if old_job_id is not None: + new_scratch_dir_name = self.scratch_dir.replace(old_job_id, self.job_id) + os.rename(self.scratch_dir, new_scratch_dir_name) + self.scratch_dir = new_scratch_dir_name + + def clear_temp_dir(self): if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir): shutil.rmtree(self.temp_dir) From 92246efd11ee543d889ca2e7fd3e8ede02fc4661 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 6 Oct 2023 17:50:33 +0200 Subject: [PATCH 05/17] missing logger --- cdci_data_analysis/analysis/email_helper.py | 11 ++++++----- cdci_data_analysis/flask_app/dispatcher_query.py | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cdci_data_analysis/analysis/email_helper.py b/cdci_data_analysis/analysis/email_helper.py index f9130d325..6b79d4f3c 100644 --- a/cdci_data_analysis/analysis/email_helper.py +++ b/cdci_data_analysis/analysis/email_helper.py @@ -23,7 +23,7 @@ from jinja2 import Environment, FileSystemLoader from bs4 import BeautifulSoup - +from ..app_logging import app_logging from ..analysis.exceptions import BadRequest, MissingRequestParameter from ..analysis.hash import make_hash from ..analysis.time_helper import validate_time @@ -33,6 +33,7 @@ num_email_sending_max_tries = 5 email_sending_retry_sleep_s = .5 +email_helper_logger = app_logging.getLogger('email_helper') class MultipleDoneEmail(BadRequest): pass @@ -41,11 +42,11 @@ class EMailNotSent(BadRequest): pass -def timestamp2isot(timestamp_or_string: typing.Union[str, float], logger): +def timestamp2isot(timestamp_or_string: typing.Union[str, float]): try: timestamp_or_string = validate_time(timestamp_or_string).strftime("%Y-%m-%d %H:%M:%S") except (ValueError, OverflowError, TypeError, OSError) as e: - logger.warning(f'Error when constructing the datetime object from the timestamp {timestamp_or_string}:\n{e}') + email_helper_logger.warning(f'Error when constructing the datetime object from the timestamp {timestamp_or_string}:\n{e}') raise EMailNotSent(f"Email not sent: {e}") return timestamp_or_string @@ -293,9 +294,9 @@ def send_job_email( # api_code = adapt_line_length_api_code(api_code, line_break="\n", add_line_continuation="\\") api_code_no_token = re.sub('"token": ".*?"', '"token": ""', api_code) - api_code_no_token = wrap_python_code(api_code_no_token) + api_code_no_token = wrap_python_code(api_code_no_token, logger) - api_code = wrap_python_code(api_code) + api_code = wrap_python_code(api_code, logger) api_code_too_long = invalid_email_line_length(api_code) or invalid_email_line_length(api_code_no_token) api_code_email_attachment = None diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index b685c220a..208ddfcd0 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -1093,7 +1093,7 @@ def run_call_back(self, status_kw_name='action') -> typing.Tuple[str, typing.Uni url=self.app.config['conf'].products_url + "/dispatch-data" ) time_request = time_original_request - time_request_first_submitted = email_helper.get_first_submitted_email_time(self.scratch_dir) + time_request_first_submitted = email_helper.get_first_submitted_email_time(self.scratch_dir, self.logger) if time_request_first_submitted is not None: time_request = time_request_first_submitted @@ -1879,7 +1879,7 @@ def run_query(self, off_line=False, disp_conf=None): url=self.app.config['conf'].products_url + "/dispatch-data" ) time_request = self.time_request - time_request_first_submitted = email_helper.get_first_submitted_email_time(self.scratch_dir) + time_request_first_submitted = email_helper.get_first_submitted_email_time(self.scratch_dir, self.logger) if time_request_first_submitted is not None: time_request = time_request_first_submitted From 90e491469176ff1d226d408b4cb5472a322fd3e6 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 6 Oct 2023 17:51:20 +0200 Subject: [PATCH 06/17] proper time_helper logger --- cdci_data_analysis/analysis/time_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdci_data_analysis/analysis/time_helper.py b/cdci_data_analysis/analysis/time_helper.py index bf32911f1..a056e2651 100644 --- a/cdci_data_analysis/analysis/time_helper.py +++ b/cdci_data_analysis/analysis/time_helper.py @@ -3,7 +3,7 @@ from ..app_logging import app_logging -logger = app_logging.getLogger('drupal_helper') +logger = app_logging.getLogger('time_helper') def validate_time(timestamp_to_validate): try: From a5ce65e5d10d2578d20b36a5e28d596f45d6cdcd Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 6 Oct 2023 18:19:39 +0200 Subject: [PATCH 07/17] missing logger args --- cdci_data_analysis/analysis/email_helper.py | 2 +- tests/test_job_management.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cdci_data_analysis/analysis/email_helper.py b/cdci_data_analysis/analysis/email_helper.py index 6b79d4f3c..a074714f1 100644 --- a/cdci_data_analysis/analysis/email_helper.py +++ b/cdci_data_analysis/analysis/email_helper.py @@ -391,7 +391,7 @@ def send_job_email( logger=logger, attachment=api_code_email_attachment) - store_status_email_info(message, status, scratch_dir, sending_time=sending_time, first_submitted_time=time_request) + store_status_email_info(message, status, scratch_dir, logger, sending_time=sending_time, first_submitted_time=time_request) return message diff --git a/tests/test_job_management.py b/tests/test_job_management.py index 947f8226a..2c661fc8e 100644 --- a/tests/test_job_management.py +++ b/tests/test_job_management.py @@ -2385,7 +2385,7 @@ def test_email_compress_request_url(): par_str="01"*10000, )) - compressed_url = compress_request_url_params(url, consider_args=['par_str']) + compressed_url = compress_request_url_params(url, logger, consider_args=['par_str']) assert len(compressed_url) < 200 assert len(url) > 10000 @@ -2411,7 +2411,7 @@ def x(arg): scwl_dict = {"scw_list": "115000860010.001,115000870010.001,115000980010.001,115000990010.001,115001000010.001,115001010010.001,115001020010.001,115001030010.001,115001040010.001,115001050010.001,115001060010.001,117100210010.001,118100040010.001,118100050010.001,118900100010.001,118900120010.001,118900130010.001,118900140010.001,119000020010.001,119000030010.001,119000040010.001,119000050010.001,119000190010.001,119900370010.001,119900480010.001,119900490010.001,119900500010.001,119900510010.001,119900520010.001,119900530010.001,119900540010.001,119900550010.001,119900560010.001,119900570010.001,119900670010.001,119900680010.001,119900690010.001,119900700010.001,119900710010.001,119900720010.001,119900730010.001,119900740010.001,119900750010.001,119900760010.001,119900770010.001,119900880010.001,119900890010.001,119900900010.001,119900910010."} """ - c = wrap_python_code(code, max_length=max_length) + c = wrap_python_code(code, logger, max_length=max_length) print("wrapped:\n", c) From 69aba5236a7d01e0ccc2d6c9ee7f6e2f728c4156 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 6 Oct 2023 20:11:33 +0200 Subject: [PATCH 08/17] re-setting scratch_dir --- .../flask_app/dispatcher_query.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index 208ddfcd0..b106d5e0c 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -151,6 +151,7 @@ def __init__(self, app, raise RequestNotUnderstood("job_id must be present during a call_back") if data_server_call_back: # this can be set since it's a call_back and job_id and session_id are available + print(f"before setting scratch_dir: job_id: {self.par_dic['job_id']} callback: {data_server_call_back}, resolve_job_url: {resolve_job_url}") self.set_scratch_dir(session_id=self.par_dic['session_id'], job_id=self.par_dic['job_id']) self.set_scws_call_back_related_params() else: @@ -242,9 +243,10 @@ def __init__(self, app, # TODO why here and not at the beginning ? # self.set_sentry_client() # TODO is also the case of call_back to handle ? + temp_job_id = None if not data_server_call_back: self.set_instrument(self.instrument_name, roles, email) - # verbose = self.par_dic.get('verbose', 'False') == 'True' + verbose = self.par_dic.get('verbose', 'False') == 'True' # try: # self.set_temp_dir(self.par_dic['session_id'], verbose=verbose) # except Exception as e: @@ -279,15 +281,15 @@ def __init__(self, app, # let's generate a temporary job_id used for the creation of the scratch_dir self.generate_job_id() - temp_job_id = self.job_id else: if 'job_id' not in self.par_dic: raise RequestNotUnderstood( f"job_id must be present if query_status != \"new\" (it is \"{query_status}\")") self.job_id = self.par_dic['job_id'] + temp_job_id = self.job_id + print(f"temp_jpb_id set to: {temp_job_id}") - # verbose = self.par_dic.get('verbose', 'False') == 'True' self.set_scratch_dir(self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) if not data_server_call_back: try: @@ -319,7 +321,6 @@ def __init__(self, app, # let's generate the definitive job_id self.generate_job_id() - self.update_scratch_dir_job_id(old_job_id=temp_job_id) if provided_job_id is not None and self.job_id != provided_job_id: raise RequestNotUnderstood(( @@ -332,6 +333,8 @@ def __init__(self, app, f"job_id must be present if query_status != \"new\" (it is \"{query_status}\")") self.job_id = self.par_dic['job_id'] + # self.update_scratch_dir_job_id(old_job_id=temp_job_id) + self.set_scratch_dir(self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) self.log_query_progression("before move_temp_content") self.move_temp_content() @@ -762,7 +765,7 @@ def set_scratch_dir(self, session_id, job_id=None, verbose=False): wd=FilePath(file_dir=wd).path) if alias_workdir is not None: wd = wd+'_aliased' - + print(f"creating scratch_dir: {wd}") wd = FilePath(file_dir=wd) wd.mkdir() self.scratch_dir = wd.path @@ -794,9 +797,11 @@ def update_scratch_dir_job_id(self, old_job_id=None): if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir) \ and os.path.exists(self.scratch_dir): if old_job_id is not None: - new_scratch_dir_name = self.scratch_dir.replace(old_job_id, self.job_id) + new_scratch_dir_name = self.scratch_dir.replace('_jid_' + old_job_id, '_jid_' + self.job_id) + new_temp_dir_name = self.temp_dir.replace('_jid_' + old_job_id, '_jid_' + self.job_id) os.rename(self.scratch_dir, new_scratch_dir_name) self.scratch_dir = new_scratch_dir_name + self.temp_dir = new_temp_dir_name def clear_temp_dir(self): @@ -1384,6 +1389,7 @@ def get_existing_job_ID_path(self, wd): elif len(dir_list) > 1: sentry.capture_message('Found two non aliased identical job_id') + print(f'Found two non aliased identical job_id, dir_list: {dir_list}') raise RuntimeError('Found two non aliased identical job_id') else: From d54b0395ef20e12df7b773db097a4950a4aba91c Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 9 Oct 2023 09:54:20 +0200 Subject: [PATCH 09/17] setting and then removing temporary scratch_dir --- cdci_data_analysis/flask_app/dispatcher_query.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index b106d5e0c..a55599c15 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -243,10 +243,8 @@ def __init__(self, app, # TODO why here and not at the beginning ? # self.set_sentry_client() # TODO is also the case of call_back to handle ? - temp_job_id = None if not data_server_call_back: self.set_instrument(self.instrument_name, roles, email) - verbose = self.par_dic.get('verbose', 'False') == 'True' # try: # self.set_temp_dir(self.par_dic['session_id'], verbose=verbose) # except Exception as e: @@ -290,7 +288,10 @@ def __init__(self, app, temp_job_id = self.job_id print(f"temp_jpb_id set to: {temp_job_id}") + verbose = self.par_dic.get('verbose', 'False') == 'True' + # let's generate a temporary scratch_dir using the temporary job_id self.set_scratch_dir(self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) + temp_scratch_dir = self.scratch_dir if not data_server_call_back: try: self.set_temp_dir(self.par_dic['session_id'], verbose=verbose) @@ -334,6 +335,7 @@ def __init__(self, app, self.job_id = self.par_dic['job_id'] # self.update_scratch_dir_job_id(old_job_id=temp_job_id) + # let's set the scratch_dir with the updated job_id self.set_scratch_dir(self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) self.log_query_progression("before move_temp_content") @@ -357,7 +359,7 @@ def __init__(self, app, finally: self.logger.info("==> clean-up temporary directory") self.log_query_progression("before clear_temp_dir") - self.clear_temp_dir() + self.clear_temp_dir(temp_scratch_dir=temp_scratch_dir) self.log_query_progression("after clear_temp_dir") logger.info("constructed %s:%s for data_server_call_back=%s", self.__class__, self, data_server_call_back) @@ -804,9 +806,11 @@ def update_scratch_dir_job_id(self, old_job_id=None): self.temp_dir = new_temp_dir_name - def clear_temp_dir(self): + def clear_temp_dir(self, temp_scratch_dir=None): if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir): shutil.rmtree(self.temp_dir) + if temp_scratch_dir is not None and os.path.exists(temp_scratch_dir): + shutil.rmtree(temp_scratch_dir) def prepare_download(self, file_list, file_name, scratch_dir): file_name = file_name.replace(' ', '_') From 47ed510b052dcb874202d85711c8b49a01f52053 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 9 Oct 2023 10:09:00 +0200 Subject: [PATCH 10/17] init temp_scratch_dir and no need to save temp job_id --- cdci_data_analysis/flask_app/dispatcher_query.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index a55599c15..45b814c2b 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -128,6 +128,8 @@ def __init__(self, app, self.app = app + temp_scratch_dir=None + self.set_sentry_sdk(getattr(self.app.config.get('conf'), 'sentry_url', None)) try: @@ -285,12 +287,11 @@ def __init__(self, app, f"job_id must be present if query_status != \"new\" (it is \"{query_status}\")") self.job_id = self.par_dic['job_id'] - temp_job_id = self.job_id - print(f"temp_jpb_id set to: {temp_job_id}") verbose = self.par_dic.get('verbose', 'False') == 'True' # let's generate a temporary scratch_dir using the temporary job_id self.set_scratch_dir(self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) + # temp_job_id = self.job_id temp_scratch_dir = self.scratch_dir if not data_server_call_back: try: From 25cc4ea6c7722540f5126ff644eb660cc0847329 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 9 Oct 2023 10:37:45 +0200 Subject: [PATCH 11/17] temp scratch_dir not removed if coincides with job scratch_dir --- cdci_data_analysis/flask_app/dispatcher_query.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index 45b814c2b..17ece3dad 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -270,6 +270,7 @@ def __init__(self, app, # if 'query_status' not in self.par_dic: # raise MissingRequestParameter('no query_status!') + verbose = self.par_dic.get('verbose', 'False') == 'True' if not (data_server_call_back or resolve_job_url): query_status = self.par_dic['query_status'] self.job_id = None @@ -288,11 +289,10 @@ def __init__(self, app, self.job_id = self.par_dic['job_id'] - verbose = self.par_dic.get('verbose', 'False') == 'True' - # let's generate a temporary scratch_dir using the temporary job_id - self.set_scratch_dir(self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) - # temp_job_id = self.job_id - temp_scratch_dir = self.scratch_dir + # let's generate a temporary scratch_dir using the temporary job_id + self.set_scratch_dir(self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) + # temp_job_id = self.job_id + temp_scratch_dir = self.scratch_dir if not data_server_call_back: try: self.set_temp_dir(self.par_dic['session_id'], verbose=verbose) @@ -810,7 +810,7 @@ def update_scratch_dir_job_id(self, old_job_id=None): def clear_temp_dir(self, temp_scratch_dir=None): if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir): shutil.rmtree(self.temp_dir) - if temp_scratch_dir is not None and os.path.exists(temp_scratch_dir): + if temp_scratch_dir is not None and temp_scratch_dir != self.scratch_dir and os.path.exists(temp_scratch_dir): shutil.rmtree(temp_scratch_dir) def prepare_download(self, file_list, file_name, scratch_dir): From 086d4956f1dccbec2b48379aec692256a11eaa0d Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 9 Oct 2023 11:32:38 +0200 Subject: [PATCH 12/17] setting temp scratch_dir in all cases and create temp_dir in cwd if scratch_dir not exists --- .../flask_app/dispatcher_query.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index 17ece3dad..9dcb23d78 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -289,10 +289,10 @@ def __init__(self, app, self.job_id = self.par_dic['job_id'] - # let's generate a temporary scratch_dir using the temporary job_id - self.set_scratch_dir(self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) - # temp_job_id = self.job_id - temp_scratch_dir = self.scratch_dir + # let's generate a temporary scratch_dir using the temporary job_id + self.set_scratch_dir(self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) + # temp_job_id = self.job_id + temp_scratch_dir = self.scratch_dir if not data_server_call_back: try: self.set_temp_dir(self.par_dic['session_id'], verbose=verbose) @@ -336,8 +336,8 @@ def __init__(self, app, self.job_id = self.par_dic['job_id'] # self.update_scratch_dir_job_id(old_job_id=temp_job_id) - # let's set the scratch_dir with the updated job_id - self.set_scratch_dir(self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) + # let's set the scratch_dir with the updated job_id + self.set_scratch_dir(self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) self.log_query_progression("before move_temp_content") self.move_temp_content() @@ -785,8 +785,10 @@ def set_temp_dir(self, session_id, job_id=None, verbose=False): if job_id is not None: suffix += '_jid_'+job_id - - td = tempfile.mkdtemp(suffix=suffix, dir=self.scratch_dir) + temp_parent_dir = '.' + if hasattr(self, 'scratch_dir'): + temp_parent_dir = self.scratch_dir + td = tempfile.mkdtemp(suffix=suffix, dir=temp_parent_dir) self.temp_dir = td def move_temp_content(self): From a034fbf9689acacf89e901c94d997c120a6874ac Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 9 Oct 2023 12:00:04 +0200 Subject: [PATCH 13/17] removed commented code and unused defs --- .../flask_app/dispatcher_query.py | 31 +------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index 9dcb23d78..3136fe083 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -247,24 +247,6 @@ def __init__(self, app, # TODO is also the case of call_back to handle ? if not data_server_call_back: self.set_instrument(self.instrument_name, roles, email) - # try: - # self.set_temp_dir(self.par_dic['session_id'], verbose=verbose) - # except Exception as e: - # sentry.capture_message(f"problem creating temp directory: {e}") - # - # raise InternalError("we have encountered an internal error! " - # "Our team is notified and is working on it. We are sorry! " - # "When we find a solution we will try to reach you", status_code=500) - # if self.instrument is not None and not isinstance(self.instrument, str): - # self.instrument.parse_inputs_files( - # par_dic=self.par_dic, - # request=request, - # temp_dir=self.temp_dir, - # verbose=verbose, - # use_scws=self.use_scws, - # sentry_dsn=self.sentry_dsn - # ) - # self.par_dic = self.instrument.set_pars_from_dic(self.par_dic, verbose=verbose) # TODO: if not callback! # if 'query_status' not in self.par_dic: @@ -335,7 +317,7 @@ def __init__(self, app, f"job_id must be present if query_status != \"new\" (it is \"{query_status}\")") self.job_id = self.par_dic['job_id'] - # self.update_scratch_dir_job_id(old_job_id=temp_job_id) + # let's set the scratch_dir with the updated job_id self.set_scratch_dir(self.par_dic['session_id'], job_id=self.job_id, verbose=verbose) @@ -798,17 +780,6 @@ def move_temp_content(self): file_full_path = os.path.join(self.temp_dir, f) shutil.copy(file_full_path, self.scratch_dir) - def update_scratch_dir_job_id(self, old_job_id=None): - if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir) \ - and os.path.exists(self.scratch_dir): - if old_job_id is not None: - new_scratch_dir_name = self.scratch_dir.replace('_jid_' + old_job_id, '_jid_' + self.job_id) - new_temp_dir_name = self.temp_dir.replace('_jid_' + old_job_id, '_jid_' + self.job_id) - os.rename(self.scratch_dir, new_scratch_dir_name) - self.scratch_dir = new_scratch_dir_name - self.temp_dir = new_temp_dir_name - - def clear_temp_dir(self, temp_scratch_dir=None): if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir): shutil.rmtree(self.temp_dir) From 14ed8c6b87acb8a5306b8eadd8ba977326937519 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 9 Oct 2023 13:37:06 +0200 Subject: [PATCH 14/17] reverting changes logger email_helper.py --- cdci_data_analysis/analysis/email_helper.py | 41 +++++++++++-------- cdci_data_analysis/flask_app/app.py | 3 +- .../flask_app/dispatcher_query.py | 4 +- tests/test_job_management.py | 4 +- 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/cdci_data_analysis/analysis/email_helper.py b/cdci_data_analysis/analysis/email_helper.py index a074714f1..2e1b755e2 100644 --- a/cdci_data_analysis/analysis/email_helper.py +++ b/cdci_data_analysis/analysis/email_helper.py @@ -17,23 +17,25 @@ import glob import black import base64 +import logging from urllib import parse import zlib import json from jinja2 import Environment, FileSystemLoader from bs4 import BeautifulSoup -from ..app_logging import app_logging + from ..analysis.exceptions import BadRequest, MissingRequestParameter from ..analysis.hash import make_hash from ..analysis.time_helper import validate_time from datetime import datetime +logger = logging.getLogger() + num_email_sending_max_tries = 5 email_sending_retry_sleep_s = .5 -email_helper_logger = app_logging.getLogger('email_helper') class MultipleDoneEmail(BadRequest): pass @@ -46,7 +48,7 @@ def timestamp2isot(timestamp_or_string: typing.Union[str, float]): try: timestamp_or_string = validate_time(timestamp_or_string).strftime("%Y-%m-%d %H:%M:%S") except (ValueError, OverflowError, TypeError, OSError) as e: - email_helper_logger.warning(f'Error when constructing the datetime object from the timestamp {timestamp_or_string}:\n{e}') + logger.warning(f'Error when constructing the datetime object from the timestamp {timestamp_or_string}:\n{e}') raise EMailNotSent(f"Email not sent: {e}") return timestamp_or_string @@ -87,7 +89,7 @@ def invalid_email_line_length(body): return False # TODO: not currently fully used, not critical, but should finish this too since it will make nice short permanent urls -def compress_request_url_params(request_url, logger, consider_args=['selected_catalog', 'string_like_name']): +def compress_request_url_params(request_url, consider_args=['selected_catalog', 'string_like_name']): parsed_url = parse.urlparse(request_url) parsed_qs = parse.parse_qs(parsed_url.query) @@ -133,7 +135,7 @@ def generate_products_url_from_par_dict(products_url, par_dict) -> str: request_url = '%s?%s' % (products_url, urlencode(par_dict)) return request_url -def wrap_python_code(code, logger, max_length=100, max_str_length=None): +def wrap_python_code(code, max_length=100, max_str_length=None): # this black currently does not split strings without spaces @@ -176,7 +178,7 @@ def check_scw_list_length( return False -def get_first_submitted_email_time(scratch_dir, logger): +def get_first_submitted_email_time(scratch_dir): first_submitted_email_time = None submitted_email_pattern = os.path.join( scratch_dir, @@ -218,7 +220,8 @@ def send_incident_report_email( decoded_token, incident_content=None, incident_time=None, - scratch_dir=None): + scratch_dir=None, + sentry_dsn=None): sending_time = time_.time() @@ -261,7 +264,8 @@ def send_incident_report_email( scratch_dir=scratch_dir, smtp_server_password=config.smtp_server_password, sending_time=sending_time, - logger=logger) + logger=logger, + sentry_dsn=sentry_dsn) store_incident_report_email_info(message, scratch_dir, sending_time=sending_time) @@ -282,7 +286,8 @@ def send_job_email( time_request=None, request_url="", api_code="", - scratch_dir=None): + scratch_dir=None, + sentry_dsn=None): sending_time = time_.time() # let's get the needed email template; @@ -294,9 +299,9 @@ def send_job_email( # api_code = adapt_line_length_api_code(api_code, line_break="\n", add_line_continuation="\\") api_code_no_token = re.sub('"token": ".*?"', '"token": ""', api_code) - api_code_no_token = wrap_python_code(api_code_no_token, logger) + api_code_no_token = wrap_python_code(api_code_no_token) - api_code = wrap_python_code(api_code, logger) + api_code = wrap_python_code(api_code) api_code_too_long = invalid_email_line_length(api_code) or invalid_email_line_length(api_code_no_token) api_code_email_attachment = None @@ -389,9 +394,10 @@ def send_job_email( sending_time=sending_time, scratch_dir=scratch_dir, logger=logger, - attachment=api_code_email_attachment) + attachment=api_code_email_attachment, + sentry_dsn=sentry_dsn) - store_status_email_info(message, status, scratch_dir, logger, sending_time=sending_time, first_submitted_time=time_request) + store_status_email_info(message, status, scratch_dir, sending_time=sending_time, first_submitted_time=time_request) return message @@ -410,7 +416,8 @@ def send_email(smtp_server, logger, sending_time=None, scratch_dir=None, - attachment=None + attachment=None, + sentry_dsn=None ): server = None @@ -495,7 +502,7 @@ def send_email(smtp_server, server.quit() -def store_status_email_info(message, status, scratch_dir, logger, sending_time=None, first_submitted_time=None): +def store_status_email_info(message, status, scratch_dir, sending_time=None, first_submitted_time=None): path_email_history_folder = os.path.join(scratch_dir, 'email_history') current_time = time_.time() if not os.path.exists(path_email_history_folder): @@ -598,7 +605,7 @@ def log_email_sending_info(logger, status, time_request, scratch_dir, job_id, ad logger.info(f"logging email sending attempt into {email_history_log_fn} file") -def is_email_to_send_run_query(logger, status, time_original_request, scratch_dir, job_id, config, decoded_token=None): +def is_email_to_send_run_query(logger, status, time_original_request, scratch_dir, job_id, config, decoded_token=None, sentry_dsn=None): log_additional_info_obj = {} sending_ok = False time_check = time_.time() @@ -685,7 +692,7 @@ def is_email_to_send_run_query(logger, status, time_original_request, scratch_di return sending_ok -def is_email_to_send_callback(logger, status, time_original_request, scratch_dir, config, job_id, decoded_token=None): +def is_email_to_send_callback(logger, status, time_original_request, scratch_dir, config, job_id, decoded_token=None, sentry_dsn=None): log_additional_info_obj = {} sending_ok = False time_check = time_.time() diff --git a/cdci_data_analysis/flask_app/app.py b/cdci_data_analysis/flask_app/app.py index 54153173c..983dcf2e2 100644 --- a/cdci_data_analysis/flask_app/app.py +++ b/cdci_data_analysis/flask_app/app.py @@ -1030,7 +1030,8 @@ def report_incident(): decoded_token=decoded_token, incident_content=incident_content, incident_time=incident_time, - scratch_dir=scratch_dir + scratch_dir=scratch_dir, + sentry_dsn=sentry_dsn ) report_incident_status = 'incident report email successfully sent' except email_helper.EMailNotSent as e: diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index 3136fe083..5ec04d794 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -1076,7 +1076,7 @@ def run_call_back(self, status_kw_name='action') -> typing.Tuple[str, typing.Uni url=self.app.config['conf'].products_url + "/dispatch-data" ) time_request = time_original_request - time_request_first_submitted = email_helper.get_first_submitted_email_time(self.scratch_dir, self.logger) + time_request_first_submitted = email_helper.get_first_submitted_email_time(self.scratch_dir) if time_request_first_submitted is not None: time_request = time_request_first_submitted @@ -1863,7 +1863,7 @@ def run_query(self, off_line=False, disp_conf=None): url=self.app.config['conf'].products_url + "/dispatch-data" ) time_request = self.time_request - time_request_first_submitted = email_helper.get_first_submitted_email_time(self.scratch_dir, self.logger) + time_request_first_submitted = email_helper.get_first_submitted_email_time(self.scratch_dir) if time_request_first_submitted is not None: time_request = time_request_first_submitted diff --git a/tests/test_job_management.py b/tests/test_job_management.py index 2c661fc8e..947f8226a 100644 --- a/tests/test_job_management.py +++ b/tests/test_job_management.py @@ -2385,7 +2385,7 @@ def test_email_compress_request_url(): par_str="01"*10000, )) - compressed_url = compress_request_url_params(url, logger, consider_args=['par_str']) + compressed_url = compress_request_url_params(url, consider_args=['par_str']) assert len(compressed_url) < 200 assert len(url) > 10000 @@ -2411,7 +2411,7 @@ def x(arg): scwl_dict = {"scw_list": "115000860010.001,115000870010.001,115000980010.001,115000990010.001,115001000010.001,115001010010.001,115001020010.001,115001030010.001,115001040010.001,115001050010.001,115001060010.001,117100210010.001,118100040010.001,118100050010.001,118900100010.001,118900120010.001,118900130010.001,118900140010.001,119000020010.001,119000030010.001,119000040010.001,119000050010.001,119000190010.001,119900370010.001,119900480010.001,119900490010.001,119900500010.001,119900510010.001,119900520010.001,119900530010.001,119900540010.001,119900550010.001,119900560010.001,119900570010.001,119900670010.001,119900680010.001,119900690010.001,119900700010.001,119900710010.001,119900720010.001,119900730010.001,119900740010.001,119900750010.001,119900760010.001,119900770010.001,119900880010.001,119900890010.001,119900900010.001,119900910010."} """ - c = wrap_python_code(code, logger, max_length=max_length) + c = wrap_python_code(code, max_length=max_length) print("wrapped:\n", c) From 8c4913330c78419cdba50e6037dd2b84db614858 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 9 Oct 2023 13:38:20 +0200 Subject: [PATCH 15/17] reverting changes logger time_helper.py --- cdci_data_analysis/analysis/time_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdci_data_analysis/analysis/time_helper.py b/cdci_data_analysis/analysis/time_helper.py index a056e2651..bf32911f1 100644 --- a/cdci_data_analysis/analysis/time_helper.py +++ b/cdci_data_analysis/analysis/time_helper.py @@ -3,7 +3,7 @@ from ..app_logging import app_logging -logger = app_logging.getLogger('time_helper') +logger = app_logging.getLogger('drupal_helper') def validate_time(timestamp_to_validate): try: From 4133a2dc4a4878fc0fe4448997a5d5ed3ff77624 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 9 Oct 2023 17:34:09 +0200 Subject: [PATCH 16/17] addressing comments --- cdci_data_analysis/flask_app/dispatcher_query.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index 5ec04d794..3514a45f0 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -128,7 +128,7 @@ def __init__(self, app, self.app = app - temp_scratch_dir=None + temp_scratch_dir = None self.set_sentry_sdk(getattr(self.app.config.get('conf'), 'sentry_url', None)) @@ -257,11 +257,6 @@ def __init__(self, app, query_status = self.par_dic['query_status'] self.job_id = None if query_status == 'new': - # this will overwrite any job_id provided, it should be validated or ignored - #if 'job_id' in self.par_dic: - # self.job_id = self.par_dic['job_id'] - # self.validate_job_id() - # let's generate a temporary job_id used for the creation of the scratch_dir self.generate_job_id() else: From 0d1d191763aa6f74ce66b0d89125379a4fccf7e3 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 9 Oct 2023 17:38:04 +0200 Subject: [PATCH 17/17] addressing comments, using logger --- cdci_data_analysis/flask_app/dispatcher_query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index 3514a45f0..7b28485a3 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -153,7 +153,7 @@ def __init__(self, app, raise RequestNotUnderstood("job_id must be present during a call_back") if data_server_call_back: # this can be set since it's a call_back and job_id and session_id are available - print(f"before setting scratch_dir: job_id: {self.par_dic['job_id']} callback: {data_server_call_back}, resolve_job_url: {resolve_job_url}") + self.logger.info(f"before setting scratch_dir: job_id: {self.par_dic['job_id']} callback: {data_server_call_back}, resolve_job_url: {resolve_job_url}") self.set_scratch_dir(session_id=self.par_dic['session_id'], job_id=self.par_dic['job_id']) self.set_scws_call_back_related_params() else: @@ -745,7 +745,7 @@ def set_scratch_dir(self, session_id, job_id=None, verbose=False): wd=FilePath(file_dir=wd).path) if alias_workdir is not None: wd = wd+'_aliased' - print(f"creating scratch_dir: {wd}") + wd = FilePath(file_dir=wd) wd.mkdir() self.scratch_dir = wd.path