diff --git a/cdci_data_analysis/flask_app/app.py b/cdci_data_analysis/flask_app/app.py index 3188949f..41136b1d 100644 --- a/cdci_data_analysis/flask_app/app.py +++ b/cdci_data_analysis/flask_app/app.py @@ -15,7 +15,7 @@ import random import hashlib import validators - +import re import logging from raven.contrib.flask import Sentry @@ -223,6 +223,19 @@ def remove_nested_keys(D, keys): return D +def sanitize_dict_before_log(dict_to_sanitize): + """Return a copy of dict_to_sanitize that is safe to log: keys listed in sensitive_keys are dropped and every character outside [A-Za-z0-9 ] is stripped from each stringified value (this also removes CR/LF, so a value cannot break the log line).""" + sensitive_keys = ['token'] # Add any other sensitive keys here + not_allowed_characters = r'[^A-Za-z0-9 ]' + replacement_character = '' + sanitized_values = {} + for key, value in dict_to_sanitize.items(): + if key not in sensitive_keys: + value = re.sub(not_allowed_characters, replacement_character, str(value)) + sanitized_values[key] = value + return sanitized_values + + def common_exception_payload(): payload = {} @@ -441,9 +454,11 @@ def run_analysis(): request_summary = log_run_query_request() try: + sanitized_request_values = sanitize_dict_before_log(request.values) + logger.info('\033[32m===> dataserver_call_back\033[0m') logger.info('\033[33m raw request values: %s \033[0m', - dict(request.values)) + dict(sanitized_request_values)) query_id = hashlib.sha224(str(request.values).encode()).hexdigest()[:8] @@ -522,9 +537,11 @@ def resolve_job_url(): @app.route('/call_back', methods=['POST', 'GET']) def dataserver_call_back(): + sanitized_request_values = sanitize_dict_before_log(request.values) + logger.info('\033[32m===========================> dataserver_call_back\033[0m') - logger.info('\033[33m raw request values: %s \033[0m', dict(request.values)) + logger.info('\033[33m raw request values: %s \033[0m', dict(sanitized_request_values)) query_id = hashlib.sha224(str(request.values).encode()).hexdigest()[:8] @@ -584,8 +601,11 @@ def get(self, path): @app.route('/resolve_name', methods=['GET']) def resolve_name(): - logger.info("request.args: %s ", request.args)
- token = request.args.get('token', None) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + logger.info("request.args: %s ", sanitized_par_dic) + + token = par_dic.pop('token', None) app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -596,7 +616,7 @@ def resolve_name(): if output_code is not None: return make_response(output, output_code) - name = request.args.get('name', None) + name = par_dic.get('name', None) name_resolver_url = app_config.name_resolver_url entities_portal_url = app_config.entities_portal_url @@ -610,8 +630,11 @@ def resolve_name(): @app.route('/get_revnum', methods=['GET']) def get_revnum(): - logger.info("request.args: %s ", request.args) - token = request.args.get('token', None) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + logger.info("request.args: %s ", sanitized_par_dic) + + token = par_dic.pop('token', None) app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -622,7 +645,7 @@ def get_revnum(): if output_code is not None: return make_response(output, output_code) - time_to_convert = request.args.get('time_to_convert', None) + time_to_convert = par_dic.get('time_to_convert', None) converttime_revnum_service_url = app_config.converttime_revnum_service_url @@ -633,8 +656,11 @@ def get_revnum(): @app.route('/get_list_terms', methods=['GET']) def get_list_terms(): - logger.info("request.args: %s ", request.args) - token = request.args.get('token', None) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + logger.info("request.args: %s ", sanitized_par_dic) + + token = par_dic.pop('token', None) app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -648,8 +674,8 @@ def get_list_terms(): sentry_dsn = sentry.sentry_url - group = request.args.get('group', None) - parent = request.args.get('parent', None) + group = par_dic.get('group', None) + parent 
= par_dic.get('parent', None) list_terms = drupal_helper.get_list_terms(disp_conf=app_config, group=group, @@ -664,8 +690,11 @@ def get_list_terms(): @app.route('/get_parents_term', methods=['GET']) def get_parents_term(): - logger.info("request.args: %s ", request.args) - token = request.args.get('token', None) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + logger.info("request.args: %s ", sanitized_par_dic) + + token = par_dic.pop('token', None) app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -679,8 +708,8 @@ def get_parents_term(): sentry_dsn = sentry.sentry_url - group = request.args.get('group', None) - term = request.args.get('term', None) + group = par_dic.get('group', None) + term = par_dic.get('term', None) list_parents = drupal_helper.get_parents_term(disp_conf=app_config, term=term, @@ -695,9 +724,11 @@ def get_parents_term(): @app.route('/get_observation_attachments', methods=['GET']) def get_observation_attachments(): - logger.info("request.args: %s ", request.args) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + logger.info("request.args: %s ", sanitized_par_dic) - token = request.args.get('token', None) + token = par_dic.pop('token', None) app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -709,9 +741,6 @@ def get_observation_attachments(): return make_response(output, output_code) decoded_token = output - par_dic = request.values.to_dict() - par_dic.pop('token') - sentry_dsn = sentry.sentry_url gallery_secret_key = app_config.product_gallery_secret_key @@ -735,10 +764,12 @@ def get_observation_attachments(): @app.route('/get_all_revs', methods=['GET']) def get_all_revs(): - logger.info("request.args: %s ", request.args) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + logger.info("request.args: %s ", sanitized_par_dic) logger.info("request.files: %s ",
request.files) - token = request.args.get('token', None) + token = par_dic.pop('token', None) app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -750,9 +781,6 @@ def get_all_revs(): return make_response(output, output_code) decoded_token = output - par_dic = request.values.to_dict() - par_dic.pop('token') - sentry_dsn = sentry.sentry_url gallery_secret_key = app_config.product_gallery_secret_key @@ -775,10 +803,12 @@ def get_all_revs(): @app.route('/get_all_astro_entities', methods=['GET']) def get_all_astro_entities(): - logger.info("request.args: %s ", request.args) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + logger.info("request.args: %s ", sanitized_par_dic) logger.info("request.files: %s ", request.files) - token = request.args.get('token', None) + token = par_dic.pop('token', None) app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -790,9 +820,6 @@ def get_all_astro_entities(): return make_response(output, output_code) decoded_token = output - par_dic = request.values.to_dict() - par_dic.pop('token') - sentry_dsn = sentry.sentry_url gallery_secret_key = app_config.product_gallery_secret_key @@ -814,7 +841,9 @@ def get_all_astro_entities(): @app.route('/get_astro_entity_info_by_source_name', methods=['GET']) def get_astro_entity_info_by_source_name(): - logger.info("request.args: %s ", request.args) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + logger.info("request.args: %s ", sanitized_par_dic) logger.info("request.files: %s ", request.files) app_config = app.config.get('conf') @@ -822,7 +851,7 @@ def get_astro_entity_info_by_source_name(): sentry_dsn = sentry.sentry_url product_gallery_url = app_config.product_gallery_url - src_name = request.args.get('src_name', None) + src_name = par_dic.get('src_name', None) source_entity_info = 
drupal_helper.get_source_astrophysical_entity_info_by_source_and_alternative_name(product_gallery_url, gallery_jwt_token=None, @@ -849,10 +878,11 @@ def get_astro_entity_info_by_source_name(): @app.route('/get_data_product_list_with_conditions', methods=['GET']) def get_data_product_list_with_conditions(): - logger.info("request.args: %s ", request.args) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + logger.info("request.args: %s ", sanitized_par_dic) logger.info("request.files: %s ", request.files) - par_dic = request.values.to_dict() token = par_dic.pop('token', None) app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -876,8 +906,6 @@ def get_data_product_list_with_conditions(): # update the token gallery_jwt_token = drupal_helper.generate_gallery_jwt_token(gallery_secret_key, user_id=user_id_product_creator) - # src_name = par_dic.pop('src_name', None) - output_get = drupal_helper.get_data_product_list_by_source_name_with_conditions(product_gallery_url=product_gallery_url, gallery_jwt_token=gallery_jwt_token, sentry_dsn=sentry_dsn, @@ -890,10 +918,12 @@ def get_data_product_list_with_conditions(): # TODO to refactor using get_data_product_list_with_conditions @app.route('/get_data_product_list_by_source_name', methods=['GET']) def get_data_product_list_by_source_name(): - logger.info("request.args: %s ", request.args) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + logger.info("request.args: %s ", sanitized_par_dic) logger.info("request.files: %s ", request.files) - token = request.args.get('token', None) + token = par_dic.pop('token', None) app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -905,8 +935,6 @@ def get_data_product_list_by_source_name(): return make_response(output, output_code) decoded_token = output - par_dic = request.values.to_dict() - par_dic.pop('token') sentry_dsn = sentry.sentry_url @@ -930,13 +958,16 
@@ def get_data_product_list_by_source_name(): return output_list - @app.route('/post_astro_entity_to_gallery', methods=['POST']) def post_astro_entity_to_gallery(): - logger.info("request.args: %s ", request.args) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + + logger.info("request.values: %s ", sanitized_par_dic) logger.info("request.files: %s ", request.files) - token = request.args.get('token', None) + token = par_dic.pop('token', None) + app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -948,9 +979,6 @@ def post_astro_entity_to_gallery(): return make_response(output, output_code) decoded_token = output - par_dic = request.values.to_dict() - par_dic.pop('token') - output_post = drupal_helper.post_content_to_gallery(decoded_token=decoded_token, content_type="astrophysical_entity", disp_conf=app_config, @@ -962,10 +990,14 @@ def post_astro_entity_to_gallery(): @app.route('/post_observation_to_gallery', methods=['POST']) def post_observation_to_gallery(): - logger.info("request.args: %s ", request.args) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + + token = par_dic.pop('token', None) + + logger.info("request.values: %s ", sanitized_par_dic) logger.info("request.files: %s ", request.files) - token = request.args.get('token', None) app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -977,9 +1009,6 @@ def post_observation_to_gallery(): return make_response(output, output_code) decoded_token = output - par_dic = request.values.to_dict() - par_dic.pop('token') - output_post = drupal_helper.post_content_to_gallery(decoded_token=decoded_token, content_type="observation", disp_conf=app_config, @@ -991,10 +1020,14 @@ def post_observation_to_gallery(): @app.route('/post_product_to_gallery', methods=['POST']) def post_product_to_gallery(): - logger.info("request.args: %s ", request.args) + par_dic = request.values.to_dict() + 
sanitized_par_dic = sanitize_dict_before_log(par_dic) + + logger.info("request.values: %s ", sanitized_par_dic) logger.info("request.files: %s ", request.files) - token = request.args.get('token', None) + token = par_dic.pop('token', None) + app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -1006,9 +1039,6 @@ def post_product_to_gallery(): return make_response(output, output_code) decoded_token = output - par_dic = request.values.to_dict() - par_dic.pop('token') - output_post = drupal_helper.post_content_to_gallery(decoded_token=decoded_token, disp_conf=app_config, files=request.files, @@ -1019,10 +1049,14 @@ def post_product_to_gallery(): @app.route('/delete_product_to_gallery', methods=['POST']) def delete_product_to_gallery(): - logger.info("request.args: %s ", request.args) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + + logger.info("request.values: %s ", sanitized_par_dic) logger.info("request.files: %s ", request.files) - token = request.args.get('token', None) + token = par_dic.pop('token', None) + app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -1034,9 +1068,6 @@ def delete_product_to_gallery(): return make_response(output, output_code) decoded_token = output - par_dic = request.values.to_dict() - par_dic.pop('token') - output_post = drupal_helper.delete_content_gallery(decoded_token=decoded_token, disp_conf=app_config, files=request.files, @@ -1047,10 +1078,14 @@ def delete_product_to_gallery(): @app.route('/post_revolution_processing_log_to_gallery', methods=['POST']) def post_revolution_processing_log_to_gallery(): - logger.info("request.args: %s ", request.args) + par_dic = request.values.to_dict() + sanitized_par_dic = sanitize_dict_before_log(par_dic) + + logger.info("request.values: %s ", sanitized_par_dic) logger.info("request.files: %s ", request.files) - token = request.args.get('token', None) + token = par_dic.pop('token', None) + app_config = 
app.config.get('conf') secret_key = app_config.secret_key @@ -1062,9 +1097,6 @@ def post_revolution_processing_log_to_gallery(): return make_response(output, output_code) decoded_token = output - par_dic = request.values.to_dict() - par_dic.pop('token') - output_post = drupal_helper.post_content_to_gallery(decoded_token=decoded_token, disp_conf=app_config, files=request.files, diff --git a/tests/test_server_basic.py b/tests/test_server_basic.py index ee3f9983..88f13826 100644 --- a/tests/test_server_basic.py +++ b/tests/test_server_basic.py @@ -28,6 +28,7 @@ from cdci_data_analysis.analysis.renku_helper import clone_renku_repo, checkout_branch_renku_repo, check_job_id_branch_is_present, get_repo_path, generate_commit_request_url, create_new_notebook_with_code, generate_nb_hash, create_renku_ini_config_obj, generate_ini_file_hash from cdci_data_analysis.analysis.drupal_helper import execute_drupal_request, get_drupal_request_headers, get_revnum, get_observations_for_time_range, generate_gallery_jwt_token, get_user_id, get_source_astrophysical_entity_id_by_source_name from cdci_data_analysis.plugins.dummy_plugin.data_server_dispatcher import DataServerQuery, ReturnProgressProductQuery +from cdci_data_analysis.flask_app.app import sanitize_dict_before_log # logger logger = logging.getLogger(__name__) @@ -70,6 +71,24 @@ def remove_args_from_dic(arg_dic, remove_keys): tem=0, ) +@pytest.mark.fast +def test_sanitize_dict_before_log(): + + test_dict = { + 'token': 'mytoken', + 'field': 'myfield\n\r', + 'username': 'myusername', + 'email': 'myemail@example.com' + } + + expected_dict = { + 'field': 'myfield', + 'username': 'myusername', + 'email': 'myemailexamplecom' + } + + sanitized_dict = sanitize_dict_before_log(test_dict) + assert sanitized_dict == expected_dict @pytest.mark.fast def test_js9(dispatcher_live_fixture): @@ -2800,7 +2819,7 @@ def test_product_gallery_data_product_with_period_of_observation(dispatcher_live params['T2'] = now.strftime('%Y-%m-%dT%H:%M:%S') c 
= requests.post(os.path.join(server, "post_product_to_gallery"), - params={**params}, + data=params, files=file_obj ) @@ -3122,7 +3141,7 @@ def test_product_gallery_get_data_products_list_with_conditions(dispatcher_live_ } c = requests.post(os.path.join(server, "post_astro_entity_to_gallery"), - params={**source_params}, + data=source_params, ) assert c.status_code == 200 @@ -3141,7 +3160,7 @@ def test_product_gallery_get_data_products_list_with_conditions(dispatcher_live_ 'T2': '2022-08-23T05:29:11' } c = requests.post(os.path.join(server, "post_product_to_gallery"), - params={**product_params} + data=product_params ) assert c.status_code == 200 @@ -3270,7 +3289,7 @@ def test_product_gallery_get_data_products_list_for_given_source(dispatcher_live } c = requests.post(os.path.join(server, "post_astro_entity_to_gallery"), - params={**source_params}, + data=source_params, ) assert c.status_code == 200 @@ -3286,7 +3305,7 @@ def test_product_gallery_get_data_products_list_for_given_source(dispatcher_live 'insert_new_source': True } c = requests.post(os.path.join(server, "post_product_to_gallery"), - params={**product_params} + data=product_params ) assert c.status_code == 200 @@ -3434,7 +3453,7 @@ def test_product_gallery_get_period_of_observation_attachments(dispatcher_live_f c = requests.post(os.path.join(server, "post_observation_to_gallery"), - params={**params}, + data=params, files=file_obj ) @@ -3522,7 +3541,7 @@ def test_product_gallery_post_period_of_observation(dispatcher_live_fixture_with params['T2'] = now.strftime('%Y-%m-%dT%H:%M:%S') c = requests.post(os.path.join(server, "post_observation_to_gallery"), - params={**params}, + data=params, files=file_obj ) @@ -3621,7 +3640,7 @@ def test_revolution_processing_log_gallery_post(dispatcher_live_fixture_with_gal } c = requests.post(os.path.join(server, "post_revolution_processing_log_to_gallery"), - params={**params}, + data=params, ) assert c.status_code == 200 @@ -3740,7 +3759,7 @@ def 
test_product_gallery_post(dispatcher_live_fixture_with_gallery, dispatcher_t 'fits_file_1': open('data/dummy_prods/query_catalog.fits', 'rb')} c = requests.post(os.path.join(server, "post_product_to_gallery"), - params={**params}, + data=params, files=file_obj ) @@ -3852,7 +3871,7 @@ def test_post_data_product_with_multiple_sources(dispatcher_live_fixture_with_ga 'insert_new_source': insert_new_source } c = requests.post(os.path.join(server, "post_product_to_gallery"), - params={**params} + data=params ) assert c.status_code == 200 @@ -3982,7 +4001,7 @@ def test_product_gallery_update(dispatcher_live_fixture_with_gallery, dispatcher 'fits_file_1': open('data/dummy_prods/query_catalog.fits', 'rb')} c = requests.post(os.path.join(server, "post_product_to_gallery"), - params={**params}, + data=params, files=file_obj ) @@ -4028,7 +4047,7 @@ def test_product_gallery_update(dispatcher_live_fixture_with_gallery, dispatcher 'fits_file_0': open('data/dummy_prods/isgri_query_lc.fits', 'rb')} c = requests.post(os.path.join(server, "post_product_to_gallery"), - params={**params}, + data=params, files=file_obj ) assert c.status_code == 200 @@ -4084,7 +4103,7 @@ def test_product_gallery_delete(dispatcher_live_fixture_with_gallery, dispatcher token=encoded_token) c = requests.post(os.path.join(server, "post_product_to_gallery"), - params={**params}, + data=params, ) assert c.status_code == 200 @@ -4096,14 +4115,14 @@ def test_product_gallery_delete(dispatcher_live_fixture_with_gallery, dispatcher assert 'field_product_id' in drupal_res_obj assert drupal_res_obj['field_product_id'][0]['value'] == product_id - params = { - 'product_id': product_id, + params_products_list = { + 'product_id_value': product_id, 'content_type': 'data_product', 'token': encoded_token } c = requests.get(os.path.join(server, "get_data_product_list_with_conditions"), - params=params + params=params_products_list ) assert c.status_code == 200 @@ -4111,8 +4130,14 @@ def 
test_product_gallery_delete(dispatcher_live_fixture_with_gallery, dispatcher assert len(drupal_res_obj) == 1 assert drupal_res_obj[0]['nid'] == str(nid_creation) + params = { + 'product_id': product_id, + 'content_type': 'data_product', + 'token': encoded_token + } + c = requests.post(os.path.join(server, "delete_product_to_gallery"), - params={**params}, + data=params, ) assert c.status_code == 200 @@ -4120,7 +4145,7 @@ def test_product_gallery_delete(dispatcher_live_fixture_with_gallery, dispatcher assert drupal_res_obj == {} c = requests.get(os.path.join(server, "get_data_product_list_with_conditions"), - params=params + params=params_products_list ) assert c.status_code == 200 @@ -4155,7 +4180,7 @@ def test_product_gallery_error_message(dispatcher_live_fixture_with_gallery): } c = requests.post(os.path.join(server, "post_product_to_gallery"), - params={**params}, + data=params, ) assert c.status_code == 500