From 91e0a5523d5c529ce0df354952276a675b15ccca Mon Sep 17 00:00:00 2001 From: Gabriele Barni Date: Mon, 2 Dec 2024 18:15:55 +0100 Subject: [PATCH] add token only if not present, also to `fileURL` parameters (#214) * add token only if not present * adding token also to fileURL and FileReference that are mmoda URL * only checking file url and using the default value in case no parameter value was provided * test * adapted another test * reintroduced condition with FileReference * extended test * better test * issuing valueerror in case arg is not FileURL * extended test * removed commented code * test optimization code --------- Co-authored-by: Denys Savchenko <56398430+dsavchenko@users.noreply.github.com> --- nb2workflow/nbadapter.py | 33 +++-- tests/test_input_types.py | 128 +++++++++++++++++- tests/test_nbadapter.py | 2 +- .../testfilereference_extra_annotated.ipynb | 63 +++++++++ .../testfileurl_extra_annotated.ipynb | 67 +++++++++ 5 files changed, 281 insertions(+), 12 deletions(-) create mode 100644 tests/testfiles/testfilereference_extra_annotated.ipynb create mode 100644 tests/testfiles/testfileurl_extra_annotated.ipynb diff --git a/nb2workflow/nbadapter.py b/nb2workflow/nbadapter.py index 35f0825..e4aeda6 100644 --- a/nb2workflow/nbadapter.py +++ b/nb2workflow/nbadapter.py @@ -841,14 +841,22 @@ def handle_url_params(self, parameters, tmpdir, context={}): adapted_parameters = copy.deepcopy(parameters) exceptions = [] posix_path_with_annotations_pattern = re.compile(rf"^{re.escape(oda_prefix)}.*_POSIXPath_") + file_url_with_annotations_pattern = re.compile(rf"^{re.escape(oda_prefix)}.*_FileURL_") + file_reference_with_annotations_pattern = re.compile(rf"^{re.escape(oda_prefix)}.*_FileReference_") for input_par_name, input_par_obj in self.input_parameters.items(): if ontology.is_ontology_available: parameter_hierarchy = ontology.get_parameter_hierarchy(input_par_obj['owl_type']) is_posix_path = f"{oda_prefix}POSIXPath" in parameter_hierarchy + is_file_url = f"{oda_prefix}FileURL" in parameter_hierarchy + is_file_reference = f"{oda_prefix}FileReference" in parameter_hierarchy else: is_posix_path = f"{oda_prefix}POSIXPath" == input_par_obj['owl_type'] or \ posix_path_with_annotations_pattern.match(input_par_obj['owl_type']) is not None - if is_posix_path: + is_file_url = f"{oda_prefix}FileURL" == input_par_obj['owl_type'] or \ + file_url_with_annotations_pattern.match(input_par_obj['owl_type']) is not None + is_file_reference = f"{oda_prefix}FileReference" == input_par_obj['owl_type'] or \ + file_reference_with_annotations_pattern.match(input_par_obj['owl_type']) is not None + if is_posix_path or is_file_url or is_file_reference: arg_par_value = parameters.get(input_par_name, None) if arg_par_value is None: arg_par_value = input_par_obj['default_value'] @@ -859,21 +867,26 @@ def handle_url_params(self, parameters, tmpdir, context={}): token = context.get('token', None) if token is not None: logger.debug(f'adding token to the url: {arg_par_value}') - url_parts = urlparse(adapted_parameters[input_par_name]) + url_to_adapt = adapted_parameters.get(input_par_name, arg_par_value) + url_parts = urlparse(url_to_adapt) url_args = parse_qs(url_parts.query) - url_args['token'] = [token] # the values in the dictionary need to be lists - new_url_parts = url_parts._replace(query=urlencode(url_args, doseq=True)) - adapted_parameters[input_par_name] = urlunparse(new_url_parts) - logger.debug(f"updated url: {adapted_parameters[input_par_name]}") - arg_par_value = adapted_parameters[input_par_name] + if token not in url_args: + url_args['token'] = [token] # the values in the dictionary need to be lists + new_url_parts = url_parts._replace(query=urlencode(url_args, doseq=True)) + adapted_parameters[input_par_name] = urlunparse(new_url_parts) + logger.debug(f"updated url: {adapted_parameters[input_par_name]}") + arg_par_value = adapted_parameters[input_par_name] logger.debug(f'download {arg_par_value}') try: - file_name = self.download_file(arg_par_value, tmpdir) - adapted_parameters[input_par_name] = file_name + if is_posix_path: + file_name = self.download_file(arg_par_value, tmpdir) + adapted_parameters[input_par_name] = file_name except Exception as e: exceptions.append(e) - + elif is_file_url: + exceptions.append(ValueError( + f'Parameter {input_par_name} value "{arg_par_value}" can not be interpreted as FileURL.')) return dict( adapted_parameters=adapted_parameters, exceptions=exceptions diff --git a/tests/test_input_types.py b/tests/test_input_types.py index c91d149..c169b55 100644 --- a/tests/test_input_types.py +++ b/tests/test_input_types.py @@ -7,7 +7,8 @@ logger = logging.getLogger(__name__) -from urllib.parse import urlencode +from urllib.parse import urlencode, urlparse, parse_qs + @pytest.fixture def app(): @@ -39,6 +40,131 @@ def test_posix_download_file_extra_annotations(client): r = client.get('/api/v1.0/get/testposixpath_extra_annotated', query_string={'fits_file_path': 'https://fits.gsfc.nasa.gov/samples/testkeys.fits'}) assert r.json['output']['output_file_download'] == 'file downloaded successfully' +@pytest.mark.parametrize("query_string_fits_file_path", ["generic_url", "mmoda_url", None]) +def test_file_reference(client, query_string_fits_file_path): + status_callback_file = "status.json" + callback_url = 'file://' + status_callback_file + token = 'abc123' + query_string = dict( + _async_request='no', + _async_request_callback=callback_url, + _token=token) + + fits_file_path_value = None + if query_string_fits_file_path == "generic_url": + fits_file_path_value = "https://fits.gsfc.nasa.gov/samples/testkeys.fits" + elif query_string_fits_file_path == "mmoda_url": + fits_file_path_value = "https://www.astro.unige.ch/mmoda/dispatch-data/test.fits" + + query_string['fits_file_path'] = fits_file_path_value + + r = client.get(f'/api/v1.0/get/testfilereference_extra_annotated', query_string=query_string) + assert r.status_code == 201 + + from nb2workflow.service import AsyncWorker + + def test_worker_run(): + AsyncWorker('test-worker').run_one() + + test_worker_thread = threading.Thread(target=test_worker_run) + test_worker_thread.start() + + while True: + options = client.get('/api/v1.0/options') + assert options.status_code == 200 + + r = client.get(f'/api/v1.0/get/testfilereference_extra_annotated', + query_string=query_string) + + logger.info('service returns %s %s', r, r.json) + + if r.json['workflow_status'] == 'done': + logger.info('workflow done!') + break + + time.sleep(0.1) + + test_worker_thread.join() + assert 'data' in r.json + assert 'output' in r.json['data'] + assert 'fits_file_path_modified' in r.json['data']['output'] + + if query_string_fits_file_path is None: + assert r.json['data']['output']['fits_file_path_modified'] == "/home/local/test.fits" + elif query_string_fits_file_path == "mmoda_url": + url_parts = urlparse(r.json['data']['output']['fits_file_path_modified']) + url_args = parse_qs(url_parts.query) + assert 'token' in url_args + elif query_string_fits_file_path == "generic_url": + url_parts = urlparse(r.json['data']['output']['fits_file_path_modified']) + url_args = parse_qs(url_parts.query) + assert 'token' not in url_args + +@pytest.mark.parametrize("query_string_fits_file_url", ["generic_url", "file_path", "numeric", None]) +def test_mmoda_file_url(client, query_string_fits_file_url): + status_callback_file = "status.json" + callback_url = 'file://' + status_callback_file + token = 'abc123' + query_string = dict( + _async_request='no', + _async_request_callback=callback_url, + _token=token) + + fits_file_url_value = None + if query_string_fits_file_url == "generic_url": + fits_file_url_value = "https://fits.gsfc.nasa.gov/samples/testkeys.fits" + elif query_string_fits_file_url == "file_path": + fits_file_url_value = "/home/local/test.fits" + elif query_string_fits_file_url == "numeric": + fits_file_url_value = 123456 + + query_string['fits_file_url'] = fits_file_url_value + + r = client.get(f'/api/v1.0/get/testfileurl_extra_annotated', query_string=query_string) + + assert r.status_code == 201 + + from nb2workflow.service import AsyncWorker + + def test_worker_run(): + AsyncWorker('test-worker').run_one() + + test_worker_thread = threading.Thread(target=test_worker_run) + test_worker_thread.start() + + while True: + options = client.get('/api/v1.0/options') + assert options.status_code == 200 + + r = client.get(f'/api/v1.0/get/testfileurl_extra_annotated', + query_string=query_string) + + logger.info('service returns %s %s', r, r.json) + + if r.json['workflow_status'] == 'done': + logger.info('workflow done!') + break + + time.sleep(0.1) + + test_worker_thread.join() + assert 'data' in r.json + assert 'output' in r.json['data'] + if query_string_fits_file_url != "file_path" and query_string_fits_file_url != "numeric": + assert 'mmoda_url_modified' in r.json['data']['output'] + url_parts = urlparse(r.json['data']['output']['mmoda_url_modified']) + url_args = parse_qs(url_parts.query) + assert 'token' in url_args + + assert 'fits_file_url_modified' in r.json['data']['output'] + url_parts = urlparse(r.json['data']['output']['fits_file_url_modified']) + url_args = parse_qs(url_parts.query) + assert 'token' not in url_args + + else: + assert r.json['data']['output'] == 'incomplete' + assert r.json['data']['exceptions'][0]['edump'] == f"ValueError('Parameter fits_file_url value \"{fits_file_url_value}\" can not be interpreted as FileURL.')" + def test_posix_download_file_with_arg_low_download_limit(client, app_low_download_limit): r = client.get('/api/v1.0/get/testposixpath', query_string={'fits_file_path': 'https://fits.gsfc.nasa.gov/samples/testkeys.fits'}) assert r.json['output'] == {} diff --git a/tests/test_nbadapter.py b/tests/test_nbadapter.py index 77ffaaf..23d352a 100644 --- a/tests/test_nbadapter.py +++ b/tests/test_nbadapter.py @@ -84,7 +84,7 @@ def test_find_notebooks(caplog): assert 'Ignoring pattern.' in caplog.text nbas = find_notebooks(nb_dir) - assert len(nbas) == 9 + assert len(nbas) == 11 nbas = find_notebooks(nb_dir, pattern=r'.*bool') assert len(nbas) == 1 diff --git a/tests/testfiles/testfilereference_extra_annotated.ipynb b/tests/testfiles/testfilereference_extra_annotated.ipynb new file mode 100644 index 0000000..d7eaf3c --- /dev/null +++ b/tests/testfiles/testfilereference_extra_annotated.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "fits_file_path = \"/home/local/test.fits\" # oda:FileReference ; oda:label \"Test file path\" ; oda:description \"Description test file path\" ." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fits_file_path_modified = fits_file_path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "outputs" + ] + }, + "outputs": [], + "source": [ + "fits_file_path_modified" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "767d51c1340bd893661ea55ea3124f6de3c7a262a8b4abca0554b478b1e2ff90" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tests/testfiles/testfileurl_extra_annotated.ipynb b/tests/testfiles/testfileurl_extra_annotated.ipynb new file mode 100644 index 0000000..f7e75fd --- /dev/null +++ b/tests/testfiles/testfileurl_extra_annotated.ipynb @@ -0,0 +1,67 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# oda:oda_token_access oda:InOdaContext .\n", + "fits_file_url = \"https://fits.gsfc.nasa.gov/samples/testkeys.fits\" # oda:FileURL ; oda:label \"Test url\" ; oda:description \"Description test url\" .\n", + "mmoda_url = \"https://www.astro.unige.ch/mmoda/dispatch-data/test.fits\" # oda:FileURL ; oda:label \"Test mmoda url\" ; oda:description \"Description mmoda url\" ." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fits_file_url_modified = fits_file_url\n", + "mmoda_url_modified = mmoda_url" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "outputs" + ] + }, + "outputs": [], + "source": [ + "fits_file_url_modified\n", + "mmoda_url_modified" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "767d51c1340bd893661ea55ea3124f6de3c7a262a8b4abca0554b478b1e2ff90" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}