From 91e0a5523d5c529ce0df354952276a675b15ccca Mon Sep 17 00:00:00 2001
From: Gabriele Barni <burnout87@users.noreply.github.com>
Date: Mon, 2 Dec 2024 18:15:55 +0100
Subject: [PATCH] add token only if not present, also to `fileURL` parameters
 (#214)

* add token only if not present

* adding token also to FileURL and FileReference parameters whose values are mmoda URLs

* only checking file url and using the default value in case no parameter value was provided

* test

* adapted another test

* reintroduced condition with FileReference

* extended test

* better test

* issuing a ValueError in case the argument cannot be interpreted as a FileURL

* extended test

* removed commented code

* test optimization code

---------

Co-authored-by: Denys Savchenko <56398430+dsavchenko@users.noreply.github.com>
---
 nb2workflow/nbadapter.py                      |  33 +++--
 tests/test_input_types.py                     | 128 +++++++++++++++++-
 tests/test_nbadapter.py                       |   2 +-
 .../testfilereference_extra_annotated.ipynb   |  63 +++++++++
 .../testfileurl_extra_annotated.ipynb         |  67 +++++++++
 5 files changed, 281 insertions(+), 12 deletions(-)
 create mode 100644 tests/testfiles/testfilereference_extra_annotated.ipynb
 create mode 100644 tests/testfiles/testfileurl_extra_annotated.ipynb

diff --git a/nb2workflow/nbadapter.py b/nb2workflow/nbadapter.py
index 35f0825..e4aeda6 100644
--- a/nb2workflow/nbadapter.py
+++ b/nb2workflow/nbadapter.py
@@ -841,14 +841,22 @@ def handle_url_params(self, parameters, tmpdir, context={}):
         adapted_parameters = copy.deepcopy(parameters)
         exceptions = []
         posix_path_with_annotations_pattern = re.compile(rf"^{re.escape(oda_prefix)}.*_POSIXPath_")
+        file_url_with_annotations_pattern = re.compile(rf"^{re.escape(oda_prefix)}.*_FileURL_")
+        file_reference_with_annotations_pattern = re.compile(rf"^{re.escape(oda_prefix)}.*_FileReference_")
         for input_par_name, input_par_obj in self.input_parameters.items():
             if ontology.is_ontology_available:
                 parameter_hierarchy = ontology.get_parameter_hierarchy(input_par_obj['owl_type'])
                 is_posix_path = f"{oda_prefix}POSIXPath" in parameter_hierarchy
+                is_file_url = f"{oda_prefix}FileURL" in parameter_hierarchy
+                is_file_reference = f"{oda_prefix}FileReference" in parameter_hierarchy
             else:
                 is_posix_path = f"{oda_prefix}POSIXPath" == input_par_obj['owl_type'] or \
                                 posix_path_with_annotations_pattern.match(input_par_obj['owl_type']) is not None
-            if is_posix_path:
+                is_file_url = f"{oda_prefix}FileURL" == input_par_obj['owl_type'] or \
+                                file_url_with_annotations_pattern.match(input_par_obj['owl_type']) is not None
+                is_file_reference = f"{oda_prefix}FileReference" == input_par_obj['owl_type'] or \
+                                    file_reference_with_annotations_pattern.match(input_par_obj['owl_type']) is not None
+            if is_posix_path or is_file_url or is_file_reference:
                 arg_par_value = parameters.get(input_par_name, None)
                 if arg_par_value is None:
                     arg_par_value = input_par_obj['default_value']
@@ -859,21 +867,26 @@ def handle_url_params(self, parameters, tmpdir, context={}):
                         token = context.get('token', None)
                         if token is not None:
                             logger.debug(f'adding token to the url: {arg_par_value}')
-                            url_parts = urlparse(adapted_parameters[input_par_name])
+                            url_to_adapt = adapted_parameters.get(input_par_name, arg_par_value)
+                            url_parts = urlparse(url_to_adapt)
                             url_args = parse_qs(url_parts.query)
-                            url_args['token'] = [token] # the values in the dictionary need to be lists
-                            new_url_parts = url_parts._replace(query=urlencode(url_args, doseq=True))
-                            adapted_parameters[input_par_name] = urlunparse(new_url_parts)
-                            logger.debug(f"updated url: {adapted_parameters[input_par_name]}")
-                            arg_par_value = adapted_parameters[input_par_name]
+                            if 'token' not in url_args:  # keys are parameter names: keep a token already present in the url
+                                url_args['token'] = [token] # the values in the dictionary need to be lists
+                                new_url_parts = url_parts._replace(query=urlencode(url_args, doseq=True))
+                                adapted_parameters[input_par_name] = urlunparse(new_url_parts)
+                                logger.debug(f"updated url: {adapted_parameters[input_par_name]}")
+                                arg_par_value = adapted_parameters[input_par_name]
 
                     logger.debug(f'download {arg_par_value}')
                     try:
-                        file_name = self.download_file(arg_par_value, tmpdir)
-                        adapted_parameters[input_par_name] = file_name
+                        if is_posix_path:
+                            file_name = self.download_file(arg_par_value, tmpdir)
+                            adapted_parameters[input_par_name] = file_name
                     except Exception as e:
                         exceptions.append(e)
-
+                elif is_file_url:
+                    exceptions.append(ValueError(
+                        f'Parameter {input_par_name} value "{arg_par_value}" can not be interpreted as FileURL.'))
         return dict(
             adapted_parameters=adapted_parameters,
             exceptions=exceptions
diff --git a/tests/test_input_types.py b/tests/test_input_types.py
index c91d149..c169b55 100644
--- a/tests/test_input_types.py
+++ b/tests/test_input_types.py
@@ -7,7 +7,8 @@
 
 logger = logging.getLogger(__name__)
 
-from urllib.parse import urlencode
+from urllib.parse import urlencode, urlparse, parse_qs
+
 
 @pytest.fixture
 def app():
@@ -39,6 +40,131 @@ def test_posix_download_file_extra_annotations(client):
     r = client.get('/api/v1.0/get/testposixpath_extra_annotated', query_string={'fits_file_path': 'https://fits.gsfc.nasa.gov/samples/testkeys.fits'})
     assert r.json['output']['output_file_download'] == 'file downloaded successfully'
 
+@pytest.mark.parametrize("query_string_fits_file_path", ["generic_url", "mmoda_url", None])
+def test_file_reference(client, query_string_fits_file_path):
+    status_callback_file = "status.json"
+    callback_url = 'file://' + status_callback_file
+    token = 'abc123'
+    query_string = dict(
+        _async_request='no',
+        _async_request_callback=callback_url,
+        _token=token)
+
+    fits_file_path_value = None
+    if query_string_fits_file_path == "generic_url":
+        fits_file_path_value = "https://fits.gsfc.nasa.gov/samples/testkeys.fits"
+    elif query_string_fits_file_path == "mmoda_url":
+        fits_file_path_value = "https://www.astro.unige.ch/mmoda/dispatch-data/test.fits"
+
+    query_string['fits_file_path'] = fits_file_path_value
+
+    r = client.get(f'/api/v1.0/get/testfilereference_extra_annotated', query_string=query_string)
+    assert r.status_code == 201
+
+    from nb2workflow.service import AsyncWorker
+
+    def test_worker_run():
+        AsyncWorker('test-worker').run_one()
+
+    test_worker_thread = threading.Thread(target=test_worker_run)
+    test_worker_thread.start()
+
+    while True:
+        options = client.get('/api/v1.0/options')
+        assert options.status_code == 200
+
+        r = client.get(f'/api/v1.0/get/testfilereference_extra_annotated',
+                       query_string=query_string)
+
+        logger.info('service returns %s %s', r, r.json)
+
+        if r.json['workflow_status'] == 'done':
+            logger.info('workflow done!')
+            break
+
+        time.sleep(0.1)
+
+    test_worker_thread.join()
+    assert 'data' in r.json
+    assert 'output' in r.json['data']
+    assert 'fits_file_path_modified' in r.json['data']['output']
+
+    if query_string_fits_file_path is None:
+        assert r.json['data']['output']['fits_file_path_modified'] == "/home/local/test.fits"
+    elif query_string_fits_file_path == "mmoda_url":
+        url_parts = urlparse(r.json['data']['output']['fits_file_path_modified'])
+        url_args = parse_qs(url_parts.query)
+        assert 'token' in url_args
+    elif query_string_fits_file_path == "generic_url":
+        url_parts = urlparse(r.json['data']['output']['fits_file_path_modified'])
+        url_args = parse_qs(url_parts.query)
+        assert 'token' not in url_args
+
+@pytest.mark.parametrize("query_string_fits_file_url", ["generic_url", "file_path", "numeric", None])
+def test_mmoda_file_url(client, query_string_fits_file_url):
+    status_callback_file = "status.json"
+    callback_url = 'file://' + status_callback_file
+    token = 'abc123'
+    query_string = dict(
+        _async_request='no',
+        _async_request_callback=callback_url,
+        _token=token)
+
+    fits_file_url_value = None
+    if query_string_fits_file_url == "generic_url":
+        fits_file_url_value = "https://fits.gsfc.nasa.gov/samples/testkeys.fits"
+    elif query_string_fits_file_url == "file_path":
+        fits_file_url_value = "/home/local/test.fits"
+    elif query_string_fits_file_url == "numeric":
+        fits_file_url_value = 123456
+
+    query_string['fits_file_url'] = fits_file_url_value
+
+    r = client.get(f'/api/v1.0/get/testfileurl_extra_annotated', query_string=query_string)
+
+    assert r.status_code == 201
+
+    from nb2workflow.service import AsyncWorker
+
+    def test_worker_run():
+        AsyncWorker('test-worker').run_one()
+
+    test_worker_thread = threading.Thread(target=test_worker_run)
+    test_worker_thread.start()
+
+    while True:
+        options = client.get('/api/v1.0/options')
+        assert options.status_code == 200
+
+        r = client.get(f'/api/v1.0/get/testfileurl_extra_annotated',
+                       query_string=query_string)
+
+        logger.info('service returns %s %s', r, r.json)
+
+        if r.json['workflow_status'] == 'done':
+            logger.info('workflow done!')
+            break
+
+        time.sleep(0.1)
+
+    test_worker_thread.join()
+    assert 'data' in r.json
+    assert 'output' in r.json['data']
+    if query_string_fits_file_url != "file_path" and query_string_fits_file_url != "numeric":
+        assert 'mmoda_url_modified' in r.json['data']['output']
+        url_parts = urlparse(r.json['data']['output']['mmoda_url_modified'])
+        url_args = parse_qs(url_parts.query)
+        assert 'token' in url_args
+
+        assert 'fits_file_url_modified' in r.json['data']['output']
+        url_parts = urlparse(r.json['data']['output']['fits_file_url_modified'])
+        url_args = parse_qs(url_parts.query)
+        assert 'token' not in url_args
+
+    else:
+        assert r.json['data']['output'] == 'incomplete'
+        assert r.json['data']['exceptions'][0]['edump'] == f"ValueError('Parameter fits_file_url value \"{fits_file_url_value}\" can not be interpreted as FileURL.')"
+
 def test_posix_download_file_with_arg_low_download_limit(client, app_low_download_limit):
     r = client.get('/api/v1.0/get/testposixpath', query_string={'fits_file_path': 'https://fits.gsfc.nasa.gov/samples/testkeys.fits'})
     assert r.json['output'] == {}
diff --git a/tests/test_nbadapter.py b/tests/test_nbadapter.py
index 77ffaaf..23d352a 100644
--- a/tests/test_nbadapter.py
+++ b/tests/test_nbadapter.py
@@ -84,7 +84,7 @@ def test_find_notebooks(caplog):
     assert 'Ignoring pattern.' in caplog.text
     
     nbas = find_notebooks(nb_dir)
-    assert len(nbas) == 9
+    assert len(nbas) == 11
     
     nbas = find_notebooks(nb_dir, pattern=r'.*bool')
     assert len(nbas) == 1
diff --git a/tests/testfiles/testfilereference_extra_annotated.ipynb b/tests/testfiles/testfilereference_extra_annotated.ipynb
new file mode 100644
index 0000000..d7eaf3c
--- /dev/null
+++ b/tests/testfiles/testfilereference_extra_annotated.ipynb
@@ -0,0 +1,63 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "fits_file_path = \"/home/local/test.fits\" # oda:FileReference ; oda:label \"Test file path\" ; oda:description \"Description test file path\" ."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fits_file_path_modified = fits_file_path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "outputs"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "fits_file_path_modified"
+   ]
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "767d51c1340bd893661ea55ea3124f6de3c7a262a8b4abca0554b478b1e2ff90"
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tests/testfiles/testfileurl_extra_annotated.ipynb b/tests/testfiles/testfileurl_extra_annotated.ipynb
new file mode 100644
index 0000000..f7e75fd
--- /dev/null
+++ b/tests/testfiles/testfileurl_extra_annotated.ipynb
@@ -0,0 +1,67 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "# oda:oda_token_access oda:InOdaContext .\n",
+    "fits_file_url = \"https://fits.gsfc.nasa.gov/samples/testkeys.fits\" # oda:FileURL ; oda:label \"Test url\" ; oda:description \"Description test url\" .\n",
+    "mmoda_url = \"https://www.astro.unige.ch/mmoda/dispatch-data/test.fits\"  # oda:FileURL ; oda:label \"Test mmoda url\" ; oda:description \"Description mmoda url\" ."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fits_file_url_modified = fits_file_url\n",
+    "mmoda_url_modified = mmoda_url"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "outputs"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "fits_file_url_modified\n",
+    "mmoda_url_modified"
+   ]
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "767d51c1340bd893661ea55ea3124f6de3c7a262a8b4abca0554b478b1e2ff90"
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}