From b70c520c1729c2bc5c2b7ac6e1d21bd33fab3f53 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 15 Mar 2024 18:23:29 +0100 Subject: [PATCH 1/8] test to show vulnerability --- tests/test_server_basic.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/test_server_basic.py b/tests/test_server_basic.py index be39ae94..d5b14506 100644 --- a/tests/test_server_basic.py +++ b/tests/test_server_basic.py @@ -439,6 +439,42 @@ def test_download_products_public(dispatcher_long_living_fixture, empty_products assert data_downloaded == empty_products_files_fixture['content'] +@pytest.mark.fast +def test_download_products_outside_dir(dispatcher_long_living_fixture, empty_products_files_fixture): + server = dispatcher_long_living_fixture + + logger.info("constructed server: %s", server) + + session_id = empty_products_files_fixture['session_id'] + job_id = empty_products_files_fixture['job_id'] + + with open('external_file', 'w') as fd: + fd.write('confidential') + + params = { + 'instrument': 'any_name', + # since we are passing a job_id + 'query_status': 'ready', + 'file_list': '../external_file', + 'download_file_name': 'output_test', + 'session_id': session_id, + 'job_id': job_id + } + + c = requests.get(server + "/download_products", + params=params) + + assert c.status_code == 200 + + # download the output, read it and then compare it + with open(f'scratch_sid_{session_id}_jid_{job_id}/output_test', 'wb') as fout: + fout.write(c.content) + + with gzip.open(f'scratch_sid_{session_id}_jid_{job_id}/output_test', 'rt') as fout: + data_downloaded = fout.read() + + assert data_downloaded == 'confidential' + def test_query_restricted_instrument(dispatcher_live_fixture): server = dispatcher_live_fixture From daad103f80378a85a0a78afa2c487aae6bbc7e28 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 15 Mar 2024 18:42:06 +0100 Subject: [PATCH 2/8] check file is in session dir --- cdci_data_analysis/flask_app/dispatcher_query.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index 2bc93744..1bcd1f5f 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -922,7 +922,14 @@ def prepare_download(self, file_list, file_name, scratch_dir): file_list = [file_list] for ID, f in enumerate(file_list): - file_list[ID] = os.path.join(scratch_dir + '/', f) + file_list[ID] = os.path.join(scratch_dir, f) + # check if the file is in the scratch_dir (i.e. f didn't contain ..) + scratch_abs = os.path.abspath(scratch_dir) + file_abs = os.path.abspath(file_list[ID]) + + if (os.path.commonpath(scratch_abs) != os.path.commonpath([scratch_abs, file_abs]) + or not os.path.isfile(file_abs)): + raise RequestNotAuthorized('No such file') tmp_dir = tempfile.mkdtemp(prefix='download_', dir='./') @@ -1055,6 +1062,7 @@ def download_products(self): self.validate_job_id(request_parameters_from_scratch_dir=True) file_list = self.args.get('file_list').split(',') + file_name = self.args.get('download_file_name') tmp_dir, target_file = self.prepare_download( From b2ec7d38b0250080e790d99230c5b2aa51f8a80f Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 15 Mar 2024 18:47:12 +0100 Subject: [PATCH 3/8] also prevent relative in args --- cdci_data_analysis/flask_app/dispatcher_query.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index 1bcd1f5f..3b92ccc4 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -1062,7 +1062,10 @@ def download_products(self): self.validate_job_id(request_parameters_from_scratch_dir=True) file_list = self.args.get('file_list').split(',') - + for fn in file_list: + if f"../" is fn: + raise RequestNotAuthorized('No such file') + file_name = self.args.get('download_file_name') tmp_dir, target_file = self.prepare_download( From ed41f0318f55d0cac76c00baf858f67043acbe94 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 15 Mar 2024 19:18:20 +0100 Subject: [PATCH 4/8] adapt test --- .../flask_app/dispatcher_query.py | 2 +- tests/test_server_basic.py | 34 +++++++++++-------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index 3b92ccc4..8732cba1 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -927,7 +927,7 @@ def prepare_download(self, file_list, file_name, scratch_dir): scratch_abs = os.path.abspath(scratch_dir) file_abs = os.path.abspath(file_list[ID]) - if (os.path.commonpath(scratch_abs) != os.path.commonpath([scratch_abs, file_abs]) + if (os.path.commonpath([scratch_abs]) != os.path.commonpath([scratch_abs, file_abs]) or not os.path.isfile(file_abs)): raise RequestNotAuthorized('No such file') diff --git a/tests/test_server_basic.py b/tests/test_server_basic.py index d5b14506..91abfd21 100644 --- a/tests/test_server_basic.py +++ b/tests/test_server_basic.py @@ -1,7 +1,7 @@ import re import shutil import urllib - +import io import requests import time import uuid @@ -448,8 +448,8 @@ def test_download_products_outside_dir(dispatcher_long_living_fixture, empty_pro session_id = empty_products_files_fixture['session_id'] job_id = empty_products_files_fixture['job_id'] - with open('external_file', 'w') as fd: - fd.write('confidential') + with open('external_file', 'w') as outb: + outb.write('__confidential__') params = { 'instrument': 'any_name', @@ -464,17 +464,23 @@ def test_download_products_outside_dir(dispatcher_long_living_fixture, empty_pro c = requests.get(server + "/download_products", params=params) - assert c.status_code == 200 - - # download the output, read it and then compare it - with open(f'scratch_sid_{session_id}_jid_{job_id}/output_test', 'wb') as fout: - fout.write(c.content) - - with gzip.open(f'scratch_sid_{session_id}_jid_{job_id}/output_test', 'rt') as fout: - data_downloaded = fout.read() - - assert data_downloaded == 'confidential' - + assert c.status_code in [403, 500] + + # check the output anyway + assert b"__confidential__" not in c.content + if hasattr(c, 'text'): + assert "__confidential__" not in c.text + + with io.BytesIO() as outb: + outb.write(c.content) + outb.seek(0) + gz = gzip.GzipFile(fileobj=outb, mode='rb') + with pytest.raises(gzip.BadGzipFile): + gz.read() + try: + os.remove('external_file') + except: + pass def test_query_restricted_instrument(dispatcher_live_fixture): server = dispatcher_live_fixture From d70a9e52f96bdb0fd628ab983500a5d743bc632a Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 15 Mar 2024 19:22:57 +0100 Subject: [PATCH 5/8] typo --- cdci_data_analysis/flask_app/dispatcher_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index 8732cba1..a8412ee8 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -1063,7 +1063,7 @@ def download_products(self): self.validate_job_id(request_parameters_from_scratch_dir=True) file_list = self.args.get('file_list').split(',') for fn in file_list: - if f"../" is fn: + if f"../" in fn: raise RequestNotAuthorized('No such file') file_name = self.args.get('download_file_name') From e1f0f3d21fbb6212261f45af583086a0832215df Mon Sep 17 00:00:00 2001 From: Denys Savchenko <56398430+dsavchenko@users.noreply.github.com> Date: Fri, 15 Mar 2024 19:55:42 +0100 Subject: [PATCH 6/8] unneded f --- cdci_data_analysis/flask_app/dispatcher_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index a8412ee8..2ca3116b 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -1063,7 +1063,7 @@ def download_products(self): self.validate_job_id(request_parameters_from_scratch_dir=True) file_list = self.args.get('file_list').split(',') for fn in file_list: - if f"../" in fn: + if "../" in fn: raise RequestNotAuthorized('No such file') file_name = self.args.get('download_file_name') From a9b511ef5dbf5a5c8f092ee316f2f91954e31560 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 15 Mar 2024 21:40:31 +0100 Subject: [PATCH 7/8] better checks of file paths --- .../flask_app/dispatcher_query.py | 27 ++++++---- tests/test_server_basic.py | 53 +++++++++++-------- 2 files changed, 49 insertions(+), 31 deletions(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index 2ca3116b..c03247ff 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -913,6 +913,21 @@ def clear_temp_dir(self, temp_scratch_dir=None): if temp_scratch_dir is not None and temp_scratch_dir != self.scratch_dir and os.path.exists(temp_scratch_dir): shutil.rmtree(temp_scratch_dir) + @staticmethod + def validated_download_file_path(basepath, filename, should_exist=True): + # basic arg validation + if "../" in filename or filename.startswith(os.sep): + raise RequestNotAuthorized('No such file') + + # still explicitly check if the file is in the dir + base_abs = os.path.realpath(basepath) + file_abs = os.path.realpath(os.path.join(basepath, filename)) + + if (os.path.commonpath([base_abs]) != os.path.commonpath([base_abs, file_abs]) + or (should_exist and not os.path.isfile(file_abs)) ): + raise RequestNotAuthorized('No such file') + return file_abs + def prepare_download(self, file_list, file_name, scratch_dir): file_name = file_name.replace(' ', '_') @@ -922,25 +937,17 @@ def prepare_download(self, file_list, file_name, scratch_dir): file_list = [file_list] for ID, f in enumerate(file_list): - file_list[ID] = os.path.join(scratch_dir, f) - # check if the file is in the scratch_dir (i.e. f didn't contain ..) - scratch_abs = os.path.abspath(scratch_dir) - file_abs = os.path.abspath(file_list[ID]) - - if (os.path.commonpath([scratch_abs]) != os.path.commonpath([scratch_abs, file_abs]) - or not os.path.isfile(file_abs)): - raise RequestNotAuthorized('No such file') + file_list[ID] = self.validated_download_file_path(scratch_dir, f) tmp_dir = tempfile.mkdtemp(prefix='download_', dir='./') - file_path = os.path.join(tmp_dir, file_name) + file_path = self.validated_download_file_path(tmp_dir, file_name, should_exist=False) out_dir = file_name.replace('.tar', '') out_dir = out_dir.replace('.gz', '') if len(file_list) > 1: tar = tarfile.open("%s" % (file_path), "w:gz") for name in file_list: - #print('add to tar', file_name,name) if name is not None: tar.add(name, arcname='%s/%s' % (out_dir, os.path.basename(name))) diff --git a/tests/test_server_basic.py b/tests/test_server_basic.py index 91abfd21..535a87c7 100644 --- a/tests/test_server_basic.py +++ b/tests/test_server_basic.py @@ -440,23 +440,30 @@ def test_download_products_public(dispatcher_long_living_fixture, empty_products assert data_downloaded == empty_products_files_fixture['content'] @pytest.mark.fast -def test_download_products_outside_dir(dispatcher_long_living_fixture, empty_products_files_fixture): +@pytest.mark.parametrize('filelist', ['../external_file', '/tmp/external_file', 'test.fits.gz']) +@pytest.mark.parametrize('outname', ['/tmp/output_test', '../output_test', 'output_test']) +def test_download_products_outside_dir(dispatcher_long_living_fixture, + empty_products_files_fixture, + filelist, + outname): server = dispatcher_long_living_fixture + is_good = True if filelist == 'test.fits.gz' and outname == 'output_test' else False logger.info("constructed server: %s", server) session_id = empty_products_files_fixture['session_id'] job_id = empty_products_files_fixture['job_id'] - with open('external_file', 'w') as outb: - outb.write('__confidential__') + if not is_good: + with open(filelist.replace('../', ''), 'w') as outb: + outb.write('__confidential__') params = { 'instrument': 'any_name', # since we are passing a job_id 'query_status': 'ready', - 'file_list': '../external_file', - 'download_file_name': 'output_test', + 'file_list': filelist, + 'download_file_name': outname, 'session_id': session_id, 'job_id': job_id } @@ -464,23 +471,27 @@ def test_download_products_outside_dir(dispatcher_long_living_fixture, empty_pro c = requests.get(server + "/download_products", params=params) - assert c.status_code in [403, 500] - - # check the output anyway - assert b"__confidential__" not in c.content - if hasattr(c, 'text'): - assert "__confidential__" not in c.text + if is_good: + assert c.status_code == 200 + # further checks in previous test + else: + assert c.status_code == 403 - with io.BytesIO() as outb: - outb.write(c.content) - outb.seek(0) - gz = gzip.GzipFile(fileobj=outb, mode='rb') - with pytest.raises(gzip.BadGzipFile): - gz.read() - try: - os.remove('external_file') - except: - pass + # check the output anyway + assert b"__confidential__" not in c.content + if hasattr(c, 'text'): + assert "__confidential__" not in c.text + + with io.BytesIO() as outb: + outb.write(c.content) + outb.seek(0) + gz = gzip.GzipFile(fileobj=outb, mode='rb') + with pytest.raises(gzip.BadGzipFile): + gz.read() + try: + os.remove(filelist.replace('../', '')) + except: + pass def test_query_restricted_instrument(dispatcher_live_fixture): server = dispatcher_live_fixture From 66595471fa969783d473dd02a3d2152a060792a0 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 15 Mar 2024 21:50:49 +0100 Subject: [PATCH 8/8] remove duplication --- cdci_data_analysis/flask_app/dispatcher_query.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cdci_data_analysis/flask_app/dispatcher_query.py b/cdci_data_analysis/flask_app/dispatcher_query.py index c03247ff..3c9244d0 100644 --- a/cdci_data_analysis/flask_app/dispatcher_query.py +++ b/cdci_data_analysis/flask_app/dispatcher_query.py @@ -1069,9 +1069,6 @@ def download_products(self): self.validate_job_id(request_parameters_from_scratch_dir=True) file_list = self.args.get('file_list').split(',') - for fn in file_list: - if "../" in fn: - raise RequestNotAuthorized('No such file') file_name = self.args.get('download_file_name')