From 3fc0d89ca38f0561bc51c5da8601bba61694b52c Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Wed, 22 May 2024 14:59:28 -0700 Subject: [PATCH 1/4] quick patch --- llmfoundry/utils/config_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llmfoundry/utils/config_utils.py b/llmfoundry/utils/config_utils.py index 6010b19b6f..ec4be0e567 100644 --- a/llmfoundry/utils/config_utils.py +++ b/llmfoundry/utils/config_utils.py @@ -589,7 +589,7 @@ def _process_data_source( ('uc_volume', source_dataset_path[len('dbfs:'):], true_split), ) # Check for HF path - elif 'hf_name' in dataset: + elif 'hf_name' in dataset and dataset['hf_name']: hf_path = dataset['hf_name'] backend, _, _ = parse_uri(hf_path) if backend: @@ -600,7 +600,7 @@ def _process_data_source( else: data_paths.append(('hf', hf_path, true_split)) # Check for remote path - elif 'remote' in dataset: + elif 'remote' in dataset and dataset['remote']: remote_path = dataset['remote'] backend, _, _ = parse_uri(remote_path) if backend: From dda8e118f0352c24ffae7052fdc27e8cb050687b Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Wed, 22 May 2024 15:07:10 -0700 Subject: [PATCH 2/4] also separately check for local path --- llmfoundry/utils/config_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llmfoundry/utils/config_utils.py b/llmfoundry/utils/config_utils.py index ec4be0e567..6b5e803cc9 100644 --- a/llmfoundry/utils/config_utils.py +++ b/llmfoundry/utils/config_utils.py @@ -610,7 +610,11 @@ def _process_data_source( ) if cfg_split else remote_path data_paths.append((backend, remote_path, true_split)) else: + # No backend detected so assume local path data_paths.append(('local', remote_path, true_split)) + # Check for remote path + elif 'local' in dataset and dataset['local']: + data_paths.append(('local', remote_path, true_split)) else: log.warning('DataSource Not Found.') From d04036d02578f3b7af34cd959b651d9c8e567faf Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Wed, 22 May 
2024 15:07:26 -0700 Subject: [PATCH 3/4] typo --- llmfoundry/utils/config_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llmfoundry/utils/config_utils.py b/llmfoundry/utils/config_utils.py index 6b5e803cc9..8ac8d2afa7 100644 --- a/llmfoundry/utils/config_utils.py +++ b/llmfoundry/utils/config_utils.py @@ -612,7 +612,7 @@ def _process_data_source( else: # No backend detected so assume local path data_paths.append(('local', remote_path, true_split)) - # Check for remote path + # Check for local path elif 'local' in dataset and dataset['local']: data_paths.append(('local', remote_path, true_split)) else: From 27304c40e6fe9b09b45fb2e3f4b5a8de6f0372a0 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Wed, 22 May 2024 15:25:35 -0700 Subject: [PATCH 4/4] typo --- llmfoundry/utils/config_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llmfoundry/utils/config_utils.py b/llmfoundry/utils/config_utils.py index 8ac8d2afa7..72ca19834b 100644 --- a/llmfoundry/utils/config_utils.py +++ b/llmfoundry/utils/config_utils.py @@ -614,7 +614,7 @@ def _process_data_source( data_paths.append(('local', remote_path, true_split)) # Check for local path elif 'local' in dataset and dataset['local']: - data_paths.append(('local', remote_path, true_split)) + data_paths.append(('local', dataset['local'], true_split)) else: log.warning('DataSource Not Found.')