mosaicml · KuuCi · Dec 13, 2024 · Dec 12, 2024 · Dec 13, 2024 · Dec 13, 2024
@@ -6,6 +6,7 @@
 import logging
 import math
 import os
+import re
 import warnings
 from dataclasses import dataclass, fields
 from typing import (
@@ -703,6 +704,8 @@ def _process_data_source(
         true_split (str): The split of the dataset to be added (i.e. train or eval)
         data_paths (List[Tuple[str, str, str]]): A list of tuples formatted as (data type, path, split)
     """
+    if source_dataset_path:
+        source_dataset_path = re.sub(r'/+', '/', source_dataset_path)
     # Check for Delta table
     if source_dataset_path and len(source_dataset_path.split('.')) == 3:
         data_paths.append(('delta_table', source_dataset_path, true_split))
@@ -788,7 +791,6 @@ def log_dataset_uri(cfg: dict[str, Any]) -> None:
 
     # Map data source types to their respective MLFlow DataSource.
     for dataset_type, path, split in data_paths:
-
         if dataset_type in dataset_source_mapping:
             source_class = dataset_source_mapping[dataset_type]
             if dataset_type == 'delta_table':