diff --git a/sky/backends/wheel_utils.py b/sky/backends/wheel_utils.py index 44d47b52926..ed580569e0b 100644 --- a/sky/backends/wheel_utils.py +++ b/sky/backends/wheel_utils.py @@ -129,7 +129,11 @@ def _build_sky_wheel() -> pathlib.Path: wheel_dir = WHEEL_DIR / hash_of_latest_wheel wheel_dir.mkdir(parents=True, exist_ok=True) - shutil.move(str(wheel_path), wheel_dir) + # shutil.move will fail when the file already exists and is being + # moved across filesystems. + if not os.path.exists( + os.path.join(wheel_dir, os.path.basename(wheel_path))): + shutil.move(str(wheel_path), wheel_dir) return wheel_dir / wheel_path.name diff --git a/sky/utils/command_runner.py b/sky/utils/command_runner.py index 7eae76040d8..d373df20d16 100644 --- a/sky/utils/command_runner.py +++ b/sky/utils/command_runner.py @@ -11,6 +11,7 @@ from sky.skylet import constants from sky.skylet import log_lib from sky.utils import common_utils +from sky.utils import control_master_utils from sky.utils import subprocess_utils from sky.utils import timeline @@ -442,7 +443,9 @@ def __init__( None if ssh_control_name is None else hashlib.md5( ssh_control_name.encode()).hexdigest()[:_HASH_MAX_LENGTH]) self._ssh_proxy_command = ssh_proxy_command - self.disable_control_master = disable_control_master + self.disable_control_master = ( + disable_control_master or + control_master_utils.should_disable_control_master()) if docker_user is not None: assert port is None or port == 22, ( f'port must be None or 22 for docker_user, got {port}.') diff --git a/sky/utils/control_master_utils.py b/sky/utils/control_master_utils.py new file mode 100644 index 00000000000..d645014c417 --- /dev/null +++ b/sky/utils/control_master_utils.py @@ -0,0 +1,49 @@ +"""Utils to check if the ssh control master should be disabled.""" + +import functools + +from sky import sky_logging +from sky.utils import subprocess_utils + +logger = sky_logging.init_logger(__name__) + + +def is_tmp_9p_filesystem() -> bool: + """Check if the /tmp 
filesystem is 9p. + + Returns: + bool: True if the /tmp filesystem is 9p, False otherwise. + """ + + result = subprocess_utils.run(['df', '-T', '/tmp'], + capture_output=True, + text=True, + shell=None, + check=False, + executable=None) + + if result.returncode != 0: + return False + + filesystem_infos = result.stdout.strip().split('\n') + if len(filesystem_infos) < 2: + return False + filesystem_types = filesystem_infos[1].split() + if len(filesystem_types) < 2: + return False + return filesystem_types[1].lower() == '9p' + + +@functools.lru_cache +def should_disable_control_master() -> bool: + """Whether to disable ssh control master based on the file system. + + Returns: + bool: True if the ssh control master should be disabled, + False otherwise. + """ + if is_tmp_9p_filesystem(): + return True + # There may be additional criteria to disable ssh control master + # in the future. They should be checked here. + return False