Skip to content

Commit

Permalink
update comments
Browse files Browse the repository at this point in the history
  • Loading branch information
cg505 committed Dec 8, 2024
1 parent 0036d26 commit a63eaf5
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
5 changes: 2 additions & 3 deletions sky/backends/backend_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1839,9 +1839,8 @@ def run_ray_status_to_check_ray_cluster_healthy() -> bool:
if (not node_statuses and handle.launched_resources.cloud.STATUS_VERSION >=
clouds.StatusVersion.SKYPILOT):
# Note: launched_at is set during sky launch, even on an existing
# cluster. This could cause extra checks if the cluster was already up
# before sky launch, but it will catch the case where the cluster was
# terminated on the cloud and restarted by sky launch.
# cluster. This will catch the case where the cluster was terminated on
# the cloud and restarted by sky launch.
time_since_launch = time.time() - record['launched_at']
if (record['status'] == status_lib.ClusterStatus.INIT and
time_since_launch < _LAUNCH_DOUBLE_CHECK_WINDOW):
Expand Down
9 changes: 8 additions & 1 deletion sky/backends/cloud_vm_ray_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -4133,7 +4133,14 @@ def post_teardown_cleanup(self,
backend_utils.SSHConfigHelper.remove_cluster(handle.cluster_name)

# Confirm that instances have actually transitioned state before
# updating the state database.
# updating the state database. We do this immediately before removing
# the state from the database, so that we can guarantee that this is
# always called before the state is removed. We considered running this
# check as part of provisioner.teardown_cluster or
# provision.terminate_instances, but it would open the door to code paths
# that successfully call this function but do not first call
# teardown_cluster or terminate_instances. See
# https://github.com/skypilot-org/skypilot/pull/4443#discussion_r1872798032
attempts = 0
while True:
logger.debug(f'instance statuses attempt {attempts + 1}')
Expand Down

0 comments on commit a63eaf5

Please sign in to comment.