diff --git a/ray-operator/controllers/ray/raycluster_controller.go b/ray-operator/controllers/ray/raycluster_controller.go index c64d9137cde..d69e1ee4ad2 100644 --- a/ray-operator/controllers/ray/raycluster_controller.go +++ b/ray-operator/controllers/ray/raycluster_controller.go @@ -1239,7 +1239,7 @@ func (r *RayClusterReconciler) buildRedisCleanupJob(ctx context.Context, instanc if utils.EnvVarExists(utils.REDIS_USERNAME, pod.Spec.Containers[utils.RayContainerIndex].Env) { pod.Spec.Containers[utils.RayContainerIndex].Args[0] += "sys.exit(1) if not cleanup_redis_storage(host=parsed.hostname, port=parsed.port, username=os.getenv('REDIS_USERNAME', parsed.username), password=os.getenv('REDIS_PASSWORD', parsed.password), use_ssl=parsed.scheme=='rediss', storage_namespace=os.getenv('RAY_external_storage_namespace')) else None\"" } else { - pod.Spec.Containers[utils.RayContainerIndex].Args[0] += "sys.exit(1) if not cleanup_redis_storage(host=parsed.hostname, port=parsed.port, password=os.getenv('REDIS_PASSWORD', parsed.password), use_ssl=parsed.scheme=='rediss', storage_namespace=os.getenv('RAY_external_storage_namespace')) else None\"" + pod.Spec.Containers[utils.RayContainerIndex].Args[0] += "sys.exit(1) if not cleanup_redis_storage(host=parsed.hostname, port=parsed.port, password=os.getenv('REDIS_PASSWORD', parsed.password or ''), use_ssl=parsed.scheme=='rediss', storage_namespace=os.getenv('RAY_external_storage_namespace')) else None\"" } // Disable liveness and readiness probes because the Job will not launch processes like Raylet and GCS. diff --git a/ray-operator/test/e2e/raycluster_gcsft_test.go b/ray-operator/test/e2e/raycluster_gcsft_test.go index 143c50ba512..58cc892cae9 100644 --- a/ray-operator/test/e2e/raycluster_gcsft_test.go +++ b/ray-operator/test/e2e/raycluster_gcsft_test.go @@ -2,6 +2,7 @@ package e2e import ( "testing" + "time" . "github.com/onsi/gomega" v1 "k8s.io/api/core/v1" @@ -81,7 +82,8 @@ func TestGcsFaultToleranceOptions(t *testing.T) { g := NewWithT(t) namespace := test.NewTestNamespace() - deployRedis(test, namespace.Name, tc.redisPassword) + checkRedisDBSize := deployRedis(test, namespace.Name, tc.redisPassword) + defer g.Eventually(checkRedisDBSize, time.Second*30, time.Second).Should(BeEquivalentTo("0")) if tc.createSecret { test.T().Logf("Creating Redis password secret") @@ -116,6 +118,9 @@ func TestGcsFaultToleranceOptions(t *testing.T) { } else { g.Expect(utils.EnvVarExists(utils.REDIS_PASSWORD, headPod.Spec.Containers[utils.RayContainerIndex].Env)).Should(BeTrue()) } + + err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Delete(test.Ctx(), rayCluster.Name, metav1.DeleteOptions{}) + g.Expect(err).NotTo(HaveOccurred()) }) } } @@ -171,7 +176,8 @@ func TestGcsFaultToleranceAnnotations(t *testing.T) { redisPassword = tc.redisPasswordInRayStartParams } - deployRedis(test, namespace.Name, redisPassword) + checkRedisDBSize := deployRedis(test, namespace.Name, redisPassword) + defer g.Eventually(checkRedisDBSize, time.Second*30, time.Second).Should(BeEquivalentTo("0")) // Prepare RayCluster ApplyConfiguration podTemplateAC := headPodTemplateApplyConfiguration() @@ -224,6 +230,9 @@ func TestGcsFaultToleranceAnnotations(t *testing.T) { } else { g.Expect(utils.EnvVarExists(utils.REDIS_PASSWORD, headPod.Spec.Containers[utils.RayContainerIndex].Env)).Should(BeTrue()) } + + err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Delete(test.Ctx(), rayCluster.Name, metav1.DeleteOptions{}) + g.Expect(err).NotTo(HaveOccurred()) }) } } diff --git a/ray-operator/test/e2e/support.go b/ray-operator/test/e2e/support.go index 7ea97c8f58b..2dec0d953df 100644 --- a/ray-operator/test/e2e/support.go +++ b/ray-operator/test/e2e/support.go @@ -2,13 +2,12 @@ package e2e import ( "embed" + "strings" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - appsv1ac "k8s.io/client-go/applyconfigurations/apps/v1" corev1ac "k8s.io/client-go/applyconfigurations/core/v1" - metav1ac "k8s.io/client-go/applyconfigurations/meta/v1" rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1" . "github.com/ray-project/kuberay/ray-operator/test/support" @@ -177,26 +176,20 @@ func jobSubmitterPodTemplateApplyConfiguration() *corev1ac.PodTemplateSpecApplyC })))) } -func deployRedis(t Test, namespace string, password string) { +func deployRedis(t Test, namespace string, password string) func() string { redisContainer := corev1ac.Container().WithName("redis").WithImage("redis:7.4"). WithPorts(corev1ac.ContainerPort().WithContainerPort(6379)) + dbSizeCmd := []string{"redis-cli", "--no-auth-warning", "DBSIZE"} if password != "" { redisContainer.WithCommand("redis-server", "--requirepass", password) + dbSizeCmd = []string{"redis-cli", "--no-auth-warning", "-a", password, "DBSIZE"} } - _, err := t.Client().Core().AppsV1().Deployments(namespace).Apply( + pod, err := t.Client().Core().CoreV1().Pods(namespace).Apply( t.Ctx(), - appsv1ac.Deployment("redis", namespace). - WithSpec(appsv1ac.DeploymentSpec(). - WithReplicas(1). - WithSelector(metav1ac.LabelSelector().WithMatchLabels(map[string]string{"app": "redis"})). - WithTemplate(corev1ac.PodTemplateSpec(). - WithLabels(map[string]string{"app": "redis"}). - WithSpec(corev1ac.PodSpec(). - WithContainers(redisContainer), - ), - ), - ), + corev1ac.Pod("redis", namespace). + WithLabels(map[string]string{"app": "redis"}). + WithSpec(corev1ac.PodSpec().WithContainers(redisContainer)), TestApplyOptions, ) assert.NoError(t.T(), err) @@ -213,4 +206,9 @@ func deployRedis(t Test, namespace string, password string) { TestApplyOptions, ) assert.NoError(t.T(), err) + + return func() string { + stdout, stderr := ExecPodCmd(t, pod, "redis", dbSizeCmd) + return strings.TrimSpace(stdout.String() + stderr.String()) + } }