Skip to content

Commit

Permalink
[Refactor] Move ValidateRayClusterSpec to validation.go and its `…
Browse files Browse the repository at this point in the history
…unit test` to `validation_test.go` (#2790)
  • Loading branch information
CheyuWu authored Jan 22, 2025
1 parent 20ed56f commit 8c53bd5
Show file tree
Hide file tree
Showing 6 changed files with 517 additions and 502 deletions.
3 changes: 3 additions & 0 deletions apiserver/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ require github.com/pmezard/go-difflib v1.0.0 // indirect
require (
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful/v3 v3.12.1 // indirect
Expand Down Expand Up @@ -86,6 +87,8 @@ require (
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apiextensions-apiserver v0.30.2 // indirect
k8s.io/apiserver v0.30.2 // indirect
k8s.io/component-base v0.30.2 // indirect
k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
Expand Down
6 changes: 6 additions & 0 deletions apiserver/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

73 changes: 1 addition & 72 deletions ray-operator/controllers/ray/raycluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,77 +211,6 @@ func (r *RayClusterReconciler) deleteAllPods(ctx context.Context, filters common
return pods, nil
}

// Validation for invalid Ray Cluster configurations.
func validateRayClusterSpec(instance *rayv1.RayCluster) error {
if len(instance.Spec.HeadGroupSpec.Template.Spec.Containers) == 0 {
return fmt.Errorf("headGroupSpec should have at least one container")
}

for _, workerGroup := range instance.Spec.WorkerGroupSpecs {
if len(workerGroup.Template.Spec.Containers) == 0 {
return fmt.Errorf("workerGroupSpec should have at least one container")
}
}

if instance.Annotations[utils.RayFTEnabledAnnotationKey] != "" && instance.Spec.GcsFaultToleranceOptions != nil {
return fmt.Errorf("%s annotation and GcsFaultToleranceOptions are both set. "+
"Please use only GcsFaultToleranceOptions to configure GCS fault tolerance", utils.RayFTEnabledAnnotationKey)
}

if !utils.IsGCSFaultToleranceEnabled(*instance) {
if utils.EnvVarExists(utils.RAY_REDIS_ADDRESS, instance.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Env) {
return fmt.Errorf("%s is set which implicitly enables GCS fault tolerance, "+
"but GcsFaultToleranceOptions is not set. Please set GcsFaultToleranceOptions "+
"to enable GCS fault tolerance", utils.RAY_REDIS_ADDRESS)
}
}

headContainer := instance.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex]
if instance.Spec.GcsFaultToleranceOptions != nil {
if redisPassword := instance.Spec.HeadGroupSpec.RayStartParams["redis-password"]; redisPassword != "" {
return fmt.Errorf("cannot set `redis-password` in rayStartParams when " +
"GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.RedisPassword instead")
}

if utils.EnvVarExists(utils.REDIS_PASSWORD, headContainer.Env) {
return fmt.Errorf("cannot set `REDIS_PASSWORD` env var in head Pod when " +
"GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.RedisPassword instead")
}

if utils.EnvVarExists(utils.RAY_REDIS_ADDRESS, headContainer.Env) {
return fmt.Errorf("cannot set `RAY_REDIS_ADDRESS` env var in head Pod when " +
"GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.RedisAddress instead")
}

if instance.Annotations[utils.RayExternalStorageNSAnnotationKey] != "" {
return fmt.Errorf("cannot set `ray.io/external-storage-namespace` annotation when " +
"GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.ExternalStorageNamespace instead")
}
}
if instance.Spec.HeadGroupSpec.RayStartParams["redis-username"] != "" || utils.EnvVarExists(utils.REDIS_USERNAME, headContainer.Env) {
return fmt.Errorf("cannot set redis username in rayStartParams or environment variables" +
" - use GcsFaultToleranceOptions.RedisUsername instead")
}

if !features.Enabled(features.RayJobDeletionPolicy) {
for _, workerGroup := range instance.Spec.WorkerGroupSpecs {
if workerGroup.Suspend != nil && *workerGroup.Suspend {
return fmt.Errorf("suspending worker groups is currently available when the RayJobDeletionPolicy feature gate is enabled")
}
}
}

if utils.IsAutoscalingEnabled(instance) {
for _, workerGroup := range instance.Spec.WorkerGroupSpecs {
if workerGroup.Suspend != nil && *workerGroup.Suspend {
// TODO (rueian): This can be supported in future Ray. We should check the RayVersion once we know the version.
return fmt.Errorf("suspending worker groups is not currently supported with Autoscaler enabled")
}
}
}
return nil
}

func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, instance *rayv1.RayCluster) (ctrl.Result, error) {
var reconcileErr error
logger := ctrl.LoggerFrom(ctx)
Expand All @@ -291,7 +220,7 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, instance
return ctrl.Result{}, nil
}

if err := validateRayClusterSpec(instance); err != nil {
if err := utils.ValidateRayClusterSpec(instance); err != nil {
logger.Error(err, fmt.Sprintf("The RayCluster spec is invalid %s/%s", instance.Namespace, instance.Name))
r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.InvalidRayClusterSpec),
"The RayCluster spec is invalid %s/%s: %v", instance.Namespace, instance.Name, err)
Expand Down
Loading

0 comments on commit 8c53bd5

Please sign in to comment.