From 030262450947bbeac039999ad555cd2153f47580 Mon Sep 17 00:00:00 2001 From: Krithika Sundararajan Date: Thu, 27 Jul 2023 13:51:32 +0800 Subject: [PATCH] fix: Add precautionary logic for PDB (#353) * Add precautionary logic for PDB * Add unit test for PDB creation * Update integration tests * Fix lint error --- .../service/router_deployment_service.go | 51 +++- .../service/router_deployment_service_test.go | 223 +++++++++++++++++- 2 files changed, 252 insertions(+), 22 deletions(-) diff --git a/api/turing/service/router_deployment_service.go b/api/turing/service/router_deployment_service.go index 51f8d3239..86edaa5b0 100644 --- a/api/turing/service/router_deployment_service.go +++ b/api/turing/service/router_deployment_service.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "math" "os" "strings" "sync" @@ -748,8 +749,26 @@ func (ds *deploymentService) createPodDisruptionBudgets( ) []*cluster.PodDisruptionBudget { pdbs := []*cluster.PodDisruptionBudget{} + // Only create PDB if: ceil(minReplica * minAvailablePercent) < minReplica + // If not, the replicas may be unable to be removed. + // Note that we only care about minReplica here because, for active replicas > minReplica, + // the condition will be satisfied if it was satisfied for the minReplica case. + + var minAvailablePercent float64 + if ds.pdbConfig.MinAvailablePercentage != nil { + minAvailablePercent = float64(*ds.pdbConfig.MinAvailablePercentage) / 100.0 + } else if ds.pdbConfig.MaxUnavailablePercentage != nil { + minAvailablePercent = float64(100-*ds.pdbConfig.MaxUnavailablePercentage) / 100.0 + } else { + // PDB config is not set properly, we can't apply it. + return pdbs + } + // Enricher's PDB - if routerVersion.Enricher != nil { + if routerVersion.Enricher != nil && + routerVersion.Enricher.ResourceRequest != nil && + math.Ceil(float64(routerVersion.Enricher.ResourceRequest.MinReplica)* + minAvailablePercent) < float64(routerVersion.Enricher.ResourceRequest.MinReplica) { enricherPdb := ds.svcBuilder.NewPodDisruptionBudget( routerVersion, project, @@ -759,8 +778,16 @@ func (ds *deploymentService) createPodDisruptionBudgets( pdbs = append(pdbs, enricherPdb) } - // Ensembler's PDB - if routerVersion.Enricher != nil { + // Ensembler's PDB - create for Docker / Pyfunc ensemblers only + if routerVersion.Ensembler != nil && + ((routerVersion.Ensembler.DockerConfig != nil && + routerVersion.Ensembler.DockerConfig.ResourceRequest != nil && + math.Ceil(float64(routerVersion.Ensembler.DockerConfig.ResourceRequest.MinReplica)* + minAvailablePercent) < float64(routerVersion.Ensembler.DockerConfig.ResourceRequest.MinReplica)) || + (routerVersion.Ensembler.PyfuncConfig != nil && + routerVersion.Ensembler.PyfuncConfig.ResourceRequest != nil && + math.Ceil(float64(routerVersion.Ensembler.PyfuncConfig.ResourceRequest.MinReplica)* + minAvailablePercent) < float64(routerVersion.Ensembler.PyfuncConfig.ResourceRequest.MinReplica))) { ensemblerPdb := ds.svcBuilder.NewPodDisruptionBudget( routerVersion, project, @@ -771,13 +798,17 @@ func (ds *deploymentService) createPodDisruptionBudgets( } // Router's PDB - routerPdb := ds.svcBuilder.NewPodDisruptionBudget( - routerVersion, - project, - servicebuilder.ComponentTypes.Router, - ds.pdbConfig, - ) - pdbs = append(pdbs, routerPdb) + if routerVersion.ResourceRequest != nil && + math.Ceil(float64(routerVersion.ResourceRequest.MinReplica)* + minAvailablePercent) < float64(routerVersion.ResourceRequest.MinReplica) { + routerPdb := ds.svcBuilder.NewPodDisruptionBudget( + routerVersion, + project, + servicebuilder.ComponentTypes.Router, + ds.pdbConfig, + ) + pdbs = append(pdbs, routerPdb) + } return pdbs } diff --git a/api/turing/service/router_deployment_service_test.go b/api/turing/service/router_deployment_service_test.go index d3d41e826..b6e33e89f 100644 --- a/api/turing/service/router_deployment_service_test.go +++ b/api/turing/service/router_deployment_service_test.go @@ -364,16 +364,6 @@ func TestDeployEndpoint(t *testing.T) { }, }, }) - controller.AssertCalled(t, "ApplyPodDisruptionBudget", mock.Anything, testNamespace, cluster.PodDisruptionBudget{ - Name: "test-svc-turing-enricher-1-pdb", - Namespace: testNamespace, - MinAvailablePercentage: &defaultMinAvailablePercentage, - Selector: &apimetav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "test-svc-turing-enricher-1-0", - }, - }, - }) controller.AssertCalled(t, "ApplyPodDisruptionBudget", mock.Anything, testNamespace, cluster.PodDisruptionBudget{ Name: "test-svc-turing-ensembler-1-pdb", Namespace: testNamespace, @@ -384,7 +374,7 @@ func TestDeployEndpoint(t *testing.T) { }, }, }) - controller.AssertNumberOfCalls(t, "ApplyPodDisruptionBudget", 3) + controller.AssertNumberOfCalls(t, "ApplyPodDisruptionBudget", 2) // Verify endpoint for upi routers routerVersion.Protocol = routerConfig.UPI @@ -409,6 +399,7 @@ func TestDeleteEndpoint(t *testing.T) { testEnv := "test-env" testNs := "test-namespace" timeout := time.Second * 5 + defaultMinAvailablePercentage := 10 // Create mock controller controller := &mocks.Controller{} @@ -447,6 +438,10 @@ func TestDeleteEndpoint(t *testing.T) { testEnv: controller, }, svcBuilder: svcBuilder, + pdbConfig: config.PodDisruptionBudgetConfig{ + Enabled: true, + MinAvailablePercentage: &defaultMinAvailablePercentage, + }, } eventsCh := NewEventChannel() @@ -479,7 +474,7 @@ func TestDeleteEndpoint(t *testing.T) { controller.AssertCalled(t, "DeletePersistentVolumeClaim", mock.Anything, "pvc", testNs, false) controller.AssertCalled(t, "DeletePodDisruptionBudget", mock.Anything, testNs, mock.Anything) controller.AssertNumberOfCalls(t, "DeleteKnativeService", 3) - controller.AssertNumberOfCalls(t, "DeletePodDisruptionBudget", 3) + controller.AssertNumberOfCalls(t, "DeletePodDisruptionBudget", 2) } func TestBuildEnsemblerServiceImage(t *testing.T) { @@ -548,3 +543,207 @@ func TestBuildEnsemblerServiceImage(t *testing.T) { Env: routerVersion.Ensembler.PyfuncConfig.Env, }) } + +func TestCreatePodDisruptionBudgets(t *testing.T) { + twenty, eighty := 20, 80 + testRouterLabels := map[string]string{ + "app": "test", + "environment": "", + "orchestrator": "turing", + "stream": "", + "team": "", + } + + tests := map[string]struct { + rv *models.RouterVersion + pdbConfig config.PodDisruptionBudgetConfig + expected []*cluster.PodDisruptionBudget + }{ + "bad pdb config": { + rv: &models.RouterVersion{ + ResourceRequest: &models.ResourceRequest{ + MinReplica: 5, + }, + }, + pdbConfig: config.PodDisruptionBudgetConfig{ + Enabled: true, + }, + expected: []*cluster.PodDisruptionBudget{}, + }, + "all pdbs | minAvailablePercentage": { + rv: &models.RouterVersion{ + Router: &models.Router{ + Name: "test", + }, + Version: 3, + ResourceRequest: &models.ResourceRequest{ + MinReplica: 5, + }, + Enricher: &models.Enricher{ + ResourceRequest: &models.ResourceRequest{ + MinReplica: 3, + }, + }, + Ensembler: &models.Ensembler{ + DockerConfig: &models.EnsemblerDockerConfig{ + ResourceRequest: &models.ResourceRequest{ + MinReplica: 2, + }, + }, + }, + }, + pdbConfig: config.PodDisruptionBudgetConfig{ + Enabled: true, + MinAvailablePercentage: &twenty, + }, + expected: []*cluster.PodDisruptionBudget{ + { + Name: "test-turing-enricher-3-pdb", + Namespace: "ns", + Labels: testRouterLabels, + MinAvailablePercentage: &twenty, + Selector: &apimetav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "test-turing-enricher-3-0", + }, + }, + }, + { + Name: "test-turing-ensembler-3-pdb", + Namespace: "ns", + Labels: testRouterLabels, + MinAvailablePercentage: &twenty, + Selector: &apimetav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "test-turing-ensembler-3-0", + }, + }, + }, + { + Name: "test-turing-router-3-pdb", + Namespace: "ns", + Labels: testRouterLabels, + MinAvailablePercentage: &twenty, + Selector: &apimetav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "test-turing-router-3-0", + }, + }, + }, + }, + }, + "all pdbs | maxUnavailablePercentage": { + rv: &models.RouterVersion{ + Router: &models.Router{ + Name: "test", + }, + Version: 3, + ResourceRequest: &models.ResourceRequest{ + MinReplica: 5, + }, + Enricher: &models.Enricher{ + ResourceRequest: &models.ResourceRequest{ + MinReplica: 3, + }, + }, + Ensembler: &models.Ensembler{ + DockerConfig: &models.EnsemblerDockerConfig{ + ResourceRequest: &models.ResourceRequest{ + MinReplica: 2, + }, + }, + }, + }, + pdbConfig: config.PodDisruptionBudgetConfig{ + Enabled: true, + MaxUnavailablePercentage: &eighty, + }, + expected: []*cluster.PodDisruptionBudget{ + { + Name: "test-turing-enricher-3-pdb", + Namespace: "ns", + Labels: testRouterLabels, + MaxUnavailablePercentage: &eighty, + Selector: &apimetav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "test-turing-enricher-3-0", + }, + }, + }, + { + Name: "test-turing-ensembler-3-pdb", + Namespace: "ns", + Labels: testRouterLabels, + MaxUnavailablePercentage: &eighty, + Selector: &apimetav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "test-turing-ensembler-3-0", + }, + }, + }, + { + Name: "test-turing-router-3-pdb", + Namespace: "ns", + Labels: testRouterLabels, + MaxUnavailablePercentage: &eighty, + Selector: &apimetav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "test-turing-router-3-0", + }, + }, + }, + }, + }, + "pyfunc ensembler": { + rv: &models.RouterVersion{ + Router: &models.Router{ + Name: "test", + }, + Version: 3, + ResourceRequest: &models.ResourceRequest{ + MinReplica: 1, + }, + Ensembler: &models.Ensembler{ + PyfuncConfig: &models.EnsemblerPyfuncConfig{ + ResourceRequest: &models.ResourceRequest{ + MinReplica: 10, + }, + }, + }, + }, + pdbConfig: config.PodDisruptionBudgetConfig{ + Enabled: true, + MinAvailablePercentage: &twenty, + }, + expected: []*cluster.PodDisruptionBudget{ + { + Name: "test-turing-ensembler-3-pdb", + Namespace: "ns", + Labels: testRouterLabels, + MinAvailablePercentage: &twenty, + Selector: &apimetav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "test-turing-ensembler-3-0", + }, + }, + }, + }, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + ds := &deploymentService{ + pdbConfig: tt.pdbConfig, + svcBuilder: servicebuilder.NewClusterServiceBuilder( + resource.MustParse("200m"), + resource.MustParse("200Mi"), + 10, + []corev1.TopologySpreadConstraint{}, + ), + } + pdbs := ds.createPodDisruptionBudgets(tt.rv, &mlp.Project{Name: "ns"}) + assert.Equal(t, tt.expected, pdbs) + }) + } +}