Skip to content

Commit

Permalink
test: fix daemonset sensitive tests (#6123)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmdeal authored May 2, 2024
1 parent 9836cbe commit 896ae3d
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 66 deletions.
22 changes: 22 additions & 0 deletions test/pkg/environment/common/expectations.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ import (
pscheduling "sigs.k8s.io/karpenter/pkg/controllers/provisioning/scheduling"
"sigs.k8s.io/karpenter/pkg/scheduling"
"sigs.k8s.io/karpenter/pkg/test"
coreresources "sigs.k8s.io/karpenter/pkg/utils/resources"
)

func (env *Environment) ExpectCreated(objects ...client.Object) {
Expand Down Expand Up @@ -902,3 +903,24 @@ func (env *Environment) GetDaemonSetCount(np *corev1beta1.NodePool) int {
return true
})
}

func (env *Environment) GetDaemonSetOverhead(np *corev1beta1.NodePool) v1.ResourceList {
GinkgoHelper()

// Performs the same logic as the scheduler to get the number of daemonset
// pods that we estimate we will need to schedule as overhead to each node
daemonSetList := &appsv1.DaemonSetList{}
Expect(env.Client.List(env.Context, daemonSetList)).To(Succeed())

return coreresources.RequestsForPods(lo.FilterMap(daemonSetList.Items, func(ds appsv1.DaemonSet, _ int) (*v1.Pod, bool) {
p := &v1.Pod{Spec: ds.Spec.Template.Spec}
nodeClaimTemplate := pscheduling.NewNodeClaimTemplate(np)
if err := scheduling.Taints(nodeClaimTemplate.Spec.Taints).Tolerates(p); err != nil {
return nil, false
}
if err := nodeClaimTemplate.Requirements.Compatible(scheduling.NewPodRequirements(p), scheduling.AllowUndefinedWellKnownLabels); err != nil {
return nil, false
}
return p, true
})...)
}
17 changes: 15 additions & 2 deletions test/suites/consolidation/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,14 @@ var _ = Describe("Consolidation", func() {
},
},
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1.5")},
Requests: v1.ResourceList{
v1.ResourceCPU: func() resource.Quantity {
dsOverhead := env.GetDaemonSetOverhead(nodePool)
base := lo.ToPtr(resource.MustParse("1800m"))
base.Sub(*dsOverhead.Cpu())
return *base
}(),
},
},
},
})
Expand Down Expand Up @@ -673,7 +680,13 @@ var _ = Describe("Consolidation", func() {
},
},
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1.5")},
Requests: v1.ResourceList{v1.ResourceCPU: func() resource.Quantity {
dsOverhead := env.GetDaemonSetOverhead(nodePool)
base := lo.ToPtr(resource.MustParse("1800m"))
base.Sub(*dsOverhead.Cpu())
return *base
}(),
},
},
},
})
Expand Down
46 changes: 16 additions & 30 deletions test/suites/drift/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,50 +272,36 @@ var _ = Describe("Drift", func() {
Values: []string{"xlarge"},
},
},
// Add an Exists operator so that we can select on a fake partition later
corev1beta1.NodeSelectorRequirementWithMinValues{
NodeSelectorRequirement: v1.NodeSelectorRequirement{
Key: "test-partition",
Operator: v1.NodeSelectorOpExists,
},
},
)
nodePool.Labels = appLabels
// We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time.
nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{
Nodes: "3",
}}

// Make 5 pods all with different deployments and different test partitions, so that each pod can be put
// on a separate node.
selector = labels.SelectorFromSet(appLabels)
numPods = 5
deployments := make([]*appsv1.Deployment, numPods)
for i := range lo.Range(numPods) {
deployments[i] = coretest.Deployment(coretest.DeploymentOptions{
Replicas: 1,
PodOptions: coretest.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: appLabels,
},
NodeSelector: map[string]string{"test-partition": fmt.Sprintf("%d", i)},
// Each xlarge has 4 cpu, so each node should fit no more than 1 pod.
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("3"),
},
},
// Create a 5 pod deployment with hostname inter-pod anti-affinity to ensure each pod is placed on a unique node
deployment := coretest.Deployment(coretest.DeploymentOptions{
Replicas: 5,
PodOptions: coretest.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: appLabels,
},
})
}
PodAntiRequirements: []v1.PodAffinityTerm{{
TopologyKey: v1.LabelHostname,
LabelSelector: &metav1.LabelSelector{
MatchLabels: appLabels,
},
}},
},
})

env.ExpectCreated(nodeClass, nodePool, deployments[0], deployments[1], deployments[2], deployments[3], deployments[4])
env.ExpectCreated(nodeClass, nodePool, deployment)

originalNodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 5)
originalNodes := env.EventuallyExpectCreatedNodeCount("==", 5)

// Check that all deployment pods are online
env.EventuallyExpectHealthyPodCount(selector, numPods)
env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(appLabels), numPods)

By("cordoning and adding finalizer to the nodes")
// Add a finalizer to each node so that we can stop termination disruptions
Expand Down
42 changes: 15 additions & 27 deletions test/suites/expiration/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,13 +354,6 @@ var _ = Describe("Expiration", func() {
Values: []string{"xlarge"},
},
},
// Add an Exists operator so that we can select on a fake partition later
corev1beta1.NodeSelectorRequirementWithMinValues{
NodeSelectorRequirement: v1.NodeSelectorRequirement{
Key: "test-partition",
Operator: v1.NodeSelectorOpExists,
},
},
)
nodePool.Labels = appLabels
// We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time.
Expand All @@ -371,33 +364,28 @@ var _ = Describe("Expiration", func() {
// Make 5 pods all with different deployments and different test partitions, so that each pod can be put
// on a separate node.
selector = labels.SelectorFromSet(appLabels)
numPods = 5
deployments := make([]*appsv1.Deployment, numPods)
for i := range lo.Range(numPods) {
deployments[i] = coretest.Deployment(coretest.DeploymentOptions{
Replicas: 1,
PodOptions: coretest.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: appLabels,
},
NodeSelector: map[string]string{"test-partition": fmt.Sprintf("%d", i)},
// Each xlarge has 4 cpu, so each node should fit no more than 1 pod.
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("3"),
},
},
deployment := coretest.Deployment(coretest.DeploymentOptions{
Replicas: 5,
PodOptions: coretest.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: appLabels,
},
})
}
PodAntiRequirements: []v1.PodAffinityTerm{{
TopologyKey: v1.LabelHostname,
LabelSelector: &metav1.LabelSelector{
MatchLabels: appLabels,
},
}},
},
})

env.ExpectCreated(nodeClass, nodePool, deployments[0], deployments[1], deployments[2], deployments[3], deployments[4])
env.ExpectCreated(nodeClass, nodePool, deployment)

env.EventuallyExpectCreatedNodeClaimCount("==", 5)
nodes := env.EventuallyExpectCreatedNodeCount("==", 5)

// Check that all daemonsets and deployment pods are online
env.EventuallyExpectHealthyPodCount(selector, numPods)
env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(appLabels), numPods)

By("cordoning and adding finalizer to the nodes")
// Add a finalizer to each node so that we can stop termination disruptions
Expand Down
45 changes: 40 additions & 5 deletions test/suites/integration/scheduling_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -450,11 +450,37 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() {
if version, err := env.GetK8sMinorVersion(0); err != nil || version < 29 {
Skip("native sidecar containers are only enabled on EKS 1.29+")
}

labels := map[string]string{"test": test.RandomName()}
// Create a buffer pod to even out the total resource requests regardless of the daemonsets on the cluster. Assumes
// CPU is the resource in contention and that total daemonset CPU requests <= 3.
dsBufferPod := test.Pod(test.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: labels,
},
PodRequirements: []v1.PodAffinityTerm{{
LabelSelector: &metav1.LabelSelector{
MatchLabels: labels,
},
TopologyKey: v1.LabelHostname,
}},
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: func() resource.Quantity {
dsOverhead := env.GetDaemonSetOverhead(nodePool)
base := lo.ToPtr(resource.MustParse("3"))
base.Sub(*dsOverhead.Cpu())
return *base
}(),
},
},
})

test.ReplaceRequirements(nodePool, corev1beta1.NodeSelectorRequirementWithMinValues{
NodeSelectorRequirement: v1.NodeSelectorRequirement{
Key: v1beta1.LabelInstanceCPU,
Operator: v1.NodeSelectorOpIn,
Values: []string{"1", "2"},
Values: []string{"4", "8"},
},
}, corev1beta1.NodeSelectorRequirementWithMinValues{
NodeSelectorRequirement: v1.NodeSelectorRequirement{
Expand All @@ -464,15 +490,24 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() {
},
})
pod := test.Pod(test.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Labels: labels,
},
PodRequirements: []v1.PodAffinityTerm{{
LabelSelector: &metav1.LabelSelector{
MatchLabels: labels,
},
TopologyKey: v1.LabelHostname,
}},
InitContainers: initContainers,
ResourceRequirements: containerRequirements,
})
env.ExpectCreated(nodePool, nodeClass, pod)
env.ExpectCreated(nodePool, nodeClass, dsBufferPod, pod)
env.EventuallyExpectHealthy(pod)
node := env.ExpectCreatedNodeCount("==", 1)[0]
Expect(node.ObjectMeta.GetLabels()[v1beta1.LabelInstanceCPU]).To(Equal(expectedNodeCPU))
},
Entry("sidecar requirements + later init requirements do exceed container requirements", "2", v1.ResourceRequirements{
Entry("sidecar requirements + later init requirements do exceed container requirements", "8", v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("400m")},
}, ephemeralInitContainer(v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("300m")},
Expand All @@ -484,7 +519,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() {
}, ephemeralInitContainer(v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1")},
})),
Entry("sidecar requirements + later init requirements do not exceed container requirements", "1", v1.ResourceRequirements{
Entry("sidecar requirements + later init requirements do not exceed container requirements", "4", v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("400m")},
}, ephemeralInitContainer(v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("300m")},
Expand All @@ -496,7 +531,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() {
}, ephemeralInitContainer(v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("300m")},
})),
Entry("init container requirements exceed all later requests", "2", v1.ResourceRequirements{
Entry("init container requirements exceed all later requests", "8", v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("400m")},
}, v1.Container{
RestartPolicy: lo.ToPtr(v1.ContainerRestartPolicyAlways),
Expand Down
18 changes: 16 additions & 2 deletions test/suites/integration/utilization_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ import (

corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"

"github.com/samber/lo"

"github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1"
"github.com/aws/karpenter-provider-aws/test/pkg/debug"

Expand All @@ -49,8 +51,20 @@ var _ = Describe("Utilization", Label(debug.NoWatch), Label(debug.NoEvents), fun
},
)
deployment := test.Deployment(test.DeploymentOptions{
Replicas: 100,
PodOptions: test.PodOptions{ResourceRequirements: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1.5")}}}})
Replicas: 100,
PodOptions: test.PodOptions{
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: func() resource.Quantity {
dsOverhead := env.GetDaemonSetOverhead(nodePool)
base := lo.ToPtr(resource.MustParse("1800m"))
base.Sub(*dsOverhead.Cpu())
return *base
}(),
},
},
},
})

env.ExpectCreated(nodeClass, nodePool, deployment)
env.EventuallyExpectHealthyPodCountWithTimeout(time.Minute*10, labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas))
Expand Down

0 comments on commit 896ae3d

Please sign in to comment.