From 22ee6dfc0c81b7d2d0fff945f9185c9651f5c705 Mon Sep 17 00:00:00 2001
From: Jaeyeon Park
Date: Sun, 27 Oct 2024 16:57:26 +0900
Subject: [PATCH 1/4] feat: distributed tracing span error (pod-delete only)

Signed-off-by: Jaeyeon Park
---
 bin/experiment/experiment.go | 5 +++
 chaoslib/litmus/pod-delete/lib/pod-delete.go | 43 ++++++++++++++++++---
 .../pod-delete/experiment/pod-delete.go | 22 ++++++++++
 pkg/probe/probe.go | 15 ++++++-
 4 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/bin/experiment/experiment.go b/bin/experiment/experiment.go
index ef01e1f2f..5c7bee086 100755
--- a/bin/experiment/experiment.go
+++ b/bin/experiment/experiment.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"errors"
 	"flag"
+	"fmt"
 	"os"
 
 	// Uncomment to load all auth plugins
@@ -68,6 +69,7 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/telemetry"
 	"github.com/sirupsen/logrus"
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
 )
 
 func init() {
@@ -106,6 +108,8 @@ func main() {
 	//Getting kubeConfig and Generate ClientSets
 	if err := clients.GenerateClientSetFromKubeConfig(); err != nil {
 		log.Errorf("Unable to Get the kubeconfig, err: %v", err)
+		span.SetStatus(codes.Error, "Unable to Get the kubeconfig")
+		span.RecordError(err)
 		return
 	}
 
@@ -211,6 +215,7 @@ func main() {
 		k6Loadgen.Experiment(ctx, clients)
 	default:
 		log.Errorf("Unsupported -name %v, please provide the correct value of -name args", *experimentName)
+		span.SetStatus(codes.Error, fmt.Sprintf("Unsupported -name %v", *experimentName))
 		return
 	}
 }
diff --git a/chaoslib/litmus/pod-delete/lib/pod-delete.go b/chaoslib/litmus/pod-delete/lib/pod-delete.go
index aa4fec6e8..39a0f9c35 100644
--- a/chaoslib/litmus/pod-delete/lib/pod-delete.go
+++ b/chaoslib/litmus/pod-delete/lib/pod-delete.go
@@ -21,6 +21,7 @@ import (
 	"github.com/palantir/stacktrace"
 	"github.com/sirupsen/logrus"
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
 	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
@@ -46,14 +47,22 @@ func PreparePodDelete(ctx context.Context, experimentsDetails *experimentTypes.E
 	switch strings.ToLower(experimentsDetails.Sequence) {
 	case "serial":
 		if err := injectChaosInSerialMode(ctx, experimentsDetails, clients, chaosDetails, eventsDetails, resultDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in serial mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in serial mode")
 		}
 	case "parallel":
 		if err := injectChaosInParallelMode(ctx, experimentsDetails, clients, chaosDetails, eventsDetails, resultDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in parallel mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in parallel mode")
 		}
 	default:
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		errReason := fmt.Sprintf("sequence '%s' is not supported", experimentsDetails.Sequence)
+		span.SetStatus(codes.Error, errReason)
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: errReason}
+		span.RecordError(err)
+		return err
 	}
 
 	//Waiting for the ramp time after chaos injection
@@ -72,6 +81,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 	// run the probes during chaos
 	if len(resultDetails.ProbeDetails) != 0 {
 		if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run the probes during chaos")
+			span.RecordError(err)
return err } } @@ -85,11 +96,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage if experimentsDetails.TargetPods == "" && chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.SetStatus(codes.Error, "provide one of the appLabel or TARGET_PODS") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.RecordError(err) + return err } targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -97,6 +113,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment for _, pod := range targetPodList.Items { kind, parentName, err := workloads.GetPodOwnerTypeAndName(&pod, clients.DynamicClient) if err != nil { + span.SetStatus(codes.Error, "could not get pod owner name and kind") + span.RecordError(err) return stacktrace.Propagate(err, "could not get pod owner name and kind") } common.SetParentName(parentName, kind, pod.Namespace, chaosDetails) @@ -123,12 +141,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment err = clients.KubeClient.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, v1.DeleteOptions{}) } if err != nil { + span.SetStatus(codes.Error, "could not delete the target pod") + span.RecordError(err) return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{podName: %s, namespace: %s}", pod.Name, pod.Namespace), Reason: fmt.Sprintf("failed to delete the target pod: %s", err.Error())} } switch chaosDetails.Randomness { case true: if err := common.RandomInterval(experimentsDetails.ChaosInterval); err != nil { + span.SetStatus(codes.Error, "could not get random chaos interval") + span.RecordError(err) return stacktrace.Propagate(err, "could not get random chaos interval") } default: @@ -149,6 +171,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment Namespace: parent.Namespace, } if err = status.CheckUnTerminatedPodStatusesByWorkloadName(target, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not check pod statuses by workload names") + span.RecordError(err) return stacktrace.Propagate(err, "could not check pod statuses by workload names") } } @@ -184,10 +208,15 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage if experimentsDetails.TargetPods == "" && chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "please provide one of the appLabel or TARGET_PODS"} + span.SetStatus(codes.Error, "please provide one of the appLabel or TARGET_PODS") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "please provide one of the appLabel or TARGET_PODS"} + span.RecordError(err) + return err } 
 	targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails)
 	if err != nil {
+		span.SetStatus(codes.Error, "could not get target pods")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "could not get target pods")
 	}
@@ -195,6 +224,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 	for _, pod := range targetPodList.Items {
 		kind, parentName, err := workloads.GetPodOwnerTypeAndName(&pod, clients.DynamicClient)
 		if err != nil {
+			span.SetStatus(codes.Error, "could not get pod owner name and kind")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not get pod owner name and kind")
 		}
 		common.SetParentName(parentName, kind, pod.Namespace, chaosDetails)
@@ -221,6 +252,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 			err = clients.KubeClient.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, v1.DeleteOptions{})
 		}
 		if err != nil {
+			span.SetStatus(codes.Error, "could not delete the target pod")
+			span.RecordError(err)
 			return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{podName: %s, namespace: %s}", pod.Name, pod.Namespace), Reason: fmt.Sprintf("failed to delete the target pod: %s", err.Error())}
 		}
 	}
@@ -228,6 +261,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 	switch chaosDetails.Randomness {
 	case true:
 		if err := common.RandomInterval(experimentsDetails.ChaosInterval); err != nil {
+			span.SetStatus(codes.Error, "could not get random chaos interval")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not get random chaos interval")
 		}
 	default:
@@ -248,6 +283,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 			Namespace: parent.Namespace,
 		}
 		if err = status.CheckUnTerminatedPodStatusesByWorkloadName(target, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
+			span.SetStatus(codes.Error, "could not check pod statuses by workload names")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not check pod statuses by workload names")
 		}
 	}
diff --git a/experiments/generic/pod-delete/experiment/pod-delete.go b/experiments/generic/pod-delete/experiment/pod-delete.go
index 0fb445f15..5caa4cf94 100644
--- a/experiments/generic/pod-delete/experiment/pod-delete.go
+++ b/experiments/generic/pod-delete/experiment/pod-delete.go
@@ -17,10 +17,14 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodDelete inject the pod-delete chaos
 func PodDelete(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
+
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
 	eventsDetails := types.EventDetails{}
@@ -40,6 +44,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) {
 	// Get values from chaosengine.
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -49,6 +55,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to create the chaosresult, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -56,6 +64,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { if err := result.SetResultUID(&resultDetails, clients, &chaosDetails); err != nil { log.Errorf("Unable to set the result uid, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to set the result uid") + span.RecordError(err) return } @@ -85,6 +95,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { log.Errorf("failed to create %v event inside chaosengine", types.PreChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -104,6 +116,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { log.Errorf("failed to create %v event inside chaosengine", types.PreChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -117,6 +131,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PreparePodDelete(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -132,6 +148,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -150,6 +168,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { log.Errorf("failed to create %v event inside chaosengine", types.PostChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -165,6 +185,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to update the chaosresult, err: %v", err) result.RecordAfterFailure(&chaosDetails, 
&resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to update the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
diff --git a/pkg/probe/probe.go b/pkg/probe/probe.go
index fe6e1a271..1237beb79 100644
--- a/pkg/probe/probe.go
+++ b/pkg/probe/probe.go
@@ -18,6 +18,7 @@ import (
 	"github.com/palantir/stacktrace"
 	"github.com/sirupsen/logrus"
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
 	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
@@ -32,6 +33,8 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl
 	// get the probes details from the chaosengine
 	probes, err := getProbesFromChaosEngine(chaosDetails, clients)
 	if err != nil {
+		span.SetStatus(codes.Error, "getProbesFromChaosEngine failed")
+		span.RecordError(err)
 		return err
 	}
 
@@ -42,6 +45,8 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl
 		switch strings.ToLower(probe.Mode) {
 		case "sot", "edge", "continuous":
 			if err := execute(probe, chaosDetails, clients, resultDetails, phase); err != nil {
+				span.SetStatus(codes.Error, fmt.Sprintf("%s mode %s probe execute failed", probe.Mode, probe.Name))
+				span.RecordError(err)
 				return err
 			}
 		}
@@ -51,6 +56,8 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl
 	for _, probe := range probes {
 		if strings.ToLower(probe.Mode) == "onchaos" {
 			if err := execute(probe, chaosDetails, clients, resultDetails, phase); err != nil {
+				span.SetStatus(codes.Error, fmt.Sprintf("%s mode %s probe execute failed", probe.Mode, probe.Name))
+				span.RecordError(err)
 				return err
 			}
 		}
@@ -72,13 +79,19 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl
 		}
 	}
 	if len(probeError) != 0 {
-		return cerrors.PreserveError{ErrString: fmt.Sprintf("[%s]", strings.Join(probeError, ","))}
+		errString := fmt.Sprintf("[%s]", strings.Join(probeError, ","))
+		span.SetStatus(codes.Error, errString)
+		err := cerrors.PreserveError{ErrString: errString}
+		span.RecordError(err)
+		return err
 	}
 	// executes the eot and edge modes
 	for _, probe := range probes {
 		switch strings.ToLower(probe.Mode) {
 		case "eot", "edge":
 			if err := execute(probe, chaosDetails, clients, resultDetails, phase); err != nil {
+				span.SetStatus(codes.Error, fmt.Sprintf("%s mode %s probe execute failed", probe.Mode, probe.Name))
+				span.RecordError(err)
 				return err
 			}
 		}

From fc8ae3ae841926381bae396a82367dc6a22d7b63 Mon Sep 17 00:00:00 2001
From: Jaeyeon Park
Date: Thu, 26 Dec 2024 16:02:37 +0900
Subject: [PATCH 2/4] feat: distributed tracing span error

Signed-off-by: Jaeyeon Park
---
 .../experiment/aws-ssm-chaos-by-id.go | 21 +++++++++++++
 .../experiment/aws-ssm-chaos-by-tag.go | 19 ++++++++++++
 .../experiment/azure-disk-loss.go | 21 +++++++++++++
 .../experiment/azure-instance-stop.go | 19 ++++++++++++
 .../experiment/redfish-node-restart.go | 31 +++++++++++++++++++
 .../pod-delete/experiment/pod-delete.go | 29 +++++++++++++++++
 .../experiment/gcp-vm-disk-loss-by-label.go | 21 +++++++++++++
 .../experiment/gcp-vm-disk-loss.go | 23 ++++++++++++++
 .../gcp-vm-instance-stop-by-label.go | 21 +++++++++++++
 .../experiment/gcp-vm-instance-stop.go | 21 +++++++++++++
 .../experiment/container-kill.go | 19 ++++++++++++
 .../generic/disk-fill/experiment/disk-fill.go | 19 ++++++++++++
 .../experiment/docker-service-kill.go | 25 +++++++++++++++
 .../experiment/kubelet-service-kill.go | 25 +++++++++++++++
 .../node-cpu-hog/experiment/node-cpu-hog.go | 25 +++++++++++++++
 .../node-drain/experiment/node-drain.go | 25 +++++++++++++++
.../experiment/node-io-stress.go | 25 +++++++++++++++ .../experiment/node-memory-hog.go | 25 +++++++++++++++ .../node-restart/experiment/node-restart.go | 25 +++++++++++++++ .../node-taint/experiment/node-taint.go | 25 +++++++++++++++ .../experiment/pod-autoscaler.go | 19 ++++++++++++ .../experiment/pod-cpu-hog-exec.go | 19 ++++++++++++ .../pod-cpu-hog/experiment/pod-cpu-hog.go | 19 ++++++++++++ .../pod-dns-error/experiment/pod-dns-error.go | 19 ++++++++++++ .../pod-dns-spoof/experiment/pod-dns-spoof.go | 19 ++++++++++++ .../experiment/pod-fio-stress.go | 19 ++++++++++++ .../experiment/pod-http-latency.go | 19 ++++++++++++ .../experiment/pod-http-modify-body.go | 19 ++++++++++++ .../experiment/pod-http-modify-header.go | 19 ++++++++++++ .../experiment/pod-http-reset-peer.go | 19 ++++++++++++ .../experiment/pod-http-status-code.go | 19 ++++++++++++ .../pod-io-stress/experiment/pod-io-stress.go | 19 ++++++++++++ .../experiment/pod-memory-hog-exec.go | 19 ++++++++++++ .../experiment/pod-memory-hog.go | 19 ++++++++++++ .../experiment/pod-network-corruption.go | 19 ++++++++++++ .../experiment/pod-network-duplication.go | 19 ++++++++++++ .../experiment/pod-network-latency.go | 19 ++++++++++++ .../experiment/pod-network-loss.go | 20 ++++++++++++ .../experiment/pod-network-partition.go | 19 ++++++++++++ .../experiment/kafka-broker-pod-failure.go | 25 +++++++++++++++ .../experiment/ebs-loss-by-id.go | 19 ++++++++++++ .../experiment/ebs-loss-by-tag.go | 19 ++++++++++++ .../experiment/ec2-terminate-by-id.go | 23 ++++++++++++++ .../experiment/ec2-terminate-tag.go | 23 ++++++++++++++ .../load/k6-loadgen/experiment/k6-loadgen.go | 19 ++++++++++++ .../experiment/spring-boot-faults.go | 23 ++++++++++++++ .../vm-poweroff/experiment/vm-poweroff.go | 23 ++++++++++++++ 47 files changed, 1000 insertions(+) diff --git a/experiments/aws-ssm/aws-ssm-chaos-by-id/experiment/aws-ssm-chaos-by-id.go b/experiments/aws-ssm/aws-ssm-chaos-by-id/experiment/aws-ssm-chaos-by-id.go index 4edeaf6d3..8c6399792 100644 --- a/experiments/aws-ssm/aws-ssm-chaos-by-id/experiment/aws-ssm-chaos-by-id.go +++ b/experiments/aws-ssm/aws-ssm-chaos-by-id/experiment/aws-ssm-chaos-by-id.go @@ -18,10 +18,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // AWSSSMChaosByID inject the ssm chaos on ec2 instance func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -42,6 +45,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -51,6 +56,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to create the chaosresult: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -89,6 +96,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -104,6 +113,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { if err := ssm.CheckInstanceInformation(&experimentsDetails); err != nil { log.Errorf("Failed perform ssm api calls: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to perform ssm api calls") + span.RecordError(err) return } @@ -112,6 +123,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { if err := ec2.InstanceStatusCheckByID(experimentsDetails.EC2InstanceID, experimentsDetails.Region); err != nil { log.Errorf("Failed to get the ec2 instance status: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to get the ec2 instance status") + span.RecordError(err) return } log.Info("[Status]: EC2 instance is in running state") @@ -129,6 +142,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to delete ssm doc: %v", err) } } + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -142,6 +157,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { if err := ec2.InstanceStatusCheckByID(experimentsDetails.EC2InstanceID, experimentsDetails.Region); err != nil { log.Errorf("Failed to get the ec2 instance status: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to get the ec2 instance status") + span.RecordError(err) return } log.Info("[Status]: EC2 instance is in running state (post chaos)") @@ -161,6 +178,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -177,6 +196,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to update the chaosresult: %v", err) + span.SetStatus(codes.Error, 
"Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/aws-ssm/aws-ssm-chaos-by-tag/experiment/aws-ssm-chaos-by-tag.go b/experiments/aws-ssm/aws-ssm-chaos-by-tag/experiment/aws-ssm-chaos-by-tag.go index 1799ac322..29c7fd4e0 100644 --- a/experiments/aws-ssm/aws-ssm-chaos-by-tag/experiment/aws-ssm-chaos-by-tag.go +++ b/experiments/aws-ssm/aws-ssm-chaos-by-tag/experiment/aws-ssm-chaos-by-tag.go @@ -18,10 +18,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // AWSSSMChaosByTag inject the ssm chaos on ec2 instance func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -42,6 +45,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -51,6 +56,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to create the chaosresult: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -79,6 +86,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { if err := ssm.CheckInstanceInformation(&experimentsDetails); err != nil { log.Errorf("Target instance status check failed: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target instance status check failed") + span.RecordError(err) return } @@ -97,6 +106,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -120,6 +131,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to delete ssm document: %v", err) } } + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -133,6 +146,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { if err := ec2.InstanceStatusCheck(experimentsDetails.TargetInstanceIDList, experimentsDetails.Region); err != nil { log.Errorf("Failed to get the ec2 instance status: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to get the ec2 instance status") + span.RecordError(err) return } log.Info("[Status]: EC2 instance is in running state (post chaos)") @@ -152,6 +167,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck) } 
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -167,6 +184,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to update the chaosresult: %v", err) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/azure/azure-disk-loss/experiment/azure-disk-loss.go b/experiments/azure/azure-disk-loss/experiment/azure-disk-loss.go index 85507b7ad..4a5a785a2 100644 --- a/experiments/azure/azure-disk-loss/experiment/azure-disk-loss.go +++ b/experiments/azure/azure-disk-loss/experiment/azure-disk-loss.go @@ -18,10 +18,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // AzureDiskLoss contains steps to inject chaos func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var err error experimentsDetails := experimentTypes.ExperimentDetails{} @@ -43,6 +46,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -52,6 +57,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to create the chaosresult: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { if experimentsDetails.SubscriptionID, err = azureCommon.GetSubscriptionID(); err != nil { log.Errorf("fail to get the subscription id: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "fail to get the subscription id") + span.RecordError(err) return } @@ -89,6 +98,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { if err = azureStatus.CheckVirtualDiskWithInstance(experimentsDetails.SubscriptionID, experimentsDetails.VirtualDiskNames, experimentsDetails.ResourceGroup); err != nil { log.Errorf("Virtual disk status check failed: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Virtual disk status check failed") + span.RecordError(err) return } } @@ -108,6 +119,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: 
Running, Probes: Successful" @@ -124,6 +137,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { if err = litmusLIB.PrepareChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("Chaos injection failed: %v", err) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -138,6 +153,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { if err = azureStatus.CheckVirtualDiskWithInstance(experimentsDetails.SubscriptionID, experimentsDetails.VirtualDiskNames, experimentsDetails.ResourceGroup); err != nil { log.Errorf("Virtual disk status check failed: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Virtual disk status check failed") + span.RecordError(err) return } } @@ -156,6 +173,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -172,6 +191,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to update the chaosresult: %v", err) + span.SetStatus(codes.Error, "Unable to Update the chaosresult") + span.RecordError(err) return } diff --git a/experiments/azure/instance-stop/experiment/azure-instance-stop.go b/experiments/azure/instance-stop/experiment/azure-instance-stop.go index 74b3424c8..cef126bb3 100644 --- a/experiments/azure/instance-stop/experiment/azure-instance-stop.go +++ b/experiments/azure/instance-stop/experiment/azure-instance-stop.go @@ -11,6 +11,8 @@ import ( "github.com/litmuschaos/litmus-go/pkg/clients" azureCommon "github.com/litmuschaos/litmus-go/pkg/cloud/azure/common" azureStatus "github.com/litmuschaos/litmus-go/pkg/cloud/azure/instance" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" "github.com/litmuschaos/litmus-go/pkg/events" "github.com/litmuschaos/litmus-go/pkg/log" @@ -23,6 +25,7 @@ import ( // AzureInstanceStop inject the azure instance stop chaos func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var err error experimentsDetails := experimentTypes.ExperimentDetails{} @@ -44,6 +47,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) } } @@ -53,6 +58,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { if err != nil { log.Errorf("Unable to create the chaosresult: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -74,6 +81,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { if experimentsDetails.SubscriptionID, err = azureCommon.GetSubscriptionID(); err != nil { log.Errorf("Failed to get the subscription id: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "fail to get the subscription id") + span.RecordError(err) return } @@ -100,6 +109,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -116,6 +127,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { if err = azureStatus.InstanceStatusCheckByName(experimentsDetails.AzureInstanceNames, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup); err != nil { log.Errorf("Azure instance status check failed: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Azure instance status check failed") + span.RecordError(err) return } log.Info("[Status]: Azure instance(s) is in running state (pre-chaos)") @@ -126,6 +139,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { if err = litmusLIB.PrepareAzureStop(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -139,6 +154,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { if err = azureStatus.InstanceStatusCheckByName(experimentsDetails.AzureInstanceNames, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup); err != nil { log.Errorf("Azure instance status check failed: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Azure instance status check failed") + span.RecordError(err) return } log.Info("[Status]: Azure instance is in running state (post chaos)") @@ -159,6 +176,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" diff --git 
a/experiments/baremetal/redfish-node-restart/experiment/redfish-node-restart.go b/experiments/baremetal/redfish-node-restart/experiment/redfish-node-restart.go index 7b9ae654d..a480b9c49 100644 --- a/experiments/baremetal/redfish-node-restart/experiment/redfish-node-restart.go +++ b/experiments/baremetal/redfish-node-restart/experiment/redfish-node-restart.go @@ -18,10 +18,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // NodeRestart contains steps to inject chaos func NodeRestart(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -42,6 +45,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -51,6 +56,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Create the Chaos Result") + span.RecordError(err) return } @@ -77,6 +84,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -87,6 +96,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -97,11 +108,15 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err != nil { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("[Verification]: Unable to get node power status(pre-chaos). 
Error: %v", err) + span.SetStatus(codes.Error, "Unable to get node power status") + span.RecordError(err) return } if nodeStatus != "On" { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("[Verification]: Node is not in running state(pre-chaos)") + span.SetStatus(codes.Error, "Node is not in running state") + span.RecordError(err) return } log.Info("[Verification]: Node is in running state(pre-chaos)") @@ -119,6 +134,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Running, Probes: Successful" @@ -133,6 +150,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("Chaos injection failed, err: %v", err) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -147,6 +166,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err = status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -157,6 +178,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -167,11 +190,15 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err != nil { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("[Verification]: Unable to get node power status. 
Error: %v ", err) + span.SetStatus(codes.Error, "Unable to get node power status") + span.RecordError(err) return } if nodeStatus != "On" { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("[Verification]: Node is not in running state(post-chaos)") + span.SetStatus(codes.Error, "Node is not in running state") + span.RecordError(err) return } log.Info("[Verification]: Node is in running state(post-chaos)") @@ -188,6 +215,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Running, Probes: Successful" @@ -202,6 +231,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/cassandra/pod-delete/experiment/pod-delete.go b/experiments/cassandra/pod-delete/experiment/pod-delete.go index 6e7417d48..81bcf07a4 100644 --- a/experiments/cassandra/pod-delete/experiment/pod-delete.go +++ b/experiments/cassandra/pod-delete/experiment/pod-delete.go @@ -18,10 +18,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // CasssandraPodDelete inject the cassandra-pod-delete chaos func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var err error var ResourceVersionBefore string @@ -44,6 +47,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -53,6 +58,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Create the Chaos Result") + span.RecordError(err) return } @@ -84,6 +91,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -92,6 +101,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { if err = cassandra.NodeToolStatusCheck(&experimentsDetails, clients); err != nil { log.Errorf("[Status]: Chaos node tool status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos node tool status check failed") + span.RecordError(err) return } } @@ -109,6 +120,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -124,6 +137,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { if err != nil { log.Errorf("[Liveness]: Cassandra liveness check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Cassandra liveness check failed") + span.RecordError(err) return } log.Info("[Confirmation]: The cassandra application liveness pod created successfully") @@ -136,6 +151,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { if err = litmusLIB.PreparePodDelete(ctx, experimentsDetails.ChaoslibDetail, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -152,6 +169,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check 
failed") + span.RecordError(err) return } @@ -160,6 +179,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { if err = cassandra.NodeToolStatusCheck(&experimentsDetails, clients); err != nil { log.Errorf("[Status]: Chaos node tool status check is failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos node tool status check failed") + span.RecordError(err) return } } @@ -176,6 +197,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -193,11 +216,15 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { if err = status.CheckApplicationStatusesByLabels(experimentsDetails.ChaoslibDetail.AppNS, "name=cassandra-liveness-deploy-"+experimentsDetails.RunID, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients); err != nil { log.Errorf("Liveness status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Liveness status check failed") + span.RecordError(err) return } if err = cassandra.LivenessCleanup(&experimentsDetails, clients, ResourceVersionBefore); err != nil { log.Errorf("Liveness cleanup failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Liveness cleanup failed") + span.RecordError(err) return } } @@ -205,6 +232,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { log.Info("[The End]: Updating the chaos result of cassandra pod delete experiment (EOT)") if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/gcp/gcp-vm-disk-loss-by-label/experiment/gcp-vm-disk-loss-by-label.go b/experiments/gcp/gcp-vm-disk-loss-by-label/experiment/gcp-vm-disk-loss-by-label.go index 644aea20f..3c295e113 100644 --- a/experiments/gcp/gcp-vm-disk-loss-by-label/experiment/gcp-vm-disk-loss-by-label.go +++ b/experiments/gcp/gcp-vm-disk-loss-by-label/experiment/gcp-vm-disk-loss-by-label.go @@ -17,11 +17,14 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" "google.golang.org/api/compute/v1" ) // GCPVMDiskLossByLabel contains steps to inject chaos func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var ( computeService *compute.Service @@ -47,6 +50,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -56,6 +61,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -90,6 +97,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -104,6 +113,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { if err != nil { log.Errorf("Failed to obtain a gcp compute service, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to obtain a gcp compute service") + span.RecordError(err) return } @@ -111,6 +122,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { if err := gcp.SetTargetDiskVolumes(computeService, &experimentsDetails); err != nil { log.Errorf("Failed to get the target gcp disk volumes, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to get the target gcp disk volumes") + span.RecordError(err) return } @@ -121,6 +134,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareDiskVolumeLossByLabel(ctx, computeService, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -135,6 +150,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { if err != nil || instanceName == "" { log.Errorf("Failed to verify disk volume attachment status, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to verify disk volume attachment status") + span.RecordError(err) return } } @@ -153,6 +170,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -167,6 +186,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: 
Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/gcp/gcp-vm-disk-loss/experiment/gcp-vm-disk-loss.go b/experiments/gcp/gcp-vm-disk-loss/experiment/gcp-vm-disk-loss.go index be1adbc8a..870edb2d7 100644 --- a/experiments/gcp/gcp-vm-disk-loss/experiment/gcp-vm-disk-loss.go +++ b/experiments/gcp/gcp-vm-disk-loss/experiment/gcp-vm-disk-loss.go @@ -17,11 +17,14 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" "google.golang.org/api/compute/v1" ) // VMDiskLoss injects the disk volume loss chaos func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var ( computeService *compute.Service @@ -47,6 +50,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -56,6 +61,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the Chaos Result") + span.RecordError(err) return } @@ -90,6 +97,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -104,6 +113,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { if err != nil { log.Errorf("Failed to obtain a gcp compute service, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to obtain a gcp compute service") + span.RecordError(err) return } @@ -112,6 +123,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { if err := gcp.DiskVolumeStateCheck(computeService, &experimentsDetails); err != nil { log.Errorf("Volume status check failed pre chaos, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Volume status check failed pre chaos") + span.RecordError(err) return } log.Info("[Status]: Disk volumes are attached to the VM instances (pre-chaos)") @@ -121,6 +134,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { if err := gcp.SetTargetDiskInstanceNames(computeService, &experimentsDetails); err != nil { log.Errorf("Failed to fetch the disk instance names, err: %v", err) 
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to fetch the disk instance names") + span.RecordError(err) return } @@ -129,6 +144,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { if err = litmusLIB.PrepareDiskVolumeLoss(ctx, computeService, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -142,6 +159,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { if err := gcp.DiskVolumeStateCheck(computeService, &experimentsDetails); err != nil { log.Errorf("Volume status check failed post chaos, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Volume status check failed post chaos") + span.RecordError(err) return } log.Info("[Status]: Disk volumes are attached to the VM instances (post-chaos)") @@ -159,6 +178,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -173,6 +194,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("unable to Update the Chaos Result, err: %v", err) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/gcp/gcp-vm-instance-stop-by-label/experiment/gcp-vm-instance-stop-by-label.go b/experiments/gcp/gcp-vm-instance-stop-by-label/experiment/gcp-vm-instance-stop-by-label.go index 30dc7d7c5..55f362641 100644 --- a/experiments/gcp/gcp-vm-instance-stop-by-label/experiment/gcp-vm-instance-stop-by-label.go +++ b/experiments/gcp/gcp-vm-instance-stop-by-label/experiment/gcp-vm-instance-stop-by-label.go @@ -17,11 +17,14 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" "google.golang.org/api/compute/v1" ) // GCPVMInstanceStopByLabel contains steps to inject chaos func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var ( computeService *compute.Service @@ -47,6 +50,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -56,6 +61,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -91,6 +98,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -105,6 +114,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { if err != nil { log.Errorf("Failed to obtain a gcp compute service, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to obtain a gcp compute service") + span.RecordError(err) return } @@ -112,6 +123,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { if err = gcp.SetTargetInstance(computeService, &experimentsDetails); err != nil { log.Errorf("Failed to get the target VM instances, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to get the target VM instances") + span.RecordError(err) return } @@ -122,6 +135,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareVMStopByLabel(ctx, computeService, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -135,6 +150,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { if err := gcp.InstanceStatusCheck(computeService, experimentsDetails.TargetVMInstanceNameList, experimentsDetails.GCPProjectID, []string{experimentsDetails.Zones}); err != nil { log.Errorf("Failed to get VM instance status, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to get VM instance status") + span.RecordError(err) return } } @@ -153,6 +170,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ 
-167,6 +186,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) {
	log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
		return
	}

diff --git a/experiments/gcp/gcp-vm-instance-stop/experiment/gcp-vm-instance-stop.go b/experiments/gcp/gcp-vm-instance-stop/experiment/gcp-vm-instance-stop.go
index 8da11f7f7..2d0de8b0a 100644
--- a/experiments/gcp/gcp-vm-instance-stop/experiment/gcp-vm-instance-stop.go
+++ b/experiments/gcp/gcp-vm-instance-stop/experiment/gcp-vm-instance-stop.go
@@ -17,11 +17,14 @@ import (
	"github.com/litmuschaos/litmus-go/pkg/types"
	"github.com/litmuschaos/litmus-go/pkg/utils/common"
	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
	"google.golang.org/api/compute/v1"
)

// VMInstanceStop executes the experiment steps by injecting chaos into the specified vm instances
func VMInstanceStop(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)

	var (
		computeService *compute.Service
@@ -47,6 +50,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) {
		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
			return
		}
	}
@@ -56,6 +61,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) {
	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Create the Chaos Result")
+		span.RecordError(err)
		return
	}

@@ -90,6 +97,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) {
			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
			return
		}
		msg = "AUT: Running, Probes: Successful"
@@ -104,6 +113,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) {
	if err != nil {
		log.Errorf("Failed to obtain a gcp compute service, err: %v", err)
		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Failed to obtain a gcp compute service")
+		span.RecordError(err)
		return
	}

@@ -112,6 +123,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) {
	if err := gcp.InstanceStatusCheckByName(computeService, experimentsDetails.ManagedInstanceGroup, experimentsDetails.Delay, experimentsDetails.Timeout, "pre-chaos", experimentsDetails.VMInstanceName, experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
		log.Errorf("Failed to get the vm instance status, err: %v", err)
		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Failed to get the vm instance status")
+		span.RecordError(err)
		return
	}

@@ -123,6 +136,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) {
	if err := litmusLIB.PrepareVMStop(ctx, computeService, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
		log.Errorf("Chaos injection failed, err: %v", err)
		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
		return
	}

@@ -136,6 +151,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) {
	if err := gcp.InstanceStatusCheckByName(computeService, experimentsDetails.ManagedInstanceGroup, experimentsDetails.Delay, experimentsDetails.Timeout, "post-chaos", experimentsDetails.VMInstanceName, experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
		log.Errorf("failed to get the vm instance status, err: %v", err)
		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Failed to get the vm instance status")
+		span.RecordError(err)
		return
	}

@@ -154,6 +171,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) {
			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
			return
		}
		msg = "AUT: Running, Probes: Successful"
@@ -168,6 +187,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) {
	log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
		return
	}

diff --git a/experiments/generic/container-kill/experiment/container-kill.go b/experiments/generic/container-kill/experiment/container-kill.go
index 05d3f8f80..0d81e11e5 100644
--- a/experiments/generic/container-kill/experiment/container-kill.go
+++ b/experiments/generic/container-kill/experiment/container-kill.go
@@ -17,10 +17,13 @@ import (
	"github.com/litmuschaos/litmus-go/pkg/types"
	"github.com/litmuschaos/litmus-go/pkg/utils/common"
	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
)

// ContainerKill inject the container-kill chaos
func ContainerKill(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)

	experimentsDetails := experimentTypes.ExperimentDetails{}
	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) {
		// Get values from chaosengine.
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +106,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -110,6 +121,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareContainerKill(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -125,6 +138,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -141,6 +156,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -156,6 +173,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable 
to update the chaosresult") + span.RecordError(err) return } diff --git a/experiments/generic/disk-fill/experiment/disk-fill.go b/experiments/generic/disk-fill/experiment/disk-fill.go index 19546d0e6..b3bb4f226 100644 --- a/experiments/generic/disk-fill/experiment/disk-fill.go +++ b/experiments/generic/disk-fill/experiment/disk-fill.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // DiskFill inject the disk-fill chaos func DiskFill(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -79,6 +86,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -96,6 +105,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -109,6 +120,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareDiskFill(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -124,6 +137,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ 
-140,6 +155,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -155,6 +172,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result err: %v\n", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/docker-service-kill/experiment/docker-service-kill.go b/experiments/generic/docker-service-kill/experiment/docker-service-kill.go index 06ed1d565..22d5b817f 100644 --- a/experiments/generic/docker-service-kill/experiment/docker-service-kill.go +++ b/experiments/generic/docker-service-kill/experiment/docker-service-kill.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // DockerServiceKill inject the docker-service-kill chaos func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
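
Every error path in this patch now repeats the same pair of calls next to RecordAfterFailure. If the duplication becomes a maintenance burden, it could be folded into a small helper; this is only a sketch of a possible refactor, and failSpan is a hypothetical name rather than something the patch introduces:

package chaosutil

import (
	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

// failSpan bundles the repeated pattern: mark the span failed with a
// short reason and attach the underlying error as an exception event.
func failSpan(span trace.Span, reason string, err error) {
	span.SetStatus(codes.Error, reason)
	if err != nil {
		span.RecordError(err)
	}
}

Each call site would then collapse to a single line, for example failSpan(span, "Chaos injection failed", err).
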
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -77,6 +84,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -86,6 +95,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -97,6 +108,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target nodes are not in the ready state") + span.RecordError(err) return } } @@ -114,6 +127,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -127,6 +142,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareDockerServiceKill(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("Chaos injection failed, err: %v", err) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -140,6 +157,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -149,6 +168,8 @@ func DockerServiceKill(ctx 
context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -174,6 +195,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -189,6 +212,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/kubelet-service-kill/experiment/kubelet-service-kill.go b/experiments/generic/kubelet-service-kill/experiment/kubelet-service-kill.go index c78f065ed..3fb8b2045 100644 --- a/experiments/generic/kubelet-service-kill/experiment/kubelet-service-kill.go +++ b/experiments/generic/kubelet-service-kill/experiment/kubelet-service-kill.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // KubeletServiceKill inject the kubelet-service-kill chaos func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -77,6 +84,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -86,6 +95,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -97,6 +108,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target nodes are not in the ready state") + span.RecordError(err) return } } @@ -114,6 +127,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -127,6 +142,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareKubeletKill(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -140,6 +157,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -149,6 +168,8 @@ func KubeletServiceKill(ctx 
context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -174,6 +195,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -189,6 +212,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/node-cpu-hog/experiment/node-cpu-hog.go b/experiments/generic/node-cpu-hog/experiment/node-cpu-hog.go index 5fbd0248a..b265e4268 100644 --- a/experiments/generic/node-cpu-hog/experiment/node-cpu-hog.go +++ b/experiments/generic/node-cpu-hog/experiment/node-cpu-hog.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // NodeCPUHog inject the node-cpu-hog chaos func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
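
Both calls are needed because they do different things in the OpenTelemetry API: RecordError only appends an exception event and leaves the status untouched, while SetStatus only changes the status code, and the description is retained only for codes.Error. A test-style sketch using the SDK's in-memory span recorder, assuming the standard go.opentelemetry.io/otel/sdk packages:

package main

import (
	"context"
	"errors"
	"fmt"

	"go.opentelemetry.io/otel/codes"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
	"go.opentelemetry.io/otel/sdk/trace/tracetest"
)

func main() {
	sr := tracetest.NewSpanRecorder()
	tp := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sr))
	_, span := tp.Tracer("check").Start(context.Background(), "op")

	span.RecordError(errors.New("boom")) // event only; status still Unset
	span.SetStatus(codes.Error, "op failed")
	span.End()

	got := sr.Ended()[0]
	fmt.Println(got.Status().Code, got.Status().Description) // Error op failed
	fmt.Println(len(got.Events()))                           // 1 exception event
}
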
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -78,6 +85,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -87,6 +96,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -98,6 +109,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target nodes are not in the ready state") + span.RecordError(err) return } } @@ -115,6 +128,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -128,6 +143,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareNodeCPUHog(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("[Error]: CPU hog failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -141,6 +158,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -150,6 +169,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := 
status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -175,6 +196,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -190,6 +213,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/node-drain/experiment/node-drain.go b/experiments/generic/node-drain/experiment/node-drain.go index 9b6533b17..efc8fefa5 100644 --- a/experiments/generic/node-drain/experiment/node-drain.go +++ b/experiments/generic/node-drain/experiment/node-drain.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // NodeDrain inject the node-drain chaos func NodeDrain(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -77,6 +84,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -86,6 +95,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -97,6 +108,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target nodes are not in the ready state") + span.RecordError(err) return } } @@ -114,6 +127,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -127,6 +142,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareNodeDrain(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -140,6 +157,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -149,6 +168,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := 
status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -174,6 +195,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -189,6 +212,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/node-io-stress/experiment/node-io-stress.go b/experiments/generic/node-io-stress/experiment/node-io-stress.go index a58e0f2e5..610001f99 100644 --- a/experiments/generic/node-io-stress/experiment/node-io-stress.go +++ b/experiments/generic/node-io-stress/experiment/node-io-stress.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // NodeIOStress inject the node-io-stress chaos func NodeIOStress(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
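
The ctx threaded into the litmusLIB.Prepare* calls is what lets the chaos libraries participate in the same trace: a library can start a child span from the incoming context and record its own failures there, so nested spans line up under the experiment's span. The call shape, sketched with illustrative names (PrepareSomething and inject are placeholders, not functions from this patch):

package chaoslib

import (
	"context"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/codes"
)

func inject(ctx context.Context) error { return nil } // placeholder step

// PrepareSomething mirrors the Prepare* shape: start a child span from
// the incoming ctx, record failures on it, and end it on return.
func PrepareSomething(ctx context.Context) error {
	ctx, span := otel.Tracer("chaoslib").Start(ctx, "PrepareSomething")
	defer span.End()

	if err := inject(ctx); err != nil {
		span.SetStatus(codes.Error, "could not inject chaos")
		span.RecordError(err)
		return err
	}
	return nil
}
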
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -89,6 +98,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -100,6 +111,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target nodes are not in the ready state") + span.RecordError(err) return } } @@ -117,6 +130,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -130,6 +145,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareNodeIOStress(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("[Error]: node io stress failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -143,6 +160,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -152,6 +171,8 @@ func NodeIOStress(ctx context.Context, clients 
clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -177,6 +198,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -192,6 +215,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/node-memory-hog/experiment/node-memory-hog.go b/experiments/generic/node-memory-hog/experiment/node-memory-hog.go index cd040c5a0..8de0e0eff 100644 --- a/experiments/generic/node-memory-hog/experiment/node-memory-hog.go +++ b/experiments/generic/node-memory-hog/experiment/node-memory-hog.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // NodeMemoryHog inject the node-memory-hog chaos func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -79,6 +86,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -88,6 +97,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -99,6 +110,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target nodes are not in the ready state") + span.RecordError(err) return } } @@ -116,6 +129,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -129,6 +144,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareNodeMemoryHog(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("[Error]: node memory hog failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -142,6 +159,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -151,6 +170,8 @@ func NodeMemoryHog(ctx context.Context, clients 
clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -176,6 +197,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -191,6 +214,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/node-restart/experiment/node-restart.go b/experiments/generic/node-restart/experiment/node-restart.go index 788836f76..75dc23ad7 100644 --- a/experiments/generic/node-restart/experiment/node-restart.go +++ b/experiments/generic/node-restart/experiment/node-restart.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // NodeRestart inject the node-restart chaos func NodeRestart(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
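
One consequence of instrumenting only the error paths: on success these spans end with their status still codes.Unset, which observability backends conventionally read as success, so no matching SetStatus(codes.Ok, ...) is needed on the happy path. The three status codes, as defined in go.opentelemetry.io/otel/codes:

package main

import (
	"fmt"

	"go.opentelemetry.io/otel/codes"
)

func main() {
	// Unset (0) is the default; Error (1) marks failure; Ok (2) is an
	// explicit application-level success.
	fmt.Println(codes.Unset, codes.Error, codes.Ok) // Unset Error Ok
}
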
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -77,6 +84,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -86,6 +95,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -97,6 +108,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target nodes are not in the ready state") + span.RecordError(err) return } } @@ -114,6 +127,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -127,6 +142,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareNodeRestart(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("[Error]: Node restart failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -140,6 +157,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -149,6 +168,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err 
:= status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -174,6 +195,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -189,6 +212,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } if experimentsDetails.EngineName != "" { diff --git a/experiments/generic/node-taint/experiment/node-taint.go b/experiments/generic/node-taint/experiment/node-taint.go index 9468e1c9d..719d312c7 100644 --- a/experiments/generic/node-taint/experiment/node-taint.go +++ b/experiments/generic/node-taint/experiment/node-taint.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // NodeTaint inject the node-taint chaos func NodeTaint(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -78,6 +85,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -87,6 +96,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -98,6 +109,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target nodes are not in the ready state") + span.RecordError(err) return } } @@ -115,6 +128,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -128,6 +143,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareNodeTaint(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -141,6 +158,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -150,6 +169,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) { if err := 
status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -175,6 +196,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -190,6 +213,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-autoscaler/experiment/pod-autoscaler.go b/experiments/generic/pod-autoscaler/experiment/pod-autoscaler.go index b7b78b8c5..1fcd7c6c6 100644 --- a/experiments/generic/pod-autoscaler/experiment/pod-autoscaler.go +++ b/experiments/generic/pod-autoscaler/experiment/pod-autoscaler.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodAutoscaler inject the pod-autoscaler chaos func PodAutoscaler(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -81,6 +88,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -98,6 +107,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -111,6 +122,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PreparePodAutoscaler(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -127,6 +140,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) { events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -143,6 +158,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -158,6 +175,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + 
span.RecordError(err) return } diff --git a/experiments/generic/pod-cpu-hog-exec/experiment/pod-cpu-hog-exec.go b/experiments/generic/pod-cpu-hog-exec/experiment/pod-cpu-hog-exec.go index 396d74c04..d3fdf45fe 100644 --- a/experiments/generic/pod-cpu-hog-exec/experiment/pod-cpu-hog-exec.go +++ b/experiments/generic/pod-cpu-hog-exec/experiment/pod-cpu-hog-exec.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodCPUHogExec inject the pod-cpu-hog-exec chaos func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +106,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -110,6 +121,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareCPUExecStress(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("[Error]: CPU hog failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "CPU hog failed") + span.RecordError(err) return } @@ -125,6 +138,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, 
"Application status check failed") + span.RecordError(err) return } } @@ -141,6 +156,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -156,6 +173,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-cpu-hog/experiment/pod-cpu-hog.go b/experiments/generic/pod-cpu-hog/experiment/pod-cpu-hog.go index 6ad9c212a..e84623fef 100644 --- a/experiments/generic/pod-cpu-hog/experiment/pod-cpu-hog.go +++ b/experiments/generic/pod-cpu-hog/experiment/pod-cpu-hog.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodCPUHog inject the pod-cpu-hog chaos func PodCPUHog(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +106,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -110,6 +121,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareAndInjectStressChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("[Error]: CPU hog failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "CPU hog failed") + span.RecordError(err) return } @@ -125,6 +138,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -141,6 +156,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -156,6 +173,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") 
+ span.RecordError(err) return } diff --git a/experiments/generic/pod-dns-error/experiment/pod-dns-error.go b/experiments/generic/pod-dns-error/experiment/pod-dns-error.go index a7c739f65..260caa5bd 100644 --- a/experiments/generic/pod-dns-error/experiment/pod-dns-error.go +++ b/experiments/generic/pod-dns-error/experiment/pod-dns-error.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodDNSError contains steps to inject chaos func PodDNSError(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -81,6 +88,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -98,6 +107,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -111,6 +122,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareAndInjectChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -126,6 +139,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + 
span.RecordError(err) return } } @@ -142,6 +157,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -157,6 +174,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-dns-spoof/experiment/pod-dns-spoof.go b/experiments/generic/pod-dns-spoof/experiment/pod-dns-spoof.go index 145376a64..956e9e3c5 100644 --- a/experiments/generic/pod-dns-spoof/experiment/pod-dns-spoof.go +++ b/experiments/generic/pod-dns-spoof/experiment/pod-dns-spoof.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodDNSSpoof contains steps to inject chaos func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var err error experimentsDetails := experimentTypes.ExperimentDetails{} @@ -42,6 +45,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet
 	if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 		log.Errorf("Unable to initialize the probes, err: %v", err)
+		span.SetStatus(codes.Error, "Unable to initialize the probes")
+		span.RecordError(err)
 		return
 	}
 }
@@ -51,6 +56,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 	if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -82,6 +89,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -100,6 +109,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -113,6 +124,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 	if err = litmusLIB.PrepareAndInjectChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -128,6 +141,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -144,6 +159,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -159,6 +176,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 	if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
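Every early-return path in these experiment entrypoints follows the same two-call pattern: span.SetStatus(codes.Error, reason) marks the span as failed for trace backends, and span.RecordError(err) attaches the concrete error as a span event. A small helper could collapse the pair; the sketch below is illustrative only — the FailSpan name and its placement in a telemetry package are assumptions, not part of this patch:

// Hypothetical helper (not part of this patch): wraps the repeated
// SetStatus/RecordError pair used at every early-return site above.
package telemetry

import (
	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

// FailSpan marks the span as failed with a short reason and records
// the underlying error as a span event. Safe to call with a nil error.
func FailSpan(span trace.Span, reason string, err error) {
	span.SetStatus(codes.Error, reason)
	if err != nil {
		span.RecordError(err)
	}
}

With such a helper, each failure branch would shrink to a single call, e.g. telemetry.FailSpan(span, "Chaos injection failed", err), keeping the status description and the recorded error in lockstep.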
diff --git a/experiments/generic/pod-fio-stress/experiment/pod-fio-stress.go b/experiments/generic/pod-fio-stress/experiment/pod-fio-stress.go
index e23a4df08..ff516c3a4 100644
--- a/experiments/generic/pod-fio-stress/experiment/pod-fio-stress.go
+++ b/experiments/generic/pod-fio-stress/experiment/pod-fio-stress.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // Experiment contains steps to inject chaos
 func PodFioStress(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 	// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 	if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 		log.Errorf("Unable to initialize the probes, err: %v", err)
+		span.SetStatus(codes.Error, "Unable to initialize the probes")
+		span.RecordError(err)
 		return
 	}
 }
@@ -50,6 +55,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -79,6 +86,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -96,6 +105,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -109,6 +120,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PrepareChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
 		log.Errorf("Chaos injection failed, err: %v", err)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -124,6 +137,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application
status check failed") + span.RecordError(err) return } } @@ -140,6 +155,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -155,6 +172,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-http-latency/experiment/pod-http-latency.go b/experiments/generic/pod-http-latency/experiment/pod-http-latency.go index 7f8a9f1cb..79266ba78 100644 --- a/experiments/generic/pod-http-latency/experiment/pod-http-latency.go +++ b/experiments/generic/pod-http-latency/experiment/pod-http-latency.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodHttpLatency inject the pod-http-latency chaos func PodHttpLatency(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +106,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -110,6 +121,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PodHttpLatencyChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -125,6 +138,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -141,6 +156,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -156,6 +173,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, 
"Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-http-modify-body/experiment/pod-http-modify-body.go b/experiments/generic/pod-http-modify-body/experiment/pod-http-modify-body.go index 782d539f4..3b618dbc7 100644 --- a/experiments/generic/pod-http-modify-body/experiment/pod-http-modify-body.go +++ b/experiments/generic/pod-http-modify-body/experiment/pod-http-modify-body.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodHttpModifyBody contains steps to inject chaos func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +106,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -110,6 +121,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PodHttpModifyBodyChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -125,6 +138,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, 
clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -141,6 +156,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -156,6 +173,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-http-modify-header/experiment/pod-http-modify-header.go b/experiments/generic/pod-http-modify-header/experiment/pod-http-modify-header.go index f05053481..0ef8d88fe 100644 --- a/experiments/generic/pod-http-modify-header/experiment/pod-http-modify-header.go +++ b/experiments/generic/pod-http-modify-header/experiment/pod-http-modify-header.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodHttpModifyHeader inject the pod-http-modify-header chaos func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +106,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -110,6 +121,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PodHttpModifyHeaderChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -125,6 +138,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -141,6 +156,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -156,6 +173,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, 
&eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-http-reset-peer/experiment/pod-http-reset-peer.go b/experiments/generic/pod-http-reset-peer/experiment/pod-http-reset-peer.go index d5df84fd4..3c2478011 100644 --- a/experiments/generic/pod-http-reset-peer/experiment/pod-http-reset-peer.go +++ b/experiments/generic/pod-http-reset-peer/experiment/pod-http-reset-peer.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodHttpResetPeer contains steps to inject chaos func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +106,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -110,6 +121,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PodHttpResetPeerChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -125,6 +138,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") 
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -141,6 +156,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -156,6 +173,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-http-status-code/experiment/pod-http-status-code.go b/experiments/generic/pod-http-status-code/experiment/pod-http-status-code.go index 447b3ef13..d30412f10 100644 --- a/experiments/generic/pod-http-status-code/experiment/pod-http-status-code.go +++ b/experiments/generic/pod-http-status-code/experiment/pod-http-status-code.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodHttpStatusCode contains steps to inject chaos func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -42,6 +45,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -51,6 +56,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -88,6 +95,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -105,6 +114,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -118,6 +129,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PodHttpStatusCodeChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -133,6 +146,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -149,6 +164,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -164,6 +181,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git 
diff --git a/experiments/generic/pod-io-stress/experiment/pod-io-stress.go b/experiments/generic/pod-io-stress/experiment/pod-io-stress.go
index 95b964754..4922ae06d 100644
--- a/experiments/generic/pod-io-stress/experiment/pod-io-stress.go
+++ b/experiments/generic/pod-io-stress/experiment/pod-io-stress.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodIOStress inject the pod-io-stress chaos
 func PodIOStress(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
@@ -80,6 +87,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -97,6 +106,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probe Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -110,6 +121,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PrepareAndInjectStressChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("[Error]: Pod IO Stress failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Pod IO Stress failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -125,6 +138,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -141,6 +156,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probes Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -156,6 +173,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
 
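Each experiment now opens with span := trace.SpanFromContext(ctx). That call only works because the caller started a span and threaded it through ctx; the experiment then annotates that same span. A minimal, self-contained sketch of the contract (tracer and span names are illustrative, and without a registered TracerProvider otel.Tracer returns a no-op implementation):

package main

import (
	"context"
	"errors"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

// experiment mirrors the pattern in the diffs above: it does not start a
// span, it annotates the one its caller stored in ctx.
func experiment(ctx context.Context) {
	span := trace.SpanFromContext(ctx)
	if err := errors.New("probe initialization failed"); err != nil {
		span.SetStatus(codes.Error, "Unable to initialize the probes")
		span.RecordError(err)
		return
	}
}

func main() {
	ctx, span := otel.Tracer("ExperimentTracer").Start(context.Background(), "PodIOStressExperiment")
	defer span.End()
	experiment(ctx) // errors recorded inside land on the span started here
}
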
diff --git a/experiments/generic/pod-memory-hog-exec/experiment/pod-memory-hog-exec.go b/experiments/generic/pod-memory-hog-exec/experiment/pod-memory-hog-exec.go
index 7cad3f0e3..2d2257d2c 100644
--- a/experiments/generic/pod-memory-hog-exec/experiment/pod-memory-hog-exec.go
+++ b/experiments/generic/pod-memory-hog-exec/experiment/pod-memory-hog-exec.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodMemoryHogExec inject the pod-memory-hog-exec chaos
 func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
@@ -80,6 +87,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -97,6 +106,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probe Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -110,6 +121,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PrepareMemoryExecStress(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("[Error]: pod memory hog failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "pod memory hog failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -125,6 +138,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -141,6 +156,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probes Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -156,6 +173,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-memory-hog/experiment/pod-memory-hog.go b/experiments/generic/pod-memory-hog/experiment/pod-memory-hog.go index 128e177f4..f3b869a24 100644 --- a/experiments/generic/pod-memory-hog/experiment/pod-memory-hog.go +++ b/experiments/generic/pod-memory-hog/experiment/pod-memory-hog.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodMemoryHog inject the pod-memory-hog chaos func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +106,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -110,6 +121,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareAndInjectStressChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("[Error]: pod memory hog failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "pod memory hog failed") + span.RecordError(err) return } @@ -125,6 +138,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, 
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -141,6 +156,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probes Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -156,6 +173,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
 
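The two calls are complementary rather than redundant: SetStatus(codes.Error, ...) marks the span itself as failed (the description argument is only retained for the Error code), while RecordError only appends an "exception" event and does not change the status. Trace backends typically filter on status, so both calls are needed for the failure to be both searchable and diagnosable. A runnable check of that behavior using the SDK's in-memory recorder:

package main

import (
	"context"
	"errors"
	"fmt"

	"go.opentelemetry.io/otel/codes"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
	"go.opentelemetry.io/otel/sdk/trace/tracetest"
)

func main() {
	// Record finished spans in memory so they can be inspected.
	recorder := tracetest.NewSpanRecorder()
	provider := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(recorder))

	_, span := provider.Tracer("demo").Start(context.Background(), "pod-memory-hog")
	err := errors.New("pod memory hog failed")
	span.RecordError(err)                                 // event only; status still Unset
	span.SetStatus(codes.Error, "pod memory hog failed") // now the span reads as failed
	span.End()

	done := recorder.Ended()[0]
	fmt.Println(done.Status().Code, done.Status().Description) // Error pod memory hog failed
	fmt.Println(done.Events()[0].Name)                         // exception
}
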
diff --git a/experiments/generic/pod-network-corruption/experiment/pod-network-corruption.go b/experiments/generic/pod-network-corruption/experiment/pod-network-corruption.go
index 183b2021b..257a6f910 100644
--- a/experiments/generic/pod-network-corruption/experiment/pod-network-corruption.go
+++ b/experiments/generic/pod-network-corruption/experiment/pod-network-corruption.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodNetworkCorruption inject the pod-network-corruption chaos
 func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
@@ -81,6 +88,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -98,6 +107,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probe Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -111,6 +122,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PodNetworkCorruptionChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -126,6 +139,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -142,6 +157,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probes Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -157,6 +174,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
 
diff --git a/experiments/generic/pod-network-duplication/experiment/pod-network-duplication.go b/experiments/generic/pod-network-duplication/experiment/pod-network-duplication.go
index f1e42c839..09e277f75 100644
--- a/experiments/generic/pod-network-duplication/experiment/pod-network-duplication.go
+++ b/experiments/generic/pod-network-duplication/experiment/pod-network-duplication.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodNetworkDuplication inject the pod-network-duplication chaos
 func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
@@ -81,6 +88,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -98,6 +107,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probe Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -111,6 +122,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PodNetworkDuplicationChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -126,6 +139,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
"Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -142,6 +157,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -157,6 +174,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-network-latency/experiment/pod-network-latency.go b/experiments/generic/pod-network-latency/experiment/pod-network-latency.go index efa7699a0..09766a5a2 100644 --- a/experiments/generic/pod-network-latency/experiment/pod-network-latency.go +++ b/experiments/generic/pod-network-latency/experiment/pod-network-latency.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodNetworkLatency inject the pod-network-latency chaos func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
diff --git a/experiments/generic/pod-network-latency/experiment/pod-network-latency.go b/experiments/generic/pod-network-latency/experiment/pod-network-latency.go
index efa7699a0..09766a5a2 100644
--- a/experiments/generic/pod-network-latency/experiment/pod-network-latency.go
+++ b/experiments/generic/pod-network-latency/experiment/pod-network-latency.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodNetworkLatency inject the pod-network-latency chaos
 func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
@@ -81,6 +88,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -97,6 +106,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probe Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -111,6 +122,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PodNetworkLatencyChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -126,6 +139,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -142,6 +157,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probes Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -157,6 +174,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-network-loss/experiment/pod-network-loss.go b/experiments/generic/pod-network-loss/experiment/pod-network-loss.go index cfb538156..2c6c3252a 100644 --- a/experiments/generic/pod-network-loss/experiment/pod-network-loss.go +++ b/experiments/generic/pod-network-loss/experiment/pod-network-loss.go @@ -17,10 +17,14 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodNetworkLoss inject the pod-network-loss chaos func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) + experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} chaosDetails := types.ChaosDetails{} @@ -40,6 +44,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -49,6 +55,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +88,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +107,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -110,6 +122,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PodNetworkLossChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -125,6 +139,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") 
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -141,6 +157,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probes Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -156,6 +174,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
 
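A property the pattern relies on implicitly: trace.SpanFromContext never returns nil. If the context carries no span, for example when tracing is disabled, it returns a no-op span, so the added calls are safe without a nil check:

package main

import (
	"context"
	"errors"
	"fmt"

	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

func main() {
	// Background() carries no span, so a no-op span comes back.
	span := trace.SpanFromContext(context.Background())
	span.SetStatus(codes.Error, "Probes Failed") // harmless no-op
	span.RecordError(errors.New("probe failed")) // harmless no-op
	fmt.Println(span.IsRecording())              // false
}
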
diff --git a/experiments/generic/pod-network-partition/experiment/pod-network-partition.go b/experiments/generic/pod-network-partition/experiment/pod-network-partition.go
index 44e73cd67..0d2b223f5 100644
--- a/experiments/generic/pod-network-partition/experiment/pod-network-partition.go
+++ b/experiments/generic/pod-network-partition/experiment/pod-network-partition.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodNetworkPartition inject the pod-network-partition chaos
 func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
@@ -79,6 +86,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -96,6 +105,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probe Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -109,6 +120,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PrepareAndInjectChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
 		log.Errorf("Chaos injection failed, err: %v", err)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -124,6 +137,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -140,6 +155,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probes Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -155,6 +172,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
 
diff --git a/experiments/kafka/kafka-broker-pod-failure/experiment/kafka-broker-pod-failure.go b/experiments/kafka/kafka-broker-pod-failure/experiment/kafka-broker-pod-failure.go
index ef33545cf..31e3597ea 100644
--- a/experiments/kafka/kafka-broker-pod-failure/experiment/kafka-broker-pod-failure.go
+++ b/experiments/kafka/kafka-broker-pod-failure/experiment/kafka-broker-pod-failure.go
@@ -19,10 +19,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // KafkaBrokerPodFailure derive and kill the kafka broker leader
 func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -43,6 +46,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -52,6 +57,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Create the Chaos Result")
+		span.RecordError(err)
 		return
 	}
 
@@ -79,6 +86,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Cluster health check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -96,6 +105,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probes Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = "AUT: Running, Probes: Successful"
@@ -112,6 +123,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) {
 		if err != nil {
 			log.Errorf("Liveness check failed, err: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Liveness check failed")
+			span.RecordError(err)
 			return
 		}
 		log.Info("The Liveness pod gets established")
@@ -129,6 +142,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) {
 	if err := kafkaPodDelete.PreparePodDelete(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -145,6 +160,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Cluster health check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -161,6 +178,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) {
 				types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 				events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probe Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -178,6 +197,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) {
 		if err := status.CheckApplicationStatusesByLabels(experimentsDetails.ChaoslibDetail.AppNS, "name=kafka-liveness-"+experimentsDetails.RunID, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients); err != nil {
 			log.Errorf("Application liveness status check failed, err: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application liveness status check failed")
+			span.RecordError(err)
 			return
 		}
 
@@ -185,6 +206,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) {
 		if err := kafka.LivenessCleanup(&experimentsDetails, clients); err != nil {
 			log.Errorf("liveness cleanup failed, err: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "liveness cleanup failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -193,6 +216,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) {
 	log.Info("[The End]: Updating the chaos result of kafka pod delete experiment (EOT)")
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
 
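RecordError also accepts options. If the recorded failures here ever need the call site, trace.WithStackTrace(true) attaches a stack trace to the exception event; the option is part of the public API, though this patch does not use it:

package main

import (
	"context"
	"errors"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

func main() {
	_, span := otel.Tracer("demo").Start(context.Background(), "kafka-broker-pod-failure")
	defer span.End()

	err := errors.New("liveness check failed")
	span.SetStatus(codes.Error, "Liveness check failed")
	// Captures the current goroutine's stack onto the exception event.
	span.RecordError(err, trace.WithStackTrace(true))
}
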
diff --git a/experiments/kube-aws/ebs-loss-by-id/experiment/ebs-loss-by-id.go b/experiments/kube-aws/ebs-loss-by-id/experiment/ebs-loss-by-id.go
index 20d3cc248..280540321 100644
--- a/experiments/kube-aws/ebs-loss-by-id/experiment/ebs-loss-by-id.go
+++ b/experiments/kube-aws/ebs-loss-by-id/experiment/ebs-loss-by-id.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // EBSLossByID inject the ebs volume loss chaos
 func EBSLossByID(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	var err error
 	experimentsDetails := experimentTypes.ExperimentDetails{}
@@ -42,6 +45,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -51,6 +56,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) {
 	if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to create the chaosresult: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
@@ -80,6 +87,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) {
 		if err = aws.EBSStateCheckByID(experimentsDetails.EBSVolumeID, experimentsDetails.Region); err != nil {
 			log.Errorf("Volume status check failed pre chaos: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Volume status check failed pre chaos")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -99,6 +108,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) {
 					log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
 				}
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probe Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = "AUT: Running, Probes: Successful"
@@ -115,6 +126,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) {
 	if err = litmusLIB.PrepareEBSLossByID(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -128,6 +141,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) {
 		if err = aws.EBSStateCheckByID(experimentsDetails.EBSVolumeID, experimentsDetails.Region); err != nil {
 			log.Errorf("Volume status check failed post chaos: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Volume status check failed post chaos")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -146,6 +161,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) {
 					log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
 				}
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probes Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = "AUT: Running, Probes: Successful"
@@ -162,6 +179,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) {
 	log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
 	if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to update the chaosresult: %v", err)
+		span.SetStatus(codes.Error, "Unable to update the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
diff --git a/experiments/kube-aws/ebs-loss-by-tag/experiment/ebs-loss-by-tag.go b/experiments/kube-aws/ebs-loss-by-tag/experiment/ebs-loss-by-tag.go
index 44f201efa..feb0610e3 100644
--- a/experiments/kube-aws/ebs-loss-by-tag/experiment/ebs-loss-by-tag.go
+++ b/experiments/kube-aws/ebs-loss-by-tag/experiment/ebs-loss-by-tag.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // EBSLossByTag inject the ebs volume loss chaos
 func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to create the chaosresult: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
@@ -79,6 +86,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
 	if err := aws.SetTargetVolumeIDs(&experimentsDetails); err != nil {
 		log.Errorf("Failed to set the volumes under chaos: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Failed to set the volumes under chaos")
+		span.RecordError(err)
 		return
 	}
 
@@ -97,6 +106,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
 					log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
 				}
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probe Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = "AUT: Running, Probes: Successful"
@@ -113,6 +124,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PrepareEBSLossByTag(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -126,6 +139,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
 		if err := aws.PostChaosVolumeStatusCheck(&experimentsDetails); err != nil {
 			log.Errorf("Failed to verify that the EBS volume is attached to an instance: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Failed to verify that the EBS volume is attached to an instance")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -144,6 +159,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
 					log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
 				}
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probes Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = "AUT: Running, Probes: Successful"
@@ -160,6 +177,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
 	log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to update the chaosresult: %v", err)
+		span.SetStatus(codes.Error, "Unable to update the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
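The status description is a single flat string. If richer failure context were wanted (experiment name, chaos phase, target resource), span attributes are the natural place; the keys below are illustrative, not an existing litmus-go convention:

package main

import (
	"context"
	"errors"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/codes"
)

func main() {
	_, span := otel.Tracer("demo").Start(context.Background(), "ebs-loss-by-tag")
	defer span.End()

	span.SetAttributes(
		attribute.String("litmus.experiment", "ebs-loss-by-tag"), // hypothetical key
		attribute.String("litmus.phase", "PostChaosCheck"),       // hypothetical key
	)
	span.SetStatus(codes.Error, "Probes Failed")
	span.RecordError(errors.New("probe failed"))
}
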
diff --git a/experiments/kube-aws/ec2-terminate-by-id/experiment/ec2-terminate-by-id.go b/experiments/kube-aws/ec2-terminate-by-id/experiment/ec2-terminate-by-id.go
index 95010a803..c1d587d99 100644
--- a/experiments/kube-aws/ec2-terminate-by-id/experiment/ec2-terminate-by-id.go
+++ b/experiments/kube-aws/ec2-terminate-by-id/experiment/ec2-terminate-by-id.go
@@ -18,10 +18,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // EC2TerminateByID inject the ebs volume loss chaos
 func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	var (
 		err error
@@ -47,6 +50,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -56,6 +61,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
 	if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to create the chaosresult: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
@@ -95,6 +102,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
 					log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
 				}
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probe Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = "AUT: Running, Probes: Successful"
@@ -112,6 +121,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
 	if err = aws.InstanceStatusCheckByID(experimentsDetails.Ec2InstanceID, experimentsDetails.Region); err != nil {
 		log.Errorf("EC2 instance status check failed: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "EC2 instance status check failed")
+		span.RecordError(err)
 		return
 	}
 	log.Info("[Status]: EC2 instance is in running state")
@@ -124,6 +135,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
 		if err != nil {
 			log.Errorf("Pre chaos node status check failed: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Pre chaos node status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -133,6 +146,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
 	if err = litmusLIB.PrepareEC2TerminateByID(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -147,6 +162,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
 	if err = aws.InstanceStatusCheckByID(experimentsDetails.Ec2InstanceID, experimentsDetails.Region); err != nil {
 		log.Errorf("EC2 instance status check failed: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "EC2 instance status check failed")
+		span.RecordError(err)
 		return
 	}
 	log.Info("[Status]: EC2 instance is in running state (post chaos)")
@@ -158,6 +175,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
 		if err := aws.PostChaosNodeCountCheck(activeNodeCount, autoScalingGroupName, experimentsDetails.Region); err != nil {
 			log.Errorf("Post chaos active node count check failed: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Post chaos active node count check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -176,6 +195,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
 					log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
 				}
 				result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+				span.SetStatus(codes.Error, "Probes Failed")
+				span.RecordError(err)
 				return
 			}
 			msg = "AUT: Running, Probes: Successful"
@@ -192,6 +213,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
 	log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
 	if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to update the chaosresult: %v", err)
+		span.SetStatus(codes.Error, "Unable to update the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
diff --git a/experiments/kube-aws/ec2-terminate-by-tag/experiment/ec2-terminate-tag.go b/experiments/kube-aws/ec2-terminate-by-tag/experiment/ec2-terminate-tag.go
index 5d4a0ee8e..10c0ab535 100644
--- a/experiments/kube-aws/ec2-terminate-by-tag/experiment/ec2-terminate-tag.go
+++ b/experiments/kube-aws/ec2-terminate-by-tag/experiment/ec2-terminate-tag.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // EC2TerminateByTag inject the ebs volume loss chaos
 func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	var (
 		err error
@@ -46,6 +49,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
 		if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -55,6 +60,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
 	if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to create the chaosresult: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
@@ -95,6 +102,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
 				log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
 			}
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "AUT: Running, Probes: Successful"
@@ -110,6 +119,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
 	if err = litmusLIB.SetTargetInstance(&experimentsDetails); err != nil {
 		log.Errorf("Failed to get the target ec2 instance: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Failed to get the target ec2 instance")
+		span.RecordError(err)
 		return
 	}
 
@@ -120,6 +131,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
 		if err != nil {
 			log.Errorf("Pre chaos node status check failed: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Pre chaos node status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -129,6 +142,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
 	if err = litmusLIB.PrepareEC2TerminateByTag(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -143,6 +158,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
 	if err = aws.InstanceStatusCheck(experimentsDetails.TargetInstanceIDList, experimentsDetails.Region); err != nil {
 		log.Errorf("Failed to get the ec2 instance status as running post chaos: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Failed to get the ec2 instance status as running post chaos")
+		span.RecordError(err)
 		return
 	}
 	log.Info("[Status]: EC2 instance is in running state (post chaos)")
@@ -154,6 +171,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
 		if err = aws.PostChaosNodeCountCheck(activeNodeCount, autoScalingGroupName, experimentsDetails.Region); err != nil {
 			log.Errorf("Post chaos active node count check failed: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Post chaos active node count check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -171,6 +190,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
 				log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
 			}
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "AUT: Running, Probes: Successful"
@@ -187,6 +208,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
 	log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
 	if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to update the chaosresult: %v", err)
+		span.SetStatus(codes.Error, "Unable to update the chaosresult")
+		span.RecordError(err)
 		return
 	}

diff --git a/experiments/load/k6-loadgen/experiment/k6-loadgen.go b/experiments/load/k6-loadgen/experiment/k6-loadgen.go
index 0a62d3949..88e1d4894 100644
--- a/experiments/load/k6-loadgen/experiment/k6-loadgen.go
+++ b/experiments/load/k6-loadgen/experiment/k6-loadgen.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // Experiment contains steps to inject chaos
 func Experiment(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
@@ -79,6 +86,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -96,6 +105,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "AUT: Running, Probes: Successful"
@@ -108,6 +119,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PrepareChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
 	log.Infof("[Confirmation]: %v chaos has been injected successfully", experimentsDetails.ExperimentName)
@@ -122,6 +135,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -138,6 +153,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "AUT: Running, Probes: Successful"
@@ -153,6 +170,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to update the chaosresult")
+		span.RecordError(err)
 		return
 	}

diff --git a/experiments/spring-boot/spring-boot-faults/experiment/spring-boot-faults.go b/experiments/spring-boot/spring-boot-faults/experiment/spring-boot-faults.go
index 45d27ef62..699c4da0b 100644
--- a/experiments/spring-boot/spring-boot-faults/experiment/spring-boot-faults.go
+++ b/experiments/spring-boot/spring-boot-faults/experiment/spring-boot-faults.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // Experiment contains steps to inject chaos
 func Experiment(ctx context.Context, clients clients.ClientSets, expName string) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the Chaos Result")
+		span.RecordError(err)
 		return
 	}
 
@@ -78,6 +85,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
 		types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "Pods: Not Found", "Warning", &chaosDetails)
 		_ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Failed to get target pod list")
+		span.RecordError(err)
 		return
 	}
 	podNames := make([]string, 0, 1)
@@ -93,6 +102,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
 		types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "ChaosMonkey: Not Found", "Warning", &chaosDetails)
 		_ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Some target pods don't have the chaos monkey endpoint")
+		span.RecordError(err)
 		return
 	}
 
@@ -104,6 +115,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			_ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -120,6 +133,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			_ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "AUT: Running, Probes: Successful"
@@ -134,6 +149,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
 	if err := litmusLIB.PrepareChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -150,6 +167,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			_ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -166,6 +185,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			_ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "AUT: Running, Probes: Successful"
@@ -180,6 +201,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
 	log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}

diff --git a/experiments/vmware/vm-poweroff/experiment/vm-poweroff.go b/experiments/vmware/vm-poweroff/experiment/vm-poweroff.go
index 551d717d0..010f71049 100644
--- a/experiments/vmware/vm-poweroff/experiment/vm-poweroff.go
+++ b/experiments/vmware/vm-poweroff/experiment/vm-poweroff.go
@@ -16,6 +16,8 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	experimentEnv "github.com/litmuschaos/litmus-go/pkg/vmware/vm-poweroff/environment"
 	experimentTypes "github.com/litmuschaos/litmus-go/pkg/vmware/vm-poweroff/types"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 
 	"github.com/sirupsen/logrus"
 )
@@ -24,6 +26,7 @@ var err error
 
 // VMPoweroff contains steps to inject vm-power-off chaos
 func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -44,6 +47,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -53,6 +58,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to create the chaosresult: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
 
@@ -72,6 +79,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
 		if err != nil {
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
 			log.Errorf("Unable to get the VM ID, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to get the VM ID")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -91,6 +100,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
 	if err != nil {
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
 		log.Errorf("Vcenter Login failed: %v", err)
+		span.SetStatus(codes.Error, "Vcenter Login failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -99,6 +110,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
 	if err := vmware.VMStatusCheck(experimentsDetails.VcenterServer, experimentsDetails.VMIds, cookie); err != nil {
 		log.Errorf("VM status check failed: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "VM status check failed")
+		span.RecordError(err)
 		return
 	}
 	log.Info("[Verification]: VMs are in running state (pre-chaos)")
@@ -119,6 +132,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
 				log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
 			}
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "IUT: Running, Probes: Successful"
@@ -135,6 +150,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
 	if err = litmusLIB.InjectVMPowerOffChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails, cookie); err != nil {
 		log.Errorf("Chaos injection failed: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
 
@@ -149,6 +166,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
 	if err := vmware.VMStatusCheck(experimentsDetails.VcenterServer, experimentsDetails.VMIds, cookie); err != nil {
 		log.Errorf("VM status check failed: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "VM status check failed")
+		span.RecordError(err)
 		return
 	}
 	log.Info("[Verification]: VMs are in running state (post-chaos)")
@@ -168,6 +187,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
 				log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
 			}
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "IUT: Running, Probes: Successful"
Successful" @@ -184,6 +205,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to update the chaosresult: %v", err) + span.SetStatus(codes.Error, "Unable to update the chaosresult") + span.RecordError(err) return } From 498f5845998f1544b955b2cc8e13003970e52f91 Mon Sep 17 00:00:00 2001 From: Jaeyeon Park Date: Thu, 26 Dec 2024 16:52:02 +0900 Subject: [PATCH 3/4] feat: ditributed tracing span error Signed-off-by: Jaeyeon Park --- .../litmus/aws-ssm-chaos/lib/ssm-chaos.go | 17 +++++++ .../azure-disk-loss/lib/azure-disk-loss.go | 37 ++++++++++++++- .../lib/azure-instance-stop.go | 38 +++++++++++++++- .../container-kill/lib/container-kill.go | 45 +++++++++++++++++-- 4 files changed, 130 insertions(+), 7 deletions(-) diff --git a/chaoslib/litmus/aws-ssm-chaos/lib/ssm-chaos.go b/chaoslib/litmus/aws-ssm-chaos/lib/ssm-chaos.go index 0a54489fd..c855954df 100644 --- a/chaoslib/litmus/aws-ssm-chaos/lib/ssm-chaos.go +++ b/chaoslib/litmus/aws-ssm-chaos/lib/ssm-chaos.go @@ -17,6 +17,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) // InjectChaosInSerialMode will inject the aws ssm chaos in serial mode that is one after other @@ -51,6 +52,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment ec2IDList := strings.Fields(ec2ID) commandId, err := ssm.SendSSMCommand(experimentsDetails, ec2IDList) if err != nil { + span.SetStatus(codes.Error, "failed to send ssm command") + span.RecordError(err) return stacktrace.Propagate(err, "failed to send ssm command") } //prepare commands for abort recovery @@ -59,6 +62,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //wait for the ssm command to get in running state log.Info("[Wait]: Waiting for the ssm command to get in InProgress state") if err := ssm.WaitForCommandStatus("InProgress", commandId, ec2ID, experimentsDetails.Region, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.Delay); err != nil { + span.SetStatus(codes.Error, "failed to start ssm command") + span.RecordError(err) return stacktrace.Propagate(err, "failed to start ssm command") } common.SetTargets(ec2ID, "injected", "EC2", chaosDetails) @@ -66,6 +71,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -73,6 +80,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //wait for the ssm command to get succeeded in the given chaos duration log.Info("[Wait]: Waiting for the ssm command to get completed") if err := ssm.WaitForCommandStatus("Success", commandId, ec2ID, experimentsDetails.Region, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.Delay); err != nil { + span.SetStatus(codes.Error, "failed to send ssm command") + span.RecordError(err) return stacktrace.Propagate(err, "failed to send ssm command") } common.SetTargets(ec2ID, 
"reverted", "EC2", chaosDetails) @@ -117,6 +126,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Chaos]: Starting the ssm command") commandId, err := ssm.SendSSMCommand(experimentsDetails, instanceIDList) if err != nil { + span.SetStatus(codes.Error, "failed to send ssm command") + span.RecordError(err) return stacktrace.Propagate(err, "failed to send ssm command") } //prepare commands for abort recovery @@ -126,6 +137,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //wait for the ssm command to get in running state log.Info("[Wait]: Waiting for the ssm command to get in InProgress state") if err := ssm.WaitForCommandStatus("InProgress", commandId, ec2ID, experimentsDetails.Region, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.Delay); err != nil { + span.SetStatus(codes.Error, "failed to start ssm command") + span.RecordError(err) return stacktrace.Propagate(err, "failed to start ssm command") } } @@ -133,6 +146,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -141,6 +156,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //wait for the ssm command to get succeeded in the given chaos duration log.Info("[Wait]: Waiting for the ssm command to get completed") if err := ssm.WaitForCommandStatus("Success", commandId, ec2ID, experimentsDetails.Region, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.Delay); err != nil { + span.SetStatus(codes.Error, "failed to send ssm command") + span.RecordError(err) return stacktrace.Propagate(err, "failed to send ssm command") } } diff --git a/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go b/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go index 210377809..a22aee935 100644 --- a/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go +++ b/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go @@ -24,6 +24,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/retry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var ( @@ -55,11 +56,16 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper //get the disk name or list of disk names diskNameList := strings.Split(experimentsDetails.VirtualDiskNames, ",") if experimentsDetails.VirtualDiskNames == "" || len(diskNameList) == 0 { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no volume names found to detach"} + span.SetStatus(codes.Error, "no volume names found to detach") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no volume names found to detach"} + span.RecordError(err) + return err } instanceNamesWithDiskNames, err := diskStatus.GetInstanceNameForDisks(diskNameList, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup) if err != nil { + span.SetStatus(codes.Error, "failed to get instance names for disks") + span.RecordError(err) return stacktrace.Propagate(err, "error fetching attached instances for disks") } @@ -69,6 +75,8 @@ func PrepareChaos(ctx context.Context, experimentsDetails 
 	for instanceName := range instanceNamesWithDiskNames {
 		attachedDisksWithInstance[instanceName], err = diskStatus.GetInstanceDiskList(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, experimentsDetails.ScaleSet, instanceName)
 		if err != nil {
+			span.SetStatus(codes.Error, "failed to get attached disks")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "error fetching virtual disks")
 		}
 	}
@@ -85,14 +93,21 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper
 	switch strings.ToLower(experimentsDetails.Sequence) {
 	case "serial":
 		if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run chaos in serial mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in serial mode")
 		}
 	case "parallel":
 		if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run chaos in parallel mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in parallel mode")
 		}
 	default:
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.SetStatus(codes.Error, "sequence is not supported")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.RecordError(err)
+		return err
 	}
 
 	//Waiting for the ramp time after chaos injection
@@ -125,6 +140,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 	log.Info("[Chaos]: Detaching the virtual disks from the instances")
 	for instanceName, diskNameList := range instanceNamesWithDiskNames {
 		if err = diskStatus.DetachDisks(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, instanceName, experimentsDetails.ScaleSet, diskNameList); err != nil {
+			span.SetStatus(codes.Error, "failed to detach disks")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "failed to detach disks")
 		}
 	}
@@ -133,6 +150,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 		for _, diskName := range diskNameList {
 			log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName)
 			if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil {
+				span.SetStatus(codes.Error, "failed to detach disks")
+				span.RecordError(err)
 				return stacktrace.Propagate(err, "disk detachment check failed")
 			}
 		}
@@ -147,6 +166,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 	// run the probes during chaos
 	if len(resultDetails.ProbeDetails) != 0 {
 		if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "failed to run probes")
 		}
 	}
@@ -159,6 +180,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 	log.Info("[Chaos]: Attaching the Virtual disks back to the instances")
 	for instanceName, diskNameList := range attachedDisksWithInstance {
 		if err = diskStatus.AttachDisk(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, instanceName, experimentsDetails.ScaleSet, diskNameList); err != nil {
+			span.SetStatus(codes.Error, "virtual disk attachment failed")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "virtual disk attachment failed")
 		}
 
@@ -167,6 +190,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 		for _, diskName := range diskNameList {
 			log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName)
 			if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil {
+				span.SetStatus(codes.Error, "failed to attach disks")
+				span.RecordError(err)
 				return stacktrace.Propagate(err, "disk attachment check failed")
 			}
 		}
@@ -209,12 +234,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 			// Detaching the virtual disks
 			log.Infof("[Chaos]: Detaching %v from the instance", diskName)
 			if err = diskStatus.DetachDisks(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, instanceName, experimentsDetails.ScaleSet, diskNameToList); err != nil {
+				span.SetStatus(codes.Error, "failed to detach disks")
+				span.RecordError(err)
 				return stacktrace.Propagate(err, "failed to detach disks")
 			}
 
 			// Waiting for disk to be detached
 			log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName)
 			if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil {
+				span.SetStatus(codes.Error, "failed to detach disks")
+				span.RecordError(err)
 				return stacktrace.Propagate(err, "disk detachment check failed")
 			}
 
@@ -235,12 +264,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 			//Attaching the virtual disks to the instance
 			log.Infof("[Chaos]: Attaching %v back to the instance", diskName)
 			if err = diskStatus.AttachDisk(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, instanceName, experimentsDetails.ScaleSet, attachedDisksWithInstance[instanceName]); err != nil {
+				span.SetStatus(codes.Error, "disk attachment failed")
+				span.RecordError(err)
 				return stacktrace.Propagate(err, "disk attachment failed")
 			}
 
 			// Waiting for disk to be attached
 			log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName)
 			if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil {
+				span.SetStatus(codes.Error, "failed to attach disks")
+				span.RecordError(err)
 				return stacktrace.Propagate(err, "disk attachment check failed")
 			}

diff --git a/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go b/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go
index eefd1c54a..3bcc75121 100644
--- a/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go
+++ b/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go
@@ -22,6 +22,7 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/palantir/stacktrace"
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
 )
 
 var (
@@ -61,14 +62,21 @@ func PrepareAzureStop(ctx context.Context, experimentsDetails *experimentTypes.E
 	switch strings.ToLower(experimentsDetails.Sequence) {
 	case "serial":
 		if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run chaos in serial mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in serial mode")
 		}
 	case "parallel":
 		if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run chaos in parallel mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in parallel mode")
 		}
 	default:
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.SetStatus(codes.Error, "sequence not supported")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.RecordError(err)
+		return err
 	}
 
 	// Waiting for the ramp time after chaos injection
@@ -110,10 +118,14 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 			log.Infof("[Chaos]: Stopping the Azure instance: %v", vmName)
 			if experimentsDetails.ScaleSet == "enable" {
 				if err := azureStatus.AzureScaleSetInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
+					span.SetStatus(codes.Error, "failed to stop the Azure instance")
+					span.RecordError(err)
 					return stacktrace.Propagate(err, "unable to stop the Azure instance")
 				}
 			} else {
 				if err := azureStatus.AzureInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
+					span.SetStatus(codes.Error, "failed to stop the Azure instance")
+					span.RecordError(err)
 					return stacktrace.Propagate(err, "unable to stop the Azure instance")
 				}
 			}
@@ -121,6 +133,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 			// Wait for Azure instance to completely stop
 			log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the stopped state", vmName)
 			if err := azureStatus.WaitForAzureComputeDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
+				span.SetStatus(codes.Error, "failed to check instance poweroff status")
+				span.RecordError(err)
 				return stacktrace.Propagate(err, "instance poweroff status check failed")
 			}
 
@@ -128,6 +142,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 			// the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
 			if len(resultDetails.ProbeDetails) != 0 && i == 0 {
 				if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+					span.SetStatus(codes.Error, "failed to run probes")
+					span.RecordError(err)
 					return stacktrace.Propagate(err, "failed to run probes")
 				}
 			}
@@ -140,10 +156,14 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 			log.Info("[Chaos]: Starting back the Azure instance")
 			if experimentsDetails.ScaleSet == "enable" {
 				if err := azureStatus.AzureScaleSetInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
+					span.SetStatus(codes.Error, "failed to start the Azure instance")
+					span.RecordError(err)
 					return stacktrace.Propagate(err, "unable to start the Azure instance")
 				}
 			} else {
 				if err := azureStatus.AzureInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
+					span.SetStatus(codes.Error, "failed to start the Azure instance")
+					span.RecordError(err)
 					return stacktrace.Propagate(err, "unable to start the Azure instance")
instance") } } @@ -151,6 +171,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Wait for Azure instance to get in running state log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the running state", vmName) if err := azureStatus.WaitForAzureComputeUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "failed to check instance power on status") + span.RecordError(err) return stacktrace.Propagate(err, "instance power on status check failed") } } @@ -190,10 +212,14 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Infof("[Chaos]: Stopping the Azure instance: %v", vmName) if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "failed to stop the Azure instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to stop Azure instance") } } else { if err := azureStatus.AzureInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "failed to stop the Azure instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to stop Azure instance") } } @@ -203,6 +229,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, vmName := range instanceNameList { log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the stopped state", vmName) if err := azureStatus.WaitForAzureComputeDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "failed to check instance poweroff status") + span.RecordError(err) return stacktrace.Propagate(err, "instance poweroff status check failed") } } @@ -210,6 +238,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // Run probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -223,10 +253,14 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Infof("[Chaos]: Starting back the Azure instance: %v", vmName) if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "failed to start the Azure instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } } else { if err := azureStatus.AzureInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "failed to start the Azure instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } } @@ 
 		for _, vmName := range instanceNameList {
 			log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the running state", vmName)
 			if err := azureStatus.WaitForAzureComputeUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
+				span.SetStatus(codes.Error, "failed to check instance power on status")
+				span.RecordError(err)
 				return stacktrace.Propagate(err, "instance power on status check failed")
 			}
 		}

diff --git a/chaoslib/litmus/container-kill/lib/container-kill.go b/chaoslib/litmus/container-kill/lib/container-kill.go
index 95d81bf96..a1ca06e81 100644
--- a/chaoslib/litmus/container-kill/lib/container-kill.go
+++ b/chaoslib/litmus/container-kill/lib/container-kill.go
@@ -11,6 +11,7 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/telemetry"
 	"github.com/palantir/stacktrace"
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
 
 	"github.com/litmuschaos/litmus-go/pkg/clients"
 	experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/container-kill/types"
@@ -46,6 +47,8 @@ func PrepareContainerKill(ctx context.Context, experimentsDetails *experimentTyp
 	targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails)
 	if err != nil {
+		span.SetStatus(codes.Error, "Unable to get the target pods")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "could not get target pods")
 	}
 
@@ -59,12 +62,16 @@ func PrepareContainerKill(ctx context.Context, experimentsDetails *experimentTyp
 	if experimentsDetails.ChaosServiceAccount == "" {
 		experimentsDetails.ChaosServiceAccount, err = common.GetServiceAccount(experimentsDetails.ChaosNamespace, experimentsDetails.ChaosPodName, clients)
 		if err != nil {
+			span.SetStatus(codes.Error, "Unable to get the experiment service account")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not experiment service account")
 		}
 	}
 
 	if experimentsDetails.EngineName != "" {
 		if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil {
+			span.SetStatus(codes.Error, "Unable to set helper data")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not set helper data")
 		}
 	}
@@ -73,14 +80,21 @@ func PrepareContainerKill(ctx context.Context, experimentsDetails *experimentTyp
 	switch strings.ToLower(experimentsDetails.Sequence) {
 	case "serial":
 		if err = injectChaosInSerialMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, resultDetails, eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "Unable to run chaos in serial mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in serial mode")
 		}
 	case "parallel":
 		if err = injectChaosInParallelMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, resultDetails, eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "Unable to run chaos in parallel mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in parallel mode")
 		}
 	default:
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.SetStatus(codes.Error, "Sequence not supported")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.RecordError(err)
+		return err
 	}
 
 	//Waiting for the ramp time after chaos injection
@@ -98,6 +112,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 	// run the probes during chaos
 	if len(resultDetails.ProbeDetails) != 0 {
 		if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return err
 		}
 	}
@@ -113,6 +129,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 		runID := stringutils.GetRunID()
 
 		if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, fmt.Sprintf("%s:%s:%s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), pod.Spec.NodeName, runID); err != nil {
+			span.SetStatus(codes.Error, "failed to create helper pod")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not create helper pod")
 		}
 
@@ -122,6 +140,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 		log.Info("[Status]: Checking the status of the helper pods")
 		if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
 			common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients)
+			span.SetStatus(codes.Error, "failed to check helper status")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not check helper status")
 		}
 
@@ -131,12 +151,17 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 		podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...)
 		if err != nil || podStatus == "Failed" {
 			common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients)
-			return common.HelperFailedError(err, appLabel, experimentsDetails.ChaosNamespace, true)
+			err := common.HelperFailedError(err, appLabel, experimentsDetails.ChaosNamespace, true)
+			span.SetStatus(codes.Error, "failed to wait for completion of helper pod")
+			span.RecordError(err)
+			return err
 		}
 
 		//Deleting all the helper pod for container-kill chaos
 		log.Info("[Cleanup]: Deleting all the helper pods")
 		if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil {
+			span.SetStatus(codes.Error, "failed to delete helper pod(s)")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not delete helper pod(s)")
 		}
 	}
@@ -164,6 +189,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 		}
 
 		if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, strings.Join(targetsPerNode, ";"), node, runID); err != nil {
+			span.SetStatus(codes.Error, "failed to create helper pod")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not create helper pod")
 		}
 	}
@@ -174,6 +201,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 	log.Info("[Status]: Checking the status of the helper pods")
 	if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
 		common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients)
+		span.SetStatus(codes.Error, "failed to check helper status")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "could not check helper status")
 	}
 
@@ -183,12 +212,17 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...)
 	if err != nil || podStatus == "Failed" {
 		common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients)
-		return common.HelperFailedError(err, appLabel, experimentsDetails.ChaosNamespace, true)
+		err := common.HelperFailedError(err, appLabel, experimentsDetails.ChaosNamespace, true)
+		span.SetStatus(codes.Error, "failed to wait for completion of helper pod")
+		span.RecordError(err)
+		return err
 	}
 
 	//Deleting all the helper pod for container-kill chaos
 	log.Info("[Cleanup]: Deleting all the helper pods")
 	if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil {
+		span.SetStatus(codes.Error, "failed to delete helper pod(s)")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "could not delete helper pod(s)")
 	}
 
@@ -264,7 +298,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex
 	_, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{})
 	if err != nil {
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())}
+		span.SetStatus(codes.Error, "failed to create helper pod")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())}
+		span.RecordError(err)
+		return err
 	}
 	return nil
 }

From e0d751f30e8c6a15981417886b6aa5e5770fe9cb Mon Sep 17 00:00:00 2001
From: Jaeyeon Park
Date: Thu, 2 Jan 2025 23:00:08 +0900
Subject: [PATCH 4/4] feat: distributed tracing span error

Signed-off-by: Jaeyeon Park
---
 .../lib/ssm/aws-ssm-chaos-by-id.go            | 19 +++++++-
 .../lib/ssm/aws-ssm-chaos-by-tag.go           | 19 +++++++-
 .../azure-disk-loss/lib/azure-disk-loss.go    | 18 ++++----
 .../lib/azure-instance-stop.go                | 28 ++++++------
 .../container-kill/lib/container-kill.go      |  2 +
 chaoslib/litmus/disk-fill/lib/disk-fill.go    | 45 +++++++++++++++++--
 .../lib/docker-service-kill.go                | 22 ++++++++-
 .../lib/ebs-loss-by-id/lib/ebs-loss-by-id.go  | 10 ++++-
 .../ebs-loss-by-tag/lib/ebs-loss-by-tag.go    | 10 ++++-
 chaoslib/litmus/ebs-loss/lib/ebs-loss.go      | 29 ++++++++++++
 .../lib/ec2-terminate-by-id.go                | 35 ++++++++++++++-
 .../lib/ec2-terminate-by-tag.go               | 30 ++++++++++++-
 .../lib/gcp-vm-disk-loss-by-label.go          | 34 +++++++++++++-
 .../gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go  | 36 ++++++++++++++-
 .../lib/gcp-vm-instance-stop-by-label.go      | 34 +++++++++++++-
 .../lib/gcp-vm-instance-stop.go               | 34 +++++++++++++-
 chaoslib/litmus/http-chaos/lib/http-chaos.go  | 41 ++++++++++++++++-
 17 files changed, 405 insertions(+), 41 deletions(-)

diff --git a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go
index e4bb5a50b..0be649c13 100644
--- a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go
+++ b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go
@@ -19,6 +19,7 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/palantir/stacktrace"
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
 )
 
 var (
@@ -49,6 +50,8 @@ func PrepareAWSSSMChaosByID(ctx context.Context, experimentsDetails *experimentT
 	//create and upload the ssm document on the given aws service monitoring docs
 	if err = ssm.CreateAndUploadDocument(experimentsDetails.DocumentName, experimentsDetails.DocumentType, experimentsDetails.DocumentFormat, experimentsDetails.DocumentPath, experimentsDetails.Region); err != nil {
+		span.SetStatus(codes.Error, "could not create and upload the ssm document")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "could not create and upload the ssm document")
 	}
 	experimentsDetails.IsDocsUploaded = true
@@ -60,25 +63,37 @@ func PrepareAWSSSMChaosByID(ctx context.Context, experimentsDetails *experimentT
 	//get the instance id or list of instance ids
 	instanceIDList := strings.Split(experimentsDetails.EC2InstanceID, ",")
 	if experimentsDetails.EC2InstanceID == "" || len(instanceIDList) == 0 {
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"}
+		span.SetStatus(codes.Error, "no instance id found for chaos injection")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"}
+		span.RecordError(err)
+		return err
 	}
 
 	switch strings.ToLower(experimentsDetails.Sequence) {
 	case "serial":
 		if err = lib.InjectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in serial mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in serial mode")
 		}
 	case "parallel":
 		if err = lib.InjectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in parallel mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in parallel mode")
 		}
 	default:
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.SetStatus(codes.Error, "sequence is not supported")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.RecordError(err)
+		return err
 	}
 
 	//Delete the ssm document on the given aws service monitoring docs
 	err = ssm.SSMDeleteDocument(experimentsDetails.DocumentName, experimentsDetails.Region)
 	if err != nil {
+		span.SetStatus(codes.Error, "failed to delete ssm doc")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "failed to delete ssm doc")
 	}

diff --git a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go
index c7e872c7b..d0baf474e 100644
--- a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go
+++ b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go
@@ -19,6 +19,7 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/palantir/stacktrace"
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
 )
 
 // PrepareAWSSSMChaosByTag contains the prepration and injection steps for the experiment
@@ -44,6 +45,8 @@ func PrepareAWSSSMChaosByTag(ctx context.Context, experimentsDetails *experiment
 	//create and upload the ssm document on the given aws service monitoring docs
 	if err = ssm.CreateAndUploadDocument(experimentsDetails.DocumentName, experimentsDetails.DocumentType, experimentsDetails.DocumentFormat, experimentsDetails.DocumentPath, experimentsDetails.Region); err != nil {
+		span.SetStatus(codes.Error, "could not create and upload the ssm document")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "could not create and upload the ssm document")
 	}
 	experimentsDetails.IsDocsUploaded = true
@@ -55,25 +58,37 @@ func PrepareAWSSSMChaosByTag(ctx context.Context, experimentsDetails *experiment
PrepareAWSSSMChaosByTag(ctx context.Context, experimentsDetails *experiment log.Infof("[Chaos]:Number of Instance targeted: %v", len(instanceIDList)) if len(instanceIDList) == 0 { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"} + span.SetStatus(codes.Error, "no instance id found for chaos injection") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"} + span.RecordError(err) + return err } switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = lib.InjectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = lib.InjectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Delete the ssm document on the given aws service monitoring docs err = ssm.SSMDeleteDocument(experimentsDetails.DocumentName, experimentsDetails.Region) if err != nil { + span.SetStatus(codes.Error, "failed to delete ssm doc") + span.RecordError(err) return stacktrace.Propagate(err, "failed to delete ssm doc") } diff --git a/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go b/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go index a22aee935..3464df48b 100644 --- a/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go +++ b/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go @@ -64,7 +64,7 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper instanceNamesWithDiskNames, err := diskStatus.GetInstanceNameForDisks(diskNameList, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup) if err != nil { - span.SetStatus(codes.Error, "failed to get instance names for disks") + span.SetStatus(codes.Error, "error fetching attached instances for disks") span.RecordError(err) return stacktrace.Propagate(err, "error fetching attached instances for disks") } @@ -75,7 +75,7 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper for instanceName := range instanceNamesWithDiskNames { attachedDisksWithInstance[instanceName], err = diskStatus.GetInstanceDiskList(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, experimentsDetails.ScaleSet, instanceName) if err != nil { - span.SetStatus(codes.Error, "failed to get attached disks") + span.SetStatus(codes.Error, "error fetching virtual disks") span.RecordError(err) return stacktrace.Propagate(err, "error fetching virtual disks") } @@ -93,13 +93,13 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = 
injectChaosInSerialMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil { - span.SetStatus(codes.Error, "failed to run chaos in serial mode") + span.SetStatus(codes.Error, "could not run chaos in serial mode") span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil { - span.SetStatus(codes.Error, "failed to run chaos in parallel mode") + span.SetStatus(codes.Error, "could not run chaos in parallel mode") span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } @@ -150,7 +150,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, diskName := range diskNameList { log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName) if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil { - span.SetStatus(codes.Error, "failed to detach disks") + span.SetStatus(codes.Error, "disk detachment check failed") span.RecordError(err) return stacktrace.Propagate(err, "disk detachment check failed") } @@ -190,7 +190,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, diskName := range diskNameList { log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName) if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil { - span.SetStatus(codes.Error, "failed to attach disks") + span.SetStatus(codes.Error, "disk attachment check failed") span.RecordError(err) return stacktrace.Propagate(err, "disk attachment check failed") } @@ -242,7 +242,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Waiting for disk to be detached log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName) if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil { - span.SetStatus(codes.Error, "failed to detach disks") + span.SetStatus(codes.Error, "disk detachment check failed") span.RecordError(err) return stacktrace.Propagate(err, "disk detachment check failed") } @@ -253,6 +253,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -272,7 +274,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Waiting for disk to be attached log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName) if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil { - span.SetStatus(codes.Error, "failed to attach disks") + span.SetStatus(codes.Error, "disk attachment check failed") span.RecordError(err) return stacktrace.Propagate(err, "disk attachment check failed") } diff --git a/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go b/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go index 3bcc75121..f019b0440 100644 --- 
a/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go +++ b/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go @@ -62,13 +62,13 @@ func PrepareAzureStop(ctx context.Context, experimentsDetails *experimentTypes.E switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { - span.SetStatus(codes.Error, "failed to run chaos in serial mode") + span.SetStatus(codes.Error, "could not run chaos in serial mode") span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { - span.SetStatus(codes.Error, "failed to run chaos in parallel mode") + span.SetStatus(codes.Error, "could not run chaos in parallel mode") span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } @@ -118,13 +118,13 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Infof("[Chaos]: Stopping the Azure instance: %v", vmName) if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to stop the Azure instance") + span.SetStatus(codes.Error, "unable to stop the Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to stop the Azure instance") } } else { if err := azureStatus.AzureInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to stop the Azure instance") + span.SetStatus(codes.Error, "unable to stop the Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to stop the Azure instance") } @@ -133,7 +133,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Wait for Azure instance to completely stop log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the stopped state", vmName) if err := azureStatus.WaitForAzureComputeDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to check instance poweroff status") + span.SetStatus(codes.Error, "instance poweroff status check failed") span.RecordError(err) return stacktrace.Propagate(err, "instance poweroff status check failed") } @@ -156,13 +156,13 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Chaos]: Starting back the Azure instance") if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to start the Azure instance") + span.SetStatus(codes.Error, "unable to start the Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } } else { if err := azureStatus.AzureInstanceStart(experimentsDetails.Timeout, 
experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to start the Azure instance") + span.SetStatus(codes.Error, "unable to start the Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } @@ -171,7 +171,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Wait for Azure instance to get in running state log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the running state", vmName) if err := azureStatus.WaitForAzureComputeUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to check instance power on status") + span.SetStatus(codes.Error, "instance power on status check failed") span.RecordError(err) return stacktrace.Propagate(err, "instance power on status check failed") } @@ -212,13 +212,13 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Infof("[Chaos]: Stopping the Azure instance: %v", vmName) if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to stop the Azure instance") + span.SetStatus(codes.Error, "unable to stop Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to stop Azure instance") } } else { if err := azureStatus.AzureInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to stop the Azure instance") + span.SetStatus(codes.Error, "unable to stop Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to stop Azure instance") } @@ -229,7 +229,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, vmName := range instanceNameList { log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the stopped state", vmName) if err := azureStatus.WaitForAzureComputeDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to check instance poweroff status") + span.SetStatus(codes.Error, "instance poweroff status check failed") span.RecordError(err) return stacktrace.Propagate(err, "instance poweroff status check failed") } @@ -253,13 +253,13 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Infof("[Chaos]: Starting back the Azure instance: %v", vmName) if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to start the Azure instance") + span.SetStatus(codes.Error, "unable to start the Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } } else { if err := azureStatus.AzureInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, 
experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to start the Azure instance") + span.SetStatus(codes.Error, "unable to start the Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } @@ -270,7 +270,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, vmName := range instanceNameList { log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the running state", vmName) if err := azureStatus.WaitForAzureComputeUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to check instance power on status") + span.SetStatus(codes.Error, "instance power on status check failed") span.RecordError(err) return stacktrace.Propagate(err, "instance power on status check failed") } diff --git a/chaoslib/litmus/container-kill/lib/container-kill.go b/chaoslib/litmus/container-kill/lib/container-kill.go index a1ca06e81..13b2a52be 100644 --- a/chaoslib/litmus/container-kill/lib/container-kill.go +++ b/chaoslib/litmus/container-kill/lib/container-kill.go @@ -175,6 +175,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } diff --git a/chaoslib/litmus/disk-fill/lib/disk-fill.go b/chaoslib/litmus/disk-fill/lib/disk-fill.go index 0c63f84b2..571d2dd96 100644 --- a/chaoslib/litmus/disk-fill/lib/disk-fill.go +++ b/chaoslib/litmus/disk-fill/lib/disk-fill.go @@ -11,6 +11,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/disk-fill/types" @@ -37,7 +38,10 @@ func PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.Ex // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage if experimentsDetails.TargetPods == "" && chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.SetStatus(codes.Error, "provide one of the appLabel or TARGET_PODS") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.RecordError(err) + return err } //set up the tunables if provided in range setChaosTunables(experimentsDetails) @@ -51,6 +55,8 @@ func PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.Ex targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -64,12 +70,16 @@ func PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.Ex if experimentsDetails.ChaosServiceAccount == "" {
experimentsDetails.ChaosServiceAccount, err = common.GetServiceAccount(experimentsDetails.ChaosNamespace, experimentsDetails.ChaosPodName, clients) if err != nil { + span.SetStatus(codes.Error, "could not get experiment service account") + span.RecordError(err) return stacktrace.Propagate(err, "could not experiment service account") } } if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } @@ -78,14 +88,21 @@ func PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.Ex switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, execCommandDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, execCommandDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -103,6 +120,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -117,6 +136,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment runID := stringutils.GetRunID() if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, fmt.Sprintf("%s:%s:%s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), pod.Spec.NodeName, runID); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -126,6 +147,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -135,12 +158,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients,
experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not wait for completion of helper pod") + span.RecordError(err) return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) } //Deleting all the helper pod for disk-fill chaos log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } } @@ -157,6 +183,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -171,6 +199,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime } if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, strings.Join(targetsPerNode, ";"), node, runID); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } } @@ -181,6 +211,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -190,12 +222,16 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...)
if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not wait for completion of helper pod") + span.RecordError(err) return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) } //Deleting all the helper pod for disk-fill chaos log.Info("[Cleanup]: Deleting all the helper pod") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } @@ -270,7 +306,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "unable to create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go b/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go index f1fef9c9e..ef1932e61 100644 --- a/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go +++ b/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go @@ -9,6 +9,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -34,6 +35,8 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen //Select node for docker-service-kill experimentsDetails.TargetNode, err = common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients) if err != nil { + span.SetStatus(codes.Error, "could not get node name") + span.RecordError(err) return stacktrace.Propagate(err, "could not get node name") } } @@ -58,12 +61,16 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } // Creating the helper pod to perform docker-service-kill if err = createHelperPod(ctx, experimentsDetails, clients, chaosDetails, experimentsDetails.TargetNode); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -73,6 +80,8 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen log.Info("[Status]: Checking the status of the helper pod") if err = status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) + 
span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -80,6 +89,8 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen if len(resultDetails.ProbeDetails) != 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -88,6 +99,8 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen log.Info("[Status]: Check for the node to be in NotReady state") if err = status.CheckNodeNotReadyState(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check for NOT READY state") + span.RecordError(err) return stacktrace.Propagate(err, "could not check for NOT READY state") } @@ -96,12 +109,16 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) if err != nil || podStatus == "Failed" { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) } //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeletePod(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod") } @@ -204,7 +221,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "unable to create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go index dbc504628..152d1463f 100644 --- a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go +++ b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go @@ -18,6 +18,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var ( @@ -63,14 +64,21 @@ func PrepareEBSLossByID(ctx context.Context, experimentsDetails *experimentTypes switch 
strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = ebsloss.InjectChaosInSerialMode(ctx, experimentsDetails, volumeIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = ebsloss.InjectChaosInParallelMode(ctx, experimentsDetails, volumeIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection diff --git a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go index 6e8589129..a8107e0ca 100644 --- a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go +++ b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go @@ -18,6 +18,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var ( @@ -61,14 +62,21 @@ func PrepareEBSLossByTag(ctx context.Context, experimentsDetails *experimentType switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = ebsloss.InjectChaosInSerialMode(ctx, experimentsDetails, targetEBSVolumeIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = ebsloss.InjectChaosInParallelMode(ctx, experimentsDetails, targetEBSVolumeIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { diff --git a/chaoslib/litmus/ebs-loss/lib/ebs-loss.go b/chaoslib/litmus/ebs-loss/lib/ebs-loss.go index 8fa9bb0e4..5dc1d87d3 100644 --- a/chaoslib/litmus/ebs-loss/lib/ebs-loss.go +++ b/chaoslib/litmus/ebs-loss/lib/ebs-loss.go @@ -18,6 +18,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) // InjectChaosInSerialMode will inject the ebs loss chaos in serial mode which means one after other @@ -41,12 +42,16 @@ func 
InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Get volume attachment details ec2InstanceID, device, err := ebs.GetVolumeAttachmentDetails(volumeID, experimentsDetails.VolumeTag, experimentsDetails.Region) if err != nil { + span.SetStatus(codes.Error, "failed to get the attachment info") + span.RecordError(err) return stacktrace.Propagate(err, "failed to get the attachment info") } //Detaching the ebs volume from the instance log.Info("[Chaos]: Detaching the EBS volume from the instance") if err = ebs.EBSVolumeDetach(volumeID, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ebs detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs detachment failed") } @@ -55,6 +60,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Wait for ebs volume detachment log.Infof("[Wait]: Wait for EBS volume detachment for volume %v", volumeID) if err = ebs.WaitForVolumeDetachment(volumeID, ec2InstanceID, experimentsDetails.Region, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "ebs detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs detachment failed") } @@ -62,6 +69,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -73,6 +82,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Getting the EBS volume attachment status ebsState, err := ebs.GetEBSStatus(volumeID, ec2InstanceID, experimentsDetails.Region) if err != nil { + span.SetStatus(codes.Error, "failed to get the ebs status") + span.RecordError(err) return stacktrace.Propagate(err, "failed to get the ebs status") } @@ -83,12 +94,16 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Attaching the ebs volume from the instance log.Info("[Chaos]: Attaching the EBS volume back to the instance") if err = ebs.EBSVolumeAttach(volumeID, ec2InstanceID, device, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ebs attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs attachment failed") } //Wait for ebs volume attachment log.Infof("[Wait]: Wait for EBS volume attachment for %v volume", volumeID) if err = ebs.WaitForVolumeAttachment(volumeID, ec2InstanceID, experimentsDetails.Region, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "ebs attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs attachment failed") } } @@ -139,6 +154,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Detaching the ebs volume from the instance log.Info("[Chaos]: Detaching the EBS volume from the instance") if err := ebs.EBSVolumeDetach(volumeID, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ebs detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs detachment failed") } common.SetTargets(volumeID, "injected", "EBS", chaosDetails) @@ -146,6 +163,8 @@ func 
InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Info]: Checking if the detachment process initiated") if err := ebs.CheckEBSDetachmentInitialisation(targetEBSVolumeIDList, ec2InstanceIDList, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "failed to initialise the detachment") + span.RecordError(err) return stacktrace.Propagate(err, "failed to initialise the detachment") } @@ -153,6 +172,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Wait for ebs volume detachment log.Infof("[Wait]: Wait for EBS volume detachment for volume %v", volumeID) if err := ebs.WaitForVolumeDetachment(volumeID, ec2InstanceIDList[i], experimentsDetails.Region, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "ebs detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs detachment failed") } } @@ -160,6 +181,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -173,6 +196,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Getting the EBS volume attachment status ebsState, err := ebs.GetEBSStatus(volumeID, ec2InstanceIDList[i], experimentsDetails.Region) if err != nil { + span.SetStatus(codes.Error, "failed to get the ebs status") + span.RecordError(err) return stacktrace.Propagate(err, "failed to get the ebs status") } @@ -183,12 +208,16 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Attaching the ebs volume from the instance log.Info("[Chaos]: Attaching the EBS volume from the instance") if err = ebs.EBSVolumeAttach(volumeID, ec2InstanceIDList[i], deviceList[i], experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ebs attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs attachment failed") } //Wait for ebs volume attachment log.Infof("[Wait]: Wait for EBS volume attachment for volume %v", volumeID) if err = ebs.WaitForVolumeAttachment(volumeID, ec2InstanceIDList[i], experimentsDetails.Region, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "ebs attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs attachment failed") } } diff --git a/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go b/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go index 5a844099a..1483f6fd7 100644 --- a/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go +++ b/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go @@ -21,6 +21,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var ( @@ -52,7 +53,10 @@ func PrepareEC2TerminateByID(ctx context.Context, experimentsDetails *experiment //get the instance id or list of instance ids instanceIDList := strings.Split(experimentsDetails.Ec2InstanceID, ",") if experimentsDetails.Ec2InstanceID == "" || len(instanceIDList) == 0 { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no EC2 instance ID found 
to terminate"} + span.SetStatus(codes.Error, "no EC2 instance ID found to terminate") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no EC2 instance ID found to terminate"} + span.RecordError(err) + return err } // watching for the abort signal and revert the chaos @@ -61,14 +65,21 @@ func PrepareEC2TerminateByID(ctx context.Context, experimentsDetails *experiment switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -109,6 +120,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Stopping the EC2 instance log.Info("[Chaos]: Stopping the desired EC2 instance") if err := awslib.EC2Stop(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } @@ -117,6 +130,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Wait for ec2 instance to completely stop log.Infof("[Wait]: Wait for EC2 instance '%v' to get in stopped state", id) if err := awslib.WaitForEC2Down(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } @@ -124,6 +139,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -136,12 +153,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment if experimentsDetails.ManagedNodegroup != "enable" { log.Info("[Chaos]: Starting back the EC2 instance") if err := awslib.EC2Start(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } //Wait for ec2 instance to get in running state log.Infof("[Wait]: Wait for EC2 instance '%v' to 
get in running state", id) if err := awslib.WaitForEC2Up(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } } @@ -182,6 +203,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Stopping the EC2 instance log.Info("[Chaos]: Stopping the desired EC2 instance") if err := awslib.EC2Stop(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } common.SetTargets(id, "injected", "EC2", chaosDetails) @@ -191,6 +214,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Wait for ec2 instance to completely stop log.Infof("[Wait]: Wait for EC2 instance '%v' to get in stopped state", id) if err := awslib.WaitForEC2Down(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } common.SetTargets(id, "reverted", "EC2 Instance ID", chaosDetails) @@ -199,6 +224,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -213,6 +240,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, id := range instanceIDList { log.Info("[Chaos]: Starting back the EC2 instance") if err := awslib.EC2Start(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } } @@ -221,6 +250,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Wait for ec2 instance to get in running state log.Infof("[Wait]: Wait for EC2 instance '%v' to get in running state", id) if err := awslib.WaitForEC2Up(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } } diff --git a/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go b/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go index 2c34b83b4..eb2ac319a 100644 --- a/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go +++ b/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go @@ -22,6 +22,7 @@ import ( "github.com/palantir/stacktrace" "github.com/sirupsen/logrus" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var inject, abort chan os.Signal @@ -56,14 +57,21 @@ func PrepareEC2TerminateByTag(ctx context.Context, experimentsDetails *experimen switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err := injectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, 
clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err := injectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -104,6 +112,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Stopping the EC2 instance log.Info("[Chaos]: Stopping the desired EC2 instance") if err := awslib.EC2Stop(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } @@ -112,6 +122,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Wait for ec2 instance to completely stop log.Infof("[Wait]: Wait for EC2 instance '%v' to get in stopped state", id) if err := awslib.WaitForEC2Down(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } @@ -119,6 +131,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -131,12 +145,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment if experimentsDetails.ManagedNodegroup != "enable" { log.Info("[Chaos]: Starting back the EC2 instance") if err := awslib.EC2Start(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } //Wait for ec2 instance to get in running state log.Infof("[Wait]: Wait for EC2 instance '%v' to get in running state", id) if err := awslib.WaitForEC2Up(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } } @@ -176,6 +194,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Stopping the EC2 instance log.Info("[Chaos]: Stopping the desired EC2 instance") if err :=
awslib.EC2Stop(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } common.SetTargets(id, "injected", "EC2", chaosDetails) @@ -185,6 +205,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Wait for ec2 instance to completely stop log.Infof("[Wait]: Wait for EC2 instance '%v' to get in stopped state", id) if err := awslib.WaitForEC2Down(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } } @@ -192,6 +214,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -206,6 +230,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, id := range instanceIDList { log.Info("[Chaos]: Starting back the EC2 instance") if err := awslib.EC2Start(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } } @@ -214,6 +240,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Wait for ec2 instance to get in running state log.Infof("[Wait]: Wait for EC2 instance '%v' to get in running state", id) if err := awslib.WaitForEC2Up(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } } diff --git a/chaoslib/litmus/gcp-vm-disk-loss-by-label/lib/gcp-vm-disk-loss-by-label.go b/chaoslib/litmus/gcp-vm-disk-loss-by-label/lib/gcp-vm-disk-loss-by-label.go index 42efdf8bd..15032769e 100644 --- a/chaoslib/litmus/gcp-vm-disk-loss-by-label/lib/gcp-vm-disk-loss-by-label.go +++ b/chaoslib/litmus/gcp-vm-disk-loss-by-label/lib/gcp-vm-disk-loss-by-label.go @@ -21,6 +21,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "google.golang.org/api/compute/v1" ) @@ -69,14 +70,21 @@ func PrepareDiskVolumeLossByLabel(ctx context.Context, computeService *compute.S switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, computeService, experimentsDetails, diskVolumeNamesList, experimentsDetails.TargetDiskInstanceNamesList, experimentsDetails.Zones, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, computeService, experimentsDetails, diskVolumeNamesList, experimentsDetails.TargetDiskInstanceNamesList, experimentsDetails.Zones, clients, resultDetails,
eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } } @@ -111,6 +119,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Detaching the disk volume from the instance log.Info("[Chaos]: Detaching the disk volume from the instance") if err = gcp.DiskVolumeDetach(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, zone, experimentsDetails.DeviceNamesList[i]); err != nil { + span.SetStatus(codes.Error, "disk detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk detachment failed") } @@ -119,6 +129,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Wait for disk volume detachment log.Infof("[Wait]: Wait for disk volume detachment for volume %v", targetDiskVolumeNamesList[i]) if err = gcp.WaitForVolumeDetachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "unable to detach the disk volume from the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to detach the disk volume from the vm instance") } @@ -126,6 +138,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -137,6 +151,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Getting the disk volume attachment status diskState, err := gcp.GetDiskVolumeState(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone) if err != nil { + span.SetStatus(codes.Error, "failed to get the disk volume status") + span.RecordError(err) return stacktrace.Propagate(err, "failed to get the disk volume status") } @@ -147,12 +163,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Attaching the disk volume to the instance log.Info("[Chaos]: Attaching the disk volume back to the instance") if err = gcp.DiskVolumeAttach(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, zone, experimentsDetails.DeviceNamesList[i], targetDiskVolumeNamesList[i]); err != nil { + span.SetStatus(codes.Error, "disk attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk attachment failed") } //Wait for disk volume attachment log.Infof("[Wait]: Wait for disk volume attachment for %v volume", targetDiskVolumeNamesList[i]) if err = gcp.WaitForVolumeAttachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID,
instanceNamesList[i], zone, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "unable to attach the disk volume to the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to attach the disk volume to the vm instance") } } @@ -188,6 +208,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv //Detaching the disk volume from the instance log.Info("[Chaos]: Detaching the disk volume from the instance") if err = gcp.DiskVolumeDetach(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, zone, experimentsDetails.DeviceNamesList[i]); err != nil { + span.SetStatus(codes.Error, "disk detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk detachment failed") } @@ -199,6 +221,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv //Wait for disk volume detachment log.Infof("[Wait]: Wait for disk volume detachment for volume %v", targetDiskVolumeNamesList[i]) if err = gcp.WaitForVolumeDetachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "unable to detach the disk volume from the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to detach the disk volume from the vm instance") } } @@ -206,6 +230,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -219,6 +245,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv //Getting the disk volume attachment status diskState, err := gcp.GetDiskVolumeState(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone) if err != nil { + span.SetStatus(codes.Error, "failed to get the disk status") + span.RecordError(err) return stacktrace.Propagate(err, "failed to get the disk status") } @@ -229,12 +257,16 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv //Attaching the disk volume to the instance log.Info("[Chaos]: Attaching the disk volume to the instance") if err = gcp.DiskVolumeAttach(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, zone, experimentsDetails.DeviceNamesList[i], targetDiskVolumeNamesList[i]); err != nil { + span.SetStatus(codes.Error, "disk attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk attachment failed") } //Wait for disk volume attachment log.Infof("[Wait]: Wait for disk volume attachment for volume %v", targetDiskVolumeNamesList[i]) if err = gcp.WaitForVolumeAttachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "unable to attach the disk volume to the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to attach the disk volume to the vm instance") } } diff --git a/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go b/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go index 6a99010d9..e19e8a59d 100644 ---
diff --git a/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go b/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go
index 6a99010d9..e19e8a59d 100644
--- a/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go
+++ b/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go
@@ -22,6 +22,7 @@ import (
     "github.com/palantir/stacktrace"
     "github.com/pkg/errors"
     "go.opentelemetry.io/otel"
+    "go.opentelemetry.io/otel/codes"
     "google.golang.org/api/compute/v1"
 )
@@ -59,6 +60,8 @@ func PrepareDiskVolumeLoss(ctx context.Context, computeService *compute.Service,
     //get the device names for the given disks
     if err := getDeviceNamesList(computeService, experimentsDetails, diskNamesList, diskZonesList); err != nil {
+        span.SetStatus(codes.Error, "failed to fetch the disk device names")
+        span.RecordError(err)
         return stacktrace.Propagate(err, "failed to fetch the disk device names")
     }
@@ -74,14 +77,21 @@ func PrepareDiskVolumeLoss(ctx context.Context, computeService *compute.Service,
     switch strings.ToLower(experimentsDetails.Sequence) {
     case "serial":
         if err = injectChaosInSerialMode(ctx, computeService, experimentsDetails, diskNamesList, diskZonesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+            span.SetStatus(codes.Error, "could not run chaos in serial mode")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not run chaos in serial mode")
         }
     case "parallel":
         if err = injectChaosInParallelMode(ctx, computeService, experimentsDetails, diskNamesList, diskZonesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+            span.SetStatus(codes.Error, "could not run chaos in parallel mode")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not run chaos in parallel mode")
         }
     default:
-        return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+        span.SetStatus(codes.Error, "sequence is not supported")
+        err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+        span.RecordError(err)
+        return err
     }
 }
@@ -114,6 +124,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         //Detaching the disk volume from the instance
         log.Infof("[Chaos]: Detaching %s disk volume from the instance", targetDiskVolumeNamesList[i])
         if err = gcp.DiskVolumeDetach(computeService, experimentsDetails.TargetDiskInstanceNamesList[i], experimentsDetails.GCPProjectID, diskZonesList[i], experimentsDetails.DeviceNamesList[i]); err != nil {
+            span.SetStatus(codes.Error, "disk detachment failed")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "disk detachment failed")
         }
@@ -122,6 +134,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         //Wait for disk volume detachment
         log.Infof("[Wait]: Wait for %s disk volume detachment", targetDiskVolumeNamesList[i])
         if err = gcp.WaitForVolumeDetachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i], experimentsDetails.Delay, experimentsDetails.Timeout); err != nil {
+            span.SetStatus(codes.Error, "unable to detach disk volume from the vm instance")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "unable to detach disk volume from the vm instance")
         }
@@ -129,6 +143,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
         if len(resultDetails.ProbeDetails) != 0 && i == 0 {
             if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+                span.SetStatus(codes.Error, "failed to run probes")
+                span.RecordError(err)
                 return err
             }
         }
@@ -140,6 +156,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         //Getting the disk volume attachment status
         diskState, err := gcp.GetDiskVolumeState(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i])
         if err != nil {
+            span.SetStatus(codes.Error, "failed to get disk status")
+            span.RecordError(err)
             return stacktrace.Propagate(err, fmt.Sprintf("failed to get %s disk volume status", targetDiskVolumeNamesList[i]))
         }
@@ -150,12 +168,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
             //Attaching the disk volume to the instance
             log.Infof("[Chaos]: Attaching %s disk volume back to the instance", targetDiskVolumeNamesList[i])
             if err = gcp.DiskVolumeAttach(computeService, experimentsDetails.TargetDiskInstanceNamesList[i], experimentsDetails.GCPProjectID, diskZonesList[i], experimentsDetails.DeviceNamesList[i], targetDiskVolumeNamesList[i]); err != nil {
+                span.SetStatus(codes.Error, "disk attachment failed")
+                span.RecordError(err)
                 return stacktrace.Propagate(err, "disk attachment failed")
             }
 
             //Wait for disk volume attachment
             log.Infof("[Wait]: Wait for %s disk volume attachment", targetDiskVolumeNamesList[i])
             if err = gcp.WaitForVolumeAttachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i], experimentsDetails.Delay, experimentsDetails.Timeout); err != nil {
+                span.SetStatus(codes.Error, "unable to attach disk volume to the vm instance")
+                span.RecordError(err)
                 return stacktrace.Propagate(err, "unable to attach disk volume to the vm instance")
             }
         }
@@ -188,6 +210,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
         //Detaching the disk volume from the instance
         log.Infof("[Chaos]: Detaching %s disk volume from the instance", targetDiskVolumeNamesList[i])
         if err = gcp.DiskVolumeDetach(computeService, experimentsDetails.TargetDiskInstanceNamesList[i], experimentsDetails.GCPProjectID, diskZonesList[i], experimentsDetails.DeviceNamesList[i]); err != nil {
+            span.SetStatus(codes.Error, "disk detachment failed")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "disk detachment failed")
         }
@@ -199,6 +223,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
         //Wait for disk volume detachment
         log.Infof("[Wait]: Wait for %s disk volume detachment", targetDiskVolumeNamesList[i])
         if err = gcp.WaitForVolumeDetachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i], experimentsDetails.Delay, experimentsDetails.Timeout); err != nil {
+            span.SetStatus(codes.Error, "unable to detach disk volume from the vm instance")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "unable to detach disk volume from the vm instance")
         }
     }
@@ -206,6 +232,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
     // run the probes during chaos
     if len(resultDetails.ProbeDetails) != 0 {
         if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+            span.SetStatus(codes.Error, "failed to run probes")
+            span.RecordError(err)
             return err
         }
     }
@@ -219,6 +247,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
         //Getting the disk volume attachment status
         diskState, err := gcp.GetDiskVolumeState(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i])
         if err != nil {
+            span.SetStatus(codes.Error, "failed to get disk status")
+            span.RecordError(err)
             return errors.Errorf("failed to get the disk status, err: %v", err)
         }
@@ -229,12 +259,16 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
             //Attaching the disk volume to the instance
             log.Infof("[Chaos]: Attaching %s disk volume to the instance", targetDiskVolumeNamesList[i])
             if err = gcp.DiskVolumeAttach(computeService, experimentsDetails.TargetDiskInstanceNamesList[i], experimentsDetails.GCPProjectID, diskZonesList[i], experimentsDetails.DeviceNamesList[i], targetDiskVolumeNamesList[i]); err != nil {
+                span.SetStatus(codes.Error, "disk attachment failed")
+                span.RecordError(err)
                 return stacktrace.Propagate(err, "disk attachment failed")
             }
 
             //Wait for disk volume attachment
             log.Infof("[Wait]: Wait for %s disk volume attachment", targetDiskVolumeNamesList[i])
             if err = gcp.WaitForVolumeAttachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i], experimentsDetails.Delay, experimentsDetails.Timeout); err != nil {
+                span.SetStatus(codes.Error, "unable to attach disk volume to the vm instance")
+                span.RecordError(err)
                 return stacktrace.Propagate(err, "unable to attach disk volume to the vm instance")
             }
         }
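Both calls are needed because, in the OpenTelemetry Go API, RecordError only appends an "exception" event to the span; it does not change the span's status. A short illustrative sketch of the pattern for a function that does not start its own span, using trace.SpanFromContext as the http-chaos change later in this patch does (the markFailed name is hypothetical):

    package main

    import (
    	"context"
    	"errors"

    	"go.opentelemetry.io/otel/codes"
    	"go.opentelemetry.io/otel/trace"
    )

    // markFailed is illustrative only: trace.SpanFromContext returns the
    // span stored in ctx (or a no-op span if there is none), so helpers that
    // do not start their own span can still attach failure details to the
    // caller's span.
    func markFailed(ctx context.Context, reason string, err error) {
    	span := trace.SpanFromContext(ctx)
    	span.SetStatus(codes.Error, reason) // sets the span status to Error
    	span.RecordError(err)               // adds an "exception" event; status unchanged
    }

    func main() {
    	markFailed(context.Background(), "disk detachment failed", errors.New("boom"))
    }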
diff --git a/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go b/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go
index 644a02137..3672b9d96 100644
--- a/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go
+++ b/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go
@@ -21,6 +21,7 @@ import (
     "github.com/litmuschaos/litmus-go/pkg/utils/common"
     "github.com/palantir/stacktrace"
     "go.opentelemetry.io/otel"
+    "go.opentelemetry.io/otel/codes"
     "google.golang.org/api/compute/v1"
 )
@@ -56,14 +57,21 @@ func PrepareVMStopByLabel(ctx context.Context, computeService *compute.Service,
     switch strings.ToLower(experimentsDetails.Sequence) {
     case "serial":
         if err := injectChaosInSerialMode(ctx, computeService, experimentsDetails, instanceNamesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+            span.SetStatus(codes.Error, "could not run chaos in serial mode")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not run chaos in serial mode")
         }
     case "parallel":
         if err := injectChaosInParallelMode(ctx, computeService, experimentsDetails, instanceNamesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+            span.SetStatus(codes.Error, "could not run chaos in parallel mode")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not run chaos in parallel mode")
         }
     default:
-        return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+        span.SetStatus(codes.Error, "sequence is not supported")
+        err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+        span.RecordError(err)
+        return err
     }
 
     //Waiting for the ramp time after chaos injection
@@ -105,6 +113,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         //Stopping the VM instance
         log.Infof("[Chaos]: Stopping %s VM instance", instanceNamesList[i])
         if err := gcplib.VMInstanceStop(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+            span.SetStatus(codes.Error, "VM instance failed to stop")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "VM instance failed to stop")
         }
@@ -113,6 +123,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         //Wait for VM instance to completely stop
         log.Infof("[Wait]: Wait for VM instance %s to stop", instanceNamesList[i])
         if err := gcplib.WaitForVMInstanceDown(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+            span.SetStatus(codes.Error, "vm instance failed to fully shutdown")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "vm instance failed to fully shutdown")
         }
@@ -120,6 +132,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
         if len(resultDetails.ProbeDetails) != 0 && i == 0 {
             if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+                span.SetStatus(codes.Error, "failed to run probes")
+                span.RecordError(err)
                 return err
             }
         }
@@ -134,6 +148,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         // wait for VM instance to get in running state
         log.Infof("[Wait]: Wait for VM instance %s to get in RUNNING state", instanceNamesList[i])
         if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+            span.SetStatus(codes.Error, "unable to start the vm instance")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "unable to start %s vm instance")
         }
@@ -142,12 +158,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         // starting the VM instance
         log.Infof("[Chaos]: Starting back %s VM instance", instanceNamesList[i])
         if err := gcplib.VMInstanceStart(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+            span.SetStatus(codes.Error, "vm instance failed to start")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "vm instance failed to start")
         }
 
         // wait for VM instance to get in running state
         log.Infof("[Wait]: Wait for VM instance %s to get in RUNNING state", instanceNamesList[i])
         if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+            span.SetStatus(codes.Error, "unable to start the vm instance")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "unable to start %s vm instance")
         }
     }
@@ -191,6 +211,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
         // stopping the VM instance
         log.Infof("[Chaos]: Stopping %s VM instance", instanceNamesList[i])
         if err := gcplib.VMInstanceStop(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+            span.SetStatus(codes.Error, "vm instance failed to stop")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "vm instance failed to stop")
         }
@@ -202,6 +224,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
         // wait for VM instance to completely stop
         log.Infof("[Wait]: Wait for VM instance %s to get in stopped state", instanceNamesList[i])
         if err := gcplib.WaitForVMInstanceDown(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+            span.SetStatus(codes.Error, "vm instance failed to fully shutdown")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "vm instance failed to fully shutdown")
         }
     }
@@ -209,6 +233,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
     // run the probes during chaos
     if len(resultDetails.ProbeDetails) != 0 {
         if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+            span.SetStatus(codes.Error, "failed to run probes")
+            span.RecordError(err)
             return err
         }
     }
@@ -225,6 +251,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
         log.Infof("[Wait]: Wait for VM instance '%v' to get in running state", instanceNamesList[i])
         if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+            span.SetStatus(codes.Error, "unable to start the vm instance")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "unable to start the vm instance")
         }
@@ -238,6 +266,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
             log.Info("[Chaos]: Starting back the VM instance")
             if err := gcplib.VMInstanceStart(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+                span.SetStatus(codes.Error, "vm instance failed to start")
+                span.RecordError(err)
                 return stacktrace.Propagate(err, "vm instance failed to start")
             }
         }
@@ -247,6 +277,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
         log.Infof("[Wait]: Wait for VM instance '%v' to get in running state", instanceNamesList[i])
         if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+            span.SetStatus(codes.Error, "unable to start the vm instance")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "unable to start the vm instance")
         }
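The recorded status and error event can be checked without a collector. A rough test sketch using the OpenTelemetry SDK's in-memory span recorder; it assumes the go.opentelemetry.io/otel/sdk module and is not part of this patch:

    package main

    import (
    	"context"
    	"errors"
    	"fmt"

    	"go.opentelemetry.io/otel/codes"
    	sdktrace "go.opentelemetry.io/otel/sdk/trace"
    	"go.opentelemetry.io/otel/sdk/trace/tracetest"
    )

    func main() {
    	// In-memory span recorder: spans ended on this provider can be
    	// inspected in a test instead of being exported.
    	sr := tracetest.NewSpanRecorder()
    	tp := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sr))

    	_, span := tp.Tracer("test").Start(context.Background(), "InjectVMInstanceStop")
    	span.SetStatus(codes.Error, "vm instance failed to stop")
    	span.RecordError(errors.New("stop failed"))
    	span.End()

    	ended := sr.Ended()
    	fmt.Println(ended[0].Status().Code == codes.Error) // true
    	fmt.Println(len(ended[0].Events()))                // 1: the "exception" event
    }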
diff --git a/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go b/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go
index 281e1c211..9ba8a0900 100644
--- a/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go
+++ b/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go
@@ -21,6 +21,7 @@ import (
     "github.com/litmuschaos/litmus-go/pkg/utils/common"
     "github.com/palantir/stacktrace"
     "go.opentelemetry.io/otel"
+    "go.opentelemetry.io/otel/codes"
     "google.golang.org/api/compute/v1"
 )
@@ -61,14 +62,21 @@ func PrepareVMStop(ctx context.Context, computeService *compute.Service, experim
     switch strings.ToLower(experimentsDetails.Sequence) {
     case "serial":
         if err = injectChaosInSerialMode(ctx, computeService, experimentsDetails, instanceNamesList, instanceZonesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+            span.SetStatus(codes.Error, "could not run chaos in serial mode")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not run chaos in serial mode")
         }
     case "parallel":
         if err = injectChaosInParallelMode(ctx, computeService, experimentsDetails, instanceNamesList, instanceZonesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+            span.SetStatus(codes.Error, "could not run chaos in parallel mode")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not run chaos in parallel mode")
         }
     default:
-        return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+        span.SetStatus(codes.Error, "sequence is not supported")
+        err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+        span.RecordError(err)
+        return err
     }
 
     // wait for the ramp time after chaos injection
@@ -110,6 +118,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         //Stopping the VM instance
         log.Infof("[Chaos]: Stopping %s VM instance", instanceNamesList[i])
         if err := gcplib.VMInstanceStop(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+            span.SetStatus(codes.Error, "vm instance failed to stop")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "vm instance failed to stop")
         }
@@ -118,6 +128,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         //Wait for VM instance to completely stop
         log.Infof("[Wait]: Wait for VM instance %s to get in stopped state", instanceNamesList[i])
         if err := gcplib.WaitForVMInstanceDown(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+            span.SetStatus(codes.Error, "vm instance failed to fully shutdown")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "vm instance failed to fully shutdown")
         }
@@ -125,6 +137,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
         if len(resultDetails.ProbeDetails) != 0 && i == 0 {
             if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+                span.SetStatus(codes.Error, "failed to run probes")
+                span.RecordError(err)
                 return err
             }
         }
@@ -139,12 +153,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         // starting the VM instance
         log.Infof("[Chaos]: Starting back %s VM instance", instanceNamesList[i])
         if err := gcplib.VMInstanceStart(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+            span.SetStatus(codes.Error, "vm instance failed to start")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "vm instance failed to start")
         }
 
         // wait for VM instance to get in running state
         log.Infof("[Wait]: Wait for VM instance %s to get in running state", instanceNamesList[i])
         if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+            span.SetStatus(codes.Error, "unable to start vm instance")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "unable to start vm instance")
         }
@@ -153,6 +171,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
         // wait for VM instance to get in running state
         log.Infof("[Wait]: Wait for VM instance %s to get in running state", instanceNamesList[i])
         if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+            span.SetStatus(codes.Error, "unable to start vm instance")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "unable to start vm instance")
         }
     }
@@ -197,6 +217,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
         // stopping the VM instance
         log.Infof("[Chaos]: Stopping %s VM instance", instanceNamesList[i])
         if err := gcplib.VMInstanceStop(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+            span.SetStatus(codes.Error, "vm instance failed to stop")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "vm instance failed to stop")
         }
@@ -208,6 +230,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
         // wait for VM instance to completely stop
         log.Infof("[Wait]: Wait for VM instance %s to get in stopped state", instanceNamesList[i])
         if err := gcplib.WaitForVMInstanceDown(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+            span.SetStatus(codes.Error, "vm instance failed to fully shutdown")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "vm instance failed to fully shutdown")
         }
     }
@@ -215,6 +239,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
     // run the probes during chaos
     if len(resultDetails.ProbeDetails) != 0 {
         if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+            span.SetStatus(codes.Error, "failed to run probes")
+            span.RecordError(err)
             return err
         }
     }
@@ -230,6 +256,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
     for i := range instanceNamesList {
         log.Infof("[Chaos]: Starting back %s VM instance", instanceNamesList[i])
         if err := gcplib.VMInstanceStart(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+            span.SetStatus(codes.Error, "vm instance failed to start")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "vm instance failed to start")
         }
     }
@@ -239,6 +267,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
         log.Infof("[Wait]: Wait for VM instance %s to get in running state", instanceNamesList[i])
         if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+            span.SetStatus(codes.Error, "unable to start vm instance")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "unable to start vm instance")
         }
@@ -252,6 +282,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
         log.Infof("[Wait]: Wait for VM instance %s to get in running state", instanceNamesList[i])
         if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+            span.SetStatus(codes.Error, "unable to start vm instance")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "unable to start vm instance")
         }
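A possible follow-up refactor: with a named error return, one deferred hook per function could record the outcome once instead of repeating the pair before every return. A sketch under that assumption; this is not how the patch is written:

    package main

    import (
    	"context"
    	"errors"

    	"go.opentelemetry.io/otel"
    	"go.opentelemetry.io/otel/codes"
    )

    // injectChaos is illustrative only: the deferred closure reads the named
    // return value err, so every error path is recorded exactly once and the
    // span is always ended.
    func injectChaos(ctx context.Context) (err error) {
    	_, span := otel.Tracer("litmus").Start(ctx, "InjectChaos")
    	defer func() {
    		if err != nil {
    			span.SetStatus(codes.Error, err.Error())
    			span.RecordError(err)
    		}
    		span.End()
    	}()

    	// ... chaos injection steps that may assign err ...
    	return errors.New("vm instance failed to stop")
    }

    func main() {
    	_ = injectChaos(context.Background())
    }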
diff --git a/chaoslib/litmus/http-chaos/lib/http-chaos.go b/chaoslib/litmus/http-chaos/lib/http-chaos.go
index 59323f0b8..a36c7dbab 100644
--- a/chaoslib/litmus/http-chaos/lib/http-chaos.go
+++ b/chaoslib/litmus/http-chaos/lib/http-chaos.go
@@ -11,6 +11,8 @@ import (
     "github.com/litmuschaos/litmus-go/pkg/telemetry"
     "github.com/palantir/stacktrace"
     "go.opentelemetry.io/otel"
+    "go.opentelemetry.io/otel/codes"
+    "go.opentelemetry.io/otel/trace"
 
     "github.com/litmuschaos/litmus-go/pkg/clients"
     experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/http-chaos/types"
@@ -27,6 +29,7 @@ import (
 // PrepareAndInjectChaos contains the preparation & injection steps
 func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails, args string) error {
+    span := trace.SpanFromContext(ctx)
     var err error
 
     // Get the target pod details for the chaos execution
@@ -39,6 +42,8 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy
     targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails)
     if err != nil {
+        span.SetStatus(codes.Error, "could not get target pods")
+        span.RecordError(err)
         return stacktrace.Propagate(err, "could not get target pods")
     }
@@ -52,12 +57,16 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy
     if experimentsDetails.ChaosServiceAccount == "" {
         experimentsDetails.ChaosServiceAccount, err = common.GetServiceAccount(experimentsDetails.ChaosNamespace, experimentsDetails.ChaosPodName, clients)
         if err != nil {
+            span.SetStatus(codes.Error, "could not get experiment service account")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not experiment service account")
         }
     }
 
     if experimentsDetails.EngineName != "" {
         if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil {
+            span.SetStatus(codes.Error, "could not set helper data")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not set helper data")
         }
     }
@@ -67,14 +76,21 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy
     switch strings.ToLower(experimentsDetails.Sequence) {
     case "serial":
         if err = injectChaosInSerialMode(ctx, experimentsDetails, targetPodList, args, clients, chaosDetails, resultDetails, eventsDetails); err != nil {
+            span.SetStatus(codes.Error, "could not run chaos in serial mode")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not run chaos in serial mode")
         }
     case "parallel":
         if err = injectChaosInParallelMode(ctx, experimentsDetails, targetPodList, args, clients, chaosDetails, resultDetails, eventsDetails); err != nil {
+            span.SetStatus(codes.Error, "could not run chaos in parallel mode")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not run chaos in parallel mode")
         }
     default:
-        return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+        span.SetStatus(codes.Error, "sequence is not supported")
+        err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+        span.RecordError(err)
+        return err
     }
 
     return nil
@@ -108,6 +124,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
         runID := stringutils.GetRunID()
         if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, fmt.Sprintf("%s:%s:%s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), pod.Spec.NodeName, runID, args); err != nil {
+            span.SetStatus(codes.Error, "could not create helper pod")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not create helper pod")
         }
@@ -117,6 +135,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
         log.Info("[Status]: Checking the status of the helper pods")
         if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
             common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients)
+            span.SetStatus(codes.Error, "could not check helper status")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not check helper status")
         }
@@ -126,12 +146,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
         podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...)
         if err != nil || podStatus == "Failed" {
             common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients)
+            span.SetStatus(codes.Error, "helper pod failed")
+            span.RecordError(err)
             return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true)
         }
 
         //Deleting all the helper pod for http chaos
         log.Info("[Cleanup]: Deleting the helper pod")
         if err := common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil {
+            span.SetStatus(codes.Error, "could not delete helper pod")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not delete helper pod(s)")
         }
     }
@@ -147,6 +171,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
     // run the probes during chaos
     if len(resultDetails.ProbeDetails) != 0 {
         if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+            span.SetStatus(codes.Error, "failed to run probes")
+            span.RecordError(err)
             return err
         }
     }
@@ -161,6 +187,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
         }
 
         if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, strings.Join(targetsPerNode, ";"), node, runID, args); err != nil {
+            span.SetStatus(codes.Error, "could not create helper pod")
+            span.RecordError(err)
             return stacktrace.Propagate(err, "could not create helper pod")
         }
     }
@@ -171,6 +199,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
     log.Info("[Status]: Checking the status of the helper pods")
     if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
         common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients)
+        span.SetStatus(codes.Error, "could not check helper status")
+        span.RecordError(err)
         return stacktrace.Propagate(err, "could not check helper status")
     }
@@ -180,12 +210,16 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
     podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...)
     if err != nil || podStatus == "Failed" {
         common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients)
+        span.SetStatus(codes.Error, "helper pod failed")
+        span.RecordError(err)
         return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true)
     }
 
     // Deleting all the helper pod for http chaos
     log.Info("[Cleanup]: Deleting all the helper pod")
     if err := common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil {
+        span.SetStatus(codes.Error, "could not delete helper pod")
+        span.RecordError(err)
         return stacktrace.Propagate(err, "could not delete helper pod(s)")
     }
@@ -266,7 +300,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex
     _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{})
     if err != nil {
-        return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())}
+        span.SetStatus(codes.Error, "could not create helper pod")
+        err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())}
+        span.RecordError(err)
+        return err
     }
     return nil
 }
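In the createHelperPod hunk above, the cerrors.Error value is constructed before span.RecordError is called, so the span's "exception" event carries the typed error's message rather than the raw client-go error. A self-contained sketch of that ordering, using a hypothetical chaosError stand-in rather than the real cerrors.Error type:

    package main

    import (
    	"context"
    	"fmt"

    	"go.opentelemetry.io/otel"
    	"go.opentelemetry.io/otel/codes"
    )

    // chaosError is a hypothetical stand-in for cerrors.Error, assumed only
    // to satisfy the error interface; it is not the litmus-go type.
    type chaosError struct {
    	Code   string
    	Reason string
    }

    func (e chaosError) Error() string {
    	return fmt.Sprintf("{code: %s, reason: %s}", e.Code, e.Reason)
    }

    func main() {
    	_, span := otel.Tracer("demo").Start(context.Background(), "CreateHelperPod")
    	// Build the typed error first, then record it: the span event's
    	// message is the formatted output of chaosError.Error().
    	err := chaosError{Code: "GENERIC", Reason: "unable to create helper pod"}
    	span.SetStatus(codes.Error, "could not create helper pod")
    	span.RecordError(err)
    	span.End()
    }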