Skip to content

Commit

Permalink
[rhoai] Update Component rules in Prometheus (#1511)
Browse files Browse the repository at this point in the history
* Update Component rules in Prometheus

(cherry picked from commit ef677d0)

(cherry picked from commit ca07cac)

Update Component rules in Prometheus

(cherry picked from commit bcdd7c8)

* update: address some comments

Signed-off-by: Wen Zhou <[email protected]>
(cherry picked from commit cab3a3a)

* update: add monitoring CR status on condition of prom deployment

Signed-off-by: Wen Zhou <[email protected]>
(cherry picked from commit e0b7c39)

* update: address comments for monitoring component (#1520)

- move if to switch...case
- add .status.condition.MonitoringReady type
- change Monitoring CR .status.condition Reason and Message and Type name
- remove unused predicate var from DSCI
- change check on prometheus deployment ready
- update: change to use Apply than Create
- update: add or remove prom rules
- add field manager for monitoring CR to DSCI
- add isComponentReady()
- update predicate for monitoring on DSC change on both .spec.components and .status.condition

Signed-off-by: Wen Zhou <[email protected]>

---------

Signed-off-by: Wen Zhou <[email protected]>
(cherry picked from commit 05c7947)
Signed-off-by: Wen Zhou <[email protected]>

* fix: add missing cache for self-managed cluster, because monitoring is created there as well

* fix: wrong name in manifests

Signed-off-by: Wen Zhou <[email protected]>

---------

Signed-off-by: Wen Zhou <[email protected]>
Co-authored-by: Wen Zhou <[email protected]>
  • Loading branch information
VaishnaviHire and zdtsw authored Jan 21, 2025
1 parent ec43a3b commit 83abe0d
Show file tree
Hide file tree
Showing 28 changed files with 1,040 additions and 107 deletions.
1 change: 1 addition & 0 deletions Dockerfiles/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ RUN if [ "${USE_LOCAL}" != "true" ]; then \
COPY config/monitoring/ /opt/manifests/monitoring
# Copy ods-configs
COPY config/osd-configs/ /opt/manifests/osd-configs

################################################################################
FROM registry.access.redhat.com/ubi8/go-toolset:$GOLANG_VERSION as builder
ARG CGO_ENABLED=1
Expand Down
2 changes: 1 addition & 1 deletion config/monitoring/networkpolicy/operator/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ spec:
kubernetes.io/metadata.name: openshift-console
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: oopenshift-operators
kubernetes.io/metadata.name: openshift-operators
- namespaceSelector:
matchLabels:
opendatahub.io/generated-namespace: "true"
Expand Down
78 changes: 50 additions & 28 deletions controllers/dscinitialization/dscinitialization_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ package dscinitialization
import (
"context"
"path/filepath"
"reflect"

operatorv1 "github.com/openshift/api/operator/v1"
routev1 "github.com/openshift/api/route/v1"
Expand All @@ -29,6 +28,7 @@ import (
networkingv1 "k8s.io/api/networking/v1"
rbacv1 "k8s.io/api/rbac/v1"
k8serr "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
Expand All @@ -49,6 +49,7 @@ import (
"github.com/opendatahub-io/opendatahub-operator/v2/controllers/status"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster"
odhClient "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/client"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/predicates/resources"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/logger"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/trustedcabundle"
Expand Down Expand Up @@ -257,6 +258,9 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re
}
if instance.Spec.Monitoring.ManagementState == operatorv1.Managed {
log.Info("Monitoring enabled in initialization stage", "cluster", "Managed Service Mode")
if err := r.configureMonitoring(ctx, instance); err != nil {
return ctrl.Result{}, err
}
err := r.configureManagedMonitoring(ctx, instance, "init")
if err != nil {
return reconcile.Result{}, err
Expand Down Expand Up @@ -351,22 +355,24 @@ func (r *DSCInitializationReconciler) SetupWithManager(ctx context.Context, mgr
Owns(
&routev1.Route{},
builder.WithPredicates(predicate.Or(predicate.GenerationChangedPredicate{}, predicate.LabelChangedPredicate{}))).
Owns(&corev1.PersistentVolumeClaim{},
builder.WithPredicates(predicate.Or(predicate.GenerationChangedPredicate{}, predicate.LabelChangedPredicate{}))).
Watches(
&dscv1.DataScienceCluster{},
handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, a client.Object) []reconcile.Request {
return r.watchDSCResource(ctx)
}),
builder.WithPredicates(DSCDeletionPredicate),
builder.WithPredicates(resources.DSCDeletionPredicate), // TODO: is it needed?
).
Watches(
&corev1.Secret{},
handler.EnqueueRequestsFromMapFunc(r.watchMonitoringSecretResource),
builder.WithPredicates(SecretContentChangedPredicate),
builder.WithPredicates(resources.SecretContentChangedPredicate),
).
Watches(
&corev1.ConfigMap{},
handler.EnqueueRequestsFromMapFunc(r.watchMonitoringConfigMapResource),
builder.WithPredicates(CMContentChangedPredicate),
builder.WithPredicates(resources.CMContentChangedPredicate),
).
Watches(
&serviceApi.Auth{},
Expand All @@ -375,30 +381,6 @@ func (r *DSCInitializationReconciler) SetupWithManager(ctx context.Context, mgr
Complete(r)
}

// SecretContentChangedPredicate filters Secret update events so that
// reconciliation is triggered only when the Secret's Data payload actually
// changes; metadata-only updates (labels, annotations, resourceVersion) are
// ignored.
var SecretContentChangedPredicate = predicate.Funcs{
	UpdateFunc: func(e event.UpdateEvent) bool {
		oldSecret, _ := e.ObjectOld.(*corev1.Secret)
		newSecret, _ := e.ObjectNew.(*corev1.Secret)

		return !reflect.DeepEqual(oldSecret.Data, newSecret.Data)
	},
}

// CMContentChangedPredicate filters ConfigMap update events so that
// reconciliation is triggered only when the ConfigMap's Data content actually
// changes; metadata-only updates are ignored.
var CMContentChangedPredicate = predicate.Funcs{
	UpdateFunc: func(e event.UpdateEvent) bool {
		oldCM, _ := e.ObjectOld.(*corev1.ConfigMap)
		newCM, _ := e.ObjectNew.(*corev1.ConfigMap)

		return !reflect.DeepEqual(oldCM.Data, newCM.Data)
	},
}

// DSCDeletionPredicate matches every delete event for the watched
// DataScienceCluster resource, so deletions always enqueue a reconcile.
var DSCDeletionPredicate = predicate.Funcs{
	DeleteFunc: func(e event.DeleteEvent) bool {
		return true
	},
}

var dsciPredicateStateChangeTrustedCA = predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
oldDSCI, _ := e.ObjectOld.(*dsciv1.DSCInitialization)
Expand Down Expand Up @@ -468,3 +450,43 @@ func (r *DSCInitializationReconciler) watchAuthResource(ctx context.Context, a c

return nil
}

// configureMonitoring reconciles the singleton Monitoring CR for this DSCI
// instance: when spec.monitoring is Managed the CR is server-side applied
// (created or updated) with the DSCI as field owner; otherwise it is deleted.
// The CR carries an owner reference to the DSCI so it is garbage-collected
// with it.
func (r *DSCInitializationReconciler) configureMonitoring(ctx context.Context, dsci *dsciv1.DSCInitialization) error {
	// Create Monitoring CR singleton
	defaultMonitoring := client.Object(&serviceApi.Monitoring{
		TypeMeta: metav1.TypeMeta{
			Kind:       serviceApi.MonitoringKind,
			APIVersion: serviceApi.GroupVersion.String(),
		},
		ObjectMeta: metav1.ObjectMeta{
			Name: serviceApi.MonitoringInstanceName,
			OwnerReferences: []metav1.OwnerReference{{
				APIVersion: dsciv1.GroupVersion.String(),
				Kind:       dsci.Kind,
				Name:       dsci.Name,
				UID:        dsci.UID,
			},
			},
		},
		Spec: serviceApi.MonitoringSpec{
			MonitoringCommonSpec: serviceApi.MonitoringCommonSpec{
				// Monitoring namespace is taken from the DSCI spec.
				Namespace: dsci.Spec.Monitoring.Namespace,
			},
		},
	},
	)

	if dsci.Spec.Monitoring.ManagementState == operatorv1.Managed {
		// for generic case if we need to support configurable monitoring namespace
		// set field manager to DSCI
		if err := r.Apply(ctx, defaultMonitoring, client.FieldOwner("dscinitialization.opendatahub.io"), client.ForceOwnership); err != nil && !k8serr.IsAlreadyExists(err) {
			return err
		}
	} else {
		// Monitoring not managed: remove the CR; a missing CR is not an error.
		err := r.Delete(ctx, defaultMonitoring)
		if err != nil && !k8serr.IsNotFound(err) {
			return err
		}
	}
	return nil
}
1 change: 1 addition & 0 deletions controllers/dscinitialization/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ var _ = BeforeSuite(func() {
utilruntime.Must(templatev1.Install(testScheme))
utilruntime.Must(configv1.Install(testScheme))
utilruntime.Must(serviceApi.AddToScheme(testScheme))
utilruntime.Must(monitoringv1.AddToScheme(testScheme))
// +kubebuilder:scaffold:scheme

k8sClient, err = client.New(cfg, client.Options{Scheme: testScheme})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,34 @@ package monitoring

import (
"context"
"fmt"
"os"
"path/filepath"
"strings"

conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1"
"gopkg.in/yaml.v2"
k8serr "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"sigs.k8s.io/controller-runtime/pkg/client"
logf "sigs.k8s.io/controller-runtime/pkg/log"

"github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy"
"github.com/opendatahub-io/opendatahub-operator/v2/apis/common"
serviceApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/services/v1alpha1"
"github.com/opendatahub-io/opendatahub-operator/v2/controllers/status"
odhcli "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/client"
odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy"
)

var (
prometheusConfigPath = filepath.Join(deploy.DefaultManifestPath, "monitoring", "prometheus", "apps", "prometheus-configs.yaml")
ComponentName = serviceApi.MonitoringServiceName
prometheusConfigPath = filepath.Join(odhdeploy.DefaultManifestPath, ComponentName, "prometheus", "apps", "prometheus-configs.yaml")
ReadyConditionType = conditionsv1.ConditionType(status.ReadySuffix)
)

// UpdatePrometheusConfig update prometheus-configs.yaml to include/exclude <component>.rules
// updatePrometheusConfig update prometheus-configs.yaml to include/exclude <component>.rules
// parameter enable when set to true to add new rules, when set to false to remove existing rules.
func UpdatePrometheusConfig(ctx context.Context, _ client.Client, enable bool, component string) error {
func updatePrometheusConfig(ctx context.Context, enable bool, component string) error {
l := logf.FromContext(ctx)

// create a struct to mock poremtheus.yml
Expand All @@ -44,8 +54,6 @@ func UpdatePrometheusConfig(ctx context.Context, _ client.Client, enable bool, c
MMARules string `yaml:"model-mesh-alerting.rules"`
OdhModelRRules string `yaml:"odh-model-controller-recording.rules"`
OdhModelARules string `yaml:"odh-model-controller-alerting.rules"`
CFORRules string `yaml:"codeflare-recording.rules"`
CFOARules string `yaml:"codeflare-alerting.rules"`
RayARules string `yaml:"ray-alerting.rules"`
WorkbenchesRRules string `yaml:"workbenches-recording.rules"`
WorkbenchesARules string `yaml:"workbenches-alerting.rules"`
Expand Down Expand Up @@ -124,3 +132,15 @@ func UpdatePrometheusConfig(ctx context.Context, _ client.Client, enable bool, c

return err
}

// isComponentReady fetches the latest state of obj from the cluster and
// reports whether its Ready status condition is true. An object that does
// not exist yet is simply "not ready" (no error); any other Get failure is
// returned wrapped.
func isComponentReady(ctx context.Context, cli *odhcli.Client, obj common.PlatformObject) (bool, error) {
	getErr := cli.Client.Get(ctx, client.ObjectKeyFromObject(obj), obj)
	if k8serr.IsNotFound(getErr) {
		// Not created yet: treat as not ready rather than as a failure.
		return false, nil
	}
	if getErr != nil {
		return false, fmt.Errorf("failed to get component instance: %w", getErr)
	}
	return meta.IsStatusConditionTrue(obj.GetStatus().Conditions, status.ConditionTypeReady), nil
}
53 changes: 5 additions & 48 deletions controllers/services/monitoring/monitoring_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,46 +20,19 @@ import (
"context"
"fmt"

routev1 "github.com/openshift/api/route/v1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
ctrl "sigs.k8s.io/controller-runtime"

dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1"
serviceApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/services/v1alpha1"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/updatestatus"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/handlers"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/predicates/resources"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels"
)

const serviceName = "monitoring"

// NewServiceReconciler creates a ServiceReconciler for the Monitoring API.
func NewServiceReconciler(ctx context.Context, mgr ctrl.Manager) error {
_, err := reconciler.ReconcilerFor(mgr, &serviceApi.Monitoring{}).
// operands - owned
Owns(&corev1.ConfigMap{}).
Owns(&corev1.Secret{}).
Owns(&rbacv1.ClusterRoleBinding{}).
Owns(&rbacv1.ClusterRole{}).
Owns(&rbacv1.Role{}).
Owns(&rbacv1.RoleBinding{}).
Owns(&corev1.ServiceAccount{}).
Owns(&corev1.Service{}).
Owns(&corev1.PersistentVolumeClaim{}).
Owns(&monitoringv1.ServiceMonitor{}).
Owns(&monitoringv1.PrometheusRule{}).
// By default, a predicated for changed generation is added by the Owns()
// method, however for deployments, we also need to retrieve status info
// hence we need a dedicated predicate to react to replicas status change
Owns(&appsv1.Deployment{}, reconciler.WithPredicates(resources.NewDeploymentPredicate())).
// operands - openshift
Owns(&routev1.Route{}).
// operands - watched
//
// By default the Watches functions adds:
Expand All @@ -69,29 +42,13 @@ func NewServiceReconciler(ctx context.Context, mgr ctrl.Manager) error {
// for to objects that have the label components.platform.opendatahub.io/part-of
// or services.platform.opendatahub.io/part-of set to the current owner
//
Watches(&extv1.CustomResourceDefinition{}).
Watches(&dscv1.DataScienceCluster{}, reconciler.WithEventHandler(handlers.ToNamed(serviceApi.MonitoringInstanceName)),
reconciler.WithPredicates(resources.DSCComponentUpdatePredicate)).
// actions
WithAction(initialize).
WithAction(kustomize.NewAction(
kustomize.WithCache(),
// Those are the default labels added by the legacy deploy method
// and should be preserved as the original plugin were affecting
// deployment selectors that are immutable once created, so it won't
// be possible to actually amend the labels in a non-disruptive
// manner.
//
// Additional labels/annotations MUST be added by the deploy action
// so they would affect only objects metadata without side effects
// kustomize.WithLabel(labels.ODH.Component(componentName), "true"),
kustomize.WithLabel(labels.K8SCommon.PartOf, serviceName),
)).
WithAction(updatePrometheusConfigMap).
WithAction(deploy.NewAction(
deploy.WithCache(),
deploy.WithFieldOwner(serviceApi.MonitoringInstanceName),
deploy.WithLabel(labels.PlatformPartOf, serviceApi.MonitoringServiceName),
)).
WithAction(updatestatus.NewAction(
updatestatus.WithSelectorLabel(labels.PlatformPartOf, serviceApi.MonitoringServiceName),
)).
WithAction(updateStatus).
Build(ctx)
Expand Down
Loading

0 comments on commit 83abe0d

Please sign in to comment.