From 39cee3538af9c561efed5e2d2abdd241a62b4eb8 Mon Sep 17 00:00:00 2001 From: Julian Brost Date: Fri, 28 Jan 2022 15:15:38 +0100 Subject: [PATCH 1/3] Checkable: improve state notifications after suppression ends This commit changes the Checkable notification suppression logic (notifications are currently suppressed on the Checkable if it is unreachable, in a downtime, or acknowledged) to that after the suppression reason ends, a state notification is sent if and only if the first hard state after is different from the last hard state from before. If the checkable is in a soft state after the suppression ends, the notification is further suppressed until a hard state is reached. To achieve this behavior, a new attribute state_before_suppression is added to Checkable. This attribute is set to the last hard state the first time either a PROBLEM or a RECOVERY notification is suppressed. Compared to from before, neither of these two flags in the suppressed_notification will ever be cleared while the supression is still ongoing but only after the suppression ended and the current state is compared with the old state stored in state_before_suppression. --- lib/icinga/checkable-check.cpp | 26 ++++++++++++++----- lib/icinga/checkable-notification.cpp | 37 ++++++++++++++++++++++++--- lib/icinga/checkable.ti | 3 +++ 3 files changed, 57 insertions(+), 9 deletions(-) diff --git a/lib/icinga/checkable-check.cpp b/lib/icinga/checkable-check.cpp index 7963f675e11..a302bd92372 100644 --- a/lib/icinga/checkable-check.cpp +++ b/lib/icinga/checkable-check.cpp @@ -479,7 +479,12 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig if (send_notification && !is_flapping) { if (!IsPaused()) { - if (suppress_notification) { + /* If there are still some pending suppressed state notification, keep the suppression until these are + * handled by Checkable::FireSuppressedNotifications(). + */ + bool pending = GetSuppressedNotifications() & (NotificationRecovery|NotificationProblem); + + if (suppress_notification || pending) { suppressed_types |= (recovery ? NotificationRecovery : NotificationProblem); } else { OnNotificationsRequested(this, recovery ? NotificationRecovery : NotificationProblem, cr, "", "", nullptr); @@ -496,12 +501,21 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig int suppressed_types_before (GetSuppressedNotifications()); int suppressed_types_after (suppressed_types_before | suppressed_types); - for (int conflict : {NotificationProblem | NotificationRecovery, NotificationFlappingStart | NotificationFlappingEnd}) { - /* E.g. problem and recovery notifications neutralize each other. */ + const int conflict = NotificationFlappingStart | NotificationFlappingEnd; + if ((suppressed_types_after & conflict) == conflict) { + /* Flapping start and end cancel out each other. */ + suppressed_types_after &= ~conflict; + } - if ((suppressed_types_after & conflict) == conflict) { - suppressed_types_after &= ~conflict; - } + const int stateNotifications = NotificationRecovery | NotificationProblem; + if (!(suppressed_types_before & stateNotifications) && (suppressed_types & stateNotifications)) { + /* A state-related notification is suppressed for the first time, store the previous state. When + * notifications are no longer suppressed, this can be compared with the current state to determine + * if a notification must be sent. This is done differently compared to flapping notifications just above + * as for state notifications, problem and recovery don't always cancel each other. For example, + * WARNING -> OK -> CRITICAL generates both types once, but there should still be a notification. + */ + SetStateBeforeSuppression(old_stateType == StateTypeHard ? old_state : ServiceOK); } if (suppressed_types_after != suppressed_types_before) { diff --git a/lib/icinga/checkable-notification.cpp b/lib/icinga/checkable-notification.cpp index 6404bb50a76..7978d40763d 100644 --- a/lib/icinga/checkable-notification.cpp +++ b/lib/icinga/checkable-notification.cpp @@ -168,7 +168,38 @@ static void FireSuppressedNotifications(Checkable* checkable) return false; }); - for (auto type : {NotificationProblem, NotificationRecovery, NotificationFlappingStart, NotificationFlappingEnd}) { + if (suppressed_types & (NotificationProblem|NotificationRecovery)) { + CheckResult::Ptr cr = checkable->GetLastCheckResult(); + NotificationType type = cr && checkable->IsStateOK(cr->GetState()) ? NotificationRecovery : NotificationProblem; + bool state_suppressed = checkable->NotificationReasonSuppressed(NotificationProblem) || checkable->NotificationReasonSuppressed(NotificationRecovery); + + /* Only process (i.e. send or dismiss) suppressed state notifications if the following conditions are met: + * + * 1. State notifications are not suppressed at the moment. State notifications must only be removed from + * the suppressed notifications bitset after the reason for the suppression is gone as these bits are + * used as a marker for when to set the state_before_suppression attribute. + * 2. The checkable is in a hard state. Soft states represent a state where we are not certain yet about + * the actual state and wait with sending notifications. If we want to immediately send a notification, + * we might send a recovery notification for something that just started failing or a problem + * notification which might be for an intermittent problem that would have never received a + * notification if there was no suppression as it still was in a soft state. Both cases aren't ideal so + * better wait until we are certain. + * 3. The checkable isn't likely checked soon. For example, if a downtime ended, give the checkable a + * chance to recover afterwards before sending a notification. + * 4. No parent recovered recently. Similar to the previous condition, give the checkable a chance to + * recover after one of its dependencies recovered before sending a notification. + * + * If any of these conditions is not met, processing the suppressed notification is further delayed. + */ + if (!state_suppressed && checkable->GetStateType() == StateTypeHard && !checkable->IsLikelyToBeCheckedSoon() && !wasLastParentRecoveryRecent.Get()) { + if (checkable->NotificationReasonApplies(type)) { + Checkable::OnNotificationsRequested(checkable, type, cr, "", "", nullptr); + } + subtract |= NotificationRecovery|NotificationProblem; + } + } + + for (auto type : {NotificationFlappingStart, NotificationFlappingEnd}) { if (suppressed_types & type) { bool still_applies = checkable->NotificationReasonApplies(type); @@ -224,12 +255,12 @@ bool Checkable::NotificationReasonApplies(NotificationType type) case NotificationProblem: { auto cr (GetLastCheckResult()); - return cr && !IsStateOK(cr->GetState()) && GetStateType() == StateTypeHard; + return cr && !IsStateOK(cr->GetState()) && cr->GetState() != GetStateBeforeSuppression(); } case NotificationRecovery: { auto cr (GetLastCheckResult()); - return cr && IsStateOK(cr->GetState()); + return cr && IsStateOK(cr->GetState()) && cr->GetState() != GetStateBeforeSuppression(); } case NotificationFlappingStart: return IsFlapping(); diff --git a/lib/icinga/checkable.ti b/lib/icinga/checkable.ti index e66a237c67a..6f7a5daee46 100644 --- a/lib/icinga/checkable.ti +++ b/lib/icinga/checkable.ti @@ -175,6 +175,9 @@ abstract class Checkable : CustomVarObject [state, no_user_view, no_user_modify] int suppressed_notifications { default {{{ return 0; }}} }; + [state, enum, no_user_view, no_user_modify] ServiceState state_before_suppression { + default {{{ return ServiceOK; }}} + }; [config, navigation] name(Endpoint) command_endpoint (CommandEndpointRaw) { navigate {{{ From cbc0b21b86427a73b048441c0b5842b2eb0517c1 Mon Sep 17 00:00:00 2001 From: Julian Brost Date: Tue, 22 Feb 2022 12:01:06 +0100 Subject: [PATCH 2/3] Checkable: sync state_before_suppression in cluster This ensures that in case of a failover in an HA zone, the other can take over properly and has the required state to send the proper notifications. --- doc/19-technical-concepts.md | 37 +++++++++++++++++++++ lib/icinga/clusterevents.cpp | 64 ++++++++++++++++++++++++++++++++++++ lib/icinga/clusterevents.hpp | 3 ++ 3 files changed, 104 insertions(+) diff --git a/doc/19-technical-concepts.md b/doc/19-technical-concepts.md index 1048e21d39c..1da32b6f2bf 100644 --- a/doc/19-technical-concepts.md +++ b/doc/19-technical-concepts.md @@ -1370,6 +1370,43 @@ Message updates will be dropped when: * Checkable does not exist. * Origin endpoint's zone is not allowed to access this checkable. +#### event::SetStateBeforeSuppression + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------------------------------- +jsonrpc | 2.0 +method | event::SetStateBeforeSuppression +params | Dictionary + +##### Params + +Key | Type | Description +---------------------------|--------|----------------------------------------------- +host | String | Host name +service | String | Service name +state\_before\_suppression | Number | Checkable state before the current suppression + +##### Functions + +Event Sender: `Checkable::OnStateBeforeSuppressionChanged` +Event Receiver: `StateBeforeSuppressionChangedAPIHandler` + +Used to sync the checkable state from before a notification suppression (for example +because the checkable is in a downtime) started within the same HA zone. + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Checkable does not exist. +* Origin endpoint is not within the local zone. + #### event::SetSuppressedNotifications > Location: `clusterevents.cpp` diff --git a/lib/icinga/clusterevents.cpp b/lib/icinga/clusterevents.cpp index bade7e5a58a..e7fb9f3e04d 100644 --- a/lib/icinga/clusterevents.cpp +++ b/lib/icinga/clusterevents.cpp @@ -25,6 +25,7 @@ INITIALIZE_ONCE(&ClusterEvents::StaticInitialize); REGISTER_APIFUNCTION(CheckResult, event, &ClusterEvents::CheckResultAPIHandler); REGISTER_APIFUNCTION(SetNextCheck, event, &ClusterEvents::NextCheckChangedAPIHandler); REGISTER_APIFUNCTION(SetLastCheckStarted, event, &ClusterEvents::LastCheckStartedChangedAPIHandler); +REGISTER_APIFUNCTION(SetStateBeforeSuppression, event, &ClusterEvents::StateBeforeSuppressionChangedAPIHandler); REGISTER_APIFUNCTION(SetSuppressedNotifications, event, &ClusterEvents::SuppressedNotificationsChangedAPIHandler); REGISTER_APIFUNCTION(SetSuppressedNotificationTypes, event, &ClusterEvents::SuppressedNotificationTypesChangedAPIHandler); REGISTER_APIFUNCTION(SetNextNotification, event, &ClusterEvents::NextNotificationChangedAPIHandler); @@ -45,6 +46,7 @@ void ClusterEvents::StaticInitialize() Checkable::OnNewCheckResult.connect(&ClusterEvents::CheckResultHandler); Checkable::OnNextCheckChanged.connect(&ClusterEvents::NextCheckChangedHandler); Checkable::OnLastCheckStartedChanged.connect(&ClusterEvents::LastCheckStartedChangedHandler); + Checkable::OnStateBeforeSuppressionChanged.connect(&ClusterEvents::StateBeforeSuppressionChangedHandler); Checkable::OnSuppressedNotificationsChanged.connect(&ClusterEvents::SuppressedNotificationsChangedHandler); Notification::OnSuppressedNotificationsChanged.connect(&ClusterEvents::SuppressedNotificationTypesChangedHandler); Notification::OnNextNotificationChanged.connect(&ClusterEvents::NextNotificationChangedHandler); @@ -306,6 +308,68 @@ Value ClusterEvents::LastCheckStartedChangedAPIHandler(const MessageOrigin::Ptr& return Empty; } +void ClusterEvents::StateBeforeSuppressionChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin) +{ + ApiListener::Ptr listener = ApiListener::GetInstance(); + + if (!listener) + return; + + Host::Ptr host; + Service::Ptr service; + tie(host, service) = GetHostService(checkable); + + Dictionary::Ptr params = new Dictionary(); + params->Set("host", host->GetName()); + if (service) + params->Set("service", service->GetShortName()); + params->Set("state_before_suppression", checkable->GetStateBeforeSuppression()); + + Dictionary::Ptr message = new Dictionary(); + message->Set("jsonrpc", "2.0"); + message->Set("method", "event::SetStateBeforeSuppression"); + message->Set("params", params); + + listener->RelayMessage(origin, nullptr, message, true); +} + +Value ClusterEvents::StateBeforeSuppressionChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params) +{ + Endpoint::Ptr endpoint = origin->FromClient->GetEndpoint(); + + if (!endpoint) { + Log(LogNotice, "ClusterEvents") + << "Discarding 'state before suppression changed' message from '" << origin->FromClient->GetIdentity() << "': Invalid endpoint origin (client not allowed)."; + return Empty; + } + + Host::Ptr host = Host::GetByName(params->Get("host")); + + if (!host) + return Empty; + + Checkable::Ptr checkable; + + if (params->Contains("service")) + checkable = host->GetServiceByShortName(params->Get("service")); + else + checkable = host; + + if (!checkable) + return Empty; + + if (origin->FromZone && origin->FromZone != Zone::GetLocalZone()) { + Log(LogNotice, "ClusterEvents") + << "Discarding 'state before suppression changed' message for checkable '" << checkable->GetName() + << "' from '" << origin->FromClient->GetIdentity() << "': Unauthorized access."; + return Empty; + } + + checkable->SetStateBeforeSuppression(ServiceState(int(params->Get("state_before_suppression"))), false, origin); + + return Empty; +} + void ClusterEvents::SuppressedNotificationsChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin) { ApiListener::Ptr listener = ApiListener::GetInstance(); diff --git a/lib/icinga/clusterevents.hpp b/lib/icinga/clusterevents.hpp index a6c21971aed..4cdadacc97c 100644 --- a/lib/icinga/clusterevents.hpp +++ b/lib/icinga/clusterevents.hpp @@ -29,6 +29,9 @@ class ClusterEvents static void LastCheckStartedChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin); static Value LastCheckStartedChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params); + static void StateBeforeSuppressionChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin); + static Value StateBeforeSuppressionChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params); + static void SuppressedNotificationsChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin); static Value SuppressedNotificationsChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params); From 90848f602bfea29b5c3cf19adbf4265f66efbe7e Mon Sep 17 00:00:00 2001 From: Julian Brost Date: Fri, 28 Jan 2022 15:15:44 +0100 Subject: [PATCH 3/3] Checkable: Add test for state notifications after a suppression ends --- lib/icinga/checkable-notification.cpp | 46 +++--- lib/icinga/checkable.cpp | 2 +- lib/icinga/checkable.hpp | 4 +- lib/icinga/downtime.hpp | 3 +- lib/icinga/downtime.ti | 4 +- lib/icinga/host.hpp | 3 +- lib/icinga/service.hpp | 3 +- test/CMakeLists.txt | 1 + test/icinga-checkresult.cpp | 220 ++++++++++++++++++++++++++ 9 files changed, 256 insertions(+), 30 deletions(-) diff --git a/lib/icinga/checkable-notification.cpp b/lib/icinga/checkable-notification.cpp index 7978d40763d..21bcc399f52 100644 --- a/lib/icinga/checkable-notification.cpp +++ b/lib/icinga/checkable-notification.cpp @@ -129,26 +129,26 @@ void Checkable::UnregisterNotification(const Notification::Ptr& notification) m_Notifications.erase(notification); } -static void FireSuppressedNotifications(Checkable* checkable) +void Checkable::FireSuppressedNotifications() { - if (!checkable->IsActive()) + if (!IsActive()) return; - if (checkable->IsPaused()) + if (IsPaused()) return; - if (!checkable->GetEnableNotifications()) + if (!GetEnableNotifications()) return; - int suppressed_types (checkable->GetSuppressedNotifications()); + int suppressed_types (GetSuppressedNotifications()); if (!suppressed_types) return; int subtract = 0; { - LazyInit wasLastParentRecoveryRecent ([&checkable]() { - auto cr (checkable->GetLastCheckResult()); + LazyInit wasLastParentRecoveryRecent ([this]() { + auto cr (GetLastCheckResult()); if (!cr) { return true; @@ -156,7 +156,7 @@ static void FireSuppressedNotifications(Checkable* checkable) auto threshold (cr->GetExecutionStart()); - for (auto& dep : checkable->GetDependencies()) { + for (auto& dep : GetDependencies()) { auto parent (dep->GetParent()); ObjectLock oLock (parent); @@ -169,9 +169,9 @@ static void FireSuppressedNotifications(Checkable* checkable) }); if (suppressed_types & (NotificationProblem|NotificationRecovery)) { - CheckResult::Ptr cr = checkable->GetLastCheckResult(); - NotificationType type = cr && checkable->IsStateOK(cr->GetState()) ? NotificationRecovery : NotificationProblem; - bool state_suppressed = checkable->NotificationReasonSuppressed(NotificationProblem) || checkable->NotificationReasonSuppressed(NotificationRecovery); + CheckResult::Ptr cr = GetLastCheckResult(); + NotificationType type = cr && IsStateOK(cr->GetState()) ? NotificationRecovery : NotificationProblem; + bool state_suppressed = NotificationReasonSuppressed(NotificationProblem) || NotificationReasonSuppressed(NotificationRecovery); /* Only process (i.e. send or dismiss) suppressed state notifications if the following conditions are met: * @@ -191,9 +191,9 @@ static void FireSuppressedNotifications(Checkable* checkable) * * If any of these conditions is not met, processing the suppressed notification is further delayed. */ - if (!state_suppressed && checkable->GetStateType() == StateTypeHard && !checkable->IsLikelyToBeCheckedSoon() && !wasLastParentRecoveryRecent.Get()) { - if (checkable->NotificationReasonApplies(type)) { - Checkable::OnNotificationsRequested(checkable, type, cr, "", "", nullptr); + if (!state_suppressed && GetStateType() == StateTypeHard && !IsLikelyToBeCheckedSoon() && !wasLastParentRecoveryRecent.Get()) { + if (NotificationReasonApplies(type)) { + Checkable::OnNotificationsRequested(this, type, cr, "", "", nullptr); } subtract |= NotificationRecovery|NotificationProblem; } @@ -201,11 +201,11 @@ static void FireSuppressedNotifications(Checkable* checkable) for (auto type : {NotificationFlappingStart, NotificationFlappingEnd}) { if (suppressed_types & type) { - bool still_applies = checkable->NotificationReasonApplies(type); + bool still_applies = NotificationReasonApplies(type); if (still_applies) { - if (!checkable->NotificationReasonSuppressed(type) && !checkable->IsLikelyToBeCheckedSoon() && !wasLastParentRecoveryRecent.Get()) { - Checkable::OnNotificationsRequested(checkable, type, checkable->GetLastCheckResult(), "", "", nullptr); + if (!NotificationReasonSuppressed(type) && !IsLikelyToBeCheckedSoon() && !wasLastParentRecoveryRecent.Get()) { + Checkable::OnNotificationsRequested(this, type, GetLastCheckResult(), "", "", nullptr); subtract |= type; } @@ -217,13 +217,13 @@ static void FireSuppressedNotifications(Checkable* checkable) } if (subtract) { - ObjectLock olock (checkable); + ObjectLock olock (this); - int suppressed_types_before (checkable->GetSuppressedNotifications()); + int suppressed_types_before (GetSuppressedNotifications()); int suppressed_types_after (suppressed_types_before & ~subtract); if (suppressed_types_after != suppressed_types_before) { - checkable->SetSuppressedNotifications(suppressed_types_after); + SetSuppressedNotifications(suppressed_types_after); } } } @@ -231,14 +231,14 @@ static void FireSuppressedNotifications(Checkable* checkable) /** * Re-sends all notifications previously suppressed by e.g. downtimes if the notification reason still applies. */ -void Checkable::FireSuppressedNotifications(const Timer * const&) +void Checkable::FireSuppressedNotificationsTimer(const Timer * const&) { for (auto& host : ConfigType::GetObjectsByType()) { - ::FireSuppressedNotifications(host.get()); + host->FireSuppressedNotifications(); } for (auto& service : ConfigType::GetObjectsByType()) { - ::FireSuppressedNotifications(service.get()); + service->FireSuppressedNotifications(); } } diff --git a/lib/icinga/checkable.cpp b/lib/icinga/checkable.cpp index 6ec7f6f92a5..b212389b365 100644 --- a/lib/icinga/checkable.cpp +++ b/lib/icinga/checkable.cpp @@ -103,7 +103,7 @@ void Checkable::Start(bool runtimeCreated) boost::call_once(once, []() { l_CheckablesFireSuppressedNotifications = new Timer(); l_CheckablesFireSuppressedNotifications->SetInterval(5); - l_CheckablesFireSuppressedNotifications->OnTimerExpired.connect(&Checkable::FireSuppressedNotifications); + l_CheckablesFireSuppressedNotifications->OnTimerExpired.connect(&Checkable::FireSuppressedNotificationsTimer); l_CheckablesFireSuppressedNotifications->Start(); l_CleanDeadlinedExecutions = new Timer(); diff --git a/lib/icinga/checkable.hpp b/lib/icinga/checkable.hpp index 292dac28124..bb8e567b45b 100644 --- a/lib/icinga/checkable.hpp +++ b/lib/icinga/checkable.hpp @@ -191,6 +191,8 @@ class Checkable : public ObjectImpl bool NotificationReasonSuppressed(NotificationType type); bool IsLikelyToBeCheckedSoon(); + void FireSuppressedNotifications(); + static void IncreasePendingChecks(); static void DecreasePendingChecks(); static int GetPendingChecks(); @@ -222,7 +224,7 @@ class Checkable : public ObjectImpl static void NotifyDowntimeEnd(const Downtime::Ptr& downtime); - static void FireSuppressedNotifications(const Timer * const&); + static void FireSuppressedNotificationsTimer(const Timer * const&); static void CleanDeadlinedExecutions(const Timer * const&); /* Comments */ diff --git a/lib/icinga/downtime.hpp b/lib/icinga/downtime.hpp index 38d4e41e4cf..5f0c4937bdb 100644 --- a/lib/icinga/downtime.hpp +++ b/lib/icinga/downtime.hpp @@ -62,12 +62,13 @@ class Downtime final : public ObjectImpl void TriggerDowntime(double triggerTime); void SetRemovalInfo(const String& removedBy, double removeTime, const MessageOrigin::Ptr& origin = nullptr); + void OnAllConfigLoaded() override; + static String GetDowntimeIDFromLegacyID(int id); static DowntimeChildOptions ChildOptionsFromValue(const Value& options); protected: - void OnAllConfigLoaded() override; void Start(bool runtimeCreated) override; void Stop(bool runtimeRemoved) override; diff --git a/lib/icinga/downtime.ti b/lib/icinga/downtime.ti index d21f0ebf570..9fe1823921c 100644 --- a/lib/icinga/downtime.ti +++ b/lib/icinga/downtime.ti @@ -25,12 +25,12 @@ class Downtime : ConfigObject < DowntimeNameComposer load_after Host; load_after Service; - [config, protected, required, navigation(host)] name(Host) host_name { + [config, required, navigation(host)] name(Host) host_name { navigate {{{ return Host::GetByName(GetHostName()); }}} }; - [config, protected, navigation(service)] String service_name { + [config, navigation(service)] String service_name { track {{{ if (!oldValue.IsEmpty()) { Service::Ptr service = Service::GetByNamePair(GetHostName(), oldValue); diff --git a/lib/icinga/host.hpp b/lib/icinga/host.hpp index 87f560af5dd..d0d6c1aa47f 100644 --- a/lib/icinga/host.hpp +++ b/lib/icinga/host.hpp @@ -50,10 +50,11 @@ class Host final : public ObjectImpl, public MacroResolver bool ResolveMacro(const String& macro, const CheckResult::Ptr& cr, Value *result) const override; + void OnAllConfigLoaded() override; + protected: void Stop(bool runtimeRemoved) override; - void OnAllConfigLoaded() override; void CreateChildObjects(const Type::Ptr& childType) override; private: diff --git a/lib/icinga/service.hpp b/lib/icinga/service.hpp index 493f261e295..74237018bff 100644 --- a/lib/icinga/service.hpp +++ b/lib/icinga/service.hpp @@ -44,10 +44,11 @@ class Service final : public ObjectImpl, public MacroResolver static void EvaluateApplyRules(const Host::Ptr& host); + void OnAllConfigLoaded() override; + static boost::signals2::signal OnHostProblemChanged; protected: - void OnAllConfigLoaded() override; void CreateChildObjects(const Type::Ptr& childType) override; private: diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 530dd1cc9bd..8b800d914b4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -133,6 +133,7 @@ add_boost_test(base icinga_checkresult/service_3attempts icinga_checkresult/host_flapping_notification icinga_checkresult/service_flapping_notification + icinga_checkresult/suppressed_notification icinga_dependencies/multi_parent icinga_notification/strings icinga_notification/state_filter diff --git a/test/icinga-checkresult.cpp b/test/icinga-checkresult.cpp index 33bbc509ee4..fdc78915ede 100644 --- a/test/icinga-checkresult.cpp +++ b/test/icinga-checkresult.cpp @@ -1,8 +1,13 @@ /* Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ */ +#include "icinga/downtime.hpp" #include "icinga/host.hpp" +#include "icinga/service.hpp" #include #include +#include +#include +#include using namespace icinga; @@ -809,4 +814,219 @@ BOOST_AUTO_TEST_CASE(service_flapping_ok_over_bad_into_ok) #endif /* I2_DEBUG */ } + +BOOST_AUTO_TEST_CASE(suppressed_notification) +{ + /* Tests that suppressed notifications on a Checkable are sent after the suppression ends if and only if the first + * hard state after the suppression is different from the last hard state before the suppression. The test works + * by bringing a service in a defined hard state, creating a downtime, performing some state changes, removing the + * downtime, bringing the service into another defined hard state (if not already) and checking the requested + * notifications. + */ + + struct NotificationLog { + std::vector> GetAndClear() { + std::lock_guard lock (mutex); + + std::vector> ret; + std::swap(ret, log); + return ret; + } + + void Add(std::pair notification) { + std::lock_guard lock (mutex); + + log.emplace_back(notification); + } + + private: + std::mutex mutex; + std::vector> log; + }; + + const std::vector states {ServiceOK, ServiceWarning, ServiceCritical, ServiceUnknown}; + + for (bool isVolatile : {false, true}) { + for (int checkAttempts : {1, 2}) { + for (ServiceState initialState : states) { + for (ServiceState s1 : states) + for (ServiceState s2 : states) + for (ServiceState s3 : states) { + const std::vector sequence {s1, s2, s3}; + + std::string testcase; + + { + std::ostringstream buf; + buf << "volatile=" << isVolatile + << " checkAttempts=" << checkAttempts + << " sequence={" << Service::StateToString(initialState); + + for (ServiceState s : sequence) { + buf << " " << Service::StateToString(s); + } + + buf << "}"; + testcase = buf.str(); + } + + std::cout << "Test case: " << testcase << std::endl; + + // Create host and service for the test. + Host::Ptr host = new Host(); + host->SetName("suppressed_notifications"); + host->Register(); + + Service::Ptr service = new Service(); + service->SetHostName(host->GetName()); + service->SetName("service"); + service->SetActive(true); + service->SetVolatile(isVolatile); + service->SetMaxCheckAttempts(checkAttempts); + service->Activate(); + service->SetAuthority(true); + service->Register(); + + host->OnAllConfigLoaded(); + service->OnAllConfigLoaded(); + + // Bring service into the initial hard state. + for (int i = 0; i < checkAttempts; i++) { + std::cout << " ProcessCheckResult(" + << Service::StateToString(initialState) << ")" << std::endl; + service->ProcessCheckResult(MakeCheckResult(initialState)); + } + + BOOST_CHECK(service->GetState() == initialState); + BOOST_CHECK(service->GetStateType() == StateTypeHard); + + /* Keep track of all notifications requested from now on. + * + * Boost.Signal2 handler may still be executing from another thread after they were disconnected. + * Make the structures accessed by the handlers shared pointers so that they remain valid as long + * as they may be accessed from one of these handlers. + */ + auto notificationLog = std::make_shared(); + + boost::signals2::scoped_connection c (Checkable::OnNotificationsRequested.connect( + [notificationLog,service]( + const Checkable::Ptr& checkable, NotificationType type, const CheckResult::Ptr& cr, + const String&, const String&, const MessageOrigin::Ptr& + ) { + BOOST_CHECK_EQUAL(checkable, service); + std::cout << " -> OnNotificationsRequested(" << Notification::NotificationTypeToString(type) + << ", " << Service::StateToString(cr->GetState()) << ")" << std::endl; + + notificationLog->Add({type, cr->GetState()}); + } + )); + + // Helper to assert which notifications were requested. Implicitly clears the stored notifications. + auto assertNotifications = [notificationLog]( + const std::vector>& expected, + const std::string& extraMessage + ) { + // Pretty-printer for the vectors of requested and expected notifications. + auto pretty = [](const std::vector>& vec) { + std::ostringstream s; + + s << "{"; + bool first = true; + for (const auto &v : vec) { + if (first) { + first = false; + } else { + s << ", "; + } + s << Notification::NotificationTypeToString(v.first) + << "/" << Service::StateToString(v.second); + } + s << "}"; + + return s.str(); + }; + + auto got (notificationLog->GetAndClear()); + + BOOST_CHECK_MESSAGE(got == expected, "expected=" << pretty(expected) + << " got=" << pretty(got) + << (extraMessage.empty() ? "" : " ") << extraMessage); + }; + + // Start a downtime for the service. + std::cout << " Downtime Start" << std::endl; + Downtime::Ptr downtime = new Downtime(); + downtime->SetHostName(host->GetName()); + downtime->SetServiceName(service->GetName()); + downtime->SetName("downtime"); + downtime->SetFixed(true); + downtime->SetStartTime(Utility::GetTime() - 3600); + downtime->SetEndTime(Utility::GetTime() + 3600); + service->RegisterDowntime(downtime); + downtime->Register(); + downtime->OnAllConfigLoaded(); + downtime->TriggerDowntime(Utility::GetTime()); + + BOOST_CHECK(service->IsInDowntime()); + + // Process check results for the state sequence. + for (ServiceState s : sequence) { + std::cout << " ProcessCheckResult(" << Service::StateToString(s) << ")" << std::endl; + service->ProcessCheckResult(MakeCheckResult(s)); + BOOST_CHECK(service->GetState() == s); + if (checkAttempts == 1) { + BOOST_CHECK(service->GetStateType() == StateTypeHard); + } + } + + assertNotifications({}, "(no notifications in downtime)"); + + if (service->GetSuppressedNotifications()) { + BOOST_CHECK_EQUAL(service->GetStateBeforeSuppression(), initialState); + } + + // Remove the downtime. + std::cout << " Downtime End" << std::endl; + service->UnregisterDowntime(downtime); + downtime->Unregister(); + BOOST_CHECK(!service->IsInDowntime()); + + if (service->GetStateType() == icinga::StateTypeSoft) { + // When the current state is a soft state, no notification should be sent just yet. + std::cout << " FireSuppressedNotifications()" << std::endl; + service->FireSuppressedNotifications(); + + assertNotifications({}, testcase + " (should not fire in soft state)"); + + // Repeat the last check result until reaching a hard state. + for (int i = 0; i < checkAttempts && service->GetStateType() == StateTypeSoft; i++) { + std::cout << " ProcessCheckResult(" << Service::StateToString(sequence.back()) << ")" + << std::endl; + service->ProcessCheckResult(MakeCheckResult(sequence.back())); + BOOST_CHECK(service->GetState() == sequence.back()); + } + } + + // The service should be in a hard state now and notifications should now be sent if applicable. + BOOST_CHECK(service->GetStateType() == StateTypeHard); + + std::cout << " FireSuppressedNotifications()" << std::endl; + service->FireSuppressedNotifications(); + + if (initialState != sequence.back()) { + NotificationType t = sequence.back() == ServiceOK ? NotificationRecovery : NotificationProblem; + assertNotifications({{t, sequence.back()}}, testcase); + } else { + assertNotifications({}, testcase); + } + + // Remove host and service. + service->Unregister(); + host->Unregister(); + } + } + } + } +} + BOOST_AUTO_TEST_SUITE_END()