Skip to content

Commit

Permalink
Merge pull request #407 from nthomas-redhat/alerts
Browse files: browse the repository at this point in the history
Improve alert messages
  • Loading branch information
nthomas-redhat authored Apr 13, 2018
2 parents 86f66a7 + 25584ef commit 72a0d76
Show file tree
Hide file tree
Showing 11 changed files with 79 additions and 79 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,12 @@ def format_alert(self, alert_json):
alert['severity'] = \
constants.TENDRL_SEVERITY_MAP['warning']
alert['tags']['message'] = (
"Brick utilization of %s:%s under volume %s in "
"cluster %s is %s %% which is above %s"
" threshold (%s %%)" % (
"Brick utilization on %s:%s in %s "
"at %s %% and nearing full capacity" % (
alert['tags']['fqdn'],
alert['tags']['brick_path'],
alert["tags"]["volume_name"],
alert['tags']['integration_id'],
alert['current_value'],
alert['severity'],
alert['tags']['warning_max']
alert['current_value']
)
)
elif alert_json['State'] == constants.GRAFANA_CLEAR_ALERT:
Expand All @@ -74,20 +70,19 @@ def format_alert(self, alert_json):
constants.TENDRL_SEVERITY_MAP['warning']
alert['severity'] = constants.TENDRL_SEVERITY_MAP['info']
alert['tags']['message'] = (
"Brick utilization of %s:%s under volume %s in "
"cluster %s is back normal" % (
"Brick utilization of %s:%s in %s "
"back to normal" % (
alert['tags']['fqdn'],
alert['tags']['brick_path'],
alert["tags"]["volume_name"],
alert['tags']['integration_id']
alert["tags"]["volume_name"]
)
)
else:
logger.log(
"error",
NS.publisher_id,
{
"message": "Alert %s have unsupported alert"
"message": "Unsupported alert %s "
"severity" % alert_json
}
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,11 @@ def format_alert(self, alert_json):
alert['severity'] = \
constants.TENDRL_SEVERITY_MAP['warning']
alert['tags']['message'] = (
"Volume utilization of %s in "
"cluster %s is %s %% which is above %s"
" threshold (%s %%)" % (
"Volume utilization on %s in "
"%s at %s %% and nearing full capacity" % (
alert['tags']['volume_name'],
alert['tags']['integration_id'],
alert['current_value'],
alert['severity'],
alert['tags']['warning_max']
alert['current_value']
)
)
elif alert_json['State'] == constants.GRAFANA_CLEAR_ALERT:
Expand All @@ -63,8 +60,8 @@ def format_alert(self, alert_json):
constants.TENDRL_SEVERITY_MAP['warning']
alert['severity'] = constants.TENDRL_SEVERITY_MAP['info']
alert['tags']['message'] = (
"Volume utilization of %s in "
"cluster %s is back normal" % (
"Volume utilization on %s in "
"%s back to normal" % (
alert['tags']['volume_name'],
alert['tags']['integration_id']
)
Expand All @@ -74,7 +71,7 @@ def format_alert(self, alert_json):
"error",
NS.publisher_id,
{
"message": "Alert %s have unsupported alert"
"message": "Unsupported alert %s "
"severity" % alert_json
}
)
Expand Down
19 changes: 10 additions & 9 deletions tendrl/monitoring_integration/alert/handlers/node/cpu_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,11 @@ def format_alert(self, alert_json):
alert['severity'] = \
constants.TENDRL_SEVERITY_MAP['warning']
alert['tags']['message'] = (
"Cpu utilization of node %s is"
" %s %% which is above the %s threshold (%s %%)." % (
"Cpu utilization on node %s in %s"
" at %s %% and running out of cpu" % (
alert['tags']['fqdn'],
alert['current_value'],
alert['severity'],
alert['tags']['warning_max']))
alert['tags']['integration_id'],
alert['current_value']))
elif alert_json['State'] == constants.GRAFANA_CLEAR_ALERT:
# Identifying clear alert from which panel critical/warning
if "critical" in alert_json['Name'].lower():
Expand All @@ -60,15 +59,17 @@ def format_alert(self, alert_json):
alert['tags']['clear_alert'] = \
constants.TENDRL_SEVERITY_MAP['warning']
alert['severity'] = constants.TENDRL_SEVERITY_MAP['info']
alert['tags']['message'] = ("Cpu utilization of node %s is"
" back to normal" % (
alert['tags']['fqdn']))
alert['tags']['message'] = \
("Cpu utilization on node %s in"
" %s back to normal" % (
alert['tags']['fqdn'],
alert['tags']['integration_id']))
else:
logger.log(
"error",
NS.publisher_id,
{
"message": "Alert %s have unsupported alert"
"message": "Unsupported alert %s "
"severity" % alert_json
}
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,11 @@ def format_alert(self, alert_json):
alert['severity'] = \
constants.TENDRL_SEVERITY_MAP['warning']
alert['tags']['message'] = (
"Memory utilization of node %s is"
" %s %% which is above the %s threshold (%s %%)." % (
"Memory utilization on node %s in "
"%s at %s %% and running out of memory" % (
alert['tags']['fqdn'],
alert['current_value'],
alert['severity'],
alert['tags']['warning_max']))
alert['tags']['integration_id'],
alert['current_value']))
elif alert_json['State'] == constants.GRAFANA_CLEAR_ALERT:
# Identifying clear alert from which panel critical/warning
if "critical" in alert_json['Name'].lower():
Expand All @@ -61,15 +60,16 @@ def format_alert(self, alert_json):
constants.TENDRL_SEVERITY_MAP['warning']
alert['severity'] = constants.TENDRL_SEVERITY_MAP['info']
alert['tags']['message'] = (
"Memory utilization of node %s is"
"Memory utilization on node %s in %s"
" back to normal" % (
alert['tags']['fqdn']))
alert['tags']['fqdn'],
alert['tags']['integration_id']))
else:
logger.log(
"error",
NS.publisher_id,
{
"message": "Alert %s have unsupported alert"
"message": "Unsupported alert %s "
"severity" % alert_json
}
)
Expand Down
27 changes: 15 additions & 12 deletions tendrl/monitoring_integration/alert/handlers/node/swap_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@ def format_alert(self, alert_json):
else:
alert['severity'] = \
constants.TENDRL_SEVERITY_MAP['warning']
alert['tags']['message'] = ("Swap utilization of node %s is"
" %s %% which is above the %s "
"threshold (%s %%)." % (
alert['tags']['fqdn'],
alert['current_value'],
alert['severity'],
alert['tags']['warning_max']
))
alert['tags']['message'] = \
("Swap utilization on node %s in"
" %s at %s %% and running out of "
"swap space" % (
alert['tags']['fqdn'],
alert['tags']['integration_id'],
alert['current_value']
))
elif alert_json['State'] == constants.GRAFANA_CLEAR_ALERT:
# Identifying clear alert from which panel critical/warning
if "critical" in alert_json['Name'].lower():
Expand All @@ -61,15 +61,18 @@ def format_alert(self, alert_json):
alert['tags']['clear_alert'] = \
constants.TENDRL_SEVERITY_MAP['warning']
alert['severity'] = constants.TENDRL_SEVERITY_MAP['info']
alert['tags']['message'] = ("Swap utilization of node %s is"
" back to normal" % (
alert['tags']['fqdn']))
alert['tags']['message'] = \
("Swap utilization on node %s in"
" %s back to normal" % (
alert['tags']['fqdn'],
alert['tags']['integration_id']
))
else:
logger.log(
"error",
NS.publisher_id,
{
"message": "Alert %s have unsupported alert"
"message": "Unsupported alert %s "
"severity" % alert_json
}
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ def run(self):
"debug",
NS.publisher_id,
{
"message": "Cluster %s moved to un-managed state.\n"
"The archived monitoring data available at: %s" %
"message": "%s un-managed.\n"
"Archived monitoring data to %s" %
(integration_id, archive_path)
}
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,8 @@ def test_brick_handler(vol_name, cluster_name, pid, node_id):
'-c5aff4eef15d',
'warning_max': 75,
'message': u'Brick utilization of dhcp122-234'
':|gluster|brick1 under volume vol1 in cluster '
'7616f2a4-6502-4222-85bb-c5aff4eef15d is back '
'normal',
':|gluster|brick1 in vol1 '
'back to normal',
'volume_name': 'vol1'
},
'source': 'GRAFANA',
Expand All @@ -68,9 +67,8 @@ def test_brick_handler(vol_name, cluster_name, pid, node_id):
'percent-percent_bytes',
'fqdn': u'dhcp122-234',
'message': u'Brick utilization of dhcp122-234:|'
'gluster|brick1 under volume vol1 in cluster '
'7616f2a4-6502-4222-85bb-c5aff4eef15d is 20.75 % '
'which is above WARNING threshold (17 %)',
'gluster|brick1 in vol1 '
'at 20.75 % and nearing full capacity',
'integration_id': u'7616f2a4-6502-4222-85bb-'
'c5aff4eef15d',
'cluster_name': 'c1',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ def test_volume_handler(cluster_name, pid):
'node_id': None,
'alert_type': 'UTILIZATION',
'tags': {'cluster_name': 'c1',
'message': u'Volume utilization of '
'message': u'Volume utilization on '
'V1 in cluster 7616f2a4-6502-4222-'
'85bb-c5aff4eef15d is back normal',
'85bb-c5aff4eef15d back to normal',
'warning_max': 75,
'volume_name': u'V1',
'integration_id': u'7616f2a4-6502-4222'
Expand Down Expand Up @@ -63,10 +63,9 @@ def test_volume_handler(cluster_name, pid):
'warning_max': 14,
'integration_id': u'7616f2a4-6502-4222'
'-85bb-c5aff4eef15d',
'message': u'Volume utilization of V1 in'
'message': u'Volume utilization on V1 in'
' cluster 7616f2a4-6502-4222-85bb-c5aff4eef1'
'5d is 20.86 % which is above WARNING '
'threshold (14 %)'
'5d at 20.86 % and nearing full capacity'
},
'source': 'GRAFANA',
'current_value': '20.86',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@ def test_cpu_handler(pid, node_id):
'time_stamp': u'2018-02-07T17:28:05+05:30',
'tags': {'warning_max': 80,
'fqdn': u'dhcp122-234',
'message': u'Cpu utilization of '
'node dhcp122-234 is back to normal',
'message': u'Cpu utilization on '
'dhcp122-234 in '
'7616f2a4-6502-4222-'
'85bb-c5aff4eef15d back to normal',
'integration_id': '7616f2a4-6502-4222-'
'85bb-c5aff4eef15d'
}
Expand All @@ -46,9 +48,10 @@ def test_cpu_handler(pid, node_id):
condition = {'pid': '123',
'tags': {'fqdn': u'dhcp122-234',
'warning_max': 1,
'message': u'Cpu utilization of node '
'dhcp122-234 is 2.61 % which is above the '
'WARNING threshold (1 %).',
'message': u'Cpu utilization on '
'dhcp122-234 in 7616f2a4-6502-4222-'
'85bb-c5aff4eef15d at 2.61 % and '
'running out of cpu',
'integration_id': '7616f2a4-6502-4222-'
'85bb-c5aff4eef15d'
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ def test_memory_handler(pid, node_id):
condition = {'pid': '123',
'tags': {'warning_max': 80,
'fqdn': u'dhcp122-234',
'message': u'Memory utilization of '
'node dhcp122-234 is back to normal',
'message': u'Memory utilization on '
'node dhcp122-234 in 7616f2a4-6502-4222-85bb-'
'c5aff4eef15d back to normal',
'integration_id': '7616f2a4-6502-4222-85bb-'
'c5aff4eef15d'
},
Expand Down Expand Up @@ -51,10 +52,11 @@ def test_memory_handler(pid, node_id):
'current_value': '29.47',
'node_id': '1',
'tags': {'fqdn': u'dhcp122-234',
'message': u'Memory utilization of '
'node dhcp122-234 '
'is 29.47 % which is above the WARNING '
'threshold (23 %).',
'message': u'Memory utilization on '
'node dhcp122-234 in '
'7616f2a4-6502-4222-85bb-'
'c5aff4eef15d'
'at 29.47 % and running out of memory',
'warning_max': 23,
'integration_id': '7616f2a4-6502-4222-85bb-'
'c5aff4eef15d'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ def test_swap_handler(pid, node_id):
'time_stamp': u'2018-02-07T17:40:02+05:30',
'pid': '123',
'tags': {'warning_max': 50,
'message': u'Swap utilization of node '
'dhcp122-234 is back to normal',
'message': u'Swap utilization on node '
'dhcp122-234 in 7616f2a4-6502-4222-85bb'
'-c5aff4eef15d back to normal',
'fqdn': u'dhcp122-234',
'integration_id': '7616f2a4-6502-4222-85bb-'
'c5aff4eef15d'
Expand All @@ -49,9 +50,10 @@ def test_swap_handler(pid, node_id):
'time_stamp': u'2018-02-12T11:16:23+05:30',
'pid': '123',
'tags': {'warning_max': 70,
'message': u'Swap utilization of node '
'dhcp122-234 is 80.0 % which is above '
'the WARNING threshold (70 %).',
'message': u'Swap utilization on node '
'dhcp122-234 in 7616f2a4-6502-4222-85bb-'
'c5aff4eef15d at 80.0 % and running out of '
'swap space',
'fqdn': u'dhcp122-234',
'integration_id': '7616f2a4-6502-4222-85bb-'
'c5aff4eef15d'
Expand Down

0 comments on commit 72a0d76

Please sign in to comment.