Newer
Older
@pytest.mark.prometheus
def test_prometheus_alerts(host):
    """Fail when Prometheus reports firing alerts that are not known noise.

    Fetches the alert list from the Prometheus HTTP API by running ``curl``
    through ``host``, splits the alerts into ignored and real ones, prints
    both groups as JSON and asserts that the query succeeded and that no
    real alert remains.

    Args:
        host: object offering ``check_output(command)`` that returns the
            command's stdout (presumably the testinfra ``host`` fixture --
            confirm against the test setup).

    Raises:
        AssertionError: if the API status is not ``"success"`` or if at
            least one non-ignored alert is present.
    """

    def summarize_alerts(alerts):
        """Print an alert summary."""
        print('Total alerts: %s' % len(alerts))
        print(json.dumps(alerts, indent=2))

    def is_ignored(alert):
        """Return True when ``alert`` is expected installation noise."""
        # Not every alert rule sets every label, so read labels with .get():
        # a missing label then simply does not match instead of raising
        # KeyError in the middle of the test.
        labels = alert.get("labels", {})
        alertname = labels.get("alertname", "")

        # Ignore the ever firing "Dead mans switch" test alert
        if labels.get("severity") == "none":
            return True

        # Filter out failing Nextcloud installation jobs since a lot of
        # them fail until they succeed during installation
        if (re.match(r'(KubeJobFailed|KubeJobCompletion)', alertname)
                and "nextcloud" in labels.get("job_name", "")):
            return True

        # Ignore failing Nextcloud pods since a lot of pods fail
        # during installation
        # We use python-behave tests to check for functionality
        if (re.match(r'(KubePodNotReady)', alertname)
                and "nextcloud" in labels.get("pod", "")):
            return True

        # Ignore `KubeAPILatencyHigh` from high load during installation
        # phase
        return alertname == "KubeAPILatencyHigh"

    print("Starting prometheus test...")
    url = 'http://127.0.0.1:30090/api/v1/alerts'
    alert_json = json.loads(host.check_output('curl ' + url))

    # Check the status before touching the payload: an unsuccessful
    # response should surface as this assertion message, not as a KeyError
    # on a missing "data" key.
    status = alert_json.get("status")
    assert status == "success", \
        "Failure querying the prometheus api at " + url

    alerts = alert_json["data"]["alerts"]

    real_alerts = []
    ignored_alerts = []
    for alert in alerts:
        if is_ignored(alert):
            ignored_alerts.append(alert)
        else:
            real_alerts.append(alert)

    print('\n\n\n========= Ignored ==========')
    summarize_alerts(ignored_alerts)
    print('\n\n\n========= Firing ==========')
    summarize_alerts(real_alerts)

    count = len(real_alerts)
    assert count == 0, "Firing alerts: {0}".format(count)