From 1745a5dd856bc36f7f6cf291101d5e117d6891b7 Mon Sep 17 00:00:00 2001 From: Varac <varac@varac.net> Date: Mon, 16 Dec 2019 12:24:06 +0100 Subject: [PATCH] Ignore failing pods and jobs from NC installation Currently, Nextcloud installation is a pain where a lot of jobs and pods fail until NC is finally installed. Still, they show up in Prometheus and create noise, which is why we ignore them in this check. Once we achieve a better NC installation we should revert this. Closes: #379 --- test/pytest/test_prometheus.py | 39 ++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/test/pytest/test_prometheus.py b/test/pytest/test_prometheus.py index d7a3fcce7..6fb82913a 100755 --- a/test/pytest/test_prometheus.py +++ b/test/pytest/test_prometheus.py @@ -1,9 +1,14 @@ import json import pytest -import requests +import re + @pytest.mark.prometheus def test_prometheus_alerts(host): + def summarize_alerts(alerts): + """Print a alert summary.""" + print('Total alerts: %s' % len(alerts)) + print(json.dumps(alerts, indent=2)) print("Starting prometheus test...") @@ -12,15 +17,37 @@ def test_prometheus_alerts(host): status = alert_json["status"] alerts = alert_json["data"]["alerts"] - # Filter out the ever firing "Dead mans switch" - real_alerts = [alert for alert in alerts - if alert["labels"]["severity"] != "none"] + real_alerts = [] + ignored_alerts = [] + + for alert in alerts: + # Filter out the ever firing "Dead mans switch" test alert + if (alert["labels"]["severity"] == "none" or \ + # Filter out failing Nextcloud installation jobs since a lot of + # them fail until they succeed during installation + (re.match(r'(KubeJobFailed|KubeJobCompletion)', + alert["labels"]["alertname"]) and + "nextcloud" in alert["labels"]["job_name"]) or + # Filter out failing Nextcloud pods since a lot of pods fail + # during installation + # We use python-behave tests to check for functionality + (re.match(r'(KubePodNotReady)', + alert["labels"]["alertname"])
and + "nextcloud" in alert["labels"]["pod"])): + ignored_alerts.append(alert) + else: + real_alerts.append(alert) + + print('\n\n\n========= Ignored ==========') + summarize_alerts(ignored_alerts) + + print('\n\n\n========= Firing ==========') + summarize_alerts(real_alerts) - alert_names = list(map(lambda alert: alert["labels"]["alertname"], real_alerts)) count = len(real_alerts) assert status == "success", "Failure queriying the prometheus api at" + url - assert count == 0, "Firing alerts: {0}".format(str(alert_names)) + assert count == 0, "Firing alerts: {0}".format(count) if __name__ == "__main__": -- GitLab