diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 99420c7fe8b3043160c4cbc2f38bbe2d2e6b4d92..da8898a26544d84a698f363509f38e9537b699fb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -847,7 +847,7 @@ kube-prometheus-stack-alerts: - *debug_information - export BASIC_AUTH_PW=$(python3 -m openappstack $HOSTNAME secrets | grep oas-prometheus-basic-auth | cut -d'=' -f2) - cd test/ - - pytest -s -m 'prometheus' --connection=ansible --ansible-inventory=${CLUSTER_DIR}/inventory.yml --hosts='ansible://*' + - bash ../.gitlab/ci_scripts/retry_cmd_until_success.sh 10 10 pytest -s -m 'prometheus' --connection=ansible --ansible-inventory=${CLUSTER_DIR}/inventory.yml --hosts='ansible://*' extends: - .ssh_setup - .kube_prometheus_stack_rules diff --git a/test/pytest/test_prometheus.py b/test/pytest/test_prometheus.py index 70aec925381618ee2035f955072782eeee8f20b4..869285efefd2be590f7eec5569d76319402b36e0 100755 --- a/test/pytest/test_prometheus.py +++ b/test/pytest/test_prometheus.py @@ -11,47 +11,15 @@ def ignore_alert(alert): equals "none", or in some application specific cases. """ + # Ignore watchdog alerts if alert["labels"]["severity"] == "none": return True # Ignore `KubeAPILatencyHigh` fom high load during installation # phase - if alert["labels"]["alertname"] == "KubeAPILatencyHigh": + if re.search(r"(KubeAPIDown|KubeletDown)", alert["labels"]["alertname"]): return True - if 'pod' in alert["labels"]: - # Filter out failing Nextcloud installation jobs since a lot of - # them fail until they succeed during installation - if "nextcloud" in alert["labels"]["pod"]: - if alert["labels"]["alertname"] in [ - "KubeJobFailed", - "KubeJobCompletion", - "KubePodNotReady"]: - return True - - # Filter out when some apps take too long to start - if re.search("(rocketchat|wordpress)", alert["labels"]["pod"]): - if alert["labels"]["alertname"] in [ - "KubePodNotReady", - "KubeDeploymentReplicasMismatch", - "KubeStatefulSetReplicasMismatch"]: - return True - - # Filter out failed signgle-sign-on pods until we fix - # https://open.greenhost.net/openappstack/single-sign-on/issues/26 - if "single-sign-on-create-" in alert["labels"]["pod"]: - if alert["labels"]["alertname"] in ["KubePodNotReady"]: - return True - - if 'job_name' in alert["labels"]: - # Filter out failed signgle-sign-on jobs until we fix - # https://open.greenhost.net/openappstack/single-sign-on/issues/26 - if "single-sign-on-create-" in alert["labels"]["job_name"]: - if alert["labels"]["alertname"] in [ - "KubeJobFailed", - "KubeJobCompletion"]: - return True - return False