Skip to content
Snippets Groups Projects
Verified Commit acf537a4 authored by Maarten de Waard's avatar Maarten de Waard :angel:
Browse files

re-apply !612

parent 8e1a6873
No related branches found
No related tags found
No related merge requests found
......@@ -85,6 +85,7 @@ include:
- flux2/infrastructure/sources/grafana.yaml
- flux2/infrastructure/sources/wikimedia.yaml
- flux2/infrastructure/sources/prometheus-community.yaml
- flux2/config/monitoring/*.yaml
- install/install-app.sh
- test/taiko/*
- if: '$TRIGGER_JOBS =~ /enable-monitoring/'
......
......@@ -53,3 +53,25 @@ spec:
summary: "Reboot required on {{ $labels.instance }}"
description: "Warning: Reboot required on {{ $labels.instance }} (job {{ $labels.job\
\ }}) for more then 2 days."
# Rule group with resource-pressure alerts: host OOM kills and containers
# running close to their configured memory limit.
- name: stackspin-resources
  rules:
    # https://awesome-prometheus-alerts.grep.to/rules#rule-host-and-hardware-1-23
    # Fires when the node_vmstat_oom_kill counter increased within the last
    # 20 minutes. `for: 0m` means there is no pending period.
    - alert: HostOomKillDetected
      expr: increase(node_vmstat_oom_kill[20m]) > 0
      for: 0m
      labels:
        severity: warning
      annotations:
        summary: "Host OOM kill detected (instance {{ $labels.instance }})"
        description: "OOM kill detected\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
    # https://awesome-prometheus-alerts.grep.to/rules#rule-docker-containers-1-4
    # Working-set memory as a percentage of the memory limit, aggregated per
    # (instance, name). The `> 0` filter drops limit series that are not
    # positive, so only series with a positive limit form the denominator.
    # The condition must hold for 2 minutes before the alert fires.
    # NOTE(review): series without a `name` label would be summed into one
    # group per instance - confirm whether a `name!=""` matcher is wanted.
    - alert: ContainerMemoryUsage
      # Folded scalar: the lines below are joined with single spaces into one
      # PromQL expression.
      expr: >-
        (sum(container_memory_working_set_bytes) BY (instance, name)
        / sum(container_spec_memory_limit_bytes > 0) BY (instance, name)
        * 100) > 80
      for: 2m
      labels:
        severity: warning
      annotations:
        summary: "Container Memory usage (instance {{ $labels.instance }})"
        description: "Container Memory usage is above 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment