---
apiVersion: v1
kind: ConfigMap
metadata:
  name: oas-kube-prometheus-stack-values
data:
  values.yaml: |
    # https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml
    # From: https://github.com/cablespaghetti/k3s-monitoring/blob/master/kube-prometheus-stack-values.yaml
    # Disable etcd monitoring. See https://github.com/cablespaghetti/k3s-monitoring/issues/4
    kubeEtcd:
      enabled: false
    # Disable kube-controller-manager and kube-scheduler monitoring. See https://github.com/cablespaghetti/k3s-monitoring/issues/2
    kubeControllerManager:
      enabled: false
    kubeScheduler:
      enabled: false
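    # (On k3s, which the upstream values referenced above target, these
    # control-plane components run embedded in the k3s binary and the default
    # datastore is sqlite rather than etcd, so there are no separate metrics
    # endpoints for Prometheus to scrape.)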
    # https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml#L115
    alertmanager:
      ingress:
        enabled: true
        annotations:
          nginx.ingress.kubernetes.io/auth-type: basic
          nginx.ingress.kubernetes.io/auth-secret: oas-alertmanager-basic-auth
          nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
          kubernetes.io/tls-acme: "true"
        pathType: ImplementationSpecific
        hosts:
          - "alertmanager.${domain}"
        tls:
          - secretName: alertmanager-tls
            hosts:
              - "alertmanager.${domain}"
      config:
        global:
          smtp_from: "${outgoing_mail_from_address}"
          smtp_smarthost: "${outgoing_mail_smtp_host}:${outgoing_mail_smtp_port}"
          smtp_auth_username: "${outgoing_mail_smtp_user}"
          smtp_auth_password: "${outgoing_mail_smtp_password}"
        route:
          group_by: ['job']
          group_wait: 30s
          group_interval: 5m
          repeat_interval: 1h
          receiver: email
          routes:
            - match:
                # This is an alert meant to ensure that the entire alerting pipeline is functional.
                # This alert is always firing, therefore it should always be firing in Alertmanager
                # and always fire against a receiver. There are integrations with various notification
                # mechanisms that send a notification when this alert is not firing. For example the
                # "DeadMansSnitch" integration in PagerDuty.
                alertname: Watchdog
              receiver: 'null'
        receivers:
          - name: 'null'
          - name: email
            email_configs:
              - send_resolved: true
                to: ${admin_email}
        # Inhibition rules make it possible to mute a set of alerts when another alert is firing.
        # We use this to mute any warning-level notifications if the same alert is already critical.
        inhibit_rules:
          - source_match:
              severity: 'critical'
            target_match:
              severity: 'warning'
            # Apply inhibition if the alertname and namespace are the same.
            equal: ['alertname', 'namespace']
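      # The routing tree above can be sanity-checked offline with amtool
      # (shipped with Alertmanager); the config file path here is an assumption:
      #   amtool config routes test --config.file=alertmanager.yaml alertname=Watchdog
      # should resolve to the 'null' receiver, while any other alertname should
      # resolve to 'email'.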
      alertmanagerSpec:
        # replicas: 3
        # podAntiAffinity: "soft"
        storage:
          volumeClaimTemplate:
            spec:
              accessModes: ["ReadWriteOnce"]
              resources:
                requests:
                  storage: 1Gi
        # resources:
        #   limits:
        #     cpu: 500m
        #     memory: 64Mi
        #   requests:
        #     cpu: 25m
        #     memory: 32Mi
        # priorityClassName: high-priority
    prometheus:
      # https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus
      prometheusSpec:
        # Use these settings to debug prometheus queries
        # logLevel: debug
        # queryLogFile: /dev/stdout
        scrapeInterval: "3m"
        evaluationInterval: "3m"
        retention: "30d"
        storageSpec:
          volumeClaimTemplate:
            spec:
              accessModes: ["ReadWriteOnce"]
              resources:
                requests:
                  storage: 10Gi
        resources:
          limits:
            memory: 1.5Gi
          requests:
            cpu: 300m
        # Discover ALL custom serviceMonitors, podMonitors and prometheusrules,
        # not only those with particular release labels set by
        # kube-prometheus-stack
        # https://stackoverflow.com/a/65648944
        serviceMonitorSelectorNilUsesHelmValues: false
        ruleSelectorNilUsesHelmValues: false
        podMonitorSelectorNilUsesHelmValues: false
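        # With the three selectors above set to false, any ServiceMonitor in the
        # cluster is picked up, not just those carrying this Helm release's
        # label. A hypothetical example (names and labels are not part of this
        # config):
        #   apiVersion: monitoring.coreos.com/v1
        #   kind: ServiceMonitor
        #   metadata:
        #     name: my-app
        #   spec:
        #     selector:
        #       matchLabels:
        #         app: my-app
        #     endpoints:
        #       - port: metrics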
      ingress:
        enabled: true
        annotations:
          nginx.ingress.kubernetes.io/auth-type: basic
          nginx.ingress.kubernetes.io/auth-secret: oas-prometheus-basic-auth
          nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
          kubernetes.io/tls-acme: "true"
        pathType: ImplementationSpecific
        hosts:
          - "prometheus.${domain}"
        tls:
          - secretName: prometheus-tls
            hosts:
              - "prometheus.${domain}"
      #
      # service:
      #   sessionAffinity: "ClientIP"
      #
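      # The oas-prometheus-basic-auth secret referenced above follows the same
      # format as the Alertmanager one: an "auth" key holding htpasswd output.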
    grafana:
      # https://github.com/grafana/helm-charts/tree/main/charts/grafana
      adminPassword: "${grafana_admin_password}"
      grafana.ini:
        server:
          root_url: "https://grafana.${domain}"
        auth.generic_oauth:
          name: OpenAppStack
          enabled: true
          client_id: grafana
          client_secret: "${grafana_oauth_client_secret}"
          scopes: "openid profile email openappstack_roles"
          auth_url: "https://sso.${domain}/oauth2/auth"
          token_url: "https://sso.${domain}/oauth2/token"
          api_url: "https://sso.${domain}/userinfo"
          role_attribute_path: contains(openappstack_roles[*], 'admin') && 'Admin' || 'Editor'
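      # role_attribute_path is a JMESPath expression that Grafana evaluates
      # against the OIDC claims: users whose openappstack_roles claim contains
      # 'admin' become Grafana Admins, everyone else is mapped to Editor.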
      ingress:
        enabled: true
        annotations:
          kubernetes.io/tls-acme: "true"
        pathType: ImplementationSpecific
        hosts:
          - "grafana.${domain}"
        tls:
          - secretName: grafana-tls
            hosts:
              - "grafana.${domain}"
      persistence:
        enabled: true
        existingClaim: "grafana"
      podAnnotations:
        backup.velero.io/backup-volumes: "storage"
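      # The annotation above opts the pod volume named "storage" (Grafana's
      # data volume) into Velero's restic/file-system backups.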
      # This allows us to pick up the Loki datasource
      # sidecar:
      #   datasources:
      #     enabled: true
      #     label: grafana_datasource
      #   # Make a configmap with the label `grafana_dashboard` to add dashboards to
      #   # Grafana.
      #   dashboards:
      #     enabled: true
      #     label: grafana_dashboard
      # dashboardProviders:
      #   dashboardproviders.yaml:
      #     apiVersion: 1
      #     providers:
      #       - name: 'default'
      #         orgId: 1
      #         folder: ''
      #         type: file
      #         disableDeletion: false
      #         editable: true
      #         options:
      #           path: /var/lib/grafana/dashboards
      # dashboards:
      #   default:
      #     kube-dash:
      #       gnetId: 11074
      #       revision: 2
      #       datasource: Prometheus
      #     loki-dash:
      #       gnetId: 10880
      #       revision: 1
      #       datasource: Loki
      # datasources:
      #   datasources.yaml:
      #     apiVersion: 1
      #     datasources:
      #       - name: Prometheus
      #         type: prometheus
      #         url: http://prometheus-server
      #         access: proxy
      #         isDefault: true
      plugins:
        - grafana-piechart-panel
      resources:
        limits:
          cpu: 400m
          memory: 256Mi
        requests:
          cpu: 200m
          memory: 128Mi
      #
      # sidecar:
      #   resources:
      #     limits:
      #       cpu: 100m
      #       memory: 128Mi
      #     requests:
      #       cpu: 5m
      #       memory: 64Mi
    prometheusOperator:
      resources:
        limits:
          cpu: 400m
          memory: 256Mi
        requests:
          cpu: 100m
          memory: 128Mi
      # priorityClassName: high-priority
    prometheus-node-exporter:
      resources:
        limits:
          cpu: 800m
          memory: 64Mi
        requests:
          cpu: 100m
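# This ConfigMap only holds chart values; a minimal sketch of how a Flux v2
# HelmRelease could consume it via valuesFrom (the release name and namespace
# below are assumptions, not taken from this file):
#
#   apiVersion: helm.toolkit.fluxcd.io/v2beta1
#   kind: HelmRelease
#   metadata:
#     name: kube-prometheus-stack
#     namespace: oas
#   spec:
#     valuesFrom:
#       - kind: ConfigMap
#         name: oas-kube-prometheus-stack-values
#         valuesKey: values.yaml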