---
apiVersion: v1
kind: ConfigMap
metadata:
  name: oas-kube-prometheus-stack-values
data:
  values.yaml: |
    # https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml
    # From: https://github.com/cablespaghetti/k3s-monitoring/blob/master/kube-prometheus-stack-values.yaml
    # Disable etcd monitoring. See https://github.com/cablespaghetti/k3s-monitoring/issues/4
    kubeEtcd:
      enabled: false
    # Disable kube-controller-manager and kube-scheduler monitoring. See https://github.com/cablespaghetti/k3s-monitoring/issues/2
    kubeControllerManager:
      enabled: false
    kubeScheduler:
      enabled: false
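    # (On k3s, which the upstream values referenced above target, these
    # control-plane components run embedded in the k3s binary and the default
    # datastore is sqlite rather than etcd, so there are no separate metrics
    # endpoints for Prometheus to scrape.)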
    # https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml#L115
    alertmanager:
      ingress:
        enabled: true
        annotations:
          nginx.ingress.kubernetes.io/auth-type: basic
          nginx.ingress.kubernetes.io/auth-secret: oas-alertmanager-basic-auth
          nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
          kubernetes.io/tls-acme: "true"
        pathType: ImplementationSpecific
        hosts:
          - "alertmanager.${domain}"
        tls:
          - secretName: alertmanager-tls
            hosts:
              - "alertmanager.${domain}"
      config:
        global:
          smtp_from: "${outgoing_mail_from_address}"
          smtp_smarthost: "${outgoing_mail_smtp_host}:${outgoing_mail_smtp_port}"
          smtp_auth_username: "${outgoing_mail_smtp_user}"
          smtp_auth_password: "${outgoing_mail_smtp_password}"
        route:
          group_by: ['job']
          group_wait: 30s
          group_interval: 5m
          repeat_interval: 1h
          receiver: email
          routes:
            - match:
                # This is an alert meant to ensure that the entire alerting pipeline is functional.
                # This alert is always firing, therefore it should always be firing in Alertmanager
                # and always fire against a receiver. There are integrations with various notification
                # mechanisms that send a notification when this alert is not firing. For example the
                # "DeadMansSnitch" integration in PagerDuty.
                alertname: Watchdog
              receiver: 'null'
        receivers:
          - name: 'null'
          - name: email
            email_configs:
              - send_resolved: true
                to: ${admin_email}
        # Inhibition rules make it possible to mute a set of alerts when another alert is firing.
        # We use this to mute any warning-level notifications if the same alert is already critical.
        inhibit_rules:
          - source_match:
              severity: 'critical'
            target_match:
              severity: 'warning'
            # Apply inhibition if the alertname and namespace are the same.
            equal: ['alertname', 'namespace']
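      # The routing tree above can be sanity-checked offline with amtool
      # (shipped with Alertmanager); the config file path here is an assumption:
      #   amtool config routes test --config.file=alertmanager.yaml alertname=Watchdog
      # should resolve to the 'null' receiver, while any other alertname should
      # resolve to 'email'.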
      alertmanagerSpec:
        # replicas: 3
        # podAntiAffinity: "soft"
        storage:
          volumeClaimTemplate:
            spec:
              accessModes: ["ReadWriteOnce"]
              resources:
                requests:
                  storage: 1Gi
        # resources:
        #   limits:
        #     cpu: 500m
        #     memory: 64Mi
        #   requests:
        #     cpu: 25m
        #     memory: 32Mi
        # priorityClassName: high-priority
    prometheus:
      # https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus
      prometheusSpec:
        # Use these settings to debug prometheus queries
        # logLevel: debug
        # queryLogFile: /dev/stdout
        scrapeInterval: "3m"
        evaluationInterval: "3m"
        retention: "30d"
        storageSpec:
          volumeClaimTemplate:
            spec:
              accessModes: ["ReadWriteOnce"]
              resources:
                requests:
                  storage: 10Gi
        resources:
          limits:
            memory: 1.5Gi
          requests:
            cpu: 300m
        # Discover ALL custom serviceMonitors, podMonitors and prometheusrules,
        # not only those with particular release labels set by
        # kube-prometheus-stack
        # https://stackoverflow.com/a/65648944
        serviceMonitorSelectorNilUsesHelmValues: false
        ruleSelectorNilUsesHelmValues: false
        podMonitorSelectorNilUsesHelmValues: false
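        # With the three selectors above set to false, any ServiceMonitor in the
        # cluster is picked up, not just those carrying this Helm release's
        # label. A hypothetical example (names and labels are not part of this
        # config):
        #   apiVersion: monitoring.coreos.com/v1
        #   kind: ServiceMonitor
        #   metadata:
        #     name: my-app
        #   spec:
        #     selector:
        #       matchLabels:
        #         app: my-app
        #     endpoints:
        #       - port: metrics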
      ingress:
        enabled: true
        annotations:
          nginx.ingress.kubernetes.io/auth-type: basic
          nginx.ingress.kubernetes.io/auth-secret: oas-prometheus-basic-auth
          nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
          kubernetes.io/tls-acme: "true"
        pathType: ImplementationSpecific
        hosts:
          - "prometheus.${domain}"
        tls:
          - secretName: prometheus-tls
            hosts:
              - "prometheus.${domain}"
      #
      # service:
      #   sessionAffinity: "ClientIP"
      #
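      # The oas-prometheus-basic-auth secret referenced above follows the same
      # format as the Alertmanager one: an "auth" key holding htpasswd output.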
    grafana:
      # https://github.com/grafana/helm-charts/tree/main/charts/grafana
      adminPassword: "${grafana_admin_password}"
      grafana.ini:
        server:
          root_url: "https://grafana.${domain}"
        auth.generic_oauth:
          name: OpenAppStack
          enabled: true
          client_id: grafana
          client_secret: "${grafana_oauth_client_secret}"
          scopes: "openid profile email openappstack_roles"
          auth_url: "https://sso.${domain}/oauth2/auth"
          token_url: "https://sso.${domain}/oauth2/token"
          api_url: "https://sso.${domain}/userinfo"
          role_attribute_path: contains(openappstack_roles[*], 'admin') && 'Admin' || 'Editor'
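      # role_attribute_path is a JMESPath expression that Grafana evaluates
      # against the OIDC claims: users whose openappstack_roles claim contains
      # 'admin' become Grafana Admins, everyone else is mapped to Editor.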
      ingress:
        enabled: true
        annotations:
          kubernetes.io/tls-acme: "true"
        pathType: ImplementationSpecific
        hosts:
          - "grafana.${domain}"
        tls:
          - secretName: grafana-tls
            hosts:
              - "grafana.${domain}"
      persistence:
        enabled: true
        existingClaim: "grafana"
      podAnnotations:
        backup.velero.io/backup-volumes: "storage"
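      # The annotation above opts the pod volume named "storage" (Grafana's
      # data volume) into Velero's restic/file-system backups.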
      # This allows us to pick up the Loki datasource
      # sidecar:
      #   datasources:
      #     enabled: true
      #     label: grafana_datasource
      #   # Make a configmap with the label `grafana_dashboard` to add dashboards to
      #   # Grafana.
      #   dashboards:
      #     enabled: true
      #     label: grafana_dashboard
      # dashboardProviders:
      #   dashboardproviders.yaml:
      #     apiVersion: 1
      #     providers:
      #       - name: 'default'
      #         orgId: 1
      #         folder: ''
      #         type: file
      #         disableDeletion: false
      #         editable: true
      #         options:
      #           path: /var/lib/grafana/dashboards
      # dashboards:
      #   default:
      #     kube-dash:
      #       gnetId: 11074
      #       revision: 2
      #       datasource: Prometheus
      #     loki-dash:
      #       gnetId: 10880
      #       revision: 1
      #       datasource: Loki
      # datasources:
      #   datasources.yaml:
      #     apiVersion: 1
      #     datasources:
      #       - name: Prometheus
      #         type: prometheus
      #         url: http://prometheus-server
      #         access: proxy
      #         isDefault: true
      plugins:
        - grafana-piechart-panel
      resources:
        limits:
          cpu: 400m
          memory: 256Mi
        requests:
          cpu: 200m
          memory: 128Mi
      #
      # sidecar:
      #   resources:
      #     limits:
      #       cpu: 100m
      #       memory: 128Mi
      #     requests:
      #       cpu: 5m
      #       memory: 64Mi
    prometheusOperator:
      resources:
        limits:
          cpu: 400m
          memory: 256Mi
        requests:
          cpu: 100m
          memory: 128Mi
      # priorityClassName: high-priority
    prometheus-node-exporter:
      resources:
        limits:
          cpu: 800m
          memory: 64Mi
        requests:
          cpu: 100m
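# This ConfigMap only holds chart values; a minimal sketch of how a Flux v2
# HelmRelease could consume it via valuesFrom (the release name and namespace
# below are assumptions, not taken from this file):
#
#   apiVersion: helm.toolkit.fluxcd.io/v2beta1
#   kind: HelmRelease
#   metadata:
#     name: kube-prometheus-stack
#     namespace: oas
#   spec:
#     valuesFrom:
#       - kind: ConfigMap
#         name: oas-kube-prometheus-stack-values
#         valuesKey: values.yaml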