From a6b7eac5b0e2daa580c2bcd3d21e33771b2710a2 Mon Sep 17 00:00:00 2001
From: Arie Peterson <arie@greenhost.nl>
Date: Thu, 23 Feb 2023 12:45:44 +0100
Subject: [PATCH] Trap and report errors in postStart script

---
 Chart.yaml                                 |  2 +-
 templates/nextcloud-monitoring.yaml        | 43 ++++++++++++++++++++++
 templates/nextcloud-onlyoffice-config.yaml | 34 +++++++++++++++++
 values.yaml                                | 15 +++++++-
 4 files changed, 92 insertions(+), 2 deletions(-)
 create mode 100644 templates/nextcloud-monitoring.yaml

diff --git a/Chart.yaml b/Chart.yaml
index 4a627dc8..1d5333bb 100644
--- a/Chart.yaml
+++ b/Chart.yaml
@@ -4,7 +4,7 @@ description: |
   A helm chart for installing NextCloud and setting up ONLYOFFICE integration
 name: nextcloud-onlyoffice
 appVersion: NC-25.0.3-OO-7.2.2.56
-version: 0.15.10
+version: 0.15.11-poststart-1
 icon: https://cdn.rawgit.com/docker-library/docs/defa5ffc7123177acd60ddef6e16bddf694cc35f/nextcloud/logo.svg
 dependencies:
   # https://artifacthub.io/packages/helm/nextcloud/nextcloud
diff --git a/templates/nextcloud-monitoring.yaml b/templates/nextcloud-monitoring.yaml
new file mode 100644
index 00000000..6fd197ac
--- /dev/null
+++ b/templates/nextcloud-monitoring.yaml
@@ -0,0 +1,43 @@
+{{- if (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1") }}
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: nextcloud
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/name: nextcloud
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+spec:
+  namespaceSelector:
+    matchNames:
+    - {{ .Release.Namespace | quote }}  # follow the release instead of a fixed namespace
+  podMetricsEndpoints:
+  - port: metrics  # named container port of the poststart-metrics sidecar
+  jobLabel: app.kubernetes.io/name
+  selector:
+    matchLabels:
+      app.kubernetes.io/component: app
+      app.kubernetes.io/instance: {{ .Release.Name | quote }}
+      app.kubernetes.io/name: nextcloud
+---
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: nextcloud
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/name: nextcloud
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+spec:
+  groups:
+  - name: nextcloud
+    rules:
+    - alert: NextcloudSetupError
+      expr: nextcloud_poststart_errors > 0  # gauge written by the postStart script
+      labels:
+        severity: warning
+      annotations:
+        message: The setup-apps.sh script of Nextcloud has encountered errors. See the postStart logs for more information.
+{{- end }}
diff --git a/templates/nextcloud-onlyoffice-config.yaml b/templates/nextcloud-onlyoffice-config.yaml
index 67e2ceba..de164044 100644
--- a/templates/nextcloud-onlyoffice-config.yaml
+++ b/templates/nextcloud-onlyoffice-config.yaml
@@ -30,6 +30,40 @@ data:
     exec > /var/www/tmp/postStart-$(date +"%s").log
     exec 2> /var/www/tmp/postStart-$(date +"%s")_error.log
 
+    # Publish a simple status (errors or no errors) as a prometheus gauge in a
+    # file; a sidecar container serves that file to prometheus.
+    report_metrics() {
+      errors=$1
+      outfile=/srv/metrics/status
+      printf '%s\n' \
+        '# HELP nextcloud_poststart_errors Whether the nextcloud postStart script has encountered errors.' \
+        '# TYPE nextcloud_poststart_errors gauge' \
+        "nextcloud_poststart_errors $errors" > "$outfile"
+    }
+    # Nothing has gone wrong yet at this point, so start out with 0.
+    report_metrics "0"
+
+    exception_handler() {
+        # Args: trap name, exit status at trap time, line number at trap time.
+        signal=$1 exitCode=$2 lineNumber=$3
+        # The EXIT trap also fires on a normal exit; status 0 is not an error.
+        if [ "$signal" = EXIT ] && [ "$exitCode" -eq 0 ]; then return 0; fi
+        echo "setup-apps.sh received $signal (code $exitCode) on line $lineNumber"
+        echo "Exiting with status 0 to allow nextcloud to start."
+        # Report to prometheus that we have an error.
+        report_metrics "1"
+        # Disable the `EXIT` trap so the `exit` below does not run this handler
+        # a second time when we got here via ERR, SIGINT or SIGTERM. When we
+        # are already handling `EXIT` this is not needed, because bash treats
+        # an `exit` inside the `EXIT` handler specially, but it is harmless.
+        trap '' EXIT
+        exit 0
+    }
+
+    trap 'exception_handler ERR $? $LINENO' ERR
+    trap 'exception_handler EXIT $? $LINENO' EXIT
+    trap 'exception_handler SIGINT $? $LINENO' SIGINT
+    trap 'exception_handler SIGTERM $? $LINENO' SIGTERM
 
     # Copied from the NC docker entrypoint to run OCC commands
     run_as() {
diff --git a/values.yaml b/values.yaml
index 219f9694..de41efcc 100644
--- a/values.yaml
+++ b/values.yaml
@@ -37,10 +37,23 @@ nextcloud:
       - name: nextcloud-onlyoffice-config
         configMap:
           name: nextcloud-onlyoffice-config-and-scripts
-
+      - name: poststart-metrics
+        emptyDir:
+          medium: Memory
     extraVolumeMounts:
       - name: nextcloud-onlyoffice-config
         mountPath: /var/local
+      - name: poststart-metrics
+        mountPath: "/srv/metrics"
+    extraSidecarContainers:  # sidecar sharing /srv/metrics with the postStart script
+      - name: poststart-metrics
+        image: 'weibeld/file-exporter:0.0.2'
+        volumeMounts:
+          - name: poststart-metrics
+            mountPath: /srv/metrics
+        ports:
+          - name: metrics
+            containerPort: 9872
 
   lifecycle:
     postStartCommand:
-- 
GitLab