From a6b7eac5b0e2daa580c2bcd3d21e33771b2710a2 Mon Sep 17 00:00:00 2001
From: Arie Peterson <arie@greenhost.nl>
Date: Thu, 23 Feb 2023 12:45:44 +0100
Subject: [PATCH] Trap and report errors in postStart script

---
Review notes (text in this section is not part of the commit message):
- exception_handler now returns early when the EXIT trap fires with status 0,
  so a clean run of setup-apps.sh no longer reports
  nextcloud_poststart_errors 1.
- NOTE(review): line breaks, indentation and blank context lines in this copy
  were reconstructed from a whitespace-collapsed original; verify the context
  lines against the target files before applying. The post-image hash on the
  `index` line of templates/nextcloud-onlyoffice-config.yaml was not
  recomputed.

 Chart.yaml                                 |  2 +-
 templates/nextcloud-monitoring.yaml        | 43 ++++++++++++++++++++++
 templates/nextcloud-onlyoffice-config.yaml | 44 ++++++++++++++++++++++
 values.yaml                                | 15 +++++++-
 4 files changed, 102 insertions(+), 2 deletions(-)
 create mode 100644 templates/nextcloud-monitoring.yaml

diff --git a/Chart.yaml b/Chart.yaml
index 4a627dc8..1d5333bb 100644
--- a/Chart.yaml
+++ b/Chart.yaml
@@ -4,7 +4,7 @@ description: |
   A helm chart for installing NextCloud and setting up ONLYOFFICE integration
 name: nextcloud-onlyoffice
 appVersion: NC-25.0.3-OO-7.2.2.56
-version: 0.15.10
+version: 0.15.11-poststart-1
 icon: https://cdn.rawgit.com/docker-library/docs/defa5ffc7123177acd60ddef6e16bddf694cc35f/nextcloud/logo.svg
 dependencies:
   # https://artifacthub.io/packages/helm/nextcloud/nextcloud
diff --git a/templates/nextcloud-monitoring.yaml b/templates/nextcloud-monitoring.yaml
new file mode 100644
index 00000000..6fd197ac
--- /dev/null
+++ b/templates/nextcloud-monitoring.yaml
@@ -0,0 +1,43 @@
+{{- if (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1") }}
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: nextcloud
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/name: nextcloud
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+spec:
+  namespaceSelector:
+    matchNames:
+      - stackspin-apps
+  podMetricsEndpoints:
+    - port: metrics
+  jobLabel: app.kubernetes.io/name
+  selector:
+    matchLabels:
+      app.kubernetes.io/component: app
+      app.kubernetes.io/instance: {{ .Release.Name | quote }}
+      app.kubernetes.io/name: nextcloud
+---
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/name: nextcloud
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+  name: nextcloud
+spec:
+  groups:
+  - name: nextcloud
+    rules:
+    - alert: NextcloudSetupError
+      annotations:
+        message: The setup-apps.sh script of Nextcloud has encountered errors. See the postStart logs for more information.
+      expr: nextcloud_poststart_errors > 0
+      labels:
+        severity: warning
+{{- end }}
diff --git a/templates/nextcloud-onlyoffice-config.yaml b/templates/nextcloud-onlyoffice-config.yaml
index 67e2ceba..de164044 100644
--- a/templates/nextcloud-onlyoffice-config.yaml
+++ b/templates/nextcloud-onlyoffice-config.yaml
@@ -30,6 +30,50 @@ data:
 
     exec > /var/www/tmp/postStart-$(date +"%s").log
     exec 2> /var/www/tmp/postStart-$(date +"%s")_error.log
+    # Write a simple status (errors or no errors) to a file, to be served to
+    # prometheus by a sidecar container.
+    #   $1: value to report for nextcloud_poststart_errors (0 or 1 in this script).
+    report_metrics() {
+      errors=$1
+      outfile=/srv/metrics/status
+      truncate -s 0 $outfile
+      echo '# HELP nextcloud_poststart_errors Whether the nextcloud postStart script has encountered errors.' >> $outfile
+      echo '# TYPE nextcloud_poststart_errors gauge' >> $outfile
+      echo "nextcloud_poststart_errors $errors" >> $outfile
+    }
+    # We just started, so no errors yet!
+    report_metrics "0"
+
+    # Handler shared by all traps installed below.
+    #   $1: name of the trapped condition (ERR, EXIT, SIGINT or SIGTERM).
+    #   $2: value of $? at the moment the trap fired.
+    #   $3: value of $LINENO at the moment the trap fired.
+    exception_handler() {
+      signal=$1
+      exitCode=$2
+      lineNumber=$3
+      # The `EXIT` trap also fires when the script finishes normally. A zero
+      # exit status is not an error, so keep the metric at 0 and let the shell exit.
+      if [ "$signal" = "EXIT" ] && [ "$exitCode" -eq 0 ]; then
+        return 0
+      fi
+      echo "setup-apps.sh received $signal (code $exitCode) on line $lineNumber"
+      echo "Exiting with status 0 to allow nextcloud to start."
+      # Report to prometheus that we have an error.
+      report_metrics "1"
+      # Remove the handler for `EXIT` so we don't run that as well. If we're
+      # currently handling `EXIT` already then this is not necessary because
+      # bash treats an `exit` specially if it happens in the `EXIT` handler.
+      # If we're handling another signal though, then we want to prevent that
+      # the call to `exit` also triggers the `EXIT` handler.
+      trap '' EXIT
+      exit 0
+    }
+
+    trap 'exception_handler ERR $? $LINENO' ERR
+    trap 'exception_handler EXIT $? $LINENO' EXIT
+    trap 'exception_handler SIGINT $? $LINENO' SIGINT
+    trap 'exception_handler SIGTERM $? $LINENO' SIGTERM
 
     # Copied from the NC docker entrypoint to run OCC commands
     run_as() {
diff --git a/values.yaml b/values.yaml
index 219f9694..de41efcc 100644
--- a/values.yaml
+++ b/values.yaml
@@ -37,10 +37,23 @@ nextcloud:
       - name: nextcloud-onlyoffice-config
         configMap:
           name: nextcloud-onlyoffice-config-and-scripts
-
+      - name: poststart-metrics
+        emptyDir:
+          medium: Memory
     extraVolumeMounts:
       - name: nextcloud-onlyoffice-config
         mountPath: /var/local
+      - name: poststart-metrics
+        mountPath: "/srv/metrics"
+    extraSidecarContainers:
+      - name: poststart-metrics
+        image: "weibeld/file-exporter:0.0.2"
+        ports:
+          - name: metrics
+            containerPort: 9872
+        volumeMounts:
+          - name: poststart-metrics
+            mountPath: "/srv/metrics"
 
   lifecycle:
     postStartCommand:
-- 
GitLab