diff --git a/.gitlab/issue_templates/new_app.md b/.gitlab/issue_templates/new_app.md
index a27b00ed6bda7883c952a0a6e43a44969cf553b6..1f323d59b3cfc62d81f7a2de42fcc2273876baba 100644
--- a/.gitlab/issue_templates/new_app.md
+++ b/.gitlab/issue_templates/new_app.md
@@ -2,9 +2,19 @@
 
 ## Single sign-on
 
-- [ ] Integrate the new app into the signle sign-on system
+* [ ] Integrate the new app into the single sign-on system
+
+## Documentation
+
+* [ ] Add app to `docs/installation_instructions.md`
+* [ ] Add app to `docs/testing_instructions.md`
 
 ## Tests
 
-- [ ] Add behave feature (`tests/behave/feature`)
-- [ ] Check for successful helmrelease (`test/pytest/test_helmreleases.py`)
+* [ ] Add behave feature (`tests/behave/feature`)
+* [ ] Check for successful helmrelease (`test/pytest/test_helmreleases.py`)
+* [ ] Test cert (`test/pytest/test_certs.py`)
+
+## CI
+
+* [ ] Run new tests in CI (`.gitlab-ci.yml`)
diff --git a/ansible/roles/apps/templates/monitoring-settings.yaml b/ansible/roles/apps/templates/monitoring-settings.yaml
index e3b558479ff01006201df9c4844a7b1ff0f77613..463a1a33359ae24dffb8b2501cac03addcced4b4 100644
--- a/ansible/roles/apps/templates/monitoring-settings.yaml
+++ b/ansible/roles/apps/templates/monitoring-settings.yaml
@@ -122,17 +122,16 @@ grafana:
   grafana.ini:
     server:
       root_url: "https://grafana.{{ domain }}"
-    auth:
-      generic_oauth:
-        name: OpenAppStack
-        enabled: true
-        client_id: grafana
-        client_secret: "{{ grafana_oauth_client_secret }}"
-        scopes: "openid profile email openappstack_roles"
-        auth_url: "https://sso.{{ domain }}/oauth2/auth"
-        token_url: "https://sso.{{ domain }}/oauth2/token"
-        api_url: "https://sso.{{ domain }}/userinfo"
-        role_attribute_path: contains(openappstack_roles[*], 'admin') && 'Admin' || 'Editor'
+    auth.generic_oauth:
+      name: OpenAppStack
+      enabled: true
+      client_id: grafana
+      client_secret: "{{ grafana_oauth_client_secret }}"
+      scopes: "openid profile email openappstack_roles"
+      auth_url: "https://sso.{{ domain }}/oauth2/auth"
+      token_url: "https://sso.{{ domain }}/oauth2/token"
+      api_url: "https://sso.{{ domain }}/userinfo"
+      role_attribute_path: contains(openappstack_roles[*], 'admin') && 'Admin' || 'Editor'
   ingress:
     enabled: true
     annotations:
diff --git a/docs/installation_instructions.md b/docs/installation_instructions.md
index 6e412631bb9980c46edf5345bb7ac4b6d05c24c0..0137bcdfbe55076cd2a666cfee98f14048bd2e6d 100644
--- a/docs/installation_instructions.md
+++ b/docs/installation_instructions.md
@@ -40,7 +40,9 @@ guide][https://openappstack.net/contact.html).
 * At least 25GB of disk space for installation, plus more for application data.
   We recommend starting with 30GB.
 * Root ssh access
-* Python3 installed
+* `python3-pip` installed. On a Debian-based system:
+
+  `apt install python3-pip`
 * A trusted machine to run the installer on (we call this the `provisioning
   machine`). All the commands listed in these instructions should be run on the
@@ -322,7 +324,7 @@ These applications should be available after the installation is completed:
   button and then click "Login with OpenID Connect" to use the single sign-on
   server.
 * [Grafana](https://grafana.com) that shows you information about the status of
-  your cluster.
+  your cluster. Read more about Grafana in the [monitoring chapter below](#monitoring)
 
 ### Known limitations
 
diff --git a/docs/maintenance.md b/docs/maintenance.md
index e38abcb3ff27fd7a72b5c9f494ac1098c1d62cb1..883a23cfc31369e60975eeef4f96906873a8f4ee 100644
--- a/docs/maintenance.md
+++ b/docs/maintenance.md
@@ -17,6 +17,57 @@
 The [Loki documentation](https://github.com/grafana/loki#documentation) is a
 good starting point how this setup works, and the [Using Loki in
 Grafana](https://grafana.com/docs/grafana/latest/features/datasources/loki/)
 gets you started with querying your cluster logs with grafana.
+You will find the Loki Grafana integration on your cluster at https://grafana.oas.example.org/explore
+together with some generic query examples.
+
+### LogQL query examples
+
+Please also refer to the [LogQL documentation](https://github.com/grafana/loki/blob/master/docs/logql.md).
+
+#### Flux
+
+Flux is responsible for installing applications. It uses `helm-operator` to
+deploy the desired helm releases.
+
+Query all messages from `flux`:
+
+    {app="flux"}
+
+Query all messages from `flux` and `helm-operator`:
+
+    {app=~"(flux|helm-operator)"}
+
+`flux` messages containing `wordpress`:
+
+    {app = "flux"} |= "wordpress"
+
+`flux` messages containing `wordpress` without `unchanged` events (to only show
+the installation messages):
+
+    {app = "flux"} |= "wordpress" != "unchanged"
+
+Filter out redundant `flux` messages:
+
+    { app = "flux" } !~ "(unchanged | event=refreshed | method=Sync | component=checkpoint)"
+
+
+#### Cert-manager
+
+Cert-manager is responsible for requesting Let's Encrypt TLS certificates.
+
+Query `cert-manager` messages containing `chat`:
+
+    {app="cert-manager"} |= "chat"
+
+
+#### Hydra
+
+Hydra is the single sign-on system.
+
+Show only warnings and errors from `hydra`:
+
+    {container_name="hydra"} != "level=info"
+
 
 ## Backup
 
 Please take care to backup the following locations:
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index c0621de2d6cab4ce85135df9a7692b195d55dd7a..d6d257ba6830e9e852e97fbca0d417e5ff8f49c2 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -115,7 +115,7 @@ then:
 
 #### Behave tests
 
-##### Using Firefox instead of Chromium 
+##### Using Firefox instead of Chromium
 
 If you want to use Firefox instead of Chromium, you need to install the gecko
 driver
@@ -161,13 +161,35 @@ on the VPS:
 installed in your cluster. You can also use it to perform manual upgrades; see
 `helm --help`.
 
+## Using kubectl to debug your cluster
+
+You can use `kubectl`, the Kubernetes control program, to find and manipulate
+your Kubernetes cluster. Once you have installed `kubectl`, get access to your
+cluster with the OAS CLI:
+
+    $ python -m openappstack my-cluster info
+
+Look for these lines:
+
+    To use kubectl with this cluster, copy-paste this in your terminal:
+
+    export KUBECONFIG=/home/you/projects/openappstack/clusters/my-cluster/secrets/kube_config_cluster.yml
+
+Copy the whole `export` line into your terminal. In *the same terminal window*,
+kubectl will connect to your cluster.
+
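+To verify that the connection works, run any `kubectl` command against the
+cluster, for example:
+
+    $ kubectl get nodes
+    $ kubectl -n oas get pods
+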
 ## HTTPS Certificates
 
 OAS uses [cert-manager](http://docs.cert-manager.io/en/latest/) to automatically
 fetch [Let's Encrypt](https://letsencrypt.org/) certificates for all deployed
-services. If you experience invalid SSL certificates (i.e. your browser warns you
-when visiting Nextcloud (`https://files.YOUR.CLUSTER.DOMAIN`) here's how to
-debug this:
+services. If you experience invalid SSL certificates, i.e. your browser warns you
+when visiting Rocketchat (`https://chat.example.org`), here's how to
+debug this. A useful resource for troubleshooting is also the official cert-manager
+[Troubleshooting Issuing ACME Certificates](https://cert-manager.io/docs/faq/acme/)
+documentation.
+
+In this example we fix a failed certificate request for `chat.example.org`.
+We will start by checking if `cert-manager` is set up correctly.
 
 Did you create your cluster using the `--acme-staging` argument? Please check
 the resulting value of the `acme_staging` key in
@@ -177,42 +199,72 @@
 which can't be validated by default in your browser.
 
 Are all cert-manager pods in the `oas` namespace in the `READY` state ?
 
-    cluster$ kubectl -n oas get pods | grep cert-manager
+    $ kubectl -n oas get pods | grep cert-manager
 
 Are there any `cm-acme-http-solver-*` pods still running, indicating that there
 are unfinished certificate requests ?
 
-    cluster$ kubectl get pods --all-namespaces | grep cm-acme-http-solver
+    $ kubectl get pods --all-namespaces | grep cm-acme-http-solver
 
 Show the logs of the main `cert-manager` pod:
 
-    cluster$ kubectl -n oas logs -l "app.kubernetes.io/name=cert-manager"
+    $ kubectl -n oas logs -l "app.kubernetes.io/name=cert-manager"
 
 You can `grep` for your cluster domain or for any specific subdomain to narrow
 down results.
 
-## Using kubectl to debug your cluster
+Query for failed certificates, certificate requests, challenges or orders:
 
-You can use `kubectl`, the Kubernetes control program, to find and manipulate
-your Kubernetes cluster. Once you have installed `kubectl`, to get access to your
-cluster with the OAS CLI:
+    $ kubectl get --all-namespaces certificate,certificaterequest,challenge,order | grep -iE '(false|pending)'
+    oas-apps   certificate.cert-manager.io/oas-rocketchat                                      False     oas-rocketchat     15h
+    oas-apps   certificaterequest.cert-manager.io/oas-rocketchat-2045852889                    False                        15h
+    oas-apps   challenge.acme.cert-manager.io/oas-rocketchat-2045852889-1775447563-837515681   pending   chat.example.org   15h
+    oas-apps   order.acme.cert-manager.io/oas-rocketchat-2045852889-1775447563                 pending                      15h
 
-    $ python -m openappstack my-cluster info
+We see that the Rocketchat certificate resources are in a bad state since 15h.
 
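+While you continue debugging, you can keep an eye on these resources, for
+example by watching the certificate (same namespace and name as in the output
+above) until its READY column changes:
+
+    $ kubectl -n oas-apps get certificate oas-rocketchat --watch
+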
-Look for these lines:
+Show certificate resource status message:
 
-    To use kubectl with this cluster, copy-paste this in your terminal:
+    $ kubectl -n oas-apps get certificate oas-rocketchat -o jsonpath="{.status.conditions[*]['message']}"
+    Waiting for CertificateRequest "oas-rocketchat-2045852889" to complete
 
-    export KUBECONFIG=/home/you/projects/openappstack/clusters/my-cluster/secrets/kube_config_cluster.yml
+We see that the `certificate` is waiting for the `certificaterequest`, so let's
+query its status message:
+
+    $ kubectl -n oas-apps get certificaterequest oas-rocketchat-2045852889 -o jsonpath="{.status.conditions[*]['message']}"
+    Waiting on certificate issuance from order oas-apps/oas-rocketchat-2045852889-1775447563: "pending"
+
+Show the related order resource and look at the status and events:
+
+    kubectl -n oas-apps describe order oas-rocketchat-2045852889-1775447563
+
+Show the failed challenge resource reason:
+
+    $ kubectl -n oas-apps get challenge oas-rocketchat-2045852889-1775447563-837515681 -o jsonpath='{.status.reason}'
+    Waiting for http-01 challenge propagation: wrong status code '503', expected '200'
+
+In this example, deleting the challenge fixed the issue and a proper certificate
+could be fetched:
+
+    $ kubectl -n oas-apps delete challenges.acme.cert-manager.io oas-rocketchat-2045852889-1775447563-837515681
+
+
+## Application installation fails
+
+Find applications that fail to install:
+
+    helm ls --all | grep -i -v DEPLOYED
+    kubectl get helmreleases --all-namespaces | grep -i -v DEPLOYED
+
+The Nextcloud installation process in particular is brittle and error-prone.
+Let's take it as an example of how to debug the root cause.
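+
+A good starting point is usually the failed `helmrelease` resource itself and
+the logs of `helm-operator`, which performs the actual installation. For
+example (resource names, namespaces and label selectors below are assumptions,
+adjust them to the output of the commands above):
+
+    $ kubectl -n oas-apps describe helmrelease nextcloud
+    $ kubectl -n oas logs -l app=helm-operator | grep -i nextcloud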
 
-Copy the whole `export` line into your terminal. In *the same terminal window*,
-kubectl will connect to your cluster.
 
 ## Purge OAS and install from scratch
 
 If ever things fail beyond possible recovery, here's how to completely purge an
 OAS installation in order to start from scratch:
 
     cluster$ apt purge docker-ce-cli containerd.io
-    cluster$ mount | egrep '^(tmpfs.*kubelet|nsfs.*docker)' | cut -d' ' -f 3 | xargs umount
-    cluster$ systemctl reboot
+    cluster$ mount | egrep '^(.*kubelet|nsfs.*docker)' | cut -d' ' -f 3 | xargs umount
     cluster$ rm -rf /var/lib/docker /var/lib/OpenAppStack /etc/kubernetes /var/lib/etcd /var/lib/rancher /var/lib/kubelet /var/log/OpenAppStack /var/log/containers /var/log/pods
+    cluster$ systemctl reboot
diff --git a/docs/upgrading.md b/docs/upgrading.md
index 2c4f01f28672e039a0ead354f0b569e5012d9c80..7618d0f731da83fcda907184704cf2d5226fbcb1 100644
--- a/docs/upgrading.md
+++ b/docs/upgrading.md
@@ -9,13 +9,13 @@ intervention.
 
   ```
   cd CLUSTER_DIR
-  mkdir ./group_vars/all
+  mkdir -p ./group_vars/all/
   mv settings.yml ./group_vars/all/
   ```
 
 * [Flux](https://fluxcd.io) is now used to install and update applications.
   For that reason, we need you to remove all helm charts (WARNING: You will lose
-  your data!): 
+  your data!):
 
   ```
   helm delete --purge oas-test-cert-manager oas-test-local-storage \
diff --git a/requirements-stable.txt b/requirements-stable.txt
index 365222ec207fa1427b682d68a2319f67c02482a6..3e560257d067b0b4356cdafd28fb2f8eb20ea378 100644
--- a/requirements-stable.txt
+++ b/requirements-stable.txt
@@ -24,7 +24,6 @@ packaging==19.2
 paramiko==2.7.1
 parse==1.12.1
 parse-type==0.5.2
-pkg-resources==0.0.0
 pluggy==0.13.1
 psutil==5.6.7
 py==1.8.0
diff --git a/test/pytest/test_prometheus.py b/test/pytest/test_prometheus.py
index 05f00bceb1afbf8a40840a015f3a4970d14d2bca..b59ab572c3d8c814d4b0e878bdc4ff8834e5393c 100755
--- a/test/pytest/test_prometheus.py
+++ b/test/pytest/test_prometheus.py
@@ -1,5 +1,6 @@
 import json
 import pytest
+import re
 
 
 @pytest.mark.prometheus
@@ -49,27 +50,48 @@ def ignore_alert(alert):
     Returns true if an alert should be ignored. This is when the "severity"
     equals "none", or in some application specific cases.
     """
+
     if alert["labels"]["severity"] == "none":
         return True
+
     # Ignore `KubeAPILatencyHigh` fom high load during installation
     # phase
     if alert["labels"]["alertname"] == "KubeAPILatencyHigh":
         return True
-    # Filter out failing Nextcloud installation jobs since a lot of
-    # them fail until they succeed during installation
-    if "nextcloud" in alert["labels"]["pod"]:
-        if alert["labels"]["alertname"] in [
-                "KubeJobFailed",
-                "KubeJobCompletion",
-                "KubePodNotReady"]:
-            return True
-    # Filter out when RocketChat pods take very long to start
-    if "rocketchat" in alert["labels"]["pod"]:
-        if alert["labels"]["alertname"] in [
-                "KubePodNotReady",
-                "KubeDeploymentReplicasMismatch",
-                "KubeStatefulSetReplicasMismatch"]:
-            return True
+
+    if 'pod' in alert["labels"]:
+        # Filter out failing Nextcloud installation jobs since a lot of
+        # them fail until they succeed during installation
+        if "nextcloud" in alert["labels"]["pod"]:
+            if alert["labels"]["alertname"] in [
+                    "KubeJobFailed",
+                    "KubeJobCompletion",
+                    "KubePodNotReady"]:
+                return True
+
+        # Filter out when some apps take too long to start
+        if re.search("(rocketchat|wordpress)", alert["labels"]["pod"]):
+            if alert["labels"]["alertname"] in [
+                    "KubePodNotReady",
+                    "KubeDeploymentReplicasMismatch",
+                    "KubeStatefulSetReplicasMismatch"]:
+                return True
+
+        # Filter out failed single-sign-on pods until we fix
+        # https://open.greenhost.net/openappstack/single-sign-on/issues/26
+        if "single-sign-on-create-" in alert["labels"]["pod"]:
+            if alert["labels"]["alertname"] in ["KubePodNotReady"]:
+                return True
+
+    if 'job_name' in alert["labels"]:
+        # Filter out failed single-sign-on jobs until we fix
+        # https://open.greenhost.net/openappstack/single-sign-on/issues/26
+        if "single-sign-on-create-" in alert["labels"]["job_name"]:
+            if alert["labels"]["alertname"] in [
+                    "KubeJobFailed",
+                    "KubeJobCompletion"]:
+                return True
+
     return False