From bfafcecb74e5e91b0619ef43680f6eef60e368d7 Mon Sep 17 00:00:00 2001
From: Maarten de Waard <maarten@greenhost.nl>
Date: Thu, 12 Aug 2021 17:09:36 +0200
Subject: [PATCH] document upgrade procedure (untested) and undo change of
 prometheus-basic-auth file because it was incorrect

---
 .gitlab-ci.yml                                |   2 +-
 CHANGELOG.md                                  |   9 ++
 docs/upgrading.rst                            | 128 ++++++++++++++++++
 .../kube-prometheus-stack-release.yaml        |   2 +-
 ...l.jinja => oas-prometheus-basic-auth.yaml} |   2 +-
 openappstack/cluster.py                       |   2 +-
 6 files changed, 141 insertions(+), 4 deletions(-)
 rename install/templates/{oas-kube-prometheus-stack-basic-auth.yaml.jinja => oas-prometheus-basic-auth.yaml} (84%)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 5f2c1a908..fb243c290 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -844,7 +844,7 @@ kube-prometheus-stack-alerts:
   allow_failure: true
   script:
     - *debug_information
-    - export BASIC_AUTH_PW=$(python3 -m openappstack $HOSTNAME secrets | grep oas-kube-prometheus-stack-basic-auth | cut -d'=' -f2)
+    - export BASIC_AUTH_PW=$(python3 -m openappstack $HOSTNAME secrets | grep oas-prometheus-basic-auth | cut -d'=' -f2)
     - cd test/
     - pytest -s -m 'prometheus' --connection=ansible --ansible-inventory=${CLUSTER_DIR}/inventory.yml --hosts='ansible://*'
   extends:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bb25e69ad..defb8bf78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,14 @@
 # Changelog
 
+## [0.7.0] - Unreleased
+
+* **BREAKING**: the `oas-secrets` helm chart has been removed. Because of this,
+  `flux` will remove your existing secrets when you change the tracking branch
+  from `v0.6` to `v0.7`. Follow our [upgrade
+  guide](https://docs.openappstack.net/en/latest/upgrading.html) to prevent data
+  loss.
+* TODO INSERT MORE HERE
+
 ## [0.6.0] - 2021-07-14
 
 ### Added
diff --git a/docs/upgrading.rst b/docs/upgrading.rst
index 17bc83dcd..7a543d70e 100644
--- a/docs/upgrading.rst
+++ b/docs/upgrading.rst
@@ -1,6 +1,134 @@
 Upgrading OpenAppStack
 ======================
 
+Upgrading to 0.7.0
+------------------
+
+Because of `problems with Helm and secret
+management <https://open.greenhost.net/openappstack/openappstack/-/issues/891>`_
+we had to move away from using a helm chart for secrets, and now use scripts
+that run during installation to manage secrets. Because we have removed the
+``oas-secrets`` helm chart, Flux will try to remove the secrets that it has
+generated. **It is important that you back up these secrets before switching
+from v0.6 to v0.7!**
+
+To back up your secrets, run the following script:
+
+.. code:: bash
+
+   #!/usr/bin/env bash
+   # Back up every secret that was generated by the oas-secrets helm chart.
+
+   mkdir secrets-backup
+
+   kubectl get secret -o yaml -n flux-system  oas-cluster-variables > secrets-backup/oas-cluster-variables.yaml
+   kubectl get secret -o yaml -n flux-system  oas-wordpress-variables > secrets-backup/oas-wordpress-variables.yaml
+   kubectl get secret -o yaml -n flux-system  oas-wekan-variables > secrets-backup/oas-wekan-variables.yaml
+   kubectl get secret -o yaml -n flux-system  oas-single-sign-on-variables > secrets-backup/oas-single-sign-on-variables.yaml
+   kubectl get secret -o yaml -n flux-system  oas-rocketchat-variables > secrets-backup/oas-rocketchat-variables.yaml
+   kubectl get secret -o yaml -n flux-system  oas-kube-prometheus-stack-variables > secrets-backup/oas-kube-prometheus-stack-variables.yaml
+   kubectl get secret -o yaml -n oas          oas-prometheus-basic-auth > secrets-backup/oas-prometheus-basic-auth.yaml
+   kubectl get secret -o yaml -n oas          oas-alertmanager-basic-auth > secrets-backup/oas-alertmanager-basic-auth.yaml
+   kubectl get secret -o yaml -n flux-system  oas-oauth-variables > secrets-backup/oas-oauth-variables.yaml
+   kubectl get secret -o yaml -n flux-system  oas-nextcloud-variables > secrets-backup/oas-nextcloud-variables.yaml
+
+This script assumes you have all applications enabled. You might get an error
+like:
+
+.. code::
+
+   Error from server (NotFound): secrets "oas-wekan-variables" not found
+
+This is not a problem.
+
+This script creates a directory called ``secrets-backup`` and places the secrets
+that have been generated by Helm in it as ``yaml`` files.
+
+Now you can upgrade your cluster by running ``kubectl edit gitrepository -n
+flux-system openappstack`` and setting ``spec.ref.branch`` to ``v0.7``.
+
+Flux will now start updating your cluster to version 0.7. This process will most
+likely fail, because it will remove the secrets that you just backed up. Make
+sure that the ``oas-secrets`` helmrelease has been removed by running ``flux get
+hr -A``. You might also see that some helmreleases start failing to be installed
+because important secrets do not exist anymore. 
+
+As soon as the ``oas-secrets`` helmrelease does not exist anymore, you can run
+the following code:
+
+.. code:: bash
+
+   #!/usr/bin/env bash
+
+   # Uses https://github.com/mikefarah/yq -- install with `snap install yq`
+   yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-cluster-variables.yaml | kubectl apply -f - -n flux-system
+   yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-wordpress-variables.yaml | kubectl apply -f - -n flux-system
+   yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-wekan-variables.yaml | kubectl apply -f - -n flux-system
+   yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-single-sign-on-variables.yaml | kubectl apply -f - -n flux-system
+   yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-rocketchat-variables.yaml | kubectl apply -f - -n flux-system
+   yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-kube-prometheus-stack-variables.yaml | kubectl apply -f - -n flux-system
+   yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-prometheus-basic-auth.yaml | kubectl apply -f - -n oas
+   yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-alertmanager-basic-auth.yaml | kubectl apply -f - -n oas
+   yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-oauth-variables.yaml | kubectl apply -f - -n flux-system
+   yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-nextcloud-variables.yaml | kubectl apply -f - -n flux-system
+
+Again, this script assumes you have all applications installed. If you get the
+following error, you can ignore it:
+
+.. code::
+
+   error: error validating "STDIN": error validating data: [apiVersion not set, kind not set]; if you choose to ignore these errors, turn validation off with --validate=false
+
+Now Flux should succeed in finishing the update. Some helmreleases or
+kustomizations might have already failed because the secrets did not exist. You
+can retrigger reconciliation of a failed kustomization or helmrelease with
+``flux reconcile kustomization ...`` or ``flux reconcile helmrelease ...``.
+
+Some errors we've seen during our own upgrade process, and how to solve them
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+SSO helm upgrade failed
+'''''''''''''''''''''''
+
+.. code::
+
+   oas         	single-sign-on        	False	Helm upgrade failed: template: single-sign-on/templates/secret-oauth2-clients.yaml:9:55: executing "single-sign-on/templates/secret-oauth2-clients.yaml" at <b64enc>: invalid value; expected string	0.2.2   	False
+
+This means that the ``single-sign-on`` helmrelease was created with empty oauth
+secrets. The secrets will get a value once the ``core`` *kustomization* is
+reconciled: ``flux reconcile ks core`` should solve the problem.
+
+If that does not solve the problem, you should check if the secret contains a
+value for all the apps: 
+
+.. code::
+
+   # kubectl get secret -n flux-system oas-oauth-variables -o yaml
+   apiVersion: v1
+   data:
+     grafana_oauth_client_secret: <redacted>
+     nextcloud_oauth_client_secret: <redacted>
+     rocketchat_oauth_client_secret: <redacted>
+     userpanel_oauth_client_secret: <redacted>
+     wekan_oauth_client_secret: <redacted>
+     wordpress_oauth_client_secret: <redacted>
+   ...
+
+If your secret lacks one of these variables, use ``kubectl edit`` to add it.
+You can use any password generator to generate a password for it. Make sure to
+base64 encode the data before you enter it in the secret.
+
+Loki upgrade retries exhausted
+''''''''''''''''''''''''''''''
+
+While running ``flux get helmrelease -A``, you'll see a line like the
+following::
+
+   oas         	loki                  	False  	upgrade retries exhausted       	2.5.2   	False
+
+This happens sometimes because Loki takes a long time to upgrade. Usually it is
+solved by running ``flux reconcile hr loki -n oas`` again.
+
 Upgrading to 0.6.0
 ------------------
 
diff --git a/flux2/apps/monitoring/kube-prometheus-stack-release.yaml b/flux2/apps/monitoring/kube-prometheus-stack-release.yaml
index a982303d8..7c252f302 100644
--- a/flux2/apps/monitoring/kube-prometheus-stack-release.yaml
+++ b/flux2/apps/monitoring/kube-prometheus-stack-release.yaml
@@ -135,7 +135,7 @@ spec:
         enabled: true
         annotations:
           nginx.ingress.kubernetes.io/auth-type: basic
-          nginx.ingress.kubernetes.io/auth-secret: oas-kube-prometheus-stack-basic-auth
+          nginx.ingress.kubernetes.io/auth-secret: oas-prometheus-basic-auth
           nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
           kubernetes.io/tls-acme: "true"
         pathType: ImplementationSpecific
diff --git a/install/templates/oas-kube-prometheus-stack-basic-auth.yaml.jinja b/install/templates/oas-prometheus-basic-auth.yaml
similarity index 84%
rename from install/templates/oas-kube-prometheus-stack-basic-auth.yaml.jinja
rename to install/templates/oas-prometheus-basic-auth.yaml
index cb4f6fe32..d4d2d3552 100644
--- a/install/templates/oas-kube-prometheus-stack-basic-auth.yaml.jinja
+++ b/install/templates/oas-prometheus-basic-auth.yaml
@@ -2,7 +2,7 @@ apiVersion: v1
 kind: Secret
 metadata:
   namespace: "oas"
-  name: "oas-kube-prometheus-stack-basic-auth"
+  name: "oas-prometheus-basic-auth"
 data:
   # Readable version of the password for humans who want to log in
   pass: "{{ pass | b64encode }}"
diff --git a/openappstack/cluster.py b/openappstack/cluster.py
index 5680e15d6..7a36b0380 100644
--- a/openappstack/cluster.py
+++ b/openappstack/cluster.py
@@ -224,7 +224,7 @@ KUBECONFIG={cluster_dir}/kube_config_cluster.yml
             },
             'oas': {
                 'oas-alertmanager-basic-auth': ['pass'],
-                'oas-kube-prometheus-stack-basic-auth': ['pass']
+                'oas-prometheus-basic-auth': ['pass']
             }
         }
 
-- 
GitLab