From bfafcecb74e5e91b0619ef43680f6eef60e368d7 Mon Sep 17 00:00:00 2001 From: Maarten de Waard <maarten@greenhost.nl> Date: Thu, 12 Aug 2021 17:09:36 +0200 Subject: [PATCH] document upgrade procedure (untested) and undo change of prometheus-basic-auth file because it was incorrect --- .gitlab-ci.yml | 2 +- CHANGELOG.md | 9 ++ docs/upgrading.rst | 128 ++++++++++++++++++ .../kube-prometheus-stack-release.yaml | 2 +- ...l.jinja => oas-prometheus-basic-auth.yaml} | 2 +- openappstack/cluster.py | 2 +- 6 files changed, 141 insertions(+), 4 deletions(-) rename install/templates/{oas-kube-prometheus-stack-basic-auth.yaml.jinja => oas-prometheus-basic-auth.yaml} (84%) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5f2c1a908..fb243c290 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -844,7 +844,7 @@ kube-prometheus-stack-alerts: allow_failure: true script: - *debug_information - - export BASIC_AUTH_PW=$(python3 -m openappstack $HOSTNAME secrets | grep oas-kube-prometheus-stack-basic-auth | cut -d'=' -f2) + - export BASIC_AUTH_PW=$(python3 -m openappstack $HOSTNAME secrets | grep oas-prometheus-basic-auth | cut -d'=' -f2) - cd test/ - pytest -s -m 'prometheus' --connection=ansible --ansible-inventory=${CLUSTER_DIR}/inventory.yml --hosts='ansible://*' extends: diff --git a/CHANGELOG.md b/CHANGELOG.md index bb25e69ad..defb8bf78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## [0.7.0] - Unreleased + +* **BREAKING**: the `oas-secrets` helm chart has been removed. Because of this + `flux` will remove your existing secrets when you change the tracking branch + from `v0.6` to `v0.7`. 
Follow our [upgrade + guide](https://docs.openappstack.net/en/latest/upgrading.html) to prevent data + loss. +* TODO INSERT MORE HERE + ## [0.6.0] - 2021-07-14 ### Added diff --git a/docs/upgrading.rst b/docs/upgrading.rst index 17bc83dcd..7a543d70e 100644 --- a/docs/upgrading.rst +++ b/docs/upgrading.rst @@ -1,6 +1,134 @@ Upgrading OpenAppStack ====================== +Upgrading to 0.7.0 +------------------ + +Because of `problems with Helm and secret +management <https://open.greenhost.net/openappstack/openappstack/-/issues/891>`_ +we had to move away from using a helm chart for secrets, and now use scripts +that run during installation to manage secrets. Because we have removed the +``oas-secrets`` helm chart, Flux will try to remove the secrets that it has +generated. **It is important that you back up these secrets before switching +from v0.6 to v0.7!** + +To back up your secrets, run the following script: + +.. code:: bash + + #!/usr/bin/env bash + # Back up the secrets that were generated by the oas-secrets helm chart + + mkdir secrets-backup + + kubectl get secret -o yaml -n flux-system oas-cluster-variables > secrets-backup/oas-cluster-variables.yaml + kubectl get secret -o yaml -n flux-system oas-wordpress-variables > secrets-backup/oas-wordpress-variables.yaml + kubectl get secret -o yaml -n flux-system oas-wekan-variables > secrets-backup/oas-wekan-variables.yaml + kubectl get secret -o yaml -n flux-system oas-single-sign-on-variables > secrets-backup/oas-single-sign-on-variables.yaml + kubectl get secret -o yaml -n flux-system oas-rocketchat-variables > secrets-backup/oas-rocketchat-variables.yaml + kubectl get secret -o yaml -n flux-system oas-kube-prometheus-stack-variables > secrets-backup/oas-kube-prometheus-stack-variables.yaml + kubectl get secret -o yaml -n oas oas-prometheus-basic-auth > secrets-backup/oas-prometheus-basic-auth.yaml + kubectl get secret -o yaml -n oas oas-alertmanager-basic-auth > secrets-backup/oas-alertmanager-basic-auth.yaml + kubectl get secret -o yaml -n flux-system oas-oauth-variables 
> secrets-backup/oas-oauth-variables.yaml + kubectl get secret -o yaml -n flux-system oas-nextcloud-variables > secrets-backup/oas-nextcloud-variables.yaml + +This script assumes you have all applications enabled. You might get an error +like: + +.. code:: + + Error from server (NotFound): secrets "oas-wekan-variables" not found + +This is not a problem; it only means that the application is not enabled. + +This script creates a directory called ``secrets-backup`` and places the secrets +that have been generated by Helm in it as ``yaml`` files. + +Now you can upgrade your cluster by running ``kubectl edit gitrepository -n +flux-system openappstack`` and setting ``spec.ref.branch`` to ``v0.7``. + +Flux will now start updating your cluster to version 0.7. This process will most +likely fail, because it will remove the secrets that you just backed up. Make +sure that the ``oas-secrets`` helmrelease has been removed by running ``flux get +hr -A``. You might also see that some helmreleases start failing to be installed +because important secrets do not exist anymore. + +As soon as the ``oas-secrets`` helmrelease does not exist anymore, you can run +the following code: + +.. 
code:: bash + + #!/usr/bin/env bash + + # Uses https://github.com/mikefarah/yq -- install with `snap install yq` + yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-cluster-variables.yaml | kubectl apply -f - -n flux-system + yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-wordpress-variables.yaml | kubectl apply -f - -n flux-system + yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-wekan-variables.yaml | kubectl apply -f - -n flux-system + yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-single-sign-on-variables.yaml | kubectl apply -f - -n flux-system + yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-rocketchat-variables.yaml | kubectl apply -f - -n flux-system + yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-kube-prometheus-stack-variables.yaml | kubectl apply -f - -n flux-system + yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-prometheus-basic-auth.yaml | kubectl apply -f - -n oas + yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-alertmanager-basic-auth.yaml | kubectl apply -f - -n oas + yq eval 'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-oauth-variables.yaml | kubectl apply -f - -n flux-system + yq eval 
'del(.metadata.annotations,.metadata.labels,.metadata.creationTimestamp,.metadata.resourceVersion,.metadata.uid)' secrets-backup/oas-nextcloud-variables.yaml | kubectl apply -f - -n flux-system + +Again, this script assumes you have all applications installed. If you get the +following error, you can ignore it: + +.. code:: + + error: error validating "STDIN": error validating data: [apiVersion not set, kind not set]; if you choose to ignore these errors, turn validation off with --validate=false + +Now Flux should succeed in finishing the update. Some helmreleases or +kustomizations might have already failed because the secrets did not exist. Once +failed, you can retrigger reconciliation of a kustomization or helmrelease using +``flux reconcile kustomization ...`` or ``flux reconcile helmrelease ...``. + +Some errors we've seen during our own upgrade process, and how to solve them +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +SSO helm upgrade failed +''''''''''''''''''''''' + +.. code:: + + oas single-sign-on False Helm upgrade failed: template: single-sign-on/templates/secret-oauth2-clients.yaml:9:55: executing "single-sign-on/templates/secret-oauth2-clients.yaml" at <b64enc>: invalid value; expected string 0.2.2 False + +This means that the ``single-sign-on`` helmrelease was created with empty oauth +secrets. The secrets will get a value once the ``core`` *kustomization* is +reconciled: ``flux reconcile ks core`` should solve the problem. + +If that does not solve the problem, you should check if the secret contains a +value for all the apps: + +.. code:: + + # kubectl get secret -n flux-system oas-oauth-variables -o yaml + apiVersion: v1 + data: + grafana_oauth_client_secret: <redacted> + nextcloud_oauth_client_secret: <redacted> + rocketchat_oauth_client_secret: <redacted> + userpanel_oauth_client_secret: <redacted> + wekan_oauth_client_secret: <redacted> + wordpress_oauth_client_secret: <redacted> + ... 
+ +If your secret lacks any of these variables, use ``kubectl edit`` to add them. +You can use any password generator to generate their values. Make sure to +base64 encode each value before you enter it in the secret. + +Loki upgrade retries exhausted +'''''''''''''''''''''''''''''' + +While running ``flux get helmrelease -A``, +you'll see:: + + oas loki False upgrade retries exhausted 2.5.2 False + +This happens sometimes because Loki takes a long time to upgrade. Usually it is +solved by running ``flux reconcile hr loki -n oas`` again. + Upgrading to 0.6.0 ------------------ diff --git a/flux2/apps/monitoring/kube-prometheus-stack-release.yaml b/flux2/apps/monitoring/kube-prometheus-stack-release.yaml index a982303d8..7c252f302 100644 --- a/flux2/apps/monitoring/kube-prometheus-stack-release.yaml +++ b/flux2/apps/monitoring/kube-prometheus-stack-release.yaml @@ -135,7 +135,7 @@ spec: enabled: true annotations: nginx.ingress.kubernetes.io/auth-type: basic - nginx.ingress.kubernetes.io/auth-secret: oas-kube-prometheus-stack-basic-auth + nginx.ingress.kubernetes.io/auth-secret: oas-prometheus-basic-auth nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required' kubernetes.io/tls-acme: "true" pathType: ImplementationSpecific diff --git a/install/templates/oas-kube-prometheus-stack-basic-auth.yaml.jinja b/install/templates/oas-prometheus-basic-auth.yaml similarity index 84% rename from install/templates/oas-kube-prometheus-stack-basic-auth.yaml.jinja rename to install/templates/oas-prometheus-basic-auth.yaml index cb4f6fe32..d4d2d3552 100644 --- a/install/templates/oas-kube-prometheus-stack-basic-auth.yaml.jinja +++ b/install/templates/oas-prometheus-basic-auth.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: Secret metadata: namespace: "oas" - name: "oas-kube-prometheus-stack-basic-auth" + name: "oas-prometheus-basic-auth" data: # Readable version of the password for humans who want to log in pass: "{{ pass | b64encode }}" diff --git 
a/openappstack/cluster.py b/openappstack/cluster.py index 5680e15d6..7a36b0380 100644 --- a/openappstack/cluster.py +++ b/openappstack/cluster.py @@ -224,7 +224,7 @@ KUBECONFIG={cluster_dir}/kube_config_cluster.yml }, 'oas': { 'oas-alertmanager-basic-auth': ['pass'], - 'oas-kube-prometheus-stack-basic-auth': ['pass'] + 'oas-prometheus-basic-auth': ['pass'] } } -- GitLab