---
include:
  - /.gitlab/ci_templates/kaniko.yml
  - /.gitlab/ci_templates/ssh_setup.yml
  - template: 'Workflows/MergeRequest-Pipelines.gitlab-ci.yml'

# Global templates and YAML anchors
# =================================
#
# Used in various stages/job definitions

# We don't use a `before_script` definition here because `extends` doesn't
# merge `before_script` but rather overwrites it.
# So we rather use [yaml anchors](https://docs.gitlab.com/ce/ci/yaml/README.html#anchors)
# here. Unfortunately, anchors can't get included from files so we need to
# define them here.
#
# `.debug_information` is a list of shell snippets that jobs splice into
# their `script`/`before_script` via `*debug_information`. It prints a
# filtered set of env vars, the uptime, the CI image reference and, when
# present in the working directory, the contents of the dotenv files.
.debug_information: &debug_information
  - |
    echo "Env vars:"
    echo
    env | grep -E '^(HOSTNAME|CLUSTER_NAME|FQDN|IP_ADDRESS|CLUSTER_DIR|ANSIBLE_HOST_KEY_CHECKING|KANIKO_BUILD_IMAGENAME|SSH_KEY_ID|SHELL|CI_PROJECT_DIR)='
    echo
    echo "Uptime: $(uptime)"
    echo "KANIKO build image ref: ${CI_REGISTRY_IMAGE}/${KANIKO_BUILD_IMAGENAME}:${CI_CONTAINER_TAG}"
    echo
  - if [ -f .ci.env ]; then echo "Content of .ci.env:"; cat .ci.env; fi
  # The label names the file that is actually printed (.cluster.env).
  - if [ -f .cluster.env ]; then echo "Content of .cluster.env:"; cat .cluster.env; fi

# The dotenv report requires us to report the artifacts in every job that is
# required with a `needs:` from another job.
.report_artifacts:
  artifacts:
    paths:
      - clusters
    expire_in: 1 month
    when: always
    reports:
      dotenv: $CLUSTER_DIR/.cluster.env

# Rules that enable the cluster to be built and are applied to most steps
# (except for application-specific steps)
.general_rules:
  rules:
    - changes:
        - .gitlab-ci.yml
        - .gitlab/ci_scripts/*
        - Dockerfile
        - ansible/**/*
        - flux/**/*
        - flux2/**/*
        - install/**/*
        - test/**/*
        - stackspin/**/*
        - requirements.txt
    - if: '$TRIGGER_JOBS =~ /enable-.*/'
    - if: '$CI_COMMIT_MESSAGE =~ /TRIGGER_JOBS=.*enable-/'
    - if: '$CI_COMMIT_BRANCH == "main"'

# app rules
#
# Define the rules when/if app specific jobs are run.
# Just add the variable RESOURCE to the job like this:
#   variables:
#     RESOURCE: "eventrouter"
# and import the templates with e.g.
#   extends: .eventrouter_rules
# .eventrouter_rules will ensure that the job is only executed when:
# - files related to the app changed in the repo
# - a pipeline gets started from the UI and the job name is included in the
#   CI variable `TRIGGER_JOBS`
# - a commit is pushed containing the pattern TRIGGER_JOBS=.*<job name>
#   (e.g. TRIGGER_JOBS=ci-test-image-build,enable-nextcloud)
# - the pipeline runs on the `main` branch
#
# Gitlab CI allows pushing CI vars via `git push` but a bug prevents this when
# using merge request pipelines (see https://gitlab.com/gitlab-org/gitlab/-/issues/326098)

# The monitoring paths are spelled out instead of using $RESOURCE.
.monitoring_rules:
  rules:
    - changes:
        - flux2/apps/monitoring/*.yaml
        - flux2/cluster/optional/monitoring/*.yaml
        - flux2/core/base/sources/grafana.yaml
        - flux2/core/base/sources/wikimedia.yaml
        - flux2/core/base/sources/prometheus-community.yaml
        - flux2/config/monitoring/*.yaml
        - install/install-app.sh
        - install/flux-version-check.sh
        - test/taiko/*
    - if: '$TRIGGER_JOBS =~ /enable-monitoring/'
    - if: '$CI_COMMIT_MESSAGE =~ /TRIGGER_JOBS=.*enable-monitoring/'
    - if: '$CI_COMMIT_BRANCH == "main"'

.nextcloud_rules:
  rules:
    - changes:
        - flux2/apps/$RESOURCE/*.yaml
        - flux2/cluster/optional/$RESOURCE/*.yaml
        - flux2/core/base/sources/nextcloud.yaml
        - install/install-app.sh
        - install/flux-version-check.sh
        - test/taiko/*
    - if: '$TRIGGER_JOBS =~ /enable-nextcloud/'
    - if: '$CI_COMMIT_MESSAGE =~ /TRIGGER_JOBS=.*enable-nextcloud/'
    - if: '$CI_COMMIT_BRANCH == "main"'

.single_sign_on_rules:
  rules:
    - changes:
        - flux2/core/base/$RESOURCE/*.yaml
        - flux2/infrastructure/sources/single-sign-on.yaml
        - install/install-stackspin.sh
        - test/taiko/*
    - if: '$TRIGGER_JOBS =~ /enable-single-sign-on/'
    - if: '$CI_COMMIT_MESSAGE =~ /TRIGGER_JOBS=.*enable-single-sign-on/'
    - if: '$CI_COMMIT_BRANCH == "main"'

# Unlike the other app rules, velero does not watch test/taiko/*.
.velero_rules:
  rules:
    - changes:
        - flux2/apps/$RESOURCE/*.yaml
        - flux2/cluster/optional/$RESOURCE/*.yaml
        - flux2/core/base/sources/vmware-tanzu.yaml
        - install/install-app.sh
        - install/flux-version-check.sh
    - if: '$TRIGGER_JOBS =~ /enable-velero/'
    - if: '$CI_COMMIT_MESSAGE =~ /TRIGGER_JOBS=.*enable-velero/'
    - if: '$CI_COMMIT_BRANCH == "main"'

.wekan_rules:
  rules:
    - changes:
        - flux2/apps/$RESOURCE/*.yaml
        - flux2/cluster/optional/$RESOURCE/*.yaml
        - flux2/core/base/sources/wekan.yaml
        - install/install-app.sh
        - install/flux-version-check.sh
        - test/taiko/*
    - if: '$TRIGGER_JOBS =~ /enable-wekan/'
    - if: '$CI_COMMIT_MESSAGE =~ /TRIGGER_JOBS=.*enable-wekan/'
    - if: '$CI_COMMIT_BRANCH == "main"'

.wordpress_rules:
  rules:
    - changes:
        - flux2/apps/$RESOURCE/*.yaml
        - flux2/cluster/optional/$RESOURCE/*.yaml
        - flux2/core/base/sources/wordpress.yaml
        - install/install-app.sh
        - install/flux-version-check.sh
        - test/taiko/*
    - if: '$TRIGGER_JOBS =~ /enable-wordpress/'
    - if: '$CI_COMMIT_MESSAGE =~ /TRIGGER_JOBS=.*enable-wordpress/'
    - if: '$CI_COMMIT_BRANCH == "main"'

.zulip_rules:
  rules:
    - changes:
        - flux2/apps/$RESOURCE/*.yaml
        - flux2/cluster/optional/$RESOURCE/*.yaml
        - flux2/core/base/sources/zulip.yaml
        - install/install-app.sh
        - install/flux-version-check.sh
        - test/taiko/*
    - if: '$TRIGGER_JOBS =~ /enable-zulip/'
    - if: '$CI_COMMIT_MESSAGE =~ /TRIGGER_JOBS=.*enable-zulip/'
    - if: '$CI_COMMIT_BRANCH == "main"'

# Global declarations
# ===================

# https://docs.gitlab.com/ee/ci/yaml/README.html#workflowrules-templates

stages:
  - build
  - install-cluster
  - install-stackspin
  - base-ready
  - configure-stackspin
  - optional-apps-ready
  - certs
  - cluster-health
  - integration-test

# All values are quoted so they stay strings ("411", "False").
variables:
  SSH_KEY_ID: "411"
  HOSTNAME: "${CI_COMMIT_REF_SLUG}"
  ANSIBLE_HOST_KEY_CHECKING: "False"
  KANIKO_BUILD_IMAGENAME: "stackspin-ci"
  CLUSTER_DIR: "clusters/${CI_COMMIT_REF_SLUG}"

# CI_CONTAINER_TAG is written to .ci.env by the jobs of the build stage and
# published through their dotenv report.
default:
  image: "${CI_REGISTRY_IMAGE}/${KANIKO_BUILD_IMAGENAME}:${CI_CONTAINER_TAG}"

# Stage: build
# ============
#
# Builds CI test container image

# There are 2 moments in which we (re)build the container image. If some files are
# changed, or when the job is triggered with TRIGGER_JOBS.
# Builds the CI image and publishes the branch slug as CI_CONTAINER_TAG via a
# dotenv report. The build steps themselves come from `.kaniko_build`
# (presumably defined in the included kaniko template - confirm there).
ci-test-image-build:
  stage: build
  before_script:
    - *debug_information
  after_script:
    - |
      echo "CI_CONTAINER_TAG=${CI_COMMIT_REF_SLUG}" | tee .ci.env
  artifacts:
    paths:
      - .ci.env
    expire_in: 1 month
    when: always
    reports:
      dotenv: .ci.env
  environment:
    name: image/$CI_COMMIT_REF_SLUG
    url: https://open.greenhost.net:4567/stackspin/stackspin/stackspin-ci:${CI_COMMIT_REF_SLUG}
    auto_stop_in: 3 weeks
  rules:
    # Automatically rebuild the container image if this file, the Dockerfile,
    # the installed requirements or the kaniko template change
    - changes:
        - Dockerfile
        - requirements.txt
        - .gitlab/ci_templates/kaniko.yml
    # Also rebuild when the CI variable contains this job's name
    # or commit msg contains /TRIGGER_JOBS=.*ci-test-image-build/
    - if: '$TRIGGER_JOBS =~ /ci-test-image-build/'
    - if: '$CI_COMMIT_MESSAGE =~ /TRIGGER_JOBS=.*ci-test-image-build/'
  extends:
    - .kaniko_build
  interruptible: true

# Counterpart of ci-test-image-build for pipelines that do not rebuild the
# image: asks the registry API whether a tag for this branch exists and writes
# CI_CONTAINER_TAG (the branch slug, or "main" as fallback) to .ci.env.
report-ci-image-tag:
  stage: build
  image: "curlimages/curl"
  before_script:
    - *debug_information
  script:
    # `=` is the POSIX string comparison for `[ ]`; `==` is a shell extension.
    - |
      TAG_INFORMATION=$(curl -sS https://open.greenhost.net/api/v4/projects/stackspin%2Fstackspin/registry/repositories/73/tags/${CI_COMMIT_REF_SLUG});
      echo "Tag information: ${TAG_INFORMATION}"
      if [ "$TAG_INFORMATION" = '{"message":"404 Tag Not Found"}' ]; then
        CI_CONTAINER_TAG="main"
      else
        CI_CONTAINER_TAG="${CI_COMMIT_REF_SLUG}"
      fi
      echo "CI_CONTAINER_TAG=${CI_CONTAINER_TAG}" | tee .ci.env
  artifacts:
    paths:
      - .ci.env
    expire_in: 1 month
    when: always
    reports:
      dotenv: .ci.env
  rules:
    # Make sure this job does not run if ci-test-image-build runs.
    # Never run on file changes that trigger ci-test-image-build
    - changes:
        - Dockerfile
        - requirements.txt
        - .gitlab/ci_templates/kaniko.yml
      when: never
    # Never run when ci-test-image-build is triggered manually
    - if: '$TRIGGER_JOBS =~ /ci-test-image-build/'
      when: never
    # Never run when ci-test-image-build is triggered by the commit message
    - if: '$CI_COMMIT_MESSAGE =~ /TRIGGER_JOBS=.*ci-test-image-build/'
      when: never
    - when: always
  interruptible: true

# Stage: install-cluster
# ======================
#
# * Creates the vps for the pipeline
# * Installs k8s with ansible

create-vps:
  stage: install-cluster
  variables:
    SUBDOMAIN: "${CI_COMMIT_REF_SLUG}.ci"
    DOMAIN: "stackspin.net"
  script:
    - *debug_information
    # Creates a VPS based on a custom CI image for which the ansible playbook
    # has already run. See CONTRIBUTING.md#ci-pipeline-image for more info
    - bash .gitlab/ci_scripts/create_vps.sh
    # Make sure .ci.env variables are not lost
    - cat .ci.env >> ${CLUSTER_DIR}/.cluster.env
  extends:
    - .report_artifacts
    - .general_rules
  environment:
    name: $CI_COMMIT_REF_SLUG
    url: https://$FQDN
    on_stop: terminate-droplet
    auto_stop_in: 1 week
  interruptible: true

test-dns:
  stage: install-cluster
  needs:
    - job: create-vps
  # Needs a pytest ansible connection to get the configured system resolvers
  script:
    - *debug_information
    - cd ansible/
    - pytest -v -s -m 'dns' --connection=ansible --ansible-inventory=../${CLUSTER_DIR}/inventory.yml --hosts='ansible://*'
  extends:
    - .general_rules
  interruptible: true

install-k8s:
  stage: install-cluster
  needs:
    - job: create-vps
  script:
    - *debug_information
    # Copy inventory files to ansible folder for use in install-apps step
    - chmod 700 ansible
    - cp ${CLUSTER_DIR}/inventory.yml ansible/
    # Set up cluster
    - python3 -m stackspin $HOSTNAME install
  extends:
    - .ssh_setup
    - .report_artifacts
    - .general_rules
  interruptible: true

# Terminates a droplet and deletes the branch container image once the MR for it is merged
terminate-droplet:
  # Stage has to be the same as the step that created the VPS
  # https://docs.gitlab.com/ee/ci/environments.html#automatically-stopping-an-environment
  stage: install-cluster
  # Gets triggered by on_stop of create-vps job
  when: manual
  variables:
    GIT_STRATEGY: none
  script:
    - *debug_information
    # Delete droplet
    - python3 -c "import greenhost_cloud; greenhost_cloud.terminate_droplets_by_name(\"^${CI_COMMIT_REF_SLUG}\")"
    # Delete container image if one was created.
    # The command must not be wrapped in quotes here: inside a block scalar
    # quotes are literal, so the shell would treat the whole text as the name
    # of a command instead of running the `if`. The `-n` test skips the
    # request when no tag is known, so the bare `/tags/` URL is never called.
    - >
      if [ -n "$CI_CONTAINER_TAG" ] && [ "$CI_CONTAINER_TAG" != "main" ]; then
      curl --request DELETE --header "PRIVATE-TOKEN: ${CLEANER_TOKEN}"
      https://open.greenhost.net/api/v4/projects/stackspin%2Fstackspin/registry/repositories/73/tags/${CI_CONTAINER_TAG};
      fi
  environment:
    name: $CI_COMMIT_REF_SLUG
    action: stop

# Stage: install-stackspin
# ========================
#
# Installs flux and stackspin with it

install-stackspin:
  stage: install-stackspin
  needs:
    - job: test-dns
    - job: install-k8s
  script:
    - *debug_information
    # Customize env file, remove all comments and empty lines
    - cp install/.flux.env.example ${CLUSTER_DIR}/.flux.env
    - sed -i "s/1.2.3.4/$IP_ADDRESS/" ${CLUSTER_DIR}/.flux.env
    - sed -i "s/example.org/$FQDN/" ${CLUSTER_DIR}/.flux.env
    - sed -i "/^\s*#.*$/d; /^\s*$/d" ${CLUSTER_DIR}/.flux.env
    # Disable outgoing mail
    - sed -i "s/outgoing_mail_enabled=true/outgoing_mail_enabled=false/" ${CLUSTER_DIR}/.flux.env
    # Deploy secret/stackspin-cluster-variables
    - cp install/kustomization.yaml ${CLUSTER_DIR}
    - kubectl create namespace flux-system
    - kubectl apply -k ${CLUSTER_DIR}
    # Add an override so cert-manager uses the SSL.com ClusterIssuer
    - kubectl create namespace cert-manager
    - kubectl apply -f ./install/overrides/stackspin-cert-manager-override.yaml
    # Install flux and general, non-app specific secrets
    - bash ./install/install-stackspin.sh
  extends:
    - .report_artifacts
    - .general_rules
  interruptible: true

# Shared template for the enable-<app> jobs below; each job only sets
# RESOURCE and adds the matching app rules.
.enable_app_template:
  stage: install-stackspin
  needs:
    - job: install-stackspin
  script:
    - *debug_information
    # Add optional override values we need for the CI pipeline only
    # (applied only when an override file for $RESOURCE exists)
    - >
      [ -f ./install/overrides/stackspin-${RESOURCE}-override.yaml ] &&
      kubectl apply -f ./install/overrides/stackspin-${RESOURCE}-override.yaml
    - bash ./install/install-app.sh ${RESOURCE}
  interruptible: true

enable-monitoring:
  variables:
    RESOURCE: "monitoring"
  extends:
    - .enable_app_template
    - .monitoring_rules

enable-nextcloud:
  variables:
    RESOURCE: "nextcloud"
  extends:
    - .enable_app_template
    - .nextcloud_rules

enable-velero:
  variables:
    RESOURCE: "velero"
  extends:
    - .enable_app_template
    - .velero_rules

enable-wekan:
  variables:
    RESOURCE: "wekan"
  extends:
    - .enable_app_template
    - .wekan_rules

enable-wordpress:
  variables:
    RESOURCE: "wordpress"
  extends:
    - .enable_app_template
    - .wordpress_rules

enable-zulip:
  variables:
    RESOURCE: "zulip"
  extends:
    - .enable_app_template
    - .zulip_rules

# Stage: base-ready
# =================
#
# Test if base kustomizations are ready, before configuration can get applied
# that makes use of CRDs, e.g. clusterIssuer

# Shared template: runs the `kustomizations` pytest marker for $RESOURCE,
# rerunning up to 120 times with 20s between attempts.
.kustomization-ready:
  stage: base-ready
  needs:
    - job: install-stackspin
  script:
    - *debug_information
    - cd test/
    - export KUBECONFIG="${PWD}/../clusters/${HOSTNAME}/kube_config_cluster.yml"
    - pytest -v -s -m 'kustomizations' --resource="$RESOURCE" --reruns 120 --reruns-delay 20
  extends:
    - .general_rules
  interruptible: true

cert-manager-kustomization-ready:
  variables:
    RESOURCE: "cert-manager"
  extends:
    - .kustomization-ready

dashboard-kustomization-ready:
  variables:
    RESOURCE: "dashboard"
  extends:
    - .kustomization-ready

letsencrypt-issuer-kustomization-ready:
  variables:
    RESOURCE: "letsencrypt-issuer"
  extends:
    - .kustomization-ready

local-path-provisioner-kustomization-ready:
  variables:
    RESOURCE: "local-path-provisioner"
  extends:
    - .kustomization-ready

metallb-kustomization-ready:
  variables:
    RESOURCE: "metallb"
  extends:
    - .kustomization-ready

namespaces-kustomization-ready:
  variables:
    RESOURCE: "namespaces"
  extends:
    - .kustomization-ready

nginx-kustomization-ready:
  variables:
    RESOURCE: "nginx"
  extends:
    - .kustomization-ready

single-sign-on-kustomization-ready:
  variables:
    RESOURCE: "single-sign-on"
  extends:
    - .kustomization-ready

sources-kustomization-ready:
  variables:
    RESOURCE: "sources"
  extends:
    - .kustomization-ready

stackspin-kustomization-ready:
  variables:
    RESOURCE: "stackspin"
  extends:
    - .kustomization-ready

# Stage: configure-stackspin
# ==========================
#
# Configure cluster after basic installation
# e.g. CI-related config like sslcom clusterIssuer

configure-sslcom-issuer:
  stage: configure-stackspin
  needs:
    - job: install-stackspin
    - job: cert-manager-kustomization-ready
  script:
    - *debug_information
    # Install custom ClusterIssuer for SSL.com production certificates
    - bash ./.gitlab/ci_scripts/install_sslcom_issuer.sh
  extends:
    - .report_artifacts
    - .general_rules
  interruptible: true

# Stage: optional-apps-ready
# ==========================
#
# Check that the kustomizations of all installed apps are ready.

# Same check as `.kustomization-ready`, moved to the optional-apps-ready stage.
.app-kustomization-ready:
  stage: optional-apps-ready
  extends:
    - .kustomization-ready

monitoring-kustomization-ready:
  needs:
    - job: install-stackspin
    - job: enable-monitoring
  variables:
    RESOURCE: "monitoring"
  extends:
    - .app-kustomization-ready
    - .monitoring_rules

nextcloud-kustomization-ready:
  needs:
    - job: install-stackspin
    - job: enable-nextcloud
  variables:
    RESOURCE: "nextcloud"
  extends:
    - .app-kustomization-ready
    - .nextcloud_rules

velero-kustomization-ready:
  needs:
    - job: install-stackspin
    - job: enable-velero
  variables:
    RESOURCE: "velero"
  extends:
    - .app-kustomization-ready
    - .velero_rules

wekan-kustomization-ready:
  needs:
    - job: install-stackspin
    - job: enable-wekan
  variables:
    RESOURCE: "wekan"
  extends:
    - .app-kustomization-ready
    - .wekan_rules

wordpress-kustomization-ready:
  needs:
    - job: install-stackspin
    - job: enable-wordpress
  variables:
    RESOURCE: "wordpress"
  extends:
    - .app-kustomization-ready
    - .wordpress_rules

zulip-kustomization-ready:
  needs:
    - job: install-stackspin
    - job: enable-zulip
  variables:
    RESOURCE: "zulip"
  extends:
    - .app-kustomization-ready
    - .zulip_rules

# Stage: certs
# ============
#
# Test each app for proper certs

# Shared template: runs the `certs` pytest marker for $RESOURCE,
# rerunning up to 120 times with 10s between attempts.
.apps-cert:
  stage: certs
  script:
    - *debug_information
    - cd test/
    - pytest -v -s -m 'certs' --resource="$RESOURCE" --reruns 120 --reruns-delay 10
  interruptible: true

nextcloud-cert:
  variables:
    RESOURCE: "nextcloud"
  needs:
    - job: configure-sslcom-issuer
    - job: install-stackspin
  extends:
    - .apps-cert
    - .nextcloud_rules
# Certificate checks for the remaining apps. Each job picks its app through
# RESOURCE and waits for the SSL.com issuer and the base installation.

kube-prometheus-stack-cert:
  extends:
    - .apps-cert
    - .monitoring_rules
  needs:
    - job: configure-sslcom-issuer
    - job: install-stackspin
  variables:
    RESOURCE: 'kube-prometheus-stack'

single-sign-on-cert:
  extends:
    - .apps-cert
    - .general_rules
  needs:
    - job: configure-sslcom-issuer
    - job: install-stackspin
  variables:
    RESOURCE: 'single-sign-on'

dashboard-cert:
  extends:
    - .apps-cert
    - .general_rules
  needs:
    - job: configure-sslcom-issuer
    - job: install-stackspin
  variables:
    RESOURCE: 'dashboard'

wekan-cert:
  extends:
    - .apps-cert
    - .wekan_rules
  needs:
    - job: configure-sslcom-issuer
    - job: install-stackspin
  variables:
    RESOURCE: 'wekan'

wordpress-cert:
  extends:
    - .apps-cert
    - .wordpress_rules
  needs:
    - job: configure-sslcom-issuer
    - job: install-stackspin
  variables:
    RESOURCE: 'wordpress'

zulip-cert:
  extends:
    - .apps-cert
    - .zulip_rules
  needs:
    - job: configure-sslcom-issuer
    - job: install-stackspin
  variables:
    RESOURCE: 'zulip'

# Stage: cluster-health
# =====================
#
# General cluster health checks

# Runs the pytest tests marked `testinfra` against the hosts from the cluster
# inventory, over an ansible connection.
testinfra:
  extends:
    - .ssh_setup
    - .general_rules
  stage: cluster-health
  needs:
    - job: install-stackspin
  interruptible: true
  script:
    - *debug_information
    - cd ansible/
    - >-
      pytest -v -s -m 'testinfra' --connection=ansible
      --ansible-inventory=../${CLUSTER_DIR}/inventory.yml
      --hosts='ansible://*'

# Runs the pytest tests marked `prometheus`, retried through
# retry_cmd_until_success.sh with the arguments `30 10`.
prometheus-alerts:
  extends:
    - .monitoring_rules
  stage: cluster-health
  needs:
    - job: install-stackspin
    - job: kube-prometheus-stack-cert
  interruptible: true
  variables:
    # RESOURCE var is used in job specific rules (e.g. .monitoring_rules)
    RESOURCE: 'kube-prometheus-stack'
  script:
    - *debug_information
    # Take the 4th column of the basic-auth line from the secrets listing
    - >-
      export BASIC_AUTH_PW=$(python3 -m stackspin $HOSTNAME secrets
      | grep stackspin-prometheus-basic-auth
      | awk '{ print $4 }')
    - cd test/
    - >-
      bash ../.gitlab/ci_scripts/retry_cmd_until_success.sh 30 10
      pytest -s -m prometheus

# Stage: integration-test
# =======================
#
# Runs integration tests for most apps using taiko

# Shared template for the <app>-taiko jobs; screenshots are kept as
# artifacts only when the job fails.
.taiko:
  stage: integration-test
  interruptible: true
  before_script:
    - *debug_information
  script:
    # Retry taiko tests 20 times until they succeed,
    # with a sleep interval of 10s in between tests
    - >-
      bash ./.gitlab/ci_scripts/retry_cmd_until_success.sh 20 10
      unbuffer python3 -m stackspin $HOSTNAME test --apps $RESOURCE
      | ts -i | ts
  artifacts:
    when: on_failure
    expire_in: 1 month
    paths:
      - test/taiko/Screenshot*

dashboard-taiko:
  extends:
    - .taiko
    - .general_rules
  needs:
    - job: install-stackspin
    - job: dashboard-cert
    - job: single-sign-on-cert
    - job: dashboard-kustomization-ready
  variables:
    RESOURCE: 'dashboard'

grafana-taiko:
  extends:
    - .taiko
    - .monitoring_rules
  needs:
    - job: install-stackspin
    - job: kube-prometheus-stack-cert
    - job: single-sign-on-cert
    - job: monitoring-kustomization-ready
  variables:
    RESOURCE: 'grafana'

nextcloud-taiko:
  extends:
    - .taiko
    - .nextcloud_rules
  needs:
    - job: install-stackspin
    - job: nextcloud-cert
    - job: single-sign-on-cert
    - job: nextcloud-kustomization-ready
  variables:
    RESOURCE: 'nextcloud'

wekan-taiko:
  extends:
    - .taiko
    - .wekan_rules
  needs:
    - job: install-stackspin
    - job: wekan-cert
    - job: single-sign-on-cert
    - job: wekan-kustomization-ready
  variables:
    RESOURCE: 'wekan'

wordpress-taiko:
  extends:
    - .taiko
    - .wordpress_rules
  needs:
    - job: install-stackspin
    - job: wordpress-cert
    - job: single-sign-on-cert
    - job: wordpress-kustomization-ready
  variables:
    RESOURCE: 'wordpress'

zulip-taiko:
  extends:
    - .taiko
    - .zulip_rules
  needs:
    - job: install-stackspin
    - job: zulip-cert
    - job: single-sign-on-cert
    - job: zulip-kustomization-ready
  variables:
    RESOURCE: 'zulip'