From 0b8f614d5ca30f9132abd69e73c73ffc0d8006de Mon Sep 17 00:00:00 2001
From: Arie Peterson <arie@greenhost.nl>
Date: Wed, 15 Nov 2023 21:25:03 +0100
Subject: [PATCH] Telepresence WIP

---
 README.md                           | 195 +++++++++++++------------
 backend/config.py                   |  13 +-
 backend/helpers/kubernetes.py       |  16 +-
 dev.sh                              | 219 ++++++++++++++++++++++++++++
 docker-compose.yml                  |  63 --------
 frontend/package.json               |   2 +-
 frontend/src/services/api/config.ts |   2 +-
 telepresence-values.yaml            |   5 +
 8 files changed, 355 insertions(+), 160 deletions(-)
 create mode 100755 dev.sh
 create mode 100644 telepresence-values.yaml

diff --git a/README.md b/README.md
index 34a99ff6..5ec7a55c 100644
--- a/README.md
+++ b/README.md
@@ -52,98 +52,109 @@ it is based on traditional Bootstrap + JQuery.
 
 ## Development environment
 
-After this process is finished, the following will run in local docker containers:
-
-- the dashboard frontend
-- the dashboard backend
-
-The following will be available through proxies running in local docker containers and port-forwards:
-
-- Hydra admin API
-- Kratos admin API and public API
-- The MariaDB database
-
-These need to be available locally, because Kratos wants to run on the same
-domain as the front-end that serves the login interface.
-
-### Setup
-
-Before you start, make sure your machine has the required software installed, as per official documentation: https://docs.stackspin.net/en/v2/installation/install_cli.html#preparing-the-provisioning-machine.
-
-Please read through all subsections to set up your environment before
-attempting to run the dashboard locally.
-
-#### 1. Stackspin cluster
-
-To develop the Dashboard, you need a Stackspin cluster that is set up as a
-development environment. Follow the instructions [in the
-dashboard-dev-overrides
-repository](https://open.greenhost.net/stackspin/dashboard-dev-overrides#dashboard-dev-overrides)
-in order to set up a development-capable cluster. The Dashboard, as well as
-Kratos and Hydra, will be configured to point their endpoints to
-`http://stackspin_proxy:8081` in that cluster. As a result, you can run
-components using the `docker-compose.yml` file in this repository, and still log
-into Stackspin applications that run on the cluster.
-
-#### 2. Environment for frontend
-
-The frontend needs to know where the backend API and hydra can be reached. To
-configure it, create a `local.env` file in the `frontend` directory:
-
-    cp local.env.example local.env
-
-#### 3. Setup hosts file
-
-The application will run on `http://stackspin_proxy`. Add the following line to
-`/etc/hosts` to be able to access that from your browser:
-
-```
-127.0.0.1 stackspin_proxy
-```
-
-#### 4. Kubernetes access
-
-The `./run_app.sh` script needs to access the Kubernetes cluster that runs your Stackspin instance. If you followed the setup as above, you will have a YAML configuration file somewhere on your machine -- usually in the `clusters` directory of your Stackspin local repository -- called `kube_config_cluster.yml`. This file holds all the configuration information (URLs, domain names, certificate data) needed to connect to the instance.
-
-Copy that file into the `backend/kubeconfig` directory.
-
-If you wish to connect this dashboard to another Stackspin cluster, you can replace the `kube_config_cluster.yml` file with the one that's in that Stackspin's `clusters` directory.
-
-## 5. Build and run
-
-To recap, you now have:
-
-- All the software and configurations as described above
-- A running Stackspin cluster (a VPS somewhere in The Cloud)
-- A `kube_config_cluster.yml` file in the `backend/kubeconfig` that will tell the script how to connect to your Stackspin cluster of choice
-- Overrides for local dashboard development (by installing and running the [Dashboard Dev Overrides](https://open.greenhost.net/stackspin/dashboard-dev-overrides) repository, editing your `/etc/hosts` file, etc)
-- A copy of the [Stackspin Dashboard repository](https://open.greenhost.net/stackspin/dashboard) on your device.
-
-That's a lot of work! Good job.
-
-### Setup your local dev environment
-
-Before you actually run the main script, `cd` into the `/frontend` directory and run`yarn install`.
-
-This is not strictly necessary for development; the script already builds and installs all the necessary modules in the dashboard's docker container. But running `yarn install` locally will let your IDE enable all of its bells and whistles like linting, autocorrecting, intellisense etc. Without this step, your IDE will most probably complain it cannot find any modules to `import`, as there is no `node_modules` folder.
-
-### Let's Run this App
-
-After you've finished all setup steps, you can run everything using:
-
-```
-./run_app.sh
-```
-
-This script
-
-- sets a few environment variables based on the content in your cluster
-  secrets, and
-- runs `docker compose up` to build and run all necessary components, including a reverse proxy and the backend flask application.
-
-If you're curious about what `docker compose up` does, you can check out the `docker-compose.yml` file. If you are curious about what `docker compose up` _means,_ you can start here: https://github.com/docker/compose or even here: https://en.wikipedia.org/wiki/Infrastructure_as_code.
-
-This should be it, congratulations!! If you're having issues, or if something is not working properly, please open an issue or get in touch: info@stackspin.net
+The development environment is a hybrid one, where one or both of the dashboard
+frontend and backend run locally, while the rest of the cluster runs on a
+remote machine.
+
+The remote should be a regular Stackspin cluster, though preferably one that's
+dedicated to development purposes.
+
+The local dashboard frontend and/or backend can run in a docker container or
+directly ("native mode"). (At this time it's not possible to mix the two, for
+example by having the dashboard backend run directly and the frontend in a
+docker container.)
+
+The connection between the local and remote parts is set up by a tool called
+telepresence. If you want to develop the frontend, for example, telepresence
+intercepts traffic going into the remote's frontend pod and redirects it to
+the copy running locally on your machine; responses from your local frontend
+are routed back via the remote. The interception is invisible to your browser,
+which you simply point at the remote cluster.
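+
+To give a feel for what this means in practice, intercepting the frontend
+boils down to roughly the following telepresence invocations. This is a
+simplified sketch of what the `dev.sh` script introduced below runs for you;
+the service and port names are taken from that script, and some flags (such as
+`--watch --verbose` for yarn) are omitted here:
+
+```
+# Open the two-way tunnel into the cluster's stackspin namespace.
+telepresence connect -n stackspin
+# Reroute traffic for the frontend pod to a local dev server on port 3000.
+telepresence intercept dashboard-frontend --service=dashboard \
+  --port 3000:80 --mount=true -- yarn start
+```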
+
+### Prerequisites
+
+#### Set up telepresence on your local development machine
+
+You need to do this once for every development machine you're using
+(workstation, laptop).
+
+* You need root on your machine, and at some point you'll have to allow
+  telepresence to perform actions as root, in order to make the network
+  changes needed for the two-way tunnel. If this is not possible or not
+  desirable, you can try running your local dashboard in a docker container
+  instead.
+* Set `user_allow_other` in `/etc/fuse.conf`. This is necessary because
+  telepresence adds (FUSE-based) sshfs mounts so your local code can access
+  volumes from the kubernetes cluster, in particular the one with the service
+  account token (credentials for calling the kubernetes api) that lets the
+  dashboard interact with the cluster.
+* Download and install the telepresence binary on your development machine:
+  https://www.getambassador.io/docs/telepresence-oss/latest/install
+
+#### Access to development cluster
+
+You need `kubectl` and `helm` binaries, and a `kubectl` configuration file
+(often called a "kubeconfig") containing the credentials needed to
+authenticate against your cluster. If the `KUBECONFIG` environment variable is
+set and points to the config file, it will be picked up by the various
+programs.
+
+#### Set up telepresence on your development cluster
+
+You need to do this once for every cluster you want to use as a development
+cluster.
+
+* Install telepresence on your development cluster:
+  ```
+  telepresence helm install -f telepresence-values.yaml
+  ```
+
+#### Install local dependencies
+
+Before running the frontend in native mode:
+* Make sure you have nodejs installed. You may want to use [Node Version
+  Manager](https://github.com/nvm-sh/nvm) to make it easy to install several
+  versions side by side.
+* Install the necessary javascript dependencies (they will be placed in
+  `frontend/node_modules`) using `./dev.sh frontend setup`.
+
+Before running the backend in native mode:
+* Make sure you have python3 installed.
+* Install the necessary python dependencies (in a virtualenv in `backend/venv`)
+  using `./dev.sh backend setup`.
+
+### Run
+
+From the root `dashboard` directory, run for example `./dev.sh frontend`. This
+will set up the telepresence tunnel to the cluster, and start the dashboard
+frontend server in native mode. `./dev.sh backend` will do the same but for the
+backend. You can run both at the same time (in separate terminal windows) if
+you want to make changes to both frontend and backend.
+
+If you want to run the local dashboard in docker instead, use `./dev.sh
+frontend docker` and/or `./dev.sh backend docker`. Please note that due to a
+telepresence limitation it's not currently possible to run the frontend
+natively and the backend in docker at the same time, or vice versa.
+
+#### Known issues
+
+* Running the dashboard backend locally with telepresence in docker mode
+  currently doesn't work because of dns resolution issues in the docker
+  container: https://github.com/telepresenceio/telepresence/issues/1492 . We
+  could work around this by using a fully qualified domain name for the
+  database service -- which doesn't agree with the goal of making the stackspin
+  namespace variable -- or by using the service env vars, but we're hoping that
+  telepresence will fix this in time.
+* Telepresence intercepts traffic to a pod, but the original pod keeps
+  running. For the backend this is sometimes problematic, for example when you
+  add database migrations that the original pod doesn't know about, causing it
+  to crash, or with SCIM, which involves timer-based actions that are then
+  performed both by your modified local instance and by the original remote
+  one. There is some work in progress to allow scaling down the intercepted
+  pod: https://github.com/telepresenceio/telepresence/issues/1608 .
+* If telepresence is giving errors, in particular ones about "an intercept
+  with the same name already existing" on repeated runs, it may help to reset
+  the telepresence state with `./dev.sh reset`. This will stop the local
+  telepresence daemon so it can be cleanly restarted on the next try, and will
+  also restart the "traffic manager" on the remote so it discards any old
+  lingering intercepts.
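+
+As a stopgap for the second issue, `dev.sh` (added below) can replace the
+image of the remote backend deployment with a pause image, so that only your
+local instance does any work:
+
+```
+./dev.sh backend pause
+```
+
+There is no corresponding "unpause" command yet; you'd have to restore the
+original image yourself, for example with `kubectl rollout undo -n stackspin
+deploy/dashboard-backend`.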

---

diff --git a/backend/config.py b/backend/config.py
index 7f9c276e..831f41ca 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -20,8 +20,19 @@ SQLALCHEMY_TRACK_MODIFICATIONS = False
 # Set this to "true" to load the config from a Kubernetes serviceaccount
 # running in a Kubernetes pod. Set it to "false" to load the config from the
 # `KUBECONFIG` environment variable.
-LOAD_INCLUSTER_CONFIG = os.environ.get("LOAD_INCLUSTER_CONFIG").lower() == "true"
+LOAD_INCLUSTER_CONFIG = os.environ.get("LOAD_INCLUSTER_CONFIG", "").lower() == "true"
 RUN_BY_GUNICORN = "gunicorn" in os.environ.get("SERVER_SOFTWARE", "")
+if os.environ.get("TELEPRESENCE_ROOT"):
+    TELEPRESENCE = True
+    TELEPRESENCE_MODE = os.environ.get("TELEPRESENCE_MODE")
+    print(f"TELEPRESENCE_MODE: {TELEPRESENCE_MODE}")
+    if TELEPRESENCE_MODE == "docker":
+        KUBECONFIG = os.environ["TELEPRESENCE_MOUNTS"]
+    else:
+        KUBECONFIG = os.environ.get("TELEPRESENCE_ROOT") + os.environ["TELEPRESENCE_MOUNTS"]
+else:
+    TELEPRESENCE = False
+    KUBECONFIG = None
 
 DEMO_INSTANCE = os.environ.get("DASHBOARD_DEMO_INSTANCE", "False").lower() in ('true', '1')
 ENFORCE_2FA = os.environ.get("DASHBOARD_ENFORCE_2FA", "False").lower() in ('true', '1')
diff --git a/backend/helpers/kubernetes.py b/backend/helpers/kubernetes.py
index 1d54caaa..cfdcbf7d 100644
--- a/backend/helpers/kubernetes.py
+++ b/backend/helpers/kubernetes.py
@@ -8,20 +8,32 @@ import string
 import jinja2
 import yaml
 from kubernetes import client, config
+from kubernetes.config.incluster_config import InClusterConfigLoader
 from kubernetes.client import api_client
 from kubernetes.client.exceptions import ApiException
 from kubernetes.utils import create_from_yaml
 from kubernetes.utils.create_from_yaml import FailToCreateError
 from flask import current_app
 
-from config import LOAD_INCLUSTER_CONFIG
+from config import KUBECONFIG, LOAD_INCLUSTER_CONFIG, TELEPRESENCE
 
 # Load the kube config once
 #
 # By default this loads whatever we define in the `KUBECONFIG` env variable,
 # otherwise loads the config from default locations, similar to what kubectl
 # does.
-if LOAD_INCLUSTER_CONFIG:
+if TELEPRESENCE:
+    print(f"token_filename: {KUBECONFIG}/token")
+    import os
+    if os.path.isfile(f"{KUBECONFIG}/token"):
+        print("token_filename exists")
+    else:
+        print("token_filename does not exist")
+    InClusterConfigLoader(
+        token_filename=f"{KUBECONFIG}/token",
+        cert_filename=f"{KUBECONFIG}/ca.crt"
+    ).load_and_set()
+elif LOAD_INCLUSTER_CONFIG:
     config.load_incluster_config()
 else:
     config.load_kube_config()
diff --git a/dev.sh b/dev.sh
new file mode 100755
index 00000000..f2cac835
--- /dev/null
+++ b/dev.sh
@@ -0,0 +1,219 @@
#!/bin/bash

# TODO:
# * Check that KUBECONFIG is set, maybe load automatically like before?
# * env var for native/docker mode
# * env var for local (or remote) docker image name
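
# Return codes used by telepresenceRunning below, so callers can tell whether a
# telepresence daemon is already running, and if so in which mode.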
TELEPRESENCE_NOT_RUNNING=0
TELEPRESENCE_NATIVE=1
TELEPRESENCE_DOCKER=2

telepresenceRunning() {
  status=$(telepresence status | head -n 1)
  if [ "${status#*: }" == "Running" ]
  then
    echo "telepresence running"
    daemonType="${status%%:*}"
    echo "daemon type: $daemonType"
    if [ "${daemonType##* }" == "container" ]
    then
      echo "This seems to be a docker-mode daemon."
      return $TELEPRESENCE_DOCKER
    else
      echo "This seems to be a native-mode daemon."
      return $TELEPRESENCE_NATIVE
    fi
  elif [ "${status#*: }" == "Not running" ]
  then
    echo "telepresence not running"
    return $TELEPRESENCE_NOT_RUNNING
  else
    echo "Unknown telepresence state: $status"
    exit 2
  fi
}

# Print the IP of the telepresence DNS server (the "Remote IP" reported by
# `telepresence status`); used for the docker-mode dns workaround below.
telepresenceDns() {
  telepresence status | grep -i 'Remote IP' | awk -F': ' '{print $2}'
}

prepareDocker() {
  telepresenceRunning
  case $? in
    $TELEPRESENCE_NOT_RUNNING)
      echo "Starting telepresence daemon in docker mode."
      telepresence connect -n stackspin --docker || exit 3
      ;;
    $TELEPRESENCE_NATIVE)
      echo "Error: you want to start a docker-mode intercept, but there is a native telepresence daemon running. This is not currently supported."
      exit 3
      ;;
    $TELEPRESENCE_DOCKER)
      echo "telepresence daemon already running; connecting to it."
      telepresence connect -n stackspin --docker || exit 3
      ;;
  esac
}

prepareNative() {
  telepresenceRunning
  case $? in
    $TELEPRESENCE_NOT_RUNNING)
      echo "Starting telepresence daemon in native mode."
      telepresence connect -n stackspin || exit 3
      ;;
    $TELEPRESENCE_NATIVE)
      echo "telepresence daemon already running; connecting to it."
      telepresence connect -n stackspin || exit 3
      ;;
    $TELEPRESENCE_DOCKER)
      echo "Error: you want to start a native-mode intercept, but there is a docker telepresence daemon running. This is not currently supported."
      exit 3
      ;;
  esac
}

runBackend() {
  echo "Running dashboard backend locally and connecting to cluster."
  case $mode in
    "native")
      pushd backend > /dev/null
      source venv/bin/activate
      echo "Stopping any previous intercept for dashboard-backend..."
      telepresence leave dashboard-backend
      echo "Starting new intercept for dashboard-backend..."
      telepresence intercept dashboard-backend --service=dashboard-backend --port 5000:80 --mount=true -- env TELEPRESENCE_MODE=native flask run --reload
      deactivate
      popd > /dev/null
      ;;
    "docker")
      echo "Stopping any previous intercept for dashboard-backend..."
      telepresence leave dashboard-backend
      # Everything after `--` is passed to `docker run`: point the container at
      # the telepresence DNS server and run the locally built backend image.
      telepresence intercept dashboard-backend --service=dashboard-backend --port 5000:80 --mount=true --docker-run -- --dns $(telepresenceDns) -e TELEPRESENCE_MODE=docker dashboard-backend:test
      ;;
  esac
}

runFrontend() {
  echo "Running dashboard frontend locally and connecting to cluster."
  case $mode in
    "native")
      pushd frontend > /dev/null
      echo "Stopping any previous intercept for dashboard-frontend..."
      telepresence leave dashboard-frontend
      telepresence intercept dashboard-frontend --service=dashboard --port 3000:80 --mount=true -- yarn start --watch --verbose
      popd > /dev/null
      ;;
    "docker")
      echo "Stopping any previous intercept for dashboard-frontend..."
      telepresence leave dashboard-frontend
      telepresence intercept dashboard-frontend --service=dashboard --port 3000:80 --mount=true --docker-run -- dashboard-frontend:test
      ;;
  esac
}
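
# Usage overview (see the command dispatch at the bottom of this script):
#   ./dev.sh frontend [docker]   intercept the frontend (native mode by default)
#   ./dev.sh backend [docker]    intercept the backend
#   ./dev.sh frontend setup      install javascript dependencies locally
#   ./dev.sh backend setup       create backend/venv and install dependencies
#   ./dev.sh cluster setup       install telepresence on the cluster
#   ./dev.sh backend pause       replace the remote backend with a pause image
#   ./dev.sh cluster clean       remove telepresence from the cluster
#   ./dev.sh reset               reset the local daemon and remote traffic manager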

setupFrontend() {
  pushd frontend > /dev/null
  yarn install
  popd > /dev/null
}

setupBackend() {
  pushd backend > /dev/null
  if ! [ -d venv ]
  then
    python3 -m venv venv
  fi
  source venv/bin/activate
  pip install -r requirements.txt
  deactivate
  popd > /dev/null
}

setupCluster() {
  telepresence helm install -f telepresence-values.yaml
}

cleanCluster() {
  telepresence uninstall --all-agents
  helm uninstall -n ambassador traffic-manager
}

if [ "$1" == "reset" ]
then
  telepresence quit -s
  kubectl rollout restart -n ambassador deploy/traffic-manager
  exit 0
fi

if [ $# -eq 1 ]
then
  mode="native"
else
  mode=$2
fi

if [ $# -eq 2 ] && [ "$1" == "cluster" ] && [ "$2" == "clean" ]
then
  echo "Removing all development modifications from remote cluster..."
  cleanCluster
  exit 0
fi

if [ $# -eq 2 ] && [ "$2" == "pause" ]
then
  case $1 in
    "backend")
      kubectl set image -n stackspin deploy/dashboard-backend backend=docker.io/rancher/mirrored-pause:3.6
      exit 0
      ;;
    *)
      echo "Unknown pause command: $1 $2"
      exit 4
  esac
fi

if [ $# -eq 2 ] && [ "$2" == "setup" ]
then
  case $1 in
    "backend")
      setupBackend
      exit 0
      ;;
    "frontend")
      setupFrontend
      exit 0
      ;;
    "cluster")
      setupCluster
      exit 0
      ;;
    *)
      echo "Unknown setup command: $1 $2"
      exit 4
  esac
fi

case $mode in
  "docker")
    prepareDocker
    ;;
  "native")
    prepareNative
    ;;
  *)
    echo "Unknown mode (should be docker or native): $mode"
    exit 4
esac

case $1 in
  "backend")
    runBackend
    ;;
  "frontend")
    runFrontend
    ;;
  *)
    echo "Unknown command: $1"
    exit 4
esac
diff --git a/docker-compose.yml b/docker-compose.yml
index 7fde9fa1..f2f3437a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -10,15 +10,6 @@ services:
     ports:
       - "3000:3000"
     command: "yarn start --watch --verbose"
-  stackspin_proxy:
-    image: nginx:1.25.3
-    ports:
-      - "8081:8081"
-    volumes:
-      - ./backend/proxy/default.conf:/etc/nginx/conf.d/default.conf
-    depends_on:
-      - kube_port_kratos_public
-      - flask_app
   flask_app:
     build:
       context: ./backend
@@ -56,57 +47,3 @@ services:
     depends_on:
       - kube_port_mysql
     entrypoint: ["bash", "-c", "flask run --host $$(hostname -i)"]
-  kube_port_kratos_admin:
-    image: bitnami/kubectl:1.28.3
-    user: "${KUBECTL_UID}:${KUBECTL_GID}"
-    expose:
-      - 8000
-    volumes:
-      - "$KUBECONFIG:/.kube/config"
-    entrypoint:
-      [
-        "bash",
-        "-c",
-        "kubectl -n stackspin port-forward --address $$(hostname -i) service/kratos-admin 8000:80",
-      ]
-  kube_port_hydra_admin:
-    image: bitnami/kubectl:1.28.3
-    user: "${KUBECTL_UID}:${KUBECTL_GID}"
-    expose:
-      - 4445
-    volumes:
-      - "$KUBECONFIG:/.kube/config"
-    entrypoint:
-      [
-        "bash",
-        "-c",
-        "kubectl -n stackspin port-forward --address $$(hostname -i) service/hydra-admin 4445:4445",
-      ]
-  kube_port_kratos_public:
-    image: bitnami/kubectl:1.28.3
-    user: "${KUBECTL_UID}:${KUBECTL_GID}"
-    ports:
-      - "8080:8080"
-    expose:
-      - 8080
-    volumes:
-      - "$KUBECONFIG:/.kube/config"
-    entrypoint:
-      [
-        "bash",
-        "-c",
-        "kubectl -n stackspin port-forward --address 0.0.0.0 service/kratos-public 8080:80",
-      ]
-  kube_port_mysql:
-    image: bitnami/kubectl:1.28.3
-    user: "${KUBECTL_UID}:${KUBECTL_GID}"
-    expose:
-      - 3306
-    volumes:
-      - "$KUBECONFIG:/.kube/config"
-    entrypoint:
-      [
-        "bash",
-        "-c",
-        "kubectl -n stackspin port-forward --address $$(hostname -i) service/single-sign-on-database-mariadb 3306:3306",
-      ]
diff --git a/frontend/package.json b/frontend/package.json
index a56e1775..7485b1ad 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -42,7 +42,7 @@
     "redux-persist": "^6.0.0",
     "redux-thunk": "^2.3.0",
     "remark-gfm": "^3.0.1",
-    "tailwindcss": "npm:@tailwindcss/postcss7-compat",
+    "tailwindcss": "npm:@tailwindcss/postcss7-compat@^2.2.17",
     "typescript": "^4.1.2",
     "urlcat": "^2.0.4",
     "web-vitals": "^1.0.1",
diff --git a/frontend/src/services/api/config.ts b/frontend/src/services/api/config.ts
index d62f993a..f56f66fb 100644
--- a/frontend/src/services/api/config.ts
+++ b/frontend/src/services/api/config.ts
@@ -1,3 +1,3 @@
 export const api = {
-  hostname: process.env.REACT_APP_API_URL,
+  hostname: process.env.REACT_APP_API_URL || '/api/v1',
 };
diff --git a/telepresence-values.yaml b/telepresence-values.yaml
new file mode 100644
index 00000000..d792b050
--- /dev/null
+++ b/telepresence-values.yaml
@@ -0,0 +1,5 @@
+# Let the telepresence client route these cluster subnets even when they
+# conflict with local networks; they match the default k3s pod and service
+# CIDRs.
+client:
+  routing:
+    allowConflictingSubnets:
+      - "10.42.0.0/16"
+      - "10.43.0.0/16"
--
GitLab