From a2b5e37c7c2fa1ede8fc91c9e5e9f16420e63e58 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Mon, 6 Oct 2025 09:07:55 -0600 Subject: [PATCH 01/43] keycloak deploying. API endpoints not working --- .../modules/kubernetes/keycloak-helm/main.tf | 19 ++- .../kubernetes/keycloak-helm/values.yaml | 113 ++++++++++++------ 2 files changed, 92 insertions(+), 40 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf index 4b4e7527d8..ea737fa750 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf @@ -3,8 +3,8 @@ resource "helm_release" "keycloak" { namespace = var.namespace repository = "https://codecentric.github.io/helm-charts" - chart = "keycloak" - version = "15.0.2" + chart = "keycloakx" + version = "7.1.3" values = concat([ # https://github.com/codecentric/helm-charts/blob/keycloak-15.0.2/charts/keycloak/values.yaml @@ -43,6 +43,18 @@ resource "helm_release" "keycloak" { value = var.initial_root_password } + # Force replacement when values.yaml changes + lifecycle { + replace_triggered_by = [ + terraform_data.values_hash + ] + } + +} + +# Track changes to values.yaml +resource "terraform_data" "values_hash" { + input = filesha256("${path.module}/values.yaml") } @@ -62,8 +74,7 @@ resource "kubernetes_manifest" "keycloak-http" { match = "Host(`${var.external-url}`) && PathPrefix(`/auth`) " services = [ { - name = "keycloak-headless" - # Really not sure why 8080 works here + name = "keycloak-keycloakx-http" port = 80 namespace = var.namespace } diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml index 6fbb39b602..753123f8ca 100644 --- 
a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml @@ -1,7 +1,7 @@ -# https://github.com/codecentric/helm-charts/blob/keycloak-15.0.2/charts/keycloak/values.yaml +# Updated for keycloakx chart (Quarkus-based Keycloak) +# https://github.com/codecentric/helm-charts/tree/master/charts/keycloakx + ingress: - # Helm chart (14.0 anyway) will only define Ingress records, not IngressRoute as required by Traefik, so - # we will need to define our own IngressRoute elsewhere. enabled: false image: @@ -10,50 +10,66 @@ image: imagePullSecrets: - name: "extcrcreds" -extraEnv: | - - name: PROXY_ADDRESS_FORWARDING - value: "true" +# Command to start Keycloak (required for Quarkus-based Keycloak) +command: + - "/opt/keycloak/bin/kc.sh" -startupScripts: - keycloak.cli: | - {{- .Files.Get "scripts/keycloak.cli" | nindent 2 }} +args: + - "start" - nebariadminuser.sh: | - /opt/jboss/keycloak/bin/add-user-keycloak.sh -r master -u root -p "{{ .Values.initial_root_password }}" - /opt/jboss/keycloak/bin/add-user-keycloak.sh -r master -u nebari-bot -p "{{ .Values.nebari_bot_password }}" +# HTTP configuration +http: + relativePath: "/auth" - mv-custom-themes.sh: | - #!/bin/sh - printf '=%.0s' {1..73} - echo "Start moving custom themes to /opt/jboss/keycloak/themes" +# Proxy configuration for Keycloak behind Traefik +proxy: + enabled: true + mode: edge - if [ -d /opt/data/custom-themes/themes ]; then - echo 'Copying custom themes from /opt/data/custom-themes/themes to /opt/jboss/keycloak/themes' - cp -r /opt/data/custom-themes/themes/* /opt/jboss/keycloak/themes/ - else - echo 'No custom themes found in /opt/data/custom-themes' - fi +# Cache configuration - use jdbc-ping for clustering via database +cache: + stack: jdbc-ping - echo "Finished moving custom themes" - printf '=%.0s' {1..73} +# Environment variables for Keycloak configuration +extraEnv: | 
+ - name: KC_HOSTNAME + value: "tylertesting42.io" + - name: KC_HOSTNAME_PATH + value: "/auth" + - name: KC_HOSTNAME_STRICT + value: "false" + - name: KC_HOSTNAME_STRICT_HTTPS + value: "false" + - name: KC_HTTP_ENABLED + value: "true" + - name: KC_PROXY_HEADERS + value: "xforwarded" + - name: KEYCLOAK_ADMIN + value: "root" + - name: KEYCLOAK_ADMIN_PASSWORD + value: "{{ .Values.initial_root_password }}" + - name: KC_HEALTH_ENABLED + value: "true" + - name: KC_METRICS_ENABLED + value: "true" extraInitContainers: | - command: - sh - -c - | - if [ ! -f /data/keycloak-metrics-spi-2.5.3.jar ]; then - wget https://github.com/aerogear/keycloak-metrics-spi/releases/download/2.5.3/keycloak-metrics-spi-2.5.3.jar -P /data/ && - export SHA256SUM=9b3f52f842a66dadf5ff3cc3a729b8e49042d32f84510a5d73d41a2e39f29a96 && - if ! (echo "$SHA256SUM /data/keycloak-metrics-spi-2.5.3.jar" | sha256sum -c) + if [ ! -f /providers/keycloak-metrics-spi-7.0.0.jar ]; then + wget https://github.com/aerogear/keycloak-metrics-spi/releases/download/7.0.0/keycloak-metrics-spi-7.0.0.jar -P /providers/ && + export SHA256SUM=e7ec72ab1699e57a25b61cb5e3ef1c532ec9858ed6931c1b491d3368f5d007b8 && + if ! (echo "$SHA256SUM /providers/keycloak-metrics-spi-7.0.0.jar" | sha256sum -c) then echo "Error: Checksum not verified" && exit 1 else - chown 1000:1000 /data/keycloak-metrics-spi-2.5.3.jar && - chmod 777 /data/keycloak-metrics-spi-2.5.3.jar + chown 1000:1000 /providers/keycloak-metrics-spi-7.0.0.jar && + chmod 644 /providers/keycloak-metrics-spi-7.0.0.jar fi else - echo "File /data/keycloak-metrics-spi-2.5.3.jar already exists. Skipping download." + echo "File /providers/keycloak-metrics-spi-7.0.0.jar already exists. Skipping download." 
fi image: busybox:1.36 name: initialize-spi-metrics-jar @@ -61,7 +77,7 @@ extraInitContainers: | runAsUser: 0 volumeMounts: - name: metrics-plugin - mountPath: /data + mountPath: /providers {{- if .Values.customThemes.enabled }} - env: - name: GIT_SYNC_REPO @@ -78,7 +94,7 @@ extraInitContainers: | value: themes - name: GIT_SYNC_SSH value: "false" - image: k8s.gcr.io/git-sync:v3.1.5 + image: registry.k8s.io/git-sync/git-sync:v4.3.0 imagePullPolicy: IfNotPresent name: keycloak-git-sync resources: {} @@ -90,16 +106,41 @@ extraInitContainers: | volumeMounts: - mountPath: /opt/data/custom-themes name: custom-themes + - command: + - sh + - -c + - | + if [ -d /opt/data/custom-themes/themes ]; then + echo 'Copying custom themes from /opt/data/custom-themes/themes to /themes' + cp -r /opt/data/custom-themes/themes/* /themes/ + else + echo 'No custom themes found in /opt/data/custom-themes' + fi + image: busybox:1.36 + name: move-custom-themes + securityContext: + runAsUser: 0 + volumeMounts: + - mountPath: /opt/data/custom-themes + name: custom-themes + - mountPath: /themes + name: theme-data {{- end }} extraVolumeMounts: | - name: metrics-plugin - mountPath: /opt/jboss/keycloak/providers/ - - mountPath: /opt/data/custom-themes - name: custom-themes + mountPath: /opt/keycloak/providers/ + {{- if .Values.customThemes.enabled }} + - mountPath: /opt/keycloak/themes + name: theme-data + {{- end }} extraVolumes: | - name: metrics-plugin emptyDir: {} + {{- if .Values.customThemes.enabled }} - name: custom-themes emptyDir: {} + - name: theme-data + emptyDir: {} + {{- end }} From 8c0ca6c2a85399401035a59df65a05ea05d5a7c8 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Mon, 6 Oct 2025 12:20:13 -0600 Subject: [PATCH 02/43] startup script was deprecated. 
Add nebari-bot in python --- .../stages/kubernetes_keycloak/__init__.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index 0224f47a09..19dabf7b2a 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -315,6 +315,49 @@ def _attempt_keycloak_connection( print("Keycloak service successfully started") + def post_deploy( + self, stage_outputs: Dict[str, Dict[str, Any]], disable_prompt: bool = False + ): + """Create nebari-bot user after Keycloak is deployed.""" + from keycloak import KeycloakAdmin + from keycloak.exceptions import KeycloakError + + keycloak_url = f"{stage_outputs['stages/' + self.name]['keycloak_credentials']['value']['url']}/auth/" + nebari_bot_password = stage_outputs["stages/" + self.name]["keycloak_nebari_bot_password"]["value"] + + print("Creating nebari-bot user in Keycloak master realm...") + + try: + # Connect as root user + admin = KeycloakAdmin( + keycloak_url, + username="root", + password=self.config.security.keycloak.initial_root_password, + realm_name="master", + client_id="admin-cli", + verify=False, + ) + + # Check if nebari-bot already exists + users = admin.get_users({"username": "nebari-bot"}) + + if users: + print("nebari-bot user already exists, skipping creation") + else: + # Create nebari-bot user + admin.create_user({ + "username": "nebari-bot", + "enabled": True, + "credentials": [{ + "type": "password", + "value": nebari_bot_password, + "temporary": False + }] + }) + print("Successfully created nebari-bot user") + except KeycloakError as e: + print(f"Warning: Failed to create nebari-bot user: {e}") + @contextlib.contextmanager def deploy( self, stage_outputs: Dict[str, Dict[str, Any]], disable_prompt: bool = False From 29348f69519ec737ba9d905d725ab3e3335bf747 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Mon, 6 Oct 2025 12:41:44 
-0600 Subject: [PATCH 03/43] nebari deploying fully now. Auth not working for apps --- .../stages/kubernetes_keycloak/__init__.py | 22 ++++++++++++++++--- .../template/main.tf | 5 +++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index 19dabf7b2a..87c32f5116 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -342,10 +342,11 @@ def post_deploy( users = admin.get_users({"username": "nebari-bot"}) if users: - print("nebari-bot user already exists, skipping creation") + print("nebari-bot user already exists") + user_id = users[0]["id"] else: # Create nebari-bot user - admin.create_user({ + user_id = admin.create_user({ "username": "nebari-bot", "enabled": True, "credentials": [{ @@ -355,8 +356,23 @@ def post_deploy( }] }) print("Successfully created nebari-bot user") + + # Assign admin role to nebari-bot user + # Get the admin role from master realm + admin_role = admin.get_realm_role("admin") + + # Check if user already has the admin role + user_roles = admin.get_realm_roles_of_user(user_id) + has_admin_role = any(role.get("name") == "admin" for role in user_roles) + + if not has_admin_role: + admin.assign_realm_roles(user_id, [admin_role]) + print("Assigned admin role to nebari-bot user") + else: + print("nebari-bot user already has admin role") + except KeycloakError as e: - print(f"Warning: Failed to create nebari-bot user: {e}") + print(f"Warning: Failed to configure nebari-bot user: {e}") @contextlib.contextmanager def deploy( diff --git a/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf b/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf index 95dba71810..8292b322c4 100644 --- a/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf +++ b/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf @@ 
-87,6 +87,11 @@ resource "keycloak_default_groups" "default" { for g in var.default_groups : keycloak_group.groups[g].id ] + + depends_on = [ + keycloak_realm.main, + keycloak_group.groups + ] } data "keycloak_realm" "master" { From ba572933f7fedbb43812e5ef9c90b2060c5eb949 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 7 Oct 2025 14:13:42 -0600 Subject: [PATCH 04/43] add openid scope to fix jhub keycloak --- .../template/modules/kubernetes/services/jupyterhub/main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/main.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/main.tf index b0a8da8ee3..2ea88d93ab 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/main.tf +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/main.tf @@ -162,6 +162,7 @@ resource "helm_release" "jupyterhub" { token_url = module.jupyterhub-openid-client.config.token_url userdata_url = module.jupyterhub-openid-client.config.userinfo_url realm_api_url = module.jupyterhub-openid-client.config.realm_api_url + scope = ["openid", "profile", "email"] login_service = "Keycloak" username_claim = "preferred_username" claim_groups_key = "groups" From e2f142db741c0ddbf5b1d14deb208fb919e122fe Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 7 Oct 2025 14:21:44 -0600 Subject: [PATCH 05/43] add openid to fix grafana --- .../template/modules/kubernetes/services/monitoring/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/main.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/main.tf index 3933b1f009..43ac28463b 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/main.tf +++ 
b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/main.tf @@ -251,7 +251,7 @@ resource "helm_release" "prometheus-grafana" { allow_sign_up = "true" client_id = "$__env{grafana-oauth-client-id}" client_secret = "$__env{grafana-oauth-client-secret}" - scopes = "profile" + scopes = "openid profile email" auth_url = module.grafana-client-id.config.authentication_url token_url = module.grafana-client-id.config.token_url api_url = module.grafana-client-id.config.userinfo_url From 60976bdaaebc108f0f12cf985ecc243d234fc522 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 7 Oct 2025 14:37:16 -0600 Subject: [PATCH 06/43] updates for conda-store to access keycloak --- .../services/conda-store/config/conda_store_config.py | 2 +- .../modules/kubernetes/services/conda-store/server.tf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/conda-store/config/conda_store_config.py b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/conda-store/config/conda_store_config.py index 3136d891bd..5fb3679c3c 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/conda-store/config/conda_store_config.py +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/conda-store/config/conda_store_config.py @@ -88,7 +88,7 @@ def conda_store_config(path="/var/lib/conda-store/config.json"): ) c.GenericOAuthAuthentication.client_id = config["openid-config"]["client_id"] c.GenericOAuthAuthentication.client_secret = config["openid-config"]["client_secret"] -c.GenericOAuthAuthentication.access_scope = "profile" +c.GenericOAuthAuthentication.access_scope = "openid profile email" c.GenericOAuthAuthentication.user_data_key = "preferred_username" c.GenericOAuthAuthentication.tls_verify = False diff --git 
a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/conda-store/server.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/conda-store/server.tf index 8a29bc2d41..2a8d0658e8 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/conda-store/server.tf +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/conda-store/server.tf @@ -26,8 +26,8 @@ resource "kubernetes_secret" "conda-store-secret" { extra-settings = var.extra-settings extra-config = var.extra-config default-namespace = var.default-namespace-name - token_url_internal = "http://keycloak-http.${var.namespace}.svc/auth/realms/${var.realm_id}/protocol/openid-connect/token" - realm_api_url_internal = "http://keycloak-http.${var.namespace}.svc/auth/admin/realms/${var.realm_id}" + token_url_internal = "http://keycloak-keycloakx-http.${var.namespace}.svc/auth/realms/${var.realm_id}/protocol/openid-connect/token" + realm_api_url_internal = "http://keycloak-keycloakx-http.${var.namespace}.svc/auth/admin/realms/${var.realm_id}" service-tokens = { for service, value in var.services : base64encode(random_password.conda_store_service_token[service].result) => value } From 954a0fd938efcd5353453d4ce652a2f7515503d6 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Thu, 9 Oct 2025 10:18:41 -0600 Subject: [PATCH 07/43] add retry mechanism for nebari bot --- .../stages/kubernetes_keycloak/__init__.py | 99 +++++++++++-------- 1 file changed, 56 insertions(+), 43 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index 87c32f5116..b8327df7e9 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -327,52 +327,65 @@ def post_deploy( print("Creating nebari-bot user in Keycloak master realm...") - try: - # Connect as root user - admin = KeycloakAdmin( - keycloak_url, - 
username="root", - password=self.config.security.keycloak.initial_root_password, - realm_name="master", - client_id="admin-cli", - verify=False, - ) + max_attempts = 10 + retry_delay = 5 # seconds + + for attempt in range(1, max_attempts + 1): + try: + # Connect as root user + admin = KeycloakAdmin( + keycloak_url, + username="root", + password=self.config.security.keycloak.initial_root_password, + realm_name="master", + client_id="admin-cli", + verify=False, + ) - # Check if nebari-bot already exists - users = admin.get_users({"username": "nebari-bot"}) + # Check if nebari-bot already exists + users = admin.get_users({"username": "nebari-bot"}) - if users: - print("nebari-bot user already exists") - user_id = users[0]["id"] - else: - # Create nebari-bot user - user_id = admin.create_user({ - "username": "nebari-bot", - "enabled": True, - "credentials": [{ - "type": "password", - "value": nebari_bot_password, - "temporary": False - }] - }) - print("Successfully created nebari-bot user") - - # Assign admin role to nebari-bot user - # Get the admin role from master realm - admin_role = admin.get_realm_role("admin") - - # Check if user already has the admin role - user_roles = admin.get_realm_roles_of_user(user_id) - has_admin_role = any(role.get("name") == "admin" for role in user_roles) - - if not has_admin_role: - admin.assign_realm_roles(user_id, [admin_role]) - print("Assigned admin role to nebari-bot user") - else: - print("nebari-bot user already has admin role") + if users: + print("nebari-bot user already exists") + user_id = users[0]["id"] + else: + # Create nebari-bot user + user_id = admin.create_user({ + "username": "nebari-bot", + "enabled": True, + "credentials": [{ + "type": "password", + "value": nebari_bot_password, + "temporary": False + }] + }) + print("Successfully created nebari-bot user") + + # Assign admin role to nebari-bot user + # Get the admin role from master realm + admin_role = admin.get_realm_role("admin") + + # Check if user already 
has the admin role + user_roles = admin.get_realm_roles_of_user(user_id) + has_admin_role = any(role.get("name") == "admin" for role in user_roles) + + if not has_admin_role: + admin.assign_realm_roles(user_id, [admin_role]) + print("Assigned admin role to nebari-bot user") + else: + print("nebari-bot user already has admin role") - except KeycloakError as e: - print(f"Warning: Failed to configure nebari-bot user: {e}") + # Success - break out of retry loop + break + + except KeycloakError as e: + if attempt < max_attempts: + print(f"Attempt {attempt}/{max_attempts} failed: {e}") + print(f"Retrying in {retry_delay} seconds...") + time.sleep(retry_delay) + else: + print(f"Failed to configure nebari-bot user after {max_attempts} attempts: {e}") + sys.exit(1) @contextlib.contextmanager def deploy( From 7e74f5813df57a4ca3162741af2a4eec5c71cfda Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Thu, 9 Oct 2025 10:53:39 -0600 Subject: [PATCH 08/43] remove force replacement that was used for dev --- .../template/modules/kubernetes/keycloak-helm/main.tf | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf index ea737fa750..6d20679a7e 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf @@ -42,14 +42,6 @@ resource "helm_release" "keycloak" { name = "initial_root_password" value = var.initial_root_password } - - # Force replacement when values.yaml changes - lifecycle { - replace_triggered_by = [ - terraform_data.values_hash - ] - } - } # Track changes to values.yaml From 3c4e90d2e50351910c2ead09d0e445cdd43df9e3 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Thu, 9 Oct 2025 18:51:20 -0600 Subject: [PATCH 09/43] add database configuration into values for 
keycloak --- .../modules/kubernetes/keycloak-helm/values.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml index 753123f8ca..e9482d1726 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml @@ -10,6 +10,21 @@ image: imagePullSecrets: - name: "extcrcreds" +# Database configuration - connect to PostgreSQL subchart +# These settings match the default credentials from the old keycloak chart +# to ensure smooth migration without losing users +database: + vendor: postgres + hostname: keycloak-postgresql + port: 5432 + database: keycloak + username: keycloak + password: keycloak + +# Enable database readiness check +dbchecker: + enabled: true + # Command to start Keycloak (required for Quarkus-based Keycloak) command: - "/opt/keycloak/bin/kc.sh" From cede5492587954f1be7a0bdabe0a35a2611b1ddd Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Sat, 11 Oct 2025 15:15:42 -0600 Subject: [PATCH 10/43] add standalone postgres because new keycloak doesn't bundle --- .../modules/kubernetes/keycloak-helm/main.tf | 30 +++++++++++++++++++ .../kubernetes/keycloak-helm/values.yaml | 7 ++--- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf index 6d20679a7e..5ef86ff3ad 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf @@ -1,3 +1,33 @@ +# Standalone PostgreSQL database for Keycloak +# Deployed 
separately to allow safe upgrade from keycloak to keycloakx chart +resource "helm_release" "keycloak_postgresql" { + name = "keycloak-postgres-standalone" + namespace = var.namespace + repository = "https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami" + chart = "postgresql" + version = "10.16.2" + + values = [ + jsonencode({ + image = { + registry = "docker.io" + repository = "bitnamilegacy/postgresql" + tag = "11.14.0" + } + primary = { + nodeSelector = { + "${var.node_group.key}" = var.node_group.value + } + } + auth = { + username = "keycloak" + password = "keycloak" + database = "keycloak" + } + }) + ] +} + resource "helm_release" "keycloak" { name = "keycloak" namespace = var.namespace diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml index e9482d1726..9837e87018 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml @@ -10,12 +10,11 @@ image: imagePullSecrets: - name: "extcrcreds" -# Database configuration - connect to PostgreSQL subchart -# These settings match the default credentials from the old keycloak chart -# to ensure smooth migration without losing users +# Database configuration - connect to standalone PostgreSQL +# Points to the standalone PostgreSQL we migrated data to database: vendor: postgres - hostname: keycloak-postgresql + hostname: keycloak-postgres-standalone-postgresql port: 5432 database: keycloak username: keycloak From 04b709418fd0fd9cfcb49fce16d6da7b8c8f4674 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Sat, 11 Oct 2025 15:42:26 -0600 Subject: [PATCH 11/43] Release 2025.10.1: Major Keycloak upgrade with PostgreSQL migration - Upgrade from keycloak chart (15.0.2) to keycloakx chart (7.1.3) - Move PostgreSQL from 
subchart to standalone deployment - Add Upgrade_2025_10_1 step with database backup prompt - Add UPGRADE_STEPS.md with detailed migration guide - Update RELEASE.md with breaking changes and upgrade instructions --- RELEASE.md | 49 ++++++ UPGRADE_STEPS.md | 350 +++++++++++++++++++++++++++++++++++++++++ src/_nebari/upgrade.py | 195 +++++++++++++++++++++++ 3 files changed, 594 insertions(+) create mode 100644 UPGRADE_STEPS.md diff --git a/RELEASE.md b/RELEASE.md index f86129d771..2da1d80e2c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -9,6 +9,55 @@ This file is copied to nebari-dev/nebari-docs using a GitHub Action. --> --- +## Release 2025.10.1 - October 11, 2025 + +### Breaking Changes + +⚠️ **CRITICAL UPGRADE** - This release includes a major Keycloak upgrade that requires manual intervention. + +### What's Changed + +#### Keycloak Upgrade (15.0.2 → keycloakx 7.1.3) + +This release upgrades Keycloak from the `keycloak` chart (15.0.2, JBoss/WildFly) to the `keycloakx` chart (7.1.3, Quarkus). This is a major architectural change with the following impacts: + +**Key Changes:** +- PostgreSQL is now deployed as a standalone Helm release instead of a subchart +- Keycloak service name changes from `keycloak-headless` to `keycloak-keycloakx-http` +- OAuth clients now require the `openid` scope explicitly +- Startup scripts replaced with Python `post_deploy` hooks for user creation + +**Required Actions:** + +1. **Before upgrading**, you MUST backup your Keycloak PostgreSQL database: + ```bash + kubectl exec -n dev keycloak-postgresql-0 -- env PGPASSWORD=keycloak pg_dump -U keycloak -d keycloak > keycloak-backup.sql + ``` + +2. Run the upgrade command: + ```bash + nebari upgrade -c nebari-config.yaml + ``` + The upgrade step will prompt you to backup the database and can optionally perform the backup automatically if you have kubectl access. + +3. 
After upgrade, render and deploy: + ```bash + nebari render -c nebari-config.yaml + nebari deploy -c nebari-config.yaml + ``` + +**For detailed upgrade instructions, see `UPGRADE_STEPS.md` in the repository.** + +**Affected Services:** +- JupyterHub OAuth configuration updated to include `openid` scope +- Grafana OAuth configuration updated to include `openid` scope +- conda-store OAuth configuration updated to include `openid` scope +- conda-store internal service URLs updated for new Keycloak service name + +**Full Changelog**: https://github.com/nebari-dev/nebari/compare/2025.6.1...2025.10.1 + +--- + ## Release 2025.6.1 - June 06, 2025 ### What's Changed diff --git a/UPGRADE_STEPS.md b/UPGRADE_STEPS.md new file mode 100644 index 0000000000..105729a0eb --- /dev/null +++ b/UPGRADE_STEPS.md @@ -0,0 +1,350 @@ +# Upgrading Keycloak from 15.0.2 (JBoss) to keycloakx 7.1.3 (Quarkus) in Nebari + +## Overview + +This guide covers upgrading from the `keycloak` chart (15.0.2) to the `keycloakx` chart (7.1.3) while preserving all users, realms, and configuration. + +## Key Challenge + +The old `keycloak` chart includes PostgreSQL as a subchart dependency, but the new `keycloakx` chart does not. To safely upgrade without losing data, we need to: +1. Extract PostgreSQL to a standalone deployment +2. Migrate the data +3. 
Upgrade to keycloakx pointing to the standalone database + +## Prerequisites + +- Existing Nebari deployment with Keycloak 15.0.2 +- kubectl access to the cluster +- Terraform/OpenTofu installed + +## Step 1: Deploy Standalone PostgreSQL + +Add a standalone PostgreSQL Helm release in `kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf`: + +```terraform +# Standalone PostgreSQL database for Keycloak +# Deployed separately to allow safe upgrade from keycloak to keycloakx chart +resource "helm_release" "keycloak_postgresql" { + name = "keycloak-postgres-standalone" + namespace = var.namespace + repository = "https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami" + chart = "postgresql" + version = "10.16.2" + + values = [ + jsonencode({ + image = { + registry = "docker.io" + repository = "bitnamilegacy/postgresql" + tag = "11.14.0" + } + primary = { + nodeSelector = { + "${var.node_group.key}" = var.node_group.value + } + } + auth = { + username = "keycloak" + password = "keycloak" + database = "keycloak" + } + }) + ] +} +``` + +Deploy it: +```bash +cd nebari-local/stages/05-kubernetes-keycloak +tofu apply +``` + +Verify both PostgreSQL instances are running: +```bash +kubectl get pods -n dev | grep postgres +``` + +Expected output: +- `keycloak-postgresql-0` (old - from keycloak subchart) +- `keycloak-postgres-standalone-postgresql-0` (new - standalone) + +## Step 2: Backup and Migrate Database + +Backup from old PostgreSQL: +```bash +kubectl exec -n dev keycloak-postgresql-0 -- env PGPASSWORD=keycloak pg_dump -U keycloak -d keycloak > keycloak-backup.sql +``` + +Get the postgres superuser password for the new database: +```bash +kubectl get secret -n dev keycloak-postgres-standalone-postgresql -o jsonpath='{.data.postgres-password}' | base64 -d +echo +``` + +Create database and user in new PostgreSQL: +```bash +# Replace with the actual password from above +kubectl exec -i -n dev keycloak-postgres-standalone-postgresql-0 -- 
env PGPASSWORD= psql -U postgres -c "CREATE DATABASE keycloak;" + +kubectl exec -i -n dev keycloak-postgres-standalone-postgresql-0 -- env PGPASSWORD= psql -U postgres -c "CREATE USER keycloak WITH PASSWORD 'keycloak';" + +kubectl exec -i -n dev keycloak-postgres-standalone-postgresql-0 -- env PGPASSWORD= psql -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE keycloak TO keycloak;" +``` + +Restore the backup: +```bash +cat keycloak-backup.sql | kubectl exec -i -n dev keycloak-postgres-standalone-postgresql-0 -- env PGPASSWORD= psql -U postgres -d keycloak +``` + +Verify migration: +```bash +kubectl exec -n dev keycloak-postgres-standalone-postgresql-0 -- env PGPASSWORD= psql -U postgres -d keycloak -c "SELECT count(*) FROM user_entity;" +``` + +## Step 3: Update values.yaml for keycloakx + +Update `kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml`: + +```yaml +# Database configuration - connect to standalone PostgreSQL +database: + vendor: postgres + hostname: keycloak-postgres-standalone-postgresql + port: 5432 + database: keycloak + username: keycloak + password: keycloak + +# Enable database readiness check +dbchecker: + enabled: true +``` + +## Step 4: Upgrade to keycloakx Chart + +Update `kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf`: + +```terraform +resource "helm_release" "keycloak" { + name = "keycloak" + namespace = var.namespace + + repository = "https://codecentric.github.io/helm-charts" + chart = "keycloakx" # Changed from "keycloak" + version = "7.1.3" # Changed from "15.0.2" + + # ... rest of configuration +} +``` + +Update the IngressRoute service name: +```terraform +resource "kubernetes_manifest" "keycloak-http" { + manifest = { + # ... + spec = { + # ... + routes = [ + { + # ... 
+ services = [ + { + name = "keycloak-keycloakx-http" # Changed from "keycloak-headless" + port = 80 + namespace = var.namespace + } + ] + } + ] + } + } +} +``` + +## Step 5: Fix OAuth Scopes for Keycloak 20+ + +Keycloak 20+ requires the `openid` scope to be explicitly requested. Update the following files: + +### JupyterHub +`kubernetes_services/template/modules/kubernetes/services/jupyterhub/main.tf`: +```terraform +KeyCloakOAuthenticator = { + # ... + scope = ["openid", "profile", "email"] # Add this line + # ... +} +``` + +### Grafana +`kubernetes_services/template/modules/kubernetes/services/monitoring/main.tf`: +```terraform +"auth.generic_oauth" = { + # ... + scopes = "openid profile email" # Changed from "profile" + # ... +} +``` + +### conda-store +`kubernetes_services/template/modules/kubernetes/services/conda-store/config/conda_store_config.py`: +```python +c.GenericOAuthAuthentication.access_scope = "openid profile email" # Changed from "profile" +``` + +### conda-store internal service URLs +`kubernetes_services/template/modules/kubernetes/services/conda-store/server.tf`: +```terraform +token_url_internal = "http://keycloak-keycloakx-http.${var.namespace}.svc/auth/realms/${var.realm_id}/protocol/openid-connect/token" +realm_api_url_internal = "http://keycloak-keycloakx-http.${var.namespace}.svc/auth/admin/realms/${var.realm_id}" +``` + +## Step 6: Add nebari-bot User Creation + +Since the keycloakx chart doesn't support startup scripts, create `nebari-bot` user via Python in the `post_deploy` hook. 
+ +Update `kubernetes_keycloak/__init__.py`: + +```python +def post_deploy( + self, stage_outputs: Dict[str, Dict[str, Any]], disable_prompt: bool = False +): + """Create nebari-bot user after Keycloak is deployed.""" + from keycloak import KeycloakAdmin + from keycloak.exceptions import KeycloakError + + keycloak_url = f"{stage_outputs['stages/' + self.name]['keycloak_credentials']['value']['url']}/auth/" + nebari_bot_password = stage_outputs["stages/" + self.name]["keycloak_nebari_bot_password"]["value"] + + print("Creating nebari-bot user in Keycloak master realm...") + + max_attempts = 10 + retry_delay = 5 # seconds + + for attempt in range(1, max_attempts + 1): + try: + # Connect as root user + admin = KeycloakAdmin( + keycloak_url, + username="root", + password=self.config.security.keycloak.initial_root_password, + realm_name="master", + client_id="admin-cli", + verify=False, + ) + + # Check if nebari-bot already exists + users = admin.get_users({"username": "nebari-bot"}) + + if users: + print("nebari-bot user already exists") + user_id = users[0]["id"] + else: + # Create nebari-bot user + user_id = admin.create_user({ + "username": "nebari-bot", + "enabled": True, + "credentials": [{ + "type": "password", + "value": nebari_bot_password, + "temporary": False + }] + }) + print("Successfully created nebari-bot user") + + # Assign admin role to nebari-bot user + admin_role = admin.get_realm_role("admin") + user_roles = admin.get_realm_roles_of_user(user_id) + has_admin_role = any(role.get("name") == "admin" for role in user_roles) + + if not has_admin_role: + admin.assign_realm_roles(user_id, [admin_role]) + print("Assigned admin role to nebari-bot user") + else: + print("nebari-bot user already has admin role") + + # Success - break out of retry loop + break + + except KeycloakError as e: + if attempt < max_attempts: + print(f"Attempt {attempt}/{max_attempts} failed: {e}") + print(f"Retrying in {retry_delay} seconds...") + time.sleep(retry_delay) + else: + 
print(f"Failed to configure nebari-bot user after {max_attempts} attempts: {e}") + sys.exit(1) +``` + +## Step 7: Deploy the Upgrade + +```bash +# Render updated configuration +nebari render -c nebari-config.yaml -o nebari-local + +# Apply Keycloak upgrade +cd nebari-local/stages/05-kubernetes-keycloak +tofu apply + +# Apply Keycloak configuration +cd ../06-kubernetes-keycloak-configuration +tofu apply + +# Apply services (JupyterHub, Grafana, conda-store) +cd ../07-kubernetes-services +tofu apply +``` + +## Step 8: Verify the Upgrade + +Check Keycloak is running: +```bash +kubectl get pods -n dev | grep keycloak +``` + +Verify users were preserved: +```bash +kubectl exec -n dev keycloak-postgres-standalone-postgresql-0 -- env PGPASSWORD=<POSTGRES_PASSWORD> psql -U postgres -d keycloak -c "SELECT username FROM user_entity;" +``` + +Test authentication: +- Access JupyterHub: `https://<your-domain>/hub` +- Access Grafana: `https://<your-domain>/monitoring` +- Access conda-store: `https://<your-domain>/conda-store` + +## Rollback (if needed) + +If the upgrade fails, roll back using Helm: + +```bash +# Check current revision +helm history keycloak -n dev + +# Rollback to previous revision +helm rollback keycloak -n dev +``` + +## Key Differences: keycloak vs keycloakx + +| Feature | keycloak 15.0.2 | keycloakx 7.1.3 | +|---------|-----------------|------------------| +| **Base** | JBoss/WildFly | Quarkus | +| **PostgreSQL** | Included as subchart | Not included (external) | +| **Database Config** | Auto-configured | Manual `database:` section | +| **Startup Scripts** | Supported | Not supported | +| **Service Name** | `keycloak-headless` | `keycloak-keycloakx-http` | +| **OAuth Scopes** | Auto-includes openid | Must explicitly request `openid` | +| **User Creation** | Startup scripts | Python `post_deploy` hook | + +## Troubleshooting + +### "Missing openid scope" error +- Ensure all OAuth clients request `scope = ["openid", "profile", "email"]` + +### "Database not found" error +- Verify `database.hostname` points to correct 
service name +- Check PostgreSQL is running: `kubectl get pods -n dev | grep postgres` + +### Users deleted after upgrade +- Data is in PostgreSQL persistent volume - check PVC exists +- Verify database migration completed successfully +- Check keycloakx is connecting to the correct database diff --git a/src/_nebari/upgrade.py b/src/_nebari/upgrade.py index b23fba2d32..7fe148d73d 100644 --- a/src/_nebari/upgrade.py +++ b/src/_nebari/upgrade.py @@ -1847,6 +1847,7 @@ class Upgrade_2025_10_1(UpgradeStep): This upgrade includes: - Patching deprecated Bitnami images to avoid deployment issues during Helm chart upgrades + """ version = "2025.10.1" @@ -2049,6 +2050,200 @@ def _version_specific_upgrade( exit() rich.print("\nReady to upgrade to Nebari version [green]2025.10.1[/green].") + return config + + +class Upgrade_2025_11_1(UpgradeStep): + namespace = config.get("namespace", "dev") + + rich.print("\n ⚠️ CRITICAL UPGRADE WARNING ⚠️") + rich.print( + textwrap.dedent( + f""" + This version includes a major [bold red]Keycloak upgrade[/bold red] from the [green]keycloak[/green] chart (15.0.2) + to the [green]keycloakx[/green] chart (7.1.3). This upgrade changes the underlying architecture + from JBoss/WildFly to Quarkus. + + [bold yellow]IMPORTANT:[/bold yellow] The old keycloak chart includes PostgreSQL as a subchart dependency, + but the new keycloakx chart does not. To prevent data loss, you [bold red]MUST[/bold red] backup your + Keycloak PostgreSQL database [bold red]BEFORE[/bold red] running [cyan]nebari deploy[/cyan]. + + [bold]Key changes:[/bold] + - PostgreSQL moved from subchart to standalone deployment + - Keycloak service name changes from [green]keycloak-headless[/green] to [green]keycloak-keycloakx-http[/green] + - OAuth clients now require [green]openid[/green] scope explicitly + - Startup scripts replaced with Python post_deploy hooks + + After this upgrade step completes, you will need to: + 1. 
[cyan]nebari render -c {config_filename}[/cyan] to generate updated Terraform + 2. [cyan]nebari deploy -c {config_filename}[/cyan] to apply the changes + """ + ) + ) + + # Check if user has kubectl access + has_kubectl = False + try: + kubernetes.config.load_kube_config() + has_kubectl = True + except kubernetes.config.config_exception.ConfigException: + pass + + backup_command = f"kubectl exec -n {namespace} keycloak-postgresql-0 -- env PGPASSWORD=keycloak pg_dump -U keycloak -d keycloak > keycloak-backup.sql" + + if has_kubectl: + rich.print( + "\n[green]✓[/green] kubectl configuration detected. Nebari can help you backup the database." + ) + + # Offer to run the backup automatically + run_backup = kwargs.get("attempt_fixes", False) or Confirm.ask( + "\nWould you like Nebari to backup the Keycloak database for you now?", + default=True, + ) + + if run_backup: + rich.print( + f"\n[cyan]Running backup command:[/cyan]\n{backup_command}\n" + ) + + try: + # Check if the old PostgreSQL pod exists + api_instance = kubernetes.client.CoreV1Api() + try: + api_instance.read_namespaced_pod( + name="keycloak-postgresql-0", namespace=namespace + ) + except kubernetes.client.exceptions.ApiException as e: + if e.status == 404: + rich.print( + f"\n[yellow]Pod keycloak-postgresql-0 not found in namespace {namespace}.[/yellow]" + ) + rich.print( + "This may be normal if you're on a fresh deployment or have already migrated." + ) + skip_backup = Confirm.ask( + "\nDo you want to skip the backup and continue with the upgrade?", + default=False, + ) + if not skip_backup: + rich.print( + "[red]Upgrade cancelled. Please verify your Keycloak deployment.[/red]" + ) + exit(1) + else: + rich.print( + "[yellow]Skipping backup. 
Proceeding with upgrade...[/yellow]" + ) + return config + else: + raise e + + # Run the backup command + backup_file = config_filename.parent / "keycloak-backup.sql" + rich.print( + f"[green]Creating backup at:[/green] {backup_file}\n" + ) + + import subprocess + + result = subprocess.run( + backup_command, + shell=True, + capture_output=True, + text=True, + cwd=config_filename.parent, + ) + + if result.returncode == 0: + rich.print( + f"[green]✓ Backup successful![/green] Saved to {backup_file}" + ) + if backup_file.exists(): + file_size = backup_file.stat().st_size + rich.print( + f"[green] Backup size:[/green] {file_size / 1024:.2f} KB" + ) + else: + rich.print( + f"[red]✗ Backup failed with error:[/red]\n{result.stderr}" + ) + rich.print( + "\n[yellow]Please backup manually before proceeding with the upgrade.[/yellow]" + ) + exit(1) + + except Exception as e: + rich.print(f"[red]Error during backup:[/red] {e}") + rich.print( + "\n[yellow]Please backup manually using the command below before proceeding.[/yellow]" + ) + exit(1) + else: + rich.print( + "\n[yellow]You chose not to backup automatically.[/yellow]" + ) + rich.print( + f"\n[bold]Please backup manually by running this command:[/bold]\n" + f"[cyan]{backup_command}[/cyan]\n" + ) + + confirmed = Confirm.ask( + "Have you successfully backed up your Keycloak database?", + default=False, + ) + if not confirmed: + rich.print( + f"\n[red bold]You must backup the Keycloak database before upgrading to {self.version}.[/red bold]" + ) + exit(1) + else: + # No kubectl access - provide manual instructions + rich.print( + "\n[yellow]⚠[/yellow] kubectl configuration not found. You must backup manually." + ) + rich.print( + textwrap.dedent( + f""" + [bold]To backup your Keycloak database:[/bold] + + 1. Ensure you have kubectl access to your cluster + ([link=https://www.nebari.dev/docs/how-tos/debug-nebari#generating-the-kubeconfig]see docs[/link]) + + 2. 
Run this command in your terminal: + [cyan]{backup_command}[/cyan] + + 3. Verify the backup file was created: + [cyan]ls -lh keycloak-backup.sql[/cyan] + + [yellow]The backup file will be saved to your current directory.[/yellow] + """ + ) + ) + + confirmed = Confirm.ask( + "\nHave you successfully backed up your Keycloak database?", + default=False, + ) + if not confirmed: + rich.print( + f"\n[red bold]You must backup the Keycloak database before upgrading to {self.version}.[/red bold]" + ) + rich.print( + "[yellow]After completing the backup, run this upgrade command again.[/yellow]" + ) + exit(1) + + rich.print( + "\n[green]✓ Database backup confirmed.[/green] You can now proceed with:" + ) + rich.print(f" 1. [cyan]nebari render -c {config_filename}[/cyan]") + rich.print(f" 2. [cyan]nebari deploy -c {config_filename}[/cyan]") + rich.print( + "\n[yellow]For detailed upgrade instructions, see:[/yellow] UPGRADE_STEPS.md\n" + ) + + rich.print("Ready to upgrade to Nebari version [green]2025.10.1[/green].") return config From 042c7364a48ea7c503fec4cab079f21265cc0462 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 14 Oct 2025 13:51:19 -0600 Subject: [PATCH 12/43] bump helm provider version, add standalone postgres --- .../modules/kubernetes/keycloak-helm/main.tf | 30 +++++-------------- .../kubernetes_keycloak/template/versions.tf | 2 +- 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf index 5ef86ff3ad..e83dc27ff2 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf @@ -3,17 +3,12 @@ resource "helm_release" "keycloak_postgresql" { name = "keycloak-postgres-standalone" namespace = var.namespace - repository = 
"https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami" + repository = "oci://registry-1.docker.io/bitnamicharts" chart = "postgresql" - version = "10.16.2" + version = "18.0.15" values = [ jsonencode({ - image = { - registry = "docker.io" - repository = "bitnamilegacy/postgresql" - tag = "11.14.0" - } primary = { nodeSelector = { "${var.node_group.key}" = var.node_group.value @@ -36,6 +31,10 @@ resource "helm_release" "keycloak" { chart = "keycloakx" version = "7.1.3" + depends_on = [ + helm_release.keycloak_postgresql + ] + values = concat([ # https://github.com/codecentric/helm-charts/blob/keycloak-15.0.2/charts/keycloak/values.yaml file("${path.module}/values.yaml"), @@ -43,22 +42,7 @@ resource "helm_release" "keycloak" { nodeSelector = { "${var.node_group.key}" = var.node_group.value } - postgresql = { - # TODO: Remove hardcoded image values after Helm chart update - # This is a workaround due to bitnami charts deprecation - # See: https://github.com/bitnami/charts/issues/35164 - # See: https://github.com/nebari-dev/nebari/issues/3120 - image = { - registry = "docker.io" - repository = "bitnamilegacy/postgresql" - tag = "11.14.0" - } - primary = { - nodeSelector = { - "${var.node_group.key}" = var.node_group.value - } - } - } + customThemes = var.themes }) ], var.overrides) diff --git a/src/_nebari/stages/kubernetes_keycloak/template/versions.tf b/src/_nebari/stages/kubernetes_keycloak/template/versions.tf index d1e5f8acfb..5f24182baa 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/versions.tf +++ b/src/_nebari/stages/kubernetes_keycloak/template/versions.tf @@ -2,7 +2,7 @@ terraform { required_providers { helm = { source = "hashicorp/helm" - version = "2.1.2" + version = "~> 2.16.0" } kubernetes = { source = "hashicorp/kubernetes" From 8f24426bb66482813b733cc3fd143cc050c44787 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 14 Oct 2025 15:58:13 -0600 Subject: [PATCH 13/43] add upgrade command leveraging 
python-kubernetes --- src/_nebari/upgrade.py | 74 +++++++++++++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 16 deletions(-) diff --git a/src/_nebari/upgrade.py b/src/_nebari/upgrade.py index 7fe148d73d..eefe800c68 100644 --- a/src/_nebari/upgrade.py +++ b/src/_nebari/upgrade.py @@ -19,6 +19,7 @@ import kubernetes.config import requests import rich +from kubernetes.stream import stream from packaging.version import Version from pydantic import ValidationError from rich.prompt import Confirm, Prompt @@ -2139,34 +2140,75 @@ class Upgrade_2025_11_1(UpgradeStep): else: raise e - # Run the backup command + # Run the backup command using Kubernetes API backup_file = config_filename.parent / "keycloak-backup.sql" rich.print( f"[green]Creating backup at:[/green] {backup_file}\n" ) - import subprocess - - result = subprocess.run( - backup_command, - shell=True, - capture_output=True, - text=True, - cwd=config_filename.parent, - ) + try: + # Execute pg_dump command in the pod using Kubernetes API + exec_command = [ + '/bin/sh', + '-c', + 'env PGPASSWORD=keycloak pg_dump -U keycloak -d keycloak' + ] - if result.returncode == 0: - rich.print( - f"[green]✓ Backup successful![/green] Saved to {backup_file}" + resp = stream( + api_instance.connect_get_namespaced_pod_exec, + name='keycloak-postgresql-0', + namespace=namespace, + command=exec_command, + stderr=True, + stdin=False, + stdout=True, + tty=False, + _preload_content=False ) - if backup_file.exists(): + + # Write the output to the backup file + error_output = [] + with open(backup_file, 'wb') as f: + while resp.is_open(): + resp.update(timeout=1) + if resp.peek_stdout(): + stdout_data = resp.read_stdout() + f.write(stdout_data.encode('utf-8')) + if resp.peek_stderr(): + stderr_data = resp.read_stderr() + if stderr_data: + error_output.append(stderr_data) + + resp.close() + + # Check if backup was successful + if backup_file.exists() and backup_file.stat().st_size > 0: file_size = 
backup_file.stat().st_size + rich.print( + f"[green]✓ Backup successful![/green] Saved to {backup_file}" + ) rich.print( f"[green] Backup size:[/green] {file_size / 1024:.2f} KB" ) - else: + + # Show any warnings from stderr (pg_dump often writes info to stderr) + if error_output: + for err in error_output: + if 'NOTICE' in err or 'WARNING' in err: + rich.print(f"[yellow]{err.strip()}[/yellow]") + else: + error_msg = '\n'.join(error_output) if error_output else "Unknown error - backup file is empty or doesn't exist" + rich.print( + f"[red]✗ Backup failed:[/red]\n{error_msg}" + ) + rich.print( + "\n[yellow]Please backup manually before proceeding with the upgrade.[/yellow]" + ) + exit(1) + + except kubernetes.client.exceptions.ApiException as api_err: rich.print( - f"[red]✗ Backup failed with error:[/red]\n{result.stderr}" + f"[red]✗ Kubernetes API error during backup:[/red]\n{api_err}" ) rich.print( "\n[yellow]Please backup manually before proceeding with the upgrade.[/yellow]" From a16528612f68e85084c8fe8290a9099d3197eca2 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Wed, 15 Oct 2025 15:42:30 -0600 Subject: [PATCH 14/43] use python kubernetes for keycloak backup instead of kubectl --- src/_nebari/upgrade.py | 292 +++++++++++++++++------------------------ 1 file changed, 124 insertions(+), 168 deletions(-) diff --git a/src/_nebari/upgrade.py b/src/_nebari/upgrade.py index eefe800c68..7f509201f9 100644 --- a/src/_nebari/upgrade.py +++ b/src/_nebari/upgrade.py @@ -2081,203 +2081,159 @@ class Upgrade_2025_11_1(UpgradeStep): """ ) ) - - # Check if user has kubectl access - has_kubectl = False try: kubernetes.config.load_kube_config() - has_kubectl = True except kubernetes.config.config_exception.ConfigException: - pass + rich.print( + "[red bold]No default kube configuration file was found. 
Make sure to [link=https://www.nebari.dev/docs/how-tos/debug-nebari#generating-the-kubeconfig]have one pointing to your Nebari cluster[/link] before upgrading.[/red bold]" + ) + exit() + + current_kube_context = kubernetes.config.list_kube_config_contexts()[1] + cluster_name = current_kube_context["context"]["cluster"] + rich.print( + f"\nThe following backup will be attempted on the [cyan bold]{cluster_name}[/cyan bold] cluster.\n" + ) - backup_command = f"kubectl exec -n {namespace} keycloak-postgresql-0 -- env PGPASSWORD=keycloak pg_dump -U keycloak -d keycloak > keycloak-backup.sql" + # Kubernetes config available - offer automatic backup + rich.print( + "\n[green]✓[/green] Kubernetes configuration detected. Nebari can backup the database automatically." + ) - if has_kubectl: + if not (kwargs.get("attempt_fixes", False) or Confirm.ask( + "\nWould you like Nebari to backup the Keycloak database for you now?", + default=True, + )): + # User declined automatic backup + rich.print("\n[yellow]You chose not to backup automatically.[/yellow]") rich.print( - "\n[green]✓[/green] kubectl configuration detected. Nebari can help you backup the database." 
- ) - - # Offer to run the backup automatically - run_backup = kwargs.get("attempt_fixes", False) or Confirm.ask( - "\nWould you like Nebari to backup the Keycloak database for you now?", - default=True, + "[yellow]Please ensure you have a backup before proceeding with deployment.[/yellow]" ) - if run_backup: + if not Confirm.ask( + "\nHave you successfully backed up your Keycloak database?", + default=False, + ): rich.print( - f"\n[cyan]Running backup command:[/cyan]\n{backup_command}\n" + f"\n[red bold]You must backup the Keycloak database before upgrading to {self.version}.[/red bold]" ) + exit(1) - try: - # Check if the old PostgreSQL pod exists - api_instance = kubernetes.client.CoreV1Api() - try: - api_instance.read_namespaced_pod( - name="keycloak-postgresql-0", namespace=namespace - ) - except kubernetes.client.exceptions.ApiException as e: - if e.status == 404: - rich.print( - f"\n[yellow]Pod keycloak-postgresql-0 not found in namespace {namespace}.[/yellow]" - ) - rich.print( - "This may be normal if you're on a fresh deployment or have already migrated." - ) - skip_backup = Confirm.ask( - "\nDo you want to skip the backup and continue with the upgrade?", - default=False, - ) - if not skip_backup: - rich.print( - "[red]Upgrade cancelled. Please verify your Keycloak deployment.[/red]" - ) - exit(1) - else: - rich.print( - "[yellow]Skipping backup. 
Proceeding with upgrade...[/yellow]" - ) - return config - else: - raise e - - # Run the backup command using Kubernetes API - backup_file = config_filename.parent / "keycloak-backup.sql" - rich.print( - f"[green]Creating backup at:[/green] {backup_file}\n" - ) - - try: - # Execute pg_dump command in the pod using Kubernetes API - exec_command = [ - '/bin/sh', - '-c', - 'env PGPASSWORD=keycloak pg_dump -U keycloak -d keycloak' - ] - - resp = stream( - api_instance.connect_get_namespaced_pod_exec, - name='keycloak-postgresql-0', - namespace=namespace, - command=exec_command, - stderr=True, - stdin=False, - stdout=True, - tty=False, - _preload_content=False - ) - - # Write the output to the backup file - error_output = [] - with open(backup_file, 'wb') as f: - while resp.is_open(): - resp.update(timeout=1) - if resp.peek_stdout(): - stdout_data = resp.read_stdout() - f.write(stdout_data.encode('utf-8')) - if resp.peek_stderr(): - stderr_data = resp.read_stderr() - if stderr_data: - error_output.append(stderr_data) - - resp.close() - - # Check if backup was successful - if backup_file.exists() and backup_file.stat().st_size > 0: - file_size = backup_file.stat().st_size - rich.print( - f"[green]✓ Backup successful![/green] Saved to {backup_file}" - ) - rich.print( - f"[green] Backup size:[/green] {file_size / 1024:.2f} KB" - ) + rich.print( + "\n[green]✓ Database backup confirmed.[/green] You can now proceed with:" + ) + rich.print(f" 1. [cyan]nebari render -c {config_filename}[/cyan]") + rich.print(f" 2. 
[cyan]nebari deploy -c {config_filename}[/cyan]") + rich.print( + "\n[yellow]For detailed upgrade instructions, see:[/yellow] UPGRADE_STEPS.md\n" + ) + rich.print("Ready to upgrade to Nebari version [green]2025.10.1[/green].") + return config - # Show any warnings from stderr (pg_dump often writes info to stderr) - if error_output: - for err in error_output: - if 'NOTICE' in err or 'WARNING' in err: - rich.print(f"[yellow]{err.strip()}[/yellow]") - else: - error_msg = '\n'.join(error_output) if error_output else "Unknown error - backup file is empty or doesn't exist" - rich.print( - f"[red]✗ Backup failed:[/red]\n{error_msg}" - ) - rich.print( - "\n[yellow]Please backup manually before proceeding with the upgrade.[/yellow]" - ) - exit(1) + # User accepted automatic backup - proceed + rich.print( + "\n[cyan]Backing up Keycloak database using Kubernetes API...[/cyan]\n" + ) - except kubernetes.client.exceptions.ApiException as api_err: - rich.print( - f"[red]✗ Kubernetes API error during backup:[/red]\n{api_err}" - ) - rich.print( - "\n[yellow]Please backup manually before proceeding with the upgrade.[/yellow]" - ) - exit(1) + api_instance = kubernetes.client.CoreV1Api() + backup_file = config_filename.parent / "keycloak-backup.sql" - except Exception as e: - rich.print(f"[red]Error during backup:[/red] {e}") - rich.print( - "\n[yellow]Please backup manually using the command below before proceeding.[/yellow]" - ) - exit(1) - else: - rich.print( - "\n[yellow]You chose not to backup automatically.[/yellow]" - ) + # Check if the old PostgreSQL pod exists + try: + api_instance.read_namespaced_pod( + name="keycloak-postgresql-0", namespace=namespace + ) + except kubernetes.client.exceptions.ApiException as e: + if e.status == 404: rich.print( - f"\n[bold]Please backup manually by running this command:[/bold]\n" - f"[cyan]{backup_command}[/cyan]\n" + f"\n[yellow]Pod keycloak-postgresql-0 not found in namespace {namespace}.[/yellow]" ) - - confirmed = Confirm.ask( - "Have 
you successfully backed up your Keycloak database?", + if not Confirm.ask( + "\nDo you want to skip the backup and continue with the upgrade?", default=False, - ) - if not confirmed: + ): rich.print( - f"\n[red bold]You must backup the Keycloak database before upgrading to {self.version}.[/red bold]" + "[red]Upgrade cancelled. Please verify your Keycloak deployment.[/red]" ) exit(1) - else: - # No kubectl access - provide manual instructions - rich.print( - "\n[yellow]⚠[/yellow] kubectl configuration not found. You must backup manually." - ) - rich.print( - textwrap.dedent( - f""" - [bold]To backup your Keycloak database:[/bold] - 1. Ensure you have kubectl access to your cluster - ([link=https://www.nebari.dev/docs/how-tos/debug-nebari#generating-the-kubeconfig]see docs[/link]) + rich.print("[yellow]Skipping backup. Proceeding with upgrade...[/yellow]") + return config - 2. Run this command in your terminal: - [cyan]{backup_command}[/cyan] + # Other API errors + rich.print(f"[red]✗ Error checking for PostgreSQL pod:[/red] {e}") + exit(1) - 3. 
Verify the backup file was created: - [cyan]ls -lh keycloak-backup.sql[/cyan] + # Execute pg_dump command in the pod + rich.print(f"[green]Creating backup at:[/green] {backup_file}\n") - [yellow]The backup file will be saved to your current directory.[/yellow] - """ - ) - ) + try: + exec_command = [ + '/bin/sh', + '-c', + 'env PGPASSWORD=keycloak pg_dump -U keycloak -d keycloak' + ] - confirmed = Confirm.ask( - "\nHave you successfully backed up your Keycloak database?", - default=False, + resp = stream( + api_instance.connect_get_namespaced_pod_exec, + name='keycloak-postgresql-0', + namespace=namespace, + command=exec_command, + stderr=True, + stdin=False, + stdout=True, + tty=False, + _preload_content=False ) - if not confirmed: - rich.print( - f"\n[red bold]You must backup the Keycloak database before upgrading to {self.version}.[/red bold]" - ) - rich.print( - "[yellow]After completing the backup, run this upgrade command again.[/yellow]" - ) + + # Write the output to the backup file + error_output = [] + with open(backup_file, 'wb') as f: + while resp.is_open(): + resp.update(timeout=1) + if resp.peek_stdout(): + stdout_data = resp.read_stdout() + f.write(stdout_data.encode('utf-8')) + if resp.peek_stderr(): + stderr_data = resp.read_stderr() + if stderr_data: + error_output.append(stderr_data) + + resp.close() + + # Check if backup was successful + if not backup_file.exists() or backup_file.stat().st_size == 0: + error_msg = '\n'.join(error_output) if error_output else "Backup file is empty or doesn't exist" + rich.print(f"[red]✗ Backup failed:[/red]\n{error_msg}") exit(1) + # Backup succeeded + file_size = backup_file.stat().st_size + rich.print( + f"[green]✓ Backup successful![/green] Saved to {backup_file}" + ) + rich.print( + f"[green] Backup size:[/green] {file_size / 1024:.2f} KB" + ) + + # Show any warnings from stderr (pg_dump often writes info to stderr) + if error_output: + for err in error_output: + if 'NOTICE' in err or 'WARNING' in err: + 
rich.print(f"[yellow]{err.strip()}[/yellow]") + + except kubernetes.client.exceptions.ApiException as api_err: + rich.print( + f"[red]✗ Kubernetes API error during backup:[/red]\n{api_err}" + ) + exit(1) + except Exception as e: + rich.print(f"[red]✗ Unexpected error during backup:[/red] {e}") + exit(1) + + # Backup complete - provide next steps rich.print( - "\n[green]✓ Database backup confirmed.[/green] You can now proceed with:" + "\n[green]✓ Database backup completed.[/green] You can now proceed with:" ) rich.print(f" 1. [cyan]nebari render -c {config_filename}[/cyan]") rich.print(f" 2. [cyan]nebari deploy -c {config_filename}[/cyan]") From e1264fa5d255eefe150340c9fefcc27958456be7 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Sat, 25 Oct 2025 17:13:52 -0600 Subject: [PATCH 15/43] add code to automatically restore database in post_deploy --- .../stages/kubernetes_keycloak/__init__.py | 286 +++++++++++++++++- 1 file changed, 284 insertions(+), 2 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index b8327df7e9..36b5b7e386 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -318,13 +318,36 @@ def _attempt_keycloak_connection( def post_deploy( self, stage_outputs: Dict[str, Dict[str, Any]], disable_prompt: bool = False ): - """Create nebari-bot user after Keycloak is deployed.""" + """Restore Keycloak database (if backup exists) and create nebari-bot user after Keycloak is deployed.""" + from pathlib import Path + + import kubernetes from keycloak import KeycloakAdmin from keycloak.exceptions import KeycloakError + from kubernetes.stream import stream + + # Step 1: Restore database if backup exists + backup_file = Path(self.output_directory) / "keycloak-backup.sql" + + if backup_file.exists(): + print("\n" + "=" * 80) + print("KEYCLOAK DATABASE RESTORE") + print("=" * 80) + print(f"Found backup file: 
{backup_file}") + print(f"Size: {backup_file.stat().st_size / 1024:.2f} KB\n") + self._restore_keycloak_database(backup_file) + + # Rename backup file to prevent re-running restore on subsequent deploys + backup_file.rename(backup_file.with_suffix('.sql.restored')) + print(f"\n✓ Renamed backup file to {backup_file.with_suffix('.sql.restored')}") + print("=" * 80 + "\n") + else: + print("No Keycloak database backup found, skipping restore") + + # Step 2: Create nebari-bot user keycloak_url = f"{stage_outputs['stages/' + self.name]['keycloak_credentials']['value']['url']}/auth/" nebari_bot_password = stage_outputs["stages/" + self.name]["keycloak_nebari_bot_password"]["value"] - print("Creating nebari-bot user in Keycloak master realm...") max_attempts = 10 @@ -387,6 +410,265 @@ def post_deploy( print(f"Failed to configure nebari-bot user after {max_attempts} attempts: {e}") sys.exit(1) + def _restore_keycloak_database(self, backup_file): + """Restore PostgreSQL database from backup file using Kubernetes exec.""" + import kubernetes + from kubernetes.stream import stream + + # Configuration - these should match your new postgres deployment + namespace = self.config.namespace + keycloak_statefulset_name = "keycloak-keycloakx" + pod_name = "keycloak-postgres-standalone-postgresql-0" + db_user = "keycloak" + db_name = "keycloak" + db_password = "keycloak" # This should ideally come from config or secret + postgres_user = "postgres" + + # Load kubernetes config + kubernetes.config.load_kube_config() + api = kubernetes.client.CoreV1Api() + apps_api = kubernetes.client.AppsV1Api() + + # Step 0: Scale down Keycloak to prevent active database connections + print(f"Step 0: Scaling down Keycloak statefulset '{keycloak_statefulset_name}' to 0 replicas...") + try: + # Get current statefulset + statefulset = apps_api.read_namespaced_stateful_set( + name=keycloak_statefulset_name, + namespace=namespace + ) + original_replicas = statefulset.spec.replicas + print(f" Current 
replicas: {original_replicas}") + + # Scale to 0 + statefulset.spec.replicas = 0 + apps_api.patch_namespaced_stateful_set( + name=keycloak_statefulset_name, + namespace=namespace, + body=statefulset + ) + print(f" Scaled to 0 replicas") + + # Wait for pods to terminate + print(f" Waiting for Keycloak pods to terminate...") + max_wait = 60 # seconds + wait_interval = 2 + elapsed = 0 + while elapsed < max_wait: + pods = api.list_namespaced_pod( + namespace=namespace, + label_selector=f"app.kubernetes.io/name=keycloak" + ) + if len(pods.items) == 0: + print(f" ✓ All Keycloak pods terminated") + break + print(f" Still waiting... ({len(pods.items)} pods remaining)") + time.sleep(wait_interval) + elapsed += wait_interval + + if elapsed >= max_wait: + print(f" ⚠ Warning: Timed out waiting for pods to terminate, proceeding anyway") + + print("✓ Keycloak scaled down\n") + + except kubernetes.client.exceptions.ApiException as e: + print(f"⚠ Warning: Could not scale down Keycloak statefulset: {e}") + print("Proceeding with restore anyway...\n") + original_replicas = None + + # Check if pod exists + print(f"Checking if pod '{pod_name}' exists in namespace '{namespace}'...") + try: + api.read_namespaced_pod(name=pod_name, namespace=namespace) + print(f"✓ Pod found\n") + except kubernetes.client.exceptions.ApiException as e: + if e.status == 404: + print(f"✗ Pod '{pod_name}' not found in namespace '{namespace}'") + print("Skipping database restore - pod may not be ready yet") + return + raise + + # Get postgres superuser password from secret + print("Getting postgres superuser password from secret...") + try: + secret_name = "keycloak-postgres-standalone-postgresql" + secret = api.read_namespaced_secret(name=secret_name, namespace=namespace) + import base64 + postgres_password = base64.b64decode(secret.data['postgres-password']).decode('utf-8') + print("✓ Got postgres password\n") + except Exception as e: + print(f"✗ Error getting postgres password: {e}") + print("Skipping 
database restore") + return + + # Helper function to run commands in pod + def run_command(command, show_output=True): + print(f" Running: {command}") + sys.stdout.flush() + + resp = stream( + api.connect_get_namespaced_pod_exec, + name=pod_name, + namespace=namespace, + command=['/bin/sh', '-c', command], + stderr=True, + stdin=False, + stdout=True, + tty=False, + _preload_content=False + ) + + stdout_lines = [] + stderr_lines = [] + + while resp.is_open(): + resp.update(timeout=1) + if resp.peek_stdout(): + data = resp.read_stdout() + stdout_lines.append(data) + if show_output: + print(data, end='') + sys.stdout.flush() + if resp.peek_stderr(): + data = resp.read_stderr() + stderr_lines.append(data) + if show_output: + print(data, end='', file=sys.stderr) + sys.stderr.flush() + + resp.close() + return ''.join(stdout_lines), ''.join(stderr_lines) + + # Helper function to run command with stdin + def run_command_with_stdin(command, stdin_data, show_output=True): + print(f" Running: {command}") + print(f" Piping {len(stdin_data)} bytes to stdin...") + sys.stdout.flush() + + resp = stream( + api.connect_get_namespaced_pod_exec, + name=pod_name, + namespace=namespace, + command=['/bin/sh', '-c', command], + stderr=True, + stdin=True, + stdout=True, + tty=False, + _preload_content=False + ) + + # Write stdin data and close stdin to signal EOF + resp.write_stdin(stdin_data) + resp.write_stdin("") # Signal EOF + + stdout_lines = [] + stderr_lines = [] + no_data_cycles = 0 + max_no_data_cycles = 30 + + while resp.is_open(): + resp.update(timeout=1) + has_data = False + + if resp.peek_stdout(): + data = resp.read_stdout() + stdout_lines.append(data) + if show_output: + print(data, end='') + sys.stdout.flush() + has_data = True + no_data_cycles = 0 + + if resp.peek_stderr(): + data = resp.read_stderr() + stderr_lines.append(data) + if show_output: + print(data, end='', file=sys.stderr) + sys.stderr.flush() + has_data = True + no_data_cycles = 0 + + if not has_data: + 
no_data_cycles += 1 + if no_data_cycles >= max_no_data_cycles: + print("\n No output for 30 seconds, assuming command completed...") + break + + resp.close() + return ''.join(stdout_lines), ''.join(stderr_lines) + + # Step 1: Drop existing database + print(f"Step 1: Dropping database '{db_name}' (if exists)...") + drop_cmd = f"env PGPASSWORD={postgres_password} psql -U {postgres_user} -c 'DROP DATABASE IF EXISTS {db_name};'" + run_command(drop_cmd) + print("✓ Database dropped\n") + + # Step 2: Create fresh database + print(f"Step 2: Creating fresh database '{db_name}'...") + create_cmd = f"env PGPASSWORD={postgres_password} psql -U {postgres_user} -c 'CREATE DATABASE {db_name};'" + run_command(create_cmd) + print("✓ Database created\n") + + # Step 3: Grant privileges to keycloak user + print(f"Step 3: Granting privileges to user '{db_user}'...") + grant_db_cmd = f"env PGPASSWORD={postgres_password} psql -U {postgres_user} -c 'GRANT ALL PRIVILEGES ON DATABASE {db_name} TO {db_user};'" + run_command(grant_db_cmd) + + grant_schema_cmd = f"env PGPASSWORD={postgres_password} psql -U {postgres_user} -d {db_name} -c 'GRANT ALL ON SCHEMA public TO {db_user};'" + run_command(grant_schema_cmd) + + grant_default_cmd = f"env PGPASSWORD={postgres_password} psql -U {postgres_user} -d {db_name} -c 'ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {db_user};'" + run_command(grant_default_cmd) + print("✓ Privileges granted\n") + + # Step 4: Read the backup file + print(f"Step 4: Reading backup file...") + with open(backup_file, 'r') as f: + backup_sql = f.read() + print(f"✓ Backup file loaded ({len(backup_sql)} characters)\n") + + # Step 5: Restore the database + print(f"Step 5: Restoring database from backup...") + print("This may take a few moments. 
Output will be shown below:\n") + print("Note: Warnings about 'public' schema permissions are expected and harmless.") + print("=" * 80) + + restore_cmd = f"env PGPASSWORD={db_password} psql -U {db_user} -d {db_name} --set ON_ERROR_STOP=off" + run_command_with_stdin(restore_cmd, backup_sql) + + print("=" * 80) + print("\n✓ Restore completed!\n") + + # Step 6: Verify the restore + print(f"Step 6: Verifying restore by checking user count...") + verify_cmd = f"env PGPASSWORD={db_password} psql -U {db_user} -d {db_name} -c 'SELECT count(*) FROM user_entity;'" + run_command(verify_cmd) + print("✓ Verification complete\n") + + print("=" * 80) + print("DATABASE RESTORE SUCCESSFUL!") + print("=" * 80) + + # Step 7: Scale Keycloak back up + if original_replicas is not None: + print(f"\nStep 7: Scaling Keycloak statefulset back to {original_replicas} replicas...") + try: + statefulset = apps_api.read_namespaced_stateful_set( + name=keycloak_statefulset_name, + namespace=namespace + ) + statefulset.spec.replicas = original_replicas + apps_api.patch_namespaced_stateful_set( + name=keycloak_statefulset_name, + namespace=namespace, + body=statefulset + ) + print(f"✓ Keycloak scaled back to {original_replicas} replicas") + print(" Keycloak pods will start connecting to the restored database\n") + except kubernetes.client.exceptions.ApiException as e: + print(f"⚠ Warning: Could not scale up Keycloak statefulset: {e}") + print(f" You may need to manually scale it back up: kubectl scale statefulset {keycloak_statefulset_name} --replicas={original_replicas} -n {namespace}\n") + @contextlib.contextmanager def deploy( self, stage_outputs: Dict[str, Dict[str, Any]], disable_prompt: bool = False From a4c241614cbb4905ed96ef3424f240bf80ca137f Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Mon, 3 Nov 2025 16:01:02 -0700 Subject: [PATCH 16/43] update upgrade step version --- src/_nebari/upgrade.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git 
a/src/_nebari/upgrade.py b/src/_nebari/upgrade.py index 7f509201f9..c38f03facc 100644 --- a/src/_nebari/upgrade.py +++ b/src/_nebari/upgrade.py @@ -2055,6 +2055,22 @@ def _version_specific_upgrade( class Upgrade_2025_11_1(UpgradeStep): + """ + Upgrade step for Nebari version 2025.11.1 + + This upgrade includes a major Keycloak upgrade from the `keycloak` chart (15.0.2) + to the `keycloakx` chart (7.1.3). The old keycloak chart includes PostgreSQL as a + subchart, but the new keycloakx chart does not. Users must backup their Keycloak + PostgreSQL database before upgrading to prevent data loss. + """ + + version = "2025.11.1" + + @override + def _version_specific_upgrade( + self, config, start_version, config_filename: Path, *args, **kwargs + ): + namespace = config.get("namespace", "dev") rich.print("\n ⚠️ CRITICAL UPGRADE WARNING ⚠️") @@ -2241,7 +2257,7 @@ class Upgrade_2025_11_1(UpgradeStep): "\n[yellow]For detailed upgrade instructions, see:[/yellow] UPGRADE_STEPS.md\n" ) - rich.print("Ready to upgrade to Nebari version [green]2025.10.1[/green].") + rich.print("Ready to upgrade to Nebari version [green]2025.11.1[/green].") return config From 2c56491ae9993193cceb00141a1eb41771d25d3e Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Mon, 3 Nov 2025 16:29:36 -0700 Subject: [PATCH 17/43] copy sql restore file as tar instead of standard in --- .../stages/kubernetes_keycloak/__init__.py | 109 ++++++++++-------- 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index 36b5b7e386..bd35ff7ad1 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -371,6 +371,15 @@ def post_deploy( if users: print("nebari-bot user already exists") user_id = users[0]["id"] + + # Reset password to ensure it matches the expected value + # (Keycloak doesn't allow reading passwords for comparison) + 
admin.set_user_password( + user_id=user_id, + password=nebari_bot_password, + temporary=False + ) + print("Updated nebari-bot password to match expected value") else: # Create nebari-bot user user_id = admin.create_user({ @@ -412,6 +421,11 @@ def post_deploy( def _restore_keycloak_database(self, backup_file): """Restore PostgreSQL database from backup file using Kubernetes exec.""" + import base64 + import tarfile + from io import BytesIO + from pathlib import Path + import kubernetes from kubernetes.stream import stream @@ -539,17 +553,32 @@ def run_command(command, show_output=True): resp.close() return ''.join(stdout_lines), ''.join(stderr_lines) - # Helper function to run command with stdin - def run_command_with_stdin(command, stdin_data, show_output=True): - print(f" Running: {command}") - print(f" Piping {len(stdin_data)} bytes to stdin...") - sys.stdout.flush() + # Helper function to copy file to pod using tar + def copy_file_to_pod(local_path, remote_path): + """ + Copy a file to pod using tar (similar to kubectl cp). + More reliable than stdin streaming for large files. 
+ """ + print(f" Copying {local_path.name} to pod:{remote_path}") + print(f" File size: {local_path.stat().st_size / 1024:.2f} KB") + + # Create tar archive in memory + tar_buffer = BytesIO() + with tarfile.open(fileobj=tar_buffer, mode='w') as tar: + tar.add(str(local_path), arcname=os.path.basename(remote_path)) + + tar_buffer.seek(0) + tar_data = tar_buffer.getvalue() + + # Extract tar in pod + remote_dir = os.path.dirname(remote_path) + extract_cmd = ['tar', 'xf', '-', '-C', remote_dir or '/'] resp = stream( api.connect_get_namespaced_pod_exec, name=pod_name, namespace=namespace, - command=['/bin/sh', '-c', command], + command=extract_cmd, stderr=True, stdin=True, stdout=True, @@ -557,45 +586,16 @@ def run_command_with_stdin(command, stdin_data, show_output=True): _preload_content=False ) - # Write stdin data and close stdin to signal EOF - resp.write_stdin(stdin_data) - resp.write_stdin("") # Signal EOF - - stdout_lines = [] - stderr_lines = [] - no_data_cycles = 0 - max_no_data_cycles = 30 - - while resp.is_open(): - resp.update(timeout=1) - has_data = False - - if resp.peek_stdout(): - data = resp.read_stdout() - stdout_lines.append(data) - if show_output: - print(data, end='') - sys.stdout.flush() - has_data = True - no_data_cycles = 0 - - if resp.peek_stderr(): - data = resp.read_stderr() - stderr_lines.append(data) - if show_output: - print(data, end='', file=sys.stderr) - sys.stderr.flush() - has_data = True - no_data_cycles = 0 - - if not has_data: - no_data_cycles += 1 - if no_data_cycles >= max_no_data_cycles: - print("\n No output for 30 seconds, assuming command completed...") - break + # Write tar data in chunks + chunk_size = 1024 * 1024 # 1MB chunks + for i in range(0, len(tar_data), chunk_size): + chunk = tar_data[i:i + chunk_size] + resp.write_stdin(chunk) + resp.write_stdin('') # Signal EOF resp.close() - return ''.join(stdout_lines), ''.join(stderr_lines) + + print(f" ✓ File copied successfully") # Step 1: Drop existing database print(f"Step 
1: Dropping database '{db_name}' (if exists)...") @@ -621,20 +621,21 @@ def run_command_with_stdin(command, stdin_data, show_output=True): run_command(grant_default_cmd) print("✓ Privileges granted\n") - # Step 4: Read the backup file - print(f"Step 4: Reading backup file...") - with open(backup_file, 'r') as f: - backup_sql = f.read() - print(f"✓ Backup file loaded ({len(backup_sql)} characters)\n") + # Step 4: Copy backup file to pod + print(f"Step 4: Copying backup file to pod...") + remote_backup_path = "/tmp/keycloak-backup.sql" + copy_file_to_pod(Path(backup_file), remote_backup_path) + print(f"✓ Backup file copied to pod\n") - # Step 5: Restore the database + # Step 5: Restore the database from file print(f"Step 5: Restoring database from backup...") print("This may take a few moments. Output will be shown below:\n") print("Note: Warnings about 'public' schema permissions are expected and harmless.") print("=" * 80) - restore_cmd = f"env PGPASSWORD={db_password} psql -U {db_user} -d {db_name} --set ON_ERROR_STOP=off" - run_command_with_stdin(restore_cmd, backup_sql) + # Use -f flag to read from file instead of stdin - much more reliable! 
+ restore_cmd = f"env PGPASSWORD={db_password} psql -U {db_user} -d {db_name} --set ON_ERROR_STOP=off -f {remote_backup_path}" + run_command(restore_cmd) print("=" * 80) print("\n✓ Restore completed!\n") @@ -645,6 +646,12 @@ def run_command_with_stdin(command, stdin_data, show_output=True): run_command(verify_cmd) print("✓ Verification complete\n") + # Step 6.5: Clean up temporary file in pod + print(f"Step 6.5: Cleaning up temporary file in pod...") + cleanup_cmd = f"rm -f {remote_backup_path}" + run_command(cleanup_cmd, show_output=False) + print(f"✓ Removed {remote_backup_path}\n") + print("=" * 80) print("DATABASE RESTORE SUCCESSFUL!") print("=" * 80) From c4c1eadb4a6ee6d63c8723d8662d9c96ff75a8b2 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 4 Nov 2025 14:23:02 -0700 Subject: [PATCH 18/43] clean up unneeded notes and documentation --- RELEASE.md | 49 ------ UPGRADE_STEPS.md | 350 ----------------------------------------- src/_nebari/upgrade.py | 19 +-- 3 files changed, 5 insertions(+), 413 deletions(-) delete mode 100644 UPGRADE_STEPS.md diff --git a/RELEASE.md b/RELEASE.md index 2da1d80e2c..f86129d771 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -9,55 +9,6 @@ This file is copied to nebari-dev/nebari-docs using a GitHub Action. --> --- -## Release 2025.10.1 - October 11, 2025 - -### Breaking Changes - -⚠️ **CRITICAL UPGRADE** - This release includes a major Keycloak upgrade that requires manual intervention. - -### What's Changed - -#### Keycloak Upgrade (15.0.2 → keycloakx 7.1.3) - -This release upgrades Keycloak from the `keycloak` chart (15.0.2, JBoss/WildFly) to the `keycloakx` chart (7.1.3, Quarkus). 
This is a major architectural change with the following impacts: - -**Key Changes:** -- PostgreSQL is now deployed as a standalone Helm release instead of a subchart -- Keycloak service name changes from `keycloak-headless` to `keycloak-keycloakx-http` -- OAuth clients now require the `openid` scope explicitly -- Startup scripts replaced with Python `post_deploy` hooks for user creation - -**Required Actions:** - -1. **Before upgrading**, you MUST backup your Keycloak PostgreSQL database: - ```bash - kubectl exec -n dev keycloak-postgresql-0 -- env PGPASSWORD=keycloak pg_dump -U keycloak -d keycloak > keycloak-backup.sql - ``` - -2. Run the upgrade command: - ```bash - nebari upgrade -c nebari-config.yaml - ``` - The upgrade step will prompt you to backup the database and can optionally perform the backup automatically if you have kubectl access. - -3. After upgrade, render and deploy: - ```bash - nebari render -c nebari-config.yaml - nebari deploy -c nebari-config.yaml - ``` - -**For detailed upgrade instructions, see `UPGRADE_STEPS.md` in the repository.** - -**Affected Services:** -- JupyterHub OAuth configuration updated to include `openid` scope -- Grafana OAuth configuration updated to include `openid` scope -- conda-store OAuth configuration updated to include `openid` scope -- conda-store internal service URLs updated for new Keycloak service name - -**Full Changelog**: https://github.com/nebari-dev/nebari/compare/2025.6.1...2025.10.1 - ---- - ## Release 2025.6.1 - June 06, 2025 ### What's Changed diff --git a/UPGRADE_STEPS.md b/UPGRADE_STEPS.md deleted file mode 100644 index 105729a0eb..0000000000 --- a/UPGRADE_STEPS.md +++ /dev/null @@ -1,350 +0,0 @@ -# Upgrading Keycloak from 15.0.2 (JBoss) to keycloakx 7.1.3 (Quarkus) in Nebari - -## Overview - -This guide covers upgrading from the `keycloak` chart (15.0.2) to the `keycloakx` chart (7.1.3) while preserving all users, realms, and configuration. 
- -## Key Challenge - -The old `keycloak` chart includes PostgreSQL as a subchart dependency, but the new `keycloakx` chart does not. To safely upgrade without losing data, we need to: -1. Extract PostgreSQL to a standalone deployment -2. Migrate the data -3. Upgrade to keycloakx pointing to the standalone database - -## Prerequisites - -- Existing Nebari deployment with Keycloak 15.0.2 -- kubectl access to the cluster -- Terraform/OpenTofu installed - -## Step 1: Deploy Standalone PostgreSQL - -Add a standalone PostgreSQL Helm release in `kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf`: - -```terraform -# Standalone PostgreSQL database for Keycloak -# Deployed separately to allow safe upgrade from keycloak to keycloakx chart -resource "helm_release" "keycloak_postgresql" { - name = "keycloak-postgres-standalone" - namespace = var.namespace - repository = "https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami" - chart = "postgresql" - version = "10.16.2" - - values = [ - jsonencode({ - image = { - registry = "docker.io" - repository = "bitnamilegacy/postgresql" - tag = "11.14.0" - } - primary = { - nodeSelector = { - "${var.node_group.key}" = var.node_group.value - } - } - auth = { - username = "keycloak" - password = "keycloak" - database = "keycloak" - } - }) - ] -} -``` - -Deploy it: -```bash -cd nebari-local/stages/05-kubernetes-keycloak -tofu apply -``` - -Verify both PostgreSQL instances are running: -```bash -kubectl get pods -n dev | grep postgres -``` - -Expected output: -- `keycloak-postgresql-0` (old - from keycloak subchart) -- `keycloak-postgres-standalone-postgresql-0` (new - standalone) - -## Step 2: Backup and Migrate Database - -Backup from old PostgreSQL: -```bash -kubectl exec -n dev keycloak-postgresql-0 -- env PGPASSWORD=keycloak pg_dump -U keycloak -d keycloak > keycloak-backup.sql -``` - -Get the postgres superuser password for the new database: -```bash -kubectl get secret -n dev 
keycloak-postgres-standalone-postgresql -o jsonpath='{.data.postgres-password}' | base64 -d -echo -``` - -Create database and user in new PostgreSQL: -```bash -# Replace with the actual password from above -kubectl exec -i -n dev keycloak-postgres-standalone-postgresql-0 -- env PGPASSWORD= psql -U postgres -c "CREATE DATABASE keycloak;" - -kubectl exec -i -n dev keycloak-postgres-standalone-postgresql-0 -- env PGPASSWORD= psql -U postgres -c "CREATE USER keycloak WITH PASSWORD 'keycloak';" - -kubectl exec -i -n dev keycloak-postgres-standalone-postgresql-0 -- env PGPASSWORD= psql -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE keycloak TO keycloak;" -``` - -Restore the backup: -```bash -cat keycloak-backup.sql | kubectl exec -i -n dev keycloak-postgres-standalone-postgresql-0 -- env PGPASSWORD= psql -U postgres -d keycloak -``` - -Verify migration: -```bash -kubectl exec -n dev keycloak-postgres-standalone-postgresql-0 -- env PGPASSWORD= psql -U postgres -d keycloak -c "SELECT count(*) FROM user_entity;" -``` - -## Step 3: Update values.yaml for keycloakx - -Update `kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml`: - -```yaml -# Database configuration - connect to standalone PostgreSQL -database: - vendor: postgres - hostname: keycloak-postgres-standalone-postgresql - port: 5432 - database: keycloak - username: keycloak - password: keycloak - -# Enable database readiness check -dbchecker: - enabled: true -``` - -## Step 4: Upgrade to keycloakx Chart - -Update `kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf`: - -```terraform -resource "helm_release" "keycloak" { - name = "keycloak" - namespace = var.namespace - - repository = "https://codecentric.github.io/helm-charts" - chart = "keycloakx" # Changed from "keycloak" - version = "7.1.3" # Changed from "15.0.2" - - # ... 
rest of configuration -} -``` - -Update the IngressRoute service name: -```terraform -resource "kubernetes_manifest" "keycloak-http" { - manifest = { - # ... - spec = { - # ... - routes = [ - { - # ... - services = [ - { - name = "keycloak-keycloakx-http" # Changed from "keycloak-headless" - port = 80 - namespace = var.namespace - } - ] - } - ] - } - } -} -``` - -## Step 5: Fix OAuth Scopes for Keycloak 20+ - -Keycloak 20+ requires the `openid` scope to be explicitly requested. Update the following files: - -### JupyterHub -`kubernetes_services/template/modules/kubernetes/services/jupyterhub/main.tf`: -```terraform -KeyCloakOAuthenticator = { - # ... - scope = ["openid", "profile", "email"] # Add this line - # ... -} -``` - -### Grafana -`kubernetes_services/template/modules/kubernetes/services/monitoring/main.tf`: -```terraform -"auth.generic_oauth" = { - # ... - scopes = "openid profile email" # Changed from "profile" - # ... -} -``` - -### conda-store -`kubernetes_services/template/modules/kubernetes/services/conda-store/config/conda_store_config.py`: -```python -c.GenericOAuthAuthentication.access_scope = "openid profile email" # Changed from "profile" -``` - -### conda-store internal service URLs -`kubernetes_services/template/modules/kubernetes/services/conda-store/server.tf`: -```terraform -token_url_internal = "http://keycloak-keycloakx-http.${var.namespace}.svc/auth/realms/${var.realm_id}/protocol/openid-connect/token" -realm_api_url_internal = "http://keycloak-keycloakx-http.${var.namespace}.svc/auth/admin/realms/${var.realm_id}" -``` - -## Step 6: Add nebari-bot User Creation - -Since the keycloakx chart doesn't support startup scripts, create `nebari-bot` user via Python in the `post_deploy` hook. 
- -Update `kubernetes_keycloak/__init__.py`: - -```python -def post_deploy( - self, stage_outputs: Dict[str, Dict[str, Any]], disable_prompt: bool = False -): - """Create nebari-bot user after Keycloak is deployed.""" - from keycloak import KeycloakAdmin - from keycloak.exceptions import KeycloakError - - keycloak_url = f"{stage_outputs['stages/' + self.name]['keycloak_credentials']['value']['url']}/auth/" - nebari_bot_password = stage_outputs["stages/" + self.name]["keycloak_nebari_bot_password"]["value"] - - print("Creating nebari-bot user in Keycloak master realm...") - - max_attempts = 10 - retry_delay = 5 # seconds - - for attempt in range(1, max_attempts + 1): - try: - # Connect as root user - admin = KeycloakAdmin( - keycloak_url, - username="root", - password=self.config.security.keycloak.initial_root_password, - realm_name="master", - client_id="admin-cli", - verify=False, - ) - - # Check if nebari-bot already exists - users = admin.get_users({"username": "nebari-bot"}) - - if users: - print("nebari-bot user already exists") - user_id = users[0]["id"] - else: - # Create nebari-bot user - user_id = admin.create_user({ - "username": "nebari-bot", - "enabled": True, - "credentials": [{ - "type": "password", - "value": nebari_bot_password, - "temporary": False - }] - }) - print("Successfully created nebari-bot user") - - # Assign admin role to nebari-bot user - admin_role = admin.get_realm_role("admin") - user_roles = admin.get_realm_roles_of_user(user_id) - has_admin_role = any(role.get("name") == "admin" for role in user_roles) - - if not has_admin_role: - admin.assign_realm_roles(user_id, [admin_role]) - print("Assigned admin role to nebari-bot user") - else: - print("nebari-bot user already has admin role") - - # Success - break out of retry loop - break - - except KeycloakError as e: - if attempt < max_attempts: - print(f"Attempt {attempt}/{max_attempts} failed: {e}") - print(f"Retrying in {retry_delay} seconds...") - time.sleep(retry_delay) - else: - 
print(f"Failed to configure nebari-bot user after {max_attempts} attempts: {e}") - sys.exit(1) -``` - -## Step 7: Deploy the Upgrade - -```bash -# Render updated configuration -nebari render -c nebari-config.yaml -o nebari-local - -# Apply Keycloak upgrade -cd nebari-local/stages/05-kubernetes-keycloak -tofu apply - -# Apply Keycloak configuration -cd ../06-kubernetes-keycloak-configuration -tofu apply - -# Apply services (JupyterHub, Grafana, conda-store) -cd ../07-kubernetes-services -tofu apply -``` - -## Step 8: Verify the Upgrade - -Check Keycloak is running: -```bash -kubectl get pods -n dev | grep keycloak -``` - -Verify users were preserved: -```bash -kubectl exec -n dev keycloak-postgres-standalone-postgresql-0 -- env PGPASSWORD= psql -U postgres -d keycloak -c "SELECT username FROM user_entity;" -``` - -Test authentication: -- Access JupyterHub: `https:///hub` -- Access Grafana: `https:///monitoring` -- Access conda-store: `https:///conda-store` - -## Rollback (if needed) - -If the upgrade fails, rollback using Helm: - -```bash -# Check current revision -helm history keycloak -n dev - -# Rollback to previous revision -helm rollback keycloak -n dev -``` - -## Key Differences: keycloak vs keycloakx - -| Feature | keycloak 15.0.2 | keycloakx 7.1.3 | -|---------|-----------------|------------------| -| **Base** | JBoss/WildFly | Quarkus | -| **PostgreSQL** | Included as subchart | Not included (external) | -| **Database Config** | Auto-configured | Manual `database:` section | -| **Startup Scripts** | Supported | Not supported | -| **Service Name** | `keycloak-headless` | `keycloak-keycloakx-http` | -| **OAuth Scopes** | Auto-includes openid | Must explicitly request `openid` | -| **User Creation** | Startup scripts | Python `post_deploy` hook | - -## Troubleshooting - -### "Missing openid scope" error -- Ensure all OAuth clients request `scope = ["openid", "profile", "email"]` - -### "Database not found" error -- Verify `database.hostname` points to correct 
service name -- Check PostgreSQL is running: `kubectl get pods -n dev | grep postgres` - -### Users deleted after upgrade -- Data is in PostgreSQL persistent volume - check PVC exists -- Verify database migration completed successfully -- Check keycloakx is connecting to the correct database diff --git a/src/_nebari/upgrade.py b/src/_nebari/upgrade.py index c38f03facc..6aba609ea2 100644 --- a/src/_nebari/upgrade.py +++ b/src/_nebari/upgrade.py @@ -2091,9 +2091,8 @@ def _version_specific_upgrade( - OAuth clients now require [green]openid[/green] scope explicitly - Startup scripts replaced with Python post_deploy hooks - After this upgrade step completes, you will need to: - 1. [cyan]nebari render -c {config_filename}[/cyan] to generate updated Terraform - 2. [cyan]nebari deploy -c {config_filename}[/cyan] to apply the changes + After this upgrade step completes, you will need to run: + nebari deploy -c {config_filename}[/cyan] to apply the changes """ ) ) @@ -2138,12 +2137,8 @@ def _version_specific_upgrade( rich.print( "\n[green]✓ Database backup confirmed.[/green] You can now proceed with:" ) - rich.print(f" 1. [cyan]nebari render -c {config_filename}[/cyan]") - rich.print(f" 2. [cyan]nebari deploy -c {config_filename}[/cyan]") - rich.print( - "\n[yellow]For detailed upgrade instructions, see:[/yellow] UPGRADE_STEPS.md\n" - ) - rich.print("Ready to upgrade to Nebari version [green]2025.10.1[/green].") + rich.print(f" [cyan]nebari deploy -c {config_filename}[/cyan]") + rich.print("Ready to upgrade to Nebari version [green]2025.11.1[/green].") return config # User accepted automatic backup - proceed @@ -2251,11 +2246,7 @@ def _version_specific_upgrade( rich.print( "\n[green]✓ Database backup completed.[/green] You can now proceed with:" ) - rich.print(f" 1. [cyan]nebari render -c {config_filename}[/cyan]") - rich.print(f" 2. 
[cyan]nebari deploy -c {config_filename}[/cyan]") - rich.print( - "\n[yellow]For detailed upgrade instructions, see:[/yellow] UPGRADE_STEPS.md\n" - ) + rich.print(f" [cyan]nebari deploy -c {config_filename}[/cyan]") rich.print("Ready to upgrade to Nebari version [green]2025.11.1[/green].") From d3cf6e32115df4e752a71a9d87b8c2efbd5ec9bb Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 4 Nov 2025 14:37:59 -0700 Subject: [PATCH 19/43] add templating for keycloak external URL --- .../template/modules/kubernetes/keycloak-helm/main.tf | 5 +++++ .../template/modules/kubernetes/keycloak-helm/values.yaml | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf index e83dc27ff2..fdf02f41f9 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf @@ -56,6 +56,11 @@ resource "helm_release" "keycloak" { name = "initial_root_password" value = var.initial_root_password } + + set { + name = "external_url" + value = var.external-url + } } # Track changes to values.yaml diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml index 9837e87018..1eea48232f 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml @@ -47,7 +47,7 @@ cache: # Environment variables for Keycloak configuration extraEnv: | - name: KC_HOSTNAME - value: "tylertesting42.io" + value: "{{ .Values.external_url }}" - name: KC_HOSTNAME_PATH value: "/auth" - name: KC_HOSTNAME_STRICT From 
c394eb5b6573a2d4d74f668777327bd4e914d70d Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 4 Nov 2025 14:41:12 -0700 Subject: [PATCH 20/43] Clean up step naming in restore function --- src/_nebari/stages/kubernetes_keycloak/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index bd35ff7ad1..4048408b58 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -646,8 +646,8 @@ def copy_file_to_pod(local_path, remote_path): run_command(verify_cmd) print("✓ Verification complete\n") - # Step 6.5: Clean up temporary file in pod - print(f"Step 6.5: Cleaning up temporary file in pod...") + # Step 7: Clean up temporary file in pod + print(f"Step 7: Cleaning up temporary file in pod...") cleanup_cmd = f"rm -f {remote_backup_path}" run_command(cleanup_cmd, show_output=False) print(f"✓ Removed {remote_backup_path}\n") @@ -656,9 +656,9 @@ def copy_file_to_pod(local_path, remote_path): print("DATABASE RESTORE SUCCESSFUL!") print("=" * 80) - # Step 7: Scale Keycloak back up + # Step 8: Scale Keycloak back up if original_replicas is not None: - print(f"\nStep 7: Scaling Keycloak statefulset back to {original_replicas} replicas...") + print(f"\nStep 8: Scaling Keycloak statefulset back to {original_replicas} replicas...") try: statefulset = apps_api.read_namespaced_stateful_set( name=keycloak_statefulset_name, From b434ffdc126e66e2676d995f974dfb0ee7b12934 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 4 Nov 2025 15:06:04 -0700 Subject: [PATCH 21/43] add random password for keycloak --- src/_nebari/stages/kubernetes_keycloak/__init__.py | 10 +++++----- .../template/modules/kubernetes/keycloak-helm/main.tf | 2 +- .../modules/kubernetes/keycloak-helm/values.yaml | 4 +++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git 
a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index 4048408b58..de0fd706e1 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -435,7 +435,6 @@ def _restore_keycloak_database(self, backup_file): pod_name = "keycloak-postgres-standalone-postgresql-0" db_user = "keycloak" db_name = "keycloak" - db_password = "keycloak" # This should ideally come from config or secret postgres_user = "postgres" # Load kubernetes config @@ -502,16 +501,17 @@ def _restore_keycloak_database(self, backup_file): return raise - # Get postgres superuser password from secret - print("Getting postgres superuser password from secret...") + # Get postgres passwords from secret + print("Getting database passwords from secret...") try: secret_name = "keycloak-postgres-standalone-postgresql" secret = api.read_namespaced_secret(name=secret_name, namespace=namespace) import base64 postgres_password = base64.b64decode(secret.data['postgres-password']).decode('utf-8') - print("✓ Got postgres password\n") + db_password = base64.b64decode(secret.data['password']).decode('utf-8') + print("✓ Got database passwords\n") except Exception as e: - print(f"✗ Error getting postgres password: {e}") + print(f"✗ Error getting database passwords: {e}") print("Skipping database restore") return diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf index fdf02f41f9..2e8acea19d 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/main.tf @@ -16,7 +16,7 @@ resource "helm_release" "keycloak_postgresql" { } auth = { username = "keycloak" - password = "keycloak" + # password is auto-generated by Helm chart and stored in 
secret database = "keycloak" } }) diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml index 1eea48232f..871cdef787 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml @@ -18,7 +18,9 @@ database: port: 5432 database: keycloak username: keycloak - password: keycloak + # Password is read from the auto-generated secret + existingSecret: keycloak-postgres-standalone-postgresql + existingSecretPasswordKey: password # Enable database readiness check dbchecker: From 12d9f27f888e5a983c16462851dd8872765791fa Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 4 Nov 2025 15:27:17 -0700 Subject: [PATCH 22/43] comment out theme code, change metrics volume name back to "data" --- .../kubernetes/keycloak-helm/values.yaml | 134 +++++++++--------- 1 file changed, 69 insertions(+), 65 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml index 871cdef787..ca0cca3b0c 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml @@ -74,18 +74,18 @@ extraInitContainers: | - sh - -c - | - if [ ! -f /providers/keycloak-metrics-spi-7.0.0.jar ]; then - wget https://github.com/aerogear/keycloak-metrics-spi/releases/download/7.0.0/keycloak-metrics-spi-7.0.0.jar -P /providers/ && + if [ ! 
-f /data/keycloak-metrics-spi-7.0.0.jar ]; then + wget https://github.com/aerogear/keycloak-metrics-spi/releases/download/7.0.0/keycloak-metrics-spi-7.0.0.jar -P /data/ && export SHA256SUM=e7ec72ab1699e57a25b61cb5e3ef1c532ec9858ed6931c1b491d3368f5d007b8 && - if ! (echo "$SHA256SUM /providers/keycloak-metrics-spi-7.0.0.jar" | sha256sum -c) + if ! (echo "$SHA256SUM /data/keycloak-metrics-spi-7.0.0.jar" | sha256sum -c) then echo "Error: Checksum not verified" && exit 1 else - chown 1000:1000 /providers/keycloak-metrics-spi-7.0.0.jar && - chmod 644 /providers/keycloak-metrics-spi-7.0.0.jar + chown 1000:1000 /data/keycloak-metrics-spi-7.0.0.jar && + chmod 644 /data/keycloak-metrics-spi-7.0.0.jar fi else - echo "File /providers/keycloak-metrics-spi-7.0.0.jar already exists. Skipping download." + echo "File /data/keycloak-metrics-spi-7.0.0.jar already exists. Skipping download." fi image: busybox:1.36 name: initialize-spi-metrics-jar @@ -93,70 +93,74 @@ extraInitContainers: | runAsUser: 0 volumeMounts: - name: metrics-plugin - mountPath: /providers - {{- if .Values.customThemes.enabled }} - - env: - - name: GIT_SYNC_REPO - value: {{ .Values.customThemes.repository }} - - name: GIT_SYNC_BRANCH - value: {{ .Values.customThemes.branch }} - - name: GIT_SYNC_ONE_TIME - value: "true" - - name: GIT_SYNC_GROUP_WRITE - value: "true" - - name: GIT_SYNC_ROOT - value: /opt/data/custom-themes - - name: GIT_SYNC_DEST - value: themes - - name: GIT_SYNC_SSH - value: "false" - image: registry.k8s.io/git-sync/git-sync:v4.3.0 - imagePullPolicy: IfNotPresent - name: keycloak-git-sync - resources: {} - securityContext: - runAsGroup: 1000 - runAsUser: 0 - terminationMessagePath: /dev/termination-log - terminationMessagePolicy: File - volumeMounts: - - mountPath: /opt/data/custom-themes - name: custom-themes - - command: - - sh - - -c - - | - if [ -d /opt/data/custom-themes/themes ]; then - echo 'Copying custom themes from /opt/data/custom-themes/themes to /themes' - cp -r 
/opt/data/custom-themes/themes/* /themes/ - else - echo 'No custom themes found in /opt/data/custom-themes' - fi - image: busybox:1.36 - name: move-custom-themes - securityContext: - runAsUser: 0 - volumeMounts: - - mountPath: /opt/data/custom-themes - name: custom-themes - - mountPath: /themes - name: theme-data - {{- end }} + mountPath: /data +# Custom themes are not supported initially with v26 upgrade + # {{- if .Values.customThemes.enabled }} + # - env: + # - name: GIT_SYNC_REPO + # value: {{ .Values.customThemes.repository }} + # - name: GIT_SYNC_BRANCH + # value: {{ .Values.customThemes.branch }} + # - name: GIT_SYNC_ONE_TIME + # value: "true" + # - name: GIT_SYNC_GROUP_WRITE + # value: "true" + # - name: GIT_SYNC_ROOT + # value: /opt/data/custom-themes + # - name: GIT_SYNC_DEST + # value: themes + # - name: GIT_SYNC_SSH + # value: "false" + # image: registry.k8s.io/git-sync/git-sync:v4.3.0 + # imagePullPolicy: IfNotPresent + # name: keycloak-git-sync + # resources: {} + # securityContext: + # runAsGroup: 1000 + # runAsUser: 0 + # terminationMessagePath: /dev/termination-log + # terminationMessagePolicy: File + # volumeMounts: + # - mountPath: /opt/data/custom-themes + # name: custom-themes + # - command: + # - sh + # - -c + # - | + # if [ -d /opt/data/custom-themes/themes ]; then + # echo 'Copying custom themes from /opt/data/custom-themes/themes to /themes' + # cp -r /opt/data/custom-themes/themes/* /themes/ + # else + # echo 'No custom themes found in /opt/data/custom-themes' + # fi + # image: busybox:1.36 + # name: move-custom-themes + # securityContext: + # runAsUser: 0 + # volumeMounts: + # - mountPath: /opt/data/custom-themes + # name: custom-themes + # - mountPath: /themes + # name: theme-data + # {{- end }} extraVolumeMounts: | - name: metrics-plugin mountPath: /opt/keycloak/providers/ - {{- if .Values.customThemes.enabled }} - - mountPath: /opt/keycloak/themes - name: theme-data - {{- end }} +# Custom themes are not supported initially with v26 
upgrade + # {{- if .Values.customThemes.enabled }} + # - mountPath: /opt/keycloak/themes + # name: theme-data + # {{- end }} extraVolumes: | - name: metrics-plugin emptyDir: {} - {{- if .Values.customThemes.enabled }} - - name: custom-themes - emptyDir: {} - - name: theme-data - emptyDir: {} - {{- end }} + +# Custom themes are not supported initially with v26 upgrade + # {{- if .Values.customThemes.enabled }} + # - name: custom-themes + # emptyDir: {} + # - name: theme-data + # emptyDir: {} + # {{- end }} From 092efad2b5190d50774fe009241f2835837e4fa4 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 4 Nov 2025 15:33:20 -0700 Subject: [PATCH 23/43] remove unneeded clustering config --- src/_nebari/stages/kubernetes_keycloak/__init__.py | 1 - .../template/modules/kubernetes/keycloak-helm/values.yaml | 4 ---- 2 files changed, 5 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index de0fd706e1..80d0be662a 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -633,7 +633,6 @@ def copy_file_to_pod(local_path, remote_path): print("Note: Warnings about 'public' schema permissions are expected and harmless.") print("=" * 80) - # Use -f flag to read from file instead of stdin - much more reliable! 
restore_cmd = f"env PGPASSWORD={db_password} psql -U {db_user} -d {db_name} --set ON_ERROR_STOP=off -f {remote_backup_path}" run_command(restore_cmd) diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml index ca0cca3b0c..c2a6d1f5e5 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml @@ -42,10 +42,6 @@ proxy: enabled: true mode: edge -# Cache configuration - use jdbc-ping for clustering via database -cache: - stack: jdbc-ping - # Environment variables for Keycloak configuration extraEnv: | - name: KC_HOSTNAME From f92a639a1d1566777d148597da20d932796e8aac Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 4 Nov 2025 16:35:24 -0700 Subject: [PATCH 24/43] refine upgrade script language --- src/_nebari/upgrade.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/_nebari/upgrade.py b/src/_nebari/upgrade.py index 6aba609ea2..1fe7667e1e 100644 --- a/src/_nebari/upgrade.py +++ b/src/_nebari/upgrade.py @@ -2092,7 +2092,7 @@ def _version_specific_upgrade( - Startup scripts replaced with Python post_deploy hooks After this upgrade step completes, you will need to run: - nebari deploy -c {config_filename}[/cyan] to apply the changes + [cyan]nebari deploy -c {config_filename}[/cyan] to apply the changes """ ) ) @@ -2100,20 +2100,20 @@ def _version_specific_upgrade( kubernetes.config.load_kube_config() except kubernetes.config.config_exception.ConfigException: rich.print( - "[red bold]No default kube configuration file was found. 
Make sure to [link=https://www.nebari.dev/docs/how-tos/debug-nebari#generating-the-kubeconfig]have one pointing to your Nebari cluster[/link] before upgrading.[/red bold]" + "[red bold]No default kube configuration file was found. Make sure to have one pointing to your cluster before upgrading.[/red bold] see docs: https://www.nebari.dev/docs/how-tos/debug-nebari#generating-the-kubeconfig" ) exit() current_kube_context = kubernetes.config.list_kube_config_contexts()[1] cluster_name = current_kube_context["context"]["cluster"] - rich.print( - f"\nThe following backup will be attempted on the [cyan bold]{cluster_name}[/cyan bold] cluster.\n" - ) # Kubernetes config available - offer automatic backup rich.print( "\n[green]✓[/green] Kubernetes configuration detected. Nebari can backup the database automatically." ) + rich.print( + f"\nThe following backup will be attempted on the [cyan bold]{cluster_name}[/cyan bold] cluster.\n" + ) if not (kwargs.get("attempt_fixes", False) or Confirm.ask( "\nWould you like Nebari to backup the Keycloak database for you now?", @@ -2122,11 +2122,11 @@ def _version_specific_upgrade( # User declined automatic backup rich.print("\n[yellow]You chose not to backup automatically.[/yellow]") rich.print( - "[yellow]Please ensure you have a backup before proceeding with deployment.[/yellow]" + "[yellow]Please ensure you have a backup if desired before proceeding with deployment.[/yellow]" ) if not Confirm.ask( - "\nHave you successfully backed up your Keycloak database?", + "\nHave you successfully backed up your Keycloak database/would you like to proceed without backup?", default=False, ): rich.print( @@ -2135,7 +2135,7 @@ def _version_specific_upgrade( exit(1) rich.print( - "\n[green]✓ Database backup confirmed.[/green] You can now proceed with:" + "\nYou can now proceed with:" ) rich.print(f" [cyan]nebari deploy -c {config_filename}[/cyan]") rich.print("Ready to upgrade to Nebari version [green]2025.11.1[/green].") @@ -2159,17 +2159,11 @@ 
def _version_specific_upgrade( rich.print( f"\n[yellow]Pod keycloak-postgresql-0 not found in namespace {namespace}.[/yellow]" ) - if not Confirm.ask( - "\nDo you want to skip the backup and continue with the upgrade?", - default=False, - ): - rich.print( - "[red]Upgrade cancelled. Please verify your Keycloak deployment.[/red]" - ) - exit(1) + rich.print( + "\nUpgrade cancelled due to keycloak backup failure. Please verify your Kube config is pointing at your Nebari cluster" + ) - rich.print("[yellow]Skipping backup. Proceeding with upgrade...[/yellow]") - return config + exit(1) # Other API errors rich.print(f"[red]✗ Error checking for PostgreSQL pod:[/red] {e}") From d75d8ebc0986aab71d8a86229b31e5fd213b4a13 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 4 Nov 2025 16:48:24 -0700 Subject: [PATCH 25/43] add back required cache mechanism --- .../template/modules/kubernetes/keycloak-helm/values.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml index c2a6d1f5e5..750fc736e2 100644 --- a/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml +++ b/src/_nebari/stages/kubernetes_keycloak/template/modules/kubernetes/keycloak-helm/values.yaml @@ -30,6 +30,11 @@ dbchecker: command: - "/opt/keycloak/bin/kc.sh" +# Without this configured get the below error on startup: +# "Error while trying to create a channel using the specified configuration 'kubernetes'" +cache: + stack: jdbc-ping + args: - "start" From 5095098dad128a1bc8dcf0a54b8ee8a233de9cce Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 5 Nov 2025 22:07:39 +0000 Subject: [PATCH 26/43] [pre-commit.ci] Apply automatic pre-commit fixes --- .../stages/kubernetes_keycloak/__init__.py | 125 ++++++++++-------- 
src/_nebari/upgrade.py | 51 ++++--- 2 files changed, 97 insertions(+), 79 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index 80d0be662a..36067edd97 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -339,15 +339,19 @@ def post_deploy( self._restore_keycloak_database(backup_file) # Rename backup file to prevent re-running restore on subsequent deploys - backup_file.rename(backup_file.with_suffix('.sql.restored')) - print(f"\n✓ Renamed backup file to {backup_file.with_suffix('.sql.restored')}") + backup_file.rename(backup_file.with_suffix(".sql.restored")) + print( + f"\n✓ Renamed backup file to {backup_file.with_suffix('.sql.restored')}" + ) print("=" * 80 + "\n") else: print("No Keycloak database backup found, skipping restore") # Step 2: Create nebari-bot user keycloak_url = f"{stage_outputs['stages/' + self.name]['keycloak_credentials']['value']['url']}/auth/" - nebari_bot_password = stage_outputs["stages/" + self.name]["keycloak_nebari_bot_password"]["value"] + nebari_bot_password = stage_outputs["stages/" + self.name][ + "keycloak_nebari_bot_password" + ]["value"] print("Creating nebari-bot user in Keycloak master realm...") max_attempts = 10 @@ -375,22 +379,24 @@ def post_deploy( # Reset password to ensure it matches the expected value # (Keycloak doesn't allow reading passwords for comparison) admin.set_user_password( - user_id=user_id, - password=nebari_bot_password, - temporary=False + user_id=user_id, password=nebari_bot_password, temporary=False ) print("Updated nebari-bot password to match expected value") else: # Create nebari-bot user - user_id = admin.create_user({ - "username": "nebari-bot", - "enabled": True, - "credentials": [{ - "type": "password", - "value": nebari_bot_password, - "temporary": False - }] - }) + user_id = admin.create_user( + { + "username": "nebari-bot", + "enabled": True, + 
"credentials": [ + { + "type": "password", + "value": nebari_bot_password, + "temporary": False, + } + ], + } + ) print("Successfully created nebari-bot user") # Assign admin role to nebari-bot user @@ -416,7 +422,9 @@ def post_deploy( print(f"Retrying in {retry_delay} seconds...") time.sleep(retry_delay) else: - print(f"Failed to configure nebari-bot user after {max_attempts} attempts: {e}") + print( + f"Failed to configure nebari-bot user after {max_attempts} attempts: {e}" + ) sys.exit(1) def _restore_keycloak_database(self, backup_file): @@ -443,12 +451,13 @@ def _restore_keycloak_database(self, backup_file): apps_api = kubernetes.client.AppsV1Api() # Step 0: Scale down Keycloak to prevent active database connections - print(f"Step 0: Scaling down Keycloak statefulset '{keycloak_statefulset_name}' to 0 replicas...") + print( + f"Step 0: Scaling down Keycloak statefulset '{keycloak_statefulset_name}' to 0 replicas..." + ) try: # Get current statefulset statefulset = apps_api.read_namespaced_stateful_set( - name=keycloak_statefulset_name, - namespace=namespace + name=keycloak_statefulset_name, namespace=namespace ) original_replicas = statefulset.spec.replicas print(f" Current replicas: {original_replicas}") @@ -456,31 +465,31 @@ def _restore_keycloak_database(self, backup_file): # Scale to 0 statefulset.spec.replicas = 0 apps_api.patch_namespaced_stateful_set( - name=keycloak_statefulset_name, - namespace=namespace, - body=statefulset + name=keycloak_statefulset_name, namespace=namespace, body=statefulset ) - print(f" Scaled to 0 replicas") + print(" Scaled to 0 replicas") # Wait for pods to terminate - print(f" Waiting for Keycloak pods to terminate...") + print(" Waiting for Keycloak pods to terminate...") max_wait = 60 # seconds wait_interval = 2 elapsed = 0 while elapsed < max_wait: pods = api.list_namespaced_pod( namespace=namespace, - label_selector=f"app.kubernetes.io/name=keycloak" + label_selector="app.kubernetes.io/name=keycloak", ) if len(pods.items) 
== 0: - print(f" ✓ All Keycloak pods terminated") + print(" ✓ All Keycloak pods terminated") break print(f" Still waiting... ({len(pods.items)} pods remaining)") time.sleep(wait_interval) elapsed += wait_interval if elapsed >= max_wait: - print(f" ⚠ Warning: Timed out waiting for pods to terminate, proceeding anyway") + print( + " ⚠ Warning: Timed out waiting for pods to terminate, proceeding anyway" + ) print("✓ Keycloak scaled down\n") @@ -493,7 +502,7 @@ def _restore_keycloak_database(self, backup_file): print(f"Checking if pod '{pod_name}' exists in namespace '{namespace}'...") try: api.read_namespaced_pod(name=pod_name, namespace=namespace) - print(f"✓ Pod found\n") + print("✓ Pod found\n") except kubernetes.client.exceptions.ApiException as e: if e.status == 404: print(f"✗ Pod '{pod_name}' not found in namespace '{namespace}'") @@ -507,8 +516,11 @@ def _restore_keycloak_database(self, backup_file): secret_name = "keycloak-postgres-standalone-postgresql" secret = api.read_namespaced_secret(name=secret_name, namespace=namespace) import base64 - postgres_password = base64.b64decode(secret.data['postgres-password']).decode('utf-8') - db_password = base64.b64decode(secret.data['password']).decode('utf-8') + + postgres_password = base64.b64decode( + secret.data["postgres-password"] + ).decode("utf-8") + db_password = base64.b64decode(secret.data["password"]).decode("utf-8") print("✓ Got database passwords\n") except Exception as e: print(f"✗ Error getting database passwords: {e}") @@ -524,12 +536,12 @@ def run_command(command, show_output=True): api.connect_get_namespaced_pod_exec, name=pod_name, namespace=namespace, - command=['/bin/sh', '-c', command], + command=["/bin/sh", "-c", command], stderr=True, stdin=False, stdout=True, tty=False, - _preload_content=False + _preload_content=False, ) stdout_lines = [] @@ -541,17 +553,17 @@ def run_command(command, show_output=True): data = resp.read_stdout() stdout_lines.append(data) if show_output: - print(data, end='') + 
print(data, end="") sys.stdout.flush() if resp.peek_stderr(): data = resp.read_stderr() stderr_lines.append(data) if show_output: - print(data, end='', file=sys.stderr) + print(data, end="", file=sys.stderr) sys.stderr.flush() resp.close() - return ''.join(stdout_lines), ''.join(stderr_lines) + return "".join(stdout_lines), "".join(stderr_lines) # Helper function to copy file to pod using tar def copy_file_to_pod(local_path, remote_path): @@ -564,7 +576,7 @@ def copy_file_to_pod(local_path, remote_path): # Create tar archive in memory tar_buffer = BytesIO() - with tarfile.open(fileobj=tar_buffer, mode='w') as tar: + with tarfile.open(fileobj=tar_buffer, mode="w") as tar: tar.add(str(local_path), arcname=os.path.basename(remote_path)) tar_buffer.seek(0) @@ -572,7 +584,7 @@ def copy_file_to_pod(local_path, remote_path): # Extract tar in pod remote_dir = os.path.dirname(remote_path) - extract_cmd = ['tar', 'xf', '-', '-C', remote_dir or '/'] + extract_cmd = ["tar", "xf", "-", "-C", remote_dir or "/"] resp = stream( api.connect_get_namespaced_pod_exec, @@ -583,19 +595,19 @@ def copy_file_to_pod(local_path, remote_path): stdin=True, stdout=True, tty=False, - _preload_content=False + _preload_content=False, ) # Write tar data in chunks chunk_size = 1024 * 1024 # 1MB chunks for i in range(0, len(tar_data), chunk_size): - chunk = tar_data[i:i + chunk_size] + chunk = tar_data[i : i + chunk_size] resp.write_stdin(chunk) - resp.write_stdin('') # Signal EOF + resp.write_stdin("") # Signal EOF resp.close() - print(f" ✓ File copied successfully") + print(" ✓ File copied successfully") # Step 1: Drop existing database print(f"Step 1: Dropping database '{db_name}' (if exists)...") @@ -622,15 +634,17 @@ def copy_file_to_pod(local_path, remote_path): print("✓ Privileges granted\n") # Step 4: Copy backup file to pod - print(f"Step 4: Copying backup file to pod...") + print("Step 4: Copying backup file to pod...") remote_backup_path = "/tmp/keycloak-backup.sql" 
copy_file_to_pod(Path(backup_file), remote_backup_path) - print(f"✓ Backup file copied to pod\n") + print("✓ Backup file copied to pod\n") # Step 5: Restore the database from file - print(f"Step 5: Restoring database from backup...") + print("Step 5: Restoring database from backup...") print("This may take a few moments. Output will be shown below:\n") - print("Note: Warnings about 'public' schema permissions are expected and harmless.") + print( + "Note: Warnings about 'public' schema permissions are expected and harmless." + ) print("=" * 80) restore_cmd = f"env PGPASSWORD={db_password} psql -U {db_user} -d {db_name} --set ON_ERROR_STOP=off -f {remote_backup_path}" @@ -640,13 +654,13 @@ def copy_file_to_pod(local_path, remote_path): print("\n✓ Restore completed!\n") # Step 6: Verify the restore - print(f"Step 6: Verifying restore by checking user count...") + print("Step 6: Verifying restore by checking user count...") verify_cmd = f"env PGPASSWORD={db_password} psql -U {db_user} -d {db_name} -c 'SELECT count(*) FROM user_entity;'" run_command(verify_cmd) print("✓ Verification complete\n") # Step 7: Clean up temporary file in pod - print(f"Step 7: Cleaning up temporary file in pod...") + print("Step 7: Cleaning up temporary file in pod...") cleanup_cmd = f"rm -f {remote_backup_path}" run_command(cleanup_cmd, show_output=False) print(f"✓ Removed {remote_backup_path}\n") @@ -657,23 +671,28 @@ def copy_file_to_pod(local_path, remote_path): # Step 8: Scale Keycloak back up if original_replicas is not None: - print(f"\nStep 8: Scaling Keycloak statefulset back to {original_replicas} replicas...") + print( + f"\nStep 8: Scaling Keycloak statefulset back to {original_replicas} replicas..." 
+ ) try: statefulset = apps_api.read_namespaced_stateful_set( - name=keycloak_statefulset_name, - namespace=namespace + name=keycloak_statefulset_name, namespace=namespace ) statefulset.spec.replicas = original_replicas apps_api.patch_namespaced_stateful_set( name=keycloak_statefulset_name, namespace=namespace, - body=statefulset + body=statefulset, ) print(f"✓ Keycloak scaled back to {original_replicas} replicas") - print(" Keycloak pods will start connecting to the restored database\n") + print( + " Keycloak pods will start connecting to the restored database\n" + ) except kubernetes.client.exceptions.ApiException as e: print(f"⚠ Warning: Could not scale up Keycloak statefulset: {e}") - print(f" You may need to manually scale it back up: kubectl scale statefulset {keycloak_statefulset_name} --replicas={original_replicas} -n {namespace}\n") + print( + f" You may need to manually scale it back up: kubectl scale statefulset {keycloak_statefulset_name} --replicas={original_replicas} -n {namespace}\n" + ) @contextlib.contextmanager def deploy( diff --git a/src/_nebari/upgrade.py b/src/_nebari/upgrade.py index 1fe7667e1e..5381be8aae 100644 --- a/src/_nebari/upgrade.py +++ b/src/_nebari/upgrade.py @@ -2101,7 +2101,7 @@ def _version_specific_upgrade( except kubernetes.config.config_exception.ConfigException: rich.print( "[red bold]No default kube configuration file was found. 
Make sure to have one pointing to your cluster before upgrading.[/red bold] see docs: https://www.nebari.dev/docs/how-tos/debug-nebari#generating-the-kubeconfig" - ) + ) exit() current_kube_context = kubernetes.config.list_kube_config_contexts()[1] @@ -2115,10 +2115,13 @@ def _version_specific_upgrade( f"\nThe following backup will be attempted on the [cyan bold]{cluster_name}[/cyan bold] cluster.\n" ) - if not (kwargs.get("attempt_fixes", False) or Confirm.ask( - "\nWould you like Nebari to backup the Keycloak database for you now?", - default=True, - )): + if not ( + kwargs.get("attempt_fixes", False) + or Confirm.ask( + "\nWould you like Nebari to backup the Keycloak database for you now?", + default=True, + ) + ): # User declined automatic backup rich.print("\n[yellow]You chose not to backup automatically.[/yellow]") rich.print( @@ -2134,9 +2137,7 @@ def _version_specific_upgrade( ) exit(1) - rich.print( - "\nYou can now proceed with:" - ) + rich.print("\nYou can now proceed with:") rich.print(f" [cyan]nebari deploy -c {config_filename}[/cyan]") rich.print("Ready to upgrade to Nebari version [green]2025.11.1[/green].") return config @@ -2174,31 +2175,31 @@ def _version_specific_upgrade( try: exec_command = [ - '/bin/sh', - '-c', - 'env PGPASSWORD=keycloak pg_dump -U keycloak -d keycloak' + "/bin/sh", + "-c", + "env PGPASSWORD=keycloak pg_dump -U keycloak -d keycloak", ] resp = stream( api_instance.connect_get_namespaced_pod_exec, - name='keycloak-postgresql-0', + name="keycloak-postgresql-0", namespace=namespace, command=exec_command, stderr=True, stdin=False, stdout=True, tty=False, - _preload_content=False + _preload_content=False, ) # Write the output to the backup file error_output = [] - with open(backup_file, 'wb') as f: + with open(backup_file, "wb") as f: while resp.is_open(): resp.update(timeout=1) if resp.peek_stdout(): stdout_data = resp.read_stdout() - f.write(stdout_data.encode('utf-8')) + f.write(stdout_data.encode("utf-8")) if resp.peek_stderr(): 
stderr_data = resp.read_stderr() if stderr_data: @@ -2208,29 +2209,27 @@ def _version_specific_upgrade( # Check if backup was successful if not backup_file.exists() or backup_file.stat().st_size == 0: - error_msg = '\n'.join(error_output) if error_output else "Backup file is empty or doesn't exist" + error_msg = ( + "\n".join(error_output) + if error_output + else "Backup file is empty or doesn't exist" + ) rich.print(f"[red]✗ Backup failed:[/red]\n{error_msg}") exit(1) # Backup succeeded file_size = backup_file.stat().st_size - rich.print( - f"[green]✓ Backup successful![/green] Saved to {backup_file}" - ) - rich.print( - f"[green] Backup size:[/green] {file_size / 1024:.2f} KB" - ) + rich.print(f"[green]✓ Backup successful![/green] Saved to {backup_file}") + rich.print(f"[green] Backup size:[/green] {file_size / 1024:.2f} KB") # Show any warnings from stderr (pg_dump often writes info to stderr) if error_output: for err in error_output: - if 'NOTICE' in err or 'WARNING' in err: + if "NOTICE" in err or "WARNING" in err: rich.print(f"[yellow]{err.strip()}[/yellow]") except kubernetes.client.exceptions.ApiException as api_err: - rich.print( - f"[red]✗ Kubernetes API error during backup:[/red]\n{api_err}" - ) + rich.print(f"[red]✗ Kubernetes API error during backup:[/red]\n{api_err}") exit(1) except Exception as e: rich.print(f"[red]✗ Unexpected error during backup:[/red] {e}") From aca9fd2c54a00caed0b33a9a24e04d21fcc2ca54 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Wed, 5 Nov 2025 15:43:49 -0700 Subject: [PATCH 27/43] corrections for precommit warnings --- .../stages/kubernetes_keycloak/__init__.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index 36067edd97..7e8a2f1da3 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -2,6 +2,7 @@ import enum 
import json import os +from pathlib import Path import secrets import string import sys @@ -10,6 +11,9 @@ from pydantic import Field, ValidationInfo, field_validator, model_validator +from keycloak import KeycloakAdmin +from keycloak.exceptions import KeycloakError + from _nebari.stages.base import NebariTerraformStage from _nebari.stages.tf_objects import ( NebariHelmProvider, @@ -319,12 +323,7 @@ def post_deploy( self, stage_outputs: Dict[str, Dict[str, Any]], disable_prompt: bool = False ): """Restore Keycloak database (if backup exists) and create nebari-bot user after Keycloak is deployed.""" - from pathlib import Path - import kubernetes - from keycloak import KeycloakAdmin - from keycloak.exceptions import KeycloakError - from kubernetes.stream import stream # Step 1: Restore database if backup exists backup_file = Path(self.output_directory) / "keycloak-backup.sql" @@ -427,7 +426,7 @@ def post_deploy( ) sys.exit(1) - def _restore_keycloak_database(self, backup_file): + def _restore_keycloak_database(self, backup_file: Path): """Restore PostgreSQL database from backup file using Kubernetes exec.""" import base64 import tarfile @@ -528,7 +527,7 @@ def _restore_keycloak_database(self, backup_file): return # Helper function to run commands in pod - def run_command(command, show_output=True): + def run_command(command: str, show_output: bool = True): print(f" Running: {command}") sys.stdout.flush() @@ -566,25 +565,26 @@ def run_command(command, show_output=True): return "".join(stdout_lines), "".join(stderr_lines) # Helper function to copy file to pod using tar - def copy_file_to_pod(local_path, remote_path): + def copy_file_to_pod(local_path: Path, remote_path: Path): """ Copy a file to pod using tar (similar to kubectl cp). More reliable than stdin streaming for large files. 
""" - print(f" Copying {local_path.name} to pod:{remote_path}") + print(f" Copying {local_path.name} to pod:{remote_path.name}") print(f" File size: {local_path.stat().st_size / 1024:.2f} KB") # Create tar archive in memory tar_buffer = BytesIO() - with tarfile.open(fileobj=tar_buffer, mode="w") as tar: - tar.add(str(local_path), arcname=os.path.basename(remote_path)) + with tarfile.open(fileobj=tar_buffer, mode='w') as tar: + tar.add(str(local_path), arcname=remote_path.name) tar_buffer.seek(0) tar_data = tar_buffer.getvalue() # Extract tar in pod - remote_dir = os.path.dirname(remote_path) - extract_cmd = ["tar", "xf", "-", "-C", remote_dir or "/"] + remote_dir = str(remote_path.parent) + extract_cmd = ['tar', 'xf', '-', '-C', remote_dir or '/'] + resp = stream( api.connect_get_namespaced_pod_exec, @@ -636,8 +636,8 @@ def copy_file_to_pod(local_path, remote_path): # Step 4: Copy backup file to pod print("Step 4: Copying backup file to pod...") remote_backup_path = "/tmp/keycloak-backup.sql" - copy_file_to_pod(Path(backup_file), remote_backup_path) - print("✓ Backup file copied to pod\n") + copy_file_to_pod(Path(backup_file), Path(remote_backup_path)) + print(f"✓ Backup file copied to pod\n") # Step 5: Restore the database from file print("Step 5: Restoring database from backup...") From ce565c07e823d40fd02a71d2ebf5aee3aa623841 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 5 Nov 2025 22:47:26 +0000 Subject: [PATCH 28/43] [pre-commit.ci] Apply automatic pre-commit fixes --- src/_nebari/stages/kubernetes_keycloak/__init__.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index 7e8a2f1da3..07997b6584 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -2,17 +2,16 @@ import enum import json 
import os -from pathlib import Path import secrets import string import sys import time +from pathlib import Path from typing import Any, Dict, List, Optional, Type, Union -from pydantic import Field, ValidationInfo, field_validator, model_validator - from keycloak import KeycloakAdmin from keycloak.exceptions import KeycloakError +from pydantic import Field, ValidationInfo, field_validator, model_validator from _nebari.stages.base import NebariTerraformStage from _nebari.stages.tf_objects import ( @@ -324,7 +323,6 @@ def post_deploy( ): """Restore Keycloak database (if backup exists) and create nebari-bot user after Keycloak is deployed.""" - # Step 1: Restore database if backup exists backup_file = Path(self.output_directory) / "keycloak-backup.sql" @@ -575,7 +573,7 @@ def copy_file_to_pod(local_path: Path, remote_path: Path): # Create tar archive in memory tar_buffer = BytesIO() - with tarfile.open(fileobj=tar_buffer, mode='w') as tar: + with tarfile.open(fileobj=tar_buffer, mode="w") as tar: tar.add(str(local_path), arcname=remote_path.name) tar_buffer.seek(0) @@ -583,8 +581,7 @@ def copy_file_to_pod(local_path: Path, remote_path: Path): # Extract tar in pod remote_dir = str(remote_path.parent) - extract_cmd = ['tar', 'xf', '-', '-C', remote_dir or '/'] - + extract_cmd = ["tar", "xf", "-", "-C", remote_dir or "/"] resp = stream( api.connect_get_namespaced_pod_exec, @@ -637,7 +634,7 @@ def copy_file_to_pod(local_path: Path, remote_path: Path): print("Step 4: Copying backup file to pod...") remote_backup_path = "/tmp/keycloak-backup.sql" copy_file_to_pod(Path(backup_file), Path(remote_backup_path)) - print(f"✓ Backup file copied to pod\n") + print("✓ Backup file copied to pod\n") # Step 5: Restore the database from file print("Step 5: Restoring database from backup...") From ec1ab30729ba18f50635f49a413533977463b436 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Mon, 10 Nov 2025 15:06:26 -0700 Subject: [PATCH 29/43] make firstname and lastname optional in 
keycloak --- .../template/main.tf | 123 ++++++++++++++++++ .../template/providers.tf | 1 + .../template/versions.tf | 4 +- 3 files changed, 126 insertions(+), 2 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf b/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf index 8292b322c4..2220eaa58a 100644 --- a/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf +++ b/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf @@ -138,3 +138,126 @@ resource "keycloak_realm_events" "realm_events" { "jboss-logging", "metrics-listener", ] } + +resource "keycloak_realm_user_profile" "userprofile" { + realm_id = keycloak_realm.main.id + + unmanaged_attribute_policy = "ENABLED" + + # Username attribute + attribute { + name = "username" + display_name = "$${username}" + multi_valued = false + + permissions { + view = ["admin", "user"] + edit = ["admin", "user"] + } + + validator { + name = "length" + config = { + min = "3" + max = "255" + } + } + + validator { + name = "username-prohibited-characters" + } + + validator { + name = "up-username-not-idn-homograph" + } + } + + # Email attribute + attribute { + name = "email" + display_name = "$${email}" + multi_valued = false + + required_for_roles = ["user"] + + permissions { + view = ["admin", "user"] + edit = ["admin", "user"] + } + + validator { + name = "email" + } + + validator { + name = "length" + config = { + max = "255" + } + } + } + + # First Name attribute + attribute { + name = "firstName" + display_name = "$${firstName}" + multi_valued = false + + #Below makes this attribute optional + required_for_roles = [] + required_for_scopes = [] + + permissions { + view = ["admin", "user"] + edit = ["admin", "user"] + } + + validator { + name = "length" + config = { + max = "255" + } + } + + validator { + name = "person-name-prohibited-characters" + } + + annotations = {} + } + + # Last Name attribute + attribute { + name = "lastName" + 
display_name = "$${lastName}" + multi_valued = false + + required_for_roles = [] + required_for_scopes = [] + + permissions { + view = ["admin", "user"] + edit = ["admin", "user"] + } + + validator { + name = "length" + config = { + max = "255" + } + } + + validator { + name = "person-name-prohibited-characters" + } + + annotations = {} + } + + # Group + group { + name = "user-metadata" + display_header = "User metadata" + display_description = "Attributes, which refer to user metadata" + } + } diff --git a/src/_nebari/stages/kubernetes_keycloak_configuration/template/providers.tf b/src/_nebari/stages/kubernetes_keycloak_configuration/template/providers.tf index ed8e8eeb5c..9737f08fad 100644 --- a/src/_nebari/stages/kubernetes_keycloak_configuration/template/providers.tf +++ b/src/_nebari/stages/kubernetes_keycloak_configuration/template/providers.tf @@ -1,3 +1,4 @@ provider "keycloak" { + base_path = "/auth" tls_insecure_skip_verify = true } diff --git a/src/_nebari/stages/kubernetes_keycloak_configuration/template/versions.tf b/src/_nebari/stages/kubernetes_keycloak_configuration/template/versions.tf index d3f87478e2..084592ba8f 100644 --- a/src/_nebari/stages/kubernetes_keycloak_configuration/template/versions.tf +++ b/src/_nebari/stages/kubernetes_keycloak_configuration/template/versions.tf @@ -9,8 +9,8 @@ terraform { version = "2.35.1" } keycloak = { - source = "mrparkers/keycloak" - version = "3.7.0" + source = "keycloak/keycloak" + version = "5.5.0" } } required_version = ">= 1.0" From 534322b925f1463fce1eb853a81bf2376d61c63c Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Mon, 10 Nov 2025 15:42:53 -0700 Subject: [PATCH 30/43] pre-commit edits for terraform fmt --- .../template/main.tf | 176 +++++++++--------- .../template/versions.tf | 2 +- 2 files changed, 89 insertions(+), 89 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf b/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf index 
2220eaa58a..42268d28c8 100644 --- a/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf +++ b/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf @@ -142,122 +142,122 @@ resource "keycloak_realm_events" "realm_events" { resource "keycloak_realm_user_profile" "userprofile" { realm_id = keycloak_realm.main.id - unmanaged_attribute_policy = "ENABLED" + unmanaged_attribute_policy = "ENABLED" - # Username attribute - attribute { - name = "username" - display_name = "$${username}" - multi_valued = false + # Username attribute + attribute { + name = "username" + display_name = "$${username}" + multi_valued = false - permissions { - view = ["admin", "user"] - edit = ["admin", "user"] - } + permissions { + view = ["admin", "user"] + edit = ["admin", "user"] + } - validator { - name = "length" - config = { - min = "3" - max = "255" - } + validator { + name = "length" + config = { + min = "3" + max = "255" } + } - validator { - name = "username-prohibited-characters" - } + validator { + name = "username-prohibited-characters" + } - validator { - name = "up-username-not-idn-homograph" - } + validator { + name = "up-username-not-idn-homograph" } + } - # Email attribute - attribute { - name = "email" - display_name = "$${email}" - multi_valued = false + # Email attribute + attribute { + name = "email" + display_name = "$${email}" + multi_valued = false - required_for_roles = ["user"] + required_for_roles = ["user"] - permissions { - view = ["admin", "user"] - edit = ["admin", "user"] - } + permissions { + view = ["admin", "user"] + edit = ["admin", "user"] + } - validator { - name = "email" - } + validator { + name = "email" + } - validator { - name = "length" - config = { - max = "255" - } + validator { + name = "length" + config = { + max = "255" } } + } - # First Name attribute - attribute { - name = "firstName" - display_name = "$${firstName}" - multi_valued = false - - #Below makes this attribute optional - required_for_roles = [] - 
required_for_scopes = [] + # First Name attribute + attribute { + name = "firstName" + display_name = "$${firstName}" + multi_valued = false - permissions { - view = ["admin", "user"] - edit = ["admin", "user"] - } + #Below makes this attribute optional + required_for_roles = [] + required_for_scopes = [] - validator { - name = "length" - config = { - max = "255" - } - } + permissions { + view = ["admin", "user"] + edit = ["admin", "user"] + } - validator { - name = "person-name-prohibited-characters" + validator { + name = "length" + config = { + max = "255" } + } - annotations = {} + validator { + name = "person-name-prohibited-characters" } - # Last Name attribute - attribute { - name = "lastName" - display_name = "$${lastName}" - multi_valued = false + annotations = {} + } - required_for_roles = [] - required_for_scopes = [] + # Last Name attribute + attribute { + name = "lastName" + display_name = "$${lastName}" + multi_valued = false - permissions { - view = ["admin", "user"] - edit = ["admin", "user"] - } + required_for_roles = [] + required_for_scopes = [] - validator { - name = "length" - config = { - max = "255" - } - } + permissions { + view = ["admin", "user"] + edit = ["admin", "user"] + } - validator { - name = "person-name-prohibited-characters" + validator { + name = "length" + config = { + max = "255" } - - annotations = {} } - # Group - group { - name = "user-metadata" - display_header = "User metadata" - display_description = "Attributes, which refer to user metadata" + validator { + name = "person-name-prohibited-characters" } + + annotations = {} } + + # Group + group { + name = "user-metadata" + display_header = "User metadata" + display_description = "Attributes, which refer to user metadata" + } +} diff --git a/src/_nebari/stages/kubernetes_keycloak_configuration/template/versions.tf b/src/_nebari/stages/kubernetes_keycloak_configuration/template/versions.tf index 084592ba8f..591bebaeec 100644 --- 
a/src/_nebari/stages/kubernetes_keycloak_configuration/template/versions.tf +++ b/src/_nebari/stages/kubernetes_keycloak_configuration/template/versions.tf @@ -9,7 +9,7 @@ terraform { version = "2.35.1" } keycloak = { - source = "keycloak/keycloak" + source = "keycloak/keycloak" version = "5.5.0" } } From 364ac3a756b35098ecc4739f7f575a6a63aa714c Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Mon, 10 Nov 2025 16:35:19 -0700 Subject: [PATCH 31/43] add playwright test updates for new keycloak logout flow --- tests/common/navigator.py | 8 ++++++-- tests/tests_e2e/playwright/test_playwright.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/common/navigator.py b/tests/common/navigator.py index e0b404fd26..58797a767b 100644 --- a/tests/common/navigator.py +++ b/tests/common/navigator.py @@ -114,7 +114,11 @@ def login(self): def logout(self): """Logout from Nebari deployment.""" self.page.get_by_role("button", name="Logout").click() - self.page.wait_for_load_state + self.page.wait_for_load_state("networkidle") + + # Keycloak has a second logout button + self.page.locator("#kc-logout").click() + self.page.wait_for_load_state("networkidle") def _login_google(self): logger.debug(">>> Sign in via Google and start the server") @@ -136,7 +140,7 @@ def _login_password(self): self.page.get_by_role("button", name="Sign in with Keycloak").click() self.page.get_by_label("Username").fill(self.username) - self.page.get_by_label("Password").fill(self.password) + self.page.get_by_role("textbox", name="Password").fill(self.password) self.page.get_by_role("button", name="Sign In").click() self.page.wait_for_load_state() diff --git a/tests/tests_e2e/playwright/test_playwright.py b/tests/tests_e2e/playwright/test_playwright.py index 0a835c8413..cbf4baf63e 100644 --- a/tests/tests_e2e/playwright/test_playwright.py +++ b/tests/tests_e2e/playwright/test_playwright.py @@ -10,7 +10,7 @@ def test_login_logout(navigator): 
expect(navigator.page.get_by_text(navigator.username)).to_be_visible() navigator.logout() - expect(navigator.page.get_by_text("Sign in with Keycloak")).to_be_visible() + expect(navigator.page.get_by_text("You are logged out").first).to_be_visible() @pytest.mark.parametrize( From d4350a7378eecaceee84abde78471a3b19fc741b Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Wed, 12 Nov 2025 15:06:08 -0700 Subject: [PATCH 32/43] update all keycloak providers in Nebari --- .../stages/kubernetes_keycloak_configuration/template/main.tf | 2 +- .../modules/kubernetes/services/keycloak-client/versions.tf | 4 ++-- src/_nebari/stages/kubernetes_services/template/providers.tf | 1 + src/_nebari/stages/kubernetes_services/template/versions.tf | 4 ++-- .../template/modules/nebariextension/main.tf | 4 ++-- src/_nebari/stages/nebari_tf_extensions/template/providers.tf | 1 + .../stages/nebari_tf_extensions/template/tf-extensions.tf | 4 ++++ src/_nebari/stages/nebari_tf_extensions/template/versions.tf | 4 ++-- 8 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf b/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf index 42268d28c8..989018a194 100644 --- a/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf +++ b/src/_nebari/stages/kubernetes_keycloak_configuration/template/main.tf @@ -28,7 +28,7 @@ resource "keycloak_realm" "main" { ignore_changes = [ # We want user to have control over attributes we are not managing # If attribute is added above remove it from this list - # https://registry.terraform.io/providers/mrparkers/keycloak/latest/docs/resources/realm + # https://registry.terraform.io/providers/keycloak/keycloak/latest/docs/resources/realm attributes, registration_allowed, registration_email_as_username, diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/keycloak-client/versions.tf 
b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/keycloak-client/versions.tf index 0ddb981e5e..819066d5a0 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/keycloak-client/versions.tf +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/keycloak-client/versions.tf @@ -1,8 +1,8 @@ terraform { required_providers { keycloak = { - source = "mrparkers/keycloak" - version = "3.7.0" + source = "keycloak/keycloak" + version = "5.5.0" } } required_version = ">= 1.0" diff --git a/src/_nebari/stages/kubernetes_services/template/providers.tf b/src/_nebari/stages/kubernetes_services/template/providers.tf index ed8e8eeb5c..9737f08fad 100644 --- a/src/_nebari/stages/kubernetes_services/template/providers.tf +++ b/src/_nebari/stages/kubernetes_services/template/providers.tf @@ -1,3 +1,4 @@ provider "keycloak" { + base_path = "/auth" tls_insecure_skip_verify = true } diff --git a/src/_nebari/stages/kubernetes_services/template/versions.tf b/src/_nebari/stages/kubernetes_services/template/versions.tf index d3f87478e2..591bebaeec 100644 --- a/src/_nebari/stages/kubernetes_services/template/versions.tf +++ b/src/_nebari/stages/kubernetes_services/template/versions.tf @@ -9,8 +9,8 @@ terraform { version = "2.35.1" } keycloak = { - source = "mrparkers/keycloak" - version = "3.7.0" + source = "keycloak/keycloak" + version = "5.5.0" } } required_version = ">= 1.0" diff --git a/src/_nebari/stages/nebari_tf_extensions/template/modules/nebariextension/main.tf b/src/_nebari/stages/nebari_tf_extensions/template/modules/nebariextension/main.tf index 57a4cc55f0..fbf062425f 100644 --- a/src/_nebari/stages/nebari_tf_extensions/template/modules/nebariextension/main.tf +++ b/src/_nebari/stages/nebari_tf_extensions/template/modules/nebariextension/main.tf @@ -1,8 +1,8 @@ terraform { required_providers { keycloak = { - source = "mrparkers/keycloak" - version = "3.7.0" + source = "keycloak/keycloak" + 
version = "5.5.0" } } } diff --git a/src/_nebari/stages/nebari_tf_extensions/template/providers.tf b/src/_nebari/stages/nebari_tf_extensions/template/providers.tf index ed8e8eeb5c..9737f08fad 100644 --- a/src/_nebari/stages/nebari_tf_extensions/template/providers.tf +++ b/src/_nebari/stages/nebari_tf_extensions/template/providers.tf @@ -1,3 +1,4 @@ provider "keycloak" { + base_path = "/auth" tls_insecure_skip_verify = true } diff --git a/src/_nebari/stages/nebari_tf_extensions/template/tf-extensions.tf b/src/_nebari/stages/nebari_tf_extensions/template/tf-extensions.tf index 915b78879e..1325a0e4af 100644 --- a/src/_nebari/stages/nebari_tf_extensions/template/tf-extensions.tf +++ b/src/_nebari/stages/nebari_tf_extensions/template/tf-extensions.tf @@ -3,6 +3,10 @@ module "extension" { source = "./modules/nebariextension" + providers = { + keycloak = keycloak + } + name = "nebari-ext-${each.key}" namespace = var.environment image = each.value.image diff --git a/src/_nebari/stages/nebari_tf_extensions/template/versions.tf b/src/_nebari/stages/nebari_tf_extensions/template/versions.tf index d3f87478e2..591bebaeec 100644 --- a/src/_nebari/stages/nebari_tf_extensions/template/versions.tf +++ b/src/_nebari/stages/nebari_tf_extensions/template/versions.tf @@ -9,8 +9,8 @@ terraform { version = "2.35.1" } keycloak = { - source = "mrparkers/keycloak" - version = "3.7.0" + source = "keycloak/keycloak" + version = "5.5.0" } } required_version = ">= 1.0" From a3d4f961f08e7d1380dfd7745de1a9d0e78694fc Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Thu, 13 Nov 2025 14:50:06 -0700 Subject: [PATCH 33/43] add context choices for upgrade step, sleep for keycloak restart --- .../stages/kubernetes_keycloak/__init__.py | 41 ++++++++++- src/_nebari/upgrade.py | 73 ++++++++++++------- 2 files changed, 86 insertions(+), 28 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index 07997b6584..27177cfd73 100644 --- 
a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -682,9 +682,44 @@ def copy_file_to_pod(local_path: Path, remote_path: Path): body=statefulset, ) print(f"✓ Keycloak scaled back to {original_replicas} replicas") - print( - " Keycloak pods will start connecting to the restored database\n" - ) + + # Wait for StatefulSet to be ready + print(" Waiting for Keycloak to be ready...") + max_wait = 300 # 5 minutes + wait_interval = 5 + elapsed = 0 + + while elapsed < max_wait: + statefulset = apps_api.read_namespaced_stateful_set( + name=keycloak_statefulset_name, namespace=namespace + ) + + ready_replicas = statefulset.status.ready_replicas or 0 + current_replicas = statefulset.status.current_replicas or 0 + + if ( + ready_replicas == original_replicas + and current_replicas == original_replicas + ): + print( + f" ✓ Keycloak is ready ({ready_replicas}/{original_replicas} replicas)" + ) + break + + print( + f" Still waiting... ({ready_replicas}/{original_replicas} ready)" + ) + time.sleep(wait_interval) + elapsed += wait_interval + + if elapsed >= max_wait: + print(" ⚠ Warning: Timed out waiting for StatefulSet to be ready") + print(" The StatefulSet may still be starting up") + else: + print( + " Keycloak pods are ready and connected to the restored database\n" + ) + except kubernetes.client.exceptions.ApiException as e: print(f"⚠ Warning: Could not scale up Keycloak statefulset: {e}") print( diff --git a/src/_nebari/upgrade.py b/src/_nebari/upgrade.py index 5381be8aae..36315b4907 100644 --- a/src/_nebari/upgrade.py +++ b/src/_nebari/upgrade.py @@ -2096,23 +2096,59 @@ def _version_specific_upgrade( """ ) ) - try: - kubernetes.config.load_kube_config() - except kubernetes.config.config_exception.ConfigException: + # Get all available contexts + contexts, active_context = kubernetes.config.list_kube_config_contexts() + + if not contexts: rich.print( - "[red bold]No default kube configuration file was found. 
Make sure to have one pointing to your cluster before upgrading.[/red bold] see docs: https://www.nebari.dev/docs/how-tos/debug-nebari#generating-the-kubeconfig" + "[red bold]No kubectl contexts found in your kube configuration.[/red bold] See docs: https://www.nebari.dev/docs/how-tos/debug-nebari#generating-the-kubeconfig" ) - exit() + exit(1) + + # Display available contexts + rich.print( + "\n[cyan]Below are available kubectl contexts for kubernetes. Please select the context for your cluster[/cyan]" + ) + for i, ctx in enumerate(contexts): + context_name = ctx["name"] + is_active = " [green](current)[/green]" if ctx == active_context else "" + rich.print(f" {i + 1}. {context_name} {is_active}") + + # Add option for context not present + rich.print( + f" {len(contexts) + 1}. [yellow]Desired context not present[/yellow]" + ) + + # Let user choose + choice = Prompt.ask( + "\n[cyan]Select context number[/cyan]", + choices=[str(i + 1) for i in range(len(contexts) + 1)], + ) + selected_index = int(choice) - 1 + + # Handle "context not present" option + if selected_index == len(contexts): + rich.print( + "\n[yellow]Please add your desired kubectl context before proceeding.[/yellow]" + ) + rich.print( + "See docs: [link=https://www.nebari.dev/docs/how-tos/debug-nebari#generating-the-kubeconfig]https://www.nebari.dev/docs/how-tos/debug-nebari#generating-the-kubeconfig[/link]" + ) + exit(1) - current_kube_context = kubernetes.config.list_kube_config_contexts()[1] + current_kube_context = contexts[selected_index] cluster_name = current_kube_context["context"]["cluster"] - # Kubernetes config available - offer automatic backup + # Set the selected context as active + kubernetes.config.load_kube_config(context=current_kube_context["name"]) + rich.print( - "\n[green]✓[/green] Kubernetes configuration detected. Nebari can backup the database automatically." 
+ f"\n[green]✓ Using context:[/green] {current_kube_context['name']} -> {cluster_name}" ) + + # Kubernetes config available - offer automatic backup rich.print( - f"\nThe following backup will be attempted on the [cyan bold]{cluster_name}[/cyan bold] cluster.\n" + "\n[green]✓[/green] Kubernetes configuration detected. Nebari can backup the database automatically." ) if not ( @@ -2123,24 +2159,11 @@ def _version_specific_upgrade( ) ): # User declined automatic backup - rich.print("\n[yellow]You chose not to backup automatically.[/yellow]") + rich.print( - "[yellow]Please ensure you have a backup if desired before proceeding with deployment.[/yellow]" + f"\n[red bold]You must backup the Keycloak database before upgrading to {self.version}.[/red bold]" ) - - if not Confirm.ask( - "\nHave you successfully backed up your Keycloak database/would you like to proceed without backup?", - default=False, - ): - rich.print( - f"\n[red bold]You must backup the Keycloak database before upgrading to {self.version}.[/red bold]" - ) - exit(1) - - rich.print("\nYou can now proceed with:") - rich.print(f" [cyan]nebari deploy -c {config_filename}[/cyan]") - rich.print("Ready to upgrade to Nebari version [green]2025.11.1[/green].") - return config + exit(1) # User accepted automatic backup - proceed rich.print( From 9b77ba9b988c5d2780228c216814bc98c19b363a Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Fri, 14 Nov 2025 16:28:48 -0700 Subject: [PATCH 34/43] update test_upgrade file for latest upgrade command --- tests/tests_unit/test_upgrade.py | 94 +++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 2 deletions(-) diff --git a/tests/tests_unit/test_upgrade.py b/tests/tests_unit/test_upgrade.py index 1f3fa29e4c..9c81f76d98 100644 --- a/tests/tests_unit/test_upgrade.py +++ b/tests/tests_unit/test_upgrade.py @@ -102,12 +102,24 @@ def mock_input(prompt, **kwargs): return False elif prompt == TERRAFORM_REMOVE_TERRAFORM_STAGE_FILES_CONFIRMATION: return attempt_fixes + elif ( 
+ prompt + == "\nWould you like Nebari to backup the Keycloak database for you now?" + ): + # Always backup in tests so upgrade can complete + return True # All other prompts will be answered with "y" else: return True + def mock_prompt_ask(prompt, **kwargs): + # Mock context selection for 2025.11.1 upgrade + if "Select context number" in prompt: + return "1" # Select first context + return "" + monkeypatch.setattr(Confirm, "ask", mock_input) - monkeypatch.setattr(Prompt, "ask", lambda x: "") + monkeypatch.setattr(Prompt, "ask", mock_prompt_ask) from kubernetes import config as _kube_config from kubernetes.client import ApiextensionsV1Api as _ApiextensionsV1Api @@ -127,15 +139,27 @@ class MonkeypatchApiResponse: return MonkeypatchApiResponse + def monkey_patch_read_namespaced_pod(*args, **kwargs): + # Mock the pod exists for keycloak backup + class MockPod: + metadata = type("obj", (object,), {"name": "keycloak-postgresql-0"}) + + return MockPod() + monkeypatch.setattr( _kube_config, "load_kube_config", lambda *args, **kwargs: None, ) + # Mock kubectl contexts for 2025.11.1 upgrade + mock_contexts = [ + {"name": "test-context", "context": {"cluster": "test-cluster"}}, + ] + mock_active_context = mock_contexts[0] monkeypatch.setattr( _kube_config, "list_kube_config_contexts", - lambda *args, **kwargs: [None, {"context": {"cluster": "test"}}], + lambda *args, **kwargs: (mock_contexts, mock_active_context), ) monkeypatch.setattr( _ApiextensionsV1Api, @@ -162,9 +186,68 @@ class MonkeypatchApiResponse: "list_namespaced_daemon_set", monkey_patch_list_namespaced_daemon_set, ) + monkeypatch.setattr( + _CoreV1Api, + "read_namespaced_pod", + monkey_patch_read_namespaced_pod, + ) + + # Mock connect_get_namespaced_pod_exec to prevent real API calls + def monkey_patch_connect_pod_exec(*args, **kwargs): + # This should not be called if stream mock works, but add as safety + class MockExecResponse: + def read_stdout(self): + return "-- PostgreSQL database dump\n" + + def 
read_stderr(self): + return "" + return MockExecResponse() + + monkeypatch.setattr( + _CoreV1Api, + "connect_get_namespaced_pod_exec", + monkey_patch_connect_pod_exec, + ) + + # Mock kubernetes stream for database backup + class MockStreamResponse: + def __init__(self): + self._open = True + self._stdout_data = "-- PostgreSQL database dump\n-- Dump completed\n" + self._read_count = 0 + + def is_open(self): + # Close after reading data + return self._read_count < 1 + + def peek_stdout(self): + return self._read_count < 1 + + def read_stdout(self): + self._read_count += 1 + return self._stdout_data + + def peek_stderr(self): + return False + + def read_stderr(self): + return "" + + def update(self, timeout=1): + pass + + def close(self): + self._open = False + + def mock_stream(*args, **kwargs): + return MockStreamResponse() + + # Patch stream in the upgrade module where it's imported from _nebari import upgrade as _upgrade + monkeypatch.setattr(_upgrade, "stream", mock_stream) + def monkey_patch_get_keycloak_admin(*args, **kwargs): return MockKeycloakAdmin() @@ -235,6 +318,13 @@ def monkey_patch_get_keycloak_admin(*args, **kwargs): assert orig_contents == tmp_qhub_config_backup.read_text() + # Check Keycloak database backup file was created (from 2025.11.1 upgrade) + keycloak_backup_file = Path(tmp_path, "keycloak-backup.sql") + assert keycloak_backup_file.exists() + # Verify backup contains postgres dump content + backup_content = keycloak_backup_file.read_text() + assert "PostgreSQL database dump" in backup_content + @pytest.mark.parametrize( "version_str, exception", From 9828d4dd473dd1f80c45756c290cacd4392352e4 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Tue, 18 Nov 2025 09:42:35 -0700 Subject: [PATCH 35/43] mock helm subprocess, only check for keycloak if version>2025.11.1 --- tests/tests_unit/test_upgrade.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tests/tests_unit/test_upgrade.py 
b/tests/tests_unit/test_upgrade.py index 9c81f76d98..4f66101b05 100644 --- a/tests/tests_unit/test_upgrade.py +++ b/tests/tests_unit/test_upgrade.py @@ -248,6 +248,15 @@ def mock_stream(*args, **kwargs): monkeypatch.setattr(_upgrade, "stream", mock_stream) + # Mock helm subprocess to prevent actual helm commands from running + def mock_run_helm_subprocess(*args, **kwargs): + # Don't actually run helm, just return successfully + pass + + from _nebari.provider import helm as _helm + + monkeypatch.setattr(_helm, "run_helm_subprocess", mock_run_helm_subprocess) + def monkey_patch_get_keycloak_admin(*args, **kwargs): return MockKeycloakAdmin() @@ -319,11 +328,16 @@ def monkey_patch_get_keycloak_admin(*args, **kwargs): assert orig_contents == tmp_qhub_config_backup.read_text() # Check Keycloak database backup file was created (from 2025.11.1 upgrade) - keycloak_backup_file = Path(tmp_path, "keycloak-backup.sql") - assert keycloak_backup_file.exists() - # Verify backup contains postgres dump content - backup_content = keycloak_backup_file.read_text() - assert "PostgreSQL database dump" in backup_content + # Only check if the current version is >= 2025.11.1 + current_version = rounded_ver_parse(__version__) + target_version = rounded_ver_parse("2025.11.1") + + if current_version >= target_version: + keycloak_backup_file = Path(tmp_path, "keycloak-backup.sql") + assert keycloak_backup_file.exists() + # Verify backup contains postgres dump content + backup_content = keycloak_backup_file.read_text() + assert "PostgreSQL database dump" in backup_content @pytest.mark.parametrize( From 0494ee2d37d38f3d6bfc0518fa791cbc5add78bd Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Wed, 19 Nov 2025 12:09:10 -0700 Subject: [PATCH 36/43] add proper scopes to forwardauth to fix dask gateway --- .../template/modules/kubernetes/forwardauth/main.tf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/forwardauth/main.tf 
b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/forwardauth/main.tf index 564d397d1a..f36bff8d63 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/forwardauth/main.tf +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/forwardauth/main.tf @@ -127,6 +127,12 @@ resource "kubernetes_deployment" "forwardauth-deployment" { name = "LOG_LEVEL" value = "trace" } + + env { + name = "PROVIDERS_GENERIC_OAUTH_SCOPE" + value = "openid profile email" + } + env { name = "AUTH_HOST" value = var.external-url From 8c262040f76b39f5c3a21f55080f40a0f626812d Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Wed, 19 Nov 2025 15:08:48 -0700 Subject: [PATCH 37/43] clean up upgrade notice --- src/_nebari/upgrade.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/_nebari/upgrade.py b/src/_nebari/upgrade.py index 36315b4907..2f850b8f8f 100644 --- a/src/_nebari/upgrade.py +++ b/src/_nebari/upgrade.py @@ -2089,7 +2089,6 @@ def _version_specific_upgrade( - PostgreSQL moved from subchart to standalone deployment - Keycloak service name changes from [green]keycloak-headless[/green] to [green]keycloak-keycloakx-http[/green] - OAuth clients now require [green]openid[/green] scope explicitly - - Startup scripts replaced with Python post_deploy hooks After this upgrade step completes, you will need to run: [cyan]nebari deploy -c {config_filename}[/cyan] to apply the changes From cd090907a6b40d98c7b4d7a136d4afa92931d29e Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Wed, 26 Nov 2025 10:44:29 -0700 Subject: [PATCH 38/43] add backup failure handling --- .../stages/kubernetes_keycloak/__init__.py | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index 27177cfd73..f1eac94fd0 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ 
b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -333,14 +333,25 @@ def post_deploy( print(f"Found backup file: {backup_file}") print(f"Size: {backup_file.stat().st_size / 1024:.2f} KB\n") - self._restore_keycloak_database(backup_file) + try: + self._restore_keycloak_database(backup_file) - # Rename backup file to prevent re-running restore on subsequent deploys - backup_file.rename(backup_file.with_suffix(".sql.restored")) - print( - f"\n✓ Renamed backup file to {backup_file.with_suffix('.sql.restored')}" - ) - print("=" * 80 + "\n") + # Rename backup file to prevent re-running restore on subsequent deploys + backup_file.rename(backup_file.with_suffix(".sql.restored")) + print( + f"\n✓ Renamed backup file to {backup_file.with_suffix('.sql.restored')}" + ) + print("=" * 80 + "\n") + except Exception as e: + backup_file.rename(backup_file.with_suffix(".sql.failed_restore")) + print("\n" + "=" * 80) + print("ERROR: KEYCLOAK DATABASE RESTORE FAILED") + print("=" * 80) + print(f"Error: {e}") + print(f"Backup file location: {backup_file.absolute()}") + print("\nThe backup file has been preserved for manual recovery.") + print("=" * 80 + "\n") + raise else: print("No Keycloak database backup found, skipping restore") From ea07ef1cee6f71b493c5fb737f346a771fc1410a Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Wed, 26 Nov 2025 10:44:45 -0700 Subject: [PATCH 39/43] change stateful set to always be 1 --- .../stages/kubernetes_keycloak/__init__.py | 102 +++++++++--------- 1 file changed, 49 insertions(+), 53 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index f1eac94fd0..61bea10278 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -459,18 +459,16 @@ def _restore_keycloak_database(self, backup_file: Path): apps_api = kubernetes.client.AppsV1Api() # Step 0: Scale down Keycloak to prevent active database 
connections + # Keycloak statefulset always runs with 1 replica + original_replicas = 1 print( f"Step 0: Scaling down Keycloak statefulset '{keycloak_statefulset_name}' to 0 replicas..." ) try: - # Get current statefulset + # Get current statefulset and scale to 0 statefulset = apps_api.read_namespaced_stateful_set( name=keycloak_statefulset_name, namespace=namespace ) - original_replicas = statefulset.spec.replicas - print(f" Current replicas: {original_replicas}") - - # Scale to 0 statefulset.spec.replicas = 0 apps_api.patch_namespaced_stateful_set( name=keycloak_statefulset_name, namespace=namespace, body=statefulset @@ -504,7 +502,6 @@ def _restore_keycloak_database(self, backup_file: Path): except kubernetes.client.exceptions.ApiException as e: print(f"⚠ Warning: Could not scale down Keycloak statefulset: {e}") print("Proceeding with restore anyway...\n") - original_replicas = None # Check if pod exists print(f"Checking if pod '{pod_name}' exists in namespace '{namespace}'...") @@ -677,66 +674,65 @@ def copy_file_to_pod(local_path: Path, remote_path: Path): print("DATABASE RESTORE SUCCESSFUL!") print("=" * 80) - # Step 8: Scale Keycloak back up - if original_replicas is not None: - print( - f"\nStep 8: Scaling Keycloak statefulset back to {original_replicas} replicas..." + # Step 8: Scale Keycloak back up to 1 replica + print( + f"\nStep 8: Scaling Keycloak statefulset back to {original_replicas} replicas..." 
+ ) + try: + statefulset = apps_api.read_namespaced_stateful_set( + name=keycloak_statefulset_name, namespace=namespace ) - try: + statefulset.spec.replicas = original_replicas + apps_api.patch_namespaced_stateful_set( + name=keycloak_statefulset_name, + namespace=namespace, + body=statefulset, + ) + print(f"✓ Keycloak scaled back to {original_replicas} replicas") + + # Wait for StatefulSet to be ready + print(" Waiting for Keycloak to be ready...") + max_wait = 300 # 5 minutes + wait_interval = 5 + elapsed = 0 + + while elapsed < max_wait: statefulset = apps_api.read_namespaced_stateful_set( name=keycloak_statefulset_name, namespace=namespace ) - statefulset.spec.replicas = original_replicas - apps_api.patch_namespaced_stateful_set( - name=keycloak_statefulset_name, - namespace=namespace, - body=statefulset, - ) - print(f"✓ Keycloak scaled back to {original_replicas} replicas") - # Wait for StatefulSet to be ready - print(" Waiting for Keycloak to be ready...") - max_wait = 300 # 5 minutes - wait_interval = 5 - elapsed = 0 - - while elapsed < max_wait: - statefulset = apps_api.read_namespaced_stateful_set( - name=keycloak_statefulset_name, namespace=namespace - ) - - ready_replicas = statefulset.status.ready_replicas or 0 - current_replicas = statefulset.status.current_replicas or 0 - - if ( - ready_replicas == original_replicas - and current_replicas == original_replicas - ): - print( - f" ✓ Keycloak is ready ({ready_replicas}/{original_replicas} replicas)" - ) - break + ready_replicas = statefulset.status.ready_replicas or 0 + current_replicas = statefulset.status.current_replicas or 0 + if ( + ready_replicas == original_replicas + and current_replicas == original_replicas + ): print( - f" Still waiting... 
({ready_replicas}/{original_replicas} ready)" + f" ✓ Keycloak is ready ({ready_replicas}/{original_replicas} replicas)" ) - time.sleep(wait_interval) - elapsed += wait_interval + break - if elapsed >= max_wait: - print(" ⚠ Warning: Timed out waiting for StatefulSet to be ready") - print(" The StatefulSet may still be starting up") - else: - print( - " Keycloak pods are ready and connected to the restored database\n" - ) + print( + f" Still waiting... ({ready_replicas}/{original_replicas} ready)" + ) + time.sleep(wait_interval) + elapsed += wait_interval - except kubernetes.client.exceptions.ApiException as e: - print(f"⚠ Warning: Could not scale up Keycloak statefulset: {e}") + if elapsed >= max_wait: + print(" ⚠ Warning: Timed out waiting for StatefulSet to be ready") + print(" The StatefulSet may still be starting up") + else: print( - f" You may need to manually scale it back up: kubectl scale statefulset {keycloak_statefulset_name} --replicas={original_replicas} -n {namespace}\n" + " Keycloak pods are ready and connected to the restored database\n" ) + except kubernetes.client.exceptions.ApiException as e: + print(f"⚠ Warning: Could not scale up Keycloak statefulset: {e}") + print( + f" You may need to manually scale it back up: kubectl scale statefulset {keycloak_statefulset_name} --replicas={original_replicas} -n {namespace}\n" + ) + @contextlib.contextmanager def deploy( self, stage_outputs: Dict[str, Dict[str, Any]], disable_prompt: bool = False From 04e22c18f6a0d7af5cf43b3b9a0477b6d2319aeb Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Wed, 26 Nov 2025 10:58:32 -0700 Subject: [PATCH 40/43] mask postgres passwords in console output --- src/_nebari/stages/kubernetes_keycloak/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index 61bea10278..64b35348aa 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ 
b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -534,7 +534,11 @@ def _restore_keycloak_database(self, backup_file: Path): # Helper function to run commands in pod def run_command(command: str, show_output: bool = True): - print(f" Running: {command}") + # Mask password in output + masked_command = command.replace(postgres_password, "***").replace( + db_password, "***" + ) + print(f" Running: {masked_command}") sys.stdout.flush() resp = stream( From d47364be978b5324f21c700f0eabd1bf64aba41f Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Wed, 26 Nov 2025 11:20:14 -0700 Subject: [PATCH 41/43] move imports to top of file --- .../stages/kubernetes_keycloak/__init__.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index 64b35348aa..4f21c924e6 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -1,3 +1,4 @@ +import base64 import contextlib import enum import json @@ -5,12 +6,16 @@ import secrets import string import sys +import tarfile import time +from io import BytesIO from pathlib import Path from typing import Any, Dict, List, Optional, Type, Union +import kubernetes from keycloak import KeycloakAdmin from keycloak.exceptions import KeycloakError +from kubernetes.stream import stream from pydantic import Field, ValidationInfo, field_validator, model_validator from _nebari.stages.base import NebariTerraformStage @@ -261,8 +266,6 @@ def input_vars(self, stage_outputs: Dict[str, Dict[str, Any]]): def check( self, stage_outputs: Dict[str, Dict[str, Any]], disable_check: bool = False ): - from keycloak import KeycloakAdmin - from keycloak.exceptions import KeycloakError keycloak_url = f"{stage_outputs['stages/' + self.name]['keycloak_credentials']['value']['url']}/auth/" @@ -437,14 +440,6 @@ def post_deploy( def _restore_keycloak_database(self, 
backup_file: Path): """Restore PostgreSQL database from backup file using Kubernetes exec.""" - import base64 - import tarfile - from io import BytesIO - from pathlib import Path - - import kubernetes - from kubernetes.stream import stream - # Configuration - these should match your new postgres deployment namespace = self.config.namespace keycloak_statefulset_name = "keycloak-keycloakx" @@ -520,8 +515,6 @@ def _restore_keycloak_database(self, backup_file: Path): try: secret_name = "keycloak-postgres-standalone-postgresql" secret = api.read_namespaced_secret(name=secret_name, namespace=namespace) - import base64 - postgres_password = base64.b64decode( secret.data["postgres-password"] ).decode("utf-8") From 167b85e3852cd46d6de3f1fb41fab75cde52eff8 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Wed, 26 Nov 2025 11:31:59 -0700 Subject: [PATCH 42/43] add logic for finding postgres and keycloak podnames --- .../stages/kubernetes_keycloak/__init__.py | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index 4f21c924e6..ff08f26ddc 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -440,10 +440,8 @@ def post_deploy( def _restore_keycloak_database(self, backup_file: Path): """Restore PostgreSQL database from backup file using Kubernetes exec.""" - # Configuration - these should match your new postgres deployment + # Configuration namespace = self.config.namespace - keycloak_statefulset_name = "keycloak-keycloakx" - pod_name = "keycloak-postgres-standalone-postgresql-0" db_user = "keycloak" db_name = "keycloak" postgres_user = "postgres" @@ -453,6 +451,19 @@ def _restore_keycloak_database(self, backup_file: Path): api = kubernetes.client.CoreV1Api() apps_api = kubernetes.client.AppsV1Api() + # Find Keycloak StatefulSet by label + statefulsets = 
apps_api.list_namespaced_stateful_set( + namespace=namespace, label_selector="app.kubernetes.io/name=keycloakx" + ) + keycloak_statefulset_name = statefulsets.items[0].metadata.name + + # Find PostgreSQL pod by label + postgres_pods = api.list_namespaced_pod( + namespace=namespace, + label_selector="app.kubernetes.io/name=postgresql,app.kubernetes.io/component=primary", + ) + postgres_pod_name = postgres_pods.items[0].metadata.name + # Step 0: Scale down Keycloak to prevent active database connections # Keycloak statefulset always runs with 1 replica original_replicas = 1 @@ -499,13 +510,17 @@ def _restore_keycloak_database(self, backup_file: Path): print("Proceeding with restore anyway...\n") # Check if pod exists - print(f"Checking if pod '{pod_name}' exists in namespace '{namespace}'...") + print( + f"Checking if pod '{postgres_pod_name}' exists in namespace '{namespace}'..." + ) try: - api.read_namespaced_pod(name=pod_name, namespace=namespace) + api.read_namespaced_pod(name=postgres_pod_name, namespace=namespace) print("✓ Pod found\n") except kubernetes.client.exceptions.ApiException as e: if e.status == 404: - print(f"✗ Pod '{pod_name}' not found in namespace '{namespace}'") + print( + f"✗ Pod '{postgres_pod_name}' not found in namespace '{namespace}'" + ) print("Skipping database restore - pod may not be ready yet") return raise @@ -536,7 +551,7 @@ def run_command(command: str, show_output: bool = True): resp = stream( api.connect_get_namespaced_pod_exec, - name=pod_name, + name=postgres_pod_name, namespace=namespace, command=["/bin/sh", "-c", command], stderr=True, @@ -590,7 +605,7 @@ def copy_file_to_pod(local_path: Path, remote_path: Path): resp = stream( api.connect_get_namespaced_pod_exec, - name=pod_name, + name=postgres_pod_name, namespace=namespace, command=extract_cmd, stderr=True, From dfb12dc408b07dba7db4f278021d7b741a405208 Mon Sep 17 00:00:00 2001 From: Tyler Potts Date: Wed, 26 Nov 2025 11:40:19 -0700 Subject: [PATCH 43/43] raise error on 
failure to get database password --- src/_nebari/stages/kubernetes_keycloak/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/_nebari/stages/kubernetes_keycloak/__init__.py b/src/_nebari/stages/kubernetes_keycloak/__init__.py index ff08f26ddc..4569c28c06 100644 --- a/src/_nebari/stages/kubernetes_keycloak/__init__.py +++ b/src/_nebari/stages/kubernetes_keycloak/__init__.py @@ -536,9 +536,7 @@ def _restore_keycloak_database(self, backup_file: Path): db_password = base64.b64decode(secret.data["password"]).decode("utf-8") print("✓ Got database passwords\n") except Exception as e: - print(f"✗ Error getting database passwords: {e}") - print("Skipping database restore") - return + raise RuntimeError(f"✗ Error getting database passwords: {e}") from e # Helper function to run commands in pod def run_command(command: str, show_output: bool = True):