From e3432d4df86a8fd2fd2171286d1756bd76d1b94e Mon Sep 17 00:00:00 2001 From: Aditya Thebe Date: Mon, 29 Jun 2026 19:09:04 +0545 Subject: [PATCH] fix(auth): cache Clerk JWKS instead of nil-panic on fetch failure getJWTKeyFunc handled a failed JWKS fetch with logger.Fatalf, but the slog logger's Fatalf only logs and does not os.Exit. Execution fell through to `return jwks.Keyfunc` with a nil *keyfunc.JWKS, which later panicked inside ParseWithClaims -> (*JWKS).getKey on a nil receiver. So a transient network blip to Clerk crashed request goroutines and the OIDC callback. It was also called from parseJWTToken on every Clerk-authenticated request, so each request did a synchronous JWKS HTTP fetch and spawned a background-refresh goroutine that lived forever (per-request fetch + goroutine leak). Build the JWKS once and reuse it via a pointer-held jwksCache (shared across ClerkHandler value-receiver copies). Failed fetches are not cached, so a transient outage retries on the next request instead of poisoning the handler, and degrades to the existing logged-error/401 path instead of panicking. Key rotation is still handled by keyfunc's RefreshUnknownKID + hourly refresh against the single long-lived instance. --- auth/clerk_client.go | 38 ++++++++++++++++++++++++++++++++++++-- auth/tokens.go | 11 +++++++---- 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/auth/clerk_client.go b/auth/clerk_client.go index 451dba4c2..3fc96e9dd 100644 --- a/auth/clerk_client.go +++ b/auth/clerk_client.go @@ -6,6 +6,7 @@ import ( "net/http" "net/url" "strings" + "sync" "time" "github.com/flanksource/commons/logger" @@ -42,6 +43,33 @@ type ClerkHandler struct { tokenCache *cache.Cache accessTokenCache *cache.Cache userCache *cache.Cache + jwks *jwksCache +} + +// jwksCache lazily fetches and caches the Clerk JWKS keyfunc. The underlying +// keyfunc.Get performs a network fetch and spawns a background-refresh goroutine, +// so it must be built once and shared rather than rebuilt per request. The cache +// is held behind a pointer so copies of ClerkHandler (value receivers) share the +// same instance and lock. A failed fetch is not cached, so the next request retries. +type jwksCache struct { + url string + mu sync.Mutex + fn jwt.Keyfunc +} + +func (c *jwksCache) keyfunc() (jwt.Keyfunc, error) { + c.mu.Lock() + defer c.mu.Unlock() + + if c.fn == nil { + fn, err := newClerkKeyfunc(c.url) + if err != nil { + return nil, err + } + c.fn = fn + } + + return c.fn, nil } func NewClerkHandler() (*ClerkHandler, error) { @@ -58,19 +86,25 @@ func NewClerkHandler() (*ClerkHandler, error) { tokenCache: cache.New(3*24*time.Hour, 12*time.Hour), accessTokenCache: cache.New(3*24*time.Hour, 12*time.Hour), userCache: cache.New(3*24*time.Hour, 12*time.Hour), + jwks: &jwksCache{url: ClerkJwksUrl}, }, nil } func (h ClerkHandler) parseJWTToken(token string) (jwt.MapClaims, error) { + keyfunc, err := h.jwks.keyfunc() + if err != nil { + return nil, err + } + claims := jwt.MapClaims{} - jt, err := jwt.ParseWithClaims(token, claims, getJWTKeyFunc(h.jwksURL)) + jt, err := jwt.ParseWithClaims(token, claims, keyfunc) if err != nil { return claims, err } if !jt.Valid { return claims, fmt.Errorf("jwt token not valid") } - return claims, err + return claims, nil } type AuthResult struct { diff --git a/auth/tokens.go b/auth/tokens.go index 21c5445bb..b257fbeac 100644 --- a/auth/tokens.go +++ b/auth/tokens.go @@ -110,7 +110,11 @@ func newPostgRESTJWT(config api.PostgrestConfig, claims jwt.MapClaims) (string, return jwt.NewWithClaims(jwt.SigningMethodHS256, claims).SignedString([]byte(config.JWTSecret)) } -func getJWTKeyFunc(jwksURL string) jwt.Keyfunc { +// newClerkKeyfunc fetches the JWKS from the given URL and returns a jwt.Keyfunc +// backed by it. keyfunc.Get performs a synchronous network fetch and spawns a +// background-refresh goroutine, so the result must be created once and reused +// rather than rebuilt per request. +func newClerkKeyfunc(jwksURL string) (jwt.Keyfunc, error) { // Create the keyfunc options. Use an error handler that logs. Refresh the JWKS when a JWT signed by an unknown KID // is found or at the specified interval. Rate limit these refreshes. Timeout the initial JWKS refresh request after // 10 seconds. This timeout is also used to create the initial context.Context for keyfunc.Get. @@ -127,10 +131,9 @@ func getJWTKeyFunc(jwksURL string) jwt.Keyfunc { // Create the JWKS from the resource at the given URL. jwks, err := keyfunc.Get(jwksURL, options) if err != nil { - logger.Fatalf("Failed to create JWKS from resource at the given URL.\nError: %s", err.Error()) - // TODO Handle + return nil, fmt.Errorf("failed to fetch JWKS from %q: %w", jwksURL, err) } - return jwks.Keyfunc + return jwks.Keyfunc, nil } func getAccessToken(ctx context.Context, token string) (*models.AccessToken, error) {