From 31d6debed54653d7ab6498fdf11b3d109f82a3a3 Mon Sep 17 00:00:00 2001
From: Jakub Hrozek
Date: Wed, 29 Apr 2026 12:28:40 +0100
Subject: [PATCH] Add identity extractor for OAuth2 token responses

Some OAuth2 upstream providers do not expose a usable userinfo endpoint
and instead place user identity directly in the token endpoint response.
Two response shapes appear in practice:

- Identity as side-attributes alongside the tokens, e.g. Snowflake's
  `username`, Slack's `authed_user.id`, Shopify's
  `associated_user.{id,email,first_name}`.
- Identity claims embedded inside a JWT-shaped access token, e.g. Auth0,
  Azure AD, Keycloak, Cognito.

Introduce a pure helper that reads operator-supplied gjson dot-notation
paths from the raw token-response body to extract subject, name, and
email. Register a custom gjson modifier `@upstreamjwt` so paths can pipe
through a JWT payload decode step (e.g. "access_token|@upstreamjwt|sub").
The modifier base64url-decodes the JWT payload without verifying the
signature; trust comes from the TLS-authenticated channel to the AS, the
same trust model as the existing userinfo path. Signed-token flows remain
handled by the existing OIDC provider type.

Modifier registration is exported and explicit (RegisterModifiers) so
callers control when the process-global gjson state mutates. The helper
is consumed by the embedded auth server's OAuth2 upstream provider in a
later commit; nothing in this commit calls it yet.

Type guard restricts the subject to scalar string or number values to
avoid silently returning a JSON blob as the user's identity. Numeric
subjects are returned via the raw JSON token rather than gjson's float64
formatting to preserve integer precision beyond 2^53. Error messages
never include any portion of the body.

Closes #5152
---
 .../upstream/identity_from_token.go           | 176 +++++++++++++++++
 .../upstream/identity_from_token_test.go      | 186 ++++++++++++++++++
 2 files changed, 362 insertions(+)
 create mode 100644 pkg/authserver/upstream/identity_from_token.go
 create mode 100644 pkg/authserver/upstream/identity_from_token_test.go

diff --git a/pkg/authserver/upstream/identity_from_token.go b/pkg/authserver/upstream/identity_from_token.go
new file mode 100644
index 0000000000..e827d19815
--- /dev/null
+++ b/pkg/authserver/upstream/identity_from_token.go
@@ -0,0 +1,176 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package upstream
+
+import (
+	"encoding/base64"
+	"errors"
+	"fmt"
+	"log/slog"
+	"strings"
+
+	"github.com/tidwall/gjson"
+)
+
+// partialIdentity holds identity fields extracted from a token response body.
+// It is used internally to pass the extracted subject, name, and email
+// between extractIdentityFromTokenResponse and the provider layer.
+type partialIdentity struct {
+	Subject string
+	Name    string
+	Email   string
+}
+
+// IdentityFromTokenConfig is the runtime configuration for extracting user
+// identity directly from an OAuth2 token endpoint response body.
+//
+// Each path is a gjson dot-notation path (e.g. "username" or
+// "associated_user.id") into the raw JSON body returned by the token
+// endpoint. Path semantics, trust-model warnings, and uniqueness notes are
+// documented on the corresponding CRD type
+// (cmd/thv-operator/api/v1alpha1.IdentityFromTokenConfig).
+type IdentityFromTokenConfig struct {
+	// SubjectPath is the gjson path to the unique user identifier (required).
+	SubjectPath string
+
+	// NamePath is the gjson path to the user's display name (optional).
+	// Leave empty to skip name extraction.
+	NamePath string
+
+	// EmailPath is the gjson path to the user's email address (optional).
+	// Leave empty to skip email extraction.
+	EmailPath string
+}
+
+// RegisterModifiers registers the gjson custom modifiers used by this
+// package's path-based identity extractors. Call once during application
+// or test wire-up before invoking any extractor that consumes a
+// modifier-bearing path. Repeated calls are safe — gjson.AddModifier
+// overwrites the existing entry.
+//
+// Modifiers registered:
+//   - @upstreamjwt: see upstreamJWTModifier.
+func RegisterModifiers() {
+	gjson.AddModifier("upstreamjwt", upstreamJWTModifier)
+}
+
+// extractIdentityFromTokenResponse extracts user identity fields from a raw
+// OAuth2 token endpoint response body using the paths in cfg.
+//
+// A nil cfg is an error. An empty SubjectPath, or one that is missing or does
+// not resolve to a non-empty string or a number (objects, arrays, null and
+// booleans are rejected), fails with ErrIdentityResolutionFailed. NamePath and
+// EmailPath are optional: empty means "do not extract" and is skipped
+// silently; a missing path or wrong type logs a slog.Warn and leaves it empty.
+func extractIdentityFromTokenResponse(body []byte, cfg *IdentityFromTokenConfig) (partialIdentity, error) {
+	if cfg == nil {
+		return partialIdentity{}, errors.New("identity extraction config is required")
+	}
+	if cfg.SubjectPath == "" {
+		return partialIdentity{}, fmt.Errorf("%w: subjectPath is not configured", ErrIdentityResolutionFailed)
+	}
+
+	subjectResult := gjson.GetBytes(body, cfg.SubjectPath)
+	if err := validateIdentityField(subjectResult); err != nil {
+		return partialIdentity{}, fmt.Errorf("%w: subjectPath %q %s", ErrIdentityResolutionFailed, cfg.SubjectPath, err.Error())
+	}
+
+	return partialIdentity{
+		Subject: scalarToString(subjectResult),
+		Name:    extractOptionalField(body, cfg.NamePath, "namePath"),
+		Email:   extractOptionalField(body, cfg.EmailPath, "emailPath"),
+	}, nil
+}
+
+// scalarToString returns the string representation of a gjson scalar value.
+// Numbers are emitted as their raw JSON token instead of going through
+// gjson.Result.String(), which formats via float64 and would drop digits from
+// integer IDs above 2^53 (some upstream providers return 64-bit numeric
+// subjects). Every other type is delegated to gjson.Result.String(), which
+// strips the quotes from strings. The caller must validate the type first.
+func scalarToString(r gjson.Result) string {
+	if r.Type != gjson.Number {
+		return r.String()
+	}
+	return r.Raw
+}
+
+// validateIdentityField accepts only a result that exists and is either a
+// number or a non-empty string. Any other outcome produces an error whose
+// text names the reason without quoting the resolved value.
+func validateIdentityField(result gjson.Result) error {
+	var reason string
+	switch kind := result.Type; {
+	case !result.Exists():
+		reason = "path not found in token response"
+	case kind == gjson.Number, kind == gjson.String && result.String() != "":
+		return nil
+	case kind == gjson.String:
+		reason = "resolved to an empty string"
+	case kind == gjson.JSON:
+		reason = "resolved to an object or array, expected a scalar"
+	case kind == gjson.Null, kind == gjson.False, kind == gjson.True:
+		reason = "resolved to null or unsupported type"
+	default:
+		// Not expected: the cases above cover every gjson.Type value.
+		reason = "unrecognised gjson result type"
+	}
+	return errors.New(reason)
+}
+
+// extractOptionalField extracts an optional identity field from the token body.
+// Returns an empty string if the path is not configured, missing, or has an
+// unexpected type (with a slog.Warn for unexpected types).
+func extractOptionalField(body []byte, path, fieldName string) string {
+	if path == "" {
+		return ""
+	}
+	value := gjson.GetBytes(body, path)
+	if !value.Exists() {
+		slog.Warn("optional identity field not found in token response", "field", fieldName, "path", path)
+		return ""
+	}
+	switch value.Type {
+	case gjson.Number, gjson.String:
+		return scalarToString(value)
+	case gjson.Null, gjson.False, gjson.True, gjson.JSON:
+		slog.Warn("optional identity field has unexpected type, skipping", "field", fieldName, "path", path)
+	}
+	// Non-scalar results (already warned about) and any type not listed above
+	// both end up here, leaving the field empty.
+	return ""
+}
+
+// upstreamJWTModifier is a gjson modifier that decodes the payload of a
+// JWT-shaped string value and returns it as JSON, enabling further gjson
+// path drilling (e.g. "access_token|@upstreamjwt|sub").
+//
+// Trust model: NO signature verification. Use only for JWTs received over
+// a TLS-authenticated channel directly from the upstream AS's token
+// endpoint, where the channel itself provides integrity. For JWTs that
+// have transited an untrusted hop, configure the upstream as OIDC and
+// use the existing signed-ID-token path instead.
+//
+// Failure modes (all return ""):
+//   - input is not a JSON string
+//   - input does not contain exactly three dot-separated parts
+//   - the second part is not valid base64url
+//
+// Returning "" causes the next pipe stage to resolve to gjson.Null, which
+// the caller's validateIdentityField rejects as "path not found".
+func upstreamJWTModifier(jsonValue, _ string) string {
+	parsed := gjson.Parse(jsonValue)
+	if parsed.Type != gjson.String {
+		return "" // objects/arrays stringify to their raw JSON; never read that as a JWT
+	}
+	parts := strings.Split(parsed.String(), ".")
+	if len(parts) != 3 {
+		return ""
+	}
+	payload, err := base64.RawURLEncoding.DecodeString(parts[1])
+	if err != nil {
+		return ""
+	}
+	return string(payload)
+}
diff --git a/pkg/authserver/upstream/identity_from_token_test.go b/pkg/authserver/upstream/identity_from_token_test.go
new file mode 100644
index 0000000000..3688c32be0
--- /dev/null
+++ b/pkg/authserver/upstream/identity_from_token_test.go
@@ -0,0 +1,186 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package upstream
+
+import (
+	"encoding/base64"
+	"errors"
+	"os"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestMain(m *testing.M) {
+	RegisterModifiers()
+	os.Exit(m.Run())
+}
+
+func makeJWT(payload string) string {
+	h := base64.RawURLEncoding.EncodeToString([]byte(`{"alg":"none","typ":"JWT"}`))
+	b := base64.RawURLEncoding.EncodeToString([]byte(payload))
+	return h + "." + b + ".sig"
+}
+
+func TestExtractIdentityFromTokenResponse(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name      string
+		body      []byte
+		cfg       *IdentityFromTokenConfig
+		want      partialIdentity
+		wantErr   bool
+		wantErrIs error
+	}{
+		{
+			name: "snowflake flat happy path",
+			body: []byte(`{"access_token":"opaque-blob","expires_in":600,"refresh_token":"r","token_type":"Bearer","username":"user1"}`),
+			cfg:  &IdentityFromTokenConfig{SubjectPath: "username"},
+			want: partialIdentity{Subject: "user1"},
+		},
+		{
+			name: "slack nested happy path",
+			body: []byte(`{"ok":true,"access_token":"xoxb-...","authed_user":{"id":"U1234"},"team":{"id":"T1"}}`),
+			cfg:  &IdentityFromTokenConfig{SubjectPath: "authed_user.id"},
+			want: partialIdentity{Subject: "U1234"},
+		},
+		{
+			name: "shopify nested with all three fields",
+			body: []byte(`{"access_token":"a","associated_user":{"id":902541635,"email":"john@example.com","first_name":"John"}}`),
+			cfg: &IdentityFromTokenConfig{
+				SubjectPath: "associated_user.id",
+				NamePath:    "associated_user.first_name",
+				EmailPath:   "associated_user.email",
+			},
+			want: partialIdentity{Subject: "902541635", Name: "John", Email: "john@example.com"},
+		},
+		{
+			name: "numeric subject explicit",
+			body: []byte(`{"user_id":42}`),
+			cfg:  &IdentityFromTokenConfig{SubjectPath: "user_id"},
+			want: partialIdentity{Subject: "42"},
+		},
+		{
+			name: "missing optional name path in body",
+			body: []byte(`{"sub":"user1"}`),
+			cfg:  &IdentityFromTokenConfig{SubjectPath: "sub", NamePath: "display_name"},
+			want: partialIdentity{Subject: "user1"},
+		},
+		{
+			name: "name path not configured",
+			body: []byte(`{"sub":"user1","display_name":"Alice"}`),
+			cfg:  &IdentityFromTokenConfig{SubjectPath: "sub"},
+			want: partialIdentity{Subject: "user1"},
+		},
+		{
+			name: "optional name path resolves to object, skipped",
+			body: []byte(`{"sub":"u1","profile":{"first":"Alice"}}`),
+			cfg:  &IdentityFromTokenConfig{SubjectPath: "sub", NamePath: "profile"},
+			want: partialIdentity{Subject: "u1"},
+		},
+		{
+			name: "jwt-embedded subject happy path",
+			body: []byte(`{"access_token":"` + makeJWT(`{"sub":"u1"}`) + `"}`),
+			cfg:  &IdentityFromTokenConfig{SubjectPath: "access_token|@upstreamjwt|sub"},
+			want: partialIdentity{Subject: "u1"},
+		},
+		{
+			name:      "jwt-embedded subject, malformed jwt",
+			body:      []byte(`{"access_token":"not.a.jwt.really"}`),
+			cfg:       &IdentityFromTokenConfig{SubjectPath: "access_token|@upstreamjwt|sub"},
+			wantErr:   true,
+			wantErrIs: ErrIdentityResolutionFailed,
+		},
+		{
+			name:      "empty subject value",
+			body:      []byte(`{"username":""}`),
+			cfg:       &IdentityFromTokenConfig{SubjectPath: "username"},
+			wantErr:   true,
+			wantErrIs: ErrIdentityResolutionFailed,
+		},
+		{
+			name:      "subject path resolves to object",
+			body:      []byte(`{"user":{"id":"x"}}`),
+			cfg:       &IdentityFromTokenConfig{SubjectPath: "user"},
+			wantErr:   true,
+			wantErrIs: ErrIdentityResolutionFailed,
+		},
+		{
+			name:      "subject path resolves to null",
+			body:      []byte(`{"username":null}`),
+			cfg:       &IdentityFromTokenConfig{SubjectPath: "username"},
+			wantErr:   true,
+			wantErrIs: ErrIdentityResolutionFailed,
+		},
+		{
+			name: "large numeric subject preserves integer precision beyond 2^53",
+			// 9007199254740993 = 2^53 + 1; not exactly representable as float64.
+			// We use the raw JSON token rather than formatting via gjson.Result.String()
+			// (which goes through float64) to keep the digits exact.
+			body: []byte(`{"user_id":9007199254740993}`),
+			cfg:  &IdentityFromTokenConfig{SubjectPath: "user_id"},
+			want: partialIdentity{Subject: "9007199254740993"},
+		},
+		{
+			name:      "subject path missing from body",
+			body:      []byte(`{}`),
+			cfg:       &IdentityFromTokenConfig{SubjectPath: "username"},
+			wantErr:   true,
+			wantErrIs: ErrIdentityResolutionFailed,
+		},
+		{
+			name:      "empty body",
+			body:      []byte{},
+			cfg:       &IdentityFromTokenConfig{SubjectPath: "username"},
+			wantErr:   true,
+			wantErrIs: ErrIdentityResolutionFailed,
+		},
+		{
+			name:      "malformed json body",
+			body:      []byte("not-json"),
+			cfg:       &IdentityFromTokenConfig{SubjectPath: "username"},
+			wantErr:   true,
+			wantErrIs: ErrIdentityResolutionFailed,
+		},
+		{
+			name:    "nil cfg",
+			body:    []byte(`{"username":"user1"}`),
+			cfg:     nil,
+			wantErr: true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			got, err := extractIdentityFromTokenResponse(tc.body, tc.cfg)
+			if tc.wantErr {
+				require.Error(t, err)
+				if tc.wantErrIs != nil {
+					assert.True(t, errors.Is(err, tc.wantErrIs), "expected error to wrap %v, got: %v", tc.wantErrIs, err)
+				}
+				return
+			}
+
+			require.NoError(t, err)
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+func TestExtractIdentityFromTokenResponse_ErrorDoesNotLeakBody(t *testing.T) {
+	t.Parallel()
+
+	const secretMarker = "DO-NOT-LEAK-ME-XYZ"
+	body := []byte(`{"username":"","secret":"` + secretMarker + `"}`)
+
+	_, err := extractIdentityFromTokenResponse(body, &IdentityFromTokenConfig{SubjectPath: "username"})
+	require.Error(t, err)
+	assert.False(t, strings.Contains(err.Error(), secretMarker),
+		"error message must not contain body content, but got: %s", err.Error())
+}