-
Notifications
You must be signed in to change notification settings - Fork 214
Add identity extractor for OAuth2 token responses #5200
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+362
−0
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,176 @@ | ||
| // SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| package upstream | ||
|
|
||
| import ( | ||
| "encoding/base64" | ||
| "errors" | ||
| "fmt" | ||
| "log/slog" | ||
| "strings" | ||
|
|
||
| "github.com/tidwall/gjson" | ||
| ) | ||
|
|
||
| // partialIdentity holds identity fields extracted from a token response body. | ||
| // It is used internally to pass the extracted subject, name, and email | ||
| // between extractIdentityFromTokenResponse and the provider layer. | ||
| type partialIdentity struct { | ||
| // Subject is the value resolved from IdentityFromTokenConfig.SubjectPath, | ||
| // rendered as a string. extractIdentityFromTokenResponse only returns a | ||
| // populated struct after this value has passed validation. | ||
| Subject string | ||
| // Name is the value resolved from NamePath, or "" when extraction was | ||
| // skipped or the path did not yield a string or number. | ||
| Name string | ||
| // Email is the value resolved from EmailPath, or "" under the same | ||
| // conditions as Name. | ||
| Email string | ||
| } | ||
|
|
||
| // IdentityFromTokenConfig is the runtime configuration for extracting user | ||
| // identity directly from an OAuth2 token endpoint response body. | ||
| // | ||
| // Each path is a gjson dot-notation path (e.g. "username" or | ||
| // "associated_user.id") into the raw JSON body returned by the token | ||
| // endpoint. A path may also pipe through a custom modifier, for example | ||
| // "access_token|@upstreamjwt|sub"; such paths only resolve after | ||
| // RegisterModifiers has been called. Path semantics, trust-model warnings, | ||
| // and uniqueness notes are documented on the corresponding CRD type | ||
| // (cmd/thv-operator/api/v1alpha1.IdentityFromTokenConfig). | ||
| type IdentityFromTokenConfig struct { | ||
| // SubjectPath is the gjson path to the unique user identifier (required). | ||
| // It must resolve to a non-empty string or a number; any other outcome | ||
| // makes extraction fail with ErrIdentityResolutionFailed. | ||
| SubjectPath string | ||
|
|
||
| // NamePath is the gjson path to the user's display name (optional). | ||
| // Leave empty to skip name extraction. A configured path that is missing | ||
| // or resolves to a non-scalar value is logged as a warning and yields an | ||
| // empty name rather than an error. | ||
| NamePath string | ||
|
|
||
| // EmailPath is the gjson path to the user's email address (optional). | ||
| // Leave empty to skip email extraction. A configured path that is missing | ||
| // or resolves to a non-scalar value is logged as a warning and yields an | ||
| // empty email rather than an error. | ||
| EmailPath string | ||
| } | ||
|
|
||
| // RegisterModifiers registers the gjson custom modifiers used by this | ||
| // package's path-based identity extractors. Call once during application | ||
| // or test wire-up before invoking any extractor that consumes a | ||
| // modifier-bearing path. Repeated calls are safe — gjson.AddModifier | ||
| // overwrites the existing entry. | ||
| // | ||
| // NOTE(review): gjson documents AddModifier as not thread safe, so this | ||
| // presumably must complete before any concurrent gjson lookups start — | ||
| // confirm against the gjson version in use. | ||
| // | ||
| // Modifiers registered: | ||
| // - @upstreamjwt: see upstreamJWTModifier. | ||
| func RegisterModifiers() { | ||
| gjson.AddModifier("upstreamjwt", upstreamJWTModifier) | ||
| } | ||
|
|
||
| // extractIdentityFromTokenResponse extracts user identity fields from a raw | ||
| // OAuth2 token endpoint response body using the paths in cfg. | ||
| // | ||
| // SubjectPath must resolve to a non-empty string or a number. Missing paths, | ||
| // empty strings, objects, arrays, null, and booleans are rejected with an | ||
| // error wrapping ErrIdentityResolutionFailed; the message names the | ||
| // configured path and a fixed reason, never the resolved value. A nil cfg | ||
| // returns a plain error that does not wrap ErrIdentityResolutionFailed. | ||
| // | ||
| // NamePath and EmailPath are optional: type mismatches or missing paths | ||
| // produce a slog.Warn and leave the respective field empty. Empty | ||
| // NamePath/EmailPath in cfg means "do not extract" and are skipped silently. | ||
| // | ||
| // On error the returned partialIdentity is the zero value. | ||
| func extractIdentityFromTokenResponse(body []byte, cfg *IdentityFromTokenConfig) (partialIdentity, error) { | ||
| if cfg == nil { | ||
| return partialIdentity{}, errors.New("identity extraction config is required") | ||
| } | ||
|
|
||
| // The subject is mandatory, so it is resolved and validated first; the | ||
| // optional fields are not looked up when it fails. | ||
| subjectResult := gjson.GetBytes(body, cfg.SubjectPath) | ||
| if err := validateIdentityField(subjectResult); err != nil { | ||
| return partialIdentity{}, fmt.Errorf("%w: subjectPath %q %s", ErrIdentityResolutionFailed, cfg.SubjectPath, err.Error()) | ||
| } | ||
|
|
||
| // Optional fields never fail the extraction; problems are only logged. | ||
| name := extractOptionalField(body, cfg.NamePath, "namePath") | ||
| email := extractOptionalField(body, cfg.EmailPath, "emailPath") | ||
|
|
||
| return partialIdentity{ | ||
| Subject: scalarToString(subjectResult), | ||
| Name: name, | ||
| Email: email, | ||
| }, nil | ||
| } | ||
|
|
||
| // scalarToString returns the string representation of a gjson scalar value. | ||
| // For Number, it returns the raw JSON token rather than gjson.Result.String(), | ||
| // which formats via float64 and would lose precision for integer IDs larger | ||
| // than 2^53 (e.g., some upstream providers return 64-bit numeric subjects). | ||
| // For String, gjson.Result.String() correctly strips the surrounding quotes. | ||
| // The caller must already have validated the type; this function performs | ||
| // no type checks of its own beyond distinguishing Number from everything else. | ||
| func scalarToString(r gjson.Result) string { | ||
| if r.Type == gjson.Number { | ||
| // Raw is the token text as it appeared in the body, so the digits are | ||
| // passed through without any numeric conversion. | ||
| return r.Raw | ||
| } | ||
| return r.String() | ||
| } | ||
|
|
||
| // validateIdentityField checks that a gjson result is a non-empty scalar | ||
| // (string or number). Returns a descriptive error on failure. | ||
| // | ||
| // Rejected outcomes: a result that does not exist, an empty string, an | ||
| // object or array, null, and the booleans true/false. Numbers are accepted | ||
| // without further checks. The error messages are fixed strings and never | ||
| // include the resolved value, so they are safe to surface to callers. | ||
| func validateIdentityField(result gjson.Result) error { | ||
| // Checked first so that a missing path gets its own message instead of | ||
| // falling into the null case below. | ||
| if !result.Exists() { | ||
| return errors.New("path not found in token response") | ||
| } | ||
| switch result.Type { | ||
| case gjson.String: | ||
| if result.String() == "" { | ||
| return errors.New("resolved to an empty string") | ||
| } | ||
| return nil | ||
| case gjson.Number: | ||
| return nil | ||
| case gjson.JSON: | ||
| return errors.New("resolved to an object or array, expected a scalar") | ||
| case gjson.Null, gjson.False, gjson.True: | ||
| return errors.New("resolved to null or unsupported type") | ||
| } | ||
| // Unreachable: all gjson.Type cases are handled above. | ||
| return errors.New("unrecognised gjson result type") | ||
| } | ||
|
|
||
| // extractOptionalField extracts an optional identity field from the token body. | ||
| // | ||
| // It returns an empty string, without logging, when path is empty (the field | ||
| // is not configured). It returns an empty string and emits a slog.Warn when | ||
| // the path is missing from the body or resolves to an object, array, null, | ||
| // or boolean. String and number values are returned via scalarToString; an | ||
| // empty string value is returned as-is with no warning. | ||
| // | ||
| // fieldName is used only as the "field" attribute of the log records. The | ||
| // log records carry the field name and path, never the resolved value. | ||
| func extractOptionalField(body []byte, path, fieldName string) string { | ||
| if path == "" { | ||
| return "" | ||
| } | ||
| result := gjson.GetBytes(body, path) | ||
| if !result.Exists() { | ||
| slog.Warn("optional identity field not found in token response", "field", fieldName, "path", path) | ||
| return "" | ||
| } | ||
| switch result.Type { | ||
| case gjson.String, gjson.Number: | ||
| return scalarToString(result) | ||
| case gjson.JSON, gjson.Null, gjson.False, gjson.True: | ||
| slog.Warn("optional identity field has unexpected type, skipping", "field", fieldName, "path", path) | ||
| return "" | ||
| } | ||
| // Unreachable: all gjson.Type cases are handled above. | ||
| return "" | ||
| } | ||
|
|
||
| // upstreamJWTModifier is a gjson modifier that decodes the payload of a | ||
| // JWT-shaped string value and returns it as JSON, enabling further gjson | ||
| // path drilling (e.g. "access_token|@upstreamjwt|sub"). | ||
| // | ||
| // jsonValue is the raw JSON of the value the modifier is applied to; the | ||
| // modifier argument (second parameter) is ignored. | ||
| // | ||
| // Trust model: NO signature verification. Use only for JWTs received over | ||
| // a TLS-authenticated channel directly from the upstream AS's token | ||
| // endpoint, where the channel itself provides integrity. For JWTs that | ||
| // have transited an untrusted hop, configure the upstream as OIDC and | ||
| // use the existing signed-ID-token path instead. | ||
| // | ||
| // Failure modes (all return ""): | ||
| // - input is not a JSON string (objects, arrays, numbers, booleans, and | ||
| // null are rejected by an explicit type check) | ||
| // - input is an empty string | ||
| // - input does not contain exactly three dot-separated parts | ||
| // - the second part is not valid unpadded base64url | ||
| // | ||
| // The decoded payload is returned as-is; it is not checked for JSON | ||
| // validity here. | ||
| // | ||
| // Returning "" causes the next pipe stage to resolve to gjson.Null, which | ||
| // the caller's validateIdentityField rejects as "path not found". | ||
| func upstreamJWTModifier(jsonValue, _ string) string { | ||
| parsed := gjson.Parse(jsonValue) | ||
| // Only a JSON string can be a JWT. Without this check the raw text of an | ||
| // object or array that happens to contain two dots around a decodable | ||
| // segment would be treated as a token. | ||
| if parsed.Type != gjson.String { | ||
| return "" | ||
| } | ||
| token := parsed.String() | ||
| if token == "" { | ||
| return "" | ||
| } | ||
| parts := strings.Split(token, ".") | ||
| if len(parts) != 3 { | ||
| return "" | ||
| } | ||
| // parts[1] is the payload segment; header and signature are not inspected. | ||
| payload, err := base64.RawURLEncoding.DecodeString(parts[1]) | ||
| if err != nil { | ||
| return "" | ||
| } | ||
| return string(payload) | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,186 @@ | ||
| // SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| package upstream | ||
|
|
||
| import ( | ||
| "encoding/base64" | ||
| "errors" | ||
| "os" | ||
| "strings" | ||
| "testing" | ||
|
|
||
| "github.com/stretchr/testify/assert" | ||
| "github.com/stretchr/testify/require" | ||
| ) | ||
|
|
||
| // TestMain registers the package's gjson modifiers once before any test | ||
| // runs, so paths such as "access_token|@upstreamjwt|sub" resolve in the | ||
| // tests below, then exits with the test run's status code. | ||
| func TestMain(m *testing.M) { | ||
| RegisterModifiers() | ||
| os.Exit(m.Run()) | ||
| } | ||
|
|
||
| // makeJWT builds a JWT-shaped test fixture from the given payload JSON. | ||
| // The header is the unpadded base64url encoding of | ||
| // {"alg":"none","typ":"JWT"}, the payload is encoded the same way, and the | ||
| // third segment is the literal placeholder "sig" — the token is not signed. | ||
| func makeJWT(payload string) string { | ||
| h := base64.RawURLEncoding.EncodeToString([]byte(`{"alg":"none","typ":"JWT"}`)) | ||
| b := base64.RawURLEncoding.EncodeToString([]byte(payload)) | ||
| return h + "." + b + ".sig" | ||
| } | ||
|
|
||
| // TestExtractIdentityFromTokenResponse is a table-driven test of | ||
| // extractIdentityFromTokenResponse. Success cases compare the whole returned | ||
| // partialIdentity; failure cases require an error and, when wantErrIs is | ||
| // set, that the error wraps it. The nil-cfg case sets no wantErrIs, so only | ||
| // the presence of an error is checked there. | ||
| func TestExtractIdentityFromTokenResponse(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| tests := []struct { | ||
| name string | ||
| body []byte | ||
| cfg *IdentityFromTokenConfig | ||
| want partialIdentity | ||
| wantErr bool | ||
| wantErrIs error | ||
| }{ | ||
| { | ||
| name: "snowflake flat happy path", | ||
| body: []byte(`{"access_token":"opaque-blob","expires_in":600,"refresh_token":"r","token_type":"Bearer","username":"user1"}`), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "username"}, | ||
| want: partialIdentity{Subject: "user1"}, | ||
| }, | ||
| { | ||
| name: "slack nested happy path", | ||
| body: []byte(`{"ok":true,"access_token":"xoxb-...","authed_user":{"id":"U1234"},"team":{"id":"T1"}}`), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "authed_user.id"}, | ||
| want: partialIdentity{Subject: "U1234"}, | ||
| }, | ||
| { | ||
| name: "shopify nested with all three fields", | ||
| body: []byte(`{"access_token":"a","associated_user":{"id":902541635,"email":"john@example.com","first_name":"John"}}`), | ||
| cfg: &IdentityFromTokenConfig{ | ||
| SubjectPath: "associated_user.id", | ||
| NamePath: "associated_user.first_name", | ||
| EmailPath: "associated_user.email", | ||
| }, | ||
| want: partialIdentity{Subject: "902541635", Name: "John", Email: "john@example.com"}, | ||
| }, | ||
| { | ||
| name: "numeric subject explicit", | ||
| body: []byte(`{"user_id":42}`), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "user_id"}, | ||
| want: partialIdentity{Subject: "42"}, | ||
| }, | ||
| { | ||
| name: "missing optional name path in body", | ||
| body: []byte(`{"sub":"user1"}`), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "sub", NamePath: "display_name"}, | ||
| want: partialIdentity{Subject: "user1"}, | ||
| }, | ||
| { | ||
| name: "name path not configured", | ||
| body: []byte(`{"sub":"user1","display_name":"Alice"}`), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "sub"}, | ||
| want: partialIdentity{Subject: "user1"}, | ||
| }, | ||
| { | ||
| name: "optional name path resolves to object, skipped", | ||
| body: []byte(`{"sub":"u1","profile":{"first":"Alice"}}`), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "sub", NamePath: "profile"}, | ||
| want: partialIdentity{Subject: "u1"}, | ||
| }, | ||
| { | ||
| name: "jwt-embedded subject happy path", | ||
| body: []byte(`{"access_token":"` + makeJWT(`{"sub":"u1"}`) + `"}`), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "access_token|@upstreamjwt|sub"}, | ||
| want: partialIdentity{Subject: "u1"}, | ||
| }, | ||
| { | ||
| name: "jwt-embedded subject, malformed jwt", | ||
| body: []byte(`{"access_token":"not.a.jwt.really"}`), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "access_token|@upstreamjwt|sub"}, | ||
| wantErr: true, | ||
| wantErrIs: ErrIdentityResolutionFailed, | ||
| }, | ||
| { | ||
| name: "empty subject value", | ||
| body: []byte(`{"username":""}`), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "username"}, | ||
| wantErr: true, | ||
| wantErrIs: ErrIdentityResolutionFailed, | ||
| }, | ||
| { | ||
| name: "subject path resolves to object", | ||
| body: []byte(`{"user":{"id":"x"}}`), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "user"}, | ||
| wantErr: true, | ||
| wantErrIs: ErrIdentityResolutionFailed, | ||
| }, | ||
| { | ||
| name: "subject path resolves to null", | ||
| body: []byte(`{"username":null}`), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "username"}, | ||
| wantErr: true, | ||
| wantErrIs: ErrIdentityResolutionFailed, | ||
| }, | ||
| { | ||
| name: "large numeric subject preserves integer precision beyond 2^53", | ||
| // 9007199254740993 = 2^53 + 1; not exactly representable as float64. | ||
| // We use the raw JSON token rather than formatting via gjson.Result.String() | ||
| // (which goes through float64) to keep the digits exact. | ||
| body: []byte(`{"user_id":9007199254740993}`), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "user_id"}, | ||
| want: partialIdentity{Subject: "9007199254740993"}, | ||
| }, | ||
| { | ||
| name: "subject path missing from body", | ||
| body: []byte(`{}`), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "username"}, | ||
| wantErr: true, | ||
| wantErrIs: ErrIdentityResolutionFailed, | ||
| }, | ||
| { | ||
| name: "empty body", | ||
| body: []byte{}, | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "username"}, | ||
| wantErr: true, | ||
| wantErrIs: ErrIdentityResolutionFailed, | ||
| }, | ||
| { | ||
| name: "malformed json body", | ||
| body: []byte("not-json"), | ||
| cfg: &IdentityFromTokenConfig{SubjectPath: "username"}, | ||
| wantErr: true, | ||
| wantErrIs: ErrIdentityResolutionFailed, | ||
| }, | ||
| { | ||
| name: "nil cfg", | ||
| body: []byte(`{"username":"user1"}`), | ||
| cfg: nil, | ||
| wantErr: true, | ||
| }, | ||
| } | ||
|
|
||
| // NOTE(review): the parallel subtests close over tc. That is only safe with | ||
| // per-iteration loop variables (Go 1.22+) — confirm the module's go | ||
| // directive, otherwise tc needs to be shadowed inside the loop. | ||
| for _, tc := range tests { | ||
| t.Run(tc.name, func(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| got, err := extractIdentityFromTokenResponse(tc.body, tc.cfg) | ||
| if tc.wantErr { | ||
| require.Error(t, err) | ||
| if tc.wantErrIs != nil { | ||
| assert.True(t, errors.Is(err, tc.wantErrIs), "expected error to wrap %v, got: %v", tc.wantErrIs, err) | ||
| } | ||
| return | ||
| } | ||
|
|
||
| require.NoError(t, err) | ||
| assert.Equal(t, tc.want, got) | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| // TestExtractIdentityFromTokenResponse_ErrorDoesNotLeakBody guards against | ||
| // response-body content ending up in error text. It forces a failure with | ||
| // an empty subject and asserts that a marker stored in an unrelated body | ||
| // field does not appear in the returned error message. | ||
| func TestExtractIdentityFromTokenResponse_ErrorDoesNotLeakBody(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| const secretMarker = "DO-NOT-LEAK-ME-XYZ" | ||
| body := []byte(`{"username":"","secret":"` + secretMarker + `"}`) | ||
|
|
||
| _, err := extractIdentityFromTokenResponse(body, &IdentityFromTokenConfig{SubjectPath: "username"}) | ||
| require.Error(t, err) | ||
| assert.False(t, strings.Contains(err.Error(), secretMarker), | ||
| "error message must not contain body content, but got: %s", err.Error()) | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.