diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index 8878c3e629b1..a26444225617 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -666,7 +666,12 @@ void AccessControl::restoreFromBackup(RestorerFromBackup & restorer, const Strin void AccessControl::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) { - external_authenticators->setConfiguration(config, getLogger()); + /// Re-read `enable_token_auth` on every config reload. `setupFromMainConfig` + /// runs only once at startup, so without this re-sync flipping the flag in + /// the config and triggering a reload would silently leave the previous + /// value in place -- operators who toggle token auth off in response to an + /// IdP outage or a credential leak would see no effect until restart. + external_authenticators->setConfiguration(config, getLogger(), config.getBool("enable_token_auth", true)); } diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index fdf3e0662f1d..9720bf90f899 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -147,6 +147,13 @@ bool AuthenticationData::Util::checkPasswordBcrypt(std::string_view password [[m bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs) { + /// `MemoryAccessStorage::updateNoLock` short-circuits when the existing + /// entity equals the new one, so any field omitted from this comparator + /// becomes invisible to ALTER USER -- same-type ALTER would silently + /// no-op. JWT users carry two extra fields (`token_processor_name` and + /// `jwt_claims`) and they MUST take part in equality, otherwise re-pinning + /// a JWT user via ALTER USER is a no-op (CREATE USER OR REPLACE works + /// only by accident, via storage->insertOrReplace). return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash) && (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm) #if USE_SSL @@ -157,6 +164,8 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs) #endif && (lhs.http_auth_scheme == rhs.http_auth_scheme) && (lhs.http_auth_server_name == rhs.http_auth_server_name) + && (lhs.token_processor_name == rhs.token_processor_name) + && (lhs.jwt_claims == rhs.jwt_claims) && (lhs.valid_until == rhs.valid_until); } @@ -411,7 +420,23 @@ boost::intrusive_ptr AuthenticationData::toAST() const } case AuthenticationType::JWT: { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + /// Round-trip into the same shape the parser produces: PROCESSOR + /// child first (when set), CLAIMS child after (when set), with the + /// AST flags telling the formatter which slot is which. + const auto & processor_name = getTokenProcessorName(); + if (!processor_name.empty()) + { + node->has_jwt_processor = true; + node->children.push_back(make_intrusive(processor_name)); + } + + const auto & claims = getJWTClaims(); + if (!claims.empty()) + { + node->has_jwt_claims = true; + node->children.push_back(make_intrusive(claims)); + } + break; } case AuthenticationType::KERBEROS: { @@ -689,6 +714,35 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que auth_data.setHTTPAuthenticationServerName(server); auth_data.setHTTPAuthenticationScheme(scheme); } +#if USE_JWT_CPP + else if (query.type == AuthenticationType::JWT) + { + /// `query.has_jwt_processor` and `query.has_jwt_claims` describe which + /// of the two optional clauses the parser saw. Children are pushed in + /// PROCESSOR-then-CLAIMS order, so we walk them in that order. + size_t arg_idx = 0; + + if (query.has_jwt_processor) + { + String processor_name = checkAndGetLiteralArgument(args[arg_idx++], "processor"); + if (processor_name.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "JWT 'PROCESSOR' name must not be empty"); + auth_data.setTokenProcessorName(processor_name); + } + + if (query.has_jwt_claims) + { + String value = checkAndGetLiteralArgument(args[arg_idx++], "claims"); + picojson::value json_obj; + auto error = picojson::parse(json_obj, value); + if (!error.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad JWT claims: {}", error); + if (!json_obj.is()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad JWT claims: is not an object"); + auth_data.setJWTClaims(value); + } + } +#endif else { throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected ASTAuthenticationData structure"); diff --git a/src/Access/AuthenticationData.h b/src/Access/AuthenticationData.h index 1eef3fad6187..b318dadd34c6 100644 --- a/src/Access/AuthenticationData.h +++ b/src/Access/AuthenticationData.h @@ -79,6 +79,12 @@ class AuthenticationData const String & getHTTPAuthenticationServerName() const { return http_auth_server_name; } void setHTTPAuthenticationServerName(const String & name) { http_auth_server_name = name; } + const String & getTokenProcessorName() const { return token_processor_name; } + void setTokenProcessorName(const String & name) { token_processor_name = name; } + + const String & getJWTClaims() const { return jwt_claims; } + void setJWTClaims(const String & claims) { jwt_claims = claims; } + time_t getValidUntil() const { return valid_until; } void setValidUntil(time_t valid_until_) { valid_until = valid_until_; } @@ -121,6 +127,8 @@ class AuthenticationData String http_auth_server_name; HTTPAuthenticationScheme http_auth_scheme = HTTPAuthenticationScheme::BASIC; time_t valid_until = 0; + String token_processor_name; + String jwt_claims; }; } diff --git a/src/Access/Common/JWKSProvider.cpp b/src/Access/Common/JWKSProvider.cpp new file mode 100644 index 000000000000..2b656df3a3a4 --- /dev/null +++ b/src/Access/Common/JWKSProvider.cpp @@ -0,0 +1,232 @@ +#include + +#if USE_JWT_CPP +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int AUTHENTICATION_FAILED; + extern const int INVALID_CONFIG_PARAMETER; +} + +JWKSType JWKSClient::getJWKS() +{ + /// `last_request_send` semantics: timestamp of the most recent fetch + /// *attempt*, success or failure. Updated unconditionally before the + /// HTTP call so a failed fetch doesn't leave the timestamp stale and + /// invite every concurrent thread to re-hammer a failing endpoint + /// (L-02). Within `refresh_timeout` of an attempt: + /// - if a previously-successful JWKS is cached, serve it. + /// - otherwise, throw a "fetch in cooldown" exception so callers + /// don't queue up new attempts during the back-off window. + + { + std::shared_lock lock(mutex); + auto now = std::chrono::steady_clock::now(); + if (last_request_send.has_value()) + { + auto diff = std::chrono::duration(now - *last_request_send).count(); + if (diff < static_cast(refresh_timeout)) + { + if (cached_jwks.has_value()) + return cached_jwks.value(); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "JWKS endpoint at '{}' is in cooldown after a recent failed fetch; will retry after the cache lifetime elapses", + jwks_uri.toString()); + } + } + } + + std::unique_lock lock(mutex); + auto now = std::chrono::steady_clock::now(); + if (last_request_send.has_value()) + { + auto diff = std::chrono::duration(now - *last_request_send).count(); + if (diff < static_cast(refresh_timeout)) + { + if (cached_jwks.has_value()) + return cached_jwks.value(); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "JWKS endpoint at '{}' is in cooldown after a recent failed fetch; will retry after the cache lifetime elapses", + jwks_uri.toString()); + } + } + + /// Mark the attempt before issuing the network call so that even if the + /// fetch throws, subsequent waiters on this mutex see an updated + /// `last_request_send` and short-circuit via the cooldown branches above + /// instead of repeating the failing fetch back-to-back. + last_request_send = now; + + Poco::Net::HTTPResponse response; + std::string response_string; + + Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, jwks_uri.getPathAndQuery()}; + + /// Bound every JWKS fetch to a known limit. Without this, Poco's default + /// `HTTPSession` timeout of 60 seconds applies, and because the JWKS fetch + /// runs while `ExternalAuthenticators::mutex` is held by the outer + /// `checkTokenCredentials` call, a single slow or hung JWKS endpoint would + /// stall the whole auth subsystem (LDAP, Kerberos, HTTP basic, all other + /// token auth paths) for up to a full minute per request. 10 seconds is a + /// conservative cap: well above any healthy provider latency, well below + /// the default. + const Poco::Timespan jwks_http_timeout(/*seconds=*/10, 0); + + if (jwks_uri.getScheme() == "https") + { + Poco::Net::HTTPSClientSession session = Poco::Net::HTTPSClientSession(jwks_uri.getHost(), jwks_uri.getPort()); + session.setTimeout(jwks_http_timeout, jwks_http_timeout, jwks_http_timeout); + session.sendRequest(request); + std::istream & response_stream = session.receiveResponse(response); + if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK || !response_stream) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Failed to get user info by access token, code: {}, reason: {}", + response.getStatus(), response.getReason()); + Poco::StreamCopier::copyToString(response_stream, response_string); + } + else + { + Poco::Net::HTTPClientSession session = Poco::Net::HTTPClientSession(jwks_uri.getHost(), jwks_uri.getPort()); + session.setTimeout(jwks_http_timeout, jwks_http_timeout, jwks_http_timeout); + session.sendRequest(request); + std::istream & response_stream = session.receiveResponse(response); + if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK || !response_stream) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Failed to get user info by access token, code: {}, reason: {}", response.getStatus(), response.getReason()); + Poco::StreamCopier::copyToString(response_stream, response_string); + } + + JWKSType parsed_jwks; + + try + { + parsed_jwks = jwt::parse_jwks(response_string); + } + catch (const std::exception & e) + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Failed to parse JWKS: {}", e.what()); + } + + cached_jwks = std::move(parsed_jwks); + return cached_jwks.value(); +} + +StaticJWKSParams::StaticJWKSParams(const std::string & static_jwks_, const std::string & static_jwks_file_) +{ + if (static_jwks_.empty() && static_jwks_file_.empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "JWT validator misconfigured: `static_jwks` or `static_jwks_file` keys must be present in static JWKS validator configuration"); + if (!static_jwks_.empty() && !static_jwks_file_.empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "JWT validator misconfigured: `static_jwks` and `static_jwks_file` keys cannot both be present in static JWKS validator configuration"); + + static_jwks = static_jwks_; + static_jwks_file = static_jwks_file_; +} + +StaticJWKS::StaticJWKS(const StaticJWKSParams & params) +{ + static_jwks_file = params.static_jwks_file; + + String content = String(params.static_jwks); + if (!static_jwks_file.empty()) + { + std::ifstream ifs(static_jwks_file); + Poco::StreamCopier::copyToString(ifs, content); + /// Record the mtime so subsequent `getJWKS()` calls can notice rotation. + std::error_code ec; + const auto write_time = std::filesystem::last_write_time(static_jwks_file, ec); + if (!ec) + last_loaded_mtime = write_time; + } + try + { + auto keys = jwt::parse_jwks(content); + jwks = std::move(keys); + } + catch (const std::exception & e) + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Failed to parse JWKS: {}", e.what()); + } +} + +void StaticJWKS::reloadFromFileIfChangedNoLock() +{ + /// Inline `static_jwks` source: nothing to refresh from disk. + if (static_jwks_file.empty()) + return; + + std::error_code ec; + const auto mtime = std::filesystem::last_write_time(static_jwks_file, ec); + if (ec) + { + /// File disappeared or became unreadable. Keep the previously-loaded + /// keys -- failing closed here would lock everyone out on a transient + /// filesystem hiccup. The operator gets a log signal. + LOG_WARNING(getLogger("TokenAuthentication"), + "StaticJWKS: failed to stat '{}' for refresh ({}); keeping previously-loaded keys.", + static_jwks_file, ec.message()); + return; + } + if (mtime <= last_loaded_mtime) + return; + + /// File has been rotated. Read + parse + swap. + String content; + try + { + std::ifstream ifs(static_jwks_file); + Poco::StreamCopier::copyToString(ifs, content); + auto new_keys = jwt::parse_jwks(content); + jwks = std::move(new_keys); + last_loaded_mtime = mtime; + LOG_INFO(getLogger("TokenAuthentication"), + "StaticJWKS: reloaded keys from '{}' after detecting mtime change.", static_jwks_file); + } + catch (const std::exception & e) + { + /// Malformed new JWKS: keep the old one. Loud signal so the operator + /// knows the rotation didn't take. + LOG_ERROR(getLogger("TokenAuthentication"), + "StaticJWKS: failed to parse '{}' on refresh: {}; keeping previously-loaded keys.", + static_jwks_file, e.what()); + } +} + +JWKSType StaticJWKS::getJWKS() +{ + /// Fast path: shared lock + mtime check. Refresh under exclusive lock only + /// when the file actually changed. + { + std::shared_lock lock(mutex); + if (static_jwks_file.empty()) + return jwks; + + std::error_code ec; + const auto mtime = std::filesystem::last_write_time(static_jwks_file, ec); + if (ec) + return jwks; + if (mtime <= last_loaded_mtime) + return jwks; + } + + std::unique_lock lock(mutex); + reloadFromFileIfChangedNoLock(); + return jwks; +} + +} +#endif diff --git a/src/Access/Common/JWKSProvider.h b/src/Access/Common/JWKSProvider.h new file mode 100644 index 000000000000..c59266d9b40c --- /dev/null +++ b/src/Access/Common/JWKSProvider.h @@ -0,0 +1,94 @@ +#include + +#if USE_JWT_CPP +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +using JWKSType = jwt::jwks; + +/// JWKS (JSON Web Key Set) is a kind of a set of public keys that are used to validate JWT authenticity locally. +/// They are usually exposed by identity providers (e.g. Keycloak) via a well-known URI (usually /.well-known/jwks.json) +/// This interface is responsible for managing JWKS. Retrieving, caching and refreshing of JWKS happens here. +/// JWKS can either be static (e.g. provided in config) or dynamic (fetched from a remote URI and). +class IJWKSProvider +{ +public: + virtual ~IJWKSProvider() = default; + + virtual JWKSType getJWKS() = 0; +}; + +class JWKSClient : public IJWKSProvider +{ +public: + explicit JWKSClient(const String & uri, const size_t refresh_ms_): refresh_timeout(refresh_ms_), jwks_uri(uri) {} + + ~JWKSClient() override = default; + JWKSClient(const JWKSClient &) = delete; + JWKSClient(JWKSClient &&) = delete; + JWKSClient &operator=(const JWKSClient &) = delete; + JWKSClient &operator=(JWKSClient &&) = delete; + + JWKSType getJWKS() override; + +private: + size_t refresh_timeout; + Poco::URI jwks_uri; + + std::shared_mutex mutex; + std::optional cached_jwks; + /// `steady_clock` (not `system_clock`): refresh-cooldown is an elapsed-time + /// measurement; a wall-clock jump must not skip or freeze it. + /// `std::nullopt` means "no fetch has ever been attempted" -- needed to + /// distinguish a never-attempted state from a recently-failed one, because + /// the steady-clock epoch may sit only a short distance in the past on + /// freshly-booted hosts / containers with isolated CLOCK_MONOTONIC, making + /// a zero-initialized time_point look like a "recent" attempt. + std::optional> last_request_send; +}; + +struct StaticJWKSParams +{ + StaticJWKSParams(const std::string &static_jwks_, const std::string &static_jwks_file_); + + String static_jwks; + String static_jwks_file; +}; + +class StaticJWKS : public IJWKSProvider +{ +public: + explicit StaticJWKS(const StaticJWKSParams ¶ms); + + /// Reload the JWKS from disk if `static_jwks_file` was specified and the + /// file's mtime has advanced since the last load. Inline `static_jwks` + /// (no file path) is returned from the in-memory copy without I/O. + /// Without this, rotating the underlying file did NOT refresh the + /// in-memory keys -- admins had to trigger a full + /// `setExternalAuthenticatorsConfig` reload to pick up the new file. + JWKSType getJWKS() override; + +private: + void reloadFromFileIfChangedNoLock(); + + /// Source path -- empty when JWKS came from inline `` config. + String static_jwks_file; + /// `mtime` of the file at the most recent successful load. Used to detect + /// rotation. `file_time_type::min()` means "not loaded from a file" or + /// "never seen the file yet". + std::filesystem::file_time_type last_loaded_mtime = std::filesystem::file_time_type::min(); + + mutable std::shared_mutex mutex; + JWKSType jwks; +}; + +} +#endif diff --git a/src/Access/Credentials.h b/src/Access/Credentials.h index f98eb31ff0a2..0fab3d5dc85f 100644 --- a/src/Access/Credentials.h +++ b/src/Access/Credentials.h @@ -9,6 +9,9 @@ #endif #include +#include +#include +#include namespace Poco::Net { @@ -137,6 +140,28 @@ class MySQLNative41Credentials : public CredentialsWithScramble using CredentialsWithScramble::CredentialsWithScramble; }; +class TokenCredentials : public Credentials +{ +public: + explicit TokenCredentials(const String & token_) + : token(token_) + { + is_ready = true; + } + + void setUserName(const String & user_name_) { user_name = user_name_; } + void setGroups(const std::set & groups_) { groups = groups_; } + const std::set & getGroups() const { return groups; } + void setExpiresAt(const std::chrono::system_clock::time_point & expires_at_) { expires_at = expires_at_; } + std::optional getExpiresAt() const { return expires_at; } + const String & getToken() const { return token; } + +private: + String token; + std::set groups; + std::optional expires_at; +}; + #if USE_SSH class SshCredentials : public Credentials { diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 6fa7c28bc980..03a484de5890 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -293,11 +293,63 @@ void ExternalAuthenticators::reset() resetImpl(); } -void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log) +/// Parse all token processors as an all-or-nothing operation. +/// +/// Throws if ANY processor fails to parse. The caller is expected to react by +/// disabling token authentication for this configuration cycle (fail-closed). +void parseTokenProcessors(std::map> & token_processors, + const Poco::Util::AbstractConfiguration & config, + const String & token_processors_config, + LoggerPtr log) +{ + Poco::Util::AbstractConfiguration::Keys token_processors_keys; + config.keys(token_processors_config, token_processors_keys); + + /// Build into a local map first so the live set is never observed in a partially-constructed state. + /// Ordered so the auto-discovery iteration order in `checkTokenCredentials` is stable. + std::map> parsed; + + for (const auto & processor : token_processors_keys) + { + String prefix = fmt::format("{}.{}", token_processors_config, processor); + try + { + parsed[processor] = ITokenProcessor::parseTokenProcessor(config, prefix, processor); + } + catch (...) + { + tryLogCurrentException(log, "Could not parse token processor " + backQuote(processor)); + /// Re-throw so the caller fails. + throw; + } + } + + token_processors = std::move(parsed); +} + +bool ExternalAuthenticators::isTokenAuthEnabled() const +{ + std::lock_guard lock(mutex); + return token_auth_enabled; +} + +bool ExternalAuthenticators::hasTokenProcessor(const String & name) const +{ + std::lock_guard lock(mutex); + if (!token_auth_enabled) + return false; + if (name.empty()) + return true; + return token_processors.contains(name); +} + +void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log, bool token_auth_enabled_) { std::lock_guard lock(mutex); resetImpl(); + token_auth_enabled = token_auth_enabled_; + Poco::Util::AbstractConfiguration::Keys all_keys; config.keys("", all_keys); @@ -306,6 +358,7 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur std::size_t http_auth_server_keys_count = 0; const String http_auth_servers_config = "http_authentication_servers"; + const String token_processors_config = "token_processors"; for (auto key : all_keys) { @@ -383,6 +436,26 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur { tryLogCurrentException(log, "Could not parse Kerberos section"); } + + if (token_auth_enabled) + { + try + { + parseTokenProcessors(token_processors, config, token_processors_config, log); + } + catch (...) + { + /// Fail closed: if any token processor failed to parse, refuse to + /// activate token auth at all for this config cycle. + tryLogCurrentException(log, + "One or more token processors failed to parse; " + "disabling token authentication entirely until the configuration is fixed"); + token_processors.clear(); + token_auth_enabled = false; + } + } + else + LOG_INFO(log, "Token authentication is disabled, skipping token processors configuration"); } static UInt128 computeParamsHash(const LDAPClient::Params & params, const LDAPClient::RoleSearchParamsList * role_search_params) @@ -561,6 +634,241 @@ HTTPAuthClientParams ExternalAuthenticators::getHTTPAuthenticationParams(const S return it->second; } +bool ExternalAuthenticators::checkCredentialsAgainstProcessor(const ITokenProcessor & processor, + TokenCredentials & credentials) const +{ + if (!processor.resolveAndValidate(credentials)) + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Failed authentication with access token by {}", processor.getProcessorName()); + return false; + } + + /// Clamp the credentials' expires_at to the processor's cache lifetime so + /// upper layers (notably `Session`) bind their lifetime to whichever is + /// shorter -- the token's own expiry or the operator-configured TTL. This + /// is a *post-validation finalization* of the credentials, not a cache + /// write; the actual token-cache entry is written by `primeTokenCache`, + /// and only after any per-user `jwt_claims` policy has also accepted the + /// token (see `checkTokenCredentials`). + auto default_expiration_ts = std::chrono::system_clock::now() + + std::chrono::seconds(processor.getTokenCacheLifetime()); + + if (credentials.getExpiresAt().has_value()) + { + if (credentials.getExpiresAt().value() >= default_expiration_ts) + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Token for user {} expires after default cache lifetime; using default TTL by {}", credentials.getUserName(), processor.getProcessorName()); + credentials.setExpiresAt(default_expiration_ts); + } + } + else + { + credentials.setExpiresAt(default_expiration_ts); + } + + LOG_DEBUG(getLogger("AccessTokenAuthentication"), "Authenticated user {} with access token by {}", quoteString(credentials.getUserName()), processor.getProcessorName()); + return true; +} + +void ExternalAuthenticators::primeTokenCache(const ITokenProcessor & processor, + const TokenCredentials & credentials) const +{ + /// Build a cache entry from the credentials state that + /// `checkCredentialsAgainstProcessor` finalized. The caller is responsible + /// for invoking this only after both processor validation AND the per-user + /// `jwt_claims` policy have accepted the token -- caching before claims + /// have been evaluated would let later unconstrained lookups (e.g. the + /// HTTP/TCP pre-user-lookup call which passes empty `jwt_claims`) hit a + /// cache entry that never actually satisfied the user's policy. + TokenCacheEntry cache_entry; + cache_entry.user_name = credentials.getUserName(); + cache_entry.external_roles = credentials.getGroups(); + cache_entry.processor_name = processor.getProcessorName(); + cache_entry.expires_at = credentials.getExpiresAt().value_or( + std::chrono::system_clock::now() + std::chrono::seconds(processor.getTokenCacheLifetime())); + + /// If the same token already has a forward entry that maps to a DIFFERENT + /// user_name, clean up the stale reverse entry for that other user before + /// we overwrite the forward entry. This happens when two processors extract + /// different `username_claim` values from the same token (e.g. processor X + /// uses `sub`, processor Y uses `email`): without this, the rotation step + /// below would not see the old user's entry in the reverse map and the + /// bi-map would diverge -- forward saying token -> new_user while a stale + /// reverse says old_user -> token, surfacing later as a dangling reverse + /// pointer that breaks the single-token-per-user invariant. + auto existing_forward = access_token_to_username_cache.find(credentials.getToken()); + if (existing_forward != access_token_to_username_cache.end() + && existing_forward->second.user_name != cache_entry.user_name) + { + auto stale_reverse = username_to_access_token_cache.find(existing_forward->second.user_name); + if (stale_reverse != username_to_access_token_cache.end() + && stale_reverse->second == credentials.getToken()) + username_to_access_token_cache.erase(stale_reverse); + } + + /// If a previous entry exists for the same user under a different token, + /// drop it -- the user has rotated tokens and the old one is now stale. + auto old_token_iter = username_to_access_token_cache.find(cache_entry.user_name); + if (old_token_iter != username_to_access_token_cache.end()) + { + access_token_to_username_cache.erase(old_token_iter->second); + username_to_access_token_cache.erase(old_token_iter); + } + + access_token_to_username_cache[credentials.getToken()] = cache_entry; + username_to_access_token_cache[cache_entry.user_name] = credentials.getToken(); + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} added", quoteString(cache_entry.user_name)); +} + +bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & credentials, + const String & processor_name, + const String & jwt_claims, + bool prime_cache_on_success) const +{ + /// Per-user claims restriction is binding: when a user is configured with `jwt_claims`, + /// authentication is only allowed via processors that can actually evaluate those claims + /// (i.e. JWT processors). If the resolving processor cannot enforce the restriction we + /// must deny -- silently treating it as "no restriction" would let an opaque/access-token + /// processor authenticate a token that fails the user's per-user policy. + auto check_claims_if_required = [&](const ITokenProcessor & processor) -> bool + { + if (jwt_claims.empty()) + return true; + if (!processor.supportsJwtClaimsRestriction()) + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), + "Processor {} does not support per-user JWT claims restriction; " + "denying authentication that requires claims to be checked", + processor.getProcessorName()); + return false; + } + return processor.checkClaims(credentials, jwt_claims); + }; + + /// Snapshot the processor set under the mutex, then run the expensive + /// crypto verify WITHOUT the mutex (M-20). `shared_ptr` keeps each + /// processor alive even if a config reload swaps `token_processors` in + /// the middle of validation. Cache lookup stays under the mutex. + std::map> processors_snapshot; + + { + std::lock_guard lock{mutex}; + + if (!token_auth_enabled) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Token authentication is disabled"); + + if (token_processors.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Token authentication is not configured"); + + /// lookup token in local cache if not expired. + auto cached_entry_iter = access_token_to_username_cache.find(credentials.getToken()); + if (cached_entry_iter != access_token_to_username_cache.end()) + { + if (cached_entry_iter->second.expires_at <= std::chrono::system_clock::now()) // Token found in cache, but already outdated -- need to remove it. + { + const auto expired_user_name = cached_entry_iter->second.user_name; + const auto expired_token = cached_entry_iter->first; + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} expired, removing", quoteString(expired_user_name)); + access_token_to_username_cache.erase(cached_entry_iter); + + /// Only unlink the reverse mapping if it currently points at the token + /// we just evicted. The bi-map invariant is maintained by + /// `primeTokenCache`, but if a reverse entry is somehow stale (or if a + /// concurrent rotation under the same mutex hold has already pointed + /// the user's reverse mapping at a fresh, still-valid token), erasing + /// blindly here would unlink that fresh token's reverse entry -- + /// silently breaking the single-token-per-user invariant and extending + /// the stale token's effective retention. + auto reverse_it = username_to_access_token_cache.find(expired_user_name); + if (reverse_it != username_to_access_token_cache.end() && reverse_it->second == expired_token) + username_to_access_token_cache.erase(reverse_it); + } + /// Enforce the per-user processor pin even on cache hit. A cache entry produced by + /// processor A must NOT be used to satisfy an authentication request that is pinned + /// to a different processor B.When the caller did not pin a processor (processor_name is + /// empty) any cached entry is acceptable. + else if (processor_name.empty() || processor_name == cached_entry_iter->second.processor_name) + { + /// Evaluate per-user claims FIRST, before mutating the outer + /// `TokenCredentials`. The `const_cast`-ed `setUserName`/`setGroups`/ + /// `setExpiresAt` writes below would otherwise leak the cached + /// identity into the caller's credentials object even on rejection. + if (!jwt_claims.empty()) + { + const auto it = token_processors.find(cached_entry_iter->second.processor_name); + if (it == token_processors.end() || !check_claims_if_required(*it->second)) + return false; + } + + const auto & user_data = cached_entry_iter->second; + const_cast(credentials).setUserName(user_data.user_name); + const_cast(credentials).setGroups(user_data.external_roles); + const_cast(credentials).setExpiresAt(user_data.expires_at); + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} found, using it to authenticate", quoteString(user_data.user_name)); + return true; + } + else + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), + "Cached token entry was produced by processor {}, but authentication is pinned to {}; " + "ignoring cache and re-authenticating via the pinned processor", + cached_entry_iter->second.processor_name, processor_name); + } + } + + processors_snapshot = token_processors; + } + + /// Validation path runs WITHOUT the mutex. RSA/ECDSA verifies and any + /// expensive claim matching no longer serialize the auth subsystem. + auto try_processor = [&](const std::shared_ptr & proc) -> std::optional + { + if (!checkCredentialsAgainstProcessor(*proc, const_cast(credentials))) + return std::nullopt; + if (!check_claims_if_required(*proc)) + return false; + if (prime_cache_on_success) + { + std::lock_guard lock{mutex}; + primeTokenCache(*proc, credentials); + } + return true; + }; + + if (processor_name.empty()) + { + for (const auto & [name, proc] : processors_snapshot) + { + if (!jwt_claims.empty() && !proc->supportsJwtClaimsRestriction()) + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), + "Skipping processor {} during auto-discovery: it cannot enforce per-user JWT claims", + proc->getProcessorName()); + continue; + } + if (auto result = try_processor(proc); result.has_value()) + return *result; + } + } + else + { + const auto it = processors_snapshot.find(processor_name); + if (it == processors_snapshot.end()) + return false; + if (!jwt_claims.empty() && !it->second->supportsJwtClaimsRestriction()) + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), + "Pinned processor {} cannot enforce per-user JWT claims; denying authentication", + it->second->getProcessorName()); + return false; + } + if (auto result = try_processor(it->second); result.has_value()) + return *result; + } + + return false; +} + bool ExternalAuthenticators::checkHTTPBasicCredentials( const String & server, const BasicCredentials & credentials, const ClientInfo & client_info, SettingsChanges & settings) const { diff --git a/src/Access/ExternalAuthenticators.h b/src/Access/ExternalAuthenticators.h index 6aa26bb3842a..d13c1547d8d3 100644 --- a/src/Access/ExternalAuthenticators.h +++ b/src/Access/ExternalAuthenticators.h @@ -37,7 +37,16 @@ class ExternalAuthenticators { public: void reset(); - void setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log); + void setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log, bool token_auth_enabled_); + + bool isTokenAuthEnabled() const; + + /// Returns true if a token processor with the given name is currently + /// configured. Used by `Session::checkIfUserIsStillValid` to terminate + /// active sessions whose authenticating processor was removed by config + /// reload (M-28). Empty `name` is treated as "no specific pin" and + /// returns true (token auth must still be enabled, of course). + bool hasTokenProcessor(const String & name) const; // The name and readiness of the credentials must be verified before calling these. bool checkLDAPCredentials(const String & server, const BasicCredentials & credentials, @@ -45,6 +54,21 @@ class ExternalAuthenticators bool checkKerberosCredentials(const String & realm, const GSSAcceptorContext & credentials) const; bool checkHTTPBasicCredentials(const String & server, const BasicCredentials & credentials, const ClientInfo & client_info, SettingsChanges & settings) const; + /// `prime_cache_on_success` controls whether a successful validation populates the + /// token cache. Per-user authentication paths (the chain reached from + /// `Session::authenticate`) leave this at the default `true` -- their result is + /// gated by the user's pinned processor and per-user JWT claims, so the cache + /// entry it produces is safe to consult on subsequent requests. The HTTP and TCP + /// bearer entry points authenticate the token *before* the user is known + /// (they need the username from the token to drive user lookup) and so call + /// this with `false`: their decision is made under no processor pin and no + /// claims constraint, and a cache entry written from that context would be + /// trusted by a later per-user call whose `processor_name` is empty -- bypassing + /// the per-user processor and claim selection that would otherwise occur. + bool checkTokenCredentials(const TokenCredentials & credentials, + const String & processor_name = "", + const String & jwt_claims = "", + bool prime_cache_on_success = true) const; GSSAcceptorContext::Params getKerberosParams() const; private: @@ -66,6 +90,57 @@ class ExternalAuthenticators mutable LDAPCaches ldap_caches TSA_GUARDED_BY(mutex) ; std::optional kerberos_params TSA_GUARDED_BY(mutex) ; std::unordered_map http_auth_servers TSA_GUARDED_BY(mutex) ; + /// Ordered (std::map, not unordered_map) so that the auto-discovery + /// dispatch order in `checkTokenCredentials` is deterministic across + /// process runs. Without an ordering, the iteration order of + /// `unordered_map` is implementation-defined and may differ run-to-run + /// or after rehashing -- which means the same unpinned token can be + /// validated by processor A in one run and processor B in another, + /// producing different cached identities, different role mappings (each + /// processor has its own `groups_claim`), and surprising debugging + /// outcomes. Alphabetical-by-name order makes "first to succeed wins" + /// stable and predictable from configuration alone. + /// + /// `shared_ptr` so callers can snapshot the relevant processor pointer + /// (or the whole map) under the mutex, RELEASE the mutex, and run the + /// expensive crypto verify without serializing the entire auth + /// subsystem behind a single attacker-driven RSA verify (M-20). Cheap: + /// processor count is tiny, snapshot is shared_ptr copies. + mutable std::map> token_processors TSA_GUARDED_BY(mutex) ; + + struct TokenCacheEntry + { + std::chrono::system_clock::time_point expires_at; + String user_name; + std::set external_roles; + /// Name of the token processor that produced this cache entry. + String processor_name; + }; + + /// Home-made simple bi-mapping, needed to effectively clean up cache from old tokens. + using TokenToUsernameCache = std::unordered_map; // Access token -> cache entry + using UsernameToTokenCache = std::unordered_map; // User name -> access token + + mutable TokenToUsernameCache access_token_to_username_cache TSA_GUARDED_BY(mutex) ; + mutable UsernameToTokenCache username_to_access_token_cache TSA_GUARDED_BY(mutex) ; + + bool token_auth_enabled TSA_GUARDED_BY(mutex) = true; + + /// Validates the credentials with the given processor. On success, mutates + /// `credentials` (user name, groups, effective expires_at) and returns true. + /// Does NOT write the token cache -- caching is the responsibility of the + /// caller, after the per-user `jwt_claims` policy has been evaluated. + /// + /// MUST be called WITHOUT holding `mutex`: this is the expensive crypto + /// path (M-20). The processor must be passed by `shared_ptr` so it + /// outlives a concurrent config reload that resets `token_processors`. + bool checkCredentialsAgainstProcessor(const ITokenProcessor & processor, + TokenCredentials & credentials) const; + + /// Writes the per-token cache entry. Must be called only after both processor + /// validation AND any per-user `jwt_claims` policy have accepted the token. + void primeTokenCache(const ITokenProcessor & processor, + const TokenCredentials & credentials) const TSA_REQUIRES(mutex); void resetImpl() TSA_REQUIRES(mutex); }; diff --git a/src/Access/TokenAccessStorage.cpp b/src/Access/TokenAccessStorage.cpp new file mode 100644 index 000000000000..72dc3ed394d1 --- /dev/null +++ b/src/Access/TokenAccessStorage.cpp @@ -0,0 +1,727 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + struct ParsedTransform + { + String pattern; + String replacement; + bool global; + }; + + /// Unescape a string segment + String unescapeSegment(const String & str, size_t start, size_t end) + { + String result; + result.reserve(end - start); + bool escaped = false; + + for (size_t i = start; i < end; ++i) + { + if (escaped) + { + result += str[i]; + escaped = false; + } + else if (str[i] == '\\') + escaped = true; + else + result += str[i]; + } + + return result; + } + + /// Parse sed-style transform pattern: s/pattern/replacement/flags + ParsedTransform parseSedTransform(const String & transform) + { + if (transform.size() < 4 || transform[0] != 's' || transform[1] != '/') + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid roles_transform format. Expected sed-style pattern like 's/pattern/replacement/g'"); + } + + bool escaped = false; + size_t first_slash = 1; + size_t second_slash = String::npos; + size_t third_slash = String::npos; + + // Find delimiters using simple state machine + for (size_t i = first_slash + 1; i < transform.size(); ++i) + { + if (escaped) + { + escaped = false; + continue; + } + + if (transform[i] == '\\') + { + escaped = true; + continue; + } + + if (transform[i] == '/') + { + if (second_slash == String::npos) + second_slash = i; + else if (third_slash == String::npos) + third_slash = i; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid roles_transform format. Too many unescaped slashes. Expected sed-style pattern like 's/pattern/replacement/g'"); + } + } + + if (second_slash == String::npos || third_slash == String::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid roles_transform format. Expected sed-style pattern like 's/pattern/replacement/g'"); + + ParsedTransform result; + + result.pattern = unescapeSegment(transform, first_slash + 1, second_slash); + + size_t replacement_end = (third_slash != String::npos) ? third_slash : transform.size(); + result.replacement = unescapeSegment(transform, second_slash + 1, replacement_end); + + String flags = transform.substr(third_slash + 1); + result.global = (flags.find('g') != String::npos); + + return result; + } + + String applyTransform(const String & input, const re2::RE2 & re, const String & replacement, bool global) + { + /// `re` is precompiled at storage construction (the constructor refuses + /// to load with an invalid pattern, so by the time we get here the + /// regex is guaranteed to be `ok()`). No per-call recompilation; no + /// silent no-op on a bad pattern. + String result = input; + if (global) + RE2::GlobalReplace(&result, re, replacement); + else + RE2::Replace(&result, re, replacement); + return result; + } +} + +TokenAccessStorage::TokenAccessStorage(const String & storage_name_, AccessControl & access_control_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_) + : IAccessStorage(storage_name_), access_control(access_control_), config(config_), prefix(prefix_), + memory_storage(storage_name_, access_control.getChangesNotifier(), false) +{ + std::lock_guard lock(mutex); + + const String prefix_str = (prefix.empty() ? "" : prefix + "."); + + if (config.has(prefix_str + "roles_filter")) + { + const String filter_pattern = config.getString(prefix_str + "roles_filter"); + roles_filter.emplace(filter_pattern); + + /// Fail closed on invalid regex. RE2 does not throw on bad patterns -- it + /// constructs an object with ok()==false and silently fails every match. + /// Reject the configuration up front so the + /// storage cannot be instantiated in a permissive state. + if (!roles_filter->ok()) + { + const String error = roles_filter->error(); + roles_filter.reset(); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Invalid 'roles_filter' regex for Token user directory '{}': {}. " + "Refusing to start with a misconfigured filter to avoid granting " + "all token groups as roles.", + storage_name_, error); + } + } + + if (config.has(prefix_str + "roles_transform")) + { + String transform = config.getString(prefix_str + "roles_transform"); + ParsedTransform parsed = parseSedTransform(transform); + + /// Compile and validate the regex up front. If we deferred compilation + /// to runtime (the previous behavior), an invalid regex would silently + /// return the input unchanged on every call -- meaning every role name + /// from the IdP would flow into role-mapping ungroomed, defeating the + /// purpose of `roles_transform`. Fail loudly at construction so the + /// misconfiguration is visible at startup. + if (!parsed.pattern.empty()) + { + roles_transform_pattern.emplace(parsed.pattern); + if (!roles_transform_pattern->ok()) + { + const String error = roles_transform_pattern->error(); + roles_transform_pattern.reset(); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Invalid 'roles_transform' regex for Token user directory '{}': {}. " + "Refusing to start with a misconfigured transform to avoid admitting " + "ungroomed role names from the IdP.", + storage_name_, error); + } + } + roles_transform_replacement = parsed.replacement; + roles_transform_global = parsed.global; + } + + provider_name = config.getString(prefix_str + "processor"); + if (provider_name.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'processor' must be specified for Token user directory"); + + std::set common_roles_cfg; + if (config.has(prefix_str + "common_roles")) + { + Poco::Util::AbstractConfiguration::Keys role_names; + config.keys(prefix_str + "common_roles", role_names); + + common_roles_cfg.insert(role_names.begin(), role_names.end()); + } + common_role_names.swap(common_roles_cfg); + + if (config.has(prefix_str + "default_profile")) + default_profile_name = config.getString(prefix_str + "default_profile"); + + /// Optional IP allowlist for auto-provisioned users. Mirrors the + /// `users.xml` `` shape: `SUBNET` / + /// `NAME` / `REGEX` children. + /// Without this, every auto-created token user defaults to `AnyHost` and + /// admins have no way to restrict token-auth by network through standard + /// access-control config. + const auto networks_config_path = prefix_str + "networks"; + if (config.has(networks_config_path)) + { + AllowedClientHosts hosts; + Poco::Util::AbstractConfiguration::Keys network_keys; + config.keys(networks_config_path, network_keys); + for (const String & key : network_keys) + { + const String value = config.getString(networks_config_path + "." + key); + if (key.starts_with("ip")) + hosts.addSubnet(value); + else if (key.starts_with("host_regexp")) + hosts.addNameRegexp(value); + else if (key.starts_with("host")) + hosts.addName(value); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Token user directory '{}': unknown entry '{}'; expected 'ip', 'host', or 'host_regexp'.", + storage_name_, key); + } + auto_user_allowed_hosts = std::move(hosts); + } + + user_external_roles.clear(); + users_per_roles.clear(); + roles_per_users.clear(); + granted_role_names.clear(); + granted_role_ids.clear(); + + role_change_subscription = access_control.subscribeForChanges( + [this] (const UUID & id, const AccessEntityPtr & entity) + { + this->processRoleChange(id, entity); + } + ); +} + +void TokenAccessStorage::applyRoleChangeNoLock(bool grant, const UUID & role_id, const String & role_name) +{ + std::vector user_ids; + + // Build a list of ids of the relevant users. + if (common_role_names.contains(role_name)) + { + user_ids = memory_storage.findAll(); + } + else + { + const auto it = users_per_roles.find(role_name); + if (it != users_per_roles.end()) + { + const auto & user_names = it->second; + user_ids.reserve(user_names.size()); + + for (const auto & user_name : user_names) + { + if (const auto user_id = memory_storage.find(user_name)) + user_ids.emplace_back(*user_id); + } + } + } + + // Update the granted roles of the relevant users. + if (!user_ids.empty()) + { + auto update_func = [&role_id, &grant] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr + { + if (auto user = typeid_cast>(entity_)) + { + auto changed_user = typeid_cast>(user->clone()); + if (grant) + changed_user->granted_roles.grant(role_id); + else + changed_user->granted_roles.revoke(role_id); + return changed_user; + } + return entity_; + }; + + memory_storage.update(user_ids, update_func); + } + + // Actualize granted_role_* mappings. + if (grant) + { + if (!user_ids.empty()) + { + granted_role_names.insert_or_assign(role_id, role_name); + granted_role_ids.insert_or_assign(role_name, role_id); + } + } + else + { + granted_role_ids.erase(role_name); + granted_role_names.erase(role_id); + } +} + +void TokenAccessStorage::processRoleChange(const UUID & id, const AccessEntityPtr & entity) +{ + std::lock_guard lock(mutex); + const auto role = typeid_cast>(entity); + const auto it = granted_role_names.find(id); + + if (role) // Added or renamed a role. + { + const auto & new_role_name = role->getName(); + if (it != granted_role_names.end()) // Renamed a granted role. + { + const auto & old_role_name = it->second; + if (new_role_name != old_role_name) + { + // Revoke the old role first, then grant the new role. + applyRoleChangeNoLock(false /* revoke */, id, old_role_name); + applyRoleChangeNoLock(true /* grant */, id, new_role_name); + } + } + else // Added a role. + { + applyRoleChangeNoLock(true /* grant */, id, new_role_name); + } + } + else // Removed a role. + { + if (it != granted_role_names.end()) // Removed a granted role. + { + const auto & old_role_name = it->second; + applyRoleChangeNoLock(false /* revoke */, id, old_role_name); + } + } +} + +const char * TokenAccessStorage::getStorageType() const +{ + return STORAGE_TYPE; +} + +bool TokenAccessStorage::exists(const UUID & id) const +{ + std::lock_guard lock(mutex); + return memory_storage.exists(id); +} + +String TokenAccessStorage::getStorageParamsJSON() const +{ + std::lock_guard lock(mutex); + Poco::JSON::Object params_json; + + params_json.set("provider", provider_name); + + Poco::JSON::Array common_role_names_json; + for (const auto & role : common_role_names) + { + common_role_names_json.add(role); + } + params_json.set("roles", common_role_names_json); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + oss.exceptions(std::ios::failbit); + Poco::JSON::Stringifier::stringify(params_json, oss); + + return oss.str(); +} + +bool TokenAccessStorage::areTokenCredentialsValidNoLock(const User & user, const Credentials & credentials, const ExternalAuthenticators & external_authenticators) const +{ + if (!credentials.isReady()) + return false; + + if (credentials.getUserName() != user.getName()) + return false; + + if (const auto * token_credentials = dynamic_cast(&credentials)) + return external_authenticators.checkTokenCredentials(*token_credentials); + + return false; +} + +std::optional TokenAccessStorage::findImpl(AccessEntityType type, const String & name) const +{ + std::lock_guard lock(mutex); + return memory_storage.find(type, name); +} + + +std::vector TokenAccessStorage::findAllImpl(AccessEntityType type) const +{ + std::lock_guard lock(mutex); + return memory_storage.findAll(type); +} + +AccessEntityPtr TokenAccessStorage::readImpl(const UUID & id, bool throw_if_not_exists) const +{ + std::lock_guard lock(mutex); + return memory_storage.read(id, throw_if_not_exists); +} + +std::optional> TokenAccessStorage::readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const +{ + std::lock_guard lock(mutex); + return memory_storage.readNameWithType(id, throw_if_not_exists); +} + +void TokenAccessStorage::assignRolesNoLock(User & user, const std::set & external_roles) const +{ + const auto & user_name = user.getName(); + auto & granted_roles = user.granted_roles; + + auto grant_role = [this, &user_name, &granted_roles] (const String & role_name, const bool common) + { + auto it = granted_role_ids.find(role_name); + if (it == granted_role_ids.end()) + { + if (const auto role_id = access_control.find(role_name)) + { + granted_role_names.insert_or_assign(*role_id, role_name); + it = granted_role_ids.insert_or_assign(role_name, *role_id).first; + } + } + + if (it != granted_role_ids.end()) + { + const auto & role_id = it->second; + granted_roles.grant(role_id); + } + else + { + LOG_TRACE(getLogger(), "Did not grant {} role '{}' to user '{}': role not found", (common ? "common" : "mapped"), role_name, user_name); + } + }; + + user_external_roles.erase(user_name); + granted_roles = {}; + const auto old_role_names = std::move(roles_per_users[user_name]); + + // Grant the common roles first. + for (const auto & role_name : common_role_names) + { + grant_role(role_name, true /* common */); + } + + // Grant the mapped external roles and actualize users_per_roles mapping. + // external_roles allowed to overlap with common_role_names. + for (const auto & role_name : external_roles) + { + grant_role(role_name, false /* mapped */); + users_per_roles[role_name].insert(user_name); + } + + // Cleanup users_per_roles and granted_role_* mappings. + for (const auto & old_role_name : old_role_names) + { + if (external_roles.contains(old_role_name)) + continue; + + const auto rit = users_per_roles.find(old_role_name); + if (rit == users_per_roles.end()) + continue; + + auto & user_names = rit->second; + user_names.erase(user_name); + + if (!user_names.empty()) + continue; + + users_per_roles.erase(rit); + + if (common_role_names.contains(old_role_name)) + continue; + + const auto iit = granted_role_ids.find(old_role_name); + if (iit == granted_role_ids.end()) + continue; + + const auto old_role_id = iit->second; + granted_role_names.erase(old_role_id); + granted_role_ids.erase(iit); + } + + // Actualize roles_per_users mapping and user_external_roles cache. + if (external_roles.empty()) + roles_per_users.erase(user_name); + else + roles_per_users[user_name] = external_roles; + + user_external_roles[user_name] = external_roles; +} + +void TokenAccessStorage::assignProfileNoLock(User & user) const +{ + if (default_profile_name.empty()) + return; + + const auto & user_name = user.getName(); + auto & settings = user.settings; + + // Look up the profile ID once + const auto profile_id = access_control.find(default_profile_name); + if (!profile_id) + { + LOG_TRACE(getLogger(), "Did not assign profile '{}' to user '{}': profile not found", default_profile_name, user_name); + return; + } + + // Check if profile is already assigned + bool profile_already_assigned = false; + for (const auto & element : settings) + { + if (element.parent_profile.has_value() && element.parent_profile == *profile_id) + { + profile_already_assigned = true; + break; + } + } + + if (!profile_already_assigned) + { + SettingsProfileElement profile_element; + profile_element.parent_profile = *profile_id; + settings.push_back(std::move(profile_element)); + LOG_TRACE(getLogger(), "Assigned profile '{}' to user '{}'", default_profile_name, user_name); + } +} + +std::optional TokenAccessStorage::authenticateImpl( + const Credentials & credentials, + const Poco::Net::IPAddress & address, + const ExternalAuthenticators & external_authenticators, + const ClientInfo & /* client_info */, + bool throw_if_user_not_exists, + bool /* allow_no_password */, + bool /* allow_plaintext_password */) const +{ + std::lock_guard lock(mutex); + + /// Reject mismatched credential types BEFORE the typeid_cast that would + /// throw a `LOGICAL_ERROR`. The reference-form `typeid_cast` is fatal on + /// mismatch, and `MultipleAccessStorage::authenticateImpl` does not catch + /// per-storage exceptions -- so a single Basic / SSL-cert / Kerberos / SSH + /// login attempt would propagate that exception out of the chain and abort + /// authentication for every later storage in `user_directories`. Concretely, + /// listing `` ahead of `` would lock out every Basic-auth + /// user. Return nullopt cleanly, matching the LDAP-side idiom in + /// `LDAPAccessStorage::areLDAPCredentialsValidNoLock`. + const auto * token_credentials_ptr = dynamic_cast(&credentials); + if (!token_credentials_ptr) + { + if (throw_if_user_not_exists) + throwNotFound(AccessEntityType::USER, credentials.getUserName(), getStorageName()); + return {}; + } + + auto id = memory_storage.find(credentials.getUserName()); + UserPtr user = id ? memory_storage.read(*id) : nullptr; + + const auto & token_credentials = *token_credentials_ptr; + + if (!external_authenticators.checkTokenCredentials(token_credentials, provider_name)) + { + // Even though token itself may be valid (especially in case of a jwt token), authentication has just failed. + if (throw_if_user_not_exists) + throwNotFound(AccessEntityType::USER, credentials.getUserName(), getStorageName()); + + return {}; + } + + std::shared_ptr new_user; + if (!user) + { + // User does not exist, so we create one, and will add it if authentication is successful. + new_user = std::make_shared(); + new_user->setName(credentials.getUserName()); + new_user->authentication_methods.emplace_back(AuthenticationType::JWT); + /// Stamp the storage's pinned processor onto the auth method so the + /// per-request validity check (`Session::checkIfUserIsStillValid`) + /// can detect when an admin removes that processor and terminate + /// active sessions whose tokens were issued through it (M-28). + new_user->authentication_methods.back().setTokenProcessorName(provider_name); + /// If the operator configured a network allowlist for this storage, + /// stamp it onto the auto-created user so `isAddressAllowed` checks it + /// below. Without this, every auto-provisioned token user inherits + /// `AnyHostTag` and there is no way to restrict token auth by network. + if (auto_user_allowed_hosts.has_value()) + new_user->allowed_client_hosts = *auto_user_allowed_hosts; + user = new_user; + } + + if (!isAddressAllowed(*user, address)) + throwAddressNotAllowed(address); + + std::set external_roles; + if (roles_filter.has_value()) + { + /// Defensive: a broken regex must NEVER cause a fall-through to the + /// permissive "grant all groups" branch. Parse-time validation in the + /// constructor already rejects invalid patterns; this guard ensures the + /// invariant still holds if any future code path constructs the filter + /// without the parse-time check (e.g. config reload). + if (!roles_filter->ok()) + { + LOG_ERROR(getLogger(), + "{}: Configured 'roles_filter' is invalid ('{}'); refusing to map any " + "external roles for user '{}' to avoid granting all token groups.", + getStorageName(), roles_filter->error(), credentials.getUserName()); + } + else + { + LOG_TRACE(getLogger(), "{}: External role filter found, applying only matching groups", getStorageName()); + for (const auto & group: token_credentials.getGroups()) { + if (RE2::FullMatch(group, roles_filter.value())) + { + String transformed_group = group; + if (roles_transform_pattern.has_value() && roles_transform_replacement.has_value()) + { + transformed_group = applyTransform(group, roles_transform_pattern.value(), roles_transform_replacement.value(), roles_transform_global); + LOG_TRACE(getLogger(), "{}: Transformed group '{}' to '{}'", getStorageName(), group, transformed_group); + } + external_roles.insert(transformed_group); + LOG_TRACE(getLogger(), "{}: Granted role (group) {} to user", getStorageName(), transformed_group); + } + } + } + } + else + { + LOG_TRACE(getLogger(), "{}: No external role filtering set, applying all available groups", getStorageName()); + for (const auto & group: token_credentials.getGroups()) + { + String transformed_group = group; + if (roles_transform_pattern.has_value() && roles_transform_replacement.has_value()) + { + transformed_group = applyTransform(group, roles_transform_pattern.value(), roles_transform_replacement.value(), roles_transform_global); + LOG_TRACE(getLogger(), "{}: Transformed group '{}' to '{}'", getStorageName(), group, transformed_group); + } + external_roles.insert(transformed_group); + } + } + + if (new_user) + { + assignRolesNoLock(*new_user, external_roles); + assignProfileNoLock(*new_user); + id = memory_storage.insert(new_user); + } + else + { + /// Apply role-set and profile changes atomically under a single + /// `memory_storage.update`. Splitting them into two separate updates + /// (the prior shape) opened a reader-observable window between + /// "new roles, old profile" and "new roles, new profile" -- a query + /// from another thread that read the user via `AccessControl::read` + /// would observe a mid-state, since `MemoryAccessStorage`'s lock is + /// independent of `TokenAccessStorage::mutex` (M-31). + /// + /// Preserve the existing early-return optimization: skip the update + /// when external_roles haven't changed AND the profile is already + /// assigned. The `assignRolesNoLock` cleanup still has to run if + /// the role set changes, so it lives inside the update lambda. + const bool roles_changed = [&] + { + const auto it = user_external_roles.find(user->getName()); + return it == user_external_roles.end() || it->second != external_roles; + }(); + + if (roles_changed) + { + memory_storage.update(*id, [this, &external_roles] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr + { + if (auto user_entity = typeid_cast>(entity_)) + { + auto changed_user = typeid_cast>(user_entity->clone()); + assignRolesNoLock(*changed_user, external_roles); + assignProfileNoLock(*changed_user); + return changed_user; + } + return entity_; + }); + } + else + { + /// Roles are stable; just refresh the profile in case it was + /// added/changed in config since the last auth. + memory_storage.update(*id, [this] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr + { + if (auto user_entity = typeid_cast>(entity_)) + { + auto changed_user = typeid_cast>(user_entity->clone()); + assignProfileNoLock(*changed_user); + return changed_user; + } + return entity_; + }); + } + } + + /// Flush queued user-entity events from this storage's `memory_storage` so + /// subscribers observe the freshly-resolved roles and profile right away. + /// + /// `memory_storage.insert` / `update` only enqueue `onEntityAdded` / + /// `onEntityUpdated` on the shared `AccessChangesNotifier`; without an + /// explicit `sendNotifications` they sit on the queue until some unrelated + /// access mutation (a SQL DDL on access entities, a config reload, a + /// replicated-storage sync) happens to trigger a drain. During that window + /// any existing `ContextAccess` bound to this user UUID keeps serving its + /// previously-cached authorization state -- a freshly-revoked role appears + /// "still granted" until the next unrelated trigger. + /// + /// Note: `applyRoleChangeNoLock` (the storage's other mutation site) does + /// NOT need an explicit flush -- it only runs inside `processRoleChange`, + /// which is itself dispatched from a `sendNotifications` drain; the events + /// it queues are picked up by the very loop that called it. Only + /// `authenticateImpl` runs outside of any drain and so is the one site + /// that has to flush explicitly. + access_control.getChangesNotifier().sendNotifications(); + + if (id) + return AuthResult{ .user_id = *id, .authentication_data = AuthenticationData(AuthenticationType::JWT), .user_name = user->getName() }; + return std::nullopt; +} + + +} diff --git a/src/Access/TokenAccessStorage.h b/src/Access/TokenAccessStorage.h new file mode 100644 index 000000000000..9f15319a0d82 --- /dev/null +++ b/src/Access/TokenAccessStorage.h @@ -0,0 +1,101 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace Poco +{ + namespace Util + { + class AbstractConfiguration; + } +} + + +namespace DB +{ +class AccessControl; + +/// Implementation of IAccessStorage which allows to import user data from oauth server using access token. +/// Normally, this should be unified with LDAPAccessStorage, but not done to minimize changes to code that is common with upstream. +class TokenAccessStorage : public IAccessStorage +{ +public: + static constexpr char STORAGE_TYPE[] = "token"; + + explicit TokenAccessStorage(const String & storage_name_, AccessControl & access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix); + ~TokenAccessStorage() override = default; + + // IAccessStorage implementations. + const char * getStorageType() const override; + String getStorageParamsJSON() const override; + bool isReadOnly() const override { return true; } + bool exists(const UUID & id) const override; + +private: + mutable std::recursive_mutex mutex; // Note: Reentrance possible by internal role lookup via access_control + AccessControl & access_control; + const Poco::Util::AbstractConfiguration & config; + const String & prefix; + + String provider_name; + std::optional roles_filter = std::nullopt; + /// `roles_transform` regex compiled once at construction. Storing the + /// compiled `re2::RE2` (instead of the pattern string) avoids per-call + /// recompilation and -- more importantly -- makes parse-time validation + /// possible: an invalid regex now fails the storage construction loudly + /// rather than silently no-op'ing every transform at runtime (which would + /// admit ungroomed role names; symmetric with the `roles_filter` fail- + /// closed handling). + std::optional roles_transform_pattern = std::nullopt; + std::optional roles_transform_replacement = std::nullopt; + bool roles_transform_global = false; + + std::set common_role_names; // role name that should be granted to all users at all times + String default_profile_name; // settings profile name that should be assigned to all users + /// Optional IP allowlist applied to auto-provisioned users at creation + /// time. When unset, auto-created users inherit the default `AnyHostTag` + /// (current behavior, no breakage). When set, only clients whose source + /// address matches this allowlist can authenticate as a token-auto-created + /// user, regardless of the IdP's verdict on the token. + std::optional auto_user_allowed_hosts; + mutable std::map> user_external_roles; + mutable std::map> users_per_roles; // role name -> user names (...it should be granted to; may but don't have to exist for common roles) + mutable std::map> roles_per_users; // user name -> role names (...that should be granted to it; may but don't have to include common roles) + mutable std::map granted_role_names; // (currently granted) role id -> its name + mutable std::map granted_role_ids; // (currently granted) role name -> its id + mutable MemoryAccessStorage memory_storage; + scope_guard role_change_subscription; + + void processRoleChange(const UUID & id, const AccessEntityPtr & entity); + + bool areTokenCredentialsValidNoLock(const User & user, const Credentials & credentials, const ExternalAuthenticators & external_authenticators) const; + + void applyRoleChangeNoLock(bool grant, const UUID & role_id, const String & role_name); + void assignRolesNoLock(User & user, const std::set & external_roles) const; + void assignProfileNoLock(User & user) const; + +protected: + std::optional findImpl(AccessEntityType type, const String & name) const override; + std::vector findAllImpl(AccessEntityType type) const override; + AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; + std::optional> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override; + std::optional authenticateImpl(const Credentials & credentials, + const Poco::Net::IPAddress & address, + const ExternalAuthenticators & external_authenticators, + const ClientInfo & client_info, + bool throw_if_user_not_exists, + bool allow_no_password, + bool allow_plaintext_password) const override; +}; +} diff --git a/src/Access/TokenProcessors.h b/src/Access/TokenProcessors.h new file mode 100644 index 000000000000..f2ee32d90887 --- /dev/null +++ b/src/Access/TokenProcessors.h @@ -0,0 +1,254 @@ +#pragma once + +#include +#include + +#if USE_JWT_CPP +#include +#include +#include +#endif + +namespace DB +{ + +class RemoteHostFilter; + +namespace ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +} + +class ITokenProcessor +{ +public: + explicit ITokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_ = "sub", + const String & groups_claim_ = "groups") + : processor_name(processor_name_), token_cache_lifetime(token_cache_lifetime_), username_claim(username_claim_), groups_claim(groups_claim_) {} + virtual ~ITokenProcessor() = default; + + virtual bool resolveAndValidate(TokenCredentials &) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for ITokenProcessor interface"); + } + + virtual bool checkClaims(const TokenCredentials &, const String &) const { return true; } + + /// True only for JWT processors (static-key/JWKS). Opaque/access token processors do not use per-user claims. + virtual bool supportsJwtClaimsRestriction() const { return false; } + + UInt64 getTokenCacheLifetime() const { return token_cache_lifetime; } + String getProcessorName() const { return processor_name; } + + static std::unique_ptr parseTokenProcessor( + const Poco::Util::AbstractConfiguration & config, + const String & prefix, + const String & processor_name); + +protected: + const String processor_name; + const UInt64 token_cache_lifetime; + const String username_claim; + const String groups_claim; +}; + +#if USE_JWT_CPP + +struct StaticKeyJwtParams +{ + /// Algorithm name (required). Supported: "none", "hs256", "hs384", "hs512", + /// "ps256", "ps384", "ps512", "ed25519", "ed448", "rs256", "rs384", "rs512", + /// "es256", "es256k", "es384", "es512" + String algo; + + /// For HS algorithms (hs256, hs384, hs512): symmetric key (required for HS algorithms) + String static_key; + + /// For HS algorithms: whether static_key is base64 encoded (optional, defaults to false) + bool static_key_in_base64 = false; + + /// For PS/ED/RSA/ES algorithms: public key (required for PS/ED/RSA/ES algorithms) + String public_key; + + /// For PS/ED/RSA/ES algorithms: private key (optional) + String private_key; + + /// For PS/ED/RSA/ES algorithms: public key password (optional) + String public_key_password; + + /// For PS/ED/RSA/ES algorithms: private key password (optional) + String private_key_password; + + /// JWT claims to validate (optional) + String claims; + + /// Clock-drift tolerance for `exp`/`nbf`/`iat` checks, in seconds. + /// jwt-cpp's default is 0, which rejects tokens on any client/server skew. + /// 60 seconds matches the OpenID processor's default and standard + /// industry practice (RFC 7519 §4.1.4 hints at "small leeway"). + UInt64 verifier_leeway = 60; +}; + +class StaticKeyJwtProcessor : public ITokenProcessor +{ +public: + explicit StaticKeyJwtProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + const String & expected_typ_, + bool allow_no_expiration_, + const StaticKeyJwtParams & params); + + bool resolveAndValidate(TokenCredentials & credentials) const override; + bool checkClaims(const TokenCredentials & credentials, const String & claims_to_check) const override; + bool supportsJwtClaimsRestriction() const override { return true; } + +private: + const String claims; + const String expected_issuer; + const String expected_audience; + /// Required JWT `typ` header (RFC 8725 §3.11). Empty = no enforcement. + const String expected_typ; + const bool allow_no_expiration; + jwt::verifier verifier = jwt::verify(); +}; + + +class JwksJwtProcessor : public ITokenProcessor +{ +public: + explicit JwksJwtProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + const String & expected_typ_, + bool allow_no_expiration_, + const String & claims_, + size_t verifier_leeway_, + std::shared_ptr provider_); + + explicit JwksJwtProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + const String & expected_typ_, + bool allow_no_expiration_, + const String & claims_, + size_t verifier_leeway_, + const String & jwks_uri_, + size_t jwks_cache_lifetime_) + : JwksJwtProcessor(processor_name_, + token_cache_lifetime_, + username_claim_, + groups_claim_, + expected_issuer_, + expected_audience_, + expected_typ_, + allow_no_expiration_, + claims_, + verifier_leeway_, + std::make_shared(jwks_uri_, jwks_cache_lifetime_)) {} + + bool resolveAndValidate(TokenCredentials & credentials) const override; + bool checkClaims(const TokenCredentials & credentials, const String & claims_to_check) const override; + bool supportsJwtClaimsRestriction() const override { return true; } + +private: + const String claims; + const String expected_issuer; + const String expected_audience; + /// Required JWT `typ` header (RFC 8725 §3.11). Empty = no enforcement. + const String expected_typ; + const bool allow_no_expiration; + /// Verifier is built fresh per call inside `resolveAndValidate` (it depends + /// on the current JWT's `kid` -> JWKS-resolved key, which can rotate). A + /// local-per-call verifier also makes the function thread-safe so callers + /// can invoke it without holding the global `ExternalAuthenticators::mutex`. + std::shared_ptr provider; + const size_t verifier_leeway; +}; + +/// Opaque tokens + +class GoogleTokenProcessor : public ITokenProcessor +{ +public: + GoogleTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_audience_); + + bool resolveAndValidate(TokenCredentials & credentials) const override; + +private: + const String expected_audience; +}; + +class AzureTokenProcessor : public ITokenProcessor +{ +public: + AzureTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_audience_); + + bool resolveAndValidate(TokenCredentials & credentials) const override; + +private: + const String expected_audience; +}; + +class OpenIdTokenProcessor : public ITokenProcessor +{ +public: + /// Specify endpoints manually + OpenIdTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + bool allow_no_expiration_, + const String & userinfo_endpoint_, + const String & token_introspection_endpoint_, + UInt64 verifier_leeway_, + const String & jwks_uri_, + UInt64 jwks_cache_lifetime_); + + /// Obtain endpoints from openid-configuration URL + OpenIdTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + bool allow_no_expiration_, + const String & openid_config_endpoint_, + UInt64 verifier_leeway_, + UInt64 jwks_cache_lifetime_, + const RemoteHostFilter & remote_host_filter_, + bool allow_http_discovery_urls_); + + bool resolveAndValidate(TokenCredentials & credentials) const override; +private: + Poco::URI userinfo_endpoint; + Poco::URI token_introspection_endpoint; + + /// Access token is often a valid JWT, so we can validate it locally to avoid unnecesary network requests. + std::optional jwt_validator = std::nullopt; +}; + +#endif + +} diff --git a/src/Access/TokenProcessorsJWT.cpp b/src/Access/TokenProcessorsJWT.cpp new file mode 100644 index 000000000000..21c3a641aeb6 --- /dev/null +++ b/src/Access/TokenProcessorsJWT.cpp @@ -0,0 +1,789 @@ +#include "TokenProcessors.h" + +#if USE_JWT_CPP +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/// Ensure picojson's parse-depth ceiling stays in line with H-28's claims-recursion bound. +/// If a future picojson bump removes or raises this, we'd silently re-expose stack-exhaustion. +static_assert(picojson::DEFAULT_MAX_DEPTHS <= 100, + "picojson::DEFAULT_MAX_DEPTHS bumped above 100; revisit JWT parse-depth safety"); + +namespace DB { + +namespace ErrorCodes +{ + extern const int AUTHENTICATION_FAILED; + extern const int INVALID_CONFIG_PARAMETER; +} + +namespace +{ + +/// Depth budget for `check_claims` recursion. +/// +/// `picojson::DEFAULT_MAX_DEPTHS = 100` rejects deeply-nested JSON at parse +/// time, which already prevents the stack-exhaustion variant. We carry a +/// smaller budget here as defense in depth: if a future contrib bump or +/// PICOJSON_USE_RVALUE change widens the parse-time limit, this bound still +/// caps recursion. 32 is well above any realistic operator-configured claim +/// shape but keeps the worst-case stack frame count modest. +constexpr int kMaxClaimsRecursionDepth = 32; + +bool check_claims(const picojson::value & claims, const picojson::value & payload, const String & path, int depth_remaining); +bool check_claims(const picojson::value::object & claims, const picojson::value::object & payload, const String & path, int depth_remaining) +{ + for (const auto & it : claims) + { + const auto & payload_it = payload.find(it.first); + if (payload_it == payload.end()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "Key '{}.{}' not found in JWT payload", path, it.first); + return false; + } + if (!check_claims(it.second, payload_it->second, path + "." + it.first, depth_remaining)) + { + return false; + } + } + return true; +} + +bool check_claims(const picojson::value::array & claims, const picojson::value::array & payload, const String & path, int depth_remaining) +{ + if (claims.size() > payload.size()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload too small for claims key '{}'", path); + return false; + } + for (size_t claims_i = 0; claims_i < claims.size(); ++claims_i) + { + bool found = false; + const auto & claims_val = claims.at(claims_i); + for (const auto & payload_val : payload) + { + /// Break on the first match. Without this, the inner loop kept + /// scanning the rest of the payload even after finding a match, + /// turning the worst case into O(|claims_array| * |payload_array|) + /// even when matches are easy. Combined with `kMaxClaimsRecursionDepth`, + /// this caps CPU per `check_claims` call so a crafted token cannot + /// stall the global `ExternalAuthenticators::mutex` (H-19) for an + /// unbounded time. + if (check_claims(claims_val, payload_val, path + "[" + std::to_string(claims_i) + "]", depth_remaining)) + { + found = true; + break; + } + } + if (!found) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not contain an object matching claims key '{}[{}]'", path, claims_i); + return false; + } + } + return true; +} + +bool check_claims(const picojson::value & claims, const picojson::value & payload, const String & path, int depth_remaining) +{ + if (depth_remaining <= 0) + { + LOG_ERROR(getLogger("TokenAuthentication"), + "JWT claims comparison exceeded the maximum recursion depth ({}) at '{}'; " + "rejecting to bound CPU under the auth mutex.", + kMaxClaimsRecursionDepth, path); + return false; + } + --depth_remaining; + + if (claims.is()) + { + if (!payload.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'array' in claims '{}'", path); + return false; + } + return check_claims(claims.get(), payload.get(), path, depth_remaining); + } + if (claims.is()) + { + if (!payload.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'object' in claims '{}'", path); + return false; + } + return check_claims(claims.get(), payload.get(), path, depth_remaining); + } + if (claims.is()) + { + if (!payload.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'bool' in claims '{}'", path); + return false; + } + if (claims.get() != payload.get()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match the value in the '{}' assertions. Expected '{}' but given '{}'", path, claims.get(), payload.get()); + return false; + } + return true; + } + if (claims.is()) + { + if (!payload.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'double' in claims '{}'", path); + return false; + } + if (claims.get() != payload.get()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match the value in the '{}' assertions. Expected '{}' but given '{}'", path, claims.get(), payload.get()); + return false; + } + return true; + } + if (claims.is()) + { + if (!payload.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'std::string' in claims '{}'", path); + return false; + } + if (claims.get() != payload.get()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match the value in the '{}' assertions. Expected '{}' but given '{}'", path, claims.get(), payload.get()); + return false; + } + return true; + } +#ifdef PICOJSON_USE_INT64 + if (claims.is()) + { + if (!payload.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'int64_t' in claims '{}'", path); + return false; + } + if (claims.get() != payload.get()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match the value in claims '{}'. Expected '{}' but given '{}'", path, claims.get(), payload.get()); + return false; + } + return true; + } +#endif + LOG_ERROR(getLogger("TokenAuthentication"), "JWT claim '{}' does not match any known type", path); + return false; +} + +bool check_claims(const String & claims, const picojson::value::object & payload) +{ + if (claims.empty()) + return true; + picojson::value json; + auto errors = picojson::parse(json, claims); + if (!errors.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Bad JWT claims: {}", errors); + if (!json.is()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Bad JWT claims: is not an object"); + return check_claims(json.get(), payload, "", kMaxClaimsRecursionDepth); +} + +std::string create_public_key_from_ec_components(const std::string & x, const std::string & y, int curve_nid) +{ + auto decode_base64url = [](const std::string & value) + { + return jwt::base::decode(jwt::base::pad(value)); + }; + + auto decoded_x = decode_base64url(x); + auto decoded_y = decode_base64url(y); + + size_t coordinate_size = 0; + if (curve_nid == NID_X9_62_prime256v1) + coordinate_size = 32; + else if (curve_nid == NID_secp384r1) + coordinate_size = 48; + else if (curve_nid == NID_secp521r1) + coordinate_size = 66; + else + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: unsupported EC curve"); + + if (decoded_x.size() > coordinate_size || decoded_y.size() > coordinate_size) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: invalid EC key coordinates length"); + + std::vector public_key_octets(1 + 2 * coordinate_size, 0); + public_key_octets[0] = 0x04; // Uncompressed point format. + std::memcpy(public_key_octets.data() + 1 + (coordinate_size - decoded_x.size()), decoded_x.data(), decoded_x.size()); + std::memcpy(public_key_octets.data() + 1 + coordinate_size + (coordinate_size - decoded_y.size()), decoded_y.data(), decoded_y.size()); + + const char * group_name = OBJ_nid2sn(curve_nid); + if (!group_name) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: unsupported EC curve"); + + std::unique_ptr params_bld(OSSL_PARAM_BLD_new(), OSSL_PARAM_BLD_free); + if (!params_bld) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to allocate OpenSSL parameter builder"); + + if (OSSL_PARAM_BLD_push_utf8_string(params_bld.get(), OSSL_PKEY_PARAM_GROUP_NAME, group_name, 0) != 1) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to set EC group parameter"); + + if (OSSL_PARAM_BLD_push_octet_string(params_bld.get(), OSSL_PKEY_PARAM_PUB_KEY, public_key_octets.data(), public_key_octets.size()) != 1) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to set EC public key parameter"); + + std::unique_ptr params(OSSL_PARAM_BLD_to_param(params_bld.get()), OSSL_PARAM_free); + if (!params) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to build OpenSSL parameters"); + + std::unique_ptr key_ctx(EVP_PKEY_CTX_new_from_name(nullptr, "EC", nullptr), EVP_PKEY_CTX_free); + if (!key_ctx) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to create EVP key context"); + + if (EVP_PKEY_fromdata_init(key_ctx.get()) <= 0) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to initialize EVP key import"); + + EVP_PKEY * raw_evp_key = nullptr; + if (EVP_PKEY_fromdata(key_ctx.get(), &raw_evp_key, EVP_PKEY_PUBLIC_KEY, params.get()) <= 0) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to import EC public key"); + + std::unique_ptr evp_key(raw_evp_key, EVP_PKEY_free); + + std::unique_ptr bio(BIO_new(BIO_s_mem()), BIO_free); + if (!bio) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to allocate BIO"); + + if (PEM_write_bio_PUBKEY(bio.get(), evp_key.get()) != 1) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to encode EC public key"); + + char * data = nullptr; + auto len = BIO_get_mem_data(bio.get(), &data); + return std::string(data, len); +} + +} + +namespace +{ +std::set parseGroupsFromJsonArray(picojson::array groups_array) +{ + std::set external_groups_names; + + for (const auto & group : groups_array) + { + if (group.is()) + external_groups_names.insert(group.get()); + } + + return external_groups_names; +} +} + +namespace +{ +/// Warn at construction time when a JWT processor is left without an +/// `expected_audience` (and/or `expected_issuer`) pin. Without `aud`, +/// the same token is replayable on any other deployment that trusts +/// the same IdP -- a token minted for cluster X authenticates on +/// cluster Y as well, because nothing ties a JWT to "this specific +/// relying party". Same shape as the Google/Azure warnings (H-10); +/// the warning is the only signal operators get since the verifier +/// just silently skips the check when the pin is empty. +void warnIfBindingsNotPinned(const String & processor_name, + const String & expected_issuer, + const String & expected_audience, + const String & expected_typ) +{ + if (expected_audience.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_audience' is not configured. Tokens issued by the same IdP for " + "any other relying party will be accepted here, including tokens minted for a " + "different ClickHouse deployment. Set 'expected_audience' to this deployment's " + "audience to prevent cross-cluster replay.", + processor_name); + if (expected_issuer.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_issuer' is not configured. The JWT 'iss' claim will not be enforced; " + "any token signed by a key in this processor's JWKS will be accepted regardless of " + "issuer. Set 'expected_issuer' to bind tokens to a specific IdP.", + processor_name); + if (expected_typ.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_typ' is not configured. The JWT 'typ' header will not be enforced; " + "ID tokens / refresh JWTs / internal-profile JWTs from the same IdP can be presented " + "as access tokens. RFC 8725 §3.11 / RFC 9068 recommend setting 'expected_typ' " + "(commonly 'at+jwt' for OAuth 2.0 access tokens) to prevent cross-token-class substitution.", + processor_name); +} + +/// Verify the JWT header `typ` matches the operator-configured pin. +/// Returns false (with a TRACE log) on mismatch; true if no pin or match. +/// Comparison is case-insensitive per RFC 7519 §5.1 ("JWT" and "jwt" both valid). +bool checkJwtTyp(const String & processor_name, + const String & expected_typ, + const jwt::decoded_jwt & decoded) +{ + if (expected_typ.empty()) + return true; + + if (!decoded.has_type()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Token has no 'typ' header but 'expected_typ' is configured to '{}'; rejecting.", + processor_name, expected_typ); + return false; + } + + const String actual_typ = decoded.get_type(); + if (Poco::toLower(actual_typ) != Poco::toLower(expected_typ)) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Token 'typ' header '{}' does not match 'expected_typ' '{}'; rejecting.", + processor_name, actual_typ, expected_typ); + return false; + } + + return true; +} +} + +StaticKeyJwtProcessor::StaticKeyJwtProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + const String & expected_typ_, + bool allow_no_expiration_, + const StaticKeyJwtParams & params) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), + claims(params.claims), expected_issuer(expected_issuer_), expected_audience(expected_audience_), + expected_typ(expected_typ_), + allow_no_expiration(allow_no_expiration_) +{ + warnIfBindingsNotPinned(processor_name, expected_issuer, expected_audience, expected_typ); + + const String & algo = params.algo; + const String & static_key = params.static_key; + bool static_key_in_base64 = params.static_key_in_base64; + const String & public_key = params.public_key; + const String & private_key = params.private_key; + const String & public_key_password = params.public_key_password; + const String & private_key_password = params.private_key_password; + + if (algo == "ps256" || + algo == "ps384" || + algo == "ps512" || + algo == "ed25519" || + algo == "ed448" || + algo == "rs256" || + algo == "rs384" || + algo == "rs512" || + algo == "es256" || + algo == "es256k" || + algo == "es384" || + algo == "es512" ) + { + if (public_key.empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "{}: Invalid token processor definition, `public_key` parameter required for {}", processor_name, algo); + } + else if (algo == "hs256" || + algo == "hs384" || + algo == "hs512" ) + { + if (static_key.empty()) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "{}: Invalid token processor definition, `static_key` parameter required for {}", processor_name, algo); + } + else if (algo != "none") + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "{}: Invalid token processor definition, unknown algorithm {}", processor_name, algo); + + if (algo == "none") + verifier = verifier.allow_algorithm(jwt::algorithm::none()); + else if (algo == "ps256") + verifier = verifier.allow_algorithm(jwt::algorithm::ps256(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "ps384") + verifier = verifier.allow_algorithm(jwt::algorithm::ps384(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "ps512") + verifier = verifier.allow_algorithm(jwt::algorithm::ps512(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "ed25519") + verifier = verifier.allow_algorithm(jwt::algorithm::ed25519(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "ed448") + verifier = verifier.allow_algorithm(jwt::algorithm::ed448(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "rs256") + verifier = verifier.allow_algorithm(jwt::algorithm::rs256(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "rs384") + verifier = verifier.allow_algorithm(jwt::algorithm::rs384(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "rs512") + verifier = verifier.allow_algorithm(jwt::algorithm::rs512(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "es256") + verifier = verifier.allow_algorithm(jwt::algorithm::es256(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "es256k") + verifier = verifier.allow_algorithm(jwt::algorithm::es256k(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "es384") + verifier = verifier.allow_algorithm(jwt::algorithm::es384(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "es512") + verifier = verifier.allow_algorithm(jwt::algorithm::es512(public_key, private_key, public_key_password, private_key_password)); + else if (algo.starts_with("hs")) + { + auto key = static_key; + if (static_key_in_base64) + key = base64Decode(key); + if (algo == "hs256") + verifier = verifier.allow_algorithm(jwt::algorithm::hs256(key)); + else if (algo == "hs384") + verifier = verifier.allow_algorithm(jwt::algorithm::hs384(key)); + else if (algo == "hs512") + verifier = verifier.allow_algorithm(jwt::algorithm::hs512(key)); + else + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "{}: Invalid token processor definition, unknown algorithm {}", processor_name, algo); + } + else + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "{}: Invalid token processor definition, unknown algorithm {}", processor_name, algo); + + /// Apply clock-drift tolerance. jwt-cpp's default is 0, which rejects + /// tokens whose `exp`/`nbf` straddles even sub-second client/server skew. + /// Operators who set `verifier_leeway` in config get that value; + /// otherwise the parser-side default (60s) kicks in. + verifier = verifier.leeway(params.verifier_leeway); + + if (!expected_issuer.empty()) + verifier = verifier.with_issuer(expected_issuer); + + if (!expected_audience.empty()) + verifier = verifier.with_audience(expected_audience); +} + +namespace +{ +bool checkUserClaims(const TokenCredentials & credentials, const String & claims_to_check) +{ + try { + auto decoded_jwt = jwt::decode(credentials.getToken()); + return check_claims(claims_to_check, decoded_jwt.get_payload_json()); + } + catch (const std::exception &) + { + return false; + } +} +} + +bool StaticKeyJwtProcessor::checkClaims(const TokenCredentials & credentials, const String & claims_to_check) const +{ + return checkUserClaims(credentials, claims_to_check); +} + +bool JwksJwtProcessor::checkClaims(const TokenCredentials & credentials, const String & claims_to_check) const +{ + return checkUserClaims(credentials, claims_to_check); +} + +bool StaticKeyJwtProcessor::resolveAndValidate(TokenCredentials & credentials) const +{ + try + { + auto decoded_jwt = jwt::decode(credentials.getToken()); + + /// RFC 7515 §4.1.11: an unrecognized `crit` extension MUST cause rejection. + if (decoded_jwt.has_header_claim("crit")) + return false; + + verifier.verify(decoded_jwt); + + if (!checkJwtTyp(processor_name, expected_typ, decoded_jwt)) + return false; + + if (!allow_no_expiration && !decoded_jwt.has_expires_at()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Token missing 'exp' claim, rejecting", processor_name); + return false; + } + + if (!check_claims(claims, decoded_jwt.get_payload_json())) + return false; + + if (!decoded_jwt.has_payload_claim(username_claim)) + { + LOG_ERROR(getLogger("TokenAuthentication"), "{}: Specified username_claim {} not found in token", processor_name, username_claim); + return false; + } + + /// Reject empty `username_claim` value (M-27): a present-but-empty + /// claim would set user_name="" with `is_ready=false`, which the + /// cache would happily accept and collapse every empty-username + /// token into one dynamic user. + const auto user_name = decoded_jwt.get_payload_claim(username_claim).as_string(); + if (user_name.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Resolved username from claim '{}' is empty; rejecting", processor_name, username_claim); + return false; + } + credentials.setUserName(user_name); + + if (decoded_jwt.has_payload_claim(groups_claim)) + credentials.setGroups(parseGroupsFromJsonArray(decoded_jwt.get_payload_claim(groups_claim).as_array())); + else + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Specified groups_claim {} not found in token, no external roles will be mapped", processor_name, groups_claim); + + return true; + } + catch (const std::exception & ex) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to validate JWT: {}", processor_name, ex.what()); + return false; + } +} + +JwksJwtProcessor::JwksJwtProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + const String & expected_typ_, + bool allow_no_expiration_, + const String & claims_, + size_t verifier_leeway_, + std::shared_ptr provider_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), + claims(claims_), expected_issuer(expected_issuer_), expected_audience(expected_audience_), + expected_typ(expected_typ_), + allow_no_expiration(allow_no_expiration_), provider(provider_), verifier_leeway(verifier_leeway_) +{ + warnIfBindingsNotPinned(processor_name, expected_issuer, expected_audience, expected_typ); +} + +bool JwksJwtProcessor::resolveAndValidate(TokenCredentials & credentials) const +{ + /// Whole-body try/catch mirrors `StaticKeyJwtProcessor::resolveAndValidate`. + /// + /// In the auto-discovery path inside `ExternalAuthenticators::checkTokenCredentials`, + /// processors are tried in turn and any exception out of one aborts the entire + /// loop -- later processors are never consulted. That is fine for "the token is + /// definitively bad", but the failures in this body are also raised when the + /// token simply belongs to a different processor (e.g. its `kid` is not in + /// THIS processor's JWKS, or its `alg` is one this JWKS does not know about, + /// or the JWK lacks the components this code path needs). In a multi-processor + /// deployment, raising in those cases denies a perfectly good token just + /// because a sibling processor happened to be iterated first. Convert every + /// such failure into a `false` return so the iterator can move on -- consistent + /// with how `StaticKeyJwtProcessor` already handles its own validation errors. + try + { + auto decoded_jwt = jwt::decode(credentials.getToken()); + + /// RFC 7515 §4.1.11: an unrecognized `crit` extension MUST cause rejection. + if (decoded_jwt.has_header_claim("crit")) + return false; + + if (!checkJwtTyp(processor_name, expected_typ, decoded_jwt)) + return false; + + if (!allow_no_expiration && !decoded_jwt.has_expires_at()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Token missing 'exp' claim, rejecting", processor_name); + return false; + } + + if (!decoded_jwt.has_payload_claim(username_claim)) + { + LOG_ERROR(getLogger("TokenAuthentication"), "{}: Specified username_claim not found in token", processor_name); + return false; + } + + if (!decoded_jwt.has_key_id()) + { + LOG_ERROR(getLogger("TokenAuthentication"), "{}: 'kid' (key ID) claim not found in token", processor_name); + return false; + } + + if (!provider->getJWKS().has_jwk(decoded_jwt.get_key_id())) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: No JWK matching token 'kid' {} in this processor's JWKS; rejecting (a sibling processor may still accept it).", + processor_name, quoteString(decoded_jwt.get_key_id())); + return false; + } + + auto jwk = provider->getJWKS().get_jwk(decoded_jwt.get_key_id()); + auto username = decoded_jwt.get_payload_claim(username_claim).as_string(); + + if (!decoded_jwt.has_algorithm()) + { + LOG_ERROR(getLogger("TokenAuthentication"), "{}: Algorithm not specified in token", processor_name); + return false; + } + auto algo = Poco::toLower(decoded_jwt.get_algorithm()); + + + String public_key; + + try + { + auto x5c = jwk.get_x5c_key_value(); + + if (!x5c.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Verifying {} with 'x5c' key", processor_name, quoteString(username)); + public_key = jwt::helper::convert_base64_der_to_pem(x5c); + } + } + catch (const jwt::error::claim_not_present_exception &) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: x5c was not specified in JWK, will try RSA components", processor_name); + } + + if (public_key.empty()) + { + const auto key_type = jwk.get_key_type(); + if (key_type == "EC") + { + if (!(jwk.has_jwk_claim("x") && jwk.has_jwk_claim("y"))) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: JWK for token 'kid' is missing 'x'/'y' for EC key type; rejecting.", processor_name); + return false; + } + + int curve_nid = NID_undef; + std::optional expected_crv; + if (algo == "es256") + { + curve_nid = NID_X9_62_prime256v1; + expected_crv = "P-256"; + } + else if (algo == "es384") + { + curve_nid = NID_secp384r1; + expected_crv = "P-384"; + } + else if (algo == "es512") + { + curve_nid = NID_secp521r1; + expected_crv = "P-521"; + } + + if (curve_nid == NID_undef) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Unknown algorithm {} for EC key; rejecting.", processor_name, quoteString(algo)); + return false; + } + + if (jwk.has_jwk_claim("crv")) + { + const auto crv = jwk.get_jwk_claim("crv").as_string(); + if (expected_crv.has_value() && crv != expected_crv.value()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: JWK 'crv' {} does not match JWT algorithm {}; rejecting.", + processor_name, quoteString(crv), quoteString(algo)); + return false; + } + } + + LOG_TRACE(getLogger("TokenAuthentication"), "{}: `x5c` not present, verifying {} with EC components", processor_name, quoteString(username)); + const auto x = jwk.get_jwk_claim("x").as_string(); + const auto y = jwk.get_jwk_claim("y").as_string(); + public_key = create_public_key_from_ec_components(x, y, curve_nid); + } + else if (key_type == "RSA") + { + if (!(jwk.has_jwk_claim("n") && jwk.has_jwk_claim("e"))) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: JWK is missing 'n'/'e' for RSA key type; rejecting.", processor_name); + return false; + } + LOG_TRACE(getLogger("TokenAuthentication"), "{}: `issuer` or `x5c` not present, verifying {} with RSA components", processor_name, quoteString(username)); + const auto modulus = jwk.get_jwk_claim("n").as_string(); + const auto exponent = jwk.get_jwk_claim("e").as_string(); + public_key = jwt::helper::create_public_key_from_rsa_components(modulus, exponent); + } + else + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Unsupported JWK key type {}; rejecting.", processor_name, quoteString(key_type)); + return false; + } + } + + if (jwk.has_algorithm() && Poco::toLower(jwk.get_algorithm()) != algo) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: JWK 'alg' does not match JWT algorithm {}; rejecting.", processor_name, quoteString(algo)); + return false; + } + + /// Build the verifier locally (was a `mutable` member; making it local + /// makes `resolveAndValidate` thread-safe so the caller can drop the + /// global auth mutex around the expensive crypto verify). + auto verifier = jwt::verify(); + if (algo == "rs256") + verifier = verifier.allow_algorithm(jwt::algorithm::rs256(public_key, "", "", "")); + else if (algo == "rs384") + verifier = verifier.allow_algorithm(jwt::algorithm::rs384(public_key, "", "", "")); + else if (algo == "rs512") + verifier = verifier.allow_algorithm(jwt::algorithm::rs512(public_key, "", "", "")); + else if (algo == "es256") + verifier = verifier.allow_algorithm(jwt::algorithm::es256(public_key, "", "", "")); + else if (algo == "es384") + verifier = verifier.allow_algorithm(jwt::algorithm::es384(public_key, "", "", "")); + else if (algo == "es512") + verifier = verifier.allow_algorithm(jwt::algorithm::es512(public_key, "", "", "")); + else + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Unknown JWT algorithm {}; rejecting.", processor_name, quoteString(algo)); + return false; + } + + verifier = verifier.leeway(verifier_leeway); + + if (!expected_issuer.empty()) + verifier = verifier.with_issuer(expected_issuer); + + if (!expected_audience.empty()) + verifier = verifier.with_audience(expected_audience); + + verifier.verify(decoded_jwt); + + if (!claims.empty() && !check_claims(claims, decoded_jwt.get_payload_json())) + return false; + + /// Reject empty resolved username (M-27); see the + /// `StaticKeyJwtProcessor` peer for rationale. + const auto user_name = decoded_jwt.get_payload_claim(username_claim).as_string(); + if (user_name.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Resolved username from claim '{}' is empty; rejecting", processor_name, username_claim); + return false; + } + credentials.setUserName(user_name); + + if (decoded_jwt.has_payload_claim(groups_claim)) + credentials.setGroups(parseGroupsFromJsonArray(decoded_jwt.get_payload_claim(groups_claim).as_array())); + else + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Specified groups_claim {} not found in token, no external roles will be mapped", processor_name, groups_claim); + + return true; + } + catch (const std::exception & ex) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to validate JWT: {}", processor_name, ex.what()); + return false; + } +} + +} + +#endif diff --git a/src/Access/TokenProcessorsOpaque.cpp b/src/Access/TokenProcessorsOpaque.cpp new file mode 100644 index 000000000000..cc3ec9755b07 --- /dev/null +++ b/src/Access/TokenProcessorsOpaque.cpp @@ -0,0 +1,816 @@ +#include "TokenProcessors.h" + +#if USE_JWT_CPP +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB { + +namespace ErrorCodes +{ + extern const int AUTHENTICATION_FAILED; + extern const int INVALID_CONFIG_PARAMETER; +} + +namespace +{ + /// The JSON reply from provider has only a few key-value pairs, so no need for any advanced parsing. + /// Reduce complexity by using picojson. + picojson::object parseJSON(const String & json_string) { + picojson::value jsonValue; + std::string err = picojson::parse(jsonValue, json_string); + + if (!err.empty()) { + throw std::runtime_error("JSON parsing error: " + err); + } + + if (!jsonValue.is()) { + throw std::runtime_error("JSON is not an object"); + } + + return jsonValue.get(); + } + + template + std::optional getValueByKey(const picojson::object & jsonObject, const std::string & key) { + auto it = jsonObject.find(key); // Find the key in the object + if (it == jsonObject.end()) + { + if constexpr (throw_on_exception) + throw std::runtime_error("Key not found: " + key); + else + return std::nullopt; + } + + const picojson::value & value = it->second; + if (!value.is()) { + if constexpr (throw_on_exception) + throw std::runtime_error("Value for key '" + key + "' has incorrect type."); + else + return std::nullopt; + } + + return value.get(); + } + + /// Bound every IdP-bound HTTP call (OIDC discovery, userinfo, introspection) + /// to a known limit. Without this, Poco's default `HTTPSession` timeout of + /// 60 seconds applies, and because `ExternalAuthenticators::mutex` is held + /// for the entire duration of `checkTokenCredentials` -- including the + /// outbound call this function makes -- a single slow or hung IdP would + /// stall the whole auth subsystem (LDAP, Kerberos, HTTP basic, every other + /// token auth) for up to a full minute per request. + /// + /// 10 seconds is a deliberately conservative cap: well above any healthy + /// IdP latency, well below the default. Operators who need a different + /// value would have to expose this via per-processor config; for now it + /// is hard-coded so deployments inherit the bounded behavior automatically. + constexpr int kIdpHttpTimeoutSeconds = 10; + + void applyIdpSessionTimeouts(Poco::Net::HTTPClientSession & session) + { + const Poco::Timespan timeout(kIdpHttpTimeoutSeconds, 0); + session.setTimeout(timeout, timeout, timeout); + } + + picojson::object getObjectFromURI(const Poco::URI & uri, const String & token = "") + { + Poco::Net::HTTPResponse response; + std::ostringstream responseString; + + Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, uri.getPathAndQuery()}; + if (!token.empty()) + request.add("Authorization", "Bearer " + token); + + if (uri.getScheme() == "https") { + Poco::Net::HTTPSClientSession session(uri.getHost(), uri.getPort()); + applyIdpSessionTimeouts(session); + session.sendRequest(request); + Poco::StreamCopier::copyStream(session.receiveResponse(response), responseString); + } + else + { + Poco::Net::HTTPClientSession session(uri.getHost(), uri.getPort()); + applyIdpSessionTimeouts(session); + session.sendRequest(request); + Poco::StreamCopier::copyStream(session.receiveResponse(response), responseString); + } + + if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Failed to get user info by access token, code: {}, reason: {}", response.getStatus(), + response.getReason()); + + try + { + return parseJSON(responseString.str()); + } + catch (const std::runtime_error & e) + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Failed to parse server response: {}", e.what()); + } + } +} + +GoogleTokenProcessor::GoogleTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_audience_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) + , expected_audience(expected_audience_) +{ + /// Without an audience pin, this processor accepts any Google access token + /// that authenticates the user against Google -- including tokens minted for + /// completely unrelated OAuth clients (a classic confused-deputy scenario). + /// Operators who actually want token-based auth almost always want it bound + /// to their own client_id; surface this gap loudly at startup so it can't + /// stay silently un-enforced. + if (expected_audience.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_audience' is not configured for Google token processor. " + "Any valid Google access token (regardless of issuing client) will be accepted; " + "set 'expected_audience' to the OAuth client_id this processor should accept.", + processor_name); +} + +bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) const +{ + const String & token = credentials.getToken(); + + std::unordered_map user_info; + picojson::object user_info_json = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/userinfo"), token); + + if (!user_info_json.contains("email")) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "{}: Specified username_claim {} not found in token", processor_name, username_claim); + + user_info["email"] = getValueByKey(user_info_json, "email").value_or(""); + + user_info[username_claim] = getValueByKey(user_info_json, username_claim).value(); + + String user_name = user_info[username_claim]; + + auto token_info = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/tokeninfo"), token); + + /// Audience binding (H-10): the Google /tokeninfo endpoint authoritatively + /// reports the OAuth client_id the access token was issued for in its 'aud' + /// field. Without this check, a token minted for any other Google OAuth + /// client (the user's mobile app, a third-party tool) would authenticate + /// here too -- because Google /userinfo will happily honor any valid token. + /// Refusing tokens whose 'aud' does not match the configured client pin is + /// what makes the binding strict. + if (!expected_audience.empty()) + { + const auto aud = getValueByKey(token_info, "aud").value_or(""); + if (aud != expected_audience) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Google access token audience '{}' does not match configured 'expected_audience' '{}'; rejecting", + processor_name, aud, expected_audience); + return false; + } + } + + /// Reject empty resolved username (M-27). `TokenCredentials::setUserName` + /// leaves `is_ready=false` for empty input but the function would still + /// return true; the cache would then accept an entry under user_name "", + /// collapsing every empty-username token across all IdPs into the same + /// dynamic ClickHouse user. + if (user_name.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Resolved username from token is empty; rejecting", processor_name); + return false; + } + + credentials.setUserName(user_name); + + if (token_info.contains("exp")) + { + /// picojson stores all numerics as double; we need to validate the + /// value is a finite, positive Unix timestamp that fits in time_t + /// before casting. + const double exp = getValueByKey(token_info, "exp").value(); + if (!std::isfinite(exp) || exp <= 0.0 + || exp > static_cast(std::numeric_limits::max())) + throw Exception( + ErrorCodes::AUTHENTICATION_FAILED, + "{}: tokeninfo response contains an out-of-range 'exp' value: {}", + processor_name, exp); + credentials.setExpiresAt(std::chrono::system_clock::from_time_t(static_cast(exp))); + } + + /// Groups info can only be retrieved if user email is known. + /// If no email found in user info, we skip this step and there are no external roles for the user. + if (!user_info["email"].empty()) + { + std::set external_groups_names; + const Poco::URI get_groups_uri = Poco::URI("https://cloudidentity.googleapis.com/v1/groups/-/memberships:searchDirectGroups?query=member_key_id==" + user_info["email"] + "'"); + + try + { + auto groups_response = getObjectFromURI(get_groups_uri, token); + + if (!groups_response.contains("memberships") || !groups_response["memberships"].is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to get Google groups: invalid content in response from server", processor_name); + return true; + } + + for (const auto & group: groups_response["memberships"].get()) + { + if (!group.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to get Google groups: invalid content in response from server", processor_name); + continue; + } + + auto group_data = group.get(); + + /// Guard against a missing or non-object `groupKey`. Without + /// these checks `group_data["groupKey"].get()` + /// would auto-insert a null `picojson::value` (because picojson + /// objects are `std::map` and `[]` + /// default-constructs on a missing key) and then throw + /// `std::bad_cast` on the `.get()` call -- + /// which the `catch (const Exception &)` below does NOT + /// catch (`std::bad_cast` is `std::exception`-derived, not + /// `DB::Exception`-derived). The uncaught exception used to + /// propagate out of `resolveAndValidate` and abort auth. + auto group_key_it = group_data.find("groupKey"); + if (group_key_it == group_data.end() || !group_key_it->second.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Group entry without a 'groupKey' object; skipping", processor_name); + continue; + } + + String group_name = getValueByKey(group_key_it->second.get(), "id").value_or(""); + if (!group_name.empty()) + { + external_groups_names.insert(group_name); + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: User {}: new external group {}", + processor_name, quoteString(user_name), quoteString(group_name)); + } + } + + credentials.setGroups(external_groups_names); + } + catch (const std::exception & e) + { + /// Defense in depth: catch `std::exception` (not just `DB::Exception`) + /// so picojson's `std::bad_cast` and `std::runtime_error` -- and any + /// other future deviation -- degrade to "no roles mapped" rather + /// than aborting the whole authentication. + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to get Google groups, no external roles will be mapped. reason: {}", processor_name, e.what()); + return true; + } + } + + return true; +} + +AzureTokenProcessor::AzureTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_audience_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) + , expected_audience(expected_audience_) +{ + /// Without an audience pin, this processor accepts any Azure AD access token + /// that Microsoft Graph happens to honor -- which includes tokens minted for + /// other applications inside the same tenant. Surface the gap so operators + /// can lock the processor to their own application's audience. + if (expected_audience.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_audience' is not configured for Azure token processor. " + "Any Azure access token Microsoft Graph accepts will authenticate here, " + "regardless of which application it was issued for; set 'expected_audience' " + "to the audience this processor should accept.", + processor_name); +} + +bool AzureTokenProcessor::resolveAndValidate(TokenCredentials & credentials) const +{ + /// Token is a JWT in this case, but we cannot directly verify it against Azure AD JWKS. + /// We will not trust user data in this token except for 'exp' value to determine caching duration. + /// Explanation here: https://stackoverflow.com/questions/60778634/failing-signature-validation-of-jwt-tokens-from-azure-ad + /// Let Azure validate it: only valid tokens will be accepted. + /// Use GET https://graph.microsoft.com/oidc/userinfo to verify token and get user info at the same time + + const String & token = credentials.getToken(); + + String username; + try + { + picojson::object user_info_json = getObjectFromURI(Poco::URI("https://graph.microsoft.com/oidc/userinfo"), token); + username = getValueByKey(user_info_json, username_claim).value(); + } + catch (...) + { + return false; + } + + /// Audience binding (H-10): only after Microsoft Graph has accepted the + /// token (proving it is a real, signed Azure AD token) do we trust its + /// claims. We then enforce that the 'aud' claim matches the operator-pinned + /// audience -- without this check, *any* token issued for *any* application + /// in the tenant that has Graph access would authenticate. With the check, + /// tokens minted for other applications are rejected even though Graph + /// itself would honor them. + if (!expected_audience.empty()) + { + try + { + auto decoded_token = jwt::decode(token); + if (!decoded_token.has_audience()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Azure access token has no 'aud' claim; cannot enforce 'expected_audience' '{}'; rejecting", + processor_name, expected_audience); + return false; + } + const auto auds = decoded_token.get_audience(); + if (auds.find(expected_audience) == auds.end()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Azure access token audience does not contain configured 'expected_audience' '{}'; rejecting", + processor_name, expected_audience); + return false; + } + } + catch (const std::exception & e) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to decode Azure access token while enforcing 'expected_audience': {}; rejecting", + processor_name, e.what()); + return false; + } + } + + /// Reject empty resolved username (M-27). Previously this branch only + /// logged the gap and proceeded to return true at the end of the function, + /// which would cache an entry under user_name "" and collapse every + /// empty-username token across all IdPs into the same dynamic user. + if (username.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Resolved username from token is empty; rejecting", processor_name); + return false; + } + credentials.setUserName(username); + + try + { + credentials.setExpiresAt(jwt::decode(token).get_expires_at()); + } + catch (...) { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: No expiration data found in a valid token, will use default cache lifetime", processor_name); + } + + std::set external_groups_names; + const Poco::URI get_groups_uri = Poco::URI("https://graph.microsoft.com/v1.0/me/memberOf"); + + try + { + auto groups_response = getObjectFromURI(get_groups_uri, token); + + if (!groups_response.contains("value") || !groups_response["value"].is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to get Azure groups: invalid content in response from server", processor_name); + return true; + } + + picojson::array groups_array = groups_response["value"].get(); + + for (const auto & group: groups_array) + { + /// Got some invalid response. Ignore this, log this. + if (!group.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to get Azure groups: invalid content in response from server", processor_name); + continue; + } + + auto group_data = group.get(); + + /// Use the immutable `id` (GUID), not the mutable `displayName`, + /// for role-mapping. `displayName` can be renamed by an Azure AD + /// admin -- and on rename, every ClickHouse role-mapping regex + /// that referenced the old name silently stops matching, while + /// every regex that matches the new name silently starts. Two + /// distinct AAD groups can also share a display name and merge + /// into a single ClickHouse group; deleting and recreating a + /// group with the same name silently inherits the old grants. + /// `id` is a GUID assigned by AAD at group creation; it never + /// changes, never collides, and is never reused. + /// + /// Operators upgrading from a build that emitted `displayName` + /// must update their `roles_filter` / `roles_transform` regex + /// to reference the GUIDs Azure AD assigns to the groups they + /// want to map. The role identifier is not human-friendly -- + /// that is the cost of using an immutable handle. + if (!group_data.contains("id")) + continue; + + String group_name = getValueByKey(group_data, "id").value_or(""); + if (!group_name.empty()) + { + external_groups_names.insert(group_name); + String display_name = getValueByKey(group_data, "displayName").value_or(""); + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: User {}: new external group id={} (displayName={})", + processor_name, quoteString(credentials.getUserName()), + quoteString(group_name), quoteString(display_name)); + } + } + } + catch (const std::exception & e) + { + /// Defense in depth (M-10 sibling): broadened to `std::exception` so a + /// picojson `std::bad_cast` from a malformed response degrades to "no + /// roles mapped" rather than aborting the whole authentication. + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to get Azure groups, no external roles will be mapped. reason: {}", processor_name, e.what()); + return true; + } + + credentials.setGroups(external_groups_names); + return true; +} + +OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + bool allow_no_expiration_, + const String & userinfo_endpoint_, + const String & token_introspection_endpoint_, + UInt64 verifier_leeway_, + const String & jwks_uri_, + UInt64 jwks_cache_lifetime_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), + userinfo_endpoint(userinfo_endpoint_), token_introspection_endpoint(token_introspection_endpoint_) +{ + /// Without `jwks_uri`, no `jwt_validator` is created and so `expected_issuer` + /// / `expected_audience` cannot be enforced anywhere on the validation path + /// -- the runtime falls straight to the userinfo endpoint, which only + /// answers "the IdP describes this user", not "the token's `iss`/`aud` + /// match what this deployment pinned". Refuse to load with that combination + /// rather than silently dropping the operator's bindings. + if (jwks_uri_.empty() && (!expected_issuer_.empty() || !expected_audience_.empty())) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: 'expected_issuer' / 'expected_audience' are configured but no 'jwks_uri' is provided. " + "These bindings can only be enforced via local JWT validation against a JWKS; the userinfo " + "fallback alone cannot enforce them. Configure 'jwks_uri' (or, if you intentionally want " + "userinfo-only validation, clear 'expected_issuer'/'expected_audience').", + processor_name); + + if (!jwks_uri_.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: JWKS URI set, local JWT processing will be attempted", processor_name_); + /// `expected_typ` is left empty here: OpenID's JWT-fastpath inherits no + /// `typ` enforcement from the operator config (the parser doesn't surface + /// `expected_typ` for the `openid` processor type yet). Operators who + /// want strict `typ` enforcement should use `jwt_static_jwks` / + /// `jwt_dynamic_jwks` directly instead of `openid`. + jwt_validator.emplace(processor_name_ + "jwks_val", + token_cache_lifetime_, + username_claim_, + groups_claim_, + expected_issuer_, + expected_audience_, + /*expected_typ=*/"", + allow_no_expiration_, + "", + verifier_leeway_, + jwks_uri_, + jwks_cache_lifetime_); + } +} + +OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + bool allow_no_expiration_, + const String & openid_config_endpoint_, + UInt64 verifier_leeway_, + UInt64 jwks_cache_lifetime_, + const RemoteHostFilter & remote_host_filter_, + bool allow_http_discovery_urls_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) +{ + /// Defense in depth: the discovery endpoint itself was already validated by + /// the parser, but re-check here in case this constructor is reached via a + /// future code path that bypasses parseTokenProcessor. + try + { + remote_host_filter_.checkURL(Poco::URI(openid_config_endpoint_)); + } + catch (const Exception & e) + { + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: 'configuration_endpoint' URL '{}' is not in : {}", + processor_name, openid_config_endpoint_, e.message()); + } + + const picojson::object openid_config = getObjectFromURI(Poco::URI(openid_config_endpoint_)); + + /// Only `userinfo_endpoint` is mandatory: it backs the runtime userinfo + /// fallback (and is the sole user-info source when no JWKS is configured). + /// `introspection_endpoint` is currently unused at runtime -- it's plumbed + /// for a future RFC 7662 introspection feature -- so a discovery document + /// that omits it should not block processor construction. + if (!openid_config.contains("userinfo_endpoint")) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "{}: Cannot extract userinfo_endpoint from OIDC configuration at '{}'; consider manual configuration.", + processor_name, openid_config_endpoint_); + + /// The discovery document is untrusted: even with the issuer-anchor check + /// below (H-08), a poisoned or misdirected response can still try to point + /// trust-chain endpoints (jwks_uri, userinfo_endpoint, introspection_endpoint) + /// at hosts the operator never approved. Refuse to load the processor when + /// any returned URL is outside ; this prevents the + /// server from reaching out to attacker-controlled endpoints during token + /// validation. + /// + /// Additionally, refuse non-HTTPS schemes on discovery-returned URLs. + /// Without this, an attacker who can MITM the discovery fetch (operator + /// typed an `http://` configuration_endpoint, or any TLS interception path) + /// can substitute a discovery doc whose `jwks_uri` is `http://169.254.169.254/...` + /// (cloud metadata), `http://127.0.0.1:...` (local admin ports), or + /// `http://kubernetes.default.svc:...` -- and the server issues a one-shot + /// HTTP GET under its own process identity. `` is + /// the primary defense, but not every deployment configures it; an + /// HTTPS-only rule on returned URLs is a cheap, orthogonal layer that + /// blocks all three of those targets independently. Operators who run an + /// IdP over plain HTTP intentionally can wire the trust chain manually + /// (`userinfo_endpoint`/`token_introspection_endpoint`/`jwks_uri` directly) + /// instead of relying on discovery, or opt out of this check by setting + /// `true` on the + /// processor (false by default; still applies). + if (allow_http_discovery_urls_) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'allow_http_discovery_urls' is enabled; HTTPS check on URLs returned by OIDC discovery " + "is suppressed. Make sure restricts which targets the server may " + "be redirected to via a poisoned discovery document.", + processor_name); + auto require_allowed_discovery_url = [&](const std::string & url, const char * field) + { + Poco::URI parsed_uri(url); + if (!allow_http_discovery_urls_ && parsed_uri.getScheme() != "https") + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery at '{}' returned non-HTTPS '{}' URL '{}' (scheme '{}'). " + "The trust-chain URLs from discovery must use HTTPS so a poisoned discovery " + "document cannot redirect token validation through internal endpoints " + "(cloud metadata, localhost, in-cluster service IPs). If the IdP genuinely " + "runs over plain HTTP, either configure the trust chain manually instead of " + "using 'configuration_endpoint', or set " + "'true' on this processor.", + processor_name, openid_config_endpoint_, field, url, parsed_uri.getScheme()); + + try + { + remote_host_filter_.checkURL(parsed_uri); + } + catch (const Exception & e) + { + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery at '{}' returned '{}' URL '{}' which is not in " + ": {}", + processor_name, openid_config_endpoint_, field, url, e.message()); + } + }; + + require_allowed_discovery_url(getValueByKey(openid_config, "userinfo_endpoint").value(), "userinfo_endpoint"); + if (openid_config.contains("introspection_endpoint")) + require_allowed_discovery_url(getValueByKey(openid_config, "introspection_endpoint").value(), "introspection_endpoint"); + if (openid_config.contains("jwks_uri")) + require_allowed_discovery_url(getValueByKey(openid_config, "jwks_uri").value(), "jwks_uri"); + + /// Anchor the discovery document to a known issuer when one is configured. + /// + /// OIDC Discovery 1.0 §4.3 / RFC 8414 §3.3 require the metadata's "issuer" + /// to be tied to the URL used to fetch it. Without this anchor a poisoned + /// or misdirected discovery response can redirect the entire trust chain + /// (jwks_uri, userinfo_endpoint, introspection_endpoint) to URLs the + /// operator never approved -- and because the embedded JWT verifier only + /// enforces the `iss` claim when expected_issuer is non-empty, JWTs signed + /// by the attacker's keys would be silently accepted at runtime. + /// + /// Policy: + /// - expected_issuer configured => discovery's "issuer" MUST match it + /// (refuse to construct on mismatch or + /// absence). Verifier is pinned to it. + /// - expected_issuer empty => log a warning so the gap is visible + /// in operator logs, then proceed with + /// the historical (lax) behavior. The + /// verifier is left without an issuer + /// pin to preserve compatibility. + const auto issuer_from_discovery = getValueByKey(openid_config, "issuer").value_or(""); + + if (!expected_issuer_.empty()) + { + if (issuer_from_discovery.empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery document at '{}' does not advertise an 'issuer'; " + "cannot verify it against the configured 'expected_issuer' '{}'.", + processor_name, openid_config_endpoint_, expected_issuer_); + + if (issuer_from_discovery != expected_issuer_) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery 'issuer' mismatch: configured 'expected_issuer' is '{}' " + "but discovery document at '{}' returned issuer '{}'. Refusing to load the " + "processor to avoid trusting metadata that belongs to a different issuer.", + processor_name, expected_issuer_, openid_config_endpoint_, issuer_from_discovery); + } + else + { + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_issuer' is not configured for OIDC discovery at '{}'. " + "The JWT 'iss' claim will NOT be enforced.", processor_name, openid_config_endpoint_); + } + + userinfo_endpoint = Poco::URI(getValueByKey(openid_config, "userinfo_endpoint").value()); + if (openid_config.contains("introspection_endpoint")) + token_introspection_endpoint = Poco::URI(getValueByKey(openid_config, "introspection_endpoint").value()); + + /// See manual-constructor comment: `expected_issuer` / `expected_audience` + /// can only be enforced via local JWT validation. If the discovery document + /// does not advertise a `jwks_uri`, no `jwt_validator` will be created and + /// the userinfo fallback alone cannot enforce these bindings. Refuse the + /// configuration rather than silently dropping them. + if (!openid_config.contains("jwks_uri") && (!expected_issuer_.empty() || !expected_audience_.empty())) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery at '{}' did not advertise a 'jwks_uri', but 'expected_issuer' / " + "'expected_audience' are configured. These bindings can only be enforced via local JWT " + "validation against a JWKS; userinfo cannot enforce them. Refusing to load.", + processor_name, openid_config_endpoint_); + + if (openid_config.contains("jwks_uri")) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: JWKS URI set, local JWT processing will be attempted", processor_name_); + /// `expected_typ` empty for the same reason as the manual constructor. + jwt_validator.emplace(processor_name_ + "jwks_val", + token_cache_lifetime_, + username_claim_, + groups_claim_, + expected_issuer_, + expected_audience_, + /*expected_typ=*/"", + allow_no_expiration_, + "", + verifier_leeway_, + getValueByKey(openid_config, "jwks_uri").value(), + jwks_cache_lifetime_); + } +} + +bool OpenIdTokenProcessor::resolveAndValidate(TokenCredentials & credentials) const +{ + const String & token = credentials.getToken(); + String username; + picojson::object user_info_json; + + if (jwt_validator.has_value()) + { + /// When a `jwt_validator` is configured, it owns the operator's + /// `expected_issuer` / `expected_audience` / `allow_no_expiration` + /// bindings. If it rejects the token we MUST NOT fall back to the + /// userinfo endpoint: userinfo only confirms "the IdP describes this + /// user", it has no notion of the operator-pinned audience or issuer + /// and does not enforce the local expiration policy. Falling back here + /// would silently bypass exactly the bindings the operator opted into, + /// e.g. a JWT with the wrong `aud` would still authenticate because + /// the IdP's own userinfo accepts it for itself. + if (!jwt_validator.value().resolveAndValidate(credentials)) + { + /// DEBUG, not TRACE: this is the binding-rejection path. Operators + /// running with DEBUG enabled will see a clear signal that the + /// JWT-fastpath (which enforces `expected_issuer` / `expected_audience` + /// / `allow_no_expiration`) rejected a token. The auth failure itself + /// is also visible to the client, but the log line tells the operator + /// *why* it was rejected on the local side. + LOG_DEBUG(getLogger("TokenAuthentication"), + "{}: Local JWT validation rejected the token. Refusing to fall back to " + "userinfo: the operator-configured bindings (expected_issuer / expected_audience / " + "allow_no_expiration) cannot be enforced by userinfo, and a fallback would silently " + "bypass them.", + processor_name); + return false; + } + + try + { + auto decoded_token = jwt::decode(token); + user_info_json = decoded_token.get_payload_json(); + username = getValueByKey(user_info_json, username_claim).value(); + + /// TODO: Now we work only with Keycloak -- and it provides expires_at in token itself. Need to add actual token introspection logic for other OIDC providers. + if (decoded_token.has_expires_at()) + credentials.setExpiresAt(decoded_token.get_expires_at()); + } + catch (const std::exception & ex) + { + /// WARNING: validation passed but extracting the payload locally + /// failed -- a genuinely rare condition (the same token was just + /// successfully verified, so its bytes ARE a valid JWT). The + /// processor is about to fall back to userinfo for username + /// extraction. Bindings were already enforced by `jwt_validator`, + /// so this fallback is safe -- but the underlying mismatch + /// (decode failure on a verified token) usually means an IdP + /// behavioral change, a clock skew, or a payload-format drift, + /// and operators should know about it loudly. + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: JWT validation succeeded but payload extraction failed: {}. " + "Falling back to userinfo for username; the operator-configured " + "bindings have ALREADY been enforced by JWT validation, so this " + "fallback is safe -- but the decode failure indicates an unexpected " + "JWT shape from the IdP.", + processor_name, ex.what()); + } + } + + /// Userinfo path: only reachable when no `jwt_validator` is configured + /// (the constructor guarantees that combination is incompatible with any + /// `expected_issuer` / `expected_audience` pin), or when local JWT validation + /// passed but extracting the username/payload from the decoded token failed + /// for an unrelated reason -- in which case the bindings have already been + /// enforced by `jwt_validator` and userinfo is just being asked for the user + /// identity. + if (username.empty() || user_info_json.empty()) + { + try + { + user_info_json = getObjectFromURI(userinfo_endpoint, token); + username = getValueByKey(user_info_json, username_claim).value(); + } + catch (...) + { + return false; + } + } + + if (user_info_json.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to obtain user info", processor_name); + return false; + } + + if (username.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to get username", processor_name); + return false; + } + + credentials.setUserName(username); + + /// For now, list of groups is expected in a claim with specified name either in token itself or in userinfo response (Keycloak works this way) + /// TODO: add support for custom endpoints for retrieving groups. Keycloak lists groups in /userinfo and token itself, which is not always the case. + if (!groups_claim.empty() && user_info_json.contains(groups_claim)) + { + if (!user_info_json[groups_claim].is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to extract groups: invalid content in user data", processor_name); + return true; + } + + std::set external_groups_names; + + picojson::array groups_array = user_info_json[groups_claim].get(); + for (const auto & group: groups_array) + { + if (group.is()) + external_groups_names.insert(group.get()); + } + credentials.setGroups(external_groups_names); + } + + return true; +} + +} +#endif diff --git a/src/Access/TokenProcessorsParse.cpp b/src/Access/TokenProcessorsParse.cpp new file mode 100644 index 000000000000..62602b1efd81 --- /dev/null +++ b/src/Access/TokenProcessorsParse.cpp @@ -0,0 +1,209 @@ +#include "TokenProcessors.h" + +#include +#include +#include +#include + +namespace DB { + +namespace ErrorCodes +{ + extern const int INVALID_CONFIG_PARAMETER; + extern const int SUPPORT_IS_DISABLED; +} + +#if USE_JWT_CPP +std::unique_ptr ITokenProcessor::parseTokenProcessor( + const Poco::Util::AbstractConfiguration & config, + const String & prefix, + const String & processor_name) +{ + if (!config.hasProperty(prefix + ".type")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'type' parameter shall be specified in token_processor configuration.'"); + + auto provider_type = Poco::toLower(config.getString(prefix + ".type")); + + auto token_cache_lifetime = config.getUInt64(prefix + ".token_cache_lifetime", 3600); + auto username_claim = config.getString(prefix + ".username_claim", "sub"); + auto groups_claim = config.getString(prefix + ".groups_claim", "groups"); + auto expected_issuer = config.getString(prefix + ".expected_issuer", ""); + auto expected_audience = config.getString(prefix + ".expected_audience", ""); + /// `expected_typ` is the JWT header `typ` to require. RFC 8725 §3.11 and + /// RFC 9068 recommend type discrimination to prevent cross-token-class + /// substitution -- e.g. accepting an ID token (intended for client login) + /// where an access token (intended for resource access) is expected. + /// Common values: "at+jwt" (RFC 9068 access tokens), "JWT" (generic). + /// Empty (the default) means no `typ` enforcement; the JWT processors warn + /// at startup when this is left empty so the gap is visible. + auto expected_typ = config.getString(prefix + ".expected_typ", ""); + auto allow_no_expiration = config.getBool(prefix + ".allow_no_expiration", false); + + /// Constrain every OIDC/JWT trust-chain fetch (discovery, userinfo, + /// introspection, JWKS) to the operator-approved . + /// + /// Without this gate, any URL the operator pastes into the processor config + /// -- and any URL returned by an OIDC discovery document -- is fetched + /// blindly. A misconfigured or attacker-influenced discovery response can + /// then redirect token validation through hosts the operator never approved. + /// + /// We pre-validate every URL the operator typed into the processor config + /// here, at parse time, so a bad config fails fast at startup rather than + /// at first authentication. Discovery-derived URLs (jwks_uri etc.) are + /// validated separately, after the discovery fetch, inside the processor. + /// + /// If is absent the filter degrades to its + /// historical permissive behavior: this matches every other ClickHouse + /// outbound URL site and avoids breaking existing deployments. + RemoteHostFilter remote_host_filter; + remote_host_filter.setValuesFromConfig(config); + + auto require_allowed_url = [&](const String & raw_url, const char * param_name) + { + if (raw_url.empty()) + return; + try + { + remote_host_filter.checkURL(Poco::URI(raw_url)); + } + catch (const Exception & e) + { + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Token processor '{}': '{}' URL '{}' is not in : {}", + processor_name, param_name, raw_url, e.message()); + } + }; + + if (provider_type == "google") + { + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_audience); + } + else if (provider_type == "azure") + { + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_audience); + } + else if (provider_type == "openid") + { + auto verifier_leeway = config.getUInt64(prefix + ".verifier_leeway", 60); + auto jwks_cache_lifetime = config.getUInt64(prefix + ".jwks_cache_lifetime", 3600); + + /// `token_introspection_endpoint` is currently unused at runtime: the + /// processor relies on JWT-local validation (when JWKS is configured) + /// or on userinfo, never on RFC 7662 introspection. Don't require it + /// for "locally configured" mode -- forcing operators to set a value + /// that does nothing is a footgun. If introspection is wired up later, + /// the field is already plumbed and can become required at that point. + bool externally_configured = config.hasProperty(prefix + ".configuration_endpoint") && !config.hasProperty(prefix + ".jwks_uri"); + bool locally_configured = config.hasProperty(prefix + ".userinfo_endpoint"); + + if (externally_configured && ! locally_configured) + { + const auto configuration_endpoint = config.getString(prefix + ".configuration_endpoint"); + require_allowed_url(configuration_endpoint, "configuration_endpoint"); + /// Opt-out for the HTTPS-on-discovery-returned-URLs check. False by + /// default; operators who knowingly run an IdP over plain HTTP can + /// enable it without falling back to manual trust-chain config. + const auto allow_http_discovery_urls = config.getBool(prefix + ".allow_http_discovery_urls", false); + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, + expected_issuer, expected_audience, allow_no_expiration, + configuration_endpoint, + verifier_leeway, + jwks_cache_lifetime, + remote_host_filter, + allow_http_discovery_urls); + } + else if (locally_configured && !externally_configured) + { + const auto userinfo_endpoint = config.getString(prefix + ".userinfo_endpoint"); + const auto token_introspection_endpoint = config.getString(prefix + ".token_introspection_endpoint", ""); + const auto jwks_uri = config.getString(prefix + ".jwks_uri", ""); + require_allowed_url(userinfo_endpoint, "userinfo_endpoint"); + require_allowed_url(token_introspection_endpoint, "token_introspection_endpoint"); + require_allowed_url(jwks_uri, "jwks_uri"); + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, + expected_issuer, expected_audience, allow_no_expiration, + userinfo_endpoint, + token_introspection_endpoint, + verifier_leeway, + jwks_uri, + jwks_cache_lifetime); + } + + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Either 'configuration_endpoint' or 'userinfo_endpoint' " + "(and, optionally, 'token_introspection_endpoint' / 'jwks_uri') must be specified for 'openid' processor"); + } + else if (provider_type == "jwt_static_key") + { + if (!config.hasProperty(prefix + ".static_key")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'static_key' must be specified for 'jwt_static_key' processor"); + + if (!config.hasProperty(prefix + ".algo")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'algo' must be specified for 'jwt_static_key' processor"); + + StaticKeyJwtParams params = {Poco::toLower(config.getString(prefix + ".algo")), + config.getString(prefix + ".static_key", ""), + config.getBool(prefix + ".static_key_in_base64", false), + config.getString(prefix + ".public_key", ""), + config.getString(prefix + ".private_key", ""), + config.getString(prefix + ".public_key_password", ""), + config.getString(prefix + ".private_key_password", ""), + config.getString(prefix + ".claims", ""), + config.getUInt64(prefix + ".verifier_leeway", 60)}; + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_issuer, expected_audience, expected_typ, allow_no_expiration, params); + } + else if (provider_type == "jwt_static_jwks") + { + if (config.hasProperty(prefix + ".static_jwks") && config.hasProperty(prefix + ".static_jwks_file")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'static_jwks' and 'static_jwks_file' cannot be specified simultaneously for 'jwt_static_jwks' processor"); + + if (!config.hasProperty(prefix + ".static_jwks") && !config.hasProperty(prefix + ".static_jwks_file")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'static_jwks' or 'static_jwks_file' must be specified for 'jwt_static_jwks' processor"); + + if (config.hasProperty(prefix + ".jwks_uri")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'jwks_uri' cannot be specified for 'jwt_static_jwks' processor"); + + StaticJWKSParams params + { + config.getString(prefix + ".static_jwks", ""), + config.getString(prefix + ".static_jwks_file", "") + }; + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, + expected_issuer, expected_audience, expected_typ, allow_no_expiration, + config.getString(prefix + ".claims", ""), + config.getUInt64(prefix + ".verifier_leeway", 60), + std::make_shared(params)); + } + if (provider_type == "jwt_dynamic_jwks") + { + if (config.hasProperty(prefix + ".static_jwks") || config.hasProperty(prefix + ".static_jwks_file")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'static_jwks' and 'static_jwks_file' cannot be specified for 'jwt_dynamic_jwks' processor"); + if (!config.hasProperty(prefix + ".jwks_uri")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'jwks_uri' must be specified for 'jwt_dynamic_jwks' processor"); + + const auto jwks_uri = config.getString(prefix + ".jwks_uri"); + require_allowed_url(jwks_uri, "jwks_uri"); + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, + expected_issuer, expected_audience, expected_typ, allow_no_expiration, + config.getString(prefix + ".claims", ""), + config.getUInt64(prefix + ".verifier_leeway", 60), + jwks_uri, + config.getUInt(prefix + ".jwks_cache_lifetime", 3600)); + } + else + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Invalid type: {}", provider_type); + + // throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Failed to parse token processor: {}", processor_name); +} + +#else +std::unique_ptr ITokenProcessor::parseTokenProcessor( + const Poco::Util::AbstractConfiguration &, + const String &, + const String &) +{ + throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Failed to parse token_processor, ClickHouse was built without JWT support."); +} +#endif + +} diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 39bc2e0ec362..e86583295995 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -147,13 +147,23 @@ void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const if (server_protocol_revision >= DBMS_MIN_REVISON_WITH_JWT_IN_INTERSERVER) { - if (!jwt.empty()) - { - writeBinary(static_cast(1), out); - writeBinary(jwt, out); - } - else - writeBinary(static_cast(0), out); + /// Never serialize the bearer token over the interserver wire. + /// + /// Distributed queries use this `ClientInfo` to fan out to remote shards + /// and replicas. Interserver transport is plaintext by default + /// (`interserver_http_port` vs `interserver_https_port`), so writing the + /// raw JWT here exposes session credentials on the internal network for + /// every distributed query whenever the operator hasn't opted into TLS + /// for interserver -- and no code on the receiving side currently reads + /// `client_info.jwt`, so the transmission is pure leakage with no + /// functional benefit. + /// + /// The protocol-revision byte is still emitted (always `0` = "no JWT") + /// to preserve wire compatibility with peers that expect this field at + /// this offset; receivers will read it as "no JWT present" and skip + /// the body. The `jwt` member of `ClientInfo` is retained for any + /// in-process use within the same node. + writeBinary(static_cast(0), out); } } @@ -346,8 +356,17 @@ void ClientInfo::setFromHTTPRequest(const Poco::Net::HTTPRequest & request) for (const auto & header : request) { /// These headers can contain authentication info and shouldn't be accessible by the user. + /// + /// The standard HTTP authorization header is `Authorization` (RFC 7235 §4.2); + /// `Authentication` is a separate header (RFC 7615) that ClickHouse does not use + /// for credentials. Filter both: `Authorization` is the actual credential header + /// (Basic, Bearer, etc.) and must not be exposed via `getClientHTTPHeader` or + /// relayed through `` on HTTP auth servers; `Authentication` is + /// filtered defensively to preserve prior behavior. String key_lowercase = Poco::toLower(header.first); - if (key_lowercase.starts_with("x-clickhouse") || key_lowercase == "authentication") + if (key_lowercase.starts_with("x-clickhouse") + || key_lowercase == "authorization" + || key_lowercase == "authentication") continue; http_headers[header.first] = header.second; } diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 7bf6d4ebfb1d..21b07e3ebc9b 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -385,6 +386,13 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So user_id = auth_result.user_id; user_authenticated_with = auth_result.authentication_data; settings_from_auth_server = auth_result.settings; + + /// Bind the session lifetime to the access-token lifetime when applicable. + if (const auto * token_credentials = typeid_cast(&credentials_)) + auth_token_expires_at = token_credentials->getExpiresAt(); + else + auth_token_expires_at.reset(); + LOG_DEBUG(log, "{} Authenticated with global context as user {}", toString(auth_id), toString(*user_id)); @@ -411,13 +419,33 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So void Session::checkIfUserIsStillValid() { + const auto now = std::chrono::system_clock::now(); + if (const auto valid_until = user_authenticated_with.getValidUntil()) { - const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - - if (now > valid_until) + if (std::chrono::system_clock::to_time_t(now) > valid_until) throw Exception(ErrorCodes::USER_EXPIRED, "Authentication method used has expired"); } + + /// For sessions established via a bearer/access token (JWT or opaque), enforce token expiry. + if (auth_token_expires_at.has_value() && now >= *auth_token_expires_at) + throw Exception(ErrorCodes::USER_EXPIRED, "Access token used to authenticate the session has expired"); + + /// For JWT/token sessions, also re-validate that the authenticating + /// processor is still configured. Without this, an admin removing a + /// processor (or disabling token auth entirely) would NOT terminate + /// active sessions until each session's token expired naturally -- a + /// gap of up to one token TTL (~1h for typical IdPs) between the + /// admin's "stop accepting tokens from this IdP" intent and actual + /// session termination (M-28). + if (user_authenticated_with.getType() == AuthenticationType::JWT) + { + const auto & processor_name = user_authenticated_with.getTokenProcessorName(); + if (!global_context->getAccessControl().getExternalAuthenticators().hasTokenProcessor(processor_name)) + throw Exception(ErrorCodes::USER_EXPIRED, + "Token processor '{}' that authenticated this session is no longer configured", + processor_name.empty() ? "" : processor_name); + } } void Session::onAuthenticationFailure(const std::optional & user_name, const Poco::Net::SocketAddress & address_, const Exception & e) diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index babd16a9975a..f62113f83452 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -121,6 +121,10 @@ class Session std::vector external_roles; AuthenticationData user_authenticated_with; + /// When the user was authenticated with a bearer/access token, this holds the + /// effective token expiry captured at authentication time. + std::optional auth_token_expires_at; + ContextMutablePtr session_context; mutable bool query_context_created = false; diff --git a/src/Parsers/Access/ASTAuthenticationData.cpp b/src/Parsers/Access/ASTAuthenticationData.cpp index 35e7d2032d83..ec1cfc02908a 100644 --- a/src/Parsers/Access/ASTAuthenticationData.cpp +++ b/src/Parsers/Access/ASTAuthenticationData.cpp @@ -116,9 +116,29 @@ void ASTAuthenticationData::formatImpl(WriteBuffer & ostr, const FormatSettings } case AuthenticationType::JWT: { - prefix = "CLAIMS"; - parameter = true; - break; + /// JWT carries two independent optional clauses (PROCESSOR and + /// CLAIMS), so it does not fit the single-prefix/single-parameter + /// shape the rest of this function uses. Emit directly here and + /// short-circuit the prefix/parameter pipeline by returning at + /// the end of this case. + ostr << " " << auth_type_name; + + size_t child_idx = 0; + if (has_jwt_processor) + { + ostr << " PROCESSOR "; + children[child_idx++]->format(ostr, settings); + } + if (has_jwt_claims) + { + ostr << " CLAIMS "; + children[child_idx++]->format(ostr, settings); + } + + if (valid_until) + formatValidUntil(*valid_until, ostr, settings); + + return; } case AuthenticationType::LDAP: { diff --git a/src/Parsers/Access/ASTAuthenticationData.h b/src/Parsers/Access/ASTAuthenticationData.h index ab2da84fcaf2..742c7f5e39d3 100644 --- a/src/Parsers/Access/ASTAuthenticationData.h +++ b/src/Parsers/Access/ASTAuthenticationData.h @@ -41,6 +41,16 @@ class ASTAuthenticationData : public IAST bool contains_password = false; bool contains_hash = false; + /// IDENTIFIED WITH jwt accepts two optional clauses: + /// PROCESSOR '' + /// CLAIMS '' + /// Both are stored in `children` in this order; flags below tell which slots + /// are populated (children layout depends on which were specified). The + /// processor pin is what protects against the H-14 / H-17 cache-priming + /// bypass for SQL-declared JWT users; without it the per-user lookup goes + /// through the iterate-all-processors auto-discovery path with empty pin. + bool has_jwt_processor = false; + bool has_jwt_claims = false; ASTPtr valid_until; protected: diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index cb1561c0c912..372b964d2ce9 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -83,6 +83,7 @@ namespace bool expect_ssl_cert_subjects = false; bool expect_public_ssh_key = false; bool expect_http_auth_server = false; + bool expect_jwt_args = false; auto parse_non_password_based_type = [&](auto check_type) { @@ -104,7 +105,7 @@ namespace else if (check_type == AuthenticationType::HTTP) expect_http_auth_server = true; else if (check_type == AuthenticationType::JWT) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "CREATE USER is not supported for JWT"); + expect_jwt_args = true; else if (check_type != AuthenticationType::NO_PASSWORD) expect_password = true; @@ -165,6 +166,8 @@ namespace ASTPtr http_auth_scheme; ASTPtr ssl_cert_subjects; std::optional ssl_cert_subject_type; + ASTPtr jwt_processor; + ASTPtr jwt_claims; if (expect_password || expect_hash) { @@ -229,6 +232,32 @@ namespace return false; } } + else if (expect_jwt_args) + { + /// IDENTIFIED WITH jwt accepts two optional clauses, in either order: + /// PROCESSOR '' -- pin to a specific token_processor + /// CLAIMS '' -- per-user claims requirement + /// Either, both, or neither may appear. Pinning a processor is what + /// gates SQL-declared JWT users out of the iterate-all-processors + /// auto-discovery path. + for (size_t i = 0; i < 2; ++i) + { + if (!jwt_processor && ParserKeyword{Keyword::PROCESSOR}.ignore(pos, expected)) + { + if (!ParserStringAndSubstitution{}.parse(pos, jwt_processor, expected)) + return false; + } + else if (!jwt_claims && ParserKeyword{Keyword::CLAIMS}.ignore(pos, expected)) + { + if (!ParserStringAndSubstitution{}.parse(pos, jwt_claims, expected)) + return false; + } + else + { + break; + } + } + } auth_data = make_intrusive(); @@ -254,6 +283,18 @@ namespace if (http_auth_scheme) auth_data->children.push_back(std::move(http_auth_scheme)); + if (jwt_processor) + { + auth_data->has_jwt_processor = true; + auth_data->children.push_back(std::move(jwt_processor)); + } + + if (jwt_claims) + { + auth_data->has_jwt_claims = true; + auth_data->children.push_back(std::move(jwt_claims)); + } + parseValidUntil(pos, expected, auth_data->valid_until); return true; diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index 65e7138e4108..009f53409c17 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -428,6 +428,7 @@ namespace DB MR_MACROS(PRIMARY_KEY, "PRIMARY KEY") \ MR_MACROS(PRIORITY, "PRIORITY") \ MR_MACROS(PRIMARY, "PRIMARY") \ + MR_MACROS(PROCESSOR, "PROCESSOR") \ MR_MACROS(PROFILE, "PROFILE") \ MR_MACROS(PROFILES, "PROFILES") \ MR_MACROS(PROJECTION, "PROJECTION") \ diff --git a/src/Server/HTTP/authenticateUserByHTTP.cpp b/src/Server/HTTP/authenticateUserByHTTP.cpp index d2399a662b67..826f0c0cf0b0 100644 --- a/src/Server/HTTP/authenticateUserByHTTP.cpp +++ b/src/Server/HTTP/authenticateUserByHTTP.cpp @@ -87,6 +87,7 @@ bool authenticateUserByHTTP( bool has_credentials_in_query_params = params.has("user") || params.has("password"); std::string spnego_challenge; + std::string bearer_token; #if USE_SSL X509Certificate::Subjects certificate_subjects; #endif @@ -164,6 +165,10 @@ bool authenticateUserByHTTP( if (spnego_challenge.empty()) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty"); } + else if (Poco::icompare(scheme, "Bearer") == 0) + { + bearer_token = auth_info; + } else { throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme); @@ -221,6 +226,28 @@ bool authenticateUserByHTTP( } } #endif + else if (!bearer_token.empty()) + { + const auto & access_control = global_context->getAccessControl(); + if (!access_control.getExternalAuthenticators().isTokenAuthEnabled()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Token authentication is disabled"); + + const auto token_credentials = TokenCredentials(bearer_token); + const auto & external_authenticators = access_control.getExternalAuthenticators(); + + /// Pre-user-lookup token validation. Pass `prime_cache_on_success=false` + /// so this unconstrained call (no processor pin, no JWT claims) does not + /// populate the token cache. The cache is reserved for entries produced + /// by the per-user authentication path (`Authentication::areCredentialsValid`), + /// which applies the user's pinned processor and per-user claims. + /// Without this, a user whose `` block omits `` would + /// satisfy a later cache lookup with empty `processor_name` -- silently + /// inheriting whichever processor happened to win this auto-discovery race. + if (!external_authenticators.checkTokenCredentials(token_credentials, /*processor_name=*/"", /*jwt_claims=*/"", /*prime_cache_on_success=*/false)) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: Token could not be verified."); + + current_credentials = std::make_unique(token_credentials); + } else // I.e., now using user name and password strings ("Basic"). { if (!current_credentials) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index b8978fb6e469..459aab7c6635 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1962,6 +1962,10 @@ void TCPHandler::receiveHello() if (is_ssh_based_auth) user.erase(0, std::string_view(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER).size()); + is_jwt_based_auth = user.starts_with(EncodedUserInfo::JWT_AUTHENTICAION_MARKER); + if (is_jwt_based_auth) + user.erase(0, std::string_view(EncodedUserInfo::JWT_AUTHENTICAION_MARKER).size()); + session = makeSession(); const auto & client_info = session->getClientInfo(); @@ -2049,6 +2053,29 @@ void TCPHandler::receiveHello() } #endif + if (is_jwt_based_auth) + { + const auto & access_control = server.context()->getAccessControl(); + if (!access_control.getExternalAuthenticators().isTokenAuthEnabled()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Token authentication is disabled"); + + auto credentials = TokenCredentials(password); + + const auto & external_authenticators = access_control.getExternalAuthenticators(); + + /// Pre-user-lookup token validation. Pass `prime_cache_on_success=false` + /// so this unconstrained call (no processor pin, no JWT claims) does not + /// populate the token cache; the per-user authentication path is the only + /// site allowed to populate it, after applying the user's pinned processor + /// and per-user claims. See `ExternalAuthenticators::checkTokenCredentials`. + if (!external_authenticators.checkTokenCredentials(credentials, /*processor_name=*/"", /*jwt_claims=*/"", /*prime_cache_on_success=*/false)) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Token is invalid"); + + session->authenticate(credentials, getClientAddress(client_info), socket().peerAddress()); + return; + } + + session->authenticate(user, password, getClientAddress(client_info), socket().peerAddress()); } diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 3819c87fb00a..f95fbe2bcadd 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -244,6 +244,7 @@ class TCPHandler : public Poco::Net::TCPServerConnection String default_database; bool is_ssh_based_auth = false; /// authentication is via SSH pub-key challenge + bool is_jwt_based_auth = false; /// authentication is via JWT bearer token /// For inter-server secret (remote_server.*.secret) bool is_interserver_mode = false; bool is_interserver_authenticated = false; diff --git a/tests/integration/test_jwt_auth/test.py b/tests/integration/test_jwt_auth/test.py new file mode 100644 index 000000000000..f7b00246cdde --- /dev/null +++ b/tests/integration/test_jwt_auth/test.py @@ -0,0 +1,209 @@ +import os +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.mock_servers import start_mock_servers + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=["configs/validators.xml"], + user_configs=["configs/users.xml"], + with_minio=True, + # We actually don't need minio, but we need to run dummy resolver + # (a shortcut not to change cluster.py in a more unclear way, TBC later). +) +client = cluster.add_instance( + "client", +) + + +def run_jwks_server(): + script_dir = os.path.join(os.path.dirname(__file__), "jwks_server") + start_mock_servers( + cluster, + script_dir, + [ + ("server.py", "resolver", "8080"), + ], + ) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + run_jwks_server() + yield cluster + finally: + cluster.shutdown() + + +def curl_with_jwt(token, ip, https=False): + http_prefix = "https" if https else "http" + curl = f'curl -H "Authorization: Bearer {token}" "{http_prefix}://{ip}:8123/?query=SELECT%20currentUser()"' + return curl + + +# See helpers/ directory if you need to re-create tokens (or understand how they are created) +def test_static_key(started_cluster): + res = client.exec_in_container( + [ + "bash", + "-c", + curl_with_jwt( + token="eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJqd3RfdXNlciJ9." + "kfivQ8qD_oY0UvihydeadD7xvuiO3zSmhFOc_SGbEPQ", + ip=cluster.get_instance_ip(instance.name), + ), + ] + ) + assert res == "jwt_user\n" + + +def test_jwks_server(started_cluster): + res = client.exec_in_container( + [ + "bash", + "-c", + curl_with_jwt( + token="eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzUxMiIsImtpZCI6Im15a2lkIn0." + "eyJzdWIiOiJqd3RfdXNlciIsImlzcyI6InRlc3RfaXNzIn0.MjegqrrVyrMMpkxIM-J_q-" + "Sw68Vk5xZuFpxecLLMFs5qzvnh0jslWtyRfi-ANJeJTONPZM5m0yP1ITt8BExoHWobkkR11bXz0ylYEIOgwxqw" + "36XhL2GkE17p-wMvfhCPhGOVL3b7msDRUKXNN48aAJA-NxRbQFhMr-eEx3HsrZXy17Qc7z-" + "0dINe355kzAInGp6gMk3uksAlJ3vMODK8jE-WYFqXusr5GFhXubZXdE2mK0mIbMUGisOZhZLc4QVwvUsYDLBCgJ2RHr5vm" + "jp17j_ZArIedUJkjeC4o72ZMC97kLVnVw94QJwNvd4YisxL6A_mWLTRq9FqNLD4HmbcOQ", + ip=cluster.get_instance_ip(instance.name), + ), + ] + ) + assert res == "jwt_user\n" + + +def test_jwks_server_ec_es384(started_cluster): + res = client.exec_in_container( + [ + "bash", + "-c", + curl_with_jwt( + token="eyJhbGciOiJFUzM4NCIsImtpZCI6ImVjbXlraWQiLCJ0eXAiOiJKV1QifQ." + "eyJzdWIiOiJqd3RfdXNlciIsImlzcyI6InRlc3RfaXNzIn0." + "3iGUcKfc07oLN4XmBA6BJSGSfu7cBsdQ6KAFh1sV64rWYkVL5VzYlAskHaWZ4R9hR3QK0Bv0EPjia8Vo-xdN9jS7-fVB7RF0" + "rGvbTOIuxE-yDumCyji3MYoLpcbOVasU", + ip=cluster.get_instance_ip(instance.name), + ), + ] + ) + assert res == "jwt_user\n" + + +# Helper: request `SELECT currentUser()` over HTTP with the given bearer token +# and return the body. Caller decides whether to assert on the username or on +# rejection (rejected requests return a non-username error body). +def http_select_current_user(token: str) -> str: + return client.exec_in_container( + [ + "bash", + "-c", + curl_with_jwt(token=token, ip=cluster.get_instance_ip(instance.name)), + ] + ) + + +def make_token(payload: dict, secret: str) -> str: + """Sign an HS256 JWT with the given secret. Matches the secrets configured + for `single_key_processor` (`my_secret`) and `another_single_key_processor` + (`other_secret`) in `configs/validators.xml`.""" + import jwt + return jwt.encode(payload, secret, algorithm="HS256") + + +def test_sql_create_jwt_user_with_processor_pin(started_cluster): + """SQL `CREATE USER ... IDENTIFIED WITH jwt PROCESSOR ''` actually + pins the auth path: a token that validates against a different processor + in the same chain must NOT authenticate the SQL-pinned user. Without the + pin the iterate-all-processors auto-discovery branch would happily accept + either token (this is the H-22 / H-14 bypass surface).""" + + instance.query( + "CREATE USER OR REPLACE sql_jwt_user IDENTIFIED WITH jwt PROCESSOR 'single_key_processor'" + ) + + # Round-trip: SHOW CREATE USER must emit the PROCESSOR clause we just set. + # `TSVRaw` is used so single quotes in the SQL literal are not TSV-escaped. + show = instance.query("SHOW CREATE USER sql_jwt_user FORMAT TSVRaw").strip() + assert "PROCESSOR 'single_key_processor'" in show, show + assert "CLAIMS" not in show, show + + token_my = make_token({"sub": "sql_jwt_user"}, "my_secret") + token_other = make_token({"sub": "sql_jwt_user"}, "other_secret") + + # Pinned processor accepts the my_secret-signed token. + assert http_select_current_user(token_my) == "sql_jwt_user\n" + + # The other_secret-signed token validates fine against + # `another_single_key_processor`, but the user is pinned to + # `single_key_processor` -- the pin must reject it. + rejected = http_select_current_user(token_other) + assert "AUTHENTICATION_FAILED" in rejected, rejected + + # Re-pin via ALTER and the relationship inverts. + instance.query( + "ALTER USER sql_jwt_user IDENTIFIED WITH jwt PROCESSOR 'another_single_key_processor'" + ) + assert http_select_current_user(token_other) == "sql_jwt_user\n" + rejected = http_select_current_user(token_my) + assert "AUTHENTICATION_FAILED" in rejected, rejected + + instance.query("DROP USER sql_jwt_user") + + +def test_sql_create_jwt_user_with_claims(started_cluster): + """`CLAIMS ''` must be enforced for SQL-declared JWT users: a token + that is valid against the pinned processor but lacks the required claim + must be rejected, and a token that has the claim must be accepted.""" + + instance.query( + "CREATE USER OR REPLACE sql_jwt_claims_user " + "IDENTIFIED WITH jwt PROCESSOR 'single_key_processor' " + "CLAIMS '{\"role\":\"admin\"}'" + ) + + show = instance.query("SHOW CREATE USER sql_jwt_claims_user FORMAT TSVRaw").strip() + assert "PROCESSOR 'single_key_processor'" in show, show + assert "CLAIMS '{\"role\":\"admin\"}'" in show, show + + # Token signed with the pinned processor's secret but no `role` claim: + # processor accepts, per-user CLAIMS rejects. + token_no_claim = make_token({"sub": "sql_jwt_claims_user"}, "my_secret") + rejected = http_select_current_user(token_no_claim) + assert "AUTHENTICATION_FAILED" in rejected, rejected + + # Token with the required claim: both gates pass. + token_with_claim = make_token( + {"sub": "sql_jwt_claims_user", "role": "admin"}, "my_secret" + ) + assert http_select_current_user(token_with_claim) == "sql_jwt_claims_user\n" + + instance.query("DROP USER sql_jwt_claims_user") + + +def test_sql_jwt_user_no_pin_uses_auto_discovery(started_cluster): + """Without `PROCESSOR`, the SQL JWT user falls back to auto-discovery: any + configured processor that validates the token will be accepted. This is + the documented behavior for users who explicitly chose not to pin.""" + + instance.query("CREATE USER OR REPLACE sql_jwt_unpinned IDENTIFIED WITH jwt") + + show = instance.query("SHOW CREATE USER sql_jwt_unpinned FORMAT TSVRaw").strip() + assert "PROCESSOR" not in show, show + + # Both tokens (each valid against a different processor) authenticate the + # same unpinned SQL user. + for secret in ("my_secret", "other_secret"): + token = make_token({"sub": "sql_jwt_unpinned"}, secret) + assert http_select_current_user(token) == "sql_jwt_unpinned\n" + + instance.query("DROP USER sql_jwt_unpinned") diff --git a/tests/integration/test_keycloak_auth/configs/validators.xml b/tests/integration/test_keycloak_auth/configs/validators.xml new file mode 100644 index 000000000000..c98805fc7be8 --- /dev/null +++ b/tests/integration/test_keycloak_auth/configs/validators.xml @@ -0,0 +1,22 @@ + + + + + jwt_dynamic_jwks + http://keycloak:8080/realms/clickhouse-test/protocol/openid-connect/certs + http://keycloak:8080/realms/clickhouse-test + preferred_username + 60 + + + + + openid + http://keycloak:8080/realms/clickhouse-test/.well-known/openid-configuration + true + preferred_username + 60 + + + diff --git a/tests/queries/0_stateless/01292_create_user.reference b/tests/queries/0_stateless/01292_create_user.reference index b93bddafb6d5..0aa4225e4394 100644 --- a/tests/queries/0_stateless/01292_create_user.reference +++ b/tests/queries/0_stateless/01292_create_user.reference @@ -125,3 +125,9 @@ CREATE USER u1_01292 IDENTIFIED WITH no_password CREATE USER `u1_01292@192.168.%.%` IDENTIFIED WITH no_password HOST LIKE \'192.168.%.%\' CREATE USER `u2_01292@192.168.%.%` IDENTIFIED WITH no_password HOST LIKE \'192.168.%.%\' -- creating user identified with JWT +CREATE USER user1 IDENTIFIED WITH jwt +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR \'my_processor\' +CREATE USER user1 IDENTIFIED WITH jwt CLAIMS \'{"role":"admin"}\' +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR \'my_processor\' CLAIMS \'{"role":"admin"}\' +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR \'my_processor\' CLAIMS \'{"role":"admin"}\' +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR \'other_processor\' diff --git a/tests/queries/0_stateless/01292_create_user.sql b/tests/queries/0_stateless/01292_create_user.sql index 830163d7345d..3b6176a6a063 100644 --- a/tests/queries/0_stateless/01292_create_user.sql +++ b/tests/queries/0_stateless/01292_create_user.sql @@ -263,5 +263,20 @@ SHOW CREATE USER u2_01292@'192.168.%.%'; DROP USER u1_01292, u1_01292@'192.168.%.%', u2_01292@'192.168.%.%'; SELECT '-- creating user identified with JWT'; -CREATE USER user1 IDENTIFIED WITH jwt BY '1'; -- { clientError BAD_ARGUMENTS } -CREATE USER user1 IDENTIFIED WITH jwt; -- { clientError BAD_ARGUMENTS } +CREATE USER user1 IDENTIFIED WITH jwt BY '1'; -- { clientError SYNTAX_ERROR } +CREATE USER user1 IDENTIFIED WITH jwt; +SHOW CREATE USER user1; +CREATE USER OR REPLACE user1 IDENTIFIED WITH jwt PROCESSOR 'my_processor'; +SHOW CREATE USER user1; +CREATE USER OR REPLACE user1 IDENTIFIED WITH jwt CLAIMS '{"role":"admin"}'; +SHOW CREATE USER user1; +CREATE USER OR REPLACE user1 IDENTIFIED WITH jwt PROCESSOR 'my_processor' CLAIMS '{"role":"admin"}'; +SHOW CREATE USER user1; +CREATE USER OR REPLACE user1 IDENTIFIED WITH jwt CLAIMS '{"role":"admin"}' PROCESSOR 'my_processor'; +SHOW CREATE USER user1; +ALTER USER user1 IDENTIFIED WITH jwt PROCESSOR 'other_processor'; +SHOW CREATE USER user1; +DROP USER user1; +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR ''; -- { serverError BAD_ARGUMENTS } +CREATE USER user1 IDENTIFIED WITH jwt CLAIMS 'not-json'; -- { serverError BAD_ARGUMENTS } +CREATE USER user1 IDENTIFIED WITH jwt CLAIMS '[]'; -- { serverError BAD_ARGUMENTS }