-
Notifications
You must be signed in to change notification settings - Fork 4.5k
Add Kerberos authentication support to HadoopFileSystem #20719 #37290
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -40,6 +40,11 @@ | |||||||||||||||||||||
| except ImportError: | ||||||||||||||||||||||
| hdfs = None | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| try: | ||||||||||||||||||||||
| from hdfs.ext.kerberos import KerberosClient | ||||||||||||||||||||||
| except ImportError: | ||||||||||||||||||||||
| KerberosClient = None | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| __all__ = ['HadoopFileSystem'] | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| _HDFS_PREFIX = 'hdfs:/' | ||||||||||||||||||||||
|
|
@@ -123,11 +128,13 @@ def __init__(self, pipeline_options): | |||||||||||||||||||||
| hdfs_host = hdfs_options.hdfs_host | ||||||||||||||||||||||
| hdfs_port = hdfs_options.hdfs_port | ||||||||||||||||||||||
| hdfs_user = hdfs_options.hdfs_user | ||||||||||||||||||||||
| hdfs_client = hdfs_options.hdfs_client | ||||||||||||||||||||||
| self._full_urls = hdfs_options.hdfs_full_urls | ||||||||||||||||||||||
| else: | ||||||||||||||||||||||
| hdfs_host = pipeline_options.get('hdfs_host') | ||||||||||||||||||||||
| hdfs_port = pipeline_options.get('hdfs_port') | ||||||||||||||||||||||
| hdfs_user = pipeline_options.get('hdfs_user') | ||||||||||||||||||||||
| hdfs_client = pipeline_options.get('hdfs_client', 'INSECURE') | ||||||||||||||||||||||
| self._full_urls = pipeline_options.get('hdfs_full_urls', False) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if hdfs_host is None: | ||||||||||||||||||||||
|
|
@@ -139,8 +146,25 @@ def __init__(self, pipeline_options): | |||||||||||||||||||||
| if not isinstance(self._full_urls, bool): | ||||||||||||||||||||||
| raise ValueError( | ||||||||||||||||||||||
| 'hdfs_full_urls should be bool, got: %s', self._full_urls) | ||||||||||||||||||||||
| self._hdfs_client = hdfs.InsecureClient( | ||||||||||||||||||||||
| 'http://%s:%s' % (hdfs_host, str(hdfs_port)), user=hdfs_user) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| # Create HDFS client based on authentication type | ||||||||||||||||||||||
| url = 'http://%s:%s' % (hdfs_host, str(hdfs_port)) | ||||||||||||||||||||||
| if hdfs_client == 'KERBEROS': | ||||||||||||||||||||||
|
Contributor
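There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To make the client selection more robust, it would be good to perform a case-insensitive comparison. While `'KERBEROS'` is the value the code compares against, a user could reasonably pass `'kerberos'`, which would silently fall through to the `else` branch and create an `InsecureClient` instead.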
Suggested change
|
||||||||||||||||||||||
| if KerberosClient is None: | ||||||||||||||||||||||
| raise ImportError( | ||||||||||||||||||||||
| 'Kerberos authentication requires the requests-kerberos library. ' | ||||||||||||||||||||||
| 'Install it with: pip install requests-kerberos') | ||||||||||||||||||||||
| _LOGGER.info('Using KerberosClient for HDFS authentication') | ||||||||||||||||||||||
| try: | ||||||||||||||||||||||
| self._hdfs_client = KerberosClient(url) | ||||||||||||||||||||||
| except Exception as e: | ||||||||||||||||||||||
| raise RuntimeError( | ||||||||||||||||||||||
| 'Failed to create KerberosClient. Ensure you have valid Kerberos ' | ||||||||||||||||||||||
| 'credentials (run kinit) or have configured a keytab. ' | ||||||||||||||||||||||
| 'Error: %s' % str(e)) | ||||||||||||||||||||||
|
Comment on lines
+160
to
+164
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When re-raising the exception, it's better to use `raise ... from e` so that the original exception is chained and its traceback is preserved. Also, consider using an f-string for formatting the error message for better readability, as the project seems to be on a modern Python version.
Suggested change
|
||||||||||||||||||||||
| else: | ||||||||||||||||||||||
| # Default to INSECURE for backward compatibility | ||||||||||||||||||||||
| self._hdfs_client = hdfs.InsecureClient(url, user=hdfs_user) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| @classmethod | ||||||||||||||||||||||
| def scheme(cls): | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems like there's an extra hyphen `-` here, which will render as a nested list item. Was this intentional? If not, it should probably be removed to maintain consistent formatting.