-
Notifications
You must be signed in to change notification settings - Fork 1.3k
fix: improve subprocess exception in git_utils for V3 #5811
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f353c77
3878100
8c69a59
777295b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,8 @@ | |
| # language governing permissions and limitations under the License. | ||
| from __future__ import absolute_import | ||
|
|
||
| import subprocess | ||
|
|
||
| import pytest | ||
|
|
||
| from sagemaker.core import git_utils | ||
|
|
@@ -308,6 +310,129 @@ def test_git_clone_repo_blocks_url_encoded_attack(self): | |
| git_utils.git_clone_repo(malicious_git_config, entry_point) | ||
| assert "Suspicious URL encoding detected" in str(error.value) | ||
|
|
||
| class TestCredentialRedaction: | ||
| """Test cases for credential redaction in clone error handling.""" | ||
|
|
||
| def test_redact_token_from_url(self): | ||
| """Test that a token embedded in an HTTPS URL is redacted.""" | ||
| url = "https://ghp_SuperSecretToken123@github.com/user/repo.git" | ||
| result = git_utils._redact_credentials_from_url(url) | ||
| assert "ghp_SuperSecretToken123" not in result | ||
| assert result == "https://<credentials-redacted>@github.com/user/repo.git" | ||
|
|
||
| def test_redact_username_password_from_url(self): | ||
| """Test that username:password embedded in an HTTPS URL is redacted.""" | ||
| url = "https://myuser:mypassword@github.com/user/repo.git" | ||
| result = git_utils._redact_credentials_from_url(url) | ||
| assert "myuser" not in result | ||
| assert "mypassword" not in result | ||
| assert result == "https://<credentials-redacted>@github.com/user/repo.git" | ||
|
|
||
| def test_redact_url_encoded_password(self): | ||
| """Test that URL-encoded credentials are redacted.""" | ||
| url = "https://user:p%40ss%20word@git-codecommit.us-east-1.amazonaws.com/v1/repos/myrepo" | ||
| result = git_utils._redact_credentials_from_url(url) | ||
| assert "p%40ss%20word" not in result | ||
| assert "<credentials-redacted>" in result | ||
|
|
||
| def test_no_redaction_without_credentials(self): | ||
| """Test that URLs without credentials are unchanged.""" | ||
| url = "https://github.com/user/repo.git" | ||
| result = git_utils._redact_credentials_from_url(url) | ||
| assert result == url | ||
|
|
||
| def test_no_redaction_for_ssh_url(self): | ||
| """Test that SSH URLs are not affected by redaction.""" | ||
| url = "git@github.com:user/repo.git" | ||
| result = git_utils._redact_credentials_from_url(url) | ||
| assert result == url | ||
|
|
||
| @pytest.fixture | ||
| def mock_env(self, monkeypatch): | ||
| """Set minimal env for subprocess calls.""" | ||
| monkeypatch.setenv("PATH", "/usr/bin:/bin") | ||
|
|
||
| def test_clone_failure_redacts_token(self, mock_env): | ||
| """Test that CalledProcessError from a failed clone does not contain the token.""" | ||
| from unittest.mock import patch | ||
|
|
||
| token_url = "https://ghp_secret123@github.com/user/repo.git" | ||
| with patch( | ||
| "subprocess.check_call", | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. mocked
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We did test this locally with a real git clone against an actual invalid URL (https://myuser:ghp_SuperSecretToken123@github.com/nonexistent/repo-xyz) — no mocks. GitHub returned a real Authentication failed, and we confirmed the re-raised exception contains only with no trace of the original credentials. We didn't add it as a formal integration test in CI because it depends on external network access to GitHub, which makes it flaky and environment-dependent — CI runners may have restricted outbound access, and GitHub rate limits or outages would cause false failures. The unit tests with mocked subprocess cover the same code paths deterministically. Checked the existing integ test suite — there are no integration tests that perform real git clone operations against GitHub. All git_utils tests are unit tests with mocked subprocess. So not adding a network-dependent integ test is consistent with the current test approach. |
||
| side_effect=subprocess.CalledProcessError( | ||
| 128, ["git", "clone", token_url, "/tmp/dest"] | ||
| ), | ||
| ): | ||
| with pytest.raises(subprocess.CalledProcessError) as exc_info: | ||
| git_utils._run_clone_command(token_url, "/tmp/dest") | ||
|
|
||
| # The token must NOT appear anywhere in the re-raised exception | ||
| assert "ghp_secret123" not in str(exc_info.value) | ||
| assert "ghp_secret123" not in str(exc_info.value.cmd) | ||
| assert "<credentials-redacted>" in str(exc_info.value.cmd) | ||
|
|
||
| def test_clone_failure_redacts_username_password(self, mock_env): | ||
| """Test that CalledProcessError from a failed clone does not contain username/password.""" | ||
| from unittest.mock import patch | ||
|
|
||
| cred_url = "https://admin:hunter2@github.com/org/repo.git" | ||
| with patch( | ||
| "subprocess.check_call", | ||
| side_effect=subprocess.CalledProcessError( | ||
| 128, ["git", "clone", cred_url, "/tmp/dest"] | ||
| ), | ||
| ): | ||
| with pytest.raises(subprocess.CalledProcessError) as exc_info: | ||
| git_utils._run_clone_command(cred_url, "/tmp/dest") | ||
|
|
||
| assert "admin" not in str(exc_info.value.cmd) | ||
| assert "hunter2" not in str(exc_info.value.cmd) | ||
| assert "<credentials-redacted>" in str(exc_info.value.cmd) | ||
|
|
||
| def test_clone_failure_redacts_codecommit_credentials(self, mock_env): | ||
| """Test that CodeCommit HTTPS credentials are redacted on failure.""" | ||
| from unittest.mock import patch | ||
|
|
||
| cc_url = "https://user:pass@git-codecommit.us-east-1.amazonaws.com/v1/repos/myrepo" | ||
| with patch( | ||
| "subprocess.check_call", | ||
| side_effect=subprocess.CalledProcessError( | ||
| 128, ["git", "clone", cc_url, "/tmp/dest"] | ||
| ), | ||
| ): | ||
| with pytest.raises(subprocess.CalledProcessError) as exc_info: | ||
| git_utils._run_clone_command(cc_url, "/tmp/dest") | ||
|
|
||
| assert "user:pass" not in str(exc_info.value.cmd) | ||
| assert "<credentials-redacted>" in str(exc_info.value.cmd) | ||
|
|
||
| def test_clone_failure_suppresses_exception_chain(self, mock_env): | ||
| """Test that the original exception chain is suppressed (from None).""" | ||
| from unittest.mock import patch | ||
|
|
||
| token_url = "https://ghp_secret@github.com/user/repo.git" | ||
| with patch( | ||
| "subprocess.check_call", | ||
| side_effect=subprocess.CalledProcessError( | ||
| 128, ["git", "clone", token_url, "/tmp/dest"] | ||
| ), | ||
| ): | ||
| with pytest.raises(subprocess.CalledProcessError) as exc_info: | ||
| git_utils._run_clone_command(token_url, "/tmp/dest") | ||
|
|
||
| # __cause__ should be None due to 'from None' | ||
| assert exc_info.value.__cause__ is None | ||
|
|
||
| def test_clone_success_no_exception(self, mock_env): | ||
| """Test that successful clone does not raise.""" | ||
| from unittest.mock import patch | ||
|
|
||
| url = "https://ghp_token@github.com/user/repo.git" | ||
| with patch("subprocess.check_call"): | ||
| # Should not raise | ||
| git_utils._run_clone_command(url, "/tmp/dest") | ||
|
|
||
|
|
||
| def test_sanitize_git_url_comprehensive_attack_scenarios(self): | ||
| attack_scenarios = [ | ||
| "https://USER@YOUR_NGROK_OR_LOCALHOST/malicious.git@github.com%25legit%25repo.git", | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The catch-all except Exception as e uses t
ype(e)(str(e).replace(repo_url, safe_url))which assumes the exception type can be reconstructed from a single string argument. This could fail for custom exception types with different constructors. Consider wrapping in a secondary try/except or logging a warning.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, adding in a new commit.