diff --git a/.env-dist b/.env-dist index fee944146b4..6d467a05e7f 100644 --- a/.env-dist +++ b/.env-dist @@ -26,3 +26,9 @@ USE_SSO_AUTH=False # in your .env OIDC_RP_CLIENT_ID=setme OIDC_RP_CLIENT_SECRET=setme + +# To sync down sec advisories from github.com/mozilla/foundation-security-advisories-private/ +# you'll need an auth key - see our password vault if you need it. +# If not set, the update will just be skipped. Downloading a DB with +# `make preflight` will get you the laest ones anyway. +# MOFO_SECURITY_ADVISORIES_REPO=setme diff --git a/bedrock/base/tests/test_sentry.py b/bedrock/base/tests/test_sentry.py index 7e3f7eeca68..1c8ab267597 100644 --- a/bedrock/base/tests/test_sentry.py +++ b/bedrock/base/tests/test_sentry.py @@ -23,7 +23,15 @@ def test_pre_sentry_sanitisation__before_send_setup(): assert SENSITIVE_FIELDS_TO_MASK_ENTIRELY == [ "email", - # "token", # token is on the default blocklist, which we also use + # The following are also on `with_default_keys=True`'s blocklist; + # listed explicitly as belt-and-braces against SDK / processor + # version drift removing any of them from the default set. + "token", + "auth", + "pat", + # Substring of GIT_CONFIG_VALUE_0/_1/... — the env keys we use to + # pass an http.extraheader containing the base64-encoded PAT to git. + "git_config_value", ] @@ -38,6 +46,12 @@ def test_pre_sentry_sanitisation__before_send_setup(): "token": "this is in sentry_processor's default set of keys to scrub AND out blocklist of keys", # Custom blocklist "email": "These items are on our blocklist and should be removed entirely", + # Substring match: GIT_CONFIG_VALUE_0 lowercases to git_config_value_0, + # which contains the "git_config_value" substring on our blocklist. + # (The real-world value would be an "AUTHORIZATION: Basic " + # string; the test uses a simpler value because the sanitiser's + # query_string path splits on `=` and would mishandle base64 padding.) + "GIT_CONFIG_VALUE_0": "fake-credential-that-must-be-masked", } expected_sanitised_data = { @@ -50,6 +64,7 @@ def test_pre_sentry_sanitisation__before_send_setup(): "token": "********", # Custom blocklist "email": "********", + "GIT_CONFIG_VALUE_0": "********", } @@ -111,6 +126,59 @@ def test_pre_sentry_sanitisation(shared_datadir): ) assert output == expected_sanitised_event + +def test_pre_sentry_sanitisation_for_git_config_value_with_base64_padding(): + """Direct check that a realistic ``http.extraheader`` style value + (contains a colon AND base64 ``=`` padding, as the real + GIT_CONFIG_VALUE_0 will at runtime) is masked when it appears under + the ``GIT_CONFIG_VALUE_0`` key inside captured stacktrace locals. + + This is the actual leak path: when ``GitRepo.git()`` raises a + ``CalledProcessError``, the ``env`` / ``kwargs`` locals contain the + encoded credential. Sentry serialises those into + ``event.exception.values[].stacktrace.frames[].vars`` and the + DesensitizationProcessor walks them via ``varmap``, masking values + whose key substring-matches the blocklist. + + The broader ``test_pre_sentry_sanitisation`` test above uses a + simpler value because it also exercises the query_string sanitiser, + which has unrelated quirks around values containing ``=``; that + path doesn't apply to our real failure scenario (management + commands have no request). + """ + realistic_value = "AUTHORIZATION: Basic eC1hY2Nlc3MtdG9rZW46c3VwZXJzZWNyZXQ=" + event = { + "exception": { + "values": [ + { + "stacktrace": { + "frames": [ + { + "vars": { + "env": { + "GIT_CONFIG_COUNT": "1", + "GIT_CONFIG_KEY_0": "http.extraheader", + "GIT_CONFIG_VALUE_0": realistic_value, + "GIT_TERMINAL_PROMPT": "0", + }, + }, + }, + ], + }, + }, + ], + }, + } + + output = before_send(event=event, hint=None) + + masked = output["exception"]["values"][0]["stacktrace"]["frames"][0]["vars"]["env"] + assert masked["GIT_CONFIG_VALUE_0"] == "********" + # And, belt-and-braces, no fragment of the credential survives anywhere + # in the serialised payload. + assert realistic_value not in json.dumps(output) + assert "eC1hY2Nlc3MtdG9rZW46" not in json.dumps(output) + # quick belt-and-braces check, too stringified = json.dumps(output) diff --git a/bedrock/security/management/commands/update_security_advisories.py b/bedrock/security/management/commands/update_security_advisories.py index b9d7282da62..d4d0f3c9345 100644 --- a/bedrock/security/management/commands/update_security_advisories.py +++ b/bedrock/security/management/commands/update_security_advisories.py @@ -30,6 +30,7 @@ ADVISORIES_REPO = settings.MOFO_SECURITY_ADVISORIES_REPO ADVISORIES_PATH = settings.MOFO_SECURITY_ADVISORIES_PATH ADVISORIES_BRANCH = settings.MOFO_SECURITY_ADVISORIES_BRANCH +ADVISORIES_AUTH = settings.MOFO_SECURITY_ADVISORIES_AUTH SM_RE = re.compile(r"seamonkey", flags=re.IGNORECASE) FNULL = open(os.devnull, "w") @@ -262,12 +263,19 @@ def add_arguments(self, parser): ) def handle_safe(self, quiet, no_git, clear_db, **options): + if not ADVISORIES_AUTH: + # No PAT configured — nothing to sync against the private repo. + # Skip rather than failing scripts like bin/run-db-update.sh. + self.stdout.write("Skipping security advisories sync: MOFO_SECURITY_ADVISORIES_AUTH is not set.") + return + force = no_git or clear_db repo = GitRepo( ADVISORIES_PATH, ADVISORIES_REPO, branch_name=ADVISORIES_BRANCH, name="Security Advisories", + auth=ADVISORIES_AUTH or None, ) def printout(msg, ending=None): diff --git a/bedrock/security/tests/test_commands.py b/bedrock/security/tests/test_commands.py index f9f3e900707..ff6ec688169 100644 --- a/bedrock/security/tests/test_commands.py +++ b/bedrock/security/tests/test_commands.py @@ -2,10 +2,12 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. +import io import os.path from unittest.mock import patch from django.conf import settings +from django.test import override_settings from bedrock.mozorg.tests import TestCase from bedrock.security.management.commands import update_security_advisories @@ -227,6 +229,7 @@ def test_delete_orphaned_products(self): assert update_security_advisories.delete_orphaned_products() == 2 assert Product.objects.get().name == "Firefox 43.0.1" + @patch.object(update_security_advisories, "ADVISORIES_AUTH", "set-for-test") @patch.object(update_security_advisories, "get_all_file_names") @patch.object(update_security_advisories, "delete_files") @patch.object(update_security_advisories, "update_db_from_file") @@ -243,3 +246,26 @@ def test_file_name_extension_change(self, git_mock, udbff_mock, df_mock, gafn_mo update_security_advisories.Command().handle_safe(quiet=True, no_git=False, clear_db=False) udbff_mock.assert_called_with(update_security_advisories.filter_advisory_filenames(all_files)[0]) df_mock.assert_called_with(["mfsa2016-43.md"]) + + @override_settings(DEV=True) + @patch.object(update_security_advisories, "GitRepo") + @patch.object(update_security_advisories, "ADVISORIES_AUTH", "") + def test_skip_when_auth_missing_in_dev(self, git_mock): + """A missing PAT in DEV should skip the sync rather than fail.""" + cmd = update_security_advisories.Command() + cmd.stdout = io.StringIO() + cmd.handle_safe(quiet=False, no_git=False, clear_db=False) + assert "Skipping security advisories sync" in cmd.stdout.getvalue() + git_mock.assert_not_called() + + @override_settings(DEV=False) + @patch.object(update_security_advisories, "GitRepo") + @patch.object(update_security_advisories, "ADVISORIES_AUTH", "") + def test_skip_when_auth_missing_outside_dev(self, git_mock): + """Outside DEV, missing auth should still skip — there is no useful + sync work to do without a PAT against the private repo.""" + cmd = update_security_advisories.Command() + cmd.stdout = io.StringIO() + cmd.handle_safe(quiet=False, no_git=False, clear_db=False) + assert "Skipping security advisories sync" in cmd.stdout.getvalue() + git_mock.assert_not_called() diff --git a/bedrock/settings/base.py b/bedrock/settings/base.py index d1057b67661..a1adb9fc946 100644 --- a/bedrock/settings/base.py +++ b/bedrock/settings/base.py @@ -1087,9 +1087,12 @@ def _is_bedrock_custom_app(app_name): MOFO_SECURITY_ADVISORIES_PATH = config("MOFO_SECURITY_ADVISORIES_PATH", default=data_path("mofo_security_advisories")) MOFO_SECURITY_ADVISORIES_REPO = config( "MOFO_SECURITY_ADVISORIES_REPO", - default="https://github.com/mozilla/foundation-security-advisories.git", + default="https://github.com/mozilla/foundation-security-advisories-private.git", ) MOFO_SECURITY_ADVISORIES_BRANCH = config("MOFO_SECURITY_ADVISORIES_BRANCH", default="master") +# ":" (or just "") used to authenticate +# git clone/fetch against the advisories repo when it is private. Empty = no auth. +MOFO_SECURITY_ADVISORIES_AUTH = config("MOFO_SECURITY_ADVISORIES_AUTH", default="") CORS_ORIGIN_ALLOW_ALL = True CORS_URLS_REGEX = r"^/([a-zA-Z-]+/)?(newsletter)/" @@ -1176,7 +1179,21 @@ def get_default_gateway_linux(): # https://github.com/laiyongtao/sentry-processor SENSITIVE_FIELDS_TO_MASK_ENTIRELY = [ "email", - # "token", # token is on the default blocklist, which we also use via `with_default_keys` + # Belt-and-braces: these names are also on `with_default_keys=True`'s + # blocklist, but listing them here explicitly guards against SDK / processor + # version drift removing a name from the default set, and documents the + # local-variable names that touch credentials in this codebase. + "token", + "auth", + "pat", + # `git_config_value` is a substring of GIT_CONFIG_VALUE_0/_1/... — the env + # keys we use in bedrock/utils/git.py to pass an http.extraheader containing + # a base64-encoded PAT to git. On a clone/fetch failure those keys end up in + # frame locals (env, kwargs) which Sentry captures with with_locals=True. + # The default blocklist names don't substring-match the GIT_CONFIG_VALUE_* + # keys, so without this entry the encoded credential would survive into the + # Sentry payload. + "git_config_value", ] SENTRY_IGNORE_ERRORS = ( BrokenPipeError, diff --git a/bedrock/utils/git.py b/bedrock/utils/git.py index e587fba3975..2bb904b9bc4 100644 --- a/bedrock/utils/git.py +++ b/bedrock/utils/git.py @@ -1,6 +1,8 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. +import base64 +import contextlib import os from datetime import datetime from hashlib import sha256 @@ -9,6 +11,7 @@ from shutil import rmtree from subprocess import STDOUT, CalledProcessError, check_output from time import time +from urllib.parse import urlparse from django.conf import settings from django.utils.encoding import force_str @@ -19,28 +22,93 @@ GIT = getattr(settings, "GIT_BIN", "git") +# Conventional GitHub username for token-only auth (fine-grained PATs). +DEFAULT_AUTH_USERNAME = "x-access-token" + class GitRepo: - def __init__(self, path, remote_url=None, branch_name="main", name=None): + def __init__(self, path, remote_url=None, branch_name="main", name=None, auth=None): self.path = Path(path) self.path_str = str(self.path) self.remote_url = remote_url self.branch_name = branch_name + self.auth = auth or None db_latest_key = f"{self.path_str}:{remote_url or ''}:{branch_name}" self.db_latest_key = sha256(db_latest_key.encode()).hexdigest() self.repo_name = name or self.path.name - def git(self, *args): + def git(self, *args, env=None): """Run a git command against the current repo""" curdir = os.getcwd() try: os.chdir(self.path_str) - output = check_output((GIT,) + args, stderr=STDOUT) + kwargs = {"stderr": STDOUT} + if env is not None: + kwargs["env"] = {**os.environ, **env} + output = check_output((GIT,) + args, **kwargs) finally: os.chdir(curdir) return force_str(output.strip()) + def _split_auth(self): + """Return (username, token) parsed from self.auth. + + Accepts either ``":"`` (matches the existing + ``FLUENT_REPO_AUTH`` convention) or a bare ``""``. Returns + ``(None, None)`` if no auth is configured. + """ + if not self.auth: + return None, None + if ":" in self.auth: + username, token = self.auth.split(":", 1) + return (username or DEFAULT_AUTH_USERNAME), token + return DEFAULT_AUTH_USERNAME, self.auth + + @contextlib.contextmanager + def _auth_env(self): + """Yield env overrides that authenticate git via HTTP Basic auth. + + Uses git's ``GIT_CONFIG_COUNT`` / ``GIT_CONFIG_KEY_N`` / + ``GIT_CONFIG_VALUE_N`` env-var protocol (git >= 2.31) to set + ``http.extraheader`` for the duration of the subprocess. The token + lives only in the subprocess environment — it never enters argv, + git's error output, or the ``CalledProcessError`` payload that the + ``@alert_sentry_on_exception`` decorator would otherwise ship. + + Yields ``None`` when no auth is configured. + """ + if not self.auth: + yield None + return + + username, token = self._split_auth() + basic = base64.b64encode(f"{username}:{token}".encode()).decode("ascii") + yield { + "GIT_CONFIG_COUNT": "1", + "GIT_CONFIG_KEY_0": self._extraheader_config_key(), + "GIT_CONFIG_VALUE_0": f"AUTHORIZATION: Basic {basic}", + "GIT_TERMINAL_PROMPT": "0", + } + + def _extraheader_config_key(self): + """Return the git config key for the http.extraheader setting, + scoped to the remote's scheme+host when possible. + + Per git's URL-specific HTTP config rules, ``http..extraheader`` + only applies to requests whose URL is prefix-matched by ````. + Scoping to e.g. ``http.https://github.com/.extraheader`` means + the ``Authorization`` header won't follow off-host redirects or + LFS fetches against unrelated hosts during the clone/fetch. + + Falls back to the global ``http.extraheader`` if the remote URL + isn't usable HTTPS (e.g. SSH form, missing host). + """ + parsed = urlparse(self.remote_url or "") + if parsed.scheme in ("http", "https") and parsed.hostname: + return f"http.{parsed.scheme}://{parsed.hostname}/.extraheader" + return "http.extraheader" + @property def current_hash(self): """The git revision ID (hash) of the current HEAD or None if no repo""" @@ -102,13 +170,15 @@ def clone(self): raise RuntimeError("remote_url required to clone") self.path.mkdir(parents=True, exist_ok=True) - self.git("clone", "--depth", "1", "--branch", self.branch_name, self.remote_url, ".") + with self._auth_env() as env: + extra = {"env": env} if env else {} + self.git("clone", "--depth", "1", "--branch", self.branch_name, self.remote_url, ".", **extra) def reclone(self): """Safely get a fresh clone of the repo""" if self.path.exists(): new_path = self.path.with_suffix(f".{int(time())}") - new_repo = GitRepo(new_path, self.remote_url, self.branch_name) + new_repo = GitRepo(new_path, self.remote_url, self.branch_name, auth=self.auth) new_repo.clone() # only remove the old after the new clone succeeds rmtree(self.path_str, ignore_errors=True) @@ -121,7 +191,9 @@ def pull(self): Return the previous hash and the new hash.""" old_hash = self.current_hash - self.git("fetch", "-f", self.remote_url, self.branch_name) + with self._auth_env() as env: + extra = {"env": env} if env else {} + self.git("fetch", "-f", self.remote_url, self.branch_name, **extra) self.git("checkout", "-f", "FETCH_HEAD") return old_hash, self.current_hash diff --git a/bedrock/utils/tests/test_git.py b/bedrock/utils/tests/test_git.py index 23145e1d1da..bcb1972c696 100644 --- a/bedrock/utils/tests/test_git.py +++ b/bedrock/utils/tests/test_git.py @@ -2,6 +2,7 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. +import base64 from unittest.mock import DEFAULT, call, patch from django.test import override_settings @@ -82,6 +83,129 @@ def test_git_clone(): git_mock["git"].assert_called_with("clone", "--depth", "1", "--branch", "main", "https://example.com", ".") +def test_git_split_auth(): + g = git.GitRepo(".") + assert g._split_auth() == (None, None) + + g.auth = "bare-token" + assert g._split_auth() == ("x-access-token", "bare-token") + + g.auth = "testserviceaccount:supersecret" + assert g._split_auth() == ("testserviceaccount", "supersecret") + + # missing username falls back to the default + g.auth = ":supersecret" + assert g._split_auth() == ("x-access-token", "supersecret") + + +def test_git_auth_env_no_auth_yields_none(): + g = git.GitRepo(".") + with g._auth_env() as env: + assert env is None + + +def test_git_auth_env_with_auth(): + g = git.GitRepo(".", "https://github.com/mozilla/repo.git", auth="testserviceaccount:supersecret") + with g._auth_env() as env: + assert env is not None + assert env["GIT_CONFIG_COUNT"] == "1" + # Config key is scoped to the remote host so the Authorization header + # won't follow off-host redirects or LFS fetches. + assert env["GIT_CONFIG_KEY_0"] == "http.https://github.com/.extraheader" + assert env["GIT_TERMINAL_PROMPT"] == "0" + + header = env["GIT_CONFIG_VALUE_0"] + assert header.startswith("AUTHORIZATION: Basic ") + decoded = base64.b64decode(header.removeprefix("AUTHORIZATION: Basic ")).decode("ascii") + assert decoded == "testserviceaccount:supersecret" + + +def test_git_auth_env_falls_back_to_global_key_for_non_https_remote(): + """If we somehow have auth but no usable HTTPS remote, fall back to the + unscoped `http.extraheader` rather than producing a malformed key.""" + g = git.GitRepo(".", "git@github.com:foo/bar.git", auth="user:tok") + with g._auth_env() as env: + assert env["GIT_CONFIG_KEY_0"] == "http.extraheader" + + g = git.GitRepo(".", auth="user:tok") # no remote_url at all + with g._auth_env() as env: + assert env["GIT_CONFIG_KEY_0"] == "http.extraheader" + + +def test_git_auth_env_bare_token_uses_default_username(): + g = git.GitRepo(".", auth="bare-token") + with g._auth_env() as env: + decoded = base64.b64decode(env["GIT_CONFIG_VALUE_0"].removeprefix("AUTHORIZATION: Basic ")).decode("ascii") + assert decoded == "x-access-token:bare-token" + + +def test_git_clone_with_auth_keeps_token_out_of_argv_and_url(): + g = git.GitRepo(".", "https://example.com/repo.git", auth="testserviceaccount:supersecret") + with patch.multiple(g, git=DEFAULT, path=DEFAULT) as mocks: + g.clone() + + call_args = mocks["git"].call_args + # URL is plain — no username, no token. + assert call_args.args == ( + "clone", + "--depth", + "1", + "--branch", + "main", + "https://example.com/repo.git", + ".", + ) + # Token must not appear in any positional arg. + for a in call_args.args: + assert "supersecret" not in str(a) + # Auth travels via env vars setting host-scoped http.extraheader. + assert "env" in call_args.kwargs + env = call_args.kwargs["env"] + assert env["GIT_CONFIG_KEY_0"] == "http.https://example.com/.extraheader" + assert env["GIT_TERMINAL_PROMPT"] == "0" + + +def test_git_pull_with_auth_only_authenticates_fetch(): + g = git.GitRepo(".", "https://example.com/repo.git", auth="testserviceaccount:supersecret") + with patch.object(g, "git") as git_mock: + git_mock.return_value = "deadbeef" + g.pull() + + fetch_calls = [c for c in git_mock.call_args_list if c.args and c.args[0] == "fetch"] + assert len(fetch_calls) == 1 + fetch_call = fetch_calls[0] + # Token never appears in argv. + for a in fetch_call.args: + assert "supersecret" not in str(a) + # URL is plain. + assert "https://example.com/repo.git" in fetch_call.args + # Auth env is set on the fetch call (host-scoped). + assert fetch_call.kwargs["env"]["GIT_CONFIG_KEY_0"] == "http.https://example.com/.extraheader" + + # The checkout step does not need auth. + checkout_calls = [c for c in git_mock.call_args_list if c.args and c.args[0] == "checkout"] + assert len(checkout_calls) == 1 + assert checkout_calls[0].kwargs == {} + + +def test_git_call_passes_env_when_provided(): + """The base git() helper merges the supplied env on top of os.environ.""" + with patch.object(git, "os") as os_mock, patch.object(git, "check_output") as co_mock: + os_mock.getcwd.return_value = "olddir" + os_mock.environ = {"PATH": "/usr/bin", "EXISTING": "keep"} + co_mock.return_value = "ok" + g = git.GitRepo("new_repo") + g.git("fetch", "origin", env={"GIT_ASKPASS": "/tmp/x", "EXISTING": "override"}) + + _, kwargs = co_mock.call_args + assert kwargs["stderr"] == git.STDOUT + assert kwargs["env"] == { + "PATH": "/usr/bin", + "EXISTING": "override", + "GIT_ASKPASS": "/tmp/x", + } + + @patch.object(git, "rmtree") def test_git_update(rmtree_mock): g = git.GitRepo(".", "https://example.com") diff --git a/bin/run-db-update.sh b/bin/run-db-update.sh index 262921bbac2..b7182f03760 100755 --- a/bin/run-db-update.sh +++ b/bin/run-db-update.sh @@ -38,7 +38,6 @@ failure_detected=false # make sure l10n files are here for use in other commands python manage.py l10n_update || failure_detected=true python manage.py update_product_details_files || failure_detected=true -python manage.py update_security_advisories --quiet || failure_detected=true python manage.py update_wordpress --quiet || failure_detected=true python manage.py update_release_notes --quiet || failure_detected=true python manage.py update_content_cards --quiet || failure_detected=true @@ -49,9 +48,10 @@ python manage.py update_webvision_docs --quiet || failure_detected=true DEV=False python manage.py update_sitemaps_data --quiet || failure_detected=true python manage.py sync_greenhouse --quiet || failure_detected=true -# if [[ "$AUTH" == true ]]; then -# # Some jobs require some auth. Don't run these during build of the Docker images -# fi +if [[ "$AUTH" == true ]]; then + # Some jobs require some auth. Don't run these during build of the Docker images + python manage.py update_security_advisories --quiet || failure_detected=true +fi # If all is well, ping DMS to avoid an alert being raised. if [[ $failure_detected == false ]]; then