Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .env-dist
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,9 @@ USE_SSO_AUTH=False
# in your .env
OIDC_RP_CLIENT_ID=setme
OIDC_RP_CLIENT_SECRET=setme

# To sync down sec advisories from github.com/mozilla/foundation-security-advisories-private/
# you'll need an auth key - see our password vault if you need it.
# If not set, the update will just be skipped. Downloading a DB with
# `make preflight` will get you the laest ones anyway.
# MOFO_SECURITY_ADVISORIES_REPO=setme
70 changes: 69 additions & 1 deletion bedrock/base/tests/test_sentry.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,15 @@ def test_pre_sentry_sanitisation__before_send_setup():

assert SENSITIVE_FIELDS_TO_MASK_ENTIRELY == [
"email",
# "token", # token is on the default blocklist, which we also use
# The following are also on `with_default_keys=True`'s blocklist;
# listed explicitly as belt-and-braces against SDK / processor
# version drift removing any of them from the default set.
"token",
"auth",
"pat",
# Substring of GIT_CONFIG_VALUE_0/_1/... — the env keys we use to
# pass an http.extraheader containing the base64-encoded PAT to git.
"git_config_value",
]


Expand All @@ -38,6 +46,12 @@ def test_pre_sentry_sanitisation__before_send_setup():
"token": "this is in sentry_processor's default set of keys to scrub AND out blocklist of keys",
# Custom blocklist
"email": "These items are on our blocklist and should be removed entirely",
# Substring match: GIT_CONFIG_VALUE_0 lowercases to git_config_value_0,
# which contains the "git_config_value" substring on our blocklist.
# (The real-world value would be an "AUTHORIZATION: Basic <base64>"
# string; the test uses a simpler value because the sanitiser's
# query_string path splits on `=` and would mishandle base64 padding.)
"GIT_CONFIG_VALUE_0": "fake-credential-that-must-be-masked",
}

expected_sanitised_data = {
Expand All @@ -50,6 +64,7 @@ def test_pre_sentry_sanitisation__before_send_setup():
"token": "********",
# Custom blocklist
"email": "********",
"GIT_CONFIG_VALUE_0": "********",
}


Expand Down Expand Up @@ -111,6 +126,59 @@ def test_pre_sentry_sanitisation(shared_datadir):
)
assert output == expected_sanitised_event


def test_pre_sentry_sanitisation_for_git_config_value_with_base64_padding():
"""Direct check that a realistic ``http.extraheader`` style value
(contains a colon AND base64 ``=`` padding, as the real
GIT_CONFIG_VALUE_0 will at runtime) is masked when it appears under
the ``GIT_CONFIG_VALUE_0`` key inside captured stacktrace locals.

This is the actual leak path: when ``GitRepo.git()`` raises a
``CalledProcessError``, the ``env`` / ``kwargs`` locals contain the
encoded credential. Sentry serialises those into
``event.exception.values[].stacktrace.frames[].vars`` and the
DesensitizationProcessor walks them via ``varmap``, masking values
whose key substring-matches the blocklist.

The broader ``test_pre_sentry_sanitisation`` test above uses a
simpler value because it also exercises the query_string sanitiser,
which has unrelated quirks around values containing ``=``; that
path doesn't apply to our real failure scenario (management
commands have no request).
"""
realistic_value = "AUTHORIZATION: Basic eC1hY2Nlc3MtdG9rZW46c3VwZXJzZWNyZXQ="
event = {
"exception": {
"values": [
{
"stacktrace": {
"frames": [
{
"vars": {
"env": {
"GIT_CONFIG_COUNT": "1",
"GIT_CONFIG_KEY_0": "http.extraheader",
"GIT_CONFIG_VALUE_0": realistic_value,
"GIT_TERMINAL_PROMPT": "0",
},
},
},
],
},
},
],
},
}

output = before_send(event=event, hint=None)

masked = output["exception"]["values"][0]["stacktrace"]["frames"][0]["vars"]["env"]
assert masked["GIT_CONFIG_VALUE_0"] == "********"
# And, belt-and-braces, no fragment of the credential survives anywhere
# in the serialised payload.
assert realistic_value not in json.dumps(output)
assert "eC1hY2Nlc3MtdG9rZW46" not in json.dumps(output)

# quick belt-and-braces check, too
stringified = json.dumps(output)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
ADVISORIES_REPO = settings.MOFO_SECURITY_ADVISORIES_REPO
ADVISORIES_PATH = settings.MOFO_SECURITY_ADVISORIES_PATH
ADVISORIES_BRANCH = settings.MOFO_SECURITY_ADVISORIES_BRANCH
ADVISORIES_AUTH = settings.MOFO_SECURITY_ADVISORIES_AUTH

SM_RE = re.compile(r"seamonkey", flags=re.IGNORECASE)
FNULL = open(os.devnull, "w")
Expand Down Expand Up @@ -262,12 +263,19 @@ def add_arguments(self, parser):
)

def handle_safe(self, quiet, no_git, clear_db, **options):
if not ADVISORIES_AUTH:
# No PAT configured — nothing to sync against the private repo.
# Skip rather than failing scripts like bin/run-db-update.sh.
self.stdout.write("Skipping security advisories sync: MOFO_SECURITY_ADVISORIES_AUTH is not set.")
return

force = no_git or clear_db
repo = GitRepo(
ADVISORIES_PATH,
ADVISORIES_REPO,
branch_name=ADVISORIES_BRANCH,
name="Security Advisories",
auth=ADVISORIES_AUTH or None,
)

def printout(msg, ending=None):
Expand Down
26 changes: 26 additions & 0 deletions bedrock/security/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

import io
import os.path
from unittest.mock import patch

from django.conf import settings
from django.test import override_settings

from bedrock.mozorg.tests import TestCase
from bedrock.security.management.commands import update_security_advisories
Expand Down Expand Up @@ -227,6 +229,7 @@ def test_delete_orphaned_products(self):
assert update_security_advisories.delete_orphaned_products() == 2
assert Product.objects.get().name == "Firefox 43.0.1"

@patch.object(update_security_advisories, "ADVISORIES_AUTH", "set-for-test")
@patch.object(update_security_advisories, "get_all_file_names")
@patch.object(update_security_advisories, "delete_files")
@patch.object(update_security_advisories, "update_db_from_file")
Expand All @@ -243,3 +246,26 @@ def test_file_name_extension_change(self, git_mock, udbff_mock, df_mock, gafn_mo
update_security_advisories.Command().handle_safe(quiet=True, no_git=False, clear_db=False)
udbff_mock.assert_called_with(update_security_advisories.filter_advisory_filenames(all_files)[0])
df_mock.assert_called_with(["mfsa2016-43.md"])

@override_settings(DEV=True)
@patch.object(update_security_advisories, "GitRepo")
@patch.object(update_security_advisories, "ADVISORIES_AUTH", "")
def test_skip_when_auth_missing_in_dev(self, git_mock):
"""A missing PAT in DEV should skip the sync rather than fail."""
cmd = update_security_advisories.Command()
cmd.stdout = io.StringIO()
cmd.handle_safe(quiet=False, no_git=False, clear_db=False)
assert "Skipping security advisories sync" in cmd.stdout.getvalue()
git_mock.assert_not_called()

@override_settings(DEV=False)
@patch.object(update_security_advisories, "GitRepo")
@patch.object(update_security_advisories, "ADVISORIES_AUTH", "")
def test_skip_when_auth_missing_outside_dev(self, git_mock):
"""Outside DEV, missing auth should still skip — there is no useful
sync work to do without a PAT against the private repo."""
cmd = update_security_advisories.Command()
cmd.stdout = io.StringIO()
cmd.handle_safe(quiet=False, no_git=False, clear_db=False)
assert "Skipping security advisories sync" in cmd.stdout.getvalue()
git_mock.assert_not_called()
21 changes: 19 additions & 2 deletions bedrock/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1087,9 +1087,12 @@ def _is_bedrock_custom_app(app_name):
MOFO_SECURITY_ADVISORIES_PATH = config("MOFO_SECURITY_ADVISORIES_PATH", default=data_path("mofo_security_advisories"))
MOFO_SECURITY_ADVISORIES_REPO = config(
"MOFO_SECURITY_ADVISORIES_REPO",
default="https://github.com/mozilla/foundation-security-advisories.git",
default="https://github.com/mozilla/foundation-security-advisories-private.git",
)
MOFO_SECURITY_ADVISORIES_BRANCH = config("MOFO_SECURITY_ADVISORIES_BRANCH", default="master")
# "<github username>:<github token>" (or just "<github token>") used to authenticate
# git clone/fetch against the advisories repo when it is private. Empty = no auth.
MOFO_SECURITY_ADVISORIES_AUTH = config("MOFO_SECURITY_ADVISORIES_AUTH", default="")
Comment on lines 1088 to +1095

CORS_ORIGIN_ALLOW_ALL = True
CORS_URLS_REGEX = r"^/([a-zA-Z-]+/)?(newsletter)/"
Expand Down Expand Up @@ -1176,7 +1179,21 @@ def get_default_gateway_linux():
# https://github.com/laiyongtao/sentry-processor
SENSITIVE_FIELDS_TO_MASK_ENTIRELY = [
"email",
# "token", # token is on the default blocklist, which we also use via `with_default_keys`
# Belt-and-braces: these names are also on `with_default_keys=True`'s
# blocklist, but listing them here explicitly guards against SDK / processor
# version drift removing a name from the default set, and documents the
# local-variable names that touch credentials in this codebase.
"token",
"auth",
"pat",
# `git_config_value` is a substring of GIT_CONFIG_VALUE_0/_1/... — the env
# keys we use in bedrock/utils/git.py to pass an http.extraheader containing
# a base64-encoded PAT to git. On a clone/fetch failure those keys end up in
# frame locals (env, kwargs) which Sentry captures with with_locals=True.
# The default blocklist names don't substring-match the GIT_CONFIG_VALUE_*
# keys, so without this entry the encoded credential would survive into the
# Sentry payload.
"git_config_value",
]
SENTRY_IGNORE_ERRORS = (
BrokenPipeError,
Expand Down
84 changes: 78 additions & 6 deletions bedrock/utils/git.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
import base64
import contextlib
import os
from datetime import datetime
from hashlib import sha256
Expand All @@ -9,6 +11,7 @@
from shutil import rmtree
from subprocess import STDOUT, CalledProcessError, check_output
from time import time
from urllib.parse import urlparse

from django.conf import settings
from django.utils.encoding import force_str
Expand All @@ -19,28 +22,93 @@

GIT = getattr(settings, "GIT_BIN", "git")

# Conventional GitHub username for token-only auth (fine-grained PATs).
DEFAULT_AUTH_USERNAME = "x-access-token"


class GitRepo:
def __init__(self, path, remote_url=None, branch_name="main", name=None):
def __init__(self, path, remote_url=None, branch_name="main", name=None, auth=None):
self.path = Path(path)
self.path_str = str(self.path)
self.remote_url = remote_url
self.branch_name = branch_name
self.auth = auth or None
db_latest_key = f"{self.path_str}:{remote_url or ''}:{branch_name}"
self.db_latest_key = sha256(db_latest_key.encode()).hexdigest()
self.repo_name = name or self.path.name

def git(self, *args):
def git(self, *args, env=None):
"""Run a git command against the current repo"""
curdir = os.getcwd()
try:
os.chdir(self.path_str)
output = check_output((GIT,) + args, stderr=STDOUT)
kwargs = {"stderr": STDOUT}
if env is not None:
kwargs["env"] = {**os.environ, **env}
output = check_output((GIT,) + args, **kwargs)
finally:
os.chdir(curdir)

return force_str(output.strip())

def _split_auth(self):
"""Return (username, token) parsed from self.auth.

Accepts either ``"<username>:<token>"`` (matches the existing
``FLUENT_REPO_AUTH`` convention) or a bare ``"<token>"``. Returns
``(None, None)`` if no auth is configured.
"""
if not self.auth:
return None, None
if ":" in self.auth:
username, token = self.auth.split(":", 1)
return (username or DEFAULT_AUTH_USERNAME), token
return DEFAULT_AUTH_USERNAME, self.auth

@contextlib.contextmanager
def _auth_env(self):
"""Yield env overrides that authenticate git via HTTP Basic auth.

Uses git's ``GIT_CONFIG_COUNT`` / ``GIT_CONFIG_KEY_N`` /
``GIT_CONFIG_VALUE_N`` env-var protocol (git >= 2.31) to set
``http.extraheader`` for the duration of the subprocess. The token
lives only in the subprocess environment — it never enters argv,
git's error output, or the ``CalledProcessError`` payload that the
``@alert_sentry_on_exception`` decorator would otherwise ship.

Yields ``None`` when no auth is configured.
"""
if not self.auth:
yield None
return

username, token = self._split_auth()
basic = base64.b64encode(f"{username}:{token}".encode()).decode("ascii")
yield {
"GIT_CONFIG_COUNT": "1",
"GIT_CONFIG_KEY_0": self._extraheader_config_key(),
"GIT_CONFIG_VALUE_0": f"AUTHORIZATION: Basic {basic}",
"GIT_TERMINAL_PROMPT": "0",
}

def _extraheader_config_key(self):
"""Return the git config key for the http.extraheader setting,
scoped to the remote's scheme+host when possible.

Per git's URL-specific HTTP config rules, ``http.<url>.extraheader``
only applies to requests whose URL is prefix-matched by ``<url>``.
Scoping to e.g. ``http.https://github.com/.extraheader`` means
the ``Authorization`` header won't follow off-host redirects or
LFS fetches against unrelated hosts during the clone/fetch.

Falls back to the global ``http.extraheader`` if the remote URL
isn't usable HTTPS (e.g. SSH form, missing host).
"""
parsed = urlparse(self.remote_url or "")
if parsed.scheme in ("http", "https") and parsed.hostname:
return f"http.{parsed.scheme}://{parsed.hostname}/.extraheader"
return "http.extraheader"

@property
def current_hash(self):
"""The git revision ID (hash) of the current HEAD or None if no repo"""
Expand Down Expand Up @@ -102,13 +170,15 @@ def clone(self):
raise RuntimeError("remote_url required to clone")

self.path.mkdir(parents=True, exist_ok=True)
self.git("clone", "--depth", "1", "--branch", self.branch_name, self.remote_url, ".")
with self._auth_env() as env:
extra = {"env": env} if env else {}
self.git("clone", "--depth", "1", "--branch", self.branch_name, self.remote_url, ".", **extra)

def reclone(self):
"""Safely get a fresh clone of the repo"""
if self.path.exists():
new_path = self.path.with_suffix(f".{int(time())}")
new_repo = GitRepo(new_path, self.remote_url, self.branch_name)
new_repo = GitRepo(new_path, self.remote_url, self.branch_name, auth=self.auth)
new_repo.clone()
# only remove the old after the new clone succeeds
rmtree(self.path_str, ignore_errors=True)
Expand All @@ -121,7 +191,9 @@ def pull(self):

Return the previous hash and the new hash."""
old_hash = self.current_hash
self.git("fetch", "-f", self.remote_url, self.branch_name)
with self._auth_env() as env:
extra = {"env": env} if env else {}
self.git("fetch", "-f", self.remote_url, self.branch_name, **extra)
self.git("checkout", "-f", "FETCH_HEAD")
return old_hash, self.current_hash

Expand Down
Loading
Loading