Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion components/lif/mdr_utils/database_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import re
from typing import AsyncGenerator
from urllib.parse import urlparse, urlunparse

from fastapi import HTTPException, Request, status
from lif.mdr_utils.logger_config import get_logger
Expand All @@ -15,8 +16,39 @@
logger = get_logger(__name__)


# Fallback matcher for credentials that ``urlparse`` cannot see. ``urlparse``
# ends the netloc at the first ``/``, ``?`` or ``#``, so an unescaped password
# containing one of those characters is not reported as a password at all.
# The pattern treats everything between the first ``:`` after the username
# and the next ``@`` as the password, whatever characters it contains.
_USERINFO_PASSWORD_RE = re.compile(r"^(?P<prefix>[\w+]+://[^:/]*):[^@]+@")


def _redact_url(url: str) -> str:
    """Mask the password in a SQLAlchemy connection URL for safe logging.

    Returns the URL with the password replaced by ``***`` while preserving
    scheme, username, host, port, and database. The host/port portion is
    copied verbatim from the input, so IPv6 brackets, host casing and an
    explicit port are kept exactly as written. Issue #938: the previous
    startup log emitted the full URL including the credential.

    Redaction is deliberately fail-closed:

    * If ``urlparse`` reports a password, it is masked.
    * Otherwise, if the URL still has a ``scheme://user:secret@`` shape
      (an unescaped ``/``, ``?`` or ``#`` in the password hides it from
      ``urlparse``), that span is masked as well. A URL without a password
      but with a ``:`` before and an ``@`` after the host (for example an
      ``@`` in the query string) may therefore be over-redacted; that is
      preferred over leaking a credential.
    * URLs with no recognisable password are returned unchanged.

    Best-effort: this is only called for logging, so it never raises. A
    ``ValueError`` from parsing (malformed URL, or a non-integer port such
    as the literal string ``None`` from an unset env var) yields the
    sentinel ``"<unparseable-url>"``.

    Args:
        url: Connection URL, possibly containing a password.

    Returns:
        The URL with any password masked, the unchanged URL when no
        password is present, or ``"<unparseable-url>"`` on a parse error.
    """
    try:
        parts = urlparse(url)
        if parts.password:
            # Reading ``port`` validates it: a non-integer port raises
            # ValueError, which is turned into the sentinel below.
            _ = parts.port
            # Keep everything after the last "@" byte-for-byte instead of
            # rebuilding it from ``hostname``/``port``, which would strip
            # IPv6 brackets, lowercase the host and drop a port of 0.
            host_and_port = parts.netloc.rpartition("@")[2]
            netloc = f"{parts.username or ''}:***@{host_and_port}"
            return urlunparse(parts._replace(netloc=netloc))
    except ValueError:
        return "<unparseable-url>"
    # ``urlparse`` found no password; mask any credential it could not see.
    return _USERINFO_PASSWORD_RE.sub(r"\g<prefix>:***@", url, count=1)


# Connection URL assembled from the POSTGRESQL_* environment variables. An
# unset variable is interpolated as the literal string "None".
DATABASE_URL = f"postgresql+asyncpg://{os.getenv('POSTGRESQL_USER')}:{os.getenv('POSTGRESQL_PASSWORD')}@{os.getenv('POSTGRESQL_HOST')}:{os.getenv('POSTGRESQL_PORT')}/{os.getenv('POSTGRESQL_DB')}"
# Log only the redacted form; the raw URL contains the database password
# and must never reach the logs (issue #938).
logger.info("DATABASE_URL : %s", _redact_url(DATABASE_URL))
# Create an async engine
engine = create_async_engine(DATABASE_URL, echo=True)

Expand Down
Empty file.
15 changes: 15 additions & 0 deletions test/components/lif/mdr_utils/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Seed dummy env vars so importing `lif.mdr_utils.database_setup` doesn't
fail at engine-construction time when running these unit tests.

The tests in this directory exercise pure-Python helpers (e.g.
`_redact_url`); they never actually connect to a database. We just need
the module-level `DATABASE_URL` + `create_async_engine` calls to succeed.
"""

import os

# Placeholder connection settings. ``setdefault`` leaves any value that is
# already present in the environment untouched.
_DUMMY_POSTGRES_ENV = {
    "POSTGRESQL_USER": "test",
    "POSTGRESQL_PASSWORD": "test",
    "POSTGRESQL_HOST": "localhost",
    "POSTGRESQL_PORT": "5432",
    "POSTGRESQL_DB": "test",
}

for _var, _placeholder in _DUMMY_POSTGRES_ENV.items():
    os.environ.setdefault(_var, _placeholder)
47 changes: 47 additions & 0 deletions test/components/lif/mdr_utils/test_database_setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Unit tests for `database_setup._redact_url` — issue #938.

Importing `_redact_url` executes the whole `database_setup` module, which
imports SQLAlchemy/asyncpg/etc. and builds an engine from env vars at
import time. The sibling `conftest.py` seeds dummy `POSTGRESQL_*` values so
that import succeeds; these tests never connect to a database."""

from lif.mdr_utils.database_setup import _redact_url


class TestRedactUrl:
    """Behavioural tests for the log-redaction helper `_redact_url`."""

    def test_masks_password_in_typical_postgres_url(self):
        url = "postgresql+asyncpg://postgres:s3cret!@dbhost.example:5432/mydb"
        assert _redact_url(url) == "postgresql+asyncpg://postgres:***@dbhost.example:5432/mydb"

    def test_masks_password_with_special_characters(self):
        # Real dev password observed in CloudWatch had `:`, `}`, `&`, `<`, `$`
        # in it. Here we use a representative literal (an embedded `:` plus a
        # percent-encoded `$`) to confirm the redaction doesn't choke.
        url = "postgresql+asyncpg://postgres:p:%24sw0rd@host:5432/db"
        redacted = _redact_url(url)
        assert "p:%24sw0rd" not in redacted
        assert "postgres:***@host:5432/db" in redacted

    def test_url_without_port_still_redacts(self):
        url = "postgresql+asyncpg://postgres:s3cret@host/db"
        # No explicit port — netloc is just user:pass@host. Redacted form
        # must drop the password but keep everything else.
        redacted = _redact_url(url)
        assert "s3cret" not in redacted
        assert "postgres:***@host" in redacted
        assert "/db" in redacted

    def test_url_without_password_is_unchanged(self):
        # IAM-auth style (no password), or local trust auth — we return
        # the original string rather than mangling it.
        url = "postgresql+asyncpg://postgres@host:5432/db"
        assert _redact_url(url) == url

    def test_unparseable_input_does_not_raise(self):
        # The function is only ever called for logging; if it raises,
        # MDR startup fails. urlparse is famously tolerant —
        # `urlparse("")` returns an empty ParseResult rather than
        # raising — so these inputs contain no password and come back
        # unchanged. This test documents the "no exception" guarantee.
        assert _redact_url("") == ""
        assert _redact_url("not-a-url") == "not-a-url"

    def test_non_integer_port_returns_sentinel(self):
        # When the POSTGRESQL_* env vars are unset, the module builds a URL
        # out of literal "None" strings. Reading the port of such a URL
        # raises ValueError inside the helper, which must be converted to
        # the sentinel instead of propagating and aborting startup.
        url = "postgresql+asyncpg://None:None@None:None/None"
        assert _redact_url(url) == "<unparseable-url>"