From 6fb0ed82eeeb5ae00813af55d2625a62d6fb893d Mon Sep 17 00:00:00 2001 From: Micah Villmow <4211002+mvillmow@users.noreply.github.com> Date: Tue, 12 May 2026 19:43:57 -0700 Subject: [PATCH 1/3] security(promtail): redact URL-embedded credentials (#194) Adds two new replace stages to the hermes pipeline: - scheme://user:password@host -> scheme://@host - ?api_key=... / &access_token=... / &client_secret=... / etc -> Adds regression tests asserting both forms are stripped before logs leave promtail. Closes #194 Co-Authored-By: Claude Opus 4.7 (1M context) --- configs/promtail.yml | 9 ++++++++ tests/test_configs.py | 48 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/configs/promtail.yml b/configs/promtail.yml index 82467ca..83f4ea4 100644 --- a/configs/promtail.yml +++ b/configs/promtail.yml @@ -62,6 +62,15 @@ scrape_configs: - replace: expression: '(?i)(password[=:]\s*)\S+' replace: '${1}' + # URL-embedded credentials: scheme://user:password@host -> scheme://@host + - replace: + expression: '([a-zA-Z][a-zA-Z0-9+.\-]*://)[^\s/@:]+:[^\s/@]+@' + replace: '${1}@' + # Query-string API keys / tokens / secrets / passwords: + # ?api_key=abc, &access_token=xyz, &client_secret=..., &password=... + - replace: + expression: '(?i)([?&](?:api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret|token|password|key)=)[^&\s#]+' + replace: '${1}' # Tail NATS server log - job_name: nats diff --git a/tests/test_configs.py b/tests/test_configs.py index d677542..870c866 100644 --- a/tests/test_configs.py +++ b/tests/test_configs.py @@ -171,6 +171,54 @@ def test_redaction_enabled_jobs_have_secret_patterns(self): f"{pat!r}; got expressions: {joined!r}" ) + def _hermes_replace_expressions(self) -> list[str]: + hermes_job = next( + (j for j in self.config["scrape_configs"] if j.get("job_name") == "hermes"), + None, + ) + assert hermes_job is not None, "hermes scrape job not found" + stages = hermes_job.get("pipeline_stages", []) + exprs: list[str] = [] + for stage in stages: + if isinstance(stage, dict) and "replace" in stage: + expr = stage["replace"].get("expression") + if expr: + exprs.append(expr) + return exprs + + def test_hermes_redacts_url_embedded_credentials(self): + """Issue #194: scheme://user:password@host must be redacted.""" + import re + + exprs = self._hermes_replace_expressions() + sample = "connecting to https://alice:hunter2@db.example.com/foo" + for expr in exprs: + sample = re.sub(expr, "", sample, flags=re.IGNORECASE) + assert "hunter2" not in sample, ( + f"URL-embedded password leaked through redaction stages: {sample!r}" + ) + assert "alice" not in sample, ( + f"URL-embedded username leaked through redaction stages: {sample!r}" + ) + + def test_hermes_redacts_query_string_api_keys(self): + """Issue #194: ?api_key= / &access_token= etc must be redacted.""" + import re + + exprs = self._hermes_replace_expressions() + cases = [ + ("GET /v1?api_key=abc123XYZ&name=alice", "abc123XYZ"), + ("url?access_token=tok_DEADBEEF&next=x", "tok_DEADBEEF"), + ("/auth?client_secret=shh_SECRET_42 done", "shh_SECRET_42"), + ] + for line, secret in cases: + redacted = line + for expr in exprs: + redacted = re.sub(expr, "", redacted, flags=re.IGNORECASE) + assert secret not in redacted, ( + f"query-string secret {secret!r} leaked: input={line!r} output={redacted!r}" + ) + class TestGrafanaDatasourcesConfig(unittest.TestCase): def setUp(self): From f18eafef06495ed4df6f55c7c11eb70f484fadce Mon Sep 17 00:00:00 2001 From: Micah Villmow <4211002+mvillmow@users.noreply.github.com> Date: Tue, 12 May 2026 19:48:41 -0700 Subject: [PATCH 2/3] security(promtail): extend URL-credential redaction to nats pipeline (#194) The nats scrape job gained credential redaction in #191 (merged after this branch diverged). Mirror the URL-embedded + query-string secret stages so the new pipeline has the same coverage as hermes. Generalise the regression tests to assert that every scrape job which performs credential redaction also strips URL-embedded credentials and query-string API keys. Refs #194 Co-Authored-By: Claude Opus 4.7 (1M context) --- configs/promtail.yml | 9 ++++++ tests/test_configs.py | 71 +++++++++++++++++++++++++++---------------- 2 files changed, 53 insertions(+), 27 deletions(-) diff --git a/configs/promtail.yml b/configs/promtail.yml index 83f4ea4..c8269be 100644 --- a/configs/promtail.yml +++ b/configs/promtail.yml @@ -97,3 +97,12 @@ scrape_configs: - replace: expression: '(?i)(password[=:]\s*)\S+' replace: '${1}' + # URL-embedded credentials: scheme://user:password@host -> scheme://@host + - replace: + expression: '([a-zA-Z][a-zA-Z0-9+.\-]*://)[^\s/@:]+:[^\s/@]+@' + replace: '${1}@' + # Query-string API keys / tokens / secrets / passwords: + # ?api_key=abc, &access_token=xyz, &client_secret=..., &password=... + - replace: + expression: '(?i)([?&](?:api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret|token|password|key)=)[^&\s#]+' + replace: '${1}' diff --git a/tests/test_configs.py b/tests/test_configs.py index 870c866..eb80a52 100644 --- a/tests/test_configs.py +++ b/tests/test_configs.py @@ -171,13 +171,13 @@ def test_redaction_enabled_jobs_have_secret_patterns(self): f"{pat!r}; got expressions: {joined!r}" ) - def _hermes_replace_expressions(self) -> list[str]: - hermes_job = next( - (j for j in self.config["scrape_configs"] if j.get("job_name") == "hermes"), + def _replace_expressions(self, job_name: str) -> list[str]: + job = next( + (j for j in self.config["scrape_configs"] if j.get("job_name") == job_name), None, ) - assert hermes_job is not None, "hermes scrape job not found" - stages = hermes_job.get("pipeline_stages", []) + assert job is not None, f"{job_name} scrape job not found" + stages = job.get("pipeline_stages", []) exprs: list[str] = [] for stage in stages: if isinstance(stage, dict) and "replace" in stage: @@ -186,38 +186,55 @@ def _hermes_replace_expressions(self) -> list[str]: exprs.append(expr) return exprs - def test_hermes_redacts_url_embedded_credentials(self): - """Issue #194: scheme://user:password@host must be redacted.""" + def _jobs_with_redaction(self) -> list[str]: + """Return job_names that already perform credential redaction.""" + jobs: list[str] = [] + for j in self.config["scrape_configs"]: + stages = j.get("pipeline_stages") or [] + if any(isinstance(s, dict) and "replace" in s for s in stages): + jobs.append(j["job_name"]) + return jobs + + def test_url_embedded_credentials_redacted_in_all_redaction_pipelines(self): + """Issue #194: scheme://user:password@host must be redacted in every + pipeline that already does credential redaction.""" import re - exprs = self._hermes_replace_expressions() - sample = "connecting to https://alice:hunter2@db.example.com/foo" - for expr in exprs: - sample = re.sub(expr, "", sample, flags=re.IGNORECASE) - assert "hunter2" not in sample, ( - f"URL-embedded password leaked through redaction stages: {sample!r}" - ) - assert "alice" not in sample, ( - f"URL-embedded username leaked through redaction stages: {sample!r}" - ) + jobs = self._jobs_with_redaction() + assert jobs, "expected at least one job with redaction stages" + sample_in = "connecting to https://alice:hunter2@db.example.com/foo" + for job in jobs: + sample = sample_in + for expr in self._replace_expressions(job): + sample = re.sub(expr, "", sample, flags=re.IGNORECASE) + assert "hunter2" not in sample, ( + f"job {job!r}: URL-embedded password leaked: {sample!r}" + ) + assert "alice" not in sample, ( + f"job {job!r}: URL-embedded username leaked: {sample!r}" + ) - def test_hermes_redacts_query_string_api_keys(self): - """Issue #194: ?api_key= / &access_token= etc must be redacted.""" + def test_query_string_api_keys_redacted_in_all_redaction_pipelines(self): + """Issue #194: ?api_key= / &access_token= etc must be redacted in every + pipeline that already does credential redaction.""" import re - exprs = self._hermes_replace_expressions() + jobs = self._jobs_with_redaction() cases = [ ("GET /v1?api_key=abc123XYZ&name=alice", "abc123XYZ"), ("url?access_token=tok_DEADBEEF&next=x", "tok_DEADBEEF"), ("/auth?client_secret=shh_SECRET_42 done", "shh_SECRET_42"), ] - for line, secret in cases: - redacted = line - for expr in exprs: - redacted = re.sub(expr, "", redacted, flags=re.IGNORECASE) - assert secret not in redacted, ( - f"query-string secret {secret!r} leaked: input={line!r} output={redacted!r}" - ) + for job in jobs: + exprs = self._replace_expressions(job) + for line, secret in cases: + redacted = line + for expr in exprs: + redacted = re.sub(expr, "", redacted, flags=re.IGNORECASE) + assert secret not in redacted, ( + f"job {job!r}: query-string secret {secret!r} leaked: " + f"input={line!r} output={redacted!r}" + ) class TestGrafanaDatasourcesConfig(unittest.TestCase): From 6c797a721463dfb89c6cbb7a20babefc82c4d1a8 Mon Sep 17 00:00:00 2001 From: Micah Villmow <4211002+mvillmow@users.noreply.github.com> Date: Wed, 13 May 2026 07:15:16 -0700 Subject: [PATCH 3/3] fix(promtail): extend URL-cred + query-string redaction to syslog job (#194) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rebased redaction pipelines on top of #190 (syslog secrets) and #191/#509 (NATS secrets) left the syslog job without URL-embedded credential and query-string secret redaction, while hermes and nats both had them. The test_url_embedded_credentials_redacted_in_all_redaction_pipelines test (which enumerates all jobs already doing replace-based redaction) demanded URL-cred handling for every such job — so syslog regressed. Also adds 'yamllint disable-line rule:line-length' to the three query-string regex lines, which intentionally exceed 120 chars because splitting an alternation across YAML lines would inject whitespace into the regex and break matching. Co-Authored-By: Claude Opus 4.7 (1M context) --- configs/promtail.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/configs/promtail.yml b/configs/promtail.yml index c8269be..76a67cc 100644 --- a/configs/promtail.yml +++ b/configs/promtail.yml @@ -36,6 +36,16 @@ scrape_configs: - replace: expression: '(?i)(password[=:]\s*)\S+' replace: '${1}' + # URL-embedded credentials: scheme://user:password@host -> scheme://@host + - replace: + expression: '([a-zA-Z][a-zA-Z0-9+.\-]*://)[^\s/@:]+:[^\s/@]+@' + replace: '${1}@' + # Query-string API keys / tokens / secrets / passwords: + # ?api_key=abc, &access_token=xyz, &client_secret=..., &password=... + - replace: + # yamllint disable-line rule:line-length + expression: '(?i)([?&](?:api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret|token|password|key)=)[^&\s#]+' + replace: '${1}' # Tail Hermes application log - job_name: hermes @@ -69,6 +79,7 @@ scrape_configs: # Query-string API keys / tokens / secrets / passwords: # ?api_key=abc, &access_token=xyz, &client_secret=..., &password=... - replace: + # yamllint disable-line rule:line-length expression: '(?i)([?&](?:api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret|token|password|key)=)[^&\s#]+' replace: '${1}' @@ -104,5 +115,6 @@ scrape_configs: # Query-string API keys / tokens / secrets / passwords: # ?api_key=abc, &access_token=xyz, &client_secret=..., &password=... - replace: + # yamllint disable-line rule:line-length expression: '(?i)([?&](?:api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret|token|password|key)=)[^&\s#]+' replace: '${1}'