From 88419e1c4e57582c9dd1782fa8754d7fa6ee4de3 Mon Sep 17 00:00:00 2001 From: Micah Villmow <4211002+mvillmow@users.noreply.github.com> Date: Sat, 16 May 2026 12:35:26 -0700 Subject: [PATCH 1/9] feat(exporter): honor LOG_LEVEL env var for runtime log verbosity closes #267 --- exporter/exporter.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/exporter/exporter.py b/exporter/exporter.py index 5ecc4cb..27c3451 100644 --- a/exporter/exporter.py +++ b/exporter/exporter.py @@ -16,7 +16,11 @@ from http.server import BaseHTTPRequestHandler, HTTPServer from typing import Optional -logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") +# LOG_LEVEL env var (default INFO) controls log verbosity at runtime so +# operators can flip to DEBUG (e.g. for HTTP access logs) without a redeploy. +# Accepts standard logging level names: DEBUG, INFO, WARNING, ERROR, CRITICAL. +_LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO").upper() +logging.basicConfig(level=_LOG_LEVEL, format="%(asctime)s %(levelname)s %(message)s") log = logging.getLogger("homeric-exporter") AGAMEMNON_URL = os.environ.get("AGAMEMNON_URL", "http://172.20.0.1:8080") From 9a111aabb68a14b4eddd19db6af6afb5c91d9350 Mon Sep 17 00:00:00 2001 From: Micah Villmow <4211002+mvillmow@users.noreply.github.com> Date: Sat, 16 May 2026 12:35:35 -0700 Subject: [PATCH 2/9] fix(exporter): log at DEBUG in _health_check on caught exception closes #270 --- exporter/exporter.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/exporter/exporter.py b/exporter/exporter.py index 27c3451..630e8e1 100644 --- a/exporter/exporter.py +++ b/exporter/exporter.py @@ -71,7 +71,10 @@ def _health_check(url: str, ca_file: Optional[str] = None) -> int: ctx = _build_ssl_context(ca_file) r = urllib.request.urlopen(url, timeout=5, context=ctx) return 1 if r.status == 200 else 0 - except Exception: # broad catch: probe must never propagate + except Exception as e: # broad catch: probe 
must never propagate + # Log at DEBUG so operators can distinguish a misconfigured URL from + # a genuine upstream outage without changing the return-value contract. + log.debug("health_check %s failed: %s", url, e) return 0 From 2046bdc5120102df920abf8430cdaffdce5e13db Mon Sep 17 00:00:00 2001 From: Micah Villmow <4211002+mvillmow@users.noreply.github.com> Date: Sat, 16 May 2026 12:35:54 -0700 Subject: [PATCH 3/9] test(exporter): remove redundant test_returns_none_on_exception from TestFetch closes #271 --- tests/test_exporter.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_exporter.py b/tests/test_exporter.py index 46d6e94..1c0bc91 100644 --- a/tests/test_exporter.py +++ b/tests/test_exporter.py @@ -102,11 +102,6 @@ def test_propagates_unexpected_exception(self): with self.assertRaises(MemoryError): exporter_mod._fetch("http://fake/data") - def test_returns_none_on_exception(self): - with patch("urllib.request.urlopen", side_effect=_urlopen_raises): - result = exporter_mod._fetch("http://fake/data") - self.assertIsNone(result) - # --------------------------------------------------------------------------- # Helper: patch all seven upstream calls in collect() From 9535906b7b5ab4724ea09c1c108ddeabb18b69ce Mon Sep 17 00:00:00 2001 From: Micah Villmow <4211002+mvillmow@users.noreply.github.com> Date: Sat, 16 May 2026 12:36:05 -0700 Subject: [PATCH 4/9] fix(compose): use python stdlib probe for argus-exporter healthcheck closes #391 --- docker-compose.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 9c9009f..066bc97 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -273,7 +273,9 @@ services: NESTOR_TLS_CA: "" NATS_TLS_CA: "" healthcheck: - test: ["CMD", "wget", "-qO-", "http://localhost:9100/health"] + # The python:3.11-slim image does not ship wget; use python stdlib instead. 
+ test: ["CMD", "python", "-c", + "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:9100/health',timeout=5).status==200 else 1)"] interval: 30s timeout: 10s retries: 3 From 0207729ed9fb08907d77532a9b68529cc9c7c5fe Mon Sep 17 00:00:00 2001 From: Micah Villmow <4211002+mvillmow@users.noreply.github.com> Date: Sat, 16 May 2026 12:36:20 -0700 Subject: [PATCH 5/9] fix(scripts): use mktemp+trap in import-dashboards.sh response file closes #408 --- scripts/import-dashboards.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/import-dashboards.sh b/scripts/import-dashboards.sh index 28e6d78..1400e5a 100755 --- a/scripts/import-dashboards.sh +++ b/scripts/import-dashboards.sh @@ -16,6 +16,11 @@ if [[ ! -d "$DASHBOARDS_DIR" ]]; then exit 1 fi +# Use a per-run mktemp file instead of a predictable /tmp path (avoids a +# race condition and minor info-leak on shared machines). Cleaned up on exit. +resp_file=$(mktemp) +trap 'rm -f "$resp_file"' EXIT + shopt -s nullglob files=("$DASHBOARDS_DIR"/*.json) @@ -28,17 +33,17 @@ for f in "${files[@]}"; do echo "Importing $(basename "$f") ..." 
payload=$(jq -n --slurpfile dash "$f" '{"dashboard": $dash[0], "overwrite": true, "folderId": 0}') http_code=$(curl -s --connect-timeout 5 -m 10 \ - -o /tmp/grafana_import_resp.json -w "%{http_code}" \ + -o "$resp_file" -w "%{http_code}" \ -u "$GRAFANA_AUTH" \ -H "Content-Type: application/json" \ -d "$payload" \ "${GRAFANA_URL}/api/dashboards/db") if [[ "$http_code" -lt 200 || "$http_code" -ge 300 ]]; then echo " -> ERROR: HTTP $http_code from Grafana API" >&2 - cat /tmp/grafana_import_resp.json >&2 + cat "$resp_file" >&2 exit 1 fi - status=$(jq -r '.status // "unknown"' /tmp/grafana_import_resp.json) + status=$(jq -r '.status // "unknown"' "$resp_file") echo " -> status: $status" done From a298a8a0dfb701a94646d7083efc79f227a857cb Mon Sep 17 00:00:00 2001 From: Micah Villmow <4211002+mvillmow@users.noreply.github.com> Date: Sat, 16 May 2026 12:36:55 -0700 Subject: [PATCH 6/9] ci(supply-chain): pin actions/checkout and actions/setup-go to commit SHAs closes #295 --- .github/workflows/ci.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6e8a468..c125af8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install pixi uses: prefix-dev/setup-pixi@v0.9.5 @@ -135,7 +135,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install pixi run: curl -fsSL https://pixi.sh/install.sh | bash && echo "$HOME/.pixi/bin" >> $GITHUB_PATH @@ -160,10 +160,10 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Set up Go - uses: actions/setup-go@v5 + uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5 with: go-version-file: 
dashboard/go.mod cache-dependency-path: dashboard/go.sum @@ -192,7 +192,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Set up Python uses: actions/setup-python@v6 @@ -211,7 +211,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Set up Python uses: actions/setup-python@v6 @@ -230,7 +230,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Set up Python uses: actions/setup-python@v6 From 6e2311a5d3e1f64d5a2efaa4e190e8c2a6d1799b Mon Sep 17 00:00:00 2001 From: Micah Villmow <4211002+mvillmow@users.noreply.github.com> Date: Sat, 16 May 2026 12:37:14 -0700 Subject: [PATCH 7/9] test(exporter): assert old scrape_timestamp metric name is absent closes #425 --- tests/test_exporter.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_exporter.py b/tests/test_exporter.py index 1c0bc91..5df9827 100644 --- a/tests/test_exporter.py +++ b/tests/test_exporter.py @@ -266,6 +266,10 @@ def test_exporter_self_metrics_present(self): self.assertIn("homeric_exporter_fetch_errors", self.output) # Must not carry the _total counter suffix (gauge, not counter) self.assertNotIn("homeric_exporter_fetch_errors_total", self.output) + # Regression guard: the old (un-suffixed) name must not coexist with + # the canonical _seconds-suffixed metric (#425). Match on the trailing + # `{` to distinguish the bare name from `_seconds`. 
+ self.assertNotIn("homeric_exporter_scrape_timestamp{", self.output) def test_nats_msg_metrics_use_gauge_names_not_total(self): """nats_in_msgs and nats_out_msgs must not carry the _total counter suffix.""" From 98f9bfe5ebce7d21b172ec82f06bcb67f551d983 Mon Sep 17 00:00:00 2001 From: Micah Villmow <4211002+mvillmow@users.noreply.github.com> Date: Sat, 16 May 2026 12:37:39 -0700 Subject: [PATCH 8/9] test(exporter): silence ThreadingHTTPServer access logs via Handler subclass closes #286 --- tests/test_exporter.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/test_exporter.py b/tests/test_exporter.py index 5df9827..7c4d9bc 100644 --- a/tests/test_exporter.py +++ b/tests/test_exporter.py @@ -310,10 +310,23 @@ def _make_handler(path: str) -> tuple: return handler, mock_server +class _SilentHandler(exporter_mod.Handler): + """Test-only Handler subclass that suppresses access log output (#286). + + The production Handler routes log_message to log.debug, which is silent at + the default INFO level but spams stderr if a developer flips LOG_LEVEL to + DEBUG while running the test suite. Override with a no-op so the in-process + fixture stays quiet regardless of the surrounding log config. 
+ """ + + def log_message(self, fmt: str, *args: object) -> None: # type: ignore[override] + return + + @contextlib.contextmanager def _live_server(): """Spin up a real ThreadingHTTPServer on an ephemeral port; yield the port.""" - server = ThreadingHTTPServer(("127.0.0.1", 0), exporter_mod.Handler) + server = ThreadingHTTPServer(("127.0.0.1", 0), _SilentHandler) thread = threading.Thread(target=server.serve_forever, daemon=True) thread.start() try: From db8a8cf1b2ef02c2241cb64287c491c669b8774b Mon Sep 17 00:00:00 2001 From: Micah Villmow <4211002+mvillmow@users.noreply.github.com> Date: Sat, 16 May 2026 13:04:49 -0700 Subject: [PATCH 9/9] fix(compose): wrap argus-exporter healthcheck python probe under 120 cols The python stdlib healthcheck probe added in 9535906 produced a 141-char line that tripped 'Validate configs' yamllint (line-length max 120). Reformat the test: as a multi-line YAML sequence with a folded scalar so the Python one-liner is split across lines but folds back to a single logical command at runtime. Verified with yaml.safe_load: parsed healthcheck.test is still a 4-element command list whose last element folds to a single-line Python probe equivalent to the original (it now binds the response to a temporary `r`, so the string is not byte-identical). --- docker-compose.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 066bc97..0ddf435 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -274,8 +274,15 @@ services: NATS_TLS_CA: "" healthcheck: # The python:3.11-slim image does not ship wget; use python stdlib instead. - test: ["CMD", "python", "-c", - "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:9100/health',timeout=5).status==200 else 1)"] + # Probe split across imports + check to satisfy yamllint line-length:120. + test: + - "CMD" + - "python" + - "-c" + - >- + import urllib.request, sys; + r = urllib.request.urlopen('http://localhost:9100/health', timeout=5); + sys.exit(0 if r.status == 200 else 1) interval: 30s timeout: 10s retries: 3