diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6e8a468..c125af8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,7 +17,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
 
       - name: Install pixi
         uses: prefix-dev/setup-pixi@v0.9.5
@@ -135,7 +135,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
 
       - name: Install pixi
         run: curl -fsSL https://pixi.sh/install.sh | bash && echo "$HOME/.pixi/bin" >> $GITHUB_PATH
@@ -160,10 +160,10 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
 
       - name: Set up Go
-        uses: actions/setup-go@v5
+        uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff  # v5
         with:
           go-version-file: dashboard/go.mod
           cache-dependency-path: dashboard/go.sum
@@ -192,7 +192,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
 
       - name: Set up Python
         uses: actions/setup-python@v6
@@ -211,7 +211,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
 
       - name: Set up Python
         uses: actions/setup-python@v6
@@ -230,7 +230,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
 
       - name: Set up Python
         uses: actions/setup-python@v6
diff --git a/docker-compose.yml b/docker-compose.yml
index 9c9009f..0ddf435 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -273,7 +273,16 @@ services:
       NESTOR_TLS_CA: ""
       NATS_TLS_CA: ""
     healthcheck:
-      test: ["CMD", "wget", "-qO-", "http://localhost:9100/health"]
+      # The python:3.11-slim image does not ship wget; use python stdlib instead.
+      # Probe split across imports + check to satisfy yamllint line-length:120.
+      test:
+        - "CMD"
+        - "python"
+        - "-c"
+        - >-
+          import urllib.request, sys;
+          r = urllib.request.urlopen('http://localhost:9100/health', timeout=5);
+          sys.exit(0 if r.status == 200 else 1)
       interval: 30s
       timeout: 10s
       retries: 3
diff --git a/exporter/exporter.py b/exporter/exporter.py
index 5ecc4cb..630e8e1 100644
--- a/exporter/exporter.py
+++ b/exporter/exporter.py
@@ -16,7 +16,19 @@
 from http.server import BaseHTTPRequestHandler, HTTPServer
 from typing import Optional
 
-logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+# LOG_LEVEL env var (default INFO) controls log verbosity at runtime so
+# operators can flip to DEBUG (e.g. for HTTP access logs) without a redeploy.
+# Accepts standard logging level names: DEBUG, INFO, WARNING, ERROR, CRITICAL.
+# Any other value (typo, empty string) falls back to INFO with a warning
+# instead of raising ValueError from basicConfig at import time.
+_LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO").strip().upper()
+_LOG_LEVEL_VALID = _LOG_LEVEL in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
+logging.basicConfig(
+    level=_LOG_LEVEL if _LOG_LEVEL_VALID else "INFO",
+    format="%(asctime)s %(levelname)s %(message)s",
+)
+if not _LOG_LEVEL_VALID:
+    logging.warning("Ignoring unknown LOG_LEVEL=%r; falling back to INFO", _LOG_LEVEL)
 log = logging.getLogger("homeric-exporter")
 
 AGAMEMNON_URL = os.environ.get("AGAMEMNON_URL", "http://172.20.0.1:8080")
@@ -67,7 +79,10 @@ def _health_check(url: str, ca_file: Optional[str] = None) -> int:
         ctx = _build_ssl_context(ca_file)
         r = urllib.request.urlopen(url, timeout=5, context=ctx)
         return 1 if r.status == 200 else 0
-    except Exception:  # broad catch: probe must never propagate
+    except Exception as e:  # broad catch: probe must never propagate
+        # Log at DEBUG so operators can distinguish a misconfigured URL from
+        # a genuine upstream outage without changing the return-value contract.
+        log.debug("health_check %s failed: %s", url, e)
         return 0
 
 
diff --git a/scripts/import-dashboards.sh b/scripts/import-dashboards.sh
index 28e6d78..1400e5a 100755
--- a/scripts/import-dashboards.sh
+++ b/scripts/import-dashboards.sh
@@ -16,6 +16,11 @@ if [[ ! -d "$DASHBOARDS_DIR" ]]; then
   exit 1
 fi
 
+# Use a per-run mktemp file instead of a predictable /tmp path (avoids a
+# race condition and minor info-leak on shared machines). Cleaned up on exit.
+resp_file=$(mktemp)
+trap 'rm -f "$resp_file"' EXIT
+
 shopt -s nullglob
 files=("$DASHBOARDS_DIR"/*.json)
 
@@ -28,17 +33,17 @@ for f in "${files[@]}"; do
   echo "Importing $(basename "$f") ..."
   payload=$(jq -n --slurpfile dash "$f" '{"dashboard": $dash[0], "overwrite": true, "folderId": 0}')
   http_code=$(curl -s --connect-timeout 5 -m 10 \
-    -o /tmp/grafana_import_resp.json -w "%{http_code}" \
+    -o "$resp_file" -w "%{http_code}" \
     -u "$GRAFANA_AUTH" \
     -H "Content-Type: application/json" \
     -d "$payload" \
     "${GRAFANA_URL}/api/dashboards/db")
   if [[ "$http_code" -lt 200 || "$http_code" -ge 300 ]]; then
     echo " -> ERROR: HTTP $http_code from Grafana API" >&2
-    cat /tmp/grafana_import_resp.json >&2
+    cat "$resp_file" >&2
     exit 1
   fi
-  status=$(jq -r '.status // "unknown"' /tmp/grafana_import_resp.json)
+  status=$(jq -r '.status // "unknown"' "$resp_file")
   echo " -> status: $status"
 done
 
diff --git a/tests/test_exporter.py b/tests/test_exporter.py
index 46d6e94..7c4d9bc 100644
--- a/tests/test_exporter.py
+++ b/tests/test_exporter.py
@@ -102,11 +102,6 @@ def test_propagates_unexpected_exception(self):
             with self.assertRaises(MemoryError):
                 exporter_mod._fetch("http://fake/data")
 
-    def test_returns_none_on_exception(self):
-        with patch("urllib.request.urlopen", side_effect=_urlopen_raises):
-            result = exporter_mod._fetch("http://fake/data")
-        self.assertIsNone(result)
-
 
 # ---------------------------------------------------------------------------
 # Helper: patch all seven upstream calls in collect()
@@ -271,6 +266,10 @@ def test_exporter_self_metrics_present(self):
         self.assertIn("homeric_exporter_fetch_errors", self.output)
         # Must not carry the _total counter suffix (gauge, not counter)
         self.assertNotIn("homeric_exporter_fetch_errors_total", self.output)
+        # Regression guard: the old (un-suffixed) name must not coexist with
+        # the canonical _seconds-suffixed metric (#425). Match on the trailing
+        # `{` to distinguish the bare name from `_seconds`.
+        self.assertNotIn("homeric_exporter_scrape_timestamp{", self.output)
 
     def test_nats_msg_metrics_use_gauge_names_not_total(self):
         """nats_in_msgs and nats_out_msgs must not carry the _total counter suffix."""
@@ -311,10 +310,23 @@ def _make_handler(path: str) -> tuple:
     return handler, mock_server
 
 
+class _SilentHandler(exporter_mod.Handler):
+    """Test-only Handler subclass that suppresses access log output (#286).
+
+    The production Handler routes log_message to log.debug, which is silent at
+    the default INFO level but spams stderr if a developer flips LOG_LEVEL to
+    DEBUG while running the test suite. Override with a no-op so the in-process
+    fixture stays quiet regardless of the surrounding log config.
+    """
+
+    def log_message(self, fmt: str, *args: object) -> None:  # type: ignore[override]
+        return
+
+
 @contextlib.contextmanager
 def _live_server():
     """Spin up a real ThreadingHTTPServer on an ephemeral port; yield the port."""
-    server = ThreadingHTTPServer(("127.0.0.1", 0), exporter_mod.Handler)
+    server = ThreadingHTTPServer(("127.0.0.1", 0), _SilentHandler)
     thread = threading.Thread(target=server.serve_forever, daemon=True)
     thread.start()
     try: