Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Install pixi
uses: prefix-dev/setup-pixi@v0.9.5
Expand Down Expand Up @@ -135,7 +135,7 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Install pixi
run: curl -fsSL https://pixi.sh/install.sh | bash && echo "$HOME/.pixi/bin" >> $GITHUB_PATH
Expand All @@ -160,10 +160,10 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Set up Go
uses: actions/setup-go@v5
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
with:
go-version-file: dashboard/go.mod
cache-dependency-path: dashboard/go.sum
Expand Down Expand Up @@ -192,7 +192,7 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Set up Python
uses: actions/setup-python@v6
Expand All @@ -211,7 +211,7 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Set up Python
uses: actions/setup-python@v6
Expand All @@ -230,7 +230,7 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Set up Python
uses: actions/setup-python@v6
Expand Down
11 changes: 10 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
networks:

Check warning on line 1 in docker-compose.yml

View workflow job for this annotation

GitHub Actions / Validate configs

1:1 [document-start] missing document start "---"

Check warning on line 1 in docker-compose.yml

View workflow job for this annotation

GitHub Actions / Validate configs

1:1 [document-start] missing document start "---"
argus:
driver: bridge
loki-internal:
Expand Down Expand Up @@ -273,7 +273,16 @@
NESTOR_TLS_CA: ""
NATS_TLS_CA: ""
healthcheck:
test: ["CMD", "wget", "-qO-", "http://localhost:9100/health"]
# The python:3.11-slim image does not ship wget; use python stdlib instead.
# Probe split across imports + check to satisfy yamllint line-length:120.
test:
- "CMD"
- "python"
- "-c"
- >-
import urllib.request, sys;
r = urllib.request.urlopen('http://localhost:9100/health', timeout=5);
sys.exit(0 if r.status == 200 else 1)
interval: 30s
timeout: 10s
retries: 3
Expand Down
11 changes: 9 additions & 2 deletions exporter/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@
from http.server import BaseHTTPRequestHandler, HTTPServer
from typing import Optional

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
# LOG_LEVEL env var (default INFO) controls log verbosity at runtime so
# operators can flip to DEBUG (e.g. for HTTP access logs) without a redeploy.
# Accepts standard logging level names: DEBUG, INFO, WARNING, ERROR, CRITICAL.
_LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO").upper()
logging.basicConfig(level=_LOG_LEVEL, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger("homeric-exporter")

AGAMEMNON_URL = os.environ.get("AGAMEMNON_URL", "http://172.20.0.1:8080")
Expand Down Expand Up @@ -67,7 +71,10 @@ def _health_check(url: str, ca_file: Optional[str] = None) -> int:
ctx = _build_ssl_context(ca_file)
r = urllib.request.urlopen(url, timeout=5, context=ctx)
return 1 if r.status == 200 else 0
except Exception: # broad catch: probe must never propagate
except Exception as e: # broad catch: probe must never propagate
# Log at DEBUG so operators can distinguish a misconfigured URL from
# a genuine upstream outage without changing the return-value contract.
log.debug("health_check %s failed: %s", url, e)
return 0


Expand Down
11 changes: 8 additions & 3 deletions scripts/import-dashboards.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ if [[ ! -d "$DASHBOARDS_DIR" ]]; then
exit 1
fi

# Use a per-run mktemp file instead of a predictable /tmp path (avoids a
# race condition and minor info-leak on shared machines). Cleaned up on exit.
resp_file=$(mktemp)
trap 'rm -f "$resp_file"' EXIT

shopt -s nullglob
files=("$DASHBOARDS_DIR"/*.json)

Expand All @@ -28,17 +33,17 @@ for f in "${files[@]}"; do
echo "Importing $(basename "$f") ..."
payload=$(jq -n --slurpfile dash "$f" '{"dashboard": $dash[0], "overwrite": true, "folderId": 0}')
http_code=$(curl -s --connect-timeout 5 -m 10 \
-o /tmp/grafana_import_resp.json -w "%{http_code}" \
-o "$resp_file" -w "%{http_code}" \
-u "$GRAFANA_AUTH" \
-H "Content-Type: application/json" \
-d "$payload" \
"${GRAFANA_URL}/api/dashboards/db")
if [[ "$http_code" -lt 200 || "$http_code" -ge 300 ]]; then
echo " -> ERROR: HTTP $http_code from Grafana API" >&2
cat /tmp/grafana_import_resp.json >&2
cat "$resp_file" >&2
exit 1
fi
status=$(jq -r '.status // "unknown"' /tmp/grafana_import_resp.json)
status=$(jq -r '.status // "unknown"' "$resp_file")
echo " -> status: $status"
done

Expand Down
24 changes: 18 additions & 6 deletions tests/test_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,6 @@ def test_propagates_unexpected_exception(self):
with self.assertRaises(MemoryError):
exporter_mod._fetch("http://fake/data")

def test_returns_none_on_exception(self):
with patch("urllib.request.urlopen", side_effect=_urlopen_raises):
result = exporter_mod._fetch("http://fake/data")
self.assertIsNone(result)


# ---------------------------------------------------------------------------
# Helper: patch all seven upstream calls in collect()
Expand Down Expand Up @@ -271,6 +266,10 @@ def test_exporter_self_metrics_present(self):
self.assertIn("homeric_exporter_fetch_errors", self.output)
# Must not carry the _total counter suffix (gauge, not counter)
self.assertNotIn("homeric_exporter_fetch_errors_total", self.output)
# Regression guard: the old (un-suffixed) name must not coexist with
# the canonical _seconds-suffixed metric (#425). Match on the trailing
# `{` to distinguish the bare name from `_seconds`.
self.assertNotIn("homeric_exporter_scrape_timestamp{", self.output)

def test_nats_msg_metrics_use_gauge_names_not_total(self):
"""nats_in_msgs and nats_out_msgs must not carry the _total counter suffix."""
Expand Down Expand Up @@ -311,10 +310,23 @@ def _make_handler(path: str) -> tuple:
return handler, mock_server


class _SilentHandler(exporter_mod.Handler):
    """Handler variant for tests that drops all HTTP access logging (#286).

    The real Handler forwards log_message to log.debug, which is quiet at the
    default INFO level but turns into stderr noise whenever a developer runs
    the suite with LOG_LEVEL=DEBUG. Overriding with a no-op keeps the
    in-process test server silent regardless of the ambient logging config.
    """

    def log_message(self, fmt: str, *args: object) -> None:  # type: ignore[override]
        pass


@contextlib.contextmanager
def _live_server():
"""Spin up a real ThreadingHTTPServer on an ephemeral port; yield the port."""
server = ThreadingHTTPServer(("127.0.0.1", 0), exporter_mod.Handler)
server = ThreadingHTTPServer(("127.0.0.1", 0), _SilentHandler)
thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start()
try:
Expand Down
Loading