Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions images/flashbox-l1.conf
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Include=shared/mkosi.conf
Include=modules/flashbox/common/mkosi.conf
Include=modules/flashbox/flashbox-l1/mkosi.conf
Include=modules/flashbox/observability/mkosi.conf

[Config]
Profiles=azure,gcp
Expand Down
1 change: 1 addition & 0 deletions images/flashbox-l2.conf
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Include=shared/mkosi.conf
Include=modules/flashbox/common/mkosi.conf
Include=modules/flashbox/flashbox-l2/mkosi.conf
Include=modules/flashbox/observability/mkosi.conf

[Config]
Profiles=gcp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,13 @@ drop_dst_ip() {
#
# `source` is not supported in dash
###########################################################################

# Per-image firewall rules, sourced into this shell with `.`.
. /etc/bob/firewall-config

# Observability rule (sourced only if the observability module is included
# in the image — it owns its own egress rule + env-file dependency).
# The `[ -f ... ] &&` guard makes the file optional: when it is absent the
# source step is simply skipped.
[ -f /etc/bob/firewall-config-observability ] && . /etc/bob/firewall-config-observability

###########################################################################
# (6) Start in Maintenance Mode
###########################################################################
Expand Down
8 changes: 8 additions & 0 deletions modules/flashbox/observability/mkosi.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Observability module: adds the module's extra tree, its post-install
# script, and the Prometheus packages to the image.
[Content]
ExtraTrees=modules/flashbox/observability/mkosi.extra
PostInstallationScripts=modules/flashbox/observability/mkosi.postinst

# prometheus + the node and process exporters.
# NOTE(review): gettext-base is presumably pulled in for `envsubst`, used to
# render the Prometheus config templates at boot — confirm against render.sh.
Packages=prometheus
         prometheus-node-exporter
         prometheus-process-exporter
         gettext-base
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Observability firewall drop-in.
#
# Sourced by init-firewall.sh as /etc/bob/firewall-config-observability,
# after the per-image /etc/bob/firewall-config.
# Owns:
#   - sourcing the metrics endpoint env file written by
#     flashbox-observability-setup at boot
#   - the egress allowlist for the metrics endpoint IPs
#
# Relies on accept_dst_ip_port, CHAIN_ALWAYS_OUT and HTTPS_PORT being
# provided by the script that sources this file.

[ -f /etc/flashbox/observability.env ] && . /etc/flashbox/observability.env

# METRICS_ENDPOINTS is expected to be a whitespace-separated list of IPs.
# It is deliberately left unquoted in the `for` so the shell splits it into
# one word per address; every other expansion is quoted.
if [ -n "${METRICS_ENDPOINTS:-}" ]; then
    for ip in $METRICS_ENDPOINTS; do
        accept_dst_ip_port "$CHAIN_ALWAYS_OUT" tcp "$ip" "$HTTPS_PORT" "Metrics endpoint (Flashbots)"
    done
fi
Comment on lines +11 to +15
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this is important but I have concerns about it from different angles:

  • it introduces dynamic IP allowlisting which deviates from the Flashbox L1 images having everything static and part of the measurements for attestation/verification purposes
  • should we consider manually dropping those opened endpoints for the searcher's rootless podman container, as we do for a couple of always-out endpoints in init-container.sh? Or what is the rationale behind leaving that out? What's the impact if the searcher's container could reach those endpoints too, besides the guest OS?
  • I recall you mentioned those IP endpoints might change. What is the process to update them and refresh the firewall rules at runtime? How invasive is it? Does it have potential downtime? Is it automated or manually triggered?

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Process groups exported by prometheus-process-exporter.
process_names:
  # The searcher container is tracked through its conmon process, matched by
  # regex against the command line. Child processes are counted in the same
  # group because the exporter is started with --children.
  - name: 'searcher-container'
    cmdline:
      - 'conmon.*searcher-container'
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  # Stamped onto every series sent via remote_write so the central
  # Prometheus / AMP can distinguish samples coming from different
  # flashbox VMs.
  external_labels:
    # Quoted so the substituted value is always parsed as a string, even if
    # it turns out empty or looks like a number or boolean.
    host: '${FLASHBOX_VM}'

# Recording rules for aggregated metrics
rule_files:
  - /etc/prometheus/recording_rules.yml

# Scrape configurations
scrape_configs:
  # Node exporter on localhost
  - job_name: 'node'
    static_configs:
      - targets: ['localhost:9100']
    metric_relabel_configs:
      # Applied at scrape time: only series whose name matches the regex are
      # stored locally; everything else from this job is dropped. What gets
      # forwarded by remote_write is filtered separately by its own
      # write_relabel_configs.
      - source_labels: [__name__]
        regex: 'node_(cpu|memory|disk|filesystem|network|vmstat)_.*'
        action: keep

  # Process exporter for container monitoring
  - job_name: 'process'
    static_configs:
      - targets: ['localhost:9256']
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

# Remote write configuration (dynamically configured: the placeholders are
# substituted at boot and this fragment is appended to the base config).
# Every placeholder is single-quoted so the substituted value is always read
# as a plain string, whatever characters it contains.
remote_write:
  # Flashbots endpoint
  - url: '${METRICS_FLASHBOTS_URL}'
    write_relabel_configs:
      # Only send flashbox: prefixed metrics
      - source_labels: [__name__]
        regex: 'flashbox:.*'
        action: keep
    sigv4:
      region: '${METRICS_FLASHBOTS_REGION}'
      access_key: '${METRICS_FLASHBOTS_ACCESS_KEY}'
      secret_key: '${METRICS_FLASHBOTS_SECRET_KEY}'
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
groups:
  # Base metrics — local: prefix means they stay inside the TEE
  # (remote_write only forwards flashbox:*)
  - name: local_container_metrics
    interval: 30s
    rules:
      # CPU usage of the searcher-container process group, in percent of one
      # core, averaged over the last 5 minutes.
      - record: local:container_cpu_percent
        expr: sum(rate(namedprocess_namegroup_cpu_seconds_total{groupname=~".*searcher-container.*"}[5m])) * 100

  # Forwarded metrics — flashbox: prefix, picked up by remote_write
  - name: flashbox_health
    interval: 30s
    rules:
      # Binary: 1 if process-exporter is up AND at least one process is in the
      # searcher-container group; 0 otherwise.
      # The join alone returns nothing when the exporter is down or has never
      # seen the group, so the `or` branch supplies an explicit 0 carrying the
      # same instance/job labels (up * 0). That keeps the "0 otherwise" case a
      # real sample on the same series instead of a gap.
      - record: flashbox:container_alive
        expr: >
          (
            (up{job="process"} * on(instance) group_left(cgroup) namedprocess_namegroup_num_procs{groupname=~".*searcher-container.*"})
            > bool 0
          )
          or (up{job="process"} * 0)

      # Spike-guarded: current 15m avg must be under 80%,
      # AND the 10m max ending 5m ago must have been under 70%
      - record: flashbox:container_average_cpu_is_under_80_percent
        expr: >
          (avg_over_time(local:container_cpu_percent[15m]) < bool 80)
          * (max_over_time(local:container_cpu_percent[10m] offset 5m) < bool 70)

      # Raw OOM-kill counter from the node exporter's vmstat collector.
      - record: flashbox:container_oom_kills_count
        expr: node_vmstat_oom_kill

      # 1 when more than 10% of the root filesystem is still available.
      - record: flashbox:disk_free_space_is_over_10_percent
        expr: >
          (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) > bool 0.1

      # 1 when /persistent has more than 128 * 1024^3 bytes available.
      - record: flashbox:disk_free_space_is_over_128_gb
        expr: >
          (node_filesystem_avail_bytes{mountpoint="/persistent"}) > bool (128 * 1024 * 1024 * 1024)

      # 1 when any non-loopback interface moved traffic in the last 5 minutes.
      - record: flashbox:network_is_up
        expr: >
          (sum(rate(node_network_receive_bytes_total{device!~"lo"}[5m]))
          + sum(rate(node_network_transmit_bytes_total{device!~"lo"}[5m])))
          > bool 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[Unit]
Description=Flashbox observability setup (fetch creds, render Prometheus config)
# Pull in network-online.target and start only once it has been reached.
Wants=network-online.target
After=network-online.target

[Service]
# One-shot task: the unit stays "active" after the script exits
# (RemainAfterExit), so units ordered after it see it as done.
Type=oneshot
RemainAfterExit=yes
ExecStart=/usr/bin/flashbox-observability-setup
StandardError=journal
StandardOutput=journal

[Install]
WantedBy=minimal.target
Comment thread
pablin-10 marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
[Unit]
Description=Prometheus Node Exporter
Documentation=https://github.com/prometheus/node_exporter
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=prometheus
Group=prometheus
# Listens on loopback only (127.0.0.1:9100).
# Collectors are selected explicitly: cpu, meminfo, diskstats, filesystem,
# netdev, loadavg and vmstat are enabled; every other collector named below
# is switched off. The filesystem collector ignores mount points under
# /dev, /proc, /sys, /run and /var/lib/docker.
ExecStart=/usr/bin/prometheus-node-exporter \
    --web.listen-address=127.0.0.1:9100 \
    --collector.cpu \
    --collector.meminfo \
    --collector.diskstats \
    --collector.filesystem \
    --collector.netdev \
    --collector.loadavg \
    --no-collector.arp \
    --no-collector.bcache \
    --no-collector.bonding \
    --no-collector.conntrack \
    --no-collector.cpufreq \
    --no-collector.edac \
    --no-collector.entropy \
    --no-collector.filefd \
    --no-collector.hwmon \
    --no-collector.infiniband \
    --no-collector.ipvs \
    --no-collector.mdadm \
    --no-collector.netclass \
    --no-collector.netstat \
    --no-collector.nfs \
    --no-collector.nfsd \
    --no-collector.pressure \
    --no-collector.rapl \
    --no-collector.schedstat \
    --no-collector.sockstat \
    --no-collector.softnet \
    --no-collector.stat \
    --no-collector.textfile \
    --no-collector.thermal_zone \
    --no-collector.time \
    --no-collector.timex \
    --no-collector.udp_queues \
    --no-collector.uname \
    --collector.vmstat \
    --no-collector.xfs \
    --no-collector.zfs \
    --no-collector.systemd \
    --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run|var/lib/docker)($|/)
# Restart after 5 s if the exporter exits with a failure.
Restart=on-failure
RestartSec=5s

[Install]
WantedBy=minimal.target
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[Unit]
Description=Prometheus Process Exporter
Documentation=https://github.com/ncabatoff/process-exporter
# Ordered after the network and after searcher-container.service; only the
# network target is pulled in as a dependency.
Wants=network-online.target
After=network-online.target searcher-container.service

[Service]
Type=simple
Group=prometheus
User=prometheus
# Loopback-only listener; process groups come from process-exporter.yml and
# child processes are attributed to their parent's group (--children).
ExecStart=/usr/bin/prometheus-process-exporter \
    --web.listen-address=127.0.0.1:9256 \
    --config.path=/etc/prometheus/process-exporter.yml \
    --children
RestartSec=5s
Restart=on-failure

[Install]
WantedBy=minimal.target
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[Unit]
Description=Prometheus Monitoring System
Documentation=https://prometheus.io/docs/introduction/overview/
# After= only orders this unit behind flashbox-observability-setup.service
# (which renders /etc/prometheus/prometheus.yml); it does not pull it in.
After=network-online.target flashbox-observability-setup.service
Wants=network-online.target

[Service]
Type=simple
User=prometheus
Group=prometheus
# Local TSDB under /var/lib/prometheus/ with 24h retention; the web UI/API
# listens on loopback only.
ExecStart=/usr/bin/prometheus \
    --config.file=/etc/prometheus/prometheus.yml \
    --storage.tsdb.path=/var/lib/prometheus/ \
    --storage.tsdb.retention.time=24h \
    --web.console.templates=/usr/share/prometheus/consoles \
    --web.console.libraries=/usr/share/prometheus/console_libraries \
    --web.listen-address=127.0.0.1:9090
# Defines what `systemctl reload` does: send SIGHUP to the main process.
# Without ExecReload= the unit has no reload action. Prometheus re-reads its
# configuration on SIGHUP (upstream behaviour).
ExecReload=/bin/kill -HUP $MAINPID
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed? Doesn't systemd handle this automatically?

# Restart after 5 s if Prometheus exits with a failure.
Restart=on-failure
RestartSec=5s

[Install]
WantedBy=minimal.target
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

out of curiosity, what is this used/needed for here inside the observability module itself?

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[Unit]
# Pull in flashbox-observability-setup.service and start only after it.
# NOTE(review): the unit this drop-in attaches to is not visible here.
Wants=flashbox-observability-setup.service
After=flashbox-observability-setup.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#!/bin/sh
# Strict mode. `pipefail` is not implemented by every /bin/sh: dash versions
# without it reject the option and abort the script, so it is probed in a
# subshell and enabled only where supported.
set -eu
if (set -o pipefail) 2>/dev/null; then
    set -o pipefail
fi

# Boot-time observability setup: fetch metrics creds from Vault, render
# /etc/prometheus/prometheus.yml, write the firewall env file, and pin the
# metrics endpoint's FQDN → IPs mapping in /etc/hosts so Prometheus can
# reconnect after the host firewall locks down (production mode blocks DNS).
#
# Expected failures (Vault unreachable, missing secret, missing fields) keep
# the defaults written below and exit 0: boot must never fail because of
# observability. Any other command failure still aborts via `set -e`.

OBSERVABILITY_DIR=/etc/flashbox
OBSERVABILITY_ENV_PATH="$OBSERVABILITY_DIR/observability.env"

PROMETHEUS_CONFIG=/etc/prometheus/prometheus.yml
PROMETHEUS_BASE=/etc/prometheus/prometheus-base.yml.tmpl
PROMETHEUS_REMOTE_WRITE=/etc/prometheus/prometheus-remote-write.yml.tmpl

HOSTS_MARKER=flashbox-observability

# Helper libraries. They provide vault_fetch, render_template,
# hosts_clean_block / hosts_write_block, resolve_to_ips and url_to_host
# (definitions are not part of this file).
. /usr/lib/flashbox/vault.sh
. /usr/lib/flashbox/render.sh
. /usr/lib/flashbox/hosts.sh

mkdir -p "$OBSERVABILITY_DIR"

# Read the GCE instance name and expose it to prometheus-base.yml.tmpl's
# external_labels — every remote_write sample is stamped with `host=<vm>`
# so the central Prometheus can distinguish flashbox VMs.
# --max-time bounds the lookup so an unreachable metadata server cannot stall
# this boot-time unit; any failure falls back to "unknown".
FLASHBOX_VM=$(curl -sf --max-time 5 --header "Metadata-Flavor: Google" \
    "http://metadata/computeMetadata/v1/instance/name" || echo "unknown")
export FLASHBOX_VM

# write_firewall_env [ENDPOINTS]
#   Overwrite the firewall env file with METRICS_ENDPOINTS set to the given
#   value (empty when omitted).
write_firewall_env() {
    cat > "$OBSERVABILITY_ENV_PATH" <<EOF
METRICS_ENDPOINTS='${1:-}'
EOF
}

# Default state: base config rendered with external_labels, firewall closed,
# /etc/hosts sentinel block cleared. The success path below re-populates the
# block; until then any stale entries are removed.
render_template "$PROMETHEUS_CONFIG" "FLASHBOX_VM" "$PROMETHEUS_BASE"
write_firewall_env ""
hosts_clean_block "$HOSTS_MARKER"

# Local QEMU dev: skip Vault entirely.
if dmidecode -s system-manufacturer 2>/dev/null | grep -q "QEMU" && \
    [ -f /etc/systemd/system/serial-console.service ]; then
    echo "QEMU dev environment, skipping observability config fetch"
    exit 0
fi

echo "Fetching observability config from Vault..."

if ! vault_fetch; then
    echo "WARNING: could not fetch observability config from Vault, using defaults"
    exit 0
fi

# NOTE(review): the METRICS_FLASHBOTS_* variables are presumably set in this
# shell by vault_fetch (defined in vault.sh) — confirm there.
if [ -z "${METRICS_FLASHBOTS_URL:-}" ]; then
    echo "No metrics URL configured, remote_write disabled"
    exit 0
fi
Comment on lines +62 to +65
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't this always trigger an exit 0 here? Or where is "METRICS_FLASHBOTS_URL" populated beforehand?


# SigV4 needs all three values; with any of them missing the defaults
# written earlier stay in place and remote_write remains disabled.
if [ -z "${METRICS_FLASHBOTS_REGION:-}" ] \
    || [ -z "${METRICS_FLASHBOTS_ACCESS_KEY:-}" ] \
    || [ -z "${METRICS_FLASHBOTS_SECRET_KEY:-}" ]; then
    echo "WARNING: metrics URL set but SigV4 credentials are incomplete (need REGION, ACCESS_KEY, SECRET_KEY), remote_write disabled"
    exit 0
fi

# Resolve the endpoint while DNS is still reachable. The `|| endpoints=""`
# fallback keeps a non-zero exit from the resolver from aborting the script
# under `set -e`, so a failed lookup reaches the warning below instead.
endpoints=$(resolve_to_ips "$METRICS_FLASHBOTS_URL") || endpoints=""
host=$(url_to_host "$METRICS_FLASHBOTS_URL")
if [ -z "$endpoints" ]; then
    echo "WARNING: could not resolve metrics URL host, firewall egress will not be opened — remote_write will be blocked"
fi

# Publish the resolved IPs to the firewall env file and pin host → IPs in
# /etc/hosts under the sentinel marker.
write_firewall_env "$endpoints"
hosts_write_block "$HOSTS_MARKER" "$host" "$endpoints"

# Re-render the Prometheus config, this time with the remote_write fragment
# appended and the credentials substituted.
render_template "$PROMETHEUS_CONFIG" \
    "FLASHBOX_VM METRICS_FLASHBOTS_URL METRICS_FLASHBOTS_REGION METRICS_FLASHBOTS_ACCESS_KEY METRICS_FLASHBOTS_SECRET_KEY" \
    "$PROMETHEUS_BASE" "$PROMETHEUS_REMOTE_WRITE"

echo "Observability config written (host: ${host:-none}, endpoints: ${endpoints:-none})"
Loading