From 9b7442119a8608fb048c128af3164878a80dc114 Mon Sep 17 00:00:00 2001 From: alastairong1 <177203013+alastairong1@users.noreply.github.com> Date: Thu, 7 May 2026 06:08:23 +0000 Subject: [PATCH] Add detailed health endpoint with raindex sync status (#95) ## Motivation `GET /health` only proves that the API process is up. It does not show whether raindex local DB sync is configured, actively syncing, ready for reads, or failing. The detailed endpoint gives monitoring and operators an on-demand snapshot without this API needing to know raindex local DB internals. ## Solution - Add `GET /health/detailed` while keeping the existing `GET /health` liveness response unchanged. - Bump `lib/rain.orderbook` to `5d9b9e6419eb0e1252686b17929959591eab6da7` and use the new `get_local_db_sync_snapshot()` API for raindex local DB sync status. - Report app DB health plus raindex sync state, including configured/healthy flags, network readiness, orderbook readiness, sync phase, last synced block, update timestamp, and errors (scheduler state is not exposed in the response). - Refactor the detailed health implementation to call `raindex.client().get_local_db_sync_snapshot().await` instead of opening/querying SQLite directly. - Remove the direct `rusqlite` dependency from this crate. - Add OpenAPI response types for raindex sync networks and orderbooks. - Map raindex component status as `active`, `syncing`, `failure`, or `not_configured`; top-level API health remains `ok`, `degraded`, or `error`. - Update local test fixtures for orderbook YAML spec version 5 and adjust small SDK API changes from the submodule bump. ## Chained PRs - **#96** (`alastair/ops-tooling`) is stacked on this PR and should be merged after this PR. ## Dependent PRs - **rainlanguage/raindex#2563** must merge before this PR. This PR bumps `lib/rain.orderbook` to a commit that depends on that raindex change. 
## Checks - [x] `nix develop -c cargo fmt` - [x] `nix develop -c cargo test` - [x] `nix develop -c rainix-rs-static` Part of the deployed-state split tracked in #94. ## Summary by CodeRabbit * **New Features** * Added a detailed health endpoint providing structured status information about the application's database connectivity and synchronization components, including component-level metrics and error details. --- lib/rain.orderbook | 2 +- src/main.rs | 1 + src/routes/health.rs | 390 +++++++++++++++++++++++++++++++++++++++- src/routes/order/mod.rs | 2 +- src/types/health.rs | 163 ++++++++++++++++- 5 files changed, 551 insertions(+), 7 deletions(-) diff --git a/lib/rain.orderbook b/lib/rain.orderbook index 014016c..5d9b9e6 160000 --- a/lib/rain.orderbook +++ b/lib/rain.orderbook @@ -1 +1 @@ -Subproject commit 014016c699a2da4aa27d335bbf565e7347c2f762 +Subproject commit 5d9b9e6419eb0e1252686b17929959591eab6da7 diff --git a/src/main.rs b/src/main.rs index a7fa8f8..ec7bcdc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -53,6 +53,7 @@ enum StartupError { #[openapi( paths( routes::health::get_health, + routes::health::get_health_detailed, routes::tokens::get_tokens, routes::swap::post_swap_quote, routes::swap::post_swap_calldata, diff --git a/src/routes/health.rs b/src/routes/health.rs index d08d2ee..18fd8d5 100644 --- a/src/routes/health.rs +++ b/src/routes/health.rs @@ -1,8 +1,16 @@ +use crate::db::DbPool; use crate::error::ApiError; use crate::fairings::TracingSpan; -use crate::types::health::HealthResponse; +use crate::raindex::SharedRaindexProvider; +use crate::types::health::{ + DbHealthStatus, DbStatus, DetailedHealthResponse, HealthResponse, HealthStatus, + NetworkSyncInfo, OrderbookSyncInfo, RaindexSyncStatus, RaindexSyncStatusKind, +}; +use rain_orderbook_common::raindex_client::local_db::{ + LocalDbSyncSnapshot, NetworkSyncStatusSnapshot, OrderbookSyncStatusSnapshot, +}; use rocket::serde::json::Json; -use rocket::Route; +use rocket::{Route, State}; use 
tracing::Instrument; #[utoipa::path( @@ -18,13 +26,387 @@ pub async fn get_health(span: TracingSpan) -> Result, ApiEr async move { tracing::info!("request received"); Ok(Json(HealthResponse { - status: "ok".into(), + status: HealthStatus::Ok, })) } .instrument(span.0) .await } +#[utoipa::path( + get, + path = "/health/detailed", + tag = "Health", + responses( + (status = 200, description = "Detailed service health including sync status", body = DetailedHealthResponse), + ) +)] +#[get("/health/detailed")] +pub async fn get_health_detailed( + span: TracingSpan, + pool: &State, + shared_raindex: &State, +) -> Result, ApiError> { + async move { + tracing::info!("detailed health check request received"); + + tracing::info!("checking application database and raindex local database"); + let (app_db, raindex) = tokio::join!(check_app_db(pool), check_raindex_db(shared_raindex)); + + let status = detailed_status(&app_db, &raindex); + tracing::info!(status = ?status, "detailed health check completed"); + + Ok(Json(DetailedHealthResponse { + status, + app_db, + raindex, + })) + } + .instrument(span.0) + .await +} + +async fn check_app_db(pool: &DbPool) -> DbStatus { + match sqlx::query("SELECT 1").execute(pool).await { + Ok(_) => DbStatus { + status: DbHealthStatus::Ok, + connected: true, + error: None, + }, + Err(e) => { + tracing::warn!(error = %e, "app database health check failed"); + DbStatus { + status: DbHealthStatus::Error, + connected: false, + error: Some("application database unavailable".to_string()), + } + } + } +} + +async fn check_raindex_db(shared_raindex: &SharedRaindexProvider) -> RaindexSyncStatus { + let client = { + let raindex = shared_raindex.read().await; + raindex.client().clone() + }; + + match client.get_local_db_sync_snapshot().await { + Ok(snapshot) => map_raindex_snapshot(snapshot), + Err(e) => { + tracing::warn!(error = %e, "failed to get raindex local db sync snapshot"); + RaindexSyncStatus { + status: RaindexSyncStatusKind::Failure, + 
configured: false, + healthy: false, + error: Some("raindex local DB sync snapshot unavailable".to_string()), + networks: vec![], + orderbooks: vec![], + } + } + } +} + +fn map_raindex_snapshot(snapshot: LocalDbSyncSnapshot) -> RaindexSyncStatus { + let status = if snapshot.configured { + snapshot.status.into() + } else { + RaindexSyncStatusKind::NotConfigured + }; + + log_raindex_snapshot_errors(&snapshot); + + RaindexSyncStatus { + status, + configured: snapshot.configured, + healthy: snapshot.healthy, + error: raindex_error(&snapshot), + networks: snapshot + .networks + .into_iter() + .map(map_network_snapshot) + .collect(), + orderbooks: snapshot + .orderbooks + .into_iter() + .map(map_orderbook_snapshot) + .collect(), + } +} + +fn map_network_snapshot(snapshot: NetworkSyncStatusSnapshot) -> NetworkSyncInfo { + NetworkSyncInfo { + chain_id: snapshot.chain_id, + network_key: snapshot.network_key, + status: snapshot.status.into(), + orderbook_count: snapshot.orderbook_count, + ready: snapshot.ready, + error: snapshot.error.map(|_| "network sync failed".to_string()), + } +} + +fn map_orderbook_snapshot(snapshot: OrderbookSyncStatusSnapshot) -> OrderbookSyncInfo { + OrderbookSyncInfo { + chain_id: snapshot.ob_id.chain_id, + orderbook_address: format!("{:#x}", snapshot.ob_id.orderbook_address), + orderbook_key: snapshot.orderbook_key, + network_key: snapshot.network_key, + status: snapshot.status.into(), + ready: snapshot.ready, + phase_message: snapshot.phase_message, + last_synced_block: snapshot.last_synced_block, + updated_at: snapshot.updated_at, + error: snapshot.error.map(|_| "orderbook sync failed".to_string()), + } +} + +fn log_raindex_snapshot_errors(snapshot: &LocalDbSyncSnapshot) { + for network in &snapshot.networks { + if let Some(error) = &network.error { + tracing::warn!( + chain_id = network.chain_id, + network_key = network.network_key.as_deref(), + error = %error, + "raindex network sync failed" + ); + } + } + + for orderbook in 
&snapshot.orderbooks { + if let Some(error) = &orderbook.error { + tracing::warn!( + chain_id = orderbook.ob_id.chain_id, + orderbook_address = %format!("{:#x}", orderbook.ob_id.orderbook_address), + orderbook_key = orderbook.orderbook_key.as_deref(), + network_key = orderbook.network_key.as_deref(), + error = %error, + "raindex orderbook sync failed" + ); + } + } +} + +fn raindex_error(snapshot: &LocalDbSyncSnapshot) -> Option { + if !snapshot.healthy { + Some("raindex local DB sync is unhealthy".to_string()) + } else if !snapshot.configured { + Some("raindex local DB sync is not configured".to_string()) + } else { + None + } +} + +fn detailed_status(app_db: &DbStatus, raindex: &RaindexSyncStatus) -> HealthStatus { + if !app_db.connected || !raindex.healthy || raindex.status == RaindexSyncStatusKind::Failure { + HealthStatus::Error + } else if raindex.status == RaindexSyncStatusKind::NotConfigured + || raindex.status == RaindexSyncStatusKind::Syncing + { + HealthStatus::Degraded + } else { + HealthStatus::Ok + } +} + pub fn routes() -> Vec { - rocket::routes![get_health] + rocket::routes![get_health, get_health_detailed] +} + +#[cfg(test)] +mod tests { + use super::*; + use alloy::primitives::address; + use rain_orderbook_common::local_db::OrderbookIdentifier; + use rain_orderbook_common::raindex_client::local_db::{LocalDbStatus, SchedulerState}; + + #[test] + fn detailed_status_is_degraded_when_raindex_has_not_started() { + let app_db = DbStatus { + status: DbHealthStatus::Ok, + connected: true, + error: None, + }; + let raindex = RaindexSyncStatus { + status: RaindexSyncStatusKind::Syncing, + configured: true, + healthy: true, + error: None, + networks: vec![], + orderbooks: vec![], + }; + + assert_eq!(detailed_status(&app_db, &raindex), HealthStatus::Degraded); + } + + #[test] + fn detailed_status_is_error_when_app_db_is_down() { + let app_db = DbStatus { + status: DbHealthStatus::Error, + connected: false, + error: Some("db unavailable".to_string()), + }; + 
let raindex = RaindexSyncStatus { + status: RaindexSyncStatusKind::Active, + configured: true, + healthy: true, + error: None, + networks: vec![], + orderbooks: vec![], + }; + + assert_eq!(detailed_status(&app_db, &raindex), HealthStatus::Error); + } + + #[test] + fn detailed_status_is_degraded_when_raindex_is_not_configured() { + let app_db = DbStatus { + status: DbHealthStatus::Ok, + connected: true, + error: None, + }; + let raindex = RaindexSyncStatus { + status: RaindexSyncStatusKind::NotConfigured, + configured: false, + healthy: true, + error: Some("raindex local DB sync is not configured".to_string()), + networks: vec![], + orderbooks: vec![], + }; + + assert_eq!(detailed_status(&app_db, &raindex), HealthStatus::Degraded); + } + + #[test] + fn map_raindex_snapshot_preserves_network_and_orderbook_status() { + let orderbook_id = + OrderbookIdentifier::new(8453, address!("d2938e7c9fe3597f78832ce780feb61945c377d7")); + let snapshot = LocalDbSyncSnapshot::from_parts( + vec![NetworkSyncStatusSnapshot { + chain_id: 8453, + network_key: Some("base".to_string()), + status: LocalDbStatus::Active, + scheduler_state: SchedulerState::Leader, + orderbook_count: 1, + ready: true, + error: None, + }], + vec![OrderbookSyncStatusSnapshot { + ob_id: orderbook_id, + orderbook_key: Some("base-orderbook".to_string()), + network_key: Some("base".to_string()), + status: LocalDbStatus::Active, + scheduler_state: SchedulerState::Leader, + ready: true, + phase_message: None, + last_synced_block: Some(12_345_678), + updated_at: Some("2026-05-01 12:00:00".to_string()), + error: None, + }], + ); + + let raindex = map_raindex_snapshot(snapshot); + + assert_eq!(raindex.status, RaindexSyncStatusKind::Active); + assert!(raindex.configured); + assert!(raindex.healthy); + assert_eq!(raindex.networks.len(), 1); + assert_eq!(raindex.networks[0].network_key.as_deref(), Some("base")); + assert_eq!(raindex.orderbooks.len(), 1); + assert_eq!( + raindex.orderbooks[0].orderbook_address, + 
"0xd2938e7c9fe3597f78832ce780feb61945c377d7" + ); + assert_eq!(raindex.orderbooks[0].last_synced_block, Some(12_345_678)); + assert_eq!( + raindex.orderbooks[0].updated_at.as_deref(), + Some("2026-05-01 12:00:00") + ); + } + + #[test] + fn map_raindex_snapshot_sanitizes_sync_errors() { + let orderbook_id = + OrderbookIdentifier::new(8453, address!("d2938e7c9fe3597f78832ce780feb61945c377d7")); + let snapshot = LocalDbSyncSnapshot::from_parts( + vec![NetworkSyncStatusSnapshot { + chain_id: 8453, + network_key: Some("base".to_string()), + status: LocalDbStatus::Failure, + scheduler_state: SchedulerState::Leader, + orderbook_count: 1, + ready: false, + error: Some("sqlite: no such table sync_status".to_string()), + }], + vec![OrderbookSyncStatusSnapshot { + ob_id: orderbook_id, + orderbook_key: Some("base-orderbook".to_string()), + network_key: Some("base".to_string()), + status: LocalDbStatus::Failure, + scheduler_state: SchedulerState::Leader, + ready: false, + phase_message: None, + last_synced_block: None, + updated_at: None, + error: Some("provider url includes internal host".to_string()), + }], + ); + + let raindex = map_raindex_snapshot(snapshot); + + assert_eq!( + raindex.error.as_deref(), + Some("raindex local DB sync is unhealthy") + ); + assert_eq!( + raindex.networks[0].error.as_deref(), + Some("network sync failed") + ); + assert_eq!( + raindex.orderbooks[0].error.as_deref(), + Some("orderbook sync failed") + ); + } + + #[test] + fn detailed_response_does_not_expose_scheduler_state() { + let response = DetailedHealthResponse { + status: HealthStatus::Ok, + app_db: DbStatus { + status: DbHealthStatus::Ok, + connected: true, + error: None, + }, + raindex: RaindexSyncStatus { + status: RaindexSyncStatusKind::Active, + configured: true, + healthy: true, + error: None, + networks: vec![], + orderbooks: vec![], + }, + }; + + let serialized = match serde_json::to_value(response) { + Ok(value) => value, + Err(error) => panic!("detailed health response should 
serialize: {error}"), + }; + + assert!(serialized.get("scheduler_state").is_none()); + assert!(serialized["raindex"].get("scheduler_state").is_none()); + assert_eq!(serialized["status"], "ok"); + assert_eq!(serialized["app_db"]["status"], "ok"); + assert_eq!(serialized["raindex"]["status"], "active"); + } + + #[test] + fn map_raindex_snapshot_reports_not_configured() { + let raindex = map_raindex_snapshot(LocalDbSyncSnapshot::not_configured()); + + assert_eq!(raindex.status, RaindexSyncStatusKind::NotConfigured); + assert!(!raindex.configured); + assert!(raindex.healthy); + assert_eq!( + raindex.error.as_deref(), + Some("raindex local DB sync is not configured") + ); + } } diff --git a/src/routes/order/mod.rs b/src/routes/order/mod.rs index 55b1ad0..0cbb859 100644 --- a/src/routes/order/mod.rs +++ b/src/routes/order/mod.rs @@ -98,7 +98,7 @@ pub(crate) mod test_fixtures { pub fn stub_raindex_client() -> serde_json::Value { json!({ "orderbook_yaml": { - "documents": ["version: 4\nnetworks:\n base:\n rpcs:\n - https://mainnet.base.org\n chain-id: 8453\n currency: ETH\nsubgraphs:\n base: https://example.com/sg\norderbooks:\n base:\n address: 0xd2938e7c9fe3597f78832ce780feb61945c377d7\n network: base\n subgraph: base\n deployment-block: 0\ndeployers:\n base:\n address: 0xC1A14cE2fd58A3A2f99deCb8eDd866204eE07f8D\n network: base\n"], + "documents": ["version: 5\nnetworks:\n base:\n rpcs:\n - https://mainnet.base.org\n chain-id: 8453\n currency: ETH\nsubgraphs:\n base: https://example.com/sg\norderbooks:\n base:\n address: 0xd2938e7c9fe3597f78832ce780feb61945c377d7\n network: base\n subgraph: base\n deployment-block: 0\ndeployers:\n base:\n address: 0xC1A14cE2fd58A3A2f99deCb8eDd866204eE07f8D\n network: base\n"], "profile": "strict" } }) diff --git a/src/types/health.rs b/src/types/health.rs index a2d66f1..b6fe6ad 100644 --- a/src/types/health.rs +++ b/src/types/health.rs @@ -1,8 +1,169 @@ +use rain_orderbook_common::raindex_client::local_db::LocalDbStatus; use 
serde::{Deserialize, Serialize}; use utoipa::ToSchema; #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] pub struct HealthResponse { #[schema(example = "ok")] - pub status: String, + pub status: HealthStatus, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "snake_case")] +pub enum HealthStatus { + Ok, + Degraded, + Error, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct DetailedHealthResponse { + /// Overall API status: "ok", "degraded", or "error" + #[schema(example = "ok")] + pub status: HealthStatus, + + /// st0x application database connectivity + pub app_db: DbStatus, + + /// raindex local database sync status + pub raindex: RaindexSyncStatus, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct DbStatus { + /// Component status: "ok" or "error" + #[schema(example = "ok")] + pub status: DbHealthStatus, + + /// Whether the database is reachable + #[schema(example = true)] + pub connected: bool, + + /// Error message if not connected + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "snake_case")] +pub enum DbHealthStatus { + Ok, + Error, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct RaindexSyncStatus { + /// Local DB sync status: "active", "syncing", "failure", or "not_configured" + #[schema(example = "active")] + pub status: RaindexSyncStatusKind, + + /// Whether local DB sync is configured in raindex settings. + #[schema(example = true)] + pub configured: bool, + + /// Whether raindex reports the local DB sync as healthy. + #[schema(example = true)] + pub healthy: bool, + + /// Error message if raindex sync status could not be read or is failing. + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, + + /// Per-network sync status from raindex. 
+ #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub networks: Vec, + + /// Per-orderbook sync status from raindex. + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub orderbooks: Vec, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "snake_case")] +pub enum RaindexSyncStatusKind { + Active, + Syncing, + Failure, + NotConfigured, +} + +impl From for RaindexSyncStatusKind { + fn from(status: LocalDbStatus) -> Self { + match status { + LocalDbStatus::Active => Self::Active, + LocalDbStatus::Syncing => Self::Syncing, + LocalDbStatus::Failure => Self::Failure, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct NetworkSyncInfo { + /// Chain ID (e.g. 8453 for Base) + #[schema(example = 8453)] + pub chain_id: u32, + + /// Network key from raindex settings. + #[serde(skip_serializing_if = "Option::is_none")] + pub network_key: Option, + + /// Network sync status: "active", "syncing", or "failure" + #[schema(example = "active")] + pub status: RaindexSyncStatusKind, + + /// Number of configured orderbooks on this network. + #[schema(example = 1)] + pub orderbook_count: usize, + + /// Whether the network is ready for local DB reads. + #[schema(example = true)] + pub ready: bool, + + /// Error message if this network failed. + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct OrderbookSyncInfo { + /// Chain ID (e.g. 8453 for Base) + #[schema(example = 8453)] + pub chain_id: u32, + + /// Orderbook contract address + #[schema(example = "0xd2938e7c9fe3597f78832ce780feb61945c377d7")] + pub orderbook_address: String, + + /// Orderbook key from raindex settings. + #[serde(skip_serializing_if = "Option::is_none")] + pub orderbook_key: Option, + + /// Network key from raindex settings. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub network_key: Option, + + /// Orderbook sync status: "active", "syncing", or "failure" + #[schema(example = "active")] + pub status: RaindexSyncStatusKind, + + /// Whether this orderbook is ready for local DB reads. + #[schema(example = true)] + pub ready: bool, + + /// Current sync phase message, when syncing. + #[serde(skip_serializing_if = "Option::is_none")] + pub phase_message: Option, + + /// Last block number persisted by raindex for this orderbook. + #[schema(example = 12345678)] + #[serde(skip_serializing_if = "Option::is_none")] + pub last_synced_block: Option, + + /// Timestamp when raindex last updated the persisted sync status. + #[serde(skip_serializing_if = "Option::is_none")] + pub updated_at: Option, + + /// Error message if this orderbook failed. + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, }