diff --git a/.github/workflows/kernel-dataplane.yml b/.github/workflows/kernel-dataplane.yml index 9a50701c..1e9af4ef 100644 --- a/.github/workflows/kernel-dataplane.yml +++ b/.github/workflows/kernel-dataplane.yml @@ -193,6 +193,20 @@ jobs: topology: tests/interop/m52-fib-ecmp-relax-frr.clab.yml script: tests/interop/scripts/test-m52-fib-ecmp-relax-frr.sh + m58: + name: M58 — ADR-0061 FIB-table runtime CRUD (FRR 10.3.1) + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v6 + - uses: ./.github/actions/setup-dataplane-host + - name: Run M58 (deploy + test + destroy with retry) + uses: ./.github/actions/run-interop-test + with: + label: M58 + topology: tests/interop/m58-fib-table-crud-frr.clab.yml + script: tests/interop/scripts/test-m58-fib-table-crud-frr.sh + m53: name: M53 — ADR-0069 BGP unnumbered + scoped FIB (FRR 10.3.1) runs-on: ubuntu-latest diff --git a/CHANGELOG.md b/CHANGELOG.md index a5dabef9..2588aee6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,20 @@ This project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html). Adding a range validates identically to config load (peer-group must exist and not enable BFD, valid prefix, no duplicate effective prefix); config-load validation now also rejects exact-duplicate effective prefixes. +- **Runtime `[[fib_tables]]` CRUD.** `SetFibTable` (create-or-replace by name, + tier `mutating`), `DeleteFibTable` (tier `mutating`), and `ListFibTables` + (tier `sensitive_read`) on `RibService`, with `rustbgpctl fib-table + {list,set,delete}`. `set` carries the full table definition (not a patch); + changing `table_id`/`metric` for an existing name is a table-key move (old + kernel rows withdraw, the new table back-fills). Edits hot-apply through the + ADR-0061 FIB reconciler and persist to the TOML config (atomic write) when + started with `--config`. The candidate is validated against the live config + before dispatch, persisted only after the reconciler acknowledges the exact + accepted set, and serialized with SIGHUP FIB reloads through one coordinator + lock — so runtime and on-disk config cannot drift. Requires the reconciler to + be running (at least one `[[fib_tables]]` entry at startup) — otherwise the + mutating RPCs return `FAILED_PRECONDITION` (enabling FIB from an empty config + is still restart-required). ### Changed diff --git a/ROADMAP.md b/ROADMAP.md index b5791ab6..a05daefa 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -102,7 +102,9 @@ Later for what remains. operator-confidence pieces).* ADR-0061/0066/0068 cover configured-table install, ECMP, per-class caps, `multipath_relax`, and Link Bandwidth weighting; the next pain points are lifecycle and scale, not base - capability. In scope now: hot-swap `[[fib_tables]]` (operator confidence). + capability. **Done:** hot-swap `[[fib_tables]]` without a restart — SIGHUP + soft-reload and gRPC/`rustbgpctl fib-table` CRUD (`SetFibTable` / + `DeleteFibTable` / `ListFibTables`), ack-gated with no runtime/config drift. Decide based on signal: over-cap detail APIs beyond the sampled `route_limit_exceeded` rows. Defer unless perf-gated or demanded: incremental equal-cost sibling index for wide full-table multipath; platform-diversity diff --git a/crates/api/src/authz.rs b/crates/api/src/authz.rs index fc06d966..9900f43e 100644 --- a/crates/api/src/authz.rs +++ b/crates/api/src/authz.rs @@ -412,6 +412,24 @@ pub const METHODS: &[GrpcMethodAuthz] = &[ "/rustbgpd.v1.RibService/ListFibRoutes", AuthTier::SensitiveRead, ), + method( + "rustbgpd.v1.RibService", + "SetFibTable", + "/rustbgpd.v1.RibService/SetFibTable", + AuthTier::Mutating, + ), + method( + "rustbgpd.v1.RibService", + "DeleteFibTable", + "/rustbgpd.v1.RibService/DeleteFibTable", + AuthTier::Mutating, + ), + method( + "rustbgpd.v1.RibService", + "ListFibTables", + "/rustbgpd.v1.RibService/ListFibTables", + AuthTier::SensitiveRead, + ), method( "rustbgpd.v1.RibService", "ListRouteEvents", @@ -707,7 +725,7 @@ mod tests { .collect::>(); assert_eq!(matrix_methods, proto_methods); - assert_eq!(METHODS.len(), 78); + assert_eq!(METHODS.len(), 81); } #[test] @@ -748,8 +766,8 @@ mod tests { #[test] fn method_matrix_tier_counts_match_inventory() { assert_eq!(method_count_by_tier(AuthTier::Read), 0); - assert_eq!(method_count_by_tier(AuthTier::SensitiveRead), 42); - assert_eq!(method_count_by_tier(AuthTier::Mutating), 17); + assert_eq!(method_count_by_tier(AuthTier::SensitiveRead), 43); + assert_eq!(method_count_by_tier(AuthTier::Mutating), 19); assert_eq!(method_count_by_tier(AuthTier::OperatorOnly), 19); } diff --git a/crates/api/src/lib.rs b/crates/api/src/lib.rs index 1792f942..863af265 100644 --- a/crates/api/src/lib.rs +++ b/crates/api/src/lib.rs @@ -29,7 +29,7 @@ mod peer_group_service; pub mod peer_types; mod policy_helpers; mod policy_service; -mod rib_service; +pub mod rib_service; pub mod server; pub use evpn_service::EvpnService; diff --git a/crates/api/src/peer_types.rs b/crates/api/src/peer_types.rs index 3df009ae..917c33ac 100644 --- a/crates/api/src/peer_types.rs +++ b/crates/api/src/peer_types.rs @@ -298,6 +298,34 @@ pub enum PeerManagerCommand { /// Reply channel returning redacted diff output only. reply: oneshot::Sender>, }, + /// Atomically validate a candidate `[[fib_tables]]` set against the live + /// runtime config (peer-group references, reserved/duplicate table ids, + /// families, ECMP caps) and, on success, stage it into + /// `current_config.fib_tables`. Used by the gRPC FIB-table CRUD control + /// path before it reaches the FIB reconciler. Validating and staging in one + /// command (the peer manager processes commands serially) closes the TOCTOU + /// against a concurrent peer-group deletion that would otherwise check a + /// snapshot that doesn't yet reflect the in-flight table's references. + StageFibTables { + /// The full candidate table set (already merged with the upsert/delete). + tables: Vec, + /// Reply: `Ok(())` if it validated and was staged, else `Err(msg)` + /// (nothing staged). + reply: oneshot::Sender>, + }, + /// Refresh the peer manager's runtime config snapshot with the accepted + /// `[[fib_tables]]` set after a successful gRPC CRUD mutation, so the + /// snapshot the live `DiffRuntimeConfig` compares against doesn't report + /// the just-applied set as a pending change. The control path awaits the + /// ack (while holding the FIB coordinator lock) so the snapshot is applied + /// before the mutation returns and before a concurrent SIGHUP reload can + /// run — keeping the two snapshot writers serialized. + SetFibTablesSnapshot { + /// The full accepted table set the FIB reconciler acknowledged. + tables: Vec, + /// Acknowledgement, sent after `current_config.fib_tables` is assigned. + reply: oneshot::Sender<()>, + }, /// Query a single peer's state by address. GetPeerState { /// Peer identity to query. @@ -886,11 +914,45 @@ pub struct PeerManagerNeighborConfig { pub export_policy: Option, } +/// Crate-boundary mirror of a binary `FibTableConfig` (`[[fib_tables]]`). +/// +/// `ConfigEvent` lives in this API crate but the binary owns the real config +/// types, so FIB-table persistence events carry this plain snapshot instead of +/// the binary struct. The binary's `apply_config_event` converts it back to a +/// `FibTableConfig`, validating the candidate config before assigning. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FibTableSnapshot { + /// Operator-facing handle, unique across the table set. + pub name: String, + /// Linux route table id. + pub table_id: u32, + /// Kernel route metric / priority for daemon-owned rows. + pub metric: u32, + /// Address families eligible for install (empty = both unicast families). + pub families: Vec, + /// Optional peer-group allow-list (empty = all peer groups). + pub allowed_peer_groups: Vec, + /// Optional neighbor-address allow-list (empty = all neighbors). + pub allowed_neighbors: Vec, + /// Optional hard row cap (None = no cap). + pub max_routes: Option, + /// Optional ECMP caps (ADR-0066); None = single next-hop / fallback. + pub maximum_paths: Option, + /// Per-class eBGP ECMP cap; None falls back to `maximum_paths`. + pub maximum_paths_ebgp: Option, + /// Per-class iBGP ECMP cap; None falls back to `maximum_paths`. + pub maximum_paths_ibgp: Option, +} + /// A config persistence event sent after successful peer add/delete. /// /// The binary crate converts these into config file mutations. /// Kept simple — only the data the neighbor service already has. pub enum ConfigEvent { + /// The `[[fib_tables]]` set was replaced at runtime (gRPC FIB-table CRUD). + /// Carries the full accepted table set the FIB reconciler acknowledged, so + /// persistence writes exactly what the runtime applied. + FibTablesReplaced(Vec), /// A neighbor was successfully added at runtime. NeighborAdded(PeerManagerNeighborConfig), /// A neighbor was successfully deleted at runtime. diff --git a/crates/api/src/rib_service.rs b/crates/api/src/rib_service.rs index 5e28b6b9..17b90231 100644 --- a/crates/api/src/rib_service.rs +++ b/crates/api/src/rib_service.rs @@ -26,12 +26,78 @@ pub type BlackholeDiscardSnapshotFn = pub type FibRouteSnapshotFn = std::sync::Arc Vec + Send + Sync + 'static>; +/// One CRUD operation for the daemon FIB-table control hook (`[[fib_tables]]`). +/// +/// The binary-owned closure behind [`FibTableControlFn`] interprets these: +/// it reads the FIB reconciler's current table set, applies the op, validates +/// the candidate against the live config, hot-applies it through the reconciler, +/// and persists the accepted set — all under a coordinator lock shared with the +/// SIGHUP reload path so concurrent edits can't interleave. +pub enum FibTableControlRequest { + /// Create-or-replace a table by name (upsert); carries the full definition. + Set(proto::FibTableConfig), + /// Remove a table by name. + Delete { name: String }, + /// Read the current accepted table set + runtime availability. + List, +} + +/// Error returned by the daemon FIB-table control hook. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum FibTableControlError { + /// Candidate validation failed (bad `table_id` / family / caps / refs). + InvalidArgument(String), + /// Delete target name does not exist in the current set. + NotFound(String), + /// The FIB reconciler is not running: either no `[[fib_tables]]` entry was + /// present at startup (enabling FIB is restart-required) or the runtime is + /// unavailable (non-Linux platform / netlink setup failure). + FailedPrecondition(String), + /// The persistence channel is saturated or closed. + Unavailable(String), + /// Coordinator lock poisoned, actor channel closed, or other internal fault. + Internal(String), +} + +impl FibTableControlError { + fn into_status(self) -> Status { + match self { + Self::InvalidArgument(message) => Status::invalid_argument(message), + Self::NotFound(message) => Status::not_found(message), + Self::FailedPrecondition(message) => Status::failed_precondition(message), + Self::Unavailable(message) => Status::unavailable(message), + Self::Internal(message) => Status::internal(message), + } + } +} + +/// Future returned by the FIB-table control hook. +pub type FibTableControlFuture = Pin< + Box< + dyn std::future::Future> + + Send, + >, +>; +/// Daemon-owned hook that performs `[[fib_tables]]` CRUD. Wired in `main.rs` +/// where the binary config types, validator, FIB actor sender, and config +/// persistence channel are all visible; injected here so this API crate never +/// needs to reach across the crate boundary into the binary. +pub type FibTableControlFn = + std::sync::Arc FibTableControlFuture + Send + Sync + 'static>; + /// gRPC service for querying the RIB (received, best, advertised routes). pub struct RibService { rib_tx: mpsc::Sender, blackhole_discard_snapshot: BlackholeDiscardSnapshotFn, fib_route_snapshot: FibRouteSnapshotFn, metrics: BgpMetrics, + /// Per-listener access mode; gates the mutating FIB-table RPCs as + /// defense-in-depth behind the authz interceptor's `Mutating` tier. + access_mode: crate::server::AccessMode, + /// Daemon FIB-table CRUD hook. `None` when the service was built without + /// it (tests, or a build without FIB control) — the mutating RPCs then + /// return `FAILED_PRECONDITION`. + fib_table_control: Option, } impl RibService { @@ -43,6 +109,9 @@ impl RibService { blackhole_discard_snapshot: std::sync::Arc::new(Vec::new), fib_route_snapshot: std::sync::Arc::new(Vec::new), metrics: BgpMetrics::new(), + // Fail closed: callers opt into mutations via `with_fib_table_control`. + access_mode: crate::server::AccessMode::ReadOnly, + fib_table_control: None, } } @@ -74,9 +143,25 @@ impl RibService { blackhole_discard_snapshot, fib_route_snapshot, metrics, + access_mode: crate::server::AccessMode::ReadOnly, + fib_table_control: None, } } + /// Attach the per-listener access mode and the daemon FIB-table CRUD hook, + /// enabling `SetFibTable` / `DeleteFibTable` / `ListFibTables`. Without this + /// the mutating RPCs report the runtime as unavailable. + #[must_use] + pub fn with_fib_table_control( + mut self, + access_mode: crate::server::AccessMode, + fib_table_control: Option, + ) -> Self { + self.access_mode = access_mode; + self.fib_table_control = fib_table_control; + self + } + async fn query_routes(&self, peer: Option) -> Result, Status> { let (reply_tx, reply_rx) = oneshot::channel(); self.rib_tx @@ -1235,6 +1320,75 @@ impl proto::rib_service_server::RibService for RibService { Ok(Response::new(proto::ListEvpnResponse { routes })) } + + async fn set_fib_table( + &self, + request: Request, + ) -> Result, Status> { + if let Some(status) = crate::server::read_only_rejection(self.access_mode) { + return Err(status); + } + let table = request + .into_inner() + .table + .ok_or_else(|| Status::invalid_argument("set_fib_table: missing table definition"))?; + dispatch_fib_table_control( + self.fib_table_control.as_ref(), + FibTableControlRequest::Set(table), + ) + .await + } + + async fn delete_fib_table( + &self, + request: Request, + ) -> Result, Status> { + if let Some(status) = crate::server::read_only_rejection(self.access_mode) { + return Err(status); + } + let name = request.into_inner().name; + if name.trim().is_empty() { + return Err(Status::invalid_argument( + "delete_fib_table: table name is required", + )); + } + dispatch_fib_table_control( + self.fib_table_control.as_ref(), + FibTableControlRequest::Delete { name }, + ) + .await + } + + async fn list_fib_tables( + &self, + _request: Request, + ) -> Result, Status> { + // Read-only: no access-mode gate. The hook reports runtime availability + // even when the reconciler is not running (configured-but-not-started). + dispatch_fib_table_control( + self.fib_table_control.as_ref(), + FibTableControlRequest::List, + ) + .await + } +} + +/// Forward a FIB-table CRUD request to the daemon control hook, mapping the +/// hook's typed error to a gRPC `Status`. A missing hook means the daemon was +/// built without FIB-table control (tests / non-gRPC builds). +async fn dispatch_fib_table_control( + control: Option<&FibTableControlFn>, + request: FibTableControlRequest, +) -> Result, Status> { + let control = control.ok_or_else(|| { + Status::failed_precondition( + "FIB-table control is unavailable (the FIB reconciler was not started)", + ) + })?; + control(request) + .await + .map(Response::new) + .map_err(FibTableControlError::into_status) } #[expect(clippy::too_many_lines)] @@ -1629,6 +1783,62 @@ mod tests { RibService::new(tx) } + fn fib_table_proto(name: &str) -> proto::FibTableConfig { + proto::FibTableConfig { + name: name.to_string(), + table_id: 1000, + metric: 200, + ..Default::default() + } + } + + #[tokio::test] + async fn set_fib_table_rejected_when_read_only() { + // make_service() defaults to ReadOnly with no control hook. + let status = make_service() + .set_fib_table(Request::new(proto::SetFibTableRequest { + table: Some(fib_table_proto("edge")), + })) + .await + .unwrap_err(); + assert_eq!(status.code(), tonic::Code::PermissionDenied); + } + + #[tokio::test] + async fn set_fib_table_unavailable_without_control_hook() { + let (tx, _rx) = mpsc::channel(16); + let svc = + RibService::new(tx).with_fib_table_control(crate::server::AccessMode::ReadWrite, None); + let status = svc + .set_fib_table(Request::new(proto::SetFibTableRequest { + table: Some(fib_table_proto("edge")), + })) + .await + .unwrap_err(); + assert_eq!(status.code(), tonic::Code::FailedPrecondition); + } + + #[tokio::test] + async fn set_fib_table_missing_table_is_invalid_argument() { + let (tx, _rx) = mpsc::channel(16); + let svc = + RibService::new(tx).with_fib_table_control(crate::server::AccessMode::ReadWrite, None); + let status = svc + .set_fib_table(Request::new(proto::SetFibTableRequest { table: None })) + .await + .unwrap_err(); + assert_eq!(status.code(), tonic::Code::InvalidArgument); + } + + #[tokio::test] + async fn list_fib_tables_unavailable_without_control_hook() { + let status = make_service() + .list_fib_tables(Request::new(proto::ListFibTablesRequest {})) + .await + .unwrap_err(); + assert_eq!(status.code(), tonic::Code::FailedPrecondition); + } + fn list_routes_request() -> proto::ListRoutesRequest { proto::ListRoutesRequest { neighbor_address: String::new(), diff --git a/crates/api/src/server.rs b/crates/api/src/server.rs index 10860193..69d04595 100644 --- a/crates/api/src/server.rs +++ b/crates/api/src/server.rs @@ -108,6 +108,11 @@ pub struct ServeConfig { /// install state. Returns an empty list when no `[[fib_tables]]` /// are configured or before the actor's first reconcile pass. pub fib_route_snapshot: crate::rib_service::FibRouteSnapshotFn, + /// Daemon hook for runtime `[[fib_tables]]` CRUD (gRPC `SetFibTable` / + /// `DeleteFibTable` / `ListFibTables`). `None` disables the mutating RPCs + /// (they return `FAILED_PRECONDITION`). Wired in `main.rs` where the + /// FIB actor sender, validator, and config persistence are visible. + pub fib_table_control: Option, /// Live ADR-0061 per-route FIB dataplane event source. This is /// separate from the aggregate dataplane poller so route events /// are not delayed by snapshot polling. @@ -427,6 +432,7 @@ async fn run_listener( let evpn_duplicate_mac_clear = config.evpn_duplicate_mac_clear; let blackhole_discard_snapshot = config.blackhole_discard_snapshot; let fib_route_snapshot = config.fib_route_snapshot; + let fib_table_control = config.fib_table_control; let dataplane_route_events = config.dataplane_route_events; let bfd_session_snapshot = config.bfd_session_snapshot; let bfd_events = config.bfd_events; @@ -473,6 +479,7 @@ async fn run_listener( evpn_duplicate_mac_clear, blackhole_discard_snapshot, fib_route_snapshot, + fib_table_control, dataplane_route_events, bfd_session_snapshot, bfd_events, @@ -514,6 +521,7 @@ async fn run_listener( evpn_duplicate_mac_clear, blackhole_discard_snapshot, fib_route_snapshot, + fib_table_control, dataplane_route_events, bfd_session_snapshot, bfd_events, @@ -562,6 +570,7 @@ async fn run_tcp_listener( evpn_duplicate_mac_clear: Option, blackhole_discard_snapshot: crate::rib_service::BlackholeDiscardSnapshotFn, fib_route_snapshot: crate::rib_service::FibRouteSnapshotFn, + fib_table_control: Option, dataplane_route_events: Option>, bfd_session_snapshot: crate::bfd_service::BfdSessionSnapshotFn, bfd_events: Option>, @@ -615,7 +624,8 @@ async fn run_tcp_listener( blackhole_discard_snapshot.clone(), fib_route_snapshot.clone(), metrics.clone(), - ), + ) + .with_fib_table_control(access_mode, fib_table_control.clone()), interceptor.clone(), )); routes.add_service(EventServiceServer::with_interceptor( @@ -741,6 +751,7 @@ async fn run_uds_listener( evpn_duplicate_mac_clear: Option, blackhole_discard_snapshot: crate::rib_service::BlackholeDiscardSnapshotFn, fib_route_snapshot: crate::rib_service::FibRouteSnapshotFn, + fib_table_control: Option, dataplane_route_events: Option>, bfd_session_snapshot: crate::bfd_service::BfdSessionSnapshotFn, bfd_events: Option>, @@ -774,7 +785,8 @@ async fn run_uds_listener( blackhole_discard_snapshot.clone(), fib_route_snapshot.clone(), metrics.clone(), - ), + ) + .with_fib_table_control(access_mode, fib_table_control.clone()), interceptor.clone(), )); routes.add_service(EventServiceServer::with_interceptor( diff --git a/crates/cli/src/commands/fib_table.rs b/crates/cli/src/commands/fib_table.rs new file mode 100644 index 00000000..8100c280 --- /dev/null +++ b/crates/cli/src/commands/fib_table.rs @@ -0,0 +1,241 @@ +//! `rustbgpctl fib-table ...` — runtime `[[fib_tables]]` CRUD. +//! +//! Wraps RibService's `SetFibTable` / `DeleteFibTable` / `ListFibTables`. +//! `set` is a create-or-replace upsert keyed on table name (the request carries +//! the full table definition, not a patch); `delete` removes by name. Both +//! hot-apply through the FIB reconciler and persist to the TOML config, and +//! require the reconciler to be running (at least one table present at startup). + +use serde::Serialize; + +use crate::connection::Connection; +use crate::error::CliError; +use crate::proto::rib_service_client::RibServiceClient; +use crate::proto::{ + DeleteFibTableRequest, FibTableConfig, ListFibTablesRequest, ListFibTablesResponse, + SetFibTableRequest, +}; + +#[derive(Debug, Serialize)] +struct JsonFibTable { + name: String, + table_id: u32, + metric: u32, + families: Vec, + allowed_peer_groups: Vec, + allowed_neighbors: Vec, + max_routes: Option, + maximum_paths: Option, + maximum_paths_ebgp: Option, + maximum_paths_ibgp: Option, +} + +#[derive(Debug, Serialize)] +struct JsonFibTableList { + runtime_available: bool, + tables: Vec, +} + +impl From<&FibTableConfig> for JsonFibTable { + fn from(t: &FibTableConfig) -> Self { + Self { + name: t.name.clone(), + table_id: t.table_id, + metric: t.metric, + families: t.families.clone(), + allowed_peer_groups: t.allowed_peer_groups.clone(), + allowed_neighbors: t.allowed_neighbors.clone(), + max_routes: t.max_routes, + maximum_paths: t.maximum_paths, + maximum_paths_ebgp: t.maximum_paths_ebgp, + maximum_paths_ibgp: t.maximum_paths_ibgp, + } + } +} + +fn render(resp: &ListFibTablesResponse, json: bool) { + if json { + let out = JsonFibTableList { + runtime_available: resp.runtime_available, + tables: resp.tables.iter().map(JsonFibTable::from).collect(), + }; + println!( + "{}", + serde_json::to_string_pretty(&out).expect("failed to serialize FIB tables as JSON") + ); + return; + } + if !resp.runtime_available { + println!( + "FIB reconciler not running (no [[fib_tables]] at startup, or non-Linux / netlink \ + unavailable); a restart is required to start it." + ); + } + if resp.tables.is_empty() { + println!("No FIB tables configured"); + return; + } + println!( + "{:<16} {:<9} {:<7} {:<24} MAX_ROUTES", + "NAME", "TABLE_ID", "METRIC", "FAMILIES" + ); + for t in &resp.tables { + let families = if t.families.is_empty() { + "(default)".to_string() + } else { + t.families.join(",") + }; + let max_routes = t + .max_routes + .map_or_else(|| "-".to_string(), |m| m.to_string()); + println!( + "{:<16} {:<9} {:<7} {:<24} {}", + t.name, t.table_id, t.metric, families, max_routes + ); + } +} + +pub async fn list(connection: Connection, json: bool) -> Result<(), CliError> { + let mut client = + RibServiceClient::with_interceptor(connection.channel(), connection.interceptor()); + let resp = client + .list_fib_tables(ListFibTablesRequest {}) + .await? + .into_inner(); + render(&resp, json); + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +pub async fn set( + connection: Connection, + name: &str, + table_id: u32, + metric: u32, + families: Vec, + allowed_peer_groups: Vec, + allowed_neighbors: Vec, + max_routes: Option, + maximum_paths: Option, + maximum_paths_ebgp: Option, + maximum_paths_ibgp: Option, + json: bool, +) -> Result<(), CliError> { + let mut client = + RibServiceClient::with_interceptor(connection.channel(), connection.interceptor()); + let resp = client + .set_fib_table(SetFibTableRequest { + table: Some(FibTableConfig { + name: name.to_string(), + table_id, + metric, + families, + allowed_peer_groups, + allowed_neighbors, + max_routes, + maximum_paths, + maximum_paths_ebgp, + maximum_paths_ibgp, + }), + }) + .await? + .into_inner(); + if json { + render(&resp, true); + } else { + println!( + "FIB table {name} applied ({} table(s) now active)", + resp.tables.len() + ); + } + Ok(()) +} + +pub async fn delete(connection: Connection, name: &str, json: bool) -> Result<(), CliError> { + let mut client = + RibServiceClient::with_interceptor(connection.channel(), connection.interceptor()); + let resp = client + .delete_fib_table(DeleteFibTableRequest { + name: name.to_string(), + }) + .await? + .into_inner(); + if json { + render(&resp, true); + } else { + println!( + "FIB table {name} deleted ({} table(s) now active)", + resp.tables.len() + ); + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::connection::connect; + use crate::test_support::spawn_mock_server; + + #[tokio::test] + async fn list_renders_tables() { + let server = spawn_mock_server(None).await; + let connection = connect(&server.addr, None).await.unwrap(); + list(connection, true).await.unwrap(); + } + + #[tokio::test] + async fn set_sends_full_table() { + let server = spawn_mock_server(None).await; + let connection = connect(&server.addr, None).await.unwrap(); + set( + connection, + "edge", + 1000, + 200, + vec!["ipv4_unicast".to_string()], + vec![], + vec![], + Some(50_000), + None, + None, + None, + true, + ) + .await + .unwrap(); + let captured = server + .state + .last_set_fib_table + .lock() + .await + .clone() + .unwrap(); + let table = captured.table.unwrap(); + assert_eq!(table.name, "edge"); + assert_eq!(table.table_id, 1000); + assert_eq!(table.metric, 200); + assert_eq!(table.families, vec!["ipv4_unicast"]); + assert_eq!(table.allowed_peer_groups, Vec::::new()); + assert_eq!(table.allowed_neighbors, Vec::::new()); + assert_eq!(table.max_routes, Some(50_000)); + assert_eq!(table.maximum_paths, None); + assert_eq!(table.maximum_paths_ebgp, None); + assert_eq!(table.maximum_paths_ibgp, None); + } + + #[tokio::test] + async fn delete_sends_name() { + let server = spawn_mock_server(None).await; + let connection = connect(&server.addr, None).await.unwrap(); + delete(connection, "edge", true).await.unwrap(); + let captured = server + .state + .last_delete_fib_table + .lock() + .await + .clone() + .unwrap(); + assert_eq!(captured.name, "edge"); + } +} diff --git a/crates/cli/src/commands/mod.rs b/crates/cli/src/commands/mod.rs index 666b5fb7..95ea1acf 100644 --- a/crates/cli/src/commands/mod.rs +++ b/crates/cli/src/commands/mod.rs @@ -3,6 +3,7 @@ pub mod config; pub mod control; pub mod dynamic_neighbor; pub mod evpn; +pub mod fib_table; pub mod flowspec; pub mod global; pub mod neighbor; diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index f0365a9b..3be2254d 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -243,6 +243,13 @@ enum Command { action: DynamicNeighborAction, }, + /// Manage `[[fib_tables]]` (ADR-0061 general unicast FIB export) at runtime. + /// Hot-applies through the FIB reconciler and persists to the config. + FibTable { + #[command(subcommand)] + action: FibTableAction, + }, + /// Generate shell completions Completions { /// Shell to generate completions for @@ -410,6 +417,50 @@ enum DynamicNeighborAction { }, } +#[derive(Subcommand)] +enum FibTableAction { + /// List the configured FIB tables and runtime availability + List, + /// Create or replace a FIB table by name (full definition, not a patch) + Set { + /// Table name (unique handle) + name: String, + /// Linux route table id + #[arg(long)] + table_id: u32, + /// Kernel route metric / priority for daemon-owned rows + #[arg(long)] + metric: u32, + /// Address families (comma-separated, e.g. ipv4_unicast,ipv6_unicast). + /// Empty defaults to both unicast families. + #[arg(long, value_delimiter = ',')] + families: Vec, + /// Peer-group allow-list (repeatable / comma-separated). Empty = all. + #[arg(long = "allowed-peer-group", value_delimiter = ',')] + allowed_peer_groups: Vec, + /// Neighbor-address allow-list (repeatable / comma-separated). Empty = all. + #[arg(long = "allowed-neighbor", value_delimiter = ',')] + allowed_neighbors: Vec, + /// Hard cap on eligible routes (rows). Unset = no cap. + #[arg(long)] + max_routes: Option, + /// Global ECMP cap (1..=256). Unset/1 = single next-hop. + #[arg(long)] + maximum_paths: Option, + /// Per-class eBGP ECMP cap (overrides maximum_paths for eBGP). + #[arg(long)] + maximum_paths_ebgp: Option, + /// Per-class iBGP ECMP cap (overrides maximum_paths for iBGP). + #[arg(long)] + maximum_paths_ibgp: Option, + }, + /// Delete a FIB table by name + Delete { + /// Table name to remove + name: String, + }, +} + #[derive(Subcommand)] enum NeighborAction { /// Add a new neighbor @@ -1645,6 +1696,40 @@ async fn run(cli: Cli) -> Result<(), CliError> { commands::dynamic_neighbor::delete(connection, &prefix, json).await } }, + Command::FibTable { action } => match action { + FibTableAction::List => commands::fib_table::list(connection, json).await, + FibTableAction::Set { + name, + table_id, + metric, + families, + allowed_peer_groups, + allowed_neighbors, + max_routes, + maximum_paths, + maximum_paths_ebgp, + maximum_paths_ibgp, + } => { + commands::fib_table::set( + connection, + &name, + table_id, + metric, + families, + allowed_peer_groups, + allowed_neighbors, + max_routes, + maximum_paths, + maximum_paths_ebgp, + maximum_paths_ibgp, + json, + ) + .await + } + FibTableAction::Delete { name } => { + commands::fib_table::delete(connection, &name, json).await + } + }, Command::Completions { .. } => unreachable!("handled before connect"), } } diff --git a/crates/cli/src/test_support.rs b/crates/cli/src/test_support.rs index b2f21143..3d6ba94d 100644 --- a/crates/cli/src/test_support.rs +++ b/crates/cli/src/test_support.rs @@ -40,6 +40,8 @@ pub(crate) struct MockState { pub(crate) last_add_dynamic_neighbor: Mutex>, pub(crate) last_delete_dynamic_neighbor: Mutex>, + pub(crate) last_set_fib_table: Mutex>, + pub(crate) last_delete_fib_table: Mutex>, pub(crate) last_set_global_import_chain: Mutex>, pub(crate) last_set_global_export_chain: @@ -767,6 +769,55 @@ impl rustbgpd_api::proto::rib_service_server::RibService for MockRibService { } Ok(Response::new(server_proto::ListEvpnResponse { routes })) } + + async fn set_fib_table( + &self, + request: Request, + ) -> Result, Status> { + let request = request.into_inner(); + let table = request.table.clone().unwrap_or_default(); + *self.state.last_set_fib_table.lock().await = Some(request); + Ok(Response::new(server_proto::ListFibTablesResponse { + tables: vec![table], + runtime_available: true, + })) + } + + async fn delete_fib_table( + &self, + request: Request, + ) -> Result, Status> { + *self.state.last_delete_fib_table.lock().await = Some(request.into_inner()); + Ok(Response::new(server_proto::ListFibTablesResponse { + tables: vec![], + runtime_available: true, + })) + } + + async fn list_fib_tables( + &self, + _request: Request, + ) -> Result, Status> { + Ok(Response::new(server_proto::ListFibTablesResponse { + tables: vec![mock_fib_table()], + runtime_available: true, + })) + } +} + +fn mock_fib_table() -> server_proto::FibTableConfig { + server_proto::FibTableConfig { + name: "edge".to_string(), + table_id: 1000, + metric: 200, + families: vec!["ipv4_unicast".to_string()], + allowed_peer_groups: vec![], + allowed_neighbors: vec![], + max_routes: None, + maximum_paths: None, + maximum_paths_ebgp: None, + maximum_paths_ibgp: None, + } } fn mock_evpn_route(route_type: u32) -> server_proto::EvpnRouteEntry { diff --git a/docs/API.md b/docs/API.md index 88572f1d..473c9cfd 100644 --- a/docs/API.md +++ b/docs/API.md @@ -150,7 +150,7 @@ for `grpc_authz` logs and the related Prometheus metrics live in | `NeighborService` | `ListNeighbors`, `GetNeighborState`, `ListDynamicNeighbors` | `AddNeighbor`, `DeleteNeighbor`, `EnableNeighbor`, `DisableNeighbor`, `SoftResetIn`, `AddDynamicNeighbor`, `DeleteDynamicNeighbor`, `SetGracefulShutdown` | | `PolicyService` | `ListPolicies`, `GetPolicy`, `ListNeighborSets`, `GetNeighborSet`, `GetGlobalPolicyChains`, `GetNeighborPolicyChains`, `ExplainImportPolicy` | `SetPolicy`, `DeletePolicy`, `SetNeighborSet`, `DeleteNeighborSet`, `SetGlobalImportChain`, `SetGlobalExportChain`, `ClearGlobalImportChain`, `ClearGlobalExportChain`, `SetNeighborImportChain`, `SetNeighborExportChain`, `ClearNeighborImportChain`, `ClearNeighborExportChain` | | `PeerGroupService` | `ListPeerGroups`, `GetPeerGroup` | `SetPeerGroup`, `DeletePeerGroup`, `SetNeighborPeerGroup`, `ClearNeighborPeerGroup` | -| `RibService` | All RPCs | None | +| `RibService` | All read/list/explain RPCs (incl. `ListFibTables`) | `SetFibTable`, `DeleteFibTable` | | `EventService` | All RPCs | None | | `EvpnService` | `GetEvpnRuntime`, `ListEvpnInstances`, `ListEvpnNexthops`, `ListIpVrfs`, `GetIpVrf` | `ClearDuplicateMacQuarantine`, `ApplyEvpnRuntime` | | `BfdService` | `GetBfdSessions` | None | @@ -283,8 +283,8 @@ added at runtime. | `EnableNeighbor` | Re-enable a previously disabled peer | | `DisableNeighbor` | Administratively disable a peer (sends NOTIFICATION) | | `SoftResetIn` | Request inbound route refresh (RFC 2918/7313) for one or more families | -| `AddDynamicNeighbor` | Reserved runtime add for a `[[dynamic_neighbors]]` range; currently returns `UNIMPLEMENTED` | -| `DeleteDynamicNeighbor` | Reserved runtime delete for a dynamic-neighbor range; currently returns `UNIMPLEMENTED` | +| `AddDynamicNeighbor` | Add a `[[dynamic_neighbors]]` prefix range at runtime; persists to the config (atomic write) when started with `--config` | +| `DeleteDynamicNeighbor` | Remove a dynamic-neighbor range at runtime (stops future accepts; established peers drain on Idle) | | `ListDynamicNeighbors` | List configured dynamic-neighbor ranges with active peer counts | | `SetGracefulShutdown` | RFC 8326 initiator toggle — attach the `GRACEFUL_SHUTDOWN` community to outbound updates for one peer (or all peers when `address` is empty) and clear with `clear = true` | @@ -540,6 +540,9 @@ Query the routing information base and subscribe to real-time route changes. | `ListEvpnRoutes` | EVPN routes (RFC 7432) in Loc-RIB view, filterable by route type / peer / RD | | `ListBlackholeDiscards` | RFC 7999 BLACKHOLE kernel-discard install status when `[global] honor_blackhole = true` and `[global] install_blackhole_discard = true` | | `ListFibRoutes` | ADR-0061 general unicast Linux FIB route status for configured `[[fib_tables]]` | +| `ListFibTables` | List the configured `[[fib_tables]]` and whether the FIB reconciler is running (`sensitive_read`) | +| `SetFibTable` | Create-or-replace a `[[fib_tables]]` entry by name (upsert; full definition, not a patch) at runtime; hot-applies through the reconciler and persists. Requires the reconciler running (≥1 table at startup) else `FAILED_PRECONDITION` (`mutating`) | +| `DeleteFibTable` | Remove a `[[fib_tables]]` entry by name at runtime; `NOT_FOUND` if absent (`mutating`) | | `ListRouteEvents` | Recent unicast route add / withdraw / best-change / export-policy-filtered event history from the bounded in-memory RIB ring | | `WatchRoutes` | Server-streaming: real-time route add / withdraw / best-change / export-policy-filtered events | | `WatchRouteEvents` | Server-streaming: real-time route add / withdraw / best-change / export-policy-filtered events wrapped as `BgpEvent`, including explicit lag warnings | diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 4eb42c7f..50f530d6 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -1752,6 +1752,28 @@ so adding the first table needs a restart (SIGHUP logs this and leaves the runtime unchanged). Deleting all tables at runtime is fine — the actor stays alive but idle, and re-adding a table later hot-applies. +**gRPC / CLI runtime CRUD** (same lifecycle rules as SIGHUP): `RibService` +exposes `SetFibTable` (create-or-replace by name — the request carries the full +table definition, not a patch), `DeleteFibTable`, and `ListFibTables`, surfaced +as `rustbgpctl fib-table {list,set,delete}`. A `set` that changes `table_id` or +`metric` for an existing name is a table-key move: the old kernel rows withdraw +and the new table back-fills. The candidate is validated against the live config +(reserved/duplicate ids, families, ECMP caps, peer-group references) before it +reaches the reconciler, applied through the same hot-reload path, and persisted +to the TOML config (atomic write) **only after** the reconciler acknowledges the +exact accepted set — and runtime CRUD is serialized with SIGHUP reloads through +one coordinator lock, so runtime and on-disk config cannot drift. The mutating +RPCs require the reconciler to be running (return `FAILED_PRECONDITION` +otherwise) and are tier `mutating`; `ListFibTables` is `sensitive_read` and also +reports whether the reconciler is running. + +```console +$ rustbgpctl fib-table set edge --table-id 1000 --metric 200 \ + --families ipv4_unicast,ipv6_unicast --max-routes 50000 +$ rustbgpctl fib-table list +$ rustbgpctl fib-table delete edge +``` + --- ## `[[evpn_instances]]` diff --git a/docs/grpc-method-inventory.json b/docs/grpc-method-inventory.json index e56728b8..2a9cc35e 100644 --- a/docs/grpc-method-inventory.json +++ b/docs/grpc-method-inventory.json @@ -6,11 +6,11 @@ "additional_protos": [ "proto/github.com/openconfig/gnmi/proto/gnmi/gnmi.proto" ], - "method_count": 78, + "method_count": 81, "tier_counts": { "read": 0, - "sensitive_read": 42, - "mutating": 17, + "sensitive_read": 43, + "mutating": 19, "operator_only": 19 }, "methods": [ @@ -64,6 +64,9 @@ { "service": "rustbgpd.v1.RibService", "method": "ExplainBestPath", "path": "/rustbgpd.v1.RibService/ExplainBestPath", "tier": "sensitive_read" }, { "service": "rustbgpd.v1.RibService", "method": "ListBlackholeDiscards", "path": "/rustbgpd.v1.RibService/ListBlackholeDiscards", "tier": "sensitive_read" }, { "service": "rustbgpd.v1.RibService", "method": "ListFibRoutes", "path": "/rustbgpd.v1.RibService/ListFibRoutes", "tier": "sensitive_read" }, + { "service": "rustbgpd.v1.RibService", "method": "SetFibTable", "path": "/rustbgpd.v1.RibService/SetFibTable", "tier": "mutating" }, + { "service": "rustbgpd.v1.RibService", "method": "DeleteFibTable", "path": "/rustbgpd.v1.RibService/DeleteFibTable", "tier": "mutating" }, + { "service": "rustbgpd.v1.RibService", "method": "ListFibTables", "path": "/rustbgpd.v1.RibService/ListFibTables", "tier": "sensitive_read" }, { "service": "rustbgpd.v1.RibService", "method": "ListRouteEvents", "path": "/rustbgpd.v1.RibService/ListRouteEvents", "tier": "sensitive_read" }, { "service": "rustbgpd.v1.RibService", "method": "WatchRoutes", "path": "/rustbgpd.v1.RibService/WatchRoutes", "tier": "sensitive_read" }, { "service": "rustbgpd.v1.RibService", "method": "WatchRouteEvents", "path": "/rustbgpd.v1.RibService/WatchRouteEvents", "tier": "sensitive_read" }, diff --git a/docs/grpc-method-inventory.md b/docs/grpc-method-inventory.md index c1b1fd2d..a79fb13e 100644 --- a/docs/grpc-method-inventory.md +++ b/docs/grpc-method-inventory.md @@ -118,7 +118,7 @@ shape itself does not raise the tier. | `SetNeighborPeerGroup` | `mutating` | Single-neighbor reassignment. | | `ClearNeighborPeerGroup` | `mutating` | Single-neighbor. | -### RibService (12 RPCs) +### RibService (15 RPCs) | RPC | Tier | Notes | |-----|------|-------| @@ -129,6 +129,9 @@ shape itself does not raise the tier. | `ExplainBestPath` | `sensitive_read` | Per-route best-path tie-break trace. | | `ListBlackholeDiscards` | `sensitive_read` | Per-discard reasons; exposes RFC 7999 BLACKHOLE community installations. | | `ListFibRoutes` | `sensitive_read` | ADR-0061 FIB status snapshot — exposes which routes are installed in the kernel. | +| `SetFibTable` | `mutating` | Create-or-replace a `[[fib_tables]]` entry by name; hot-applies via the reconciler and persists. Programs kernel route tables. | +| `DeleteFibTable` | `mutating` | Remove a `[[fib_tables]]` entry by name; withdraws its kernel rows. | +| `ListFibTables` | `sensitive_read` | Configured FIB table set + runtime availability. | | `ListRouteEvents` | `sensitive_read` | Bounded route-event history. | | `WatchRoutes` (stream) | `sensitive_read` | Live route-event stream. Streaming shape; same data as `ListRouteEvents`. | | `WatchRouteEvents` (stream) | `sensitive_read` | Enveloped live route-event stream with explicit `stream_lagged` signals. | @@ -197,12 +200,12 @@ shape itself does not raise the tier. | Tier | Count | % | |------|------:|--:| | `read` | 0 | 0.0% | -| `sensitive_read` | 42 | 53.8% | -| `mutating` | 17 | 21.8% | -| `operator_only` | 19 | 24.4% | -| **Total** | **78** | **100%** | +| `sensitive_read` | 43 | 53.1% | +| `mutating` | 19 | 23.5% | +| `operator_only` | 19 | 23.5% | +| **Total** | **81** | **100%** | -(Counts treat `SetGracefulShutdown` as one RPC even though it appears once in `NeighborService`; the 78 total is 74 native `rustbgpd.v1` RPCs plus 4 `gnmi.gNMI` RPCs.) +(Counts treat `SetGracefulShutdown` as one RPC even though it appears once in `NeighborService`; the 81 total is 77 native `rustbgpd.v1` RPCs plus 4 `gnmi.gNMI` RPCs.) ## Notes for ADR-0064 @@ -278,7 +281,7 @@ specific method if the model warrants it. ## Code matrix -`crates/api/src/authz.rs` contains the same 78-method classification +`crates/api/src/authz.rs` contains the same 81-method classification as a static Rust table. `docs/grpc-method-inventory.json` is the machine-readable export for auditors, tooling, and generated clients. The `authz` tests parse `proto/rustbgpd.proto` and fail if a new RPC is added diff --git a/docs/milestones.md b/docs/milestones.md index b81ed087..77d0aa62 100644 --- a/docs/milestones.md +++ b/docs/milestones.md @@ -761,6 +761,7 @@ runners can't sustain: | **M50** | ADR-0066 unicast multipath/ECMP FIB install. Two FRR peers in the same AS each originate the same prefix; rustbgpd with `[[fib_tables]] maximum_paths=2` installs a kernel `RTA_MULTIPATH` route with both gateways, collapses to the survivor when one path withdraws, and restores the two-way ECMP when it returns. | `kernel-dataplane` CI (GitHub-hosted). Script: `test-m50-fib-ecmp-frr.sh`. | | **M51** | ADR-0067 single-hop async BFD + RFC 5882 coupling against FRR `bfdd`. Validates BGP Established + BFD Up from both sides (`GetBfdSessions` + `show bfd peers`), then kills `bfdd` and asserts the coupling tears BGP down faster than the 90 s hold timer, and that BFD + BGP recover once `watchfrr` restarts `bfdd`. | `kernel-dataplane` CI (GitHub-hosted). Script: `test-m51-bfd-frr.sh`. | | **M52** | ADR-0066 multipath-relax. Two FRR peers in different ASes (65002 / 65003) originate the same prefix with equal `AS_PATH` length; rustbgpd with `[global] multipath_relax=true` + `maximum_paths=2` co-installs them as a kernel ECMP route (exact-`AS_PATH` grouping would not), collapses to the survivor on withdraw, and restores ECMP on re-advertise. | `kernel-dataplane` CI (GitHub-hosted). Script: `test-m52-fib-ecmp-relax-frr.sh`. | +| **M58** | ADR-0061 FIB-table runtime CRUD. Drives `SetFibTable` / `DeleteFibTable` / `ListFibTables` against a real kernel: runtime table add, `table_id`/`metric` key-move (old rows withdraw, new install), persist-across-restart, delete withdraws only its rows, and `NOT_FOUND` on a missing name. Companion to M42 (startup FIB path). | `kernel-dataplane` CI (GitHub-hosted). Script: `test-m58-fib-table-crud-frr.sh`. | --- diff --git a/proto/rustbgpd.proto b/proto/rustbgpd.proto index f71d4983..5e82777c 100644 --- a/proto/rustbgpd.proto +++ b/proto/rustbgpd.proto @@ -749,6 +749,14 @@ service RibService { rpc ExplainBestPath(ExplainBestPathRequest) returns (ExplainBestPathResponse); rpc ListBlackholeDiscards(ListBlackholeDiscardsRequest) returns (ListBlackholeDiscardsResponse); rpc ListFibRoutes(ListFibRoutesRequest) returns (ListFibRoutesResponse); + // Runtime CRUD for `[[fib_tables]]` (ADR-0061). Hot-applies through the FIB + // reconciler and persists to the TOML config, mirroring dynamic-neighbor + // CRUD. SetFibTable is a create-or-replace upsert keyed on table name; + // DeleteFibTable removes by name. Both require the reconciler to be running + // (at least one table present at startup) — otherwise FAILED_PRECONDITION. + rpc SetFibTable(SetFibTableRequest) returns (ListFibTablesResponse); + rpc DeleteFibTable(DeleteFibTableRequest) returns (ListFibTablesResponse); + rpc ListFibTables(ListFibTablesRequest) returns (ListFibTablesResponse); rpc ListRouteEvents(ListRouteEventsRequest) returns (ListRouteEventsResponse); rpc WatchRoutes(WatchRoutesRequest) returns (stream RouteEvent); rpc WatchRouteEvents(WatchRoutesRequest) returns (stream BgpEvent); @@ -1066,6 +1074,56 @@ message ListFibRoutesResponse { uint64 total_count = 3; } +// Full mirror of a `[[fib_tables]]` config block. SetFibTable carries the +// complete definition (not a patch), so defaults and validation stay legible. +message FibTableConfig { + // Operator-facing handle, unique across the table set. + string name = 1; + // Linux route table id (reserved ids are rejected at validation). + uint32 table_id = 2; + // Kernel route metric / priority for daemon-owned rows. + uint32 metric = 3; + // Address families eligible for install, e.g. "ipv4_unicast" / + // "ipv6_unicast". Empty defaults to both unicast families. + repeated string families = 4; + // Optional peer-group allow-list. Empty = all peer groups. + repeated string allowed_peer_groups = 5; + // Optional neighbor-address allow-list. Empty = all neighbors. + repeated string allowed_neighbors = 6; + // Optional hard row cap. Unset = no cap. Zero is rejected. + optional uint32 max_routes = 7; + // Optional ECMP caps (ADR-0066), each 1..=256. Unset/1 = single next-hop. + // Per-class caps override the global one for their best-path class. + optional uint32 maximum_paths = 8; + optional uint32 maximum_paths_ebgp = 9; + optional uint32 maximum_paths_ibgp = 10; +} + +message SetFibTableRequest { + // Full table definition, upserted by name (create-or-replace). A change to + // table_id/metric for an existing name is a real table-key move: old kernel + // rows withdraw and the new table back-fills through the reconciler. + FibTableConfig table = 1; +} + +message DeleteFibTableRequest { + // Name of the table to remove. NOT_FOUND if no table has this name. + string name = 1; +} + +message ListFibTablesRequest {} + +message ListFibTablesResponse { + // For Set/Delete: the full accepted runtime table set after the operation. + // For List: the current configured set. + repeated FibTableConfig tables = 1; + // True when the FIB reconciler is running and can hot-apply table edits. + // False on List means edits are restart-required: either no table was + // present at startup (the reconciler isn't spawned) or the runtime is + // unavailable (non-Linux platform / netlink setup failure). + bool runtime_available = 2; +} + message ListRouteEventsRequest { // Optional neighbor address filter. Empty = all peers. Matches both // current and previous peer, so a peer-scoped query includes diff --git a/src/config/schema.rs b/src/config/schema.rs index b0e69465..07e54ba4 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -1209,7 +1209,7 @@ pub struct FibTableConfig { pub maximum_paths_ibgp: Option, } -fn default_fib_families() -> Vec { +pub(crate) fn default_fib_families() -> Vec { vec!["ipv4_unicast".to_string(), "ipv6_unicast".to_string()] } diff --git a/src/fib_runtime.rs b/src/fib_runtime.rs index 54d60de0..0cd9a882 100644 --- a/src/fib_runtime.rs +++ b/src/fib_runtime.rs @@ -83,6 +83,14 @@ pub enum FibRuntimeCommand { tables: Vec, reply: oneshot::Sender>, }, + /// Return the actor's current desired table set — its live source of truth. + /// The gRPC CRUD control path reads this, applies the upsert/delete, + /// validates the candidate, then issues a `ReplaceTables`, all while + /// holding the FIB-config coordinator lock so the read-modify-write is + /// atomic against concurrent CRUD and SIGHUP reloads. + GetTables { + reply: oneshot::Sender>, + }, } /// Operator-visible state for one projected FIB row. @@ -384,6 +392,9 @@ async fn run_loop( })); } } + Some(FibRuntimeCommand::GetTables { reply }) => { + let _ = reply.send(config.tables.clone()); + } None => cmd_open = false, } } diff --git a/src/fib_table_control.rs b/src/fib_table_control.rs new file mode 100644 index 00000000..718b624c --- /dev/null +++ b/src/fib_table_control.rs @@ -0,0 +1,469 @@ +//! Runtime `[[fib_tables]]` CRUD control (gRPC `RibService.SetFibTable` / +//! `DeleteFibTable` / `ListFibTables`). +//! +//! This is the binary-owned hook behind `RibService`'s `FibTableControlFn`. +//! It lives here, not in the API crate, because it needs the binary config +//! types, the FIB reconciler command channel (`FibRuntimeCommand`), the +//! peer-manager validator, and the config-persistence channel — none of which +//! the API crate can see across the crate boundary. +//! +//! Safety properties (see ADR-0061): +//! - **Atomic read-modify-write.** A single coordinator `Mutex`, shared with +//! the SIGHUP reload path, is held across read (`GetTables`) → validate → +//! apply (`ReplaceTables`) → persist. Concurrent CRUD calls and SIGHUP FIB +//! reloads can't interleave and clobber each other. +//! - **Validate before dispatch.** The candidate set is validated against the +//! live config (peer-group references, reserved/duplicate ids, families, ECMP +//! caps) by the peer manager before it ever reaches the reconciler. +//! - **Persist only after the actor acknowledges**, and only the exact accepted +//! set — so runtime and on-disk config can't drift. +//! - **Durable after dispatch.** The mutation runs in a detached task whose +//! join handle the request future awaits; a canceled gRPC call cannot split a +//! successful apply from its persistence. + +use std::sync::Arc; +use std::time::Duration; + +use tokio::sync::{Mutex, mpsc, oneshot}; + +use rustbgpd_api::peer_types::{ConfigEvent, FibTableSnapshot, PeerManagerCommand}; +use rustbgpd_api::proto; +use rustbgpd_api::rib_service::{ + FibTableControlError, FibTableControlFn, FibTableControlFuture, FibTableControlRequest, +}; + +use crate::config::FibTableConfig; +use crate::fib_runtime::FibRuntimeCommand; + +/// How long to wait for a config-persistence permit before refusing the +/// mutation (mirrors the dynamic-neighbor CRUD reserve deadline). +const PERSIST_RESERVE_TIMEOUT: Duration = Duration::from_secs(2); + +/// Dependencies for the FIB-table control hook, wired from `main.rs`. +pub struct FibTableControlDeps { + /// FIB reconciler command channel. `None` when the reconciler did not spawn + /// (no `[[fib_tables]]` at startup, or non-Linux / netlink setup failure). + pub fib_cmd_tx: Option>, + /// Peer-manager channel — the live-config validation authority. + pub peer_mgr_tx: mpsc::Sender, + /// Config-persistence channel. `None` when the daemon was started without + /// `--config` (nothing to persist to). + pub config_tx: Option>, + /// Coordinator lock shared with the SIGHUP reload FIB step. + pub lock: Arc>, + /// The `[[fib_tables]]` set present at startup. When the reconciler is not + /// running, `List` falls back to this so a non-Linux / netlink-failure + /// daemon still shows its configured tables, and an empty set distinguishes + /// "restart required to enable FIB" from "runtime unavailable". The set is + /// static while the reconciler is absent (mutations are rejected and SIGHUP + /// can't hot-apply), so it stays accurate. + pub startup_tables: Vec, +} + +/// Build the `FibTableControlFn` the RIB service calls for FIB-table CRUD. +#[must_use] +pub fn make_fib_table_control_fn(deps: FibTableControlDeps) -> FibTableControlFn { + let deps = Arc::new(deps); + Arc::new(move |request| { + let deps = deps.clone(); + Box::pin(async move { handle(deps, request).await }) as FibTableControlFuture + }) +} + +async fn handle( + deps: Arc, + request: FibTableControlRequest, +) -> Result { + match request { + FibTableControlRequest::List => { + let _guard = deps.lock.lock().await; + // A running reconciler is the source of truth; otherwise fall back + // to the startup set so configured tables stay visible even when + // the actor failed to spawn (non-Linux / netlink failure). + let (tables, runtime_available) = + match read_current_tables(deps.fib_cmd_tx.as_ref()).await? { + Some(current) => (current, true), + None => (deps.startup_tables.clone(), false), + }; + Ok(proto::ListFibTablesResponse { + tables: tables.iter().map(config_to_proto).collect(), + runtime_available, + }) + } + FibTableControlRequest::Set(table) => { + mutate(deps, Mutation::Upsert(proto_to_config(table))).await + } + FibTableControlRequest::Delete { name } => mutate(deps, Mutation::Delete(name)).await, + } +} + +enum Mutation { + Upsert(FibTableConfig), + Delete(String), +} + +async fn mutate( + deps: Arc, + mutation: Mutation, +) -> Result { + // A mutation needs a running reconciler and a persistence sink. Resolve + // both up front so a missing one fails before we spawn the durable task. + let fib_cmd_tx = deps + .fib_cmd_tx + .clone() + .ok_or_else(|| runtime_unavailable_error(!deps.startup_tables.is_empty()))?; + let config_tx = deps.config_tx.clone().ok_or_else(|| { + FibTableControlError::FailedPrecondition( + "FIB-table CRUD requires a persisted config (start rustbgpd with --config)".to_string(), + ) + })?; + let peer_mgr_tx = deps.peer_mgr_tx.clone(); + let lock = deps.lock.clone(); + + // Run the critical section in a detached task so a canceled gRPC request + // cannot split a successful `ReplaceTables` from its persistence: once the + // task starts it runs to completion regardless of the awaiting future. + let join = tokio::spawn(async move { + let _guard = lock.lock().await; + + let current = read_current_tables(Some(&fib_cmd_tx)) + .await? + .unwrap_or_default(); + let previous: Vec = current.iter().map(config_to_snapshot).collect(); + let candidate = apply_mutation(current, mutation)?; + let snapshots: Vec = candidate.iter().map(config_to_snapshot).collect(); + + // Validate AND stage the candidate into the peer manager's live config + // in one atomic command. This closes the TOCTOU against peer-group + // deletion: once staged, the candidate's `allowed_peer_groups` are + // visible to `peer_group_references`, so a concurrent delete is rejected + // (and if a delete raced ahead, the candidate fails validation here). + // The peer manager processes commands one at a time, so the check and + // the commit cannot interleave with another config mutation. + stage_candidate(&peer_mgr_tx, snapshots.clone()).await?; + + // From here the candidate is staged in the live config, so every early + // exit must roll it back to `previous` to keep the snapshot from + // drifting ahead of the runtime/disk state. + let permit = match reserve_persist_permit(&config_tx).await { + Ok(permit) => permit, + Err(error) => { + rollback_snapshot(&peer_mgr_tx, previous).await; + return Err(error); + } + }; + + // Apply to the reconciler and wait for its acknowledgement. + if let Err(error) = replace_tables(&fib_cmd_tx, candidate.clone()).await { + rollback_snapshot(&peer_mgr_tx, previous).await; + return Err(error); + } + + // Persist exactly the accepted set, only after the ack. The live config + // snapshot already holds the candidate (staged above). + permit.send(ConfigEvent::FibTablesReplaced(snapshots)); + + Ok(proto::ListFibTablesResponse { + tables: candidate.iter().map(config_to_proto).collect(), + runtime_available: true, + }) + }); + + join.await.map_err(|_| { + FibTableControlError::Internal("FIB-table mutation task did not complete".to_string()) + })? +} + +/// Read the reconciler's current table set. `Ok(None)` means no reconciler is +/// running (used by `List` to report `runtime_available = false`). +async fn read_current_tables( + fib_cmd_tx: Option<&mpsc::Sender>, +) -> Result>, FibTableControlError> { + let Some(tx) = fib_cmd_tx else { + return Ok(None); + }; + let (reply_tx, reply_rx) = oneshot::channel(); + tx.send(FibRuntimeCommand::GetTables { reply: reply_tx }) + .await + .map_err(|_| { + FibTableControlError::Internal("FIB reconciler command channel closed".to_string()) + })?; + let tables = reply_rx.await.map_err(|_| { + FibTableControlError::Internal("FIB reconciler dropped the GetTables reply".to_string()) + })?; + Ok(Some(tables)) +} + +fn apply_mutation( + mut current: Vec, + mutation: Mutation, +) -> Result, FibTableControlError> { + match mutation { + Mutation::Upsert(table) => { + if let Some(existing) = current.iter_mut().find(|t| t.name == table.name) { + *existing = table; + } else { + current.push(table); + } + Ok(current) + } + Mutation::Delete(name) => { + let before = current.len(); + current.retain(|t| t.name != name); + if current.len() == before { + return Err(FibTableControlError::NotFound(format!( + "no FIB table named '{name}'" + ))); + } + Ok(current) + } + } +} + +/// Atomically validate the candidate against the live config and, on success, +/// stage it into the peer manager's `current_config.fib_tables`. Returns +/// `InvalidArgument` if it doesn't validate (nothing is staged in that case). +async fn stage_candidate( + peer_mgr_tx: &mpsc::Sender, + tables: Vec, +) -> Result<(), FibTableControlError> { + let (reply_tx, reply_rx) = oneshot::channel(); + peer_mgr_tx + .send(PeerManagerCommand::StageFibTables { + tables, + reply: reply_tx, + }) + .await + .map_err(|_| { + FibTableControlError::Internal("peer manager command channel closed".to_string()) + })?; + reply_rx + .await + .map_err(|_| { + FibTableControlError::Internal( + "peer manager dropped the StageFibTables reply".to_string(), + ) + })? + .map_err(FibTableControlError::InvalidArgument) +} + +/// Restore the peer manager's staged `current_config.fib_tables` to `previous` +/// after a post-stage failure (reserve or reconciler apply), so the live +/// config snapshot can't drift ahead of the runtime/disk state. Best-effort: +/// the mutation is already failing, so a channel error can't change the outcome. +async fn rollback_snapshot( + peer_mgr_tx: &mpsc::Sender, + previous: Vec, +) { + let (reply_tx, reply_rx) = oneshot::channel(); + if peer_mgr_tx + .send(PeerManagerCommand::SetFibTablesSnapshot { + tables: previous, + reply: reply_tx, + }) + .await + .is_ok() + { + let _ = reply_rx.await; + } +} + +async fn reserve_persist_permit( + config_tx: &mpsc::Sender, +) -> Result, FibTableControlError> { + tokio::time::timeout(PERSIST_RESERVE_TIMEOUT, config_tx.clone().reserve_owned()) + .await + .map_err(|_| { + FibTableControlError::Unavailable( + "config persistence queue busy — refusing mutation to avoid drift".to_string(), + ) + })? + .map_err(|_| { + FibTableControlError::Unavailable("config persistence unavailable".to_string()) + }) +} + +async fn replace_tables( + fib_cmd_tx: &mpsc::Sender, + tables: Vec, +) -> Result<(), FibTableControlError> { + let (reply_tx, reply_rx) = oneshot::channel(); + fib_cmd_tx + .send(FibRuntimeCommand::ReplaceTables { + tables, + reply: reply_tx, + }) + .await + .map_err(|_| { + FibTableControlError::Internal("FIB reconciler command channel closed".to_string()) + })?; + reply_rx + .await + .map_err(|_| { + FibTableControlError::Internal( + "FIB reconciler dropped the ReplaceTables reply".to_string(), + ) + })? + // The reconciler reverted (RIB/dump bail or a removed-table withdraw + // that would orphan a kernel row) — surface it without persisting. + .map_err(FibTableControlError::FailedPrecondition) +} + +fn runtime_unavailable_error(startup_had_tables: bool) -> FibTableControlError { + if startup_had_tables { + FibTableControlError::FailedPrecondition( + "the FIB reconciler is unavailable (it did not spawn at startup — non-Linux platform \ + or netlink setup failure); restart rustbgpd to apply [[fib_tables]] edits" + .to_string(), + ) + } else { + FibTableControlError::FailedPrecondition( + "no [[fib_tables]] were present at startup, so the FIB reconciler is not running; add \ + a table to the config and restart rustbgpd to start it" + .to_string(), + ) + } +} + +fn proto_to_config(table: proto::FibTableConfig) -> FibTableConfig { + // An empty `families` repeated field is indistinguishable from "omitted" + // (proto3 has no presence on repeated), so mirror the TOML default of both + // unicast families rather than letting validation reject an empty set. + let families = if table.families.is_empty() { + crate::config::default_fib_families() + } else { + table.families + }; + FibTableConfig { + name: table.name, + table_id: table.table_id, + metric: table.metric, + families, + allowed_peer_groups: table.allowed_peer_groups, + allowed_neighbors: table.allowed_neighbors, + max_routes: table.max_routes, + maximum_paths: table.maximum_paths, + maximum_paths_ebgp: table.maximum_paths_ebgp, + maximum_paths_ibgp: table.maximum_paths_ibgp, + } +} + +fn config_to_proto(table: &FibTableConfig) -> proto::FibTableConfig { + proto::FibTableConfig { + name: table.name.clone(), + table_id: table.table_id, + metric: table.metric, + families: table.families.clone(), + allowed_peer_groups: table.allowed_peer_groups.clone(), + allowed_neighbors: table.allowed_neighbors.clone(), + max_routes: table.max_routes, + maximum_paths: table.maximum_paths, + maximum_paths_ebgp: table.maximum_paths_ebgp, + maximum_paths_ibgp: table.maximum_paths_ibgp, + } +} + +fn config_to_snapshot(table: &FibTableConfig) -> FibTableSnapshot { + FibTableSnapshot { + name: table.name.clone(), + table_id: table.table_id, + metric: table.metric, + families: table.families.clone(), + allowed_peer_groups: table.allowed_peer_groups.clone(), + allowed_neighbors: table.allowed_neighbors.clone(), + max_routes: table.max_routes, + maximum_paths: table.maximum_paths, + maximum_paths_ebgp: table.maximum_paths_ebgp, + maximum_paths_ibgp: table.maximum_paths_ibgp, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn table(name: &str, table_id: u32) -> FibTableConfig { + FibTableConfig { + name: name.to_string(), + table_id, + metric: 200, + families: vec!["ipv4_unicast".to_string()], + allowed_peer_groups: vec![], + allowed_neighbors: vec![], + max_routes: None, + maximum_paths: None, + maximum_paths_ebgp: None, + maximum_paths_ibgp: None, + } + } + + #[test] + fn upsert_adds_a_new_table() { + let out = apply_mutation( + vec![table("edge", 1000)], + Mutation::Upsert(table("core", 1001)), + ) + .unwrap(); + assert_eq!(out.len(), 2); + assert!(out.iter().any(|t| t.name == "core" && t.table_id == 1001)); + } + + #[test] + fn upsert_replaces_in_place_by_name() { + // A table_id/metric change for an existing name is a full replacement, + // not a second entry — the reconciler treats it as a table-key move. + let mut replacement = table("edge", 2000); + replacement.metric = 250; + let out = apply_mutation(vec![table("edge", 1000)], Mutation::Upsert(replacement)).unwrap(); + assert_eq!(out.len(), 1); + assert_eq!(out[0].table_id, 2000); + assert_eq!(out[0].metric, 250); + } + + #[test] + fn delete_removes_by_name() { + let out = apply_mutation( + vec![table("edge", 1000), table("core", 1001)], + Mutation::Delete("edge".to_string()), + ) + .unwrap(); + assert_eq!(out.len(), 1); + assert_eq!(out[0].name, "core"); + } + + #[test] + fn delete_missing_name_is_not_found() { + let err = apply_mutation( + vec![table("edge", 1000)], + Mutation::Delete("nope".to_string()), + ) + .unwrap_err(); + assert!(matches!(err, FibTableControlError::NotFound(_))); + } + + #[test] + fn proto_empty_families_defaults_to_both_unicast() { + // proto3 repeated has no presence, so an empty `families` is "omitted" + // and must default like TOML rather than be rejected by validation. + let cfg = proto_to_config(proto::FibTableConfig { + name: "edge".to_string(), + table_id: 1000, + metric: 200, + families: vec![], + ..Default::default() + }); + assert_eq!(cfg.families, vec!["ipv4_unicast", "ipv6_unicast"]); + } + + #[test] + fn proto_explicit_families_are_preserved() { + let cfg = proto_to_config(proto::FibTableConfig { + name: "edge".to_string(), + table_id: 1000, + metric: 200, + families: vec!["ipv6_unicast".to_string()], + ..Default::default() + }); + assert_eq!(cfg.families, vec!["ipv6_unicast"]); + } +} diff --git a/src/main.rs b/src/main.rs index d6fdfc4c..df0544a2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -31,6 +31,7 @@ mod evpn_svi; mod fib; mod fib_common; mod fib_runtime; +mod fib_table_control; mod looking_glass; mod metrics_server; mod peer_manager; @@ -57,7 +58,7 @@ use crate::config::{ }; use crate::config_persister::{ConfigMutation, ConfigPersister}; use crate::peer_manager::PeerManager; -use crate::reload::{ReloadedConfig, apply_reload_outcome, reload_config, run_config_bridge}; +use crate::reload::{apply_reload_outcome, reload_config, run_config_bridge}; use rustbgpd_api::peer_types::{PeerManagerCommand, PeerManagerNeighborConfig}; use rustbgpd_api::server::{ AccessMode as GrpcServerAccessMode, ListenerConfig as GrpcListenerConfig, ListenerEndpoint, @@ -1860,6 +1861,10 @@ async fn run(mut config: Config, profiler: Option) { }) as rustbgpd_api::evpn_service::DuplicateMacClearFuture }) as rustbgpd_api::evpn_service::DuplicateMacClearFn }); + // Coordinator lock serializing runtime `[[fib_tables]]` mutations: the + // gRPC CRUD control path and the SIGHUP reload FIB step both hold it across + // their read → apply → persist sequence so they can't interleave. + let fib_table_control_lock = std::sync::Arc::new(tokio::sync::Mutex::new(())); let serve_config = ServeConfig { asn: config.global.asn, router_id: config.global.router_id.clone(), @@ -2005,6 +2010,15 @@ async fn run(mut config: Config, profiler: Option) { .collect() }) }, + fib_table_control: Some(fib_table_control::make_fib_table_control_fn( + fib_table_control::FibTableControlDeps { + fib_cmd_tx: fib_cmd_tx.clone(), + peer_mgr_tx: peer_mgr_tx.clone(), + config_tx: config_event_tx.clone(), + lock: fib_table_control_lock.clone(), + startup_tables: config.fib_tables.clone(), + }, + )), dataplane_route_events: Some(fib_bgp_event_tx), bfd_session_snapshot: { let rx = bfd_status_rx.clone(); @@ -2165,7 +2179,9 @@ async fn run(mut config: Config, profiler: Option) { // post-reload sync. A SIGHUP that arrives while a reload is still // running is logged and dropped — the operator-facing back-pressure // surface. - let mut reload_in_flight: Option>> = None; + let mut reload_in_flight: Option< + tokio::task::JoinHandle>>, + > = None; loop { tokio::select! { result = tokio::signal::ctrl_c() => { @@ -2203,8 +2219,17 @@ async fn run(mut config: Config, profiler: Option) { let live_uds = live_grpc_uds.clone(); let pm_tx = peer_mgr_tx.clone(); let fib_cmd = fib_cmd_tx.clone(); + // Hold the FIB coordinator lock across BOTH the reload and the + // outcome application (peer-manager + config-bridge snapshot + // refresh), so a concurrent gRPC FIB-table CRUD can't slip into + // the gap between them and have its applied/persisted table set + // clobbered by the stale reload snapshot. + let fib_lock = fib_table_control_lock.clone(); + let pm_internal = peer_mgr_internal_tx.clone(); + let bridge_replace = bridge_replace_tx.clone(); reload_in_flight = Some(tokio::spawn(async move { - reload_config( + let _fib_guard = fib_lock.lock().await; + let reloaded = reload_config( &path, &snapshot, live_tcp.as_ref(), @@ -2212,7 +2237,8 @@ async fn run(mut config: Config, profiler: Option) { &pm_tx, fib_cmd.as_ref(), ) - .await + .await?; + Some(apply_reload_outcome(reloaded, &pm_internal, bridge_replace.as_ref()).await) })); } // Only polled when a reload is in flight. Standard tokio @@ -2229,23 +2255,13 @@ async fn run(mut config: Config, profiler: Option) { } => { reload_in_flight = None; match outcome { - Ok(Some(new_config)) => { - match apply_reload_outcome( - new_config, - &peer_mgr_internal_tx, - bridge_replace_tx.as_ref(), - ) - .await - { - Ok(advanced) => config = advanced, - Err(stage) => error!( - stage, - "post-reload sync failed mid-flight; in-memory config not advanced — next SIGHUP will retry" - ), - } - } + Ok(Some(Ok(advanced))) => config = advanced, + Ok(Some(Err(stage))) => error!( + stage, + "post-reload sync failed mid-flight; in-memory config not advanced — next SIGHUP will retry" + ), Ok(None) => { - // reload_config already logged the failure. + // reload_config returned None (failure) or short-circuited. } Err(e) => error!(error = %e, "reload task panicked"), } diff --git a/src/peer_manager/events.rs b/src/peer_manager/events.rs index deae5947..5f2cab94 100644 --- a/src/peer_manager/events.rs +++ b/src/peer_manager/events.rs @@ -167,6 +167,9 @@ impl PeerManager { ConfigEvent::NeighborAdded(_) | ConfigEvent::NeighborDeleted(_) => { ("change", "neighbor", String::new(), None) } + ConfigEvent::FibTablesReplaced(_) => { + ("replace", "fib_tables", "fib_tables".to_string(), None) + } } } diff --git a/src/peer_manager/mod.rs b/src/peer_manager/mod.rs index 8a16c045..b1eef0a1 100644 --- a/src/peer_manager/mod.rs +++ b/src/peer_manager/mod.rs @@ -17,7 +17,7 @@ use rustbgpd_transport::{ SessionNotificationEvent as TransportNotificationEvent, TransportConfig, }; use rustbgpd_wire::{Afi, Safi}; -use tokio::sync::{broadcast, mpsc, watch}; +use tokio::sync::{broadcast, mpsc, oneshot, watch}; use tracing::{debug, error, info, warn}; use crate::config::Config; @@ -69,7 +69,14 @@ const PEER_POLICY_UPDATE_TIMEOUT: Duration = Duration::from_millis(500); const EXPLAIN_QUERY_TIMEOUT: Duration = Duration::from_millis(500); pub(crate) enum InternalCommand { - ReplaceConfigSnapshot(Box), + ReplaceConfigSnapshot { + config: Box, + /// Optional acknowledgement, sent after `current_config` is assigned. + /// The SIGHUP reload path awaits this before releasing the FIB + /// coordinator lock so a following gRPC FIB-table CRUD can't have its + /// snapshot overtaken by this (stale) one on the separate channel. + ack: Option>, + }, } #[allow(clippy::struct_excessive_bools)] @@ -549,6 +556,14 @@ impl PeerManager { let result = self.diff_runtime_config(&candidate_toml); let _ = reply.send(result); } + PeerManagerCommand::StageFibTables { tables, reply } => { + let result = self.stage_fib_tables_candidate(&tables); + let _ = reply.send(result); + } + PeerManagerCommand::SetFibTablesSnapshot { tables, reply } => { + self.set_fib_tables_snapshot(&tables); + let _ = reply.send(()); + } PeerManagerCommand::GetPeerState { peer, reply } => { let info = self.get_peer_info(&peer).await; let _ = reply.send(info); @@ -827,8 +842,11 @@ impl PeerManager { } } internal = self.internal_rx.recv() => { - if let Some(InternalCommand::ReplaceConfigSnapshot(config)) = internal { + if let Some(InternalCommand::ReplaceConfigSnapshot { config, ack }) = internal { self.current_config = *config; + if let Some(ack) = ack { + let _ = ack.send(()); + } } } notification = self.session_notify_rx.recv() => { diff --git a/src/peer_manager/reconcile.rs b/src/peer_manager/reconcile.rs index be3cd410..fe34523d 100644 --- a/src/peer_manager/reconcile.rs +++ b/src/peer_manager/reconcile.rs @@ -1,12 +1,13 @@ use std::collections::HashMap; use rustbgpd_api::peer_types::{ - PeerKey, PeerManagerNeighborConfig, ReconcileFailure, ReconcileFailureKind, ReconcileResult, - RuntimeConfigDiff, + FibTableSnapshot, PeerKey, PeerManagerNeighborConfig, ReconcileFailure, ReconcileFailureKind, + ReconcileResult, RuntimeConfigDiff, }; use tracing::{info, warn}; use crate::config::Config; +use crate::policy_admin::fib_table_snapshot_to_config; use super::PeerManager; @@ -134,4 +135,34 @@ impl PeerManager { diff_json, }) } + + /// Atomically validate a candidate `[[fib_tables]]` set against the live + /// runtime config and, on success, commit it into `current_config`. Builds + /// a probe config from `current_config`'s peer groups / neighbors with the + /// candidate table set substituted in, runs the full validator (reserved / + /// duplicate table ids, families, ECMP caps, peer-group references), and + /// only assigns `current_config.fib_tables` if it passes. Because the peer + /// manager runs commands serially, validate-then-commit here cannot + /// interleave with a peer-group deletion — closing the TOCTOU where a delete + /// would check a snapshot that doesn't yet reflect the in-flight table. + pub(super) fn stage_fib_tables_candidate( + &mut self, + tables: &[FibTableSnapshot], + ) -> Result<(), String> { + let staged: Vec = + tables.iter().map(fib_table_snapshot_to_config).collect(); + let mut probe = self.current_config.clone(); + probe.fib_tables.clone_from(&staged); + probe.validate().map_err(|error| error.to_string())?; + self.current_config.fib_tables = staged; + Ok(()) + } + + /// Refresh the live config snapshot's `[[fib_tables]]` after a gRPC CRUD + /// mutation acknowledged by the FIB reconciler, so `diff_runtime_config` + /// (which compares against `current_config`) doesn't report the + /// just-applied set as pending. + pub(super) fn set_fib_tables_snapshot(&mut self, tables: &[FibTableSnapshot]) { + self.current_config.fib_tables = tables.iter().map(fib_table_snapshot_to_config).collect(); + } } diff --git a/src/policy_admin.rs b/src/policy_admin.rs index 37e59a92..2b97f8e7 100644 --- a/src/policy_admin.rs +++ b/src/policy_admin.rs @@ -3,14 +3,16 @@ use std::net::IpAddr; use rustbgpd_api::peer_types::{ - AddPathDefinition, ConfigEvent, NamedNeighborSetSnapshot, NamedPeerGroupSnapshot, - NamedPolicyDefinition, NamedPolicySnapshot, NeighborSetDefinition, PeerGroupDefinition, - PolicyAsPathPrependConfig, PolicyChainAssignment, PolicyStatementDefinition, + AddPathDefinition, ConfigEvent, FibTableSnapshot, NamedNeighborSetSnapshot, + NamedPeerGroupSnapshot, NamedPolicyDefinition, NamedPolicySnapshot, NeighborSetDefinition, + PeerGroupDefinition, PolicyAsPathPrependConfig, PolicyChainAssignment, + PolicyStatementDefinition, }; use crate::config::{ - AddPathConfig, AsPathPrependConfig, BgpRoleConfig, Config, ConfigError, NamedPolicyConfig, - Neighbor, NeighborSetConfig, PeerGroupConfig, PolicyStatementConfig, TcpAoConfig, + AddPathConfig, AsPathPrependConfig, BgpRoleConfig, Config, ConfigError, FibTableConfig, + NamedPolicyConfig, Neighbor, NeighborSetConfig, PeerGroupConfig, PolicyStatementConfig, + TcpAoConfig, }; const fn wire_role_to_config(role: rustbgpd_wire::BgpRole) -> BgpRoleConfig { @@ -354,9 +356,29 @@ pub fn peer_group_references(config: &Config, name: &str) -> Vec { refs.push(format!("neighbor_set {set_name}")); } } + for table in &config.fib_tables { + if table.allowed_peer_groups.iter().any(|group| group == name) { + refs.push(format!("fib_table {}", table.name)); + } + } refs } +pub(crate) fn fib_table_snapshot_to_config(snapshot: &FibTableSnapshot) -> FibTableConfig { + FibTableConfig { + name: snapshot.name.clone(), + table_id: snapshot.table_id, + metric: snapshot.metric, + families: snapshot.families.clone(), + allowed_peer_groups: snapshot.allowed_peer_groups.clone(), + allowed_neighbors: snapshot.allowed_neighbors.clone(), + max_routes: snapshot.max_routes, + maximum_paths: snapshot.maximum_paths, + maximum_paths_ebgp: snapshot.maximum_paths_ebgp, + maximum_paths_ibgp: snapshot.maximum_paths_ibgp, + } +} + /// Apply a config event to a config snapshot and validate the result. #[expect( clippy::too_many_lines, @@ -556,6 +578,13 @@ pub fn apply_config_event(config: &mut Config, event: &ConfigEvent) -> Result<() ConfigEvent::ClearNeighborPeerGroup { address } => { neighbor_mut(config, *address)?.peer_group = None; } + ConfigEvent::FibTablesReplaced(snapshots) => { + // The full accepted set the FIB reconciler acknowledged. The + // trailing `config.validate()` re-checks it (`validate_fib_tables`) + // before the caller commits the snapshot, so a malformed event + // cannot poison the persisted config. + config.fib_tables = snapshots.iter().map(fib_table_snapshot_to_config).collect(); + } } config.validate() } @@ -743,6 +772,26 @@ remote_asn = 65002 assert!(refs.contains(&"neighbor 10.0.0.2 export_policy_chain".to_string())); } + #[test] + fn peer_group_references_include_fib_table_allow_lists() { + let mut config = minimal_config(); + config.fib_tables.push(FibTableConfig { + name: "edge".to_string(), + table_id: 1000, + metric: 200, + families: vec!["ipv4_unicast".to_string(), "ipv6_unicast".to_string()], + allowed_peer_groups: vec!["fabric".to_string()], + allowed_neighbors: Vec::new(), + max_routes: None, + maximum_paths: None, + maximum_paths_ebgp: None, + maximum_paths_ibgp: None, + }); + + let refs = peer_group_references(&config, "fabric"); + assert_eq!(refs, vec!["fib_table edge"]); + } + #[test] fn neighbor_added_event_preserves_tcp_ao_key_material() { let mut config = minimal_config(); diff --git a/src/reload.rs b/src/reload.rs index 86689cb0..8a46f319 100644 --- a/src/reload.rs +++ b/src/reload.rs @@ -171,10 +171,15 @@ pub(crate) async fn run_config_bridge( event = event_rx.recv(), if event_rx_open => { match event { Some(event) => { - if let Err(error) = apply_config_event(&mut current_config, &event) { + // Apply to a candidate clone and commit only on success, + // so a validation failure can't leave the live snapshot + // partially mutated (poisoned) for the next event. + let mut candidate = current_config.clone(); + if let Err(error) = apply_config_event(&mut candidate, &event) { error!(error = %error, "failed to apply config event before persistence"); continue; } + current_config = candidate; if mutation_tx .send(ConfigMutation::ReplaceConfig(Box::new(current_config.clone()))) .await @@ -224,14 +229,24 @@ pub(crate) async fn apply_reload_outcome( peer_mgr_internal_tx: &mpsc::UnboundedSender, bridge_replace_tx: Option<&mpsc::UnboundedSender>>, ) -> Result { + // Acknowledge the snapshot so the caller (holding the FIB coordinator lock) + // doesn't release the lock until the peer manager has actually assigned + // `current_config`. Otherwise a following gRPC FIB-table CRUD could enqueue + // its own snapshot on the separate peer-manager channel and have it + // overtaken by this one, reverting the just-applied table set. + let (ack_tx, ack_rx) = oneshot::channel(); if peer_mgr_internal_tx - .send(InternalCommand::ReplaceConfigSnapshot(Box::new( - reloaded.runtime.clone(), - ))) + .send(InternalCommand::ReplaceConfigSnapshot { + config: Box::new(reloaded.runtime.clone()), + ack: Some(ack_tx), + }) .is_err() { return Err("peer_mgr_snapshot"); } + if ack_rx.await.is_err() { + return Err("peer_mgr_snapshot"); + } if let Some(tx) = bridge_replace_tx && tx.send(Box::new(reloaded.desired.clone())).is_err() { @@ -1995,6 +2010,7 @@ metric = 200 let _ = reply.send(Ok(())); tables.len() } + Some(FibRuntimeCommand::GetTables { .. }) => panic!("unexpected GetTables"), None => panic!("expected ReplaceTables"), } }); @@ -2938,6 +2954,21 @@ peer_group = "secure" let cfg = Config::load_with_diagnostics(path.to_str().unwrap()).unwrap(); std::fs::remove_file(&path).ok(); + // Stand in for the peer manager: receive the snapshot and ack it so + // apply_reload_outcome proceeds to the (failing) bridge send. + let expected_asn = cfg.global.asn; + let pm = tokio::spawn(async move { + match peer_mgr_internal_rx.recv().await { + Some(InternalCommand::ReplaceConfigSnapshot { config, ack }) => { + if let Some(ack) = ack { + let _ = ack.send(()); + } + config.global.asn + } + None => panic!("peer manager must receive the snapshot"), + } + }); + let result = apply_reload_outcome( ReloadedConfig::new(cfg.clone(), cfg.clone()), &peer_mgr_internal_tx, @@ -2950,14 +2981,11 @@ peer_group = "secure" Some("config_bridge"), "bridge failure must surface as the named stage so the caller's log line is actionable" ); - let snapshot = peer_mgr_internal_rx - .try_recv() - .expect("peer manager must receive the snapshot before the bridge send is attempted"); - match snapshot { - InternalCommand::ReplaceConfigSnapshot(received) => { - assert_eq!(received.global.asn, cfg.global.asn); - } - } + assert_eq!( + pm.await.unwrap(), + expected_asn, + "peer manager must receive the snapshot before the bridge send is attempted" + ); } /// Bridge-disabled mode (no `file_path`, so no persister and no @@ -2975,6 +3003,16 @@ peer_group = "secure" let cfg = Config::load_with_diagnostics(path.to_str().unwrap()).unwrap(); std::fs::remove_file(&path).ok(); + // Stand in for the peer manager: receive the snapshot and ack it. + let pm = tokio::spawn(async move { + match peer_mgr_internal_rx.recv().await { + Some(InternalCommand::ReplaceConfigSnapshot { ack: Some(ack), .. }) => { + ack.send(()).is_ok() + } + _ => false, + } + }); + let advanced = apply_reload_outcome( ReloadedConfig::new(cfg.clone(), cfg.clone()), &peer_mgr_internal_tx, @@ -2983,7 +3021,7 @@ peer_group = "secure" .await .expect("no-bridge mode must succeed"); assert_eq!(advanced.global.asn, cfg.global.asn); - assert!(peer_mgr_internal_rx.try_recv().is_ok()); + assert!(pm.await.unwrap(), "peer manager must receive the snapshot"); } /// Regression test for the bridge stale-snapshot bug. The bridge @@ -3122,13 +3160,18 @@ peer_group = "secure" let (peer_mgr_internal_tx, mut peer_mgr_internal_rx) = mpsc::unbounded_channel::(); + let pm = tokio::spawn(async move { + match peer_mgr_internal_rx.recv().await { + Some(InternalCommand::ReplaceConfigSnapshot { ack: Some(ack), .. }) => { + ack.send(()).is_ok() + } + _ => false, + } + }); let runtime = apply_reload_outcome(reloaded, &peer_mgr_internal_tx, Some(&replace_tx)) .await .expect("post-reload sync should succeed"); - assert!( - peer_mgr_internal_rx.try_recv().is_ok(), - "peer manager snapshot must be refreshed" - ); + assert!(pm.await.unwrap(), "peer manager snapshot must be refreshed"); event_tx .send(ConfigEvent::SetPolicy { diff --git a/tests/interop/configs/frr-bgpd-m58-fib-crud.conf b/tests/interop/configs/frr-bgpd-m58-fib-crud.conf new file mode 100644 index 00000000..c81756fb --- /dev/null +++ b/tests/interop/configs/frr-bgpd-m58-fib-crud.conf @@ -0,0 +1,20 @@ +frr version 10.3.1 +frr defaults traditional +hostname frr-m58 +log stdout informational + +router bgp 65002 + bgp router-id 10.0.0.2 + no bgp ebgp-requires-policy + neighbor 10.0.0.1 remote-as 65001 + neighbor 10.0.0.1 description rustbgpd-m58 + neighbor 10.0.0.1 timers 30 90 + ! + address-family ipv4 unicast + network 203.0.113.42/32 + network 203.0.113.99/32 + exit-address-family +! +ip route 203.0.113.42/32 10.0.0.2 +ip route 203.0.113.99/32 10.0.0.2 +! diff --git a/tests/interop/configs/rustbgpd-m58-fib-crud.toml b/tests/interop/configs/rustbgpd-m58-fib-crud.toml new file mode 100644 index 00000000..0d35bff9 --- /dev/null +++ b/tests/interop/configs/rustbgpd-m58-fib-crud.toml @@ -0,0 +1,32 @@ +[global] +asn = 65001 +router_id = "10.0.0.1" +listen_port = 179 + +[global.telemetry] +prometheus_addr = "0.0.0.0:9179" +log_format = "json" + +[global.telemetry.grpc_tcp] +address = "0.0.0.0:50051" + +# Start with one table so the ADR-0061 reconciler is running — runtime CRUD +# (SetFibTable / DeleteFibTable) requires it. The test adds, mutates, and +# deletes a second table at runtime, and asserts the post-CRUD set survives a +# restart. This file is mounted as a read-only template and copied to a +# writable container-local path at startup (see the topology header) so the +# atomic config persist works. +[[fib_tables]] +name = "edge" +table_id = 1000 +metric = 200 +families = ["ipv4_unicast"] + +[[neighbors]] +address = "10.0.0.2" +remote_asn = 65002 +description = "frr-m58" +hold_time = 90 + +[security.grpc] +enforcement = "legacy" diff --git a/tests/interop/m58-fib-table-crud-frr.clab.yml b/tests/interop/m58-fib-table-crud-frr.clab.yml new file mode 100644 index 00000000..b2c6f72e --- /dev/null +++ b/tests/interop/m58-fib-table-crud-frr.clab.yml @@ -0,0 +1,66 @@ +# Containerlab topology: rustbgpd ↔ FRR (M58 — ADR-0061 FIB-table runtime CRUD) +# +# Validates the runtime [[fib_tables]] CRUD surface (SetFibTable / +# DeleteFibTable / ListFibTables on RibService) end-to-end against a real +# FRR peer and a real kernel: +# +# * rustbgpd starts with ONE table (edge, table_id=1000, metric=200) so the +# ADR-0061 reconciler is running (CRUD requires it). +# * FRR advertises 203.0.113.42/32 and 203.0.113.99/32. +# * SetFibTable adds a second table (core, table_id=2000, metric=300) at +# runtime → its kernel rows must appear in table 2000. +# * SetFibTable on `core` changing metric 300→350 is a table-key move → +# the old metric-300 rows withdraw and metric-350 rows install. +# * DeleteFibTable core → only its kernel rows withdraw; the edge table +# (1000) is untouched. +# * Persistence: the runtime CRUD writes the TOML config (atomic), so after +# a restart the daemon comes back with the post-CRUD table set. +# +# The config is mounted read-only as a template and copied to a container-local +# writable path at startup. The daemon runs against that local copy, so the +# atomic config persist (write `.config.toml.tmp` + rename over `config.toml`) +# works — a single-file read-write bind mount would make the rename target a +# mount point and fail with EBUSY. The persistence round-trip is still real: the +# restart is a process restart inside the same container, so the container-local +# config survives and a fresh daemon reads the post-CRUD set. +# +# Usage: +# docker build -t rustbgpd:dev . +# containerlab deploy -t tests/interop/m58-fib-table-crud-frr.clab.yml +# bash tests/interop/scripts/test-m58-fib-table-crud-frr.sh +# containerlab destroy -t tests/interop/m58-fib-table-crud-frr.clab.yml --cleanup + +name: m58-fib-table-crud-frr + +topology: + nodes: + rustbgpd: + kind: linux + image: rustbgpd:dev + cmd: sleep infinity + binds: + - ./configs/rustbgpd-m58-fib-crud.toml:/etc/rustbgpd/config.template.toml:ro + - ./scripts/start-rustbgpd.sh:/usr/local/bin/start-rustbgpd.sh:ro + exec: + - ip addr add 10.0.0.1/24 dev eth1 + - ip link set eth1 up + # Copy the read-only template to a writable container-local path so the + # atomic config persist (temp + rename) works; see the header note. + - cp /etc/rustbgpd/config.template.toml /etc/rustbgpd/config.toml + env: + RUST_LOG: info + + frr: + kind: linux + image: quay.io/frrouting/frr:10.3.1 + binds: + - ./configs/frr-daemons:/etc/frr/daemons:ro + - ./configs/frr-bgpd-m58-fib-crud.conf:/etc/frr/frr.conf:ro + exec: + - ip addr add 10.0.0.2/24 dev eth1 + - ip link set eth1 up + + links: + - endpoints: ["rustbgpd:eth1", "frr:eth1"] + +# Network: 10.0.0.0/24 — rustbgpd (10.0.0.1, AS 65001) ↔ FRR (10.0.0.2, AS 65002) diff --git a/tests/interop/scripts/test-m58-fib-table-crud-frr.sh b/tests/interop/scripts/test-m58-fib-table-crud-frr.sh new file mode 100755 index 00000000..1cbff2a5 --- /dev/null +++ b/tests/interop/scripts/test-m58-fib-table-crud-frr.sh @@ -0,0 +1,240 @@ +#!/usr/bin/env bash +# M58 interop test — ADR-0061 FIB-table runtime CRUD. +# +# Exercises the runtime [[fib_tables]] CRUD surface end-to-end against a real +# FRR peer and a real kernel: SetFibTable (add + table-key move), +# DeleteFibTable, ListFibTables, and the persist-across-restart round-trip. +# Companion to M42, which covers the startup FIB path. +# +# Prerequisites: +# - docker build -t rustbgpd:dev . +# - containerlab deploy -t tests/interop/m58-fib-table-crud-frr.clab.yml +# - grpcurl + jq installed on the host + +set -euo pipefail + +TOPO="m58-fib-table-crud-frr" +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +source "$SCRIPT_DIR/test-lib.sh" +FRR="clab-${TOPO}-frr" + +EDGE_ID=1000 +EDGE_METRIC=200 +CORE_ID=2000 +CORE_METRIC=300 +CORE_METRIC_MOVED=350 +NEXT_HOP="10.0.0.2" +# Both prefixes are advertised by FRR and carry no pre-existing foreign route, +# so they install cleanly as daemon-owned in every configured table. +PREFIX_A="203.0.113.42/32" +PREFIX_B="203.0.113.99/32" + +resolve_grpc_addr +start_rustbgpd +wait_frr_established "$FRR" "10.0.0.1" "rustbgpd ↔ FRR" + +# ---- gRPC helpers ----------------------------------------------------------- + +grpc() { + grpcurl -plaintext -import-path . -proto "$PROTO" "$@" +} + +list_fib_tables() { + grpc "$GRPC_ADDR" rustbgpd.v1.RibService/ListFibTables +} + +set_fib_table() { + local name=$1 table_id=$2 metric=$3 + grpc -d "{\"table\":{\"name\":\"${name}\",\"tableId\":${table_id},\"metric\":${metric},\"families\":[\"ipv4_unicast\"]}}" \ + "$GRPC_ADDR" rustbgpd.v1.RibService/SetFibTable +} + +delete_fib_table() { + local name=$1 + grpc -d "{\"name\":\"${name}\"}" "$GRPC_ADDR" rustbgpd.v1.RibService/DeleteFibTable +} + +# Count configured tables ListFibTables currently reports. +fib_table_count() { + list_fib_tables 2>/dev/null | jq -r '.tables | length' 2>/dev/null || echo "ERR" +} + +# Does ListFibTables report a table with this name + table_id + metric? +fib_table_has() { + local name=$1 table_id=$2 metric=$3 + list_fib_tables 2>/dev/null | jq -e --arg n "$name" --argjson t "$table_id" --argjson m "$metric" \ + '[.tables[]? | select(.name == $n and .tableId == $t and .metric == $m)] | length == 1' \ + >/dev/null 2>&1 +} + +# ---- kernel helpers (parameterized by table + metric) ----------------------- + +kernel_owned() { + # owned (daemon-installed) route: proto bgp, given metric, via NEXT_HOP + local prefix=$1 table_id=$2 metric=$3 + docker exec "$RUSTBGPD" ip route show table "$table_id" exact "$prefix" 2>/dev/null \ + | grep -q "via $NEXT_HOP" \ + && docker exec "$RUSTBGPD" ip route show table "$table_id" exact "$prefix" 2>/dev/null \ + | grep -q "proto bgp" \ + && docker exec "$RUSTBGPD" ip route show table "$table_id" exact "$prefix" 2>/dev/null \ + | grep -q "metric $metric" +} + +kernel_absent() { + local prefix=$1 table_id=$2 + ! docker exec "$RUSTBGPD" ip route show table "$table_id" exact "$prefix" 2>/dev/null | grep -q . +} + +dump_state_on_failure() { + echo "===== ListFibTables =====" >&2 + list_fib_tables | jq . >&2 || true + echo "===== table $EDGE_ID =====" >&2 + docker exec "$RUSTBGPD" ip route show table "$EDGE_ID" >&2 || true + echo "===== table $CORE_ID =====" >&2 + docker exec "$RUSTBGPD" ip route show table "$CORE_ID" >&2 || true + echo "===== config.toml =====" >&2 + docker exec "$RUSTBGPD" cat /etc/rustbgpd/config.toml >&2 || true + echo "===== rustbgpd.log tail =====" >&2 + docker exec "$RUSTBGPD" tail -40 /tmp/rustbgpd.log >&2 || true +} + +wait_kernel_owned() { + local prefix=$1 table_id=$2 metric=$3 + log "Waiting for $prefix owned in table $table_id metric $metric..." + for i in $(seq 1 30); do + if kernel_owned "$prefix" "$table_id" "$metric"; then + ok "$prefix installed in table $table_id (metric $metric, via $NEXT_HOP) after ${i}s" + return 0 + fi + sleep 1 + done + fail "$prefix did not install in table $table_id metric $metric" + dump_state_on_failure + return 1 +} + +wait_kernel_absent() { + local prefix=$1 table_id=$2 + log "Waiting for $prefix to leave table $table_id..." + for i in $(seq 1 30); do + if kernel_absent "$prefix" "$table_id"; then + ok "$prefix absent from table $table_id after ${i}s" + return 0 + fi + sleep 1 + done + fail "$prefix remained in table $table_id" + dump_state_on_failure + return 1 +} + +assert_set_ack() { + local label=$1 json=$2 + if printf '%s' "$json" | jq -e '.runtimeAvailable == true' >/dev/null 2>&1; then + ok "$label acknowledged (runtime_available=true)" + else + fail "$label did not return runtime_available=true: $json" + dump_state_on_failure + exit 1 + fi +} + +# ---- 1. Baseline: the startup `edge` table installs both prefixes ----------- + +log "=== 1. Baseline: startup table 'edge' ($EDGE_ID/$EDGE_METRIC) ===" +wait_kernel_owned "$PREFIX_A" "$EDGE_ID" "$EDGE_METRIC" +wait_kernel_owned "$PREFIX_B" "$EDGE_ID" "$EDGE_METRIC" +if fib_table_has "edge" "$EDGE_ID" "$EDGE_METRIC"; then + ok "ListFibTables reports edge/$EDGE_ID/$EDGE_METRIC" +else + fail "ListFibTables missing edge table"; dump_state_on_failure; exit 1 +fi + +# ---- 2. SetFibTable adds a second table at runtime -------------------------- + +log "=== 2. SetFibTable adds 'core' ($CORE_ID/$CORE_METRIC) at runtime ===" +set_resp=$(set_fib_table "core" "$CORE_ID" "$CORE_METRIC") +assert_set_ack "SetFibTable core" "$set_resp" +wait_kernel_owned "$PREFIX_A" "$CORE_ID" "$CORE_METRIC" +wait_kernel_owned "$PREFIX_B" "$CORE_ID" "$CORE_METRIC" +# The edge table must be untouched by the add. +if kernel_owned "$PREFIX_A" "$EDGE_ID" "$EDGE_METRIC"; then + ok "edge table unaffected by adding core" +else + fail "edge table lost $PREFIX_A after adding core"; dump_state_on_failure; exit 1 +fi +if [ "$(fib_table_count)" = "2" ]; then + ok "ListFibTables now reports 2 tables" +else + fail "expected 2 tables after add, got $(fib_table_count)"; dump_state_on_failure; exit 1 +fi + +# ---- 3. SetFibTable metric change is a table-key move ----------------------- + +log "=== 3. SetFibTable 'core' metric $CORE_METRIC->$CORE_METRIC_MOVED (table-key move) ===" +move_resp=$(set_fib_table "core" "$CORE_ID" "$CORE_METRIC_MOVED") +assert_set_ack "SetFibTable core (metric move)" "$move_resp" +# New metric installs; old metric rows withdraw. +wait_kernel_owned "$PREFIX_A" "$CORE_ID" "$CORE_METRIC_MOVED" +wait_kernel_owned "$PREFIX_B" "$CORE_ID" "$CORE_METRIC_MOVED" +for prefix in "$PREFIX_A" "$PREFIX_B"; do + if docker exec "$RUSTBGPD" ip route show table "$CORE_ID" exact "$prefix" 2>/dev/null | grep -q "metric $CORE_METRIC\b"; then + fail "$prefix still present at old metric $CORE_METRIC after table-key move" + dump_state_on_failure; exit 1 + fi +done +ok "table-key move withdrew old metric-$CORE_METRIC rows, installed metric-$CORE_METRIC_MOVED rows" + +# ---- 4. Persistence: restart and confirm the post-CRUD set survives -------- + +log "=== 4. Restart rustbgpd; the runtime CRUD must have persisted to TOML ===" +# The config bind is read-write, and the daemon was started with --config, so +# SetFibTable persisted the new set. After a restart the daemon must come back +# with BOTH edge(1000/200) and core(2000/350). +if docker exec "$RUSTBGPD" grep -q "table_id = $CORE_ID" /etc/rustbgpd/config.toml; then + ok "core table persisted into config.toml" +else + fail "config.toml does not contain the runtime-added core table"; dump_state_on_failure; exit 1 +fi +log "Stopping rustbgpd..." +docker exec "$RUSTBGPD" sh -c 'kill -TERM "$(pidof rustbgpd)"' +for i in $(seq 1 30); do + docker exec "$RUSTBGPD" sh -c 'pidof rustbgpd >/dev/null 2>&1' || break + sleep 1 +done +start_rustbgpd +wait_frr_established "$FRR" "10.0.0.1" "rustbgpd ↔ FRR (after restart)" +wait_kernel_owned "$PREFIX_A" "$EDGE_ID" "$EDGE_METRIC" +wait_kernel_owned "$PREFIX_A" "$CORE_ID" "$CORE_METRIC_MOVED" +if [ "$(fib_table_count)" = "2" ]; then + ok "post-restart ListFibTables reports both persisted tables" +else + fail "expected 2 tables after restart, got $(fib_table_count)"; dump_state_on_failure; exit 1 +fi + +# ---- 5. DeleteFibTable withdraws only its rows ------------------------------ + +log "=== 5. DeleteFibTable 'core' withdraws its rows; edge untouched ===" +del_resp=$(delete_fib_table "core") +assert_set_ack "DeleteFibTable core" "$del_resp" +wait_kernel_absent "$PREFIX_A" "$CORE_ID" +wait_kernel_absent "$PREFIX_B" "$CORE_ID" +# edge table must still hold both prefixes. +wait_kernel_owned "$PREFIX_A" "$EDGE_ID" "$EDGE_METRIC" +wait_kernel_owned "$PREFIX_B" "$EDGE_ID" "$EDGE_METRIC" +if [ "$(fib_table_count)" = "1" ]; then + ok "ListFibTables back to 1 table after delete" +else + fail "expected 1 table after delete, got $(fib_table_count)"; dump_state_on_failure; exit 1 +fi + +# ---- 6. DeleteFibTable on a missing name is NOT_FOUND ----------------------- + +log "=== 6. DeleteFibTable on a missing name returns NOT_FOUND ===" +if delete_fib_table "does-not-exist" >/dev/null 2>&1; then + fail "deleting a missing table unexpectedly succeeded"; dump_state_on_failure; exit 1 +else + ok "delete of missing table rejected as expected" +fi + +print_summary