From af1090cd3b1d6114ff7152c0d8de1b5d1a15e135 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 5 Mar 2026 11:41:53 -0500 Subject: [PATCH 1/3] lookup MGD ports in DNS --- nexus/src/app/mod.rs | 45 +++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index a33a9ef4b65..6b9d7590262 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -43,6 +43,7 @@ use sled_agent_types::early_networking::SwitchLocation; use slog::Logger; use slog_error_chain::InlineErrorChain; use std::collections::HashMap; +use std::net::SocketAddr; use std::net::SocketAddrV6; use std::net::{IpAddr, Ipv6Addr}; use std::num::NonZeroU32; @@ -1159,19 +1160,41 @@ impl Nexus { let resolver = self.resolver(); let mappings = switch_zone_address_mappings(resolver, &self.log).await?; - let mut clients: Vec<(SwitchLocation, mg_admin_client::Client)> = - vec![]; - for (location, addr) in &mappings { - let port = MGD_PORT; - let socketaddr = - std::net::SocketAddr::V6(SocketAddrV6::new(*addr, port, 0, 0)); - let client = mg_admin_client::Client::new( - format!("http://{}", socketaddr).as_str(), - self.log.clone(), + let mgd_addrs = resolver + .lookup_all_socket_v6(ServiceName::Mgd) + .await + .map_err(|err| { + format!( + "failed to resolve mgd in DNS: {}", + InlineErrorChain::new(&err) + ) + })?; + let mut clients = HashMap::new(); + for (location, ip) in mappings { + let addr = + match mgd_addrs.iter().copied().find(|addr| *addr.ip() == ip) { + Some(addr) => SocketAddr::V6(addr), + None => { + warn!( + self.log, + "no MGD DNS entry found matching switch location \ + IP address; assuming default port"; + "switch-location" => ?location, + "switch-ip" => %ip, + "mgd-dns-entries" => ?mgd_addrs, + ); + SocketAddr::V6(SocketAddrV6::new(ip, MGD_PORT, 0, 0)) + } + }; + clients.insert( + location, + mg_admin_client::Client::new( + &format!("http://{addr}"), + self.log.clone(), + ), ); - 
clients.push((*location, client)); } - Ok(clients.into_iter().collect::<HashMap<_, _>>()) + Ok(clients) } pub(crate) fn demo_sagas( From ae54bbc5f753dd30d72c974bf7930e4c34e9707d Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 5 Mar 2026 11:42:11 -0500 Subject: [PATCH 2/3] add basic "get bfd status" test (currently failing) --- nexus/tests/integration_tests/bfd.rs | 30 ++++++++++++++++++++++++++++ nexus/tests/integration_tests/mod.rs | 1 + 2 files changed, 31 insertions(+) create mode 100644 nexus/tests/integration_tests/bfd.rs diff --git a/nexus/tests/integration_tests/bfd.rs b/nexus/tests/integration_tests/bfd.rs new file mode 100644 index 00000000000..c66d634ed69 --- /dev/null +++ b/nexus/tests/integration_tests/bfd.rs @@ -0,0 +1,30 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tests BFD support in the API + +use nexus_test_utils::http_testing::AuthnMode; +use nexus_test_utils::http_testing::NexusRequest; +use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::bfd::BfdStatus; + +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>; + +const STATUS_URL: &str = "/v1/system/networking/bfd-status"; + +#[nexus_test] +async fn test_empty_bfd_status(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + let status = NexusRequest::object_get(client, STATUS_URL) + .authn_as(AuthnMode::PrivilegedUser) + .execute_and_parse_unwrap::<Vec<BfdStatus>>() + .await; + + // `#[nexus_test]` doesn't set up BFD, so we should have no status. But we + // should still be able to ask for that! (#[nexus_test] also only sets up + // one fake scrimlet - that used to cause this endpoint to fail.) 
+ assert_eq!(status, Vec::new()); +} diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 85407afa5c4..64507774dc5 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -14,6 +14,7 @@ mod audit_log; mod authn_http; mod authz; mod basic; +mod bfd; mod certificates; mod cockroach; mod commands; From bd7932447a4eeffcb5344add7d3892f87f497e91 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 5 Mar 2026 11:50:23 -0500 Subject: [PATCH 3/3] bfd status no longer 404s if one switch is missing --- nexus/src/app/bfd.rs | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/nexus/src/app/bfd.rs b/nexus/src/app/bfd.rs index 50f6f062a38..5ad30bed0a6 100644 --- a/nexus/src/app/bfd.rs +++ b/nexus/src/app/bfd.rs @@ -11,28 +11,6 @@ use sled_agent_types::early_networking::BfdMode; use sled_agent_types::early_networking::SwitchLocation; impl super::Nexus { - async fn mg_client_for_switch_location( - &self, - switch: SwitchLocation, - ) -> Result<mg_admin_client::Client, Error> { - let mg_client: mg_admin_client::Client = self - .mg_clients() - .await - .map_err(|e| { - Error::internal_error(&format!("failed to get mg clients: {e}")) - })? - .get(&switch) - .ok_or_else(|| { - Error::not_found_by_name( - omicron_common::api::external::ResourceType::Switch, - &switch.to_string().parse().unwrap(), - ) - })? - .clone(); - - Ok(mg_client) - } - pub async fn bfd_enable( &self, opctx: &OpContext, @@ -69,9 +47,19 @@ impl super::Nexus { ) -> Result<Vec<BfdStatus>, Error> { // ask each rack switch about all its BFD sessions. This will need to // be updated for multirack. 
+ let mg_clients = self.mg_clients().await.map_err(|err| { + Error::internal_error(&format!("failed to get mg clients: {err}")) + })?; let mut result = Vec::new(); for s in &[SwitchLocation::Switch0, SwitchLocation::Switch1] { - let mg_client = self.mg_client_for_switch_location(*s).await?; + // If we only have one scrimlet, we won't have an entry in + // `mg_clients` for one of the switch locations. Log that, but + // continue so we can still report status from whichever switch we + // do have. + let Some(mg_client) = mg_clients.get(s) else { + warn!(self.log, "no mgd client found for switch location {s}"); + continue; + }; let status = mg_client .get_bfd_peers() .await