From 2576c34b76d382a76c9d9e09a71518b010a7f5f5 Mon Sep 17 00:00:00 2001 From: Zeeshan Lakhani Date: Tue, 10 Mar 2026 08:25:04 +0000 Subject: [PATCH] [multicast] Remove recovery path and update to latest dendrite This incorporates https://github.com/oxidecomputer/dendrite/pull/232 and https://github.com/oxidecomputer/dendrite/pull/195. Note: No API changes here. --- Cargo.lock | 14 +-- Cargo.toml | 2 +- nexus/src/app/multicast/dataplane.rs | 126 ++++----------------------- package-manifest.toml | 12 +-- tools/dendrite_stub_checksums | 4 +- tools/dendrite_version | 2 +- 6 files changed, 32 insertions(+), 128 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 31e144f33ec..4237389d467 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1718,7 +1718,7 @@ dependencies = [ [[package]] name = "common" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/dendrite?rev=c0bf0a3b536baab0393c96fec3204b32e4f9368b#c0bf0a3b536baab0393c96fec3204b32e4f9368b" +source = "git+https://github.com/oxidecomputer/dendrite?rev=b31ba90d14636ef6bba9dbb56d756efa249d4b4e#b31ba90d14636ef6bba9dbb56d756efa249d4b4e" dependencies = [ "anyhow", "chrono", @@ -3047,11 +3047,11 @@ dependencies = [ [[package]] name = "dpd-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/dendrite?rev=c0bf0a3b536baab0393c96fec3204b32e4f9368b#c0bf0a3b536baab0393c96fec3204b32e4f9368b" +source = "git+https://github.com/oxidecomputer/dendrite?rev=b31ba90d14636ef6bba9dbb56d756efa249d4b4e#b31ba90d14636ef6bba9dbb56d756efa249d4b4e" dependencies = [ "async-trait", "chrono", - "common 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=c0bf0a3b536baab0393c96fec3204b32e4f9368b)", + "common 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=b31ba90d14636ef6bba9dbb56d756efa249d4b4e)", "crc8", "futures", "http", @@ -7537,7 +7537,7 @@ dependencies = [ "crucible-agent-client", "dns-server", "dns-service-client", - "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=c0bf0a3b536baab0393c96fec3204b32e4f9368b)", + "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=b31ba90d14636ef6bba9dbb56d756efa249d4b4e)", "dropshot", "futures", "gateway-messages", @@ -8488,7 +8488,7 @@ dependencies = [ "display-error-chain", "dns-server", "dns-service-client", - "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=c0bf0a3b536baab0393c96fec3204b32e4f9368b)", + "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=b31ba90d14636ef6bba9dbb56d756efa249d4b4e)", "dropshot", "ereport-types", "expectorate", @@ -16569,7 +16569,7 @@ name = "wicket-common" version = "0.1.0" dependencies = [ "anyhow", - "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=c0bf0a3b536baab0393c96fec3204b32e4f9368b)", + "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=b31ba90d14636ef6bba9dbb56d756efa249d4b4e)", "dropshot", "gateway-client", "gateway-types", @@ -16631,7 +16631,7 @@ dependencies = [ "clap", "debug-ignore", "display-error-chain", - "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=c0bf0a3b536baab0393c96fec3204b32e4f9368b)", + "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=b31ba90d14636ef6bba9dbb56d756efa249d4b4e)", "dropshot", "either", "expectorate", diff --git a/Cargo.toml b/Cargo.toml index 17e9dd6acf4..abd81f5d501 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -477,7 +477,7 @@ digest = "0.10.7" dns-server = { path = "dns-server" } dns-server-api = { path = "dns-server-api" } dns-service-client = { path = "clients/dns-service-client" } -dpd-client = { git = "https://github.com/oxidecomputer/dendrite", rev = "c0bf0a3b536baab0393c96fec3204b32e4f9368b" } +dpd-client = { git = "https://github.com/oxidecomputer/dendrite", rev = "b31ba90d14636ef6bba9dbb56d756efa249d4b4e" } dropshot = { version = "0.16.6", features = [ "usdt-probes" ] } dropshot-api-manager = "0.5.2" dropshot-api-manager-types = "0.5.2" diff --git a/nexus/src/app/multicast/dataplane.rs b/nexus/src/app/multicast/dataplane.rs index 4dc55ba5e11..73c03d8874f 100644 --- a/nexus/src/app/multicast/dataplane.rs +++ b/nexus/src/app/multicast/dataplane.rs @@ -43,7 +43,7 @@ use std::net::IpAddr; use futures::future::try_join_all; use oxnet::MulticastMac; -use slog::{Logger, debug, error, info, warn}; +use slog::{Logger, debug, error, info}; use dpd_client::Error as DpdError; use dpd_client::types::{ @@ -927,116 +927,20 @@ impl MulticastDataplaneClient { &underlay_ip_admin, &tag, &update_entry) .await; - match update_res { - Ok(_) => {} - Err(DpdError::ErrorResponse(ref resp)) - if resp.status() == reqwest::StatusCode::NOT_FOUND - || resp.status() - == reqwest::StatusCode::INTERNAL_SERVER_ERROR => - { - // 404: Group disappeared (race or external cleanup) - // 500: ASIC state inconsistent with DPD DB - // - // In both cases, delete and recreate with the updated members. - info!( - log, - "underlay update failed, attempting delete+recreate"; - "underlay_ip" => %underlay_ip, - "switch" => %switch_slot, - "operation" => %operation_name, - "status" => %resp.status(), - "dpd_operation" => "modify_group_membership_recreate" - ); - - // TODO: this `reset_by_tag` fallback can be removed - // once DPD's `modify_group_internal` calls - // `process_membership_changes` in the - // empty-transition arm, preventing the 500 that - // triggers this recovery path. - // See https://github.com/oxidecomputer/dendrite/pull/232 - // - // Try to delete the stale underlay group. If this - // fails because the underlay group is still - // referenced by an external group via NAT target, - // fall back to `reset_by_tag`, which deletes - // external groups first so the ASIC state is clean - // for the next reconciler pass. - if let Err(del_err) = client - .multicast_group_delete(&underlay_ip, &tag) - .await - { - warn!( - log, - "underlay delete failed, resetting all \ - groups by tag for clean ASIC state"; - "underlay_ip" => %underlay_ip, - "switch" => %switch_slot, - "delete_error" => %del_err, - "dpd_operation" => "modify_group_membership_recreate" - ); - - if let Err(reset_err) = client - .multicast_reset_by_tag(&tag) - .await - { - error!( - log, - "tag reset also failed during recovery"; - "underlay_ip" => %underlay_ip, - "switch" => %switch_slot, - "error" => %reset_err, - "dpd_operation" => "modify_group_membership_recreate" - ); - } - - // Return error so the reconciler retries. - // Drift correction will recreate the groups - // with clean ASIC state on the next pass. - return Err(Error::internal_error(&format!( - "underlay group recovery on {switch_slot}: \ - reset by tag after delete failed ({del_err})" - ))); - } - - // Recreate with the updated members - let create_entry = MulticastGroupCreateUnderlayEntry { - group_ip: underlay_ip_admin.clone(), - members: update_entry.members, - tag: underlay_group.tag.clone(), - }; - - client - .multicast_group_create_underlay(&create_entry) - .await - .map_err(|e| { - error!( - log, - "underlay recreate with members failed"; - "underlay_ip" => %underlay_ip, - "switch" => %switch_slot, - "error" => %e, - "dpd_operation" => "modify_group_membership_recreate" - ); - Error::internal_error(&format!( - "underlay recreate with members failed on {switch_slot}: {e}" - )) - })?; - } - Err(e) => { - error!( - log, - "underlay member modify failed"; - "operation_name" => %operation_name, - "underlay_ip" => %underlay_ip, - "switch" => %switch_slot, - "error" => %e, - "dpd_operation" => "modify_group_membership_update" - ); - return Err(Error::internal_error(&format!( - "underlay member modify failed on {switch_slot}: {e}" - ))); - } - } + update_res.map_err(|e| { + error!( + log, + "underlay member modify failed"; + "operation_name" => %operation_name, + "underlay_ip" => %underlay_ip, + "switch" => %switch_slot, + "error" => %e, + "dpd_operation" => "modify_group_membership_update" + ); + Error::internal_error(&format!( + "underlay member modify failed on {switch_slot}: {e}" + )) + })?; info!( log, diff --git a/package-manifest.toml b/package-manifest.toml index 7a911a23824..1a3ff61be6a 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -748,8 +748,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "c0bf0a3b536baab0393c96fec3204b32e4f9368b" -source.sha256 = "28a7670be35a68ff3e52b3e5e57f4fa6b8465e3ec8a21314b09381fb213d3a3e" +source.commit = "b31ba90d14636ef6bba9dbb56d756efa249d4b4e" +source.sha256 = "31a8e71c9b83a1739738f715d41d1b254e99030d8dacb5632ff703e47a05a4a5" output.type = "zone" output.intermediate_only = true @@ -775,8 +775,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "c0bf0a3b536baab0393c96fec3204b32e4f9368b" -source.sha256 = "8070af15f48316ace21e6bb339ba4a1fb2219256a9a017cdbe85505f66aa08ad" +source.commit = "b31ba90d14636ef6bba9dbb56d756efa249d4b4e" +source.sha256 = "d86081a688ccb59df2439b4f3ee224b84f095de7a09cac8613ba75ed0ef41d56" output.type = "zone" output.intermediate_only = true @@ -795,8 +795,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "c0bf0a3b536baab0393c96fec3204b32e4f9368b" -source.sha256 = "69018db9c5f2d8878a8bc8dec26049679e8ece0e7d66e742c2e573c3484f8c29" +source.commit = "b31ba90d14636ef6bba9dbb56d756efa249d4b4e" +source.sha256 = "4eb8ad0a9e6564858716db9170e449d9d427c7427ee5241b13f72213b5eaf5a4" output.type = "zone" output.intermediate_only = true diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index a02ab52969c..e4583ea0cf7 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="28a7670be35a68ff3e52b3e5e57f4fa6b8465e3ec8a21314b09381fb213d3a3e" -CIDL_SHA256_LINUX_DPD="c68ea190f27e9526aa1fba1cc9d1416778d536c805c53c469ee62f603cc433fb" +CIDL_SHA256_ILLUMOS="31a8e71c9b83a1739738f715d41d1b254e99030d8dacb5632ff703e47a05a4a5" +CIDL_SHA256_LINUX_DPD="ab6a66ec4e6571b020496cf2a5abf2107416951e93ec7c6a1d0ca708697ee718" CIDL_SHA256_LINUX_SWADM="a2826dbdb9b3001cf35756e6ae77dbc9f43a5732932528a0b7cf482d0e0cb237" diff --git a/tools/dendrite_version b/tools/dendrite_version index 55fbd266038..37b1957d560 100644 --- a/tools/dendrite_version +++ b/tools/dendrite_version @@ -1 +1 @@ -COMMIT="c0bf0a3b536baab0393c96fec3204b32e4f9368b" +COMMIT="b31ba90d14636ef6bba9dbb56d756efa249d4b4e"