Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ digest = "0.10.7"
dns-server = { path = "dns-server" }
dns-server-api = { path = "dns-server-api" }
dns-service-client = { path = "clients/dns-service-client" }
dpd-client = { git = "https://github.com/oxidecomputer/dendrite", rev = "c0bf0a3b536baab0393c96fec3204b32e4f9368b" }
dpd-client = { git = "https://github.com/oxidecomputer/dendrite", rev = "b31ba90d14636ef6bba9dbb56d756efa249d4b4e" }
dropshot = { version = "0.16.6", features = [ "usdt-probes" ] }
dropshot-api-manager = "0.6.0"
dropshot-api-manager-types = "0.6.0"
Expand Down
126 changes: 15 additions & 111 deletions nexus/src/app/multicast/dataplane.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ use std::net::IpAddr;

use futures::future::try_join_all;
use oxnet::MulticastMac;
use slog::{Logger, debug, error, info, warn};
use slog::{Logger, debug, error, info};

use dpd_client::Error as DpdError;
use dpd_client::types::{
Expand Down Expand Up @@ -927,116 +927,20 @@ impl MulticastDataplaneClient {
&underlay_ip_admin, &tag, &update_entry)
.await;

match update_res {
Ok(_) => {}
Err(DpdError::ErrorResponse(ref resp))
if resp.status() == reqwest::StatusCode::NOT_FOUND
|| resp.status()
== reqwest::StatusCode::INTERNAL_SERVER_ERROR =>
{
// 404: Group disappeared (race or external cleanup)
// 500: ASIC state inconsistent with DPD DB
//
// In both cases, delete and recreate with the updated members.
info!(
log,
"underlay update failed, attempting delete+recreate";
"underlay_ip" => %underlay_ip,
"switch" => ?switch_slot,
"operation" => %operation_name,
"status" => %resp.status(),
"dpd_operation" => "modify_group_membership_recreate"
);

// TODO: this `reset_by_tag` fallback can be removed
// once DPD's `modify_group_internal` calls
// `process_membership_changes` in the
// empty-transition arm, preventing the 500 that
// triggers this recovery path.
// See https://github.com/oxidecomputer/dendrite/pull/232
//
// Try to delete the stale underlay group. If this
// fails because the underlay group is still
// referenced by an external group via NAT target,
// fall back to `reset_by_tag`, which deletes
// external groups first so the ASIC state is clean
// for the next reconciler pass.
if let Err(del_err) = client
.multicast_group_delete(&underlay_ip, &tag)
.await
{
warn!(
log,
"underlay delete failed, resetting all \
groups by tag for clean ASIC state";
"underlay_ip" => %underlay_ip,
"switch" => ?switch_slot,
"delete_error" => %del_err,
"dpd_operation" => "modify_group_membership_recreate"
);

if let Err(reset_err) = client
.multicast_reset_by_tag(&tag)
.await
{
error!(
log,
"tag reset also failed during recovery";
"underlay_ip" => %underlay_ip,
"switch" => ?switch_slot,
"error" => %reset_err,
"dpd_operation" => "modify_group_membership_recreate"
);
}

// Return error so the reconciler retries.
// Drift correction will recreate the groups
// with clean ASIC state on the next pass.
return Err(Error::internal_error(&format!(
"underlay group recovery on {switch_slot:?}: \
reset by tag after delete failed ({del_err})"
)));
}

// Recreate with the updated members
let create_entry = MulticastGroupCreateUnderlayEntry {
group_ip: underlay_ip_admin.clone(),
members: update_entry.members,
tag: underlay_group.tag.clone(),
};

client
.multicast_group_create_underlay(&create_entry)
.await
.map_err(|e| {
error!(
log,
"underlay recreate with members failed";
"underlay_ip" => %underlay_ip,
"switch" => ?switch_slot,
"error" => %e,
"dpd_operation" => "modify_group_membership_recreate"
);
Error::internal_error(&format!(
"underlay recreate with members failed on {switch_slot:?}: {e}"
))
})?;
}
Err(e) => {
error!(
log,
"underlay member modify failed";
"operation_name" => %operation_name,
"underlay_ip" => %underlay_ip,
"switch" => ?switch_slot,
"error" => %e,
"dpd_operation" => "modify_group_membership_update"
);
return Err(Error::internal_error(&format!(
"underlay member modify failed on {switch_slot:?}: {e}"
)));
}
}
update_res.map_err(|e| {
error!(
log,
"underlay member modify failed";
"operation_name" => %operation_name,
"underlay_ip" => %underlay_ip,
"switch" => ?switch_slot,
"error" => %e,
"dpd_operation" => "modify_group_membership_update"
);
Error::internal_error(&format!(
"underlay member modify failed on {switch_slot:?}: {e}"
))
})?;

info!(
log,
Expand Down
12 changes: 6 additions & 6 deletions package-manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -748,8 +748,8 @@ only_for_targets.image = "standard"
# the other `source.*` keys.
source.type = "prebuilt"
source.repo = "dendrite"
source.commit = "c0bf0a3b536baab0393c96fec3204b32e4f9368b"
source.sha256 = "28a7670be35a68ff3e52b3e5e57f4fa6b8465e3ec8a21314b09381fb213d3a3e"
source.commit = "b31ba90d14636ef6bba9dbb56d756efa249d4b4e"
source.sha256 = "31a8e71c9b83a1739738f715d41d1b254e99030d8dacb5632ff703e47a05a4a5"
output.type = "zone"
output.intermediate_only = true

Expand All @@ -775,8 +775,8 @@ only_for_targets.image = "standard"
# the other `source.*` keys.
source.type = "prebuilt"
source.repo = "dendrite"
source.commit = "c0bf0a3b536baab0393c96fec3204b32e4f9368b"
source.sha256 = "8070af15f48316ace21e6bb339ba4a1fb2219256a9a017cdbe85505f66aa08ad"
source.commit = "b31ba90d14636ef6bba9dbb56d756efa249d4b4e"
source.sha256 = "d86081a688ccb59df2439b4f3ee224b84f095de7a09cac8613ba75ed0ef41d56"
output.type = "zone"
output.intermediate_only = true

Expand All @@ -795,8 +795,8 @@ only_for_targets.image = "standard"
# the other `source.*` keys.
source.type = "prebuilt"
source.repo = "dendrite"
source.commit = "c0bf0a3b536baab0393c96fec3204b32e4f9368b"
source.sha256 = "69018db9c5f2d8878a8bc8dec26049679e8ece0e7d66e742c2e573c3484f8c29"
source.commit = "b31ba90d14636ef6bba9dbb56d756efa249d4b4e"
source.sha256 = "4eb8ad0a9e6564858716db9170e449d9d427c7427ee5241b13f72213b5eaf5a4"
output.type = "zone"
output.intermediate_only = true

Expand Down
4 changes: 2 additions & 2 deletions tools/dendrite_stub_checksums
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CIDL_SHA256_ILLUMOS="28a7670be35a68ff3e52b3e5e57f4fa6b8465e3ec8a21314b09381fb213d3a3e"
CIDL_SHA256_LINUX_DPD="c68ea190f27e9526aa1fba1cc9d1416778d536c805c53c469ee62f603cc433fb"
CIDL_SHA256_ILLUMOS="31a8e71c9b83a1739738f715d41d1b254e99030d8dacb5632ff703e47a05a4a5"
CIDL_SHA256_LINUX_DPD="ab6a66ec4e6571b020496cf2a5abf2107416951e93ec7c6a1d0ca708697ee718"
CIDL_SHA256_LINUX_SWADM="a2826dbdb9b3001cf35756e6ae77dbc9f43a5732932528a0b7cf482d0e0cb237"
2 changes: 1 addition & 1 deletion tools/dendrite_version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
COMMIT="c0bf0a3b536baab0393c96fec3204b32e4f9368b"
COMMIT="b31ba90d14636ef6bba9dbb56d756efa249d4b4e"
Loading