diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 5fd33dcf15a..7a6d40e6bb2 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -53,7 +53,6 @@ use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; use nexus_types::deployment::SledResources; use nexus_types::deployment::TufRepoContentsError; -use nexus_types::deployment::ZpoolFilter; use nexus_types::deployment::ZpoolName; use nexus_types::deployment::blueprint_zone_type; use nexus_types::external_api::views::SledState; @@ -512,15 +511,15 @@ pub struct BlueprintBuilder<'a> { /// The ID that the completed blueprint will have new_blueprint_id: BlueprintUuid, - // These fields are used to allocate resources for sleds. - input: &'a PlanningInput, - // These fields will become part of the final blueprint. See the // corresponding fields in `Blueprint`. sled_editors: BTreeMap, cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade, + cockroachdb_fingerprint: String, target_release_minimum_generation: Generation, nexus_generation: Generation, + internal_dns_version: Generation, + external_dns_version: Generation, creator: String, operations: Vec, @@ -624,17 +623,19 @@ impl<'a> BlueprintBuilder<'a> { let editor = match state { SledState::Active => { - let subnet = input + let details = input .sled_lookup(SledFilter::Commissioned, *sled_id) .with_context(|| { format!( "failed to find sled details for \ active sled in parent blueprint {sled_id}" ) - })? - .resources - .subnet; - SledEditor::for_existing_active(subnet, sled_cfg.clone()) + })?; + SledEditor::for_existing_active( + Arc::new(details.baseboard_id.clone()), + details.resources.subnet, + sled_cfg.clone(), + ) } SledState::Decommissioned => { SledEditor::for_existing_decommissioned(sled_cfg.clone()) @@ -652,6 +653,7 @@ impl<'a> BlueprintBuilder<'a> { for (sled_id, details) in input.all_sleds(SledFilter::Commissioned) { if let Entry::Vacant(slot) = sled_editors.entry(sled_id) { slot.insert(SledEditor::for_new_active( + Arc::new(details.baseboard_id.clone()), details.resources.subnet, )); } @@ -672,14 +674,19 @@ impl<'a> BlueprintBuilder<'a> { .clone(), oximeter_read_policy, new_blueprint_id: rng.next_blueprint(), - input, sled_editors, cockroachdb_setting_preserve_downgrade: parent_blueprint .cockroachdb_setting_preserve_downgrade, + cockroachdb_fingerprint: input + .cockroachdb_settings() + .state_fingerprint + .clone(), pending_mgs_updates: parent_blueprint.pending_mgs_updates.clone(), target_release_minimum_generation: parent_blueprint .target_release_minimum_generation, nexus_generation: parent_blueprint.nexus_generation, + internal_dns_version: input.internal_dns_version(), + external_dns_version: input.external_dns_version(), creator: creator.to_owned(), operations: Vec::new(), comments: Vec::new(), @@ -738,9 +745,9 @@ impl<'a> BlueprintBuilder<'a> { // multirack (either DNS will be on a wider subnet or we need to pick a // particular rack subnet here?). let any_sled_subnet = self - .input - .all_sled_resources(SledFilter::Commissioned) - .map(|(_sled_id, resources)| resources.subnet) + .sled_editors + .values() + .filter_map(|editor| editor.subnet()) .next() .ok_or(Error::RackSubnetUnknownNoSleds)?; let rack_subnet = ReservedRackSubnet::from_subnet(any_sled_subnet); @@ -764,10 +771,6 @@ impl<'a> BlueprintBuilder<'a> { })) } - pub fn planning_input(&self) -> &PlanningInput { - &self.input - } - /// Iterates over the list of sled IDs for which we have zones. /// /// This may include decommissioned sleds. @@ -879,19 +882,14 @@ impl<'a> BlueprintBuilder<'a> { sleds, pending_mgs_updates: self.pending_mgs_updates, parent_blueprint_id: Some(self.parent_blueprint.id), - internal_dns_version: self.input.internal_dns_version(), - external_dns_version: self.input.external_dns_version(), + internal_dns_version: self.internal_dns_version, + external_dns_version: self.external_dns_version, target_release_minimum_generation: self .target_release_minimum_generation, nexus_generation: self.nexus_generation, - cockroachdb_fingerprint: self - .input - .cockroachdb_settings() - .state_fingerprint - .clone(), + cockroachdb_fingerprint: self.cockroachdb_fingerprint, cockroachdb_setting_preserve_downgrade: self .cockroachdb_setting_preserve_downgrade, - clickhouse_cluster_config: self.clickhouse_cluster_config, oximeter_read_version: self.oximeter_read_policy.version, oximeter_read_mode: self.oximeter_read_policy.mode, @@ -1315,14 +1313,16 @@ impl<'a> BlueprintBuilder<'a> { "tried to ensure mupdate override for unknown sled {sled_id}" )) })?; + let baseboard_id = editor.baseboard_id().ok_or_else(|| { + // All commissioned sleds have baseboards; this should never fail. + Error::Planner(anyhow!( + "tried to ensure mupdate override for \ + decommissioned sled {sled_id}" + )) + })?; // Also map the editor to the corresponding PendingMgsUpdates. - let sled_details = self - .input - .sled_lookup(SledFilter::InService, sled_id) - .map_err(|error| Error::Planner(anyhow!(error)))?; - let pending_mgs_update = - self.pending_mgs_updates.entry(&sled_details.baseboard_id); + let pending_mgs_update = self.pending_mgs_updates.entry(baseboard_id); let noop_sled_info = noop_info.sled_info_mut(sled_id)?; editor @@ -1492,14 +1492,14 @@ impl<'a> BlueprintBuilder<'a> { image_source: BlueprintZoneImageSource, ) -> Result { let pool_name = ZpoolName::new_external(zpool_id); + let editor = self.sled_editors.get(&sled_id).ok_or_else(|| { + Error::Planner(anyhow!( + "tried to ensure crucible zone for unknown sled {sled_id}" + )) + })?; // If this sled already has a Crucible zone on this pool, do nothing. - let has_crucible_on_this_pool = { - let editor = self.sled_editors.get(&sled_id).ok_or_else(|| { - Error::Planner(anyhow!( - "tried to ensure crucible zone for unknown sled {sled_id}" - )) - })?; + let has_crucible_on_this_pool = editor.zones(BlueprintZoneDisposition::is_in_service).any(|z| { matches!( &z.zone_type, @@ -1509,17 +1509,19 @@ impl<'a> BlueprintBuilder<'a> { }) if dataset.pool_name == pool_name ) - }) - }; + }); if has_crucible_on_this_pool { return Ok(Ensure::NotNeeded); } - let sled_info = self.sled_resources(sled_id)?; - if !sled_info.zpools.contains_key(&zpool_id) { + // Double-check that our caller didn't pass a bad sled/zpool combo. + if !editor + .disks(BlueprintPhysicalDiskDisposition::is_in_service) + .any(|disk| disk.pool_id == zpool_id) + { return Err(Error::Planner(anyhow!( "adding crucible zone for sled {:?}: \ - attempted to use unknown zpool {:?}", + attempted to use unknown zpool {:?}", sled_id, pool_name ))); @@ -2114,17 +2116,12 @@ impl<'a> BlueprintBuilder<'a> { )) })?; - // We'll check both the disks available to this sled per our current - // blueprint and the list of all in-service zpools on this sled per our - // planning input, and only pick zpools that are available in both. - let current_sled_disks = editor + // Only choose from zpools that are in-service. + let in_service_zpools = editor .disks(BlueprintPhysicalDiskDisposition::is_in_service) .map(|disk_config| disk_config.pool_id) .collect::>(); - let all_in_service_zpools = - self.sled_resources(sled_id)?.all_zpools(ZpoolFilter::InService); - // We refuse to choose a zpool for a zone of a given `zone_kind` if this // sled already has a durable zone of that kind on the same zpool. Build // up a set of invalid zpools for this sled/kind pair. @@ -2142,31 +2139,14 @@ impl<'a> BlueprintBuilder<'a> { skip_zpools.insert(&zone_config.filesystem_pool); } - for &zpool_id in all_in_service_zpools { + for zpool_id in in_service_zpools { let zpool_name = ZpoolName::new_external(zpool_id); - if !skip_zpools.contains(&zpool_name) - && current_sled_disks.contains(&zpool_id) - { + if !skip_zpools.contains(&zpool_name) { return Ok(zpool_name); } } - Err(Error::NoAvailableZpool { sled_id, kind: zone_kind }) - } - /// Returns the resources for a sled that hasn't been decommissioned. - fn sled_resources( - &self, - sled_id: SledUuid, - ) -> Result<&'a SledResources, Error> { - let details = self - .input - .sled_lookup(SledFilter::Commissioned, sled_id) - .map_err(|error| { - Error::Planner(anyhow!(error).context(format!( - "for sled {sled_id}, error looking up resources" - ))) - })?; - Ok(&details.resources) + Err(Error::NoAvailableZpool { sled_id, kind: zone_kind }) } /// Determine the number of desired external DNS zones by counting diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs index 2b675ce08d2..757c1f136aa 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs @@ -36,6 +36,7 @@ use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::PendingMgsUpdate; use nexus_types::deployment::blueprint_zone_type; use nexus_types::external_api::views::SledState; +use nexus_types::inventory::BaseboardId; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; @@ -50,6 +51,7 @@ use scalar::ScalarEditor; use std::iter; use std::mem; use std::net::Ipv6Addr; +use std::sync::Arc; use underlay_ip_allocator::SledUnderlayIpAllocator; mod datasets; @@ -148,6 +150,7 @@ pub enum SledEditError { pub(crate) struct SledEditor(InnerSledEditor); #[derive(Debug)] +#[allow(clippy::large_enum_variant)] enum InnerSledEditor { // Internally, `SledEditor` has a variant for each variant of `SledState`, // as the operations allowed in different states are substantially different @@ -159,6 +162,7 @@ enum InnerSledEditor { impl SledEditor { pub fn for_existing_active( + baseboard_id: Arc, subnet: Ipv6Subnet, config: BlueprintSledConfig, ) -> Result { @@ -167,7 +171,7 @@ impl SledEditor { SledState::Active, "for_existing_active called on non-active sled" ); - let inner = ActiveSledEditor::new(subnet, config)?; + let inner = ActiveSledEditor::new(baseboard_id, subnet, config)?; Ok(Self(InnerSledEditor::Active(inner))) } @@ -187,8 +191,14 @@ impl SledEditor { Ok(Self(InnerSledEditor::Decommissioned(inner))) } - pub fn for_new_active(subnet: Ipv6Subnet) -> Self { - Self(InnerSledEditor::Active(ActiveSledEditor::new_empty(subnet))) + pub fn for_new_active( + baseboard_id: Arc, + subnet: Ipv6Subnet, + ) -> Self { + Self(InnerSledEditor::Active(ActiveSledEditor::new_empty( + baseboard_id, + subnet, + ))) } pub fn finalize(self) -> EditedSled { @@ -205,6 +215,26 @@ impl SledEditor { } } + /// Returns the baseboard of this sled if it is active, or `None` if it is + /// decommissioned. + pub fn baseboard_id(&self) -> Option<&Arc> { + match &self.0 { + InnerSledEditor::Active(active) => Some(&active.baseboard_id), + InnerSledEditor::Decommissioned(_) => None, + } + } + + /// Returns the subnet of this sled if it is active, or `None` if it is + /// decommissioned. + pub fn subnet(&self) -> Option> { + match &self.0 { + InnerSledEditor::Active(active) => { + Some(active.underlay_ip_allocator.subnet()) + } + InnerSledEditor::Decommissioned(_) => None, + } + } + pub fn edit_counts(&self) -> SledEditCounts { match &self.0 { InnerSledEditor::Active(editor) => editor.edit_counts(), @@ -231,9 +261,10 @@ impl SledEditor { // below, we'll be left in the active state with an empty sled // editor), but omicron in general is not panic safe and aborts // on panic. Plus `finalize()` should never panic. - let mut stolen = ActiveSledEditor::new_empty(Ipv6Subnet::new( - Ipv6Addr::LOCALHOST, - )); + let mut stolen = ActiveSledEditor::new_empty( + Arc::clone(&editor.baseboard_id), + Ipv6Subnet::new(Ipv6Addr::LOCALHOST), + ); mem::swap(editor, &mut stolen); let mut finalized = stolen.finalize(); @@ -475,6 +506,7 @@ impl SledEditor { #[derive(Debug)] struct ActiveSledEditor { + baseboard_id: Arc, underlay_ip_allocator: SledUnderlayIpAllocator, incoming_sled_agent_generation: Generation, zones: ZonesEditor, @@ -494,6 +526,7 @@ pub(crate) struct EditedSled { impl ActiveSledEditor { pub fn new( + baseboard_id: Arc, subnet: Ipv6Subnet, config: BlueprintSledConfig, ) -> Result { @@ -508,6 +541,7 @@ impl ActiveSledEditor { zones.zones(BlueprintZoneDisposition::any).map(|z| z.underlay_ip()); Ok(Self { + baseboard_id, underlay_ip_allocator: SledUnderlayIpAllocator::new( subnet, zone_ips, ), @@ -523,7 +557,10 @@ impl ActiveSledEditor { }) } - pub fn new_empty(subnet: Ipv6Subnet) -> Self { + pub fn new_empty( + baseboard_id: Arc, + subnet: Ipv6Subnet, + ) -> Self { // Creating the underlay IP allocator can only fail if we have a zone // with an IP outside the sled subnet, but we don't have any zones at // all, so this can't fail. Match explicitly to guard against this error @@ -532,6 +569,7 @@ impl ActiveSledEditor { SledUnderlayIpAllocator::new(subnet, iter::empty()); Self { + baseboard_id, underlay_ip_allocator, incoming_sled_agent_generation: Generation::new(), zones: ZonesEditor::empty(), diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/underlay_ip_allocator.rs b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/underlay_ip_allocator.rs index 7aee5b08104..332746dd6a2 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/underlay_ip_allocator.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/underlay_ip_allocator.rs @@ -23,6 +23,7 @@ use std::net::Ipv6Addr; // general enough to use here, though this one could potentially be used there. #[derive(Debug)] pub(crate) struct SledUnderlayIpAllocator { + subnet: Ipv6Subnet, last: Ipv6Addr, maximum: Ipv6Addr, } @@ -55,7 +56,7 @@ impl SledUnderlayIpAllocator { assert!(sled_subnet.net().contains(minimum)); assert!(sled_subnet.net().contains(maximum)); - let mut slf = Self { last: minimum, maximum }; + let mut slf = Self { subnet: sled_subnet, last: minimum, maximum }; for ip in in_use_ips { slf.mark_as_allocated(ip); } @@ -65,6 +66,11 @@ impl SledUnderlayIpAllocator { slf } + /// Get the subnet used to create this allocator. + pub fn subnet(&self) -> Ipv6Subnet { + self.subnet + } + /// Mark an address as used. /// /// Marking an address that has already been handed out by this allocator diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index c607669c51d..54c7cf8dd41 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -207,7 +207,7 @@ impl<'a> Planner<'a> { let zone_safety_checks = ZoneSafetyChecks::new( &self.blueprint, &self.inventory, - self.input.internal_dns_version(), + &self.input, ); let mut noop_info = diff --git a/nexus/reconfigurator/planning/src/planner/zone_safety.rs b/nexus/reconfigurator/planning/src/planner/zone_safety.rs index 4020d25e7f2..2b565783cae 100644 --- a/nexus/reconfigurator/planning/src/planner/zone_safety.rs +++ b/nexus/reconfigurator/planning/src/planner/zone_safety.rs @@ -11,9 +11,9 @@ use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::CockroachdbUnsafeToShutdown; +use nexus_types::deployment::PlanningInput; use nexus_types::deployment::ZoneUnsafeToShutdown; use nexus_types::inventory::Collection; -use omicron_common::api::external::Generation; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use std::collections::BTreeMap; @@ -51,14 +51,9 @@ impl ZoneSafetyChecks { pub fn new( blueprint: &BlueprintBuilder<'_>, inventory: &Collection, - current_internal_dns_generation: Generation, + input: &PlanningInput, ) -> Self { - ZoneSafetyChecksBuilder::new( - blueprint, - inventory, - current_internal_dns_generation, - ) - .build() + ZoneSafetyChecksBuilder::new(blueprint, inventory, input).build() } pub fn empty() -> Self { @@ -114,7 +109,7 @@ impl ZoneSafetyChecks { struct ZoneSafetyChecksBuilder<'a> { blueprint: &'a BlueprintBuilder<'a>, inventory: &'a Collection, - current_internal_dns_generation: Generation, + input: &'a PlanningInput, internal_dns_zones: BTreeSet, boundary_ntp_zones: BTreeSet, checks: ZoneSafetyChecks, @@ -124,7 +119,7 @@ impl<'a> ZoneSafetyChecksBuilder<'a> { fn new( blueprint: &'a BlueprintBuilder<'a>, inventory: &'a Collection, - current_internal_dns_generation: Generation, + input: &'a PlanningInput, ) -> Self { let mut internal_dns_zones = BTreeSet::new(); let mut boundary_ntp_zones = BTreeSet::new(); @@ -148,7 +143,7 @@ impl<'a> ZoneSafetyChecksBuilder<'a> { Self { blueprint, inventory, - current_internal_dns_generation, + input, internal_dns_zones, boundary_ntp_zones, checks: ZoneSafetyChecks::empty(), @@ -214,7 +209,7 @@ impl<'a> ZoneSafetyChecksBuilder<'a> { } let target_boundary_ntp_zone_count = - self.blueprint.planning_input().target_boundary_ntp_zone_count(); + self.input.target_boundary_ntp_zone_count(); if synchronized_boundary_ntp_count < target_boundary_ntp_zone_count { return Some(ZoneUnsafeToShutdown::BoundaryNtp { total_boundary_ntp_zones: self.boundary_ntp_zones.len(), @@ -232,7 +227,7 @@ impl<'a> ZoneSafetyChecksBuilder<'a> { use ZoneUnsafeToShutdown::Cockroachdb; let target_cockroachdb_zone_count = - self.blueprint.planning_input().target_cockroachdb_zone_count(); + self.input.target_cockroachdb_zone_count(); // We must hear from all nodes let all_statuses = &self.inventory.cockroach_status; @@ -287,7 +282,7 @@ impl<'a> ZoneSafetyChecksBuilder<'a> { // Either way, from our perspective, the internal DNS zone // shouldn't be considered "ready-to-shutdown". if self.internal_dns_zones.contains(&status.zone_id) - && status.generation == self.current_internal_dns_generation + && status.generation == self.input.internal_dns_version() { synchronized_internal_dns_count += 1; } @@ -302,7 +297,7 @@ impl<'a> ZoneSafetyChecksBuilder<'a> { // tolerate "at least one upgrade, and at least one failure during that // upgrade window" (e.g., it should be "at least 3" in production). let target_internal_dns_zone_count = - self.blueprint.planning_input().target_internal_dns_zone_count(); + self.input.target_internal_dns_zone_count(); if synchronized_internal_dns_count >= target_internal_dns_zone_count { return None; } else {