Skip to content

Commit 98b55de

Browse files
authored
BlueprintBuilder: don't hold on to PlanningInput (#9374)
This builds on top of #9370 and is a big piece of #9238: as of this PR, `BlueprintBuilder` no longer holds a `PlanningInput`, which should reduce confusion on where a given bit of logic should live. Anything that needs the planning input to be decided now _must_ be implemented in the planner. `BlueprintBuilder::new_based_on(..)` still takes a `PlanningInput` argument, which I don't love but need to do some more work to figure out how (if?) to address. It uses it for a few things; I think I'd group them as: 1. Reassembling information about sleds that we knew in previous iterations but that aren't stored in the blueprint. (Specifically: the `BaseboardId` and subnet of each active sleds. This is the one that seems trickiest to tackle.) 2. Adding new, empty `SledEditor`s for commissioned sleds that are present in `PlanningInput` but not the parent blueprint. (I think I'd make the case that `new_based_on()` shouldn't do this at all, and the planner should do it?) 3. Making a copy of a few fields that are present in the blueprint, but that are updated each time the planner runs based on potentially-newer values in `PlanningInput`. (Specifically: the cockroachdb fingerprint and the internal/external DNS generations. I think it would be fine for `BlueprintBuilder` to only copy the parent blueprint, and provide setters that the planner could use to update these to newer values.) There's one hopefully-small-and-fine logical change in this PR that I'll point out below with some extra details.
1 parent e5fa9fb commit 98b55de

File tree

5 files changed

+112
-93
lines changed

5 files changed

+112
-93
lines changed

nexus/reconfigurator/planning/src/blueprint_builder/builder.rs

Lines changed: 49 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ use nexus_types::deployment::PlanningInput;
5353
use nexus_types::deployment::SledFilter;
5454
use nexus_types::deployment::SledResources;
5555
use nexus_types::deployment::TufRepoContentsError;
56-
use nexus_types::deployment::ZpoolFilter;
5756
use nexus_types::deployment::ZpoolName;
5857
use nexus_types::deployment::blueprint_zone_type;
5958
use nexus_types::external_api::views::SledState;
@@ -512,15 +511,15 @@ pub struct BlueprintBuilder<'a> {
512511
/// The ID that the completed blueprint will have
513512
new_blueprint_id: BlueprintUuid,
514513

515-
// These fields are used to allocate resources for sleds.
516-
input: &'a PlanningInput,
517-
518514
// These fields will become part of the final blueprint. See the
519515
// corresponding fields in `Blueprint`.
520516
sled_editors: BTreeMap<SledUuid, SledEditor>,
521517
cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade,
518+
cockroachdb_fingerprint: String,
522519
target_release_minimum_generation: Generation,
523520
nexus_generation: Generation,
521+
internal_dns_version: Generation,
522+
external_dns_version: Generation,
524523

525524
creator: String,
526525
operations: Vec<Operation>,
@@ -624,17 +623,19 @@ impl<'a> BlueprintBuilder<'a> {
624623

625624
let editor = match state {
626625
SledState::Active => {
627-
let subnet = input
626+
let details = input
628627
.sled_lookup(SledFilter::Commissioned, *sled_id)
629628
.with_context(|| {
630629
format!(
631630
"failed to find sled details for \
632631
active sled in parent blueprint {sled_id}"
633632
)
634-
})?
635-
.resources
636-
.subnet;
637-
SledEditor::for_existing_active(subnet, sled_cfg.clone())
633+
})?;
634+
SledEditor::for_existing_active(
635+
Arc::new(details.baseboard_id.clone()),
636+
details.resources.subnet,
637+
sled_cfg.clone(),
638+
)
638639
}
639640
SledState::Decommissioned => {
640641
SledEditor::for_existing_decommissioned(sled_cfg.clone())
@@ -652,6 +653,7 @@ impl<'a> BlueprintBuilder<'a> {
652653
for (sled_id, details) in input.all_sleds(SledFilter::Commissioned) {
653654
if let Entry::Vacant(slot) = sled_editors.entry(sled_id) {
654655
slot.insert(SledEditor::for_new_active(
656+
Arc::new(details.baseboard_id.clone()),
655657
details.resources.subnet,
656658
));
657659
}
@@ -672,14 +674,19 @@ impl<'a> BlueprintBuilder<'a> {
672674
.clone(),
673675
oximeter_read_policy,
674676
new_blueprint_id: rng.next_blueprint(),
675-
input,
676677
sled_editors,
677678
cockroachdb_setting_preserve_downgrade: parent_blueprint
678679
.cockroachdb_setting_preserve_downgrade,
680+
cockroachdb_fingerprint: input
681+
.cockroachdb_settings()
682+
.state_fingerprint
683+
.clone(),
679684
pending_mgs_updates: parent_blueprint.pending_mgs_updates.clone(),
680685
target_release_minimum_generation: parent_blueprint
681686
.target_release_minimum_generation,
682687
nexus_generation: parent_blueprint.nexus_generation,
688+
internal_dns_version: input.internal_dns_version(),
689+
external_dns_version: input.external_dns_version(),
683690
creator: creator.to_owned(),
684691
operations: Vec::new(),
685692
comments: Vec::new(),
@@ -738,9 +745,9 @@ impl<'a> BlueprintBuilder<'a> {
738745
// multirack (either DNS will be on a wider subnet or we need to pick a
739746
// particular rack subnet here?).
740747
let any_sled_subnet = self
741-
.input
742-
.all_sled_resources(SledFilter::Commissioned)
743-
.map(|(_sled_id, resources)| resources.subnet)
748+
.sled_editors
749+
.values()
750+
.filter_map(|editor| editor.subnet())
744751
.next()
745752
.ok_or(Error::RackSubnetUnknownNoSleds)?;
746753
let rack_subnet = ReservedRackSubnet::from_subnet(any_sled_subnet);
@@ -764,10 +771,6 @@ impl<'a> BlueprintBuilder<'a> {
764771
}))
765772
}
766773

767-
pub fn planning_input(&self) -> &PlanningInput {
768-
&self.input
769-
}
770-
771774
/// Iterates over the list of sled IDs for which we have zones.
772775
///
773776
/// This may include decommissioned sleds.
@@ -879,19 +882,14 @@ impl<'a> BlueprintBuilder<'a> {
879882
sleds,
880883
pending_mgs_updates: self.pending_mgs_updates,
881884
parent_blueprint_id: Some(self.parent_blueprint.id),
882-
internal_dns_version: self.input.internal_dns_version(),
883-
external_dns_version: self.input.external_dns_version(),
885+
internal_dns_version: self.internal_dns_version,
886+
external_dns_version: self.external_dns_version,
884887
target_release_minimum_generation: self
885888
.target_release_minimum_generation,
886889
nexus_generation: self.nexus_generation,
887-
cockroachdb_fingerprint: self
888-
.input
889-
.cockroachdb_settings()
890-
.state_fingerprint
891-
.clone(),
890+
cockroachdb_fingerprint: self.cockroachdb_fingerprint,
892891
cockroachdb_setting_preserve_downgrade: self
893892
.cockroachdb_setting_preserve_downgrade,
894-
895893
clickhouse_cluster_config: self.clickhouse_cluster_config,
896894
oximeter_read_version: self.oximeter_read_policy.version,
897895
oximeter_read_mode: self.oximeter_read_policy.mode,
@@ -1315,14 +1313,16 @@ impl<'a> BlueprintBuilder<'a> {
13151313
"tried to ensure mupdate override for unknown sled {sled_id}"
13161314
))
13171315
})?;
1316+
let baseboard_id = editor.baseboard_id().ok_or_else(|| {
1317+
// All commissioned sleds have baseboards; this should never fail.
1318+
Error::Planner(anyhow!(
1319+
"tried to ensure mupdate override for \
1320+
decommissioned sled {sled_id}"
1321+
))
1322+
})?;
13181323

13191324
// Also map the editor to the corresponding PendingMgsUpdates.
1320-
let sled_details = self
1321-
.input
1322-
.sled_lookup(SledFilter::InService, sled_id)
1323-
.map_err(|error| Error::Planner(anyhow!(error)))?;
1324-
let pending_mgs_update =
1325-
self.pending_mgs_updates.entry(&sled_details.baseboard_id);
1325+
let pending_mgs_update = self.pending_mgs_updates.entry(baseboard_id);
13261326
let noop_sled_info = noop_info.sled_info_mut(sled_id)?;
13271327

13281328
editor
@@ -1492,14 +1492,14 @@ impl<'a> BlueprintBuilder<'a> {
14921492
image_source: BlueprintZoneImageSource,
14931493
) -> Result<Ensure, Error> {
14941494
let pool_name = ZpoolName::new_external(zpool_id);
1495+
let editor = self.sled_editors.get(&sled_id).ok_or_else(|| {
1496+
Error::Planner(anyhow!(
1497+
"tried to ensure crucible zone for unknown sled {sled_id}"
1498+
))
1499+
})?;
14951500

14961501
// If this sled already has a Crucible zone on this pool, do nothing.
1497-
let has_crucible_on_this_pool = {
1498-
let editor = self.sled_editors.get(&sled_id).ok_or_else(|| {
1499-
Error::Planner(anyhow!(
1500-
"tried to ensure crucible zone for unknown sled {sled_id}"
1501-
))
1502-
})?;
1502+
let has_crucible_on_this_pool =
15031503
editor.zones(BlueprintZoneDisposition::is_in_service).any(|z| {
15041504
matches!(
15051505
&z.zone_type,
@@ -1509,17 +1509,19 @@ impl<'a> BlueprintBuilder<'a> {
15091509
})
15101510
if dataset.pool_name == pool_name
15111511
)
1512-
})
1513-
};
1512+
});
15141513
if has_crucible_on_this_pool {
15151514
return Ok(Ensure::NotNeeded);
15161515
}
15171516

1518-
let sled_info = self.sled_resources(sled_id)?;
1519-
if !sled_info.zpools.contains_key(&zpool_id) {
1517+
// Double-check that our caller didn't pass a bad sled/zpool combo.
1518+
if !editor
1519+
.disks(BlueprintPhysicalDiskDisposition::is_in_service)
1520+
.any(|disk| disk.pool_id == zpool_id)
1521+
{
15201522
return Err(Error::Planner(anyhow!(
15211523
"adding crucible zone for sled {:?}: \
1522-
attempted to use unknown zpool {:?}",
1524+
attempted to use unknown zpool {:?}",
15231525
sled_id,
15241526
pool_name
15251527
)));
@@ -2114,17 +2116,12 @@ impl<'a> BlueprintBuilder<'a> {
21142116
))
21152117
})?;
21162118

2117-
// We'll check both the disks available to this sled per our current
2118-
// blueprint and the list of all in-service zpools on this sled per our
2119-
// planning input, and only pick zpools that are available in both.
2120-
let current_sled_disks = editor
2119+
// Only choose from zpools that are in-service.
2120+
let in_service_zpools = editor
21212121
.disks(BlueprintPhysicalDiskDisposition::is_in_service)
21222122
.map(|disk_config| disk_config.pool_id)
21232123
.collect::<BTreeSet<_>>();
21242124

2125-
let all_in_service_zpools =
2126-
self.sled_resources(sled_id)?.all_zpools(ZpoolFilter::InService);
2127-
21282125
// We refuse to choose a zpool for a zone of a given `zone_kind` if this
21292126
// sled already has a durable zone of that kind on the same zpool. Build
21302127
// up a set of invalid zpools for this sled/kind pair.
@@ -2142,31 +2139,14 @@ impl<'a> BlueprintBuilder<'a> {
21422139
skip_zpools.insert(&zone_config.filesystem_pool);
21432140
}
21442141

2145-
for &zpool_id in all_in_service_zpools {
2142+
for zpool_id in in_service_zpools {
21462143
let zpool_name = ZpoolName::new_external(zpool_id);
2147-
if !skip_zpools.contains(&zpool_name)
2148-
&& current_sled_disks.contains(&zpool_id)
2149-
{
2144+
if !skip_zpools.contains(&zpool_name) {
21502145
return Ok(zpool_name);
21512146
}
21522147
}
2153-
Err(Error::NoAvailableZpool { sled_id, kind: zone_kind })
2154-
}
21552148

2156-
/// Returns the resources for a sled that hasn't been decommissioned.
2157-
fn sled_resources(
2158-
&self,
2159-
sled_id: SledUuid,
2160-
) -> Result<&'a SledResources, Error> {
2161-
let details = self
2162-
.input
2163-
.sled_lookup(SledFilter::Commissioned, sled_id)
2164-
.map_err(|error| {
2165-
Error::Planner(anyhow!(error).context(format!(
2166-
"for sled {sled_id}, error looking up resources"
2167-
)))
2168-
})?;
2169-
Ok(&details.resources)
2149+
Err(Error::NoAvailableZpool { sled_id, kind: zone_kind })
21702150
}
21712151

21722152
/// Determine the number of desired external DNS zones by counting

nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ use nexus_types::deployment::BlueprintZoneType;
3636
use nexus_types::deployment::PendingMgsUpdate;
3737
use nexus_types::deployment::blueprint_zone_type;
3838
use nexus_types::external_api::views::SledState;
39+
use nexus_types::inventory::BaseboardId;
3940
use omicron_common::address::Ipv6Subnet;
4041
use omicron_common::address::SLED_PREFIX;
4142
use omicron_common::api::external::Generation;
@@ -50,6 +51,7 @@ use scalar::ScalarEditor;
5051
use std::iter;
5152
use std::mem;
5253
use std::net::Ipv6Addr;
54+
use std::sync::Arc;
5355
use underlay_ip_allocator::SledUnderlayIpAllocator;
5456

5557
mod datasets;
@@ -148,6 +150,7 @@ pub enum SledEditError {
148150
pub(crate) struct SledEditor(InnerSledEditor);
149151

150152
#[derive(Debug)]
153+
#[allow(clippy::large_enum_variant)]
151154
enum InnerSledEditor {
152155
// Internally, `SledEditor` has a variant for each variant of `SledState`,
153156
// as the operations allowed in different states are substantially different
@@ -159,6 +162,7 @@ enum InnerSledEditor {
159162

160163
impl SledEditor {
161164
pub fn for_existing_active(
165+
baseboard_id: Arc<BaseboardId>,
162166
subnet: Ipv6Subnet<SLED_PREFIX>,
163167
config: BlueprintSledConfig,
164168
) -> Result<Self, SledInputError> {
@@ -167,7 +171,7 @@ impl SledEditor {
167171
SledState::Active,
168172
"for_existing_active called on non-active sled"
169173
);
170-
let inner = ActiveSledEditor::new(subnet, config)?;
174+
let inner = ActiveSledEditor::new(baseboard_id, subnet, config)?;
171175
Ok(Self(InnerSledEditor::Active(inner)))
172176
}
173177

@@ -187,8 +191,14 @@ impl SledEditor {
187191
Ok(Self(InnerSledEditor::Decommissioned(inner)))
188192
}
189193

190-
pub fn for_new_active(subnet: Ipv6Subnet<SLED_PREFIX>) -> Self {
191-
Self(InnerSledEditor::Active(ActiveSledEditor::new_empty(subnet)))
194+
pub fn for_new_active(
195+
baseboard_id: Arc<BaseboardId>,
196+
subnet: Ipv6Subnet<SLED_PREFIX>,
197+
) -> Self {
198+
Self(InnerSledEditor::Active(ActiveSledEditor::new_empty(
199+
baseboard_id,
200+
subnet,
201+
)))
192202
}
193203

194204
pub fn finalize(self) -> EditedSled {
@@ -205,6 +215,26 @@ impl SledEditor {
205215
}
206216
}
207217

218+
/// Returns the baseboard of this sled if it is active, or `None` if it is
219+
/// decommissioned.
220+
pub fn baseboard_id(&self) -> Option<&Arc<BaseboardId>> {
221+
match &self.0 {
222+
InnerSledEditor::Active(active) => Some(&active.baseboard_id),
223+
InnerSledEditor::Decommissioned(_) => None,
224+
}
225+
}
226+
227+
/// Returns the subnet of this sled if it is active, or `None` if it is
228+
/// decommissioned.
229+
pub fn subnet(&self) -> Option<Ipv6Subnet<SLED_PREFIX>> {
230+
match &self.0 {
231+
InnerSledEditor::Active(active) => {
232+
Some(active.underlay_ip_allocator.subnet())
233+
}
234+
InnerSledEditor::Decommissioned(_) => None,
235+
}
236+
}
237+
208238
pub fn edit_counts(&self) -> SledEditCounts {
209239
match &self.0 {
210240
InnerSledEditor::Active(editor) => editor.edit_counts(),
@@ -231,9 +261,10 @@ impl SledEditor {
231261
// below, we'll be left in the active state with an empty sled
232262
// editor), but omicron in general is not panic safe and aborts
233263
// on panic. Plus `finalize()` should never panic.
234-
let mut stolen = ActiveSledEditor::new_empty(Ipv6Subnet::new(
235-
Ipv6Addr::LOCALHOST,
236-
));
264+
let mut stolen = ActiveSledEditor::new_empty(
265+
Arc::clone(&editor.baseboard_id),
266+
Ipv6Subnet::new(Ipv6Addr::LOCALHOST),
267+
);
237268
mem::swap(editor, &mut stolen);
238269

239270
let mut finalized = stolen.finalize();
@@ -475,6 +506,7 @@ impl SledEditor {
475506

476507
#[derive(Debug)]
477508
struct ActiveSledEditor {
509+
baseboard_id: Arc<BaseboardId>,
478510
underlay_ip_allocator: SledUnderlayIpAllocator,
479511
incoming_sled_agent_generation: Generation,
480512
zones: ZonesEditor,
@@ -494,6 +526,7 @@ pub(crate) struct EditedSled {
494526

495527
impl ActiveSledEditor {
496528
pub fn new(
529+
baseboard_id: Arc<BaseboardId>,
497530
subnet: Ipv6Subnet<SLED_PREFIX>,
498531
config: BlueprintSledConfig,
499532
) -> Result<Self, SledInputError> {
@@ -508,6 +541,7 @@ impl ActiveSledEditor {
508541
zones.zones(BlueprintZoneDisposition::any).map(|z| z.underlay_ip());
509542

510543
Ok(Self {
544+
baseboard_id,
511545
underlay_ip_allocator: SledUnderlayIpAllocator::new(
512546
subnet, zone_ips,
513547
),
@@ -523,7 +557,10 @@ impl ActiveSledEditor {
523557
})
524558
}
525559

526-
pub fn new_empty(subnet: Ipv6Subnet<SLED_PREFIX>) -> Self {
560+
pub fn new_empty(
561+
baseboard_id: Arc<BaseboardId>,
562+
subnet: Ipv6Subnet<SLED_PREFIX>,
563+
) -> Self {
527564
// Creating the underlay IP allocator can only fail if we have a zone
528565
// with an IP outside the sled subnet, but we don't have any zones at
529566
// all, so this can't fail. Match explicitly to guard against this error
@@ -532,6 +569,7 @@ impl ActiveSledEditor {
532569
SledUnderlayIpAllocator::new(subnet, iter::empty());
533570

534571
Self {
572+
baseboard_id,
535573
underlay_ip_allocator,
536574
incoming_sled_agent_generation: Generation::new(),
537575
zones: ZonesEditor::empty(),

0 commit comments

Comments
 (0)