Skip to content

Commit 2d46f8c

Browse files
authored
[sled-agent] Integrate self-assembling Propolis (#3456)
Integrates oxidecomputer/propolis#454. This avoids using `zlogin` to modify the Propolis zone after launch, instead preferring to write a profile file to the zone ahead of time. This matches the pattern for other self-assembling zones, where configuration information is supplied ahead of time through the `ProfileBuilder`, which places an auxiliary SMF manifest into `/var/svc/profile/site.xml` to be imported when the zone starts. Relies on: oxidecomputer/propolis#454. Part of the fix for #3454.
1 parent 92258b4 commit 2d46f8c

File tree

2 files changed

+53
-112
lines changed

2 files changed

+53
-112
lines changed

package-manifest.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,10 +266,10 @@ service_name = "propolis-server"
266266
only_for_targets.image = "standard"
267267
source.type = "prebuilt"
268268
source.repo = "propolis"
269-
source.commit = "04a275736e9f3316de6bf2a4077d03acfa4e2cdf"
269+
source.commit = "21ac8a9f5005f96caa384e3de0bd38283fc0188f"
270270
# The SHA256 digest is automatically posted to:
271271
# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image/<commit>/propolis-server.sha256.txt
272-
source.sha256 = "3b52f303d7f0a687fd895e7129c93ca1d683d2cb676bff9c6c9d15bd3d2b3b5e"
272+
source.sha256 = "2a7b4bfa6d2b13714f68ec0095419218257ab584e763faac1d34baf38c8b09de"
273273
output.type = "zone"
274274

275275
[package.maghemite]

sled-agent/src/instance.rs

Lines changed: 51 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,17 @@ use crate::params::{
1414
InstanceHardware, InstanceMigrationSourceParams,
1515
InstanceMigrationTargetParams, InstanceStateRequested, VpcFirewallRule,
1616
};
17+
use crate::profile::*;
1718
use anyhow::anyhow;
1819
use backoff::BackoffError;
1920
use futures::lock::{Mutex, MutexGuard};
2021
use illumos_utils::dladm::Etherstub;
2122
use illumos_utils::link::VnicAllocator;
2223
use illumos_utils::opte::PortManager;
23-
use illumos_utils::running_zone::{
24-
InstalledZone, RunCommandError, RunningZone,
25-
};
24+
use illumos_utils::running_zone::{InstalledZone, RunningZone};
2625
use illumos_utils::svc::wait_for_service;
2726
use illumos_utils::zfs::ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT;
28-
use illumos_utils::zone::{AddressRequest, PROPOLIS_ZONE_PREFIX};
27+
use illumos_utils::zone::PROPOLIS_ZONE_PREFIX;
2928
use omicron_common::address::NEXUS_INTERNAL_PORT;
3029
use omicron_common::address::PROPOLIS_PORT;
3130
use omicron_common::api::internal::nexus::InstanceRuntimeState;
@@ -37,7 +36,7 @@ use omicron_common::backoff;
3736
use propolis_client::Client as PropolisClient;
3837
use slog::Logger;
3938
use std::net::IpAddr;
40-
use std::net::SocketAddr;
39+
use std::net::{SocketAddr, SocketAddrV6};
4140
use std::sync::Arc;
4241
use tokio::task::JoinHandle;
4342
use uuid::Uuid;
@@ -96,6 +95,9 @@ pub enum Error {
9695

9796
#[error("Instance already registered with Propolis ID {0}")]
9897
InstanceAlreadyRegistered(Uuid),
98+
99+
#[error("I/O error")]
100+
Io(#[from] std::io::Error),
99101
}
100102

101103
// Issues read-only, idempotent HTTP requests at propolis until it responds with
@@ -143,12 +145,8 @@ fn service_name() -> &'static str {
143145
"svc:/system/illumos/propolis-server"
144146
}
145147

146-
fn instance_name(id: &Uuid) -> String {
147-
format!("vm-{}", id)
148-
}
149-
150-
fn fmri_name(id: &Uuid) -> String {
151-
format!("{}:{}", service_name(), instance_name(id))
148+
fn fmri_name() -> String {
149+
format!("{}:default", service_name())
152150
}
153151

154152
fn propolis_zone_name(id: &Uuid) -> String {
@@ -673,6 +671,10 @@ impl Instance {
673671
migration_params: Option<InstanceMigrationTargetParams>,
674672
) -> Result<(), Error> {
675673
if let Some(running_state) = inner.running_state.as_ref() {
674+
info!(
675+
&inner.log,
676+
"Ensuring instance which already has a running state"
677+
);
676678
inner
677679
.propolis_ensure(
678680
&running_state.client,
@@ -692,10 +694,13 @@ impl Instance {
692694
// logically running (on the source) while the target Propolis
693695
// is being launched.
694696
if migration_params.is_none() {
697+
info!(&inner.log, "Ensuring new instance");
695698
inner.state.transition(PublishedInstanceState::Starting);
696699
if let Err(e) = inner.publish_state_to_nexus().await {
697700
break 'setup Err(e);
698701
}
702+
} else {
703+
info!(&inner.log, "Ensuring new instance (migration)");
699704
}
700705

701706
// Set up the Propolis zone and the objects associated with it.
@@ -878,118 +883,53 @@ impl Instance {
878883
)
879884
.await?;
880885

881-
let running_zone = RunningZone::boot(installed_zone).await?;
882-
let addr_request = AddressRequest::new_static(inner.propolis_ip, None);
883-
let network = running_zone.ensure_address(addr_request).await?;
884-
info!(inner.log, "Created address {} for zone: {}", network, zname);
885-
886886
let gateway = inner.port_manager.underlay_ip();
887-
running_zone.add_default_route(*gateway)?;
888-
889-
// Run Propolis in the Zone.
890-
let smf_service_name = "svc:/system/illumos/propolis-server";
891-
let instance_name = format!("vm-{}", inner.propolis_id());
892-
let smf_instance_name =
893-
format!("{}:{}", smf_service_name, instance_name);
894-
let server_addr = SocketAddr::new(inner.propolis_ip, PROPOLIS_PORT);
895-
896-
// We intentionally do not import the service - it is placed under
897-
// `/var/svc/manifest`, and should automatically be imported by
898-
// configd.
899-
//
900-
// Insteady, we re-try adding the instance until it succeeds.
901-
// This implies that the service was added successfully.
902-
info!(
903-
inner.log, "Adding service"; "smf_name" => &smf_instance_name
904-
);
905-
backoff::retry_notify(
906-
backoff::retry_policy_local(),
907-
|| async {
908-
running_zone
909-
.run_cmd(&[
910-
illumos_utils::zone::SVCCFG,
911-
"-s",
912-
smf_service_name,
913-
"add",
914-
&instance_name,
915-
])
916-
.map_err(|e| backoff::BackoffError::transient(e))
917-
},
918-
|err: RunCommandError, delay| {
919-
warn!(
920-
inner.log,
921-
"Failed to add {} as a service (retrying in {:?}): {}",
922-
instance_name,
923-
delay,
924-
err.to_string()
925-
);
926-
},
927-
)
928-
.await?;
929-
930-
info!(inner.log, "Adding service property group 'config'");
931-
running_zone.run_cmd(&[
932-
illumos_utils::zone::SVCCFG,
933-
"-s",
934-
&smf_instance_name,
935-
"addpg",
936-
"config",
937-
"astring",
938-
])?;
939-
940-
info!(inner.log, "Setting server address property"; "address" => &server_addr);
941-
running_zone.run_cmd(&[
942-
illumos_utils::zone::SVCCFG,
943-
"-s",
944-
&smf_instance_name,
945-
"setprop",
946-
&format!("config/server_addr={}", server_addr),
947-
])?;
948-
949-
let metric_addr = inner.lazy_nexus_client.get_ip().await.unwrap();
950-
info!(
951-
inner.log,
952-
"Setting metric address property address [{}]:{}",
953-
metric_addr,
887+
// TODO: We should pass DNS information to Propolis, rather than a
888+
// single point-in-time Nexus IP address.
889+
let metric_ip = inner.lazy_nexus_client.get_ip().await.unwrap();
890+
let metric_addr = SocketAddr::V6(SocketAddrV6::new(
891+
metric_ip,
954892
NEXUS_INTERNAL_PORT,
955-
);
956-
running_zone.run_cmd(&[
957-
illumos_utils::zone::SVCCFG,
958-
"-s",
959-
&smf_instance_name,
960-
"setprop",
961-
&format!(
962-
"config/metric_addr=[{}]:{}",
963-
metric_addr, NEXUS_INTERNAL_PORT
893+
0,
894+
0,
895+
));
896+
897+
let config = PropertyGroupBuilder::new("config")
898+
.add_property(
899+
"datalink",
900+
"astring",
901+
installed_zone.get_control_vnic_name(),
902+
)
903+
.add_property("gateway", "astring", &gateway.to_string())
904+
.add_property(
905+
"listen_addr",
906+
"astring",
907+
&inner.propolis_ip.to_string(),
908+
)
909+
.add_property("listen_port", "astring", &PROPOLIS_PORT.to_string())
910+
.add_property("metric_addr", "astring", &metric_addr.to_string());
911+
912+
let profile = ProfileBuilder::new("omicron").add_service(
913+
ServiceBuilder::new("system/illumos/propolis-server").add_instance(
914+
ServiceInstanceBuilder::new("default")
915+
.add_property_group(config),
964916
),
965-
])?;
966-
967-
info!(inner.log, "Refreshing instance");
968-
running_zone.run_cmd(&[
969-
illumos_utils::zone::SVCCFG,
970-
"-s",
971-
&smf_instance_name,
972-
"refresh",
973-
])?;
974-
975-
info!(inner.log, "Enabling instance");
976-
running_zone.run_cmd(&[
977-
illumos_utils::zone::SVCADM,
978-
"enable",
979-
"-t",
980-
&smf_instance_name,
981-
])?;
917+
);
918+
profile.add_to_zone(&inner.log, &installed_zone).await?;
982919

920+
let running_zone = RunningZone::boot(installed_zone).await?;
983921
info!(inner.log, "Started propolis in zone: {}", zname);
984922

985923
// This isn't strictly necessary - we wait for the HTTP server below -
986924
// but it helps distinguish "online in SMF" from "responding to HTTP
987925
// requests".
988-
let fmri = fmri_name(inner.propolis_id());
926+
let fmri = fmri_name();
989927
wait_for_service(Some(&zname), &fmri)
990928
.await
991929
.map_err(|_| Error::Timeout(fmri.to_string()))?;
930+
info!(inner.log, "Propolis SMF service is online");
992931

932+
let server_addr = SocketAddr::new(inner.propolis_ip, PROPOLIS_PORT);
993933
inner.state.current_mut().propolis_addr = Some(server_addr);
994934

995935
// We use a custom client builder here because the default progenitor
@@ -1004,6 +944,7 @@ impl Instance {
1004944
// yet. Wait for it to respond to requests, so users of the instance
1005945
// don't need to worry about initialization races.
1006946
wait_for_http_server(&inner.log, &client).await?;
947+
info!(inner.log, "Propolis HTTP server online");
1007948

1008949
Ok(PropolisSetup { client, running_zone })
1009950
}

0 commit comments

Comments
 (0)