From efffa422f3a2eb907536f538fb077a43c29c1c51 Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Tue, 24 Jun 2025 15:43:47 -0400 Subject: [PATCH] [spr] initial version Created using spr 1.3.6-beta.1 --- illumos-utils/src/zfs.rs | 29 ++++++++---- sled-agent/src/backing_fs.rs | 2 +- sled-agent/src/sled_agent.rs | 2 +- sled-agent/src/support_bundle/logs.rs | 64 ++++++++++++++++++++++----- sled-agent/src/zone_bundle.rs | 1 + sled-diagnostics/src/logs.rs | 2 + sled-storage/src/resources.rs | 9 ---- 7 files changed, 79 insertions(+), 30 deletions(-) diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index f28c3372280..9e5d268f5dc 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -1205,6 +1205,7 @@ impl Zfs { filesystem_name, &[&property], Some(PropertySource::Local), + false, ) .await?; Ok(value) @@ -1214,8 +1215,10 @@ impl Zfs { pub async fn get_value( filesystem_name: &str, name: &str, + parsable: bool, ) -> Result { - let [value] = Self::get_values(filesystem_name, &[name], None).await?; + let [value] = + Self::get_values(filesystem_name, &[name], None, parsable).await?; Ok(value) } @@ -1288,11 +1291,13 @@ impl Zfs { /// /// - `names`: The properties being acquired /// - `source`: The optioanl property source (origin of the property) + /// - `parsable`: Display numbers in parsable (exact) values /// Defaults to "all sources" when unspecified. pub async fn get_values( filesystem_name: &str, names: &[&str; N], source: Option, + parsable: bool, ) -> Result<[String; N], GetValueError> { let mut cmd = Command::new(PFEXEC); let all_names = names @@ -1308,7 +1313,8 @@ impl Zfs { .collect::, GetValueError>>()? .join(","); - cmd.args(&[ZFS, "get", "-Ho", "value"]); + let field_fmt = if parsable { "-Hpo" } else { "-Ho" }; + cmd.args(&[ZFS, "get", field_fmt, "value"]); if let Some(source) = source { cmd.args(&["-s", &source.to_string()]); } @@ -1362,7 +1368,8 @@ impl Snapshot { // When a mountpoint is returned as "legacy" we could go fish around in // "/etc/mnttab". That would probably mean making this function return a // result of an option. - let mountpoint = Zfs::get_value(&self.filesystem, "mountpoint").await?; + let mountpoint = + Zfs::get_value(&self.filesystem, "mountpoint", false).await?; Ok(Utf8PathBuf::from(mountpoint) .join(format!(".zfs/snapshot/{}", self.snap_name))) } @@ -1451,22 +1458,26 @@ mod test { #[tokio::test] async fn get_values_of_rpool() { // If the rpool exists, it should have a name. - let values = Zfs::get_values("rpool", &["name"; 1], None) + let values = Zfs::get_values("rpool", &["name"; 1], None, false) .await .expect("Failed to query rpool type"); assert_eq!(values[0], "rpool"); // We don't really care if any local properties are set, we just don't // want this to throw an error. - let _values = - Zfs::get_values("rpool", &["name"; 1], Some(PropertySource::Local)) - .await - .expect("Failed to query rpool type"); + let _values = Zfs::get_values( + "rpool", + &["name"; 1], + Some(PropertySource::Local), + false, + ) + .await + .expect("Failed to query rpool type"); // Also, the "all" property should not be queryable. It's normally fine // to pass this value, it just returns a variable number of properties, // which doesn't work with the current implementation's parsing. - let err = Zfs::get_values("rpool", &["all"; 1], None) + let err = Zfs::get_values("rpool", &["all"; 1], None, false) .await .expect_err("Should not be able to query for 'all' property"); diff --git a/sled-agent/src/backing_fs.rs b/sled-agent/src/backing_fs.rs index ac6e625512a..2df08d54f22 100644 --- a/sled-agent/src/backing_fs.rs +++ b/sled-agent/src/backing_fs.rs @@ -164,7 +164,7 @@ pub(crate) async fn ensure_backing_fs( // can't retrieve the property at all, then there is definitely no ZFS // filesystem mounted there - most likely we are running with a non-ZFS // root, such as when net booted during CI. - if Zfs::get_value(&bfs.mountpoint, "mountpoint") + if Zfs::get_value(&bfs.mountpoint, "mountpoint", false) .await .unwrap_or("not-zfs".to_string()) == bfs.mountpoint diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index bf2c693b25e..646e28f3446 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -703,7 +703,7 @@ impl SledAgent { pub(crate) fn as_support_bundle_logs(&self) -> SupportBundleLogs<'_> { SupportBundleLogs::new( &self.log, - self.inner.config_reconciler.internal_disks_rx(), + self.inner.config_reconciler.available_datasets_rx(), ) } diff --git a/sled-agent/src/support_bundle/logs.rs b/sled-agent/src/support_bundle/logs.rs index c68a3bdedde..dc87e9ed236 100644 --- a/sled-agent/src/support_bundle/logs.rs +++ b/sled-agent/src/support_bundle/logs.rs @@ -6,8 +6,10 @@ use camino_tempfile::tempfile_in; use dropshot::HttpError; +use futures::StreamExt; +use futures::stream::FuturesUnordered; use range_requests::make_get_response; -use sled_agent_config_reconciler::InternalDisksReceiver; +use sled_agent_config_reconciler::AvailableDatasetsReceiver; use slog::Logger; use slog_error_chain::InlineErrorChain; use tokio::io::AsyncSeekExt; @@ -43,15 +45,15 @@ impl From for HttpError { pub struct SupportBundleLogs<'a> { log: &'a Logger, - internal_disks_rx: &'a InternalDisksReceiver, + available_datasets_rx: AvailableDatasetsReceiver, } impl<'a> SupportBundleLogs<'a> { pub fn new( log: &'a Logger, - internal_disks_rx: &'a InternalDisksReceiver, + available_datasets_rx: AvailableDatasetsReceiver, ) -> Self { - Self { log, internal_disks_rx } + Self { log, available_datasets_rx } } /// Get a list of zones on a sled containing logs that we want to include in @@ -78,12 +80,54 @@ impl<'a> SupportBundleLogs<'a> { where Z: Into, { - // We are using an M.2 device for temporary storage to assemble a zip - // file made up of all of the discovered zone's logs. - let current_internal_disks = self.internal_disks_rx.current(); - let mut m2_debug_datasets = current_internal_disks.all_debug_datasets(); - let tempdir = m2_debug_datasets.next().ok_or(Error::MissingStorage)?; - let mut tempfile = tempfile_in(tempdir)?; + // Attempt to find a U.2 device with the most available free space + // for temporary storage to assemble a zip file made up of all of the + // discovered zone's logs. + let mounted_debug_datasets = + self.available_datasets_rx.all_mounted_debug_datasets(); + let storage_paths_to_size: Vec<_> = mounted_debug_datasets + .into_iter() + .map(|dataset_path| { + let path = dataset_path.path; + async move { + match illumos_utils::zfs::Zfs::get_value( + path.as_str(), + "available", + true, + ) + .await + { + Ok(size_str) => match size_str.parse::() { + Ok(size) => Some((path, size)), + Err(e) => { + warn!( + &self.log, + "failed to parse available size for the \ + dataset at path {path}: {e}" + ); + None + } + }, + Err(e) => { + warn!( + &self.log, + "failed to get available size for the dataset \ + at path {path}: {e}" + ); + None + } + } + } + }) + .collect::>() + .collect() + .await; + let (largest_avail_space, _) = storage_paths_to_size + .into_iter() + .flatten() + .max_by_key(|(_, size)| *size) + .ok_or(Error::MissingStorage)?; + let mut tempfile = tempfile_in(largest_avail_space)?; let log = self.log.clone(); let zone = zone.into(); diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index ecb39d4f3b5..e0eeddd8200 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -87,6 +87,7 @@ async fn initialize_zfs_resources(log: &Logger) -> Result<(), BundleError> { &name, &[ZONE_BUNDLE_ZFS_PROPERTY_NAME], Some(illumos_utils::zfs::PropertySource::Local), + false, ) .await else { diff --git a/sled-diagnostics/src/logs.rs b/sled-diagnostics/src/logs.rs index 5d165a333c6..729f4c9c588 100644 --- a/sled-diagnostics/src/logs.rs +++ b/sled-diagnostics/src/logs.rs @@ -338,6 +338,7 @@ impl LogsHandle { &name, &[SLED_DIAGNOSTICS_ZFS_PROPERTY_NAME], Some(illumos_utils::zfs::PropertySource::Local), + false, ) .await { @@ -985,6 +986,7 @@ mod illumos_tests { &name, &[SLED_DIAGNOSTICS_ZFS_PROPERTY_NAME], Some(illumos_utils::zfs::PropertySource::Local), + false, ) .await .unwrap() diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 0e63fca65e7..5748a963a61 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -209,15 +209,6 @@ impl AllDisks { .collect() } - /// Return the directories that can be used for temporary sled-diagnostics - /// file storage. - pub fn all_sled_diagnostics_directories(&self) -> Vec { - // These directories are currently used for tempfile storage when - // zipping up zone logs before shuffling them off to a nexus collecting - // a support bundle. - self.all_m2_mountpoints(M2_DEBUG_DATASET).into_iter().collect() - } - /// Returns an iterator over all managed disks. pub fn iter_managed(&self) -> impl Iterator { self.inner.values.iter().filter_map(|(identity, disk)| match disk {