diff --git a/Cargo.lock b/Cargo.lock
index f2abe1f8721..a97ba526952 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -13715,6 +13715,8 @@ dependencies = [
  "bytes",
  "futures-core",
  "futures-sink",
+ "futures-util",
+ "hashbrown 0.15.4",
  "pin-project-lite",
  "slab",
  "tokio",
diff --git a/dev-tools/omdb/src/bin/omdb/db/ereport.rs b/dev-tools/omdb/src/bin/omdb/db/ereport.rs
index e0d2d184d31..99db2825342 100644
--- a/dev-tools/omdb/src/bin/omdb/db/ereport.rs
+++ b/dev-tools/omdb/src/bin/omdb/db/ereport.rs
@@ -155,7 +155,10 @@ async fn cmd_db_ereport_list(
                 restart_id: restart_id.into_untyped_uuid(),
                 ena: ena.into(),
                 class: class.clone(),
-                source: db::model::Reporter::Sp { sp_type, slot: sp_slot.0 },
+                source: db::model::Reporter::Sp {
+                    sp_type: sp_type.into(),
+                    slot: sp_slot.0,
+                },
                 serial: serial_number.as_deref(),
                 part_number: part_number.as_deref(),
             }
@@ -547,7 +550,10 @@ async fn cmd_db_ereporters(
             )| {
                 ReporterRow {
                     first_seen,
-                    identity: db::model::Reporter::Sp { slot: slot.0, sp_type },
+                    identity: db::model::Reporter::Sp {
+                        slot: slot.0,
+                        sp_type: sp_type.into(),
+                    },
                     serial,
                     part_number,
                     id: restart_id,
diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs
index eac1cb2118a..9e4c4cbd6f0 100644
--- a/dev-tools/omdb/src/bin/omdb/nexus.rs
+++ b/dev-tools/omdb/src/bin/omdb/nexus.rs
@@ -66,6 +66,7 @@ use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus;
 use nexus_types::internal_api::background::RegionSnapshotReplacementStepStatus;
 use nexus_types::internal_api::background::SupportBundleCleanupReport;
 use nexus_types::internal_api::background::SupportBundleCollectionReport;
+use nexus_types::internal_api::background::SupportBundleEreportStatus;
 use nexus_types::internal_api::background::TufArtifactReplicationCounters;
 use nexus_types::internal_api::background::TufArtifactReplicationRequest;
 use nexus_types::internal_api::background::TufArtifactReplicationStatus;
@@ -2414,6 +2415,8 @@ fn print_task_support_bundle_collector(details: &serde_json::Value) {
             listed_in_service_sleds,
             listed_sps,
             activated_in_db_ok,
+            sp_ereports,
+            host_ereports,
         }) = collection_report
         {
             println!("    Support Bundle Collection Report:");
@@ -2427,6 +2430,26 @@ fn print_task_support_bundle_collector(details: &serde_json::Value) {
             println!(
                 "      Bundle was activated in the database: {activated_in_db_ok}"
             );
+            print_ereport_status("SP", &sp_ereports);
+            print_ereport_status("Host OS", &host_ereports);
+        }
+    }
+}
+
+fn print_ereport_status(which: &str, status: &SupportBundleEreportStatus) {
+    match status {
+        SupportBundleEreportStatus::NotRequested => {
+            println!("      {which} ereport collection was not requested");
+        }
+        SupportBundleEreportStatus::Failed { error, n_collected } => {
+            println!("      {which} ereport collection failed:");
+            println!(
+                "        ereports collected successfully: {n_collected}"
+            );
+            println!("        error: {error}");
+        }
+        SupportBundleEreportStatus::Collected { n_collected } => {
+            println!("      {which} ereports collected: {n_collected}");
+        }
+    }
+}
diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml
index cdda6423973..4f35ab1dc19 100644
--- a/nexus/Cargo.toml
+++ b/nexus/Cargo.toml
@@ -107,7 +107,7 @@ tempfile.workspace = true
 thiserror.workspace = true
 tokio = { workspace = true, features = ["full"] }
 tokio-postgres = { workspace = true, features = ["with-serde_json-1"] }
-tokio-util = { workspace = true, features = ["codec"] }
+tokio-util = { workspace = true, features = ["codec", "rt"] }
 tough.workspace = true
 tufaceous-artifact.workspace = true
 usdt.workspace = true
diff --git a/nexus/db-model/src/ereport.rs b/nexus/db-model/src/ereport.rs
index 12320836b50..1129cbc988d 100644
--- a/nexus/db-model/src/ereport.rs
+++ b/nexus/db-model/src/ereport.rs
@@ -63,11 +63,14 @@ where
     }
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
 pub struct Ereport {
+    #[serde(flatten)]
     pub id: EreportId,
+    #[serde(flatten)]
     pub metadata: EreportMetadata,
     pub reporter: Reporter,
+    #[serde(flatten)]
     pub report: serde_json::Value,
 }
 
@@ -96,7 +99,7 @@ impl From<SpEreport> for Ereport {
                 serial_number,
                 class,
             },
-            reporter: Reporter::Sp { sp_type, slot: sp_slot.0 },
+            reporter: Reporter::Sp { sp_type: sp_type.into(), slot: sp_slot.0 },
             report,
         }
     }
@@ -131,7 +134,7 @@ impl From<HostEreport> for Ereport {
     }
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
 pub struct EreportMetadata {
     pub time_collected: DateTime<Utc>,
     pub time_deleted: Option<DateTime<Utc>>,
@@ -141,22 +144,40 @@ pub struct EreportMetadata {
     pub class: Option<String>,
 }
 
-#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
+#[derive(
+    Clone,
+    Debug,
+    Eq,
+    PartialEq,
+    Ord,
+    PartialOrd,
+    serde::Serialize,
+    serde::Deserialize,
+)]
 pub enum Reporter {
-    Sp { sp_type: SpType, slot: u16 },
+    Sp { sp_type: nexus_types::inventory::SpType, slot: u16 },
     HostOs { sled: SledUuid },
 }
 
 impl std::fmt::Display for Reporter {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
-            Self::Sp { sp_type: SpType::Sled, slot } => {
+            Self::Sp {
+                sp_type: nexus_types::inventory::SpType::Sled,
+                slot,
+            } => {
                 write!(f, "Sled (SP) {slot:02}")
             }
-            Self::Sp { sp_type: SpType::Switch, slot } => {
+            Self::Sp {
+                sp_type: nexus_types::inventory::SpType::Switch,
+                slot,
+            } => {
                 write!(f, "Switch {slot}")
             }
-            Self::Sp { sp_type: SpType::Power, slot } => {
+            Self::Sp {
+                sp_type: nexus_types::inventory::SpType::Power,
+                slot,
+            } => {
                 write!(f, "PSC {slot}")
             }
             Self::HostOs { sled } => {
diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs
index a7cab6f9513..ca5e7f4aaea 100644
--- a/nexus/db-queries/src/db/datastore/ereport.rs
+++ b/nexus/db-queries/src/db/datastore/ereport.rs
@@ -17,7 +17,7 @@ use crate::db::model::SpMgsSlot;
 use crate::db::model::SpType;
 use crate::db::model::SqlU16;
 use crate::db::model::SqlU32;
-use crate::db::pagination::paginated;
+use crate::db::pagination::{paginated, paginated_multicolumn};
 use async_bb8_diesel::AsyncRunQueryDsl;
 use chrono::DateTime;
 use chrono::Utc;
@@ -48,6 +48,44 @@ pub struct EreporterRestartBySerial {
     pub ereports: u32,
 }
 
+/// A set of filters for fetching ereports.
+#[derive(Clone, Debug, Default)]
+pub struct EreportFilters {
+    /// If present, include only ereports that were collected at the specified
+    /// timestamp or later.
+    ///
+    /// If `end_time` is also present, this value *must* be earlier than
+    /// `end_time`.
+    pub start_time: Option<DateTime<Utc>>,
+    /// If present, include only ereports that were collected at the specified
+    /// timestamp or before.
+    ///
+    /// If `start_time` is also present, this value *must* be later than
+    /// `start_time`.
+    pub end_time: Option<DateTime<Utc>>,
+    /// If this list is non-empty, include only ereports that were reported by
+    /// systems with the provided serial numbers.
+    pub only_serials: Vec<String>,
+    /// If this list is non-empty, include only ereports with the provided
+    /// class strings.
+    // TODO(eliza): globbing could be nice to add here eventually...
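+    // (Note: class strings are matched exactly, via `eq_any`; for example, a
+    // filter for "ereport.fake.whatever" will not match
+    // "ereport.fake.whatever.thingy".)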
+    pub only_classes: Vec<String>,
+}
+
+impl EreportFilters {
+    fn check_time_range(&self) -> Result<(), Error> {
+        if let (Some(start), Some(end)) = (self.start_time, self.end_time) {
+            if start > end {
+                return Err(Error::invalid_request(
+                    "start time must be before end time",
+                ));
+            }
+        }
+
+        Ok(())
+    }
+}
+
 impl DataStore {
     /// Fetch an ereport by its restart ID and ENA.
     ///
@@ -93,6 +131,90 @@ impl DataStore {
         Err(Error::non_resourcetype_not_found(format!("ereport {id}")))
     }
 
+    pub async fn host_ereports_fetch_matching(
+        &self,
+        opctx: &OpContext,
+        filters: &EreportFilters,
+        pagparams: &DataPageParams<'_, (Uuid, DbEna)>,
+    ) -> ListResultVec<HostEreport> {
+        opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;
+        filters.check_time_range()?;
+
+        let mut query = paginated_multicolumn(
+            host_dsl::host_ereport,
+            (host_dsl::restart_id, host_dsl::ena),
+            pagparams,
+        )
+        .filter(host_dsl::time_deleted.is_null())
+        .select(HostEreport::as_select());
+
+        if let Some(start) = filters.start_time {
+            query = query.filter(host_dsl::time_collected.ge(start));
+        }
+
+        if let Some(end) = filters.end_time {
+            query = query.filter(host_dsl::time_collected.le(end));
+        }
+
+        if !filters.only_serials.is_empty() {
+            query = query.filter(
+                host_dsl::sled_serial.eq_any(filters.only_serials.clone()),
+            );
+        }
+
+        if !filters.only_classes.is_empty() {
+            query = query
+                .filter(host_dsl::class.eq_any(filters.only_classes.clone()));
+        }
+
+        query
+            .load_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    pub async fn sp_ereports_fetch_matching(
+        &self,
+        opctx: &OpContext,
+        filters: &EreportFilters,
+        pagparams: &DataPageParams<'_, (Uuid, DbEna)>,
+    ) -> ListResultVec<SpEreport> {
+        opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;
+        filters.check_time_range()?;
+
+        let mut query = paginated_multicolumn(
+            sp_dsl::sp_ereport,
+            (sp_dsl::restart_id, sp_dsl::ena),
+            pagparams,
+        )
+        .filter(sp_dsl::time_deleted.is_null())
+        .select(SpEreport::as_select());
+
+        if let Some(start) = filters.start_time {
+            query = query.filter(sp_dsl::time_collected.ge(start));
+        }
+
+        if let Some(end) = filters.end_time {
+            query = query.filter(sp_dsl::time_collected.le(end));
+        }
+
+        if !filters.only_serials.is_empty() {
+            query = query.filter(
+                sp_dsl::serial_number.eq_any(filters.only_serials.clone()),
+            );
+        }
+
+        if !filters.only_classes.is_empty() {
+            query = query
+                .filter(sp_dsl::class.eq_any(filters.only_classes.clone()));
+        }
+
+        query
+            .load_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
     /// List ereports from the SP with the given restart ID.
     pub async fn sp_ereport_list_by_restart(
         &self,
@@ -144,7 +266,7 @@ impl DataStore {
                 EreporterRestartBySerial {
                     id: EreporterRestartUuid::from_untyped_uuid(restart_id),
                     reporter_kind: Reporter::Sp {
-                        sp_type,
+                        sp_type: sp_type.into(),
                         slot: sp_slot.into(),
                     },
                     first_seen_at: first_seen.expect(FIRST_SEEN_NOT_NULL),
@@ -261,8 +383,20 @@ impl DataStore {
         sled_id: SledUuid,
     ) -> Result<Option<EreportId>, Error> {
         opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;
+        self.host_latest_ereport_id_on_conn(
+            &*self.pool_connection_authorized(opctx).await?,
+            sled_id,
+        )
+        .await
+    }
+
+    async fn host_latest_ereport_id_on_conn(
+        &self,
+        conn: &async_bb8_diesel::Connection<DbConnection>,
+        sled_id: SledUuid,
+    ) -> Result<Option<EreportId>, Error> {
         let id = Self::host_latest_ereport_id_query(sled_id)
-            .get_result_async(&*self.pool_connection_authorized(opctx).await?)
+            .get_result_async(conn)
             .await
             .optional()
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?
@@ -314,6 +448,35 @@ impl DataStore {
             })?;
         Ok((created, latest))
     }
+
+    pub async fn host_ereports_insert(
+        &self,
+        opctx: &OpContext,
+        sled_id: SledUuid,
+        ereports: Vec<HostEreport>,
+    ) -> CreateResult<(usize, Option<EreportId>)> {
+        opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?;
+        let conn = self.pool_connection_authorized(opctx).await?;
+        let created = diesel::insert_into(host_dsl::host_ereport)
+            .values(ereports)
+            .on_conflict((host_dsl::restart_id, host_dsl::ena))
+            .do_nothing()
+            .execute_async(&*conn)
+            .await
+            .map_err(|e| {
+                public_error_from_diesel(e, ErrorHandler::Server)
+                    .internal_context("failed to insert ereports")
+            })?;
+        let latest = self
+            .host_latest_ereport_id_on_conn(&conn, sled_id)
+            .await
+            .map_err(|e| {
+                e.internal_context(format!(
+                    "failed to refresh latest ereport ID for {sled_id}"
+                ))
+            })?;
+        Ok((created, latest))
+    }
 }
 
 fn id_from_tuple(
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 55f0e582225..ddedac2d0f4 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -121,6 +121,7 @@ mod zpool;
 pub use address_lot::AddressLotCreateResult;
 pub use dns::DataStoreDnsTest;
 pub use dns::DnsVersionUpdateBuilder;
+pub use ereport::EreportFilters;
 pub use instance::{
     InstanceAndActiveVmm, InstanceGestalt, InstanceStateComputer,
 };
diff --git a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs
index 77a28ddff88..69cad6240f7 100644
--- a/nexus/src/app/background/tasks/support_bundle_collector.rs
+++ b/nexus/src/app/background/tasks/support_bundle_collector.rs
@@ -22,15 +22,20 @@ use gateway_client::types::SpIdentifier;
 use gateway_client::types::SpIgnition;
 use internal_dns_resolver::Resolver;
 use internal_dns_types::names::ServiceName;
+use nexus_db_model::Ereport;
 use nexus_db_model::SupportBundle;
 use nexus_db_model::SupportBundleState;
 use nexus_db_queries::authz;
 use nexus_db_queries::context::OpContext;
 use nexus_db_queries::db::DataStore;
+use nexus_db_queries::db::datastore;
+use nexus_db_queries::db::datastore::EreportFilters;
+use nexus_db_queries::db::pagination::Paginator;
 use nexus_types::deployment::SledFilter;
 use nexus_types::identity::Asset;
 use nexus_types::internal_api::background::SupportBundleCleanupReport;
 use nexus_types::internal_api::background::SupportBundleCollectionReport;
+use nexus_types::internal_api::background::SupportBundleEreportStatus;
 use omicron_common::api::external::DataPageParams;
 use omicron_common::api::external::Error;
 use omicron_common::api::external::LookupType;
@@ -49,10 +54,12 @@ use std::future::Future;
 use std::io::Write;
 use std::num::NonZeroU64;
 use std::sync::Arc;
+use std::sync::atomic::{AtomicUsize, Ordering};
 use tokio::io::AsyncReadExt;
 use tokio::io::AsyncSeekExt;
 use tokio::io::AsyncWriteExt;
 use tokio::io::SeekFrom;
+use tokio_util::task::AbortOnDropHandle;
 use tufaceous_artifact::ArtifactHash;
 use zip::ZipArchive;
 use zip::ZipWriter;
@@ -85,11 +92,20 @@ struct BundleRequest {
     //
     // Typically, this is CHUNK_SIZE, but can be modified for testing.
     transfer_chunk_size: NonZeroU64,
+
+    ereport_query: Option<EreportFilters>,
 }
 
 impl Default for BundleRequest {
     fn default() -> Self {
-        Self { skip_sled_info: false, transfer_chunk_size: CHUNK_SIZE }
+        Self {
+            skip_sled_info: false,
+            transfer_chunk_size: CHUNK_SIZE,
+            ereport_query: Some(EreportFilters {
+                start_time: Some(chrono::Utc::now() - chrono::Days::new(7)),
+                ..EreportFilters::default()
+            }),
+        }
     }
 }
@@ -410,6 +426,8 @@ impl SupportBundleCollector {
             request: request.clone(),
             bundle: bundle.clone(),
             transfer_chunk_size: request.transfer_chunk_size,
+            host_ereports_collected: AtomicUsize::new(0),
+            sp_ereports_collected: AtomicUsize::new(0),
         });
 
         let authz_bundle = authz_support_bundle_from_id(bundle.id.into());
@@ -455,6 +473,8 @@ struct BundleCollection {
     request: BundleRequest,
     bundle: SupportBundle,
     transfer_chunk_size: NonZeroU64,
+    host_ereports_collected: AtomicUsize,
+    sp_ereports_collected: AtomicUsize,
 }
 
 impl BundleCollection {
@@ -665,6 +685,32 @@ impl BundleCollection {
             )
             .await?;
 
+        let ereport_collection = if let Some(ref ereport_filters) =
+            self.request.ereport_query
+        {
+            // If ereports are to be included in the bundle, have someone go do
+            // that in the background while we're gathering up other stuff. Note
+            // that the `JoinHandle`s for these tasks are wrapped in
+            // `AbortOnDropHandle`s for cancellation correctness; this ensures
+            // that if collecting the bundle is cancelled and this future is
+            // dropped, the tasks that we've spawned to collect ereports are
+            // aborted as well.
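+            // (Aborting is best-effort: `AbortOnDropHandle` calls
+            // `JoinHandle::abort()` on drop, which cancels the task at its
+            // next await point rather than immediately.)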
+            let dir = dir.path().join("ereports");
+            let host = AbortOnDropHandle::new(tokio::spawn(
+                self.clone().collect_host_ereports(
+                    ereport_filters.clone(),
+                    dir.clone(),
+                ),
+            ));
+            let sp = AbortOnDropHandle::new(tokio::spawn(
+                self.clone().collect_sp_ereports(ereport_filters.clone(), dir),
+            ));
+            Some((host, sp))
+        } else {
+            debug!(log, "Support bundle: ereports not requested");
+            None
+        };
+
         let sp_dumps_dir = dir.path().join("sp_task_dumps");
         tokio::fs::create_dir_all(&sp_dumps_dir).await.with_context(|| {
             format!("failed to create SP task dump directory {sp_dumps_dir}")
@@ -712,6 +758,50 @@ impl BundleCollection {
             }
         }
 
+        if let Some((host, sp)) = ereport_collection {
+            let (host, sp) = tokio::join!(host, sp);
+            const TASK_FAILURE_MSG: &str = "task failed";
+            let n_collected =
+                self.host_ereports_collected.load(Ordering::Acquire);
+            report.host_ereports = match host
+                .map_err(|e| anyhow::anyhow!("{TASK_FAILURE_MSG}: {e}"))
+                .and_then(|x| x)
+            {
+                Ok(_) => SupportBundleEreportStatus::Collected { n_collected },
+                Err(err) => {
+                    warn!(
+                        &self.log,
+                        "Support bundle: host ereport collection failed \
+                         ({n_collected} collected successfully)";
+                        "err" => ?err,
+                    );
+                    SupportBundleEreportStatus::Failed {
+                        n_collected,
+                        error: err.to_string(),
+                    }
+                }
+            };
+            let n_collected =
+                self.sp_ereports_collected.load(Ordering::Acquire);
+            report.sp_ereports = match sp
+                .map_err(|e| anyhow::anyhow!("{TASK_FAILURE_MSG}: {e}"))
+                .and_then(|x| x)
+            {
+                Ok(_) => SupportBundleEreportStatus::Collected { n_collected },
+                Err(err) => {
+                    warn!(
+                        &self.log,
+                        "Support bundle: SP ereport collection failed \
+                         ({n_collected} collected successfully)";
+                        "err" => ?err,
+                    );
+                    SupportBundleEreportStatus::Failed {
+                        n_collected,
+                        error: err.to_string(),
+                    }
+                }
+            };
+        }
 
         Ok(report)
     }
@@ -861,6 +951,90 @@ impl BundleCollection {
         }
         return Ok(());
     }
+
+    async fn collect_sp_ereports(
+        self: Arc<Self>,
+        filters: EreportFilters,
+        dir: Utf8PathBuf,
+    ) -> anyhow::Result<()> {
+        let mut paginator = Paginator::new(
+            datastore::SQL_BATCH_SIZE,
+            dropshot::PaginationOrder::Ascending,
+        );
+        while let Some(p) = paginator.next() {
+            let ereports = self
+                .datastore
+                .sp_ereports_fetch_matching(
+                    &self.opctx,
+                    &filters,
+                    &p.current_pagparams(),
+                )
+                .await
+                .map_err(|e| {
+                    e.internal_context("failed to query for SP ereports")
+                })?;
+            paginator = p.found_batch(&ereports, &|ereport| {
+                (ereport.restart_id.into_untyped_uuid(), ereport.ena)
+            });
+
+            let n_ereports = ereports.len();
+            for ereport in ereports {
+                write_ereport(ereport.into(), &dir).await?;
+                self.sp_ereports_collected.fetch_add(1, Ordering::Release);
+            }
+            debug!(self.log, "Support bundle: added {n_ereports} SP ereports");
+        }
+
+        info!(
+            self.log,
+            "Support bundle: collected {} total SP ereports",
+            self.sp_ereports_collected.load(Ordering::Relaxed)
+        );
+        Ok(())
+    }
+
+    async fn collect_host_ereports(
+        self: Arc<Self>,
+        filters: EreportFilters,
+        dir: Utf8PathBuf,
+    ) -> anyhow::Result<()> {
+        let mut paginator = Paginator::new(
+            datastore::SQL_BATCH_SIZE,
+            dropshot::PaginationOrder::Ascending,
+        );
+        while let Some(p) = paginator.next() {
+            let ereports = self
+                .datastore
+                .host_ereports_fetch_matching(
+                    &self.opctx,
+                    &filters,
+                    &p.current_pagparams(),
+                )
+                .await
+                .map_err(|e| {
+                    e.internal_context("failed to query for host OS ereports")
+                })?;
+            paginator = p.found_batch(&ereports, &|ereport| {
+                (ereport.restart_id.into_untyped_uuid(), ereport.ena)
+            });
+            let n_ereports = ereports.len();
+            for ereport in ereports {
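+                // `ereport.into()` converts the `HostEreport` database row
+                // into the shared `Ereport` model (via its `From` impl in
+                // nexus-db-model) so that it can be serialized to JSON.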
+                write_ereport(ereport.into(), &dir).await?;
+                self.host_ereports_collected.fetch_add(1, Ordering::Release);
+            }
+            debug!(
+                self.log,
+                "Support bundle: added {n_ereports} host OS ereports"
+            );
+        }
+
+        info!(
+            self.log,
+            "Support bundle: collected {} total host ereports",
+            self.host_ereports_collected.load(Ordering::Relaxed)
+        );
+        Ok(())
+    }
 }
 
 impl BackgroundTask for SupportBundleCollector {
@@ -906,6 +1080,49 @@ impl BackgroundTask for SupportBundleCollector {
     }
 }
 
+async fn write_ereport(ereport: Ereport, dir: &Utf8Path) -> anyhow::Result<()> {
+    // Here's where we construct the file path for each ereport JSON file,
+    // given the top-level ereport directory path. Each ereport is stored in a
+    // subdirectory for the serial number of the system that produced the
+    // ereport. These paths take the following form:
+    //
+    //     {serial_number}/{restart_id}/{ENA}.json
+    //
+    // We can assume that the restart ID and ENA consist only of
+    // filesystem-safe characters, as the restart ID is known to be a UUID, and
+    // the ENA is just an integer. For the serial number, which we don't have
+    // full control over --- it came from the ereport metadata --- we must
+    // check that it doesn't contain any characters unsuitable for use in a
+    // filesystem path.
+    let sn = &ereport
+        .metadata
+        .serial_number
+        .as_deref()
+        // If the serial number contains any unsavoury characters, it goes in
+        // the `unknown_serial` hole! Note that the alleged serial number from
+        // the ereport will still be present in the JSON as a string, so we're
+        // not *lying* about what was received; we're just giving up on using
+        // it in the path.
+        .filter(|&s| is_fs_safe_single_path_component(s))
+        .unwrap_or("unknown_serial");
+    let dir = dir
+        .join(sn)
+        // N.B. that we call `into_untyped_uuid()` here, as the `Display`
+        // implementation for a typed UUID appends " (ereporter_restart)",
+        // which we don't want.
+        .join(ereport.id.restart_id.into_untyped_uuid().to_string());
+    tokio::fs::create_dir_all(&dir)
+        .await
+        .with_context(|| format!("failed to create directory '{dir}'"))?;
+    let file_path = dir.join(format!("{}.json", ereport.id.ena));
+    let json = serde_json::to_vec(&ereport).with_context(|| {
+        format!("failed to serialize ereport '{sn}:{}'", ereport.id)
+    })?;
+    tokio::fs::write(&file_path, json)
+        .await
+        .with_context(|| format!("failed to write '{file_path}'"))
+}
+
 // Takes a directory "dir", and zips the contents into a single zipfile.
 fn bundle_to_zipfile(dir: &Utf8TempDir) -> anyhow::Result<std::fs::File> {
     let tempfile = tempfile_in(TEMPDIR)?;
@@ -1188,6 +1405,48 @@ async fn save_sp_dumps(
     Ok(())
 }
 
+fn is_fs_safe_single_path_component(s: &str) -> bool {
+    // Might be path traversal...
+    if s == "." || s == ".." {
+        return false;
+    }
+
+    if s == "~" {
+        return false;
+    }
+
+    const BANNED_CHARS: &[char] = &[
+        // Check for path separators.
+        //
+        // Naively, we might reach for `std::path::is_separator()` here.
+        // However, this function only checks if a path is a permitted
+        // separator on the *current* platform --- so, running on illumos, we
+        // will only check for Unix path separators. But, because the support
+        // bundle may be extracted on a workstation system by Oxide support
+        // personnel or by the customer, we should also make sure we don't
+        // allow the use of Windows path separators, which `is_separator()`
+        // won't check for on Unix systems.
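+        // (On Windows itself, `is_separator()` would match both `/` and `\`;
+        // listing both here keeps the check platform-independent.)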
+        '/', '\\',
+        // Characters forbidden on Windows, per:
+        // https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions
+        '<', '>', ':', '"', '|', '?', '*',
+    ];
+
+    // Rather than using `s.contains()`, we do all the checks in one pass.
+    for c in s.chars() {
+        if BANNED_CHARS.contains(&c) {
+            return false;
+        }
+
+        // Definitely no control characters!
+        if c.is_control() {
+            return false;
+        }
+    }
+
+    true
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
@@ -1209,7 +1468,8 @@ mod test {
     use omicron_common::disk::SharedDatasetConfig;
     use omicron_common::zpool_name::ZpoolName;
     use omicron_uuid_kinds::{
-        BlueprintUuid, DatasetUuid, PhysicalDiskUuid, SledUuid,
+        BlueprintUuid, DatasetUuid, EreporterRestartUuid, OmicronZoneUuid,
+        PhysicalDiskUuid, SledUuid,
     };
     use uuid::Uuid;
 
@@ -1350,6 +1610,139 @@ mod test {
         id
     }
 
+    async fn make_fake_ereports(datastore: &DataStore, opctx: &OpContext) {
+        use crate::db;
+
+        const SP_SERIAL: &str = "BRM42000069";
+        const HOST_SERIAL: &str = "BRM66600042";
+        const GIMLET_PN: &str = "9130000019";
+        // Make some SP ereports...
+        let sp_restart_id = EreporterRestartUuid::new_v4();
+        datastore.sp_ereports_insert(&opctx, db::model::SpType::Sled, 8, vec![
+            db::model::SpEreport {
+                restart_id: sp_restart_id.into(),
+                ena: ereport_types::Ena(1).into(),
+                time_collected: chrono::Utc::now(),
+                time_deleted: None,
+                collector_id: OmicronZoneUuid::new_v4().into(),
+                sp_type: db::model::SpType::Sled,
+                sp_slot: 8.into(),
+                part_number: Some(GIMLET_PN.to_string()),
+                serial_number: Some(SP_SERIAL.to_string()),
+                class: Some("ereport.fake.whatever".to_string()),
+                report: serde_json::json!({"hello world": true})
+            },
+            db::model::SpEreport {
+                restart_id: sp_restart_id.into(),
+                ena: ereport_types::Ena(2).into(),
+                time_collected: chrono::Utc::now(),
+                time_deleted: None,
+                collector_id: OmicronZoneUuid::new_v4().into(),
+                sp_type: db::model::SpType::Sled,
+                sp_slot: 8.into(),
+                part_number: Some(GIMLET_PN.to_string()),
+                serial_number: Some(SP_SERIAL.to_string()),
+                class: Some("ereport.something.blah".to_string()),
+                report: serde_json::json!({"system_working": "seems to be",})
+            },
+            db::model::SpEreport {
+                restart_id: EreporterRestartUuid::new_v4().into(),
+                ena: ereport_types::Ena(1).into(),
+                time_collected: chrono::Utc::now(),
+                time_deleted: None,
+                collector_id: OmicronZoneUuid::new_v4().into(),
+                sp_type: db::model::SpType::Sled,
+                sp_slot: 8.into(),
+                // Let's do a silly one! No VPD, to make sure that's also
+                // handled correctly.
+                part_number: None,
+                serial_number: None,
+                class: Some("ereport.fake.whatever".to_string()),
+                report: serde_json::json!({"hello_world": true})
+            },
+        ]).await.expect("failed to insert fake SP ereports");
+        // And one from a different serial. N.B. that I made sure the number of
+        // host-OS and SP ereports are different for when we make assertions
+        // about the bundle report.
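+        // (Tally: four SP ereports across two serial numbers, and three host
+        // OS ereports sharing one sled serial --- matching the `n_collected`
+        // assertions below.)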
+        datastore
+            .sp_ereports_insert(
+                &opctx,
+                db::model::SpType::Switch,
+                1,
+                vec![db::model::SpEreport {
+                    restart_id: EreporterRestartUuid::new_v4().into(),
+                    ena: ereport_types::Ena(1).into(),
+                    time_collected: chrono::Utc::now(),
+                    time_deleted: None,
+                    collector_id: OmicronZoneUuid::new_v4().into(),
+                    sp_type: db::model::SpType::Switch,
+                    sp_slot: 1.into(),
+                    part_number: Some("9130000006".to_string()),
+                    serial_number: Some("BRM41000555".to_string()),
+                    class: Some("ereport.fake.whatever".to_string()),
+                    report: serde_json::json!({"im_a_sidecar": true}),
+                }],
+            )
+            .await
+            .expect("failed to insert another fake SP ereport");
+        // And some host OS ones...
+        let sled_id = SledUuid::new_v4();
+        let restart_id = EreporterRestartUuid::new_v4().into();
+        datastore
+            .host_ereports_insert(
+                &opctx,
+                sled_id,
+                vec![
+                    db::model::HostEreport {
+                        restart_id,
+                        ena: ereport_types::Ena(1).into(),
+                        time_collected: chrono::Utc::now(),
+                        time_deleted: None,
+                        collector_id: OmicronZoneUuid::new_v4().into(),
+                        sled_id: sled_id.into(),
+                        sled_serial: HOST_SERIAL.to_string(),
+                        class: Some("ereport.fake.whatever".to_string()),
+                        report: serde_json::json!({"hello_world": true}),
+                    },
+                    db::model::HostEreport {
+                        restart_id,
+                        ena: ereport_types::Ena(2).into(),
+                        time_collected: chrono::Utc::now(),
+                        time_deleted: None,
+                        collector_id: OmicronZoneUuid::new_v4().into(),
+                        sled_id: sled_id.into(),
+                        sled_serial: HOST_SERIAL.to_string(),
+                        class: Some("ereport.fake.whatever.thingy".to_string()),
+                        report: serde_json::json!({"goodbye_world": false}),
+                    },
+                ],
+            )
+            .await
+            .expect("failed to insert fake host OS ereports");
+        // And another one with the same serial but different restart/sled IDs
+        let sled_id = SledUuid::new_v4();
+        datastore
+            .host_ereports_insert(
+                &opctx,
+                sled_id,
+                vec![
+                    db::model::HostEreport {
+                        restart_id: EreporterRestartUuid::new_v4().into(),
+                        ena: ereport_types::Ena(1).into(),
+                        time_collected: chrono::Utc::now(),
+                        time_deleted: None,
+                        collector_id: OmicronZoneUuid::new_v4().into(),
+                        sled_id: sled_id.into(),
+                        sled_serial: HOST_SERIAL.to_string(),
+                        class: Some("ereport.something.hostos_related".to_string()),
+                        report: serde_json::json!({"illumos": "very yes", "whatever": 42}),
+                    },
+                ],
+            )
+            .await
+            .expect("failed to insert another fake host OS ereport");
+    }
+
     struct TestDataset {
         zpool_id: ZpoolUuid,
         dataset_id: DatasetUuid,
@@ -1454,6 +1847,10 @@ mod test {
         let _datasets =
             TestDataset::setup(cptestctx, &datastore, &opctx, 1).await;
 
+        // Make up some ereports so that we can test that they're included in
+        // the bundle.
+        make_fake_ereports(&datastore, &opctx).await;
+
         // Assign a bundle to ourselves. We expect to collect it on
         // the next call to "collect_bundle".
         let bundle = datastore
@@ -1490,6 +1887,14 @@ mod test {
         assert!(report.listed_in_service_sleds);
         assert!(report.listed_sps);
         assert!(report.activated_in_db_ok);
+        assert_eq!(
+            report.host_ereports,
+            SupportBundleEreportStatus::Collected { n_collected: 3 }
+        );
+        assert_eq!(
+            report.sp_ereports,
+            SupportBundleEreportStatus::Collected { n_collected: 4 }
+        );
 
         let observed_bundle = datastore
             .support_bundle_get(&opctx, bundle.id.into())
@@ -1546,6 +1951,7 @@ mod test {
         let request = BundleRequest {
             skip_sled_info: true,
             transfer_chunk_size: NonZeroU64::new(16).unwrap(),
+            ereport_query: None,
         };
 
         let report = collector
diff --git a/nexus/tests/integration_tests/support_bundles.rs b/nexus/tests/integration_tests/support_bundles.rs
index 5bd3f18faf0..215936dbecb 100644
--- a/nexus/tests/integration_tests/support_bundles.rs
+++ b/nexus/tests/integration_tests/support_bundles.rs
@@ -19,6 +19,7 @@ use nexus_types::external_api::shared::SupportBundleInfo;
 use nexus_types::external_api::shared::SupportBundleState;
 use nexus_types::internal_api::background::SupportBundleCleanupReport;
 use nexus_types::internal_api::background::SupportBundleCollectionReport;
+use nexus_types::internal_api::background::SupportBundleEreportStatus;
 use omicron_uuid_kinds::SupportBundleUuid;
 use serde::Deserialize;
 use std::io::Cursor;
@@ -492,6 +493,12 @@ async fn test_support_bundle_lifecycle(cptestctx: &ControlPlaneTestContext) {
             listed_in_service_sleds: true,
             listed_sps: true,
             activated_in_db_ok: true,
+            host_ereports: SupportBundleEreportStatus::Collected {
+                n_collected: 0
+            },
+            sp_ereports: SupportBundleEreportStatus::Collected {
+                n_collected: 0
+            }
         })
     );
     let bundle = bundle_get(&client, bundle.id).await.unwrap();
@@ -589,6 +596,12 @@ async fn test_support_bundle_range_requests(
             listed_in_service_sleds: true,
             listed_sps: true,
             activated_in_db_ok: true,
+            host_ereports: SupportBundleEreportStatus::Collected {
+                n_collected: 0
+            },
+            sp_ereports: SupportBundleEreportStatus::Collected {
+                n_collected: 0
+            }
         })
    );
    let bundle = bundle_get(&client, bundle.id).await.unwrap();
diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs
index 1395d9c7131..05ff60fbf27 100644
--- a/nexus/types/src/internal_api/background.rs
+++ b/nexus/types/src/internal_api/background.rs
@@ -236,6 +236,24 @@ pub struct SupportBundleCollectionReport {
 
     /// True iff the bundle was successfully made 'active' in the database.
     pub activated_in_db_ok: bool,
+
+    /// Status of host OS ereport collection.
+    pub host_ereports: SupportBundleEreportStatus,
+
+    /// Status of SP ereport collection.
+    pub sp_ereports: SupportBundleEreportStatus,
+}
+
+#[derive(Debug, Deserialize, Serialize, PartialEq, Eq)]
+pub enum SupportBundleEreportStatus {
+    /// Ereports were not requested for this bundle.
+    NotRequested,
+
+    /// Ereports were collected successfully.
+    Collected { n_collected: usize },
+
+    /// Ereport collection failed, though some ereports may have been written.
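+    ///
+    /// `n_collected` counts the ereports that were successfully written to
+    /// the bundle before the error was encountered.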
+    Failed { n_collected: usize, error: String },
 }
 
 impl SupportBundleCollectionReport {
@@ -245,6 +263,8 @@ impl SupportBundleCollectionReport {
             listed_in_service_sleds: false,
             listed_sps: false,
             activated_in_db_ok: false,
+            host_ereports: SupportBundleEreportStatus::NotRequested,
+            sp_ereports: SupportBundleEreportStatus::NotRequested,
         }
     }
 }
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index 3fbbb606e37..04e7e38a3ce 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -135,7 +135,7 @@ tokio = { version = "1.47.0", features = ["full", "test-util"] }
 tokio-postgres = { version = "0.7.13", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] }
 tokio-rustls = { version = "0.26.0", default-features = false, features = ["logging", "ring", "tls12"] }
 tokio-stream = { version = "0.1.17", features = ["net", "sync"] }
-tokio-util = { version = "0.7.15", features = ["codec", "io-util", "time"] }
+tokio-util = { version = "0.7.15", features = ["codec", "io-util", "rt", "time"] }
 toml = { version = "0.7.8" }
 toml_datetime = { version = "0.6.11", default-features = false, features = ["serde"] }
 toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.27", features = ["serde"] }
@@ -273,7 +273,7 @@ tokio = { version = "1.47.0", features = ["full", "test-util"] }
 tokio-postgres = { version = "0.7.13", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] }
 tokio-rustls = { version = "0.26.0", default-features = false, features = ["logging", "ring", "tls12"] }
 tokio-stream = { version = "0.1.17", features = ["net", "sync"] }
-tokio-util = { version = "0.7.15", features = ["codec", "io-util", "time"] }
+tokio-util = { version = "0.7.15", features = ["codec", "io-util", "rt", "time"] }
 toml = { version = "0.7.8" }
 toml_datetime = { version = "0.6.11", default-features = false, features = ["serde"] }
 toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.27", features = ["serde"] }