Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion nexus/db-model/src/ereport.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,15 @@ impl From<HostEreport> for Ereport {
sled_id,
class,
report,
part_number,
} = host_report;
Ereport {
id: EreportId { restart_id: restart_id.into(), ena: ena.into() },
metadata: EreportMetadata {
time_collected,
time_deleted,
collector_id: collector_id.into(),
part_number: None, // TODO
part_number,
serial_number: Some(sled_serial),
class,
},
Expand Down Expand Up @@ -253,4 +254,10 @@ pub struct HostEreport {
pub class: Option<String>,

pub report: serde_json::Value,

// It's a shame this has to be nullable, while the serial is not. However,
// this field was added in a migration, and we have to be able to handle the
// case where a sled record was hard-deleted when backfilling the ereport
// table's part_number column. Sad.
pub part_number: Option<String>,
}
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock};
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: Version = Version::new(176, 0, 0);
pub const SCHEMA_VERSION: Version = Version::new(177, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
Expand All @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock<Vec<KnownVersion>> = LazyLock::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(177, "add-host-ereport-part-number"),
KnownVersion::new(176, "audit-log"),
KnownVersion::new(175, "inv-host-phase-1-active-slot"),
KnownVersion::new(174, "add-tuf-rot-by-sign"),
Expand Down
2 changes: 2 additions & 0 deletions nexus/db-schema/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2664,6 +2664,8 @@ table! {
class -> Nullable<Text>,

report -> Jsonb,

part_number -> Nullable<Text>,
}
}

Expand Down
50 changes: 31 additions & 19 deletions nexus/src/app/background/tasks/support_bundle_collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1081,32 +1081,41 @@ impl BackgroundTask for SupportBundleCollector {
}

async fn write_ereport(ereport: Ereport, dir: &Utf8Path) -> anyhow::Result<()> {
// Here's where we construct the file path for each ereport JSON file, given
// the top-lebel ereport directory path. Each ereport is stored in a
// subdirectory for the serial number of the system that produced the
// ereport. These paths take the following form:
// Here's where we construct the file path for each ereport JSON file,
// given the top-level ereport directory path. Each ereport is stored in a
// subdirectory for the part and serial numbers of the system that produced
// the ereport. Part numbers must be included in addition to serial
// numbers, as the v1 serial scheme only guarantees uniqueness within a
// part number. These paths take the following form:
//
// {serial_number}/{restart_id}/{ENA}.json
// {part-number}-{serial_number}/{restart_id}/{ENA}.json
//
// We can assume that the restart ID and serial number consist only of
// We can assume that the restart ID and ENA consist only of
// filesystem-safe characters, as the restart ID is known to be a UUID, and
// the ENA is just an integer. For the serial number, which we don't have
// full control over --- it came from the ereport metadata --- we must
// check that it doesn't contain any characters unsuitable for use in a
// filesystem path.
let sn = &ereport
// the ENA is just an integer. For the serial and part numbers, which
// Nexus doesn't have full control over --- it came from the ereport
// metadata --- we must check that it doesn't contain any characters
// unsuitable for use in a filesystem path.
let pn = ereport
.metadata
.part_number
.as_deref()
// If the part or serial numbers contain any unsavoury characters, it
// goes in the `unknown_serial` hole! Note that the alleged serial
// number from the ereport will still be present in the JSON as a
// string, so we're not *lying* about what was received; we're just
// giving up on using it in the path.
.filter(|&s| is_fs_safe_single_path_component(s))
.unwrap_or("unknown_part");
let sn = ereport
.metadata
.serial_number
.as_deref()
// If the serial number contains any unsavoury characters, it goes in
// the `unknown_serial` hole! Note that the alleged serial number from
// the ereport will still be present in the JSON as a string, so we're
// not *lying* about what was received; we're just giving up on using it
// in the path.
.filter(|&s| is_fs_safe_single_path_component(s))
.unwrap_or("unknown_serial");

let dir = dir
.join(sn)
.join(format!("{pn}-{sn}"))
// N.B. that we call `into_untyped_uuid()` here, as the `Display`
// implementation for a typed UUID appends " (ereporter_restart)", which
// we don't want.
Expand All @@ -1116,11 +1125,11 @@ async fn write_ereport(ereport: Ereport, dir: &Utf8Path) -> anyhow::Result<()> {
.with_context(|| format!("failed to create directory '{dir}'"))?;
let file_path = dir.join(format!("{}.json", ereport.id.ena));
let json = serde_json::to_vec(&ereport).with_context(|| {
format!("failed to serialize ereport '{sn}:{}'", ereport.id)
format!("failed to serialize ereport {pn}:{sn}/{}", ereport.id)
})?;
tokio::fs::write(&file_path, json)
.await
.with_context(|| format!("failed to write {file_path}'"))
.with_context(|| format!("failed to write '{file_path}'"))
}

// Takes a directory "dir", and zips the contents into a single zipfile.
Expand Down Expand Up @@ -1701,6 +1710,7 @@ mod test {
collector_id: OmicronZoneUuid::new_v4().into(),
sled_id: sled_id.into(),
sled_serial: HOST_SERIAL.to_string(),
part_number: Some(GIMLET_PN.to_string()),
class: Some("ereport.fake.whatever".to_string()),
report: serde_json::json!({"hello_world": true}),
},
Expand All @@ -1712,6 +1722,7 @@ mod test {
collector_id: OmicronZoneUuid::new_v4().into(),
sled_id: sled_id.into(),
sled_serial: HOST_SERIAL.to_string(),
part_number: Some(GIMLET_PN.to_string()),
class: Some("ereport.fake.whatever.thingy".to_string()),
report: serde_json::json!({"goodbye_world": false}),
},
Expand All @@ -1734,6 +1745,7 @@ mod test {
collector_id: OmicronZoneUuid::new_v4().into(),
sled_id: sled_id.into(),
sled_serial: HOST_SERIAL.to_string(),
part_number: Some(GIMLET_PN.to_string()),
class: Some("ereport.something.hostos_related".to_string()),
report: serde_json::json!({"illumos": "very yes", "whatever": 42}),
},
Expand Down
2 changes: 2 additions & 0 deletions schema/crdb/add-host-ereport-part-number/up1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ALTER TABLE omicron.public.host_ereport
ADD COLUMN IF NOT EXISTS part_number STRING(63);
8 changes: 8 additions & 0 deletions schema/crdb/add-host-ereport-part-number/up2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- backfill host OS ereports part_number column by querying the sled table. if
-- there isn't a sled for the ereport's sled UUID in the sled table (i.e. if the
-- sled record was hard deleted), leave it null.
SET LOCAL disallow_full_table_scans = off;
UPDATE omicron.public.host_ereport
SET part_number = sled.part_number
FROM sled
WHERE host_ereport.sled_id = sled.id;
4 changes: 3 additions & 1 deletion schema/crdb/dbinit.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6460,6 +6460,8 @@ CREATE TABLE IF NOT EXISTS omicron.public.host_ereport (
*/
report JSONB NOT NULL,

part_number STRING(63),

PRIMARY KEY (restart_id, ena)
);

Expand Down Expand Up @@ -6495,7 +6497,7 @@ INSERT INTO omicron.public.db_metadata (
version,
target_version
) VALUES
(TRUE, NOW(), NOW(), '176.0.0', NULL)
(TRUE, NOW(), NOW(), '177.0.0', NULL)
ON CONFLICT DO NOTHING;

COMMIT;
Loading