Skip to content

Commit 84c3410

Browse files
Apply optimal drive settings for Ultrastar DC SN681 (#9263)
This adds the SDS6BA138PSP9X3 NVMe drive to sled-agent's PREFERRED_NVME_DEVICE_SETTINGS hash map. Additionally, it shuffles some logic around to always apply a 4K LBA data size to all devices, as Robert suggested in #8867. Fixes #8867. --------- Co-authored-by: Andy Fiddaman <[email protected]>
1 parent 7bc5275 commit 84c3410

File tree

1 file changed

+149
-96
lines changed

1 file changed

+149
-96
lines changed

sled-hardware/src/illumos/partitions.rs

Lines changed: 149 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,11 @@
77
use std::collections::HashMap;
88
use std::sync::OnceLock;
99

10+
use crate::DiskPaths;
11+
use crate::Partition;
12+
use crate::PooledDiskError;
1013
use crate::illumos::gpt;
11-
use crate::{DiskPaths, Partition, PooledDiskError};
14+
use crate::is_oxide_sled;
1215
use camino::Utf8Path;
1316
use illumos_utils::zpool::Zpool;
1417
use illumos_utils::zpool::ZpoolName;
@@ -26,11 +29,16 @@ static DEFAULT_NVME_LBA_DATA_SIZE: u64 = 4096;
2629
/// NVMe device settings for a particular NVMe model.
2730
struct NvmeDeviceSettings {
2831
/// The desired disk size for dealing with overprovisioning.
29-
size: u32,
32+
resize: NvmeVendorResize,
3033
/// An override for the default 4k LBA formatting.
3134
lba_data_size_override: Option<u64>,
3235
}
3336

37+
enum NvmeVendorResize {
38+
Wdc(u32),
39+
Unsupported,
40+
}
41+
3442
/// A mapping from model to desired settings.
3543
/// A device not found in this lookup table will not be modified by sled-agent.
3644
static PREFERRED_NVME_DEVICE_SETTINGS: OnceLock<
@@ -43,23 +51,45 @@ fn preferred_nvme_device_settings()
4351
HashMap::from([
4452
(
4553
"WUS4C6432DSP3X3",
46-
NvmeDeviceSettings { size: 3200, lba_data_size_override: None },
54+
NvmeDeviceSettings {
55+
resize: NvmeVendorResize::Wdc(3200),
56+
lba_data_size_override: None,
57+
},
4758
),
4859
(
4960
"WUS5EA138ESP7E1",
50-
NvmeDeviceSettings { size: 3200, lba_data_size_override: None },
61+
NvmeDeviceSettings {
62+
resize: NvmeVendorResize::Wdc(3200),
63+
lba_data_size_override: None,
64+
},
5165
),
5266
(
5367
"WUS5EA138ESP7E3",
54-
NvmeDeviceSettings { size: 3200, lba_data_size_override: None },
68+
NvmeDeviceSettings {
69+
resize: NvmeVendorResize::Wdc(3200),
70+
lba_data_size_override: None,
71+
},
5572
),
5673
(
5774
"WUS5EA176ESP7E1",
58-
NvmeDeviceSettings { size: 6400, lba_data_size_override: None },
75+
NvmeDeviceSettings {
76+
resize: NvmeVendorResize::Wdc(6400),
77+
lba_data_size_override: None,
78+
},
5979
),
6080
(
6181
"WUS5EA176ESP7E3",
62-
NvmeDeviceSettings { size: 6400, lba_data_size_override: None },
82+
NvmeDeviceSettings {
83+
resize: NvmeVendorResize::Wdc(6400),
84+
lba_data_size_override: None,
85+
},
86+
),
87+
(
88+
"SDS6BA138PSP9X3",
89+
NvmeDeviceSettings {
90+
resize: NvmeVendorResize::Unsupported,
91+
lba_data_size_override: None,
92+
},
6393
),
6494
])
6595
})
@@ -81,6 +111,8 @@ pub enum NvmeFormattingError {
81111
InfoError(#[from] libnvme::controller_info::NvmeInfoError),
82112
#[error("Could not find NVMe controller for disk with serial {0}")]
83113
NoController(String),
114+
#[error("Could not determine if host is an Oxide sled: {0}")]
115+
SystemDetection(#[source] anyhow::Error),
84116
}
85117

86118
// The expected layout of an M.2 device within the Oxide rack.
@@ -267,101 +299,122 @@ fn ensure_size_and_formatting(
267299
use libnvme::Nvme;
268300
use libnvme::namespace::NamespaceDiscoveryLevel;
269301

270-
let mut controller_found = false;
302+
// Check that we are on real Oxide hardware so that we avoid:
303+
// - Messing with NVMe devices in other environments
304+
// - Failing tests which use zvols rather than real NVMe devices
305+
// - Breaking virtual environments like a4x2 which likely don't expose or
306+
// implement changing the LBA on emulated devices.
307+
if !is_oxide_sled().map_err(NvmeFormattingError::SystemDetection)? {
308+
return Ok(());
309+
}
271310

272-
if let Some(nvme_settings) =
273-
preferred_nvme_device_settings().get(identity.model.as_str())
274-
{
275-
let nvme = Nvme::new()?;
276-
for controller in nvme.controller_discovery()? {
277-
let controller = controller?.write_lock().map_err(|(_, e)| e)?;
278-
let controller_info = controller.get_info()?;
279-
// Make sure we are operating on the correct NVMe device.
280-
if controller_info.serial() != identity.serial {
281-
continue;
282-
};
283-
controller_found = true;
284-
let nsdisc = controller
285-
.namespace_discovery(NamespaceDiscoveryLevel::Active)?;
286-
let namespaces =
287-
nsdisc.into_iter().collect::<Result<Vec<_>, _>>()?;
288-
if namespaces.len() != 1 {
289-
return Err(NvmeFormattingError::UnexpectedNamespaces(
290-
namespaces.len(),
291-
));
292-
}
293-
// Safe because verified there is exactly one namespace.
294-
let namespace = namespaces.into_iter().next().unwrap();
295-
296-
// NB: Only some vendors such as WDC support adjusting the size
297-
// of the disk to deal with overprovisioning. This will need to be
298-
// abstracted away if/when we ever start using another vendor with
299-
// this capability.
300-
let size = controller.wdc_resize_get()?;
301-
302-
// First we need to detach blkdev from the namespace.
303-
namespace.blkdev_detach()?;
304-
305-
// Resize the device if needed to ensure we get the expected
306-
// durability level in terms of drive writes per day.
307-
if size != nvme_settings.size {
308-
controller.wdc_resize_set(nvme_settings.size)?;
309-
info!(
310-
log,
311-
"Resized {} from {size} to {}",
312-
identity.serial,
313-
nvme_settings.size
314-
)
311+
let mut controller_found = false;
312+
let nvme = Nvme::new()?;
313+
314+
for controller in nvme.controller_discovery()? {
315+
let controller = controller?.write_lock().map_err(|(_, e)| e)?;
316+
let controller_info = controller.get_info()?;
317+
318+
// Make sure we are operating on the correct NVMe device.
319+
if controller_info.serial() != identity.serial {
320+
continue;
321+
};
322+
controller_found = true;
323+
let nsdisc =
324+
controller.namespace_discovery(NamespaceDiscoveryLevel::Active)?;
325+
let namespaces = nsdisc.into_iter().collect::<Result<Vec<_>, _>>()?;
326+
327+
// We only want to continue if there is a single namespace associated
328+
// with the device, so we accomplish this by pattern matching for it.
329+
let [namespace] = namespaces.as_slice() else {
330+
return Err(NvmeFormattingError::UnexpectedNamespaces(
331+
namespaces.len(),
332+
));
333+
};
334+
335+
// First we need to detach blkdev from the namespace.
336+
namespace.blkdev_detach()?;
337+
338+
// Check for a known nvme drive and apply our desired configuration.
339+
let mut wanted_data_size = DEFAULT_NVME_LBA_DATA_SIZE;
340+
if let Some(nvme_settings) =
341+
preferred_nvme_device_settings().get(identity.model.as_str())
342+
{
343+
match nvme_settings.resize {
344+
NvmeVendorResize::Wdc(provisioning_size) => {
345+
let size = controller.wdc_resize_get()?;
346+
347+
// Resize the device if needed to ensure we get the expected
348+
// durability level in terms of drive writes per day.
349+
if size != provisioning_size {
350+
controller.wdc_resize_set(provisioning_size)?;
351+
info!(
352+
log,
353+
"Resized {} from {size} to {provisioning_size}",
354+
identity.serial,
355+
)
356+
}
357+
}
358+
// This device doesn't have a vendor specific resize command to
359+
// deal with overprovisioning so there's nothing to do.
360+
NvmeVendorResize::Unsupported => (),
315361
}
316362

317-
// Find the LBA format we want to use for the device.
318-
let wanted_data_size = nvme_settings
319-
.lba_data_size_override
320-
.unwrap_or(DEFAULT_NVME_LBA_DATA_SIZE);
321-
let desired_lba = controller_info
322-
.lba_formats()
323-
.collect::<Result<Vec<_>, _>>()?
324-
.into_iter()
325-
.find(|lba| {
326-
lba.meta_size() == NVME_LBA_META_SIZE
327-
&& lba.data_size() == wanted_data_size
328-
})
329-
.ok_or_else(|| NvmeFormattingError::LbaFormatMissing)?;
330-
331-
// If the controller isn't formatted to our desired LBA we need to
332-
// issue a format request.
333-
let ns_info = namespace.get_info()?;
334-
let current_lba = ns_info.current_format()?;
335-
if current_lba.id() != desired_lba.id() {
336-
controller
337-
.format_request()?
338-
.set_lbaf(desired_lba.id())?
339-
// TODO map this to libnvme::BROADCAST_NAMESPACE once added
340-
.set_nsid(u32::MAX)?
341-
// No secure erase
342-
.set_ses(0)?
343-
.execute()?;
344-
345-
info!(
346-
log,
347-
"Formatted disk with serial {} to an LBA with data size \
348-
{wanted_data_size}",
349-
identity.serial,
350-
);
363+
if let Some(lba_data_size_override) =
364+
nvme_settings.lba_data_size_override
365+
{
366+
wanted_data_size = lba_data_size_override;
351367
}
368+
} else {
369+
info!(
370+
log,
371+
"There are no preferred NVMe settings for disk model {}; will \
372+
attempt to format to the default LBA data size for disk with \
373+
serial {}",
374+
identity.model,
375+
identity.serial
376+
);
377+
}
352378

353-
// Attach blkdev to the namespace again
354-
namespace.blkdev_attach()?;
379+
// Find the LBA format we want to use for the device.
380+
let desired_lba = controller_info
381+
.lba_formats()
382+
.collect::<Result<Vec<_>, _>>()?
383+
.into_iter()
384+
.find(|lba| {
385+
lba.meta_size() == NVME_LBA_META_SIZE
386+
&& lba.data_size() == wanted_data_size
387+
})
388+
.ok_or_else(|| NvmeFormattingError::LbaFormatMissing)?;
389+
390+
// If the controller isn't formatted to our desired LBA we need to
391+
// issue a format request.
392+
let ns_info = namespace.get_info()?;
393+
let current_lba = ns_info.current_format()?;
394+
if current_lba.id() != desired_lba.id() {
395+
controller
396+
.format_request()?
397+
.set_lbaf(desired_lba.id())?
398+
// TODO map this to libnvme::BROADCAST_NAMESPACE once added
399+
.set_nsid(u32::MAX)?
400+
// No secure erase
401+
.set_ses(0)?
402+
.execute()?;
403+
404+
info!(
405+
log,
406+
"Formatted disk with serial {} to an LBA with data size \
407+
{wanted_data_size}",
408+
identity.serial,
409+
);
355410
}
356-
} else {
357-
info!(
358-
log,
359-
"There are no preferred NVMe settings for disk model {}; nothing to\
360-
do for disk with serial {}",
361-
identity.model,
362-
identity.serial
363-
);
364-
return Ok(());
411+
412+
// Attach blkdev to the namespace again
413+
namespace.blkdev_attach()?;
414+
415+
// We found the disk and applied the settings so there's no use scanning
416+
// the rest of the devices.
417+
break;
365418
}
366419

367420
if !controller_found {

0 commit comments

Comments
 (0)