diff --git a/measureme/src/file_header.rs b/measureme/src/file_header.rs
new file mode 100644
index 0000000..a8a934a
--- /dev/null
+++ b/measureme/src/file_header.rs
@@ -0,0 +1,136 @@
+//! All binary files generated by measureme have a simple file header that
+//! consists of a 4 byte file magic string and a 4 byte little-endian version
+//! number.
+
+use byteorder::{ByteOrder, LittleEndian};
+use crate::serialization::SerializationSink;
+use std::error::Error;
+
+pub const CURRENT_FILE_FORMAT_VERSION: u32 = 0;
+pub const FILE_MAGIC_EVENT_STREAM: &[u8; 4] = b"MMES";
+pub const FILE_MAGIC_STRINGTABLE_DATA: &[u8; 4] = b"MMSD";
+pub const FILE_MAGIC_STRINGTABLE_INDEX: &[u8; 4] = b"MMSI";
+
+/// The size of the file header in bytes. Note that functions in this module
+/// rely on this size to be `8`.
+pub const FILE_HEADER_SIZE: usize = 8;
+
+/// Emits the file header through `s.write_atomic`: `file_magic` followed by
+/// the little-endian `CURRENT_FILE_FORMAT_VERSION`.
+pub fn write_file_header<S: SerializationSink>(s: &S, file_magic: &[u8; 4]) {
+    // The implementation here relies on FILE_HEADER_SIZE to have the value 8.
+    // Let's make sure this assumption cannot be violated without being noticed.
+    assert_eq!(FILE_HEADER_SIZE, 8);
+
+    s.write_atomic(FILE_HEADER_SIZE, |bytes| {
+        bytes[0 .. 4].copy_from_slice(file_magic);
+        LittleEndian::write_u32(&mut bytes[4..8], CURRENT_FILE_FORMAT_VERSION);
+    });
+}
+
+/// Checks that `bytes` starts with `expected_magic` and returns the file
+/// format version stored after it.
+///
+/// # Errors
+///
+/// Returns an error if `bytes` is shorter than `FILE_HEADER_SIZE` or if the
+/// file magic does not match `expected_magic`.
+pub fn read_file_header(
+    bytes: &[u8],
+    expected_magic: &[u8; 4]
+) -> Result<u32, Box<dyn Error>> {
+    // The implementation here relies on FILE_HEADER_SIZE to have the value 8.
+    // Let's make sure this assumption cannot be violated without being noticed.
+    assert_eq!(FILE_HEADER_SIZE, 8);
+
+    // An empty or truncated file must be reported as an error instead of
+    // panicking in the slice operations below.
+    if bytes.len() < FILE_HEADER_SIZE {
+        let msg = format!(
+            "File is too short to contain a file header: expected at least {} bytes, found {}",
+            FILE_HEADER_SIZE,
+            bytes.len(),
+        );
+
+        return Err(From::from(msg));
+    }
+
+    let actual_magic = &bytes[0 .. 4];
+
+    if actual_magic != expected_magic {
+        // FIXME: The error message should mention the file path in order to be
+        // more useful.
+        let msg = format!(
+            "Unexpected file magic `{:?}`. Expected `{:?}`",
+            actual_magic,
+            expected_magic,
+        );
+
+        return Err(From::from(msg));
+    }
+
+    Ok(LittleEndian::read_u32(&bytes[4..8]))
+}
+
+/// Returns `data` without its leading file header.
+///
+/// # Panics
+///
+/// Panics if `data` is shorter than `FILE_HEADER_SIZE`; validate it with
+/// `read_file_header` first.
+pub fn strip_file_header(data: &[u8]) -> &[u8] {
+    &data[FILE_HEADER_SIZE ..]
+}
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::serialization::test::TestSink;
+
+    #[test]
+    fn roundtrip() {
+        let data_sink = TestSink::new();
+
+        write_file_header(&data_sink, FILE_MAGIC_EVENT_STREAM);
+
+        let data = data_sink.into_bytes();
+
+        assert_eq!(read_file_header(&data, FILE_MAGIC_EVENT_STREAM).unwrap(),
+                   CURRENT_FILE_FORMAT_VERSION);
+    }
+
+    #[test]
+    fn invalid_magic() {
+        let data_sink = TestSink::new();
+        write_file_header(&data_sink, FILE_MAGIC_STRINGTABLE_DATA);
+        let mut data = data_sink.into_bytes();
+
+        // Invalidate the filemagic
+        data[2] = 0;
+        assert!(read_file_header(&data, FILE_MAGIC_STRINGTABLE_DATA).is_err());
+    }
+
+    #[test]
+    fn truncated_header() {
+        assert!(read_file_header(&[], FILE_MAGIC_EVENT_STREAM).is_err());
+        assert!(read_file_header(b"MMES", FILE_MAGIC_EVENT_STREAM).is_err());
+    }
+
+    #[test]
+    fn other_version() {
+        let data_sink = TestSink::new();
+
+        write_file_header(&data_sink, FILE_MAGIC_STRINGTABLE_INDEX);
+
+        let mut data = data_sink.into_bytes();
+
+        // Change version
+        data[4] = 0xFF;
+        data[5] = 0xFF;
+        data[6] = 0xFF;
+        data[7] = 0xFF;
+        assert_eq!(read_file_header(&data, FILE_MAGIC_STRINGTABLE_INDEX).unwrap(),
+                   0xFFFF_FFFF);
+    }
+}
diff --git a/measureme/src/lib.rs b/measureme/src/lib.rs
index 740900e..9011d15 100644
--- a/measureme/src/lib.rs
+++ b/measureme/src/lib.rs
@@ -1,4 +1,5 @@
 mod event;
+mod file_header;
 mod file_serialization_sink;
 mod mmap_serialization_sink;
 mod profiler;
diff --git a/measureme/src/profiler.rs b/measureme/src/profiler.rs
index 0692f7b..ec51e15 100644
--- a/measureme/src/profiler.rs
+++ b/measureme/src/profiler.rs
@@ -1,3 +1,4 @@
+use crate::file_header::{write_file_header, FILE_MAGIC_EVENT_STREAM};
 use crate::raw_event::{RawEvent, Timestamp, TimestampKind};
 use crate::serialization::SerializationSink;
 use crate::stringtable::{SerializableString, StringId, StringTableBuilder};
@@ -32,6 +33,10 @@ impl<S: SerializationSink> Profiler<S> {
     pub fn new(path_stem: &Path) -> Result<Profiler<S>, Box<dyn Error>> {
         let paths = ProfilerFiles::new(path_stem);
         let event_sink = Arc::new(S::from_path(&paths.events_file)?);
+
+        // The first thing in every file we generate must be the file header.
+        write_file_header(&*event_sink, FILE_MAGIC_EVENT_STREAM);
+
         let string_table = StringTableBuilder::new(
             Arc::new(S::from_path(&paths.string_data_file)?),
             Arc::new(S::from_path(&paths.string_index_file)?),
diff --git a/measureme/src/profiling_data.rs b/measureme/src/profiling_data.rs
index 2df29a9..19a01f6 100644
--- a/measureme/src/profiling_data.rs
+++ b/measureme/src/profiling_data.rs
@@ -1,5 +1,9 @@
+use crate::file_header::{
+    read_file_header, CURRENT_FILE_FORMAT_VERSION, FILE_HEADER_SIZE, FILE_MAGIC_EVENT_STREAM,
+};
 use crate::event::Event;
 use crate::{ProfilerFiles, RawEvent, StringTable, TimestampKind};
+use std::error::Error;
 use std::fs;
 use std::mem;
 use std::path::Path;
@@ -11,19 +15,38 @@ pub struct ProfilingData {
 }
 
 impl ProfilingData {
-    pub fn new(path_stem: &Path) -> ProfilingData {
+    /// Loads the profiling data stored in the files derived from `path_stem`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if a file cannot be read or has an unexpected header.
+    pub fn new(path_stem: &Path) -> Result<ProfilingData, Box<dyn Error>> {
         let paths = ProfilerFiles::new(path_stem);
 
-        let string_data = fs::read(paths.string_data_file).expect("couldn't read string_data file");
-        let index_data = fs::read(paths.string_index_file).expect("couldn't read string_index file");
-        let event_data = fs::read(paths.events_file).expect("couldn't read events file");
+        let string_data = fs::read(paths.string_data_file)
+            .map_err(|e| format!("couldn't read string_data file: {}", e))?;
+        let index_data = fs::read(paths.string_index_file)
+            .map_err(|e| format!("couldn't read string_index file: {}", e))?;
+        let event_data = fs::read(paths.events_file)
+            .map_err(|e| format!("couldn't read events file: {}", e))?;
+
+        // The event iterator skips FILE_HEADER_SIZE bytes, so make sure the
+        // events file really starts with a header we understand.
+        let event_data_format = read_file_header(&event_data, FILE_MAGIC_EVENT_STREAM)?;
+        if event_data_format != CURRENT_FILE_FORMAT_VERSION {
+            return Err(From::from(format!(
+                "Event stream file format version '{}' is not supported \
+                 by this version of `measureme`.",
+                event_data_format
+            )));
+        }
 
-        let string_table = StringTable::new(string_data, index_data);
+        let string_table = StringTable::new(string_data, index_data)?;
 
-        ProfilingData {
+        Ok(ProfilingData {
             string_table,
             event_data,
-        }
+        })
     }
 
     pub fn iter(&self) -> impl Iterator<Item = Event<'_>> {
@@ -53,15 +76,16 @@ impl<'a> Iterator for ProfilerEventIterator<'a> {
     type Item = Event<'a>;
 
     fn next(&mut self) -> Option<Event<'a>> {
-        let raw_idx = self.curr_event_idx * mem::size_of::<RawEvent>();
-        let raw_idx_end = raw_idx + mem::size_of::<RawEvent>();
-        if raw_idx_end > self.data.event_data.len() {
+        let event_start_addr = FILE_HEADER_SIZE +
+            self.curr_event_idx * mem::size_of::<RawEvent>();
+        let event_end_addr = event_start_addr + mem::size_of::<RawEvent>();
+        if event_end_addr > self.data.event_data.len() {
             return None;
         }
 
         self.curr_event_idx += 1;
 
-        let raw_event_bytes = &self.data.event_data[raw_idx..raw_idx_end];
+        let raw_event_bytes = &self.data.event_data[event_start_addr..event_end_addr];
 
         let mut raw_event = RawEvent::default();
         unsafe {
diff --git a/measureme/src/stringtable.rs b/measureme/src/stringtable.rs
index 5cbbd62..c13e926 100644
--- a/measureme/src/stringtable.rs
+++ b/measureme/src/stringtable.rs
@@ -12,10 +12,14 @@
 //! UTF-8 bytes. The content of a `TAG_STR_REF` is the contents of the entry
 //! it references.
 
+use crate::file_header::{write_file_header, read_file_header, strip_file_header,
+                         CURRENT_FILE_FORMAT_VERSION, FILE_MAGIC_STRINGTABLE_DATA,
+                         FILE_MAGIC_STRINGTABLE_INDEX};
 use crate::serialization::{Addr, SerializationSink};
 use byteorder::{ByteOrder, LittleEndian};
 use rustc_hash::FxHashMap;
 use std::borrow::Cow;
+use std::error::Error;
 use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::Arc;
 
@@ -117,6 +121,11 @@ fn deserialize_index_entry(bytes: &[u8]) -> (StringId, Addr) {
 
 impl<S: SerializationSink> StringTableBuilder<S> {
     pub fn new(data_sink: Arc<S>, index_sink: Arc<S>) -> StringTableBuilder<S> {
+
+        // The first thing in every file we generate must be the file header.
+        write_file_header(&*data_sink, FILE_MAGIC_STRINGTABLE_DATA);
+        write_file_header(&*index_sink, FILE_MAGIC_STRINGTABLE_INDEX);
+
         StringTableBuilder {
             data_sink,
             index_sink,
@@ -230,12 +239,38 @@ pub struct StringTable {
 }
 
 impl<'data> StringTable {
-    pub fn new(string_data: Vec<u8>, index_data: Vec<u8>) -> StringTable {
-        assert!(index_data.len() % 8 == 0);
+    /// Builds a `StringTable` from the raw contents of the string data and
+    /// string index files.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if either file header is invalid, if the two format
+    /// versions differ or are unsupported, or if the index is malformed.
+    pub fn new(string_data: Vec<u8>, index_data: Vec<u8>) -> Result<StringTable, Box<dyn Error>> {
 
-        let index: FxHashMap<_, _> = index_data.chunks(8).map(deserialize_index_entry).collect();
+        let string_data_format = read_file_header(&string_data, FILE_MAGIC_STRINGTABLE_DATA)?;
+        let index_data_format = read_file_header(&index_data, FILE_MAGIC_STRINGTABLE_INDEX)?;
+
+        if string_data_format != index_data_format {
+            Err("Mismatch between StringTable DATA and INDEX format version")?;
+        }
+
+        if string_data_format != CURRENT_FILE_FORMAT_VERSION {
+            Err(format!("StringTable file format version '{}' is not supported \
+                         by this version of `measureme`.", string_data_format))?;
+        }
+
+        // Report a malformed index file as an error instead of panicking.
+        if index_data.len() % 8 != 0 {
+            Err("StringTable INDEX file size is not a multiple of the index entry size")?;
+        }
+
+        let index: FxHashMap<_, _> = strip_file_header(&index_data)
+            .chunks(8)
+            .map(deserialize_index_entry)
+            .collect();
 
-        StringTable { string_data, index }
+        Ok(StringTable { string_data, index })
     }
 
     #[inline]
@@ -245,7 +280,7 @@ impl<'data> StringTable {
 }
 
 #[cfg(test)]
-mod test {
+mod tests {
     use super::*;
 
     #[test]
@@ -278,7 +313,7 @@ mod test {
         let data_bytes = Arc::try_unwrap(data_sink).unwrap().into_bytes();
         let index_bytes = Arc::try_unwrap(index_sink).unwrap().into_bytes();
 
-        let string_table = StringTable::new(data_bytes, index_bytes);
+        let string_table = StringTable::new(data_bytes, index_bytes).unwrap();
 
         for (&id, &expected_string) in string_ids.iter().zip(expected_strings.iter()) {
             let str_ref = string_table.get(id);
diff --git a/measureme/src/testing_common.rs b/measureme/src/testing_common.rs
index 14bee8a..92e17c3 100644
--- a/measureme/src/testing_common.rs
+++ b/measureme/src/testing_common.rs
@@ -88,7 +88,7 @@ fn generate_profiling_data<S: SerializationSink>(filestem: &Path) -> Vec<Event>
 // Process some profiling data. This is the part that would run in a
 // post processing tool.
 fn process_profiling_data(filestem: &Path, expected_events: &[Event]) {
-    let profiling_data = ProfilingData::new(filestem);
+    let profiling_data = ProfilingData::new(filestem).unwrap();
 
     let mut count = 0;
 
diff --git a/mmview/src/main.rs b/mmview/src/main.rs
index 5f1f165..40dee2b 100644
--- a/mmview/src/main.rs
+++ b/mmview/src/main.rs
@@ -1,3 +1,4 @@
+use std::error::Error;
 use std::path::PathBuf;
 
 use measureme::ProfilingData;
@@ -8,12 +9,14 @@ struct Opt {
     file_prefix: PathBuf,
 }
 
-fn main() {
+fn main() -> Result<(), Box<dyn Error>> {
     let opt = Opt::from_args();
 
-    let data = ProfilingData::new(&opt.file_prefix);
+    let data = ProfilingData::new(&opt.file_prefix)?;
 
     for event in data.iter() {
         println!("{:?}", event);
     }
+
+    Ok(())
 }
diff --git a/stack_collapse/src/main.rs b/stack_collapse/src/main.rs
index 5a3a39f..163f5d2 100644
--- a/stack_collapse/src/main.rs
+++ b/stack_collapse/src/main.rs
@@ -1,3 +1,4 @@
+use std::error::Error;
 use std::fs::File;
 use std::io::{BufWriter, Write};
 use std::path::PathBuf;
@@ -20,10 +21,10 @@ struct Opt {
     interval: u64,
 }
 
-fn main() -> Result<(), Box<dyn std::error::Error>> {
+fn main() -> Result<(), Box<dyn Error>> {
     let opt = Opt::from_args();
 
-    let profiling_data = ProfilingData::new(&opt.file_prefix);
+    let profiling_data = ProfilingData::new(&opt.file_prefix)?;
 
     let first_event_time = {
         let current_time = profiling_data.iter().next().unwrap().timestamp;
diff --git a/summarize/src/main.rs b/summarize/src/main.rs
index 2712939..227a3c9 100644
--- a/summarize/src/main.rs
+++ b/summarize/src/main.rs
@@ -1,6 +1,7 @@
 #[macro_use]
 extern crate prettytable;
 
+use std::error::Error;
 use std::fs::File;
 use std::io::BufWriter;
 use std::path::PathBuf;
@@ -24,10 +25,10 @@ struct Opt {
     percent_above: f64,
 }
 
-fn main() -> Result<(), Box<dyn std::error::Error>> {
+fn main() -> Result<(), Box<dyn Error>> {
     let opt = Opt::from_args();
 
-    let data = ProfilingData::new(&opt.file_prefix);
+    let data = ProfilingData::new(&opt.file_prefix)?;
 
     let mut results = analysis::perform_analysis(data);
 