diff --git a/src/mp3/header.rs b/src/mp3/header.rs index acb83e47a..49924a070 100644 --- a/src/mp3/header.rs +++ b/src/mp3/header.rs @@ -9,6 +9,37 @@ pub(crate) fn verify_frame_sync(frame_sync: [u8; 2]) -> bool { frame_sync[0] == 0xFF && frame_sync[1] >> 5 == 0b111 } +/// Searches for a frame sync (11 bits with the value 1 like `0b1111_1111_111`) in the input reader. +/// The search starts at the beginning of the reader and returns the index relative to this beginning. +/// Only the first match is returned and on no match, [`None`] is returned instead. +/// +/// Note that the search searches in 8 bit steps, i.e. the first 8 bits need to be byte aligned. +pub(crate) fn search_for_frame_sync(input: &mut R) -> std::io::Result> +where + R: Read, +{ + let mut iterator = input.bytes(); + let mut buffer = [0u8; 2]; + // Read the first byte, as each iteration expects that buffer 0 was set from a previous iteration. + // This is not the case in the first iteration, which is therefore a special case. + if let Some(byte) = iterator.next() { + buffer[0] = byte?; + } + // create a stream of overlapping 2 byte pairs + // example: [0x01, 0x02, 0x03, 0x04] should be analyzed as + // [0x01, 0x02], [0x02, 0x03], [0x03, 0x04] + for (index, byte) in iterator.enumerate() { + buffer[1] = byte?; + // check the two bytes in the buffer + if verify_frame_sync(buffer) { + return Ok(Some(index as u64)); + } + // if they do not match, copy the last byte in the buffer to the front for the next iteration + buffer[0] = buffer[1]; + } + Ok(None) +} + #[derive(PartialEq, Copy, Clone, Debug)] #[allow(missing_docs)] /// MPEG Audio version @@ -200,3 +231,21 @@ impl XingHeader { } } } + +#[cfg(test)] +mod tests { + #[test] + fn search_for_frame_sync() { + fn test(data: &[u8], expected_result: Option) { + use super::search_for_frame_sync; + assert_eq!( + search_for_frame_sync(&mut Box::new(data)).unwrap(), + expected_result + ); + } + + test(&[0xFF, 0xFB, 0x00], Some(0)); + test(&[0x00, 0x00, 0x01, 0xFF, 0xFB], Some(3)); + test(&[0x01, 0xFF], None); + } +} diff --git a/src/mp3/read.rs b/src/mp3/read.rs index 7d1c8241a..10bc466df 100644 --- a/src/mp3/read.rs +++ b/src/mp3/read.rs @@ -1,4 +1,4 @@ -use super::header::{verify_frame_sync, Header, XingHeader}; +use super::header::{search_for_frame_sync, Header, XingHeader}; use super::{Mp3File, Mp3Properties}; use crate::ape::constants::APE_PREAMBLE; #[cfg(feature = "ape")] @@ -12,7 +12,7 @@ use crate::id3::{find_id3v1, find_lyrics3v2, ID3FindResults}; use std::io::{Read, Seek, SeekFrom}; -use byteorder::ReadBytesExt; +use byteorder::{BigEndian, ReadBytesExt}; pub(super) fn read_from( reader: &mut R, @@ -80,17 +80,29 @@ where continue; } }, - _ if verify_frame_sync([header[0], header[1]]) => { - let start = reader.seek(SeekFrom::Current(0))? - 4; - let header = Header::read(u32::from_be_bytes(header))?; - - file.first_frame_offset = Some(start); - first_frame_header = Some(header); - - // We have found the first frame - break; + // metadata blocks might be followed by junk bytes before the first MP3 frame begins + _ => { + // seek back the length of the temporary header buffer + // so that all bytes are included in the search for a frame sync + #[allow(clippy::neg_multiply)] + let start_of_search_area = reader.seek(SeekFrom::Current(-1 * header.len() as i64))?; + if let Some(first_mp3_frame_start_relative) = search_for_frame_sync(reader)? { + let first_mp3_frame_start_absolute = + start_of_search_area + first_mp3_frame_start_relative; + + // read the first four bytes of the found frame + reader.seek(SeekFrom::Start(first_mp3_frame_start_absolute))?; + let header = Header::read(reader.read_u32::()?)?; + + file.first_frame_offset = Some(first_mp3_frame_start_absolute); + first_frame_header = Some(header); + + // We have found the first frame + break; + } + // the search for sync bits was unsuccessful + return Err(LoftyError::Mp3("File contains an invalid frame")); }, - _ => return Err(LoftyError::Mp3("File contains an invalid frame")), } } diff --git a/src/probe.rs b/src/probe.rs index fd6a016c3..69e457b62 100644 --- a/src/probe.rs +++ b/src/probe.rs @@ -2,7 +2,7 @@ use crate::ape::ApeFile; use crate::error::{LoftyError, Result}; use crate::iff::aiff::AiffFile; use crate::iff::wav::WavFile; -use crate::mp3::header::verify_frame_sync; +use crate::mp3::header::search_for_frame_sync; use crate::mp3::Mp3File; use crate::mp4::Mp4File; use crate::ogg::flac::FlacFile; @@ -148,42 +148,59 @@ impl Probe { } #[allow(clippy::shadow_unrelated)] - fn guess_inner(&mut self) -> Result> { + fn guess_inner(&mut self) -> std::io::Result> { + // temporary buffer for storing 36 bytes + // (36 is just a guess as to how long the data for estimating the file type might be) let mut buf = [0; 36]; - let pos = self.inner.seek(SeekFrom::Current(0))?; + // read the first 36 bytes and seek back to the starting position + let starting_position = self.inner.stream_position()?; let buf_len = std::io::copy( - &mut self.inner.by_ref().take(36), + &mut self.inner.by_ref().take(buf.len() as u64), &mut Cursor::new(&mut buf[..]), )? as usize; + self.inner.seek(SeekFrom::Start(starting_position))?; - self.inner.seek(SeekFrom::Start(pos))?; - + // estimate the file type by using these 36 bytes + // note that any error from `from_buffer_inner` are suppressed, as it returns an error on unknown format match FileType::from_buffer_inner(&buf[..buf_len]) { + // the file type was guessed based on these bytes Ok((Some(f_ty), _)) => Ok(Some(f_ty)), + // the first data block is ID3 data; this means other data can follow (e.g. APE or MP3 frames) Ok((None, id3_len)) => { - self.inner + // the position right after the ID3 block is the internal size value (id3_len) + // added to the length of the ID3 header (which is 10 bytes), + // as the size does not include the header itself + let position_after_id3_block = self + .inner .seek(SeekFrom::Current(i64::from(10 + id3_len)))?; - let mut ident = [0; 3]; - let buf_len = std::io::copy( - &mut self.inner.by_ref().take(3), - &mut Cursor::new(&mut ident[..]), - )?; - - self.inner.seek(SeekFrom::Start(pos))?; - - if buf_len < 3 { - return Err(LoftyError::UnknownFormat); - } - - if &ident == b"MAC" { - Ok(Some(FileType::APE)) - } else if verify_frame_sync([ident[0], ident[1]]) { - Ok(Some(FileType::MP3)) - } else { - Err(LoftyError::UnknownFormat) - } + let file_type_after_id3_block = { + // try to guess the file type after the ID3 block by inspecting the first 3 bytes + let mut ident = [0; 3]; + std::io::copy( + &mut self.inner.by_ref().take(ident.len() as u64), + &mut Cursor::new(&mut ident[..]), + )?; + + if &ident == b"MAC" { + Ok(Some(FileType::APE)) + } else { + // potentially some junk bytes are between the ID3 block and the following MP3 block + // search for any possible sync bits after the ID3 block + self.inner.seek(SeekFrom::Start(position_after_id3_block))?; + if search_for_frame_sync(&mut self.inner)?.is_some() { + Ok(Some(FileType::MP3)) + } else { + Ok(None) + } + } + }; + + // before returning any result for a file type, seek back to the front + self.inner.seek(SeekFrom::Start(starting_position))?; + + file_type_after_id3_block }, _ => Ok(None), } @@ -247,3 +264,34 @@ where { Probe::open(path)?.read(read_properties) } + +#[cfg(test)] +mod tests { + use crate::Probe; + + #[test] + fn mp3_file_id3v2_3() { + // test data that contains 4 bytes of junk (0x20) between the ID3 portion and the first MP3 frame + let data: [&[u8]; 4] = [ + // ID3v2.3 header (10 bytes) + &[0x49, 0x44, 0x33, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23], + // TALB frame + &[ + 0x54, 0x41, 0x4C, 0x42, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x01, 0xFF, 0xFE, 0x61, + 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, + 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, + ], + // 4 bytes of junk + &[0x20, 0x20, 0x20, 0x20], + // start of MP3 frame (not all bytes are shown in this slice) + &[ + 0xFF, 0xFB, 0x50, 0xC4, 0x00, 0x03, 0xC0, 0x00, 0x01, 0xA4, 0x00, 0x00, 0x00, 0x20, + 0x00, 0x00, 0x34, 0x80, 0x00, 0x00, 0x04, + ], + ]; + let data: Vec = data.into_iter().flatten().cloned().collect(); + let data = std::io::Cursor::new(&data); + let probe = Probe::new(data).guess_file_type().unwrap(); + matches!(probe.file_type(), Some(crate::FileType::MP3)); + } +} diff --git a/tests/files/assets/junk_between_id3_and_mp3.mp3 b/tests/files/assets/junk_between_id3_and_mp3.mp3 new file mode 100644 index 000000000..b8fe60bf4 Binary files /dev/null and b/tests/files/assets/junk_between_id3_and_mp3.mp3 differ diff --git a/tests/files/mpeg.rs b/tests/files/mpeg.rs index 164a6b7e3..929c7d92b 100644 --- a/tests/files/mpeg.rs +++ b/tests/files/mpeg.rs @@ -1,5 +1,5 @@ use crate::{set_artist, temp_file, verify_artist}; -use lofty::{FileType, ItemKey, ItemValue, TagItem, TagType}; +use lofty::{Accessor, FileType, ItemKey, ItemValue, TagItem, TagType}; use std::io::{Seek, SeekFrom, Write}; #[test] @@ -19,6 +19,30 @@ fn read() { crate::verify_artist!(file, tag, TagType::Ape, "Baz artist", 1); } +#[test] +fn read_with_junk_bytes_between_frames() { + // Read a file that includes an ID3v2.3 data block followed by four bytes of junk data (0x20) + let file = + lofty::read_from_path("tests/files/assets/junk_between_id3_and_mp3.mp3", true).unwrap(); + + // note that the file contains ID3v2 and ID3v1 data + assert_eq!(file.file_type(), &FileType::MP3); + + let id3v2_tag = &file.tags()[0]; + assert_eq!(id3v2_tag.artist(), Some("artist test")); + assert_eq!(id3v2_tag.album(), Some("album test")); + assert_eq!(id3v2_tag.title(), Some("title test")); + assert_eq!( + id3v2_tag.get_string(&ItemKey::EncoderSettings), + Some("Lavf58.62.100") + ); + + let id3v1_tag = &file.tags()[1]; + assert_eq!(id3v1_tag.artist(), Some("artist test")); + assert_eq!(id3v1_tag.album(), Some("album test")); + assert_eq!(id3v1_tag.title(), Some("title test")); +} + #[test] fn write() { let mut file = temp_file!("tests/files/assets/a.mp3");