Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions src/mp3/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,37 @@ pub(crate) fn verify_frame_sync(frame_sync: [u8; 2]) -> bool {
frame_sync[0] == 0xFF && frame_sync[1] >> 5 == 0b111
}

/// Searches for a frame sync (11 bits with the value 1 like `0b1111_1111_111`) in the input reader.
/// The search starts at the beginning of the reader and returns the index relative to this beginning.
/// Only the first match is returned and on no match, [`None`] is returned instead.
///
/// Note that the search searches in 8 bit steps, i.e. the first 8 bits need to be byte aligned.
pub(crate) fn search_for_frame_sync<R>(input: &mut R) -> std::io::Result<Option<u64>>
where
R: Read,
{
let mut iterator = input.bytes();
let mut buffer = [0u8; 2];
// Read the first byte, as each iteration expects that buffer 0 was set from a previous iteration.
// This is not the case in the first iteration, which is therefore a special case.
if let Some(byte) = iterator.next() {
buffer[0] = byte?;
}
// create a stream of overlapping 2 byte pairs
// example: [0x01, 0x02, 0x03, 0x04] should be analyzed as
// [0x01, 0x02], [0x02, 0x03], [0x03, 0x04]
for (index, byte) in iterator.enumerate() {
buffer[1] = byte?;
// check the two bytes in the buffer
if verify_frame_sync(buffer) {
return Ok(Some(index as u64));
}
// if they do not match, copy the last byte in the buffer to the front for the next iteration
buffer[0] = buffer[1];
}
Ok(None)
}

#[derive(PartialEq, Copy, Clone, Debug)]
#[allow(missing_docs)]
/// MPEG Audio version
Expand Down Expand Up @@ -200,3 +231,21 @@ impl XingHeader {
}
}
}

#[cfg(test)]
mod tests {
#[test]
fn search_for_frame_sync() {
fn test(data: &[u8], expected_result: Option<u64>) {
use super::search_for_frame_sync;
assert_eq!(
search_for_frame_sync(&mut Box::new(data)).unwrap(),
expected_result
);
}

test(&[0xFF, 0xFB, 0x00], Some(0));
test(&[0x00, 0x00, 0x01, 0xFF, 0xFB], Some(3));
test(&[0x01, 0xFF], None);
}
}
36 changes: 24 additions & 12 deletions src/mp3/read.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::header::{verify_frame_sync, Header, XingHeader};
use super::header::{search_for_frame_sync, Header, XingHeader};
use super::{Mp3File, Mp3Properties};
use crate::ape::constants::APE_PREAMBLE;
#[cfg(feature = "ape")]
Expand All @@ -12,7 +12,7 @@ use crate::id3::{find_id3v1, find_lyrics3v2, ID3FindResults};

use std::io::{Read, Seek, SeekFrom};

use byteorder::ReadBytesExt;
use byteorder::{BigEndian, ReadBytesExt};

pub(super) fn read_from<R>(
reader: &mut R,
Expand Down Expand Up @@ -80,17 +80,29 @@ where
continue;
}
},
_ if verify_frame_sync([header[0], header[1]]) => {
let start = reader.seek(SeekFrom::Current(0))? - 4;
let header = Header::read(u32::from_be_bytes(header))?;

file.first_frame_offset = Some(start);
first_frame_header = Some(header);

// We have found the first frame
break;
// metadata blocks might be followed by junk bytes before the first MP3 frame begins
_ => {
// seek back the length of the temporary header buffer
// so that all bytes are included in the search for a frame sync
#[allow(clippy::neg_multiply)]
let start_of_search_area = reader.seek(SeekFrom::Current(-1 * header.len() as i64))?;
if let Some(first_mp3_frame_start_relative) = search_for_frame_sync(reader)? {
let first_mp3_frame_start_absolute =
start_of_search_area + first_mp3_frame_start_relative;

// read the first four bytes of the found frame
reader.seek(SeekFrom::Start(first_mp3_frame_start_absolute))?;
let header = Header::read(reader.read_u32::<BigEndian>()?)?;

file.first_frame_offset = Some(first_mp3_frame_start_absolute);
first_frame_header = Some(header);

// We have found the first frame
break;
}
// the search for sync bits was unsuccessful
return Err(LoftyError::Mp3("File contains an invalid frame"));
},
_ => return Err(LoftyError::Mp3("File contains an invalid frame")),
}
}

Expand Down
100 changes: 74 additions & 26 deletions src/probe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::ape::ApeFile;
use crate::error::{LoftyError, Result};
use crate::iff::aiff::AiffFile;
use crate::iff::wav::WavFile;
use crate::mp3::header::verify_frame_sync;
use crate::mp3::header::search_for_frame_sync;
use crate::mp3::Mp3File;
use crate::mp4::Mp4File;
use crate::ogg::flac::FlacFile;
Expand Down Expand Up @@ -148,42 +148,59 @@ impl<R: Read + Seek> Probe<R> {
}

#[allow(clippy::shadow_unrelated)]
fn guess_inner(&mut self) -> Result<Option<FileType>> {
fn guess_inner(&mut self) -> std::io::Result<Option<FileType>> {
// temporary buffer for storing 36 bytes
// (36 is just a guess as to how long the data for estimating the file type might be)
let mut buf = [0; 36];

let pos = self.inner.seek(SeekFrom::Current(0))?;
// read the first 36 bytes and seek back to the starting position
let starting_position = self.inner.stream_position()?;
let buf_len = std::io::copy(
&mut self.inner.by_ref().take(36),
&mut self.inner.by_ref().take(buf.len() as u64),
&mut Cursor::new(&mut buf[..]),
)? as usize;
self.inner.seek(SeekFrom::Start(starting_position))?;

self.inner.seek(SeekFrom::Start(pos))?;

// estimate the file type by using these 36 bytes
// note that any error from `from_buffer_inner` are suppressed, as it returns an error on unknown format
match FileType::from_buffer_inner(&buf[..buf_len]) {
// the file type was guessed based on these bytes
Ok((Some(f_ty), _)) => Ok(Some(f_ty)),
// the first data block is ID3 data; this means other data can follow (e.g. APE or MP3 frames)
Ok((None, id3_len)) => {
self.inner
// the position right after the ID3 block is the internal size value (id3_len)
// added to the length of the ID3 header (which is 10 bytes),
// as the size does not include the header itself
let position_after_id3_block = self
.inner
.seek(SeekFrom::Current(i64::from(10 + id3_len)))?;

let mut ident = [0; 3];
let buf_len = std::io::copy(
&mut self.inner.by_ref().take(3),
&mut Cursor::new(&mut ident[..]),
)?;

self.inner.seek(SeekFrom::Start(pos))?;

if buf_len < 3 {
return Err(LoftyError::UnknownFormat);
}

if &ident == b"MAC" {
Ok(Some(FileType::APE))
} else if verify_frame_sync([ident[0], ident[1]]) {
Ok(Some(FileType::MP3))
} else {
Err(LoftyError::UnknownFormat)
}
let file_type_after_id3_block = {
// try to guess the file type after the ID3 block by inspecting the first 3 bytes
let mut ident = [0; 3];
std::io::copy(
&mut self.inner.by_ref().take(ident.len() as u64),
&mut Cursor::new(&mut ident[..]),
)?;

if &ident == b"MAC" {
Ok(Some(FileType::APE))
} else {
// potentially some junk bytes are between the ID3 block and the following MP3 block
// search for any possible sync bits after the ID3 block
self.inner.seek(SeekFrom::Start(position_after_id3_block))?;
if search_for_frame_sync(&mut self.inner)?.is_some() {
Ok(Some(FileType::MP3))
} else {
Ok(None)
}
}
};

// before returning any result for a file type, seek back to the front
self.inner.seek(SeekFrom::Start(starting_position))?;

file_type_after_id3_block
},
_ => Ok(None),
}
Expand Down Expand Up @@ -247,3 +264,34 @@ where
{
Probe::open(path)?.read(read_properties)
}

#[cfg(test)]
mod tests {
use crate::Probe;

#[test]
fn mp3_file_id3v2_3() {
// test data that contains 4 bytes of junk (0x20) between the ID3 portion and the first MP3 frame
let data: [&[u8]; 4] = [
// ID3v2.3 header (10 bytes)
&[0x49, 0x44, 0x33, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23],
// TALB frame
&[
0x54, 0x41, 0x4C, 0x42, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x01, 0xFF, 0xFE, 0x61,
0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61,
0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00,
],
// 4 bytes of junk
&[0x20, 0x20, 0x20, 0x20],
// start of MP3 frame (not all bytes are shown in this slice)
&[
0xFF, 0xFB, 0x50, 0xC4, 0x00, 0x03, 0xC0, 0x00, 0x01, 0xA4, 0x00, 0x00, 0x00, 0x20,
0x00, 0x00, 0x34, 0x80, 0x00, 0x00, 0x04,
],
];
let data: Vec<u8> = data.into_iter().flatten().cloned().collect();
let data = std::io::Cursor::new(&data);
let probe = Probe::new(data).guess_file_type().unwrap();
matches!(probe.file_type(), Some(crate::FileType::MP3));
}
}
Binary file added tests/files/assets/junk_between_id3_and_mp3.mp3
Binary file not shown.
26 changes: 25 additions & 1 deletion tests/files/mpeg.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{set_artist, temp_file, verify_artist};
use lofty::{FileType, ItemKey, ItemValue, TagItem, TagType};
use lofty::{Accessor, FileType, ItemKey, ItemValue, TagItem, TagType};
use std::io::{Seek, SeekFrom, Write};

#[test]
Expand All @@ -19,6 +19,30 @@ fn read() {
crate::verify_artist!(file, tag, TagType::Ape, "Baz artist", 1);
}

#[test]
fn read_with_junk_bytes_between_frames() {
// Read a file that includes an ID3v2.3 data block followed by four bytes of junk data (0x20)
let file =
lofty::read_from_path("tests/files/assets/junk_between_id3_and_mp3.mp3", true).unwrap();

// note that the file contains ID3v2 and ID3v1 data
assert_eq!(file.file_type(), &FileType::MP3);

let id3v2_tag = &file.tags()[0];
assert_eq!(id3v2_tag.artist(), Some("artist test"));
assert_eq!(id3v2_tag.album(), Some("album test"));
assert_eq!(id3v2_tag.title(), Some("title test"));
assert_eq!(
id3v2_tag.get_string(&ItemKey::EncoderSettings),
Some("Lavf58.62.100")
);

let id3v1_tag = &file.tags()[1];
assert_eq!(id3v1_tag.artist(), Some("artist test"));
assert_eq!(id3v1_tag.album(), Some("album test"));
assert_eq!(id3v1_tag.title(), Some("title test"));
}

#[test]
fn write() {
let mut file = temp_file!("tests/files/assets/a.mp3");
Expand Down