Skip to content

Commit eb1dd84

Browse files
authored
Merge pull request #29 from localthomas/feature/id3-block-byte-skipping
2 parents c88cad0 + b9c4691 commit eb1dd84

File tree

5 files changed

+172
-39
lines changed

5 files changed

+172
-39
lines changed

src/mp3/header.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,37 @@ pub(crate) fn verify_frame_sync(frame_sync: [u8; 2]) -> bool {
99
frame_sync[0] == 0xFF && frame_sync[1] >> 5 == 0b111
1010
}
1111

12+
/// Searches for a frame sync (11 bits with the value 1 like `0b1111_1111_111`) in the input reader.
13+
/// The search starts at the beginning of the reader and returns the index relative to this beginning.
14+
/// Only the first match is returned and on no match, [`None`] is returned instead.
15+
///
16+
/// Note that the search searches in 8 bit steps, i.e. the first 8 bits need to be byte aligned.
17+
pub(crate) fn search_for_frame_sync<R>(input: &mut R) -> std::io::Result<Option<u64>>
18+
where
19+
R: Read,
20+
{
21+
let mut iterator = input.bytes();
22+
let mut buffer = [0u8; 2];
23+
// Read the first byte, as each iteration expects that buffer 0 was set from a previous iteration.
24+
// This is not the case in the first iteration, which is therefore a special case.
25+
if let Some(byte) = iterator.next() {
26+
buffer[0] = byte?;
27+
}
28+
// create a stream of overlapping 2 byte pairs
29+
// example: [0x01, 0x02, 0x03, 0x04] should be analyzed as
30+
// [0x01, 0x02], [0x02, 0x03], [0x03, 0x04]
31+
for (index, byte) in iterator.enumerate() {
32+
buffer[1] = byte?;
33+
// check the two bytes in the buffer
34+
if verify_frame_sync(buffer) {
35+
return Ok(Some(index as u64));
36+
}
37+
// if they do not match, copy the last byte in the buffer to the front for the next iteration
38+
buffer[0] = buffer[1];
39+
}
40+
Ok(None)
41+
}
42+
1243
#[derive(PartialEq, Copy, Clone, Debug)]
1344
#[allow(missing_docs)]
1445
/// MPEG Audio version
@@ -200,3 +231,21 @@ impl XingHeader {
200231
}
201232
}
202233
}
234+
235+
#[cfg(test)]
236+
mod tests {
237+
#[test]
238+
fn search_for_frame_sync() {
239+
fn test(data: &[u8], expected_result: Option<u64>) {
240+
use super::search_for_frame_sync;
241+
assert_eq!(
242+
search_for_frame_sync(&mut Box::new(data)).unwrap(),
243+
expected_result
244+
);
245+
}
246+
247+
test(&[0xFF, 0xFB, 0x00], Some(0));
248+
test(&[0x00, 0x00, 0x01, 0xFF, 0xFB], Some(3));
249+
test(&[0x01, 0xFF], None);
250+
}
251+
}

src/mp3/read.rs

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use super::header::{verify_frame_sync, Header, XingHeader};
1+
use super::header::{search_for_frame_sync, Header, XingHeader};
22
use super::{Mp3File, Mp3Properties};
33
use crate::ape::constants::APE_PREAMBLE;
44
#[cfg(feature = "ape")]
@@ -12,7 +12,7 @@ use crate::id3::{find_id3v1, find_lyrics3v2, ID3FindResults};
1212

1313
use std::io::{Read, Seek, SeekFrom};
1414

15-
use byteorder::ReadBytesExt;
15+
use byteorder::{BigEndian, ReadBytesExt};
1616

1717
pub(super) fn read_from<R>(
1818
reader: &mut R,
@@ -80,17 +80,29 @@ where
8080
continue;
8181
}
8282
},
83-
_ if verify_frame_sync([header[0], header[1]]) => {
84-
let start = reader.seek(SeekFrom::Current(0))? - 4;
85-
let header = Header::read(u32::from_be_bytes(header))?;
86-
87-
file.first_frame_offset = Some(start);
88-
first_frame_header = Some(header);
89-
90-
// We have found the first frame
91-
break;
83+
// metadata blocks might be followed by junk bytes before the first MP3 frame begins
84+
_ => {
85+
// seek back the length of the temporary header buffer
86+
// so that all bytes are included in the search for a frame sync
87+
#[allow(clippy::neg_multiply)]
88+
let start_of_search_area = reader.seek(SeekFrom::Current(-1 * header.len() as i64))?;
89+
if let Some(first_mp3_frame_start_relative) = search_for_frame_sync(reader)? {
90+
let first_mp3_frame_start_absolute =
91+
start_of_search_area + first_mp3_frame_start_relative;
92+
93+
// read the first four bytes of the found frame
94+
reader.seek(SeekFrom::Start(first_mp3_frame_start_absolute))?;
95+
let header = Header::read(reader.read_u32::<BigEndian>()?)?;
96+
97+
file.first_frame_offset = Some(first_mp3_frame_start_absolute);
98+
first_frame_header = Some(header);
99+
100+
// We have found the first frame
101+
break;
102+
}
103+
// the search for sync bits was unsuccessful
104+
return Err(LoftyError::Mp3("File contains an invalid frame"));
92105
},
93-
_ => return Err(LoftyError::Mp3("File contains an invalid frame")),
94106
}
95107
}
96108

src/probe.rs

Lines changed: 74 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::ape::ApeFile;
22
use crate::error::{LoftyError, Result};
33
use crate::iff::aiff::AiffFile;
44
use crate::iff::wav::WavFile;
5-
use crate::mp3::header::verify_frame_sync;
5+
use crate::mp3::header::search_for_frame_sync;
66
use crate::mp3::Mp3File;
77
use crate::mp4::Mp4File;
88
use crate::ogg::flac::FlacFile;
@@ -148,42 +148,59 @@ impl<R: Read + Seek> Probe<R> {
148148
}
149149

150150
#[allow(clippy::shadow_unrelated)]
151-
fn guess_inner(&mut self) -> Result<Option<FileType>> {
151+
fn guess_inner(&mut self) -> std::io::Result<Option<FileType>> {
152+
// temporary buffer for storing 36 bytes
153+
// (36 is just a guess as to how long the data for estimating the file type might be)
152154
let mut buf = [0; 36];
153155

154-
let pos = self.inner.seek(SeekFrom::Current(0))?;
156+
// read the first 36 bytes and seek back to the starting position
157+
let starting_position = self.inner.stream_position()?;
155158
let buf_len = std::io::copy(
156-
&mut self.inner.by_ref().take(36),
159+
&mut self.inner.by_ref().take(buf.len() as u64),
157160
&mut Cursor::new(&mut buf[..]),
158161
)? as usize;
162+
self.inner.seek(SeekFrom::Start(starting_position))?;
159163

160-
self.inner.seek(SeekFrom::Start(pos))?;
161-
164+
// estimate the file type by using these 36 bytes
165+
// note that any error from `from_buffer_inner` are suppressed, as it returns an error on unknown format
162166
match FileType::from_buffer_inner(&buf[..buf_len]) {
167+
// the file type was guessed based on these bytes
163168
Ok((Some(f_ty), _)) => Ok(Some(f_ty)),
169+
// the first data block is ID3 data; this means other data can follow (e.g. APE or MP3 frames)
164170
Ok((None, id3_len)) => {
165-
self.inner
171+
// the position right after the ID3 block is the internal size value (id3_len)
172+
// added to the length of the ID3 header (which is 10 bytes),
173+
// as the size does not include the header itself
174+
let position_after_id3_block = self
175+
.inner
166176
.seek(SeekFrom::Current(i64::from(10 + id3_len)))?;
167177

168-
let mut ident = [0; 3];
169-
let buf_len = std::io::copy(
170-
&mut self.inner.by_ref().take(3),
171-
&mut Cursor::new(&mut ident[..]),
172-
)?;
173-
174-
self.inner.seek(SeekFrom::Start(pos))?;
175-
176-
if buf_len < 3 {
177-
return Err(LoftyError::UnknownFormat);
178-
}
179-
180-
if &ident == b"MAC" {
181-
Ok(Some(FileType::APE))
182-
} else if verify_frame_sync([ident[0], ident[1]]) {
183-
Ok(Some(FileType::MP3))
184-
} else {
185-
Err(LoftyError::UnknownFormat)
186-
}
178+
let file_type_after_id3_block = {
179+
// try to guess the file type after the ID3 block by inspecting the first 3 bytes
180+
let mut ident = [0; 3];
181+
std::io::copy(
182+
&mut self.inner.by_ref().take(ident.len() as u64),
183+
&mut Cursor::new(&mut ident[..]),
184+
)?;
185+
186+
if &ident == b"MAC" {
187+
Ok(Some(FileType::APE))
188+
} else {
189+
// potentially some junk bytes are between the ID3 block and the following MP3 block
190+
// search for any possible sync bits after the ID3 block
191+
self.inner.seek(SeekFrom::Start(position_after_id3_block))?;
192+
if search_for_frame_sync(&mut self.inner)?.is_some() {
193+
Ok(Some(FileType::MP3))
194+
} else {
195+
Ok(None)
196+
}
197+
}
198+
};
199+
200+
// before returning any result for a file type, seek back to the front
201+
self.inner.seek(SeekFrom::Start(starting_position))?;
202+
203+
file_type_after_id3_block
187204
},
188205
_ => Ok(None),
189206
}
@@ -247,3 +264,34 @@ where
247264
{
248265
Probe::open(path)?.read(read_properties)
249266
}
267+
268+
#[cfg(test)]
269+
mod tests {
270+
use crate::Probe;
271+
272+
#[test]
273+
fn mp3_file_id3v2_3() {
274+
// test data that contains 4 bytes of junk (0x20) between the ID3 portion and the first MP3 frame
275+
let data: [&[u8]; 4] = [
276+
// ID3v2.3 header (10 bytes)
277+
&[0x49, 0x44, 0x33, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23],
278+
// TALB frame
279+
&[
280+
0x54, 0x41, 0x4C, 0x42, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x01, 0xFF, 0xFE, 0x61,
281+
0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61,
282+
0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00,
283+
],
284+
// 4 bytes of junk
285+
&[0x20, 0x20, 0x20, 0x20],
286+
// start of MP3 frame (not all bytes are shown in this slice)
287+
&[
288+
0xFF, 0xFB, 0x50, 0xC4, 0x00, 0x03, 0xC0, 0x00, 0x01, 0xA4, 0x00, 0x00, 0x00, 0x20,
289+
0x00, 0x00, 0x34, 0x80, 0x00, 0x00, 0x04,
290+
],
291+
];
292+
let data: Vec<u8> = data.into_iter().flatten().cloned().collect();
293+
let data = std::io::Cursor::new(&data);
294+
let probe = Probe::new(data).guess_file_type().unwrap();
295+
matches!(probe.file_type(), Some(crate::FileType::MP3));
296+
}
297+
}
1.42 KB
Binary file not shown.

tests/files/mpeg.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::{set_artist, temp_file, verify_artist};
2-
use lofty::{FileType, ItemKey, ItemValue, TagItem, TagType};
2+
use lofty::{Accessor, FileType, ItemKey, ItemValue, TagItem, TagType};
33
use std::io::{Seek, SeekFrom, Write};
44

55
#[test]
@@ -19,6 +19,30 @@ fn read() {
1919
crate::verify_artist!(file, tag, TagType::Ape, "Baz artist", 1);
2020
}
2121

22+
#[test]
23+
fn read_with_junk_bytes_between_frames() {
24+
// Read a file that includes an ID3v2.3 data block followed by four bytes of junk data (0x20)
25+
let file =
26+
lofty::read_from_path("tests/files/assets/junk_between_id3_and_mp3.mp3", true).unwrap();
27+
28+
// note that the file contains ID3v2 and ID3v1 data
29+
assert_eq!(file.file_type(), &FileType::MP3);
30+
31+
let id3v2_tag = &file.tags()[0];
32+
assert_eq!(id3v2_tag.artist(), Some("artist test"));
33+
assert_eq!(id3v2_tag.album(), Some("album test"));
34+
assert_eq!(id3v2_tag.title(), Some("title test"));
35+
assert_eq!(
36+
id3v2_tag.get_string(&ItemKey::EncoderSettings),
37+
Some("Lavf58.62.100")
38+
);
39+
40+
let id3v1_tag = &file.tags()[1];
41+
assert_eq!(id3v1_tag.artist(), Some("artist test"));
42+
assert_eq!(id3v1_tag.album(), Some("album test"));
43+
assert_eq!(id3v1_tag.title(), Some("title test"));
44+
}
45+
2246
#[test]
2347
fn write() {
2448
let mut file = temp_file!("tests/files/assets/a.mp3");

0 commit comments

Comments
 (0)