From 553ab271a31a9573fb6e95d03d8f4d00e17d6511 Mon Sep 17 00:00:00 2001 From: Arcterus Date: Sat, 6 Dec 2014 02:35:26 -0800 Subject: [PATCH 1/3] serialize: base64: allow LF in addition to CRLF and optimize slightly It is useful to have configurable newlines in base64 as the standard leaves that for the implementation to decide. GNU `base64` apparently uses LF, which meant in `uutils` we had to manually convert the CRLF to LF. This made the program very slow for large inputs. [breaking-change] --- src/libserialize/base64.rs | 92 ++++++++++++++++++++++++++------------ src/libserialize/lib.rs | 2 +- 2 files changed, 64 insertions(+), 30 deletions(-) diff --git a/src/libserialize/base64.rs b/src/libserialize/base64.rs index dd5039c9b8283..1cb8fdd025d38 100644 --- a/src/libserialize/base64.rs +++ b/src/libserialize/base64.rs @@ -1,4 +1,4 @@ -// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT +// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -14,6 +14,7 @@ pub use self::FromBase64Error::*; pub use self::CharacterSet::*; +pub use self::Newline::*; use std::fmt; use std::error; @@ -28,10 +29,22 @@ pub enum CharacterSet { impl Copy for CharacterSet {} +/// Available newline types +pub enum Newline { + /// A linefeed (i.e. Unix-style newline) + LF, + /// A carriage return and a linefeed (i.e. Windows-style newline) + CRLF +} + +impl Copy for Newline {} + /// Contains configuration parameters for `to_base64`. pub struct Config { /// Character set to use pub char_set: CharacterSet, + /// Newline to use + pub newline: Newline, /// True to pad output with `=` characters pub pad: bool, /// `Some(len)` to wrap lines at `len`, `None` to disable line wrapping @@ -42,15 +55,15 @@ impl Copy for Config {} /// Configuration for RFC 4648 standard base64 encoding pub static STANDARD: Config = - Config {char_set: Standard, pad: true, line_length: None}; + Config {char_set: Standard, newline: CRLF, pad: true, line_length: None}; /// Configuration for RFC 4648 base64url encoding pub static URL_SAFE: Config = - Config {char_set: UrlSafe, pad: false, line_length: None}; + Config {char_set: UrlSafe, newline: CRLF, pad: false, line_length: None}; /// Configuration for RFC 2045 MIME base64 encoding pub static MIME: Config = - Config {char_set: Standard, pad: true, line_length: Some(76)}; + Config {char_set: Standard, newline: CRLF, pad: true, line_length: Some(76)}; static STANDARD_CHARS: &'static[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ abcdefghijklmnopqrstuvwxyz\ @@ -87,24 +100,29 @@ impl ToBase64 for [u8] { UrlSafe => URLSAFE_CHARS }; - let mut v = Vec::new(); + // In general, this Vec only needs (4/3) * self.len() memory, but + // addition is faster than multiplication and division. + let mut v = Vec::with_capacity(self.len() + self.len()); let mut i = 0; let mut cur_length = 0; let len = self.len(); - while i < len - (len % 3) { - match config.line_length { - Some(line_length) => - if cur_length >= line_length { - v.push(b'\r'); - v.push(b'\n'); - cur_length = 0; - }, - None => () + let mod_len = len % 3; + let cond_len = len - mod_len; + while i < cond_len { + let (first, second, third) = (self[i], self[i + 1], self[i + 2]); + if let Some(line_length) = config.line_length { + if cur_length >= line_length { + v.push_all(match config.newline { + LF => b"\n", + CRLF => b"\r\n" + }); + cur_length = 0; + } } - let n = (self[i] as u32) << 16 | - (self[i + 1] as u32) << 8 | - (self[i + 2] as u32); + let n = (first as u32) << 16 | + (second as u32) << 8 | + (third as u32); // This 24-bit number gets separated into four 6-bit numbers. v.push(bytes[((n >> 18) & 63) as uint]); @@ -116,20 +134,20 @@ impl ToBase64 for [u8] { i += 3; } - if len % 3 != 0 { - match config.line_length { - Some(line_length) => - if cur_length >= line_length { - v.push(b'\r'); - v.push(b'\n'); - }, - None => () + if mod_len != 0 { + if let Some(line_length) = config.line_length { + if cur_length >= line_length { + v.push_all(match config.newline { + LF => b"\n", + CRLF => b"\r\n" + }); + } } } // Heh, would be cool if we knew this was exhaustive // (the dream of bounded integer types) - match len % 3 { + match mod_len { 0 => (), 1 => { let n = (self[i] as u32) << 16; @@ -232,7 +250,7 @@ impl FromBase64 for str { impl FromBase64 for [u8] { fn from_base64(&self) -> Result, FromBase64Error> { - let mut r = Vec::new(); + let mut r = Vec::with_capacity(self.len()); let mut buf: u32 = 0; let mut modulus = 0i; @@ -288,7 +306,7 @@ impl FromBase64 for [u8] { mod tests { extern crate test; use self::test::Bencher; - use base64::{Config, FromBase64, ToBase64, STANDARD, URL_SAFE}; + use base64::{Config, FromBase64, ToBase64, STANDARD, URL_SAFE, LF}; #[test] fn test_to_base64_basic() { @@ -302,7 +320,7 @@ mod tests { } #[test] - fn test_to_base64_line_break() { + fn test_to_base64_crlf_line_break() { assert!(![0u8, ..1000].to_base64(Config {line_length: None, ..STANDARD}) .contains("\r\n")); assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4), @@ -310,6 +328,18 @@ mod tests { "Zm9v\r\nYmFy"); } + #[test] + fn test_to_base64_lf_line_break() { + assert!(![0u8, ..1000].to_base64(Config {line_length: None, newline: LF, + ..STANDARD}) + .as_slice() + .contains("\n")); + assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4), + newline: LF, + ..STANDARD}), + "Zm9v\nYmFy".to_string()); + } + #[test] fn test_to_base64_padding() { assert_eq!("f".as_bytes().to_base64(Config {pad: false, ..STANDARD}), "Zg"); @@ -344,6 +374,10 @@ mod tests { b"foobar"); assert_eq!("Zm9vYg==\r\n".from_base64().unwrap(), b"foob"); + assert_eq!("Zm9v\nYmFy".from_base64().unwrap(), + b"foobar"); + assert_eq!("Zm9vYg==\n".from_base64().unwrap(), + b"foob"); } #[test] diff --git a/src/libserialize/lib.rs b/src/libserialize/lib.rs index 9711d5c7209be..1cff4c334e743 100644 --- a/src/libserialize/lib.rs +++ b/src/libserialize/lib.rs @@ -23,7 +23,7 @@ Core encoding and decoding interfaces. html_root_url = "http://doc.rust-lang.org/nightly/", html_playground_url = "http://play.rust-lang.org/")] #![allow(unknown_features)] -#![feature(macro_rules, default_type_params, phase, slicing_syntax, globs)] +#![feature(macro_rules, default_type_params, phase, slicing_syntax, globs, if_let)] // test harness access #[cfg(test)] From a943a7a4e5cef797b8fdb946f3925a6ef705ca98 Mon Sep 17 00:00:00 2001 From: Arcterus Date: Sat, 6 Dec 2014 10:58:18 -0800 Subject: [PATCH 2/3] serialize: base64: improve newline handling speed --- src/libserialize/base64.rs | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/libserialize/base64.rs b/src/libserialize/base64.rs index 1cb8fdd025d38..17bb3dadaebc8 100644 --- a/src/libserialize/base64.rs +++ b/src/libserialize/base64.rs @@ -14,7 +14,6 @@ pub use self::FromBase64Error::*; pub use self::CharacterSet::*; -pub use self::Newline::*; use std::fmt; use std::error; @@ -55,15 +54,15 @@ impl Copy for Config {} /// Configuration for RFC 4648 standard base64 encoding pub static STANDARD: Config = - Config {char_set: Standard, newline: CRLF, pad: true, line_length: None}; + Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: None}; /// Configuration for RFC 4648 base64url encoding pub static URL_SAFE: Config = - Config {char_set: UrlSafe, newline: CRLF, pad: false, line_length: None}; + Config {char_set: UrlSafe, newline: Newline::CRLF, pad: false, line_length: None}; /// Configuration for RFC 2045 MIME base64 encoding pub static MIME: Config = - Config {char_set: Standard, newline: CRLF, pad: true, line_length: Some(76)}; + Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: Some(76)}; static STANDARD_CHARS: &'static[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ abcdefghijklmnopqrstuvwxyz\ @@ -108,14 +107,15 @@ impl ToBase64 for [u8] { let len = self.len(); let mod_len = len % 3; let cond_len = len - mod_len; + let newline = match config.newline { + Newline::LF => b"\n", + Newline::CRLF => b"\r\n" + }; while i < cond_len { let (first, second, third) = (self[i], self[i + 1], self[i + 2]); if let Some(line_length) = config.line_length { if cur_length >= line_length { - v.push_all(match config.newline { - LF => b"\n", - CRLF => b"\r\n" - }); + v.push_all(newline); cur_length = 0; } } @@ -137,10 +137,7 @@ impl ToBase64 for [u8] { if mod_len != 0 { if let Some(line_length) = config.line_length { if cur_length >= line_length { - v.push_all(match config.newline { - LF => b"\n", - CRLF => b"\r\n" - }); + v.push_all(newline); } } } @@ -306,7 +303,7 @@ impl FromBase64 for [u8] { mod tests { extern crate test; use self::test::Bencher; - use base64::{Config, FromBase64, ToBase64, STANDARD, URL_SAFE, LF}; + use base64::{Config, Newline, FromBase64, ToBase64, STANDARD, URL_SAFE}; #[test] fn test_to_base64_basic() { @@ -330,12 +327,13 @@ mod tests { #[test] fn test_to_base64_lf_line_break() { - assert!(![0u8, ..1000].to_base64(Config {line_length: None, newline: LF, + assert!(![0u8, ..1000].to_base64(Config {line_length: None, + newline: Newline::LF, ..STANDARD}) .as_slice() .contains("\n")); assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4), - newline: LF, + newline: Newline::LF, ..STANDARD}), "Zm9v\nYmFy".to_string()); } From a119ad83c73c7e1c99c7e21fb2ab21bd9521077a Mon Sep 17 00:00:00 2001 From: Arcterus Date: Tue, 9 Dec 2014 00:21:43 -0800 Subject: [PATCH 3/3] serialize: base64: remove some .as_bytes() from the tests --- src/libserialize/base64.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/libserialize/base64.rs b/src/libserialize/base64.rs index 17bb3dadaebc8..59faf75c0c30c 100644 --- a/src/libserialize/base64.rs +++ b/src/libserialize/base64.rs @@ -320,8 +320,8 @@ mod tests { fn test_to_base64_crlf_line_break() { assert!(![0u8, ..1000].to_base64(Config {line_length: None, ..STANDARD}) .contains("\r\n")); - assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4), - ..STANDARD}), + assert_eq!(b"foobar".to_base64(Config {line_length: Some(4), + ..STANDARD}), "Zm9v\r\nYmFy"); } @@ -332,10 +332,10 @@ mod tests { ..STANDARD}) .as_slice() .contains("\n")); - assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4), - newline: Newline::LF, - ..STANDARD}), - "Zm9v\nYmFy".to_string()); + assert_eq!(b"foobar".to_base64(Config {line_length: Some(4), + newline: Newline::LF, + ..STANDARD}), + "Zm9v\nYmFy"); } #[test]