From 0a6a71a02c6a20a460a0cc335abf125479dcf078 Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Sun, 5 Jul 2015 18:17:37 -0400 Subject: [PATCH 1/5] Implement OsStr::from_bytes_slice() Should be from_bytes(), since that is the name of the method in OsString; however this is already used in OsStrExt on Unix. Thus from_bytes_slice(), which makes it clear why the name is different (this one operates on slices without copying). --- src/libstd/ffi/os_str.rs | 18 ++++++++++ src/test/run-pass/osstr_conversions.rs | 49 ++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 src/test/run-pass/osstr_conversions.rs diff --git a/src/libstd/ffi/os_str.rs b/src/libstd/ffi/os_str.rs index 97bf33335b02a..64631e0ad9a94 100644 --- a/src/libstd/ffi/os_str.rs +++ b/src/libstd/ffi/os_str.rs @@ -41,6 +41,7 @@ use mem; use string::String; use ops; use cmp; +use str; use hash::{Hash, Hasher}; use vec::Vec; @@ -265,6 +266,23 @@ impl OsStr { } } + /// Converts a byte slice to an `OsStr` slice. + /// + /// # Platform behavior + /// + /// On Unix systems, this is a no-op. + /// + /// On Windows systems, only UTF-8 byte sequences will successfully + /// convert; non UTF-8 data will produce `None`. + #[unstable(feature = "convert", reason = "recently added")] + pub fn from_bytes_slice(bytes: &[u8]) -> Option<&OsStr> { + if cfg!(windows) { + str::from_utf8(bytes).ok().map(|s| s.as_ref()) + } else { + Some(unsafe { mem::transmute(bytes) }) + } + } + /// Creates a `CString` containing this `OsStr` data. /// /// Fails if the `OsStr` contains interior nulls. diff --git a/src/test/run-pass/osstr_conversions.rs b/src/test/run-pass/osstr_conversions.rs new file mode 100644 index 0000000000000..01d0c0e75e385 --- /dev/null +++ b/src/test/run-pass/osstr_conversions.rs @@ -0,0 +1,49 @@ +// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. 
+// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![feature(convert)] + +use std::ffi::{OsStr, OsString}; + +fn main() { + // Valid UTF-8 + let vec1: Vec<u8> = b"t\xC3\xA9st".to_vec(); + let oso1: OsString = OsString::from_bytes(vec1).unwrap(); + assert!(oso1.to_bytes() == Some(b"t\xC3\xA9st")); + assert!(oso1.to_str() == Some("t\u{E9}st")); + // Not UTF-8 + let vec2: Vec<u8> = b"t\xE9st".to_vec(); + let oso2: OsString = OsString::from_bytes(vec2).unwrap(); + if cfg!(windows) { + assert!(oso2.to_bytes() == None); + } else { + assert!(oso2.to_bytes() == Some(b"t\xE9st")); + } + assert_eq!(oso2.to_str(), None); + + // Valid UTF-8 + let by1: &[u8] = b"t\xC3\xA9st"; + let oss1: &OsStr = OsStr::from_bytes_slice(by1).unwrap(); + assert_eq!(oss1.to_bytes().unwrap().as_ptr(), by1.as_ptr()); + assert_eq!(oss1.to_str().unwrap().as_ptr(), by1.as_ptr()); + // Not UTF-8 + let by2: &[u8] = b"t\xE9st"; + let oss2: &OsStr = OsStr::from_bytes_slice(by2).unwrap(); + if cfg!(windows) { + assert_eq!(oss2.to_bytes(), None); + } else { + assert_eq!(oss2.to_bytes().unwrap().as_ptr(), by2.as_ptr()); + } + assert_eq!(oss2.to_str(), None); + + if cfg!(windows) { + // FIXME: needs valid-windows-utf16-invalid-unicode test cases + } +} From 094ca4a0eed81c077d7b2fbd621efd4e2d233b7a Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Wed, 29 Jul 2015 16:51:28 -0400 Subject: [PATCH 2/5] Rename from_bytes_slice() to from_platform_bytes() --- src/libstd/ffi/os_str.rs | 2 +- src/test/run-pass/osstr_conversions.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libstd/ffi/os_str.rs b/src/libstd/ffi/os_str.rs index 64631e0ad9a94..094996c823114 100644 --- a/src/libstd/ffi/os_str.rs +++ b/src/libstd/ffi/os_str.rs @@ -275,7 +275,7 @@ impl OsStr { /// On Windows systems, only UTF-8 byte sequences will successfully /// convert; non UTF-8 data will 
produce `None`. #[unstable(feature = "convert", reason = "recently added")] - pub fn from_bytes_slice(bytes: &[u8]) -> Option<&OsStr> { + pub fn from_platform_bytes(bytes: &[u8]) -> Option<&OsStr> { if cfg!(windows) { str::from_utf8(bytes).ok().map(|s| s.as_ref()) } else { diff --git a/src/test/run-pass/osstr_conversions.rs b/src/test/run-pass/osstr_conversions.rs index 01d0c0e75e385..7e2004e40576b 100644 --- a/src/test/run-pass/osstr_conversions.rs +++ b/src/test/run-pass/osstr_conversions.rs @@ -30,12 +30,12 @@ fn main() { // Valid UTF-8 let by1: &[u8] = b"t\xC3\xA9st"; - let oss1: &OsStr = OsStr::from_bytes_slice(by1).unwrap(); + let oss1: &OsStr = OsStr::from_platform_bytes(by1).unwrap(); assert_eq!(oss1.to_bytes().unwrap().as_ptr(), by1.as_ptr()); assert_eq!(oss1.to_str().unwrap().as_ptr(), by1.as_ptr()); // Not UTF-8 let by2: &[u8] = b"t\xE9st"; - let oss2: &OsStr = OsStr::from_bytes_slice(by2).unwrap(); + let oss2: &OsStr = OsStr::from_platform_bytes(by2).unwrap(); if cfg!(windows) { assert_eq!(oss2.to_bytes(), None); } else { From 0d62bdb5e979b5c56f9ab2b28adc7fb2916faa89 Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Wed, 29 Jul 2015 16:59:57 -0400 Subject: [PATCH 3/5] Rename OsString::from_bytes() as well OsString::from_bytes() is now OsString::from_platform_bytes(), to match OsStr::from_platform_bytes(). The method was still unstable. --- src/libstd/ffi/os_str.rs | 2 +- src/test/run-pass/osstr_conversions.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libstd/ffi/os_str.rs b/src/libstd/ffi/os_str.rs index 094996c823114..2389bd9f1e33d 100644 --- a/src/libstd/ffi/os_str.rs +++ b/src/libstd/ffi/os_str.rs @@ -78,7 +78,7 @@ impl OsString { /// On Windows system, only UTF-8 byte sequences will successfully /// convert; non UTF-8 data will produce `None`. 
#[unstable(feature = "convert", reason = "recently added")] - pub fn from_bytes<B>(bytes: B) -> Option<OsString> where B: Into<Vec<u8>> { + pub fn from_platform_bytes<B>(bytes: B) -> Option<OsString> where B: Into<Vec<u8>> { #[cfg(unix)] fn from_bytes_inner(vec: Vec<u8>) -> Option<OsString> { use os::unix::ffi::OsStringExt; diff --git a/src/test/run-pass/osstr_conversions.rs b/src/test/run-pass/osstr_conversions.rs index 7e2004e40576b..d7ba15e716122 100644 --- a/src/test/run-pass/osstr_conversions.rs +++ b/src/test/run-pass/osstr_conversions.rs @@ -15,12 +15,12 @@ use std::ffi::{OsStr, OsString}; fn main() { // Valid UTF-8 let vec1: Vec<u8> = b"t\xC3\xA9st".to_vec(); - let oso1: OsString = OsString::from_bytes(vec1).unwrap(); + let oso1: OsString = OsString::from_platform_bytes(vec1).unwrap(); assert!(oso1.to_bytes() == Some(b"t\xC3\xA9st")); assert!(oso1.to_str() == Some("t\u{E9}st")); // Not UTF-8 let vec2: Vec<u8> = b"t\xE9st".to_vec(); - let oso2: OsString = OsString::from_bytes(vec2).unwrap(); + let oso2: OsString = OsString::from_platform_bytes(vec2).unwrap(); if cfg!(windows) { assert!(oso2.to_bytes() == None); } else { From b1a0a5b9c462aee5a6efc9bb7c3d9e6cd719afcc Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Thu, 30 Jul 2015 15:06:05 -0400 Subject: [PATCH 4/5] Use OsStrExt::from_bytes() instead of transmute() --- src/libstd/ffi/os_str.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/libstd/ffi/os_str.rs b/src/libstd/ffi/os_str.rs index 2389bd9f1e33d..6fc6e0e709489 100644 --- a/src/libstd/ffi/os_str.rs +++ b/src/libstd/ffi/os_str.rs @@ -279,7 +279,8 @@ impl OsStr { if cfg!(windows) { str::from_utf8(bytes).ok().map(|s| s.as_ref()) } else { - Some(unsafe { mem::transmute(bytes) }) + use os::unix::ffi::OsStrExt; + Some(<OsStr as OsStrExt>::from_bytes(bytes)) } } From 3cd5b9c14aeb3212f8736f9dd02b2939d6d6b70a Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Wed, 29 Jul 2015 17:00:57 -0400 Subject: [PATCH 5/5] Make from_bytes() a deprecated alias of new name --- src/libstd/ffi/os_str.rs | 15 +++++++++++++++ 1 file 
changed, 15 insertions(+) diff --git a/src/libstd/ffi/os_str.rs b/src/libstd/ffi/os_str.rs index 6fc6e0e709489..add76cce8f8ce 100644 --- a/src/libstd/ffi/os_str.rs +++ b/src/libstd/ffi/os_str.rs @@ -68,6 +68,21 @@ impl OsString { OsString { inner: Buf::from_string(String::new()) } } + /// Constructs an `OsString` from a byte sequence. + /// + /// # Platform behavior + /// + /// On Unix systems, any byte sequence can be successfully + /// converted into an `OsString`. + /// + /// On Windows systems, only UTF-8 byte sequences will successfully + /// convert; non UTF-8 data will produce `None`. + #[deprecated(reason = "Renamed to from_platform_bytes", since = "1.2.0")] + #[unstable(feature = "convert", reason = "recently added")] + pub fn from_bytes<B>(bytes: B) -> Option<OsString> where B: Into<Vec<u8>> { + OsString::from_platform_bytes(bytes) + } + /// Constructs an `OsString` from a byte sequence. /// /// # Platform behavior