From 661558c17c746e2c0af5a51fa94f785990314045 Mon Sep 17 00:00:00 2001 From: Sam Privett Date: Thu, 10 Nov 2022 21:41:07 -0800 Subject: [PATCH 1/7] Implemented `Extend`, `Extend<&'a char>`, `FromIterator`, and `FromIterator<&'a char>` for String and WString https://github.com/ros2-rust/ros2_rust/issues/246 --- rosidl_runtime_rs/src/string.rs | 120 ++++++++++++++++++++++++++++++-- 1 file changed, 115 insertions(+), 5 deletions(-) diff --git a/rosidl_runtime_rs/src/string.rs b/rosidl_runtime_rs/src/string.rs index 201889a95..6129d491b 100644 --- a/rosidl_runtime_rs/src/string.rs +++ b/rosidl_runtime_rs/src/string.rs @@ -3,6 +3,8 @@ use std::ffi::CStr; use std::fmt::{self, Debug, Display}; use std::hash::{Hash, Hasher}; use std::ops::{Deref, DerefMut}; +use std::ptr; +use std::ptr::slice_from_raw_parts; #[cfg(feature = "serde")] mod serde; @@ -135,7 +137,7 @@ macro_rules! string_impl { }; // SAFETY: Passing in a zeroed string is safe. if !unsafe { $init(&mut msg as *mut _) } { - panic!("Sinit failed"); + panic!("$init failed"); } msg } @@ -224,10 +226,53 @@ macro_rules! string_impl { } } + impl FromIterator for $string { + fn from_iter>(iter: I) -> Self { + let mut buf = <$string>::default(); + buf.extend(iter); + buf + } + } + + impl<'a> FromIterator<&'a char> for $string { + fn from_iter>(iter: I) -> Self { + let mut buf = <$string>::default(); + buf.extend(iter); + buf + } + } + + impl Extend for $string { + fn extend>(&mut self, iter: I) { + let mut v = self.to_vec(); + let iterator = iter.into_iter(); + + iterator.for_each(|c| { + v.push(c as $char_type); + }); + + // SAFETY: It's okay to pass a non-zero-terminated string here since assignn + // uses the specified length and will append the 0 to the dest string itself. + if !unsafe { + $assignn( + self as *mut _, + v.as_ptr() as *const _, + v.len(), + ) + } { + panic!("$assignn failed"); + } + } + } + + impl<'a> Extend<&'a char> for $string { + fn extend>(&mut self, iter: I) { + self.extend(iter.into_iter().cloned()); + } + } + // SAFETY: A string is a simple data structure, and therefore not thread-specific. unsafe impl Send for $string {} - // SAFETY: A string does not have interior mutability, so it can be shared. - unsafe impl Sync for $string {} impl SequenceAlloc for $string { fn sequence_init(seq: &mut Sequence, size: usize) -> bool { @@ -243,6 +288,23 @@ macro_rules! string_impl { unsafe { $sequence_copy(in_seq as *const _, out_seq as *mut _) } } } + + impl $string { + + /// Returns a copy of `self` as a vector without the trailing null byte. + pub fn to_vec(&self) -> Vec<$char_type> { + let mut v: Vec<$char_type> = vec![]; + + // SAFETY: self.data points to self.size consecutive, initialized elements and + // isn't modified externally. + unsafe { + let s = slice_from_raw_parts(self.data, self.size); + v.extend_from_slice(s.as_ref().unwrap()); + }; + + v + } + } }; } @@ -274,7 +336,7 @@ string_impl!( impl From<&str> for String { fn from(s: &str) -> Self { let mut msg = Self { - data: std::ptr::null_mut(), + data: ptr::null_mut(), size: 0, capacity: 0, }; @@ -304,7 +366,7 @@ impl String { impl From<&str> for WString { fn from(s: &str) -> Self { let mut msg = Self { - data: std::ptr::null_mut(), + data: ptr::null_mut(), size: 0, capacity: 0, }; @@ -503,4 +565,52 @@ mod tests { s.as_str().try_into().unwrap() } } + + #[test] + fn string_from_char_iterator() { + // Base char case + let expected = String::from("abc"); + let actual = "abc".chars().collect::(); + + assert_eq!(expected, actual); + + // Empty case + let expected = String::from(""); + let actual = "".chars().collect::(); + + assert_eq!(expected, actual); + } + + #[test] + fn extend_string_with_char_iterator() { + let expected = WString::from("abcdef"); + let mut actual = WString::from("abc"); + actual.extend("def".chars()); + + assert_eq!(expected, actual); + } + + #[test] + fn wstring_from_char_iterator() { + // Base char case + let expected = WString::from("abc"); + let actual = "abc".chars().collect::(); + + assert_eq!(expected, actual); + + // Empty case + let expected = WString::from(""); + let actual = "".chars().collect::(); + + assert_eq!(expected, actual); + } + + #[test] + fn extend_wstring_with_char_iterator() { + let expected = WString::from("abcdef"); + let mut actual = WString::from("abc"); + actual.extend("def".chars()); + + assert_eq!(expected, actual); + } } From d173928f898e7d3b26fcadacbf2a0402fca60667 Mon Sep 17 00:00:00 2001 From: Sam Privett Date: Thu, 10 Nov 2022 21:48:23 -0800 Subject: [PATCH 2/7] Ran `cargo fmt` --- rosidl_runtime_rs/src/string.rs | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/rosidl_runtime_rs/src/string.rs b/rosidl_runtime_rs/src/string.rs index 6129d491b..dfcb1d5ce 100644 --- a/rosidl_runtime_rs/src/string.rs +++ b/rosidl_runtime_rs/src/string.rs @@ -253,13 +253,7 @@ macro_rules! string_impl { // SAFETY: It's okay to pass a non-zero-terminated string here since assignn // uses the specified length and will append the 0 to the dest string itself. - if !unsafe { - $assignn( - self as *mut _, - v.as_ptr() as *const _, - v.len(), - ) - } { + if !unsafe { $assignn(self as *mut _, v.as_ptr() as *const _, v.len()) } { panic!("$assignn failed"); } } @@ -290,17 +284,14 @@ macro_rules! string_impl { } impl $string { - /// Returns a copy of `self` as a vector without the trailing null byte. pub fn to_vec(&self) -> Vec<$char_type> { let mut v: Vec<$char_type> = vec![]; // SAFETY: self.data points to self.size consecutive, initialized elements and // isn't modified externally. - unsafe { - let s = slice_from_raw_parts(self.data, self.size); - v.extend_from_slice(s.as_ref().unwrap()); - }; + let s = unsafe { slice_from_raw_parts(self.data, self.size).as_ref() }; + v.extend_from_slice(s.unwrap()); v } From 4d5c0528d7c56a3948abd3f2a86df02cfd368aec Mon Sep 17 00:00:00 2001 From: Sam Privett Date: Sat, 12 Nov 2022 10:00:46 -0800 Subject: [PATCH 3/7] Fixed the implementation for non-ASCII characters, re-added the Sync impl --- rosidl_runtime_rs/src/string.rs | 52 +++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/rosidl_runtime_rs/src/string.rs b/rosidl_runtime_rs/src/string.rs index dfcb1d5ce..3156f5e38 100644 --- a/rosidl_runtime_rs/src/string.rs +++ b/rosidl_runtime_rs/src/string.rs @@ -4,7 +4,6 @@ use std::fmt::{self, Debug, Display}; use std::hash::{Hash, Hasher}; use std::ops::{Deref, DerefMut}; use std::ptr; -use std::ptr::slice_from_raw_parts; #[cfg(feature = "serde")] mod serde; @@ -114,7 +113,7 @@ pub struct StringExceedsBoundsError { // There is a lot of redundancy between String and WString, which this macro aims to reduce. macro_rules! string_impl { - ($string:ty, $char_type:ty, $unsigned_char_type:ty, $string_conversion_func:ident, $init:ident, $fini:ident, $assignn:ident, $sequence_init:ident, $sequence_fini:ident, $sequence_copy:ident) => { + ($string:ty, $char_type:ty, $unsigned_char_type:ty, $string_conversion_func:ident, $encoding_func:ident, $init:ident, $fini:ident, $assignn:ident, $sequence_init:ident, $sequence_fini:ident, $sequence_copy:ident) => { #[link(name = "rosidl_runtime_c")] extern "C" { fn $init(s: *mut $string) -> bool; @@ -247,8 +246,23 @@ macro_rules! string_impl { let mut v = self.to_vec(); let iterator = iter.into_iter(); - iterator.for_each(|c| { - v.push(c as $char_type); + iterator.for_each(|c: char| { + // UTF-8 and UTF-16 encoding requires at least 4 bytes for any character. + // Technically UTF-16 could just use a buffer size of 2 here, but that's more + // trouble than its worth since we are defining this function in the macro. + // See https://doc.rust-lang.org/std/primitive.char.html#method.encode_utf8 + let mut buf = [0; 4]; + c.$encoding_func(&mut buf); + + + let filtered_bytes: Vec<$unsigned_char_type> = buf + .into_iter() + .filter(|&c| c != (0 as $unsigned_char_type)) + .collect(); + + for encoded_char in filtered_bytes { + v.push(encoded_char as $char_type); + } }); // SAFETY: It's okay to pass a non-zero-terminated string here since assignn @@ -267,6 +281,8 @@ macro_rules! string_impl { // SAFETY: A string is a simple data structure, and therefore not thread-specific. unsafe impl Send for $string {} + // SAFETY: A string does not have interior mutability, so it can be shared. + unsafe impl Sync for $string {} impl SequenceAlloc for $string { fn sequence_init(seq: &mut Sequence, size: usize) -> bool { @@ -282,20 +298,6 @@ macro_rules! string_impl { unsafe { $sequence_copy(in_seq as *const _, out_seq as *mut _) } } } - - impl $string { - /// Returns a copy of `self` as a vector without the trailing null byte. - pub fn to_vec(&self) -> Vec<$char_type> { - let mut v: Vec<$char_type> = vec![]; - - // SAFETY: self.data points to self.size consecutive, initialized elements and - // isn't modified externally. - let s = unsafe { slice_from_raw_parts(self.data, self.size).as_ref() }; - v.extend_from_slice(s.unwrap()); - - v - } - } }; } @@ -304,6 +306,7 @@ string_impl!( std::os::raw::c_char, u8, from_utf8_lossy, + encode_utf8, rosidl_runtime_c__String__init, rosidl_runtime_c__String__fini, rosidl_runtime_c__String__assignn, @@ -316,6 +319,7 @@ string_impl!( std::os::raw::c_ushort, u16, from_utf16_lossy, + encode_utf16, rosidl_runtime_c__U16String__init, rosidl_runtime_c__U16String__fini, rosidl_runtime_c__U16String__assignn, @@ -570,6 +574,12 @@ mod tests { let actual = "".chars().collect::(); assert_eq!(expected, actual); + + // Non-ascii char case + let expected = String::from("Grüß Gott! 𝕊"); + let actual = "Grüß Gott! 𝕊".chars().collect::(); + + assert_eq!(expected, actual); } #[test] @@ -594,6 +604,12 @@ mod tests { let actual = "".chars().collect::(); assert_eq!(expected, actual); + + // Non-ascii char case + let expected = WString::from("Grüß Gott! 𝕊"); + let actual = "Grüß Gott! 𝕊".chars().collect::(); + + assert_eq!(expected, actual); } #[test] From 955003a867530a23abafcfc56eecc4be5b2a89f5 Mon Sep 17 00:00:00 2001 From: Sam Privett Date: Sat, 12 Nov 2022 10:45:09 -0800 Subject: [PATCH 4/7] Ran cargo format --- rosidl_runtime_rs/src/string.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/rosidl_runtime_rs/src/string.rs b/rosidl_runtime_rs/src/string.rs index 3156f5e38..d1e0a22ac 100644 --- a/rosidl_runtime_rs/src/string.rs +++ b/rosidl_runtime_rs/src/string.rs @@ -254,7 +254,6 @@ macro_rules! string_impl { let mut buf = [0; 4]; c.$encoding_func(&mut buf); - let filtered_bytes: Vec<$unsigned_char_type> = buf .into_iter() .filter(|&c| c != (0 as $unsigned_char_type)) From 99945e624ce74e79be562a1faed34e805a1ba9bd Mon Sep 17 00:00:00 2001 From: Sam Privett Date: Sat, 12 Nov 2022 15:16:45 -0800 Subject: [PATCH 5/7] Simplified `fn extend` implementation using `std::string::String`'s --- rosidl_runtime_rs/src/string.rs | 71 +++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/rosidl_runtime_rs/src/string.rs b/rosidl_runtime_rs/src/string.rs index d1e0a22ac..7f2c5104c 100644 --- a/rosidl_runtime_rs/src/string.rs +++ b/rosidl_runtime_rs/src/string.rs @@ -243,32 +243,9 @@ macro_rules! string_impl { impl Extend for $string { fn extend>(&mut self, iter: I) { - let mut v = self.to_vec(); - let iterator = iter.into_iter(); - - iterator.for_each(|c: char| { - // UTF-8 and UTF-16 encoding requires at least 4 bytes for any character. - // Technically UTF-16 could just use a buffer size of 2 here, but that's more - // trouble than its worth since we are defining this function in the macro. - // See https://doc.rust-lang.org/std/primitive.char.html#method.encode_utf8 - let mut buf = [0; 4]; - c.$encoding_func(&mut buf); - - let filtered_bytes: Vec<$unsigned_char_type> = buf - .into_iter() - .filter(|&c| c != (0 as $unsigned_char_type)) - .collect(); - - for encoded_char in filtered_bytes { - v.push(encoded_char as $char_type); - } - }); - - // SAFETY: It's okay to pass a non-zero-terminated string here since assignn - // uses the specified length and will append the 0 to the dest string itself. - if !unsafe { $assignn(self as *mut _, v.as_ptr() as *const _, v.len()) } { - panic!("$assignn failed"); - } + let mut s = self.to_string(); + s.extend(iter); + *self = Self::from(&s); } } @@ -345,6 +322,25 @@ impl From<&str> for String { } } +impl From<&std::string::String> for String { + fn from(s: &std::string::String) -> Self { + let mut msg = Self { + data: ptr::null_mut(), + size: 0, + capacity: 0, + }; + + // SAFETY: It's okay to pass a non-zero-terminated string here since assignn uses the + // specified length and will append the 0 byte to the dest string itself. + if !unsafe { + rosidl_runtime_c__String__assignn(&mut msg as *mut _, s.as_ptr() as *const _, s.len()) + } { + panic!("rosidl_runtime_c__String__assignn failed"); + } + msg + } +} + impl String { /// Creates a CStr from this String. /// @@ -380,6 +376,29 @@ impl From<&str> for WString { } } +impl From<&std::string::String> for WString { + fn from(s: &std::string::String) -> Self { + let mut msg = Self { + data: ptr::null_mut(), + size: 0, + capacity: 0, + }; + let buf: Vec = s.encode_utf16().collect(); + // SAFETY: It's okay to pass a non-zero-terminated string here since assignn uses the + // specified length and will append the 0 to the dest string itself. + if !unsafe { + rosidl_runtime_c__U16String__assignn( + &mut msg as *mut _, + buf.as_ptr() as *const _, + buf.len(), + ) + } { + panic!("rosidl_runtime_c__U16String__assignn failed"); + } + msg + } +} + // ========================= impl for BoundedString ========================= impl Debug for BoundedString { From 0885dd752c7dd1bf6cc7bf3832cd039e152d4983 Mon Sep 17 00:00:00 2001 From: Sam Privett Date: Sat, 12 Nov 2022 15:21:26 -0800 Subject: [PATCH 6/7] Moved implementation into the macro and just call the &str from impl --- rosidl_runtime_rs/src/string.rs | 48 +++++---------------------------- 1 file changed, 6 insertions(+), 42 deletions(-) diff --git a/rosidl_runtime_rs/src/string.rs b/rosidl_runtime_rs/src/string.rs index 7f2c5104c..082ed41d4 100644 --- a/rosidl_runtime_rs/src/string.rs +++ b/rosidl_runtime_rs/src/string.rs @@ -225,6 +225,12 @@ macro_rules! string_impl { } } + impl From<&std::string::String> for $string { + fn from(s: &std::string::String) -> Self { + Self::from(s.as_str()) + } + } + impl FromIterator for $string { fn from_iter>(iter: I) -> Self { let mut buf = <$string>::default(); @@ -322,25 +328,6 @@ impl From<&str> for String { } } -impl From<&std::string::String> for String { - fn from(s: &std::string::String) -> Self { - let mut msg = Self { - data: ptr::null_mut(), - size: 0, - capacity: 0, - }; - - // SAFETY: It's okay to pass a non-zero-terminated string here since assignn uses the - // specified length and will append the 0 byte to the dest string itself. - if !unsafe { - rosidl_runtime_c__String__assignn(&mut msg as *mut _, s.as_ptr() as *const _, s.len()) - } { - panic!("rosidl_runtime_c__String__assignn failed"); - } - msg - } -} - impl String { /// Creates a CStr from this String. /// @@ -376,29 +363,6 @@ impl From<&str> for WString { } } -impl From<&std::string::String> for WString { - fn from(s: &std::string::String) -> Self { - let mut msg = Self { - data: ptr::null_mut(), - size: 0, - capacity: 0, - }; - let buf: Vec = s.encode_utf16().collect(); - // SAFETY: It's okay to pass a non-zero-terminated string here since assignn uses the - // specified length and will append the 0 to the dest string itself. - if !unsafe { - rosidl_runtime_c__U16String__assignn( - &mut msg as *mut _, - buf.as_ptr() as *const _, - buf.len(), - ) - } { - panic!("rosidl_runtime_c__U16String__assignn failed"); - } - msg - } -} - // ========================= impl for BoundedString ========================= impl Debug for BoundedString { From 55a4334d7c007a6cb73130c965a7172471176ec9 Mon Sep 17 00:00:00 2001 From: Sam Privett Date: Sun, 13 Nov 2022 11:57:42 -0800 Subject: [PATCH 7/7] PR feedback --- rosidl_runtime_rs/src/string.rs | 97 +++++++++++++++------------------ 1 file changed, 44 insertions(+), 53 deletions(-) diff --git a/rosidl_runtime_rs/src/string.rs b/rosidl_runtime_rs/src/string.rs index 082ed41d4..44043a60d 100644 --- a/rosidl_runtime_rs/src/string.rs +++ b/rosidl_runtime_rs/src/string.rs @@ -3,7 +3,6 @@ use std::ffi::CStr; use std::fmt::{self, Debug, Display}; use std::hash::{Hash, Hasher}; use std::ops::{Deref, DerefMut}; -use std::ptr; #[cfg(feature = "serde")] mod serde; @@ -113,7 +112,7 @@ pub struct StringExceedsBoundsError { // There is a lot of redundancy between String and WString, which this macro aims to reduce. macro_rules! string_impl { - ($string:ty, $char_type:ty, $unsigned_char_type:ty, $string_conversion_func:ident, $encoding_func:ident, $init:ident, $fini:ident, $assignn:ident, $sequence_init:ident, $sequence_fini:ident, $sequence_copy:ident) => { + ($string:ty, $char_type:ty, $unsigned_char_type:ty, $string_conversion_func:ident, $init:ident, $fini:ident, $assignn:ident, $sequence_init:ident, $sequence_fini:ident, $sequence_copy:ident) => { #[link(name = "rosidl_runtime_c")] extern "C" { fn $init(s: *mut $string) -> bool; @@ -127,21 +126,6 @@ macro_rules! string_impl { ) -> bool; } - impl Default for $string { - fn default() -> Self { - let mut msg = Self { - data: std::ptr::null_mut(), - size: 0, - capacity: 0, - }; - // SAFETY: Passing in a zeroed string is safe. - if !unsafe { $init(&mut msg as *mut _) } { - panic!("$init failed"); - } - msg - } - } - impl Clone for $string { fn clone(&self) -> Self { let mut msg = Self::default(); @@ -159,6 +143,21 @@ macro_rules! string_impl { } } + impl Default for $string { + fn default() -> Self { + let mut msg = Self { + data: std::ptr::null_mut(), + size: 0, + capacity: 0, + }; + // SAFETY: Passing in a zeroed string is safe. + if !unsafe { $init(&mut msg as *mut _) } { + panic!("$init failed"); + } + msg + } + } + // It's not guaranteed that there are no interior null bytes, hence no Deref to CStr. // This does not include the null byte at the end! impl Deref for $string { @@ -201,33 +200,17 @@ macro_rules! string_impl { impl Eq for $string {} - impl Hash for $string { - fn hash(&self, state: &mut H) { - self.deref().hash(state) - } - } - - impl PartialEq for $string { - fn eq(&self, other: &Self) -> bool { - self.deref().eq(other.deref()) - } - } - - impl Ord for $string { - fn cmp(&self, other: &Self) -> Ordering { - self.deref().cmp(other.deref()) - } - } - - impl PartialOrd for $string { - fn partial_cmp(&self, other: &Self) -> Option { - self.deref().partial_cmp(other.deref()) + impl Extend for $string { + fn extend>(&mut self, iter: I) { + let mut s = self.to_string(); + s.extend(iter); + *self = Self::from(s.as_str()); } } - impl From<&std::string::String> for $string { - fn from(s: &std::string::String) -> Self { - Self::from(s.as_str()) + impl<'a> Extend<&'a char> for $string { + fn extend>(&mut self, iter: I) { + self.extend(iter.into_iter().cloned()); } } @@ -247,17 +230,27 @@ macro_rules! string_impl { } } - impl Extend for $string { - fn extend>(&mut self, iter: I) { - let mut s = self.to_string(); - s.extend(iter); - *self = Self::from(&s); + impl Hash for $string { + fn hash(&self, state: &mut H) { + self.deref().hash(state) } } - impl<'a> Extend<&'a char> for $string { - fn extend>(&mut self, iter: I) { - self.extend(iter.into_iter().cloned()); + impl Ord for $string { + fn cmp(&self, other: &Self) -> Ordering { + self.deref().cmp(other.deref()) + } + } + + impl PartialEq for $string { + fn eq(&self, other: &Self) -> bool { + self.deref().eq(other.deref()) + } + } + + impl PartialOrd for $string { + fn partial_cmp(&self, other: &Self) -> Option { + self.deref().partial_cmp(other.deref()) } } @@ -288,7 +281,6 @@ string_impl!( std::os::raw::c_char, u8, from_utf8_lossy, - encode_utf8, rosidl_runtime_c__String__init, rosidl_runtime_c__String__fini, rosidl_runtime_c__String__assignn, @@ -301,7 +293,6 @@ string_impl!( std::os::raw::c_ushort, u16, from_utf16_lossy, - encode_utf16, rosidl_runtime_c__U16String__init, rosidl_runtime_c__U16String__fini, rosidl_runtime_c__U16String__assignn, @@ -313,7 +304,7 @@ string_impl!( impl From<&str> for String { fn from(s: &str) -> Self { let mut msg = Self { - data: ptr::null_mut(), + data: std::ptr::null_mut(), size: 0, capacity: 0, }; @@ -343,7 +334,7 @@ impl String { impl From<&str> for WString { fn from(s: &str) -> Self { let mut msg = Self { - data: ptr::null_mut(), + data: std::ptr::null_mut(), size: 0, capacity: 0, };